1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
// Tag patterns: each captures the attribute text between the tag name and the
// first following '>' (reluctant match; DOTALL lets it span line breaks).
Pattern imgPattern = Pattern.compile("<img (.+?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
// NOTE(review): videoPattern and filePattern are declared but not used in this fragment.
Pattern videoPattern = Pattern.compile("<video (.+?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
Pattern filePattern = Pattern.compile("<a (.+?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
// Matches a URL that starts with "http://", "https://" or "www.".
// Neither quotes (") nor angle brackets are in the character classes, so a
// match stops at the closing quote of an attribute value.
Pattern urlPattern = Pattern.compile("(http(s?):\\/\\/|www\\.)(([\\w\\-]+\\.){1,}?([\\w\\-.~]+\\/?)*[\\p{Alnum}.,%_=?&#\\-+()\\[\\]\\*$~@!:/{};']*)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
// Marker text an <img> tag must contain for its URL to be rewritten.
private String keyword = "cms-download";
// Sample input.
String content = "<img class=\"cms-download\" src=\"http://abc.com\">abc";

// Pass 1: collect the first URL of every marked <img> tag together with its
// replacement. oldUrl.get(i) is replaced by newUrl.get(i).
Matcher imgMatcher = imgPattern.matcher(content);
List<String> oldUrl = new ArrayList<>();
List<String> newUrl = new ArrayList<>();
while (imgMatcher.find()) {
    // Attribute text of the tag. The trailing '>' is not needed: neither the
    // keyword nor the URL pattern can match it.
    String imgTag = imgMatcher.group(1);
    if (!imgTag.contains(keyword)) {
        continue;
    }
    Matcher urlMatcher = urlPattern.matcher(imgTag);
    if (!urlMatcher.find()) {
        continue;
    }
    // Group 1 is the scheme prefix and starts where the whole match starts,
    // so the whole match is the URL.
    String url = urlMatcher.group();
    // Record each distinct URL once, so a repeated image is downloaded once.
    if (!oldUrl.contains(url)) {
        oldUrl.add(url);
        // downloadImage and site are defined outside this fragment;
        // presumably this returns the URL of the stored copy -- TODO confirm.
        newUrl.add(downloadImage(url, site));
    }
}

// Pass 2: rewrite every occurrence of a collected URL in a single left-to-right
// scan. Chained String.replace calls would corrupt the result when one URL is a
// prefix of another, or when a replacement contains a later original URL. Here
// the longest original URL wins at each position and inserted text is never
// scanned again.
if (!oldUrl.isEmpty()) {
    StringBuilder rewritten = new StringBuilder(content.length());
    int pos = 0;
    while (pos < content.length()) {
        int best = -1;
        for (int i = 0; i < oldUrl.size(); i++) {
            String candidate = oldUrl.get(i);
            if (content.startsWith(candidate, pos)
                    && (best < 0 || candidate.length() > oldUrl.get(best).length())) {
                best = i;
            }
        }
        if (best >= 0) {
            rewritten.append(newUrl.get(best));
            pos += oldUrl.get(best).length();
        } else {
            rewritten.append(content.charAt(pos));
            pos++;
        }
    }
    content = rewritten.toString();
}
return content;
|