Java Regex

示例

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
// Tag matchers: each requires the tag name followed by a single literal space, then lazily
// captures everything up to the first '>' as group 1 (the tag's attribute text).
// CASE_INSENSITIVE lets the tag name match in any case; DOTALL lets '.' cross line breaks so
// attributes spread over several lines are still captured. MULTILINE only changes the meaning
// of '^' and '$' anchors, and none of these patterns contains an anchor, so it has no effect here.
// NOTE(review): a tag whose name is followed by a newline or tab instead of a space will not match.
Pattern imgPattern = Pattern.compile("<img (.+?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
Pattern videoPattern = Pattern.compile("<video (.+?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
// Matches opening <a ...> tags.
Pattern filePattern = Pattern.compile("<a (.+?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);

// URL matcher. Group 1 is the prefix ("http://", "https://" or "www."); it sits at the very
// start of the pattern, so start(1) always equals the start of the whole match. The prefix must
// be followed by at least one dot-terminated label ("abc."), then optional path-like segments and
// a trailing run of characters from the listed class. A double quote is not in any of the
// character classes, so a match stops at the closing quote of an HTML attribute value.
// The '$' inside the character class is a literal dollar sign, not an anchor.
Pattern urlPattern = Pattern.compile("(http(s?):\\/\\/|www\\.)(([\\w\\-]+\\.){1,}?([\\w\\-.~]+\\/?)*[\\p{Alnum}.,%_=?&#\\-+()\\[\\]\\*$~@!:/{};']*)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);

// Marker text searched for inside an <img> tag's attribute text; only tags that contain it
// have their URL rewritten.
private String keyword = "cms-download";

// Sample input for the example.
String content = "<img class=\"cms-download\" src=\"http://abc.com\">abc";

// For every <img ...> tag whose attribute text contains `keyword`, replace the first URL found
// in that tag with the value returned by downloadImage(url, site), and return the rewritten
// content.
//
// The replacement is spliced in at the exact offsets where the URL was matched, in one pass
// over the original content. A chain of global String.replace calls is deliberately avoided:
// when one collected URL is a prefix of another (e.g. ".../a" and ".../a.png"), or when a
// replacement value itself contains one of the original URLs, each global replace would
// corrupt text that a previous or later replace is responsible for, and it would also rewrite
// identical URLs that sit outside the tagged <img> tags.
Matcher imgMatcher = imgPattern.matcher(content);

// Parallel lists used as a small cache (oldUrl[i] -> newUrl[i]) so that a URL referenced by
// several tags is passed to downloadImage only once.
List<String> oldUrl = new ArrayList<>();
List<String> newUrl = new ArrayList<>();

StringBuilder rewritten = new StringBuilder(content.length());
int copiedUpTo = 0; // index in `content` up to which text has already been appended

while (imgMatcher.find()) {
    // Attribute text of the tag, from the start of group 1 through the closing '>'.
    int tagStart = imgMatcher.start(1);
    String imgTag = content.substring(tagStart, imgMatcher.end());
    if (!imgTag.contains(keyword)) {
        continue;
    }

    Matcher urlMatcher = urlPattern.matcher(imgTag);
    if (!urlMatcher.find()) {
        continue;
    }

    // Group 1 opens the URL pattern, so the whole match is the URL.
    String url = urlMatcher.group();

    String replacement;
    int known = oldUrl.indexOf(url);
    if (known >= 0) {
        replacement = newUrl.get(known);
    } else {
        replacement = downloadImage(url, site);
        oldUrl.add(url);
        newUrl.add(replacement);
    }
    if (replacement == null) {
        // NOTE(review): assumes a null result means "no replacement available"; the original
        // URL is left in place rather than failing. Confirm against downloadImage's contract.
        continue;
    }

    // Offsets from urlMatcher are relative to imgTag; shift them back into `content`.
    rewritten.append(content, copiedUpTo, tagStart + urlMatcher.start());
    rewritten.append(replacement);
    copiedUpTo = tagStart + urlMatcher.end();
}

// Copy whatever follows the last rewritten URL (or the whole content if nothing matched).
rewritten.append(content, copiedUpTo, content.length());
return rewritten.toString();