Java:过滤 HTML 代码中的标签,获取文本内容
/**
 * Strips markup from an HTML fragment and returns only its text content.
 *
 * <p>Processing order:
 * <ol>
 *   <li>{@code <script>} and {@code <style>} elements are removed together with
 *       their bodies (matched case-insensitively);</li>
 *   <li>HTML comments are removed;</li>
 *   <li>every remaining {@code <...>} tag is removed;</li>
 *   <li>the {@code &nbsp;} entity is removed;</li>
 *   <li>all whitespace is removed, including the non-breaking space (U+00A0)
 *       and the ideographic space (U+3000).</li>
 * </ol>
 *
 * <p>Limitations: this is a regex-based filter, not an HTML parser. A bare
 * {@code <} followed later by {@code >} in plain text is treated as a tag,
 * entities other than {@code &nbsp;} are left undecoded, and because ALL
 * whitespace is dropped, words separated by spaces are joined together
 * (e.g. {@code "Hello World"} becomes {@code "HelloWorld"}).
 *
 * @param htmlStr the HTML source; may be {@code null}
 * @return the text with tags and whitespace removed; an empty string when
 *         {@code htmlStr} is {@code null} or contains no text
 */
public static String delHtmlTags(String htmlStr) {
    if (htmlStr == null) {
        return "";
    }

    // (?i): tag names are case-insensitive in HTML, so <SCRIPT> must match too.
    // \b keeps the match anchored to the exact tag name; \s* tolerates "</script >".
    String scriptRegex = "(?i)<script\\b[^>]*>[\\s\\S]*?</script\\s*>";
    String styleRegex = "(?i)<style\\b[^>]*>[\\s\\S]*?</style\\s*>";
    // Comments may legally contain '>', which the generic tag regex would cut short.
    String commentRegex = "<!--[\\s\\S]*?-->";
    String htmlRegex = "<[^>]+>";
    // \s does not cover U+00A0 / U+3000 by default, so they are listed explicitly
    // (as regex escapes, so the intent survives copy/paste of this source).
    String spaceRegex = "[\\s\\u00A0\\u3000]+";

    // Script/style go first so their bodies never reach the generic passes.
    String text = htmlStr.replaceAll(scriptRegex, "");
    text = text.replaceAll(styleRegex, "");
    text = text.replaceAll(commentRegex, "");
    text = text.replaceAll(htmlRegex, "");
    text = text.replace("&nbsp;", "");
    text = text.replaceAll(spaceRegex, "");

    // trim() additionally drops leading/trailing control characters (<= U+0020)
    // that \s does not match.
    return text.trim();
}