本文整理了Java中org.jsoup.Jsoup.parse()方法的一些代码示例,展示了Jsoup.parse()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Jsoup.parse()方法的具体详情如下:
包路径:org.jsoup.Jsoup
类名称:Jsoup
方法名:parse
[英]Parse the contents of a file as HTML. The location of the file is used as the base URI to qualify relative URLs.
[中]将文件内容解析为HTML。文件的位置用作基本URI以限定相对URL。
代码示例来源:origin: code4craft/webmagic
/**
 * Creates an instance by parsing {@code text} with {@code url} passed as the base URI.
 * If parsing throws, a warning is logged and the document stays {@code null}.
 *
 * @param text HTML source to parse
 * @param url  base URI handed to {@code Jsoup.parse}
 */
public Html(String text, String url) {
    // Start from the failure state; only a successful parse overwrites it.
    this.document = null;
    try {
        this.document = Jsoup.parse(text, url);
    } catch (Exception e) {
        logger.warn("parse document error ", e);
    }
}
代码示例来源:origin: code4craft/webmagic
/**
 * Creates an instance by parsing {@code text} as HTML.
 * If parsing throws, a warning is logged and the document stays {@code null}.
 *
 * @param text HTML source to parse
 */
public Html(String text) {
    // Start from the failure state; only a successful parse overwrites it.
    this.document = null;
    try {
        this.document = Jsoup.parse(text);
    } catch (Exception e) {
        logger.warn("parse document error ", e);
    }
}
代码示例来源:origin: square/retrofit
/**
 * Converts a response body into a {@code Page} made of the document title and the
 * {@code href} value of every anchor element that has one.
 *
 * @param responseBody body whose string content is parsed as HTML
 * @return a page holding the title and an unmodifiable list of the collected href values
 * @throws IOException declared by the signature; presumably raised while reading the body
 */
@Override
public Page convert(ResponseBody responseBody) throws IOException {
    String html = responseBody.string();
    Document parsed = Jsoup.parse(html);

    List<String> hrefs = new ArrayList<>();
    for (Element anchor : parsed.select("a[href]")) {
        String href = anchor.attr("href");
        hrefs.add(href);
    }

    String title = parsed.title();
    return new Page(title, Collections.unmodifiableList(hrefs));
}
}
代码示例来源:origin: code4craft/webmagic
/**
 * Parses {@code text} as HTML and passes the resulting document to {@code selectElements}.
 *
 * @param text HTML source; may be {@code null}
 * @return the selection result, or a new empty list when {@code text} is {@code null}
 */
public List<Element> selectElements(String text) {
    if (text == null) {
        return new ArrayList<Element>();
    }
    return selectElements(Jsoup.parse(text));
}
代码示例来源:origin: code4craft/webmagic
/**
 * Parses {@code text} as HTML and passes the resulting document to {@code select}.
 *
 * @param text HTML source; may be {@code null}
 * @return the selection result, or {@code null} when {@code text} is {@code null}
 */
@Override
public String select(String text) {
    return text == null ? null : select(Jsoup.parse(text));
}
代码示例来源:origin: code4craft/webmagic
/**
 * Parses {@code text} as HTML and passes the resulting document to {@code selectList}.
 *
 * @param text HTML source; may be {@code null}
 * @return the selection result, or a new empty list when {@code text} is {@code null}
 */
@Override
public List<String> selectList(String text) {
    List<String> results;
    if (text == null) {
        results = new ArrayList<String>();
    } else {
        results = selectList(Jsoup.parse(text));
    }
    return results;
}
代码示例来源:origin: code4craft/webmagic
/**
 * Parses {@code text} as HTML and passes the resulting document to {@code selectElement}.
 *
 * @param text HTML source; may be {@code null}
 * @return the selection result, or {@code null} when {@code text} is {@code null}
 */
public Element selectElement(String text) {
    if (text == null) {
        return null;
    }
    return selectElement(Jsoup.parse(text));
}
代码示例来源:origin: jphp-group/jphp
/**
 * Parses an HTML string into a document.
 *
 * @param text    HTML source to parse
 * @param baseUri base URI passed through to {@code Jsoup.parse}
 * @return the parsed document
 */
@Signature
public static Document parseText(String text, String baseUri) {
    Document parsed = Jsoup.parse(text, baseUri);
    return parsed;
}
}
代码示例来源:origin: ChinaSilence/any-video
/**
 * Pre-processes text:
 * lower-case -> remove {@code <code>} sections -> remove HTML tags.
 *
 * @param content raw content; must not be {@code null}, since it is dereferenced immediately
 * @return the text of the document parsed from the cleaned content
 */
private String preHandle(String content){
    // Locale.ROOT keeps the lower-casing independent of the JVM default locale;
    // under e.g. a Turkish locale "I" would otherwise turn into a dotless "ı".
    content = content.toLowerCase(java.util.Locale.ROOT);
    // Drop every space character, then cut out <code>...</code> sections
    // (non-greedy, [\s\S] lets the match span line breaks).
    content = content.replaceAll(" ", "").replaceAll("<code[\\s\\S]*?</code>", "");
    return Jsoup.parse(content).text();
}
代码示例来源:origin: square/okhttp
// Parse the response body text as HTML, passing url.toString() as the base URI.
Document document = Jsoup.parse(response.body().string(), url.toString());
// Visit every anchor element that has an href attribute.
// NOTE(review): this is an excerpt; the loop body continues beyond the lines shown here.
for (Element element : document.select("a[href]")) {
String href = element.attr("href");
代码示例来源:origin: k9mail/k-9
/**
 * Turns HTML markup into plain text.
 *
 * @param html the HTML markup to convert
 * @return the plain text of the document body, with the preview-object and NBSP
 *         characters replaced by their configured replacements
 */
public static String htmlToText(final String html) {
    final Document parsed = Jsoup.parse(html);
    final String plainText = HtmlToPlainText.toPlainText(parsed.body());
    final String withoutObjects = plainText.replace(PREVIEW_OBJECT_CHARACTER, PREVIEW_OBJECT_REPLACEMENT);
    return withoutObjects.replace(NBSP_CHARACTER, NBSP_REPLACEMENT);
}
代码示例来源:origin: k9mail/k-9
/**
 * Parses {@code html} and produces a cleaned copy of the document.
 *
 * @param html HTML source to sanitize
 * @return the document returned by {@code cleaner}, after {@code headCleaner} has processed it
 */
public Document sanitize(String html) {
    Document parsed = Jsoup.parse(html);
    Document sanitized = cleaner.clean(parsed);
    // The head cleaner receives both the parsed original and the cleaned result.
    headCleaner.clean(parsed, sanitized);
    return sanitized;
}
}
代码示例来源:origin: javaee-samples/javaee7-samples
/**
 * Re-serializes {@code html} by parsing it with the XML parser and an empty base URI.
 *
 * @param html markup to format
 * @return the string form of the parsed document, or {@code html} unchanged if anything throws
 */
public static String formatHTML(String html) {
    String formatted;
    try {
        formatted = parse(html, "", xmlParser()).toString();
    } catch (Exception e) {
        // Best effort: fall back to the input as-is.
        formatted = html;
    }
    return formatted;
}
代码示例来源:origin: JpressProjects/jpress
/**
 * Extracts the text content of an HTML string.
 *
 * @param html HTML source
 * @return {@code html} itself when {@code StrUtils.isBlank} reports it blank,
 *         otherwise the text of the parsed document
 */
public static String getText(String html) {
    return StrUtils.isBlank(html) ? html : Jsoup.parse(html).text();
}
代码示例来源:origin: k9mail/k-9
/**
 * Asserts that {@code cssQuery} matches the expected number of elements in {@code html}.
 *
 * @param html                        HTML source to inspect
 * @param cssQuery                    selector to evaluate against the parsed document
 * @param numberOfExpectedOccurrences number of matches the selector must produce
 */
private void assertHtmlContainsElement(String html, String cssQuery, int numberOfExpectedOccurrences) {
    String failureMessage = "Expected to find '" + cssQuery + "' " + numberOfExpectedOccurrences
            + " time(s) in:\n" + html;
    int actualOccurrences = Jsoup.parse(html).select(cssQuery).size();
    assertEquals(failureMessage, numberOfExpectedOccurrences, actualOccurrences);
}
}
代码示例来源:origin: JpressProjects/jpress
/**
 * Parses {@code content} and runs {@code replace} over the {@code src} attribute of
 * scripts and images and the {@code href} attribute of links.
 *
 * @param content HTML source
 * @return {@code content} itself when {@code StrUtils.isBlank} reports it blank,
 *         otherwise the string form of the processed document
 */
public String replaceSrcTemplateSrcPath(String content) {
    if (StrUtils.isBlank(content)) {
        return content;
    }

    Document parsed = Jsoup.parse(content);
    replace(parsed.select("script[src]"), "src");
    replace(parsed.select("img[src]"), "src");
    replace(parsed.select("link[href]"), "href");
    return parsed.toString();
}
代码示例来源:origin: jphp-group/jphp
/**
 * Parses the stream obtained from {@code source} into a document.
 *
 * @param env      environment used to open and close the stream
 * @param source   value from which {@code Stream.getInputStream} obtains the input stream
 * @param encoding character set name passed to {@code Jsoup.parse}
 * @param baseUri  base URI passed to {@code Jsoup.parse}
 * @return the parsed document
 * @throws IOException declared by the signature; presumably raised while reading the stream
 */
@Signature
public static Document parse(Environment env, Memory source, String encoding, String baseUri) throws IOException {
    InputStream input = Stream.getInputStream(env, source);
    Document parsed;
    try {
        parsed = Jsoup.parse(input, encoding, baseUri);
    } finally {
        // Always hand the stream back, whether or not parsing succeeded.
        Stream.closeStream(env, input);
    }
    return parsed;
}
代码示例来源:origin: seven332/EhViewer
/**
 * Reads an {@code href} out of the forums page: the element with id {@code userlinks}
 * is located and its first child is followed three levels down.
 *
 * @param body HTML source of the page
 * @return the {@code href} attribute of that nested element
 * @throws ParseException when anything non-fatal goes wrong while parsing or navigating
 */
public static String parse(String body) throws ParseException {
    try {
        Element userlinks = Jsoup.parse(body, EhUrl.URL_FORUMS).getElementById("userlinks");
        return userlinks.child(0).child(0).child(0).attr("href");
    } catch (Throwable e) {
        // Fatal errors are rethrown by the helper; everything else becomes a ParseException.
        ExceptionUtils.throwIfFatal(e);
        throw new ParseException("Parse forums error", body);
    }
}
}
代码示例来源:origin: k9mail/k-9
/** Checks that the wrapped HTML carries the original content as its body text. */
@Test
public void wrapMessageContent_putsMessageContentInBody() {
    String expectedBodyText = "Some text";

    String wrappedHtml = HtmlConverter.wrapMessageContent(expectedBodyText);
    String actualBodyText = Jsoup.parse(wrappedHtml).body().text();

    assertEquals(expectedBodyText, actualBodyText);
}
代码示例来源:origin: k9mail/k-9
/**
 * Parses {@code content}, runs a {@code StripSignatureFilter} over the document body
 * and returns the compact string form of the document.
 *
 * @param content HTML source
 * @return the result of {@code HtmlProcessor.toCompactString} for the filtered document
 */
private String stripSignatureInternal(String content) {
    Document parsed = Jsoup.parse(content);
    new AdvancedNodeTraversor(new StripSignatureFilter()).filter(parsed.body());
    return HtmlProcessor.toCompactString(parsed);
}
内容来源于网络,如有侵权,请联系作者删除!