本文整理了Java中org.jsoup.nodes.Document.baseUri()
方法的一些代码示例,展示了Document.baseUri()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Document.baseUri()
方法的具体详情如下:
包路径:org.jsoup.nodes.Document
类名称:Document
方法名:baseUri
暂无
代码示例来源:origin: org.jsoup/jsoup
/**
 * Builds a new {@link Element} for the given tag name, carrying this document's base URI.
 * The element is only constructed here; it is not made a child of this document.
 *
 * @param tagName element tag name (e.g. {@code a})
 * @return the newly constructed element
 */
public Element createElement(String tagName) {
    final Tag tag = Tag.valueOf(tagName, ParseSettings.preserveCase);
    final String documentBaseUri = this.baseUri();
    return new Element(tag, documentBaseUri);
}
代码示例来源:origin: code4craft/webmagic
/**
 * Makes sure the iterator's next element is a {@link Document}, since only a document can be selected on.
 * See: https://github.com/code4craft/webmagic/issues/113
 *
 * <p>A non-document element is cloned into a new document created with the base URI of the
 * element's owner document, and that new document replaces the element in the iterator.
 *
 * @param elementIterator iterator whose next element is checked
 * @return the element itself when it already is a document, otherwise the wrapping document
 */
private Element checkElementAndConvert(ListIterator<Element> elementIterator) {
    final Element current = elementIterator.next();
    if (current instanceof Document) {
        return current;
    }

    final String ownerBaseUri = current.ownerDocument().baseUri();
    final Document wrapper = new Document(ownerBaseUri);
    wrapper.appendChild(current.clone());
    elementIterator.set(wrapper);
    return wrapper;
}
代码示例来源:origin: org.jsoup/jsoup
/**
 * Produces a new, clean document from the supplied dirty one, keeping only elements the whitelist allows.
 * The dirty document is not modified, and only nodes under its <code>body</code> are used.
 *
 * @param dirtyDocument Untrusted base document to clean.
 * @return cleaned document.
 */
public Document clean(Document dirtyDocument) {
    Validate.notNull(dirtyDocument);

    final Document sanitized = Document.createShell(dirtyDocument.baseUri());
    if (dirtyDocument.body() == null) {
        // frameset documents won't have a body; the clean doc keeps its empty body
        return sanitized;
    }

    copySafeNodes(dirtyDocument.body(), sanitized.body());
    return sanitized;
}
代码示例来源:origin: org.jsoup/jsoup
/**
 * Checks whether the <b>body</b> of the input document is valid against the whitelist. The document is
 * considered valid when every tag and attribute in the input HTML is allowed by the whitelist and the
 * <code>head</code> holds no content.
 * <p>
 * This method can serve as a validator for user input. An invalid document can still be cleaned
 * successfully with {@link #clean(Document)}. Even when validating, it is recommended to clean the
 * document as well, so that enforced attributes are set correctly and the output is tidied.
 * </p>
 *
 * @param dirtyDocument document to test
 * @return true if no tags or attributes need to be removed; false if they do
 */
public boolean isValid(Document dirtyDocument) {
    Validate.notNull(dirtyDocument);

    final Document scratch = Document.createShell(dirtyDocument.baseUri());
    final int discardedCount = copySafeNodes(dirtyDocument.body(), scratch.body());
    if (discardedCount != 0) {
        return false;
    }

    // only the body was copied into the shell, so separately require that the head has no child nodes
    return dirtyDocument.head().childNodes().isEmpty();
}
代码示例来源:origin: com.vaadin/vaadin-server
document.baseUri());
document.child(0).before(doctype);
代码示例来源:origin: us.codecraft/xsoup
/**
 * Reports the document URI by delegating to the base URI of the {@code document} reference.
 *
 * @return the value of {@code document.baseUri()}
 */
@Override
public String getDocumentURI() {
    final String baseUri = document.baseUri();
    return baseUri;
}
代码示例来源:origin: code4craft/xsoup
/**
 * Returns the base URI of the {@code document} reference as this object's document URI.
 *
 * @return the value of {@code document.baseUri()}
 */
@Override
public String getDocumentURI() {
    final String documentUri = document.baseUri();
    return documentUri;
}
代码示例来源:origin: stackoverflow.com
// Fetch the document through con; presumably con is a jsoup Connection — confirm against the surrounding code
Document doc = con.get();
// Base URI of the fetched document
String uri = doc.baseUri();
// Store it under RETURN_FINAL_URL; NOTE(review): the key name suggests this is meant to be the post-redirect URL — confirm
returnObj.put(RETURN_FINAL_URL, uri);
代码示例来源:origin: us.codecraft/webmagic-core
/**
 * Guarantees that the element handed back is a {@link Document}, since only a document can be selected on.
 * See: https://github.com/code4craft/webmagic/issues/113
 *
 * <p>When the iterator's next element is not a document, a clone of it is appended to a new document
 * created with the owner document's base URI, and the iterator entry is replaced by that new document.
 *
 * @param elementIterator iterator whose next element is checked
 * @return the original element if it is a document, otherwise the newly created document
 */
private Element checkElementAndConvert(ListIterator<Element> elementIterator) {
    final Element candidate = elementIterator.next();
    final boolean alreadyDocument = candidate instanceof Document;
    if (alreadyDocument) {
        return candidate;
    }

    final Document container = new Document(candidate.ownerDocument().baseUri());
    final Element copy = candidate.clone();
    container.appendChild(copy);
    elementIterator.set(container);
    return container;
}
代码示例来源:origin: com.cv4j.netdiscovery/netdiscovery-core
/**
 * Converts the iterator's next element into a {@link Document} when it is not one already,
 * since only a document can be selected on.
 * See: https://github.com/code4craft/webmagic/issues/113
 *
 * <p>The conversion clones the element into a new document built with the base URI of the element's
 * owner document, then stores that document back into the iterator in place of the element.
 *
 * @param elementIterator iterator whose next element is checked
 * @return the unchanged element if it is a document, otherwise the document that now wraps its clone
 */
private Element checkElementAndConvert(ListIterator<Element> elementIterator) {
    final Element next = elementIterator.next();
    if (next instanceof Document) {
        return next;
    }

    final String baseUri = next.ownerDocument().baseUri();
    final Document replacement = new Document(baseUri);
    replacement.appendChild(next.clone());
    elementIterator.set(replacement);
    return replacement;
}
代码示例来源:origin: opacapp/opacclient
/**
 * Adds the items parsed from the given page to {@code items}, then follows the page's "next page"
 * link, if any, and processes that page the same way.
 * A follow-up page is fetched with the default encoding, parsed, and given the base URI of the page
 * that linked to it.
 *
 * @param resDoc page to parse
 * @param items  list that receives the parsed items
 * @throws IOException if one of the called helpers fails with an I/O error
 */
private void loadResList(Document resDoc, List<ReservedItem> items) throws IOException {
    items.addAll(parseResList(resDoc));

    final String followingUrl = findNextPageUrl(resDoc);
    if (followingUrl == null) {
        // no further page to follow
        return;
    }

    final Document followingPage = Jsoup.parse(httpGet(followingUrl, getDefaultEncoding()));
    followingPage.setBaseUri(resDoc.baseUri());
    loadResList(followingPage, items);
}
代码示例来源:origin: opacapp/opacclient
/**
 * Adds the items parsed from the given page to {@code items}, then follows the page's "next page"
 * link, if any, and processes that page the same way.
 * A follow-up page is fetched with the default encoding, parsed, and given the base URI of the page
 * that linked to it.
 *
 * @param lentDoc page to parse
 * @param items   list that receives the parsed items
 * @throws IOException if one of the called helpers fails with an I/O error
 */
private void loadMediaList(Document lentDoc, List<LentItem> items) throws IOException {
    items.addAll(parseMediaList(lentDoc));

    final String followingUrl = findNextPageUrl(lentDoc);
    if (followingUrl == null) {
        // no further page to follow
        return;
    }

    final Document followingPage = Jsoup.parse(httpGet(followingUrl, getDefaultEncoding()));
    followingPage.setBaseUri(lentDoc.baseUri());
    loadMediaList(followingPage, items);
}
代码示例来源:origin: cn.edu.hfut.dmic.webcollector/WebCollector
/**
 * Builds a {@link News} object from the current document.
 * <p>
 * The content element is mandatory: if obtaining or storing it fails, the failure is logged and
 * rethrown wrapped in a new {@link Exception}. The URL is taken from the document's base URI when
 * that is non-null. Time and title extraction are best-effort: a failure is logged and the
 * corresponding setter is simply not called.
 * </p>
 *
 * @return the populated news object
 * @throws Exception if the content element cannot be obtained or stored
 */
public News getNews() throws Exception {
    News news = new News();
    Element contentElement;
    try {
        contentElement = getContentElement();
        news.setContentElement(contentElement);
    } catch (Exception ex) {
        LOG.info("news content extraction failed,extraction abort", ex);
        // the wrapping Exception type is kept so existing callers see the same failure shape
        throw new Exception(ex);
    }

    final String url = doc.baseUri();
    if (url != null) {
        news.setUrl(url);
    }

    try {
        news.setTime(getTime(contentElement));
    } catch (Exception ex) {
        // this branch guards the time extraction, so the message must say "time", not "title"
        LOG.info("news time extraction failed", ex);
    }

    try {
        news.setTitle(getTitle(contentElement));
    } catch (Exception ex) {
        LOG.info("title extraction failed", ex);
    }
    return news;
}
代码示例来源:origin: avluis/Hentoid
Timber.d("URI : %s", doc.baseUri());
if (doc.baseUri().contains(HENTAICAFE.getUrl() + "/78-2/") || // ignore tags page
doc.baseUri().contains(HENTAICAFE.getUrl() + "/artists/")) { // ignore artist page
代码示例来源:origin: avluis/Hentoid
result = new Content();
// Base URI of the parsed document
String url = doc.baseUri();
// First five characters of the URL; NOTE(review): substring(0,5) throws if the URL is shorter than five characters
String protocol = url.substring(0,5);
// When those five characters are exactly "https", append the colon
if ("https".equals(protocol)) protocol = "https:";
代码示例来源:origin: avluis/Hentoid
/**
 * Builds a {@link Content} from a parsed page by reading its URL, cover image, title and page count.
 *
 * <ul>
 *   <li>URL: the document's base URI from the first {@code '/'} found at index 9 or later.</li>
 *   <li>Cover image: the {@code src} attribute selected under {@code div#imgholder}.</li>
 *   <li>Title: the text of the {@code h1} selected under {@code div#mangainfo}.</li>
 *   <li>Page count: the integer after the last {@code '/'} in the value of the last option selected
 *       under {@code div#selectpage}.</li>
 * </ul>
 * An empty {@link AttributeMap} is attached and the site is set to {@code Site.PANDA}.
 *
 * @param doc parsed page
 * @return the populated content
 */
@Override
protected Content parseContent(Document doc) {
    final Content content = new Content();

    final String baseUri = doc.baseUri();
    final int pathStart = baseUri.indexOf('/', 9);
    content.setUrl(baseUri.substring(pathStart));

    final String coverImageUrl = doc.select("div#imgholder").select("a").select("img").attr("src");
    content.setCoverImageUrl(coverImageUrl);

    final String pageTitle = doc.select("div#mangainfo").select("div").select("h1").text();
    content.setTitle(pageTitle);

    // the value of the last option ends with "/<number>"; that number is used as the page count
    final String finalOptionValue =
            doc.select("div#selectpage").select("select").select("option").last().attr("value");
    final int lastSlash = finalOptionValue.lastIndexOf('/');
    final int pageCount = Integer.parseInt(finalOptionValue.substring(lastSlash + 1));
    content.setQtyPages(pageCount);

    content.setAttributes(new AttributeMap());
    content.setSite(Site.PANDA);
    return content;
}
代码示例来源:origin: org.kantega.openaksess/openaksess-core
/**
 * Returns a new shell document, created with the input document's base URI, into which the safe
 * nodes of the input document's body have been copied.
 *
 * @param document document to filter
 * @return the new document; its body is left empty when the input document has no body
 */
@Override
public Document runFilter(Document document) {
    final Document filtered = Document.createShell(document.baseUri());
    if (document.body() == null) {
        // frameset documents won't have a body; the filtered doc keeps its empty body
        return filtered;
    }

    copySafeNodes(document.body(), filtered.body());
    return filtered;
}
代码示例来源:origin: com.vaadin/flow-server
return new TextNode(element.getText(), document.baseUri());
代码示例来源:origin: avluis/Hentoid
parseAttributes(attributes, AttributeType.CHARACTER, characterElements, true);
if (doc.baseUri().contains("comics")) {
result.setSite(Site.ASMHENTAI_COMICS);
} else {
代码示例来源:origin: com.vaadin/flow-server
static Document getBootstrapPage(BootstrapContext context) {
Document document = new Document("");
DocumentType doctype = new DocumentType("html", "", "",
document.baseUri());
document.appendChild(doctype);
Element html = document.appendElement("html");
内容来源于网络,如有侵权,请联系作者删除!