org.jsoup.nodes.Document.baseUri()方法的使用及代码示例

x33g5p2x  于2022-01-18 转载在 其他  
字(7.5k)|赞(0)|评价(0)|浏览(197)

本文整理了Java中org.jsoup.nodes.Document.baseUri()方法的一些代码示例,展示了Document.baseUri()的具体用法。这些代码示例主要来源于GitHub/Stack Overflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Document.baseUri()方法的具体详情如下:
包路径:org.jsoup.nodes.Document
类名称:Document
方法名:baseUri

Document.baseUri介绍

暂无

代码示例

代码示例来源:origin: org.jsoup/jsoup

/**
 Builds a new Element that carries this document's base uri. The element is only created here; it is not
 attached to this document as a child.
 @param tagName element tag name (e.g. {@code a})
 @return the newly created element
 */
public Element createElement(String tagName) {
  // Resolve the tag first, keeping the case of the supplied name.
  Tag tag = Tag.valueOf(tagName, ParseSettings.preserveCase);
  return new Element(tag, this.baseUri());
}

代码示例来源:origin: code4craft/webmagic

/**
 * Makes sure the element at the iterator's next position is a {@link Document}, because only a
 * document can be selected on.
 * See: https://github.com/code4craft/webmagic/issues/113
 * <p>
 * A non-document element is cloned into a new document created with the base URI of the
 * element's owner document, and that new document replaces the element in the iterator.
 *
 * @param elementIterator iterator positioned before the element to check
 * @return the element itself if it already is a document, otherwise the new wrapping document
 */
private Element checkElementAndConvert(ListIterator<Element> elementIterator) {
  Element current = elementIterator.next();
  if (current instanceof Document) {
    // Already a document: nothing to convert.
    return current;
  }
  Document wrapper = new Document(current.ownerDocument().baseUri());
  // Append a clone so the original element is not moved into the wrapper.
  wrapper.appendChild(current.clone());
  elementIterator.set(wrapper);
  return wrapper;
}

代码示例来源:origin: org.jsoup/jsoup

/**
 Builds a new, clean document from the given dirty document, keeping only the elements allowed by the whitelist.
 The dirty document itself is left untouched. Only elements from the dirty document's <code>body</code> are considered.
 @param dirtyDocument Untrusted base document to clean.
 @return cleaned document.
 */
public Document clean(Document dirtyDocument) {
  Validate.notNull(dirtyDocument);
  Document cleaned = Document.createShell(dirtyDocument.baseUri());
  if (dirtyDocument.body() == null) {
    // frameset documents won't have a body; the clean doc is returned with its empty body.
    return cleaned;
  }
  copySafeNodes(dirtyDocument.body(), cleaned.body());
  return cleaned;
}

代码示例来源:origin: org.jsoup/jsoup

/**
 Checks whether the <b>body</b> of the input document is valid against the whitelist. The document counts as valid
 when every tag and attribute in the input HTML is allowed by the whitelist, and the <code>head</code> has no content.
 <p>
 This method can serve as a validator for user input. An invalid document can still be cleaned successfully
 with the {@link #clean(Document)} method. Even when validating, it is recommended to clean the document as well,
 so that enforced attributes are set correctly and the output is tidied.
 </p>
 @param dirtyDocument document to test
 @return true if no tags or attributes need to be removed; false if they do
 */
public boolean isValid(Document dirtyDocument) {
  Validate.notNull(dirtyDocument);
  Document shell = Document.createShell(dirtyDocument.baseUri());
  int discardedCount = copySafeNodes(dirtyDocument.body(), shell.body());
  if (discardedCount != 0) {
    return false;
  }
  // Only the body was copied into the shell, so the head has to be checked separately: it must be empty.
  return dirtyDocument.head().childNodes().size() == 0;
}

代码示例来源:origin: com.vaadin/vaadin-server

document.baseUri());
document.child(0).before(doctype);

代码示例来源:origin: us.codecraft/xsoup

/**
 * Reports the base URI of the {@code document} field as the document URI.
 *
 * @return the value of {@code document.baseUri()}
 */
@Override
public String getDocumentURI() {
  final String baseUri = document.baseUri();
  return baseUri;
}

代码示例来源:origin: code4craft/xsoup

/**
 * Reports the base URI of the {@code document} field as the document URI.
 *
 * @return the value of {@code document.baseUri()}
 */
@Override
public String getDocumentURI() {
  final String baseUri = document.baseUri();
  return baseUri;
}

代码示例来源:origin: stackoverflow.com

// Obtain the Document from the connection.
Document doc = con.get();
// NOTE(review): judging by the RETURN_FINAL_URL key, baseUri() is presumably the URL the
// document was finally loaded from (e.g. after redirects) — confirm against the caller.
String uri = doc.baseUri();
// Store the URI in the result object under RETURN_FINAL_URL.
returnObj.put(RETURN_FINAL_URL, uri);

代码示例来源:origin: us.codecraft/webmagic-core

/**
 * Makes sure the element at the iterator's next position is a {@link Document}, because only a
 * document can be selected on.
 * See: https://github.com/code4craft/webmagic/issues/113
 * <p>
 * A non-document element is cloned into a new document created with the base URI of the
 * element's owner document, and that new document replaces the element in the iterator.
 *
 * @param elementIterator iterator positioned before the element to check
 * @return the element itself if it already is a document, otherwise the new wrapping document
 */
private Element checkElementAndConvert(ListIterator<Element> elementIterator) {
  Element current = elementIterator.next();
  if (current instanceof Document) {
    // Already a document: nothing to convert.
    return current;
  }
  Document wrapper = new Document(current.ownerDocument().baseUri());
  // Append a clone so the original element is not moved into the wrapper.
  wrapper.appendChild(current.clone());
  elementIterator.set(wrapper);
  return wrapper;
}

代码示例来源:origin: com.cv4j.netdiscovery/netdiscovery-core

/**
 * Makes sure the element at the iterator's next position is a {@link Document}, because only a
 * document can be selected on.
 * See: https://github.com/code4craft/webmagic/issues/113
 * <p>
 * A non-document element is cloned into a new document created with the base URI of the
 * element's owner document, and that new document replaces the element in the iterator.
 *
 * @param elementIterator iterator positioned before the element to check
 * @return the element itself if it already is a document, otherwise the new wrapping document
 */
private Element checkElementAndConvert(ListIterator<Element> elementIterator) {
  Element current = elementIterator.next();
  if (current instanceof Document) {
    // Already a document: nothing to convert.
    return current;
  }
  Document wrapper = new Document(current.ownerDocument().baseUri());
  // Append a clone so the original element is not moved into the wrapper.
  wrapper.appendChild(current.clone());
  elementIterator.set(wrapper);
  return wrapper;
}

代码示例来源:origin: opacapp/opacclient

/**
 * Adds the reserved items parsed from the given page to {@code items}, then follows the
 * "next page" URL page by page until no further page is found.
 *
 * @param resDoc first page of the reservation list
 * @param items  list that receives the parsed items of every visited page
 * @throws IOException if fetching a following page fails
 */
private void loadResList(Document resDoc, List<ReservedItem> items) throws IOException {
  Document page = resDoc;
  while (true) {
    items.addAll(parseResList(page));
    String nextUrl = findNextPageUrl(page);
    if (nextUrl == null) {
      return;
    }
    Document nextPage = Jsoup.parse(httpGet(nextUrl, getDefaultEncoding()));
    // Carry the base URI over from the page that linked here.
    nextPage.setBaseUri(page.baseUri());
    page = nextPage;
  }
}

代码示例来源:origin: opacapp/opacclient

/**
 * Adds the lent items parsed from the given page to {@code items}, then follows the
 * "next page" URL page by page until no further page is found.
 *
 * @param lentDoc first page of the lent-media list
 * @param items   list that receives the parsed items of every visited page
 * @throws IOException if fetching a following page fails
 */
private void loadMediaList(Document lentDoc, List<LentItem> items)
    throws IOException {
  Document page = lentDoc;
  while (true) {
    items.addAll(parseMediaList(page));
    String nextUrl = findNextPageUrl(page);
    if (nextUrl == null) {
      return;
    }
    Document nextPage = Jsoup.parse(httpGet(nextUrl, getDefaultEncoding()));
    // Carry the base URI over from the page that linked here.
    nextPage.setBaseUri(page.baseUri());
    page = nextPage;
  }
}

代码示例来源:origin: cn.edu.hfut.dmic.webcollector/WebCollector

/**
 * Assembles a {@code News} object from the current document.
 * <p>
 * The content element is mandatory: if it cannot be extracted, the failure is logged and
 * rethrown wrapped in an {@link Exception}. The URL is set from the document's base URI when
 * that is non-null. Time and title are best-effort: a failure in either is logged and the
 * corresponding field is simply left unset.
 *
 * @return the news object with content element and, where available, URL, time and title
 * @throws Exception wrapping the original cause if the content element cannot be extracted
 */
public News getNews() throws Exception {
  News news = new News();
  Element contentElement;
  try {
    contentElement = getContentElement();
    news.setContentElement(contentElement);
  } catch (Exception ex) {
    LOG.info("news content extraction failed,extraction abort", ex);
    throw new Exception(ex);
  }
  // Read the base URI once instead of calling the getter twice.
  String baseUri = doc.baseUri();
  if (baseUri != null) {
    news.setUrl(baseUri);
  }
  try {
    news.setTime(getTime(contentElement));
  } catch (Exception ex) {
    // This branch handles the time extraction; the message used to say "title" by mistake.
    LOG.info("news time extraction failed", ex);
  }
  try {
    news.setTitle(getTitle(contentElement));
  } catch (Exception ex) {
    LOG.info("title extraction failed", ex);
  }
  return news;
}

代码示例来源:origin: avluis/Hentoid

Timber.d("URI : %s", doc.baseUri());
if (doc.baseUri().contains(HENTAICAFE.getUrl() + "/78-2/") ||           // ignore tags page
    doc.baseUri().contains(HENTAICAFE.getUrl() + "/artists/")) {    // ignore artist page

代码示例来源:origin: avluis/Hentoid

result = new Content();
String url = doc.baseUri();
// Take the first five characters of the base URI as the protocol candidate.
// substring(0,5) throws StringIndexOutOfBoundsException if the URI is shorter than five characters.
String protocol = url.substring(0,5);
// Exactly "https" gets a trailing colon; any other value is left unchanged by this statement.
if ("https".equals(protocol)) protocol = "https:";

代码示例来源:origin: avluis/Hentoid

/**
 * Fills a new {@code Content} for {@code Site.PANDA} from the given page.
 * <p>
 * The following values are read from the document:
 * the URL (base URI from the first '/' found at or after index 9),
 * the cover image URL ({@code src} of the images under {@code div#imgholder}),
 * the title (text of the {@code h1} under {@code div#mangainfo}) and
 * the page count (number after the last '/' in the {@code value} of the last option under
 * {@code div#selectpage}).
 *
 * @param doc page to parse
 * @return the populated content, with an empty attribute map
 */
@Override
protected Content parseContent(Document doc) {
  Content content = new Content();

  // Searching from index 9 presumably skips the slashes of the "http://" / "https://" prefix.
  String pageUri = doc.baseUri();
  int pathStart = pageUri.indexOf('/', 9);
  content.setUrl(pageUri.substring(pathStart));

  String cover = doc.select("div#imgholder").select("a").select("img").attr("src");
  content.setCoverImageUrl(cover);

  String heading = doc.select("div#mangainfo").select("div").select("h1").text();
  content.setTitle(heading);

  // The last page option's value ends with the page number after its final '/'.
  String lastPageValue = doc.select("div#selectpage").select("select").select("option").last().attr("value");
  int numberStart = lastPageValue.lastIndexOf('/') + 1;
  content.setQtyPages(Integer.parseInt(lastPageValue.substring(numberStart)));

  content.setAttributes(new AttributeMap());
  content.setSite(Site.PANDA);
  return content;
}

代码示例来源:origin: org.kantega.openaksess/openaksess-core

/**
 * Copies the safe nodes of the given document's body into a new shell document that shares
 * the original's base URI.
 *
 * @param document document to filter
 * @return the new shell document; its body is left empty when the input has no body
 */
@Override
public Document runFilter(Document document) {
  final Document filtered = Document.createShell(document.baseUri());
  if (document.body() == null) {
    // frameset documents won't have a body; return the shell with its empty body.
    return filtered;
  }
  copySafeNodes(document.body(), filtered.body());
  return filtered;
}

代码示例来源:origin: com.vaadin/flow-server

// Create a TextNode from the element's text, passing the document's base URI along.
return new TextNode(element.getText(), document.baseUri());

代码示例来源:origin: avluis/Hentoid

parseAttributes(attributes, AttributeType.CHARACTER, characterElements, true);
if (doc.baseUri().contains("comics")) {
  result.setSite(Site.ASMHENTAI_COMICS);
} else {

代码示例来源:origin: com.vaadin/flow-server

static Document getBootstrapPage(BootstrapContext context) {
  Document document = new Document("");
  DocumentType doctype = new DocumentType("html", "", "",
      document.baseUri());
  document.appendChild(doctype);
  Element html = document.appendElement("html");

相关文章