org.jsoup.nodes.Document.<init>()方法的使用及代码示例

x33g5p2x  于2022-01-18 转载在 其他  
字(7.0k)|赞(0)|评价(0)|浏览(210)

本文整理了Java中org.jsoup.nodes.Document.<init>()方法的一些代码示例,展示了Document.<init>()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Document.<init>()方法的具体详情如下:
包路径:org.jsoup.nodes.Document
类名称:Document
方法名:<init>

Document.<init>介绍

[英]Create a new, empty Document.
[中]创建一个新的空文档。

代码示例

代码示例来源:origin: org.jsoup/jsoup

/**
 Render this attribute as HTML, for example {@code href="index.html"}.
 The output settings are taken from a freshly constructed, empty {@code Document}.
 @return the HTML text of this attribute
 @throws SerializationException wrapping any {@code IOException} raised while writing
 */
public String html() {
  final StringBuilder out = new StringBuilder();
  final Document.OutputSettings settings = new Document("").outputSettings();

  try {
    html(out, settings);
  } catch (IOException e) {
    throw new SerializationException(e);
  }
  return out.toString();
}

代码示例来源:origin: code4craft/webmagic

/**
 * Makes sure the next element of the iterator is a {@code Document}, because only a
 * document can be selected on.
 * See: https://github.com/code4craft/webmagic/issues/113
 * <p>
 * Advances the iterator by one element. When that element is not a {@code Document}, a
 * clone of it is appended to a new {@code Document}, and that document replaces the
 * element in the underlying list.
 *
 * @param elementIterator iterator positioned just before the element to check
 * @return the element itself when it already is a document, otherwise the new wrapping document
 */
private Element checkElementAndConvert(ListIterator<Element> elementIterator) {
  Element element = elementIterator.next();
  if (element instanceof Document) {
    return element;
  }
  // ownerDocument() returns null for an element that is not attached to any document;
  // in that case fall back to the element's own base URI instead of failing with an NPE.
  Document owner = element.ownerDocument();
  String baseUri = owner != null ? owner.baseUri() : element.baseUri();
  Document root = new Document(baseUri);
  root.appendChild(element.clone());
  elementIterator.set(root);
  return root;
}

代码示例来源:origin: org.jsoup/jsoup

/**
 Build the HTML text for this set of attributes.
 @return the attributes rendered as HTML
 @throws SerializationException if the HTML representation of the attributes cannot be constructed.
 */
public String html() {
  // Settings come from a throwaway empty document; slightly odd, but this html() is seldom used.
  final Document.OutputSettings defaults = new Document("").outputSettings();
  final StringBuilder buffer = new StringBuilder();
  try {
    html(buffer, defaults);
  } catch (IOException ioe) {
    // Not expected to happen in practice.
    throw new SerializationException(ioe);
  }
  return buffer.toString();
}

代码示例来源:origin: org.jsoup/jsoup

/**
 Reset this builder's parse state for a new run.
 Both {@code input} and {@code baseUri} are validated as non-null before any state is touched.
 @param input reader supplying the text to parse
 @param baseUri base URI given to the new document and remembered on this builder
 @param errors error list handed to the tokeniser and remembered on this builder
 @param settings parse settings remembered on this builder
 */
protected void initialiseParse(Reader input, String baseUri, ParseErrorList errors, ParseSettings settings) {
  Validate.notNull(input, "String input must not be null");
  Validate.notNull(baseUri, "BaseURI must not be null");

  this.doc = new Document(baseUri);
  this.settings = settings;
  this.reader = new CharacterReader(input);
  this.errors = errors;
  this.currentToken = null;
  // The tokeniser reads from the character reader created just above.
  this.tokeniser = new Tokeniser(this.reader, errors);
  this.stack = new ArrayList<>(32);
  this.baseUri = baseUri;
}

代码示例来源:origin: org.jsoup/jsoup

/**
 Find the output settings to use: those of the owner document when there is one,
 otherwise those of a newly created empty document.
 @return output settings, never taken from a null owner
 */
Document.OutputSettings getOutputSettings() {
  final Document ownerDoc = ownerDocument();
  if (ownerDoc == null) {
    // No owner document: use the settings of a fresh empty document.
    return new Document("").outputSettings();
  }
  return ownerDoc.outputSettings();
}

代码示例来源:origin: org.jsoup/jsoup

/**
 Build an empty document skeleton that further elements can be added to.
 The result holds an {@code html} element with {@code head} and {@code body} children, in that order.
 @param baseUri base URI for the new document; validated as non-null
 @return the new document containing html, head, and body elements
 */
public static Document createShell(String baseUri) {
  Validate.notNull(baseUri);

  final Document shell = new Document(baseUri);
  final Element root = shell.appendElement("html");
  root.appendElement("head");
  root.appendElement("body");
  return shell;
}

代码示例来源:origin: org.jsoup/jsoup

static Document parseInputStream(InputStream input, String charsetName, String baseUri, Parser parser) throws IOException  {
  if (input == null) // empty body
    return new Document(baseUri);
  input = ConstrainableInputStream.wrap(input, bufferSize, 0);

代码示例来源:origin: com.vaadin/vaadin-server

/**
 * Creates a design context by delegating to the single-argument constructor with a
 * new {@code Document} built from an empty string.
 */
public DesignContext() {
  this(new Document(""));
}

代码示例来源:origin: com.vaadin/vaadin-server

// Start from a new Document constructed with an empty string argument.
Document doc = new Document("");
// Create a DocumentType whose first argument is "html" (the other three are empty strings)
// and append it to the document.
DocumentType docType = new DocumentType("html", "", "", "");
doc.appendChild(docType);

代码示例来源:origin: javagaorui5944/ProxyIpPool

/**
 * Fetches the document for {@code url} by delegating to {@code get(url, 3)}.
 * The second argument is presumably a retry budget; confirm against that overload.
 * <p>
 * This is a best-effort call: any exception from the delegate is reported on standard
 * error (the same handling {@code proxyGet} uses) and an empty document is returned instead.
 *
 * @param url address to fetch
 * @return the fetched document, or a new empty {@code Document} if the delegate threw
 */
public static Document get(String url) {
  int retries = 3;
  try {
    return get(url, retries);
  } catch (Exception e) {
    // Do not swallow the failure silently; report it and fall through to the empty result.
    e.printStackTrace();
  }
  // Original note: after 4 requests that could not be parsed, return an empty document.
  return new Document("");
}

代码示例来源:origin: javagaorui5944/ProxyIpPool

/**
 * Fetches the document for {@code url} by delegating to {@code proxyGet(url, 3, ip, port)}.
 * The value 3 is presumably a retry budget; confirm against that overload.
 * Any exception from the delegate has its stack trace printed, and an empty document is
 * returned instead.
 *
 * @param url address to fetch
 * @param ip passed through to the delegate (presumably the proxy host)
 * @param port passed through to the delegate (presumably the proxy port)
 * @return the fetched document, or a new empty {@code Document} if the delegate threw
 */
public static Document proxyGet(String url, String ip, int port) {
  int attempts = 3;
  Document fetched;
  try {
    fetched = proxyGet(url, attempts, ip, port);
  } catch (Exception e) {
    e.printStackTrace();
    // Original note: after 4 requests that could not be parsed, return an empty document.
    fetched = new Document("");
  }
  return fetched;
}

代码示例来源:origin: com.vaadin/flow-server

/**
 * Returns this element's outer HTML.
 * <p>
 * The element and all of its children are iterated recursively to produce the
 * result, so avoid calling this more often than needed.
 *
 * @return the outer HTML for the element
 */
public String getOuterHTML() {
  // A fresh empty document is handed to the conversion helper along with this element.
  Document scratch = new Document("");
  return ElementUtil.toJsoup(scratch, this).outerHtml();
}

代码示例来源:origin: basis-technology-corp/Java-readability

/**
 * Parses markup held in a string into a new {@code Document}.
 * A {@code Handler} built around the new document is installed as the parser's content
 * handler, and a {@code LocalErrorHandler} as its error handler.
 *
 * @param data the text to parse
 * @param baseUri base URI passed to the new document
 * @return the document handed to the content handler
 * @throws SAXException declared for the underlying parse call
 * @throws IOException declared for the underlying parse call
 */
public Document parse(String data, String baseUri) throws SAXException, IOException {
  // InputSource(Reader) sets the character stream, same as the no-arg constructor plus setter.
  InputSource input = new InputSource(new StringReader(data));
  SAXParser parser = new SAXParser();
  Document result = new Document(baseUri);
  parser.setContentHandler(new Handler(result));
  parser.setErrorHandler(new LocalErrorHandler());
  parser.parse(input);
  return result;
}

代码示例来源:origin: us.codecraft/webmagic-core

/**
 * Makes sure the next element of the iterator is a {@code Document}, because only a
 * document can be selected on.
 * See: https://github.com/code4craft/webmagic/issues/113
 * <p>
 * Advances the iterator by one element. When that element is not a {@code Document}, a
 * clone of it is appended to a new {@code Document}, and that document replaces the
 * element in the underlying list.
 *
 * @param elementIterator iterator positioned just before the element to check
 * @return the element itself when it already is a document, otherwise the new wrapping document
 */
private Element checkElementAndConvert(ListIterator<Element> elementIterator) {
  Element element = elementIterator.next();
  if (element instanceof Document) {
    return element;
  }
  // ownerDocument() returns null for an element that is not attached to any document;
  // in that case fall back to the element's own base URI instead of failing with an NPE.
  Document owner = element.ownerDocument();
  String baseUri = owner != null ? owner.baseUri() : element.baseUri();
  Document root = new Document(baseUri);
  root.appendChild(element.clone());
  elementIterator.set(root);
  return root;
}

代码示例来源:origin: com.cv4j.netdiscovery/netdiscovery-core

/**
 * Makes sure the next element of the iterator is a {@code Document}, because only a
 * document can be selected on.
 * See: https://github.com/code4craft/webmagic/issues/113
 * <p>
 * Advances the iterator by one element. When that element is not a {@code Document}, a
 * clone of it is appended to a new {@code Document}, and that document replaces the
 * element in the underlying list.
 *
 * @param elementIterator iterator positioned just before the element to check
 * @return the element itself when it already is a document, otherwise the new wrapping document
 */
private Element checkElementAndConvert(ListIterator<Element> elementIterator) {
  Element element = elementIterator.next();
  if (element instanceof Document) {
    return element;
  }
  // ownerDocument() returns null for an element that is not attached to any document;
  // in that case fall back to the element's own base URI instead of failing with an NPE.
  Document owner = element.ownerDocument();
  String baseUri = owner != null ? owner.baseUri() : element.baseUri();
  Document root = new Document(baseUri);
  root.appendChild(element.clone());
  elementIterator.set(root);
  return root;
}

代码示例来源:origin: basis-technology-corp/Java-readability

/**
 * Parses markup read from a byte stream into a new {@code Document}.
 * A {@code Handler} built around the new document is installed as the parser's content
 * handler, and a {@code LocalErrorHandler} as its error handler.
 *
 * @param data the byte stream to parse
 * @param baseUri base URI passed to the new document
 * @return the document handed to the content handler
 * @throws SAXException declared for the underlying parse call
 * @throws IOException declared for the underlying parse call
 */
public Document parse(InputStream data, String baseUri) throws SAXException, IOException {
  // InputSource(InputStream) sets the byte stream, same as the no-arg constructor plus setter.
  InputSource byteSource = new InputSource(data);
  SAXParser htmlParser = new SAXParser();
  Document parsed = new Document(baseUri);
  htmlParser.setContentHandler(new Handler(parsed));
  htmlParser.setErrorHandler(new LocalErrorHandler());
  htmlParser.parse(byteSource);
  return parsed;
}

代码示例来源:origin: astamuse/asta4d

public Component(Element elem, AttributesRequire attrs) throws Exception {
  Document doc = new Document("");
  doc.appendElement("body");
  doc.body().appendChild(elem);
  renderedElement = renderTemplate(doc, attrs);
}

代码示例来源:origin: astamuse/asta4d

/**
 * Renders this object as HTML: the element from {@code toElement()} is appended to a
 * new empty document, {@code RenderUtil.applyMessages} and
 * {@code RenderUtil.applyClearAction} are applied to it, and its HTML is returned.
 *
 * @return the HTML of the processed document
 */
public String toHtml() {
  final Document holder = new Document("");
  holder.appendChild(toElement());

  RenderUtil.applyMessages(holder);
  RenderUtil.applyClearAction(holder, true);

  return holder.html();
}

代码示例来源:origin: com.vaadin/flow-server

static Document getBootstrapPage(BootstrapContext context) {
  Document document = new Document("");
  DocumentType doctype = new DocumentType("html", "", "",
      document.baseUri());

代码示例来源:origin: chimbori/crux

/**
 * Runs the clean-up passes over {@code topNode} and copies its child nodes into a new document.
 * A null {@code topNode} yields an empty document.
 *
 * @param topNode node to clean up in place; may be null
 * @return a new document holding clones of the cleaned node's children
 */
static Document postprocess(Element topNode) {
 Log.i("postprocess");
 final Document cleaned = new Document("");
 if (topNode != null) {
  // The passes mutate topNode and run in this fixed order.
  removeNodesWithNegativeScores(topNode);
  replaceLineBreaksWithSpaces(topNode);
  removeUnlikelyChildNodes(topNode);
  removeTagsButRetainContent(topNode);
  removeTagsNotLikelyToBeParagraphs(topNode);
  removeTopLevelTagsNotLikelyToBeParagraphs(topNode);
  removeShortParagraphs(topNode);
  removeDisallowedAttributes(topNode);
  for (Node child : topNode.childNodes()) {
   cleaned.appendChild(child.clone());  // TODO: Don't copy each item separately.
  }
 }
 return cleaned;
}

相关文章