org.apache.uima.cas.CAS.setDocumentText()方法的使用及代码示例

x33g5p2x  于2022-01-18 转载在 其他  
字(10.0k)|赞(0)|评价(0)|浏览(120)

本文整理了Java中org.apache.uima.cas.CAS.setDocumentText()方法的一些代码示例,展示了CAS.setDocumentText()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。CAS.setDocumentText()方法的具体详情如下:
包路径:org.apache.uima.cas.CAS
类名称:CAS
方法名:setDocumentText

CAS.setDocumentText介绍

[英]Set the document text. Once set, Sofa data is immutable, and cannot be set again until the CAS has been reset.
[中]设置文档文本。一旦设置,Sofa数据是不可变的,并且在重置CAS之前无法再次设置。

代码示例

代码示例来源:origin: nlpie/biomedicus

/**
 * Finishes the destination view: stores the text currently held by {@code sofaBuilder} as the
 * view's document text, then adds every completed annotation to the view's indexes.
 */
@Override
public void finishDestination() {
  // The document text is set before any annotation is indexed.
  String sofaText = sofaBuilder.toString();
  destinationView.setDocumentText(sofaText);
  completedAnnotations.forEach(fs -> destinationView.addFsToIndexes(fs));
}

代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.io.pdf-asl

/**
 * End-of-document callback: writes the collected text into the CAS and emits a trace-level
 * closing marker.
 *
 * @param aPdf the document that has ended; not read by this method
 * @throws IOException declared by the signature; no statement in this body throws it directly
 */
@Override
protected void endDocument(final PDDocument aPdf)
  throws IOException
{
  final String documentText = text.toString();
  cas.setDocumentText(documentText);

  // Nothing more to do unless trace logging is switched on.
  if (!log.isTraceEnabled()) {
    return;
  }
  log.trace("</document>");
}

代码示例来源:origin: org.apache.uima/uimaj-tools

/**
 * Initializes the CAS: applies the configured language, then uses the current content of
 * {@code textArea} as the document text.
 */
private final void initCas() {
  cas.setDocumentLanguage(language);

  final String documentText = textArea.getText();
  cas.setDocumentText(documentText);
}

代码示例来源:origin: CLLKazan/UIMA-Ext

/**
 * {@inheritDoc}
 */
@Override
public void getNext(CAS cas) throws IOException, CollectionException {
  // A single consumeLine() call supplies the complete document text for this CAS.
  cas.setDocumentText(consumeLine());
}

代码示例来源:origin: nlpie/biomedicus

/**
 * Creates a view called {@code name} in the underlying CAS, sets {@code text} as that view's
 * document text, and wraps the view in a new {@code CASDocument}.
 *
 * @param name the name of the view to create
 * @param text the document text for the new view
 * @return the document backed by the newly created view
 */
@Nonnull
@Override
public Document addDocument(@Nonnull String name, @Nonnull String text) {
  final CAS documentView = cas.createView(name);
  documentView.setDocumentText(text);

  final CASDocument document = new CASDocument(documentView, labelAdapters);
  return document;
}

代码示例来源:origin: de.tudarmstadt.ukp.dkpro.bigdata/de.tudarmstadt.ukp.dkpro.bigdata.io.hadoop

/**
 * Resets the CAS carried by {@code valueTo} and fills it with document text.
 *
 * <p>When a {@code textExtractor} is configured, the text comes from
 * {@code extractDocumentText(keyFrom, valueFrom)}; otherwise {@code valueFrom} is used as is.
 *
 * @param keyFrom the input key, passed to the extractor when one is configured
 * @param valueFrom the input value, used directly when no extractor is configured
 * @param valueTo the writable whose CAS is reset and populated
 */
@Override
public void convertValue(Text keyFrom, Text valueFrom, CASWritable valueTo)
{
  final CAS target = valueTo.getCAS();
  // The CAS is reset before the new document text is set.
  target.reset();

  final Text source = (textExtractor == null)
      ? valueFrom
      : textExtractor.extractDocumentText(keyFrom, valueFrom);
  target.setDocumentText(source.toString());
}
}

代码示例来源:origin: org.nd4j/canova-data-nlp

/**
 * Runs the analysis engine over the given text and returns the CAS holding the result.
 * You must release the returned cas yourself.
 *
 * <p>If the engine fails and the text is non-empty, processing is retried once on a freshly
 * retrieved CAS. A second failure, or a failure on null/empty text, is rethrown wrapped in a
 * {@link RuntimeException}.
 *
 * @param text the text to process
 * @return the processed cas
 * @throws RuntimeException wrapping the last {@code AnalysisEngineProcessException} when
 *         processing does not succeed
 */
public CAS process(String text) {
  // Previously the catch branch recursed into process(text) without any bound, so an engine
  // failure that repeats for the same text ended in a StackOverflowError and pulled a new
  // CAS on every level. Non-empty text now gets exactly one retry.
  final int maxAttempts = (text != null && !text.isEmpty()) ? 2 : 1;
  AnalysisEngineProcessException lastFailure = null;

  for (int attempt = 0; attempt < maxAttempts; attempt++) {
    CAS cas = retrieve();
    cas.setDocumentText(text);
    try {
      analysisEngine.process(cas);
      return cas;
    } catch (AnalysisEngineProcessException e) {
      // NOTE(review): the CAS of a failed attempt is not handed back here (unchanged from
      // the earlier code) - confirm whether it should be released.
      lastFailure = e;
    }
  }

  throw new RuntimeException(lastFailure);
}

代码示例来源:origin: org.datavec/datavec-data-nlp

/**
 * Processes the given text with the analysis engine and returns the resulting CAS.
 * You must release the returned cas yourself.
 *
 * <p>A failed run on non-empty text is retried a single time with a newly retrieved CAS.
 * If that also fails, or the text is null/empty, the failure is rethrown wrapped in a
 * {@link RuntimeException}.
 *
 * @param text the text to process
 * @return the processed cas
 * @throws RuntimeException wrapping the last {@code AnalysisEngineProcessException} when
 *         processing does not succeed
 */
public CAS process(String text) {
  // The earlier implementation called process(text) from its own catch block with no limit;
  // a failure that repeats for the same text therefore recursed until StackOverflowError.
  // The retry is now bounded to one extra attempt.
  final boolean retryable = text != null && !text.isEmpty();
  final int maxAttempts = retryable ? 2 : 1;
  AnalysisEngineProcessException lastFailure = null;

  for (int attempt = 0; attempt < maxAttempts; attempt++) {
    CAS cas = retrieve();
    cas.setDocumentText(text);
    try {
      analysisEngine.process(cas);
      return cas;
    } catch (AnalysisEngineProcessException e) {
      // NOTE(review): a CAS from a failed attempt is not given back here, as before -
      // confirm whether it needs to be released.
      lastFailure = e;
    }
  }

  throw new RuntimeException(lastFailure);
}

代码示例来源:origin: org.apache.uima/uimaj-ep-cas-editor-ide

/**
 * Builds a CAS from the given text and language, serializes it in the requested format, and
 * returns the serialized bytes as a stream.
 *
 * @param fileName used only in the error message when serialization fails
 * @param text the document text; passed through {@code removeNonXmlChars} before being set
 * @param language the document language to set on the CAS
 * @param format the serialization format handed to {@code CasIOUtils.save}
 * @return an in-memory stream over the serialized CAS
 * @throws TaeError if saving the CAS raises an {@code IOException}
 */
private InputStream getDocument(String fileName, String text, String language,
    SerialFormat format) {
  final CAS documentCas = createEmtpyCAS();
  final String sanitizedText = removeNonXmlChars(text);
  documentCas.setDocumentText(sanitizedText);
  documentCas.setDocumentLanguage(language);

  // 40000 is the initial buffer capacity; the stream grows as needed.
  final ByteArrayOutputStream buffer = new ByteArrayOutputStream(40000);
  try {
    CasIOUtils.save(documentCas, buffer, format);
  } catch (IOException e) {
    final String message = "Failed to import: " + fileName + "\n\n" + e.getMessage();
    throw new TaeError(message, e);
  }

  final byte[] serialized = buffer.toByteArray();
  return new ByteArrayInputStream(serialized);
}

代码示例来源:origin: jpatanooga/Canova

/**
 * Applies the analysis engine to the given text and returns the CAS with the results.
 * You must release the returned cas yourself.
 *
 * <p>When the engine fails on non-empty text, one further attempt is made on a newly
 * retrieved CAS. If no attempt succeeds, the last failure is rethrown wrapped in a
 * {@link RuntimeException}; null or empty text is never retried.
 *
 * @param text the text to process
 * @return the processed cas
 * @throws RuntimeException wrapping the last {@code AnalysisEngineProcessException} when
 *         processing does not succeed
 */
public CAS process(String text) {
  // Bounded retry: the former recursive call from the catch block had no termination
  // condition for a failure that keeps recurring, which led to a StackOverflowError.
  int remainingAttempts = (text != null && !text.isEmpty()) ? 2 : 1;
  AnalysisEngineProcessException lastFailure = null;

  while (remainingAttempts > 0) {
    remainingAttempts--;
    CAS cas = retrieve();
    cas.setDocumentText(text);
    try {
      analysisEngine.process(cas);
      return cas;
    } catch (AnalysisEngineProcessException e) {
      // NOTE(review): the CAS used by a failed attempt is not returned anywhere, same as
      // in the earlier code - confirm whether it should be released.
      lastFailure = e;
    }
  }

  throw new RuntimeException(lastFailure);
}

代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.io.xml-asl

/**
 * Fills the CAS from the next queued node.
 *
 * <p>The CAS is first initialized with document meta data and, unless
 * {@code StringUtils.isWhitespace} reports the configured language as whitespace, with that
 * language. Text gathered by {@code processNode} becomes the document text; when the result
 * is reported as whitespace, the placeholder {@code "[Parse error]"} is stored instead.
 *
 * @param cas the CAS to populate
 * @throws IOException declared by the signature; may come from the helpers called here
 */
@Override
public void getNext(CAS cas)
  throws IOException
{
  initCas(cas, currentFileResource, null);
  if (!StringUtils.isWhitespace(language)) {
    cas.setDocumentLanguage(language);
  }

  // Collects whatever text processNode extracts from the polled node, if there is one.
  StringBuilder collected = new StringBuilder();
  Node next = nodes.poll();
  if (next != null) {
    processNode(cas, next, collected);
  }

  // Fall back to a marker text when nothing usable was parsed out.
  String parsedText = collected.toString();
  cas.setDocumentText(StringUtils.isWhitespace(parsedText) ? "[Parse error]" : parsedText);
}

代码示例来源:origin: DigitalPebble/behemoth

/**
 * Runs the UIMA engine over one Behemoth document and converts the CAS content back into it.
 * Documents whose text is {@code null} are skipped with a debug message.
 *
 * @param behemoth the document to process
 * @param reporter passed through to {@code convertCASToBehemoth}
 * @throws AnalysisEngineProcessException if the engine fails while processing the CAS
 */
protected void doProcess(BehemothDocument behemoth, Reporter reporter) throws AnalysisEngineProcessException {
  // Nothing to analyze without text - log and skip.
  if (behemoth.getText() == null) {
    LOG.debug(behemoth.getUrl().toString() + " has null text");
    return;
  }

  // Language comes from the "uima.language" setting and defaults to "en".
  String lang = this.config.get("uima.language", "en");
  cas.setDocumentLanguage(lang);
  cas.setDocumentText(behemoth.getText());

  tae.process(cas);
  convertCASToBehemoth(cas, behemoth, reporter);
}

代码示例来源:origin: CLLKazan/UIMA-Ext

/**
 * {@inheritDoc}
 */
@Override
public void getNext(CAS cas) throws IOException, CollectionException {
  final boolean exhausted = !dbIterator.hasNext();
  if (exhausted) {
    throw new NoSuchElementException();
  }

  final DbTuple row = dbIterator.next();
  consumedCount++;
  cas.setDocumentText(row.text);

  // Record where the text came from as document metadata in the CAS indexes.
  try {
    final DocumentMetadata metadata = new DocumentMetadata(cas.getJCas());
    metadata.setSourceUri(row.url);
    metadata.addToIndexes();
  } catch (CASException casFailure) {
    throw new CollectionException(casFailure);
  }
}

代码示例来源:origin: CLLKazan/UIMA-Ext

/**
 * {@inheritDoc}
 */
@Override
public void getNext(CAS aCAS) throws IOException, CollectionException {
  if (!hasNext()) {
    throw new CollectionException(new NoSuchElementException());
  }

  // Advance the cursor before reading, so the index moves on even if the read below fails.
  final int nextIdx = lastReadFileIdx + 1;
  final File nextFile = files.get(nextIdx);
  lastReadFileIdx = nextIdx;

  final String content = FileUtils.readFileToString(nextFile, encoding);
  aCAS.setDocumentText(content);

  // Attach the file's URI as document metadata in the CAS indexes.
  try {
    final DocumentMetadata metadata = new DocumentMetadata(aCAS.getJCas());
    metadata.setSourceUri(getURIForMetadata(nextFile).toString());
    metadata.addToIndexes();
  } catch (CASException casFailure) {
    throw new IllegalStateException(casFailure);
  }
}

代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.io.text-asl

/**
 * Reads the next file into the CAS as document text.
 *
 * <p>The resource stream is wrapped by {@code CompressionUtils.getInputStream} and buffered.
 * When the source encoding equals {@code ENCODING_AUTO}, a {@code CharsetDetector} supplies
 * the reader; otherwise the stream is decoded with the configured encoding.
 *
 * @param aJCas the CAS to populate
 * @throws IOException if reading the resource fails
 * @throws CollectionException declared by the signature; may come from the helpers called here
 */
@Override
public void getNext(CAS aJCas)
  throws IOException, CollectionException
{
  Resource resource = nextFile();
  initCas(aJCas, resource);

  try (InputStream input = new BufferedInputStream(
      CompressionUtils.getInputStream(resource.getLocation(), resource.getInputStream()))) {
    final boolean autoDetect = ENCODING_AUTO.equals(sourceEncoding);

    String content;
    if (!autoDetect) {
      content = IOUtils.toString(input, sourceEncoding);
    }
    else {
      // Let the detector pick the charset and hand back a matching reader.
      CharsetDetector charsetDetector = new CharsetDetector();
      content = IOUtils.toString(charsetDetector.getReader(input, null));
    }

    aJCas.setDocumentText(content);
  }
}
}

代码示例来源:origin: org.apache.uima/textmarker-core

/**
 * Runs the PlainTextAnnotator over a UTF-8 text file and prints the short type name and
 * covered text of every annotation in the resulting CAS.
 *
 * @param args optional; {@code args[0]} is the path of the text file to analyze. Without
 *        arguments the previously hard-coded development path is used, so existing
 *        invocations behave as before.
 * @throws IllegalStateException if the PlainTextAnnotator descriptor is not on the classpath
 * @throws Exception if the descriptor cannot be parsed, the engine cannot be produced, the
 *         file cannot be read, or processing fails
 */
public static void main(String[] args) throws Exception {
  // Look for the descriptor at the classpath root first, then under its package path.
  URL url = TextMarkerEngine.class.getClassLoader().getResource("PlainTextAnnotator.xml");
  if (url == null) {
    url = PlainTextAnnotator.class.getClassLoader().getResource(
        "org/apache/uima/textmarker/engine/PlainTextAnnotator.xml");
  }
  if (url == null) {
    // Fail with a clear message instead of a bare NullPointerException further down.
    throw new IllegalStateException("PlainTextAnnotator.xml not found on the classpath");
  }

  // The input file can be supplied on the command line; otherwise keep the old default.
  String inputPath = (args != null && args.length > 0)
      ? args[0]
      : "D:/work/workspace-textmarker/Test/input/list1.txt";

  XMLInputSource in = new XMLInputSource(url);
  ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
  AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);
  CAS cas = ae.newCAS();
  cas.setDocumentText(FileUtils.file2String(new File(inputPath), "UTF-8"));
  ae.process(cas);

  AnnotationIndex<AnnotationFS> annotationIndex = cas.getAnnotationIndex();
  for (AnnotationFS annotationFS : annotationIndex) {
    System.out.println(annotationFS.getType().getShortName() + " :  "
        + annotationFS.getCoveredText());
  }
}

代码示例来源:origin: org.apache.uima/ConceptMapper

/**
 * Processes one text with the analysis engine, passes the CAS to {@code processCAS}, and
 * resets the CAS afterwards.
 *
 * <p>An {@code AnalysisEngineProcessException} is reported via its stack trace and does not
 * stop the run; {@code processCAS} is still called on whatever the CAS contains.
 *
 * @param text the document text to analyze
 */
public void runCPM(String text) {
  try {
    cas.setDocumentText(text);
    cas.setDocumentLanguage(langID);
    try {
      ae.process(cas);
    } catch (AnalysisEngineProcessException e) {
      // Best effort, as before: report the failure and continue with the CAS as it is.
      e.printStackTrace();
    }
    processCAS(cas);
  } finally {
    // Document text cannot be set again until the CAS has been reset, so the reset must
    // also run when a step above throws; otherwise the next call would fail on
    // setDocumentText.
    cas.reset();
  }
}

代码示例来源:origin: org.apache.uima/uimaj-tools

/**
 * Strips the markup from the "xmlDocument" view and stores the remaining text in a new
 * "plainTextDocument" view.
 *
 * <p>The new view takes its language from the "_InitialView" view. If the XML view's
 * annotation index for {@code sourceDocInfoType} has an entry, the first one is also added
 * to the new view's index repository.
 *
 * @param aCAS the CAS containing the XML view
 * @throws AnalysisEngineProcessException if creating the parser or parsing the XML fails
 */
public void process(CAS aCAS) throws AnalysisEngineProcessException {
  // The XML source is read from the sofa data stream of its own view.
  CAS xmlView = aCAS.getView("xmlDocument");
  InputStream xmlInput = xmlView.getSofa().getSofaDataStream();

  // The handler gathers the detagged text while the SAX parser walks the document.
  DetagHandler detagger = new DetagHandler();
  try {
    parserFactory.newSAXParser().parse(xmlInput, detagger);
  } catch (Exception e) {
    throw new AnalysisEngineProcessException(e);
  }

  CAS textView = aCAS.createView("plainTextDocument");
  textView.setDocumentText(detagger.getDetaggedText());
  String initialLanguage = aCAS.getView("_InitialView").getDocumentLanguage();
  textView.setDocumentLanguage(initialLanguage);

  // Index the SourceDocumentInformation object, if there is one, in the new sofa.
  // This is needed by the SemanticSearchCasIndexer
  Iterator sourceInfos = xmlView.getAnnotationIndex(sourceDocInfoType).iterator();
  if (!sourceInfos.hasNext()) {
    return;
  }
  FeatureStructure sourceInfo = (FeatureStructure) sourceInfos.next();
  textView.getIndexRepository().addFS(sourceInfo);
}

代码示例来源:origin: org.apache.lucene/lucene-analyzers-uima

/**
 * Runs the analysis engine over the tokenizer input.
 * <p>
 * The engine is obtained on first use. The {@link #cas} is created on first use and reset on
 * every later call, then filled with the input text and processed, so that it ends up holding
 * the extracted metadata (UIMA annotations, feature structures).
 *
 * @throws ResourceInitializationException declared by the signature; may be raised while
 *         obtaining the engine or creating the CAS
 * @throws AnalysisEngineProcessException if the engine fails while processing the CAS
 * @throws IOException If there is a low-level I/O error.
 */
protected void analyzeInput() throws ResourceInitializationException, AnalysisEngineProcessException, IOException {
  // Lazily obtain the engine the first time input is analyzed.
  if (ae == null) {
    ae = AEProviderFactory.getInstance().getAEProvider(null, descriptorPath, configurationParameters).getAE();
  }

  // Reuse an existing CAS after a reset; create one only when none exists yet.
  if (cas != null) {
    cas.reset();
  } else {
    cas = ae.newCAS();
  }

  cas.setDocumentText(toString(input));
  ae.process(cas);
}

代码示例来源:origin: nlpie/biomedicus

/**
 * Wraps a CAS as an artifact and records the artifact id in a dedicated "metadata" view.
 *
 * <p>The constructor looks up the "ArtifactMetadata" type with its "key" and "value"
 * features, creates the "metadata" view with empty document text, stores an "ArtifactID"
 * feature structure carrying {@code artifactID} in that view's indexes, and fetches the
 * "metadata" index for later use.
 *
 * @param labelAdapters the label adapters to keep; may be {@code null}
 * @param cas the CAS backing this artifact
 * @param artifactID the identifier written into the metadata view
 */
CASArtifact(
  @Nullable LabelAdapters labelAdapters,
  CAS cas,
  String artifactID
) {
  this.labelAdapters = labelAdapters;
  this.cas = cas;
  this.artifactID = artifactID;

  // Types and features used for key/value metadata entries.
  final TypeSystem types = cas.getTypeSystem();
  metadataType = types.getType("ArtifactMetadata");
  keyFeature = metadataType.getFeatureByBaseName("key");
  valueFeature = metadataType.getFeatureByBaseName("value");

  // The metadata view gets empty text; it only carries feature structures.
  metadataCas = cas.createView("metadata");
  metadataCas.setDocumentText("");

  // Store the artifact id as a feature structure in the metadata view.
  final Type artifactIdType = types.getType("ArtifactID");
  final Feature artifactIdFeature = artifactIdType.getFeatureByBaseName("artifactID");
  final FeatureStructure idStructure = metadataCas.createFS(artifactIdType);
  idStructure.setStringValue(artifactIdFeature, artifactID);
  metadataCas.addFsToIndexes(idStructure);

  metadataIndex = metadataCas.getIndexRepository().getIndex("metadata", metadataType);
  casMetadata = new CASMetadata();
}

相关文章