edu.stanford.nlp.pipeline.Annotation.set()方法的使用及代码示例

x33g5p2x  于2022-01-16 转载在 其他  
字(13.8k)|赞(0)|评价(0)|浏览(83)

本文整理了Java中edu.stanford.nlp.pipeline.Annotation.set()方法的一些代码示例,展示了Annotation.set()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Annotation.set()方法的具体详情如下:
包路径:edu.stanford.nlp.pipeline.Annotation
类名称:Annotation
方法名:set

Annotation.set介绍

暂无

代码示例

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Creates an Annotation whose CoreAnnotations.TextAnnotation key is
 * initialized to the given text.
 *
 * @param text the raw document text to store on this annotation
 */
public Annotation(String text) {
 set(CoreAnnotations.TextAnnotation.class, text);
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Stores the extracted quotes on the document annotation and logs a status message.
 * Unclosed quotes are only attached when EXTRACT_UNCLOSED is enabled.
 *
 * @param annotation the document to attach results to
 * @param quotes     the properly closed quotations found in the document
 * @param unclosed   quotations that were opened but never closed
 * @param message    status text to log after attaching the quotes
 */
private void setAnnotations(Annotation annotation,
              List<CoreMap> quotes,
              List<CoreMap> unclosed,
              String message) {
 annotation.set(CoreAnnotations.QuotationsAnnotation.class, quotes);
 log.info(message);
 if (EXTRACT_UNCLOSED) {
  annotation.set(CoreAnnotations.UnclosedQuotationsAnnotation.class, unclosed);
 }
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Builds an Annotation representing a single quotation, populated with
 * character offsets, and (when available) doc id, token span, and sentence span.
 *
 * @param surfaceForm        the quote text itself
 * @param begin              character offset where the quote starts
 * @param end                character offset where the quote ends
 * @param quoteTokens        tokens of the quote, or null if unavailable
 * @param tokenOffset        index of the quote's first token in the document
 * @param sentenceBeginIndex index of the sentence where the quote starts
 * @param sentenceEndIndex   index of the sentence where the quote ends
 * @param docID              document id, or null if unavailable
 * @return the populated quote annotation
 */
public static Annotation makeQuote(String surfaceForm, int begin, int end,
                  List<CoreLabel> quoteTokens,
                  int tokenOffset,
                  int sentenceBeginIndex,
                  int sentenceEndIndex,
                  String docID) {
 Annotation quoteAnno = new Annotation(surfaceForm);
 // Character-level span is always known.
 quoteAnno.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin);
 quoteAnno.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, end);
 if (docID != null) {
  quoteAnno.set(CoreAnnotations.DocIDAnnotation.class, docID);
 }
 // Token-level span is only set when the tokens were supplied.
 if (quoteTokens != null) {
  quoteAnno.set(CoreAnnotations.TokensAnnotation.class, quoteTokens);
  quoteAnno.set(CoreAnnotations.TokenBeginAnnotation.class, tokenOffset);
  quoteAnno.set(CoreAnnotations.TokenEndAnnotation.class, tokenOffset + quoteTokens.size() - 1);
 }
 quoteAnno.set(CoreAnnotations.SentenceBeginAnnotation.class, sentenceBeginIndex);
 quoteAnno.set(CoreAnnotations.SentenceEndAnnotation.class, sentenceEndIndex);
 return quoteAnno;
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Merges two adjacent sentences into a single new sentence annotation.
 * The text is the concatenation of both sentences' text; character and token
 * offsets span from the first token to the last token of the combined list.
 *
 * @param sentenceTokens the tokens of the combined sentence
 * @param prevSentence   the earlier of the two sentences being merged
 * @param sentence       the later of the two sentences being merged
 * @return a new sentence CoreMap covering both inputs, with a freshly parsed
 *         enhanced++ dependency graph attached
 */
private static CoreMap constructSentence(List<CoreLabel> sentenceTokens, CoreMap prevSentence, CoreMap sentence) {
  // Character span: from the first token's begin offset to the last token's end offset.
  int charBegin = sentenceTokens.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
  int lastIdx = sentenceTokens.size() - 1;
  int charEnd = sentenceTokens.get(lastIdx).get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
  String mergedText = prevSentence.get(CoreAnnotations.TextAnnotation.class) + sentence.get(CoreAnnotations.TextAnnotation.class);

  Annotation merged = new Annotation(mergedText);
  merged.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, charBegin);
  merged.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, charEnd);
  merged.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
  // Token span starts where the earlier sentence started and ends where the later one ended.
  merged.set(CoreAnnotations.TokenBeginAnnotation.class, prevSentence.get(CoreAnnotations.TokenBeginAnnotation.class));
  merged.set(CoreAnnotations.TokenEndAnnotation.class, sentence.get(CoreAnnotations.TokenEndAnnotation.class));
  merged.set(CoreAnnotations.ParagraphIndexAnnotation.class, sentence.get(CoreAnnotations.ParagraphIndexAnnotation.class));

  // Re-parse the merged sentence so its dependency graph reflects the new text.
  merged.set(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class, getParse(merged));

  return merged;
 }

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Makes a new Annotation from a List of tokenized sentences.
 * The document's token list is the concatenation of all sentence tokens,
 * and the document text is the concatenation of each sentence's text
 * (falling back to a token-joined string for sentences without text).
 *
 * @param sentences the already-tokenized sentences of the document
 */
public Annotation(List<CoreMap> sentences) {
 super();
 this.set(CoreAnnotations.SentencesAnnotation.class, sentences);
 List<CoreLabel> allTokens = new ArrayList<>();
 StringBuilder fullText = new StringBuilder();
 for (CoreMap sentence : sentences) {
  List<CoreLabel> sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
  allTokens.addAll(sentenceTokens);
  if (sentence.containsKey(CoreAnnotations.TextAnnotation.class)) {
   fullText.append(sentence.get(CoreAnnotations.TextAnnotation.class));
  } else {
   // No stored text for this sentence: reconstruct a best-effort string
   // from its tokens, separating it from earlier text with a newline.
   if (fullText.length() > 0) {
    fullText.append('\n');
   }
   fullText.append(SentenceUtils.listToString(sentenceTokens));
  }
 }
 this.set(CoreAnnotations.TokensAnnotation.class, allTokens);
 this.set(CoreAnnotations.TextAnnotation.class, fullText.toString());
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Deep copy of the sentence: new entity/relation/event lists are created,
 * but the ExtractionObjects inside them are NOT deep-copied — both sentences
 * share the same mention objects.
 *
 * @param sentence the sentence to copy
 * @return a new Annotation sharing the sentence's tokens, trees, graphs and
 *         doc id, with freshly allocated mention lists
 */
public static Annotation sentenceDeepMentionCopy(Annotation sentence) {
 Annotation copy = new Annotation(sentence.get(CoreAnnotations.TextAnnotation.class));
 // Shallow-share the core linguistic annotations.
 copy.set(CoreAnnotations.TokensAnnotation.class, sentence.get(CoreAnnotations.TokensAnnotation.class));
 copy.set(TreeCoreAnnotations.TreeAnnotation.class, sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
 copy.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class));
 copy.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class));
 copy.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class));
 copy.set(CoreAnnotations.DocIDAnnotation.class, sentence.get(CoreAnnotations.DocIDAnnotation.class));
 // Deep copy of the mention LISTS only (elements are shared).
 List<EntityMention> entities = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
 if (entities != null) {
  copy.set(MachineReadingAnnotations.EntityMentionsAnnotation.class, new ArrayList<>(entities));
 }
 List<RelationMention> relations = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
 if (relations != null) {
  copy.set(MachineReadingAnnotations.RelationMentionsAnnotation.class, new ArrayList<>(relations));
 }
 List<EventMention> events = sentence.get(MachineReadingAnnotations.EventMentionsAnnotation.class);
 if (events != null) {
  copy.set(MachineReadingAnnotations.EventMentionsAnnotation.class, new ArrayList<>(events));
 }
 return copy;
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Annotates raw text with the given pipeline, first attaching the document
 * date (used as the reference date for temporal expression resolution).
 *
 * @param pipeline the annotation pipeline to run
 * @param text     the raw text to annotate
 * @param date     the document date string to store before annotating
 * @return the fully annotated document
 */
public static Annotation textToAnnotation(AnnotationPipeline pipeline, String text, String date) {
 Annotation doc = new Annotation(text);
 doc.set(CoreAnnotations.DocDateAnnotation.class, date);
 pipeline.annotate(doc);
 return doc;
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Runs the given pipeline over a single pre-tokenized sentence by wrapping it
 * in a one-sentence document. The sentence CoreMap is annotated in place via
 * the shared sentence/token references.
 *
 * @param sentence the sentence to annotate
 * @param pipeline the pipeline to run over it
 */
public static void annotate(CoreMap sentence, AnnotationPipeline pipeline) {
 // Fake a document whose text is the space-joined tokens of this sentence.
 Annotation doc = new Annotation(StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class), " "));
 doc.set(CoreAnnotations.TokensAnnotation.class, sentence.get(CoreAnnotations.TokensAnnotation.class));
 doc.set(CoreAnnotations.SentencesAnnotation.class, Collections.singletonList(sentence));
 pipeline.annotate(doc);
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Keeps only the first {@code percentage} fraction of the sentences from the
 * given corpus. If {@code percentage >= 1.0} the corpus is returned unchanged.
 *
 * @param corpus     the full training corpus
 * @param percentage fraction of sentences to keep, in [0, 1)
 * @return a new Annotation containing only the leading sentences
 */
private static Annotation keepPercentage(Annotation corpus, double percentage) {
 log.info("Using fraction of train: " + percentage);
 if (percentage >= 1.0) {
  return corpus;
 }
 List<CoreMap> fullSents = corpus.get(SentencesAnnotation.class);
 // Same arithmetic as the original copy loop `for (i = 0; i < size*percentage; i++)`:
 // the number of kept sentences is the ceiling of the fractional count.
 double smallSize = (double) fullSents.size() * percentage;
 int keep = Math.max(0, (int) Math.ceil(smallSize));
 List<CoreMap> sents = new ArrayList<>(fullSents.subList(0, keep));
 log.info("TRAIN corpus size reduced from " + fullSents.size() + " to " + sents.size());
 Annotation smaller = new Annotation("");
 smaller.set(SentencesAnnotation.class, sents);
 return smaller;
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Classifies the document text with the column-data classifier and stores the
 * predicted label on the annotation.
 * A dummy label column is prepended because the classifier expects
 * answer = column 0 and datum = column 1 input lines.
 */
@Override
public void annotate(Annotation annotation) {
 if (verbose) {
  System.out.println("Adding column data classifier annotation...");
 }
 String line = DUMMY_LABEL_COLUMN + annotation.get(CoreAnnotations.TextAnnotation.class);
 if (verbose) {
  System.out.println("Dummy column: " + line);
 }
 // todo [cdm 2016]: At the moment this is hardwired to only work with answer = col 0, datum = col 1 classifier
 Datum<String,String> datum = cdcClassifier.makeDatumFromLine(line);
 if (verbose) {
  System.out.println("Datum: " + datum.toString());
 }
 String predictedLabel = cdcClassifier.classOf(datum);
 annotation.set(CoreAnnotations.ColumnDataClassifierAnnotation.class, predictedLabel);
 if (verbose) {
  System.out.println(
      String.format("annotation=%s", annotation.get(CoreAnnotations.ColumnDataClassifierAnnotation.class)));
 }
 if (verbose) {
  System.out.println("Done.");
 }
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Wraps a list of word strings in a sentence CoreMap: the sentence text is the
 * space-joined words, and each word becomes a CoreLabel token.
 *
 * @param sentWords the words of the sentence, in order
 * @return a sentence annotation carrying the text and its tokens
 */
private static CoreMap wordsToSentence(List<String> sentWords) {
 Annotation sentence = new Annotation(StringUtils.join(sentWords, " "));
 List<CoreLabel> tokens = new ArrayList<>(sentWords.size());
 for (String word : sentWords) {
  CoreLabel token = tokenFactory.makeToken();
  token.set(CoreAnnotations.TextAnnotation.class, word);
  tokens.add(token);
 }
 sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
 return sentence;
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Annotates the document and then invokes the callback. With a single thread
 * configured this runs synchronously on the caller's thread; otherwise the
 * work is dispatched to a new thread, gated by the availableProcessors
 * semaphore.
 *
 * @param annotation the document to annotate
 * @param callback   invoked with the (possibly failed) annotation when done;
 *                   on failure the Throwable is stored under
 *                   ExceptionAnnotation instead of being thrown
 */
public void annotate(final Annotation annotation, final Consumer<Annotation> callback){
 if (PropertiesUtils.getInt(properties, "threads", 1) == 1) {
  annotate(annotation);
  callback.accept(annotation);
 } else {
  try {
   availableProcessors.acquire();
  } catch (InterruptedException e) {
   throw new RuntimeInterruptedException(e);
  }
  new Thread(() -> {
   try {
    try {
     annotate(annotation);
    } catch (Throwable t) {
     // Record the failure on the document rather than killing the thread.
     annotation.set(CoreAnnotations.ExceptionAnnotation.class, t);
    }
    callback.accept(annotation);
   } finally {
    // Always return the permit, even if the callback throws; otherwise the
    // semaphore leaks a permit and later calls block forever in acquire().
    availableProcessors.release();
   }
  }).start();
 }
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Runs the base pipeline plus the given time annotator over a piece of text,
 * optionally anchoring temporal expressions to the supplied document date.
 *
 * @param sentence      the text to process
 * @param dateString    reference date for temporal resolution; ignored when
 *                      null or empty
 * @param timeAnnotator the temporal annotator to apply after the pipeline
 * @return the annotated document
 */
public Annotation process(String sentence, String dateString, Annotator timeAnnotator) {
 log.info("Processing text \"" + sentence + "\" with dateString = " + dateString);
 Annotation anno = new Annotation(sentence);
 boolean hasDate = dateString != null && !dateString.isEmpty();
 if (hasDate) {
  anno.set(CoreAnnotations.DocDateAnnotation.class, dateString);
 }
 pipeline.annotate(anno);
 timeAnnotator.annotate(anno);
 return anno;
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Runs coreference resolution over the document and stores the resulting
 * chains under CorefChainAnnotation. Requires SentencesAnnotation to be
 * present; logs an error and returns otherwise. Checked exceptions from the
 * coref system are wrapped in RuntimeException.
 */
@Override
public void annotate(Annotation annotation){
 try {
  if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
   log.error("this coreference resolution system requires SentencesAnnotation!");
   return;
  }
  // Marked-discourse mode kicks in when the input carries speaker info.
  if (hasSpeakerAnnotations(annotation)) {
   annotation.set(CoreAnnotations.UseMarkedDiscourseAnnotation.class, true);
  }
  Document corefDoc = corefSystem.docMaker.makeDocument(annotation);
  Map<Integer, CorefChain> chains = corefSystem.coref(corefDoc);
  annotation.set(CorefCoreAnnotations.CorefChainAnnotation.class, chains);
  // for backward compatibility
  if (OLD_FORMAT) {
   annotateOldFormat(chains, corefDoc);
  }
 } catch (RuntimeException e) {
  throw e;
 } catch (Exception e) {
  throw new RuntimeException(e);
 }
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Parses a token sequence under the given parser constraints by wrapping the
 * tokens in a one-sentence document and running the parser annotator on it.
 *
 * @param tokens      the tokens of the sentence to parse
 * @param constraints constraints the resulting parse must respect
 * @return the parse tree produced for the sentence
 */
protected Tree parse(List<CoreLabel> tokens,
           List<ParserConstraint> constraints) {
 CoreMap sentence = new Annotation("");
 sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
 sentence.set(ParserAnnotations.ConstraintAnnotation.class, constraints);

 Annotation document = new Annotation("");
 List<CoreMap> sentenceList = new ArrayList<>();
 sentenceList.add(sentence);
 document.set(CoreAnnotations.SentencesAnnotation.class, sentenceList);

 getParser().annotate(document);
 // Read the tree back off the (re-fetched) sentence list.
 return document.get(CoreAnnotations.SentencesAnnotation.class)
         .get(0).get(TreeCoreAnnotations.TreeAnnotation.class);
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Parses a token sequence under the given parser constraints by building a
 * single-sentence document and delegating to the parser annotator.
 *
 * @param tokens      the tokens of the sentence to parse
 * @param constraints constraints the resulting parse must respect
 * @return the parse tree produced for the sentence
 */
private Tree parse(List<CoreLabel> tokens,
          List<ParserConstraint> constraints) {
 CoreMap sentence = new Annotation("");
 sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
 sentence.set(ParserAnnotations.ConstraintAnnotation.class, constraints);

 Annotation document = new Annotation("");
 List<CoreMap> sentenceList = new ArrayList<>(1);
 sentenceList.add(sentence);
 document.set(CoreAnnotations.SentencesAnnotation.class, sentenceList);

 getParser().annotate(document);
 return document.get(CoreAnnotations.SentencesAnnotation.class)
         .get(0).get(TreeCoreAnnotations.TreeAnnotation.class);
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Parses a token sequence under the given parser constraints. The tokens are
 * wrapped in a one-sentence document, the parser annotator is run over it,
 * and the resulting tree is extracted from that sentence.
 *
 * @param tokens      the tokens of the sentence to parse
 * @param constraints constraints the resulting parse must respect
 * @return the parse tree produced for the sentence
 */
private Tree parse(List<CoreLabel> tokens,
          List<ParserConstraint> constraints) {
 CoreMap sentence = new Annotation("");
 sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
 sentence.set(ParserAnnotations.ConstraintAnnotation.class, constraints);

 Annotation document = new Annotation("");
 List<CoreMap> sentenceList = new ArrayList<>(1);
 sentenceList.add(sentence);
 document.set(CoreAnnotations.SentencesAnnotation.class, sentenceList);

 getParser().annotate(document);
 return document.get(CoreAnnotations.SentencesAnnotation.class)
         .get(0).get(TreeCoreAnnotations.TreeAnnotation.class);
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Runs the coreference algorithm over the document and stores the resulting
 * chains under CorefChainAnnotation. Optionally drops singleton clusters
 * first. Interrupt checks are performed before and after the coref pass.
 */
public void annotate(Annotation ann) {
 Document document;
 try {
  document = docMaker.makeDocument(ann);
 } catch (Exception e) {
  throw new RuntimeException("Error making document", e);
 }

 CorefUtils.checkForInterrupt();
 corefAlgorithm.runCoref(document);
 if (removeSingletonClusters) {
  CorefUtils.removeSingletonClusters(document);
 }
 CorefUtils.checkForInterrupt();

 // Convert the internal clusters into the public CorefChain representation.
 Map<Integer, CorefChain> chains = Generics.newHashMap();
 for (CorefCluster cluster : document.corefClusters.values()) {
  chains.put(cluster.clusterID, new CorefChain(cluster, document.positions));
 }
 ann.set(CorefCoreAnnotations.CorefChainAnnotation.class, chains);
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Strips XML from the document's token stream. The cleaned token list
 * REPLACES the original tokens (callers of this annotator are assumed not to
 * want the raw tokens), and token begin/end indices are recomputed since
 * removing xml tokens shifts them.
 */
@Override
public void annotate(Annotation annotation) {
 if (!annotation.containsKey(CoreAnnotations.TokensAnnotation.class)) {
  return;
 }
 List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
 if (DEBUG) { log.info("CleanXML: starting tokens: " + tokens); }
 List<CoreLabel> cleanedTokens = process(annotation, tokens);
 // Reindex after removal, then overwrite the old token list.
 setTokenBeginTokenEnd(cleanedTokens);
 annotation.set(CoreAnnotations.TokensAnnotation.class, cleanedTokens);
 if (DEBUG) { log.info("CleanXML: ending tokens: " + annotation.get(CoreAnnotations.TokensAnnotation.class)); }
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Reads the next input document, preprocesses it with the configured
 * StanfordCoreNLP pipeline, and converts it to a coref Document.
 * Constituency parses are discarded beforehand when they are not wanted, and
 * CoNLL-style inputs are flagged as marked discourse.
 *
 * @return the next coref Document, or null when the reader is exhausted
 * @throws Exception if reading or annotation fails
 */
public Document nextDoc() throws Exception {
 InputDoc input = reader.nextDoc();
 if (input == null) {
  return null;
 }

 // Drop gold constituency trees unless the coref configuration uses them,
 // so the pipeline re-derives whatever it needs.
 if (!CorefProperties.useConstituencyParse(props)) {
  for (CoreMap sentence : input.annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
   sentence.remove(TreeCoreAnnotations.TreeAnnotation.class);
  }
 }

 getStanfordCoreNLP(props).annotate(input.annotation);
 if (CorefProperties.conll(props)) {
  input.annotation.set(CoreAnnotations.UseMarkedDiscourseAnnotation.class, true);
 }
 return makeDocument(input);
}

相关文章