edu.stanford.nlp.pipeline.Annotation.get()方法的使用及代码示例

x33g5p2x 于2022-01-16 转载在其他

字(11.4k)|赞(0)|评价(0)|浏览(116)

本文整理了Java中edu.stanford.nlp.pipeline.Annotation.get()方法的一些代码示例，展示了Annotation.get()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台，是从一些精选项目中提取出来的代码，具有较强的参考意义，能在一定程度上帮助到你。Annotation.get()方法的具体详情如下：
包路径：edu.stanford.nlp.pipeline.Annotation
类名称：Annotation
方法名：get

Annotation.get介绍

暂无

代码示例

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Builds a document from an already-read input plus a set of predicted mentions.
 *
 * <p>The annotation, gold mentions, document info and CoNLL document are taken
 * by reference from {@code input}; nothing is copied.
 *
 * @param input source of the annotation, gold mentions, doc info and CoNLL document
 * @param mentions value stored as this document's predicted mentions
 */
public Document(InputDoc input, List<List<Mention>> mentions) {
  this();

  // State carried over unchanged from the input document.
  this.annotation = input.annotation;
  this.goldMentions = input.goldMentions;
  this.docInfo = input.docInfo;
  this.conllDoc = input.conllDoc;   // null if it's not conll input

  // State supplied by, or derived for, this constructor call.
  this.predictedMentions = mentions;
  this.numSentences = this.annotation.get(SentencesAnnotation.class).size();
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Returns the paragraph index of the sentence in which the given quote begins.
 *
 * @param quote quote whose {@code SentenceBeginAnnotation} selects the sentence
 * @return the {@code ParagraphIndexAnnotation} value of that sentence
 */
protected int getQuoteParagraph(CoreMap quote) {
  List<CoreMap> allSentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  int firstSentenceIndex = quote.get(CoreAnnotations.SentenceBeginAnnotation.class);
  CoreMap firstSentence = allSentences.get(firstSentenceIndex);
  return firstSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class);
}
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Collects the indices of all tokens in a span whose word is contained in
 * {@code animacySet}.
 *
 * @param span token range to scan; both ends are inclusive, and positions at or
 *             beyond the number of document tokens are not visited
 * @return matching token indices in ascending order (empty if none match)
 */
public List<Integer> scanForAnimates(Pair<Integer, Integer> span) {
  List<Integer> hits = new ArrayList<>();
  List<CoreLabel> docTokens = doc.get(CoreAnnotations.TokensAnnotation.class);
  int idx = span.first;
  while (idx <= span.second && idx < docTokens.size()) {
    String word = docTokens.get(idx).word();
    if (animacySet.contains(word)) {
      hits.add(idx);
    }
    idx++;
  }
  return hits;
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Parses a string that is expected to contain exactly one time expression.
 *
 * <p>The text is run through {@code pipeline}. {@code null} is returned when:
 * <ul>
 *   <li>the annotated document has no sentences (missing or empty list);</li>
 *   <li>the document has no timex annotation list, or the list does not
 *       contain exactly one entry;</li>
 *   <li>the single entry carries no {@code TimeExpression.Annotation}.</li>
 * </ul>
 *
 * @param str text to parse
 * @return the temporal of the single time expression found, or {@code null}
 */
public static Temporal parseOrNull(String str) {
  Annotation doc = new Annotation(str);
  pipeline.annotate(doc);

  List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  if (sentences == null || sentences.isEmpty()) {
    return null;
  }

  // A missing timex list is treated like an empty one so that this method
  // keeps its "or null" contract instead of throwing a NullPointerException.
  List<CoreMap> timexAnnotations = doc.get(TimeAnnotations.TimexAnnotations.class);
  if (timexAnnotations == null || timexAnnotations.size() != 1) {
    return null;
  }

  CoreMap timex = timexAnnotations.get(0);
  if (timex.get(TimeExpression.Annotation.class) == null) {
    return null;
  }
  return timex.get(TimeExpression.Annotation.class).getTemporal();
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Chooses between the closest mention before the quote and the closest mention
 * after it, whichever is nearer in tokens.
 *
 * <p>The backward search covers tokens {@code 0 .. quoteBegin - 1}; the forward
 * search covers {@code quoteEnd .. lastDocumentToken}. On a tie the forward
 * mention is returned.
 *
 * @param quote quote carrying {@code TokenBeginAnnotation} and {@code TokenEndAnnotation}
 * @return the nearer of the two mentions
 */
public MentionData getClosestMention(CoreMap quote) {
  int quoteBegin = quote.get(CoreAnnotations.TokenBeginAnnotation.class);
  MentionData before = findClosestMentionInSpanBackward(new Pair<>(0, quoteBegin - 1));

  int quoteEnd = quote.get(CoreAnnotations.TokenEndAnnotation.class);
  int lastTokenIndex = doc.get(CoreAnnotations.TokensAnnotation.class).size() - 1;
  MentionData after = findClosestMentionInSpanForward(new Pair<>(quoteEnd, lastTokenIndex));

  int backDistance = quoteBegin - before.end;
  int forwardDistance = after.begin - quoteEnd + 1;
  return backDistance < forwardDistance ? before : after;
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Returns the word of the document token at the given position.
 *
 * @param token_idx index into the document's token list
 * @return the result of {@code word()} on that token
 */
public String tokenRangeToString(int token_idx) {
  List<CoreLabel> docTokens = doc.get(CoreAnnotations.TokensAnnotation.class);
  CoreLabel target = docTokens.get(token_idx);
  return target.word();
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Numbers the tokens of a document consecutively, starting at 0 and continuing
 * across sentence boundaries. Each token receives its number as its
 * {@code TokenBeginAnnotation}; no sentence-level annotation is written.
 *
 * @param doc document whose sentence tokens are numbered in place
 */
private static void setTokenIndices(Document doc) {
  int nextIndex = 0;
  for (CoreMap sentence : doc.annotation.get(SentencesAnnotation.class)) {
    for (CoreLabel label : sentence.get(TokensAnnotation.class)) {
      label.set(TokenBeginAnnotation.class, nextIndex);
      nextIndex++;
    }
  }
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Writes a FINER-level report for every gold mention span of every sentence:
 * a header, the sentence, each token value inside the span (end exclusive),
 * and the sentence's parse tree.
 *
 * <p>Only the first {@code goldMentions.size()} sentences of the document are
 * visited. {@code predictedMentions} is accepted but not read here.
 *
 * @param goldMentions gold mentions, one list per sentence
 * @param predictedMentions unused by this method
 * @param doc annotation supplying sentences, tokens and trees
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
private static void recallErrors(List<List<Mention>> goldMentions, List<List<Mention>> predictedMentions, Annotation doc) throws IOException {
  List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  final int sentenceCount = goldMentions.size();
  for (int sentIdx = 0; sentIdx < sentenceCount; sentIdx++) {
    CoreMap sentence = sentences.get(sentIdx);
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    Tree parse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
    List<Pair<Integer,Integer>> spans = extractSpans(goldMentions.get(sentIdx));
    for (Pair<Integer,Integer> span : spans) {
      logger.finer("RECALL ERROR\n");
      logger.finer(sentence + "\n");
      int tokenIdx = span.first;
      while (tokenIdx < span.second) {
        logger.finer(tokens.get(tokenIdx).value() + " ");
        tokenIdx++;
      }
      logger.finer("\n" + parse + "\n");
    }
  }
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Replaces each sentence's predicted-mention list with the result of
 * {@code mentionReorderingBySpan} for that list. The document's
 * {@code predictedMentions} list is updated in place.
 *
 * @param doc document whose predicted mentions are reordered
 * @param headFinder not used by this method
 * @throws Exception declared by the signature
 */
private static void mentionReordering(Document doc, HeadFinder headFinder) throws Exception {
  List<List<Mention>> mentionsBySentence = doc.predictedMentions;
  List<CoreMap> sentences = doc.annotation.get(SentencesAnnotation.class);
  int sentIdx = 0;
  while (sentIdx < sentences.size()) {
    List<Mention> reordered = mentionReorderingBySpan(mentionsBySentence.get(sentIdx));
    mentionsBySentence.set(sentIdx, reordered);
    sentIdx++;
  }
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Returns the chapter annotation of the sentence in which the given quote begins.
 *
 * @param doc annotation holding the sentence list
 * @param quote quote whose {@code SentenceBeginAnnotation} selects the sentence
 * @return the {@code ChapterAnnotator.ChapterAnnotation} value of that sentence
 */
private static int getQuoteChapter(Annotation doc, CoreMap quote) {
  List<CoreMap> allSentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  int firstSentenceIndex = quote.get(CoreAnnotations.SentenceBeginAnnotation.class);
  CoreMap firstSentence = allSentences.get(firstSentenceIndex);
  return firstSentence.get(ChapterAnnotator.ChapterAnnotation.class);
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Attributes a quote to a name when the rest of its sentence contains exactly
 * one name and no pronouns.
 *
 * <p>For every quote that has no {@code MentionAnnotation} yet, the remainder
 * of the quote's sentence is scanned for names and pronouns. The mention is
 * filled in only if exactly one name was found, {@code characterMap} maps that
 * name to exactly one person, and no pronoun was found.
 *
 * @param doc annotation holding the quotations to process
 */
public void oneNameSentence(Annotation doc) {
  for (CoreMap quote : doc.get(CoreAnnotations.QuotationsAnnotation.class)) {
    // Quotes that already have a mention are left alone.
    if (quote.get(QuoteAttributionAnnotator.MentionAnnotation.class) != null) {
      continue;
    }

    Pair<Integer, Integer> remainder = QuoteAttributionUtils.getRemainderInSentence(doc, quote);
    if (remainder == null) {
      continue;
    }

    Pair<ArrayList<String>, ArrayList<Pair<Integer, Integer>>> found = scanForNames(remainder);
    ArrayList<String> foundNames = found.first;
    ArrayList<Pair<Integer, Integer>> foundSpans = found.second;
    ArrayList<Integer> pronouns = scanForPronouns(remainder);

    if (foundNames.size() != 1) {
      continue;
    }

    // NOTE(review): assumes characterMap has an entry for every scanned name — confirm.
    List<Person> candidates = characterMap.get(foundNames.get(0));

    //guess if exactly one name
    if (candidates.size() != 1 || !pronouns.isEmpty()) {
      continue;
    }

    Pair<Integer, Integer> nameSpan = foundSpans.get(0);
    fillInMention(quote, tokenRangeToString(nameSpan), nameSpan.first, nameSpan.second,
        sieveName, NAME);
  }
}
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * {@inheritDoc}
 *
 * <p>After delegating to the superclass, adds the number of tokens found on the
 * annotation to {@code numWords}; an annotation without a token list adds nothing.
 */
@Override
public void annotate(Annotation annotation) {
  super.annotate(annotation);
  List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
  if (tokens == null) {
    return;
  }
  numWords += tokens.size();
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Tests whether a character index lies within the character extent of a token range.
 *
 * @param tokenRange indices of the first and last token of the range
 * @param charIndex character index to test
 * @return {@code true} if {@code charIndex} is between the begin position of the
 *         first token and the end position of the last token, both inclusive
 */
public  boolean rangeContainsCharIndex(Pair<Integer, Integer> tokenRange, int charIndex) {
  List<CoreLabel> docTokens = doc.get(CoreAnnotations.TokensAnnotation.class);
  CoreLabel firstToken = docTokens.get(tokenRange.first());
  CoreLabel lastToken = docTokens.get(tokenRange.second());
  int lowerBound = firstToken.beginPosition();
  int upperBound = lastToken.endPosition();
  if (charIndex < lowerBound) {
    return false;
  }
  return charIndex <= upperBound;
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Assigns a running, zero-based index to every token in the document. The
 * counter is not reset between sentences, and the value is stored on each token
 * as its {@code TokenBeginAnnotation}. Sentences themselves are not modified.
 *
 * @param doc document whose tokens are indexed in place
 */
private static void setTokenIndices(Document doc) {
  int counter = 0;
  for (CoreMap currentSentence : doc.annotation.get(SentencesAnnotation.class)) {
    for (CoreLabel currentToken : currentSentence.get(TokensAnnotation.class)) {
      currentToken.set(TokenBeginAnnotation.class, counter);
      counter += 1;
    }
  }
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Attaches an "enhanced" sentence to every sentence that a newline-insensitive
 * sentence splitter would merge with its predecessor.
 *
 * <p>For each adjacent pair of sentences, the tokens of both are concatenated
 * and run through a {@code WordToSentenceProcessor} that never breaks on
 * newlines. If the splitter yields a single sentence, the merged sentence is
 * built with {@code constructSentence} and stored on the later sentence of the
 * pair as its {@code EnhancedSentenceAnnotation}.
 *
 * <p>NOTE(review): the original comment described this as applying only to
 * sentences that begin a paragraph; the code applies no such filter — confirm
 * which is intended.
 *
 * @param doc annotation whose sentences are examined and annotated in place
 */
public static void addEnhancedSentences(Annotation doc) {
  List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  // Sentence splitter that never splits on newline.
  WordToSentenceProcessor<CoreLabel> wsp =
      new WordToSentenceProcessor<>(WordToSentenceProcessor.NewlineIsSentenceBreak.NEVER);
  for (int i = 1; i < sentences.size(); i++) {
    CoreMap sentence = sentences.get(i);
    CoreMap prevSentence = sentences.get(i - 1);
    List<CoreLabel> tokensConcat = new ArrayList<>();
    tokensConcat.addAll(prevSentence.get(CoreAnnotations.TokensAnnotation.class));
    tokensConcat.addAll(sentence.get(CoreAnnotations.TokensAnnotation.class));
    List<List<CoreLabel>> sentenceTokens = wsp.process(tokensConcat);
    // The splitter kept both sentences together --> add enhanced sentence.
    if (sentenceTokens.size() == 1) {
      sentence.set(EnhancedSentenceAnnotation.class, constructSentence(sentenceTokens.get(0), prevSentence, sentence));
    }
  }
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Calls {@code DependencyCorefMentionFinder.findHeadInDependency} once per
 * sentence, passing the sentence together with the gold mentions stored at the
 * same index.
 *
 * @param doc document supplying the sentences and the per-sentence gold mentions
 */
private static void findGoldMentionHeads(Document doc) {
  List<CoreMap> sentences = doc.annotation.get(SentencesAnnotation.class);
  int sentIdx = 0;
  while (sentIdx < sentences.size()) {
    CoreMap sentence = sentences.get(sentIdx);
    DependencyCorefMentionFinder.findHeadInDependency(sentence, doc.goldMentions.get(sentIdx));
    sentIdx++;
  }
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Returns the paragraph index of the sentence in which the given quote begins.
 *
 * @param doc annotation holding the sentence list
 * @param quote quote whose {@code SentenceBeginAnnotation} selects the sentence
 * @return the {@code ParagraphIndexAnnotation} value of that sentence
 */
public static int getQuoteParagraphIndex(Annotation doc, CoreMap quote) {
  List<CoreMap> allSentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  int firstSentenceIndex = quote.get(CoreAnnotations.SentenceBeginAnnotation.class);
  CoreMap firstSentence = allSentences.get(firstSentenceIndex);
  return firstSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class);
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Convert a CoreNLP Annotation object to a Document.
 *
 * <p>The annotation is serialized into {@code impl} with a fresh
 * {@code ProtobufAnnotationSerializer}, and every sentence of the annotation is
 * wrapped in a {@code Sentence} that refers back to this document.
 *
 * <p>NOTE(review): {@code ann} is expected to already carry a
 * {@code SentencesAnnotation}; a missing one would presumably fail on
 * {@code sentences.size()} — confirm against callers.
 *
 * @param props The properties stored as this document's default properties and
 *              passed on to each created {@code Sentence}.
 * @param ann The CoreNLP Annotation object.
 */
@SuppressWarnings("Convert2streamapi")
public Document(Properties props, Annotation ann) {
 this.defaultProps = props;
 StanfordCoreNLP.getDefaultAnnotatorPool(props, new AnnotatorImplementations());  // cache the annotator pool
 this.impl = new ProtobufAnnotationSerializer(false).toProtoBuilder(ann);
 List<CoreMap> sentences = ann.get(CoreAnnotations.SentencesAnnotation.class);
 // Presize the list to the number of sentences in the annotation.
 this.sentences = new ArrayList<>(sentences.size());
 for (CoreMap sentence : sentences) {
  // Each Sentence receives this document, the sentence's proto builder, its text, and the default properties.
  this.sentences.add(new Sentence(this, this.serializer.toProtoBuilder(sentence), sentence.get(CoreAnnotations.TextAnnotation.class), this.defaultProps));
 }
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Renders a document as text for analysis.
 *
 * <p>The output consists of a start marker, the document id, a banner naming
 * the kind of mentions shown, one line per sentence produced by
 * {@code sentenceStringWithMention}, a blank line, and an end marker.
 *
 * @param document document to render
 * @param gold selects the gold-mention banner when {@code true}, otherwise the
 *             predicted-mention banner; also forwarded to {@code sentenceStringWithMention}
 * @param printClusterID forwarded to {@code sentenceStringWithMention}
 * @return the rendered text
 * @throws FileNotFoundException declared by the signature
 */
public static String printRawDoc(Document document, boolean gold, boolean printClusterID) throws FileNotFoundException {
  List<CoreMap> sentences = document.annotation.get(CoreAnnotations.SentencesAnnotation.class);

  // Render all sentences first, one per line.
  StringBuilder body = new StringBuilder();
  int sentIdx = 0;
  while (sentIdx < sentences.size()) {
    body.append(sentenceStringWithMention(sentIdx, document, gold, printClusterID));
    body.append("\n");
    sentIdx++;
  }

  StringBuilder out = new StringBuilder();
  out.append("PRINT RAW DOC START\n");
  out.append(document.annotation.get(CoreAnnotations.DocIDAnnotation.class)).append("\n");
  String banner = gold
      ? "New DOC: (GOLD MENTIONS) ==================================================\n"
      : "New DOC: (Predicted Mentions) ==================================================\n";
  out.append(banner);
  out.append(body.toString()).append("\n");
  out.append("PRINT RAW DOC END").append("\n");
  return out.toString();
}

代码示例来源：origin: stanfordnlp/CoreNLP

/**
 * Builds a sentence-like {@code CoreMap} for a run of document tokens.
 *
 * <p>Trailing tokens flagged by {@code IsNewlineAnnotation} are trimmed from the
 * end of the run (never past index 0). The sentence text is the substring of
 * the document text between the first token's begin offset and the last kept
 * token's end offset. The token list is a {@code subList} view of the
 * document's token list.
 *
 * @param doc annotation supplying the document text and tokens
 * @param run first and last token index of the run, both inclusive
 * @return a new annotation carrying the text, the begin/end character offsets
 *         and the tokens of the run
 */
private static CoreMap constructCoreMap(Annotation doc, Pair<Integer, Integer> run) {
  List<CoreLabel> docTokens = doc.get(CoreAnnotations.TokensAnnotation.class);

  // Step back over newline tokens at the end of the run.
  int lastIdx = run.second;
  while (lastIdx > 0 && docTokens.get(lastIdx).get(CoreAnnotations.IsNewlineAnnotation.class)) {
    lastIdx -= 1;
  }

  // Character extent of the run, taken from its first and last kept token.
  CoreLabel firstToken = docTokens.get(run.first);
  int charBegin = firstToken.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
  CoreLabel lastToken = docTokens.get(lastIdx);
  int charEnd = lastToken.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);

  String fullText = doc.get(CoreAnnotations.TextAnnotation.class);
  String runText = fullText.substring(charBegin, charEnd);
  List<CoreLabel> runTokens = docTokens.subList(run.first, lastIdx + 1);

  // Assemble the sentence annotation: text, offsets, then tokens.
  CoreMap result = new Annotation(runText);
  result.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, charBegin);
  result.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, charEnd);
  result.set(CoreAnnotations.TokensAnnotation.class, runTokens);
  return result;
}

内容来源于网络，如有侵权，请联系作者删除！

相关文章

热门标签

Java query python Node 开发语言 request Util 数据库 Table 后端算法 Logger Message Element Parser

最新文章

高级程序员和新手小白程序员区别你是那个等级看解决bug速度
浏览(1000) 发布于 5个月前
还在用双层for循环吗？太慢了
浏览(925) 发布于 5个月前
我用EasyExcel优化了公司的导出（附踩坑记录）
浏览(967) 发布于 5个月前
记录因Sharding Jdbc批量操作引发的一次fullGC
浏览(802) 发布于 5个月前
进大厂必须要会的单元测试
浏览(801) 发布于 5个月前

Annotation类方法