本文整理了Java中edu.stanford.nlp.pipeline.Annotation.get()
方法的一些代码示例,展示了Annotation.get()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Annotation.get()
方法的具体详情如下:
包路径:edu.stanford.nlp.pipeline.Annotation
类名称:Annotation
方法名:get
暂无
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Builds a Document from a parsed input document plus an externally supplied
 * set of predicted mentions.
 *
 * @param input    source document carrying annotation, gold mentions, and metadata
 * @param mentions per-sentence predicted mentions to attach to this document
 */
public Document(InputDoc input, List<List<Mention>> mentions) {
  this();
  this.annotation = input.annotation;
  this.goldMentions = input.goldMentions;
  this.predictedMentions = mentions;
  this.docInfo = input.docInfo;
  this.conllDoc = input.conllDoc; // stays null when the input is not CoNLL
  this.numSentences = input.annotation.get(SentencesAnnotation.class).size();
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Looks up the paragraph index of the sentence in which the given quote begins.
 */
protected int getQuoteParagraph(CoreMap quote) {
  int startSentenceIdx = quote.get(CoreAnnotations.SentenceBeginAnnotation.class);
  CoreMap startSentence = doc.get(CoreAnnotations.SentencesAnnotation.class).get(startSentenceIdx);
  return startSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class);
}
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Collects the indices of tokens inside the given (inclusive) span whose
 * surface form appears in the animacy word set.
 *
 * @param span inclusive token-index range to scan
 * @return indices of animate tokens, in document order
 */
public List<Integer> scanForAnimates(Pair<Integer, Integer> span) {
  List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);
  List<Integer> hits = new ArrayList<>();
  // never run past the end of the token list
  int last = Math.min(span.second, tokens.size() - 1);
  for (int idx = span.first; idx <= last; idx++) {
    if (animacySet.contains(tokens.get(idx).word())) {
      hits.add(idx);
    }
  }
  return hits;
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Parses a single temporal expression from {@code str} using the shared pipeline.
 *
 * <p>Returns the resolved {@link Temporal} only when the annotated text contains
 * at least one sentence, exactly one timex annotation, and that timex carries a
 * resolved {@code TimeExpression}; otherwise returns {@code null}.
 *
 * @param str the raw text to parse
 * @return the parsed temporal value, or {@code null} on any ambiguity or failure
 */
public static Temporal parseOrNull(String str) {
  Annotation doc = new Annotation(str);
  pipeline.annotate(doc);
  List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  if (sentences == null || sentences.isEmpty()) {
    return null;
  }
  List<CoreMap> timexAnnotations = doc.get(TimeAnnotations.TimexAnnotations.class);
  // Guard against a missing annotation (previously an NPE) as well as
  // zero or multiple matches — we only accept exactly one timex.
  if (timexAnnotations == null || timexAnnotations.size() != 1) {
    return null;
  }
  TimeExpression expr = timexAnnotations.get(0).get(TimeExpression.Annotation.class);
  return expr == null ? null : expr.getTemporal();
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Returns the candidate mention nearest to the quote, searching backward from
 * its first token and forward from its last token, and picking whichever is
 * closer in token distance.
 */
public MentionData getClosestMention(CoreMap quote) {
  int quoteBegin = quote.get(CoreAnnotations.TokenBeginAnnotation.class);
  int quoteEnd = quote.get(CoreAnnotations.TokenEndAnnotation.class);
  int lastTokenIdx = doc.get(CoreAnnotations.TokensAnnotation.class).size() - 1;
  MentionData before = findClosestMentionInSpanBackward(new Pair<>(0, quoteBegin - 1));
  MentionData after = findClosestMentionInSpanForward(new Pair<>(quoteEnd, lastTokenIdx));
  int distBefore = quoteBegin - before.end;
  int distAfter = after.begin - quoteEnd + 1;
  // NOTE(review): assumes both searches return a mention — NPE if either is
  // null; confirm against the findClosestMentionInSpan* helpers.
  return distBefore < distAfter ? before : after;
}
代码示例来源:origin: stanfordnlp/CoreNLP
/** Returns the surface form of the token at the given document-level index. */
public String tokenRangeToString(int token_idx) {
  List<CoreLabel> allTokens = doc.get(CoreAnnotations.TokensAnnotation.class);
  return allTokens.get(token_idx).word();
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Assigns each token in the document a running, document-wide position via
 * {@code TokenBeginAnnotation}, counting continuously across sentence
 * boundaries.
 *
 * @param doc the document whose tokens are numbered
 */
private static void setTokenIndices(Document doc) {
  int nextIndex = 0;
  for (CoreMap sentence : doc.annotation.get(SentencesAnnotation.class)) {
    for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
      token.set(TokenBeginAnnotation.class, nextIndex);
      nextIndex++;
    }
  }
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Logs every gold mention span (a recall-error report) together with its
 * sentence, the tokens in the span, and the sentence's parse tree.
 *
 * @param goldMentions      per-sentence gold mentions
 * @param predictedMentions per-sentence predicted mentions (not read here)
 * @param doc               the annotated document the mentions belong to
 */
private static void recallErrors(List<List<Mention>> goldMentions, List<List<Mention>> predictedMentions, Annotation doc) throws IOException {
  List<CoreMap> sentenceMaps = doc.get(CoreAnnotations.SentencesAnnotation.class);
  for (int sentIdx = 0; sentIdx < goldMentions.size(); sentIdx++) {
    CoreMap sentence = sentenceMaps.get(sentIdx);
    List<CoreLabel> sentTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    Tree parse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
    for (Pair<Integer, Integer> mentionSpan : extractSpans(goldMentions.get(sentIdx))) {
      logger.finer("RECALL ERROR\n");
      logger.finer(sentence + "\n");
      // span end is exclusive here
      for (int t = mentionSpan.first; t < mentionSpan.second; t++) {
        logger.finer(sentTokens.get(t).value() + " ");
      }
      logger.finer("\n" + parse + "\n");
    }
  }
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Rewrites each sentence's predicted-mention list into span order.
 *
 * <p>NOTE(review): {@code headFinder} is not used in this method body; kept
 * for signature compatibility with callers.
 */
private static void mentionReordering(Document doc, HeadFinder headFinder) throws Exception {
  List<CoreMap> sentences = doc.annotation.get(SentencesAnnotation.class);
  List<List<Mention>> allMentions = doc.predictedMentions;
  for (int s = 0; s < sentences.size(); s++) {
    allMentions.set(s, mentionReorderingBySpan(allMentions.get(s)));
  }
}
代码示例来源:origin: stanfordnlp/CoreNLP
/** Returns the chapter index of the sentence in which the quote begins. */
private static int getQuoteChapter(Annotation doc, CoreMap quote) {
  int startSentenceIdx = quote.get(CoreAnnotations.SentenceBeginAnnotation.class);
  List<CoreMap> allSentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  return allSentences.get(startSentenceIdx).get(ChapterAnnotator.ChapterAnnotation.class);
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Attribution sieve: for each still-unattributed quote, if the remainder of
 * its sentence contains exactly one name that maps to a single Person and no
 * pronouns, fill in that name as the quote's mention.
 */
public void oneNameSentence(Annotation doc) {
  for (CoreMap quote : doc.get(CoreAnnotations.QuotationsAnnotation.class)) {
    // skip quotes that already carry an attributed mention
    if (quote.get(QuoteAttributionAnnotator.MentionAnnotation.class) != null) {
      continue;
    }
    Pair<Integer, Integer> remainder = QuoteAttributionUtils.getRemainderInSentence(doc, quote);
    if (remainder == null) {
      continue;
    }
    Pair<ArrayList<String>, ArrayList<Pair<Integer, Integer>>> scan = scanForNames(remainder);
    ArrayList<String> foundNames = scan.first;
    ArrayList<Pair<Integer, Integer>> foundNameIndices = scan.second;
    ArrayList<Integer> pronounIndices = scanForPronouns(remainder);
    if (foundNames.size() != 1) {
      continue;
    }
    List<Person> candidates = characterMap.get(foundNames.get(0));
    // attribute only when the lone name is unambiguous and no pronoun competes
    if (candidates.size() == 1 && pronounIndices.size() == 0) {
      Pair<Integer, Integer> where = foundNameIndices.get(0);
      fillInMention(quote, tokenRangeToString(where), where.first, where.second,
          sieveName, NAME);
    }
  }
}
}
代码示例来源:origin: stanfordnlp/CoreNLP
/** {@inheritDoc} */
@Override
public void annotate(Annotation annotation) {
  super.annotate(annotation);
  // accumulate the total token count across all documents annotated so far
  List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
  if (tokens != null) {
    numWords += tokens.size();
  }
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Reports whether {@code charIndex} falls within the character span covered by
 * the given (inclusive) token range.
 */
public boolean rangeContainsCharIndex(Pair<Integer, Integer> tokenRange, int charIndex) {
  List<CoreLabel> allTokens = doc.get(CoreAnnotations.TokensAnnotation.class);
  int spanBegin = allTokens.get(tokenRange.first()).beginPosition();
  int spanEnd = allTokens.get(tokenRange.second()).endPosition();
  return spanBegin <= charIndex && charIndex <= spanEnd;
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Numbers every token in the document consecutively (document-wide), storing
 * each position in {@code TokenBeginAnnotation}.
 *
 * @param doc document whose tokens are indexed
 */
private static void setTokenIndices(Document doc) {
  int counter = 0;
  for (CoreMap s : doc.annotation.get(SentencesAnnotation.class)) {
    for (CoreLabel tok : s.get(TokensAnnotation.class)) {
      tok.set(TokenBeginAnnotation.class, counter++);
    }
  }
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * For each adjacent pair of sentences, checks whether a newline-insensitive
 * sentence splitter would merge their combined tokens into one sentence; when
 * it would, stores that merged "enhanced" sentence on the second sentence via
 * {@code EnhancedSentenceAnnotation}.
 *
 * @param doc annotated document whose sentences are examined pairwise
 */
public static void addEnhancedSentences(Annotation doc) {
  List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  // sentence splitter that never treats a newline as a sentence break
  WordToSentenceProcessor wsp =
      new WordToSentenceProcessor(WordToSentenceProcessor.NewlineIsSentenceBreak.NEVER);
  // (removed unused local `prevParagraph` from the original)
  for (int i = 1; i < sentences.size(); i++) {
    CoreMap sentence = sentences.get(i);
    CoreMap prevSentence = sentences.get(i - 1);
    List<CoreLabel> tokensConcat = new ArrayList<>();
    tokensConcat.addAll(prevSentence.get(CoreAnnotations.TokensAnnotation.class));
    tokensConcat.addAll(sentence.get(CoreAnnotations.TokensAnnotation.class));
    List<List<CoreLabel>> sentenceTokens = wsp.process(tokensConcat);
    // exactly one resulting sentence means the splitter would merge the pair
    if (sentenceTokens.size() == 1) {
      sentence.set(EnhancedSentenceAnnotation.class,
          constructSentence(sentenceTokens.get(0), prevSentence, sentence));
    }
  }
}
代码示例来源:origin: stanfordnlp/CoreNLP
/** Finds dependency-based heads for every gold mention, sentence by sentence. */
private static void findGoldMentionHeads(Document doc) {
  List<CoreMap> sents = doc.annotation.get(SentencesAnnotation.class);
  int n = sents.size();
  for (int idx = 0; idx < n; idx++) {
    DependencyCorefMentionFinder.findHeadInDependency(sents.get(idx), doc.goldMentions.get(idx));
  }
}
代码示例来源:origin: stanfordnlp/CoreNLP
/** Returns the paragraph index of the sentence in which the quote begins. */
public static int getQuoteParagraphIndex(Annotation doc, CoreMap quote) {
  int sentIdx = quote.get(CoreAnnotations.SentenceBeginAnnotation.class);
  return doc.get(CoreAnnotations.SentencesAnnotation.class)
      .get(sentIdx)
      .get(CoreAnnotations.ParagraphIndexAnnotation.class);
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Convert a CoreNLP Annotation object to a Document.
 *
 * @param props pipeline properties used as this document's defaults
 * @param ann The CoreNLP Annotation object.
 */
@SuppressWarnings("Convert2streamapi")
public Document(Properties props, Annotation ann) {
  this.defaultProps = props;
  // warm the annotator-pool cache for these properties
  StanfordCoreNLP.getDefaultAnnotatorPool(props, new AnnotatorImplementations());
  this.impl = new ProtobufAnnotationSerializer(false).toProtoBuilder(ann);
  List<CoreMap> sentenceMaps = ann.get(CoreAnnotations.SentencesAnnotation.class);
  this.sentences = new ArrayList<>(sentenceMaps.size());
  for (CoreMap sm : sentenceMaps) {
    String text = sm.get(CoreAnnotations.TextAnnotation.class);
    this.sentences.add(new Sentence(this, this.serializer.toProtoBuilder(sm), text, this.defaultProps));
  }
}
代码示例来源:origin: stanfordnlp/CoreNLP
/** Print raw document for analysis */
public static String printRawDoc(Document document, boolean gold, boolean printClusterID) throws FileNotFoundException {
  List<CoreMap> sentences = document.annotation.get(CoreAnnotations.SentencesAnnotation.class);
  // render every sentence (with mention markup) first
  StringBuilder body = new StringBuilder();
  for (int i = 0; i < sentences.size(); i++) {
    body.append(sentenceStringWithMention(i, document, gold, printClusterID)).append("\n");
  }
  StringBuilder out = new StringBuilder();
  out.append("PRINT RAW DOC START\n");
  out.append(document.annotation.get(CoreAnnotations.DocIDAnnotation.class)).append("\n");
  if (gold) {
    out.append("New DOC: (GOLD MENTIONS) ==================================================\n");
  } else {
    out.append("New DOC: (Predicted Mentions) ==================================================\n");
  }
  out.append(body.toString()).append("\n");
  out.append("PRINT RAW DOC END").append("\n");
  return out.toString();
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Builds a sentence-like CoreMap covering the token run
 * {@code [run.first, run.second]}, first trimming trailing *NL* (newline)
 * tokens from the end of the run.
 */
private static CoreMap constructCoreMap(Annotation doc, Pair<Integer, Integer> run) {
  List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);
  // back off over trailing newline tokens at the end of the run
  int lastIdx = run.second;
  while (lastIdx > 0 && tokens.get(lastIdx).get(CoreAnnotations.IsNewlineAnnotation.class)) {
    lastIdx--;
  }
  // slice the sentence text out of the document by character offsets
  int begin = tokens.get(run.first).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
  int end = tokens.get(lastIdx).get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
  String sentenceText = doc.get(CoreAnnotations.TextAnnotation.class).substring(begin, end);
  List<CoreLabel> sentenceTokens = tokens.subList(run.first, lastIdx + 1);
  // assemble the sentence annotation with its text plus character/token spans
  CoreMap sentence = new Annotation(sentenceText);
  sentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin);
  sentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, end);
  sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
  return sentence;
}
内容来源于网络,如有侵权,请联系作者删除!