Usage of the edu.stanford.nlp.pipeline.Annotation class, with code examples


This article collects code examples for the Java class edu.stanford.nlp.pipeline.Annotation and illustrates how the class is used in practice. The examples are drawn from selected projects on platforms such as GitHub, Stack Overflow, and Maven, so they should serve as useful references. Details of the Annotation class:
Package: edu.stanford.nlp.pipeline
Class: Annotation

About Annotation

An annotation representing a span of text in a document. Basically just an implementation of CoreMap that knows about text. You're meant to use the annotation keys in CoreAnnotations for common cases, but can define bespoke ones for unusual annotations.
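Before the collected snippets, here is a minimal, self-contained sketch of the typical Annotation life cycle: wrap raw text in an Annotation, run a pipeline over it, then read the analyses back through CoreAnnotations keys. The annotator list and sample sentence are illustrative assumptions, not taken from any of the projects below.

import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

public class AnnotationBasics {
 public static void main(String[] args) {
  // assumed annotator list, chosen only for illustration
  Properties props = new Properties();
  props.setProperty("annotators", "tokenize,ssplit,pos");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

  // an Annotation starts out as little more than the raw text...
  Annotation document = new Annotation("Stanford University is in California.");
  // ...and annotate() fills in its CoreMap keys in place
  pipeline.annotate(document);

  // read the analyses back through CoreAnnotations keys
  for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
   for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
    System.out.println(token.word() + "/" + token.tag());
   }
  }
 }
}

Every example that follows is a variation on this pattern: either a pipeline drives the Annotation via annotate(), or code reads and writes individual CoreMap keys directly.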

Code examples

Example source: stanfordnlp/CoreNLP

private static void runPipeline(StanfordCoreNLP pipeline, String text, PrintWriter out) {
 Annotation annotation = new Annotation(text);
 pipeline.annotate(annotation);
 // An Annotation is a Map and you can get and use the various analyses individually.
 out.println();
 // The toString() method on an Annotation just prints the text of the Annotation
 // But you can see what is in it with other methods like toShorterString()
 out.println("The top level annotation");
 out.println(annotation.toShorterString());
 List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
 for (CoreMap sentence : sentences) {
  // Print out token annotations
  for (CoreLabel token:sentence.get(CoreAnnotations.TokensAnnotation.class)) {
   // Print out words, lemma, ne, and normalized ne
   String word = token.get(CoreAnnotations.TextAnnotation.class);
   String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
   String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
   String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
   String normalized = token.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class);
   out.println("token: " + "word="+word + ", lemma="+lemma + ", pos=" + pos + ", ne=" + ne + ", normalized=" + normalized);
  }
 }
 out.flush();
}

Example source: stanfordnlp/CoreNLP

private Tree parse(List<CoreLabel> tokens,
          List<ParserConstraint> constraints) {
 CoreMap sent = new Annotation("");
 sent.set(CoreAnnotations.TokensAnnotation.class, tokens);
 sent.set(ParserAnnotations.ConstraintAnnotation.class, constraints);
 Annotation doc = new Annotation("");
 List<CoreMap> sents = new ArrayList<>(1);
 sents.add(sent);
 doc.set(CoreAnnotations.SentencesAnnotation.class, sents);
 getParser().annotate(doc);
 sents = doc.get(CoreAnnotations.SentencesAnnotation.class);
 return sents.get(0).get(TreeCoreAnnotations.TreeAnnotation.class);
}

Example source: stanfordnlp/CoreNLP

@Override
public void annotate(Annotation annotation) {
 if (VERBOSE) {
  log.info("Adding number annotation ... ");
 }
 if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
  // classify tokens for each sentence
  for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
   List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
   doOneSentenceNew(tokens, annotation, sentence);
  }
  if (VERBOSE) {
   log.info("done. Output: " + annotation.get(CoreAnnotations.SentencesAnnotation.class));
  }
 } else if (annotation.containsKey(CoreAnnotations.TokensAnnotation.class)) {
  List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
  doOneSentenceNew(tokens, annotation, null);
 } else {
  throw new RuntimeException("unable to find sentences in: " + annotation);
 }
}

Example source: stanfordnlp/CoreNLP

public static Annotation textToAnnotation(AnnotationPipeline pipeline, String text, String date) {
 Annotation annotation = new Annotation(text);
 annotation.set(CoreAnnotations.DocDateAnnotation.class, date);
 pipeline.annotate(annotation);
 return annotation;
}

Example source: stanfordnlp/CoreNLP

@Override
public void annotate(Annotation annotation) {
 if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
  for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
   doOneSentence(sentence);
  }
 } else {
  throw new RuntimeException("unable to find sentences in: " + annotation);
 }
}

Example source: stanfordnlp/CoreNLP

@Override
public void annotate(Annotation annotation) {
 if (annotation.containsKey(CoreAnnotations.TokensAnnotation.class)) {
  List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
  if (DEBUG) { log.info("CleanXML: starting tokens: " + tokens); }
  List<CoreLabel> newTokens = process(annotation, tokens);
  // We assume that if someone is using this annotator, they don't
  // want the old tokens any more and get rid of them
  // redo the token indexes if xml tokens have been removed
  setTokenBeginTokenEnd(newTokens);
  annotation.set(CoreAnnotations.TokensAnnotation.class, newTokens);
  if (DEBUG) { log.info("CleanXML: ending tokens: " + annotation.get(CoreAnnotations.TokensAnnotation.class)); }
 }
}

Example source: stanfordnlp/CoreNLP

public static void main(String[] args) throws IOException {
 PrintWriter out;
 if (args.length > 1) {
  out = new PrintWriter(args[1]);
 } else {
  out = new PrintWriter(System.out);
 }
 Properties props = new Properties();
 props.load(IOUtils.readerFromString("StanfordCoreNLP-chinese.properties"));
 StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
 Annotation document;
 if (args.length > 0) {
  document = new Annotation(IOUtils.slurpFileNoExceptions(args[0]));
 } else {
  document = new Annotation("克林顿说,华盛顿将逐步落实对韩国的经济援助。金大中对克林顿的讲话报以掌声:克林顿总统在会谈中重申,他坚定地支持韩国摆脱经济危机。");
 }
 pipeline.annotate(document);
 List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
 out.println("Coreference information");
 Map<Integer, CorefChain> corefChains =
   document.get(CorefCoreAnnotations.CorefChainAnnotation.class);
 if (corefChains == null) { return; }
 for (Map.Entry<Integer, CorefChain> entry : corefChains.entrySet()) {
  out.println("Chain " + entry.getKey());
  for (CorefChain.CorefMention m : entry.getValue().getMentionsInTextualOrder()) {
   // mention token indices are 1-based, and endIndex is exclusive, hence the offsets
   List<CoreLabel> tokens = sentences.get(m.sentNum - 1).get(CoreAnnotations.TokensAnnotation.class);
   out.println("  " + m + ":[" + tokens.get(m.startIndex - 1).beginPosition() + ", " +
       tokens.get(m.endIndex - 2).endPosition() + ')');
  }
 }
 out.flush();
}

Example source: stanfordnlp/CoreNLP

public static void main(String[] args) throws IOException {
 PrintWriter out = args.length > 1 ? new PrintWriter(args[1]) : new PrintWriter(System.out);
 PrintWriter xmlOut = args.length > 2 ? new PrintWriter(args[2]) : null;
 Properties props = new Properties();  // annotator configuration was elided in the excerpt
 StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
 Annotation annotation = args.length > 0
   ? new Annotation(IOUtils.slurpFileNoExceptions(args[0]))
   : new Annotation("Kosgi Santosh sent an email to Stanford University. He didn't get a reply.");
 pipeline.annotate(annotation);
 pipeline.prettyPrint(annotation, out);
 if (xmlOut != null) { pipeline.xmlPrint(annotation, xmlOut); }
 // An Annotation is a Map with Class keys for the linguistic analysis types
 out.println("The top level annotation");
 out.println(annotation.toShorterString());
 List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
 if (sentences != null && ! sentences.isEmpty()) {
  // inspect the first sentence: its keys, a summary, its tokens, and its parse tree
  CoreMap sentence = sentences.get(0);
  out.println(sentence.keySet());
  out.println(sentence.toShorterString());
  for (CoreMap token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
   out.println(token.toShorterString());
  }
  Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
  tree.pennPrint(out);
  // document-level coreference chains (null if the coref annotator was not run)
  Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
  if (corefChains == null) { return; }
  for (Map.Entry<Integer, CorefChain> entry : corefChains.entrySet()) {
   out.println("Chain " + entry.getKey() + ": " + entry.getValue());
  }
 }
 out.flush();
}

Example source: stanfordnlp/CoreNLP

public static Map<String, DataInstance> runPOSNERParseOnTokens(Map<String, DataInstance> sents, Properties propsoriginal) {
 PatternFactory.PatternType type = PatternFactory.PatternType.valueOf(propsoriginal.getProperty(Flags.patternType));
 Properties props = new Properties();
 List<String> anns = new ArrayList<>();
 anns.add("pos");
 anns.add("lemma");
 boolean useTargetParserParentRestriction = Boolean.parseBoolean(propsoriginal.getProperty(Flags.useTargetParserParentRestriction));
 boolean useTargetNERRestriction = Boolean.parseBoolean(propsoriginal.getProperty(Flags.useTargetNERRestriction));
 if (useTargetParserParentRestriction) {
  anns.add("parse");
 } else if (type == PatternFactory.PatternType.DEP) {
  anns.add("depparse");
 }
 if (useTargetNERRestriction) {
  anns.add("ner");
 }
 props.setProperty("annotators", StringUtils.join(anns, ","));
 String posModelPath = props.getProperty(Flags.posModelPath);
 if (posModelPath != null) {
  props.setProperty("pos.model", posModelPath);
 }
 StanfordCoreNLP pipeline = new StanfordCoreNLP(props, false);
 Redwood.log(Redwood.DBG, "Annotating text");
 // wrap each sentence's pre-tokenized text in a one-sentence Annotation and annotate it
 for (Map.Entry<String, DataInstance> en : sents.entrySet()) {
  List<CoreMap> temp = new ArrayList<>();
  CoreMap s = new ArrayCoreMap();
  s.set(CoreAnnotations.TokensAnnotation.class, en.getValue().getTokens());
  temp.add(s);
  Annotation doc = new Annotation(temp);
  try {
   pipeline.annotate(doc);
   if (useTargetParserParentRestriction)
    inferParentParseTag(s.get(TreeAnnotation.class));
  } catch (Exception e) {
   log.warn("Ignoring error: for sentence  " + StringUtils.joinWords(en.getValue().getTokens(), " "));
  }
 }
 return sents;
}

Example source: stanfordnlp/CoreNLP

public static void main(String[] args) throws Exception {
 Properties props = StringUtils.argsToProperties(args);
 boolean debug = Boolean.parseBoolean(props.getProperty("debug", "false"));
 String filepath = props.getProperty("i");
 String outfile = props.getProperty("o");
 if (filepath == null || outfile == null) {
  usage();
  System.exit(-1);
 }
 PrintWriter fout = new PrintWriter(outfile);
 logger.info("Writing to " + outfile);
 String ext = props.getProperty("ext");  // used to filter input files in the elided reader setup
 // NOTE: the reader construction was elided in the source excerpt; a CoNLL-style
 // document reader exposing getNextDocument() is assumed here
 CoNLL2011DocumentReader reader = new CoNLL2011DocumentReader(filepath);
 int docCnt = 0, sentCnt = 0, tokenCnt = 0;
 for (CoNLL2011DocumentReader.Document doc; (doc = reader.getNextDocument()) != null; ) {
  docCnt++;
  Annotation anno = doc.getAnnotation();
  if (debug) System.out.println("Document " + docCnt + ": " + anno.get(CoreAnnotations.DocIDAnnotation.class));
  for (CoreMap sentence : anno.get(CoreAnnotations.SentencesAnnotation.class)) {
   if (debug) System.out.println("Parse: " + sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
   if (debug) System.out.println("Sentence Tokens: " + StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class), ","));
   writeTabSep(fout, sentence, doc.corefChainMap);
   sentCnt++;
   tokenCnt += sentence.get(CoreAnnotations.TokensAnnotation.class).size();
  }
 }
 fout.close();
 System.out.println("Total document count: " + docCnt);
 System.out.println("Total sentence count: " + sentCnt);
 System.out.println("Total token count: " + tokenCnt);
}

Example source: stanfordnlp/CoreNLP

@Override
public void print(Annotation doc, OutputStream target, Options options) throws IOException {
 PrintWriter writer = new PrintWriter(IOUtils.encodedOutputStreamWriter(target, options.encoding));
 List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
 for (CoreMap sentence : sentences) {
  SemanticGraph sg = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  if (sg != null) {
   SemanticGraph enhancedSg = sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class);
   writer.print(conllUWriter.printSemanticGraph(sg, enhancedSg));
  } else {
   writer.print(conllUWriter.printPOSAnnotations(sentence));
  }
 }
 writer.flush();
}

Example source: stanfordnlp/CoreNLP

public OutputStream write(Annotation corpus, OutputStream os) throws IOException {
 // signature per AnnotationSerializer; surrounding class details were elided in the excerpt
 if (compress) os = new GZIPOutputStream(os);
 PrintWriter pw = new PrintWriter(os);
 // save the coref chains
 Map<Integer, CorefChain> chains = corpus.get(CorefCoreAnnotations.CorefChainAnnotation.class);
 saveCorefChains(chains, pw);
 // save the (old-format) coref graph on a single line
 List<Pair<IntTuple, IntTuple>> corefGraph = corpus.get(CorefCoreAnnotations.CorefGraphAnnotation.class);
 if (corefGraph != null) {
  boolean first = true;
  for (Pair<IntTuple, IntTuple> arc : corefGraph) {
   if ( ! first) pw.print(" ");
   pw.printf("%d %d %d %d", arc.first.get(0), arc.first.get(1), arc.second.get(0), arc.second.get(1));
   first = false;
  }
 }
 pw.println();
 // save each sentence, starting with its parse tree on a single line
 List<CoreMap> sentences = corpus.get(CoreAnnotations.SentencesAnnotation.class);
 for (CoreMap sent : sentences) {
  Tree tree = sent.get(TreeCoreAnnotations.TreeAnnotation.class);
  if (tree != null) {
   String treeString = tree.toString();
   // newlines are not allowed inside the serialized tree
   pw.println(treeString.replaceAll("\n", " "));
  } else {
   pw.println();
  }
 }
 pw.flush();
 return os;
}

Example source: stanfordnlp/CoreNLP

public static void main(String[] args) throws IOException {
 PrintWriter out;
 if (args.length > 2) {
  out = new PrintWriter(args[2]);
 } else {
  out = new PrintWriter(System.out);
 }
 StanfordCoreNLP pipeline = new StanfordCoreNLP(
     PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
 Annotation annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[1]));
 pipeline.annotate(annotation);
 List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
 int i = 0;
 for (CoreMap sentence : sentences) {
  List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
  out.println("Sentence #" + ++i);
  out.print("  Tokens:");
  for (CoreLabel token : tokens) {
   out.print(' ');
   out.print(token.toShortString("Text", "PartOfSpeech", "NamedEntityTag"));
  }
  out.println();
 }
 out.flush();
}

Example source: stanfordnlp/CoreNLP

public static void testPP(String familyFile, String animateFile, String genderFile,
             String charactersFile, String modelFile) throws IOException, ClassNotFoundException {
 Properties props = new Properties();
 props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, depparse, quote, quoteattribution");
 props.setProperty("quoteattribution.familyWordsFile", familyFile);
 props.setProperty("quoteattribution.animacyWordsFile", animateFile);
 props.setProperty("quoteattribution.genderNamesFile", genderFile);
 props.setProperty("quoteattribution.charactersPath", charactersFile);
 props.setProperty("quoteattribution.modelPath", modelFile);
 
 StanfordCoreNLP coreNLP = new StanfordCoreNLP(props);
 // `test` is the input text to analyze; it is defined elsewhere in the original class
 Annotation processedAnnotation = coreNLP.process(test);
 List<CoreMap> quotes = processedAnnotation.get(CoreAnnotations.QuotationsAnnotation.class);
 for(CoreMap quote : quotes) {
  System.out.println("Quote: " + quote.get(CoreAnnotations.TextAnnotation.class));
  if(quote.get(QuoteAttributionAnnotator.MentionAnnotation.class) != null) {
   System.out.println("Predicted Mention: " + quote.get(QuoteAttributionAnnotator.MentionAnnotation.class) + " Predictor: " + quote.get(QuoteAttributionAnnotator.MentionSieveAnnotation.class));
  } else {
   System.out.println("Predicted Mention: none");
  }
  if(quote.get(QuoteAttributionAnnotator.SpeakerAnnotation.class) != null) {
   System.out.println("Predicted Speaker: " + quote.get(QuoteAttributionAnnotator.SpeakerAnnotation.class) + " Predictor: " + quote.get(QuoteAttributionAnnotator.SpeakerSieveAnnotation.class));
  } else {
   System.out.println("Predicted Speaker: none");
  }
  System.out.println("====");
 }
 System.out.println("Finished");
}

Example source: stanfordnlp/CoreNLP

private static void modifyUsingCoreNLPNER(Annotation doc) {
 Properties ann = new Properties();
 ann.setProperty("annotators", "pos, lemma, ner");
 StanfordCoreNLP pipeline = new StanfordCoreNLP(ann, false);
 pipeline.annotate(doc);
 for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
  List<EntityMention> entities = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
  if (entities != null) {
   List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
   for (EntityMention en : entities) {
    //System.out.println("old ner tag for " + en.getExtentString() + " was " + en.getType());
    Span s = en.getExtent();
    Counter<String> allNertagforSpan = new ClassicCounter<>();
    for (int i = s.start(); i < s.end(); i++) {
     allNertagforSpan.incrementCount(tokens.get(i).ner());
    }
    String entityNertag = Counters.argmax(allNertagforSpan);
    en.setType(entityNertag);
    //System.out.println("new ner tag is " + entityNertag);
   }
  }
 }
}

Example source: stanfordnlp/CoreNLP

public static void main(String[] args) throws IOException {
 SUTimePipeline pipeline = new SUTimePipeline();
 Annotator timeAnnotator = pipeline.getTimeAnnotator("sutime", new Properties());
 BufferedReader is = new BufferedReader(new InputStreamReader(System.in));
 System.out.print("> ");
 for(String line; (line = is.readLine()) != null; ){
  Annotation ann = pipeline.process(line, null, timeAnnotator);
  System.out.println(ann.get(TimeAnnotations.TimexAnnotations.class));
  System.out.print("> ");
 }
}

Example source: stanfordnlp/CoreNLP

@Override
public void print(Annotation doc, OutputStream target, Options options) throws IOException {
 
 PrintWriter writer = new PrintWriter(IOUtils.encodedOutputStreamWriter(target, options.encoding));
 JSONWriter l0 = new JSONWriter(writer, options);
 if (doc.get(CoreAnnotations.SentencesAnnotation.class) != null) {
  doc.get(CoreAnnotations.SentencesAnnotation.class).stream().forEach(sentence -> {
   l0.object(l1 -> {
    l1.set("id", sentence.get(CoreAnnotations.SentenceIndexAnnotation.class) + 1);
    SemanticGraph sg;
    if (OUTPUT_REPRESENTATION.equalsIgnoreCase("basic")) {
     sg = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    } else if (OUTPUT_REPRESENTATION.equalsIgnoreCase("enhanced")) {
     sg = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    } else {
     sg = sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class);
    }
    l1.set("nodes", getNodes(sg));
   });
   l0.writer.append("\n");
   l0.writer.flush();
  });
  }
}

Example source: stanfordnlp/CoreNLP

public static void runTest(String test, String num) {
  System.out.println("Testing: " + test + " : num newline breaks: " + num);
  Annotation ann = new Annotation(test);

  Properties props = new Properties();
  props.setProperty("annotators", "tokenize,ssplit");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  pipeline.annotate(ann);

  Properties propsPara = new Properties();
  propsPara.setProperty("paragraphBreak", num);
  ParagraphAnnotator para = new ParagraphAnnotator(propsPara, true);
  para.annotate(ann);

  for (CoreMap sent : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
   System.out.println(sent);
   System.out.println(sent.get(CoreAnnotations.ParagraphIndexAnnotation.class));
  }
}

Example source: stanfordnlp/CoreNLP

public static void main(String[] args) throws Exception {
 Properties props = StringUtils.argsToProperties(args);
 StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
 String file = props.getProperty("file");
 String loadFile = props.getProperty("loadFile");
 if (loadFile != null && ! loadFile.isEmpty()) {
  CustomAnnotationSerializer ser = new CustomAnnotationSerializer(false, false);
  InputStream is = new FileInputStream(loadFile);
  Pair<Annotation, InputStream> pair = ser.read(is);
  pair.second.close();
  Annotation anno = pair.first;
  System.out.println(anno.toShorterString(StringUtils.EMPTY_STRING_ARRAY));
  is.close();
 } else if (file != null && ! file.equals("")) {
  String text = edu.stanford.nlp.io.IOUtils.slurpFile(file);
  Annotation doc = new Annotation(text);
  pipeline.annotate(doc);
  CustomAnnotationSerializer ser = new CustomAnnotationSerializer(false, false);
  PrintStream os = new PrintStream(new FileOutputStream(file + ".ser"));
  ser.write(doc, os).close();
  log.info("Serialized annotation saved in " + file + ".ser");
 } else {
  log.info("usage: CustomAnnotationSerializer [-file file] [-loadFile file]");
 }
}

Example source: stanfordnlp/CoreNLP

// From StanfordCoreNLPServer's Tregex handler: annotate the document if it has
// no sentences yet, then write every Tregex match out as JSON. `docWriter` is the
// handler's JSON writer; `pattern` is a TregexPattern compiled from the request's
// "pattern" parameter, whose setup was elided in this excerpt.
if ( ! doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
 StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
 pipeline.annotate(doc);
}
docWriter.set("sentences", doc.get(CoreAnnotations.SentencesAnnotation.class).stream().map(sentence -> (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer sentWriter) -> {
 Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
 TregexMatcher matcher = pattern.matcher(tree);
 int i = 0;
 while (matcher.find()) {
  sentWriter.set(Integer.toString(i++), (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer matchWriter) -> {
   matchWriter.set("match", matcher.getMatch().pennString());
   matchWriter.set("namedNodes", matcher.getNodeNames().stream().map(nodeName -> (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer namedNodeWriter) ->
    namedNodeWriter.set(nodeName, matcher.getNode(nodeName).pennString())
   ));
  });
 }
}));

// Later in the same handler, the rendered JSON is sent back to the client:
byte[] content = pair.first.getBytes();
sendAndGetResponse(httpExchange, content);
if (completedAnnotation != null && ! StringUtils.isNullOrEmpty(props.getProperty("annotators"))) {
 callback.accept(new FinishedRequest(props, completedAnnotation, params.get("pattern"), null));
}
