本文整理了Java中edu.stanford.nlp.pipeline.Annotation
类的一些代码示例,展示了Annotation
类的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Annotation
类的具体详情如下:
包路径:edu.stanford.nlp.pipeline.Annotation
类名称:Annotation
[英]An annotation representing a span of text in a document. Basically just an implementation of CoreMap that knows about text. You're meant to use the annotation keys in CoreAnnotations for common cases, but can define bespoke ones for unusual annotations.
[中]表示文档中文本范围的注释。基本上只是CoreMap的一个实现,它了解文本。您打算在CoreAnnotations中为常见情况使用注释键,但可以为不常见的注释定义定制的注释键。
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Annotates {@code text} with the given pipeline and prints per-token analyses
 * (word, lemma, POS, NER, normalized NER) to {@code out}.
 */
private static void runPipeline(StanfordCoreNLP pipeline, String text, PrintWriter out) {
  Annotation annotation = new Annotation(text);
  pipeline.annotate(annotation);
  // An Annotation is a CoreMap; individual analyses are fetched by key class.
  out.println();
  // toString() would print only the raw text; toShorterString() summarizes contents.
  out.println("The top level annotation");
  out.println(annotation.toShorterString());
  for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      // Pull out the word form plus its lemma, POS tag, NE tag, and normalized NE value.
      String word = token.get(CoreAnnotations.TextAnnotation.class);
      String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
      String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
      String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
      String normalized = token.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class);
      out.println("token: " + "word="+word + ", lemma="+lemma + ", pos=" + pos + ", ne=" + ne + ", normalized=" + normalized);
    }
  }
  out.flush();
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Parses a pre-tokenized sentence, honoring the given parser constraints.
 * Wraps the tokens into a one-sentence document, runs the parser annotator
 * on it, and returns the resulting parse tree.
 */
private Tree parse(List<CoreLabel> tokens,
                   List<ParserConstraint> constraints) {
  // Build a single-sentence "document" around the supplied tokens.
  CoreMap sentence = new Annotation("");
  sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
  sentence.set(ParserAnnotations.ConstraintAnnotation.class, constraints);
  List<CoreMap> sentenceList = new ArrayList<>(1);
  sentenceList.add(sentence);
  Annotation document = new Annotation("");
  document.set(CoreAnnotations.SentencesAnnotation.class, sentenceList);
  getParser().annotate(document);
  // The parser writes the tree back onto the (possibly replaced) sentence.
  List<CoreMap> annotated = document.get(CoreAnnotations.SentencesAnnotation.class);
  return annotated.get(0).get(TreeCoreAnnotations.TreeAnnotation.class);
}
代码示例来源:origin: stanfordnlp/CoreNLP
@Override
public void annotate(Annotation annotation) {
  // Adds number annotations, working sentence-by-sentence when the document
  // has been sentence-split, or over the whole token list otherwise.
  if (VERBOSE) {
    log.info("Adding number annotation ... ");
  }
  if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
    // Sentence-split input: classify each sentence's tokens separately.
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
      doOneSentenceNew(sentence.get(CoreAnnotations.TokensAnnotation.class), annotation, sentence);
    }
    if (VERBOSE) {
      log.info("done. Output: " + annotation.get(CoreAnnotations.SentencesAnnotation.class));
    }
  } else if (annotation.containsKey(CoreAnnotations.TokensAnnotation.class)) {
    // Tokenized but not sentence-split: treat the full token list as one unit.
    doOneSentenceNew(annotation.get(CoreAnnotations.TokensAnnotation.class), annotation, null);
  } else {
    throw new RuntimeException("unable to find sentences in: " + annotation);
  }
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Annotates {@code text} with {@code pipeline}, first attaching {@code date}
 * as the document date so date-sensitive annotators can use it.
 */
public static Annotation textToAnnotation(AnnotationPipeline pipeline, String text, String date) {
  Annotation document = new Annotation(text);
  document.set(CoreAnnotations.DocDateAnnotation.class, date);
  pipeline.annotate(document);
  return document;
}
代码示例来源:origin: stanfordnlp/CoreNLP
@Override
public void annotate(Annotation annotation) {
  // This annotator requires sentence-split input; fail fast otherwise.
  if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
    throw new RuntimeException("unable to find sentences in: " + annotation);
  }
  // Process each sentence independently.
  for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
    doOneSentence(sentence);
  }
}
代码示例来源:origin: stanfordnlp/CoreNLP
@Override
public void annotate(Annotation annotation) {
  // No tokens means nothing to clean; silently do nothing.
  if (!annotation.containsKey(CoreAnnotations.TokensAnnotation.class)) {
    return;
  }
  List<CoreLabel> oldTokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
  if (DEBUG) { log.info("CleanXML: starting tokens: " + oldTokens); }
  List<CoreLabel> cleaned = process(annotation, oldTokens);
  // We assume that if someone is using this annotator, they don't
  // want the old tokens any more and get rid of them.
  // Renumber token indexes since xml tokens may have been removed.
  setTokenBeginTokenEnd(cleaned);
  annotation.set(CoreAnnotations.TokensAnnotation.class, cleaned);
  if (DEBUG) { log.info("CleanXML: ending tokens: " + annotation.get(CoreAnnotations.TokensAnnotation.class)); }
}
代码示例来源:origin: stanfordnlp/CoreNLP
out = new PrintWriter(args[1]);
} else {
out = new PrintWriter(System.out);
Properties props = new Properties();
props.load(IOUtils.readerFromString("StanfordCoreNLP-chinese.properties"));
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
document = new Annotation(IOUtils.slurpFileNoExceptions(args[0]));
} else {
document = new Annotation("克林顿说,华盛顿将逐步落实对韩国的经济援助。金大中对克林顿的讲话报以掌声:克林顿总统在会谈中重申,他坚定地支持韩国摆脱经济危机。");
pipeline.annotate(document);
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
out.println("Coreference information");
Map<Integer, CorefChain> corefChains =
document.get(CorefCoreAnnotations.CorefChainAnnotation.class);
if (corefChains == null) { return; }
for (Map.Entry<Integer,CorefChain> entry: corefChains.entrySet()) {
List<CoreLabel> tokens = sentences.get(m.sentNum - 1).get(CoreAnnotations.TokensAnnotation.class);
out.println(" " + m + ":[" + tokens.get(m.startIndex - 1).beginPosition() + ", " +
tokens.get(m.endIndex - 2).endPosition() + ')');
代码示例来源:origin: stanfordnlp/CoreNLP
xmlOut = new PrintWriter(args[2]);
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[0]));
} else {
annotation = new Annotation("Kosgi Santosh sent an email to Stanford University. He didn't get a reply.");
pipeline.annotate(annotation);
pipeline.prettyPrint(annotation, out);
out.println(annotation.toShorterString());
List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
if (sentences != null && ! sentences.isEmpty()) {
out.println(sentence.keySet());
out.println(sentence.toShorterString());
for (CoreMap token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
tree.pennPrint(out);
annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
if (corefChains == null) { return; }
for (Map.Entry<Integer,CorefChain> entry: corefChains.entrySet()) {
代码示例来源:origin: stanfordnlp/CoreNLP
public static Map<String, DataInstance> runPOSNERParseOnTokens(Map<String, DataInstance> sents, Properties propsoriginal){
PatternFactory.PatternType type = PatternFactory.PatternType.valueOf(propsoriginal.getProperty(Flags.patternType));
Properties props = new Properties();
List<String> anns = new ArrayList<>();
anns.add("pos");
anns.add("lemma");
boolean useTargetParserParentRestriction = Boolean.parseBoolean(propsoriginal.getProperty(Flags.useTargetParserParentRestriction));
boolean useTargetNERRestriction = Boolean.parseBoolean(propsoriginal.getProperty(Flags.useTargetNERRestriction));
String posModelPath = props.getProperty(Flags.posModelPath);
props.setProperty("pos.model", posModelPath);
StanfordCoreNLP pipeline = new StanfordCoreNLP(props, false);
Redwood.log(Redwood.DBG, "Annotating text");
List<CoreMap> temp = new ArrayList<>();
CoreMap s= new ArrayCoreMap();
s.set(CoreAnnotations.TokensAnnotation.class, en.getValue().getTokens());
temp.add(s);
Annotation doc = new Annotation(temp);
try {
pipeline.annotate(doc);
if (useTargetParserParentRestriction)
inferParentParseTag(s.get(TreeAnnotation.class));
} catch (Exception e) {
log.warn("Ignoring error: for sentence " + StringUtils.joinWords(en.getValue().getTokens(), " "));
代码示例来源:origin: stanfordnlp/CoreNLP
boolean debug = Boolean.parseBoolean(props.getProperty("debug", "false"));
String filepath = props.getProperty("i");
String outfile = props.getProperty("o");
if (filepath == null || outfile == null) {
usage();
System.exit(-1);
PrintWriter fout = new PrintWriter(outfile);
logger.info("Writing to " + outfile);
String ext = props.getProperty("ext");
docCnt++;
Annotation anno = doc.getAnnotation();
if (debug) System.out.println("Document " + docCnt + ": " + anno.get(CoreAnnotations.DocIDAnnotation.class));
for (CoreMap sentence:anno.get(CoreAnnotations.SentencesAnnotation.class)) {
if (debug) System.out.println("Parse: " + sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
if (debug) System.out.println("Sentence Tokens: " + StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class), ","));
writeTabSep(fout,sentence,doc.corefChainMap);
sentCnt++;
tokenCnt += sentence.get(CoreAnnotations.TokensAnnotation.class).size();
fout.close();
System.out.println("Total document count: " + docCnt);
System.out.println("Total sentence count: " + sentCnt);
代码示例来源:origin: stanfordnlp/CoreNLP
@Override
public void print(Annotation doc, OutputStream target, Options options) throws IOException {
  // Emit each sentence in CoNLL-U form: full dependency output when a basic
  // graph exists, otherwise a POS-only fallback.
  PrintWriter out = new PrintWriter(IOUtils.encodedOutputStreamWriter(target, options.encoding));
  for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
    SemanticGraph basic = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    if (basic == null) {
      // No dependency parse available for this sentence.
      out.print(conllUWriter.printPOSAnnotations(sentence));
    } else {
      SemanticGraph enhanced = sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class);
      out.print(conllUWriter.printSemanticGraph(basic, enhanced));
    }
  }
  out.flush();
}
代码示例来源:origin: stanfordnlp/CoreNLP
if(compress) os = new GZIPOutputStream(os);
PrintWriter pw = new PrintWriter(os);
Map<Integer, CorefChain> chains = corpus.get(CorefCoreAnnotations.CorefChainAnnotation.class);
saveCorefChains(chains, pw);
List<Pair<IntTuple, IntTuple>> corefGraph = corpus.get(CorefCoreAnnotations.CorefGraphAnnotation.class);
if(corefGraph != null){
boolean first = true;
for(Pair<IntTuple, IntTuple> arc: corefGraph){
if(! first) pw.print(" ");
pw.printf("%d %d %d %d", arc.first.get(0), arc.first.get(1), arc.second.get(0), arc.second.get(1));
first = false;
List<CoreMap> sentences = corpus.get(CoreAnnotations.SentencesAnnotation.class);
for(CoreMap sent: sentences){
Tree tree = sent.get(TreeCoreAnnotations.TreeAnnotation.class);
if(tree != null){
String treeString = tree.toString();
else pw.println();
代码示例来源:origin: stanfordnlp/CoreNLP
PrintWriter out;
if (args.length > 2) {
out = new PrintWriter(args[2]);
} else {
out = new PrintWriter(System.out);
StanfordCoreNLP pipeline = new StanfordCoreNLP(
PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
Annotation annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[1]));
pipeline.annotate(annotation);
List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
out.println("Sentence #" + ++i);
out.print(" Tokens:");
for (CoreLabel token : tokens) {
out.print(' ');
out.print(token.toShortString("Text", "PartOfSpeech", "NamedEntityTag"));
out.println();
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Runs the full quote-attribution pipeline (using the supplied resource files
 * and model) over the {@code test} text and prints, for each quotation, the
 * predicted mention and speaker together with the sieve that predicted them.
 */
public static void testPP(String familyFile, String animateFile, String genderFile,
                          String charactersFile, String modelFile) throws IOException, ClassNotFoundException {
  Properties props = new Properties();
  props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, depparse, quote, quoteattribution");
  // Point the quoteattribution annotator at its lexicons and trained model.
  props.setProperty("quoteattribution.familyWordsFile", familyFile);
  props.setProperty("quoteattribution.animacyWordsFile", animateFile);
  props.setProperty("quoteattribution.genderNamesFile", genderFile);
  props.setProperty("quoteattribution.charactersPath", charactersFile);
  props.setProperty("quoteattribution.modelPath", modelFile);
  StanfordCoreNLP coreNLP = new StanfordCoreNLP(props);
  Annotation processed = coreNLP.process(test);
  for (CoreMap quote : processed.get(CoreAnnotations.QuotationsAnnotation.class)) {
    System.out.println("Quote: " + quote.get(CoreAnnotations.TextAnnotation.class));
    if (quote.get(QuoteAttributionAnnotator.MentionAnnotation.class) == null) {
      System.out.println("Predicted Mention: none");
    } else {
      System.out.println("Predicted Mention: " + quote.get(QuoteAttributionAnnotator.MentionAnnotation.class) + " Predictor: " + quote.get(QuoteAttributionAnnotator.MentionSieveAnnotation.class));
    }
    if (quote.get(QuoteAttributionAnnotator.SpeakerAnnotation.class) == null) {
      System.out.println("Predicted Speaker: none");
    } else {
      System.out.println("Predicted Speaker: " + quote.get(QuoteAttributionAnnotator.SpeakerAnnotation.class) + " Predictor: " + quote.get(QuoteAttributionAnnotator.SpeakerSieveAnnotation.class));
    }
    System.out.println("====");
  }
  System.out.println("Finished");
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Re-types every entity mention in {@code doc} using CoreNLP NER: runs
 * pos/lemma/ner over the document, then relabels each mention with the
 * majority NER tag among the tokens its extent covers.
 */
private static void modifyUsingCoreNLPNER(Annotation doc) {
  Properties nerProps = new Properties();
  nerProps.setProperty("annotators", "pos, lemma, ner");
  StanfordCoreNLP nerPipeline = new StanfordCoreNLP(nerProps, false);
  nerPipeline.annotate(doc);
  for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
    List<EntityMention> mentions = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
    if (mentions == null) {
      continue;  // sentence has no entity mentions to update
    }
    List<CoreLabel> sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    for (EntityMention mention : mentions) {
      // Tally the NER tag of each token inside the mention's extent.
      Span extent = mention.getExtent();
      Counter<String> tagVotes = new ClassicCounter<>();
      for (int i = extent.start(); i < extent.end(); i++) {
        tagVotes.incrementCount(sentenceTokens.get(i).ner());
      }
      // Relabel the mention with the most frequent tag.
      mention.setType(Counters.argmax(tagVotes));
    }
  }
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Interactive SUTime demo: reads lines from stdin, runs the time annotator on
 * each, and prints the extracted Timex annotations.
 */
public static void main(String[] args) throws IOException {
  SUTimePipeline pipeline = new SUTimePipeline();
  Annotator timeAnnotator = pipeline.getTimeAnnotator("sutime", new Properties());
  BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
  System.out.print("> ");
  String line;
  while ((line = reader.readLine()) != null) {
    Annotation ann = pipeline.process(line, null, timeAnnotator);
    System.out.println(ann.get(TimeAnnotations.TimexAnnotations.class));
    System.out.print("> ");
  }
}
代码示例来源:origin: stanfordnlp/CoreNLP
@Override
public void print(Annotation doc, OutputStream target, Options options) throws IOException {
  // Write one JSON object per sentence (id + dependency nodes), choosing the
  // dependency representation selected by OUTPUT_REPRESENTATION.
  PrintWriter writer = new PrintWriter(IOUtils.encodedOutputStreamWriter(target, options.encoding));
  JSONWriter json = new JSONWriter(writer, options);
  List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  if (sentences == null) {
    return;  // nothing to print for an un-split document
  }
  for (CoreMap sentence : sentences) {
    json.object(l1 -> {
      // Sentence ids are 1-based in the output.
      l1.set("id", sentence.get(CoreAnnotations.SentenceIndexAnnotation.class) + 1);
      SemanticGraph sg;
      if (OUTPUT_REPRESENTATION.equalsIgnoreCase("basic")) {
        sg = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
      } else if (OUTPUT_REPRESENTATION.equalsIgnoreCase("enhanced")) {
        sg = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
      } else {
        // Default: enhanced++ dependencies.
        sg = sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class);
      }
      l1.set("nodes", getNodes(sg));
    });
    json.writer.append("\n");
    json.writer.flush();
  }
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Tokenizes and sentence-splits {@code test}, applies the paragraph annotator
 * with the given newline-break policy, and prints each sentence with its
 * paragraph index.
 */
public static void runTest(String test, String num) {
  System.out.println("Testing: " + test + " : num newline breaks: " + num);
  Annotation ann = new Annotation(test);
  // Stage 1: tokenize + sentence split.
  Properties tokProps = new Properties();
  tokProps.setProperty("annotators", "tokenize,ssplit");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(tokProps);
  pipeline.annotate(ann);
  // Stage 2: assign paragraph indices with the requested break policy.
  Properties paraProps = new Properties();
  paraProps.setProperty("paragraphBreak", num);
  ParagraphAnnotator para = new ParagraphAnnotator(paraProps, true);
  para.annotate(ann);
  for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
    System.out.println(sentence);
    System.out.println(sentence.get(CoreAnnotations.ParagraphIndexAnnotation.class));
  }
}
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Command-line driver: with {@code -loadFile} it deserializes a saved
 * annotation and prints a summary; with {@code -file} it annotates the file's
 * text and serializes the result to {@code <file>.ser}; otherwise it prints
 * usage.
 */
public static void main(String[] args) throws Exception {
  Properties props = StringUtils.argsToProperties(args);
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  String file = props.getProperty("file");
  String loadFile = props.getProperty("loadFile");
  if (loadFile != null && !loadFile.isEmpty()) {
    // Read a previously serialized annotation back in and summarize it.
    CustomAnnotationSerializer ser = new CustomAnnotationSerializer(false, false);
    InputStream is = new FileInputStream(loadFile);
    Pair<Annotation, InputStream> pair = ser.read(is);
    pair.second.close();
    Annotation anno = pair.first;
    System.out.println(anno.toShorterString(StringUtils.EMPTY_STRING_ARRAY));
    is.close();
  } else if (file != null && !file.isEmpty()) {
    // Annotate the file's text and save the serialized result next to it.
    String text = edu.stanford.nlp.io.IOUtils.slurpFile(file);
    Annotation doc = new Annotation(text);
    pipeline.annotate(doc);
    CustomAnnotationSerializer ser = new CustomAnnotationSerializer(false, false);
    PrintStream os = new PrintStream(new FileOutputStream(file + ".ser"));
    ser.write(doc, os).close();
    log.info("Serialized annotation saved in " + file + ".ser");
  } else {
    log.info("usage: CustomAnnotationSerializer [-file file] [-loadFile file]");
  }
}
代码示例来源:origin: stanfordnlp/CoreNLP
if ( ! doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
pipeline.annotate(doc);
docWriter.set("sentences", doc.get(CoreAnnotations.SentencesAnnotation.class).stream().map(sentence -> (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer sentWriter) -> {
Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
while (matcher.find()) {
sentWriter.set(Integer.toString(i++), (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer matchWriter) -> {
matchWriter.set("match", matcher.getMatch().pennString());
matchWriter.set("namedNodes", matcher.getNodeNames().stream().map(nodeName -> (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer namedNodeWriter) ->
namedNodeWriter.set(nodeName, matcher.getNode(nodeName).pennString())
));
});
byte[] content = pair.first.getBytes();
sendAndGetResponse(httpExchange, content);
if (completedAnnotation != null && ! StringUtils.isNullOrEmpty(props.getProperty("annotators"))) {
callback.accept(new FinishedRequest(props, completedAnnotation, params.get("pattern"), null));
内容来源于网络,如有侵权,请联系作者删除!