本文整理了 Java 中 zemberek.morphology.analysis.WordAnalysis 类的一些代码示例,展示了 WordAnalysis 类的具体用法。这些代码示例主要来源于 GitHub、Stack Overflow、Maven 等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。WordAnalysis 类的具体详情如下:
包路径:zemberek.morphology.analysis.WordAnalysis
类名称:WordAnalysis
暂无
代码示例来源:origin: ahmetaa/zemberek-nlp
// Excerpt from a larger method (enclosing code and closing braces are not part of this snippet).
// It compares an analysed token `w` with the corresponding training entry `s`.
// First check: warn when the analysed token's input text differs from the training word.
if (!w.getInput().equals(s.word)) {
Log.warn(
"Actual analysis token [%s] at index [%d] is different than word from training [%s] "
+ " for sentence [%s]", w.getInput(), i, s.word, sentence);
// Second check: warn when the number of analyses differs from the number stored for the
// training word, then break out of the enclosing construct (not visible here).
if (w.analysisCount() != s.wordAnalysis.size()) {
Log.warn(
"Actual analysis token [%s] has [%d] analyses but word from training file has [%d] "
+ " analyses for sentence [%s]",
w.getInput(), w.analysisCount(), s.wordAnalysis.size(), sentence);
break;
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Builds a key for a word analysis by concatenating the lexical morpheme form of each
 * analysis result, in order, separated by a newline character.
 *
 * @param results the word analysis whose results are turned into a key
 * @return the newline-joined key; an empty string when there are no analyses
 */
private String generateKeyFromParse(WordAnalysis results) {
  final int total = results.analysisCount();
  StringBuilder joined = new StringBuilder();
  int index = 0;
  while (index < total) {
    // The separator goes in front of every entry except the first one.
    if (index > 0) {
      joined.append("\n");
    }
    joined.append(results.getAnalysisResults().get(index).formatMorphemesLexical());
    index++;
  }
  return joined.toString();
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Converts a word analysis into its {@code WordAnalysisProto} message by converting every
 * single analysis with {@code toSingleAnalysisProto} and adding the results to the builder.
 *
 * @param analysis the word analysis to convert
 * @return the built proto message
 */
WordAnalysisProto toWordAnalysisProto(WordAnalysis analysis) {
  WordAnalysisProto.Builder message = WordAnalysisProto.newBuilder();
  message.addAllAnalyses(
      analysis.stream()
          .map(single -> toSingleAnalysisProto(single))
          .collect(Collectors.toList()));
  return message.build();
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Creates an ambiguity record for one word: stores the word's input text as the token and
 * registers every analysis of the word as a choice whose decision is still UNDECIDED.
 *
 * @param wordAnalysis the analysed word supplying the token and the candidate analyses
 */
AmbiguityAnalysis(WordAnalysis wordAnalysis) {
  this.token = wordAnalysis.getInput();
  wordAnalysis.forEach(
      candidate ->
          choices.add(new AnalysisDecision(this.token, candidate, Decision.UNDECIDED)));
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Returns a predicate that accepts a word analysis only when it has at most {@code i}
 * analyses.
 *
 * @param i the inclusive upper bound on the analysis count
 * @return a predicate testing {@code analysisCount() <= i}
 */
Predicate<WordAnalysis> maxAnalysisCount(int i) {
  return candidate -> i >= candidate.analysisCount();
}
代码示例来源:origin: ahmetaa/zemberek-nlp
// Excerpt (closing brace of the `if` is not part of this snippet).
// Take the analysis results of analysisData; when there are none, replace them with a
// one-element list holding an "unknown" analysis created from the input text.
List<SingleAnalysis> analyses = analysisData.getAnalysisResults();
if (analyses.size() == 0) {
analyses = new ArrayList<>(1);
analyses.add(SingleAnalysis.unknown(analysisData.getInput()));
代码示例来源:origin: ahmetaa/zemberek-nlp
// NOTE(review): the lines below are non-contiguous excerpts of a longer method; statements
// between them are missing, so they do not form a compilable unit on their own.
// Input text of the word at position i.
String s = swaList.get(i).getWordAnalysis().getInput();
// Normalized inputs of the neighbours at i-1, i-2, i+1 and i+2.
List<String> bigramContext = Lists.newArrayList(
normalize(swaList.get(i - 1).getWordAnalysis().getInput()),
normalize(swaList.get(i - 2).getWordAnalysis().getInput()),
normalize(swaList.get(i + 1).getWordAnalysis().getInput()),
normalize(swaList.get(i + 2).getWordAnalysis().getInput()));
// Presumably the tail of a second, narrower context list (neighbours i-1 and i+1 only);
// its opening line is not included in this excerpt.
normalize(swaList.get(i - 1).getWordAnalysis().getInput()),
normalize(swaList.get(i + 1).getWordAnalysis().getInput()));
// Distinct normalized lemmas of the dictionary items found in wordResults.
Set<String> stems = wordResults.stream()
.map(a -> normalize(a.getDictionaryItem().lemma))
.collect(Collectors.toSet());
// Log the word's input text, then iterate over each of its analyses (loop body not shown).
Log.info("%n%s : ", a.getWordAnalysis().getInput());
LinkedHashSet<String> items = new LinkedHashSet<>();
for (SingleAnalysis wa : a.getWordAnalysis()) {
代码示例来源:origin: ahmetaa/zemberek-nlp
// Excerpt (closing braces are not part of this snippet).
// Only when `analyses` holds no results and `current` is longer than 3 characters:
// add `current` to the candidates if the candidate list is still empty or if `current`
// is itself analysed as correct.
if ((analyses.analysisCount() == 0) && current.length() > 3) {
if (candidates.isEmpty() || morphology.analyze(current).isCorrect()) {
candidates.add(current);
代码示例来源:origin: iorixxx/lucene-solr-analysis-turkish
/**
 * Picks the analyses to keep for a word according to the given strategy.
 *
 * <p>With fewer than two analyses every analysis is returned and the strategy is not
 * inspected. Otherwise:
 * <ul>
 *   <li>{@code "all"} returns every analysis;</li>
 *   <li>{@code "maxMorpheme"} returns the analyses whose morpheme data list is longest;</li>
 *   <li>{@code "minMorpheme"} returns the analyses whose morpheme data list is shortest.</li>
 * </ul>
 *
 * @param results the word analysis to select from
 * @param strategy the selection strategy name
 * @return the selected analyses as a new list
 * @throws RuntimeException if there are two or more analyses and the strategy is unknown
 */
private static List<SingleAnalysis> selectMorphemes(WordAnalysis results, String strategy) {
  // Zero or one analysis: nothing to choose between.
  if (results.analysisCount() < 2 || strategy.equals("all")) {
    return results.stream().collect(Collectors.toList());
  }
  if (strategy.equals("maxMorpheme")) {
    final int longest =
        results.stream()
            .mapToInt(candidate -> candidate.getMorphemeDataList().size())
            .max()
            .getAsInt();
    return results.stream()
        .filter(candidate -> candidate.getMorphemeDataList().size() == longest)
        .collect(Collectors.toList());
  }
  if (strategy.equals("minMorpheme")) {
    final int shortest =
        results.stream()
            .mapToInt(candidate -> candidate.getMorphemeDataList().size())
            .min()
            .getAsInt();
    return results.stream()
        .filter(candidate -> candidate.getMorphemeDataList().size() == shortest)
        .collect(Collectors.toList());
  }
  throw new RuntimeException("unknown strategy " + strategy);
}
代码示例来源:origin: ahmetaa/zemberek-nlp
// Excerpt (the closing brace of the `if` and the surrounding loop are not part of this snippet).
// Break out of the enclosing construct as soon as an analysis is not correct.
if (!a.isCorrect()) {
break;
// Convert every analysis result of `a` with DataConverter::convert and collect the
// converted values into a list of strings.
List<String> z = a.getAnalysisResults().stream()
.map(DataConverter::convert)
.collect(Collectors.toList());
代码示例来源:origin: cbilgili/zemberek-nlp-server
/**
 * Registers the POST {@code /find_pos} route. The handler reads the {@code sentence} query
 * parameter, runs analysis and disambiguation on it, and returns one {@code POSResult} per
 * word serialized with {@code jsonConverter}.
 */
public void initializeController() {
  post("/find_pos", (req, res) -> {
    String text = req.queryParams("sentence");
    SentenceAnalysis disambiguated = morphology.analyzeAndDisambiguate(text);
    List<POSResult> payload = new ArrayList<>();
    for (SentenceWordAnalysis wordEntry : disambiguated.getWordAnalyses()) {
      payload.add(toPosResult(wordEntry));
    }
    return jsonConverter.toJson(payload);
  });
}

/** Copies the best analysis and the input forms of one word into a new {@code POSResult}. */
private static POSResult toPosResult(SentenceWordAnalysis entry) {
  POSResult converted = new POSResult();
  SingleAnalysis best = entry.getBestAnalysis();
  converted.analysis = best.formatLexical();
  converted.normalizedInput = entry.wordAnalysis.getNormalizedInput();
  converted.pos = best.getPos().shortForm;
  converted.input = entry.wordAnalysis.getInput();
  converted.morphemesLexical = best.formatMorphemesLexical();
  return converted;
}
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Reads a UTF-8 vocabulary file (one word per line), keeps only the words whose
 * morphological analysis is reported as correct, and writes the kept words to the output
 * file in UTF-8. A warning is logged for every rejected word.
 *
 * @param vocabFile the input vocabulary file
 * @param outFile the file receiving the accepted words
 * @throws IOException if reading or writing fails
 */
private static void filterVocab(Path vocabFile, Path outFile) throws IOException {
  List<String> vocabulary = Files.readAllLines(vocabFile, StandardCharsets.UTF_8);
  TurkishMorphology analyzer = TurkishMorphology.createWithDefaults();
  List<String> accepted = new ArrayList<>();
  for (String entry : vocabulary) {
    if (analyzer.analyze(entry).isCorrect()) {
      accepted.add(entry);
    } else {
      Log.warn("Cannot analyze %s", entry);
    }
  }
  Files.write(outFile, accepted, StandardCharsets.UTF_8);
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Analyses the current term and replaces {@code lemmas} with a deque holding the distinct
 * lemmas of all of its analyses.
 *
 * @return always {@code true}
 */
private boolean addLemmas() {
  Set<String> distinct = new HashSet<>(5);
  morphology.analyze(termAttribute.toString())
      .forEach(single -> distinct.addAll(single.getLemmas()));
  lemmas = new ArrayDeque<>(distinct);
  return true;
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Rewrites a sentence so that every word is replaced by the last lemma of its best
 * analysis. Words whose best analysis is unknown are kept as their original input text.
 *
 * @param sentence the sentence to process
 * @return the resulting words joined by single spaces
 */
private String replaceWordsWithLemma(String sentence) {
  SentenceAnalysis disambiguated = morphology.analyzeAndDisambiguate(sentence);
  List<String> output = new ArrayList<>();
  for (SentenceWordAnalysis wordEntry : disambiguated) {
    SingleAnalysis chosen = wordEntry.getBestAnalysis();
    String replacement;
    if (chosen.isUnknown()) {
      replacement = wordEntry.getWordAnalysis().getInput();
    } else {
      List<String> candidates = chosen.getLemmas();
      replacement = candidates.get(candidates.size() - 1);
    }
    output.add(replacement);
  }
  return String.join(" ", output);
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Logs every analysis of a word on its own numbered line (starting at 1) using the long
 * format. Analyses whose dictionary item carries the {@code RootAttribute.Runtime}
 * attribute get an extra suffix. When there are no analyses, "No Analysis." is logged.
 *
 * @param results the word analysis to print
 */
private void printResults(WordAnalysis results) {
  if (results.analysisCount() == 0) {
    Log.info("No Analysis.");
  }
  int ordinal = 0;
  for (SingleAnalysis candidate : results) {
    ordinal++;
    StringBuilder line = new StringBuilder(candidate.formatLong());
    if (candidate.getDictionaryItem().attributes.contains(RootAttribute.Runtime)) {
      line.append(" (Generated by UnidentifiedTokenParser)");
    }
    Log.info(ordinal + " - " + line);
  }
}
}
代码示例来源:origin: cbilgili/zemberek-nlp-server
// NOTE(review): three overlapping excerpts that each build a SentenceItem plus a list of
// AnalyzeWordItem; loop-closing braces and the code between the excerpts are missing.
// Excerpt 1: `wordAnalysis` exposes getWordAnalysis(); every analysis of the wrapped word is
// converted and added.
List<AnalyzeWordItem> analyze_list = new ArrayList<>();
SentenceItem sentence_item = new SentenceItem();
sentence_item.input = wordAnalysis.getWordAnalysis().getInput();
for (SingleAnalysis singleAnalysis : wordAnalysis.getWordAnalysis()) {
analyze_list.add(AnalyzeWordItem.fromSingleAnalysis(singleAnalysis));
// Excerpt 2: here `wordAnalysis` is used directly; every entry of getAnalysisResults() is
// converted and added.
List<AnalyzeWordItem> analyze_list = new ArrayList<>();
SentenceItem sentence_item = new SentenceItem();
sentence_item.input = wordAnalysis.getInput();
for (SingleAnalysis singleAnalysis : wordAnalysis.getAnalysisResults()) {
analyze_list.add(AnalyzeWordItem.fromSingleAnalysis(singleAnalysis));
// Excerpt 3: only the first analysis result (index 0) is converted and added.
List<AnalyzeWordItem> analyze_list = new ArrayList<>();
SentenceItem sentence_item = new SentenceItem();
sentence_item.input = wordAnalysis.getInput();
SingleAnalysis singleAnalysis = wordAnalysis.getAnalysisResults().get(0);
analyze_list.add(AnalyzeWordItem.fromSingleAnalysis(singleAnalysis));
// Attach the collected items to the sentence item.
sentence_item.results = analyze_list;
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Returns {@code true} when the analysis is correct and at least one of its single analyses
 * has a dictionary item whose secondary POS is neither {@code ProperNoun} nor
 * {@code Abbreviation}.
 *
 * <p>NOTE(review): despite the method name, a word that also has proper-noun or
 * abbreviation analyses still passes as long as one other analysis exists.
 *
 * @param analysis the word analysis to test
 * @return whether the analysis is correct and has a non-proper, non-abbreviation reading
 */
private static boolean isCorrectAndContainsNoProper(WordAnalysis analysis) {
  return analysis.isCorrect()
      && analysis.stream()
          .anyMatch(
              candidate ->
                  !(candidate.getDictionaryItem().secondaryPos == SecondaryPos.ProperNoun
                      || candidate.getDictionaryItem().secondaryPos
                          == SecondaryPos.Abbreviation));
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Builds a morphology instance over the default lexicon with diacritics ignored during
 * analysis, analyses the word "kisi", and prints every resulting analysis to standard
 * output.
 *
 * @param args unused command-line arguments
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws IOException {
  TurkishMorphology.builder()
      .ignoreDiacriticsInAnalysis()
      .setLexicon(RootLexicon.getDefault())
      .build()
      .analyze("kisi")
      .forEach(parse -> System.out.println(parse));
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Writes sentence analyses to a UTF-8 text file, skipping every sentence whose best
 * analyses contain an unknown analysis.
 *
 * <p>For each written sentence the output holds an {@code S:<sentence>} line, then for each
 * word its input text followed by one long-format line per analysis, and finally an empty
 * line. When a word has more than one analysis, the line equal to the best analysis is
 * suffixed with {@code *}.
 *
 * @param sentences the analysed sentences to write
 * @param out the destination file
 * @throws IOException if the file cannot be opened for writing
 */
public static void saveUnambiguous(
    List<SentenceAnalysis> sentences,
    Path out)
    throws IOException {
  try (PrintWriter writer = new PrintWriter(out.toFile(), "utf-8")) {
    for (SentenceAnalysis sentence : sentences) {
      boolean hasUnknown =
          sentence.bestAnalysis().stream().anyMatch(SingleAnalysis::isUnknown);
      if (hasUnknown) {
        continue;
      }
      writer.format("S:%s%n", sentence.getSentence());
      for (SentenceWordAnalysis wordEntry : sentence) {
        WordAnalysis word = wordEntry.getWordAnalysis();
        writer.println(word.getInput());
        SingleAnalysis selected = wordEntry.getBestAnalysis();
        for (SingleAnalysis candidate : word) {
          String marker = candidate.equals(selected) ? "*" : "";
          if (word.analysisCount() != 1) {
            writer.format("%s%s%n", candidate.formatLong(), marker);
          } else {
            // A single analysis is never marked.
            writer.println(candidate.formatLong());
          }
        }
      }
      writer.println();
    }
  }
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Replaces the words of a labelled sentence with lemmas. The first space-separated token is
 * treated as a label and excluded from morphological analysis; every remaining word is
 * replaced by the last lemma of its best analysis, or kept as its input text when the best
 * analysis is unknown.
 *
 * @param sentence the input line, whose first token is taken to be the label
 * @return the label followed by the processed words, joined by single spaces; an empty
 *     string when nothing remains after removing the label
 */
private String splitWords(String sentence) {
  List<String> parts = Splitter.on(" ").splitToList(sentence);
  // The first token is assumed to be the label; only the rest is analysed.
  String label = parts.get(0);
  String body = String.join(" ", parts.subList(1, parts.size()));
  if (body.isEmpty()) {
    return body;
  }
  SentenceAnalysis disambiguated = morphology.analyzeAndDisambiguate(body);
  List<String> output = new ArrayList<>();
  // The label always comes first in the result.
  output.add(label);
  for (SentenceWordAnalysis wordEntry : disambiguated) {
    SingleAnalysis chosen = wordEntry.getBestAnalysis();
    if (!chosen.isUnknown()) {
      List<String> candidates = chosen.getLemmas();
      output.add(candidates.get(candidates.size() - 1));
    } else {
      output.add(wordEntry.getWordAnalysis().getInput());
    }
  }
  return String.join(" ", output);
}
内容来源于网络,如有侵权,请联系作者删除!