zemberek.morphology.analysis.WordAnalysis类的使用及代码示例

x33g5p2x  于2022-02-03 转载在 其他  
字(9.4k)|赞(0)|评价(0)|浏览(126)

本文整理了Java中zemberek.morphology.analysis.WordAnalysis类的一些代码示例,展示了WordAnalysis类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。WordAnalysis类的具体详情如下:
包路径:zemberek.morphology.analysis.WordAnalysis
类名称:WordAnalysis

WordAnalysis介绍

暂无

代码示例

代码示例来源:origin: ahmetaa/zemberek-nlp

if (!w.getInput().equals(s.word)) {
 Log.warn(
   "Actual analysis token [%s] at index [%d] is different than word from training [%s] "
     + " for sentence [%s]", w.getInput(), i, s.word, sentence);
if (w.analysisCount() != s.wordAnalysis.size()) {
 Log.warn(
   "Actual analysis token [%s] has [%d] analyses but word from training file has [%d] "
     + " analyses for sentence [%s]",
   w.getInput(), w.analysisCount(), s.wordAnalysis.size(), sentence);
 break;

代码示例来源:origin: ahmetaa/zemberek-nlp

private String generateKeyFromParse(WordAnalysis results) {
  // Builds a key by joining the lexical morpheme format of every analysis
  // with '\n' (no trailing separator). The separator is emitted before each
  // element after the first, which is equivalent to the tail-check form.
  StringBuilder key = new StringBuilder();
  for (int index = 0; index < results.analysisCount(); index++) {
    if (index > 0) {
      key.append("\n");
    }
    key.append(results.getAnalysisResults().get(index).formatMorphemesLexical());
  }
  return key.toString();
}

代码示例来源:origin: ahmetaa/zemberek-nlp

WordAnalysisProto toWordAnalysisProto(WordAnalysis analysis) {
  // Converts each SingleAnalysis of the word to its proto form and bundles
  // them all into one WordAnalysisProto message.
  return WordAnalysisProto.newBuilder()
      .addAllAnalyses(
          analysis.stream()
              .map(single -> toSingleAnalysisProto(single))
              .collect(Collectors.toList()))
      .build();
}

代码示例来源:origin: ahmetaa/zemberek-nlp

AmbiguityAnalysis(WordAnalysis wordAnalysis) {
  // Record the surface token and seed one UNDECIDED decision per candidate
  // analysis of this word.
  this.token = wordAnalysis.getInput();
  wordAnalysis.forEach(
      analysis -> choices.add(new AnalysisDecision(token, analysis, Decision.UNDECIDED)));
}

代码示例来源:origin: ahmetaa/zemberek-nlp

Predicate<WordAnalysis> maxAnalysisCount(int i) {
  // Matches words that have at most `i` morphological analyses.
  return analysis -> analysis.analysisCount() <= i;
}

代码示例来源:origin: ahmetaa/zemberek-nlp

List<SingleAnalysis> analyses = analysisData.getAnalysisResults();
if (analyses.size() == 0) {
 analyses = new ArrayList<>(1);
 analyses.add(SingleAnalysis.unknown(analysisData.getInput()));

代码示例来源:origin: ahmetaa/zemberek-nlp

String s = swaList.get(i).getWordAnalysis().getInput();
List<String> bigramContext = Lists.newArrayList(
  normalize(swaList.get(i - 1).getWordAnalysis().getInput()),
  normalize(swaList.get(i - 2).getWordAnalysis().getInput()),
  normalize(swaList.get(i + 1).getWordAnalysis().getInput()),
  normalize(swaList.get(i + 2).getWordAnalysis().getInput()));
  normalize(swaList.get(i - 1).getWordAnalysis().getInput()),
  normalize(swaList.get(i + 1).getWordAnalysis().getInput()));
Set<String> stems = wordResults.stream()
  .map(a -> normalize(a.getDictionaryItem().lemma))
  .collect(Collectors.toSet());
Log.info("%n%s : ", a.getWordAnalysis().getInput());
LinkedHashSet<String> items = new LinkedHashSet<>();
for (SingleAnalysis wa : a.getWordAnalysis()) {

代码示例来源:origin: ahmetaa/zemberek-nlp

if ((analyses.analysisCount() == 0) && current.length() > 3) {
if (candidates.isEmpty() || morphology.analyze(current).isCorrect()) {
 candidates.add(current);

代码示例来源:origin: iorixxx/lucene-solr-analysis-turkish

/**
 * Selects a subset of a word's analyses according to the given strategy.
 * Supported strategies: "all" (every analysis), "maxMorpheme" (only analyses
 * with the largest morpheme count), "minMorpheme" (only analyses with the
 * smallest morpheme count).
 *
 * @param results  all analyses of a single word
 * @param strategy selection strategy name
 * @return the selected analyses; all analyses when there are fewer than two
 * @throws IllegalArgumentException for an unrecognized strategy name
 */
private static List<SingleAnalysis> selectMorphemes(WordAnalysis results, String strategy) {
    // With zero or one analysis there is nothing to select between.
    if (results.analysisCount() < 2) return results.stream().collect(Collectors.toList());
    switch (strategy) {
        case "all":
            return results.stream().collect(Collectors.toList());
        case "maxMorpheme":
            // Keep only analyses whose morpheme count equals the maximum.
            // .get() is safe: analysisCount() >= 2 here, so the stream is non-empty.
            final int max = results.stream().map(morphParse -> morphParse.getMorphemeDataList().size()).max(Comparator.naturalOrder()).get();
            return results.stream().filter(parse -> parse.getMorphemeDataList().size() == max).collect(Collectors.toList());
        case "minMorpheme":
            // Keep only analyses whose morpheme count equals the minimum.
            final int min = results.stream().map(morphParse -> morphParse.getMorphemeDataList().size()).min(Comparator.naturalOrder()).get();
            return results.stream().filter(parse -> parse.getMorphemeDataList().size() == min).collect(Collectors.toList());
        default:
            // IllegalArgumentException is the idiomatic unchecked type for an
            // unsupported option value; callers catching RuntimeException still work.
            throw new IllegalArgumentException("unknown strategy " + strategy);
    }
}

代码示例来源:origin: ahmetaa/zemberek-nlp

if (!a.isCorrect()) {
 break;
 List<String> z = a.getAnalysisResults().stream()
   .map(DataConverter::convert)
   .collect(Collectors.toList());

代码示例来源:origin: cbilgili/zemberek-nlp-server

public void initializeController() {
    // Registers the POS-tagging endpoint: analyzes and disambiguates the
    // incoming "sentence" query parameter, then reports one best analysis
    // per word as JSON.
    post("/find_pos", (req, res) -> {
        String sentence = req.queryParams("sentence");
        SentenceAnalysis sentenceAnalysis = morphology.analyzeAndDisambiguate(sentence);
        List<POSResult> posResults = new ArrayList<>();
        for (SentenceWordAnalysis wordEntry : sentenceAnalysis.getWordAnalyses()) {
            SingleAnalysis best = wordEntry.getBestAnalysis();
            POSResult posResult = new POSResult();
            posResult.analysis = best.formatLexical();
            posResult.normalizedInput = wordEntry.wordAnalysis.getNormalizedInput();
            posResult.pos = best.getPos().shortForm;
            posResult.input = wordEntry.wordAnalysis.getInput();
            posResult.morphemesLexical = best.formatMorphemesLexical();
            posResults.add(posResult);
        }
        return jsonConverter.toJson(posResults);
    });
}
}

代码示例来源:origin: ahmetaa/zemberek-nlp

private static void filterVocab(Path vocabFile, Path outFile) throws IOException {
  // Copies to outFile only those vocabulary entries the default Turkish
  // morphology can analyze; unparseable words are logged and dropped.
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  List<String> kept = new ArrayList<>();
  for (String word : Files.readAllLines(vocabFile, StandardCharsets.UTF_8)) {
    if (morphology.analyze(word).isCorrect()) {
      kept.add(word);
    } else {
      Log.warn("Cannot analyze %s", word);
    }
  }
  Files.write(outFile, kept, StandardCharsets.UTF_8);
}

代码示例来源:origin: ahmetaa/zemberek-nlp

private boolean addLemmas() {
  // Collects the distinct lemmas across every analysis of the current term
  // into the lemma queue. Always reports success.
  WordAnalysis analysis = morphology.analyze(termAttribute.toString());
  Set<String> uniqueLemmas = new HashSet<>(5);
  for (SingleAnalysis single : analysis) {
    uniqueLemmas.addAll(single.getLemmas());
  }
  lemmas = new ArrayDeque<>(uniqueLemmas);
  return true;
}

代码示例来源:origin: ahmetaa/zemberek-nlp

private String replaceWordsWithLemma(String sentence) {
  // Rebuilds the sentence replacing every word with the last lemma of its
  // disambiguated best analysis; unknown words are kept verbatim.
  List<String> replaced = new ArrayList<>();
  for (SentenceWordAnalysis wordAnalysis : morphology.analyzeAndDisambiguate(sentence)) {
    SingleAnalysis best = wordAnalysis.getBestAnalysis();
    if (best.isUnknown()) {
      replaced.add(wordAnalysis.getWordAnalysis().getInput());
    } else {
      List<String> lemmas = best.getLemmas();
      replaced.add(lemmas.get(lemmas.size() - 1));
    }
  }
  return String.join(" ", replaced);
}

代码示例来源:origin: ahmetaa/zemberek-nlp

private void printResults(WordAnalysis results) {
  // Logs every analysis of the word, numbered from 1. Analyses whose
  // dictionary item was generated at runtime are flagged explicitly.
  if (results.analysisCount() == 0) {
    Log.info("No Analysis.");
  }
  int index = 1;
  for (SingleAnalysis result : results) {
    String line = result.formatLong();
    if (result.getDictionaryItem().attributes.contains(RootAttribute.Runtime)) {
      line = line + " (Generated by UnidentifiedTokenParser)";
    }
    Log.info(index + " - " + line);
    index++;
  }
}
}

代码示例来源:origin: cbilgili/zemberek-nlp-server

List<AnalyzeWordItem> analyze_list = new ArrayList<>();
SentenceItem sentence_item = new SentenceItem();
sentence_item.input = wordAnalysis.getWordAnalysis().getInput();
for (SingleAnalysis singleAnalysis : wordAnalysis.getWordAnalysis()) {
  analyze_list.add(AnalyzeWordItem.fromSingleAnalysis(singleAnalysis));
List<AnalyzeWordItem> analyze_list = new ArrayList<>();
SentenceItem sentence_item = new SentenceItem();
sentence_item.input = wordAnalysis.getInput();
for (SingleAnalysis singleAnalysis : wordAnalysis.getAnalysisResults()) {
  analyze_list.add(AnalyzeWordItem.fromSingleAnalysis(singleAnalysis));
List<AnalyzeWordItem> analyze_list = new ArrayList<>();
SentenceItem sentence_item = new SentenceItem();
sentence_item.input = wordAnalysis.getInput();
SingleAnalysis singleAnalysis = wordAnalysis.getAnalysisResults().get(0);
analyze_list.add(AnalyzeWordItem.fromSingleAnalysis(singleAnalysis));
sentence_item.results = analyze_list;

代码示例来源:origin: ahmetaa/zemberek-nlp

private static boolean isCorrectAndContainsNoProper(WordAnalysis analysis) {
  // Accepts only correctly analyzed words that have at least one reading
  // which is neither a proper noun nor an abbreviation.
  if (!analysis.isCorrect()) {
    return false;
  }
  for (SingleAnalysis single : analysis) {
    SecondaryPos secondary = single.getDictionaryItem().secondaryPos;
    if (secondary != SecondaryPos.ProperNoun && secondary != SecondaryPos.Abbreviation) {
      return true;
    }
  }
  return false;
}

代码示例来源:origin: ahmetaa/zemberek-nlp

public static void main(String[] args) throws IOException {
  // Analyzes "kisi" with diacritic-insensitive matching over the default
  // lexicon and prints every resulting analysis.
  TurkishMorphology morphology =
      TurkishMorphology.builder()
          .ignoreDiacriticsInAnalysis()
          .setLexicon(RootLexicon.getDefault())
          .build();
  for (SingleAnalysis analysis : morphology.analyze("kisi")) {
    System.out.println(analysis);
  }
}

代码示例来源:origin: ahmetaa/zemberek-nlp

/**
 * Writes sentences and their analyses to a text file in the training/evaluation
 * format: an "S:&lt;sentence&gt;" line, then for every word its surface input
 * followed by all of its analyses. Sentences whose best analysis contains any
 * unknown word are skipped entirely.
 *
 * @param sentences disambiguated sentence analyses to serialize
 * @param out       output file path (written as UTF-8)
 * @throws IOException if the output file cannot be written
 */
public static void saveUnambiguous(
  List<SentenceAnalysis> sentences,
  Path out)
  throws IOException {
 try (PrintWriter pwMorph = new PrintWriter(out.toFile(), "utf-8")) {
  for (SentenceAnalysis analysis : sentences) {
   // Skip the whole sentence if any word's best analysis is unknown.
   if (analysis.bestAnalysis().stream().anyMatch(SingleAnalysis::isUnknown)) {
    continue;
   }
   pwMorph.format("S:%s%n", analysis.getSentence());
   for (SentenceWordAnalysis sw : analysis) {
    WordAnalysis wa = sw.getWordAnalysis();
    pwMorph.println(wa.getInput());
    SingleAnalysis best = sw.getBestAnalysis();
    for (SingleAnalysis singleAnalysis : wa) {
     boolean isBest = singleAnalysis.equals(best);
     if (wa.analysisCount() == 1) {
      // Unambiguous word: no best-analysis marker needed.
      pwMorph.println(singleAnalysis.formatLong());
     } else {
      // Ambiguous word: the selected best analysis gets a trailing '*'.
      pwMorph.format("%s%s%n", singleAnalysis.formatLong(), isBest ? "*" : "");
     }
    }
   }
   // Blank line separates consecutive sentences.
   pwMorph.println();
  }
 }
}

代码示例来源:origin: ahmetaa/zemberek-nlp

private String splitWords(String sentence) {
  // The input starts with a label token: strip it, lemmatize the remaining
  // words (keeping unknown words verbatim), then re-attach the label in front.
  List<String> tokens = Splitter.on(" ").splitToList(sentence);
  String label = tokens.get(0);
  sentence = String.join(" ", tokens.subList(1, tokens.size()));
  if (sentence.length() == 0) {
    return sentence;
  }
  List<String> pieces = new ArrayList<>();
  // Label leads the rebuilt sentence.
  pieces.add(label);
  for (SentenceWordAnalysis wordAnalysis : morphology.analyzeAndDisambiguate(sentence)) {
    SingleAnalysis best = wordAnalysis.getBestAnalysis();
    if (best.isUnknown()) {
      pieces.add(wordAnalysis.getWordAnalysis().getInput());
    } else {
      List<String> lemmas = best.getLemmas();
      pieces.add(lemmas.get(lemmas.size() - 1));
    }
  }
  return String.join(" ", pieces);
}

相关文章