de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence.getCoveredText()方法的使用及代码示例

x33g5p2x  于2022-01-30 转载在 其他  
字(12.4k)|赞(0)|评价(0)|浏览(107)

本文整理了Java中de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence.getCoveredText()方法的一些代码示例,展示了Sentence.getCoveredText()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Sentence.getCoveredText()方法的具体详情如下:
包路径:de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence
类名称:Sentence
方法名:getCoveredText

Sentence.getCoveredText介绍

暂无

代码示例

代码示例来源:origin: UKPLab/argument-reasoning-comprehension-task

/**
   * Decides whether the argument held in the given CAS is kept.
   * An argument is rejected when it consists of exactly one sentence, or when the
   * covered text of any of its sentences is longer than {@code MAX_SENTENCE_LENGTH}.
   *
   * @param jCas the CAS whose {@code Sentence} annotations are inspected
   * @return {@code true} if the argument passes both checks, {@code false} otherwise
   */
  @Override
  public boolean keepArgument(JCas jCas)
  {
    // Snapshot of all sentence annotations in the CAS.
    List<Sentence> sentenceList = new ArrayList<>(JCasUtil.select(jCas, Sentence.class));

    // One-sentence arguments are dropped.
    boolean singleSentence = sentenceList.size() == 1;
    if (singleSentence) {
      return false;
    }

    // A single over-long sentence disqualifies the whole argument.
    for (int idx = 0; idx < sentenceList.size(); idx++) {
      int textLength = sentenceList.get(idx).getCoveredText().length();
      if (textLength > MAX_SENTENCE_LENGTH) {
        return false;
      }
    }

    return true;
  }
}

代码示例来源:origin: hltfbk/Excitement-Open-Platform

/**
 * Pairs each root in {@code lastRootList} with the covered text of the sentence at the same
 * position in {@code lastSentenceList}.
 * <B>NOTE:</B> Must be called only after one of the conversion methods was called.
 * @return an insertion-ordered map from root to sentence text, in the order of the root list
 * @throws CasTreeConverterException if either list is still {@code null}, or if the two lists
 * differ in size
 */
public LinkedHashMap<BasicNode, String> getTreesToSentences() throws CasTreeConverterException {
  boolean conversionDone = lastRootList != null && lastSentenceList != null;
  if (!conversionDone) {
    throw new CasTreeConverterException("getTreesToSentences() called before a conversion method was called.");
  }

  int rootCount = lastRootList.size();
  int sentenceCount = lastSentenceList.size();
  if (rootCount != sentenceCount) {
    throw new CasTreeConverterException("Internal error - lastRootList(size=" + rootCount + ") and lastSentenceList(size=" +
                      sentenceCount + ") are in different sizes.");
  }

  LinkedHashMap<BasicNode, String> treesToSentences = new LinkedHashMap<BasicNode, String>(rootCount);
  // Walk both lists in lock-step; the sizes were verified above.
  Iterator<Sentence> sentenceCursor = lastSentenceList.iterator();
  for (BasicNode root : lastRootList) {
    String sentenceText = sentenceCursor.next().getCoveredText();
    treesToSentences.put(root, sentenceText);
  }

  return treesToSentences;
}

代码示例来源:origin: UKPLab/argument-reasoning-comprehension-task

/**
 * Builds one {@code HITSentence} per {@code Sentence} annotation of the argument's CAS.
 * Each entry stores its zero-based position, the id obtained from
 * {@code StandaloneArgument.getSentenceID} for that position, and the sentence's covered text.
 *
 * @param argument the argument whose sentences are extracted
 * @return the extracted sentences in annotation order
 * @throws IOException declared by the signature; propagated from the calls made here
 */
public static List<HITSentence> extractSentences(StandaloneArgument argument)
    throws IOException
{
  List<HITSentence> extracted = new ArrayList<>();
  ArrayList<Sentence> sentenceAnnotations = new ArrayList<>(
      JCasUtil.select(argument.getJCas(), Sentence.class));

  int index = 0;
  for (Sentence annotation : sentenceAnnotations) {
    HITSentence hitSentence = new HITSentence();
    hitSentence.position = index;
    // the id combines the argument id with the sentence position
    hitSentence.sentenceId = StandaloneArgument.getSentenceID(argument, hitSentence.position);
    hitSentence.text = annotation.getCoveredText();
    extracted.add(hitSentence);
    index++;
  }
  return extracted;
}

代码示例来源:origin: UKPLab/argument-reasoning-comprehension-task

/**
 * Builds one {@code HITSentenceReason} per {@code Sentence} annotation of the argument's CAS,
 * filling in position, sentence id and covered text.
 * A sentence covered by any {@code Claim} annotation is treated as an illegal state.
 *
 * @param argument the argument whose sentences are extracted
 * @return the extracted sentences in annotation order
 * @throws IOException declared by the signature; propagated from the calls made here
 * @throws IllegalStateException if a sentence is covered by a {@code Claim} annotation
 */
private Collection<? extends HITSentence> extractSentencesForReasons(
    StandaloneArgument argument)
    throws IOException
{
  List<HITSentenceReason> extracted = new ArrayList<>();
  ArrayList<Sentence> sentenceAnnotations = new ArrayList<>(
      JCasUtil.select(argument.getJCas(), Sentence.class));

  int index = 0;
  for (Sentence annotation : sentenceAnnotations) {
    HITSentenceReason reason = new HITSentenceReason();
    reason.position = index;
    // the id combines the argument id with the sentence position
    reason.sentenceId = StandaloneArgument.getSentenceID(argument, reason.position);
    reason.text = annotation.getCoveredText();

    // a sentence counts as disabled when at least one claim covers it
    boolean coveredByClaim = !JCasUtil.selectCovering(Claim.class, annotation).isEmpty();
    reason.disabled = coveredByClaim;
    // claims are not expected to exist at this stage
    if (reason.disabled) {
      throw new IllegalStateException("No claim annotations are allowed at this point");
    }

    extracted.add(reason);
    index++;
  }
  return extracted;
}

代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-io-tsv

/**
 * Registers an {@code AnnotationUnit} for every token of every sentence.
 * Each unit is appended to {@code units} and mapped in {@code unitsLineNumber} to the address
 * {@code "<sentence>-<token>"} (both counters start at 1). The first unit of a sentence is
 * additionally mapped in {@code sentenceUnits} to the sentence's covered text.
 *
 * @param aJCas the CAS providing the sentence and token annotations
 */
private void setTokenSentenceAddress(JCas aJCas)
{
  int sentenceNo = 0;
  for (Sentence sentence : select(aJCas, Sentence.class)) {
    sentenceNo++;
    int tokenNo = 0;
    for (Token token : selectCovered(Token.class, sentence)) {
      tokenNo++;
      AnnotationUnit tokenUnit = new AnnotationUnit(token.getBegin(), token.getEnd(), false,
          token.getCoveredText());
      units.add(tokenUnit);
      // only the first token of a sentence carries the sentence text
      boolean firstTokenOfSentence = tokenNo == 1;
      if (firstTokenOfSentence) {
        sentenceUnits.put(tokenUnit, sentence.getCoveredText());
      }
      unitsLineNumber.put(tokenUnit, sentenceNo + "-" + tokenNo);
    }
  }
}

代码示例来源:origin: webanno/webanno

/**
 * Registers an {@code AnnotationUnit} for every token of every sentence.
 * Each unit is appended to {@code units} and mapped in {@code unitsLineNumber} to the address
 * {@code "<sentence>-<token>"} (both counters start at 1). The first unit of a sentence is
 * additionally mapped in {@code sentenceUnits} to the sentence's covered text.
 *
 * @param aJCas the CAS providing the sentence and token annotations
 */
private void setTokenSentenceAddress(JCas aJCas)
{
  int sentenceNo = 0;
  for (Sentence sentence : select(aJCas, Sentence.class)) {
    sentenceNo++;
    int tokenNo = 0;
    for (Token token : selectCovered(Token.class, sentence)) {
      tokenNo++;
      AnnotationUnit tokenUnit = new AnnotationUnit(token.getBegin(), token.getEnd(), false,
          token.getCoveredText());
      units.add(tokenUnit);
      // only the first token of a sentence carries the sentence text
      boolean firstTokenOfSentence = tokenNo == 1;
      if (firstTokenOfSentence) {
        sentenceUnits.put(tokenUnit, sentence.getCoveredText());
      }
      unitsLineNumber.put(tokenUnit, sentenceNo + "-" + tokenNo);
    }
  }
}

代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-tsv

/**
 * Registers an {@code AnnotationUnit} for every token of every sentence.
 * Each unit is appended to {@code units} and mapped in {@code unitsLineNumber} to the address
 * {@code "<sentence>-<token>"} (both counters start at 1). The first unit of a sentence is
 * additionally mapped in {@code sentenceUnits} to the sentence's covered text.
 *
 * @param aJCas the CAS providing the sentence and token annotations
 */
private void setTokenSentenceAddress(JCas aJCas) {
  int sentenceNo = 0;
  for (Sentence sentence : select(aJCas, Sentence.class)) {
    sentenceNo++;
    int tokenNo = 0;
    for (Token token : selectCovered(Token.class, sentence)) {
      tokenNo++;
      AnnotationUnit tokenUnit = new AnnotationUnit(token.getBegin(), token.getEnd(), false,
          token.getCoveredText());
      units.add(tokenUnit);
      // only the first token of a sentence carries the sentence text
      boolean firstTokenOfSentence = tokenNo == 1;
      if (firstTokenOfSentence) {
        sentenceUnits.put(tokenUnit, sentence.getCoveredText());
      }
      unitsLineNumber.put(tokenUnit, sentenceNo + "-" + tokenNo);
    }
  }
}

代码示例来源:origin: webanno/webanno

/**
 * Writes one sentence to the given writer.
 * The sentence's covered text is split at {@code LINE_BREAK}; each part is emitted as
 * {@code PREFIX_TEXT}, the escaped part, and {@code LINE_BREAK}. Afterwards every token is
 * written, each followed by a line break and by its sub-tokens (each also followed by a line
 * break).
 *
 * @param aOut the writer receiving the output
 * @param aSentence the sentence to serialize
 */
public void write(PrintWriter aOut, TsvSentence aSentence)
{
  // Header part: the raw sentence text, one prefixed line per original line.
  String sentenceText = aSentence.getUimaSentence().getCoveredText();
  String[] textLines = splitPreserveAllTokens(sentenceText, LINE_BREAK);
  for (String textLine : textLines) {
    aOut.print(PREFIX_TEXT);
    aOut.print(escapeText(textLine));
    aOut.print(LINE_BREAK);
  }

  // Body part: tokens, each immediately followed by its sub-tokens.
  for (TsvToken tsvToken : aSentence.getTokens()) {
    write(aOut, tsvToken);
    aOut.write(LINE_BREAK);
    for (TsvSubToken tsvSubToken : tsvToken.getSubTokens()) {
      write(aOut, tsvSubToken);
      aOut.write(LINE_BREAK);
    }
  }
}

代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-io-tsv

/**
 * Writes one sentence to the given writer.
 * The sentence's covered text is split at {@code LINE_BREAK}; each part is emitted as
 * {@code PREFIX_TEXT}, the escaped part, and {@code LINE_BREAK}. Afterwards every token is
 * written, each followed by a line break and by its sub-tokens (each also followed by a line
 * break).
 *
 * @param aOut the writer receiving the output
 * @param aSentence the sentence to serialize
 */
public void write(PrintWriter aOut, TsvSentence aSentence)
{
  // Header part: the raw sentence text, one prefixed line per original line.
  String sentenceText = aSentence.getUimaSentence().getCoveredText();
  String[] textLines = splitPreserveAllTokens(sentenceText, LINE_BREAK);
  for (String textLine : textLines) {
    aOut.print(PREFIX_TEXT);
    aOut.print(escapeText(textLine));
    aOut.print(LINE_BREAK);
  }

  // Body part: tokens, each immediately followed by its sub-tokens.
  for (TsvToken tsvToken : aSentence.getTokens()) {
    write(aOut, tsvToken);
    aOut.write(LINE_BREAK);
    for (TsvSubToken tsvSubToken : tsvToken.getSubTokens()) {
      write(aOut, tsvSubToken);
      aOut.write(LINE_BREAK);
    }
  }
}

代码示例来源:origin: hltfbk/Excitement-Open-Platform

// Hand the sentence annotation's covered text to the wrapped tokenizer and run it.
innerTool.setSentence(sentenceAnno.getCoveredText());
  innerTool.tokenize();
  // Collect the token strings the wrapped tokenizer produced for this sentence.
  tokenStrings = innerTool.getTokenizedSentence();
// Match the token strings against the same covered text; the result is a sorted map keyed
// by Integer (presumably a character offset within the sentence - TODO confirm).
// NOTE(review): the meaning of the two boolean flags is not visible in this excerpt - confirm
// against DockedTokenFinder.find.
SortedMap<Integer, DockedToken> dockedTokens = DockedTokenFinder.find(sentenceAnno.getCoveredText(), tokenStrings, false, true);

代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-api-dao

/**
 * Creates {@link Token} annotations inside every {@link Sentence} of the CAS.
 * Each sentence's covered text is split at the word boundaries reported by a
 * {@link BreakIterator} word instance for {@link Locale#US}. Every boundary-to-boundary
 * segment is passed to {@code trim}; segments for which {@code isEmpty} then reports true are
 * skipped, all others are added to the CAS indexes as tokens.
 *
 * @param aJCas the CAS whose sentences are tokenized and which receives the new tokens
 */
public static void tokenize(JCas aJCas)
{
  BreakIterator bi = BreakIterator.getWordInstance(Locale.US);
  for (Sentence s : select(aJCas, Sentence.class)) {
    // The sentence text and begin offset do not change while its tokens are created, so
    // fetch them once instead of re-evaluating getCoveredText() for every segment.
    String sentenceText = s.getCoveredText();
    int sentenceBegin = s.getBegin();

    bi.setText(sentenceText);
    int last = bi.first();
    int cur = bi.next();
    while (cur != BreakIterator.DONE) {
      // Offsets are relative to the sentence text.
      // NOTE(review): trim is expected to adjust the span in place - confirm against its
      // definition.
      int[] span = new int[] { last, cur };
      trim(sentenceText, span);
      if (!isEmpty(span[0], span[1])) {
        // Shift sentence-relative offsets to document offsets.
        Token seg = new Token(aJCas, span[0] + sentenceBegin, span[1] + sentenceBegin);
        seg.addToIndexes(aJCas);
      }
      last = cur;
      cur = bi.next();
    }
  }
}

代码示例来源:origin: webanno/webanno

/**
 * Creates {@link Token} annotations inside every {@link Sentence} of the CAS.
 * Each sentence's covered text is split at the word boundaries reported by a
 * {@link BreakIterator} word instance for {@link Locale#US}. Every boundary-to-boundary
 * segment is passed to {@code trim}; segments for which {@code isEmpty} then reports true are
 * skipped, all others are added to the CAS indexes as tokens.
 *
 * @param aJCas the CAS whose sentences are tokenized and which receives the new tokens
 */
public static void tokenize(JCas aJCas)
{
  BreakIterator bi = BreakIterator.getWordInstance(Locale.US);
  for (Sentence s : select(aJCas, Sentence.class)) {
    // The sentence text and begin offset do not change while its tokens are created, so
    // fetch them once instead of re-evaluating getCoveredText() for every segment.
    String sentenceText = s.getCoveredText();
    int sentenceBegin = s.getBegin();

    bi.setText(sentenceText);
    int last = bi.first();
    int cur = bi.next();
    while (cur != BreakIterator.DONE) {
      // Offsets are relative to the sentence text.
      // NOTE(review): trim is expected to adjust the span in place - confirm against its
      // definition.
      int[] span = new int[] { last, cur };
      trim(sentenceText, span);
      if (!isEmpty(span[0], span[1])) {
        // Shift sentence-relative offsets to document offsets.
        Token seg = new Token(aJCas, span[0] + sentenceBegin, span[1] + sentenceBegin);
        seg.addToIndexes(aJCas);
      }
      last = cur;
      cur = bi.next();
    }
  }
}

代码示例来源:origin: webanno/webanno

String sentenceText = sentence.getCoveredText().toLowerCase();
for (int i = -1; (i = sentenceText.indexOf(selectedText.toLowerCase(),
    i)) != -1; i = i + selectedText.length()) {

代码示例来源:origin: webanno/webanno

/**
   * Reads the XML files matched under {@code classpath:/local/} with {@code TeiReader} and
   * checks, for every document, the annotation counts and the text of the first sentence.
   * Currently ignored.
   */
  @Test
  @Ignore("No TEI yet to opensource ")
  public void testTeiReader()
    throws Exception
  {
    String firstSentence = "70 I DAG.";

    CollectionReaderDescription reader = createReaderDescription(TeiReader.class,
        TeiReader.PARAM_LANGUAGE, "en",
        TeiReader.PARAM_SOURCE_LOCATION, "classpath:/local/",
        TeiReader.PARAM_PATTERNS, new String[] { "[+]*.xml" });

    for (JCas jcas : new JCasIterable(reader)) {
      // Print document id, text length and language for manual inspection.
      DocumentMetaData meta = DocumentMetaData.get(jcas);
      String text = jcas.getDocumentText();
      System.out.printf("%s - %d%n", meta.getDocumentId(), text.length());
      System.out.println(jcas.getDocumentLanguage());

      // Expected annotation counts per type.
      assertEquals(2235, JCasUtil.select(jcas, Token.class).size());
      assertEquals(745, JCasUtil.select(jcas, POS.class).size());
      assertEquals(745, JCasUtil.select(jcas, Lemma.class).size());
      assertEquals(0, JCasUtil.select(jcas, NamedEntity.class).size());
      assertEquals(30, JCasUtil.select(jcas, Sentence.class).size());

      // The first sentence annotation must cover the expected text.
      Sentence first = JCasUtil.select(jcas, Sentence.class).iterator().next();
      assertEquals(firstSentence, first.getCoveredText());
    }

  }
}

代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-ui-automation

String sentenceText = sentence.getCoveredText().toLowerCase();
for (int i = -1; (i = sentenceText.indexOf(selectedText.toLowerCase(),
    i)) != -1; i = i + selectedText.length()) {

代码示例来源:origin: webanno/webanno

String sentenceText = sentence.getCoveredText().toLowerCase();
for (int i = -1; (i = sentenceText.indexOf(selectedText.toLowerCase(),
    i)) != -1; i = i + selectedText.length()) {

代码示例来源:origin: de.tudarmstadt.ukp.dkpro.teaching/de.tudarmstadt.ukp.dkpro.teaching.corpus

/**
 * Reads the Brown test corpus and verifies the annotation counts and first sentence of the
 * first document, as well as the total number of documents read.
 */
@Test
public void brownReaderTest()
  throws Exception
{
  String firstSentence = "The Fulton County Grand Jury said Friday an investigation of Atlanta's recent primary election produced `` no evidence '' that any irregularities took place . ";

  String[] patterns = new String[] {
      ResourceCollectionReaderBase.INCLUDE_PREFIX + "*.xml"
  };
  CollectionReader reader = createCollectionReader(
      BrownCorpusReader.class,
      BrownCorpusReader.PARAM_PATH, "src/test/resources/test_corpora/brown/",
      BrownCorpusReader.PARAM_PATTERNS, patterns
  );

  int documentCount = 0;
  for (JCas jcas : new JCasIterable(reader)) {
    // Detailed checks are made on the first document only.
    if (documentCount == 0) {
      assertEquals(2239, JCasUtil.select(jcas, Token.class).size());
      assertEquals(2239, JCasUtil.select(jcas, POS.class).size());
      assertEquals(98, JCasUtil.select(jcas, Sentence.class).size());

      Sentence first = JCasUtil.select(jcas, Sentence.class).iterator().next();
      assertEquals(firstSentence, first.getCoveredText());
    }
    documentCount++;
  }

  assertEquals(3, documentCount);
}

代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.opennlp-asl

/**
   * Segments one zone of text.
   * When sentence writing is enabled, the sentence model's spans over {@code aText} are turned
   * into sentences, shifted by {@code aZoneBegin}. When token writing is enabled, every
   * {@code Sentence} covered by the zone is tokenized by the token model and its token spans
   * are turned into tokens, shifted by the sentence's begin offset.
   *
   * @param aJCas the CAS receiving the annotations
   * @param aText the text of the zone
   * @param aZoneBegin offset of the zone; added to the sentence spans detected in {@code aText}
   * @throws AnalysisEngineProcessException declared by the signature
   */
  @Override
  protected void process(JCas aJCas, String aText, int aZoneBegin)
    throws AnalysisEngineProcessException
  {
    if (isWriteSentence()) {
      for (Span sentenceSpan : sentenceModelProvider.getResource().sentPosDetect(aText)) {
        int begin = sentenceSpan.getStart() + aZoneBegin;
        int end = sentenceSpan.getEnd() + aZoneBegin;
        createSentence(aJCas, begin, end);
      }
    }

    if (!isWriteToken()) {
      return;
    }

    int zoneEnd = aZoneBegin + aText.length();
    for (Sentence sentence : selectCovered(aJCas, Sentence.class, aZoneBegin, zoneEnd)) {
      // Token spans are relative to the sentence text and get shifted to document offsets.
      Span[] tokenSpans = tokenModelProvider.getResource().tokenizePos(sentence.getCoveredText());
      for (Span tokenSpan : tokenSpans) {
        int begin = tokenSpan.getStart() + sentence.getBegin();
        int end = tokenSpan.getEnd() + sentence.getBegin();
        createToken(aJCas, begin, end);
      }
    }
  }
}

代码示例来源:origin: de.tudarmstadt.ukp.dkpro.teaching/de.tudarmstadt.ukp.dkpro.teaching.corpus

/**
   * Reads the Tiger test corpus file and verifies the annotation counts, first sentence and
   * document title of the first document, as well as the total number of documents read.
   */
  @Test
  public void tigerTest()
    throws Exception
  {
    String firstSentence = "`` Ross Perot wäre vielleicht ein prächtiger Diktator '' ";

    CollectionReader reader = createCollectionReader(
        TigerCorpusReader.class,
        TigerCorpusReader.PARAM_FILE, "src/test/resources/test_corpora/tiger/tiger.txt"
    );

    int documentCount = 0;
    for (JCas jcas : new JCasIterable(reader)) {
      // Detailed checks are made on the first document only.
      if (documentCount == 0) {
        assertEquals(9, JCasUtil.select(jcas, Token.class).size());
        assertEquals(9, JCasUtil.select(jcas, Lemma.class).size());
        assertEquals(9, JCasUtil.select(jcas, POS.class).size());
        assertEquals(1, JCasUtil.select(jcas, Sentence.class).size());

        Sentence first = JCasUtil.select(jcas, Sentence.class).iterator().next();
        assertEquals(firstSentence, first.getCoveredText());

        String title = DocumentMetaData.get(jcas).getDocumentTitle();
        assertEquals("Sentence 1", title);
      }
      documentCount++;
    }

    assertEquals(20, documentCount);
  }
}

代码示例来源:origin: de.tudarmstadt.ukp.dkpro.teaching/de.tudarmstadt.ukp.dkpro.teaching.corpus

/**
 * Reads the Wacky test corpus (DEWAC edition), prints every document's text, and verifies
 * the annotation counts, first sentence and document title of the first document, as well as
 * the total number of documents read.
 */
@Test
public void wackyTest()
  throws Exception
{
  String firstSentence = "Nikita ( La Femme Nikita ) Dieser Episodenführer wurde von September 1998 bis Mai 1999 von Konstantin C.W. Volkmann geschrieben und im Mai 2000 von Stefan Börzel übernommen . ";

  CollectionReader reader = createCollectionReader(
      WackyCorpusReader.class,
      WackyCorpusReader.PARAM_PATH, "src/test/resources/test_corpora/wacky/",
      WackyCorpusReader.PARAM_LANGUAGE_EDITION, WackyLanguageEdition.DEWAC.name()
  );

  int documentCount = 0;
  for (JCas jcas : new JCasIterable(reader)) {
    System.out.println(jcas.getDocumentText());
    // Detailed checks are made on the first document only.
    if (documentCount == 0) {
      assertEquals(11406, JCasUtil.select(jcas, Token.class).size());
      assertEquals(11406, JCasUtil.select(jcas, Lemma.class).size());
      assertEquals(11406, JCasUtil.select(jcas, POS.class).size());
      assertEquals(717, JCasUtil.select(jcas, Sentence.class).size());

      Sentence first = JCasUtil.select(jcas, Sentence.class).iterator().next();
      assertEquals(firstSentence, first.getCoveredText());

      String title = DocumentMetaData.get(jcas).getDocumentTitle();
      assertEquals("\"http://www.epguides.de/nikita.htm\"", title);
    }
    documentCount++;
  }

  assertEquals(4, documentCount);
}

相关文章