de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence.addToIndexes()方法的使用及代码示例

x33g5p2x  于2022-01-30 转载在 其他  
字(10.2k)|赞(0)|评价(0)|浏览(88)

本文整理了Java中de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence.addToIndexes()方法的一些代码示例,展示了Sentence.addToIndexes()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Sentence.addToIndexes()方法的具体详情如下:
包路径:de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence
类名称:Sentence
方法名:addToIndexes

Sentence.addToIndexes介绍

暂无

代码示例

代码示例来源:origin: dkpro/dkpro-tc

/**
 * Creates a {@link Sentence} annotation over the given offsets and adds it to the indexes.
 *
 * @param aJCas
 *            the JCas in which the sentence is created.
 * @param begin
 *            begin offset of the sentence.
 * @param end
 *            end offset of the sentence.
 */
protected void setSentence(JCas aJCas, int begin, int end)
{
  new Sentence(aJCas, begin, end).addToIndexes();
}

代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.io.penntree-asl

/**
 * Creates a {@link Sentence} annotation spanning {@code [aBegin, aEnd]} and indexes it.
 *
 * @param aJCas
 *            the JCas in which the sentence is created.
 * @param aBegin
 *            begin offset of the sentence.
 * @param aEnd
 *            end offset of the sentence.
 */
private void annotateSentence(JCas aJCas, int aBegin, int aEnd)
{
  Sentence sentence = new Sentence(aJCas, aBegin, aEnd);
  sentence.addToIndexes();
}

代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-tsv

/**
 * Appends the sentence line to {@code coveredText} and creates the matching {@link Sentence}
 * annotation.
 * <p>
 * If there is a gap between the end of the previous sentence and the begin of this one, the
 * line is prefixed with one space per missing offset in {@code (aPrevEnd, aBegin)} before it
 * is appended.
 *
 * @param aJCas
 *            the JCas in which the sentence is created.
 * @param aLine
 *            the text of the sentence.
 * @param aBegin
 *            begin offset of the sentence.
 * @param aEnd
 *            end offset of the sentence.
 * @param aPrevEnd
 *            end offset of the previous sentence.
 */
private void createSentence(JCas aJCas, String aLine, int aBegin, int aEnd, int aPrevEnd) {
  // Build the padding in a StringBuilder instead of concatenating Strings in a loop. When
  // there is no gap the loop does not run and the padding stays empty, so a single append
  // covers both the padded and the unpadded case.
  StringBuilder pad = new StringBuilder();
  for (int i = aPrevEnd + 1; i < aBegin; i++) {
    pad.append(' ');
  }
  coveredText.append(pad.toString() + aLine + LF);

  Sentence sentence = new Sentence(aJCas, aBegin, aEnd);
  sentence.addToIndexes();
}

代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.tokit-asl

/**
 * Create sentences using the boundary pattern defined in {@link #lineBreak}.
 * <p>
 * Every match of the pattern closes one sentence: the sentence starts at the end of the
 * previous match (or at offset 0 for the first one) and ends where the current match starts.
 * Text following the last match is not turned into a sentence by this method.
 * 
 * @param aJCas
 *            the {@link JCas}
 * @param text
 *            the text.
 */
private void createSentences(JCas aJCas, String text)
{
  Matcher boundary = lineBreak.matcher(text);
  int sentenceBegin = 0;
  while (boundary.find()) {
    new Sentence(aJCas, sentenceBegin, boundary.start()).addToIndexes(aJCas);
    // The next sentence starts right after the matched boundary.
    sentenceBegin = boundary.end();
  }
}

代码示例来源:origin: webanno/webanno

/**
 * Creates and indexes a {@link Sentence} for the given offsets after passing them through
 * {@code trim}.
 *
 * @param aJCas
 *            the JCas in which the sentence is created.
 * @param aBegin
 *            requested begin offset.
 * @param aEnd
 *            requested end offset.
 * @return the new sentence, or {@code null} if the span is empty according to
 *         {@code isEmpty} after trimming.
 */
protected Sentence createSentence(final JCas aJCas, final int aBegin,
    final int aEnd)
{
  // trim(...) receives the offsets as an array; the values are read back from it below.
  int[] offsets = { aBegin, aEnd };
  trim(aJCas.getDocumentText(), offsets);

  if (isEmpty(offsets[0], offsets[1])) {
    return null;
  }

  Sentence sentence = new Sentence(aJCas, offsets[0], offsets[1]);
  sentence.addToIndexes(aJCas);
  return sentence;
}

代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-io-text

/**
 * Trims the requested span against the document text and, unless the result is empty, adds a
 * {@link Sentence} annotation for it.
 *
 * @param aJCas
 *            the JCas in which the sentence is created.
 * @param aBegin
 *            requested begin offset.
 * @param aEnd
 *            requested end offset.
 * @return the indexed sentence, or {@code null} when {@code isEmpty} reports the trimmed span
 *         as empty.
 */
protected Sentence createSentence(final JCas aJCas, final int aBegin,
    final int aEnd)
{
  final int[] trimmed = new int[] { aBegin, aEnd };
  trim(aJCas.getDocumentText(), trimmed);

  final boolean nothingLeft = isEmpty(trimmed[0], trimmed[1]);
  if (nothingLeft) {
    return null;
  }

  final Sentence result = new Sentence(aJCas, trimmed[0], trimmed[1]);
  result.addToIndexes(aJCas);
  return result;
}

代码示例来源:origin: oaqa/knn4qa

/**
 * Copies all {@link Sentence}, {@link Token} and {@link POS} annotations from the source JCas
 * to the destination JCas and adds each copy to the indexes.
 *
 * @param jCasSrc
 *            the JCas to read annotations from.
 * @param jCasDst
 *            the JCas the copies are created in.
 */
private void copyAnnotations(JCas jCasSrc, JCas jCasDst) {
  // One copier is shared by all three passes.
  CasCopier casCopier = new CasCopier(jCasSrc.getCas(), jCasDst.getCas());

  for (Sentence sentence : JCasUtil.select(jCasSrc, Sentence.class)) {
    ((Sentence) casCopier.copyFs(sentence)).addToIndexes();
  }

  for (Token token : JCasUtil.select(jCasSrc, Token.class)) {
    ((Token) casCopier.copyFs(token)).addToIndexes();
  }

  for (POS pos : JCasUtil.select(jCasSrc, POS.class)) {
    ((POS) casCopier.copyFs(pos)).addToIndexes();
  }
}
}

代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.api.segmentation-asl

/**
 * Creates and indexes a {@link Sentence} for the trimmed span, provided the span is not empty
 * and {@code isWriteSentence()} returns {@code true}.
 *
 * @param aJCas
 *            the JCas in which the sentence is created.
 * @param aBegin
 *            requested begin offset.
 * @param aEnd
 *            requested end offset.
 * @return the new sentence, or {@code null} if no sentence was created.
 */
protected Sentence createSentence(final JCas aJCas, final int aBegin, final int aEnd)
{
  int[] offsets = { aBegin, aEnd };
  trim(aJCas.getDocumentText(), offsets);

  // isWriteSentence() is only consulted for non-empty spans.
  if (isEmpty(offsets[0], offsets[1]) || !isWriteSentence()) {
    return null;
  }

  Sentence sentence = new Sentence(aJCas, offsets[0], offsets[1]);
  sentence.addToIndexes(aJCas);
  return sentence;
}

代码示例来源:origin: dkpro/dkpro-core

/**
 * Trims the requested span against the document text and adds a {@link Sentence} for it when
 * the span is non-empty and sentence writing is enabled via {@code isWriteSentence()}.
 *
 * @param aJCas
 *            the JCas in which the sentence is created.
 * @param aBegin
 *            requested begin offset.
 * @param aEnd
 *            requested end offset.
 * @return the indexed sentence, or {@code null} if none was created.
 */
protected Sentence createSentence(final JCas aJCas, final int aBegin, final int aEnd)
{
  final int[] trimmed = new int[] { aBegin, aEnd };
  trim(aJCas.getDocumentText(), trimmed);

  Sentence created = null;
  // Same evaluation order as a plain "&&": the write flag is checked only for non-empty spans.
  if (!isEmpty(trimmed[0], trimmed[1]) && isWriteSentence()) {
    created = new Sentence(aJCas, trimmed[0], trimmed[1]);
    created.addToIndexes(aJCas);
  }
  return created;
}

代码示例来源:origin: webanno/webanno

@Test
  public void testTokenize() throws Exception
  {
    // Two sentences without any whitespace between them.
    JCas jcas = JCasFactory.createText("i am one.i am two.", "en");
    new Sentence(jcas, 0, 9).addToIndexes();
    new Sentence(jcas, 9, 18).addToIndexes();
    
    ImportExportServiceImpl.tokenize(jcas);
    
    // The pre-existing sentences must be retained and the tokens created within them.
    assertEquals(asList("i am one.", "i am two."), toText(select(jcas, Sentence.class)));
    assertEquals(asList("i", "am", "one", ".", "i", "am", "two", "."),
        toText(select(jcas, Token.class)));
  }
}

代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-io-tcf

/**
 * Creates one {@link Sentence} annotation per entry in the sentences layer of the corpus.
 * Each sentence spans from the begin of its first token to the end of its last token.
 *
 * @param aJCas
 *            the JCas in which the sentences are created.
 * @param aCorpusData
 *            the corpus whose sentences layer is read; nothing happens if it has none.
 * @param aTokens
 *            token annotations keyed by the ID of the corresponding corpus token.
 */
private void convertSentences(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) {
  if (aCorpusData.getSentencesLayer() == null) {
    // Without a sentences layer there is nothing to convert.
    return;
  }

  for (int idx = 0; idx < aCorpusData.getSentencesLayer().size(); idx++) {
    eu.clarin.weblicht.wlfxb.tc.api.Token[] corpusTokens = aCorpusData.getSentencesLayer()
        .getTokens(aCorpusData.getSentencesLayer().getSentence(idx));

    Sentence sentence = new Sentence(aJCas);
    Token first = aTokens.get(corpusTokens[0].getID());
    sentence.setBegin(first.getBegin());
    Token last = aTokens.get(corpusTokens[corpusTokens.length - 1].getID());
    sentence.setEnd(last.getEnd());
    sentence.addToIndexes();
  }
}

代码示例来源:origin: webanno/webanno

/**
 * Adds a {@link Sentence} annotation for every sentence found in the sentences layer of the
 * given corpus, using the offsets of the first and last token of each sentence.
 *
 * @param aJCas
 *            the JCas in which the sentences are created.
 * @param aCorpusData
 *            the corpus providing the sentences layer; ignored if the layer is {@code null}.
 * @param aTokens
 *            lookup from corpus token ID to the already created token annotation.
 */
private void convertSentences(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) {
  if (aCorpusData.getSentencesLayer() == null) {
    // The corpus carries no sentence information.
    return;
  }

  for (int n = 0; n < aCorpusData.getSentencesLayer().size(); n++) {
    eu.clarin.weblicht.wlfxb.tc.api.Token[] members = aCorpusData.getSentencesLayer()
        .getTokens(aCorpusData.getSentencesLayer().getSentence(n));

    Sentence converted = new Sentence(aJCas);
    Token head = aTokens.get(members[0].getID());
    converted.setBegin(head.getBegin());
    Token tail = aTokens.get(members[members.length - 1].getID());
    converted.setEnd(tail.getEnd());
    converted.addToIndexes();
  }
}

代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/de.tudarmstadt.ukp.clarin.webanno.tcf

/**
 * Creates one {@link Sentence} annotation per entry in the sentences layer of the corpus.
 * The sentence begins where its first token begins and ends where its last token ends.
 *
 * @param aJCas
 *            the JCas in which the sentences are created.
 * @param aCorpusData
 *            the corpus whose sentences layer is read; nothing happens if it has none.
 * @param aTokens
 *            token annotations keyed by the ID of the corresponding corpus token.
 */
private void convertSentences(JCas aJCas, TextCorpus aCorpusData,
    Map<String, Token> aTokens)
{
  if (aCorpusData.getSentencesLayer() == null) {
    // Without a sentences layer there is nothing to convert.
    return;
  }

  for (int idx = 0; idx < aCorpusData.getSentencesLayer().size(); idx++) {
    eu.clarin.weblicht.wlfxb.tc.api.Token[] corpusTokens = aCorpusData.getSentencesLayer()
        .getTokens(aCorpusData.getSentencesLayer().getSentence(idx));

    Sentence sentence = new Sentence(aJCas);
    Token firstToken = aTokens.get(corpusTokens[0].getID());
    sentence.setBegin(firstToken.getBegin());
    Token lastToken = aTokens.get(corpusTokens[corpusTokens.length - 1].getID());
    sentence.setEnd(lastToken.getEnd());
    sentence.addToIndexes();
  }
}

代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.io.tcf-asl

/**
 * Adds a {@link Sentence} annotation for every sentence found in the sentences layer of the
 * given corpus, using the offsets of the first and last token of each sentence.
 *
 * @param aJCas
 *            the JCas in which the sentences are created.
 * @param aCorpusData
 *            the corpus providing the sentences layer; ignored if the layer is {@code null}.
 * @param aTokens
 *            lookup from corpus token ID to the already created token annotation.
 */
private void convertSentences(JCas aJCas, TextCorpus aCorpusData,
    Map<String, Token> aTokens)
{
  if (aCorpusData.getSentencesLayer() == null) {
    // The corpus carries no sentence information.
    return;
  }

  for (int n = 0; n < aCorpusData.getSentencesLayer().size(); n++) {
    eu.clarin.weblicht.wlfxb.tc.api.Token[] members = aCorpusData.getSentencesLayer()
        .getTokens(aCorpusData.getSentencesLayer().getSentence(n));

    Sentence converted = new Sentence(aJCas);
    Token opening = aTokens.get(members[0].getID());
    converted.setBegin(opening.getBegin());
    Token closing = aTokens.get(members[members.length - 1].getID());
    converted.setEnd(closing.getEnd());
    converted.addToIndexes();
  }
}

代码示例来源:origin: webanno/webanno

/**
 * Creates a JCas for the given text that is tokenized by a {@link TokenBuilder} but contains
 * exactly one {@link Sentence} covering the whole document text.
 *
 * @param aText
 *            the text handed to the token builder.
 * @return the prepared JCas.
 * @throws UIMAException
 *             declared for failures while setting up the JCas.
 */
private static JCas makeJCasOneSentence(String aText) throws UIMAException
{
  JCas jcas = makeJCas();
  
  TokenBuilder<Token, Sentence> tb = new TokenBuilder<>(Token.class,
      Sentence.class);
  tb.buildTokens(jcas, aText);
  
  // Remove the sentences generated by the token builder which treats the line break as a
  // sentence break. Iterate over a snapshot: the collection returned by select() is backed
  // by the index that removeFromIndexes() modifies, and changing an index while iterating
  // over it is not safe.
  for (Sentence s : new java.util.ArrayList<Sentence>(select(jcas, Sentence.class))) {
    s.removeFromIndexes();
  }
  
  // Add a new sentence covering the whole text
  new Sentence(jcas, 0, jcas.getDocumentText().length()).addToIndexes();
  
  return jcas;
}

代码示例来源:origin: webanno/webanno

/**
 * Splits the document text into sentences using a {@link BreakIterator} sentence instance
 * for {@link Locale#US} and adds a {@link Sentence} annotation for every non-empty segment.
 *
 * @param aJCas
 *            the JCas whose document text is split and which receives the sentences.
 */
public static void splitSentences(JCas aJCas)
{
  BreakIterator boundaries = BreakIterator.getSentenceInstance(Locale.US);
  boundaries.setText(aJCas.getDocumentText());

  int begin = boundaries.first();
  for (int end = boundaries.next(); end != BreakIterator.DONE; end = boundaries.next()) {
    // Each segment is passed through trim(...) before it is checked and annotated.
    int[] offsets = { begin, end };
    trim(aJCas.getDocumentText(), offsets);
    if (!isEmpty(offsets[0], offsets[1])) {
      new Sentence(aJCas, offsets[0], offsets[1]).addToIndexes(aJCas);
    }
    begin = end;
  }
}

代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-api-dao

/**
 * Walks the sentence boundaries reported by a {@link Locale#US} {@link BreakIterator} over
 * the document text and indexes a {@link Sentence} for each segment that is not empty after
 * trimming.
 *
 * @param aJCas
 *            the JCas whose document text is split and which receives the sentences.
 */
public static void splitSentences(JCas aJCas)
{
  BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US);
  iterator.setText(aJCas.getDocumentText());

  int segmentBegin = iterator.first();
  int segmentEnd;
  while ((segmentEnd = iterator.next()) != BreakIterator.DONE) {
    int[] trimmed = new int[] { segmentBegin, segmentEnd };
    trim(aJCas.getDocumentText(), trimmed);
    if (!isEmpty(trimmed[0], trimmed[1])) {
      Sentence sentence = new Sentence(aJCas, trimmed[0], trimmed[1]);
      sentence.addToIndexes(aJCas);
    }
    // The end of this segment is the begin of the next one.
    segmentBegin = segmentEnd;
  }
}

代码示例来源:origin: webanno/webanno

/**
 * With cross-sentence spans disabled on the layer, rendering a named entity that overlaps
 * two sentences must yield exactly one error comment attached to that entity.
 */
@Test
public void thatSpanCrossSentenceBehaviorOnRenderGeneratesErrors()
{
  neLayer.setCrossSentence(false);
  
  // 20 characters split into two sentences of 10 characters each.
  jcas.setDocumentText(StringUtils.repeat("a", 20));
  new Sentence(jcas, 0, 10).addToIndexes();
  new Sentence(jcas, 10, 20).addToIndexes();

  // The entity starts in the first sentence and ends in the second one.
  NamedEntity crossingEntity = new NamedEntity(jcas, 5, 15);
  crossingEntity.addToIndexes();
  
  SpanAdapter adapter = new SpanAdapter(featureSupportRegistry, null, neLayer, asList(),
      asList(new SpanCrossSentenceBehavior()));
  SpanRenderer sut = new SpanRenderer(adapter, featureSupportRegistry,
      asList(new SpanCrossSentenceBehavior()));
  
  VDocument rendered = new VDocument();
  int textLength = jcas.getDocumentText().length();
  sut.render(jcas, asList(), rendered, 0, textLength);
  
  VComment expected = new VComment(crossingEntity, VCommentType.ERROR,
      "Crossing sentence bounardies is not permitted.");
  assertThat(rendered.comments())
      .usingFieldByFieldElementComparator()
      .containsExactlyInAnyOrder(expected);
}

代码示例来源:origin: webanno/webanno

/**
 * Writes a document consisting of two single-token sentences that directly follow each other
 * without any separating whitespace and checks the output via {@code writeAndAssertEquals}.
 */
@Test
public void testTwoSentencesWithNoSpaceInBetween() throws Exception
{
  // Merge the default type system with the test-specific types.
  TypeSystemDescription defaultTypes = TypeSystemDescriptionFactory
      .createTypeSystemDescription();
  TypeSystemDescription testTypes = TypeSystemDescriptionFactory
      .createTypeSystemDescriptionFromPath(
          "src/test/resources/desc/type/webannoTestTypes.xml");
  JCas jcas = JCasFactory
      .createJCas(CasCreationUtils.mergeTypeSystems(asList(defaultTypes, testTypes)));
  
  DocumentMetaData metadata = DocumentMetaData.create(jcas);
  metadata.setDocumentId("doc");
  jcas.setDocumentText("onetwo");

  // "one" and "two" each form a token and a sentence of their own.
  new Token(jcas, 0, 3).addToIndexes();
  new Sentence(jcas, 0, 3).addToIndexes();
  new Token(jcas, 3, 6).addToIndexes();
  new Sentence(jcas, 3, 6).addToIndexes();
  
  writeAndAssertEquals(jcas);
}

代码示例来源:origin: webanno/webanno

/**
 * Writes a document in which a zero-width {@code webanno.custom.SimpleSpan} annotation sits
 * exactly between two adjacent tokens and checks the output via
 * {@code writeAndAssertEquals}.
 */
@Test
public void testZeroLengthSpanBetweenAdjacentTokens() throws Exception
{
  JCas jcas = makeJCas();
  jcas.setDocumentText("word.");

  // Two directly adjacent tokens ("word" and ".") inside a single sentence.
  new Token(jcas, 0, 4).addToIndexes();
  new Token(jcas, 4, 5).addToIndexes();
  new Sentence(jcas, 0, 5).addToIndexes();
  
  CAS cas = jcas.getCas();
  Type spanType = cas.getTypeSystem().getType("webanno.custom.SimpleSpan");
  
  // Zero-width annotation located at the end of the first token / begin of the second one.
  AnnotationFS zeroWidthSpan = cas.createAnnotation(spanType, 4, 4);
  cas.addFsToIndexes(zeroWidthSpan);

  writeAndAssertEquals(jcas, 
      WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList("webanno.custom.SimpleSpan"));
}

相关文章