本文整理了Java中de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence.addToIndexes()方法的一些代码示例,展示了Sentence.addToIndexes()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Sentence.addToIndexes()方法的具体详情如下:
包路径:de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence
类名称:Sentence
方法名:addToIndexes
暂无
代码示例来源:origin: dkpro/dkpro-tc
/**
 * Creates a {@link Sentence} annotation over the given offsets and adds it to the indexes.
 *
 * @param aJCas
 *            the JCas in which the annotation is created
 * @param begin
 *            begin offset passed to the {@link Sentence} constructor
 * @param end
 *            end offset passed to the {@link Sentence} constructor
 */
protected void setSentence(JCas aJCas, int begin, int end)
{
    new Sentence(aJCas, begin, end).addToIndexes();
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.io.penntree-asl
/**
 * Creates a {@link Sentence} annotation over the given offsets and adds it to the indexes.
 *
 * @param aJCas
 *            the JCas in which the annotation is created
 * @param aBegin
 *            begin offset passed to the {@link Sentence} constructor
 * @param aEnd
 *            end offset passed to the {@link Sentence} constructor
 */
private void annotateSentence(JCas aJCas, int aBegin, int aEnd)
{
    Sentence sentence = new Sentence(aJCas, aBegin, aEnd);
    sentence.addToIndexes();
}
代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-tsv
/**
 * Appends the given line (followed by {@code LF}) to {@code coveredText} and adds a
 * {@link Sentence} annotation over {@code [aBegin, aEnd)} to the indexes.
 * <p>
 * When there is a gap of more than one position between the end of the previous sentence and
 * the begin of this one, the line is prefixed with one space per position in
 * {@code (aPrevEnd, aBegin)}.
 *
 * @param aJCas
 *            the JCas in which the annotation is created
 * @param aLine
 *            the sentence text to append
 * @param aBegin
 *            begin offset of the sentence
 * @param aEnd
 *            end offset of the sentence
 * @param aPrevEnd
 *            end offset of the previous sentence
 */
private void createSentence(JCas aJCas, String aLine, int aBegin, int aEnd, int aPrevEnd) {
    // Build the padded line in a single buffer instead of concatenating Strings in a loop,
    // which would copy the growing pad on every iteration.
    StringBuilder line = new StringBuilder();
    for (int i = aPrevEnd + 1; i < aBegin; i++) {
        line.append(' ');
    }
    line.append(aLine).append(LF);

    // Hand over a String, exactly as before, so the same append overload is selected.
    coveredText.append(line.toString());

    Sentence sentence = new Sentence(aJCas, aBegin, aEnd);
    sentence.addToIndexes();
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.tokit-asl
/**
 * Adds one {@link Sentence} annotation per match of the boundary pattern {@link #lineBreak}.
 * Each sentence starts where the previous match ended (or at offset 0 for the first one) and
 * ends where the current match starts. Text following the last match is not annotated by this
 * method.
 *
 * @param aJCas
 *            the {@link JCas} the sentences are added to
 * @param text
 *            the text in which sentence boundaries are searched
 */
private void createSentences(JCas aJCas, String text)
{
    int sentenceBegin = 0;
    for (Matcher boundary = lineBreak.matcher(text); boundary.find();
            sentenceBegin = boundary.end()) {
        new Sentence(aJCas, sentenceBegin, boundary.start()).addToIndexes(aJCas);
    }
}
代码示例来源:origin: webanno/webanno
/**
 * Creates a {@link Sentence} for the given span after passing it through {@code trim}, and
 * adds it to the indexes.
 *
 * @param aJCas
 *            the JCas providing the document text and receiving the annotation
 * @param aBegin
 *            begin offset of the candidate span
 * @param aEnd
 *            end offset of the candidate span
 * @return the indexed sentence, or {@code null} if {@code isEmpty} reports the adjusted span as
 *         empty
 */
protected Sentence createSentence(final JCas aJCas, final int aBegin,
        final int aEnd)
{
    // trim(...) adjusts the offsets in place (presumably stripping surrounding whitespace;
    // confirm against its definition).
    final int[] offsets = { aBegin, aEnd };
    trim(aJCas.getDocumentText(), offsets);

    if (isEmpty(offsets[0], offsets[1])) {
        return null;
    }

    Sentence sentence = new Sentence(aJCas, offsets[0], offsets[1]);
    sentence.addToIndexes(aJCas);
    return sentence;
}
代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-io-text
/**
 * Runs the span through {@code trim} and, unless the result is empty, adds a {@link Sentence}
 * covering it to the indexes.
 *
 * @param aJCas
 *            the JCas providing the document text and receiving the annotation
 * @param aBegin
 *            begin offset of the candidate span
 * @param aEnd
 *            end offset of the candidate span
 * @return the newly indexed sentence, or {@code null} when {@code isEmpty} holds for the
 *         adjusted offsets
 */
protected Sentence createSentence(final JCas aJCas, final int aBegin,
        final int aEnd)
{
    int[] range = new int[2];
    range[0] = aBegin;
    range[1] = aEnd;
    // The helper updates range[0] / range[1] in place.
    trim(aJCas.getDocumentText(), range);

    if (isEmpty(range[0], range[1])) {
        return null;
    }
    else {
        Sentence created = new Sentence(aJCas, range[0], range[1]);
        created.addToIndexes(aJCas);
        return created;
    }
}
代码示例来源:origin: oaqa/knn4qa
/**
 * Copies all {@link Sentence}, {@link Token} and {@link POS} annotations found in the source
 * JCas via a {@link CasCopier} and adds each copy to the indexes.
 *
 * @param jCasSrc
 *            the JCas whose annotations are read
 * @param jCasDst
 *            the JCas whose CAS is the copy target
 */
private void copyAnnotations(JCas jCasSrc, JCas jCasDst) {
    final CasCopier casCopier = new CasCopier(jCasSrc.getCas(), jCasDst.getCas());

    // Sentences first, then tokens, then part-of-speech tags.
    for (Sentence source : JCasUtil.select(jCasSrc, Sentence.class)) {
        ((Sentence) casCopier.copyFs(source)).addToIndexes();
    }

    for (Token source : JCasUtil.select(jCasSrc, Token.class)) {
        ((Token) casCopier.copyFs(source)).addToIndexes();
    }

    for (POS source : JCasUtil.select(jCasSrc, POS.class)) {
        ((POS) casCopier.copyFs(source)).addToIndexes();
    }
}
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.api.segmentation-asl
/**
 * Creates a {@link Sentence} for the given span after passing it through {@code trim}, and
 * adds it to the indexes, provided the span is not empty and {@code isWriteSentence()} is
 * {@code true}.
 *
 * @param aJCas
 *            the JCas providing the document text and receiving the annotation
 * @param aBegin
 *            begin offset of the candidate span
 * @param aEnd
 *            end offset of the candidate span
 * @return the indexed sentence, or {@code null} if nothing was created
 */
protected Sentence createSentence(final JCas aJCas, final int aBegin, final int aEnd)
{
    final int[] offsets = { aBegin, aEnd };
    trim(aJCas.getDocumentText(), offsets);

    // isWriteSentence() is only consulted for non-empty spans, as in the combined condition.
    if (isEmpty(offsets[0], offsets[1]) || !isWriteSentence()) {
        return null;
    }

    Sentence sentence = new Sentence(aJCas, offsets[0], offsets[1]);
    sentence.addToIndexes(aJCas);
    return sentence;
}
代码示例来源:origin: dkpro/dkpro-core
/**
 * Runs the span through {@code trim} and adds a {@link Sentence} covering the result to the
 * indexes when the result is non-empty and sentence writing is enabled
 * ({@code isWriteSentence()}).
 *
 * @param aJCas
 *            the JCas providing the document text and receiving the annotation
 * @param aBegin
 *            begin offset of the candidate span
 * @param aEnd
 *            end offset of the candidate span
 * @return the newly indexed sentence, or {@code null} when no sentence was created
 */
protected Sentence createSentence(final JCas aJCas, final int aBegin, final int aEnd)
{
    int[] range = new int[2];
    range[0] = aBegin;
    range[1] = aEnd;
    // The helper updates range[0] / range[1] in place.
    trim(aJCas.getDocumentText(), range);

    Sentence created = null;
    if (!isEmpty(range[0], range[1]) && isWriteSentence()) {
        created = new Sentence(aJCas, range[0], range[1]);
        created.addToIndexes(aJCas);
    }
    return created;
}
代码示例来源:origin: webanno/webanno
/**
 * Checks that {@code ImportExportServiceImpl.tokenize} produces the expected tokens for two
 * pre-annotated sentences with no whitespace between them, and leaves the sentences intact.
 */
@Test
public void testTokenize() throws Exception
{
    JCas jcas = JCasFactory.createText("i am one.i am two.", "en");
    // Two adjacent sentences: "i am one." and "i am two."
    new Sentence(jcas, 0, 9).addToIndexes();
    new Sentence(jcas, 9, 18).addToIndexes();

    ImportExportServiceImpl.tokenize(jcas);

    assertEquals(asList("i am one.", "i am two."), toText(select(jcas, Sentence.class)));
    assertEquals(asList("i", "am", "one", ".", "i", "am", "two", "."),
            toText(select(jcas, Token.class)));
}
}
代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-io-tcf
/**
 * Adds one {@link Sentence} per entry of the corpus' sentences layer. Each sentence spans from
 * the begin of its first token to the end of its last token, using the token annotations
 * looked up by token ID in {@code aTokens}.
 * <p>
 * Every sentence must have at least one token and every token ID must be present in
 * {@code aTokens}; otherwise the array access or the lookup fails.
 *
 * @param aJCas
 *            the JCas in which the sentences are created
 * @param aCorpusData
 *            the corpus to read the sentences layer from
 * @param aTokens
 *            token annotations keyed by token ID
 */
private void convertSentences(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) {
    if (aCorpusData.getSentencesLayer() == null) {
        // No layer to read from.
        return;
    }

    int index = 0;
    while (index < aCorpusData.getSentencesLayer().size()) {
        eu.clarin.weblicht.wlfxb.tc.api.Token[] members = aCorpusData.getSentencesLayer()
                .getTokens(aCorpusData.getSentencesLayer().getSentence(index));

        Sentence sentence = new Sentence(aJCas);

        Token firstToken = aTokens.get(members[0].getID());
        sentence.setBegin(firstToken.getBegin());

        Token lastToken = aTokens.get(members[members.length - 1].getID());
        sentence.setEnd(lastToken.getEnd());

        sentence.addToIndexes();
        index++;
    }
}
代码示例来源:origin: webanno/webanno
/**
 * Converts the sentences layer of the given corpus into {@link Sentence} annotations. The
 * offsets of each sentence are taken from the token annotations (found in {@code aTokens} by
 * token ID) of its first and last token.
 * <p>
 * Does nothing if the corpus has no sentences layer. A sentence without tokens, or a token ID
 * missing from {@code aTokens}, makes the array access or the lookup fail.
 *
 * @param aJCas
 *            the JCas in which the sentences are created
 * @param aCorpusData
 *            the corpus to read the sentences layer from
 * @param aTokens
 *            token annotations keyed by token ID
 */
private void convertSentences(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) {
    if (aCorpusData.getSentencesLayer() != null) {
        for (int n = 0; n < aCorpusData.getSentencesLayer().size(); n++) {
            eu.clarin.weblicht.wlfxb.tc.api.Token[] tcfTokens = aCorpusData
                    .getSentencesLayer()
                    .getTokens(aCorpusData.getSentencesLayer().getSentence(n));

            Sentence annotation = new Sentence(aJCas);

            String firstId = tcfTokens[0].getID();
            annotation.setBegin(aTokens.get(firstId).getBegin());

            String lastId = tcfTokens[tcfTokens.length - 1].getID();
            annotation.setEnd(aTokens.get(lastId).getEnd());

            annotation.addToIndexes();
        }
    }
    // Otherwise: no layer to read from.
}
代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/de.tudarmstadt.ukp.clarin.webanno.tcf
/**
 * Adds a {@link Sentence} annotation for every sentence in the corpus' sentences layer,
 * spanning from the begin of the first token to the end of the last token of that sentence.
 * Token offsets are read from the annotations stored in {@code aTokens} under the token IDs.
 * <p>
 * Returns without doing anything if there is no sentences layer. Fails on a sentence with no
 * tokens or on a token ID that is not a key of {@code aTokens}.
 *
 * @param aJCas
 *            the JCas in which the sentences are created
 * @param aCorpusData
 *            the corpus to read the sentences layer from
 * @param aTokens
 *            token annotations keyed by token ID
 */
private void convertSentences(JCas aJCas, TextCorpus aCorpusData,
        Map<String, Token> aTokens)
{
    if (aCorpusData.getSentencesLayer() == null) {
        // No layer to read from.
        return;
    }

    for (int s = 0; s < aCorpusData.getSentencesLayer().size(); s++) {
        eu.clarin.weblicht.wlfxb.tc.api.Token[] covered = aCorpusData.getSentencesLayer()
                .getTokens(aCorpusData.getSentencesLayer().getSentence(s));

        Sentence result = new Sentence(aJCas);

        Token head = aTokens.get(covered[0].getID());
        result.setBegin(head.getBegin());

        int lastIndex = covered.length - 1;
        Token tail = aTokens.get(covered[lastIndex].getID());
        result.setEnd(tail.getEnd());

        result.addToIndexes();
    }
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.io.tcf-asl
/**
 * Creates and indexes a {@link Sentence} for each sentence of the corpus' sentences layer.
 * The begin offset comes from the annotation of the sentence's first token and the end offset
 * from the annotation of its last token, both resolved through {@code aTokens} by token ID.
 * <p>
 * If the corpus has no sentences layer, nothing is created. The method fails if a sentence
 * has no tokens or a token ID is absent from {@code aTokens}.
 *
 * @param aJCas
 *            the JCas in which the sentences are created
 * @param aCorpusData
 *            the corpus to read the sentences layer from
 * @param aTokens
 *            token annotations keyed by token ID
 */
private void convertSentences(JCas aJCas, TextCorpus aCorpusData,
        Map<String, Token> aTokens)
{
    if (aCorpusData.getSentencesLayer() == null) {
        // No layer to read from.
        return;
    }

    int position = 0;
    while (position < aCorpusData.getSentencesLayer().size()) {
        eu.clarin.weblicht.wlfxb.tc.api.Token[] sentenceTokens = aCorpusData
                .getSentencesLayer()
                .getTokens(aCorpusData.getSentencesLayer().getSentence(position));

        Sentence converted = new Sentence(aJCas);

        int beginOffset = aTokens.get(sentenceTokens[0].getID()).getBegin();
        converted.setBegin(beginOffset);

        int endOffset = aTokens.get(sentenceTokens[sentenceTokens.length - 1].getID())
                .getEnd();
        converted.setEnd(endOffset);

        converted.addToIndexes();
        position++;
    }
}
代码示例来源:origin: webanno/webanno
/**
 * Builds a JCas from the given text in which the tokens come from a {@link TokenBuilder} and
 * the whole document text is covered by exactly one {@link Sentence}.
 *
 * @param aText
 *            the text handed to {@link TokenBuilder#buildTokens}
 * @return the JCas containing the tokens and the single sentence
 * @throws UIMAException
 *             if the JCas cannot be created
 */
private static JCas makeJCasOneSentence(String aText) throws UIMAException
{
    JCas jcas = makeJCas();

    TokenBuilder<Token, Sentence> tb = new TokenBuilder<>(Token.class,
            Sentence.class);
    tb.buildTokens(jcas, aText);

    // Remove the sentences generated by the token builder which treats the line break as a
    // sentence break. Iterate over a snapshot: the collection returned by select(...) may be
    // backed by the live index, and removing entries while iterating over it is not safe.
    java.util.List<Sentence> generated = new java.util.ArrayList<Sentence>(
            select(jcas, Sentence.class));
    for (Sentence s : generated) {
        s.removeFromIndexes();
    }

    // Add a new sentence covering the whole text
    new Sentence(jcas, 0, jcas.getDocumentText().length()).addToIndexes();

    return jcas;
}
代码示例来源:origin: webanno/webanno
/**
 * Splits the document text into sentences using a {@link BreakIterator} sentence instance for
 * {@link Locale#US} and adds a {@link Sentence} for every segment that is not empty after
 * being passed through {@code trim}.
 *
 * @param aJCas
 *            the JCas providing the document text and receiving the annotations
 */
public static void splitSentences(JCas aJCas)
{
    BreakIterator boundaries = BreakIterator.getSentenceInstance(Locale.US);
    boundaries.setText(aJCas.getDocumentText());

    int segmentBegin = boundaries.first();
    for (int segmentEnd = boundaries.next(); segmentEnd != BreakIterator.DONE;
            segmentEnd = boundaries.next()) {
        // trim(...) adjusts the offsets in place.
        int[] offsets = { segmentBegin, segmentEnd };
        trim(aJCas.getDocumentText(), offsets);

        if (!isEmpty(offsets[0], offsets[1])) {
            new Sentence(aJCas, offsets[0], offsets[1]).addToIndexes(aJCas);
        }

        segmentBegin = segmentEnd;
    }
}
代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-api-dao
/**
 * Walks the sentence boundaries that a {@link Locale#US} {@link BreakIterator} finds in the
 * document text and indexes a {@link Sentence} for each segment between two consecutive
 * boundaries, skipping segments that {@code isEmpty} reports as empty after {@code trim}.
 *
 * @param aJCas
 *            the JCas providing the document text and receiving the annotations
 */
public static void splitSentences(JCas aJCas)
{
    BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US);
    iterator.setText(aJCas.getDocumentText());

    int previous = iterator.first();
    int current;
    while ((current = iterator.next()) != BreakIterator.DONE) {
        int[] range = new int[] { previous, current };
        // The next segment always starts at the untrimmed boundary.
        previous = current;

        trim(aJCas.getDocumentText(), range);
        if (isEmpty(range[0], range[1])) {
            continue;
        }

        Sentence sentence = new Sentence(aJCas, range[0], range[1]);
        sentence.addToIndexes(aJCas);
    }
}
代码示例来源:origin: webanno/webanno
/**
 * With cross-sentence spans disabled on the layer, rendering a named entity that overlaps two
 * sentences must yield exactly one error comment attached to that entity.
 */
@Test
public void thatSpanCrossSentenceBehaviorOnRenderGeneratesErrors()
{
    neLayer.setCrossSentence(false);

    // Two sentences, [0,10) and [10,20), and an entity [5,15) that overlaps both.
    jcas.setDocumentText(StringUtils.repeat("a", 20));
    new Sentence(jcas, 0, 10).addToIndexes();
    new Sentence(jcas, 10, 20).addToIndexes();
    NamedEntity crossingEntity = new NamedEntity(jcas, 5, 15);
    crossingEntity.addToIndexes();

    SpanAdapter spanAdapter = new SpanAdapter(featureSupportRegistry, null, neLayer, asList(),
            asList(new SpanCrossSentenceBehavior()));
    SpanRenderer sut = new SpanRenderer(spanAdapter, featureSupportRegistry,
            asList(new SpanCrossSentenceBehavior()));

    VDocument rendered = new VDocument();
    sut.render(jcas, asList(), rendered, 0, jcas.getDocumentText().length());

    // The message text (including its spelling) must match what the behavior emits.
    assertThat(rendered.comments())
            .usingFieldByFieldElementComparator()
            .containsExactlyInAnyOrder(new VComment(crossingEntity, VCommentType.ERROR,
                    "Crossing sentence bounardies is not permitted."));
}
代码示例来源:origin: webanno/webanno
/**
 * Round-trip check for a document made of two single-token sentences ("one", "two") that are
 * directly adjacent, without any character between them.
 */
@Test
public void testTwoSentencesWithNoSpaceInBetween() throws Exception
{
    // Merge the default type system with the test-specific types.
    TypeSystemDescription builtIn = TypeSystemDescriptionFactory.createTypeSystemDescription();
    TypeSystemDescription custom = TypeSystemDescriptionFactory
            .createTypeSystemDescriptionFromPath(
                    "src/test/resources/desc/type/webannoTestTypes.xml");
    TypeSystemDescription combined = CasCreationUtils.mergeTypeSystems(asList(builtIn, custom));

    JCas jcas = JCasFactory.createJCas(combined);
    DocumentMetaData metadata = DocumentMetaData.create(jcas);
    metadata.setDocumentId("doc");
    jcas.setDocumentText("onetwo");

    // First sentence / token: "one"
    new Token(jcas, 0, 3).addToIndexes();
    new Sentence(jcas, 0, 3).addToIndexes();

    // Second sentence / token: "two"
    new Token(jcas, 3, 6).addToIndexes();
    new Sentence(jcas, 3, 6).addToIndexes();

    writeAndAssertEquals(jcas);
}
代码示例来源:origin: webanno/webanno
/**
 * Round-trip check for a zero-width {@code webanno.custom.SimpleSpan} annotation located at
 * the boundary between two directly adjacent tokens.
 */
@Test
public void testZeroLengthSpanBetweenAdjacentTokens() throws Exception
{
    JCas jcas = makeJCas();
    jcas.setDocumentText("word.");

    // Tokens "word" and "." inside a single sentence.
    new Token(jcas, 0, 4).addToIndexes();
    new Token(jcas, 4, 5).addToIndexes();
    new Sentence(jcas, 0, 5).addToIndexes();

    CAS cas = jcas.getCas();
    Type spanType = cas.getTypeSystem().getType("webanno.custom.SimpleSpan");

    // Insert zero-width annotation between the adjacent tokens (at end of first token).
    AnnotationFS zeroWidthSpan = cas.createAnnotation(spanType, 4, 4);
    cas.addFsToIndexes(zeroWidthSpan);

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList("webanno.custom.SimpleSpan"));
}
内容来源于网络,如有侵权,请联系作者删除!