本文整理了Java中de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence.getCoveredText()方法的一些代码示例,展示了Sentence.getCoveredText()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Sentence.getCoveredText()方法的具体详情如下:
包路径:de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence
类名称:Sentence
方法名:getCoveredText
暂无
代码示例来源:origin: UKPLab/argument-reasoning-comprehension-task
/**
 * Decides whether the argument held in the given CAS should be kept.
 * <p>
 * An argument is rejected when it consists of exactly one sentence, or when the covered text
 * of any of its sentences is longer than {@code MAX_SENTENCE_LENGTH} characters.
 *
 * @param jCas the CAS whose {@code Sentence} annotations are inspected
 * @return {@code false} if the argument is rejected by one of the rules above, {@code true} otherwise
 */
@Override
public boolean keepArgument(JCas jCas)
{
    List<Sentence> allSentences = new ArrayList<>(JCasUtil.select(jCas, Sentence.class));

    // single-sentence arguments are dropped outright
    boolean singleSentence = allSentences.size() == 1;
    if (singleSentence) {
        return false;
    }

    // any over-long sentence disqualifies the whole argument
    for (int idx = 0; idx < allSentences.size(); idx++) {
        int textLength = allSentences.get(idx).getCoveredText().length();
        if (textLength > MAX_SENTENCE_LENGTH) {
            return false;
        }
    }

    return true;
}
}
代码示例来源:origin: hltfbk/Excitement-Open-Platform
/**
 * Pairs every tree root of the last conversion with the covered text of its sentence.
 * <p>
 * <B>NOTE:</B> only valid after one of the conversion methods has been called, since it reads
 * the root list and sentence list that were stored by that call.
 *
 * @return an insertion-ordered map from each root to the text of its sentence; entries are added
 *         in the iteration order of the stored root list
 * @throws CasTreeConverterException if no conversion has been performed yet, or if the stored
 *         root list and sentence list differ in size
 */
public LinkedHashMap<BasicNode, String> getTreesToSentences() throws CasTreeConverterException {
    boolean conversionDone = lastRootList != null && lastSentenceList != null;
    if (!conversionDone) {
        throw new CasTreeConverterException("getTreesToSentences() called before a conversion method was called.");
    }

    if (lastRootList.size() != lastSentenceList.size()) {
        throw new CasTreeConverterException(
                "Internal error - lastRootList(size=" + lastRootList.size()
                        + ") and lastSentenceList(size=" + lastSentenceList.size()
                        + ") are in different sizes.");
    }

    LinkedHashMap<BasicNode, String> treeToText =
            new LinkedHashMap<BasicNode, String>(lastRootList.size());

    // Walk both lists in lock-step; the size check above guarantees they run out together.
    Iterator<? extends BasicNode> rootIt = lastRootList.iterator();
    Iterator<Sentence> sentenceIt = lastSentenceList.iterator();
    while (rootIt.hasNext()) {
        BasicNode root = rootIt.next();
        Sentence matching = sentenceIt.next();
        treeToText.put(root, matching.getCoveredText());
    }

    return treeToText;
}
代码示例来源:origin: UKPLab/argument-reasoning-comprehension-task
/**
 * Converts every {@code Sentence} annotation of the argument into a {@code HITSentence}.
 * <p>
 * Each resulting entry carries its zero-based position among the selected sentences, an id
 * obtained from {@code StandaloneArgument.getSentenceID(argument, position)}, and the covered
 * text of the sentence.
 *
 * @param argument the argument whose CAS is read
 * @return one {@code HITSentence} per sentence, in selection order
 * @throws IOException declared by the signature; propagated from the calls made here
 */
public static List<HITSentence> extractSentences(StandaloneArgument argument)
        throws IOException
{
    List<HITSentence> hitSentences = new ArrayList<>();

    ArrayList<Sentence> annotated = new ArrayList<>(
            JCasUtil.select(argument.getJCas(), Sentence.class));

    int index = 0;
    for (Sentence annotation : annotated) {
        HITSentence hit = new HITSentence();

        hit.position = index;
        // the id is derived from the argument together with the sentence position
        hit.sentenceId = StandaloneArgument.getSentenceID(argument, hit.position);
        hit.text = annotation.getCoveredText();

        hitSentences.add(hit);
        index++;
    }

    return hitSentences;
}
代码示例来源:origin: UKPLab/argument-reasoning-comprehension-task
/**
 * Converts every {@code Sentence} annotation of the argument into a {@code HITSentenceReason}.
 * <p>
 * Each entry receives its zero-based position, an id from
 * {@code StandaloneArgument.getSentenceID(argument, position)}, the covered text, and a
 * {@code disabled} flag that is set when at least one {@code Claim} annotation covers the sentence.
 * A disabled sentence is treated as an error at this stage.
 *
 * @param argument the argument whose CAS is read
 * @return one {@code HITSentenceReason} per sentence, in selection order
 * @throws IOException declared by the signature; propagated from the calls made here
 * @throws IllegalStateException if any sentence is covered by a {@code Claim} annotation
 */
private Collection<? extends HITSentence> extractSentencesForReasons(
        StandaloneArgument argument)
        throws IOException
{
    List<HITSentenceReason> reasons = new ArrayList<>();

    ArrayList<Sentence> annotated = new ArrayList<>(
            JCasUtil.select(argument.getJCas(), Sentence.class));

    int index = 0;
    for (Sentence annotation : annotated) {
        HITSentenceReason reason = new HITSentenceReason();

        reason.position = index;
        // the id is derived from the argument together with the sentence position
        reason.sentenceId = StandaloneArgument.getSentenceID(argument, reason.position);
        reason.text = annotation.getCoveredText();

        // a sentence counts as disabled when some claim annotation covers it
        reason.disabled = !JCasUtil.selectCovering(Claim.class, annotation).isEmpty();

        // claims must not exist yet at this point of the pipeline
        if (reason.disabled) {
            throw new IllegalStateException("No claim annotations are allowed at this point");
        }

        reasons.add(reason);
        index++;
    }

    return reasons;
}
代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-io-tsv
/**
 * Registers one {@code AnnotationUnit} per token and records where it sits in the document.
 * <p>
 * For every sentence, each covered token becomes a unit appended to {@code units}. The unit of
 * the first token of a sentence is additionally mapped to the sentence text in
 * {@code sentenceUnits}. Every unit is mapped in {@code unitsLineNumber} to a label of the form
 * {@code "<sentence>-<token>"}, both numbers counted from 1.
 *
 * @param aJCas the CAS providing the sentences and tokens
 */
private void setTokenSentenceAddress(JCas aJCas)
{
    int sentenceNo = 0;
    for (Sentence sentence : select(aJCas, Sentence.class)) {
        sentenceNo++;

        int tokenNo = 0;
        for (Token token : selectCovered(Token.class, sentence)) {
            tokenNo++;

            AnnotationUnit tokenUnit = new AnnotationUnit(token.getBegin(), token.getEnd(),
                    false, token.getCoveredText());
            units.add(tokenUnit);

            // only the first token of a sentence carries the sentence text
            if (tokenNo == 1) {
                sentenceUnits.put(tokenUnit, sentence.getCoveredText());
            }

            unitsLineNumber.put(tokenUnit, sentenceNo + "-" + tokenNo);
        }
    }
}
代码示例来源:origin: webanno/webanno
/**
 * Builds the token units of the document together with their sentence/token addresses.
 * <p>
 * Each token covered by a sentence yields an {@code AnnotationUnit} that is added to
 * {@code units} and mapped in {@code unitsLineNumber} to {@code "<sentence>-<token>"} (both
 * 1-based). The first token unit of each sentence is also mapped to the sentence's covered text
 * in {@code sentenceUnits}.
 *
 * @param aJCas the CAS providing the sentences and tokens
 */
private void setTokenSentenceAddress(JCas aJCas)
{
    int sentenceCounter = 1;
    for (Sentence currentSentence : select(aJCas, Sentence.class)) {
        int tokenCounter = 1;
        for (Token currentToken : selectCovered(Token.class, currentSentence)) {
            int begin = currentToken.getBegin();
            int end = currentToken.getEnd();
            AnnotationUnit unit = new AnnotationUnit(begin, end, false,
                    currentToken.getCoveredText());
            units.add(unit);

            // the sentence text is attached to the sentence's first token only
            boolean firstTokenOfSentence = tokenCounter == 1;
            if (firstTokenOfSentence) {
                sentenceUnits.put(unit, currentSentence.getCoveredText());
            }

            unitsLineNumber.put(unit, sentenceCounter + "-" + tokenCounter);
            tokenCounter += 1;
        }
        sentenceCounter += 1;
    }
}
代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-tsv
/**
 * Creates an {@code AnnotationUnit} for each token and stores its address within the document.
 * <p>
 * Units are appended to {@code units}; {@code unitsLineNumber} receives the label
 * {@code "<sentence>-<token>"} (numbering starts at 1 for both parts); and the unit of the first
 * token in each sentence is associated with that sentence's covered text in
 * {@code sentenceUnits}.
 *
 * @param aJCas the CAS providing the sentences and tokens
 */
private void setTokenSentenceAddress(JCas aJCas) {
    int sentenceNumber = 0;
    for (Sentence sent : select(aJCas, Sentence.class)) {
        sentenceNumber += 1;
        int positionInSentence = 0;
        for (Token tok : selectCovered(Token.class, sent)) {
            positionInSentence += 1;

            AnnotationUnit created = new AnnotationUnit(tok.getBegin(), tok.getEnd(), false,
                    tok.getCoveredText());
            units.add(created);

            // remember the sentence text once, keyed by the sentence's first token unit
            if (positionInSentence == 1) {
                sentenceUnits.put(created, sent.getCoveredText());
            }

            String address = sentenceNumber + "-" + positionInSentence;
            unitsLineNumber.put(created, address);
        }
    }
}
代码示例来源:origin: webanno/webanno
/**
 * Writes one sentence to the given output.
 * <p>
 * The sentence's covered text is split on {@code LINE_BREAK} (keeping empty segments); each
 * segment is emitted as {@code PREFIX_TEXT}, the escaped segment, and {@code LINE_BREAK}.
 * Afterwards every token is written followed by {@code LINE_BREAK}, and each of its sub-tokens
 * likewise.
 *
 * @param aOut the writer receiving the output
 * @param aSentence the sentence to serialize
 */
public void write(PrintWriter aOut, TsvSentence aSentence)
{
    String sentenceText = aSentence.getUimaSentence().getCoveredText();
    String[] textLines = splitPreserveAllTokens(sentenceText, LINE_BREAK);

    // header: one prefixed line per physical line of the sentence text
    for (int i = 0; i < textLines.length; i++) {
        aOut.print(PREFIX_TEXT);
        aOut.print(escapeText(textLines[i]));
        aOut.print(LINE_BREAK);
    }

    // body: each token, immediately followed by its sub-tokens
    for (TsvToken tsvToken : aSentence.getTokens()) {
        write(aOut, tsvToken);
        aOut.write(LINE_BREAK);

        for (TsvSubToken tsvSubToken : tsvToken.getSubTokens()) {
            write(aOut, tsvSubToken);
            aOut.write(LINE_BREAK);
        }
    }
}
代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-io-tsv
/**
 * Serializes a sentence: first its text, then its tokens and sub-tokens.
 * <p>
 * The covered text of the underlying UIMA sentence is split on {@code LINE_BREAK} with empty
 * segments preserved. Every segment is printed as {@code PREFIX_TEXT} + escaped segment +
 * {@code LINE_BREAK}. Each token and each sub-token is then written on its own,
 * {@code LINE_BREAK}-terminated line.
 *
 * @param aOut the writer receiving the output
 * @param aSentence the sentence to serialize
 */
public void write(PrintWriter aOut, TsvSentence aSentence)
{
    String covered = aSentence.getUimaSentence().getCoveredText();

    // text header lines
    for (String segment : splitPreserveAllTokens(covered, LINE_BREAK)) {
        String escaped = escapeText(segment);
        aOut.print(PREFIX_TEXT);
        aOut.print(escaped);
        aOut.print(LINE_BREAK);
    }

    // token lines, each token followed directly by its sub-token lines
    for (TsvToken current : aSentence.getTokens()) {
        write(aOut, current);
        aOut.write(LINE_BREAK);
        for (TsvSubToken part : current.getSubTokens()) {
            write(aOut, part);
            aOut.write(LINE_BREAK);
        }
    }
}
代码示例来源:origin: hltfbk/Excitement-Open-Platform
// Hand the covered text of the sentence annotation to the wrapped tool and run its tokenizer.
innerTool.setSentence(sentenceAnno.getCoveredText());
innerTool.tokenize();
// Fetch the token strings the tool produced for this sentence.
tokenStrings = innerTool.getTokenizedSentence();
// Match the token strings against the same sentence text. The result is a sorted map keyed by
// Integer (presumably a character offset into the sentence - confirm against DockedTokenFinder).
// NOTE(review): the meaning of the two boolean flags is not visible in this excerpt.
SortedMap<Integer, DockedToken> dockedTokens = DockedTokenFinder.find(sentenceAnno.getCoveredText(), tokenStrings, false, true);
代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-api-dao
/**
 * Creates {@code Token} annotations for every {@code Sentence} in the CAS.
 * <p>
 * Each sentence's covered text is segmented with a {@link BreakIterator} word instance for
 * {@link Locale#US}. Every segment is passed through {@code trim} and, unless {@code isEmpty}
 * reports the resulting span as empty, a {@code Token} is added to the indexes at the
 * corresponding document offsets (span offsets shifted by the sentence begin).
 *
 * @param aJCas the CAS that is read for sentences and receives the new tokens
 */
public static void tokenize(JCas aJCas)
{
    BreakIterator bi = BreakIterator.getWordInstance(Locale.US);
    for (Sentence s : select(aJCas, Sentence.class)) {
        // Both values are constant for the sentence, so fetch them once instead of on every
        // word boundary (getCoveredText() was previously re-evaluated per segment).
        String sentenceText = s.getCoveredText();
        int sentenceBegin = s.getBegin();

        bi.setText(sentenceText);
        int last = bi.first();
        int cur = bi.next();
        while (cur != BreakIterator.DONE) {
            // span is relative to the sentence text; trim is handed the array so it can adjust it
            // (presumably stripping surrounding whitespace - helper not shown here)
            int[] span = new int[] { last, cur };
            trim(sentenceText, span);
            if (!isEmpty(span[0], span[1])) {
                // shift sentence-relative offsets to document offsets
                Token seg = new Token(aJCas, span[0] + sentenceBegin, span[1] + sentenceBegin);
                seg.addToIndexes(aJCas);
            }
            last = cur;
            cur = bi.next();
        }
    }
}
代码示例来源:origin: webanno/webanno
/**
 * Adds a {@code Token} annotation for each word-level segment of every {@code Sentence}.
 * <p>
 * Segmentation uses a {@link BreakIterator} word instance for {@link Locale#US} over the
 * sentence's covered text. Each candidate span is handed to {@code trim}; spans that
 * {@code isEmpty} rejects afterwards are skipped. Accepted spans are converted from
 * sentence-relative to document offsets by adding the sentence begin.
 *
 * @param aJCas the CAS that is read for sentences and receives the new tokens
 */
public static void tokenize(JCas aJCas)
{
    BreakIterator bi = BreakIterator.getWordInstance(Locale.US);
    for (Sentence s : select(aJCas, Sentence.class)) {
        // Loop-invariant per sentence: read the text and begin offset once rather than
        // re-evaluating getCoveredText()/getBegin() for every boundary.
        final String text = s.getCoveredText();
        final int offset = s.getBegin();

        bi.setText(text);
        int last = bi.first();
        int cur = bi.next();
        while (cur != BreakIterator.DONE) {
            // candidate segment [last, cur) in sentence coordinates; trim receives the array
            // (presumably to shrink it past surrounding whitespace - helper not shown here)
            int[] span = new int[] { last, cur };
            trim(text, span);
            if (!isEmpty(span[0], span[1])) {
                Token seg = new Token(aJCas, span[0] + offset, span[1] + offset);
                seg.addToIndexes(aJCas);
            }
            last = cur;
            cur = bi.next();
        }
    }
}
代码示例来源:origin: webanno/webanno
String sentenceText = sentence.getCoveredText().toLowerCase();
for (int i = -1; (i = sentenceText.indexOf(selectedText.toLowerCase(),
i)) != -1; i = i + selectedText.length()) {
代码示例来源:origin: webanno/webanno
/**
 * Reads the TEI files under {@code classpath:/local/} and checks, for every document produced,
 * the number of tokens, POS tags, lemmas, named entities and sentences, as well as the text of
 * the first sentence. Currently ignored.
 *
 * @throws Exception if reading or any assertion helper fails
 */
@Test
@Ignore("No TEI yet to opensource ")
public void testTeiReader()
    throws Exception
{
    String expectedFirstSentence = "70 I DAG.";

    CollectionReaderDescription teiReader = createReaderDescription(
            TeiReader.class,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_SOURCE_LOCATION, "classpath:/local/",
            TeiReader.PARAM_PATTERNS, new String[] { "[+]*.xml" });

    for (JCas document : new JCasIterable(teiReader)) {
        DocumentMetaData metaData = DocumentMetaData.get(document);
        String documentText = document.getDocumentText();

        // diagnostic output: document id with text length, then the language
        System.out.printf("%s - %d%n", metaData.getDocumentId(), documentText.length());
        System.out.println(document.getDocumentLanguage());

        assertEquals(2235, JCasUtil.select(document, Token.class).size());
        assertEquals(745, JCasUtil.select(document, POS.class).size());
        assertEquals(745, JCasUtil.select(document, Lemma.class).size());
        assertEquals(0, JCasUtil.select(document, NamedEntity.class).size());
        assertEquals(30, JCasUtil.select(document, Sentence.class).size());

        Sentence first = JCasUtil.select(document, Sentence.class).iterator().next();
        assertEquals(expectedFirstSentence, first.getCoveredText());
    }
}
}
代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-ui-automation
String sentenceText = sentence.getCoveredText().toLowerCase();
for (int i = -1; (i = sentenceText.indexOf(selectedText.toLowerCase(),
i)) != -1; i = i + selectedText.length()) {
代码示例来源:origin: webanno/webanno
String sentenceText = sentence.getCoveredText().toLowerCase();
for (int i = -1; (i = sentenceText.indexOf(selectedText.toLowerCase(),
i)) != -1; i = i + selectedText.length()) {
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.teaching/de.tudarmstadt.ukp.dkpro.teaching.corpus
/**
 * Reads the Brown test corpus and verifies token, POS and sentence counts plus the first
 * sentence text of the first document, and that exactly three documents are produced.
 *
 * @throws Exception if reading or any assertion helper fails
 */
@Test
public void brownReaderTest()
    throws Exception
{
    String[] includePatterns = new String[] {
            ResourceCollectionReaderBase.INCLUDE_PREFIX + "*.xml"
    };
    CollectionReader brownReader = createCollectionReader(
            BrownCorpusReader.class,
            BrownCorpusReader.PARAM_PATH, "src/test/resources/test_corpora/brown/",
            BrownCorpusReader.PARAM_PATTERNS, includePatterns);

    String expectedFirstSentence = "The Fulton County Grand Jury said Friday an investigation of Atlanta's recent primary election produced `` no evidence '' that any irregularities took place . ";

    int documentCount = 0;
    for (JCas document : new JCasIterable(brownReader)) {
        // detailed checks apply to the first document only
        boolean isFirstDocument = documentCount == 0;
        if (isFirstDocument) {
            assertEquals(2239, JCasUtil.select(document, Token.class).size());
            assertEquals(2239, JCasUtil.select(document, POS.class).size());
            assertEquals(98, JCasUtil.select(document, Sentence.class).size());

            Sentence first = JCasUtil.select(document, Sentence.class).iterator().next();
            assertEquals(expectedFirstSentence, first.getCoveredText());
        }
        documentCount++;
    }

    assertEquals(3, documentCount);
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.opennlp-asl
/**
 * Segments one zone of text into sentences and/or tokens.
 * <p>
 * When sentence writing is enabled, the sentence model is run on {@code aText} and a sentence is
 * created for each detected span, shifted by {@code aZoneBegin}. When token writing is enabled,
 * every {@code Sentence} covered by the zone is tokenized with the token model and a token is
 * created for each span, shifted by the sentence's begin offset.
 *
 * @param aJCas the CAS that receives the annotations
 * @param aText the text of the zone
 * @param aZoneBegin the offset of the zone within the document
 * @throws AnalysisEngineProcessException declared by the signature; propagated from the calls made here
 */
@Override
protected void process(JCas aJCas, String aText, int aZoneBegin)
    throws AnalysisEngineProcessException
{
    if (isWriteSentence()) {
        for (Span sentenceSpan : sentenceModelProvider.getResource().sentPosDetect(aText)) {
            // spans are relative to aText; move them into document coordinates
            int begin = sentenceSpan.getStart() + aZoneBegin;
            int end = sentenceSpan.getEnd() + aZoneBegin;
            createSentence(aJCas, begin, end);
        }
    }

    if (isWriteToken()) {
        int zoneEnd = aZoneBegin + aText.length();
        for (Sentence sentence : selectCovered(aJCas, Sentence.class, aZoneBegin, zoneEnd)) {
            Span[] tokenSpans = tokenModelProvider.getResource()
                    .tokenizePos(sentence.getCoveredText());
            for (Span tokenSpan : tokenSpans) {
                // spans are relative to the sentence text; shift by the sentence begin
                createToken(aJCas, tokenSpan.getStart() + sentence.getBegin(),
                        tokenSpan.getEnd() + sentence.getBegin());
            }
        }
    }
}
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.teaching/de.tudarmstadt.ukp.dkpro.teaching.corpus
/**
 * Reads the Tiger test corpus file and verifies, for the first document, the token, lemma, POS
 * and sentence counts, the first sentence text and the document title; also checks that twenty
 * documents are produced in total.
 *
 * @throws Exception if reading or any assertion helper fails
 */
@Test
public void tigerTest()
    throws Exception
{
    CollectionReader tigerReader = createCollectionReader(
            TigerCorpusReader.class,
            TigerCorpusReader.PARAM_FILE, "src/test/resources/test_corpora/tiger/tiger.txt");

    String expectedFirstSentence = "`` Ross Perot wäre vielleicht ein prächtiger Diktator '' ";

    int documentCount = 0;
    for (JCas document : new JCasIterable(tigerReader)) {
        // detailed checks apply to the first document only
        boolean isFirstDocument = documentCount == 0;
        if (isFirstDocument) {
            assertEquals(9, JCasUtil.select(document, Token.class).size());
            assertEquals(9, JCasUtil.select(document, Lemma.class).size());
            assertEquals(9, JCasUtil.select(document, POS.class).size());
            assertEquals(1, JCasUtil.select(document, Sentence.class).size());

            Sentence first = JCasUtil.select(document, Sentence.class).iterator().next();
            assertEquals(expectedFirstSentence, first.getCoveredText());

            assertEquals("Sentence 1", DocumentMetaData.get(document).getDocumentTitle());
        }
        documentCount++;
    }

    assertEquals(20, documentCount);
}
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.teaching/de.tudarmstadt.ukp.dkpro.teaching.corpus
/**
 * Reads the Wacky (DEWAC edition) test corpus, prints each document's text, and verifies for the
 * first document the token, lemma, POS and sentence counts, the first sentence text and the
 * document title; also checks that four documents are produced in total.
 *
 * @throws Exception if reading or any assertion helper fails
 */
@Test
public void wackyTest()
    throws Exception
{
    CollectionReader wackyReader = createCollectionReader(
            WackyCorpusReader.class,
            WackyCorpusReader.PARAM_PATH, "src/test/resources/test_corpora/wacky/",
            WackyCorpusReader.PARAM_LANGUAGE_EDITION, WackyLanguageEdition.DEWAC.name());

    String expectedFirstSentence = "Nikita ( La Femme Nikita ) Dieser Episodenführer wurde von September 1998 bis Mai 1999 von Konstantin C.W. Volkmann geschrieben und im Mai 2000 von Stefan Börzel übernommen . ";

    int documentCount = 0;
    for (JCas document : new JCasIterable(wackyReader)) {
        System.out.println(document.getDocumentText());

        // detailed checks apply to the first document only
        boolean isFirstDocument = documentCount == 0;
        if (isFirstDocument) {
            assertEquals(11406, JCasUtil.select(document, Token.class).size());
            assertEquals(11406, JCasUtil.select(document, Lemma.class).size());
            assertEquals(11406, JCasUtil.select(document, POS.class).size());
            assertEquals(717, JCasUtil.select(document, Sentence.class).size());

            Sentence first = JCasUtil.select(document, Sentence.class).iterator().next();
            assertEquals(expectedFirstSentence, first.getCoveredText());

            assertEquals("\"http://www.epguides.de/nikita.htm\"", DocumentMetaData.get(document).getDocumentTitle());
        }
        documentCount++;
    }

    assertEquals(4, documentCount);
}
内容来源于网络,如有侵权,请联系作者删除!