本文整理了 Java 中 com.chenlb.mmseg4j.Word 类的一些代码示例,展示了 Word 类的具体用法。这些代码示例主要来源于 GitHub、Stack Overflow、Maven 等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Word 类的具体详情如下:
包路径:com.chenlb.mmseg4j.Word
类名称:Word
[英]类似 lucene 的 token
[中]类似 Lucene 的 token(词元)
代码示例来源:origin: looly/hutool
/**
 * Returns the start offset reported by the wrapped mmseg4j {@code Word}.
 *
 * @return the value of {@code word.getStartOffset()}
 */
@Override
public int getStartOffset() {
    final int startOffset = word.getStartOffset();
    return startOffset;
}
代码示例来源:origin: looly/hutool
/**
 * Returns the text of the wrapped mmseg4j {@code Word}.
 *
 * @return the value of {@code word.getString()}
 */
@Override
public String getText() {
    final String text = this.word.getString();
    return text;
}
代码示例来源:origin: looly/hutool
/**
 * Returns the end offset reported by the wrapped mmseg4j {@code Word}.
 *
 * @return the value of {@code word.getEndOffset()}
 */
@Override
public int getEndOffset() {
    final int endOffset = word.getEndOffset();
    return endOffset;
}
代码示例来源:origin: com.chenlb.mmseg4j/mmseg4j-solr
/**
 * Advances to the next token: clears the attributes, asks {@code mmSeg} for
 * the next {@code Word} and, when one is available, copies its characters,
 * offsets and type into the term, offset and type attributes.
 *
 * @return {@code true} if a word was obtained, {@code false} when
 *         {@code mmSeg.next()} returned {@code null}
 * @throws IOException declared by the overridden method
 */
@Override
public final boolean incrementToken() throws IOException {
    clearAttributes();

    final Word current = mmSeg.next();
    if (current == null) {
        // Nothing left to emit.
        return false;
    }

    termAtt.copyBuffer(current.getSen(), current.getWordOffset(), current.getLength());
    offsetAtt.setOffset(current.getStartOffset(), current.getEndOffset());
    typeAtt.setType(current.getType());
    return true;
}
}
代码示例来源:origin: medcl/elasticsearch-analysis-mmseg
Word word = chunk.words[i];
if(word.getLength() < 3) {
cks.add(word);
} else {
char[] chs = word.getSen();
int offset = word.getWordOffset(), n = 0, wordEnd = word.getWordOffset()+word.getLength();
int senStartOffset = word.getStartOffset() - offset; //sen 在文件中的位置
int end = -1; //上一次找到的位置
for(; offset<wordEnd-1; offset++) {
int idx = search(chs, offset, 1);
if(idx > -1) {
cks.add(new Word(chs, senStartOffset, offset, 2));
end = offset+2;
n++;
} else if(offset >= end) { //有单字
cks.add(new Word(chs, senStartOffset, offset, 1));
end = offset+1;
cks.add(new Word(chs, senStartOffset, offset, 1));
代码示例来源:origin: medcl/elasticsearch-analysis-mmseg
/**
 * Builds a {@code String} from the slice of the sentence buffer that this
 * word covers.
 *
 * @return a new string of {@code getLength()} chars taken from
 *         {@code getSen()} starting at {@code getWordOffset()}
 */
public String getString() {
    final char[] sentence = getSen();
    final int from = getWordOffset();
    final int length = getLength();
    return new String(sentence, from, length);
}
代码示例来源:origin: com.chenlb.mmseg4j/mmseg4j-core
/**
 * Computes the end offset of this word.
 *
 * @return {@code getStartOffset()} plus {@code getLength()}
 */
public int getEndOffset() {
    final int start = getStartOffset();
    final int length = getLength();
    return start + length;
}
public int getDegree() {
代码示例来源:origin: medcl/elasticsearch-analysis-mmseg
/**
 * Word Length.
 *
 * <p>Computed on first use while {@code len} is negative: the lengths of all
 * non-null entries in {@code words} are summed into {@code len}, and
 * {@code count} is set to the number of non-null entries. Later calls return
 * the stored {@code len}.
 *
 * @return the summed length of the non-null words
 */
public int getLen() {
    if (len >= 0) {
        // Already computed.
        return len;
    }

    len = 0;
    count = 0;
    for (Word w : words) {
        if (w == null) {
            continue;
        }
        len += w.getLength();
        count++;
    }
    return len;
}
代码示例来源:origin: medcl/elasticsearch-analysis-mmseg
/**
 * Creates a {@code Word} of the given type from the buffered sentence.
 *
 * @param bufSentence buffer whose content is converted via {@code toChars}
 * @param startIdx    start index handed to the {@code Word} constructor
 * @param type        type handed to the {@code Word} constructor
 * @return the newly constructed word
 */
private Word createWord(StringBuilder bufSentence, int startIdx, String type) {
    final char[] sentenceChars = toChars(bufSentence);
    return new Word(sentenceChars, startIdx, type);
}
代码示例来源:origin: medcl/elasticsearch-analysis-mmseg
/**
 * Assembles a {@code Chunk} of up to three words from parallel arrays.
 *
 * <p>For each of the three slots whose offset lies inside {@code chs}, a
 * {@code Word} of length {@code tailLen[i] + 1} is created at that offset.
 * A single-character word (tail length 0) additionally takes its degree from
 * the frequency of the matching {@code CharNode}, when that node is present.
 *
 * @param sen     sentence supplying the start offset for every word
 * @param chs     character buffer shared by the created words
 * @param tailLen per-slot tail length (word length minus one)
 * @param offsets per-slot offset of the word inside {@code chs}
 * @param cns     per-slot character node, may contain {@code null}
 * @return the populated chunk; slots whose offset is out of range stay unset
 */
private Chunk createChunk(Sentence sen, char[] chs, int[] tailLen, int[] offsets, CharNode[] cns/*, char[][] cks*/) {
    final Chunk chunk = new Chunk();
    for (int slot = 0; slot < 3; slot++) {
        final int wordOffset = offsets[slot];
        if (wordOffset >= chs.length) {
            continue;
        }

        final Word created = new Word(chs, sen.getStartOffset(), wordOffset, tailLen[slot] + 1);
        chunk.words[slot] = created;

        if (tailLen[slot] != 0) {
            continue;
        }
        // Single character: its degree of freedom comes from the character frequency.
        final CharNode node = cns[slot];
        if (node != null) {
            created.setDegree(node.getFreq());
        }
    }
    return chunk;
}
代码示例来源:origin: com.chenlb.mmseg4j/mmseg4j-core
/**
 * Sum of Degree of Morphemic Freedom of One-Character.
 *
 * <p>Computed on first use while {@code sumDegree} is negative: the degrees
 * of all non-null words whose degree is greater than -1 are added up and the
 * total is stored in {@code sumDegree}.
 *
 * @return the stored sum of degrees
 */
public int getSumDegree() {
    if (sumDegree >= 0) {
        // Already computed.
        return sumDegree;
    }

    int total = 0;
    for (Word w : words) {
        if (w == null) {
            continue;
        }
        if (w.getDegree() >= 0) {
            total += w.getDegree();
        }
    }
    sumDegree = total;
    return sumDegree;
}
代码示例来源:origin: chenlb/mmseg4j-solr
/**
 * Produces the next token. After clearing the attributes, the next
 * {@code Word} is fetched from {@code mmSeg}; if there is one, its characters,
 * start/end offsets and type are written to the corresponding attributes.
 *
 * @return {@code false} when {@code mmSeg.next()} yields {@code null},
 *         otherwise {@code true}
 * @throws IOException declared by the overridden method
 */
@Override
public final boolean incrementToken() throws IOException {
    clearAttributes();

    final Word token = mmSeg.next();
    final boolean hasToken = token != null;
    if (hasToken) {
        termAtt.copyBuffer(token.getSen(), token.getWordOffset(), token.getLength());
        offsetAtt.setOffset(token.getStartOffset(), token.getEndOffset());
        typeAtt.setType(token.getType());
    }
    return hasToken;
}
}
代码示例来源:origin: chenlb/mmseg4j-core
Word word = chunk.words[i];
if(word.getLength() < 3) {
cks.add(word);
} else {
char[] chs = word.getSen();
int offset = word.getWordOffset(), n = 0, wordEnd = word.getWordOffset()+word.getLength();
int senStartOffset = word.getStartOffset() - offset; //sen 在文件中的位置
int end = -1; //上一次找到的位置
for(; offset<wordEnd-1; offset++) {
int idx = search(chs, offset, 1);
if(idx > -1) {
cks.add(new Word(chs, senStartOffset, offset, 2));
end = offset+2;
n++;
} else if(offset >= end) { //有单字
cks.add(new Word(chs, senStartOffset, offset, 1));
end = offset+1;
cks.add(new Word(chs, senStartOffset, offset, 1));
代码示例来源:origin: chenlb/mmseg4j-core
/**
 * Returns this word's text as a new {@code String}.
 *
 * @return the {@code getLength()} chars of {@code getSen()} beginning at
 *         {@code getWordOffset()}
 */
public String getString() {
    final char[] buffer = getSen();
    final int begin = getWordOffset();
    final int size = getLength();
    return new String(buffer, begin, size);
}
代码示例来源:origin: chenlb/mmseg4j-core
/**
 * Returns the end offset of this word.
 *
 * @return the sum of {@code getStartOffset()} and {@code getLength()}
 */
public int getEndOffset() {
    final int begin = getStartOffset();
    final int size = getLength();
    return begin + size;
}
public int getDegree() {
代码示例来源:origin: chenlb/mmseg4j-core
/**
 * Word Length.
 *
 * <p>While {@code len} is negative the value is (re)computed: {@code len}
 * becomes the total length of the non-null entries of {@code words} and
 * {@code count} the number of such entries. Otherwise the stored {@code len}
 * is returned unchanged.
 *
 * @return the total length of the non-null words
 */
public int getLen() {
    if (len >= 0) {
        return len;
    }

    len = 0;
    count = 0;
    for (Word entry : words) {
        if (entry == null) {
            continue;
        }
        len += entry.getLength();
        count++;
    }
    return len;
}
代码示例来源:origin: chenlb/mmseg4j-core
/**
 * Wraps the buffered sentence in a new {@code Word}.
 *
 * @param bufSentence buffer converted to characters with {@code toChars}
 * @param startIdx    start index passed through to the {@code Word} constructor
 * @param type        type passed through to the {@code Word} constructor
 * @return the new word
 */
private Word createWord(StringBuilder bufSentence, int startIdx, String type) {
    final char[] chars = toChars(bufSentence);
    final Word created = new Word(chars, startIdx, type);
    return created;
}
代码示例来源:origin: com.chenlb.mmseg4j/mmseg4j-core
/**
 * Builds a {@code Chunk} holding at most three words.
 *
 * <p>Slot {@code i} is filled only when {@code offsets[i]} is inside
 * {@code chs}; the word then has length {@code tailLen[i] + 1}. When the tail
 * length is 0 (a single character) and {@code cns[i]} is not {@code null},
 * the word's degree is set to that node's frequency.
 *
 * @param sen     sentence whose start offset is given to each word
 * @param chs     character buffer the words refer to
 * @param tailLen tail length per slot
 * @param offsets word offset inside {@code chs} per slot
 * @param cns     character node per slot, entries may be {@code null}
 * @return the chunk with the applicable slots filled
 */
private Chunk createChunk(Sentence sen, char[] chs, int[] tailLen, int[] offsets, CharNode[] cns/*, char[][] cks*/) {
    final Chunk result = new Chunk();
    for (int idx = 0; idx < 3; idx++) {
        final int start = offsets[idx];
        if (start >= chs.length) {
            continue;
        }

        final Word w = new Word(chs, sen.getStartOffset(), start, tailLen[idx] + 1);
        result.words[idx] = w;

        final boolean singleChar = tailLen[idx] == 0;
        if (singleChar) {
            // Single character: take the degree of freedom from the character frequency.
            final CharNode charNode = cns[idx];
            if (charNode != null) {
                w.setDegree(charNode.getFreq());
            }
        }
    }
    return result;
}
代码示例来源:origin: medcl/elasticsearch-analysis-mmseg
/**
 * Sum of Degree of Morphemic Freedom of One-Character.
 *
 * <p>While {@code sumDegree} is negative the sum is computed from every
 * non-null word whose degree is greater than -1 and then stored; otherwise
 * the stored value is returned as is.
 *
 * @return the stored sum of degrees
 */
public int getSumDegree() {
    if (sumDegree >= 0) {
        return sumDegree;
    }

    int accumulated = 0;
    for (Word entry : words) {
        if (entry == null) {
            continue;
        }
        if (entry.getDegree() >= 0) {
            accumulated += entry.getDegree();
        }
    }
    sumDegree = accumulated;
    return sumDegree;
}
代码示例来源:origin: medcl/elasticsearch-analysis-mmseg
/**
 * Produces the next token. The attributes are cleared, then the next
 * {@code Word} is taken from {@code mmSeg.get()}. If a word is available its
 * characters, offsets and type are copied into the attributes; otherwise
 * {@code end()} is invoked before reporting that no token is left.
 *
 * @return {@code true} if a word was obtained, {@code false} after
 *         {@code end()} has been called because there was none
 * @throws IOException declared by the overridden method
 */
@Override
public final boolean incrementToken() throws IOException {
    clearAttributes();

    final Word current = mmSeg.get().next();
    if (current == null) {
        end();
        return false;
    }

    // copyBuffer is the Lucene 3.1 call; Lucene 3.0 used setTermBuffer instead.
    termAtt.copyBuffer(current.getSen(), current.getWordOffset(), current.getLength());
    offsetAtt.setOffset(current.getStartOffset(), current.getEndOffset());
    typeAtt.setType(current.getType());
    return true;
}
}
内容来源于网络,如有侵权,请联系作者删除!