com.chenlb.mmseg4j.Word类的使用及代码示例

x33g5p2x  于2022-02-03 转载在 其他  
字(5.5k)|赞(0)|评价(0)|浏览(170)

本文整理了 Java 中 com.chenlb.mmseg4j.Word 类的一些代码示例,展示了 Word 类的具体用法。这些代码示例主要来源于 Github/Stackoverflow/Maven 等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Word 类的具体详情如下:
包路径:com.chenlb.mmseg4j.Word
类名称:Word

Word介绍

[英]Similar to a Lucene token
[中]类似 Lucene 的 token(词元)

代码示例

代码示例来源:origin: looly/hutool

/**
 * Returns the start offset reported by the wrapped mmseg4j word.
 *
 * @return the value of {@code word.getStartOffset()}
 */
@Override
public int getStartOffset() {
  return word.getStartOffset();
}

代码示例来源:origin: looly/hutool

/**
 * Returns the text of the wrapped mmseg4j word.
 *
 * @return the value of {@code word.getString()}
 */
@Override
public String getText() {
  return this.word.getString();
}

代码示例来源:origin: looly/hutool

/**
 * Returns the end offset reported by the wrapped mmseg4j word.
 *
 * @return the value of {@code word.getEndOffset()}
 */
@Override
public int getEndOffset() {
  return word.getEndOffset();
}

代码示例来源:origin: com.chenlb.mmseg4j/mmseg4j-solr

/**
 * Fetches the next word from the segmenter and copies its text, offsets and
 * type into the token attributes.
 *
 * @return {@code true} when a word was emitted, {@code false} when
 *         {@code mmSeg.next()} returned {@code null}
 * @throws IOException propagated from the calls made in this method
 */
@Override
public final boolean incrementToken() throws IOException {
  clearAttributes();
  final Word next = mmSeg.next();
  if (next == null) {
    // Nothing left to emit.
    return false;
  }
  termAtt.copyBuffer(next.getSen(), next.getWordOffset(), next.getLength());
  offsetAtt.setOffset(next.getStartOffset(), next.getEndOffset());
  typeAtt.setType(next.getType());
  return true;
}
}

代码示例来源:origin: medcl/elasticsearch-analysis-mmseg

// Words whose length is below 3 are added to cks unchanged; longer words are
// scanned offset by offset and split into 2-length or 1-length pieces.
// NOTE(review): this excerpt is truncated, so the braces below are unbalanced.
Word word = chunk.words[i];
if(word.getLength() < 3) {
  cks.add(word);
} else {
  char[] chs = word.getSen();
  int offset = word.getWordOffset(), n = 0, wordEnd = word.getWordOffset()+word.getLength();
  int senStartOffset = word.getStartOffset() - offset;	//position of sen within the file
  int end = -1;	//position found on the previous match
  for(; offset<wordEnd-1; offset++) {
    int idx = search(chs, offset, 1);
    if(idx > -1) {
      cks.add(new Word(chs, senStartOffset, offset, 2));
      end = offset+2;
      n++;
    } else if(offset >= end) {	//a single character is present
      cks.add(new Word(chs, senStartOffset, offset, 1));
      end = offset+1;
    cks.add(new Word(chs, senStartOffset, offset, 1));

代码示例来源:origin: medcl/elasticsearch-analysis-mmseg

/**
 * Builds the word's text from the backing character array.
 *
 * @return a new string made of {@code getLength()} characters of
 *         {@code getSen()}, starting at {@code getWordOffset()}
 */
public String getString() {
  return String.valueOf(getSen(), getWordOffset(), getLength());
}

代码示例来源:origin: com.chenlb.mmseg4j/mmseg4j-core

/**
 * Computes the end offset of this word.
 *
 * @return {@code getStartOffset()} plus {@code getLength()}
 */
public int getEndOffset() {
  final int start = getStartOffset();
  return start + getLength();
}
public int getDegree() {

代码示例来源:origin: medcl/elasticsearch-analysis-mmseg

/**
 * Returns the summed length of all non-null words, computing it on first use.
 *
 * <p>While {@code len} is negative the total is recomputed, and {@code count}
 * is reset to the number of non-null words seen.
 *
 * @return the total word length
 */
public int getLen() {
  if (len >= 0) {
    // Already computed.
    return len;
  }
  len = 0;
  count = 0;
  for (Word current : words) {
    if (current == null) {
      continue;
    }
    len += current.getLength();
    count++;
  }
  return len;
}

代码示例来源:origin: medcl/elasticsearch-analysis-mmseg

/**
 * Creates a word from the buffered sentence characters.
 *
 * @param bufSentence buffer whose contents are converted via {@code toChars}
 * @param startIdx    start index passed through to the {@code Word} constructor
 * @param type        word type passed through to the {@code Word} constructor
 * @return the newly created word
 */
private Word createWord(StringBuilder bufSentence, int startIdx, String type) {
  final char[] chars = toChars(bufSentence);
  return new Word(chars, startIdx, type);
}

代码示例来源:origin: medcl/elasticsearch-analysis-mmseg

/**
 * Builds a chunk holding up to three words taken from {@code chs}.
 *
 * <p>Slot {@code i} is filled only when {@code offsets[i]} lies inside
 * {@code chs}; the word length is {@code tailLen[i] + 1}.
 *
 * @param sen     sentence providing the start offset for each word
 * @param chs     character buffer the words refer to
 * @param tailLen per-slot tail length (word length minus one)
 * @param offsets per-slot word offset into {@code chs}
 * @param cns     per-slot character node, may hold {@code null} entries
 * @return the populated chunk
 */
private Chunk createChunk(Sentence sen, char[] chs, int[] tailLen, int[] offsets, CharNode[] cns/*, char[][] cks*/) {
  final Chunk chunk = new Chunk();

  for (int slot = 0; slot < 3; slot++) {
    if (offsets[slot] >= chs.length) {
      continue;
    }
    chunk.words[slot] = new Word(chs, sen.getStartOffset(), offsets[slot], tailLen[slot] + 1);
    if (tailLen[slot] != 0) {
      continue;
    }
    // Single-character word: its degree of freedom comes from the character frequency.
    final CharNode node = cns[slot];
    if (node != null) {
      chunk.words[slot].setDegree(node.getFreq());
    }
  }
  return chunk;
}

代码示例来源:origin: com.chenlb.mmseg4j/mmseg4j-core

/**
 * Sum of Degree of Morphemic Freedom of One-Character.
 *
 * <p>Computed on first use (while {@code sumDegree} is negative) by adding
 * the degree of every non-null word whose degree is greater than -1.
 *
 * @return the cached or freshly computed sum
 */
public int getSumDegree() {
  if (sumDegree >= 0) {
    return sumDegree;
  }
  int total = 0;
  for (Word current : words) {
    if (current == null || current.getDegree() <= -1) {
      continue;
    }
    total += current.getDegree();
  }
  sumDegree = total;
  return sumDegree;
}

代码示例来源:origin: chenlb/mmseg4j-solr

/**
 * Pulls the next segmented word and publishes it through the term, offset
 * and type attributes.
 *
 * @return {@code false} when {@code mmSeg.next()} yields {@code null},
 *         otherwise {@code true}
 * @throws IOException propagated from the calls made in this method
 */
@Override
public final boolean incrementToken() throws IOException {
  clearAttributes();
  final Word segmented = mmSeg.next();
  final boolean hasWord = segmented != null;
  if (hasWord) {
    termAtt.copyBuffer(segmented.getSen(), segmented.getWordOffset(), segmented.getLength());
    offsetAtt.setOffset(segmented.getStartOffset(), segmented.getEndOffset());
    typeAtt.setType(segmented.getType());
  }
  return hasWord;
}
}

代码示例来源:origin: chenlb/mmseg4j-core

// Words whose length is below 3 are added to cks unchanged; longer words are
// scanned offset by offset and split into 2-length or 1-length pieces.
// NOTE(review): this excerpt is truncated, so the braces below are unbalanced.
Word word = chunk.words[i];
if(word.getLength() < 3) {
  cks.add(word);
} else {
  char[] chs = word.getSen();
  int offset = word.getWordOffset(), n = 0, wordEnd = word.getWordOffset()+word.getLength();
  int senStartOffset = word.getStartOffset() - offset;	//position of sen within the file
  int end = -1;	//position found on the previous match
  for(; offset<wordEnd-1; offset++) {
    int idx = search(chs, offset, 1);
    if(idx > -1) {
      cks.add(new Word(chs, senStartOffset, offset, 2));
      end = offset+2;
      n++;
    } else if(offset >= end) {	//a single character is present
      cks.add(new Word(chs, senStartOffset, offset, 1));
      end = offset+1;
    cks.add(new Word(chs, senStartOffset, offset, 1));

代码示例来源:origin: chenlb/mmseg4j-core

/**
 * Returns this word's characters as a string.
 *
 * @return a new string covering {@code getLength()} characters of
 *         {@code getSen()} from {@code getWordOffset()} onward
 */
public String getString() {
  final char[] sentence = getSen();
  final int from = getWordOffset();
  final int size = getLength();
  return new String(sentence, from, size);
}

代码示例来源:origin: chenlb/mmseg4j-core

/**
 * Returns the offset just past this word.
 *
 * @return the sum of {@code getStartOffset()} and {@code getLength()}
 */
public int getEndOffset() {
  final int begin = getStartOffset();
  final int size = getLength();
  return begin + size;
}
public int getDegree() {

代码示例来源:origin: chenlb/mmseg4j-core

/**
 * Total length of the non-null words, computed the first time it is needed.
 *
 * <p>A negative {@code len} triggers the computation, which also resets
 * {@code count} to the number of non-null words.
 *
 * @return the total word length
 */
public int getLen() {
  final boolean alreadyComputed = len >= 0;
  if (!alreadyComputed) {
    len = 0;
    count = 0;
    for (Word item : words) {
      if (item == null) {
        continue;
      }
      count++;
      len += item.getLength();
    }
  }
  return len;
}

代码示例来源:origin: chenlb/mmseg4j-core

/**
 * Wraps the buffered sentence characters in a new {@code Word}.
 *
 * @param bufSentence buffer converted to characters via {@code toChars}
 * @param startIdx    start index handed to the {@code Word} constructor
 * @param type        word type handed to the {@code Word} constructor
 * @return the new word
 */
private Word createWord(StringBuilder bufSentence, int startIdx, String type) {
  final Word created = new Word(toChars(bufSentence), startIdx, type);
  return created;
}

代码示例来源:origin: com.chenlb.mmseg4j/mmseg4j-core

/**
 * Assembles a chunk of at most three words backed by {@code chs}.
 *
 * <p>A slot is populated only if its offset is within {@code chs}. Words with
 * a zero tail length additionally receive a degree taken from the matching
 * character node, when that node is present.
 *
 * @param sen     sentence supplying the start offset
 * @param chs     character buffer shared by the words
 * @param tailLen tail length per slot; the word length is this value plus one
 * @param offsets offset into {@code chs} per slot
 * @param cns     character node per slot; entries may be {@code null}
 * @return the assembled chunk
 */
private Chunk createChunk(Sentence sen, char[] chs, int[] tailLen, int[] offsets, CharNode[] cns/*, char[][] cks*/) {
  final Chunk result = new Chunk();

  int index = 0;
  while (index < 3) {
    if (offsets[index] < chs.length) {
      result.words[index] = new Word(chs, sen.getStartOffset(), offsets[index], tailLen[index] + 1);
      final boolean singleChar = tailLen[index] == 0;
      if (singleChar) {
        // For a single character, the degree of freedom is derived from its frequency.
        final CharNode charNode = cns[index];
        if (charNode != null) {
          result.words[index].setDegree(charNode.getFreq());
        }
      }
    }
    index++;
  }
  return result;
}

代码示例来源:origin: medcl/elasticsearch-analysis-mmseg

/**
 * Sum of Degree of Morphemic Freedom of One-Character.
 *
 * <p>The value is computed once, while {@code sumDegree} is still negative:
 * every non-null word with a degree above -1 contributes its degree.
 *
 * @return the sum of the contributing degrees
 */
public int getSumDegree() {
  final boolean needsComputing = sumDegree < 0;
  if (needsComputing) {
    int accumulated = 0;
    for (Word item : words) {
      if (item == null) {
        continue;
      }
      if (item.getDegree() > -1) {
        accumulated += item.getDegree();
      }
    }
    sumDegree = accumulated;
  }
  return sumDegree;
}

代码示例来源:origin: medcl/elasticsearch-analysis-mmseg

/**
 * Emits the next segmented word through the term, offset and type attributes.
 *
 * <p>When {@code mmSeg.get().next()} returns {@code null}, {@code end()} is
 * invoked before reporting that no token is available.
 *
 * @return {@code true} if a word was emitted, {@code false} otherwise
 * @throws IOException propagated from the calls made in this method
 */
@Override
public final boolean incrementToken() throws IOException {
  clearAttributes();
  final Word next = mmSeg.get().next();
  if (next == null) {
    end();
    return false;
  }
  // copyBuffer is the Lucene 3.1 call; Lucene 3.0 used setTermBuffer with the same arguments.
  termAtt.copyBuffer(next.getSen(), next.getWordOffset(), next.getLength());
  offsetAtt.setOffset(next.getStartOffset(), next.getEndOffset());
  typeAtt.setType(next.getType());
  return true;
}
}

相关文章