org.apache.lucene.analysis.Token.termBuffer()方法的使用及代码示例

x33g5p2x  于2022-01-30 转载在 其他  
字(9.2k)|赞(0)|评价(0)|浏览(148)

本文整理了Java中org.apache.lucene.analysis.Token.termBuffer()方法的一些代码示例,展示了Token.termBuffer()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Token.termBuffer()方法的具体详情如下:
包路径:org.apache.lucene.analysis.Token
类名称:Token
方法名:termBuffer

Token.termBuffer介绍

[英]Characters for the term text.
[中]词项(term)文本的字符。

代码示例

代码示例来源:origin: org.dspace.dependencies.solr/dspace-solr-core

/**
 * Returns the result of charType applied to the first char of the
 * token's term buffer.
 *
 * @param t token whose term buffer is inspected
 * @return whatever charType reports for index 0 of the buffer
 */
private int tokType(Token t) {
 final char firstChar = t.termBuffer()[0];
 return charType(firstChar);
}

代码示例来源:origin: org.compass-project/compass

/**
 * Builds a String from the wrapped token's term buffer.
 *
 * @return a new String holding the first termLength() chars of the buffer
 */
public String getTermText() {
  final char[] chars = token.termBuffer();
  final int len = token.termLength();
  return new String(chars, 0, len);
}

代码示例来源:origin: org.apache.lucene/com.springsource.org.apache.lucene

/**
 * Fetches the next token from the wrapped input and lower-cases its term
 * characters in place.
 *
 * @param reusableToken token instance handed to the input; must not be null
 * @return the token returned by the input, with its first termLength()
 *         chars lower-cased, or null when the input returns null
 * @throws IOException propagated from the input
 */
public final Token next(final Token reusableToken) throws IOException {
  assert reusableToken != null;
  final Token tok = input.next(reusableToken);
  if (tok == null) {
    return null;
  }

  // Only the first termLength() chars belong to the term.
  final char[] chars = tok.termBuffer();
  final int end = tok.termLength();
  int pos = 0;
  while (pos < end) {
    chars[pos] = Character.toLowerCase(chars[pos]);
    pos++;
  }
  return tok;
}
}

代码示例来源:origin: org.apache.lucene/lucene-core-jfrog

/**
 * Reads one token from the wrapped input and rewrites each of its term
 * chars with Character.toLowerCase, modifying the term buffer directly.
 *
 * @param reusableToken token passed through to the input; must not be null
 * @return the (mutated) token from the input, or null if the input had none
 * @throws IOException propagated from the input
 */
public final Token next(final Token reusableToken) throws IOException {
  assert reusableToken != null;
  final Token current = input.next(reusableToken);
  if (current != null) {
    final char[] term = current.termBuffer();
    final int termLen = current.termLength();
    for (int idx = 0; idx != termLen; ++idx) {
      final char lowered = Character.toLowerCase(term[idx]);
      term[idx] = lowered;
    }
    return current;
  } else {
    return null;
  }
}
}

代码示例来源:origin: org.dspace.dependencies.solr/dspace-solr-core

/**
 * Reports whether the token's term is contained in commonWords.
 * Always false when commonWords is null.
 */
private boolean isCommon(Token token) {
 if (commonWords == null) {
  return false;
 }
 return commonWords.contains(token.termBuffer(), 0, token.termLength());
}

代码示例来源:origin: org.dspace.dependencies.solr/dspace-solr-core

/**
 * Runs source through a tokenizer obtained from loadTokenizer and collects
 * the non-empty term texts in the order they are produced.
 *
 * @param source     text to tokenize
 * @param tokFactory factory handed to loadTokenizer
 * @return list of term strings; zero-length terms are skipped
 * @throws RuntimeException wrapping any IOException raised while reading tokens
 */
private static List<String> splitByTokenizer(String source, TokenizerFactory tokFactory){
 final StringReader input = new StringReader( source );
 final TokenStream stream = loadTokenizer(tokFactory, input);
 final List<String> terms = new ArrayList<String>();
 try {
  Token current;
  while( (current = stream.next()) != null ){
   final String term = new String(current.termBuffer(), 0, current.termLength());
   if( term.length() != 0 ) {
    terms.add( term );
   }
  }
 } catch (IOException ioe) {
  throw new RuntimeException(ioe);
 } finally {
  // the reader is closed whether or not tokenizing succeeded
  input.close();
 }
 return terms;
}

代码示例来源:origin: org.dspace.dependencies.solr/dspace-solr-core

/**
 * Passes the query through the configured analyzer and concatenates the
 * resulting term texts with no separator.
 *
 * @param query raw query text
 * @return query unchanged when no analyzer is set; otherwise the
 *         concatenation of all analyzed terms
 * @throws IOException propagated from the analyzer / token stream
 */
String getAnalyzedQuery( String query ) throws IOException
{
 if( analyzer == null ) {
  return query;
 }
 final StringBuilder normalized = new StringBuilder();
 final TokenStream stream = analyzer.reusableTokenStream( "", new StringReader( query ) );
 stream.reset();

 for( Token tok = stream.next(); tok != null; tok = stream.next() ) {
  // append the term chars straight from the buffer
  normalized.append( tok.termBuffer(), 0, tok.termLength() );
 }
 return normalized.toString();
}

代码示例来源:origin: org.apache.lucene/com.springsource.org.apache.lucene

/**
 * On the first call, reads everything available from input into the
 * reusable token's term buffer and returns it as a single token; on every
 * later call returns null.
 *
 * @param reusableToken token that is cleared and filled; must not be null
 * @return reusableToken with its term length set to the number of chars
 *         read, or null once the single token has been produced
 * @throws IOException propagated from input.read
 */
public Token next(final Token reusableToken) throws IOException {
 assert reusableToken != null;
 if (done) {
  return null;
 }
 done = true;
 reusableToken.clear();
 char[] chars = reusableToken.termBuffer();
 int filled = 0;
 for (int n = input.read(chars, filled, chars.length - filled);
      n != -1;
      n = input.read(chars, filled, chars.length - filled)) {
  filled += n;
  if (filled == chars.length) {
   // buffer is full: ask the token for a larger one before reading on
   chars = reusableToken.resizeTermBuffer(1 + chars.length);
  }
 }
 reusableToken.setTermLength(filled);
 return reusableToken;
}

代码示例来源:origin: org.apache.lucene/lucene-core-jfrog

/**
 * Produces exactly one token holding all chars read from input. The done
 * flag is set on the first call, so subsequent calls return null.
 *
 * @param reusableToken token to clear and fill; must not be null
 * @return the filled reusableToken on the first call, null afterwards
 * @throws IOException propagated from input.read
 */
public Token next(final Token reusableToken) throws IOException {
 assert reusableToken != null;
 if (done) {
  return null;
 }
 done = true;

 int total = 0;
 reusableToken.clear();
 char[] target = reusableToken.termBuffer();
 int count;
 while ((count = input.read(target, total, target.length - total)) != -1) {
  total += count;
  // grow the term buffer as soon as it has no free space left
  if (total == target.length) {
   target = reusableToken.resizeTermBuffer(1 + target.length);
  }
 }
 reusableToken.setTermLength(total);
 return reusableToken;
}

代码示例来源:origin: org.dspace.dependencies.solr/dspace-solr-core

/**
 * Clones orig into a token covering term chars [start, end) of orig's
 * term buffer.
 *
 * @param orig  token to clone from
 * @param start index of the first term char to keep
 * @param end   index one past the last term char to keep
 * @return the clone, with adjusted offsets when applicable
 */
private Token newTok(Token orig, int start, int end) {
 final int origStart = orig.startOffset();
 final int origEnd = orig.endOffset();
 // When the offset span differs from the term length, assume the token is
 // a synonym and leave its offsets untouched.
 final boolean offsetsMatchTerm = orig.termLength() == origEnd - origStart;
 final int newStart = offsetsMatchTerm ? origStart + start : origStart;
 final int newEnd = offsetsMatchTerm ? origStart + end : origEnd;
 return (Token)orig.clone(orig.termBuffer(), start, end - start, newStart, newEnd);
}

代码示例来源:origin: org.apache.lucene/lucene-core-jfrog

/**
 * Fetches the next token from the input and, if any term char lies in the
 * range U+00C0..U+FB06, replaces the term with the contents of output
 * (length outputPos) after calling removeAccents.
 *
 * @param reusableToken token passed to the input; must not be null
 * @return the token from the input (possibly with a replaced term), or
 *         null when the input returns null
 * @throws java.io.IOException propagated from the input
 */
public final Token next(final Token reusableToken) throws java.io.IOException {
 assert reusableToken != null;
 final Token tok = input.next(reusableToken);
 if (tok == null) {
  return null;
 }
 final char[] chars = tok.termBuffer();
 final int len = tok.termLength();
 // Skip over leading chars that are outside the rewrite range.
 int i = 0;
 while (i < len && (chars[i] < '\u00c0' || chars[i] > '\uFB06')) {
  i++;
 }
 // i < len means a char in range was found; otherwise the token is
 // returned as-is.
 if (i < len) {
  removeAccents(chars, len);
  tok.setTermBuffer(output, 0, outputPos);
 }
 return tok;
}

代码示例来源:origin: org.dspace.dependencies.solr/dspace-solr-core

/**
 * Recursively looks up upcoming tokens in map.submap. Each token that is
 * part of a match is pushed onto the front of matched; a token that does
 * not lead to a match is pushed back via pushTok.
 *
 * @param map node to match from
 * @return the node reached through the submap recursion when one matched;
 *         otherwise map itself if it has synonyms; otherwise null
 * @throws IOException declared by the original signature (presumably raised by nextTok)
 */
private SynonymMap match(SynonymMap map) throws IOException {
 SynonymMap longer = null;
 // A token is only consumed when this node has a submap to descend into.
 final Token tok = (map.submap == null) ? null : nextTok();
 if (tok != null) {
  // check for positionIncrement!=1?  if>1, should not match, if==0, check multiple at this level?
  final SynonymMap child = map.submap.get(tok.termBuffer(), 0, tok.termLength());
  longer = (child == null) ? null : match(child);
  if (longer == null) {
   // push back unmatched token
   pushTok(tok);
  } else {
   matched.addFirst(tok);
  }
 }
 if (longer != null) {
  return longer;
 }
 // No longer sequence matched, so this node is the match if it has synonyms.
 return (map.synonyms != null) ? map : null;
}

代码示例来源:origin: org.apache.lucene/com.springsource.org.apache.lucene

/**
 * Returns the next input token. When at least one of its term chars is
 * between U+00C0 and U+FB06 inclusive, removeAccents is invoked once and
 * the term is replaced by output[0..outputPos).
 *
 * @param reusableToken token passed to the input; must not be null
 * @return the input's token, rewritten if needed, or null at end of input
 * @throws java.io.IOException propagated from the input
 */
public final Token next(final Token reusableToken) throws java.io.IOException {
 assert reusableToken != null;
 final Token current = input.next(reusableToken);
 if (current == null) {
  return null;
 }
 final char[] term = current.termBuffer();
 final int termLen = current.termLength();

 // Scan until the first char that falls inside the rewrite range.
 boolean needsRewrite = false;
 for (int idx = 0; idx < termLen && !needsRewrite; idx++) {
  final char ch = term[idx];
  needsRewrite = ch >= '\u00c0' && ch <= '\uFB06';
 }
 if (needsRewrite) {
  removeAccents(term, termLen);
  current.setTermBuffer(output, 0, outputPos);
 }
 // If nothing required rewriting the token is returned untouched.
 return current;
}

代码示例来源:origin: org.apache.lucene/lucene-core-jfrog

/**
 * Fetches the next token from the input and runs stemmer.stem over its
 * term; when stem returns true the term is replaced by the stemmer's
 * result buffer.
 *
 * @param reusableToken token passed to the input; must not be null
 * @return the input's token (possibly with a replaced term), or null when
 *         the input returns null
 * @throws IOException propagated from the input
 */
public final Token next(final Token reusableToken) throws IOException {
  assert reusableToken != null;
  final Token tok = input.next(reusableToken);
  if (tok != null) {
    final boolean changed = stemmer.stem(tok.termBuffer(), 0, tok.termLength());
    if (changed) {
      tok.setTermBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
    }
  }
  return tok;
}
}

代码示例来源:origin: org.apache.lucene/com.springsource.org.apache.lucene

/**
 * Reads one token from the input and applies the stemmer to its term
 * chars. The term buffer is only overwritten when stemmer.stem reports
 * true.
 *
 * @param reusableToken token passed to the input; must not be null
 * @return the token from the input, or null if the input is exhausted
 * @throws IOException propagated from the input
 */
public final Token next(final Token reusableToken) throws IOException {
  assert reusableToken != null;
  final Token current = input.next(reusableToken);
  if (current == null) {
    return null;
  }

  final char[] term = current.termBuffer();
  final int termLen = current.termLength();
  if (stemmer.stem(term, 0, termLen)) {
    current.setTermBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
  }
  return current;
}
}

代码示例来源:origin: ajermakovics/eclipse-instasearch

/**
 * Copies the term chars, position increment and start/end offsets of the
 * given token into termAtt, posAtt and offsetAtt respectively.
 *
 * @param token source of the values
 */
private void applyToken(Token token)
{
  // term text
  final char[] chars = token.termBuffer();
  final int len = token.termLength();
  termAtt.setTermBuffer(chars, 0, len);

  // position increment
  final int posInc = token.getPositionIncrement();
  posAtt.setPositionIncrement(posInc);

  // character offsets
  final int start = token.startOffset();
  final int end = token.endOffset();
  offsetAtt.setOffset(start, end);
}

代码示例来源:origin: org.dspace.dependencies.solr/dspace-solr-core

/**
 * Consumes the tokens that follow t while their position increment is 0,
 * writing each one only if no token already returned by output() has the
 * same term chars. The first token with a non-zero position increment is
 * pushed back.
 *
 * @param t the current token; it is written (and null is returned instead)
 *          as soon as at least one following zero-increment token is seen
 * @return t when no zero-increment token followed it, otherwise null
 * @throws IOException declared by the original signature (presumably raised by read)
 */
protected Token process(Token t) throws IOException {
  Token tok = read();
  while (tok != null && tok.getPositionIncrement()==0) {
   if (null != t) {
    write(t);
    t = null;
   }
   // These do not change during the scan, so read them once.
   final int tokLen = tok.termLength();
   final char[] tokBuf = tok.termBuffer();
   boolean dup=false;
   for (Token outTok : output()) {
    if (outTok.termLength() == tokLen && ArraysUtils.equals(outTok.termBuffer(), 0, tokBuf, 0, tokLen)) {
     dup=true;
     // one match is enough; no need to compare against the rest
     break;
    }
   }
   if (!dup){
    write(tok);
   }
   tok = read();
  }
  if (tok != null) {
   // not part of this same-position run: hand it back for the next call
   pushBack(tok);
  }
  return t;
}
}

代码示例来源:origin: org.apache.lucene/com.springsource.org.apache.lucene

/**
 * Returns the next input Token whose term is not contained in stopWords.
 * When enablePositionIncrements is set, the position increments of the
 * skipped tokens are added to the returned token's increment.
 *
 * @param reusableToken token passed to the input; must not be null
 * @return the first non-stop-word token, or null at end of stream
 * @throws IOException propagated from the input
 */
public final Token next(final Token reusableToken) throws IOException {
 assert reusableToken != null;
 int skipped = 0;
 Token tok;
 while ((tok = input.next(reusableToken)) != null) {
  final boolean isStopWord = stopWords.contains(tok.termBuffer(), 0, tok.termLength());
  if (isStopWord) {
   // remember how many positions this dropped token occupied
   skipped += tok.getPositionIncrement();
   continue;
  }
  if (enablePositionIncrements) {
   tok.setPositionIncrement(tok.getPositionIncrement() + skipped);
  }
  return tok;
 }
 // reached EOS -- return null
 return null;
}

代码示例来源:origin: org.apache.lucene/lucene-core-jfrog

/**
 * Pulls tokens from the input until one is found whose term is not in
 * stopWords, and returns it. If enablePositionIncrements is true the
 * returned token's position increment is increased by the sum of the
 * increments of the tokens that were skipped.
 *
 * @param reusableToken token passed to the input; must not be null
 * @return the first token that is not a stop word, or null when the input
 *         is exhausted
 * @throws IOException propagated from the input
 */
public final Token next(final Token reusableToken) throws IOException {
 assert reusableToken != null;
 int gap = 0;
 Token candidate = input.next(reusableToken);
 if (candidate == null) {
  return null;
 }
 do {
  if (!stopWords.contains(candidate.termBuffer(), 0, candidate.termLength())) {
   // first non-stop word found
   if (enablePositionIncrements) {
    final int adjusted = candidate.getPositionIncrement() + gap;
    candidate.setPositionIncrement(adjusted);
   }
   return candidate;
  }
  gap += candidate.getPositionIncrement();
  candidate = input.next(reusableToken);
 } while (candidate != null);
 // reached EOS -- return null
 return null;
}

代码示例来源:origin: org.dspace.dependencies.solr/dspace-solr-core

/**
 * Drains the token stream into a NamedList, adding one "token" entry per
 * token with its value, start, end, posInc and type.
 *
 * @param tstream stream to read until next() returns null
 * @return list of per-token entries in stream order
 * @throws IOException propagated from the stream
 */
static NamedList<NamedList<Object>> getTokens(TokenStream tstream) throws IOException {
 // A NamedList is used for the outer container because token order matters.
 final NamedList<NamedList<Object>> result = new NamedList<NamedList<Object>>();
 for (Token tok = tstream.next(); tok != null; tok = tstream.next()) {
  final NamedList<Object> entry = new SimpleOrderedMap<Object>();
  result.add("token", entry);
  final String value = new String(tok.termBuffer(), 0, tok.termLength());
  entry.add("value", value);
  entry.add("start", tok.startOffset());
  entry.add("end", tok.endOffset());
  entry.add("posInc", tok.getPositionIncrement());
  entry.add("type", tok.type());
  //TODO: handle payloads
 }
 return result;
}

相关文章