本文整理了Java中org.apache.lucene.analysis.Token.termLength()
方法的一些代码示例,展示了Token.termLength()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Token.termLength()
方法的具体详情如下:
包路径:org.apache.lucene.analysis.Token
类名称:Token
方法名:termLength
[英]Length of term text in the buffer.
[中]缓冲区中术语文本的长度。
代码示例来源:origin: org.compass-project/compass
/** Returns the term text of the wrapped token as a String. */
public String getTermText() {
    // Materialize only the first termLength() chars of the (possibly larger) buffer.
    return String.valueOf(token.termBuffer(), 0, token.termLength());
}
代码示例来源:origin: org.apache.lucene/com.springsource.org.apache.lucene
/**
 * Returns the next token from the input stream with its term text
 * lower-cased in place, or null at end of stream.
 */
public final Token next(final Token reusableToken) throws IOException {
    assert reusableToken != null;
    final Token nextToken = input.next(reusableToken);
    if (nextToken == null) {
        return null;
    }
    // Lower-case each term character directly in the token's buffer.
    final char[] buffer = nextToken.termBuffer();
    for (int i = 0, n = nextToken.termLength(); i < n; i++) {
        buffer[i] = Character.toLowerCase(buffer[i]);
    }
    return nextToken;
}
}
代码示例来源:origin: org.apache.lucene/lucene-core-jfrog
/**
 * Returns the next token from the input stream with its term text
 * lower-cased in place, or null at end of stream.
 */
public final Token next(final Token reusableToken) throws IOException {
    assert reusableToken != null;
    final Token nextToken = input.next(reusableToken);
    if (nextToken == null) {
        return null;
    }
    // Lower-case each term character directly in the token's buffer.
    final char[] buffer = nextToken.termBuffer();
    for (int i = 0, n = nextToken.termLength(); i < n; i++) {
        buffer[i] = Character.toLowerCase(buffer[i]);
    }
    return nextToken;
}
}
代码示例来源:origin: org.dspace.dependencies.solr/dspace-solr-core
/** True if token is for a common term. */
private boolean isCommon(Token token) {
    // No common-word set configured means nothing counts as common.
    if (commonWords == null) {
        return false;
    }
    return commonWords.contains(token.termBuffer(), 0, token.termLength());
}
代码示例来源:origin: org.dspace.dependencies.solr/dspace-solr-core
/**
 * Tokenizes {@code source} with a tokenizer built from the given factory
 * and returns the non-empty term texts in stream order.
 */
private static List<String> splitByTokenizer(String source, TokenizerFactory tokFactory){
    StringReader reader = new StringReader(source);
    TokenStream ts = loadTokenizer(tokFactory, reader);
    List<String> tokList = new ArrayList<String>();
    try {
        Token token = ts.next();
        while (token != null) {
            String text = new String(token.termBuffer(), 0, token.termLength());
            // Skip zero-length terms so callers only see real tokens.
            if (text.length() > 0) {
                tokList.add(text);
            }
            token = ts.next();
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        reader.close();
    }
    return tokList;
}
代码示例来源:origin: org.dspace.dependencies.solr/dspace-solr-core
/**
 * Runs {@code query} through the configured analyzer and returns the
 * concatenation of the resulting term texts; returns the query unchanged
 * when no analyzer is configured.
 *
 * @param query the raw query string to normalize
 * @return the analyzed (concatenated-term) form of the query
 * @throws IOException if the underlying token stream fails
 */
String getAnalyzedQuery( String query ) throws IOException
{
    if( analyzer == null ) {
        return query;
    }
    StringBuilder norm = new StringBuilder();
    TokenStream tokens = analyzer.reusableTokenStream( "", new StringReader( query ) );
    tokens.reset();
    // Append term chars straight out of each token's buffer; avoids
    // allocating an intermediate String per token.
    for( Token token = tokens.next(); token != null; token = tokens.next() ) {
        norm.append( token.termBuffer(), 0, token.termLength() );
    }
    return norm.toString();
}
代码示例来源:origin: org.apache.lucene/com.springsource.org.apache.lucene
/**
 * Returns the next input Token whose term length falls within the
 * configured [min, max] range, or null at end of stream.
 */
public final Token next(final Token reusableToken) throws IOException
{
    assert reusableToken != null;
    // Skip tokens whose term length is outside the accepted range.
    Token nextToken = input.next(reusableToken);
    while (nextToken != null) {
        final int len = nextToken.termLength();
        if (len >= min && len <= max) {
            return nextToken;
        }
        // note: else we ignore it but should we index each part of it?
        nextToken = input.next(reusableToken);
    }
    // reached EOS -- return null
    return null;
}
}
代码示例来源:origin: org.apache.lucene/lucene-core-jfrog
/**
 * Returns the next input Token whose term length falls within the
 * configured [min, max] range, or null at end of stream.
 */
public final Token next(final Token reusableToken) throws IOException
{
    assert reusableToken != null;
    // Skip tokens whose term length is outside the accepted range.
    Token nextToken = input.next(reusableToken);
    while (nextToken != null) {
        final int len = nextToken.termLength();
        if (len >= min && len <= max) {
            return nextToken;
        }
        // note: else we ignore it but should we index each part of it?
        nextToken = input.next(reusableToken);
    }
    // reached EOS -- return null
    return null;
}
}
代码示例来源:origin: org.dspace.dependencies.solr/dspace-solr-core
/**
 * Builds a token for the [start, end) slice of {@code orig}'s term buffer.
 * Character offsets are narrowed to the slice only when the original
 * offsets span exactly the term text; otherwise the original offsets are
 * kept unchanged (the token is presumably a synonym whose text differs
 * from the matched input — see comment below).
 */
private Token newTok(Token orig, int start, int end) {
    int startOff = orig.startOffset();
    int endOff = orig.endOffset();
    // if length by start + end offsets doesn't match the term text then assume
    // this is a synonym and don't adjust the offsets.
    if (orig.termLength() == endOff-startOff) {
        endOff = startOff + end;
        startOff += start;
    }
    // clone(char[], int, int, int, int) copies the slice and applies the offsets.
    return (Token)orig.clone(orig.termBuffer(), start, (end - start), startOff, endOff);
}
代码示例来源:origin: org.apache.lucene/lucene-core-jfrog
/**
 * Returns the next token with accented characters in the range
 * ['\u00c0', '\uFB06'] folded by {@code removeAccents}, or null at end of
 * stream. Tokens containing no candidate characters pass through as-is.
 */
public final Token next(final Token reusableToken) throws java.io.IOException {
    assert reusableToken != null;
    final Token nextToken = input.next(reusableToken);
    if (nextToken == null) {
        return null;
    }
    final char[] buffer = nextToken.termBuffer();
    final int length = nextToken.termLength();
    // Scan for the first character that may need rewriting; if found,
    // rewrite the whole term once and stop scanning.
    int i = 0;
    while (i < length) {
        final char c = buffer[i];
        if (c >= '\u00c0' && c <= '\uFB06') {
            removeAccents(buffer, length);
            nextToken.setTermBuffer(output, 0, outputPos);
            break;
        }
        i++;
    }
    return nextToken;
}
代码示例来源:origin: org.dspace.dependencies.solr/dspace-solr-core
/**
 * Recursively matches the longest token sequence rooted at {@code map}.
 * Consumed tokens are recorded in {@code matched} (front-first, so they
 * end up in input order); tokens that do not extend the match are pushed
 * back for re-reading. Returns the deepest map node that both matched and
 * carries synonyms, or null if no synonym-bearing node was reached.
 */
private SynonymMap match(SynonymMap map) throws IOException {
    SynonymMap result = null;
    if (map.submap != null) {
        Token tok = nextTok();
        if (tok != null) {
            // check for positionIncrement!=1? if>1, should not match, if==0, check multiple at this level?
            SynonymMap subMap = map.submap.get(tok.termBuffer(), 0, tok.termLength());
            if (subMap != null) {
                // recurse: try to extend the match with further tokens
                result = match(subMap);
            }
            if (result != null) {
                // a longer match succeeded; this token is part of it
                matched.addFirst(tok);
            } else {
                // push back unmatched token
                pushTok(tok);
            }
        }
    }
    // if no longer sequence matched, so if this node has synonyms, it's the match.
    if (result==null && map.synonyms!=null) {
        result = map;
    }
    return result;
}
代码示例来源:origin: org.apache.lucene/com.springsource.org.apache.lucene
/**
 * Returns the next token with accented characters in the range
 * ['\u00c0', '\uFB06'] folded by {@code removeAccents}, or null at end of
 * stream. Tokens containing no candidate characters pass through as-is.
 */
public final Token next(final Token reusableToken) throws java.io.IOException {
    assert reusableToken != null;
    final Token nextToken = input.next(reusableToken);
    if (nextToken == null) {
        return null;
    }
    final char[] buffer = nextToken.termBuffer();
    final int length = nextToken.termLength();
    // Scan for the first character that may need rewriting; if found,
    // rewrite the whole term once and stop scanning.
    int i = 0;
    while (i < length) {
        final char c = buffer[i];
        if (c >= '\u00c0' && c <= '\uFB06') {
            removeAccents(buffer, length);
            nextToken.setTermBuffer(output, 0, outputPos);
            break;
        }
        i++;
    }
    return nextToken;
}
代码示例来源:origin: org.apache.lucene/lucene-core-jfrog
/**
 * Returns the next token with its term stemmed in place, or null at end
 * of stream. The term buffer is replaced only when the stemmer reports
 * that it produced a result.
 */
public final Token next(final Token reusableToken) throws IOException {
    assert reusableToken != null;
    final Token nextToken = input.next(reusableToken);
    if (nextToken != null) {
        // stem() returns true only when the term was actually rewritten.
        if (stemmer.stem(nextToken.termBuffer(), 0, nextToken.termLength())) {
            nextToken.setTermBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
        }
        return nextToken;
    }
    return null;
}
}
代码示例来源:origin: org.apache.lucene/com.springsource.org.apache.lucene
/**
 * Returns the next token with its term stemmed in place, or null at end
 * of stream. The term buffer is replaced only when the stemmer reports
 * that it produced a result.
 */
public final Token next(final Token reusableToken) throws IOException {
    assert reusableToken != null;
    final Token nextToken = input.next(reusableToken);
    if (nextToken != null) {
        // stem() returns true only when the term was actually rewritten.
        if (stemmer.stem(nextToken.termBuffer(), 0, nextToken.termLength())) {
            nextToken.setTermBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
        }
        return nextToken;
    }
    return null;
}
}
代码示例来源:origin: ajermakovics/eclipse-instasearch
/**
 * Copies the given token's offsets, position increment, and term text
 * into this stream's attributes.
 */
private void applyToken(Token token)
{
    // The three attributes are independent, so write order is arbitrary.
    offsetAtt.setOffset(token.startOffset(), token.endOffset());
    posAtt.setPositionIncrement(token.getPositionIncrement());
    termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
}
代码示例来源:origin: org.dspace.dependencies.solr/dspace-solr-core
/**
 * Consumes all tokens stacked at the same position (positionIncrement == 0)
 * that follow {@code t}, writing each one to the output unless an equal
 * term is already present there (de-duplication). The first token with a
 * non-zero increment is pushed back for the next call.
 *
 * @param t the pending token; written to output before any stacked tokens,
 *          then cleared — the (possibly nulled) value is returned
 */
protected Token process(Token t) throws IOException {
    Token tok = read();
    // Loop over every token stacked at the current position.
    while (tok != null && tok.getPositionIncrement()==0) {
        if (null != t) {
            // Flush the pending token ahead of its same-position duplicates.
            write(t);
            t = null;
        }
        boolean dup=false;
        // Compare against everything already written at this position.
        for (Token outTok : output()) {
            int tokLen = tok.termLength();
            if (outTok.termLength() == tokLen && ArraysUtils.equals(outTok.termBuffer(), 0, tok.termBuffer(), 0, tokLen)) {
                dup=true;
            }
        }
        if (!dup){
            write(tok);
        }
        tok = read();
    }
    if (tok != null) {
        // First token of the next position: un-read it for the caller.
        pushBack(tok);
    }
    return t;
}
}
代码示例来源:origin: org.apache.lucene/lucene-core-jfrog
final int bufferLength = nextToken.termLength();
final String type = nextToken.type();
代码示例来源:origin: org.apache.lucene/com.springsource.org.apache.lucene
/**
 * Returns the next input Token whose term() is not a stop word.
 */
public final Token next(final Token reusableToken) throws IOException {
    assert reusableToken != null;
    int skippedPositions = 0;
    Token nextToken = input.next(reusableToken);
    // Advance past stop words, accumulating their position increments so
    // they can be folded into the first token we keep.
    while (nextToken != null) {
        if (!stopWords.contains(nextToken.termBuffer(), 0, nextToken.termLength())) {
            if (enablePositionIncrements) {
                nextToken.setPositionIncrement(nextToken.getPositionIncrement() + skippedPositions);
            }
            return nextToken;
        }
        skippedPositions += nextToken.getPositionIncrement();
        nextToken = input.next(reusableToken);
    }
    // reached end of stream -- no non-stop token left
    return null;
}
代码示例来源:origin: org.apache.lucene/lucene-core-jfrog
/**
 * Returns the next input Token whose term() is not a stop word.
 */
public final Token next(final Token reusableToken) throws IOException {
    assert reusableToken != null;
    int skippedPositions = 0;
    Token nextToken = input.next(reusableToken);
    // Advance past stop words, accumulating their position increments so
    // they can be folded into the first token we keep.
    while (nextToken != null) {
        if (!stopWords.contains(nextToken.termBuffer(), 0, nextToken.termLength())) {
            if (enablePositionIncrements) {
                nextToken.setPositionIncrement(nextToken.getPositionIncrement() + skippedPositions);
            }
            return nextToken;
        }
        skippedPositions += nextToken.getPositionIncrement();
        nextToken = input.next(reusableToken);
    }
    // reached end of stream -- no non-stop token left
    return null;
}
代码示例来源:origin: org.dspace.dependencies.solr/dspace-solr-core
/**
 * Drains {@code tstream} and returns one entry per token, recording its
 * text, offsets, position increment, and type. The outer container is a
 * NamedList because token order is significant.
 */
static NamedList<NamedList<Object>> getTokens(TokenStream tstream) throws IOException {
    NamedList<NamedList<Object>> tokens = new NamedList<NamedList<Object>>();
    for (Token t = tstream.next(); t != null; t = tstream.next()) {
        NamedList<Object> token = new SimpleOrderedMap<Object>();
        tokens.add("token", token);
        token.add("value", new String(t.termBuffer(), 0, t.termLength()));
        token.add("start", t.startOffset());
        token.add("end", t.endOffset());
        token.add("posInc", t.getPositionIncrement());
        token.add("type", t.type());
        //TODO: handle payloads
    }
    return tokens;
}
内容来源于网络,如有侵权,请联系作者删除!