This article collects code examples of the Java method org.apache.lucene.util.automaton.Operations.determinize(), showing how Operations.determinize() is used in practice. The examples are extracted from selected open-source projects on platforms such as GitHub, Stack Overflow, and Maven, and should serve as useful references. Details of the Operations.determinize() method:
Package path: org.apache.lucene.util.automaton.Operations
Class: Operations
Method: determinize
Description: Determinizes the given automaton. Worst-case complexity: exponential in the number of states.
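Before the collected examples, here is a minimal, self-contained sketch of a direct call (not taken from any of the projects below; the regular expression and class name are invented for illustration):

import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;

public class DeterminizeDemo {
  public static void main(String[] args) {
    // Build a (possibly non-deterministic) automaton from a regular
    // expression, then determinize it. The int argument caps how many
    // states determinization may create before it gives up with
    // TooComplexToDeterminizeException.
    Automaton nfa = new RegExp("(ab)*a?").toAutomaton();
    Automaton dfa = Operations.determinize(nfa, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    System.out.println("deterministic: " + dfa.isDeterministic());
  }
}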
Code example source: org.apache.lucene/lucene-core

public GraphTokenStreamFiniteStrings(TokenStream in) throws IOException {
  Automaton aut = build(in);
  this.det = Operations.removeDeadStates(Operations.determinize(aut, DEFAULT_MAX_DETERMINIZED_STATES));
}
Code example source: org.apache.lucene/lucene-core

/**
 * Returns the longest BytesRef that is a suffix of all accepted strings.
 * Worst case complexity: exponential in number of states (this calls
 * determinize).
 * @param maxDeterminizedStates maximum number of states determinizing the
 *   automaton can result in.  Set higher to allow more complex queries and
 *   lower to prevent memory exhaustion.
 * @return common suffix, which can be an empty (length 0) BytesRef (never null)
 */
public static BytesRef getCommonSuffixBytesRef(Automaton a, int maxDeterminizedStates) {
  // reverse the language of the automaton, then reverse its common prefix.
  Automaton r = Operations.determinize(reverse(a), maxDeterminizedStates);
  BytesRef ref = getCommonPrefixBytesRef(r);
  reverseBytes(ref);
  return ref;
}
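A minimal usage sketch for the method above, with invented input strings (for ASCII inputs the returned bytes map directly to characters):

import java.util.Arrays;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;

public class CommonSuffixDemo {
  public static void main(String[] args) {
    // The language {"walking", "talking"}; its longest common suffix is "alking".
    Automaton a = Operations.union(Arrays.asList(
        Automata.makeString("walking"),
        Automata.makeString("talking")));
    BytesRef suffix = Operations.getCommonSuffixBytesRef(
        a, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    System.out.println(suffix.utf8ToString()); // prints "alking"
  }
}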
Code example source: org.apache.lucene/lucene-core

// Fragment: determinize the automaton before collecting its sorted
// transition start points.
a = Operations.determinize(a, maxDeterminizedStates);
this.automaton = a;
points = a.getStartPoints();
Code example source: org.apache.lucene/lucene-core

/**
 * Returns a (deterministic) automaton that accepts the complement of the
 * language of the given automaton.
 * <p>
 * Complexity: linear in number of states if already deterministic and
 * exponential otherwise.
 * @param maxDeterminizedStates maximum number of states determinizing the
 *   automaton can result in.  Set higher to allow more complex queries and
 *   lower to prevent memory exhaustion.
 */
static public Automaton complement(Automaton a, int maxDeterminizedStates) {
  a = totalize(determinize(a, maxDeterminizedStates));
  int numStates = a.getNumStates();
  for (int p = 0; p < numStates; p++) {
    a.setAccept(p, !a.isAccept(p));
  }
  return removeDeadStates(a);
}
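A short sketch of the complement in action (strings invented for illustration). Operations.run requires a deterministic automaton, which complement guarantees:

import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;

public class ComplementDemo {
  public static void main(String[] args) {
    // An automaton accepting only "foo", and its complement.
    Automaton foo = Automata.makeString("foo");
    Automaton notFoo = Operations.complement(foo, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    System.out.println(Operations.run(notFoo, "foo")); // false
    System.out.println(Operations.run(notFoo, "bar")); // true
  }
}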
Code example source: org.apache.lucene/lucene-core

a = Operations.determinize(a, maxDeterminizedStates);
Code example source: org.elasticsearch/elasticsearch

protected Automaton convertAutomaton(Automaton a) {
  if (queryPrefix != null) {
    a = Operations.concatenate(Arrays.asList(queryPrefix, a));
    // This automaton should not blow up during determinize:
    a = Operations.determinize(a, Integer.MAX_VALUE);
  }
  return a;
}
Code example source: org.apache.lucene/lucene-core

automaton = Operations.determinize(automaton, maxDeterminizedStates);
Code example source: org.apache.lucene/lucene-analyzers-common

/** Creates a new SimplePatternSplitTokenizerFactory */
public SimplePatternSplitTokenizerFactory(Map<String,String> args) {
  super(args);
  maxDeterminizedStates = getInt(args, "maxDeterminizedStates", Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  dfa = Operations.determinize(new RegExp(require(args, PATTERN)).toAutomaton(), maxDeterminizedStates);
  if (args.isEmpty() == false) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
Code example source: org.apache.lucene/lucene-analyzers-common

/** Creates a new SimplePatternTokenizerFactory */
public SimplePatternTokenizerFactory(Map<String,String> args) {
  super(args);
  maxDeterminizedStates = getInt(args, "maxDeterminizedStates", Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  dfa = Operations.determinize(new RegExp(require(args, PATTERN)).toAutomaton(), maxDeterminizedStates);
  if (args.isEmpty() == false) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
Code example source: org.elasticsearch/elasticsearch

final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
  // TODO: is there a Reader from a CharSequence?
  // Turn tokenstream into automaton:
  Automaton automaton = null;
  try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) {
    automaton = getTokenStreamToAutomaton().toAutomaton(ts);
  }
  automaton = replaceSep(automaton);
  // TODO: we can optimize this somewhat by determinizing
  // while we convert
  // This automaton should not blow up during determinize:
  automaton = Operations.determinize(automaton, Integer.MAX_VALUE);
  return automaton;
}
Code example source: org.elasticsearch/elasticsearch

@Override
protected Automaton convertAutomaton(Automaton a) {
  if (unicodeAware) {
    // FLORIAN EDIT: get converted Automaton from superclass
    Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a));
    // This automaton should not blow up during determinize:
    utf8automaton = Operations.determinize(utf8automaton, Integer.MAX_VALUE);
    return utf8automaton;
  } else {
    return super.convertAutomaton(a);
  }
}
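For context on the unicodeAware branch above: UTF32ToUTF8 rewrites an automaton whose transition labels are Unicode code points into one whose labels are UTF-8 bytes, and that conversion can introduce non-determinism, which is why it is followed by a determinize call. A standalone sketch of the same pattern (the regular expression and class name are invented for illustration):

import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;
import org.apache.lucene.util.automaton.UTF32ToUTF8;

public class Utf8ConversionDemo {
  public static void main(String[] args) {
    // Code-point-labeled automaton for a regex, converted to byte labels,
    // then determinized with the default state cap.
    Automaton codePoints = new RegExp("gr(ü|u)n").toAutomaton();
    Automaton utf8 = new UTF32ToUTF8().convert(codePoints);
    utf8 = Operations.determinize(utf8, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    System.out.println("deterministic: " + utf8.isDeterministic());
  }
}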
Code example source: org.elasticsearch/elasticsearch

return Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
Code example source: org.apache.lucene/lucene-analyzers-common

/**
 * Converts the tokenStream to an automaton. Does *not* close it.
 */
public Automaton toAutomaton(boolean unicodeAware) throws IOException {
  // TODO refactor this
  // maybe we could hook up a modified automaton from TermAutomatonQuery here?

  // Create corresponding automaton: labels are bytes
  // from each analyzed token, with byte 0 used as
  // separator between tokens:
  final TokenStreamToAutomaton tsta;
  if (preserveSep) {
    tsta = new EscapingTokenStreamToAutomaton(SEP_LABEL);
  } else {
    // When we're not preserving sep, we don't steal 0xff
    // byte, so we don't need to do any escaping:
    tsta = new TokenStreamToAutomaton();
  }
  tsta.setPreservePositionIncrements(preservePositionIncrements);
  tsta.setUnicodeArcs(unicodeAware);

  Automaton automaton = tsta.toAutomaton(inputTokenStream);

  // TODO: we can optimize this somewhat by determinizing
  // while we convert
  automaton = replaceSep(automaton, preserveSep, SEP_LABEL);

  // This automaton should not blow up during determinize:
  return Operations.determinize(automaton, maxGraphExpansions);
}
Code example source: apache/servicemix-bundles

protected Automaton convertAutomaton(Automaton a) {
  if (queryPrefix != null) {
    a = Operations.concatenate(Arrays.asList(queryPrefix, a));
    // This automaton should not blow up during determinize:
    a = Operations.determinize(a, Integer.MAX_VALUE);
  }
  return a;
}
Code example source: com.strapdata.elasticsearch/elasticsearch

protected Automaton convertAutomaton(Automaton a) {
  if (queryPrefix != null) {
    a = Operations.concatenate(Arrays.asList(queryPrefix, a));
    // This automaton should not blow up during determinize:
    a = Operations.determinize(a, Integer.MAX_VALUE);
  }
  return a;
}
Code example source: org.apache.servicemix.bundles/org.apache.servicemix.bundles.elasticsearch

protected Automaton convertAutomaton(Automaton a) {
  if (queryPrefix != null) {
    a = Operations.concatenate(Arrays.asList(queryPrefix, a));
    // This automaton should not blow up during determinize:
    a = Operations.determinize(a, Integer.MAX_VALUE);
  }
  return a;
}
Code example source: harbby/presto-connectors

@Override
protected Automaton convertAutomaton(Automaton a) {
  if (unicodeAware) {
    Automaton utf8automaton = new UTF32ToUTF8().convert(a);
    utf8automaton = Operations.determinize(utf8automaton, DEFAULT_MAX_DETERMINIZED_STATES);
    return utf8automaton;
  } else {
    return a;
  }
}
Code example source: org.apache.servicemix.bundles/org.apache.servicemix.bundles.elasticsearch

@Override
protected Automaton convertAutomaton(Automaton a) {
  if (unicodeAware) {
    // FLORIAN EDIT: get converted Automaton from superclass
    Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a));
    // This automaton should not blow up during determinize:
    utf8automaton = Operations.determinize(utf8automaton, Integer.MAX_VALUE);
    return utf8automaton;
  } else {
    return super.convertAutomaton(a);
  }
}
Code example source: com.strapdata.elasticsearch/elasticsearch

@Override
protected Automaton convertAutomaton(Automaton a) {
  if (unicodeAware) {
    // FLORIAN EDIT: get converted Automaton from superclass
    Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a));
    // This automaton should not blow up during determinize:
    utf8automaton = Operations.determinize(utf8automaton, Integer.MAX_VALUE);
    return utf8automaton;
  } else {
    return super.convertAutomaton(a);
  }
}
Code example source: wikimedia/search-highlighter

private Factory(String regexString, int maxDeterminizedStates) {
  Automaton automaton = new RegExp(regexString).toAutomaton(maxDeterminizedStates);
  forward = new OffsetReturningRunAutomaton(automaton, false);
  if (hasLeadingWildcard(automaton)) {
    Automaton reversed = Operations.determinize(Operations.reverse(
        new RegExp("(" + regexString + ").*").toAutomaton(maxDeterminizedStates)), maxDeterminizedStates);
    reverse = new AcceptReturningReverseRunAutomaton(reversed);
  } else {
    reverse = null;
  }
}
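A note on this last example: when the pattern starts with a wildcard, the forward run automaton cannot cheaply locate where a match begins, so the factory apparently builds a second automaton for "(pattern).*", reverses it, and runs it backwards from a match end to recover the match start. Reversal generally destroys determinism, hence the explicit determinize call on the reversed automaton.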