Usage and code examples of the org.apache.lucene.util.automaton.Operations.determinize() method

This article collects Java code examples of the org.apache.lucene.util.automaton.Operations.determinize() method and shows how it is used in practice. The examples are drawn from projects published on GitHub, Stack Overflow, Maven, and similar platforms, and should be a useful reference. Details of the Operations.determinize() method:
Package: org.apache.lucene.util.automaton
Class: Operations
Method: determinize

Overview of Operations.determinize

Determinizes the given automaton, producing an equivalent deterministic automaton (DFA).

Worst-case complexity: exponential in the number of states.
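
For orientation, here is a minimal, self-contained sketch (my own example, not taken from any of the projects below). It builds a possibly non-deterministic automaton, determinizes it with the default state cap, and runs it against a few strings. It assumes a Lucene version that still exposes Operations.DEFAULT_MAX_DETERMINIZED_STATES, as the examples below do:

import java.util.Arrays;

import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;

public class DeterminizeExample {
 public static void main(String[] args) {
  // Union of two regular-expression automata; the result is generally non-deterministic.
  Automaton a1 = new RegExp("foo.*").toAutomaton();
  Automaton a2 = new RegExp("barfoo").toAutomaton();
  Automaton union = Operations.union(Arrays.asList(a1, a2));

  // Determinize, capping the number of resulting states to guard against exponential blow-up.
  Automaton dfa = Operations.determinize(union, Operations.DEFAULT_MAX_DETERMINIZED_STATES);

  // Operations.run expects a deterministic automaton.
  System.out.println(Operations.run(dfa, "foobar")); // true
  System.out.println(Operations.run(dfa, "barfoo")); // true
  System.out.println(Operations.run(dfa, "bar"));    // false
 }
}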

Code examples

Code example source: org.apache.lucene/lucene-core

public GraphTokenStreamFiniteStrings(TokenStream in) throws IOException {
 Automaton aut = build(in);
 this.det = Operations.removeDeadStates(Operations.determinize(aut, DEFAULT_MAX_DETERMINIZED_STATES));
}

Code example source: org.apache.lucene/lucene-core

/**
 * Returns the longest BytesRef that is a suffix of all accepted strings.
 * Worst case complexity: exponential in number of states (this calls
 * determinize).
 * @param maxDeterminizedStates maximum number of states determinizing the
 *  automaton can result in.  Set higher to allow more complex queries and
 *  lower to prevent memory exhaustion.
 * @return common suffix, which can be an empty (length 0) BytesRef (never null)
 */
public static BytesRef getCommonSuffixBytesRef(Automaton a, int maxDeterminizedStates) {
 // reverse the language of the automaton, then reverse its common prefix.
 Automaton r = Operations.determinize(reverse(a), maxDeterminizedStates);
 BytesRef ref = getCommonPrefixBytesRef(r);
 reverseBytes(ref);
 return ref;
}
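
A quick illustration of this helper (my own sketch, not from the Lucene sources; it also uses org.apache.lucene.util.BytesRef, and the labels are plain ASCII so the byte-oriented prefix/suffix helpers behave as expected):

// Accepts "foo" and "abco"; their longest common suffix is "o".
Automaton a = new RegExp("foo|abco").toAutomaton();
BytesRef suffix = Operations.getCommonSuffixBytesRef(a, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
System.out.println(suffix.utf8ToString()); // "o"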

Code example source: org.apache.lucene/lucene-core

a = Operations.determinize(a, maxDeterminizedStates);
this.automaton = a;
points = a.getStartPoints();

Code example source: org.apache.lucene/lucene-core

/**
 * Returns a (deterministic) automaton that accepts the complement of the
 * language of the given automaton.
 * <p>
 * Complexity: linear in number of states if already deterministic and
 *  exponential otherwise.
 * @param maxDeterminizedStates maximum number of states determinizing the
 *  automaton can result in.  Set higher to allow more complex queries and
 *  lower to prevent memory exhaustion.
 */
static public Automaton complement(Automaton a, int maxDeterminizedStates) {
 a = totalize(determinize(a, maxDeterminizedStates));
 int numStates = a.getNumStates();
 for (int p=0;p<numStates;p++) {
  a.setAccept(p, !a.isAccept(p));
 }
 return removeDeadStates(a);
}
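
To illustrate the semantics (again my own minimal sketch, not library code): the complement accepts exactly the strings, over the full code-point alphabet, that the original automaton rejects.

Automaton a = new RegExp("foo").toAutomaton();
Automaton notA = Operations.complement(a, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
System.out.println(Operations.run(a, "foo"));    // true
System.out.println(Operations.run(notA, "foo")); // false
System.out.println(Operations.run(notA, "bar")); // true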

Code example source: org.apache.lucene/lucene-core

a = Operations.determinize(a, maxDeterminizedStates);

Code example source: org.elasticsearch/elasticsearch

protected Automaton convertAutomaton(Automaton a) {
 if (queryPrefix != null) {
  a = Operations.concatenate(Arrays.asList(queryPrefix, a));
  // This automaton should not blow up during determinize:
  a = Operations.determinize(a, Integer.MAX_VALUE);
 }
 return a;
}

Code example source: org.apache.lucene/lucene-core

automaton = Operations.determinize(automaton, maxDeterminizedStates);

Code example source: org.apache.lucene/lucene-analyzers-common

/** Creates a new SimplePatternSplitTokenizerFactory */
public SimplePatternSplitTokenizerFactory(Map<String,String> args) {
 super(args);
 maxDeterminizedStates = getInt(args, "maxDeterminizedStates", Operations.DEFAULT_MAX_DETERMINIZED_STATES);
 dfa = Operations.determinize(new RegExp(require(args, PATTERN)).toAutomaton(), maxDeterminizedStates);
 if (args.isEmpty() == false) {
  throw new IllegalArgumentException("Unknown parameters: " + args);
 }
}

Code example source: org.apache.lucene/lucene-analyzers-common

/** Creates a new SimplePatternTokenizerFactory */
public SimplePatternTokenizerFactory(Map<String,String> args) {
 super(args);
 maxDeterminizedStates = getInt(args, "maxDeterminizedStates", Operations.DEFAULT_MAX_DETERMINIZED_STATES);
 dfa = Operations.determinize(new RegExp(require(args, PATTERN)).toAutomaton(), maxDeterminizedStates);
 if (args.isEmpty() == false) {
  throw new IllegalArgumentException("Unknown parameters: " + args);
 }
}
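
Both factories above compile the configured regular expression into a DFA once, when the factory is created. A hedged usage sketch follows (assuming lucene-analyzers-common is on the classpath; the "pattern" and "maxDeterminizedStates" keys correspond to the constants referenced in the constructors, while the direct construction itself is my own illustration, since these factories are normally instantiated from Solr/Elasticsearch analysis configuration):

Map<String, String> args = new HashMap<>();
args.put("pattern", "[ \t\r\n]+");          // split tokens on runs of whitespace
args.put("maxDeterminizedStates", "10000"); // optional; defaults to Operations.DEFAULT_MAX_DETERMINIZED_STATES
SimplePatternSplitTokenizerFactory factory = new SimplePatternSplitTokenizerFactory(args);
Tokenizer tokenizer = factory.create();

Note that getInt and require consume their keys from the args map, so the final isEmpty() check rejects any unrecognized parameters.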

Code example source: org.elasticsearch/elasticsearch

final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
  // TODO: is there a Reader from a CharSequence?
  // Turn tokenstream into automaton:
  Automaton automaton = null;
  try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) {
    automaton = getTokenStreamToAutomaton().toAutomaton(ts);
  }
  automaton = replaceSep(automaton);
  // TODO: we can optimize this somewhat by determinizing
  // while we convert
  // This automaton should not blow up during determinize:
  automaton = Operations.determinize(automaton, Integer.MAX_VALUE);
  return automaton;
}

Code example source: org.elasticsearch/elasticsearch

@Override
protected Automaton convertAutomaton(Automaton a) {
 if (unicodeAware) {
  // FLORIAN EDIT: get converted Automaton from superclass
  Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a));
  // This automaton should not blow up during determinize:
  utf8automaton = Operations.determinize(utf8automaton, Integer.MAX_VALUE);
  return utf8automaton;
 } else {
  return super.convertAutomaton(a);
 }
}

Code example source: org.elasticsearch/elasticsearch

return Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);

Code example source: org.apache.lucene/lucene-analyzers-common

/**
 * Converts the tokenStream to an automaton.  Does *not* close it.
 */
public Automaton toAutomaton(boolean unicodeAware) throws IOException {
 // TODO refactor this
 // maybe we could hook up a modified automaton from TermAutomatonQuery here?
 // Create corresponding automaton: labels are bytes
 // from each analyzed token, with byte 0 used as
 // separator between tokens:
 final TokenStreamToAutomaton tsta;
 if (preserveSep) {
  tsta = new EscapingTokenStreamToAutomaton(SEP_LABEL);
 } else {
  // When we're not preserving sep, we don't steal 0xff
  // byte, so we don't need to do any escaping:
  tsta = new TokenStreamToAutomaton();
 }
 tsta.setPreservePositionIncrements(preservePositionIncrements);
 tsta.setUnicodeArcs(unicodeAware);
 Automaton automaton = tsta.toAutomaton(inputTokenStream);
 // TODO: we can optimize this somewhat by determinizing
 // while we convert
 automaton = replaceSep(automaton, preserveSep, SEP_LABEL);
 // This automaton should not blow up during determinize:
 return Operations.determinize(automaton, maxGraphExpansions);
}

Code example source: harbby/presto-connectors

@Override
protected Automaton convertAutomaton(Automaton a) {
 if (unicodeAware) {
  Automaton utf8automaton = new UTF32ToUTF8().convert(a);
  utf8automaton = Operations.determinize(utf8automaton, DEFAULT_MAX_DETERMINIZED_STATES);
  return utf8automaton;
 } else {
  return a;
 }
}

Code example source: wikimedia/search-highlighter

private Factory(String regexString, int maxDeterminizedStates) {
  Automaton automaton = new RegExp(regexString).toAutomaton(maxDeterminizedStates);
  forward = new OffsetReturningRunAutomaton(automaton, false);
  if (hasLeadingWildcard(automaton)) {
    Automaton reversed = Operations.determinize(Operations.reverse(
        new RegExp("(" + regexString + ").*").toAutomaton(maxDeterminizedStates)), maxDeterminizedStates);
    reverse = new AcceptReturningReverseRunAutomaton(reversed);
  } else {
    reverse = null;
  }
}
