org.apache.lucene.util.automaton.Automata类的使用及代码示例

x33g5p2x  于2022-01-17 转载在 其他  
字(11.5k)|赞(0)|评价(0)|浏览(109)

本文整理了Java中org.apache.lucene.util.automaton.Automata类的一些代码示例,展示了Automata类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Automata类的具体详情如下:
包路径:org.apache.lucene.util.automaton.Automata
类名称:Automata

Automata介绍

[英]Construction of basic automata.
[中]基本自动机的构造。

代码示例

代码示例来源:origin: org.apache.lucene/lucene-core

break;
case REGEXP_CHAR:
 a = Automata.makeChar(c);
 break;
case REGEXP_CHAR_RANGE:
 a = Automata.makeCharRange(from, to);
 break;
case REGEXP_ANYCHAR:
 a = Automata.makeAnyChar();
 break;
case REGEXP_EMPTY:
 a = Automata.makeEmpty();
 break;
case REGEXP_STRING:
 a = Automata.makeString(s);
 break;
case REGEXP_ANYSTRING:
 a = Automata.makeAnyString();
 break;
case REGEXP_AUTOMATON:
 break;
case REGEXP_INTERVAL:
 a = Automata.makeDecimalInterval(min, max, digits);
 break;

代码示例来源:origin: org.apache.lucene/lucene-core

return Automata.makeEmpty();
 b = Automata.makeEmptyString();
} else if (min == 1) {
 b = new Automaton();

代码示例来源:origin: org.apache.lucene/lucene-core

/**
 * Builds the automaton for the binary interval bounded by the two terms.
 *
 * <p>A {@code null} bound is always passed on as inclusive, because
 * makeBinaryInterval is more picky about that combination than this method is.
 *
 * @param lowerTerm lower bound, may be {@code null}
 * @param upperTerm upper bound, may be {@code null}
 * @param includeLower whether the lower bound is inclusive (forced to true when lowerTerm is null)
 * @param includeUpper whether the upper bound is inclusive (forced to true when upperTerm is null)
 * @return the result of {@code Automata.makeBinaryInterval} for the adjusted arguments
 */
public static Automaton toAutomaton(BytesRef lowerTerm, BytesRef upperTerm, boolean includeLower, boolean includeUpper) {
 final boolean lowerInclusive = lowerTerm == null || includeLower;
 final boolean upperInclusive = upperTerm == null || includeUpper;
 return Automata.makeBinaryInterval(lowerTerm, lowerInclusive, upperTerm, upperInclusive);
}

代码示例来源:origin: org.apache.lucene/lucene-core

switch(c) {
 case WILDCARD_STRING: 
  automata.add(Automata.makeAnyString());
  break;
 case WILDCARD_CHAR:
  automata.add(Automata.makeAnyChar());
  break;
 case WILDCARD_ESCAPE:
   final int nextChar = wildcardText.codePointAt(i + length);
   length += Character.charCount(nextChar);
   automata.add(Automata.makeChar(nextChar));
   break;
  automata.add(Automata.makeChar(c));

代码示例来源:origin: org.apache.lucene/lucene-core

/**
 * Builds the sub-automaton for decimal numbers whose value is at least
 * x.substring(n) and whose length is x.substring(n).length().
 *
 * @param builder builder that receives the new states and transitions
 * @param x decimal digit string giving the lower bound
 * @param n index into x of the first digit handled by this call
 * @param initials collection that receives every non-final state created while zeros is true
 * @param zeros passed down as true only while it was true and the current digit is '0'
 * @return the state created for position n
 */
private static int atLeast(Automaton.Builder builder, String x, int n, Collection<Integer> initials,
  boolean zeros) {
 final int state = builder.createState();
 if (n == x.length()) {
  // All digits consumed: this state ends a number of the right length.
  builder.setAccept(state, true);
  return state;
 }
 if (zeros) {
  initials.add(state);
 }
 final char digit = x.charAt(n);
 // Same digit as the bound: the remainder must still be at least x.substring(n + 1).
 final int sameDigitTarget = atLeast(builder, x, n + 1, initials, zeros && digit == '0');
 builder.addTransition(state, sameDigitTarget, digit);
 if (digit < '9') {
  // Any larger digit: the remaining digits are unconstrained.
  final int largerDigitTarget = anyOfRightLength(builder, x, n + 1);
  builder.addTransition(state, largerDigitTarget, (char) (digit + 1), '9');
 }
 return state;
}

代码示例来源:origin: org.apache.lucene/lucene-core

/**
 * Constructs sub-automaton corresponding to decimal numbers of value at most
 * x.substring(n) and length x.substring(n).length().
 *
 * @param builder builder that receives the new states and transitions
 * @param x decimal digit string giving the upper bound
 * @param n index into x of the first digit handled by this call
 * @return the state created for position n
 */
private static int atMost(Automaton.Builder builder, String x, int n) {
 int s = builder.createState();
 if (x.length() == n) {
  // All digits consumed: this state ends a number of the right length.
  builder.setAccept(s, true);
 } else {
  char c = x.charAt(n);
  // Same digit as the bound: the remainder must still be at most x.substring(n + 1).
  // The index is advanced as a plain int; casting n to char first would truncate it to 16 bits.
  builder.addTransition(s, atMost(builder, x, n + 1), c);
  if (c > '0') {
   // Any smaller digit: the remaining digits are unconstrained.
   builder.addTransition(s, anyOfRightLength(builder, x, n + 1), '0', (char) (c - 1));
  }
 }
 return s;
}

代码示例来源:origin: org.elasticsearch/elasticsearch

for (IntsRef string; (string = finiteStrings.next()) != null;) {
 if (string.length <= nonFuzzyPrefix || string.length < minFuzzyLength) {
  subs.add(Automata.makeString(string.ints, string.offset, string.length));
 } else {
  int ints[] = new int[string.length-nonFuzzyPrefix];
 return Automata.makeEmpty(); // matches nothing
} else if (subs.size() == 1) {

代码示例来源:origin: org.elasticsearch/elasticsearch

/**
 * Return an {@link Automaton} that matches the given pattern.
 *
 * <p>The pattern is split at every {@code '*'}: each literal piece becomes a string
 * automaton, each {@code '*'} becomes an any-string automaton, and the pieces are
 * concatenated in their original order.
 *
 * @param pattern the simple pattern, where {@code '*'} is the only special character
 * @return the concatenation of the per-piece automata
 */
public static Automaton simpleMatchToAutomaton(String pattern) {
  final List<Automaton> pieces = new ArrayList<>();
  int literalStart = 0;
  int star = pattern.indexOf('*');
  while (star != -1) {
    pieces.add(Automata.makeString(pattern.substring(literalStart, star)));
    pieces.add(Automata.makeAnyString());
    literalStart = star + 1;
    star = pattern.indexOf('*', literalStart);
  }
  // Trailing literal after the last '*' (the whole pattern when there is no '*').
  pieces.add(Automata.makeString(pattern.substring(literalStart)));
  return Operations.concatenate(pieces);
}

代码示例来源:origin: org.elasticsearch/elasticsearch

/**
 * Returns a function that filters a document map based on the given include and exclude rules.
 * A null or empty {@code includes} array means "include everything"; a null or empty
 * {@code excludes} array means "exclude nothing".
 * @see #filter(Map, String[], String[]) for details
 */
public static Function<Map<String, ?>, Map<String, Object>> filter(String[] includes, String[] excludes) {
  final CharacterRunAutomaton matchAllAutomaton = new CharacterRunAutomaton(Automata.makeAnyString());

  final boolean noIncludes = includes == null || includes.length == 0;
  final CharacterRunAutomaton include = noIncludes
      ? matchAllAutomaton
      : new CharacterRunAutomaton(makeMatchDotsInFieldNames(Regex.simpleMatchToAutomaton(includes)));

  final boolean noExcludes = excludes == null || excludes.length == 0;
  final Automaton excludeA = noExcludes
      ? Automata.makeEmpty()
      : makeMatchDotsInFieldNames(Regex.simpleMatchToAutomaton(excludes));
  final CharacterRunAutomaton exclude = new CharacterRunAutomaton(excludeA);

  // NOTE: We cannot use Operations.minus because of the special case that
  // we want all sub properties to match as soon as an object matches
  return map -> filter(map, include, 0, exclude, 0, matchAllAutomaton);
}

代码示例来源:origin: org.elasticsearch/elasticsearch

/** Make matches on objects also match dots in field names.
 *  For instance, if the original simple regex is `foo`, this will translate
 *  it into `foo` OR `foo.*`.
 *
 *  @param automaton the automaton built from the original simple regex
 *  @return the union of {@code automaton} and {@code automaton} followed by '.' and any string
 */
private static Automaton makeMatchDotsInFieldNames(Automaton automaton) {
  final Automaton dot = Automata.makeChar('.');
  final Automaton anySuffix = Automata.makeAnyString();
  final Automaton withDottedSuffix = Operations.concatenate(Arrays.asList(automaton, dot, anySuffix));
  return Operations.union(automaton, withDottedSuffix);
}

代码示例来源:origin: org.apache.lucene/lucene-core

/**
  * Returns a new (deterministic and minimal) automaton that accepts the union
  * of the given collection of {@link BytesRef}s representing UTF-8 encoded
  * strings. An empty collection yields the automaton returned by makeEmpty().
  * 
  * @param utf8Strings
  *          The input strings, UTF-8 encoded. The collection must be in sorted
  *          order.
  * 
  * @return An {@link Automaton} accepting all input strings. The resulting
  *         automaton is codepoint based (full unicode codepoints on
  *         transitions).
  */
 public static Automaton makeStringUnion(Collection<BytesRef> utf8Strings) {
  return utf8Strings.isEmpty()
    ? makeEmpty()
    : DaciukMihovAutomatonBuilder.build(utf8Strings);
 }
}

代码示例来源:origin: org.elasticsearch/elasticsearch

/**
 * Builds the automaton for this include/exclude configuration.
 *
 * <p>The accepted set starts from {@code include} if set, otherwise from the union of
 * {@code includeValues} if set, otherwise from "any string". Then {@code exclude} if set,
 * otherwise the union of {@code excludeValues} if set, is subtracted from it.
 *
 * @return the resulting automaton
 */
private Automaton toAutomaton() {
  final Automaton included;
  if (include != null) {
    included = include.toAutomaton();
  } else if (includeValues != null) {
    included = Automata.makeStringUnion(includeValues);
  } else {
    included = Automata.makeAnyString();
  }

  if (exclude != null) {
    return Operations.minus(included, exclude.toAutomaton(), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  }
  if (excludeValues != null) {
    return Operations.minus(included, Automata.makeStringUnion(excludeValues), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  }
  return included;
}

代码示例来源:origin: org.apache.lucene/lucene-core

assert prefix != null;
if (n == 0) {
 return Automata.makeString(prefix + UnicodeUtil.newString(word, 0, word.length));

代码示例来源:origin: org.apache.lucene/lucene-core

/**
 * Builds the sub-automaton for decimal numbers whose value lies between
 * x.substring(n) and y.substring(n) and whose length is x.substring(n).length()
 * (which must be equal to y.substring(n).length()).
 *
 * @param builder builder that receives the new states and transitions
 * @param x decimal digit string giving the lower bound
 * @param y decimal digit string giving the upper bound
 * @param n index of the first digit handled by this call
 * @param initials collection that receives every non-final state created while zeros is true
 * @param zeros passed down as true only while it was true and the current lower-bound digit is '0'
 * @return the state created for position n
 */
private static int between(Automaton.Builder builder,
  String x, String y, int n,
  Collection<Integer> initials, boolean zeros) {
 final int state = builder.createState();
 if (n == x.length()) {
  builder.setAccept(state, true);
  return state;
 }
 if (zeros) {
  initials.add(state);
 }
 final char low = x.charAt(n);
 final char high = y.charAt(n);
 final boolean stillZeros = zeros && low == '0';
 if (low == high) {
  // Bounds agree on this digit: both constraints carry over to the next position.
  final int next = between(builder, x, y, n + 1, initials, stillZeros);
  builder.addTransition(state, next, low);
  return state;
 }
 // low < high: the bounds diverge here, so each side is constrained independently.
 final int lowerBranch = atLeast(builder, x, n + 1, initials, stillZeros);
 builder.addTransition(state, lowerBranch, low);
 final int upperBranch = atMost(builder, y, n + 1);
 builder.addTransition(state, upperBranch, high);
 if (low + 1 < high) {
  // Digits strictly between the bounds leave the remaining digits unconstrained.
  final int freeBranch = anyOfRightLength(builder, x, n + 1);
  builder.addTransition(state, freeBranch, (char) (low + 1), (char) (high - 1));
 }
 return state;
}

代码示例来源:origin: org.apache.lucene/lucene-core

cmp = -1;
if (min.length == 0 && minInclusive) {
 return makeAnyBinary();
 return makeEmpty();
} else {
 return makeBinary(min);
return makeEmpty();
 suffixIsZeros(max, min.length)) {
  return makeEmpty();
 } else {
  return makeBinary(min);

代码示例来源:origin: harbby/presto-connectors

Term prefix = pq.getPrefix();
if (prefix.field().equals(field)) {
 list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()), 
                                 Automata.makeAnyString())) {
  @Override
  public String toString() {
 list.add(new CharacterRunAutomaton(Automata.makeEmpty()) {
  @Override
  public boolean run(char[] s, int offset, int length) {

代码示例来源:origin: harbby/presto-connectors

/**
 * Builds the automaton that matches the context part of a suggestion.
 *
 * <p>When {@code matchAllContexts} is set or {@code contexts} is empty, the result is the
 * match-all automaton followed by the context separator. Otherwise it is the union, over all
 * entries, of the context string (followed by the match-all automaton when the entry is not
 * exact) followed by the context separator.
 *
 * @param contexts context strings mapped to their metadata
 * @param matchAllContexts whether every context should match
 * @return the context automaton
 */
private static Automaton toContextAutomaton(final Map<IntsRef, ContextMetaData> contexts, final boolean matchAllContexts) {
 final Automaton matchAllAutomaton = Operations.repeat(Automata.makeAnyString());
 final Automaton sep = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);
 if (matchAllContexts || contexts.size() == 0) {
  return Operations.concatenate(matchAllAutomaton, sep);
 }

 Automaton combined = null;
 for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
  final ContextMetaData metaData = entry.getValue();
  final IntsRef context = entry.getKey();
  Automaton single = Automata.makeString(context.ints, context.offset, context.length);
  if (!metaData.exact) {
   // Non-exact contexts also match anything that follows the context string.
   single = Operations.concatenate(single, matchAllAutomaton);
  }
  single = Operations.concatenate(single, sep);
  combined = (combined == null) ? single : Operations.union(combined, single);
 }
 return combined;
}

代码示例来源:origin: harbby/presto-connectors

/**
 * Create an automaton for a given context query; this automaton will be used
 * to find the matching paths with the fst.
 *
 * @param preserveSep set an additional char (<code>XAnalyzingSuggester.SEP_LABEL</code>) between each context query
 * @param queries list of {@link ContextQuery} defining the lookup context
 *
 * @return Automaton matching the given Query
 */
public static Automaton toAutomaton(boolean preserveSep, Iterable<ContextQuery> queries) {
  final Automaton separator = Automata.makeChar(ContextMapping.SEPARATOR);
  // if separators are preserved the fst contains a SEP_LABEL
  // behind each gap. To have a matching automaton, we need to
  // include the SEP_LABEL in the query as well
  final Automaton gap = preserveSep
      ? Operations.concatenate(separator, Automata.makeChar(XAnalyzingSuggester.SEP_LABEL))
      : separator;

  Automaton result = Automata.makeEmptyString();
  for (ContextQuery query : queries) {
    // Each query is prepended, so later queries end up in front of earlier ones.
    final Automaton queryAutomaton = query.toAutomaton();
    result = Operations.concatenate(Arrays.asList(queryAutomaton, gap, result));
  }
  // TODO: should we limit this?  Do any of our ContextQuery impls really create exponential regexps?  GeoQuery looks safe (union
  // of strings).
  return Operations.determinize(result, Integer.MAX_VALUE);
}

代码示例来源:origin: harbby/presto-connectors

/**
 * Creates the weight for this context query: the inner query's automaton is prefixed with
 * an optional separator label and the context automaton, determinized, and handed to a
 * ContextCompletionWeight together with the per-context boosts and the distinct context
 * lengths in descending order.
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
 final CompletionWeight innerWeight = ((CompletionWeight) innerQuery.createWeight(searcher, needsScores));
 // if separators are preserved the fst contains a SEP_LABEL
 // behind each gap. To have a matching automaton, we need to
 // include the SEP_LABEL in the query as well
 final Automaton optionalSepLabel = Operations.optional(Automata.makeChar(CompletionAnalyzer.SEP_LABEL));
 final Automaton prefixAutomaton = Operations.concatenate(optionalSepLabel, innerWeight.getAutomaton());
 final Automaton undeterminized = Operations.concatenate(toContextAutomaton(contexts, matchAllContexts), prefixAutomaton);
 final Automaton contextsAutomaton = Operations.determinize(undeterminized, Operations.DEFAULT_MAX_DETERMINIZED_STATES);

 final Map<IntsRef, Float> contextMap = new HashMap<>(contexts.size());
 final TreeSet<Integer> contextLengths = new TreeSet<>();
 for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
  final IntsRef context = entry.getKey();
  contextMap.put(context, entry.getValue().boost);
  contextLengths.add(context.length);
 }

 // Copy the distinct lengths into an array, longest first.
 final int[] contextLengthArray = new int[contextLengths.size()];
 final Iterator<Integer> longestFirst = contextLengths.descendingIterator();
 int slot = 0;
 while (longestFirst.hasNext()) {
  contextLengthArray[slot] = longestFirst.next();
  slot++;
 }
 return new ContextCompletionWeight(this, contextsAutomaton, innerWeight, contextMap, contextLengthArray);
}

代码示例来源:origin: org.apache.lucene/lucene-core

/**
 * Builds the sub-automaton for decimal numbers of length
 * x.substring(n).length(), with every digit unconstrained.
 *
 * @param builder builder that receives the new states and transitions
 * @param x string whose remaining length from n determines how many digits are accepted
 * @param n index of the first position handled by this call
 * @return the state created for position n
 */
private static int anyOfRightLength(Automaton.Builder builder, String x, int n) {
 final int state = builder.createState();
 if (n == x.length()) {
  builder.setAccept(state, true);
  return state;
 }
 final int next = anyOfRightLength(builder, x, n + 1);
 builder.addTransition(state, next, '0', '9');
 return state;
}

相关文章