org.apache.lucene.util.fst.Builder.add()方法的使用及代码示例

x33g5p2x  于2022-01-17 转载在 其他  
字(12.5k)|赞(0)|评价(0)|浏览(101)

本文整理了Java中org.apache.lucene.util.fst.Builder.add()方法的一些代码示例,展示了Builder.add()的具体用法。这些代码示例主要来源于GitHub/Stack Overflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Builder.add()方法的具体详情如下:
包路径:org.apache.lucene.util.fst.Builder
类名称:Builder
方法名:add

Builder.add介绍

[英]Add the next input/output pair. The provided input must be sorted after the previous one according to IntsRef#compareTo. It's also OK to add the same input twice in a row with different outputs, as long as Outputs implements the Outputs#merge method. Note that input is fully consumed after this method returns (so caller is free to reuse), but output is not. So if your outputs are changeable (eg ByteSequenceOutputs or IntSequenceOutputs) then you cannot reuse across calls.
[中]添加下一个输入/输出对。提供的输入必须按照IntsRef#compareTo的顺序排在前一个输入之后。只要Outputs实现了Outputs#merge方法,连续两次添加相同的输入并附带不同的输出也是可以的。请注意,此方法返回后,输入已被完全消费(因此调用方可以自由重用),但输出不会。因此,如果输出是可变的(例如ByteSequenceOutputs或IntSequenceOutputs),则不能跨调用重用。

代码示例

代码示例来源:origin: org.apache.lucene/lucene-core

/**
 * Copies every input/output pair of {@code subIndex} into {@code builder}.
 *
 * <p>Pairs are added in the order the enumeration yields them. Each input is converted
 * with {@code Util.toIntsRef} using {@code scratchIntsRef} as the working buffer, and the
 * enumerated output object is handed to the builder as-is.
 *
 * @param builder destination FST builder
 * @param subIndex FST whose entries are enumerated
 * @param scratchIntsRef scratch buffer passed to {@code Util.toIntsRef} on every iteration
 * @throws IOException if enumerating the sub-index or adding to the builder fails
 */
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<BytesRef> entries = new BytesRefFSTEnum<>(subIndex);
  // next() returns null once the enumeration is exhausted.
  for (BytesRefFSTEnum.InputOutput<BytesRef> entry = entries.next(); entry != null; entry = entries.next()) {
    builder.add(Util.toIntsRef(entry.input, scratchIntsRef), entry.output);
  }
}
}

代码示例来源:origin: org.apache.lucene/lucene-core

assert bytes.length > 0;
scratchBytes.writeTo(bytes, 0);
indexBuilder.add(Util.toIntsRef(prefix, scratchIntsRef), new BytesRef(bytes, 0, bytes.length));
scratchBytes.reset();

代码示例来源:origin: org.apache.lucene/lucene-analyzers-common

/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them into an FST
 * that maps each source string to its replacement.
 *
 * <p>Every line is split on whitespace and must yield exactly three fields; the second
 * field is the mapping key and the third its value. Mappings are collected in a
 * {@link TreeMap} so they reach the FST builder in sorted key order.
 *
 * @param reader source of the conversion lines; also supplies line numbers for errors
 * @param num number of lines to read
 * @return the FST built from the parsed mappings
 * @throws IOException if reading or building the FST fails
 * @throws ParseException if the input ends before {@code num} lines were read, or a line
 *     does not split into exactly three fields
 * @throws IllegalStateException if the same key is mapped more than once
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
 final Map<String,String> mappings = new TreeMap<>();

 for (int i = 0; i < num; i++) {
  final String line = reader.readLine();
  if (line == null) {
   // readLine() returns null at end of stream; report the truncated input as a parse
   // error instead of failing with a NullPointerException on the split below.
   throw new ParseException("unexpected end of input: expected " + num + " conversion lines but found " + i,
     reader.getLineNumber());
  }
  final String[] parts = line.split("\\s+");
  if (parts.length != 3) {
   throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
  }
  if (mappings.put(parts[1], parts[2]) != null) {
   throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
  }
 }

 final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
 final Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
 final IntsRefBuilder scratchInts = new IntsRefBuilder();
 for (Map.Entry<String,String> entry : mappings.entrySet()) {
  Util.toUTF16(entry.getKey(), scratchInts);
  // A new CharsRef per entry: the builder does not consume the output object.
  builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
 }

 return builder.finish();
}

代码示例来源:origin: org.apache.lucene/lucene-analyzers-common

/**
 * Compiles the given affix table into an FST keyed by the UTF-32 form of each affix string.
 *
 * <p>The map is iterated in its own (sorted) order, and the integer list attached to each
 * key is copied into a newly allocated {@code IntsRef} that becomes the FST output.
 *
 * @param affixes affix string to list of integers, iterated in the map's order
 * @return the finished FST
 * @throws IOException if building the FST fails
 */
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
 final IntSequenceOutputs intOutputs = IntSequenceOutputs.getSingleton();
 final Builder<IntsRef> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE4, intOutputs);
 final IntsRefBuilder keyScratch = new IntsRefBuilder();

 for (Map.Entry<String,List<Integer>> affix : affixes.entrySet()) {
  Util.toUTF32(affix.getKey(), keyScratch);

  // Allocate a fresh output for every key; only the key scratch buffer is reused.
  final List<Integer> ids = affix.getValue();
  final IntsRef packed = new IntsRef(ids.size());
  for (Integer id : ids) {
   packed.ints[packed.length] = id;
   packed.length++;
  }

  fstBuilder.add(keyScratch.get(), packed);
 }
 return fstBuilder.finish();
}

代码示例来源:origin: org.apache.lucene/lucene-analyzers-common

/**
 * Compiles the pending pairs into a {@link NormalizeCharMap}. Call this once you are
 * done calling {@link #add}; the pending pairs are cleared after the FST is finished.
 *
 * @return a new map wrapping the FST built from the pending pairs
 * @throws RuntimeException wrapping any {@link IOException} raised while building
 */
 public NormalizeCharMap build() {
  try {
   final Outputs<CharsRef> charOutputs = CharSequenceOutputs.getSingleton();
   final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
     new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, charOutputs);
   final IntsRefBuilder keyScratch = new IntsRefBuilder();

   for (Map.Entry<String,String> pair : pendingPairs.entrySet()) {
    // Key goes in as UTF-16 code units; each value gets its own CharsRef.
    fstBuilder.add(Util.toUTF16(pair.getKey(), keyScratch), new CharsRef(pair.getValue()));
   }

   final FST<CharsRef> compiled = fstBuilder.finish();
   pendingPairs.clear();
   return new NormalizeCharMap(compiled);
  } catch (IOException e) {
   // The FST builder API declares IOException; the original author notes it is not
   // expected to occur here, so it is rethrown unchecked with the cause preserved.
   throw new RuntimeException(e);
  }
 }
}

代码示例来源:origin: org.apache.lucene/lucene-analyzers-common

builder.add(Util.toUTF32(input, scratchIntsRef), scratch.toBytesRef());

代码示例来源:origin: org.apache.lucene/lucene-analyzers-common

words.add(scratchInts.get(), currentOrds.get());
 words.add(scratchInts.get(), currentOrds.get());
 success2 = true;
} finally {

代码示例来源:origin: org.elasticsearch/elasticsearch

/**
 * Flushes the surface forms collected for the current analyzed term into the FST builder.
 *
 * <p>The collected forms are sorted, then each one is added under the analyzed bytes with
 * a per-form trailing byte (0, 1, 2, ...) so that every added input differs. Afterwards
 * the per-term state ({@code seenSurfaceForms}, {@code count}) is reset.
 *
 * @param defaultWeight weight to encode for forms whose own weight is {@code -1}; capped
 *     at {@code Integer.MAX_VALUE} before encoding
 * @throws IOException if adding to the builder fails
 */
public void finishTerm(long defaultWeight) throws IOException {
  ArrayUtil.timSort(surfaceFormsAndPayload, 0, count);

  // Extend the analyzed bytes; the final byte is overwritten for each surface form below.
  analyzed.append((byte) 0);
  analyzed.setLength(analyzed.length() + 1);
  analyzed.grow(analyzed.length());

  for (int i = 0; i < count; i++) {
    // The loop index doubles as the deduplication byte for this surface form.
    analyzed.setByteAt(analyzed.length() - 1, (byte) i);
    Util.toIntsRef(analyzed.get(), scratchInts);

    final SurfaceFormAndPayload candidate = surfaceFormsAndPayload[i];
    final long cost;
    if (candidate.weight == -1) {
      cost = encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight));
    } else {
      cost = candidate.weight;
    }
    builder.add(scratchInts.get(), outputs.newPair(cost, candidate.payload));
  }

  seenSurfaceForms.clear();
  count = 0;
}

代码示例来源:origin: org.apache.lucene/lucene-analyzers-common

/**
 * Builds a {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}.
 *
 * <p>The hashed inputs are visited in the order returned by {@code hash.sort()}; each is
 * decoded from UTF-8 into the FST input and paired with the value stored at the same id
 * in {@code outputValues}.
 *
 * @return a {@link StemmerOverrideMap} backed by the finished FST
 * @throws IOException if an {@link IOException} occurs while building the FST
 */
public StemmerOverrideMap build() throws IOException {
 final ByteSequenceOutputs byteOutputs = ByteSequenceOutputs.getSingleton();
 final org.apache.lucene.util.fst.Builder<BytesRef> fstBuilder =
   new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE4, byteOutputs);

 final int[] sortedIds = hash.sort();
 final IntsRefBuilder inputScratch = new IntsRefBuilder();
 final int total = hash.size();
 final BytesRef keyScratch = new BytesRef();

 // Iterate up to hash.size(), not the array length, exactly as the ids were counted.
 for (int pos = 0; pos < total; pos++) {
  final int id = sortedIds[pos];
  inputScratch.copyUTF8Bytes(hash.get(id, keyScratch));
  // A fresh BytesRef per entry is handed to the builder as the output.
  fstBuilder.add(inputScratch.get(), new BytesRef(outputValues.get(id)));
 }
 return new StemmerOverrideMap(fstBuilder.finish(), ignoreCase);
}

代码示例来源:origin: org.elasticsearch/elasticsearch

builder.add(scratchInts.get(), outputs.newPair(cost, BytesRef.deepCopyOf(surface)));
} else {
 int payloadOffset = input.getPosition() + surface.length;
 System.arraycopy(bytes.bytes, payloadOffset, br.bytes, surface.length+1, payloadLength);
 br.length = br.bytes.length;
 builder.add(scratchInts.get(), outputs.newPair(cost, br));

代码示例来源:origin: org.infinispan/infinispan-embedded-query

/**
 * Copies every input/output pair of {@code subIndex} into {@code builder}.
 *
 * <p>Pairs are added in the order the enumeration yields them. Each input is converted
 * with {@code Util.toIntsRef} using {@code scratchIntsRef} as the working buffer, and the
 * enumerated output object is handed to the builder as-is.
 *
 * @param builder destination FST builder
 * @param subIndex FST whose entries are enumerated
 * @param scratchIntsRef scratch buffer passed to {@code Util.toIntsRef} on every iteration
 * @throws IOException if enumerating the sub-index or adding to the builder fails
 */
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<BytesRef> entries = new BytesRefFSTEnum<>(subIndex);
  // next() returns null once the enumeration is exhausted.
  for (BytesRefFSTEnum.InputOutput<BytesRef> entry = entries.next(); entry != null; entry = entries.next()) {
    builder.add(Util.toIntsRef(entry.input, scratchIntsRef), entry.output);
  }
}
}

代码示例来源:origin: org.apache.servicemix.bundles/org.apache.servicemix.bundles.lucene

/**
 * Copies every input/output pair of {@code subIndex} into {@code builder}.
 *
 * <p>Pairs are added in the order the enumeration yields them. Each input is converted
 * with {@code Util.toIntsRef} using {@code scratchIntsRef} as the working buffer, and the
 * enumerated output object is handed to the builder as-is.
 *
 * @param builder destination FST builder
 * @param subIndex FST whose entries are enumerated
 * @param scratchIntsRef scratch buffer passed to {@code Util.toIntsRef} on every iteration
 * @throws IOException if enumerating the sub-index or adding to the builder fails
 */
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<BytesRef> entries = new BytesRefFSTEnum<>(subIndex);
  // next() returns null once the enumeration is exhausted.
  for (BytesRefFSTEnum.InputOutput<BytesRef> entry = entries.next(); entry != null; entry = entries.next()) {
    builder.add(Util.toIntsRef(entry.input, scratchIntsRef), entry.output);
  }
}
}

代码示例来源:origin: org.apache.lucene/lucene-sandbox

/**
 * Copies every input/output pair of {@code subIndex} into {@code builder}.
 *
 * <p>Pairs are added in the order the enumeration yields them. Each input is converted
 * with {@code Util.toIntsRef} using {@code scratchIntsRef} as the working buffer, and the
 * enumerated output pair is handed to the builder as-is.
 *
 * @param builder destination FST builder
 * @param subIndex FST whose entries are enumerated
 * @param scratchIntsRef scratch buffer passed to {@code Util.toIntsRef} on every iteration
 * @throws IOException if enumerating the sub-index or adding to the builder fails
 */
private void append(Builder<Pair<BytesRef,Long>> builder, FST<Pair<BytesRef,Long>> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<Pair<BytesRef,Long>> entries = new BytesRefFSTEnum<>(subIndex);
  // next() returns null once the enumeration is exhausted.
  for (BytesRefFSTEnum.InputOutput<Pair<BytesRef,Long>> entry = entries.next(); entry != null; entry = entries.next()) {
    builder.add(Util.toIntsRef(entry.input, scratchIntsRef), entry.output);
  }
}
}

代码示例来源:origin: harbby/presto-connectors

/**
 * Copies every input/output pair of {@code subIndex} into {@code builder}.
 *
 * <p>Pairs are added in the order the enumeration yields them. Each input is converted
 * with {@code Util.toIntsRef} using {@code scratchIntsRef} as the working buffer, and the
 * enumerated output object is handed to the builder as-is.
 *
 * @param builder destination FST builder
 * @param subIndex FST whose entries are enumerated
 * @param scratchIntsRef scratch buffer passed to {@code Util.toIntsRef} on every iteration
 * @throws IOException if enumerating the sub-index or adding to the builder fails
 */
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<BytesRef> entries = new BytesRefFSTEnum<>(subIndex);
  // next() returns null once the enumeration is exhausted.
  for (BytesRefFSTEnum.InputOutput<BytesRef> entry = entries.next(); entry != null; entry = entries.next()) {
    builder.add(Util.toIntsRef(entry.input, scratchIntsRef), entry.output);
  }
}
}

代码示例来源:origin: harbby/presto-connectors

/**
 * Copies every input/output pair of {@code subIndex} into {@code builder}.
 *
 * <p>Pairs are added in the order the enumeration yields them. Each input is converted
 * with {@code Util.toIntsRef} using {@code scratchIntsRef} as the working buffer, and the
 * enumerated output pair is handed to the builder as-is.
 *
 * @param builder destination FST builder
 * @param subIndex FST whose entries are enumerated
 * @param scratchIntsRef scratch buffer passed to {@code Util.toIntsRef} on every iteration
 * @throws IOException if enumerating the sub-index or adding to the builder fails
 */
private void append(Builder<Pair<BytesRef,Long>> builder, FST<Pair<BytesRef,Long>> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<Pair<BytesRef,Long>> entries = new BytesRefFSTEnum<>(subIndex);
  // next() returns null once the enumeration is exhausted.
  for (BytesRefFSTEnum.InputOutput<Pair<BytesRef,Long>> entry = entries.next(); entry != null; entry = entries.next()) {
    builder.add(Util.toIntsRef(entry.input, scratchIntsRef), entry.output);
  }
}
}

代码示例来源:origin: org.apache.lucene/lucene-classification

/**
 * Rebuilds the {@code fst} field from the given weights.
 *
 * <p>Keys are visited in the sorted map's order. Each key's characters are copied into a
 * byte scratch buffer and converted to the FST input; the weight is stored as a
 * {@code long}, so any fractional part of the {@code Double} is dropped by
 * {@code longValue()}.
 *
 * @param weights key to weight, iterated in the map's order
 * @throws IOException if building the FST fails
 */
private void updateFST(SortedMap<String, Double> weights) throws IOException {
 final PositiveIntOutputs longOutputs = PositiveIntOutputs.getSingleton();
 final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, longOutputs);
 final BytesRefBuilder keyBytes = new BytesRefBuilder();
 final IntsRefBuilder keyInts = new IntsRefBuilder();

 for (Map.Entry<String, Double> weighted : weights.entrySet()) {
  keyBytes.copyChars(weighted.getKey());
  builder.add(Util.toIntsRef(keyBytes.get(), keyInts), weighted.getValue().longValue());
 }

 fst = builder.finish();
}

代码示例来源:origin: org.infinispan/infinispan-embedded-query

/**
 * Compiles the given affix table into an FST keyed by the UTF-32 form of each affix string.
 *
 * <p>The map is iterated in its own (sorted) order, and the integer list attached to each
 * key is copied into a newly allocated {@code IntsRef} that becomes the FST output.
 *
 * @param affixes affix string to list of integers, iterated in the map's order
 * @return the finished FST
 * @throws IOException if building the FST fails
 */
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
 final IntSequenceOutputs intOutputs = IntSequenceOutputs.getSingleton();
 final Builder<IntsRef> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE4, intOutputs);
 final IntsRefBuilder keyScratch = new IntsRefBuilder();

 for (Map.Entry<String,List<Integer>> affix : affixes.entrySet()) {
  Util.toUTF32(affix.getKey(), keyScratch);

  // Allocate a fresh output for every key; only the key scratch buffer is reused.
  final List<Integer> ids = affix.getValue();
  final IntsRef packed = new IntsRef(ids.size());
  for (Integer id : ids) {
   packed.ints[packed.length] = id;
   packed.length++;
  }

  fstBuilder.add(keyScratch.get(), packed);
 }
 return fstBuilder.finish();
}

代码示例来源:origin: org.apache.lucene/lucene-codecs

/**
 * Creates the per-field writer state and seeds the FST builder.
 *
 * <p>Records the field, the starting terms file pointer and the current position of
 * {@code out}, then creates a BYTE1 builder and adds the empty input mapped to
 * {@code termsFilePointer}.
 *
 * @param fieldInfo field this writer is created for
 * @param termsFilePointer value stored both as the start pointer and as the output of
 *     the empty input
 * @throws IOException if reading the file pointer or adding to the builder fails
 */
public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException {
 this.fieldInfo = fieldInfo;
 startTermsFilePointer = termsFilePointer;
 indexStart = out.getFilePointer();

 fstOutputs = PositiveIntOutputs.getSingleton();
 fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, fstOutputs);

 // The empty string is always present in the FST, pointing at the initial terms pointer.
 fstBuilder.add(new IntsRef(), termsFilePointer);
}

代码示例来源:origin: org.apache.lucene/lucene-codecs

/**
 * Copies every entry of {@code subIndex} into {@code builder}, rebasing the ordinals of
 * each output by {@code termOrdOffset}.
 *
 * <p>For each entry a new output is created with the same bytes, the start ordinal
 * increased by {@code termOrdOffset} and the end ordinal decreased by it.
 *
 * @param builder destination FST builder
 * @param subIndex FST whose entries are enumerated
 * @param termOrdOffset amount added to {@code startOrd} and subtracted from {@code endOrd}
 * @param scratchIntsRef scratch buffer passed to {@code Util.toIntsRef} on every iteration
 * @throws IOException if enumerating the sub-index or adding to the builder fails
 */
private void append(Builder<Output> builder, FST<Output> subIndex, long termOrdOffset, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<Output> entries = new BytesRefFSTEnum<>(subIndex);
  // next() returns null once the enumeration is exhausted.
  for (BytesRefFSTEnum.InputOutput<Output> entry = entries.next(); entry != null; entry = entries.next()) {
    final Output source = entry.output;
    final long shiftedStartOrd = termOrdOffset + source.startOrd;
    // NOTE(review): endOrd moves in the opposite direction to startOrd; presumably it is
    // stored in an inverted form - confirm against the Output definition.
    final long shiftedEndOrd = source.endOrd - termOrdOffset;
    final Output rebased = FST_OUTPUTS.newOutput(source.bytes, shiftedStartOrd, shiftedEndOrd);
    builder.add(Util.toIntsRef(entry.input, scratchIntsRef), rebased);
  }
}
}

代码示例来源:origin: com.strapdata.elasticsearch/elasticsearch

/**
 * Flushes the surface forms collected for the current analyzed term into the FST builder.
 *
 * <p>The collected forms are sorted, then each one is added under the analyzed bytes with
 * a per-form trailing byte (0, 1, 2, ...) so that every added input differs. Afterwards
 * the per-term state ({@code seenSurfaceForms}, {@code count}) is reset.
 *
 * @param defaultWeight weight to encode for forms whose own weight is {@code -1}; capped
 *     at {@code Integer.MAX_VALUE} before encoding
 * @throws IOException if adding to the builder fails
 */
public void finishTerm(long defaultWeight) throws IOException {
  ArrayUtil.timSort(surfaceFormsAndPayload, 0, count);

  // Extend the analyzed bytes; the final byte is overwritten for each surface form below.
  analyzed.append((byte) 0);
  analyzed.setLength(analyzed.length() + 1);
  analyzed.grow(analyzed.length());

  for (int i = 0; i < count; i++) {
    // The loop index doubles as the deduplication byte for this surface form.
    analyzed.setByteAt(analyzed.length() - 1, (byte) i);
    Util.toIntsRef(analyzed.get(), scratchInts);

    final SurfaceFormAndPayload candidate = surfaceFormsAndPayload[i];
    final long cost;
    if (candidate.weight == -1) {
      cost = encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight));
    } else {
      cost = candidate.weight;
    }
    builder.add(scratchInts.get(), outputs.newPair(cost, candidate.payload));
  }

  seenSurfaceForms.clear();
  count = 0;
}

相关文章