本文整理了Java中org.apache.lucene.util.fst.Builder.add()
方法的一些代码示例,展示了Builder.add()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Builder.add()
方法的具体详情如下:
包路径:org.apache.lucene.util.fst.Builder
类名称:Builder
方法名:add
[英]Add the next input/output pair. The provided input must be sorted after the previous one according to IntsRef#compareTo. It's also OK to add the same input twice in a row with different outputs, as long as Outputs implements the Outputs#merge method. Note that input is fully consumed after this method returns (so the caller is free to reuse it), but output is not. So if your outputs are changeable (e.g. ByteSequenceOutputs or IntSequenceOutputs) then you cannot reuse them across calls.
[中]添加下一个输入/输出对。提供的输入必须按照IntsRef#compareTo的顺序排在前一个输入之后。只要Outputs实现了Outputs#merge方法,也可以连续两次添加输出不同的同一输入。请注意,此方法返回后,输入已被完全消费(因此调用方可以自由重用输入对象),但输出不会。因此,如果输出是可变的(例如ByteSequenceOutputs或IntSequenceOutputs),则不能跨调用重用输出对象。
代码示例来源:origin: org.apache.lucene/lucene-core
/**
 * Copies every input/output pair found in {@code subIndex} into {@code builder}.
 *
 * <p>The pairs are visited in the order produced by {@link BytesRefFSTEnum}. Each input is
 * converted with {@code Util.toIntsRef} into {@code scratchIntsRef}, which is reused for every
 * pair; the output object is handed to the builder as-is.
 *
 * @param builder the builder that receives the pairs
 * @param subIndex the FST whose entries are enumerated
 * @param scratchIntsRef reusable buffer for the converted input
 * @throws IOException if enumerating the sub-index or adding to the builder fails
 */
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<BytesRef> entries = new BytesRefFSTEnum<>(subIndex);
  for (BytesRefFSTEnum.InputOutput<BytesRef> entry = entries.next();
       entry != null;
       entry = entries.next()) {
    builder.add(Util.toIntsRef(entry.input, scratchIntsRef), entry.output);
  }
}
}
代码示例来源:origin: org.apache.lucene/lucene-core
assert bytes.length > 0;
scratchBytes.writeTo(bytes, 0);
indexBuilder.add(Util.toIntsRef(prefix, scratchIntsRef), new BytesRef(bytes, 0, bytes.length));
scratchBytes.reset();
代码示例来源:origin: org.apache.lucene/lucene-analyzers-common
/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them into an FST.
 *
 * <p>Each line is split on whitespace and must yield exactly three fields; the second field
 * becomes the FST input and the third its output. The pairs are first collected in a
 * {@link TreeMap}, so they are handed to the builder in ascending key order.
 *
 * @param reader source of the conversion lines
 * @param num number of lines to consume
 * @return the finished FST mapping each key to its replacement
 * @throws IOException if reading from {@code reader} or building the FST fails
 * @throws ParseException if the input ends before {@code num} lines were read, or a line does
 *     not consist of exactly three whitespace-separated fields
 * @throws IllegalStateException if the same key is mapped more than once
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  Map<String,String> mappings = new TreeMap<>();
  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    if (line == null) {
      // readLine() returns null at end of stream; report truncated input as a parse error
      // instead of letting line.split() fail with a NullPointerException.
      throw new ParseException("unexpected end of input: expected " + num + " conversions but found " + i,
          reader.getLineNumber());
    }
    String[] parts = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  // One scratch buffer is reused for every key; a new CharsRef is created for every output.
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }
  return builder.finish();
}
代码示例来源:origin: org.apache.lucene/lucene-analyzers-common
/**
 * Builds an FST that maps each affix string to its list of integers.
 *
 * <p>Keys are taken in the iteration order of {@code affixes} and converted with
 * {@code Util.toUTF32}; the associated integers are copied into a newly allocated
 * {@code IntsRef} for every key.
 *
 * @param affixes affix strings with their associated integer lists
 * @return the finished FST
 * @throws IOException if adding to or finishing the builder fails
 */
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
  final IntSequenceOutputs outputs = IntSequenceOutputs.getSingleton();
  final Builder<IntsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE4, outputs);
  final IntsRefBuilder keyBuffer = new IntsRefBuilder();

  for (Map.Entry<String,List<Integer>> affix : affixes.entrySet()) {
    Util.toUTF32(affix.getKey(), keyBuffer);

    final List<Integer> values = affix.getValue();
    // A separate output object per key: the input buffer is reused, the output is not.
    final IntsRef output = new IntsRef(values.size());
    for (Integer value : values) {
      output.ints[output.length] = value;
      output.length++;
    }

    builder.add(keyBuffer.get(), output);
  }

  return builder.finish();
}
代码示例来源:origin: org.apache.lucene/lucene-analyzers-common
/**
 * Builds the NormalizeCharMap; call this once you are done calling {@link #add}.
 *
 * <p>Every pending pair is added to an FST in the iteration order of {@code pendingPairs},
 * after which {@code pendingPairs} is cleared.
 *
 * @return a map backed by the freshly built FST
 * @throws RuntimeException wrapping any {@link IOException} raised while building the FST
 */
public NormalizeCharMap build() {
  final FST<CharsRef> map;
  try {
    final Outputs<CharsRef> charOutputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
        new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, charOutputs);
    final IntsRefBuilder keyBuffer = new IntsRefBuilder();

    for (Map.Entry<String,String> pair : pendingPairs.entrySet()) {
      final String match = pair.getKey();
      final String replacement = pair.getValue();
      fstBuilder.add(Util.toUTF16(match, keyBuffer), new CharsRef(replacement));
    }

    map = fstBuilder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // The FST API declares IOException; it is rethrown unchecked here.
    throw new RuntimeException(ioe);
  }
  return new NormalizeCharMap(map);
}
}
代码示例来源:origin: org.apache.lucene/lucene-analyzers-common
builder.add(Util.toUTF32(input, scratchIntsRef), scratch.toBytesRef());
代码示例来源:origin: org.apache.lucene/lucene-analyzers-common
words.add(scratchInts.get(), currentOrds.get());
words.add(scratchInts.get(), currentOrds.get());
success2 = true;
} finally {
代码示例来源:origin: org.elasticsearch/elasticsearch
/**
 * Flushes the buffered surface forms of the current analyzed form into the FST builder and
 * resets the buffer.
 *
 * <p>The first {@code count} buffered entries are sorted, then added one by one. The analyzed
 * form is extended by a 0 byte plus one trailing byte; that trailing byte is overwritten with
 * the entry's running index before each add (presumably so that several surface forms sharing
 * one analyzed form still produce distinct FST inputs).
 *
 * @param defaultWeight weight used for entries whose own weight is {@code -1}; it is capped at
 *     {@code Integer.MAX_VALUE} and passed through {@code encodeWeight}
 * @throws IOException if adding to the builder fails
 */
public void finishTerm(long defaultWeight) throws IOException {
  ArrayUtil.timSort(surfaceFormsAndPayload, 0, count);

  // Append the 0 byte, then make room for the per-entry trailing byte.
  analyzed.append((byte) 0);
  analyzed.setLength(analyzed.length() + 1);
  analyzed.grow(analyzed.length());

  for (int ordinal = 0; ordinal < count; ordinal++) {
    analyzed.setByteAt(analyzed.length() - 1, (byte) ordinal);
    Util.toIntsRef(analyzed.get(), scratchInts);

    final SurfaceFormAndPayload candidate = surfaceFormsAndPayload[ordinal];
    final long cost;
    if (candidate.weight == -1) {
      cost = encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight));
    } else {
      cost = candidate.weight;
    }
    builder.add(scratchInts.get(), outputs.newPair(cost, candidate.payload));
  }

  seenSurfaceForms.clear();
  count = 0;
}
代码示例来源:origin: org.apache.lucene/lucene-analyzers-common
/**
 * Returns a {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}.
 *
 * <p>The ids returned by {@code hash.sort()} are walked in order; for each id the stored bytes
 * are converted with {@code copyUTF8Bytes} into the FST input and the matching entry of
 * {@code outputValues} becomes the output.
 *
 * @return a {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
 * @throws IOException if an {@link IOException} occurs
 */
public StemmerOverrideMap build() throws IOException {
  final ByteSequenceOutputs byteOutputs = ByteSequenceOutputs.getSingleton();
  final org.apache.lucene.util.fst.Builder<BytesRef> fstBuilder =
      new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE4, byteOutputs);

  final int[] sortedIds = hash.sort();
  final IntsRefBuilder keyBuffer = new IntsRefBuilder();
  final int entryCount = hash.size();
  final BytesRef lookupBuffer = new BytesRef();

  for (int pos = 0; pos < entryCount; pos++) {
    final int id = sortedIds[pos];
    keyBuffer.copyUTF8Bytes(hash.get(id, lookupBuffer));
    // A new BytesRef per entry: outputs are not reused across add() calls.
    fstBuilder.add(keyBuffer.get(), new BytesRef(outputValues.get(id)));
  }

  return new StemmerOverrideMap(fstBuilder.finish(), ignoreCase);
}
代码示例来源:origin: org.elasticsearch/elasticsearch
builder.add(scratchInts.get(), outputs.newPair(cost, BytesRef.deepCopyOf(surface)));
} else {
int payloadOffset = input.getPosition() + surface.length;
System.arraycopy(bytes.bytes, payloadOffset, br.bytes, surface.length+1, payloadLength);
br.length = br.bytes.length;
builder.add(scratchInts.get(), outputs.newPair(cost, br));
代码示例来源:origin: org.infinispan/infinispan-embedded-query
/**
 * Adds all entries of {@code subIndex} to {@code builder}.
 *
 * <p>Entries are read through a {@link BytesRefFSTEnum} until it returns {@code null}. The input
 * of each entry is converted via {@code Util.toIntsRef} using the shared
 * {@code scratchIntsRef}; the entry's output is passed on unchanged.
 *
 * @param builder destination of the entries
 * @param subIndex FST to read the entries from
 * @param scratchIntsRef scratch buffer reused for each converted input
 * @throws IOException if reading the sub-index or adding to the builder fails
 */
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<BytesRef> cursor = new BytesRefFSTEnum<>(subIndex);
  while (true) {
    final BytesRefFSTEnum.InputOutput<BytesRef> current = cursor.next();
    if (current == null) {
      return;
    }
    builder.add(Util.toIntsRef(current.input, scratchIntsRef), current.output);
  }
}
}
代码示例来源:origin: org.apache.servicemix.bundles/org.apache.servicemix.bundles.lucene
/**
 * Transfers each input/output pair of {@code subIndex} into {@code builder}.
 *
 * <p>The sub-index is enumerated with a {@link BytesRefFSTEnum}; enumeration stops at the first
 * {@code null}. Inputs go through {@code Util.toIntsRef} with {@code scratchIntsRef} as the
 * reusable target, outputs are forwarded as they are.
 *
 * @param builder builder to add the pairs to
 * @param subIndex FST supplying the pairs
 * @param scratchIntsRef reusable buffer for the converted input
 * @throws IOException if enumeration or the add operation fails
 */
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<BytesRef> pairs = new BytesRefFSTEnum<>(subIndex);
  BytesRefFSTEnum.InputOutput<BytesRef> pair = pairs.next();
  while (pair != null) {
    builder.add(Util.toIntsRef(pair.input, scratchIntsRef), pair.output);
    pair = pairs.next();
  }
}
}
代码示例来源:origin: org.apache.lucene/lucene-sandbox
/**
 * Copies every input/output pair found in {@code subIndex} into {@code builder}.
 *
 * <p>The pairs are visited in the order produced by {@link BytesRefFSTEnum}. Each input is
 * converted with {@code Util.toIntsRef} into {@code scratchIntsRef}, which is reused for every
 * pair; the {@code Pair} output is handed to the builder as-is.
 *
 * @param builder the builder that receives the pairs
 * @param subIndex the FST whose entries are enumerated
 * @param scratchIntsRef reusable buffer for the converted input
 * @throws IOException if enumerating the sub-index or adding to the builder fails
 */
private void append(Builder<Pair<BytesRef,Long>> builder, FST<Pair<BytesRef,Long>> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<Pair<BytesRef,Long>> entries = new BytesRefFSTEnum<>(subIndex);
  for (BytesRefFSTEnum.InputOutput<Pair<BytesRef,Long>> entry = entries.next();
       entry != null;
       entry = entries.next()) {
    builder.add(Util.toIntsRef(entry.input, scratchIntsRef), entry.output);
  }
}
}
代码示例来源:origin: harbby/presto-connectors
/**
 * Feeds the contents of {@code subIndex} into {@code builder}, one entry at a time.
 *
 * <p>A {@link BytesRefFSTEnum} supplies the entries until it is exhausted (signalled by
 * {@code null}). For each entry the input is converted by {@code Util.toIntsRef} into the shared
 * {@code scratchIntsRef} and added together with the entry's unmodified output.
 *
 * @param builder receiving builder
 * @param subIndex FST to enumerate
 * @param scratchIntsRef buffer reused for each converted input
 * @throws IOException if enumerating or adding fails
 */
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<BytesRef> source = new BytesRefFSTEnum<>(subIndex);
  for (BytesRefFSTEnum.InputOutput<BytesRef> item = source.next(); item != null; item = source.next()) {
    builder.add(Util.toIntsRef(item.input, scratchIntsRef), item.output);
  }
}
}
代码示例来源:origin: harbby/presto-connectors
/**
 * Adds all entries of {@code subIndex} to {@code builder}.
 *
 * <p>Entries are read through a {@link BytesRefFSTEnum} until it returns {@code null}. The input
 * of each entry is converted via {@code Util.toIntsRef} using the shared
 * {@code scratchIntsRef}; the entry's {@code Pair} output is passed on unchanged.
 *
 * @param builder destination of the entries
 * @param subIndex FST to read the entries from
 * @param scratchIntsRef scratch buffer reused for each converted input
 * @throws IOException if reading the sub-index or adding to the builder fails
 */
private void append(Builder<Pair<BytesRef,Long>> builder, FST<Pair<BytesRef,Long>> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<Pair<BytesRef,Long>> cursor = new BytesRefFSTEnum<>(subIndex);
  BytesRefFSTEnum.InputOutput<Pair<BytesRef,Long>> current = cursor.next();
  while (current != null) {
    builder.add(Util.toIntsRef(current.input, scratchIntsRef), current.output);
    current = cursor.next();
  }
}
}
代码示例来源:origin: org.apache.lucene/lucene-classification
/**
 * Rebuilds the {@code fst} field from the given weights.
 *
 * <p>Keys are added in the iteration order of {@code weights}. Each key is copied into a byte
 * buffer with {@code copyChars} and converted with {@code Util.toIntsRef}; each weight is stored
 * as {@code Double.longValue()}, i.e. with its fractional part dropped.
 *
 * @param weights weight per key
 * @throws IOException if adding to or finishing the builder fails
 */
private void updateFST(SortedMap<String, Double> weights) throws IOException {
  final PositiveIntOutputs longOutputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, longOutputs);
  final BytesRefBuilder keyBytes = new BytesRefBuilder();
  final IntsRefBuilder keyInts = new IntsRefBuilder();

  for (Map.Entry<String, Double> weight : weights.entrySet()) {
    keyBytes.copyChars(weight.getKey());
    final long storedWeight = weight.getValue().longValue();
    builder.add(Util.toIntsRef(keyBytes.get(), keyInts), storedWeight);
  }

  fst = builder.finish();
}
代码示例来源:origin: org.infinispan/infinispan-embedded-query
/**
 * Compiles the given affix table into an FST.
 *
 * <p>For every entry, in the map's iteration order, the key is converted with
 * {@code Util.toUTF32} into a reused buffer and the entry's integers are copied into a new
 * {@code IntsRef} sized to the list, which becomes the output for that key.
 *
 * @param affixes affix strings mapped to their integer lists
 * @return the finished FST
 * @throws IOException if adding to or finishing the builder fails
 */
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
  final IntSequenceOutputs intOutputs = IntSequenceOutputs.getSingleton();
  final Builder<IntsRef> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE4, intOutputs);
  final IntsRefBuilder inputBuffer = new IntsRefBuilder();

  for (Map.Entry<String,List<Integer>> row : affixes.entrySet()) {
    Util.toUTF32(row.getKey(), inputBuffer);

    final List<Integer> ids = row.getValue();
    final IntsRef packed = new IntsRef(ids.size());
    for (Integer id : ids) {
      // Write at the current end, then advance the length.
      packed.ints[packed.length] = id;
      packed.length++;
    }

    fstBuilder.add(inputBuffer.get(), packed);
  }

  return fstBuilder.finish();
}
代码示例来源:origin: org.apache.lucene/lucene-codecs
/**
 * Creates the writer for one field and seeds its FST with the empty input.
 *
 * <p>Records the current file pointer of {@code out} as {@code indexStart}, creates a
 * {@code BYTE1} builder over {@code PositiveIntOutputs}, and adds an empty {@code IntsRef}
 * mapped to {@code termsFilePointer}.
 *
 * @param fieldInfo the field this writer belongs to
 * @param termsFilePointer terms-file pointer stored for the empty input and remembered as
 *     {@code startTermsFilePointer}
 * @throws IOException if adding the initial entry fails
 */
public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException {
  this.fieldInfo = fieldInfo;
  startTermsFilePointer = termsFilePointer;
  indexStart = out.getFilePointer();

  fstOutputs = PositiveIntOutputs.getSingleton();
  fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, fstOutputs);
  // The empty string is always present in the FST.
  fstBuilder.add(new IntsRef(), termsFilePointer);
}
代码示例来源:origin: org.apache.lucene/lucene-codecs
/**
 * Adds every entry of {@code subIndex} to {@code builder} with its ordinals shifted by
 * {@code termOrdOffset}.
 *
 * <p>For each enumerated entry a new output is created from the original bytes, with
 * {@code startOrd} increased by the offset and {@code endOrd} decreased by it.
 * NOTE(review): {@code endOrd} is decreased rather than increased; this looks intentional if
 * {@code endOrd} is stored inverted (e.g. relative to {@code Long.MAX_VALUE}) — confirm against
 * the {@code Output} definition.
 *
 * @param builder builder receiving the shifted entries
 * @param subIndex FST whose entries are enumerated
 * @param termOrdOffset offset applied to the ordinals of every entry
 * @param scratchIntsRef reusable buffer for the converted input
 * @throws IOException if enumerating the sub-index or adding to the builder fails
 */
private void append(Builder<Output> builder, FST<Output> subIndex, long termOrdOffset, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<Output> entries = new BytesRefFSTEnum<>(subIndex);
  for (BytesRefFSTEnum.InputOutput<Output> entry = entries.next();
       entry != null;
       entry = entries.next()) {
    final Output original = entry.output;
    final long shiftedStart = termOrdOffset + original.startOrd;
    final long shiftedEnd = original.endOrd - termOrdOffset;
    final Output shifted = FST_OUTPUTS.newOutput(original.bytes, shiftedStart, shiftedEnd);
    builder.add(Util.toIntsRef(entry.input, scratchIntsRef), shifted);
  }
}
}
代码示例来源:origin: com.strapdata.elasticsearch/elasticsearch
/**
 * Writes all surface forms buffered for the current analyzed form to the FST builder, then
 * clears the buffer state.
 *
 * <p>The buffered entries in {@code [0, count)} are sorted first. The analyzed form gets a
 * 0 byte appended and is lengthened by one further byte; before each add that last byte is set
 * to the index of the entry being written (presumably to keep the FST inputs of entries that
 * share the analyzed form distinct).
 *
 * @param defaultWeight fallback for entries whose weight equals {@code -1}; limited to
 *     {@code Integer.MAX_VALUE} and run through {@code encodeWeight} before use
 * @throws IOException if adding to the builder fails
 */
public void finishTerm(long defaultWeight) throws IOException {
  ArrayUtil.timSort(surfaceFormsAndPayload, 0, count);

  analyzed.append((byte) 0);
  // Reserve the trailing byte that is rewritten for every entry below.
  analyzed.setLength(analyzed.length() + 1);
  analyzed.grow(analyzed.length());

  for (int slot = 0; slot < count; slot++) {
    analyzed.setByteAt(analyzed.length() - 1, (byte) slot);
    Util.toIntsRef(analyzed.get(), scratchInts);

    final SurfaceFormAndPayload form = surfaceFormsAndPayload[slot];
    final boolean hasOwnWeight = form.weight != -1;
    final long cost = hasOwnWeight
        ? form.weight
        : encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight));
    builder.add(scratchInts.get(), outputs.newPair(cost, form.payload));
  }

  seenSurfaceForms.clear();
  count = 0;
}
内容来源于网络,如有侵权,请联系作者删除!