Lucene:是使用不区分大小写的MappingCharFilter还是在它之前应用LowerCaseFilter?

mcdcgff0  于 2023-08-05  发布在  Lucene
关注(0)|答案(1)|浏览(170)

有没有办法在MappingCharFilter之前有一个LowerCaseFilter?我意识到我可以在传入Lucene之前将搜索输入小写,但是这个Analyzer只针对特定的字段,所以将小写打包到分析器中会更好。我确实遇到了一个正则表达式过滤器,但我的map中有大约100个术语,并且不认为拥有大约100个正则表达式过滤器是明智的。

/// <summary>
/// Analyzer that runs the input through a <see cref="MappingCharFilter"/> before
/// tokenizing it with <see cref="StandardTokenizer"/> and <see cref="StandardFilter"/>.
/// </summary>
public class CustomAnalyzer : Analyzer
{
    /// <summary>
    /// Builds the tokenizer/filter chain for a field.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed (not used here).</param>
    /// <param name="reader">Reader supplying the text; already wrapped by <see cref="InitReader"/>.</param>
    /// <returns>The tokenizer together with the end of the filter chain.</returns>
    protected override TokenStreamComponents CreateComponents(string fieldName, System.IO.TextReader reader)
    {
        // The char filter is deliberately NOT applied here: Analyzer caches the
        // components and, on reuse, only hands the tokenizer a new reader, so a
        // wrapper created in this method would be skipped after the first use.
        Tokenizer tokenizer = new StandardTokenizer(IndexConfig.LUCENE_VERSION, reader);
        TokenStream tokenStream = new StandardFilter(IndexConfig.LUCENE_VERSION, tokenizer);

        return new TokenStreamComponents(tokenizer, tokenStream);
    }

    /// <summary>
    /// Wraps every incoming reader in the mapping char filter so the character
    /// normalization is applied on first use and on every reuse of the components.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed (not used here).</param>
    /// <param name="reader">The raw reader supplied by the caller.</param>
    /// <returns>A <see cref="MappingCharFilter"/> reading from <paramref name="reader"/>.</returns>
    protected override System.IO.TextReader InitReader(string fieldName, System.IO.TextReader reader)
    {
        // NOTE(review): MyNormalizedCharMap() is now invoked once per analyzed
        // value; if it rebuilds the map on each call, cache the result.
        return new MappingCharFilter(MyNormalizedCharMap(), reader);
    }
}

字符串
我在想,也许需要一个自定义的TokenFilter来实现这一点。如果你有如何为Lucene.net编写自定义TokenFilter的好例子,请分享!

dxpyg8gm

dxpyg8gm1#

/// <summary>
/// Analyzer whose chain is: standard tokenizer, then lower-casing, then
/// dictionary-based term replacement.
/// </summary>
public class NormalizingAnalyzer : Analyzer
{
    // Replacement table shared by all instances; assigned once in the static constructor.
    private static Dictionary<string, string> _replacementsByTerm;

    static NormalizingAnalyzer()
    {
        _replacementsByTerm = MyNormalizingDictionary();
    }

    /// <summary>
    /// Creates the tokenizer and filter chain used for a field.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed (not used here).</param>
    /// <param name="reader">Reader supplying the text to tokenize.</param>
    /// <returns>The tokenizer paired with the last filter of the chain.</returns>
    protected override TokenStreamComponents CreateComponents(string fieldName, System.IO.TextReader reader)
    {
        var source = new StandardTokenizer(IndexConfig.LUCENE_VERSION, reader);

        // Lower-case first so the dictionary lookup sees lower-cased terms.
        var lowerCased = new LowerCaseFilter(IndexConfig.LUCENE_VERSION, source);
        var normalized = new DictionaryReplaceFilter(lowerCased, _replacementsByTerm);

        return new TokenStreamComponents(source, normalized);
    }
}

字符串
或者,不设置LowerCaseFilter,而是创建一个忽略大小写(ignore case)的字典,例如:new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)

/// <summary>
/// Token filter that replaces a token's term text with the value mapped to it in
/// a replacement dictionary. Terms that have no entry pass through unchanged.
/// </summary>
public sealed class DictionaryReplaceFilter : TokenFilter
{
    private readonly ICharTermAttribute _termAttribute;
    private readonly Dictionary<string, string> _termReplacements;

    /// <summary>
    /// Creates the filter on top of <paramref name="input"/>.
    /// </summary>
    /// <param name="input">The upstream token stream.</param>
    /// <param name="termReplacements">
    /// Maps a term to its replacement text. Key matching follows the dictionary's
    /// own comparer.
    /// </param>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="termReplacements"/> is <c>null</c>.
    /// </exception>
    public DictionaryReplaceFilter(TokenStream input, Dictionary<string, string> termReplacements) : base(input)
    {
        // Fail at construction instead of with a NullReferenceException on the first token.
        _termReplacements = termReplacements ?? throw new System.ArgumentNullException(nameof(termReplacements));

        // AddAttribute registers the attribute if it is missing and otherwise
        // returns the instance shared with the upstream stream; GetAttribute
        // only looks up an attribute that is already present.
        _termAttribute = AddAttribute<ICharTermAttribute>();
    }

    /// <summary>
    /// Advances the upstream stream by one token and, when the term has an entry
    /// in the dictionary, overwrites the term text with the mapped value.
    /// </summary>
    /// <returns><c>true</c> if a token is available; <c>false</c> at end of stream.</returns>
    public override bool IncrementToken()
    {
        if (!m_input.IncrementToken())
        {
            return false;
        }

        if (_termReplacements.TryGetValue(_termAttribute.ToString(), out string replacement))
        {
            // Clear the current term buffer, then write the replacement into it.
            _termAttribute.SetEmpty().Append(replacement);
        }

        return true;
    }
}

相关问题