【Lucene3.6.2入门系列】第08节_高级搜索之自定义评分

x33g5p2x  于2021-12-24 转载在 其他  
字(7.1k)|赞(0)|评价(0)|浏览(436)

完整版见 https://jadyer.github.io/2013/08/19/lucene-advanced-search-score/

  package com.jadyer.lucene;

  import java.io.File;
  import java.io.IOException;
  import java.util.Random;

  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.document.NumericField;
  import org.apache.lucene.index.CorruptIndexException;
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.search.IndexSearcher;
  import org.apache.lucene.search.Query;
  import org.apache.lucene.search.ScoreDoc;
  import org.apache.lucene.search.TermQuery;
  import org.apache.lucene.search.TopDocs;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.store.FSDirectory;
  import org.apache.lucene.util.Version;

  import com.jadyer.custom.MyNameScoreQuery;

  /**
   * [Lucene 3.6.2 primer] Lesson 08 - advanced search with custom scoring.
   *
   * <p>The constructor (re)builds a small six-document demo index on disk;
   * {@link #searchByCustomScoreQuery()} then searches it with a custom score query and
   * prints every hit. Call {@link #close()} when done to release the cached reader and
   * the directory.
   *
   * @create Aug 19, 2013 12:13:14 PM
   * @author 玄玉<http://blog.csdn.net/jadyer>
   */
  public class AdvancedSearchByScore {
      private Directory directory;
      private IndexReader reader;

      /**
       * Rebuilds the demo index under {@code myExample/01_index/}: all existing documents
       * are deleted and six new ones are added. Failures are reported on stderr and leave
       * the instance without a usable index (searches then print a notice and return).
       */
      public AdvancedSearchByScore() {
          // File sizes
          int[] sizes = {90, 10, 20, 10, 60, 50};
          // File names
          String[] names = {"Michael.java", "Scofield.ini", "Tbag.txt", "Jack", "Jade", "Jadyer"};
          // File contents
          String[] contents = {"my java blog is http://blog.csdn.net/jadyer",
                               "my Java Website is http://www.jadyer.cn",
                               "my name is jadyer",
                               "I am a Java Developer",
                               "I am from Haerbin",
                               "I like java of Lucene"};
          // One generator for the whole loop instead of a new Random per document.
          Random random = new Random();
          IndexWriter writer = null;
          try {
              directory = FSDirectory.open(new File("myExample/01_index/"));
              writer = new IndexWriter(directory,
                      new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)));
              writer.deleteAll();
              for (int i = 0; i < sizes.length; i++) {
                  Document doc = new Document();
                  doc.add(new NumericField("size", Field.Store.YES, true).setIntValue(sizes[i]));
                  doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
                  doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));
                  // Extra field used only by the custom scoring demo.
                  // The single-argument NumericField constructor creates an indexed, non-stored field.
                  // The value may be 0, so any scorer dividing by it must guard against that.
                  doc.add(new NumericField("fileScore").setIntValue(random.nextInt(600)));
                  writer.addDocument(doc);
              }
          } catch (Exception e) {
              e.printStackTrace();
          } finally {
              if (writer != null) {
                  try {
                      writer.close();
                  } catch (IOException ce) {
                      ce.printStackTrace();
                  }
              }
          }
      }

      /**
       * Returns the cached {@link IndexReader}, opening it on first use and swapping in a
       * fresh reader when the index has changed since the cached one was opened.
       *
       * @return the current reader, or {@code null} if the directory is unavailable or
       *         opening/refreshing the reader failed
       */
      private IndexReader getIndexReader() {
          if (directory == null) {
              // Index construction failed or close() was already called.
              return null;
          }
          try {
              if (reader == null) {
                  reader = IndexReader.open(directory);
              } else {
                  // openIfChanged returns a new reader if the index changed since the given
                  // reader was opened, otherwise null.
                  IndexReader refreshed = IndexReader.openIfChanged(reader);
                  if (refreshed != null) {
                      reader.close(); // release the stale reader
                      reader = refreshed;
                  }
              }
              return reader;
          } catch (Exception e) {
              e.printStackTrace();
          }
          return null; // an exception occurred
      }

      /**
       * Runs the term query {@code content:java} wrapped in {@link MyNameScoreQuery} and
       * prints up to ten hits. I/O failures (including {@link CorruptIndexException}) are
       * reported on stderr.
       */
      public void searchByCustomScoreQuery() {
          IndexReader currentReader = this.getIndexReader();
          if (currentReader == null) {
              // Without this guard the IndexSearcher constructor would throw a NullPointerException.
              System.err.println("Index reader is unavailable; search skipped.");
              return;
          }
          IndexSearcher searcher = new IndexSearcher(currentReader);
          // Alternative: score by the dedicated "fileScore" field instead of the file name:
          //   FieldScoreQuery fsq = new FieldScoreQuery("fileScore", FieldScoreQuery.Type.INT);
          //   Query query = new MyCustomScoreQuery(new TermQuery(new Term("content", "java")), fsq);
          Query query = new MyNameScoreQuery(new TermQuery(new Term("content", "java")));
          try {
              TopDocs tds = searcher.search(query, 10);
              for (ScoreDoc sd : tds.scoreDocs) {
                  Document doc = searcher.doc(sd.doc);
                  // Note: Document.getBoost() always reports 1.0 for documents loaded from
                  // the index; the index-time boost is not retrievable at search time.
                  System.out.print("文档编号=" + sd.doc + " 文档权值=" + doc.getBoost() + " 文档评分=" + sd.score + " ");
                  System.out.println("size=" + doc.get("size") + " name=" + doc.get("name"));
              }
          } catch (IOException e) {
              // CorruptIndexException is a subclass of IOException and is handled here as well.
              e.printStackTrace();
          } finally {
              try {
                  // Closing a searcher built from an existing reader does not close that
                  // reader; the cached reader is released in close().
                  searcher.close();
              } catch (IOException e) {
                  e.printStackTrace();
              }
          }
      }

      /**
       * Releases the cached reader and the index directory. Safe to call more than once;
       * after this call searches print a notice and return without searching.
       */
      public void close() {
          if (reader != null) {
              try {
                  reader.close();
              } catch (IOException e) {
                  e.printStackTrace();
              }
              reader = null;
          }
          if (directory != null) {
              try {
                  directory.close();
              } catch (IOException e) {
                  e.printStackTrace();
              }
              directory = null;
          }
      }

      /**
       * Demonstrates the custom scoring.
       */
      public static void main(String[] args) {
          AdvancedSearchByScore demo = new AdvancedSearchByScore();
          try {
              demo.searchByCustomScoreQuery();
          } finally {
              demo.close();
          }
      }
  }

下面是我们自定义的评分类MyCustomScoreQuery.java

  package com.jadyer.custom;

  import java.io.IOException;

  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.search.Query;
  import org.apache.lucene.search.function.CustomScoreProvider;
  import org.apache.lucene.search.function.CustomScoreQuery;
  import org.apache.lucene.search.function.ValueSourceQuery;

  /**
   * Custom score query that divides the sub-query score by the value of a score field.
   *
   * <p>Steps for implementing custom scoring:
   * <ol>
   *   <li>Create a class that extends {@link CustomScoreQuery}.</li>
   *   <li>Override {@link CustomScoreQuery#getCustomScoreProvider(IndexReader)}.</li>
   *   <li>Create a class that extends {@link CustomScoreProvider}.</li>
   *   <li>Override {@link CustomScoreProvider#customScore(int, float, float)}; this is
   *       where the custom scoring logic lives.</li>
   * </ol>
   *
   * @create Aug 6, 2013 10:30:46 AM
   * @author 玄玉<http://blog.csdn.net/jadyer>
   */
  public class MyCustomScoreQuery extends CustomScoreQuery {
      private static final long serialVersionUID = -2373017691291184609L;

      /**
       * @param subQuery    the query that selects and initially scores the documents
       * @param valSrcQuery the query that supplies the score-field value, e.g. a
       *                    FieldScoreQuery over the score field
       */
      public MyCustomScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery) {
          super(subQuery, valSrcQuery);
      }

      /**
       * Supplies the provider implementing the custom formula. Returning
       * {@code super.getCustomScoreProvider(reader)} instead would keep the default rule,
       * i.e. [sub-query score * score-field value].
       */
      @Override
      protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) throws IOException {
          return new MyCustomScoreProvider(reader);
      }

      /** Scores a document as sub-query score divided by score-field value. */
      private static final class MyCustomScoreProvider extends CustomScoreProvider {
          MyCustomScoreProvider(IndexReader reader) {
              super(reader);
          }

          /**
           * @param doc           id of the document being scored
           * @param subQueryScore score assigned by the wrapped sub-query
           * @param valSrcScore   value produced by the score-field query
           * @return the final document score, i.e. what {@code ScoreDoc.score} reports
           */
          @Override
          public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException {
              System.out.println("subQueryScore=" + subQueryScore + " valSrcScore=" + valSrcScore);
              if (valSrcScore == 0f) {
                  // A score-field value of 0 would produce Infinity/NaN and corrupt the
                  // ranking; leave the sub-query score unchanged in that case.
                  return subQueryScore;
              }
              return subQueryScore / valSrcScore;
          }
      }
  }

下面是自定义的采用特殊文件名作为评分标准的评分类MyNameScoreQuery.java

  package com.jadyer.custom;

  import java.io.IOException;

  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.search.FieldCache;
  import org.apache.lucene.search.Query;
  import org.apache.lucene.search.function.CustomScoreProvider;
  import org.apache.lucene.search.function.CustomScoreQuery;

  /**
   * Custom score query that boosts or demotes documents based on their file name.
   *
   * @create Aug 6, 2013 2:17:13 PM
   * @author 玄玉<http://blog.csdn.net/jadyer>
   */
  public class MyNameScoreQuery extends CustomScoreQuery {
      private static final long serialVersionUID = -2813985445544972520L;

      /**
       * @param subQuery the query that selects and initially scores the documents; no
       *                 ValueSourceQuery is needed because scoring depends on the file
       *                 name only
       */
      public MyNameScoreQuery(Query subQuery) {
          super(subQuery);
      }

      /**
       * Supplies the file-name based provider.
       *
       * @throws IOException if the {@code name} values cannot be loaded from the reader
       */
      @Override
      protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) throws IOException {
          return new FilenameScoreProvider(reader);
      }

      /** Multiplies the score of .java/.ini files by 1.5 and divides all others by 1.5. */
      private static final class FilenameScoreProvider extends CustomScoreProvider {
          /** Value of the {@code name} field per document id, as returned by the field cache. */
          private final String[] filenames;

          /**
           * Loads the {@code name} field of every document through the field cache (fast,
           * at the cost of memory while the reader stays open).
           *
           * @throws IOException if the field cache cannot be populated; propagated rather
           *                     than swallowed so that scoring never runs with a missing array
           */
          FilenameScoreProvider(IndexReader reader) throws IOException {
              super(reader);
              // NOTE(review): the field cache yields indexed terms, not stored values. This
              // presumably relies on the analyzer keeping each file name as a single token;
              // confirm against how the "name" field is indexed.
              this.filenames = FieldCache.DEFAULT.getStrings(reader, "name");
          }

          /**
           * @param doc           id of the document being scored
           * @param subQueryScore score assigned by the wrapped sub-query
           * @param valSrcScore   1.0 here, because MyNameScoreQuery is built without a
           *                      ValueSourceQuery
           * @return {@code subQueryScore * 1.5} for .java/.ini files, otherwise
           *         {@code subQueryScore / 1.5}
           */
          @Override
          public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException {
              System.out.println("subQueryScore=" + subQueryScore + " valSrcScore=" + valSrcScore);
              String filename = filenames[doc];
              // Documents without a name term have a null entry; treat them as "other files".
              if (filename != null && (filename.endsWith(".java") || filename.endsWith(".ini"))) {
                  // Only java and ini files are boosted.
                  return subQueryScore * 1.5f;
              }
              return subQueryScore / 1.5f;
          }
      }
  }

相关文章