【Lucene3.6.2入门系列】第06节_高级搜索之排序

x33g5p2x  于2021-12-24 转载在 其他  
字(5.8k)|赞(0)|评价(0)|浏览(451)

完整版见 https://jadyer.github.io/2013/08/19/lucene-advanced-search-sort/

  1. package com.jadyer.lucene;
  2. import java.io.File;
  3. import java.io.IOException;
  4. import java.text.SimpleDateFormat;
  5. import java.util.Date;
  6. import org.apache.lucene.analysis.standard.StandardAnalyzer;
  7. import org.apache.lucene.document.Document;
  8. import org.apache.lucene.document.Field;
  9. import org.apache.lucene.document.NumericField;
  10. import org.apache.lucene.index.IndexReader;
  11. import org.apache.lucene.index.IndexWriter;
  12. import org.apache.lucene.index.IndexWriterConfig;
  13. import org.apache.lucene.queryParser.QueryParser;
  14. import org.apache.lucene.search.IndexSearcher;
  15. import org.apache.lucene.search.ScoreDoc;
  16. import org.apache.lucene.search.Sort;
  17. import org.apache.lucene.search.SortField;
  18. import org.apache.lucene.search.TopDocs;
  19. import org.apache.lucene.store.Directory;
  20. import org.apache.lucene.store.FSDirectory;
  21. import org.apache.lucene.util.Version;
  22. /**
  23. * 【Lucene3.6.2入门系列】第06节_高级搜索之排序
  24. * @create Aug 19, 2013 10:38:19 AM
  25. * @author 玄玉<http://blog.csdn.net/jadyer>
  26. */
  27. public class AdvancedSearchBySort {
  28. private Directory directory;
  29. private IndexReader reader;
  30. public AdvancedSearchBySort(){
  31. /**文件大小*/
  32. int[] sizes = {90, 10, 20, 10, 60, 50};
  33. /**文件名*/
  34. String[] names = {"Michael.java", "Scofield.ini", "Tbag.txt", "Jack", "Jade", "Jadyer"};
  35. /**文件内容*/
  36. String[] contents = {"my java blog is http://blog.csdn.net/jadyer",
  37. "my Java Website is http://www.jadyer.cn",
  38. "my name is jadyer",
  39. "I am a Java Developer",
  40. "I am from Haerbin",
  41. "I like java of Lucene"};
  42. /**文件日期*/
  43. Date[] dates = new Date[sizes.length];
  44. SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd HH:mm:ss");
  45. IndexWriter writer = null;
  46. Document doc = null;
  47. try {
  48. dates[0] = sdf.parse("20130407 15:25:30");
  49. dates[1] = sdf.parse("20130407 16:30:45");
  50. dates[2] = sdf.parse("20130213 11:15:25");
  51. dates[3] = sdf.parse("20130808 09:30:55");
  52. dates[4] = sdf.parse("20130526 13:54:22");
  53. dates[5] = sdf.parse("20130701 17:35:34");
  54. directory = FSDirectory.open(new File("myExample/01_index/"));
  55. writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)));
  56. writer.deleteAll();
  57. for(int i=0; i<sizes.length; i++){
  58. doc = new Document();
  59. doc.add(new NumericField("size",Field.Store.YES, true).setIntValue(sizes[i]));
  60. doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
  61. doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));
  62. doc.add(new NumericField("date", Field.Store.YES, true).setLongValue(dates[i].getTime()));
  63. writer.addDocument(doc);
  64. }
  65. } catch (Exception e) {
  66. e.printStackTrace();
  67. } finally {
  68. if(null != writer){
  69. try {
  70. writer.close();
  71. } catch (IOException ce) {
  72. ce.printStackTrace();
  73. }
  74. }
  75. }
  76. }
  77. /**
  78. * 获取IndexReader实例
  79. */
  80. private IndexReader getIndexReader(){
  81. try {
  82. if(reader == null){
  83. reader = IndexReader.open(directory);
  84. }else{
  85. //if the index was changed since the provided reader was opened, open and return a new reader; else,return null
  86. //如果当前reader在打开期间index发生改变,则打开并返回一个新的IndexReader,否则返回null
  87. IndexReader ir = IndexReader.openIfChanged(reader);
  88. if(ir != null){
  89. reader.close(); //关闭原reader
  90. reader = ir; //赋予新reader
  91. }
  92. }
  93. return reader;
  94. }catch(Exception e) {
  95. e.printStackTrace();
  96. }
  97. return null; //发生异常则返回null
  98. }
  99. /**
  100. * 搜索排序
  101. * @see 关于Sort参数的可输入规则,如下所示
  102. * @see 1)Sort.INDEXORDER--使用文档编号从小到大的顺序进行排序
  103. * @see 2)Sort.RELEVANCE---使用文档评分从大到小的顺序进行排序,也是默认的排序规则,等价于search(query, 10)
  104. * @see 3)new Sort(new SortField("size", SortField.INT))-----------使用文件大小从小到大的顺序排序
  105. * @see 4)new Sort(new SortField("date", SortField.LONG))----------使用文件日期从以前到现在的顺序排序
  106. * @see 5)new Sort(new SortField("name", SortField.STRING))--------使用文件名从A到Z的顺序排序
  107. * @see 6)new Sort(new SortField("name", SortField.STRING, true))--使用文件名从Z到A的顺序排序
  108. * @see 7)new Sort(new SortField("size", SortField.INT), SortField.FIELD_SCORE)--先按照文件大小排序,再按照文档评分排序(可以指定多个排序规则)
  109. * @see 注意:以上7个Sort再打印文档评分时都是NaN,只有search(query, 10)才会正确打印文档评分
  110. * @param expr 搜索表达式
  111. * @param sort 排序规则
  112. */
  113. public void searchBySort(String expr, Sort sort){
  114. IndexSearcher searcher = new IndexSearcher(this.getIndexReader());
  115. QueryParser parser = new QueryParser(Version.LUCENE_36, "content", new StandardAnalyzer(Version.LUCENE_36));
  116. TopDocs tds = null;
  117. try {
  118. if(null == sort){
  119. tds = searcher.search(parser.parse(expr), 10);
  120. }else{
  121. tds = searcher.search(parser.parse(expr), 10, sort);
  122. }
  123. for(ScoreDoc sd : tds.scoreDocs){
  124. Document doc = searcher.doc(sd.doc);
  125. System.out.print("文档编号=" + sd.doc + " 文档权值=" + doc.getBoost() + " 文档评分=" + sd.score + " ");
  126. System.out.println("size=" + doc.get("size") + " date=" + new SimpleDateFormat("yyyyMMdd HH:mm:ss").format(new Date(Long.parseLong(doc.get("date")))) + " name=" + doc.get("name"));
  127. }
  128. } catch (Exception e) {
  129. e.printStackTrace();
  130. } finally {
  131. if(searcher != null){
  132. try {
  133. searcher.close();
  134. } catch (IOException e) {
  135. e.printStackTrace();
  136. }
  137. }
  138. }
  139. }
  140. /**
  141. * 测试一下排序效果
  142. */
  143. public static void main(String[] args) {
  144. AdvancedSearchBySort advancedSearch = new AdvancedSearchBySort();
  145. // //使用文档评分从大到小的顺序进行排序,也是默认的排序规则
  146. // advancedSearch.searchBySort("Java", null);
  147. // advancedSearch.searchBySort("Java", Sort.RELEVANCE);
  148. // //使用文档编号从小到大的顺序进行排序
  149. // advancedSearch.searchBySort("Java", Sort.INDEXORDER);
  150. // //使用文件大小从小到大的顺序排序
  151. // advancedSearch.searchBySort("Java", new Sort(new SortField("size", SortField.INT)));
  152. // //使用文件日期从以前到现在的顺序排序
  153. // advancedSearch.searchBySort("Java", new Sort(new SortField("date", SortField.LONG)));
  154. // //使用文件名从A到Z的顺序排序
  155. // advancedSearch.searchBySort("Java", new Sort(new SortField("name", SortField.STRING)));
  156. // //使用文件名从Z到A的顺序排序
  157. // advancedSearch.searchBySort("Java", new Sort(new SortField("name", SortField.STRING, true)));
  158. //先按照文件大小排序,再按照文档评分排序(可以指定多个排序规则)
  159. advancedSearch.searchBySort("Java", new Sort(new SortField("size", SortField.INT), SortField.FIELD_SCORE));
  160. }
  161. }

相关文章