【Lucene3.6.2入门系列】第09节_高级搜索之自定义QueryParser

x33g5p2x  于2021-12-24 转载在 其他  
字(8.1k)|赞(0)|评价(0)|浏览(573)

完整版见 https://jadyer.github.io/2013/08/19/lucene-advanced-search-queryparser/

  1. package com.jadyer.lucene;
  2. import java.io.File;
  3. import java.io.IOException;
  4. import java.text.SimpleDateFormat;
  5. import java.util.Date;
  6. import org.apache.lucene.analysis.standard.StandardAnalyzer;
  7. import org.apache.lucene.document.Document;
  8. import org.apache.lucene.document.Field;
  9. import org.apache.lucene.document.NumericField;
  10. import org.apache.lucene.index.IndexReader;
  11. import org.apache.lucene.index.IndexWriter;
  12. import org.apache.lucene.index.IndexWriterConfig;
  13. import org.apache.lucene.queryParser.ParseException;
  14. import org.apache.lucene.queryParser.QueryParser;
  15. import org.apache.lucene.search.IndexSearcher;
  16. import org.apache.lucene.search.Query;
  17. import org.apache.lucene.search.ScoreDoc;
  18. import org.apache.lucene.search.TopDocs;
  19. import org.apache.lucene.store.Directory;
  20. import org.apache.lucene.store.FSDirectory;
  21. import org.apache.lucene.util.Version;
  22. import com.jadyer.custom.MyQueryParser;
  23. /**
  24. * 【Lucene3.6.2入门系列】第09节_高级搜索之自定义QueryParser
  25. * @create Aug 19, 2013 2:07:32 PM
  26. * @author 玄玉<http://blog.csdn.net/jadyer>
  27. */
  28. public class AdvancedSearch {
  29. private Directory directory;
  30. private IndexReader reader;
  31. public AdvancedSearch(){
  32. /**文件大小*/
  33. int[] sizes = {90, 10, 20, 10, 60, 50};
  34. /**文件名*/
  35. String[] names = {"Michael.java", "Scofield.ini", "Tbag.txt", "Jack", "Jade", "Jadyer"};
  36. /**文件内容*/
  37. String[] contents = {"my java blog is http://blog.csdn.net/jadyer",
  38. "my Java Website is http://www.jadyer.cn",
  39. "my name is jadyer",
  40. "I am a Java Developer",
  41. "I am from Haerbin",
  42. "I like java of Lucene"};
  43. /**文件日期*/
  44. Date[] dates = new Date[sizes.length];
  45. SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd HH:mm:ss");
  46. IndexWriter writer = null;
  47. Document doc = null;
  48. try {
  49. dates[0] = sdf.parse("20130407 15:25:30");
  50. dates[1] = sdf.parse("20130407 16:30:45");
  51. dates[2] = sdf.parse("20130213 11:15:25");
  52. dates[3] = sdf.parse("20130808 09:30:55");
  53. dates[4] = sdf.parse("20130526 13:54:22");
  54. dates[5] = sdf.parse("20130701 17:35:34");
  55. directory = FSDirectory.open(new File("myExample/01_index/"));
  56. writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)));
  57. writer.deleteAll();
  58. for(int i=0; i<sizes.length; i++){
  59. doc = new Document();
  60. doc.add(new NumericField("size",Field.Store.YES, true).setIntValue(sizes[i]));
  61. doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
  62. doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));
  63. doc.add(new NumericField("date", Field.Store.YES, true).setLongValue(dates[i].getTime()));
  64. writer.addDocument(doc);
  65. }
  66. } catch (Exception e) {
  67. e.printStackTrace();
  68. } finally {
  69. if(null != writer){
  70. try {
  71. writer.close();
  72. } catch (IOException ce) {
  73. ce.printStackTrace();
  74. }
  75. }
  76. }
  77. }
  78. /**
  79. * 获取IndexReader实例
  80. */
  81. private IndexReader getIndexReader(){
  82. try {
  83. if(reader == null){
  84. reader = IndexReader.open(directory);
  85. }else{
  86. //if the index was changed since the provided reader was opened, open and return a new reader; else,return null
  87. //如果当前reader在打开期间index发生改变,则打开并返回一个新的IndexReader,否则返回null
  88. IndexReader ir = IndexReader.openIfChanged(reader);
  89. if(ir != null){
  90. reader.close(); //关闭原reader
  91. reader = ir; //赋予新reader
  92. }
  93. }
  94. return reader;
  95. }catch(Exception e) {
  96. e.printStackTrace();
  97. }
  98. return null; //发生异常则返回null
  99. }
  100. /**
  101. * 自定义QueryParser的搜索
  102. * @param expr 搜索的表达式
  103. */
  104. public void searchByCustomQueryParser(String expr){
  105. IndexSearcher searcher = new IndexSearcher(this.getIndexReader());
  106. QueryParser parser = new MyQueryParser(Version.LUCENE_36, "content", new StandardAnalyzer(Version.LUCENE_36));
  107. try {
  108. Query query = parser.parse(expr);
  109. TopDocs tds = searcher.search(query, 10);
  110. for(ScoreDoc sd : tds.scoreDocs){
  111. Document doc = searcher.doc(sd.doc);
  112. System.out.print("文档编号=" + sd.doc + " 文档权值=" + doc.getBoost() + " 文档评分=" + sd.score + " ");
  113. System.out.println("size=" + doc.get("size") + " date=" + new SimpleDateFormat("yyyyMMdd HH:mm:ss").format(new Date(Long.parseLong(doc.get("date")))) + " name=" + doc.get("name"));
  114. }
  115. } catch (ParseException e) {
  116. System.err.println(e.getMessage());
  117. } catch (Exception e) {
  118. e.printStackTrace();
  119. } finally {
  120. if(null != searcher){
  121. try {
  122. searcher.close(); //记得关闭IndexSearcher
  123. } catch (IOException e) {
  124. e.printStackTrace();
  125. }
  126. }
  127. }
  128. }
  129. /**
  130. * 测试一下搜索效果
  131. */
  132. public static void main(String[] args) {
  133. AdvancedSearch advancedSearch = new AdvancedSearch();
  134. advancedSearch.searchByCustomQueryParser("name:Jadk~");
  135. advancedSearch.searchByCustomQueryParser("name:Ja??er");
  136. System.out.println("------------------------------------------------------------------------");
  137. advancedSearch.searchByCustomQueryParser("name:Jade");
  138. System.out.println("------------------------------------------------------------------------");
  139. advancedSearch.searchByCustomQueryParser("name:[h TO n]");
  140. System.out.println("------------------------------------------------------------------------");
  141. advancedSearch.searchByCustomQueryParser("size:[20 TO 80]");
  142. System.out.println("------------------------------------------------------------------------");
  143. advancedSearch.searchByCustomQueryParser("date:[20130407 TO 20130701]");
  144. }
  145. }

下面是自定义的MyQueryParser.java(这里主要实现了以下两个功能)

1)禁用模糊搜索和通配符搜索,以提高搜索性能

2)扩展范围搜索,使之支持数字域和日期域(如 size:[20 TO 80]、date:[20130407 TO 20130701])

  1. package com.jadyer.custom;
  2. import java.text.SimpleDateFormat;
  3. import java.util.regex.Pattern;
  4. import org.apache.lucene.analysis.Analyzer;
  5. import org.apache.lucene.queryParser.ParseException;
  6. import org.apache.lucene.queryParser.QueryParser;
  7. import org.apache.lucene.search.NumericRangeQuery;
  8. import org.apache.lucene.search.Query;
  9. import org.apache.lucene.util.Version;
  10. /**
  11. * 自定义QueryParser
  12. * @see --------------------------------------------------------------------------------------------------
  13. * @see 实际使用QueryParser的过程中,通常会考虑两个问题
  14. * @see 1)限制性能低的QueryParser--对于某些QueryParser在搜索时会使得性能降低,故考虑禁用这些搜索以提升性能
  15. * @see 2)扩展基于数字和日期的搜索---有时需要进行一个数字的范围搜索,故需扩展原有的QueryParser才能实现此搜索
  16. * @see --------------------------------------------------------------------------------------------------
  17. * @see 限制性能低的QueryParser
  18. * @see 继承QueryParser类并重载相应方法,比如getFuzzyQuery和getWildcardQuery
  19. * @see 这样造成的结果就是,当输入普通的搜索表达式时,如'I AND Haerbin'可以正常搜索
  20. * @see 但输入'name:Jadk~'或者'name:Ja??er'时,就会执行到重载方法中,这时就可以自行处理了,比如本例中禁止该功能
  21. * @see --------------------------------------------------------------------------------------------------
  22. * @see 扩展基于数字和日期的查询
  23. * @see 思路就是继承QueryParser类后重载getRangeQuery()方法
  24. * @see 再针对数字和日期的'域',做特殊处理(使用NumericRangeQuery.newIntRange()方法来搜索)
  25. * @see --------------------------------------------------------------------------------------------------
  26. * @create Aug 6, 2013 4:13:42 PM
  27. * @author 玄玉<http://blog.csdn.net/jadyer>
  28. */
  29. public class MyQueryParser extends QueryParser {
  30. public MyQueryParser(Version matchVersion, String f, Analyzer a) {
  31. super(matchVersion, f, a);
  32. }
  33. @Override
  34. protected Query getWildcardQuery(String field, String termStr) throws ParseException {
  35. throw new ParseException("由于性能原因,已禁用通配符搜索,请输入更精确的信息进行搜索 ^_^ ^_^");
  36. }
  37. @Override
  38. protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException {
  39. throw new ParseException("由于性能原因,已禁用模糊搜索,请输入更精确的信息进行搜索 ^_^ ^_^");
  40. }
  41. @Override
  42. protected Query getRangeQuery(String field, String part1, String part2, boolean inclusive) throws ParseException {
  43. if(field.equals("size")){
  44. //默认的QueryParser.parse(String query)表达式中并不支持'size:[20 TO 80]'数字的域值
  45. //这样一来,针对数字的域值进行特殊处理,那么QueryParser表达式就支持数字了
  46. return NumericRangeQuery.newIntRange(field, Integer.parseInt(part1), Integer.parseInt(part2), inclusive, inclusive);
  47. }else if(field.equals("date")){
  48. String regex = "\\d{8}";
  49. String dateType = "yyyyMMdd";
  50. if(Pattern.matches(regex, part1) && Pattern.matches(regex, part2)){
  51. SimpleDateFormat sdf = new SimpleDateFormat(dateType);
  52. try {
  53. long min = sdf.parse(part1).getTime();
  54. long max = sdf.parse(part2).getTime();
  55. //使之支持日期的检索,应用时直接QueryParser.parse("date:[20130407 TO 20130701]")
  56. return NumericRangeQuery.newLongRange(field, min, max, inclusive, inclusive);
  57. } catch (java.text.ParseException e) {
  58. e.printStackTrace();
  59. }
  60. }else{
  61. throw new ParseException("Unknown date format, please use '" + dateType + "'");
  62. }
  63. }
  64. //如没找到匹配的Field域,那么返回默认的TermRangeQuery
  65. return super.getRangeQuery(field, part1, part2, inclusive);
  66. }
  67. }

相关文章