【Lucene3.6.2入门系列】第12节_近实时搜索

x33g5p2x  于2021-12-24 转载在 其他  
字(6.7k)|赞(0)|评价(0)|浏览(445)

完整版见 https://jadyer.github.io/2013/08/20/lucene-nrt/

  1. package com.jadyer.lucene;
  2. import java.io.File;
  3. import java.io.IOException;
  4. import org.apache.lucene.analysis.standard.StandardAnalyzer;
  5. import org.apache.lucene.document.Document;
  6. import org.apache.lucene.document.Field;
  7. import org.apache.lucene.index.IndexReader;
  8. import org.apache.lucene.index.IndexWriter;
  9. import org.apache.lucene.index.IndexWriterConfig;
  10. import org.apache.lucene.index.Term;
  11. import org.apache.lucene.search.IndexSearcher;
  12. import org.apache.lucene.search.NRTManager;
  13. import org.apache.lucene.search.NRTManagerReopenThread;
  14. import org.apache.lucene.search.Query;
  15. import org.apache.lucene.search.ScoreDoc;
  16. import org.apache.lucene.search.TermQuery;
  17. import org.apache.lucene.search.TopDocs;
  18. import org.apache.lucene.search.NRTManager.TrackingIndexWriter;
  19. import org.apache.lucene.store.Directory;
  20. import org.apache.lucene.store.FSDirectory;
  21. import org.apache.lucene.util.Version;
  22. /**
  23. * 【Lucene3.6.2入门系列】第12节_近实时搜索
  24. * @see 实时搜索(near-real-time)---->只要数据发生变化,则马上更新索引(IndexWriter.commit())
  25. * @see 近实时搜索------------------>数据发生变化时,先将索引保存到内存中,然后在一个统一的时间再对内存中的所有索引执行commit提交动作
  26. * @see 为了实现近实时搜索,Lucene3.0提供的方式叫做reopen,后来的版本中提供了两个线程安全的类NRTManager和SearcherManager
  27. * @see 不过这俩线程安全的类在Lucene3.5和3.6版本中的用法有点不太一样,这点要注意
  28. * @create Aug 7, 2013 4:19:58 PM
  29. * @author 玄玉<http://blog.csdn.net/jadyer>
  30. */
  31. public class HelloNRTSearch {
  32. private IndexWriter writer;
  33. private NRTManager nrtManager;
  34. private TrackingIndexWriter trackWriter;
  35. public HelloNRTSearch(){
  36. try {
  37. Directory directory = FSDirectory.open(new File("myExample/myIndex/"));
  38. writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)));
  39. trackWriter = new NRTManager.TrackingIndexWriter(writer);
  40. // /*
  41. // * Lucene3.5中的NRTManager是通过下面的方式创建的
  42. // * 并且Lucene3.5中可以直接使用NRTManager.getSearcherManager(true)获取到org.apache.lucene.search.SearcherManager
  43. // */
  44. // nrtManager = new NRTManager(writer,new org.apache.lucene.search.SearcherWarmer() {
  45. // @Override
  46. // public void warm(IndexSearcher s) throws IOException {
  47. // System.out.println("IndexSearcher.reopen时会自动调用此方法");
  48. // }
  49. // });
  50. nrtManager = new NRTManager(trackWriter, null);
  51. //启动一个Lucene提供的后台线程来自动定时的执行NRTManager.maybeRefresh()方法
  52. //这里的后俩参数,是根据这篇分析的文章写的http://blog.mikemccandless.com/2011/11/near-real-time-readers-with-lucenes.html
  53. NRTManagerReopenThread reopenThread = new NRTManagerReopenThread(nrtManager, 5.0, 0.025);
  54. reopenThread.setName("NRT Reopen Thread");
  55. reopenThread.setDaemon(true);
  56. reopenThread.start();
  57. } catch (Exception e) {
  58. e.printStackTrace();
  59. }
  60. }
  61. /**
  62. * 创建索引
  63. */
  64. public static void createIndex(){
  65. String[] ids = {"1", "2", "3", "4", "5", "6"};
  66. String[] names = {"Michael", "Scofield", "Tbag", "Jack", "Jade", "Jadyer"};
  67. String[] contents = {"my blog", "my website", "my name", "my job is JavaDeveloper", "I am from Haerbin", "I like Lucene"};
  68. IndexWriter writer = null;
  69. Document doc = null;
  70. try{
  71. Directory directory = FSDirectory.open(new File("myExample/myIndex/"));
  72. writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)));
  73. writer.deleteAll();
  74. for(int i=0; i<names.length; i++){
  75. doc = new Document();
  76. doc.add(new Field("id",ids[i],Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
  77. doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
  78. doc.add(new Field("content", contents[i], Field.Store.YES, Field.Index.ANALYZED));
  79. writer.addDocument(doc);
  80. }
  81. }catch(Exception e) {
  82. e.printStackTrace();
  83. }finally{
  84. if(null != writer){
  85. try {
  86. writer.close();
  87. } catch (IOException ce) {
  88. ce.printStackTrace();
  89. }
  90. }
  91. }
  92. }
  93. /**
  94. * 通过IndexReader获取文档数量
  95. */
  96. public static void getDocsCount(){
  97. IndexReader reader = null;
  98. try {
  99. reader = IndexReader.open(FSDirectory.open(new File("myExample/myIndex/")));
  100. System.out.println("maxDocs:" + reader.maxDoc());
  101. System.out.println("numDocs:" + reader.numDocs());
  102. System.out.println("deletedDocs:" + reader.numDeletedDocs());
  103. } catch (Exception e) {
  104. e.printStackTrace();
  105. } finally {
  106. if(reader != null){
  107. try {
  108. reader.close();
  109. } catch (IOException e) {
  110. e.printStackTrace();
  111. }
  112. }
  113. }
  114. }
  115. /**
  116. * 搜索文件
  117. */
  118. public void searchFile(){
  119. //Lucene3.5里面可以直接使用NRTManager.getSearcherManager(true).acquire()
  120. IndexSearcher searcher = nrtManager.acquire();
  121. Query query = new TermQuery(new Term("content", "my"));
  122. try{
  123. TopDocs tds = searcher.search(query, 10);
  124. for(ScoreDoc sd : tds.scoreDocs){
  125. Document doc = searcher.doc(sd.doc);
  126. System.out.print("文档编号=" + sd.doc + " 文档权值=" + doc.getBoost() + " 文档评分=" + sd.score + " ");
  127. System.out.println("id=" + doc.get("id") + " name=" + doc.get("name") + " content=" + doc.get("content"));
  128. }
  129. }catch(Exception e) {
  130. e.printStackTrace();
  131. }finally{
  132. try {
  133. //这里就不要IndexSearcher.close()啦,而是交由NRTManager来释放
  134. nrtManager.release(searcher);
  135. //Lucene-3.6.2文档中ReferenceManager.acquire()方法描述里建议再手工设置searcher为null,以防止在其它地方被意外的使用
  136. searcher = null;
  137. } catch (IOException e) {
  138. e.printStackTrace();
  139. }
  140. }
  141. }
  142. /**
  143. * 更新索引
  144. */
  145. public void updateIndex(){
  146. Document doc = new Document();
  147. doc.add(new Field("id", "11", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
  148. doc.add(new Field("name", "xuanyu", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
  149. doc.add(new Field("content", "my name is xuanyu", Field.Store.YES, Field.Index.ANALYZED));
  150. try{
  151. //Lucene3.5中可以直接使用org.apache.lucene.search.NRTManager.updateDocument(new Term("id", "1"), doc)
  152. trackWriter.updateDocument(new Term("id", "1"), doc);
  153. }catch(IOException e) {
  154. e.printStackTrace();
  155. }
  156. }
  157. /**
  158. * 删除索引
  159. */
  160. public void deleteIndex(){
  161. try {
  162. //Lucene3.5中可以直接使用org.apache.lucene.search.NRTManager.deleteDocuments(new Term("id", "2"))
  163. trackWriter.deleteDocuments(new Term("id", "2"));
  164. } catch (IOException e) {
  165. e.printStackTrace();
  166. }
  167. }
  168. /**
  169. * 提交索引内容的变更情况
  170. */
  171. public void commitIndex(){
  172. try {
  173. writer.commit();
  174. } catch (IOException e) {
  175. e.printStackTrace();
  176. }
  177. }
  178. }

下面是用JUnit4.x写的小测试

  1. package com.jadyer.test;
  2. import org.junit.After;
  3. import org.junit.Before;
  4. import org.junit.Test;
  5. import com.jadyer.lucene.HelloNRTSearch;
  6. public class HelloNRTSearchTest {
  7. @Before
  8. public void init(){
  9. HelloNRTSearch.createIndex();
  10. }
  11. @After
  12. public void destroy(){
  13. HelloNRTSearch.getDocsCount();
  14. }
  15. @Test
  16. public void searchFile(){
  17. HelloNRTSearch hello = new HelloNRTSearch();
  18. for(int i=0; i<5; i++){
  19. hello.searchFile();
  20. System.out.println("-----------------------------------------------------------");
  21. hello.deleteIndex();
  22. if(i == 2){
  23. hello.updateIndex();
  24. }
  25. try {
  26. System.out.println(".........开始休眠5s(模拟近实时搜索情景)");
  27. Thread.sleep(5000);
  28. System.out.println(".........休眠结束");
  29. } catch (InterruptedException e) {
  30. e.printStackTrace();
  31. }
  32. }
  33. //不能单独去new HelloNRTSearch,要保证它们是同一个对象,否则所做的delete和update不会被commit
  34. hello.commitIndex();
  35. }
  36. }

相关文章