`

lucene 搜索入门实例

 
阅读更多
  1. Y_indexer.java:建立索引
  2. packagecom.hapark.lucene;
  3. importjava.io.BufferedReader;
  4. importjava.io.File;
  5. importjava.io.FileInputStream;
  6. importjava.io.IOException;
  7. importjava.io.InputStreamReader;
  8. importjava.util.ArrayList;
  9. importjava.util.Date;
  10. importorg.apache.lucene.analysis.Analyzer;
  11. importorg.apache.lucene.analysis.standard.StandardAnalyzer;
  12. importorg.apache.lucene.document.Document;
  13. importorg.apache.lucene.document.Field;
  14. importorg.apache.lucene.index.IndexWriter;
  15. publicclassY_indexer{
  16. publicstaticvoidmain(String[]args)throwsException{
  17. /*指明要索引文件夹的位置,这里是D盘的y文件夹下*/
  18. FilefileDir=newFile("d:\\y");
  19. /*这里放索引文件的位置*/
  20. FileindexDir=newFile("d:\\index");
  21. AnalyzerluceneAnalyzer=newStandardAnalyzer();
  22. IndexWriterindexWriter=newIndexWriter(indexDir,luceneAnalyzer,
  23. true);
  24. ArrayListlist=newArrayList();
  25. getList(fileDir,list);
  26. File[]textFiles=(File[])list.toArray(newFile[0]);
  27. System.out.println(textFiles.length);
  28. longstartTime=newDate().getTime();
  29. //增加document到索引去
  30. for(inti=0;i<textFiles.length;i++){
  31. if(textFiles[i].isFile()
  32. &&textFiles[i].getName().endsWith(".html")){
  33. System.out.println("File"+textFiles[i].getCanonicalPath()
  34. +"正在被索引....");
  35. Stringtemp=FileReaderAll(textFiles[i].getCanonicalPath(),
  36. "GBK");
  37. System.out.println(temp);
  38. Documentdocument=newDocument();
  39. FieldFieldPath=newField("path",textFiles[i].getPath(),
  40. Field.Store.YES,Field.Index.NO);
  41. FieldFieldBody=newField("body",temp,Field.Store.YES,
  42. Field.Index.TOKENIZED,
  43. Field.TermVector.WITH_POSITIONS_OFFSETS);
  44. FieldFieldTitle=newField("title",temp,Field.Store.YES,
  45. Field.Index.TOKENIZED,
  46. Field.TermVector.WITH_POSITIONS_OFFSETS);
  47. document.add(FieldPath);
  48. document.add(FieldBody);
  49. document.add(FieldTitle);
  50. indexWriter.addDocument(document);
  51. }
  52. }
  53. //optimize()方法是对索引进行优化
  54. indexWriter.optimize();
  55. indexWriter.close();
  56. //测试一下索引的时间
  57. longendTime=newDate().getTime();
  58. System.out
  59. .println("这花费了"
  60. +(endTime-startTime)
  61. +"毫秒来把文档增加到索引里面去!"
  62. +fileDir.getPath());
  63. }
  64. /**
  65. *多层文件夹
  66. *@paramfile
  67. *@paramlist
  68. */
  69. publicstaticvoidgetList(Filefile,ArrayListlist){
  70. if(file.isDirectory()&&file.getName().indexOf(".")!=0){
  71. File[]file2=file.listFiles();
  72. for(inti=0;i<file2.length;i++)
  73. getList(file2[i],list);
  74. }else{
  75. list.add(file);
  76. }
  77. }
  78. publicstaticStringFileReaderAll(StringFileName,Stringcharset)
  79. throwsIOException{
  80. BufferedReaderreader=newBufferedReader(newInputStreamReader(
  81. newFileInputStream(FileName),charset));
  82. Stringline=newString();
  83. Stringtemp=newString();
  84. while((line=reader.readLine())!=null){
  85. temp+=line;
  86. }
  87. reader.close();
  88. returntemp;
  89. }
  90. }

  1. Y_searcher.java:Lucene 搜索
  2. packagecom.hapark.lucene;
  3. importjava.io.IOException;
  4. importjava.util.ArrayList;
  5. importjava.util.Date;
  6. importjava.util.List;
  7. importorg.apache.lucene.analysis.Analyzer;
  8. importorg.apache.lucene.analysis.standard.StandardAnalyzer;
  9. importorg.apache.lucene.queryParser.ParseException;
  10. importorg.apache.lucene.queryParser.QueryParser;
  11. importorg.apache.lucene.search.Hits;
  12. importorg.apache.lucene.search.IndexSearcher;
  13. importorg.apache.lucene.search.Query;
  14. publicclassY_searcher{
  15. publicListsearch(){
  16. ListsearchResult=newArrayList();//创建一个List接口的一个实例类ArrayList类
  17. try{
  18. Hitshits=null;
  19. Stringkey="苏";
  20. Queryquery=null;
  21. IndexSearchersearcher=newIndexSearcher("d:\\yuyang");
  22. Analyzeranalyzer=newStandardAnalyzer();//创建一个Analyzer接口的一个实例类StandardAnalyzer
  23. QueryParserqp=newQueryParser("title",analyzer);
  24. query=qp.parse(key);
  25. if(searcher!=null){
  26. Datestart=newDate();
  27. hits=searcher.search(query);//遍历hist结果的length
  28. if(hits.length()==0){
  29. System.out.println("对不起。没你想要的结果!");
  30. }
  31. else{
  32. for(inti=0;i<hits.length();i++){
  33. Dateend=newDate();
  34. //System.out.println("找到:"+hits.length()+"Totalresult!");
  35. System.out.println("文件的路径:"+hits.doc(i).get("path"));
  36. //System.out.println("内容:"+hits.doc(i).get("body"));
  37. System.out.println(hits.doc(i).get("title"));
  38. System.out.println("检索完成,用时"+(end.getTime()-start.getTime())+"毫秒");
  39. }
  40. }
  41. }
  42. }
  43. catch(ParseExceptionex){
  44. }
  45. catch(IOExceptione){
  46. }
  47. returnsearchResult;
  48. }
  49. publicstaticvoidmain(Stringargs[]){
  50. Y_searchery_s=newY_searcher();
  51. y_s.search();
  52. }
  53. }

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics