`
zl198751
  • 浏览: 273649 次
  • 性别: Icon_minigender_1
  • 来自: 上海
社区版块
存档分类
最新评论

Lucene 基本用法。除了建立索引和检索之外，还可以进行排序（sort）、删除（delete）、更新（update），并使用 Paoding 中文分词器。

阅读更多
import java.io.IOException;
import java.util.Arrays;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.LockObtainFailedException;


public class test {
public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException, ParseException {
     String IDNEX_PATH = "c:/temp/pao";

   //获取Paoding中文分词器

   Analyzer analyzer = new PaodingAnalyzer();

   //建立索引

   IndexWriter writer = new IndexWriter(IDNEX_PATH, analyzer, true);

   Document doc = new Document();
   Field field = new Field("content", "你好,世界!维护世界和平", Field.Store.YES,

       Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);

   Field field2 = new Field("content2", "我要毁灭世界", Field.Store.YES,

           Field.Index.TOKENIZED); 
   doc.add(field);
   doc.add(field2);
 
   Document doc2 = new Document();
   Field field3 = new Field("content", "我要得到世界上最好的蛋糕", Field.Store.YES,

           Field.Index.TOKENIZED);
   Field field4 = new Field("content2", "蛋糕我最喜欢吃的", Field.Store.YES,

           Field.Index.TOKENIZED);
   doc2.add(field3);
   doc2.add(field4);
 
   writer.addDocument(doc);
   writer.addDocument(doc2);
 
   writer.close();

   System.out.println("Indexed success!");

  

   //检索

   IndexReader reader = IndexReader.open(IDNEX_PATH);

   QueryParser parser = new QueryParser("content2", analyzer);

   Query query = parser.parse("我要毁灭世界");

   Searcher searcher = new IndexSearcher(reader);

//   String key[] = {"世界" , "我要"};
//   String searchField[] = {"content", "content"};
//   BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT};
//   Query multiQuery = MultiFieldQueryParser.parse(key, searchField, flags, analyzer);
 
   Hits hits = searcher.search(query);

   if (hits.length() == 0) {

       System.out.println("hits.length=0");

   }


  for(int i = 0, j =hits.length();i < j;i++){
      Document readDoc = hits.doc(i);

      //高亮处理
      String text = readDoc.get("content2");
      String t2[] = readDoc.getValues("content");
    
      System.out.println(text);
    
      System.out.println(Arrays.toString(t2));
      TermPositionVector tpv = (TermPositionVector) reader.getTermFreqVector(i, "content");
      TokenStream ts = TokenSources.getTokenStream(tpv);
      System.out.println(ts);
  }



 
 
 
 
//   TermPositionVector tpv = (TermPositionVector) reader.getTermFreqVector(0, "content");
//
//   TokenStream ts = TokenSources.getTokenStream(tpv);
//
//   Formatter formatter = new Formatter() {
//
//       public String highlightTerm(String srcText, TokenGroup g) {
//
//           if (g.getTotalScore() <= 0) {
//
//               return srcText;
//
//           }
//
//           return "<b>" + srcText + "</b>";
//
//       }
//
//   };
//
//   Highlighter highlighter = new Highlighter(formatter, new QueryScorer(
//
//           query));
//
//   String result = highlighter.getBestFragments(ts, text, 5, "…");
//
//   System.out.println("result:\n\t" + result);

   reader.close();

}
}
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics