import java.io.IOException;
import java.util.Arrays;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.LockObtainFailedException;
public class test {

    /**
     * Demo: builds a two-document Lucene index at c:/temp/pao using the Paoding
     * Chinese analyzer, then searches field "content2" and, for each hit, prints
     * the stored fields and the TokenStream rebuilt from the "content" field's
     * term vector (the pattern used for highlighting).
     *
     * <p>Requires the Paoding dictionary to be configured on disk; uses the
     * Lucene 2.x API (Hits, Field.Index.TOKENIZED, TermPositionVector).
     *
     * @throws IOException              on index read/write failure
     * @throws ParseException           if the query string cannot be parsed
     * @throws CorruptIndexException    if the index is corrupt
     * @throws LockObtainFailedException if the index write lock is held
     */
    public static void main(String[] args) throws CorruptIndexException,
            LockObtainFailedException, IOException, ParseException {
        String indexPath = "c:/temp/pao";
        // Paoding Chinese word segmenter
        Analyzer analyzer = new PaodingAnalyzer();

        // ---- Build the index (third ctor arg true = create/overwrite) ----
        IndexWriter writer = new IndexWriter(indexPath, analyzer, true);
        try {
            Document doc = new Document();
            // Positions + offsets are stored so TokenSources can rebuild a
            // TokenStream from the term vector without re-analyzing the text.
            doc.add(new Field("content", "你好,世界!维护世界和平", Field.Store.YES,
                    Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            doc.add(new Field("content2", "我要毁灭世界", Field.Store.YES,
                    Field.Index.TOKENIZED));
            writer.addDocument(doc);

            Document doc2 = new Document();
            // FIX: the original indexed this "content" field WITHOUT term
            // vectors, so getTermFreqVector() returned null for this document
            // and the cast below blew up. Index it the same way as doc's.
            doc2.add(new Field("content", "我要得到世界上最好的蛋糕", Field.Store.YES,
                    Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            doc2.add(new Field("content2", "蛋糕我最喜欢吃的", Field.Store.YES,
                    Field.Index.TOKENIZED));
            writer.addDocument(doc2);
        } finally {
            // Always release the write lock, even if addDocument throws.
            writer.close();
        }
        System.out.println("Indexed success!");

        // ---- Search ----
        IndexReader reader = IndexReader.open(indexPath);
        Searcher searcher = new IndexSearcher(reader);
        try {
            QueryParser parser = new QueryParser("content2", analyzer);
            Query query = parser.parse("我要毁灭世界");
            Hits hits = searcher.search(query);
            if (hits.length() == 0) {
                System.out.println("hits.length=0");
            }
            for (int i = 0, n = hits.length(); i < n; i++) {
                Document readDoc = hits.doc(i);
                String text = readDoc.get("content2");
                String[] contentValues = readDoc.getValues("content");
                System.out.println(text);
                System.out.println(Arrays.toString(contentValues));

                // FIX: getTermFreqVector() takes a Lucene document id, not the
                // Hits rank. The original passed the loop index i, which only
                // matches the doc id by coincidence; hits.id(i) is correct.
                TermPositionVector tpv = (TermPositionVector)
                        reader.getTermFreqVector(hits.id(i), "content");
                if (tpv != null) { // null when the field has no term vector
                    TokenStream ts = TokenSources.getTokenStream(tpv);
                    System.out.println(ts);
                }
            }
        } finally {
            // Close searcher before the reader it wraps.
            searcher.close();
            reader.close();
        }
    }
}
分享到:
相关推荐
在使用lucene3与paoding集成的时候可能会出现以下错误: Exception in thread "main" java.lang.AbstractMethodError: org.apache.lucene.analysis.TokenStream.incrementToken()Z at org.apache.lucene.index....
C# Lucene.Net .Analysis.Cn.dll和Lucene.Net.dll两个文件
lucene-highlighter.jar lucene-highlighter.jar
这是一个lucene中文索引程序,采用paoding分词器,包括建立索引和搜索过程。
最新版linux lucene-8.10.0.tgz最新版linux lucene-8.10.0.tgz最新版linux lucene-8.10.0.tgz
lucene-2.9.4.tar.gz lucene-2.9.4.tar.gz lucene-2.9.4.tar.gz
lucene-queries-2.9.0.jar 内含有org.apache.lucene.search.DuplicateFilter
Lucene.in.Action.2nd.Edition Lucene
lucene3.0-highlighter.jar lucene3.0的高亮jar包,从lucene3.0源码中导出来的
最新版linux lucene-8.8.2.tgz最新版linux lucene-8.8.2.tgz最新版linux lucene-8.8.2.tgz
lucene.jar. lucene-1.4.3.jar.license.txt
Lucene采用3.03版,太新版和 ik paoding不兼容。 ----------------------------------------- 极易分词只支持1.9-2.4版,未测试 IKAnalyzer分词 2012版不支持,采用3.2.8版 在使用2012版时异常:...
org.apache.lucene.analysis.cjk.CJKAnalyzer .......
Lucene.Net基本用法.pdf
Manning.Lucene.in.Action.2nd.Edition.Jun.2010.MEAP.rar
lucene-3.0.2.zip lucene-3.0.2.zip
最新版Lucene.In.Action 2nd Edition,2010年7月版,相当清晰
lucene-queries-2.9.0.jar 内含有org.apache.lucene.search.DuplicateFilter. 可以帮助我们完成类似sql中的group by的查询。里面包含jar和源码以及maven开发需要的pom配置