Lucene版Hello world（世界，你好）

1、首先从lucene官网上下载lucene2.4.0（也可以点击直接下载，我这里用的这个版本，现在最高版本是3.0）

2、从极易软件下载极易分词器jar包（为中文的“世界，你好”提供分词支持）

3、在Eclipse中新建Java工程，并将所需jar包（lucene-core-2.4.0.jar、lucene-analyzers-2.4.0.jar、lucene-highlighter-2.4.0.jar、je-analysis-1.5.3.jar）加入工程

4、差不多该开始了，在开始之前还需要建立两个文件夹，我这里是luceneDataSource放文件（用来建立索引库），luceneIndexs(存放索引库的位置)，最终的结构是：

5、好，我们开始，首先建立HelloWorld类，类里有两个方法createIndex和search分别是创建索引库和搜索，搜索出来的结果高亮显示，具体实现为：

package com.lucene.helloworld;

import java.util.logging.SimpleFormatter;

import jeasy.analysis.MMAnalyzer;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriter.MaxFieldLength;

import org.apache.lucene.queryParser.MultiFieldQueryParser;

import org.apache.lucene.queryParser.QueryParser;

import org.apache.lucene.search.Filter;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.TopDocs;

import org.apache.lucene.search.highlight.Formatter;

import org.apache.lucene.search.highlight.Fragmenter;

import org.apache.lucene.search.highlight.Highlighter;

import org.apache.lucene.search.highlight.QueryScorer;

import org.apache.lucene.search.highlight.Scorer;

import org.apache.lucene.search.highlight.SimpleFragmenter;

import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

import org.junit.Test;

import com.lucene.util.File2DocumentUtils;

public class HelloWorld {

String zhFilePath = "F:""java""workspaces""LuceneTest""luceneDatasource""世界，你好.txt";

String filePath = "F:""java""workspaces""LuceneTest""luceneDatasource""IndexWriter addDocument's a javadoc .txt";

String indexPath = "F:""java""workspaces""LuceneTest""luceneIndexs";

// Analyzer analyzer = new StandardAnalyzer();

Analyzer mmAnalyzer = new MMAnalyzer(); // 词库分析，极易分词

/**

* 创建索引

* @throws Exception

@Test

public void createIndex() throws Exception {

IndexWriter indexWriter = new IndexWriter(indexPath, mmAnalyzer, true, MaxFieldLength.LIMITED);

// Document doc = File2DocumentUtils.file2Document(filePath);

Document zhDoc = File2DocumentUtils.file2Document(zhFilePath);

// indexWriter.addDocument(doc);

indexWriter.addDocument(zhDoc);

indexWriter.close();

}

/**

* 从索引库搜索

* @throws Exception

@Test

public void search() throws Exception {

// String queryString = "hello world";

String queryString = "世界,你好";

// 1、将搜索文件解析为Query对象

String[] fields = { "name", "content" };

QueryParser queryParser = new MultiFieldQueryParser(fields, mmAnalyzer);

Query query = queryParser.parse(queryString);

// 2、查询

IndexSearcher indexSearcher = new IndexSearcher(indexPath);

Filter filter = null;

TopDocs topDocs = indexSearcher.search(query, filter, 10000);

System.out.println("总共有【" + topDocs.totalHits + "】条结果匹配");

// start 准备高亮器

Formatter formatter = new SimpleHTMLFormatter("<font color=red>", "</font>");

Scorer scorer = new QueryScorer(query);

Highlighter highlighter = new Highlighter(formatter, scorer);

Fragmenter fragmenter = new SimpleFragmenter(50);

highlighter.setTextFragmenter(fragmenter);

// end 结束高亮器

// 3、打印输出结果

for (ScoreDoc scoreDoc : topDocs.scoreDocs) {

int docSn = scoreDoc.doc;

Document doc = indexSearcher.doc(docSn);

// start 高亮

// 返回高亮后的结果，如果当前属性值中没有出现关键字，会返回 null

String hc = highlighter.getBestFragment(mmAnalyzer, "content", doc.get("content"));

if (hc == null) {

String content = doc.get("content");

int endIndex = Math.min(50, content.length());

hc = content.substring(0, endIndex);

}

doc.getField("content").setValue(hc);

// end 高亮

File2DocumentUtils.printDocumentInfo(doc);

}

该类需要有一个工具类支持，来将file转换为Document，具体实现如下：

package com.lucene.util;

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.InputStreamReader;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.document.NumberTools;

import org.apache.lucene.document.Field.Index;

import org.apache.lucene.document.Field.Store;

publicclass File2DocumentUtils {

publicstatic Document file2Document(String path) {

File file = new File(path);

Document doc = new Document();

doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));

doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED));

doc.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED));

doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));

return doc;

}

// public static void document2File(Document doc ){

// }

publicstatic String readFileContent(File file) {

try {

BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));

StringBuffer content = new StringBuffer();

for (String line = null; (line = reader.readLine()) != null;) {

content.append(line).append(""n");

}

return content.toString();

} catch (Exception e) {

thrownew RuntimeException(e);

}

publicstaticvoid printDocumentInfo(Document doc) {

// Field f = doc.getField("name");

// f.stringValue();

System.out.println("------------------------------");

System.out.println("name = " + doc.get("name"));

System.out.println("content = " + doc.get("content"));

System.out.println("size = " + NumberTools.stringToLong(doc.get("size")));

System.out.println("path = " + doc.get("path"));

}

6、到此我们结束，看下成果。英文版的我就不写了，相对来说比较容易，来看下中文版的结果：

唯美古典的工作室

posted on 2009-12-03 15:27 唯美古典阅读(2902) 评论(0) 编辑收藏所属分类: Java入门、lucene

常用链接

留言簿

随笔分类

随笔档案

搜索

最新评论

阅读排行榜

评论排行榜


只有注册用户登录后才能发表评论。




网站导航: 博客园 IT新闻 Chat2DB C++博客博问管理