apache lucene 一个最简单的实例

就像每个程序都有一个Hello World来让人体验它一样，lucene也可以很简单地提供一个实例。如下（来自lucene in action的例子），由两个类组成：
一个是建立索引

package my;

import java.io.File;

import java.io.FileReader;

import java.io.IOException;

import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.index.IndexWriter;

public class Indexer {

public static void main(String[] args) throws Exception {

if (args.length != 2) {

throw new Exception("Usage: java " + Indexer.class.getName()

+ " <index dir> <data dir>");

}

File indexDir = new File(args[0]);

File dataDir = new File(args[1]);

long start = new Date().getTime();

int numIndexed = index(indexDir, dataDir);

long end = new Date().getTime();

System.out.println("Indexing " + numIndexed + " files took "

+ (end - start) + " milliseconds");

}

// open an index and start file directory traversal

public static int index(File indexDir, File dataDir) throws IOException {

if (!dataDir.exists() || !dataDir.isDirectory()) {

throw new IOException(dataDir

+ " does not exist or is not a directory");

}

IndexWriter writer = new IndexWriter(indexDir, new StandardAnalyzer(),

true);

writer.setUseCompoundFile(false);

indexDirectory(writer, dataDir);

int numIndexed = writer.docCount();

writer.optimize();

writer.close();

return numIndexed;

}

// recursive method that calls itself when it finds a directory

private static void indexDirectory(IndexWriter writer, File dir)

throws IOException {

File[] files = dir.listFiles();

for (int i = 0; i < files.length; i++) {

File f = files[i];

if (f.isDirectory()) {

indexDirectory(writer, f);

} else if (f.getName().endsWith(".txt")) {

indexFile(writer, f);

}

// method to actually index file using Lucene

private static void indexFile(IndexWriter writer, File f)

throws IOException {

if (f.isHidden() || !f.exists() || !f.canRead()) {

return;

}

System.out.println("Indexing " + f.getCanonicalPath());

Document doc = new Document();

doc.add(Field.Text("contents", new FileReader(f)));

doc.add(Field.Keyword("filename", f.getCanonicalPath()));

writer.addDocument(doc);

}

另一个是搜索：

package my;

import java.io.File;

import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.queryParser.QueryParser;

import org.apache.lucene.search.Hits;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

public class Searcher {

public static void main(String[] args) throws Exception {

if (args.length != 2) {

throw new Exception("Usage: java " + Searcher.class.getName()

+ " <index dir> <auery>");

}

File indexDir = new File(args[0]);

String q = args[1];

if (!indexDir.exists() || !indexDir.isDirectory()) {

throw new Exception(indexDir

+ " does not exist or is not a directory.");

}

search(indexDir, q);

}

public static void search(File indexDir, String q) throws Exception {

Directory fsDir = FSDirectory.getDirectory(indexDir, false);

IndexSearcher is = new IndexSearcher(fsDir);

Query query = QueryParser.parse(q, "contents", new StandardAnalyzer());

long start = new Date().getTime();

Hits hits = is.search(query);

long end = new Date().getTime();

System.err.println("Found " + hits.length() + " document(s) (in "

+ (end - start) + " milliseconds) that matched query ‘" + q

+ "’:");

for (int i = 0; i < hits.length(); i++) {

Document doc = hits.doc(i);

System.out.println(doc.get("filename"));

}

ok，这样就简单实现了在指定目录下的所有txt文件中搜索、找出包含某一个字符串的txt文件名的功能。
下篇文章将介绍一下lucene的核心类

posted on 2007-06-12 09:46 dreamstone 阅读(5197) 评论(5) 编辑收藏所属分类: 搜索引擎lucence

# re: apache lucene 一个最简单的实例 2010-05-25 09:35 yuanfangzhou

太好了，非常感谢！回复更多评论

# re: apache lucene 一个最简单的实例 2010-07-15 15:32 solidfish

你的博客写的很好，支持你哦！回复更多评论

# re: apache lucene 一个最简单的实例 2010-07-23 17:34 人字拖

good 回复更多评论

# re: apache lucene 一个最简单的实例[未登录] 2011-04-29 21:05 Talin

不错，很好。另外，我的博客里发了一篇类似的相关的入门示例，可以去看看http://javafans.info 回复更多评论

# re: apache lucene 一个最简单的实例[未登录] 2013-11-14 15:35 椰子

楼主，您好。你这个lucene的版本是？回复更多评论

新用户注册刷新评论列表


只有注册用户登录后才能发表评论。




网站导航: 博客园博客园最新博文博问管理
相关文章: lucene入门合集 lucene的中文分词器 lucene的丰富的各种查询（二） lucene的丰富的各种查询(一) 比较lucene各种英文分析器Analyzer lucene建立索引时候的用到的一些文档和目录操作 lucene 索引非txt文档 (pdf word rtf html xml) apache lucene 的核心类 apache lucene 一个最简单的实例 apache lucene介绍

# re: apache lucene 一个最简单的实例 2010-05-25 09:35 yuanfangzhou

# re: apache lucene 一个最简单的实例 2010-07-15 15:32 solidfish

# re: apache lucene 一个最简单的实例 2010-07-23 17:34 人字拖

# re: apache lucene 一个最简单的实例[未登录] 2011-04-29 21:05 Talin

# re: apache lucene 一个最简单的实例[未登录] 2013-11-14 15:35 椰子

DANCE WITH JAVA

导航

随笔分类(277)

随笔档案(238)

阅读排行榜

常用链接

统计

积分与排名

好友之家

最新评论

apache lucene 一个最简单的实例

评论