利用lucene给网站、系统增加搜索功能

Posted on 2010-10-07 15:53 penngo 阅读(3351) 评论(4) 编辑收藏所属分类: Java

有些网站的搜索功能是直接使用 like '%关键词%' 的方式对数据库进行关键词查找，不过这种方式速度比较慢，而且会影响数据库服务器的性能。
其实我们可以先把数据从数据库查询出来，利用lucene建立索引。以后每次查找都从索引中查找，可以提高查询速度和减轻服务器负担。
本篇用到的技术：lucene3.0.2,IKAnalyzer3.2.5
search.properties主要是配置搜索的信息，内容：

# 指定查找sql，需要建立索引的数据
sql=select iId,title,content,credate from archeive

update.field=iId

update.value=

# 搜索时的查找字段
search.condition=title,content

# 索引的保存地址
index.path=D:/project/Java/lucene/WebContent/WEB-INF/classes/Index

# 更新索引的时间间隔（毫秒）
period=10000

com.search.util.SearchConfig主要是读取search.properties的信息。

public class SearchConfig {

private Properties searchPro;

private String searchFile = "search.properties";

private String SQL = "sql";

private String CONDITION = "search.condition";

private String INDEX = "index.path";

public SearchConfig(){

initSearch();

}

public void initSearch(){

searchPro = PropertiesUtil.getProperties(searchFile);

}

public String getSql(){

return searchPro.getProperty(SQL, "");

}

public String getCondition(){

return searchPro.getProperty(CONDITION, "");

}

public File getIndexPath(){

String path = searchPro.getProperty(INDEX, "");

File file = new File(path);

if (!file.exists()) {

file.mkdir();

}

return file;

}

public long getPeriod(){

String period = searchPro.getProperty("period", "0");

return Integer.valueOf(period);

}

public String getUpdateField(){

return searchPro.getProperty("update.field", "");

}

public String getUpdateValue(){

return searchPro.getProperty("update.value", "");

}

public void save(){

PropertiesUtil.saveProperties(searchPro, searchFile);

}

com.search.util.LuceneUtil代码介绍，主要是生成索引和搜索。

public class LuceneUtil {

private File indexpath = null;

private String sql = null;

private String condition = null;

private String updateField = null;

private String updateValue = null;

private SearchConfig sc = null;

public LuceneUtil() {

sc = new SearchConfig();

indexpath = sc.getIndexPath();

sql = sc.getSql();

condition = sc.getCondition();

updateField = sc.getUpdateField();

updateValue = sc.getUpdateValue();

if(!updateValue.equals("")){

sql = sql + " where " + updateField + " > " + updateValue;

}

public void createIndex() {

System.out.println("==========正在生成数据库索引。");

//把数据库中的数据查询出来，

ResultSet rs = SQLHelper.getResultSet(sql);

try {

//打开索引文件

FSDirectory directory = FSDirectory.open(indexpath);

Analyzer analyzer = new IKAnalyzer();

IndexWriter writer = new IndexWriter(FSDirectory.open(indexpath),

analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

while (rs.next()) {

Document doc = new Document();

doc.add(new Field("id", String.valueOf(rs.getInt(1)),

Field.Store.YES, Field.Index.ANALYZED));

doc.add(new Field("title", rs.getString(2), Field.Store.YES,

Field.Index.ANALYZED));

doc.add(new Field("content", rs.getString(3), Field.Store.YES,

Field.Index.ANALYZED));

writer.addDocument(doc);

}

writer.close();

directory.close();

} catch (Exception e) {

e.printStackTrace();

}

public List<Document> search(String keyword) {

List<Document> list = new ArrayList<Document>();

try {

FSDirectory directory = FSDirectory.open(indexpath);

IndexReader reader = IndexReader.open(directory, true);

IndexSearcher isearcher = new IndexSearcher(reader);

isearcher.setSimilarity(new IKSimilarity());

if(keyword == null || keyword.equals("")){

return list;

}

Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);

// 搜索相似度最高的10条记录

TopDocs topDocs = isearcher.search(query, 10);

// 输出结果

ScoreDoc[] scoreDocs = topDocs.scoreDocs;

for (int i = 0; i < topDocs.totalHits; i++) {

Document targetDoc = isearcher.doc(scoreDocs[i].doc);

list.add(targetDoc);

}

isearcher.close();

directory.close();

} catch (Exception e) {

e.printStackTrace();

}

return list;

}

com.search.listener.Indexlistener启动索引更新程序

public class Indexlistener implements ServletContextListener {

public void contextInitialized(javax.servlet.ServletContextEvent arg0) {

new IndexTask();

}

public void contextDestroyed(javax.servlet.ServletContextEvent arg0) {

}

com.search.listener.IndexTask定时更新索引

public class IndexTask {

public IndexTask(){

Timer timer = new Timer();

SearchConfig sc = new SearchConfig();

timer.schedule(new Task(), new Date(), sc.getPeriod());

}

static class Task extends TimerTask{

public void run(){

LuceneUtil lu = new LuceneUtil();

lu.createIndex();

}

com.search.util.RedHighlighter关键词高亮显示

public class RedHighlighter {

public static String getBestFragment(String keyword, String field, String word){

SearchConfig sc = new SearchConfig();

String condition = sc.getCondition();

try{

Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);

SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(

"<font color='red'>", "</font>");

Highlighter highlighter = new Highlighter(simpleHTMLFormatter,

new QueryScorer(query));

highlighter.setTextFragmenter(new SimpleFragmenter(100));

String c = highlighter.getBestFragment(new IKAnalyzer(),

field, word);

return c;

}

catch(Exception e){

e.printStackTrace();

}

return "";

}

index.jsp搜索页面

<%@ page language="java" contentType="text/html; charset=GBK"
    pageEncoding="GBK"%>
<%@page import="com.search.util.LuceneUtil" %>
<%@page import="java.util.*" %>
<%@page import="org.apache.lucene.document.Document" %>
<%@page import="com.search.util.RedHighlighter" %>
<%@page import="java.net.URLEncoder"%>
<%-- Search page: reads the "w" request parameter, queries the Lucene index
     and lists the highlighted title and content of every hit. --%>
<%!
    // Escapes the characters that are significant inside HTML text and
    // attribute values, so the echoed search term cannot inject markup.
    private static String escapeHtml(String s) {
        StringBuilder out = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            char ch = s.charAt(i);
            switch (ch) {
                case '&': out.append("&amp;"); break;
                case '<': out.append("&lt;"); break;
                case '>': out.append("&gt;"); break;
                case '"': out.append("&quot;"); break;
                case '\'': out.append("&#39;"); break;
                default: out.append(ch);
            }
        }
        return out.toString();
    }
%>
<%
    //request.setCharacterEncoding("GBK");
    String w = request.getParameter("w");
    int size = 0;
    long time = 0;
    List<Document> list = null;
    if(w != null && !w.equals("")){
        // Re-decode the parameter bytes as GBK.
        w = new String(w.getBytes("ISO8859-1"), "GBK");
    }
    else{
        w = "";
    }
    LuceneUtil lu = new LuceneUtil();
    // Measure how long the search takes, in milliseconds.
    Date start = new Date();
    list = lu.search(w);
    Date end = new Date();
    size = list.size();
    time = end.getTime() - start.getTime();
%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
    "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<script type="text/javascript">
function submit(){
}
</script>
</head>
<body>
<div>
<form action="" method="get">
<input type="text" class="txtSeach" id="w" name="w" value="<%=escapeHtml(w) %>"
><input type="submit"
class="btnSearch" onclick="submit" value="找一下">    <br>
</form>
</div>
<div id="searchInfo"><span style="float: left; margin-left: 15px;"></span>找到相关内容<%=size%>篇，
用时<%=time%>毫秒
</div>
<div>
<%
    if(list != null && list.size() > 0){
        for(Document doc:list){
            String title = RedHighlighter.getBestFragment(w, "title", doc.get("title"));
            String content = RedHighlighter.getBestFragment(w, "content", doc.get
                ("content"));
%>
<div>
<%=title %>
</div>
<div>
<%=content %>
</div>
<%
        }
    }
%>
</div>
</body>
</html>

运行效果:

附件:完整代码

# re: 利用lucene给网站、系统增加搜索功能[未登录] 回复 更多评论

2010-10-07 20:40 by semovy

如何做得更强大,更专业呀

# re: 利用lucene给网站、系统增加搜索功能回复 更多评论

2010-10-07 21:09 by os

不错啊~ 不过要想搜索更准确点,就不能这么简单了貌似.

# re: 利用lucene给网站、系统增加搜索功能回复 更多评论

2010-10-07 22:08 by pengo

@os
搜索不准的话，可以研究下分词，我用的中文分词是IKAnalyzer。

# re: 利用lucene给网站、系统增加搜索功能 回复 更多评论

2010-10-12 15:58 by xpf7622

没有数据库脚本。

新用户注册刷新评论列表


只有注册用户登录后才能发表评论。




网站导航: 博客园博客园最新博文博问管理
相关文章: java获取剪贴板中的链接 java摄像头截图 swing程序在任务栏闪动效果实现数据库反向生成实体类 apache与tomcat负载集群的3种方法页面缓存的小测试浏览器客户端js中调用java代码客户端调用服务器端方法的简单例子 Mouse Hook java实现文件监控

penngo