Lucene 2.4 全文搜索代码

package com.laozizhu.article.util;

import java.io.IOException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import javax.sql.DataSource;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;

/**
 * Full-text search over the article table, built on Lucene 2.4 and the
 * Paoding Chinese analyzer.
 *
 * <p>The index is stored in a fixed directory ({@code indexPath}). It can be
 * rebuilt from the database in one pass ({@link #rebuildAll()}), extended with
 * a single article ({@link #IndexSingle(long, String, String)}) and queried
 * ({@link #seacherIndex(String)}).
 *
 * <p>All index writes performed by this class are serialized on one private
 * lock object, so a rebuild and a single-article update never open two
 * writers on the directory at the same time.
 *
 * @author laozizhu.com
 */
public class LucenePaoDing {
  private static final String indexPath = "d:/indexpaoding/www.laozizhu.com";

  /**
   * Guards every index write. A dedicated object is used instead of the
   * {@code indexPath} string: string literals are interned, so locking on one
   * would share the monitor with any other code that locks on an equal literal.
   */
  private static final Object INDEX_LOCK = new Object();

  /** Number of documents added between intermediate commits during a full rebuild. */
  private static final int COMMIT_BATCH_SIZE = 1000;

  /** Maximum number of hits collected for one search. */
  private static final int MAX_HITS = 10;

  /**
   * Demo entry point: rebuilds the whole index, then runs one sample search
   * and prints the matching article ids.
   *
   * @param args unused
   * @throws Exception declared for convenience; the called methods report
   *         their own failures instead of propagating them
   */
  public static void main(String[] args) throws Exception {
    rebuildAll();
    String keyword = "Spring.jar";
    LucenePaoDing l = new LucenePaoDing();
    System.out.println("索引搜索\n------------------------------");
    System.out.println(l.seacherIndex(keyword));
  }

  /**
   * Recreates the index from scratch from every row of {@code t_article}.
   *
   * <p>Failures are printed and swallowed (best effort); the JDBC resources
   * are always released.
   */
  public static void rebuildAll() {
    synchronized (INDEX_LOCK) {
      LucenePaoDing indexer = new LucenePaoDing();
      DataSource ds = (DataSource) Factory.getBean("dataSource");
      Connection con = null;
      Statement stat = null;
      ResultSet rs = null;
      try {
        con = ds.getConnection();
        stat = con.createStatement();
        rs = stat.executeQuery("select id,subject,content from t_article");
        if (rs != null) {
          indexer.Index(rs);
        }
      } catch (Exception ex) {
        ex.printStackTrace();
      } finally {
        // Release in reverse order of acquisition.
        closeQuietly(rs);
        closeQuietly(stat);
        closeQuietly(con);
      }
    }
  }

  /**
   * Returns the analyzer used for both indexing and query parsing.
   *
   * @return a new {@link PaodingAnalyzer} on every call
   */
  public synchronized Analyzer getAnalyzer() {
    return new PaodingAnalyzer();
  }

  /**
   * Rebuilds the index from the given result set, replacing any existing index.
   *
   * <p>The result set must expose the columns {@code id}, {@code subject} and
   * {@code content}. I/O and SQL failures are printed and swallowed; the
   * writer is closed in every case so the index is not left locked.
   *
   * @param rs rows to index; iterated to the end but not closed here
   */
  private synchronized void Index(ResultSet rs) {
    IndexWriter writer = null;
    try {
      // create=true: start from an empty index.
      writer = new IndexWriter(indexPath, getAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
      // NOTE(review): this overrides the UNLIMITED setting passed to the
      // constructor with a cap of 10,000,000 terms per field. Kept as in the
      // original; confirm whether the cap is intended.
      writer.setMaxFieldLength(10000000);
      long start = System.currentTimeMillis();
      int pending = 0;
      while (rs.next()) {
        writer.addDocument(buildDocument(rs.getString("id"), rs.getString("subject"), rs.getString("content")));
        // Commit every COMMIT_BATCH_SIZE documents so a long rebuild is
        // flushed to disk in regular steps.
        if (++pending == COMMIT_BATCH_SIZE) {
          writer.commit();
          pending = 0;
        }
      }
      writer.commit();
      writer.optimize();
      // Closing is required: buffered data is only written out on close.
      writer.close();
      writer = null; // closed successfully, nothing left for the finally block
      long elapsed = System.currentTimeMillis() - start;
      System.out.println("重建索引成功！！！！" + "用时" + elapsed + "毫秒");
    } catch (IOException e) {
      System.out.println(e);
    } catch (SQLException e) {
      System.out.println(e);
    } finally {
      closeQuietly(writer);
    }
  }

  /**
   * Appends one article to the existing index.
   *
   * <p>The document is added, not replaced: indexing the same id twice leaves
   * two documents in the index ({@link #seacherIndex(String)} de-duplicates
   * ids in its result). I/O failures are printed and swallowed; the writer is
   * closed in every case.
   *
   * @param id      database id of the article
   * @param subject article title; {@code null} is indexed as an empty string
   * @param content article body; {@code null} is indexed as an empty string
   */
  public void IndexSingle(long id, String subject, String content) {
    synchronized (INDEX_LOCK) {
      IndexWriter writer = null;
      try {
        // create=false: append to the existing index.
        writer = new IndexWriter(indexPath, getAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
        // NOTE(review): overrides UNLIMITED with a 10,000,000-term cap, as in
        // the original; confirm whether the cap is intended.
        writer.setMaxFieldLength(10000000);
        long start = System.currentTimeMillis();
        writer.addDocument(buildDocument(Long.toString(id), subject, content));
        // Closing is required: buffered data is only written out on close.
        writer.close();
        writer = null; // closed successfully, nothing left for the finally block
        long elapsed = System.currentTimeMillis() - start;
        System.out.println("索引建立成功！！！！" + "用时" + elapsed + "毫秒");
      } catch (IOException e) {
        System.out.println(e);
      } finally {
        closeQuietly(writer);
      }
    }
  }

  /**
   * Searches the {@code subject} and {@code content} fields for the query.
   * A document matches when either field matches.
   *
   * @param queryString query in Lucene query-parser syntax
   * @return ids of at most {@value #MAX_HITS} matching articles, best match
   *         first and without duplicates; {@code null} if the search failed
   *         for any reason (the exception is printed)
   */
  public List<Long> seacherIndex(String queryString) {
    IndexSearcher searcher = null;
    try {
      searcher = new IndexSearcher(indexPath);
      // One SHOULD flag per searched field: a hit in either field is enough.
      BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
      TopDocCollector collector = new TopDocCollector(MAX_HITS);
      Query query = MultiFieldQueryParser.parse(queryString, new String[] { "subject", "content" }, clauses, getAnalyzer());
      searcher.search(query, collector);
      ScoreDoc[] hits = collector.topDocs().scoreDocs;
      List<Long> ids = new ArrayList<Long>(hits.length);
      for (int i = 0; i < hits.length; i++) {
        Document doc = searcher.doc(hits[i].doc);
        Long id = Long.valueOf(doc.get("id").trim());
        // The same article may have been indexed more than once.
        if (!ids.contains(id)) {
          ids.add(id);
        }
      }
      return ids;
    } catch (Exception e) {
      e.printStackTrace();
      return null;
    } finally {
      if (searcher != null) {
        try {
          searcher.close();
        } catch (IOException ignored) {
          // Nothing useful can be done if closing a read-only searcher fails.
        }
      }
    }
  }

  /**
   * Builds the Lucene document for one article: {@code id} is stored and
   * indexed as a single un-analyzed term, {@code subject} and {@code content}
   * are stored and analyzed.
   */
  private static Document buildDocument(String id, String subject, String content) {
    Document doc = new Document();
    doc.add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("subject", nullToEmpty(subject), Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("content", nullToEmpty(content), Field.Store.YES, Field.Index.ANALYZED));
    return doc;
  }

  /** Maps {@code null} to the empty string; {@code Field} rejects null values. */
  private static String nullToEmpty(String value) {
    return value == null ? "" : value;
  }

  /** Closes the writer if it is still open, reporting (not propagating) a failure. */
  private static void closeQuietly(IndexWriter writer) {
    if (writer != null) {
      try {
        writer.close();
      } catch (IOException e) {
        System.out.println(e);
      }
    }
  }

  /** Closes the result set if non-null; a failure to close is ignored. */
  private static void closeQuietly(ResultSet rs) {
    if (rs != null) {
      try {
        rs.close();
      } catch (Exception ignored) {
        // Best effort: the work is already done or has already failed.
      }
    }
  }

  /** Closes the statement if non-null; a failure to close is ignored. */
  private static void closeQuietly(Statement stat) {
    if (stat != null) {
      try {
        stat.close();
      } catch (Exception ignored) {
        // Best effort: the work is already done or has already failed.
      }
    }
  }

  /** Closes the connection if non-null; a failure to close is ignored. */
  private static void closeQuietly(Connection con) {
    if (con != null) {
      try {
        con.close();
      } catch (Exception ignored) {
        // Best effort: the work is already done or has already failed.
      }
    }
  }
}

posted on 2009-03-09 17:24 蓝山 阅读(471) 评论(0) 编辑 收藏

常用链接

留言簿

随笔档案(20)

文章分类(8)

文章档案(9)

搜索

积分与排名

最新评论

阅读排行榜

评论排行榜


只有注册用户登录后才能发表评论。




网站导航: 博客园 IT新闻 知识库 C++博客 博问 管理