﻿<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/"><channel><title>BlogJava-桢知卓见-文章分类-lucene</title><link>http://www.blogjava.net/jalion/category/23708.html</link><description /><language>zh-cn</language><lastBuildDate>Mon, 02 Jul 2007 00:36:37 GMT</lastBuildDate><pubDate>Mon, 02 Jul 2007 00:36:37 GMT</pubDate><ttl>60</ttl><item><title>lucene应用第一篇——入门应用</title><link>http://www.blogjava.net/jalion/articles/127320.html</link><dc:creator>李桢</dc:creator><author>李桢</author><pubDate>Sun, 01 Jul 2007 02:09:00 GMT</pubDate><guid>http://www.blogjava.net/jalion/articles/127320.html</guid><wfw:comment>http://www.blogjava.net/jalion/comments/127320.html</wfw:comment><comments>http://www.blogjava.net/jalion/articles/127320.html#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://www.blogjava.net/jalion/comments/commentRss/127320.html</wfw:commentRss><trackback:ping>http://www.blogjava.net/jalion/services/trackbacks/127320.html</trackback:ping><description><![CDATA[<p><strong><font color=#0000ff size=4>jar包:lucene-core-2.0.0.jar 可以到官方网站下载</font></strong></p>
<p><strong><font color=#0000ff size=4>今天先记下简单的lucene索引创建、搜索、删除和恢复</font></strong></p>
<p><font face=宋体 color=#000000>import java.io.IOException;</font></p>
<p><font face=宋体 color=#000000>import org.apache.lucene.analysis.standard.StandardAnalyzer;<br>import org.apache.lucene.document.Document;<br>import org.apache.lucene.document.Field;<br>import org.apache.lucene.index.IndexReader;<br>import org.apache.lucene.index.IndexWriter;<br>import org.apache.lucene.index.Term;<br>import org.apache.lucene.queryParser.QueryParser;<br>import org.apache.lucene.search.Hits;<br>import org.apache.lucene.search.IndexSearcher;<br>import org.apache.lucene.search.Query;<br>import org.apache.lucene.store.Directory;<br>import org.apache.lucene.store.RAMDirectory;</font></p>
<p><font face=宋体 color=#000000>public class SimpleLucene {<br>&nbsp;private void myCreateIndex(){<br>&nbsp;&nbsp;try {<br><font style="BACKGROUND-COLOR: #ffffff">&nbsp;&nbsp;<font color=#ff0000>&nbsp;/*<br>&nbsp;&nbsp;&nbsp; * 在硬盘指定目录创建索引<br>&nbsp;&nbsp;&nbsp; */</font></font><font style="BACKGROUND-COLOR: #ff0000"><br></font>&nbsp;&nbsp;&nbsp;IndexWriter writerdata=new IndexWriter("c:/index",new StandardAnalyzer(),true);&nbsp;<font color=#ff0000>//第３个参数为true的话会先清空目录，再创建,否则在原基础上增加<br></font>&nbsp;&nbsp;&nbsp;Document doc1=new Document();&nbsp; <font color=#ff0000>//可以把document 看成是数据中的一行记录<br></font>&nbsp;&nbsp;&nbsp;Field field1=new Field("data","中国人民解放军",Field.Store.YES,Field.Index.TOKENIZED);<font color=#ff0000>//参数说明见附图</font><br>&nbsp;&nbsp;&nbsp;doc1.add(field1);<br>&nbsp;&nbsp;&nbsp;writerdata.addDocument(doc1);<br>&nbsp;&nbsp;&nbsp;writerdata.optimize();<br>&nbsp;&nbsp;&nbsp;writerdata.close(); <font color=#ff0000>// 写完后要记得关闭<br>&nbsp;&nbsp;&nbsp;/*<br>&nbsp;&nbsp;&nbsp; * 在内存中创建索引,速度肯定比在硬盘中快啦，但计算机关闭后，索引将消失．<br>&nbsp;&nbsp;&nbsp; */&nbsp;<br></font>&nbsp;&nbsp;&nbsp;RAMDirectory ramdir=new RAMDirectory();<br>&nbsp;&nbsp;&nbsp;IndexWriter writerram=new IndexWriter(ramdir,new StandardAnalyzer(),true);<br>&nbsp;&nbsp;&nbsp;Document doc2=new Document();<br>&nbsp;&nbsp;&nbsp;Field field2=new Field("ram","什么时间能买起房子",Field.Store.YES,Field.Index.TOKENIZED);<br>&nbsp;&nbsp;&nbsp;doc2.add(field2);<br>&nbsp;&nbsp;&nbsp;writerram.addDocument(doc2);<br>&nbsp;&nbsp;&nbsp;writerram.optimize();<br>&nbsp;&nbsp;&nbsp;writerram.close();<br>&nbsp;&nbsp;&nbsp;System.out.print("创建成功");<br>&nbsp;&nbsp;} catch (Exception e) {<br>&nbsp;&nbsp;&nbsp;e.printStackTrace();<br>&nbsp;&nbsp;}<br>&nbsp;}<br>&nbsp;private void mySearch()<br>&nbsp;{<br>&nbsp;&nbsp;try {<br>&nbsp;&nbsp;&nbsp;IndexSearcher searcher=new IndexSearcher("c:/index");&nbsp;<font color=#000000> //读取索引目录<br></font>&nbsp;&nbsp;&nbsp;QueryParser parser=new QueryParser("data",new StandardAnalyzer()); <font 
color=#ff0000>//在哪个字段查询<br></font>&nbsp;&nbsp;&nbsp;Query&nbsp; query=parser.parse("中国"); <font color=#ff0000>//要查询的关键字</font><br>&nbsp;&nbsp;&nbsp;Hits hits=searcher.search(query); <font color=#ff0000>//返回查询结果<br></font>&nbsp;&nbsp;&nbsp;for (int i = 0; i &lt;hits.length(); i++) {<br>&nbsp;&nbsp;&nbsp;&nbsp;Document doc=new Document();<br>&nbsp;&nbsp;&nbsp;&nbsp;System.out.println(hits.doc(i).get("data")); <font color=#ff0000>//得到document 对象，根据字段返回数据<br></font>&nbsp;&nbsp;&nbsp;&nbsp;<br>&nbsp;&nbsp;&nbsp;}<br>&nbsp;&nbsp;&nbsp;if(hits.length()==0)<br>&nbsp;&nbsp;&nbsp;&nbsp;System.out.println("没有找到结果");<br>&nbsp;&nbsp;&nbsp;searcher.close();&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <font color=#ff0000>//记得关闭<br></font>&nbsp;&nbsp;} catch (Exception e) {<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;e.printStackTrace();<br>&nbsp;&nbsp;}<br>&nbsp;<font color=#ff0000>&nbsp;/*<br>&nbsp;&nbsp; * 内存中查询道理是一样的，把索引地址和查询字段改变一下就ＯＫ<br>&nbsp;&nbsp; */<br></font>&nbsp;}<br>&nbsp;<font color=#ff0000>/*<br>&nbsp; * 删除索引<br>&nbsp; */</font><br>&nbsp;private void mydelete()<br>&nbsp;{<br>try {<br>&nbsp;&nbsp;&nbsp;<br>&nbsp;&nbsp;&nbsp;IndexReader reader=IndexReader.open("c:/index");<br>&nbsp;&nbsp;&nbsp;int num=reader.numDocs(); <font color=#ff0000>//得到索引的中doc的数量，相当于数据库中有几行记录<br></font>&nbsp;&nbsp;&nbsp;<br>&nbsp;&nbsp;&nbsp;reader.deleteDocuments(new Term("test1","中国")); <font color=#ff0000>//删除中包含中国的索引<br></font>&nbsp;&nbsp;&nbsp;<br>&nbsp;&nbsp;&nbsp;for (int i = 0; i &lt;num; i++) { <font color=#ff0000>//通过循环删除所有索引<br></font>&nbsp;&nbsp;<br>&nbsp;&nbsp;&nbsp;reader.deleteDocument(i); <font color=#ff0000>//这里的删除只是给文档做一个删除标记，看到执行的deleteDocument后，产生的Del文件用来记录这些标记过的文件</font><br>&nbsp;&nbsp;&nbsp;<br>&nbsp;&nbsp;&nbsp;}<br>&nbsp;&nbsp;&nbsp;reader.close(); <font color=#ff0000>//同样要关闭<br></font>&nbsp;&nbsp;} catch (Exception e) {<br>&nbsp;&nbsp;&nbsp;e.printStackTrace();</font></p>
<p><font face=宋体 color=#000000>&nbsp;&nbsp;}<br>&nbsp;}<br><font color=#ff0000>&nbsp;/*<br>&nbsp; * 恢复删除的索引<br>&nbsp; */</font><br>&nbsp;public void reDeleteIndex()<br>&nbsp;{<br>&nbsp;&nbsp;try {<br>&nbsp;&nbsp;&nbsp;IndexReader reader=IndexReader.open("c:/index");<br>&nbsp;&nbsp;&nbsp;reader.undeleteAll();<br>&nbsp;&nbsp;&nbsp;<br>&nbsp;&nbsp;&nbsp;reader.close();<br>&nbsp;&nbsp;} catch (Exception e) {<br>&nbsp;&nbsp;&nbsp;e.printStackTrace();<br>&nbsp;&nbsp;}<br>&nbsp;&nbsp;<br>&nbsp;}<br>&nbsp;public static void main(String[] args) {<br>&nbsp;&nbsp;<br>&nbsp;&nbsp;SimpleLucene rr=new SimpleLucene();<br>&nbsp;&nbsp;rr.myCreateIndex(); <font color=#ff0000>//创建索引<br></font>&nbsp;&nbsp;rr.mySearch();<font color=#ff0000> //执行搜索<br></font>&nbsp;&nbsp;rr.mydelete(); <font color=#ff0000>//执行删除索引<br></font>&nbsp;&nbsp;rr.reDeleteIndex();<font color=#ff0000> //执行恢复删除的索引<br></font>&nbsp;}</font></p>
<p><font face=宋体 color=#000000>}<br></font></p>
<p>
<table cellSpacing=0 cellPadding=0 border=1>
    <tbody>
        <tr>
            <td vAlign=top>
            <p align=left>Field.Index <br></p>
            <p>&#160;</p>
            <p>&#160;</p>
            </td>
            <td vAlign=top>
            <p align=left>Field.Store <br></p>
            <p>&#160;</p>
            <p>&#160;</p>
            </td>
            <td vAlign=top>
            <p align=left>说明 <br></p>
            <p>&#160;</p>
            <p>&#160;</p>
            </td>
        </tr>
        <tr>
            <td vAlign=top>
            <p align=left><code><strong>TOKENIZED(</strong></code><code><strong>分词)</strong></code> <br></p>
            <p>&#160;</p>
            <p>&#160;</p>
            </td>
            <td vAlign=top>
            <p align=left><code><strong>YES</strong></code> <br></p>
            <p>&#160;</p>
            <p>&#160;</p>
            </td>
            <td vAlign=top>
            <p align=left>文章的标题或内容(如果是内容的话不能太长)是可以被搜索的 <br></p>
            <p>&#160;</p>
            <p>&#160;</p>
            </td>
        </tr>
        <tr>
            <td vAlign=top>
            <p align=left><code><strong>TOKENIZED <br></strong></code></p>
            <p><code><strong></strong></code></p>
            <p>&#160;</p>
            </td>
            <td vAlign=top>
            <p align=left><code><strong>NO<br></strong></code></p>
            <p><code><strong></strong></code></p>
            <p>&#160;</p>
            </td>
            <td vAlign=top>
            <p align=left>文章的标题或内容(内容可以很长)也是可以被搜索的 <br></p>
            <p>&#160;</p>
            <p>&#160;</p>
            </td>
        </tr>
        <tr>
            <td vAlign=top>
            <p align=left><code><strong>NO <br></strong></code></p>
            <p><code><strong></strong></code></p>
            <p>&#160;</p>
            </td>
            <td vAlign=top>
            <p align=left><code><strong>YES<br></strong></code></p>
            <p><code><strong></strong></code></p>
            <p>&#160;</p>
            </td>
            <td vAlign=top>
            <p align=left>这是不能被搜索的，它只是被搜索内容的附属物。如URL等 <br></p>
            <p>&#160;</p>
            <p>&#160;</p>
            </td>
        </tr>
        <tr>
            <td vAlign=top>
            <p align=left><code><strong>UN_TOKENIZED<br></strong></code></p>
            <p><code><strong></strong></code></p>
            <p>&#160;</p>
            </td>
            <td vAlign=top>
            <p align=left><code><strong>YES/NO <br></strong></code></p>
            <p><code><strong></strong></code></p>
            <p>&#160;</p>
            </td>
            <td vAlign=top>
            <p align=left>不被分词，它作为一个整体被搜索，搜一部分是搜不出来的 <br></p>
            <p>&#160;</p>
            <p>&#160;</p>
            </td>
        </tr>
        <tr>
            <td vAlign=top>
            <p align=left><code><strong>NO <br></strong></code></p>
            <p><code><strong></strong></code></p>
            <p>&#160;</p>
            </td>
            <td vAlign=top>
            <p align=left><code><strong>NO <br></strong></code></p>
            <p><code><strong></strong></code></p>
            <p>&#160;</p>
            </td>
            <td vAlign=top>
            <p align=left>没有这种用法 <br></p>
            </td>
        </tr>
    </tbody>
</table>
</p>
<img src ="http://www.blogjava.net/jalion/aggbug/127320.html" width = "1" height = "1" /><br><br><div align=right><a style="text-decoration:none;" href="http://www.blogjava.net/jalion/" target="_blank">李桢</a> 2007-07-01 10:09 <a href="http://www.blogjava.net/jalion/articles/127320.html#Feedback" target="_blank" style="text-decoration:none;">发表评论</a></div>]]></description></item></channel></rss>