﻿<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/"><channel><title>BlogJava-xuyan5971-随笔分类-Lucene</title><link>http://www.blogjava.net/xuyan5971/category/41265.html</link><description /><language>zh-cn</language><lastBuildDate>Wed, 12 Aug 2009 09:52:29 GMT</lastBuildDate><pubDate>Wed, 12 Aug 2009 09:52:29 GMT</pubDate><ttl>60</ttl><item><title>lucene_根据索引搜索文件</title><link>http://www.blogjava.net/xuyan5971/archive/2009/08/12/290862.html</link><dc:creator>R99</dc:creator><author>R99</author><pubDate>Wed, 12 Aug 2009 08:59:00 GMT</pubDate><guid>http://www.blogjava.net/xuyan5971/archive/2009/08/12/290862.html</guid><wfw:comment>http://www.blogjava.net/xuyan5971/comments/290862.html</wfw:comment><comments>http://www.blogjava.net/xuyan5971/archive/2009/08/12/290862.html#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://www.blogjava.net/xuyan5971/comments/commentRss/290862.html</wfw:commentRss><trackback:ping>http://www.blogjava.net/xuyan5971/services/trackbacks/290862.html</trackback:ping><description><![CDATA[<p><font style="background-color: #cce8cf">package org.apache.lucene.demo;</font></p>
<p><font style="background-color: #cce8cf">/**<br />
&nbsp;* Licensed to the Apache Software Foundation (ASF) under one or more<br />
&nbsp;* contributor license agreements.&nbsp; See the NOTICE file distributed with<br />
&nbsp;* this work for additional information regarding copyright ownership.<br />
&nbsp;* The ASF licenses this file to You under the Apache License, Version 2.0<br />
&nbsp;* (the "License"); you may not use this file except in compliance with<br />
&nbsp;* the License.&nbsp; You may obtain a copy of the License at<br />
&nbsp;*<br />
&nbsp;*&nbsp;&nbsp;&nbsp;&nbsp; http://www.apache.org/licenses/LICENSE-2.0<br />
&nbsp;*<br />
&nbsp;* Unless required by applicable law or agreed to in writing, software<br />
&nbsp;* distributed under the License is distributed on an "AS IS" BASIS,<br />
&nbsp;* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.<br />
&nbsp;* See the License for the specific language governing permissions and<br />
&nbsp;* limitations under the License.<br />
&nbsp;*/</font></p>
<p><font style="background-color: #cce8cf">import java.io.BufferedReader;<br />
import java.io.FileReader;<br />
import java.io.IOException;<br />
import java.io.InputStreamReader;<br />
import java.util.Date;</font></p>
<p><font style="background-color: #cce8cf">import org.apache.lucene.analysis.Analyzer;<br />
import org.apache.lucene.analysis.standard.StandardAnalyzer;<br />
import org.apache.lucene.document.Document;<br />
import org.apache.lucene.index.FilterIndexReader;<br />
import org.apache.lucene.index.IndexReader;<br />
import org.apache.lucene.queryParser.MultiFieldQueryParser;<br />
import org.apache.lucene.queryParser.QueryParser;<br />
import org.apache.lucene.search.BooleanClause;<br />
import org.apache.lucene.search.HitCollector;<br />
import org.apache.lucene.search.IndexSearcher;<br />
import org.apache.lucene.search.Query;<br />
import org.apache.lucene.search.ScoreDoc;<br />
import org.apache.lucene.search.Searcher;<br />
import org.apache.lucene.search.TopDocCollector;</font></p>
<p><font style="background-color: #cce8cf">/** Simple command-line based search demo. */<br />
public class SearchFiles {</font></p>
<p><font style="background-color: #cce8cf">&nbsp; /** Use the norms from one field for all fields.&nbsp; Norms are read into memory,<br />
&nbsp;&nbsp; * using a byte of memory per document per searched field.&nbsp; This can cause<br />
&nbsp;&nbsp; * search of large collections with a large number of fields to run out of<br />
&nbsp;&nbsp; * memory.&nbsp; If all of the fields contain only a single token, then the norms<br />
&nbsp;&nbsp; * are all identical, so a single norm vector may be shared. */<br />
&nbsp; private static class OneNormsReader extends FilterIndexReader {<br />
&nbsp;&nbsp;&nbsp; private String field;</font></p>
<p><font style="background-color: #cce8cf">&nbsp;&nbsp;&nbsp; public OneNormsReader(IndexReader in, String field) {<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; super(in);<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; this.field = field;<br />
&nbsp;&nbsp;&nbsp; }</font></p>
<p><font style="background-color: #cce8cf">&nbsp;&nbsp;&nbsp; public byte[] norms(String field) throws IOException {<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; return in.norms(this.field);<br />
&nbsp;&nbsp;&nbsp; }<br />
&nbsp; }</font></p>
<p><font style="background-color: #cce8cf">&nbsp; private SearchFiles() {}</font></p>
<p><font style="background-color: #cce8cf">&nbsp; /** Simple command-line based search demo. */<br />
&nbsp; public static void main(String[] args) throws Exception {<br />
&nbsp;&nbsp;&nbsp; String index = "index";<br />
&nbsp;&nbsp;&nbsp; String field = "content";<br />
&nbsp;&nbsp;&nbsp; boolean multipleFields = true;<br />
&nbsp;&nbsp;&nbsp; IndexReader reader = IndexReader.open(index);//IndexReader 根据 index 指定的路径 访问索引，扫描索引。<br />
&nbsp;&nbsp;&nbsp; Searcher searcher = new IndexSearcher(reader);<br />
&nbsp;&nbsp;&nbsp; Analyzer analyzer = new StandardAnalyzer();<br />
&nbsp;&nbsp;&nbsp; BufferedReader in =new BufferedReader(new InputStreamReader(System.in, "UTF-8"));<br />
&nbsp;&nbsp;&nbsp; while (true) {<br />
&nbsp;&nbsp;&nbsp;System.out.println("Enter query: ");<br />
&nbsp;&nbsp;&nbsp;String line = in.readLine();<br />
&nbsp;&nbsp;&nbsp;if (line == null || line.length() == -1)<br />
&nbsp;&nbsp;&nbsp;&nbsp;break;<br />
&nbsp;&nbsp;&nbsp;line = line.trim();<br />
&nbsp;&nbsp;&nbsp;if (line.length() == 0)<br />
&nbsp;&nbsp;&nbsp;&nbsp;break;<br />
&nbsp;&nbsp;&nbsp;if (!multipleFields) {<br />
&nbsp;&nbsp;&nbsp;&nbsp;QueryParser parser = new QueryParser(field, analyzer);<br />
&nbsp;&nbsp;&nbsp;&nbsp;Query query = parser.parse(field);// 根据指定的单个field查询<br />
&nbsp;&nbsp;&nbsp;&nbsp;parser.setDefaultOperator(parser.OR_OPERATOR.OR);<br />
&nbsp;&nbsp;&nbsp;&nbsp;//多个字符串以空格分隔时，OR&nbsp; : a b&nbsp; 含有a或b均可。<br />
&nbsp;&nbsp;&nbsp;&nbsp;//AND&nbsp;&nbsp; a b 必须含有 a和b。<br />
&nbsp;&nbsp;&nbsp;&nbsp;doPagingSearch(searcher, query);<br />
&nbsp;&nbsp;&nbsp;&nbsp;<br />
&nbsp;&nbsp;&nbsp;} else {</font></p>
<p><font style="background-color: #cce8cf">&nbsp;&nbsp;&nbsp;&nbsp;String[] fields = new String[2];<br />
&nbsp;&nbsp;&nbsp;&nbsp;fields[0] = "contents";<br />
&nbsp;&nbsp;&nbsp;&nbsp;fields[1] = "name";<br />
&nbsp;&nbsp;&nbsp;&nbsp;BooleanClause.Occur[] flags = new BooleanClause.Occur[] {<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };<br />
&nbsp;&nbsp;&nbsp;&nbsp;//根据多个field查询时。should,should：查询字段在 name或是contents任何一个中，均作为一条记录返回。<br />
&nbsp;&nbsp;&nbsp;&nbsp;//must,must：必须既在 name 中，又在contents 中。<br />
&nbsp;&nbsp;&nbsp;&nbsp;Query query = MultiFieldQueryParser.parse(line, fields, flags,<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;analyzer);<br />
&nbsp;&nbsp;&nbsp;&nbsp;doPagingSearch(searcher, query);<br />
&nbsp;&nbsp;&nbsp;}<br />
&nbsp;&nbsp;}<br />
&nbsp;&nbsp;&nbsp; reader.close();<br />
&nbsp; }<br />
&nbsp; <br />
&nbsp; /**<br />
&nbsp;&nbsp; * Runs the query through a TopDocCollector that keeps at most 20 top-scoring hits,<br />
&nbsp;&nbsp; * prints the total number of matching documents, and then prints the stored<br />
&nbsp;&nbsp; * "path", "modified", "name", "parent" and "content" fields of the returned documents.<br />
&nbsp;&nbsp; */</font></p>
<p><font style="background-color: #cce8cf">&nbsp; public static void doPagingSearch( Searcher searcher, Query query) throws IOException {<br />
&nbsp;<br />
&nbsp;&nbsp;&nbsp; // Collect at most the 20 top-scoring docs<br />
&nbsp;&nbsp;&nbsp; TopDocCollector collector = new TopDocCollector(20);//最多结果集个数。<br />
&nbsp;&nbsp;&nbsp; searcher.search(query, collector);<br />
&nbsp;&nbsp;&nbsp; ScoreDoc[] hits = collector.topDocs().scoreDocs;<br />
&nbsp;&nbsp;&nbsp; int numTotalHits = collector.getTotalHits();//搜索到的符合条件的记录总条数。<br />
&nbsp;&nbsp;&nbsp; System.out.println(numTotalHits + " total matching documents");</font></p>
<p><font style="background-color: #cce8cf">&nbsp;&nbsp;&nbsp; for(int i=0;i&lt;numTotalHits;i++){<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Document doc = searcher.doc(hits[i].doc);<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; System.out.println("path.."+doc.get("path"));<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; System.out.println("modified.."+doc.get("modified"));<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; System.out.println("name.."+doc.get("name"));<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; System.out.println("parent"+doc.get("parent"));<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; System.out.println("content..."+doc.get("content"));<br />
&nbsp;&nbsp;&nbsp; }<br />
&nbsp; }<br />
}<br />
</font></p>
<img src ="http://www.blogjava.net/xuyan5971/aggbug/290862.html" width = "1" height = "1" /><br><br><div align=right><a style="text-decoration:none;" href="http://www.blogjava.net/xuyan5971/" target="_blank">R99</a> 2009-08-12 16:59 <a href="http://www.blogjava.net/xuyan5971/archive/2009/08/12/290862.html#Feedback" target="_blank" style="text-decoration:none;">发表评论</a></div>]]></description></item><item><title>lucence_对文件建立索引</title><link>http://www.blogjava.net/xuyan5971/archive/2009/08/12/290837.html</link><dc:creator>R99</dc:creator><author>R99</author><pubDate>Wed, 12 Aug 2009 07:38:00 GMT</pubDate><guid>http://www.blogjava.net/xuyan5971/archive/2009/08/12/290837.html</guid><wfw:comment>http://www.blogjava.net/xuyan5971/comments/290837.html</wfw:comment><comments>http://www.blogjava.net/xuyan5971/archive/2009/08/12/290837.html#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://www.blogjava.net/xuyan5971/comments/commentRss/290837.html</wfw:commentRss><trackback:ping>http://www.blogjava.net/xuyan5971/services/trackbacks/290837.html</trackback:ping><description><![CDATA[<p><font style="background-color: #cce8cf">package org.apache.lucene.demo;</font></p>
<p><font style="background-color: #cce8cf">/**<br />
&nbsp;* Licensed to the Apache Software Foundation (ASF) under one or more<br />
&nbsp;* contributor license agreements.&nbsp; See the NOTICE file distributed with<br />
&nbsp;* this work for additional information regarding copyright ownership.<br />
&nbsp;* The ASF licenses this file to You under the Apache License, Version 2.0<br />
&nbsp;* (the "License"); you may not use this file except in compliance with<br />
&nbsp;* the License.&nbsp; You may obtain a copy of the License at<br />
&nbsp;*<br />
&nbsp;*&nbsp;&nbsp;&nbsp;&nbsp; http://www.apache.org/licenses/LICENSE-2.0<br />
&nbsp;*<br />
&nbsp;* Unless required by applicable law or agreed to in writing, software<br />
&nbsp;* distributed under the License is distributed on an "AS IS" BASIS,<br />
&nbsp;* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.<br />
&nbsp;* See the License for the specific language governing permissions and<br />
&nbsp;* limitations under the License.<br />
&nbsp;*/</font></p>
<p><font style="background-color: #cce8cf">import org.apache.lucene.analysis.standard.StandardAnalyzer;<br />
import org.apache.lucene.index.IndexWriter;</font></p>
<p><font style="background-color: #cce8cf">import java.io.File;<br />
import java.io.FileNotFoundException;<br />
import java.io.IOException;<br />
import java.util.Date;</font></p>
<p><font style="background-color: #cce8cf">/** Index all text files under a directory. */<br />
public class IndexFiles {<br />
&nbsp; <br />
&nbsp; private IndexFiles() {}</font></p>
<p><font style="background-color: #cce8cf">&nbsp; static final File INDEX_DIR = new File("index");//索引目录。建在当前目录的/index下<br />
&nbsp; <br />
&nbsp; /** Index all text files under a directory. */<br />
&nbsp; public static void main(String[] args) {//args[0] 文件路径.&nbsp; main 方法：对args[0]指定的文件路径下的所有文件建立索引。<br />
&nbsp;&nbsp;final File docDir = new File(args[0]);<br />
&nbsp;&nbsp;if (!docDir.exists() || !docDir.canRead()) {<br />
&nbsp;&nbsp;&nbsp;System.out .println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");<br />
&nbsp;&nbsp;&nbsp;System.exit(1);<br />
&nbsp;&nbsp;}<br />
&nbsp;&nbsp;&nbsp; <br />
&nbsp;&nbsp;&nbsp; Date start = new Date();<br />
&nbsp;&nbsp;&nbsp; try {<br />
&nbsp;&nbsp;&nbsp;IndexWriter writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);<br />
&nbsp;&nbsp;&nbsp;//IndexWriter负责创建和维护索引<br />
&nbsp;&nbsp;&nbsp;//IndexWriter(String path, Analyzer a, boolean create, IndexWriter.MaxFieldLength mfl)<br />
&nbsp;&nbsp;&nbsp;//path:路径 Analyzer:文本分析器&nbsp; create:是否创建新索引&nbsp; mfl:每个field最多索引的词条(term)数<br />
&nbsp;&nbsp;&nbsp;System.out.println("Indexing to directory '" + INDEX_DIR + "'...");<br />
&nbsp;&nbsp;&nbsp;indexDocs(writer, docDir);<br />
&nbsp;&nbsp;&nbsp;System.out.println("Optimizing...");<br />
&nbsp;&nbsp;&nbsp;writer.optimize();//优化索引<br />
&nbsp;&nbsp;&nbsp;writer.close();//关闭<br />
&nbsp;&nbsp;&nbsp;Date end = new Date();<br />
&nbsp;&nbsp;&nbsp;System.out.println(end.getTime() - start.getTime() + " total milliseconds");<br />
&nbsp;&nbsp;} catch (IOException e) {<br />
&nbsp;&nbsp;&nbsp;System.out.println(" caught a " + e.getClass()<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;+ "\n with message: " + e.getMessage());<br />
&nbsp;&nbsp;}<br />
&nbsp; }</font></p>
<p><font style="background-color: #cce8cf">&nbsp; static void indexDocs(IndexWriter writer, File file) throws IOException {<br />
&nbsp;&nbsp;// do not try to index files that cannot be read<br />
&nbsp;&nbsp;if (file.canRead()) {<br />
&nbsp;&nbsp;&nbsp;if (file.isDirectory()) {<br />
&nbsp;&nbsp;&nbsp;&nbsp;String[] files = file.list();<br />
&nbsp;&nbsp;&nbsp;&nbsp;// an IO error could occur<br />
&nbsp;&nbsp;&nbsp;&nbsp;if (files != null) {<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;for (int i = 0; i &lt; files.length; i++) {<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;indexDocs(writer, new File(file, files[i]));<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}<br />
&nbsp;&nbsp;&nbsp;&nbsp;}<br />
&nbsp;&nbsp;&nbsp;} else {<br />
&nbsp;&nbsp;&nbsp;&nbsp;System.out.println("adding " + file);<br />
&nbsp;&nbsp;&nbsp;&nbsp;try {<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;writer.addDocument(FileDocument.Document(file));<br />
&nbsp;&nbsp;&nbsp;&nbsp;}<br />
&nbsp;&nbsp;&nbsp;&nbsp;// at least on windows, some temporary files raise this<br />
&nbsp;&nbsp;&nbsp;&nbsp;// exception with an "access denied" message<br />
&nbsp;&nbsp;&nbsp;&nbsp;// checking if the file can be read doesn't help<br />
&nbsp;&nbsp;&nbsp;&nbsp;catch (FileNotFoundException fnfe) {<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;;<br />
&nbsp;&nbsp;&nbsp;&nbsp;}<br />
&nbsp;&nbsp;&nbsp;}<br />
&nbsp;&nbsp;}<br />
&nbsp;}<br />
}<br />
<br />
<br />
<br />
</font></p>
<p><font style="background-color: #cce8cf">package org.apache.lucene.demo;</font></p>
<p><font style="background-color: #cce8cf">/**<br />
&nbsp;* Licensed to the Apache Software Foundation (ASF) under one or more<br />
&nbsp;* contributor license agreements.&nbsp; See the NOTICE file distributed with<br />
&nbsp;* this work for additional information regarding copyright ownership.<br />
&nbsp;* The ASF licenses this file to You under the Apache License, Version 2.0<br />
&nbsp;* (the "License"); you may not use this file except in compliance with<br />
&nbsp;* the License.&nbsp; You may obtain a copy of the License at<br />
&nbsp;*<br />
&nbsp;*&nbsp;&nbsp;&nbsp;&nbsp; http://www.apache.org/licenses/LICENSE-2.0<br />
&nbsp;*<br />
&nbsp;* Unless required by applicable law or agreed to in writing, software<br />
&nbsp;* distributed under the License is distributed on an "AS IS" BASIS,<br />
&nbsp;* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.<br />
&nbsp;* See the License for the specific language governing permissions and<br />
&nbsp;* limitations under the License.<br />
&nbsp;*/</font></p>
<p><font style="background-color: #cce8cf">import java.io.File;<br />
import java.io.FileReader;</font></p>
<p><font style="background-color: #cce8cf">import org.apache.lucene.document.DateTools;<br />
import org.apache.lucene.document.Document;<br />
import org.apache.lucene.document.Field;</font></p>
<p><font style="background-color: #cce8cf">/** A utility for making Lucene Documents from a File. */</font></p>
<p><font style="background-color: #cce8cf">public class FileDocument {</font></p>
<p><font style="background-color: #cce8cf">&nbsp; public static Document Document(File f)<br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; throws java.io.FileNotFoundException {<br />
&nbsp; <br />
&nbsp;&nbsp;&nbsp; // make a new, empty document<br />
&nbsp;&nbsp;&nbsp; Document doc = new Document();<br />
&nbsp;&nbsp;&nbsp; <br />
&nbsp;&nbsp;&nbsp; doc.add(new Field("contents", new FileReader(f)));<br />
&nbsp;&nbsp;&nbsp; doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));<br />
&nbsp;&nbsp;&nbsp; doc.add(new Field("modified", DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),Field.Store.YES, Field.Index.ANALYZED));<br />
&nbsp;&nbsp;&nbsp; doc.add(new Field("name",f.getName(),Field.Store.YES,Field.Index.NOT_ANALYZED));<br />
&nbsp;&nbsp;&nbsp; //Field </font></p>
<p><font style="background-color: #cce8cf">&nbsp;&nbsp;&nbsp; // return the document<br />
&nbsp;&nbsp;&nbsp; return doc;<br />
&nbsp; }</font></p>
<p><font style="background-color: #cce8cf">&nbsp; private FileDocument() {}<br />
}<br />
&nbsp;&nbsp;&nbsp; <br />
</font></p>
<p><br />
</p>
<img src ="http://www.blogjava.net/xuyan5971/aggbug/290837.html" width = "1" height = "1" /><br><br><div align=right><a style="text-decoration:none;" href="http://www.blogjava.net/xuyan5971/" target="_blank">R99</a> 2009-08-12 15:38 <a href="http://www.blogjava.net/xuyan5971/archive/2009/08/12/290837.html#Feedback" target="_blank" style="text-decoration:none;">发表评论</a></div>]]></description></item></channel></rss>