SOLR安装与配置
SOLR安装与配置
我用的是 Solr3.3
安装前准备
Java 1.5 或更高版本
servlet 容器,如 Tomcat 5.5。
SOLR安装
将SOLR发行包解压,复制dist/apache-solr-3.3.0.war到指定目录,例如c:/solr
TOMCAT配置
server.xml
在server.xml中主要设置侦听端口。一般来说,SOLR常用端口为8983。你也可以使用自己喜欢的端口,例如80或8080。同时,注意将URIEncoding设置为UTF-8。至于AJP等更多Native优化,请参考TOMCAT优化方案。
<Connector port="8983" maxHttpHeaderSize="8192" maxThreads="150" minSpareThreads="25" maxSpareThreads="75" enableLookups="false" redirectPort="8443" acceptCount="100" connectionTimeout="20000" disableUploadTimeout="true" URIEncoding="UTF-8" />
<Context docBase="c:/solr/apache-solr-3.3.0.war" debug="0" crossContext="true" > <Environment name="solr/home" type="java.lang.String" value="/web/solr" override="true" /> </Context>
package net.paoding.analysis.analyzer.solr;import java.io.Reader;import java.util.Map;import net.paoding.analysis.analyzer.PaodingTokenizer;import net.paoding.analysis.analyzer.TokenCollector;import net.paoding.analysis.analyzer.impl.MaxWordLengthTokenCollector;import net.paoding.analysis.analyzer.impl.MostWordsTokenCollector;import net.paoding.analysis.knife.PaodingMaker;import org.apache.lucene.analysis.Tokenizer;import org.apache.solr.analysis.BaseTokenizerFactory;/** * 实现Solr3.2分词器接口 * 基于PaodingTokenizer的实现 * * @author sunlightcs * * http://hi.juziku.com/sunlightcs/ * */public class ChineseTokenizerFactory extends BaseTokenizerFactory{/** * 最多切分 */ public static final String MOST_WORDS_MODE = "most-words"; /** * 按最大切分 */ public static final String MAX_WORD_LENGTH_MODE = "max-word-length"; private String mode = null; private TokenCollector tokenCollector = null; public void init(Map<String,String> args) { setMode(args.get("mode")); } public Tokenizer create(Reader input) { return new PaodingTokenizer(input, PaodingMaker.make(), tokenCollector); } /** * 默认按最多切分 */ public void setMode(String mode) { if (mode == null || "default".equalsIgnoreCase(mode) || MOST_WORDS_MODE.equalsIgnoreCase(mode)) { tokenCollector = new MostWordsTokenCollector(); } else { tokenCollector = new MaxWordLengthTokenCollector(); } }}<fieldType name="text" positionIncrementGap="100"> <analyzer type="index"> <!-- 使用paoding分词器,按最多切分 mode="most-words"这个是在上面哪个类里定义好的。--> <tokenizer mode="most-words"/> ··· ··· </analyzer> <analyzer type="query"> <tokenizer mode="most-words"/> ··· ··· </analyzer> </fieldType>