基于Solr 4.0正式版的IKAnalyzer分词库修改

Solr 4.0发布了,不过最新的IK分词还不支持,急着用,只能是自己修改了。相关阅读：Solr 4.0已正式发布了 http://www.linuxidc.com/Linux/2012-10/72372.htm
IKAnalyzer.java
/**
 * IK 中文分词版本 5.0
 * IK Analyzer release 5.0
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * 源代码由林良益（linliangyi2005@gmail.com）提供
 * 版权声明 2012，乌龙茶工作室
 * provided by Linliangyi and copyright 2012 by Oolong studio
 */
package org.wltea.analyzer.lucene;

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;

/**
 * IK analyzer: a Lucene {@link Analyzer} implementation backed by {@link IKTokenizer}.
 *
 * <p>The original note states compatibility with Lucene 3.1 and later; this version is
 * written against the {@code createComponents}/{@code TokenStreamComponents} API.
 */
public final class IKAnalyzer extends Analyzer {

    /** Flag handed to every {@link IKTokenizer} this analyzer creates. */
    private boolean isMaxWordLength = false;

    /**
     * Creates an analyzer that uses the default, finest-granularity segmentation.
     */
    public IKAnalyzer() {
        this(false);
    }

    /**
     * Creates an analyzer with the given segmentation mode.
     *
     * @param isMaxWordLength when {@code true}, the tokenizer performs
     *                        maximum-word-length segmentation
     */
    public IKAnalyzer(boolean isMaxWordLength) {
        super();
        this.setMaxWordLength(isMaxWordLength);
    }

    /**
     * Builds the analysis chain for a field: a single {@link IKTokenizer} over {@code reader}.
     *
     * @param fieldName name of the field being analyzed (not used by this implementation)
     * @param reader    source of the text to tokenize
     * @return components whose source and resulting stream are both the new tokenizer
     */
    @Override
    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new IKTokenizer(reader, isMaxWordLength());
        // Use the single-argument constructor so the tokenizer itself is the stream handed
        // to consumers; the previous code passed null as the result stream.
        return new TokenStreamComponents(tokenizer);
    }

    /**
     * Sets the segmentation mode used for tokenizers created after this call.
     *
     * @param isMaxWordLength {@code true} for maximum-word-length segmentation
     */
    public void setMaxWordLength(boolean isMaxWordLength) {
        this.isMaxWordLength = isMaxWordLength;
    }

    /**
     * @return the segmentation mode flag currently configured on this analyzer
     */
    public boolean isMaxWordLength() {
        return isMaxWordLength;
    }
}
IKTokenizer.java/**
* IK 中文分词版本 5.0
* IK Analyzer release 5.0
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* 源代码由林良益（linliangyi2005@gmail.com）提供
* 版权声明 2012，乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio
* *
*/
package org.wltea.analyzer.lucene;import java.io.IOException;
import java.io.Reader;import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;/**
* IK分词器 Lucene Tokenizer适配器类兼容Lucene 3.1以上版本
*/
public final class IKTokenizer extends Tokenizer { // IK分词器实现
private IKSegmenter _IKImplement;
// 词元文本属性
private CharTermAttribute termAtt;
// 词元位移属性
private OffsetAttribute offsetAtt;
// 记录最后一个词元的结束位置
private int finalOffset; /**
* Lucene 3.5 Tokenizer适配器类构造函数
*
* @param in
* @param useSmart
*/
public IKTokenizer（Reader in, boolean useSmart） {
super（in）;
offsetAtt = addAttribute（OffsetAttribute.class）;
termAtt = addAttribute（CharTermAttribute.class）;
_IKImplement = new IKSegmenter（in, useSmart）;
} /*
* （non-Javadoc）
*
* <A class=referer href="http://www.linuxidc.com" target=_blank>@see</A> org.apache.lucene.analysis.TokenStream#incrementToken（）
*/
@Override
public boolean incrementToken（） throws IOException {
// 清除所有的词元属性
clearAttributes（）;
Lexeme nextLexeme = _IKImplement.next（）;
if （nextLexeme ！= null） {
// 将Lexeme转成Attributes
// 设置词元文本
termAtt.append（nextLexeme.getLexemeText（））;
// 设置词元长度
termAtt.setLength（nextLexeme.getLength（））;
// 设置词元位移
offsetAtt.setOffset（nextLexeme.getBeginPosition（）,
nextLexeme.getEndPosition（））;
// 记录分词的最后位置
finalOffset = nextLexeme.getEndPosition（）;
// 返会true告知还有下个词元
return true;
}
// 返会false告知词元输出完毕
return false;
} /*
* （non-Javadoc）
*
* <A class=referer href="http://www.linuxidc.com" target=_blank>@see</A> org.apache.lucene.analysis.Tokenizer#reset（java.io.Reader）
*/
public void reset（Reader input） throws IOException {
super.reset（）;
_IKImplement.reset（input）;
} @Override
public final void end（） {
// set final offset
offsetAtt.setOffset（finalOffset, finalOffset）;
}
}IKTokenizerFactory.java/**
* IK 中文分词版本 5.0
* IK Analyzer release 5.0
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* 源代码由林良益（linliangyi2005@gmail.com）提供
* 版权声明 2012，乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio
*
*
*/
package org.wltea.analyzer.solr;import java.io.Reader;
import java.util.Map;import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.wltea.analyzer.lucene.IKTokenizer;/**
* IK中文分词
* Solr分词器工厂实现
*
* 2012-3-6
*
*/
public class IKTokenizerFactory extends TokenizerFactory {

private boolean useSmart = false;

@Override
public void init（Map<String, String> params） {
super.init（params）;
String useSmartParam = params.get（"useSmart"）;
this.useSmart = （useSmartParam ！= null ？ Boolean.parseBoolean（useSmartParam） : false）;
} /* （non-Javadoc）
* <A class=referer href="http://www.linuxidc.com" target=_blank>@see</A> org.apache.solr.analysis.TokenizerFactory#create（java.io.Reader）
*/
public Tokenizer create（Reader in） {
return new IKTokenizer（in , this.useSmart）;
}}Ubuntu 12.10 常用软件自动安装脚本2013腾讯校园招聘软件研发笔试题相关资讯 Solr Solr 4.0 ikanalyzer

全文搜索Apache Solr v6.0.1发布下（05月31日）
Solr5.3.0导入 Oracle 数据（05月22日）
CentOS 6.6 使用Tomcat6部署Solr5. （02月25日）

Solr6.0.0 + Tomcat8 配置问题（05月28日）
在Tomcat上面安装Solr4.8.0全过程（03月04日）
Apache Solr 5.5.0 发布下载，全文（02月24日）

本文评论查看全部评论（0）

表情：姓名：匿名字数

<div style="text-align: center;padding:20px;">
            <button class="layui-btn layui-bg-purple" id="dofav">收藏该网址</button>
        </div>
    </div>
</div>
<div class="copyright">
    <div id="footbar">
        版权所有©石家庄振强科技有限公司2024 <a href="https://beian.miit.gov.cn" target="_blank">冀ICP备08103738号-5</a> <a href="/storage/sitemap.xml">网站地图</a>
    </div>
</div>
<script> var _mtj = _mtj || [];
// Load the external statistics script by inserting a new script element
// in front of the first script element already present in the document.
(function (doc) {
    var firstScript = doc.getElementsByTagName("script")[0];
    var tracker = doc.createElement("script");
    tracker.src = "https://node12.aizhantj.com:21233/tjjs/?k=p2tceukth5c";
    firstScript.parentNode.insertBefore(tracker, firstScript);
})(document); </script>
<script src="/static/lib/layui/layui.js"></script>
<script src="/static/lib/jquery/jquery.js"></script>
<script src="/static/lib/ajax.js"></script>
<script>
    // Register the click handler for the "#dofav" (add to favourites) button.
    layui.use(function () {
        var $ = layui.jquery;
        var layer = layui.layer;

        // Show the message returned by the server.
        function showResult(res) {
            layer.msg(res.msg);
        }

        $("#dofav").click(function () {
            var artid = $(".article").attr("artid");
            // t carries a random value; presumably a cache-buster -- confirm against the backend.
            var payload = {
                artid: artid,
                addtype: 'escdns_article',
                t: Math.random()
            };
            // Only send the request when an article id greater than zero is present.
            if (!(artid > 0)) {
                return;
            }
            ajax.request({
                method: "/index/article/addfavorite",
                type: "post",
                callback: showResult
            }, payload);
        })
    })
</script>
</body>
</html>