cixiren posted on 2017-2-28 08:22:57

不可一日无烟卷

  # Cluster name
  cluster.name: "cn-out-of-box"
  # Node name
  node.name: "node1"
  # Whether this node is eligible to become master
  node.master: true
  # Whether this node stores index data
  node.data: true
  # Default number of primary shards per index
  index.number_of_shards: 3
  # Default number of replicas per index
  index.number_of_replicas: 1
  # Work (temporary) files path
  #path.work: "/tmp/elasticsearch"
  # Log files path
  #path.logs: "/var/log/elasticsearch/logs"
  # TCP transport port
  transport.tcp.port: 9300
  # Compress TCP transport data
  transport.tcp.compress: true
  # HTTP port
  http.port: 9200
  # Enable the HTTP service
  #http.enabled: true
  # Enable multicast node discovery
  discovery.zen.ping.multicast.enabled: true
  # Slow-query log thresholds
  #index.search.slowlog.threshold.query.warn: 10s
  #index.search.slowlog.threshold.query.info: 5s
  #index.search.slowlog.threshold.query.debug: 2s
  #index.search.slowlog.threshold.query.trace: 500ms
  #index.search.slowlog.threshold.fetch.warn: 1s
  #index.search.slowlog.threshold.fetch.info: 800ms
  #index.search.slowlog.threshold.fetch.debug: 500ms
  #index.search.slowlog.threshold.fetch.trace: 200ms
  # Serve HTTP through the elasticsearch-jetty plugin
  http.type: com.sonian.elasticsearch.http.jetty.JettyHttpServerTransport
  #sonian.elasticsearch.http.jetty:
  #  ==== enable HTTPS
  #  ssl_port: 9443
  #  config: jetty.xml,jetty-ssl.xml,jetty-gzip.xml
  #  keystore_password: "OBF:1nc01vuz1w8f1w1c1rbu1rac1w261w9b1vub1ndq"
  #  ==== enable user authentication
  #  config: jetty.xml,jetty-hash-auth.xml,jetty-restrict-all.xml
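With the node-level settings above in place, a quick sanity check is to hit the HTTP port and read back the cluster name and health. A minimal sketch, assuming the node is reachable on http://localhost:9200 and the jetty authentication shown above is still disabled:

# Liveness check against the HTTP port configured above.
# Assumes http://localhost:9200 is reachable and auth is not enabled yet.
import json
import urllib.request

def get_json(path, host="http://localhost:9200"):
    with urllib.request.urlopen(host + path) as resp:
        return json.loads(resp.read().decode("utf-8"))

health = get_json("/_cluster/health")
print(health["cluster_name"], health["status"])  # expect "cn-out-of-box" and green/yellow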
  # Index settings
  index:
    # Analysis settings
    analysis:
      # Tokenizer definitions
      tokenizer:
        index_ansj_token:
          type: ansj_index_token
          is_name: false
          is_num: false
          is_quantifier: false
        query_ansj_token:
          type: ansj_query_token
          is_name: false
          is_num: false
          is_quantifier: false
        # ======== analysis-pinyin ========
        # Full pinyin
        my_pinyin:
          type: pinyin
          first_letter: prefix
          padding_char: ' '
        # Pinyin first letters only
        pinyin_first_letter:
          type: pinyin
          first_letter: only
        # ======== analysis-mmseg ========
        # Simple forward maximum matching
        #   example: 一个劲儿的说话
        #   一个
        #   一个劲
        #   一个劲儿
        #   一个劲儿的
        mmseg_simple:
          type: mmseg
          seg_type: simple
        # Finds every candidate three-word chunk, then disambiguates with four rules
        # (maximum matching, largest average word length, smallest variance of word
        # lengths, largest sum of the log frequencies of single-character words)
        #   example: 研究生命起源
        #   研_究_生
        #   研_究_生命
        #   研究生_命_起源
        #   研究_生命_起源
        mmseg_complex:
          type: mmseg
          seg_type: complex
        # Max-word segmentation built on top of complex
        #   example: 中国人民银行
        #   中国|人民|银行
        mmseg_maxword:
          type: mmseg
          seg_type: max_word
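The expected segmentations listed in the comments above are easy to verify once the mmseg analyzers defined further down are loaded. A minimal sketch, assuming a 1.x-era node on http://localhost:9200 where _analyze accepts analyzer and text as query parameters (newer versions expect a JSON body instead):

# Compare the three mmseg segmentation modes via the _analyze API.
import json
import urllib.parse
import urllib.request

def analyze(analyzer, text, host="http://localhost:9200"):
    qs = urllib.parse.urlencode({"analyzer": analyzer, "text": text})
    with urllib.request.urlopen("%s/_analyze?%s" % (host, qs)) as resp:
        return [t["token"] for t in json.loads(resp.read().decode("utf-8"))["tokens"]]

for name in ("mmseg_simple", "mmseg_complex", "mmseg_maxword"):
    print(name, analyze(name, "研究生命起源"))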
        # ======== analysis-stconvert ========
        # Simplified -> traditional, output traditional only
        s2t_convert:
          type: stconvert
          delimiter: ","
          convert_type: s2t
        # Traditional -> simplified, output simplified only
        t2s_convert:
          type: stconvert
          delimiter: ","
          convert_type: t2s
        # Simplified -> traditional, output both forms
        s2t_keep_both_convert:
          type: stconvert
          delimiter: ","
          keep_both: 'true'
          convert_type: s2t
        # Traditional -> simplified, output both forms
        t2s_keep_both_convert:
          type: stconvert
          delimiter: ","
          keep_both: 'true'
          convert_type: t2s
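The stconvert tokenizers are exposed again as analyzers of the same name later in this file, so they can be spot-checked the same way; with keep_both enabled, the original and converted forms come back joined by the configured delimiter. Same assumptions as the sketch above (1.x-style _analyze on localhost:9200):

# Spot-check simplified/traditional conversion.
import json
import urllib.parse
import urllib.request

def analyze(analyzer, text, host="http://localhost:9200"):
    qs = urllib.parse.urlencode({"analyzer": analyzer, "text": text})
    with urllib.request.urlopen("%s/_analyze?%s" % (host, qs)) as resp:
        return [t["token"] for t in json.loads(resp.read().decode("utf-8"))["tokens"]]

print(analyze("s2t_convert", "简体转繁体"))            # traditional output only
print(analyze("s2t_keep_both_convert", "简体转繁体"))  # original and traditional joined by ","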
        # ======== analysis-pattern ========
        # Regex split on semicolons
        semicolon_spliter:
          type: pattern
          pattern: ";"
        # Regex split on percent signs
        pct_spliter:
          type: pattern
          pattern: "[%]+"
        # ======== analysis-nGram ========
        # Terms of 1 to 2 characters
        ngram_1_to_2:
          type: nGram
          min_gram: 1
          max_gram: 2
        # Terms of 1 to 3 characters
        ngram_1_to_3:
          type: nGram
          min_gram: 1
          max_gram: 3
      # Token filter definitions
      filter:
        # ======== ngram filters ========
        ngram_min_3:
          max_gram: 10
          min_gram: 3
          type: nGram
        ngram_min_2:
          max_gram: 10
          min_gram: 2
          type: nGram
        ngram_min_1:
          max_gram: 10
          min_gram: 1
          type: nGram
        # ======== length filters ========
        min2_length:
          min: 2
          max: 4
          type: length
        min3_length:
          min: 3
          max: 4
          type: length
        # ======== string2int filter ========
        #my_string2int:
        #  type: string2int
        #  redis_server: 127.0.0.1
        #  redis_port: 6379
        #  redis_key: index1_type2_name2
        # ======== pinyin filter ========
        pinyin_first_letter:
          type: pinyin
          first_letter: only
      # Analyzer definitions
      analyzer:
        lowercase_keyword:
          type: custom
          filter:
            - lowercase
          tokenizer: standard
        lowercase_keyword_ngram_min_size1:
          type: custom
          filter:
            - lowercase
            - stop
            - trim
            - unique
          tokenizer: nGram
        lowercase_keyword_ngram_min_size2:
          type: custom
          filter:
            - lowercase
            - min2_length
            - stop
            - trim
            - unique
          tokenizer: nGram
        lowercase_keyword_ngram_min_size3:
          type: custom
          filter:
            - lowercase
            - min3_length
            - stop
            - trim
            - unique
          tokenizer: ngram_1_to_3
        lowercase_keyword_ngram:
          type: custom
          filter:
            - lowercase
            - stop
            - trim
            - unique
          tokenizer: ngram_1_to_3
        lowercase_keyword_without_standard:
          type: custom
          filter:
            - lowercase
          tokenizer: keyword
        lowercase_whitespace:
          type: custom
          filter:
            - lowercase
          tokenizer: whitespace
        # ======== ik ========
        # ik analyzer
        ik:
          alias:
            - ik_analyzer
          type: org.elasticsearch.index.analysis.IkAnalyzerProvider
        # ik finest-grained (exhaustive) segmentation
        ik_max_word:
          type: ik
          use_smart: false
        # ik smart (coarse-grained) segmentation
        ik_smart:
          type: ik
          use_smart: true
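ik_max_word and ik_smart wrap the same provider with use_smart toggled, so the difference is easiest to see side by side. Same assumptions as the earlier sketches (1.x-style _analyze on localhost:9200):

# Contrast exhaustive vs. smart ik segmentation.
import json
import urllib.parse
import urllib.request

def analyze(analyzer, text, host="http://localhost:9200"):
    qs = urllib.parse.urlencode({"analyzer": analyzer, "text": text})
    with urllib.request.urlopen("%s/_analyze?%s" % (host, qs)) as resp:
        return [t["token"] for t in json.loads(resp.read().decode("utf-8"))["tokens"]]

print("ik_max_word:", analyze("ik_max_word", "中华人民共和国国歌"))  # many overlapping terms
print("ik_smart:", analyze("ik_smart", "中华人民共和国国歌"))        # fewer, coarser terms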
        # ======== mmseg ========
        # mmseg analyzer
        mmseg:
          alias:
            - mmseg_analyzer
          type: org.elasticsearch.index.analysis.MMsegAnalyzerProvider
        mmseg_maxword:
          type: custom
          filter:
            - lowercase
          tokenizer: mmseg_maxword
        mmseg_complex:
          type: custom
          filter:
            - lowercase
          tokenizer: mmseg_complex
        mmseg_simple:
          type: custom
          filter:
            - lowercase
          tokenizer: mmseg_simple
        # ======== regex ========
        comma_spliter:
          type: pattern
          pattern: "[,|\\s]+"
        pct_spliter:
          type: pattern
          pattern: "[%]+"
        custom_snowball_analyzer:
          type: snowball
          language: English
        simple_english_analyzer:
          type: custom
          tokenizer: whitespace
          filter:
            - standard
            - lowercase
            - snowball
        edge_ngram:
          type: custom
          tokenizer: edgeNGram
          filter:
            - lowercase
        # ======== pinyin analysis ========
        pinyin_ngram_analyzer:
          type: custom
          tokenizer: my_pinyin
          filter:
            - lowercase
            - nGram
            - trim
            - unique
        # ======== pinyin first-letter analysis ========
        pinyin_first_letter_analyzer:
          type: custom
          tokenizer: pinyin_first_letter
          filter:
            - standard
            - lowercase
        # ======== pinyin first letters with keyword tokenizer and filtering ========
        pinyin_first_letter_keyword_analyzer:
          alias:
            - pinyin_first_letter_analyzer_keyword
          type: custom
          tokenizer: keyword
          filter:
            - pinyin_first_letter
            - lowercase
        # ======== simplified/traditional conversion ========
        stconvert:
          alias:
            - st_analyzer
          type: org.elasticsearch.index.analysis.STConvertAnalyzerProvider
        s2t_convert:
          type: stconvert
          delimiter: ","
          convert_type: s2t
        t2s_convert:
          type: stconvert
          delimiter: ","
          convert_type: t2s
        s2t_keep_both_convert:
          type: stconvert
          delimiter: ","
          keep_both: 'true'
          convert_type: s2t
        t2s_keep_both_convert:
          type: stconvert
          delimiter: ","
          keep_both: 'true'
          convert_type: t2s
        #string2int:
        #  type: org.elasticsearch.index.analysis.String2IntAnalyzerProvider
        #  redis_server: 127.0.0.1
        #  redis_port: 6379
        #  redis_key: index1_type1_name1
        #custom_string2int:
        #  type: custom
        #  tokenizer: whitespace
        #  filter:
        #    - string2int
        #    - lowercase
        # Path hierarchy analysis
        path_analyzer:
          type: custom
          tokenizer: path_hierarchy
        # ======== ansj ========
        index_ansj:
          alias:
            - ansj_index_analyzer
          type: ansj_index
          user_path: ansj/user
          ambiguity: ansj/ambiguity.dic
          stop_path: ansj/stopLibrary.dic
          #is_name: false
          #is_num: true
          #is_quantifier: true
          redis: false
          #pool:
          #  maxactive: 20
          #  maxidle: 10
          #  maxwait: 100
          #  testonborrow: true
          #ip: 127.0.0.1:6379
          #channel: ansj_term
        query_ansj:
          alias:
            - ansj_query_analyzer
          type: ansj_query
          user_path: ansj/user
          ambiguity: ansj/ambiguity.dic
          stop_path: ansj/stopLibrary.dic
          #is_name: false
          #is_num: true
          #is_quantifier: true
          redis: false
          #pool:
          #  maxactive: 20
          #  maxidle: 10
          #  maxwait: 100
          #  testonborrow: true
          #ip: 127.0.0.1:6379
          #channel: ansj_term
        uax_url_email:
          tokenizer: uax_url_email
        # ======== combo ========
        combo:
          type: combo
          sub_analyzers:
            - ansj_index
            - ik_smart
            - mmseg_complex
            - uax_url_email
            - s2t_convert
            - t2s_convert
            - smartcn
            - simple_english_analyzer
  # Default analyzer
  index.analysis.analyzer.default.type: combo
  # Thread pool settings
  threadpool:
    index:
      type: fixed
      size: 30
      queue: -1
      reject_policy: caller
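Once the node restarts with this file, the analyzers can be wired into a mapping. A minimal sketch that creates an index whose title field indexes with index_ansj, searches with query_ansj, and keeps a pinyin sub-field; the index name, type name and field names are made up for illustration, and 1.x-era mapping syntax (string type, index_analyzer/search_analyzer) is assumed:

# Create a test index that uses the analyzers defined above. Names are illustrative.
import json
import urllib.request

body = {
    "mappings": {
        "doc": {  # 1.x-era mapping type; later versions drop mapping types
            "properties": {
                "title": {
                    "type": "string",
                    "index_analyzer": "index_ansj",
                    "search_analyzer": "query_ansj",
                    "fields": {
                        "pinyin": {
                            "type": "string",
                            "analyzer": "pinyin_first_letter_keyword_analyzer"
                        }
                    }
                }
            }
        }
    }
}

req = urllib.request.Request(
    "http://localhost:9200/oob_test",
    data=json.dumps(body).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="PUT",
)
print(urllib.request.urlopen(req).read().decode("utf-8"))  # expect "acknowledged": true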