jcseg
/
jcseg.properties

# Jcseg properties file.
# Report bugs to chenxin <chenxin619315@gmail.com>

# Jcseg function
# Maximum match length. (5-7)
jcseg.maxlen = 7

# Whether to recognize Chinese names. (1 to open and 0 to close it)
jcseg.icnname = 1

# Maximum length for paired-punctuation text.
jcseg.pptmaxlen = 7

# Maximum length for the Chinese last-name adorn. (the key keeps its original spelling "adron")
jcseg.cnmaxlnadron = 1

# Whether to clear the stopwords. (set 1 to clear stopwords and 0 to close it)
jcseg.clearstopword = 0

# Whether to convert Chinese numerics to Arabic numbers. (set 1 to open it and 0 to close it)
# For example, '\u4E09\u4E07' is converted to 30000.
jcseg.cnnumtoarabic = 1

# Whether to convert Chinese fractions to Arabic fractions.
# @Note: close it when used with Lucene, Solr, Elasticsearch, etc.
jcseg.cnfratoarabic = 0

# Whether to keep unrecognized words. (set 1 to keep unrecognized words and 0 to clear them)
jcseg.keepunregword = 1

# Whether to start the secondary segmentation for complex English words.
jcseg.ensencondseg = 1

# Minimum length of a secondary simple token. (a value larger than 1 is recommended)
jcseg.stokenminlen = 2

# Threshold for Chinese name recognition.
# Better not to change it unless you know what you are doing.
jcseg.nsthreshold = 1000000

# Punctuation characters that will be kept inside a token. (but not at the end of the token)
jcseg.keeppunctuations = @#%.&+


#### About the lexicon
# Absolute path of the lexicon directory.
# Multiple paths are supported since jcseg 1.9.2; use ';' to separate the paths.
# example: lexicon.path = /home/chenxin/lex1;/home/chenxin/lex2 (Linux)
#        : lexicon.path = D:/jcseg/lexicon/1;D:/jcseg/lexicon/2 (WinNT)
#lexicon.path=/Code/java/JavaSE/jcseg/lexicon
#lexicon.path = {jar.dir}/lexicon ({jar.dir} means the base directory of jcseg-core-{version}.jar)
# @since 1.9.9: by default Jcseg loads the lexicons from the classpath.
# NOTE(review): presumably the value 'null' selects that classpath default - confirm.
lexicon.path = null

# Whether to automatically load modified lexicon files.
lexicon.autoload = 0

# Polling interval for the auto load. (seconds)
lexicon.polltime = 300


#### Lexicon load
# Whether to load the part of speech of each entry.
jcseg.loadpos = 1

# Whether to load the pinyin of each entry.
jcseg.loadpinyin = 0

# Whether to load the synonyms of each entry.
jcseg.loadsyn = 1

# Whether to load the entity of each entry.
jcseg.loadentity = 1