# Configuration file for Japanese Desktop recognizer
|
|
|
|
# Configuration
|
|
Locale=JPN
|
|
Config=dev_rare
|
|
|
|
# The list of categories to select from the .loc file for this configuration.
|
|
CatagoryList=basic,ExtPunc,ExtMath,JIS208-1,JIS208-2,IBMCorp,JIS212
|
|
|
|
# The files containing the lists of FFF files to train from.
|
|
TrainSets="\
|
|
//tpg/reco/TextInk/jpn/jpn-boxed-20020131/J208--list.txt \
|
|
//tpg/reco/TextInk/jpn/jpn-boxed-20020131/J212--list.txt \
|
|
//tpg/reco/TextInk/jpn/jpn-boxed-20020131/address--list.txt \
|
|
//tpg/reco/TextInk/jpn/jpn-boxed-20020131/encaj--list.txt \
|
|
//tpg/reco/TextInk/jpn/jpn-boxed-20020131/ibm--list.txt \
|
|
//tpg/reco/TextInk/jpn/jpn-boxed-20020131/natural--list.txt \
|
|
//tpg/reco/TextInk/jpn/jpn-boxed-20020131/old--list.txt \
|
|
//tpg/reco/TextInk/jpn/jpn-boxed-20020131/symbols--list.txt \
|
|
//tpg/reco/TextInk/jpn/jpn-boxed-20020208/addpanel--list.txt \
|
|
"
|
|
# Old training set
|
|
# TrainSets="//tpg/reco/TextInk/jpn/OldData/jpnShipDesktop004-list.txt"
|
|
|
|
# Free input training set
|
|
FreeTrainSet="//tpg/reco/train/jpn/jpn-free-trn-01.tfl"
|
|
|
|
# Tuning sets
|
|
NatTuneSet="//tpg/reco/test/jpn/tune-natural000.tfl"
|
|
UniTuneSet="//tpg/reco/test/jpn/tune-uniform000.tfl"
|
|
FreeTuneSet="//tpg/reco/test/jpn/tune-free-natural000.tfl"
|
|
|
|
# Unicode text file to generate unigrams and bigrams from.
|
|
TextFile="//tpg/reco/corpora/jpn/all2.utf"
|
|
|
|
# List of tests to run on recognizer.
|
|
TestList="natural address JIS208 JIS212 free"
|
|
|
|
# Target size to shrink zilla to.
|
|
ZillaSize=7000000
|
|
|
|
# Select the recognizer for 1 and 2 stroke characters
|
|
UseFugu=1
|
|
UseOtter=1
|
|
UseSole=1
|
|
UseJaws=1
|
|
|
|
# Select the postprocessor
|
|
UseCrane=1
|
|
UseHawk=0
|
|
|
|
# Shrink parameters for Otter.
|
|
OtterSize=1000000
|
|
OtterStrongCluster=70
|
|
OtterWeakCluster=25
|
|
|
|
# Otter limit on number of samples per (folded) code point
|
|
OtterSampleLimit=25000
|
|
|
|
# Number of classes to use for Centipede
|
|
CentipedeClasses=32
|
|
|
|
# Weight of natural frequency vs. uniform frequency in training set.
|
|
NatuFreqWeight=1.0
|
|
|
|
# Flag for WinCE configuration.
|
|
# Set to 1 if true, 0 if false.
|
|
IsWinCE=0
|
|
|
|
# Character detector settings
|
|
UseCharDet=1
|
|
|
|
# Break net and segmentation net settings
|
|
UseBrkNet=1
|
|
UseSegNet=1
|
|
|
|
# Whether to do IFELang3 tuning and testing
|
|
UseIFELang3=1
|