sudachi/dic/
dictionary.rs1use std::fs::File;
18use std::path::Path;
19
20use memmap2::Mmap;
21
22use crate::analysis::stateless_tokenizer::DictionaryAccess;
23use crate::config::Config;
24use crate::dic::grammar::Grammar;
25use crate::dic::lexicon_set::LexiconSet;
26use crate::dic::storage::{Storage, SudachiDicData};
27use crate::dic::{DictionaryLoader, LoadedDictionary};
28use crate::error::{SudachiError, SudachiResult};
29use crate::plugin::input_text::InputTextPlugin;
30use crate::plugin::oov::OovProviderPlugin;
31use crate::plugin::path_rewrite::PathRewritePlugin;
32use crate::plugin::Plugins;
33
34pub struct JapaneseDictionary {
44 storage: SudachiDicData,
45 plugins: Plugins,
46 _grammar: Grammar<'static>,
48 _lexicon: LexiconSet<'static>,
50}
51
52fn map_file(path: &Path) -> SudachiResult<Storage> {
53 let file = File::open(path)?;
54 let mapping = unsafe { Mmap::map(&file) }?;
55 Ok(Storage::File(mapping))
56}
57
58fn load_system_dic(cfg: &Config) -> SudachiResult<Storage> {
59 let p = cfg.resolved_system_dict()?;
60 map_file(&p).map_err(|e| e.with_context(p.as_os_str().to_string_lossy()))
61}
62
63impl JapaneseDictionary {
64 pub fn from_cfg(cfg: &Config) -> SudachiResult<JapaneseDictionary> {
67 let mut sb = SudachiDicData::new(load_system_dic(cfg)?);
68
69 for udic in cfg.resolved_user_dicts()? {
70 sb.add_user(
71 map_file(&udic).map_err(|e| e.with_context(udic.as_os_str().to_string_lossy()))?,
72 )
73 }
74
75 Self::from_cfg_storage(cfg, sb)
76 }
77
78 pub fn from_cfg_storage(
80 cfg: &Config,
81 storage: SudachiDicData,
82 ) -> SudachiResult<JapaneseDictionary> {
83 let mut basic_dict = LoadedDictionary::from_system_dictionary(
84 unsafe { storage.system_static_slice() },
85 cfg.complete_path(&cfg.character_definition_file)?.as_path(),
86 )?;
87
88 let plugins = {
89 let grammar = &mut basic_dict.grammar;
90 Plugins::load(cfg, grammar)?
91 };
92
93 if plugins.oov.is_empty() {
94 return Err(SudachiError::NoOOVPluginProvided);
95 }
96
97 for p in plugins.connect_cost.plugins() {
98 p.edit(&mut basic_dict.grammar);
99 }
100
101 let mut dic = JapaneseDictionary {
102 storage,
103 plugins,
104 _grammar: basic_dict.grammar,
105 _lexicon: basic_dict.lexicon_set,
106 };
107
108 let user_dicts: Vec<_> = dic.storage.user_static_slice();
110 for udic in user_dicts {
111 dic = dic.merge_user_dictionary(udic)?;
112 }
113
114 Ok(dic)
115 }
116
117 pub fn from_cfg_storage_with_embedded_chardef(
119 cfg: &Config,
120 storage: SudachiDicData,
121 ) -> SudachiResult<JapaneseDictionary> {
122 let mut basic_dict = LoadedDictionary::from_system_dictionary_embedded(unsafe {
123 storage.system_static_slice()
124 })?;
125
126 let plugins = {
127 let grammar = &mut basic_dict.grammar;
128 Plugins::load(cfg, grammar)?
129 };
130
131 if plugins.oov.is_empty() {
132 return Err(SudachiError::NoOOVPluginProvided);
133 }
134
135 for p in plugins.connect_cost.plugins() {
136 p.edit(&mut basic_dict.grammar);
137 }
138
139 let mut dic = JapaneseDictionary {
140 storage,
141 plugins,
142 _grammar: basic_dict.grammar,
143 _lexicon: basic_dict.lexicon_set,
144 };
145
146 let user_dicts: Vec<_> = dic.storage.user_static_slice();
148 for udic in user_dicts {
149 dic = dic.merge_user_dictionary(udic)?;
150 }
151
152 Ok(dic)
153 }
154
155 pub fn grammar(&self) -> &Grammar<'_> {
157 &self._grammar
158 }
159
160 pub fn lexicon(&self) -> &LexiconSet<'_> {
162 &self._lexicon
163 }
164
165 fn merge_user_dictionary(mut self, dictionary_bytes: &'static [u8]) -> SudachiResult<Self> {
166 let user_dict = DictionaryLoader::read_user_dictionary(dictionary_bytes)?;
167
168 let mut user_lexicon = user_dict.lexicon;
170 user_lexicon.update_cost(&self)?;
171
172 self._lexicon
173 .append(user_lexicon, self._grammar.pos_list.len())?;
174
175 if let Some(g) = user_dict.grammar {
176 self._grammar.merge(g);
177 }
178
179 Ok(self)
180 }
181}
182
183impl DictionaryAccess for JapaneseDictionary {
184 fn grammar(&self) -> &Grammar<'_> {
185 self.grammar()
186 }
187
188 fn lexicon(&self) -> &LexiconSet<'_> {
189 self.lexicon()
190 }
191
192 fn input_text_plugins(&self) -> &[Box<dyn InputTextPlugin + Sync + Send>] {
193 self.plugins.input_text.plugins()
194 }
195
196 fn oov_provider_plugins(&self) -> &[Box<dyn OovProviderPlugin + Sync + Send>] {
197 self.plugins.oov.plugins()
198 }
199
200 fn path_rewrite_plugins(&self) -> &[Box<dyn PathRewritePlugin + Sync + Send>] {
201 self.plugins.path_rewrite.plugins()
202 }
203}