sudachi/plugin/oov/
mod.rs

1/*
2 * Copyright (c) 2021-2024 Works Applications Co., Ltd.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17use crate::analysis::created::CreatedWords;
18use crate::analysis::Node;
19use serde_json::Value;
20
21use crate::config::Config;
22use crate::dic::grammar::Grammar;
23use crate::input_text::InputBuffer;
24use crate::plugin::oov::mecab_oov::MeCabOovPlugin;
25use crate::plugin::oov::regex_oov::RegexOovProvider;
26use crate::plugin::oov::simple_oov::SimpleOovPlugin;
27use crate::plugin::PluginCategory;
28use crate::prelude::*;
29
30pub mod mecab_oov;
31pub mod regex_oov;
32pub mod simple_oov;
33
/// Trait of plugin to provide oov (out-of-vocabulary) node during tokenization.
///
/// Implementors must be `Sync + Send`, as required by the supertrait bounds.
pub trait OovProviderPlugin: Sync + Send {
    /// Loads necessary information for the plugin.
    ///
    /// * `settings` - JSON value with the settings for this plugin
    ///   (presumably one entry of the oov provider plugin configuration;
    ///   verify against the plugin loader).
    /// * `config` - the configuration object.
    /// * `grammar` - the grammar; it is passed mutably, so an implementation
    ///   is allowed to modify it during set up.
    ///
    /// # Errors
    ///
    /// Returns an error if the plugin cannot be set up.
    fn set_up(
        &mut self,
        settings: &Value,
        config: &Config,
        grammar: &mut Grammar,
    ) -> SudachiResult<()>;

    /// Generate a list of oov nodes.
    ///
    /// * `input_text` - the input buffer being analyzed.
    /// * `offset` - char idx (a character index, not a byte index).
    /// * `other_words` - NOTE(review): presumably describes words already
    ///   created at this offset by other sources; confirm against `CreatedWords`.
    /// * `result` - output vector for the generated nodes
    ///   (presumably nodes are appended to it; confirm with implementors).
    ///
    /// Returns a `usize` on success.
    /// NOTE(review): this looks like the number of nodes produced; confirm with callers.
    fn provide_oov(
        &self,
        input_text: &InputBuffer,
        offset: usize,
        other_words: CreatedWords,
        result: &mut Vec<Node>,
    ) -> SudachiResult<usize>;
}
54
55impl PluginCategory for dyn OovProviderPlugin {
56    type BoxType = Box<dyn OovProviderPlugin + Sync + Send>;
57    type InitFnType = unsafe fn() -> SudachiResult<Self::BoxType>;
58    fn configurations(cfg: &Config) -> &[Value] {
59        &cfg.oov_provider_plugins
60    }
61
62    fn bundled_impl(name: &str) -> Option<Self::BoxType> {
63        match name {
64            "SimpleOovPlugin" => Some(Box::<SimpleOovPlugin>::default()),
65            "MeCabOovPlugin" => Some(Box::<MeCabOovPlugin>::default()),
66            "RegexOovProvider" => Some(Box::<RegexOovProvider>::default()),
67            _ => None,
68        }
69    }
70
71    fn do_setup(
72        ptr: &mut Self::BoxType,
73        settings: &Value,
74        config: &Config,
75        grammar: &mut Grammar,
76    ) -> SudachiResult<()> {
77        ptr.set_up(settings, config, grammar)
78    }
79}