sudachi/plugin/oov/simple_oov/
mod.rs

1/*
2 * Copyright (c) 2021 Works Applications Co., Ltd.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17use crate::analysis::created::CreatedWords;
18use serde::Deserialize;
19use serde_json::Value;
20
21use crate::analysis::Node;
22use crate::config::Config;
23use crate::dic::grammar::Grammar;
24use crate::dic::word_id::WordId;
25use crate::input_text::InputBuffer;
26use crate::plugin::oov::OovProviderPlugin;
27use crate::prelude::*;
28use crate::util::check_params::CheckParams;
29use crate::util::user_pos::{UserPosMode, UserPosSupport};
30
/// OOV provider plugin that emits a single fallback node at positions where
/// no other words were created.
///
/// All fields are filled in by `set_up` from the plugin settings; the
/// `Default` value (all zeroes) is only a placeholder until then.
#[derive(Debug, Default)]
pub struct SimpleOovPlugin {
    /// Validated `leftId` setting, passed to every node this plugin creates.
    left_id: u16,
    /// Validated `rightId` setting, passed to every node this plugin creates.
    right_id: u16,
    /// Validated `cost` setting, passed to every node this plugin creates.
    cost: i16,
    /// POS id resolved from the `oovPOS` setting; used to build the OOV word id.
    oov_pos_id: u16,
}
39
40/// Struct corresponds with raw config json file.
41#[allow(non_snake_case)]
42#[derive(Deserialize)]
43struct PluginSettings {
44    oovPOS: Vec<String>,
45    leftId: i64,
46    rightId: i64,
47    cost: i64,
48    #[serde(default)]
49    userPOS: UserPosMode,
50}
51
52impl OovProviderPlugin for SimpleOovPlugin {
53    fn set_up(
54        &mut self,
55        settings: &Value,
56        _config: &Config,
57        mut grammar: &mut Grammar,
58    ) -> SudachiResult<()> {
59        let settings: PluginSettings = serde_json::from_value(settings.clone())?;
60
61        self.oov_pos_id = grammar.handle_user_pos(&settings.oovPOS, settings.userPOS)?;
62        self.left_id = grammar.check_left_id(settings.leftId)?;
63        self.right_id = grammar.check_right_id(settings.rightId)?;
64        self.cost = grammar.check_cost(settings.cost)?;
65        Ok(())
66    }
67
68    fn provide_oov(
69        &self,
70        input_text: &InputBuffer,
71        offset: usize,
72        other_words: CreatedWords,
73        result: &mut Vec<Node>,
74    ) -> SudachiResult<usize> {
75        if other_words.not_empty() {
76            return Ok(0);
77        }
78
79        let length = input_text.get_word_candidate_length(offset);
80
81        result.push(Node::new(
82            offset as u16,
83            (offset + length) as u16,
84            self.left_id,
85            self.right_id,
86            self.cost,
87            WordId::oov(self.oov_pos_id as u32),
88        ));
89        Ok(1)
90    }
91}