sudachi/plugin/input_text/prolonged_sound_mark/
mod.rs

/*
 * Copyright (c) 2021 Works Applications Co., Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17use regex::Regex;
18use serde::Deserialize;
19use serde_json::Value;
20use std::collections::HashSet;
21use std::fmt::Write;
22
23use crate::config::Config;
24use crate::dic::grammar::Grammar;
25use crate::input_text::{InputBuffer, InputEditor};
26use crate::plugin::input_text::InputTextPlugin;
27use crate::plugin::PluginError;
28use crate::prelude::*;
29
30#[cfg(test)]
31mod tests;
32
33/// Replace (consecutive) prolonged sound mark by one symbol.
34#[derive(Default)]
35pub struct ProlongedSoundMarkPlugin {
36    psm_set: HashSet<char>,
37    replace_symbol: String,
38    regex: Option<Regex>,
39}
40
41/// Struct corresponds with raw config json file.
42#[allow(non_snake_case)]
43#[derive(Deserialize)]
44struct PluginSettings {
45    prolongedSoundMarks: Vec<char>,
46    replacementSymbol: Option<String>,
47}
48
49impl ProlongedSoundMarkPlugin {
50    /// Convert prolongation marks to a Regex which will match at least two patterns
51    fn prolongs_as_regex<I: Iterator<Item = char>>(data: I) -> SudachiResult<Regex> {
52        let mut pattern = String::with_capacity(32);
53        pattern.push('[');
54        for symbol in data {
55            match symbol {
56                '-' | '[' | ']' | '^' | '\\' => {
57                    write!(pattern, "\\u{{{:X}}}", symbol as u32).expect("should not happen")
58                }
59                c => pattern.push(c),
60            }
61        }
62        pattern.push_str("]{2,}");
63        match Regex::new(&pattern) {
64            Ok(re) => Ok(re),
65            Err(e) => Err(SudachiError::PluginError(PluginError::InvalidDataFormat(
66                e.to_string(),
67            ))),
68        }
69    }
70}
71
72impl InputTextPlugin for ProlongedSoundMarkPlugin {
73    fn set_up(
74        &mut self,
75        settings: &Value,
76        _config: &Config,
77        _grammar: &Grammar,
78    ) -> SudachiResult<()> {
79        let settings: PluginSettings = serde_json::from_value(settings.clone())?;
80
81        let psm_set = settings.prolongedSoundMarks.into_iter().collect();
82        let replace_symbol = settings.replacementSymbol;
83
84        self.psm_set = psm_set;
85        self.replace_symbol = replace_symbol.unwrap_or("ー".to_string());
86        self.regex = Some(Self::prolongs_as_regex(self.psm_set.iter().cloned())?);
87        Ok(())
88    }
89
90    fn rewrite_impl<'a>(
91        &'a self,
92        input: &InputBuffer,
93        mut edit: InputEditor<'a>,
94    ) -> SudachiResult<InputEditor<'a>> {
95        let re = self.regex.as_ref().unwrap();
96        let data = input.current();
97
98        for m in re.find_iter(data) {
99            edit.replace_ref(m.range(), &self.replace_symbol)
100        }
101        Ok(edit)
102    }
103}