sudachi/analysis/
morpheme.rs1use crate::analysis::node::{LatticeNode, PathCost, ResultNode};
18use crate::analysis::stateless_tokenizer::DictionaryAccess;
19use crate::dic::lexicon::word_infos::WordInfo;
20use crate::dic::word_id::WordId;
21use crate::input_text::InputTextIndex;
22use crate::prelude::*;
23use std::cell::Ref;
24
25pub struct Morpheme<'a, T> {
27 list: &'a MorphemeList<T>,
28 index: usize,
29}
30
31impl<T: DictionaryAccess> Morpheme<'_, T> {
32 pub fn part_of_speech(&self) -> &[String] {
34 self.list
35 .dict()
36 .grammar()
37 .pos_components(self.part_of_speech_id())
38 }
39}
40
41impl<T: DictionaryAccess + Clone> Morpheme<'_, T> {
42 #[deprecated(note = "use split_into", since = "0.6.1")]
44 pub fn split(&self, mode: Mode) -> SudachiResult<MorphemeList<T>> {
45 #[allow(deprecated)]
46 self.list.split(mode, self.index)
47 }
48}
49
50impl<'a, T: DictionaryAccess> Morpheme<'a, T> {
51 pub(crate) fn for_list(list: &'a MorphemeList<T>, index: usize) -> Self {
52 Morpheme { list, index }
53 }
54
55 #[inline]
56 pub(crate) fn node(&self) -> &ResultNode {
57 self.list.node(self.index)
58 }
59
60 pub fn begin(&self) -> usize {
62 self.list.input().to_orig_byte_idx(self.node().begin())
63 }
64
65 pub fn end(&self) -> usize {
67 self.list.input().to_orig_byte_idx(self.node().end())
68 }
69
70 pub fn begin_c(&self) -> usize {
72 self.list.input().to_orig_char_idx(self.node().begin())
73 }
74
75 pub fn end_c(&self) -> usize {
77 self.list.input().to_orig_char_idx(self.node().end())
78 }
79
80 pub fn surface(&self) -> Ref<str> {
82 let inp = self.list.input();
83 Ref::map(inp, |i| i.orig_slice(self.node().bytes_range()))
84 }
85
86 pub fn part_of_speech_id(&self) -> u16 {
87 self.node().word_info().pos_id()
88 }
89
90 pub fn dictionary_form(&self) -> &str {
94 self.get_word_info().dictionary_form()
95 }
96
97 pub fn normalized_form(&self) -> &str {
101 self.get_word_info().normalized_form()
102 }
103
104 pub fn reading_form(&self) -> &str {
108 self.get_word_info().reading_form()
109 }
110
111 pub fn is_oov(&self) -> bool {
113 self.word_id().is_oov()
114 }
115
116 pub fn word_id(&self) -> WordId {
118 self.node().word_id()
119 }
120
121 pub fn dictionary_id(&self) -> i32 {
125 let wid = self.word_id();
126 if wid.is_oov() {
127 -1
128 } else {
129 wid.dic() as i32
130 }
131 }
132
133 pub fn synonym_group_ids(&self) -> &[u32] {
134 self.get_word_info().synonym_group_ids()
135 }
136
137 pub fn get_word_info(&self) -> &WordInfo {
138 self.node().word_info()
139 }
140
141 pub fn index(&self) -> usize {
143 self.index
144 }
145
146 pub fn split_into(&self, mode: Mode, out: &mut MorphemeList<T>) -> SudachiResult<bool> {
150 self.list.split_into(mode, self.index, out)
151 }
152
153 pub fn total_cost(&self) -> i32 {
155 return self.node().total_cost();
156 }
157}
158
159impl<T: DictionaryAccess> std::fmt::Debug for Morpheme<'_, T> {
160 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
161 f.debug_struct("Morpheme")
162 .field("surface", &self.surface())
163 .field("pos", &self.part_of_speech())
164 .field("normalized_form", &self.normalized_form())
165 .field("reading_form", &self.reading_form())
166 .field("dictionary_form", &self.dictionary_form())
167 .finish()
168 }
169}