Skip to content

Commit 75e6b1a

Browse files
authored Aug 12, 2023
follow opencc conversion chain (#688)
* follow opencc conversion chain
* when a dict doesn't contain a word, pass as-is
* de-duplication
1 parent 44ae945 commit 75e6b1a

File tree

1 file changed

+39
-10
lines changed

1 file changed

+39
-10
lines changed
 

‎src/rime/gear/simplifier.cc

+39-10
Original file line number | Diff line number | Diff line change
@@ -58,19 +58,48 @@ class Opencc {
5858
}
5959

6060
bool ConvertWord(const string& text, vector<string>* forms) {
61-
if (dict_ == nullptr)
62-
return false;
63-
opencc::Optional<const opencc::DictEntry*> item = dict_->Match(text);
64-
if (item.IsNull()) {
65-
// Match not found
61+
if (converter_ == nullptr) {
6662
return false;
67-
} else {
68-
const opencc::DictEntry* entry = item.Get();
69-
for (auto&& value : entry->Values()) {
70-
forms->push_back(std::move(value));
63+
}
64+
const list<opencc::ConversionPtr> conversions =
65+
converter_->GetConversionChain()->GetConversions();
66+
vector<string> original_words{text};
67+
bool matched = false;
68+
for (auto conversion : conversions) {
69+
opencc::DictPtr dict = conversion->GetDict();
70+
if (dict == nullptr) {
71+
return false;
7172
}
72-
return forms->size() > 0;
73+
set<string> word_set;
74+
vector<string> converted_words;
75+
for (const auto& original_word : original_words) {
76+
opencc::Optional<const opencc::DictEntry*> item =
77+
dict->Match(original_word);
78+
if (item.IsNull()) {
79+
// Current dictionary doesn't convert the word. We need to keep it for
80+
// other dicts in the chain. e.g. s2t.json expands 里 to 里 and 裏,
81+
// then t2tw.json passes 里 as-is and converts 裏 to 裡.
82+
if (word_set.insert(original_word).second) {
83+
converted_words.push_back(original_word);
84+
}
85+
continue;
86+
}
87+
matched = true;
88+
const opencc::DictEntry* entry = item.Get();
89+
for (const auto& converted_word : entry->Values()) {
90+
if (word_set.insert(converted_word).second) {
91+
converted_words.push_back(converted_word);
92+
}
93+
}
94+
}
95+
original_words.swap(converted_words);
96+
}
97+
if (!matched) {
98+
// No dictionary contains the word
99+
return false;
73100
}
101+
*forms = std::move(original_words);
102+
return forms->size() > 0;
74103
}
75104

76105
bool RandomConvertText(const string& text, string* simplified) {

0 commit comments

Comments
 (0)