Skip to content

Commit c515afb

Browse files
authored
perf: replace rbtree with vector of pair (#684)
1 parent e391bc2 commit c515afb

File tree

2 files changed

+12
-4
lines changed

2 files changed

+12
-4
lines changed

src/rime/dict/entry_collector.cc

+8-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
//
55
// 2011-11-27 GONG Chen <chen.sst@gmail.com>
66
//
7+
#include <algorithm>
78
#include <fstream>
9+
#include <utility>
810
#include <boost/algorithm/string.hpp>
911
#include <boost/lexical_cast.hpp>
1012
#include <rime/algo/strings.h>
@@ -192,12 +194,14 @@ void EntryCollector::CreateEntry(const string& word,
192194
bool is_word = (e->raw_code.size() == 1);
193195
if (is_word) {
194196
auto& weights = words[e->text];
195-
if (weights.find(code_str) != weights.end()) {
197+
if (std::find_if(weights.begin(), weights.end(), [&](const auto& p) {
198+
return p.first == code_str;
199+
}) != weights.end()) {
196200
LOG(WARNING) << "duplicate word definition '" << e->text << "': ["
197201
<< code_str << "].";
198202
return;
199203
}
200-
weights[code_str] += e->weight;
204+
weights.push_back(std::make_pair(code_str, e->weight));
201205
total_weight[e->text] += e->weight;
202206
}
203207
entries.emplace_back(std::move(e));
@@ -214,6 +218,8 @@ bool EntryCollector::TranslateWord(const string& word, vector<string>* result) {
214218
}
215219
const auto& w = words.find(word);
216220
if (w != words.end()) {
221+
std::sort(w->second.begin(), w->second.end(),
222+
[](const auto& a, const auto& b) { return a.first < b.first; });
217223
for (const auto& v : w->second) {
218224
const double kMinimalWeight = 0.05; // 5%
219225
double min_weight = total_weight[word] * kMinimalWeight;

src/rime/dict/entry_collector.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@ struct RawDictEntry {
2323

2424
// code -> weight
2525
using WeightMap = map<string, double>;
26-
// word -> { code -> weight }
27-
using WordMap = hash_map<string, WeightMap>;
26+
// word -> [ { code, weight } ]
27+
// To save memory, don't use word -> { code -> weight } here: there may be
28+
// many words, but a single word is unlikely to have many representations.
29+
using WordMap = hash_map<string, vector<pair<string, double>>>;
2830
// [ (word, weight), ... ]
2931
using EncodeQueue = std::queue<pair<string, string>>;
3032

0 commit comments

Comments
 (0)