forked from leslie071564/CDB_handler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCDB_Reader.py
80 lines (73 loc) · 2.95 KB
/
CDB_Reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# -*-coding: utf-8 -*-
import re
import sys
reload(sys)
sys.setdefaultencoding('utf8')
import os.path
import codecs
import cdb
class CDB_Reader(object):
    """Look up keys in a database that is split over several CDB files.

    The set of files is described by a "keymap" file.  Removing the last
    occurrence of ``keymap`` from the keymap's file name gives a prefix; the
    file ``<prefix>0`` (if present) is the first CDB file, and every line of
    the keymap, of the form ``<key> <cdb file name>``, names a further CDB
    file located in the same directory as the keymap.

    Attributes:
        repeated_keys: when true, lookups use ``getall`` instead of ``get``.
        numerical_keys: when true, keys are compared as integers while
            choosing the CDB file in the non-exhaustive search.
        mapping: list of ``{'key': ..., 'cdb': ...}`` dicts in keymap order;
            the entry for ``<prefix>0`` comes first and has ``'key': None``.
    """

    # "<key> <file name>": the key may contain spaces, the file name may not.
    _KEYMAP_LINE = re.compile(r"^(.+) ([^ ]+)$")

    def __init__(self, keyMapFile, repeated_keys=False, numerical_keys=True):
        """Build the key -> CDB file mapping described by ``keyMapFile``.

        Args:
            keyMapFile: path of the keymap file (UTF-8 text).
            repeated_keys: see the class attributes.
            numerical_keys: see the class attributes.

        Exits the process with status 1 (after writing to stderr) when the
        keymap is inconsistent with the files on disk, contains a malformed
        line, or names a CDB file that does not exist.  A missing keymap file
        makes ``codecs.open`` raise.
        """
        # the options.
        self.repeated_keys = repeated_keys
        self.numerical_keys = numerical_keys
        self.mapping = []

        dbdir = os.path.dirname(keyMapFile)
        # Strip only the last "keymap" so that a database whose own name
        # contains "keymap" still resolves to the right files.
        head, _, tail = os.path.basename(keyMapFile).rpartition("keymap")
        prefix = head + tail

        # os.path.join copes with an empty dbdir (keymap given as a bare file
        # name); "{}/{}" would have produced a path under the root directory.
        first_cdb = os.path.join(dbdir, prefix + "0")
        if os.path.isfile(first_cdb):
            self.mapping.append({'key': None, 'cdb': first_cdb})

        # check for validity
        self._check_keymap_consistency(
            keyMapFile, os.path.join(dbdir, prefix + "1"))

        # parse the keymap file.
        with codecs.open(keyMapFile, 'r', 'utf-8') as f:
            for line in f:
                matched = self._KEYMAP_LINE.match(line.strip())
                if matched is None:
                    sys.stderr.write("malformed keymap.\n")
                    sys.exit(1)
                key, which_file = matched.groups()
                cdb_path = str(os.path.join(dbdir, which_file))
                if not os.path.isfile(cdb_path):
                    sys.stderr.write(
                        "%s is listed in the keymap but does not exist.\n"
                        % cdb_path)
                    sys.exit(1)
                self.mapping.append({'key': key, 'cdb': cdb_path})

    @staticmethod
    def _check_keymap_consistency(keyMapFile, second_cdb):
        """Exit with status 1 if the keymap size disagrees with ``second_cdb``.

        An empty keymap must not coexist with ``<prefix>1``, and a non-empty
        keymap requires ``<prefix>1`` to exist.  Nothing is checked when the
        keymap file itself is missing.
        """
        if not os.path.isfile(keyMapFile):
            return
        keymap_is_empty = os.path.getsize(keyMapFile) == 0
        second_exists = os.path.isfile(second_cdb)
        if keymap_is_empty and second_exists:
            message = ("The size of the keymapfile is 0, but %s exists. "
                       "The size of the keymapfile should be more than 0!\n"
                       % second_cdb)
        elif not keymap_is_empty and not second_exists:
            message = ("The keymapfile is not empty, but %s does not exist.\n"
                       % second_cdb)
        else:
            return
        sys.stderr.write(message)
        sys.exit(1)

    def _lookup(self, cdb_path, key_bytes):
        """Open ``cdb_path`` and return what it stores under ``key_bytes``."""
        handle = cdb.init(cdb_path)
        if self.repeated_keys:
            return handle.getall(key_bytes)
        return handle.get(key_bytes)

    def get(self, searchKey, exhaustive=False):
        """Return the value(s) stored under ``searchKey``.

        Args:
            searchKey: text key; it is UTF-8 encoded before the lookup.
            exhaustive: when true, every CDB file is probed in mapping order
                and the first truthy result is returned (``None`` if there is
                none).  It must be true if keys are not sorted in ascending
                order.  When false, a single CDB file is chosen by comparing
                ``searchKey`` with the keys recorded in the keymap.

        Returns:
            The result of ``getall`` (if ``repeated_keys``) or ``get`` on the
            selected CDB file.
        """
        key_bytes = searchKey.encode('utf-8')

        if exhaustive:
            for entry in self.mapping:
                value = self._lookup(entry['cdb'], key_bytes)
                # Falsy results (missing key, empty value) fall through to
                # the next file.
                if value:
                    return value
            return None

        # Walk the keymap until an entry's key is greater than the search
        # key; the file of the entry before it is the one to query.
        target = self.mapping[0]['cdb']
        for entry in self.mapping[1:]:
            if self.numerical_keys:
                # int() is applied lazily so single-file databases never
                # require the search key to be numeric.
                precedes = int(searchKey) < int(entry['key'])
            else:
                # Compare bytes with bytes; UTF-8 byte order matches code
                # point order, so this does not depend on implicit decoding.
                precedes = key_bytes < entry['key'].encode('utf-8')
            if precedes:
                break
            target = entry['cdb']
        return self._lookup(target, key_bytes)