Skip to content

Commit 174965f

Browse files
RobinDavid authored and patacca committed
update pypcode to use version >2.0.0
1 parent 5cbf4be commit 174965f

File tree

3 files changed

+39
-137
lines changed

3 files changed

+39
-137
lines changed

bindings/python/quokka/backends/pypcode.py

+32-130
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@ def get_pypcode_context(
6060
6161
Arguments:
6262
arch: Quokka program architecture
63+
endian: Architecture endianness
6364
6465
Raises:
6566
PypcodeError: if the conversion for arch is not found
@@ -74,7 +75,7 @@ def get_pypcode_context(
7475
quokka.analysis.ArchARM64: "AARCH64:LE:64:v8A",
7576
quokka.analysis.ArchARMThumb: "ARM:LE:32:v8T",
7677
quokka.analysis.ArchMIPS: "MIPS:LE:32:default",
77-
quokka.analysis.ArchMIPS: "MIPS:LE:64:default",
78+
quokka.analysis.ArchMIPS64: "MIPS:LE:64:default",
7879
quokka.analysis.ArchPPC: "PowerPC:LE:32:default",
7980
quokka.analysis.ArchPPC64: "PowerPC:LE:64:default",
8081
}
@@ -93,105 +94,6 @@ def get_pypcode_context(
9394
return pypcode.Context(pcode_arch)
9495

9596

96-
def equality(self: pypcode.ContextObj, other: Any) -> bool:
97-
"""Check if two pypcode objets are the same
98-
99-
We use monkey patching to attach the equality method to other classes and rely on
100-
__slots__ to check which fields to check.
101-
102-
Arguments:
103-
self: First object
104-
other: Other variable
105-
106-
Returns:
107-
Boolean for equality
108-
"""
109-
return isinstance(other, self.__class__) and all(
110-
getattr(other, attr) == getattr(self, attr)
111-
for attr in self.__slots__
112-
if attr != "cobj"
113-
)
114-
115-
116-
def object_hash(obj: pypcode.ContextObj) -> int:
117-
"""Create a hash value for a pypcode object
118-
119-
This allows to create set of values.
120-
121-
Arguments:
122-
obj: Object to hash
123-
124-
Returns:
125-
An integer for the hash
126-
"""
127-
128-
assert isinstance(obj, pypcode.ContextObj)
129-
return sum(hash(getattr(obj, attr)) for attr in obj.__slots__ if attr != "cobj")
130-
131-
132-
pypcode.Varnode.__eq__ = equality
133-
pypcode.Varnode.__hash__ = object_hash
134-
135-
pypcode.AddrSpace.__eq__ = equality
136-
pypcode.AddrSpace.__hash__ = object_hash
137-
138-
pypcode.PcodeOp.__eq__ = equality
139-
pypcode.PcodeOp.__hash__ = object_hash
140-
141-
142-
def combine_instructions(
143-
block: quokka.Block, translated_instructions: Sequence[pypcode.Translation]
144-
) -> List[pypcode.PcodeOp]:
145-
"""Combine instructions between the Quokka and PyPcode
146-
147-
Some instruction are split between IDA and Ghidra, so we have to account for it.
148-
A problem for example is the support of prefixes (such LOCK) which are decoded as 2
149-
instructions by Ghidra (wrong) but only 1 by IDA (correct).
150-
151-
Arguments:
152-
block: Quokka block
153-
translated_instructions: Translated instructions by Pypcode
154-
155-
Raises
156-
PypcodeError: if the combination doesn't work
157-
158-
Returns:
159-
A list of Pypcode statements
160-
"""
161-
pcode_instructions: List[pypcode.PcodeOp] = []
162-
translated_instructions = iter(translated_instructions)
163-
164-
instruction: quokka.Instruction
165-
for instruction in block.instructions:
166-
instruction._pcode_insts = []
167-
remaining_size: int = instruction.size
168-
while remaining_size > 0:
169-
try:
170-
pcode_inst: pypcode.Translation = next(translated_instructions)
171-
except StopIteration as exc:
172-
logger.error(
173-
"Disassembly discrepancy between Pypcode / IDA: missing inst"
174-
)
175-
raise quokka.PypcodeError(
176-
f"Decoding error for block at 0x{block.start:x}"
177-
) from exc
178-
179-
remaining_size -= pcode_inst.length
180-
instruction._pcode_insts.extend(pcode_inst.ops)
181-
182-
if remaining_size < 0:
183-
logger.error(
184-
"Disassembly discrepancy between Pypcode / IDA: sizes mismatch"
185-
)
186-
raise quokka.PypcodeError(
187-
f"Decoding error for block at 0x{block.start:x}"
188-
)
189-
190-
pcode_instructions.extend(list(pcode_inst.ops))
191-
192-
return pcode_instructions
193-
194-
19597
def update_pypcode_context(program: quokka.Program, is_thumb: bool) -> pypcode.Context:
19698
"""Return an appropriate pypcode context for the decoding
19799
@@ -246,19 +148,22 @@ def pypcode_decode_block(block: quokka.Block) -> List[pypcode.PcodeOp]:
246148
block.program, first_instruction.thumb
247149
)
248150

249-
# Translate
250-
translation = context.translate(
251-
code=block.bytes,
252-
base=block.start,
253-
max_inst=0,
254-
)
255-
256-
if translation.error:
257-
logger.error(translation.error.explain)
258-
raise quokka.PypcodeError(f"Decoding error for block at 0x{block.start:x}")
151+
try:
152+
# Translate
153+
translation = context.translate(
154+
block.bytes, # buf
155+
block.start, # base_address
156+
0, # max_bytes
157+
0, # max_instructions
158+
)
159+
return translation.ops
259160

260-
pcode_instructions = combine_instructions(block, translation.instructions)
261-
return pcode_instructions
161+
except pypcode.BadDataError as e:
162+
logger.error(e)
163+
raise quokka.PypcodeError(f"Decoding error for block at 0x{block.start:x} (BadDataError)")
164+
except pypcode.UnimplError as e:
165+
logger.error(e)
166+
raise quokka.PypcodeError(f"Decoding error for block at 0x{block.start:x} (UnimplError)")
262167

263168

264169
def pypcode_decode_instruction(
@@ -268,7 +173,7 @@ def pypcode_decode_instruction(
268173
269174
This will return the list of Pcode operations done for the instruction.
270175
Note that a (binary) instruction is expected to have several pcode instructions
271-
associated.
176+
associated. When decoding a single instruction IMARK instructions are excluded!
272177
273178
Arguments:
274179
inst: Instruction to translate
@@ -281,22 +186,19 @@ def pypcode_decode_instruction(
281186
"""
282187

283188
context: pypcode.Context = update_pypcode_context(inst.program, inst.thumb)
284-
translation = context.translate(
285-
code=inst.bytes,
286-
base=inst.address,
287-
max_inst=1,
288-
)
289-
290-
if not translation.error:
291-
292-
instructions = translation.instructions
293-
if len(instructions) > 1:
294-
logger.warning("Mismatch of instruction size IDA/Pypcode")
295-
296-
instructions = list(
297-
itertools.chain.from_iterable(inst.ops for inst in instructions)
189+
try:
190+
translation = context.translate(
191+
inst.bytes, # buf
192+
inst.address, # base_address
193+
0, # max_bytes
194+
1, # max_instructions
298195
)
299-
return instructions
300196

301-
logger.error(translation.error.explain)
302-
raise quokka.PypcodeError("Unable to decode instruction")
197+
return [x for x in translation.ops if x.opcode != pypcode.OpCode.IMARK]
198+
199+
except pypcode.BadDataError as e:
200+
logger.error(e)
201+
raise quokka.PypcodeError(f"Unable to decode instruction (BadDataError)")
202+
except pypcode.UnimplError as e:
203+
logger.error(e)
204+
raise quokka.PypcodeError(f"Unable to decode instruction (UnimplError)")

pyproject.toml

+3-3
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,9 @@ test = [
2424
"pytest-mock",
2525
"pytest-cov",
2626
"coverage[toml]",
27-
"pypcode>=1.1.1",
27+
"pypcode>=1.1.2",
2828
]
29-
pypcode = ["pypcode>=1.1.1"]
29+
pypcode = ["pypcode>=1.1.2"]
3030
doc = [
3131
"mkdocs",
3232
"mkdocs-material",
@@ -45,7 +45,7 @@ dev = [
4545
"mypy",
4646
"mypy-protobuf",
4747
"nox",
48-
"pypcode>=1.1.1",
48+
"pypcode>=1.1.2",
4949
]
5050

5151
[tool.setuptools]

tests/python/tests/backends/test_pypcode.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -8,16 +8,16 @@
88
def test_pypcode_context():
99

1010
context = pypcode_backend.get_pypcode_context(quokka.analysis.ArchX86)
11-
assert context.lang.id == "x86:LE:32:default"
11+
assert context.language.id == "x86:LE:32:default"
1212

1313
context = pypcode_backend.get_pypcode_context(quokka.analysis.ArchX64)
14-
assert context.lang.id == "x86:LE:64:default"
14+
assert context.language.id == "x86:LE:64:default"
1515

1616
context = pypcode_backend.get_pypcode_context(quokka.analysis.ArchARM64)
17-
assert context.lang.id == "AARCH64:LE:64:v8A"
17+
assert context.language.id == "AARCH64:LE:64:v8A"
1818

1919
context = pypcode_backend.get_pypcode_context(quokka.analysis.ArchARM)
20-
assert context.lang.id == "ARM:LE:32:v8"
20+
assert context.language.id == "ARM:LE:32:v8"
2121

2222
with pytest.raises(quokka.PypcodeError):
2323
pypcode_backend.get_pypcode_context(quokka.analysis.QuokkaArch)

0 commit comments

Comments
 (0)