Commit dc0b074

[disasm] identify functions
1 parent 2038db9 commit dc0b074

File tree

1 file changed (+47, -28)

core/disasm/capstone_disasm.py

Lines changed: 47 additions & 28 deletions
@@ -2,6 +2,7 @@
 import mmap
 import os

+import lief
 from capstone import Cs, CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN

 from core import logger
@@ -17,44 +18,62 @@ def __init__(self, file_path: str, chunk_size: int = 0x2000):

         self.chunk_size = chunk_size
         self.max_workers = os.cpu_count() or 4
+        self.functions = {}

-    def _disassemble_chunk(self, data: bytes, base_addr: int) -> list[str]:
+    def _load_functions_with_lief(self):
+        binary = lief.parse(self.file_path)
+        if not binary:
+            raise RuntimeError("Failed to parse Mach-O binary with LIEF")
+
+        if isinstance(binary, lief.MachO.Binary):
+            func_count = 0
+
+            for f in binary.functions:
+                addr = f.address
+                name = f.name or f"fn_{addr:08x}"
+                self.functions[addr] = name
+                func_count += 1
+
+        logger.info(f"[LIEF] Detected {len(self.functions)} functions")
+
+    def _disassemble_chunk(self, data: bytes, base_addr: int):
         md = Cs(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN)
         md.detail = False
         result = []

         for insn in md.disasm(data, base_addr):
+            if insn.address in self.functions:
+                result.append(f"{self.functions[insn.address]}:")
+
             result.append(f"0x{insn.address:08x}:\t{insn.mnemonic}\t{insn.op_str}")

         return result

     def disassemble(self) -> str:
         logger.info(f"[Capstone] Loading file: {self.file_path}")
+        self._load_functions_with_lief()
+
+        results = []
+
+        with open(self.file_path, "rb") as f, mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
+            file_size = mm.size()
+            offsets = range(0, file_size, self.chunk_size)
+
+            logger.info(f"[Capstone] File size: {file_size:,} bytes")
+            logger.info(f"[Capstone] Using {self.max_workers} threads")
+
+            with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+                futures = [executor.submit(self._disassemble_chunk, mm[offset:offset + self.chunk_size], offset)
+                           for offset in offsets]
+
+                for i, fut in enumerate(concurrent.futures.as_completed(futures), 1):
+                    try:
+                        chunk_result = fut.result()
+                        results.extend(chunk_result)
+                        if i % 10 == 0:
+                            logger.info(f"[Capstone] Completed {i}/{len(futures)} chunks...")
+                    except Exception as e:
+                        logger.error(f"[Capstone] Error in chunk {i}: {e}")

-        with open(self.file_path, "rb") as f:
-            with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
-                file_size = mm.size()
-                offsets = range(0, file_size, self.chunk_size)
-
-                logger.info(f"[Capstone] File size: {file_size:,} bytes")
-                logger.info(f"[Capstone] Using {self.max_workers} threads")
-
-                results = []
-                with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
-                    futures = []
-                    for offset in offsets:
-                        data = mm[offset:offset + self.chunk_size]
-                        futures.append(executor.submit(self._disassemble_chunk, data, offset))
-
-                    for i, fut in enumerate(concurrent.futures.as_completed(futures), 1):
-                        try:
-                            chunk_result = fut.result()
-                            results.extend(chunk_result)
-                            if i % 10 == 0:
-                                logger.info(f"[Capstone] Completed {i}/{len(futures)} chunks...")
-                        except Exception as e:
-                            logger.error(f"[Capstone] Error in chunk {i}: {e}")
-
-        disasm_text = "\n".join(results)
-        logger.info(f"[Capstone] Completed disassembly.")
-        return disasm_text
+        logger.info(f"[Capstone] Completed disassembly")
+        return "\n".join(results)
