Tags: obfuscated huffman-tree
Rating:
**Description**
> We found a mysterious program that none of our most talented hackers could even begin to figure out.
>
> Author: toshi
**Files provided**
- [`challenge`](https://github.com/Aurel300/empirectf/blob/master/writeups/2018-09-14-CSAW-CTF-Quals/files/kvm-challenge)
**Solution** (by [Mem2019](https://github.com/Mem2019))
The OS is obsfucated by using `hlt` instruction to implement the conditional or unconditional `jmp`, so we can patch it first
```python
hlt_tab = {0xc50b6060 : 0x454,
0x9d1fe433 : 0x3ed,
0x54a15b03 : 0x376,
0x8f6e2804 : 0x422,
0x8aeef509 : 0x389,
0x3493310d : 0x32c,
0x59c33d0f : 0x3e1,
0x968630d0 : 0x400,
0xef5bdd13 : 0x435,
0x64d8a529 : 0x3b8,
0x5f291a64 : 0x441,
0x5de72dd : 0x347,
0xfc2ff49f : 0x3ce}
text_end = 0x611
def replace_jmps(start,end):
for p in xrange(start,end):
if Byte(p) == 0xB8 and Byte(p + 5) == 0xF4 and Dword(p + 1) in hlt_tab:
jmp_addr = hlt_tab[Dword(p + 1)]
PatchByte(p, 0xE9)
PatchDword(p + 1, (jmp_addr - (p + 5)) & 0xffffffff)
PatchByte(p + 5, 0x90)
#Patch to hlt to jmp
```
There are only 4 conditional `jmp`, so analyze them by hand. Also, edit the function to extend it, so that the analysis in IDA will be easier.
After some reversing, we found that the program is a Huffman Tree. It will encode the input into the path going from root node to the corresponding leaf node, but in reversed order(which makes decoding very hard, since the ambiguity exists).
I got stucked in the algorithm for 3 hours, will add more details later if I have time.
```python
ROOT_OFF = 0x1300
def MyQword(addr):
ret = Qword(addr)
if ret == 0xFFFFFFFFFFFFFFFF:
return 0
else:
return ret
def MyByte(addr):
ret = Byte(addr)
if ret == 0xFF:
return 0
else:
return ret
#dfs to get the mapping
def get_path_to_char(node):
if MyQword(node) != 0xFF:
return [([],chr(MyQword(node)))]
right = MyQword(node + 0x10)
left = MyQword(node + 8)
ret = []
lmap = get_path_to_char(left)
for (p, c) in lmap:
ret.append((p + [0], c))
rmap = get_path_to_char(right)
for (p, c) in rmap:
ret.append((p + [1], c))
return ret
def begin_with(l, sl):
if len(sl) > len(l):
return False
for i in xrange(0, len(sl)):
if l[i] != sl[i]:
return False
return True
# recursion too long!!!
# #return lsit of strings of possibilities
# def get_all_poss(bits, mapping, pad):
# poss = []
# for (p,c) in mapping:
# if begin_with(bits, p):
# poss.append((len(p), c))
# ret = []
# for x in poss:
# #print poss
# print pad * ' ' + x[1]
# ret += map(lambda st : x[1] + st, get_all_poss(bits[x[0]:], mapping, pad + 1))
# #print ret
# return ret
#return lsit of strings of possibilities
def get_all_poss(obits, mapping, pad):
live_bits = [("",obits)]
while len(live_bits) != 1 or len(live_bits[0][1]) != 0:
(parsed,bits) = live_bits.pop()
poss = []
for (p,c) in mapping:
if begin_with(bits, p):
poss.append((len(p), c))
#get all poss
for x in poss:
#print x
live_bits.append((parsed + x[1],bits[x[0]:]))
#if len(live_bits) == 1:
print live_bits
return live_bits
def recover(data):
ret = []
bits = []
for x in data:
for i in range(0,8):
if x & (1 << i) != 0:
bits.append(1)
else:
bits.append(0)
print bits
mapping = get_path_to_char(ROOT_OFF)
#while len(bits) > 0: loop does not work well for ambiguoutyt
ret = get_all_poss(bits, mapping, 0)
return ret
# fails because it is in reverse order
# def recover(data):
# ret = []
# cur_node = ROOT_OFF
# for x in data:
# for i in range(0,8)[::-1]:
# print hex(cur_node)
# if x & (1 << i) != 0: #r
# cur_node = MyQword(cur_node + 0x10)
# else:
# cur_node = MyQword(cur_node + 8)
# if MyQword(cur_node) != 0xff:
# ret.append(MyQword(cur_node))
# cur_node = ROOT_OFF
# return ret
```
Even in the end I did not get the original input, but I've already got the flag, which is part of the input.