Tags: misc
Rating:
# Compress - Misc
## Challenge
```python=
from io import BufferedReader, BufferedWriter
from hashlib import sha256
import sys, os
BLOCK_LEN = 16 # in characters, consists of $BLOCK_LEN / $CHUNK_LEN chunks
CHUNK_LEN = 4 # in characters, $CHUNK_LEN chars in each chunk
def chunk(data:bytes):
return [data[CHUNK_LEN*i:CHUNK_LEN*(i+1)] for i in range(BLOCK_LEN//CHUNK_LEN)]
def compress_chunks(chunks: list[bytes]) -> bytes:
output = b""
cur = sha256()
for chunk in chunks:
output += sha256(chunk).digest()[:3]
cur.update(chunk)
output += cur.digest()[:3]
return output
def compress(fdr: BufferedReader, fdw: BufferedWriter) -> None:
assert (os.fstat(fdr.fileno()).st_size) % BLOCK_LEN == 0, f"Input length must be a multiple of {BLOCK_LEN}"
while (data := fdr.read(BLOCK_LEN)):
fdw.write(compress_chunks(chunk(data)))
if __name__ == "__main__":
sys.argv.append("compress")
sys.argv.append("D:/CTF/deadsec/2023/compress/test")
if len(sys.argv) != 3:
print(f"Usage: {sys.argv[0]} [compress|extract] inputfile")
exit(-1)
if sys.argv[1] == "compress":
with open(sys.argv[2], "rb") as fdr, open(sys.argv[2]+".z1p", "wb") as fdw:
compress(fdr, fdw)
elif sys.argv[1] == "extract":
print("Not implemented")
else:
print("Unknown operation")
```
## Solution
The algorithm compresses blocks of 16 bytes (16 characters) into blocks of 15 bytes. Each source block consists of 4 chunks, each of them is 4 bytes long. The output, however, consists of 5 chunks, each 3 bytes long. The first four chunks contain the compressed data, while the final chunks serves as somewhat of a checksum. The compressed chunks are generated by taking the first 3 bytes of SHA256 hashes of source chunks, while the checksum is generated by hashing the entire source block.
We can reverse the compression by creating a lookup table of possible sources for each block (using only printable ASCII characters, we are guaranteed some colisions when using only the first 3 bytes of the hash). Once a lookup table is prepared, we can just simply check compressed chunks in groups of 5. We use the first four to generate all possible permutations of source substitutions, while the fifth one serves as a checker to see whether the permutation is correct by just hashing the entire permutation and comparing the first 3 bytes with the final block.
### Code
```python=
from io import BufferedReader, BufferedWriter
from hashlib import sha256
import os
import string
import itertools
BLOCK_LEN = 15 # in characters, consists of $BLOCK_LEN / $CHUNK_LEN chunks
CHUNK_LEN = 3 # in characters, $CHUNK_LEN chars in each chunk
def chunk(data:bytes):
return [data[CHUNK_LEN*i:CHUNK_LEN*(i+1)] for i in range(BLOCK_LEN//CHUNK_LEN)]
def decompress_chunks(chunks: list[bytes]) -> bytes:
global subs
output = []
res = "????"
for chunk in chunks[:-1]:
try:
output.append(subs[chunk.hex()])
except KeyError:
pass
for attempt in itertools.product(*output):
att = "".join(attempt)
if sha256(att.encode()).digest()[:3] == chunks[-1]:
res = att
break
print(res)
return res.encode()
def decompress(fdr: BufferedReader, fdw: BufferedWriter) -> None:
assert (os.fstat(fdr.fileno()).st_size) % BLOCK_LEN == 0, f"Input length must be a multiple of {BLOCK_LEN}"
while (data := fdr.read(BLOCK_LEN)):
fdw.write(decompress_chunks(chunk(data)))
total = list(string.printable)
subs = dict()
for att in itertools.product(total, total, total, total):
att = "".join(att)
hsh = sha256(att.encode()).digest()[:3].hex()
try:
subs[hsh].append(att)
except KeyError:
subs[hsh] = []
subs[hsh].append(att)
with open("data.z1p", "rb") as fdr, open("data.sol", "wb+") as fdw:
decompress(fdr, fdw)
with open("data.sol", "rb") as f:
print(f"Dead{{{sha256(f.read()).hexdigest().lower()}}}")
```
### Decompressed file
```=
Congratulations, it seems like you have managed to write a decompressor for this file format!
Here is the "official" writeup:
from itertools import product
from hashlib import sha256 as H
from string import printable
from collections import defaultdict
from tqdm import tqdm
lookup = defaultdict(set)
for chunk in tqdm(product(printable[:-2], repeat=4), total=len(printable[:-2])**4):
tmp = ''.join(chunk).encode()
h = H(tmp).digest()[:3]
lookup[h].add(tmp)
with open("data.z1p", "rb") as fd:
while True:
h1,h2,h3,h4,hh = (fd.read(3) for _ in range(5))
if h1 == b'': break
for g1,g2,g3,g4 in product(lookup[h1], lookup[h2], lookup[h3], lookup[h4]):
if H(g1+g2+g3+g4).digest()[:3] == hh:
print((g1+g2+g3+g4).decode(), end="")
The flag is the sha256 hash of this entire file, as lower case, and wrapped in the flag format.
```
### Flag
```=
Dead{7eeee4060c39b7e5936cd652edc040126849bb3b03714df6fda711875c11e994}
```