Change table encoding to SHICHIRO

This commit is contained in:
Masahiko AMANO 2021-12-09 16:15:47 +03:00
parent 01ff16fddd
commit 06e53e7972

View File

@ -54,36 +54,53 @@ def huffman(data):
return codes return codes
def nanako_encode(table): def shichiro_encode(table):
table = ';'.join([f'{k};{table[k]}' for k in table]).split(';') table = ';'.join([f'{k};{table[k]}' for k in table]).split(';')
byts = [] bits = ''
for i in range(len(table)): for i in range(len(table)):
if i % 2: if i % 2:
num = table[i] code = table[i]
bitlen = bin(len(code))[2:]
if len(bitlen) > 7:
while len(bitlen) > 7:
bits += '1' + bitlen[:7]
bitlen = bitlen[7:]
bits += bitlen.rjust(8, '0') + bin(len(bitlen))[2:].rjust(8, '0')
else:
bits += bitlen.rjust(8, '0')
bits += code
else: else:
num = bin(int(table[i]))[2:] bits += bin(int(table[i]))[2:].rjust(8, '0')
while len(num) > 7: continue
byts.append(int('1' + num[:7], 2)) return bits
num = num[7:]
byts.append(int(num, 2))
byts.append(8 - len(num))
return byts
def nanako_decode(byts): def shichiro_decode(byts):
bits=''.join(byts)
dec = [] dec = []
table = {} table = {}
stack = '' stack = ''
i = 0 i = 0
while i < len(byts): c = 0
if byts[i][0] == '1': while i < len(bits) and len(bits) - i >= 8:
stack += byts[i][1:] if c % 2 == 0:
dec.append(bits[i:i+8])
i += 8
c += 1
continue
bitlen = ''
if bits[i] == '0':
bitlen = int(bits[i:i+8], 2)
i += 8
else: else:
stack += byts[i][int(byts[i + 1], 2):] while bits[i] == '1':
dec.append(stack[:]) bitlen += bits[i+1:i+8]
stack = '' i += 8
i += 1 bitlen = int(bitlen + bits[i+8-int(bits[i+8:i+16], 2):i+8], 2)
i += 1 i += 16
dec.append(bits[i:i+bitlen])
i += bitlen
c += 1
for i in range(0, len(dec), 2): for i in range(0, len(dec), 2):
table[dec[i + 1]] = int(dec[i], 2) table[dec[i + 1]] = int(dec[i], 2)
return table return table
@ -96,23 +113,23 @@ def compress_file(filename):
log.log(f'Original size: {len(data)} bytes.') log.log(f'Original size: {len(data)} bytes.')
log.log('Creating Huffman table...') log.log('Creating Huffman table...')
hf = huffman(data) hf = huffman(data)
table = nanako_encode(hf) table = shichiro_encode(hf)
log.log('Embedding Huffman table...') log.log('Embedding Huffman table...')
out = [] tablen = bin(len(table))[2:] # embed the table
ln = bin(len(table))[2:] # embed the table bits = ''
while len(ln) > 7: bitlen = bin(len(tablen))[2:]
out.append(int('1' + ln[:7], 2)) while len(bitlen) > 7:
ln = ln[7:] bits += '1' + bitlen[:7]
out += [int(ln, 2), 8 - len(ln)] + table bitlen = bitlen[7:]
log.log(f'Huffman table size: {len(out)} bytes.') bits += bitlen.rjust(8, '0') + bin(len(bitlen))[2:].rjust(8, '0') + tablen + table
log.log(f'Huffman table size: {len(bits)} bits.')
log.log('Compressing...') log.log('Compressing...')
stack = ''
for i in range(len(data)): # encode to Haffman for i in range(len(data)): # encode to Haffman
stack += hf[data[i]] bits += hf[data[i]]
while len(stack) >= 8: out = []
out.append(int(stack[:8], 2)) for i in range(0, len(bits), 8):
stack = stack[8:] out.append(int(bits[i:i+8].ljust(8, '0'), 2))
out += [int(stack.ljust(8, '0'), 2), len(stack)] out.append(len(bits) % 8)
log.log(f'Compressed size: {len(out)} bytes.') log.log(f'Compressed size: {len(out)} bytes.')
log.log(f"Saving to '{filename}.hfm'...") log.log(f"Saving to '{filename}.hfm'...")
with open(f'{filename}.hfm', 'wb') as file: # save Haffman code with open(f'{filename}.hfm', 'wb') as file: # save Haffman code
@ -130,19 +147,21 @@ def decompress_file(filename):
data[-2] = data[-2][:int(data[-1], 2)] data[-2] = data[-2][:int(data[-1], 2)]
del data[-1] del data[-1]
log.log('Extracting Huffman table...') log.log('Extracting Huffman table...')
ln = '' # extract the table bitlen = '' # extract the table
i = 0 i = 0
while 1: while 1:
if data[i][0] == '1': if data[i][0] == '1':
ln += data[i][1:] bitlen += data[i][1:]
else: else:
ln += data[i][int(data[i + 1], 2):] bitlen += data[i][int(data[i + 1], 2):]
break break
i += 1 i += 1
del data[:i + 2] del data[:i + 2]
table = nanako_decode(data[:int(ln, 2)])
del data[:int(ln, 2)]
data = ''.join(data) data = ''.join(data)
bitlen = int(bitlen, 2)
tablen = int(data[:bitlen], 2)
table = shichiro_decode(data[bitlen:tablen+bitlen])
data = data[bitlen+tablen:]
stack = '' stack = ''
out = [] out = []
log.log('Decompressing...') log.log('Decompressing...')