Change table encoding to SHICHIRO
This commit is contained in:
parent
01ff16fddd
commit
06e53e7972
95
huffman.py
95
huffman.py
@ -54,36 +54,53 @@ def huffman(data):
|
|||||||
return codes
|
return codes
|
||||||
|
|
||||||
|
|
||||||
def nanako_encode(table):
|
def shichiro_encode(table):
|
||||||
table = ';'.join([f'{k};{table[k]}' for k in table]).split(';')
|
table = ';'.join([f'{k};{table[k]}' for k in table]).split(';')
|
||||||
byts = []
|
bits = ''
|
||||||
for i in range(len(table)):
|
for i in range(len(table)):
|
||||||
if i % 2:
|
if i % 2:
|
||||||
num = table[i]
|
code = table[i]
|
||||||
|
bitlen = bin(len(code))[2:]
|
||||||
|
if len(bitlen) > 7:
|
||||||
|
while len(bitlen) > 7:
|
||||||
|
bits += '1' + bitlen[:7]
|
||||||
|
bitlen = bitlen[7:]
|
||||||
|
bits += bitlen.rjust(8, '0') + bin(len(bitlen))[2:].rjust(8, '0')
|
||||||
|
else:
|
||||||
|
bits += bitlen.rjust(8, '0')
|
||||||
|
bits += code
|
||||||
else:
|
else:
|
||||||
num = bin(int(table[i]))[2:]
|
bits += bin(int(table[i]))[2:].rjust(8, '0')
|
||||||
while len(num) > 7:
|
continue
|
||||||
byts.append(int('1' + num[:7], 2))
|
return bits
|
||||||
num = num[7:]
|
|
||||||
byts.append(int(num, 2))
|
|
||||||
byts.append(8 - len(num))
|
|
||||||
return byts
|
|
||||||
|
|
||||||
|
|
||||||
def nanako_decode(byts):
|
def shichiro_decode(byts):
|
||||||
|
bits=''.join(byts)
|
||||||
dec = []
|
dec = []
|
||||||
table = {}
|
table = {}
|
||||||
stack = ''
|
stack = ''
|
||||||
i = 0
|
i = 0
|
||||||
while i < len(byts):
|
c = 0
|
||||||
if byts[i][0] == '1':
|
while i < len(bits) and len(bits) - i >= 8:
|
||||||
stack += byts[i][1:]
|
if c % 2 == 0:
|
||||||
|
dec.append(bits[i:i+8])
|
||||||
|
i += 8
|
||||||
|
c += 1
|
||||||
|
continue
|
||||||
|
bitlen = ''
|
||||||
|
if bits[i] == '0':
|
||||||
|
bitlen = int(bits[i:i+8], 2)
|
||||||
|
i += 8
|
||||||
else:
|
else:
|
||||||
stack += byts[i][int(byts[i + 1], 2):]
|
while bits[i] == '1':
|
||||||
dec.append(stack[:])
|
bitlen += bits[i+1:i+8]
|
||||||
stack = ''
|
i += 8
|
||||||
i += 1
|
bitlen = int(bitlen + bits[i+8-int(bits[i+8:i+16], 2):i+8], 2)
|
||||||
i += 1
|
i += 16
|
||||||
|
dec.append(bits[i:i+bitlen])
|
||||||
|
i += bitlen
|
||||||
|
c += 1
|
||||||
for i in range(0, len(dec), 2):
|
for i in range(0, len(dec), 2):
|
||||||
table[dec[i + 1]] = int(dec[i], 2)
|
table[dec[i + 1]] = int(dec[i], 2)
|
||||||
return table
|
return table
|
||||||
@ -96,23 +113,23 @@ def compress_file(filename):
|
|||||||
log.log(f'Original size: {len(data)} bytes.')
|
log.log(f'Original size: {len(data)} bytes.')
|
||||||
log.log('Creating Huffman table...')
|
log.log('Creating Huffman table...')
|
||||||
hf = huffman(data)
|
hf = huffman(data)
|
||||||
table = nanako_encode(hf)
|
table = shichiro_encode(hf)
|
||||||
log.log('Embedding Huffman table...')
|
log.log('Embedding Huffman table...')
|
||||||
out = []
|
tablen = bin(len(table))[2:] # embed the table
|
||||||
ln = bin(len(table))[2:] # embed the table
|
bits = ''
|
||||||
while len(ln) > 7:
|
bitlen = bin(len(tablen))[2:]
|
||||||
out.append(int('1' + ln[:7], 2))
|
while len(bitlen) > 7:
|
||||||
ln = ln[7:]
|
bits += '1' + bitlen[:7]
|
||||||
out += [int(ln, 2), 8 - len(ln)] + table
|
bitlen = bitlen[7:]
|
||||||
log.log(f'Huffman table size: {len(out)} bytes.')
|
bits += bitlen.rjust(8, '0') + bin(len(bitlen))[2:].rjust(8, '0') + tablen + table
|
||||||
|
log.log(f'Huffman table size: {len(bits)} bits.')
|
||||||
log.log('Compressing...')
|
log.log('Compressing...')
|
||||||
stack = ''
|
|
||||||
for i in range(len(data)): # encode to Huffman
|
for i in range(len(data)): # encode to Huffman
|
||||||
stack += hf[data[i]]
|
bits += hf[data[i]]
|
||||||
while len(stack) >= 8:
|
out = []
|
||||||
out.append(int(stack[:8], 2))
|
for i in range(0, len(bits), 8):
|
||||||
stack = stack[8:]
|
out.append(int(bits[i:i+8].ljust(8, '0'), 2))
|
||||||
out += [int(stack.ljust(8, '0'), 2), len(stack)]
|
out.append(len(bits) % 8)
|
||||||
log.log(f'Compressed size: {len(out)} bytes.')
|
log.log(f'Compressed size: {len(out)} bytes.')
|
||||||
log.log(f"Saving to '{filename}.hfm'...")
|
log.log(f"Saving to '{filename}.hfm'...")
|
||||||
with open(f'{filename}.hfm', 'wb') as file: # save Huffman code
|
with open(f'{filename}.hfm', 'wb') as file: # save Huffman code
|
||||||
@ -130,19 +147,21 @@ def decompress_file(filename):
|
|||||||
data[-2] = data[-2][:int(data[-1], 2)]
|
data[-2] = data[-2][:int(data[-1], 2)]
|
||||||
del data[-1]
|
del data[-1]
|
||||||
log.log('Extracting Huffman table...')
|
log.log('Extracting Huffman table...')
|
||||||
ln = '' # extract the table
|
bitlen = '' # extract the table
|
||||||
i = 0
|
i = 0
|
||||||
while 1:
|
while 1:
|
||||||
if data[i][0] == '1':
|
if data[i][0] == '1':
|
||||||
ln += data[i][1:]
|
bitlen += data[i][1:]
|
||||||
else:
|
else:
|
||||||
ln += data[i][int(data[i + 1], 2):]
|
bitlen += data[i][int(data[i + 1], 2):]
|
||||||
break
|
break
|
||||||
i += 1
|
i += 1
|
||||||
del data[:i + 2]
|
del data[:i + 2]
|
||||||
table = nanako_decode(data[:int(ln, 2)])
|
|
||||||
del data[:int(ln, 2)]
|
|
||||||
data = ''.join(data)
|
data = ''.join(data)
|
||||||
|
bitlen = int(bitlen, 2)
|
||||||
|
tablen = int(data[:bitlen], 2)
|
||||||
|
table = shichiro_decode(data[bitlen:tablen+bitlen])
|
||||||
|
data = data[bitlen+tablen:]
|
||||||
stack = ''
|
stack = ''
|
||||||
out = []
|
out = []
|
||||||
log.log('Decompressing...')
|
log.log('Decompressing...')
|
||||||
|
|||||||
Reference in New Issue
Block a user