Implementing the LZ78 compression algorithm in python
Question:
I’ve been toying around with some compression algorithms lately but, for the last couple days, I’ve been having some real trouble implementing LZ78 in python. I’ve looked around online for some examples but haven’t really found anything reliable that both encodes and decodes input. Does anyone have any resources I could look at or code I could examine?
Thanks
Answers:
Here is both compression and decompression of both :
def compress(uncompressed):
"""Compress a string to a list of output symbols."""
# Build the dictionary.
dict_size = 256
dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size))
# in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}
w = ""
result = []
for c in uncompressed:
wc = w + c
if wc in dictionary:
w = wc
else:
result.append(dictionary[w])
# Add wc to the dictionary.
dictionary[wc] = dict_size
dict_size += 1
w = c
# Output the code for w.
if w:
result.append(dictionary[w])
return result
def decompress(compressed):
"""Decompress a list of output ks to a string."""
from cStringIO import StringIO
# Build the dictionary.
dict_size = 256
dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size))
# in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}
# use StringIO, otherwise this becomes O(N^2)
# due to string concatenation in a loop
result = StringIO()
w = compressed.pop(0)
result.write(w)
for k in compressed:
if k in dictionary:
entry = dictionary[k]
elif k == dict_size:
entry = w + w[0]
else:
raise ValueError('Bad compressed k: %s' % k)
result.write(entry)
# Add w+entry[0] to the dictionary.
dictionary[dict_size] = w + entry[0]
dict_size += 1
w = entry
return result.getvalue()
# How to use:
compressed = compress('TOBEORNOTTOBEORTOBEORNOT')
print (compressed)
decompressed = decompress(compressed)
print (decompressed)
Hope this helps
Here is my implementation of LZW compression algorithm from https://q4.github.io/thesis.pdf in python 3:
from typing import List
def LZW_compress(input:str)->List[int]:
if len(input) == 0:
return []
tree = {chr(65 + i):i for i in range(26)} # tree is initially filled with input alphabet.
output = []
w = ""
for x in input:
if w+x in tree.keys():
w += x
else:
output.append(tree[w])
tree[w+x] = len(tree)
w = x
output.append(tree[w])
return output
def LZW_decompress(input:List[int])->str:
tree = {i:chr(65 + i) for i in range(26)} # tree is initially filled with input alphabet.
output = ""
for i in range(len(input)):
w = tree[input[i]]
if i != 0:
tree[i+25] = tree[i+25] + w[0] # completing the incomplete assignment of prev. round.
w = tree[input[i]] # updating w in case: input[i] == i + 25
output += w
tree[i+26] = w # incomplete set. need to append the first character of the next phrase.
return output
# Example
plain = "HGHSHHBSYBVDHDHGKQQQAAAAAACCCCCCCCCCCCRASSFASASDKKSAHSVNFNNNNNNAHVJSV"
assert(plain == LZW_decompress(LZW_compress(plain)))
I’ve been toying around with some compression algorithms lately but, for the last couple days, I’ve been having some real trouble implementing LZ78 in python. I’ve looked around online for some examples but haven’t really found anything reliable that both encodes and decodes input. Does anyone have any resources I could look at or code I could examine?
Thanks
Here is both compression and decompression of both :
def compress(uncompressed):
"""Compress a string to a list of output symbols."""
# Build the dictionary.
dict_size = 256
dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size))
# in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}
w = ""
result = []
for c in uncompressed:
wc = w + c
if wc in dictionary:
w = wc
else:
result.append(dictionary[w])
# Add wc to the dictionary.
dictionary[wc] = dict_size
dict_size += 1
w = c
# Output the code for w.
if w:
result.append(dictionary[w])
return result
def decompress(compressed):
"""Decompress a list of output ks to a string."""
from cStringIO import StringIO
# Build the dictionary.
dict_size = 256
dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size))
# in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}
# use StringIO, otherwise this becomes O(N^2)
# due to string concatenation in a loop
result = StringIO()
w = compressed.pop(0)
result.write(w)
for k in compressed:
if k in dictionary:
entry = dictionary[k]
elif k == dict_size:
entry = w + w[0]
else:
raise ValueError('Bad compressed k: %s' % k)
result.write(entry)
# Add w+entry[0] to the dictionary.
dictionary[dict_size] = w + entry[0]
dict_size += 1
w = entry
return result.getvalue()
# How to use:
compressed = compress('TOBEORNOTTOBEORTOBEORNOT')
print (compressed)
decompressed = decompress(compressed)
print (decompressed)
Hope this helps
Here is my implementation of LZW compression algorithm from https://q4.github.io/thesis.pdf in python 3:
from typing import List
def LZW_compress(input:str)->List[int]:
if len(input) == 0:
return []
tree = {chr(65 + i):i for i in range(26)} # tree is initially filled with input alphabet.
output = []
w = ""
for x in input:
if w+x in tree.keys():
w += x
else:
output.append(tree[w])
tree[w+x] = len(tree)
w = x
output.append(tree[w])
return output
def LZW_decompress(input:List[int])->str:
tree = {i:chr(65 + i) for i in range(26)} # tree is initially filled with input alphabet.
output = ""
for i in range(len(input)):
w = tree[input[i]]
if i != 0:
tree[i+25] = tree[i+25] + w[0] # completing the incomplete assignment of prev. round.
w = tree[input[i]] # updating w in case: input[i] == i + 25
output += w
tree[i+26] = w # incomplete set. need to append the first character of the next phrase.
return output
# Example
plain = "HGHSHHBSYBVDHDHGKQQQAAAAAACCCCCCCCCCCCRASSFASASDKKSAHSVNFNNNNNNAHVJSV"
assert(plain == LZW_decompress(LZW_compress(plain)))