Implementing the LZ78 compression algorithm in python

Question

I’ve been toying around with some compression algorithms lately but, for the last couple days, I’ve been having some real trouble implementing LZ78 in python. I’ve looked around online for some examples but haven’t really found anything reliable that both encodes and decodes input. Does anyone have any resources I could look at or code I could examine?

Thanks

Asked By: Luke Hopkins

||

Source

Answer 1

Here is both compression and decompression of both :

def compress(uncompressed):
    """Compress a string to a list of output symbols."""

    # Build the dictionary.
    dict_size = 256
    dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size))
    # in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}

    w = ""
    result = []
    for c in uncompressed:
        wc = w + c
        if wc in dictionary:
            w = wc
        else:
            result.append(dictionary[w])
            # Add wc to the dictionary.
            dictionary[wc] = dict_size
            dict_size += 1
            w = c

    # Output the code for w.
    if w:
        result.append(dictionary[w])
    return result


def decompress(compressed):
    """Decompress a list of output ks to a string."""
    from cStringIO import StringIO

    # Build the dictionary.
    dict_size = 256
    dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size))
    # in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}

    # use StringIO, otherwise this becomes O(N^2)
    # due to string concatenation in a loop
    result = StringIO()
    w = compressed.pop(0)
    result.write(w)
    for k in compressed:
        if k in dictionary:
            entry = dictionary[k]
        elif k == dict_size:
            entry = w + w[0]
        else:
            raise ValueError('Bad compressed k: %s' % k)
        result.write(entry)

        # Add w+entry[0] to the dictionary.
        dictionary[dict_size] = w + entry[0]
        dict_size += 1

        w = entry
    return result.getvalue()


# How to use:
compressed = compress('TOBEORNOTTOBEORTOBEORNOT')
print (compressed)
decompressed = decompress(compressed)
print (decompressed)

Hope this helps

Answered By: coder3521

Answer 2

Here is my implementation of LZW compression algorithm from https://q4.github.io/thesis.pdf in python 3:

from typing import List

def LZW_compress(input:str)->List[int]:
    if len(input) == 0:
        return []
    tree = {chr(65 + i):i for i in range(26)} # tree is initially filled with input alphabet.
    output = []
    w = ""
    for x in input:
        if w+x in tree.keys():
            w += x
        else:
            output.append(tree[w])
            tree[w+x] = len(tree)
            w = x
    output.append(tree[w])
    return output


def LZW_decompress(input:List[int])->str:
    tree = {i:chr(65 + i) for i in range(26)} # tree is initially filled with input alphabet.
    output = ""
    for i in range(len(input)):
        w =  tree[input[i]]
        if i != 0:
            tree[i+25] = tree[i+25] + w[0] # completing the incomplete assignment of prev. round.
        w = tree[input[i]] # updating w in case: input[i] == i + 25 
        output += w
        tree[i+26] = w # incomplete set. need to append the first character of the next phrase.
    return output

# Example  
plain = "HGHSHHBSYBVDHDHGKQQQAAAAAACCCCCCCCCCCCRASSFASASDKKSAHSVNFNNNNNNAHVJSV"
assert(plain == LZW_decompress(LZW_compress(plain)))

Answered By: Yousef Javaherian

Implementing the LZ78 compression algorithm in python

Question:

Answers: