The program takes the name of a file (a path is also fine) and writes the compressed file to the current working directory, i.e. the directory the program is run from. It also shows how to write a file byte-wise, as required for compression.
################################
# Name: Dhruv Pancholi
# Filename: huffman.py
# Input Format: filename
################################
"""Huffman-code a text file and write the packed bit stream to disk.

The script counts character frequencies, builds a Huffman tree with a heap,
derives a bit string per character and writes the concatenated bits as raw
bytes.  Only the packed bits are written: neither the code table nor the
padding length is stored in the output file.

Runs unchanged on Python 2.7 and Python 3.
"""

import os
from collections import Counter
from heapq import *

try:  # Python 2 names
    _range, _read_line = xrange, raw_input
except NameError:  # Python 3 names
    _range, _read_line = range, input


class Node(object):
    """A Huffman tree node, ordered by frequency so it can live in a heap.

    Attributes:
        freq: occurrence count of the symbol, or the sum of both children.
        left: left child, or None.
        right: right child, or None.
        char: the symbol stored in a leaf; None for nodes built by huffman().
        code: starts as [0, 0]; replaced by generateHuffman() with
            [bits, length] or by generateHuffman1() with a bit string.
    """

    def __init__(self, freq, left=None, right=None, char=None):
        super(Node, self).__init__()
        self.freq = freq
        self.left = left
        self.right = right
        self.char = char
        self.code = [0, 0]

    def __lt__(self, other):
        return self.freq < other.freq

    def __gt__(self, other):
        return self.freq > other.freq

    def __eq__(self, other):
        # Anything that is not a Node (None included) is simply "not equal";
        # NotImplemented lets Python fall back to its default comparison
        # instead of failing on a missing ``freq`` attribute.
        if not isinstance(other, Node):
            return NotImplemented
        return self.freq == other.freq

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``; keep them in sync.
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal


def calcFrequency(filename):
    """Count how often each character occurs in a text file.

    Args:
        filename: path of the file to read (opened in text mode).

    Returns:
        A dict mapping each character to its number of occurrences.
    """
    counts = Counter()
    with open(filename, 'r') as filep:
        for line in filep:
            counts.update(line)
    return dict(counts)


def huffman(C):
    """Merge a heap of nodes into a single Huffman tree.

    Args:
        C: list of Node objects already arranged as a heap (for example
            built with heappush).  The list is emptied by this call.

    Returns:
        The root Node of the tree, or None when ``C`` is empty.
    """
    if not C:
        return None
    while len(C) > 1:
        # The two least frequent nodes become siblings under a new parent.
        left = heappop(C)
        right = heappop(C)
        heappush(C, Node(left.freq + right.freq, left, right))
    return heappop(C)


def generateHuffman(root, code, height, code_dict):
    """Assign integer codes to every node of the subtree rooted at ``root``.

    Each node's ``code`` becomes ``[bits, length]`` where ``bits`` is the
    root-to-node path read as a binary number (left = 0, right = 1) and
    ``length`` is the node's depth.  Leaves are also recorded in
    ``code_dict`` under their character.

    Args:
        root: subtree root, or None (in which case nothing happens).
        code: the path value shifted left by one bit; pass 0 for the tree
            root.  It is carried pre-shifted, which is why ``code >> 1`` is
            the value that gets stored.
        height: depth of ``root``; pass 0 for the tree root.
        code_dict: dict filled in place with ``char -> [bits, length]``.
    """
    if root is None:
        return
    # A fresh list, so this also works after generateHuffman1() has replaced
    # ``root.code`` with a string.
    root.code = [code >> 1, height]
    if root.char is not None:
        code_dict[root.char] = root.code
    generateHuffman(root.left, code << 1, height + 1, code_dict)
    generateHuffman(root.right, (code + 1) << 1, height + 1, code_dict)


def generateHuffman1(root, code, height, code_dict):
    """Assign bit-string codes to every node of the subtree rooted at ``root``.

    Args:
        root: subtree root, or None (in which case nothing happens).
        code: bit string of the path leading to ``root``; pass '' for the
            tree root.  '0' is appended for a left edge, '1' for a right one.
        height: depth of ``root``; only passed along, never stored.
        code_dict: dict filled in place with ``char -> bit string``.
    """
    if root is None:
        return
    root.code = code
    if root.char is not None:
        code_dict[root.char] = root.code
    generateHuffman1(root.left, code + '0', height + 1, code_dict)
    generateHuffman1(root.right, code + '1', height + 1, code_dict)


def calcHuffman(filename):
    """Build the Huffman code table for the characters of a file.

    Args:
        filename: path of the file to analyse.

    Returns:
        A dict mapping each character to its code as a string of '0'/'1'.
        An empty file yields an empty dict.
    """
    heap = []
    for char, count in calcFrequency(filename).items():
        heappush(heap, Node(count, char=char))
    root = huffman(heap)

    code_dict = {}
    generateHuffman1(root, '', 0, code_dict)
    if root is not None and root.char is not None:
        # Only one distinct character: the root is itself a leaf and its path
        # is empty, which would encode the whole file as zero bits.  Give it
        # a one-bit code instead.
        root.code = '0'
        code_dict[root.char] = '0'
    return code_dict


def _pack_bits(bits):
    """Pack a string of '0'/'1' characters into bytes, zero-padding the tail."""
    # (-n) % 8 is 0 for an already aligned stream, so no extra byte is added.
    bits += '0' * ((-len(bits)) % 8)
    return bytearray(int(bits[i:i + 8], 2) for i in _range(0, len(bits), 8))


def main(filename=None, output_name='output.txt'):
    """Compress a file and report the compression ratio.

    Args:
        filename: path of the file to compress; when None the user is
            prompted for it on standard input.
        output_name: path of the compressed file to write.
    """
    if filename is None:
        filename = _read_line('Enter the name of the file to be compressed: ')

    codes = calcHuffman(filename)
    with open(filename, 'r') as filep:
        bits = ''.join(codes[c] for line in filep for c in line)

    # Binary mode: the payload is raw bytes, not text.
    with open(output_name, 'wb') as fileo:
        fileo.write(_pack_bits(bits))

    print('Name of the compressed file: %s' % output_name)
    original_size = os.stat(filename).st_size
    if original_size:
        ratio = float(os.stat(output_name).st_size) / float(original_size)
        print('Compression ratio: %.6f' % ratio)
    else:
        print('Compression ratio: undefined (input file is empty)')


if __name__ == '__main__':
    main()