Huffman Coding in Python

Program will take the name of the file (with path is also fine), and will output the compressed file in the directory of the program. This program also shows how to write file byte-wise required for compression.

################################
# Name: Dhruv Pancholi
# Filename: huffman.py
# Input Format: filename
################################

from heapq import *

class Node(object):
	def __init__(self, freq, left = None, right = None, char = None):
		super(Node, self).__init__()
		self.freq = freq
		self.left = left
		self.right = right
		self.char = char
		self.code = [0, 0]
	def __lt__(self, other):
		return self.freq < other.freq
	def __gt__(self, other):
		return self.freq > other.freq
	def __eq__(self, other):
		if other == None:
			return False
		return self.freq == other.freq

def calcFrequency(filename):
	filep = open(filename, 'r')
	freq = {}
	for line in filep:
		for c in line:
			if c not in freq:
				freq[c] = 1
			else:
				freq[c] += 1
	return freq

def huffman(C):
	n = len(C)
	for i in xrange(n-1):
		z = Node(0)
		z.left = heappop(C)
		z.right = heappop(C)
		z.freq = z.left.freq + z.right.freq
		heappush(C, z)
	return heappop(C)

def generateHuffman(root, code, height, code_dict):
	if root==None:
		return
	root.code[0] = code>>1
	root.code[1] = height
	if root.char != None:
		code_dict[root.char] = root.code
	generateHuffman(root.left, code<<1, height+1, code_dict)
	generateHuffman(root.right, (code+1)<<1, height+1, code_dict)

def generateHuffman1(root, code, height, code_dict):
	if root==None:
		return
	root.code = code
	if root.char != None:
		code_dict[root.char] = root.code
	generateHuffman1(root.left, code+'0', height+1, code_dict)
	generateHuffman1(root.right, code+'1', height+1, code_dict)

def calcHuffman(filename):
	code_dict = {}
	freq = calcFrequency(filename)	
	heap = []
	for c in freq:
		heappush(heap, Node(freq[c], char = c))
	root = huffman(heap)
	generateHuffman1(root, '', 0, code_dict)
	return code_dict

def main():
	filename = raw_input('Enter the name of the file to be compressed: ')
	huffman = calcHuffman(filename)
	filep = open(filename, 'r')
	s=''
	for line in filep:
		for c in line:
			s += huffman[c]
	for i in xrange(8-len(s)%8):
		s+='0'
	packed_data = ''.join(chr(int(s[i:i+8], 2)) for i in xrange(0, len(s), 8))
	fileo = open('output.txt', 'w')
	fileo.write(packed_data)
	filep.close()
	fileo.close()
	import os
	print 'Name of the compressed file: output.txt'
	print "Compression ratio: %.6f"%(float(os.stat('output.txt').st_size)/float(os.stat(filename).st_size))

if __name__ == '__main__':
	main()

Hackerrank Algorithmic Crush Solution in Python

from heapq import *

class Node(object):
	def __init__(self, l):
		super(Node, self).__init__()
		# Start, Finish and Weight
		self.s = l[0]; self.f = l[1]; self.w = l[2]
	def __lt__(self, o):
		return self.f < o.f

N, M = map(int, raw_input().split())
A = []; heap = []

for m in xrange(M):
	A.append(Node(map(int, raw_input().split())))

A.sort(key = lambda x: x.s)

maxi = 0; m = 0
for a in A:
	while len(heap)>0 and heap[0].f<a.s:
		m -= heappop(heap).w
	m += a.w
	if m>maxi: maxi = m
	heappush(heap, a)

print maxi

Sample Input:

5 3
1 2 100
2 5 100
3 4 100

Sample Output:

200

Hackerrank Chief Hopper Solution in C++

#include <cmath>
#include <cstdio>
#include <vector>
#include <algorithm>

using namespace std;

typedef long long ll;

int main() {
    int N; scanf("%d", &N);
    vector <ll> Arr(N);
    for (int i=0;i<N;i++) {
        scanf("%lld", &Arr[i]);
    }
    ll ans = 0;
    for (int i = N-1;i>=0;i--) {
        ans = (Arr[i] + ans)/2 + (Arr[i] + ans)%2;
    }
    printf("%lld\n", ans);
    return 0;
}

Sample Input:

5
3 4 3 2 4

Sample Output:

4

Hackerrank Priyanka and Toys Solution in C++

#include<cstdio>
#include<vector>
#include<map>
#include<algorithm>
#include<queue>

using namespace std;

int main(){
	int N; scanf("%d", &N);
	priority_queue<int, vector<int>, greater<int> > q;
	int s;
	for(int n = 0; n<N; n++){
		scanf("%d", &s);
		q.push(s);
	}
	int current = 0, count = 0;
	while(!q.empty()){
		current = q.top(); q.pop();
		while(q.top()<=current+4 && !q.empty()) q.pop();
		count++;
	}
	if(count>0) printf("%d\n", count);
	else printf("0\n");
	return 0;
}

Sample Input:

5
1 2 3 17 10

Sample Output:

3

Hackerrank Two Arrays Solution in C++

#include<cstdio>
#include<vector>
#include<map>
#include<algorithm>
#include<queue>

using namespace std;

int main(){
	int N, K; scanf("%d %d", &N, &K);
	priority_queue<int, vector<int>, greater<int> > q;
	int s;
	for(int n = 0; n<N; n++){
		scanf("%d", &s);
		q.push(s);
	}
	int sum = 0, count = -1;
	while(sum<K && !q.empty()){
		sum += q.top(); q.pop();
		count++;
	}
	if(count>0) printf("%d\n", count);
	else printf("0\n");
	return 0;
}

Sample Input:

2
3 10
2 1 3
7 8 9
4 5
1 2 2 1
3 3 3 4

Sample Output:

YES
NO

Hackerrank Mark and Toys Solution in C++

#include<cstdio>
#include<vector>
#include<map>
#include<algorithm>
#include<queue>

using namespace std;

int main(){
	int N, K; scanf("%d %d", &N, &K);
	priority_queue<int, vector<int>, greater<int> > q;
	int s;
	for(int n = 0; n<N; n++){
		scanf("%d", &s);
		q.push(s);
	}
	int sum = 0, count = -1;
	while(sum<K && !q.empty()){
		sum += q.top(); q.pop();
		count++;
	}
	if(count>0) printf("%d\n", count);
	else printf("0\n");
	return 0;
}

Sample Input:

7 50
1 12 5 111 200 1000 10

Sample Output:

4

Hackerrank Jim and the Orders Solution in C++

#include<cstdio>
#include<vector>
#include<map>
#include<algorithm>

using namespace std;

int main(){
	int N; scanf("%d", &N);
	vector<pair<int, int> > A;
	int s, f;
	for(int n = 0; n<N; n++){
		scanf("%d %d", &s, &f);
		A.push_back(make_pair(s+f, n));
	}
	sort(A.begin(), A.end());
	for(int n = 0; n<N; n++){
		printf("%d ", A[n].second+1);
	}
	return 0;
}

Sample Input:

3
1 3
2 3
3 3

Sample Output:

1 2 3

Hackerrank Grid Challenge Solution in C++

#include<cstdio>
#include<vector>
#include<map>
#include<algorithm>
using namespace std;

int main(){
	int T; scanf("%d", &T);
	while(T--){
		int N; scanf("%d", &N);
		char A[N][N];
		for(int n = 0; n<N; n++){
			scanf("%s", A[n]);
			sort(A[n], A[n]+N);
		}
		bool b = false;
		for(int i = 0; i<N; i++){
			if(b) break;
			for(int j = 0; j<N-1; j++){
				if(A[j+1][i]<A[j][i]){
					b = true;
					break;
				}
			}
		}
		if(!b) printf("YES\n");
		else printf("NO\n");
	}
	return 0;
}

Sample Input:

1
5
ebacd
fghij
olmkn
trpqs
xywuv

Sample Output:

YES

Hackerrank Flowers Solution in C++

#include<cstdio>
#include<vector>
#include<map>
#include<algorithm>
#include<queue>

using namespace std;
#define p(x) printf("%d\n", x)

int main(){
	int N, K, s; scanf("%d %d", &N, &K);
	priority_queue<int> pq;
	for(int n = 0; n<N; n++) scanf("%d", &s), pq.push(s);
	int x = 1, iter = 0, sum = 0;
	while(!pq.empty()){
		sum += x*pq.top();
		pq.pop();
		iter++;
		if(iter%K==0) x++;
	}
	p(sum);
	return 0;
}

Sample Input:

3 3
2 5 6

Sample Output:

13

Single Link Clustering in Python

This post shows a simple and sweet implementation of Union-Find by Rank with Path Compression Data Structure in python. Which is used to solve the problem of single link clustering, whose Algorithm is in-turn similar to the Kruskal’s Algorithm of Minimum Spanning Tree.

import heapq

class UF(object):
	
	"""
	Implementation of union-find algorithm with path compression
	Instance Variables:
		@p: parent list
		@rank: rank of the given vertex, by its index
		@count: number of connected components
	"""
	def __init__(self, N):
		super(UF, self).__init__()
		self.count = N
		self.p = [i for i in xrange(N)]
		self.rank = [0 for i in xrange(N)]
	
	def find(self, x):
		"""
		Returns the parent vertex of the given connected component
		"""
		
		assert x>=0 and x<len(self.p)
		while x!=self.p[x]:
			# Line implementing the path compression
			self.p[x] = self.p[self.p[x]]
			x = self.p[x]
		return x
	
	def connected(self, x, y):
		return find(x) == find(y)
	
	def union(self, x, y):
		"""
		Union by rank
		"""
		
		i = self.find(x); j = self.find(y)
		if i==j: return
		if self.rank[i] < self.rank[j]: self.p[i] = j
		elif self.rank[i] > self.rank[j]: self.p[j] = i
		else: self.p[j] = i; self.rank[i] += 1
		self.count -= 1


def main():
	N = int(raw_input())
	uf = UF(N)
	edges = []
	# Taking input till the there are edges in the input file
	while True:
		try:
			# Input is of the format "v1 v2 edge_weight"
			edge = map(int, raw_input().split())
			# Considering the vertice index starts from 0
			edges.append([edge[2], edge[0]-1, edge[1]-1])
		except Exception, e:
			break
	heapq.heapify(edges)
	# Keep on adding till the edge inserted gives 3 connected
	# components in the graph
	while uf.count!=3:
		edge = heapq.heappop(edges)
		uf.union(edge[1], edge[2])
	# The last edge which changed from 4 connected components to
	# 3 connected components
	print "Weight of the last edge: "+edge[0]

if __name__ == '__main__':
	main()

Sample Input:

4
1 2 6808
1 3 5250
1 4 74
2 3 5250
2 4 74
3 4 6808

Sample Output:

Weight of the last edge: 74