Huffman Coding
Algorithms
Huffman coding is a lossless data compression algorithm that assigns variable-length codes to characters based on their frequencies, with more frequent characters getting shorter codes.
Algorithm
The algorithm builds a binary tree where characters are leaves, and the path from root to leaf determines the code. More frequent characters are placed closer to the root.
Time Complexity
The time complexity is , where is the number of unique characters.
Implementation
import heapq
from collections import Counter, defaultdict
class Node:
def __init__(self, char, freq):
self.char = char
self.freq = freq
self.left = None
self.right = None
def __lt__(self, other):
return self.freq < other.freq
def build_huffman_tree(text):
# Calculate frequency of each character
frequency = Counter(text)
# Create a priority queue of nodes
priority_queue = [Node(char, freq) for char, freq in frequency.items()]
heapq.heapify(priority_queue)
# Build the Huffman Tree
while len(priority_queue) > 1:
left = heapq.heappop(priority_queue)
right = heapq.heappop(priority_queue)
merged = Node(None, left.freq + right.freq)
merged.left = left
merged.right = right
heapq.heappush(priority_queue, merged)
return priority_queue[0]
def generate_codes(node, prefix='', codebook=defaultdict()):
if node is not None:
if node.char is not None:
codebook[node.char] = prefix
generate_codes(node.left, prefix + '0', codebook)
generate_codes(node.right, prefix + '1', codebook)
return codebook
def huffman_encoding(text):
root = build_huffman_tree(text)
huffman_codes = generate_codes(root)
encoded_text = ''.join(huffman_codes[char] for char in text)
return encoded_text, huffman_codes
def huffman_decoding(encoded_text, huffman_codes):
reverse_codes = {{v: k for k, v in huffman_codes.items()}}
current_code = ''
decoded_text = ''
for bit in encoded_text:
current_code += bit
if current_code in reverse_codes:
decoded_text += reverse_codes[current_code]
current_code = ''
return decoded_text
# Example usage
if __name__ == "__main__":
text = "this is an example for huffman encoding"
# Encode the text
encoded_text, huffman_codes = huffman_encoding(text)
print("Encoded text:", encoded_text)
print("Huffman Codes:", huffman_codes)
# Decode the text
decoded_text = huffman_decoding(encoded_text, huffman_codes)
print("Decoded text:", decoded_text)