//huffman.cpp //Huffman Encoding (Data Compression) //Compiler used: Dev-C++ 4.9.9.2 //FB - 201011301 #include #include #include #include //for width() #include //for sprintf() #define HELP_ERROR 99 #define width_unit 5 using namespace std; // (Templated d-heap) (on dynamic array of pointers) // priority queue // min (root=min) ((balanced) d-tree on dynamic array) d-heap template class Queue { public: Queue(int d=2); //constructor ~Queue(void); //destructor void enq(T*); //enqueue (to push) T* deq(void); //dequeue (to pop) T* front(void); //the front element bool empty(void) const; //is empty? bool full(void) const; //is full? private: int back; //the last element in the queue T* *arr; //dynamic array int size; //current size of the queue array static const int SIZE=10; //size increment step size int D; //max number of children for a parent>=2 //copy constructor and assignment are hidden to protect Queue(const Queue &); const Queue & operator=(const Queue &); //utility functions to fix the heap //when an element added or removed void reheapup(int, int); //fix heap upward void reheapdown(int, int); //fix heap downward void swap(T* &, T* &); //swap f. needed by reheapup/down functions }; //end class // constructor (creates a new queue) template Queue::Queue(int d) { if(d<2) d=2; //min a 2-heap is supported D=d; back=0; size=SIZE; arr=new T*[size]; } // is queue empty? template bool Queue::empty(void) const { return (back<=0); } // is queue full? template bool Queue::full(void) const { return (back>=size); } // the front element of the queue template T* Queue::deq(void) { if(empty()) { cerr<<"deq error! exiting..."< T* Queue::front(void) { if(empty()) { cerr<<"deq error! exiting..."< void Queue::enq(T* foo) { if(full()) //if the array is full then make it larger { int nsize=size+SIZE; //the size of the new array T* *narr=new T*[nsize]; //new array for(int i=0;i void Queue::reheapup(int root, int bottom) { int parent; //parent node (in the virtual tree) of the bottom element if(bottom > root) { parent=(bottom-1)/D; //compare the two node and if the order is wrong then swap them //and make a recursive call to continue upward in the virtual tree //until the whole tree heap order is restored if(*arr[parent] > *arr[bottom]) { swap(arr[parent], arr[bottom]); reheapup(root, parent); } } } // this is a recursive function to fix back the order in the queue // downwards after a new element added front (root) of the queue template void Queue::reheapdown(int root, int bottom) { int minchild, firstchild, child; firstchild=root*D+1; //the position of the first child of the root if(firstchild <= bottom) //if the child is in the queue { minchild=firstchild; //first child is the min child (temporarily) for(int i=2;i <= D;++i) { child=root*D+i; //position of the next child if(child <= bottom) //if the child is in the queue { //if the child is less than the current min child //then it will be the new min child if(*arr[child] < *arr[minchild]) { minchild=child; } } } //if the min child found is less then the root(parent node) //then swap them and call reheapdown() recursively and //continue to fix the order in the virtual tree downwards if(*arr[root] > *arr[minchild]) { swap(arr[root], arr[minchild]); reheapdown(minchild, bottom); } } } // the values of 2 variables will be swapped template void Queue::swap(T* &a, T* &b) { T* c; c=a; a=b; b=c; } // destructor (because default dest. does not erase the array) template Queue::~Queue(void) { delete[] arr; } // Huffman Tree class Tree { private: class Node { public: unsigned int freq; unsigned char ch; Node *left, *right; //constructor Node(void) :freq(0), ch('\0'), left(NULL), right(NULL) {} }; Node *root; //copy cons. and assign. op. overload prototypes are private to //prevent them to be used Tree(const Tree &); //copy constructor const Tree & operator=(const Tree &); //assignment oper. overload void chop(Node * N); //destroys the tree void print(ostream &, Node *, int) const; //prints the tree void print(Node *, int) const; //prints the tree public: Tree(void); //constructor ~Tree(void); //destructor friend ostream & operator<<(ostream &, const Tree &); //utility functions to get or set class members unsigned int get_freq(void) const; unsigned char get_char(void) const; void set_freq(unsigned int); void set_char(unsigned char); Node* get_left(void) const; Node* get_right(void) const; void set_left(Node *); void set_right(Node *); Node* get_root(void) const; //comparison operator overloads to compare //2 objects of the class bool operator==(const Tree &) const; bool operator!=(const Tree &) const; bool operator<(const Tree &) const; bool operator>(const Tree &) const; bool operator<=(const Tree &) const; bool operator>=(const Tree &) const; //to get H. string of a given char void huf(Node *, unsigned char, string, string &) const; //outputs the H. char-string pairs list void huf_list(Node *, string) const; //to get char equivalent of a H. string (if exists) bool get_huf_char(string, unsigned char &) const; string print_char(Node *) const; //prints chars }; //constructor Tree::Tree(void) { Node* N=new Node; root=N; } //recursive func to delete the whole tree void Tree::chop(Node *N) { if(N) { chop(N->left); chop(N->right); delete N; } } //destructor for tree objects Tree::~Tree(void) { chop(root); } unsigned int Tree::get_freq(void) const { return root->freq; } unsigned char Tree::get_char(void) const { return root->ch; } void Tree::set_freq(unsigned int f) { root->freq=f; } void Tree::set_char(unsigned char c) { root->ch=c; } Tree::Node* Tree::get_left(void) const { return root->left; } Tree::Node* Tree::get_right(void) const { return root->right; } void Tree::set_left(Node* N) { root->left=N; } void Tree::set_right(Node* N) { root->right=N; } Tree::Node* Tree::get_root(void) const { return root; } //the recursive tree output (w/ respect to its graph) fn. void Tree::print(ostream & ost, Node * curr, int level) const { if(curr) //if the current node is not null then { print(ost,curr->right,level+1); //try to go to right node //output the node data w/ respect to its level ost<freq<left,level+1); //try to go to left node } } //the recursive tree print (w/ respect to its graph) fn. void Tree::print(Node * curr, int level) const { if(curr) //if the current node is not null then { print(curr->right,level+1); //try to go to right node //print the node data w/ respect to its level cout<freq<left,level+1); //try to go to left node } } //utility fn to output a tree ostream & operator<<(ostream &ost, const Tree &t) { t.print(ost, t.root, 1); return ost; } //the comparison operator overloads to compare 2 Huffman trees bool Tree::operator==(const Tree & T) const { return (root->freq == T.root->freq); } bool Tree::operator!=(const Tree & T) const { return (root->freq != T.root->freq); } bool Tree::operator<(const Tree & T) const { return (root->freq < T.root->freq); } bool Tree::operator>(const Tree & T) const { return (root->freq > T.root->freq); } bool Tree::operator<=(const Tree & T) const { return (root->freq <= T.root->freq); } bool Tree::operator>=(const Tree & T) const { return (root->freq >= T.root->freq); } //Huffman string finder (recursive func.) //input : a tree node to start the search, a char to find its // Huffman string equivalent, current Huffman string according to // position on the Huffman tree, and a string (by reference) to // copy the resulted full Huffman string end of the search //return: none (except Huffman string by reference) void Tree::huf(Node* N, unsigned char c, string str, string & s) const { if(N) //if the node is not null { //compare char of the leaf node and the given char if(!N->left && !N->right && N->ch == c) { s=str; //if the char is found then copy the H. string } else { //continue to search if the same char still not found huf(N->left, c, str+"0",s); huf(N->right, c, str+"1",s); } } } //Huffman char-string lister (recursive func.) //input : a tree node to start the search, current Huffman string according to // position on the Huffman tree //output: whole list of char-H. string code list of the H. tree void Tree::huf_list(Node* N, string str) const { if(N) //if the node is not null { if(!N->left && !N->right) //if it is a leaf node cout<left, str+"0"); huf_list(N->right, str+"1"); } } } //char finder with given Huffman string //input : a Huffman string to traverse on the H. tree and // a u. char by ref. to copy the char found //return: true if a char is found else false bool Tree::get_huf_char(string s, unsigned char & c) const { Node * curr=root; for(unsigned int i=0;ileft; if(s[i]=='1') //go to right in the H. tree curr=curr->right; } bool found=false; if(!curr->left && !curr->right) //if it is a leaf node { found=true; c=curr->ch; } return found; } //input : a H. tree node //return: the same char as string or if the char is not printable // then its octal representation in the ASCII char set string Tree::print_char(Node * N) const { string s=""; if(!N->left && !N->right) //if it is a leaf node { unsigned char c=N->ch; //if the char is not printable then output its octal ASCII code if(iscntrl(c) || c==32) //32:blank char { //calculate the octal code of the char (3 digits) char* cp=new char; for(int i=0;i<3;++i) { sprintf(cp,"%i",c%8); c-=c%8; c/=8; s=(*cp)+s; } s='/'+s; // adds \ in front of the octal code } else s=c; } return s; } //the given bit will be written to the output file stream //input : unsigned char i(:0 or 1 : bit to write ; 2:EOF) void huf_write(unsigned char i, ofstream & outfile) { static int bit_pos=0; //0 to 7 (left to right) on the byte block static unsigned char c='\0'; //byte block to write if(i<2) //if not EOF { if(i==1) c=c | (i<<(7-bit_pos)); //add a 1 to the byte else //i==0 c=c & static_cast(255-(1<<(7-bit_pos))); //add a 0 ++bit_pos; bit_pos%=8; if(bit_pos==0) { outfile.put(c); c='\0'; } } else { outfile.put(c); } } //input : a input file stream to read bits //return: unsigned char (:0 or 1 as bit read or 2 as EOF) unsigned char huf_read(ifstream & infile) { static int bit_pos=0; //0 to 7 (left to right) on the byte block static unsigned char c=infile.get(); unsigned char i; i=(c>>(7-bit_pos))%2; //get the bit from the byte ++bit_pos; bit_pos%=8; if(bit_pos==0) if(!infile.eof()) { c=infile.get(); } else i=2; return i; } //Huffman Encoder void encoder(string ifile, string ofile, bool verbose) { ifstream infile(ifile.c_str(), ios::in|ios::binary); if(!infile) { cerr< q(3); //use a 3-(priority)heap Tree* tp; for(int i=0;i<256;++i) { //output char freq table to the output file //divide 32 bit u. int values into 4 bytes outfile.put(static_cast(f[i]>>24)); outfile.put(static_cast((f[i]>>16)%256)); outfile.put(static_cast((f[i]>>8)%256)); outfile.put(static_cast(f[i]%256)); if(f[i]>0) { //send freq-char pairs to the priority heap as Huffman trees tp=new Tree; (*tp).set_freq(f[i]); (*tp).set_char(static_cast(i)); q.enq(tp); } } //construct the main Huffman Tree Tree* tp2; Tree* tp3; do { tp=q.deq(); if(!q.empty()) { //get the 2 lowest freq. H. trees and combine them into one //and put back into the priority heap tp2=q.deq(); tp3=new Tree; (*tp3).set_freq((*tp).get_freq()+(*tp2).get_freq()); (*tp3).set_left((*tp).get_root()); (*tp3).set_right((*tp2).get_root()); q.enq(tp3); } } while(!q.empty()); //until all sub-trees combined into one //find H. strings of all chars in the H. tree and put into a string table string H_table[256]; unsigned char uc; for(unsigned short us=0;us<256;++us) { H_table[us]=""; uc=static_cast(us); (*tp).huf((*tp).get_root(), uc, "", H_table[us]); } if(verbose) { cout<<*tp<=0;--k) { infile.get(c); ch=c; f[i]+=ch*(j<<(8*k)); } } //re-construct the Huffman tree Queue q(3); //use a 3-(priority)heap (again) Tree* tp; for(int i=0;i<256;++i) { if(f[i]>0) { //send freq-char pairs to the priority heap as Huffman trees tp=new Tree; (*tp).set_freq(f[i]); (*tp).set_char(static_cast(i)); q.enq(tp); } } //construct the main Huffman Tree (as in Encoder func.) Tree* tp2; Tree* tp3; do { tp=q.deq(); if(!q.empty()) { //get the 2 lowest freq. H. trees and combine them into one //and put back into the priority heap tp2=q.deq(); tp3=new Tree; (*tp3).set_freq((*tp).get_freq()+(*tp2).get_freq()); (*tp3).set_left((*tp).get_root()); (*tp3).set_right((*tp2).get_root()); q.enq(tp3); } } while(!q.empty()); //until all sub-trees combined into one if(verbose) { cout<<*tp<0) //continue until no char left to decode { st=""; //current Huffman string do { //read H. strings bit by bit ch=huf_read(infile); if(ch==0) st=st+'0'; if(ch==1) st=st+'1'; } //search the H. tree while(!(*tp).get_huf_char(st, ch2)); //continue until a char is found //output the char to the output file outfile.put(static_cast(ch2)); --total_chars; } infile.close(); outfile.close(); } //end of Huffman decoder void usage_msg ( const string & pname ) { cerr << "Usage: " << pname << " : valid flags are :" << endl << " -i input_file : required" << endl << " -o output_file : required" << endl << " -e : turn on encoding mode ( default )" << endl << " -d : turn on decoding mode" << endl << " -v : verbose mode" << endl << " -h : this help screen" << endl; } int main( int argc, char * argv[] ) { string in_name; string out_name; bool encode = true; bool verbose = false; for ( int i = 1 ; i < argc ; ++i ) { if ( !strcmp( "-h", argv[i] ) || !strcmp( "--help", argv[i] ) ) { usage_msg( argv[0] ); exit( HELP_ERROR ); } else if ( !strcmp( "-i", argv[i] ) ) { cerr << "input file is '" << argv[++i] << "'" << endl; in_name = argv[i]; } else if ( !strcmp( "-o", argv[i] ) ) { cerr << "output file is '" << argv[++i] << "'" << endl; out_name = argv[i]; } else if ( !strcmp( "-d", argv[i] ) ) { encode = false; } else if ( !strcmp( "-e", argv[i] ) ) { encode = true; } else if ( !strcmp( "-v", argv[i] ) ) { verbose = true; } } if ( !in_name.size() || !out_name.size() ) { cerr << "input and output file are required, nothing to do!" << endl; usage_msg( argv[0] ); exit( HELP_ERROR ); } if ( encode ) { cerr << "running in encoder mode" << endl; encoder( in_name, out_name, verbose ); } else { cerr << "running in decoder mode" << endl; decoder( in_name, out_name, verbose ); } cerr << "done .... " << endl; return 0; }