huffman压缩存储和解压缩源代码
Huffman编解码程序代码
% Demo script: Huffman-compress an image, decompress it again and verify
% that the round trip is lossless.
fprintf('Reading data...\n')
data = imread('Lenna.bmp');
data = uint8(data);
fprintf('Done!\n')

fprintf('Compressing data...\n')
[zipped, info] = norm2huff(data);
fprintf('Done!\n')

fprintf('Decompressing data...\n')
unzipped = huff2norm(zipped, info);
fprintf('Done!\n')

% The codec is lossless only if every sample survives the round trip.
isOK = isequal(data(:), unzipped(:))
whos data zipped unzipped

% ------------------------------------------------------------------------
% Huffman encoding
% ------------------------------------------------------------------------
function [zipped, info] = norm2huff(vector)
%NORM2HUFF Huffman-encode a uint8 array.
%   [ZIPPED, INFO] = NORM2HUFF(VECTOR) flattens VECTOR, builds a Huffman code
%   from its byte frequencies and returns the packed bit stream ZIPPED
%   (uint8 row vector) together with the structure INFO that HUFF2NORM needs:
%     INFO.pad        number of zero bits appended to fill the last byte
%     INFO.huffcodes  sparse column mapping (code value) -> (byte value + 1)
%     INFO.ratio      compressed length / original length
%     INFO.length     number of elements in VECTOR
%     INFO.maxcodelen longest code word, in bits
%   An error is raised when VECTOR is not of class uint8.
if ~isa(vector, 'uint8')
    error('input argument must be a uint8 vector');
end

vector = vector(:)';                    % work on a row vector

% Relative frequency of every byte value; keep only the symbols that occur.
% Index k of f corresponds to byte value k-1.
f = frequency(vector);
simbols = find(f ~= 0);
f = f(simbols);

% Sort ascending so that the two rarest nodes are always in front.
[f, sortindex] = sort(f);
simbols = simbols(sortindex);

% Build the code words bottom-up.  simbols_index{k} lists the leaves that
% hang below tree node k; merging two nodes prepends one bit to all of them.
len = length(simbols);
simbols_index = num2cell(1:len);
codeword_tmp = cell(len, 1);
if len == 1
    % A lone symbol would otherwise get an empty (undecodable) code word.
    codeword_tmp{1} = uint8(0);
end
while length(f) > 1
    index1 = simbols_index{1};
    index2 = simbols_index{2};
    codeword_tmp(index1) = addnode(codeword_tmp(index1), uint8(0));
    codeword_tmp(index2) = addnode(codeword_tmp(index2), uint8(1));
    f = [sum(f(1:2)) f(3:end)];
    simbols_index = [{[index1 index2]} simbols_index(3:end)];
    % Re-sort so the two lightest nodes are first again.
    [f, sortindex] = sort(f);
    simbols_index = simbols_index(sortindex);
end

% Lookup table: byte value + 1 -> code word (vector of 0/1).
codeword = cell(256, 1);
codeword(simbols) = codeword_tmp;

% Total number of bits, so the bit string can be preallocated.
len = 0;
for index = 1:length(vector)
    len = len + length(codeword{double(vector(index)) + 1});
end

% Concatenate the code words of all input bytes.
string = repmat(uint8(0), 1, len);
pointer = 1;
for index = 1:length(vector)
    code = codeword{double(vector(index)) + 1};
    len = length(code);
    string(pointer + (0:len-1)) = code;
    pointer = pointer + len;
end

% Pad with zeros to a whole number of bytes.  pad is always in 1..8, so an
% already aligned stream receives one extra zero byte; HUFF2NORM removes
% exactly info.pad bits again.
len = length(string);
pad = 8 - mod(len, 8);
if pad > 0
    string = [string uint8(zeros(1, pad))];
end

% Turn every used code word into one number: the bits are weighted LSB
% first and a marker bit is set just above the last code bit, so code words
% of different length never collide.
codeword = codeword(simbols);
weights = 2.^(0:23);                    % supports code words up to 24 bits
maxcodelen = 0;
for index = 1:length(codeword)
    len = length(codeword{index});
    if len > maxcodelen
        maxcodelen = len;
    end
    if len > 0
        code = sum(weights(codeword{index} == 1));
        code = bitset(code, len + 1);
        codeword{index} = code;
    end
end
codeword = [codeword{:}];

% Pack 8 bits per output byte (first bit of each group is the LSB).
cols = length(string) / 8;
string = reshape(string, 8, cols);
weights = 2.^(0:7);
zipped = uint8(weights * double(string));

% Sparse decode table: code value -> symbol index (byte value + 1).
huffcodes = sparse(1, 1);
for index = 1:numel(codeword)
    huffcodes(codeword(index), 1) = simbols(index);
end

info.pad = pad;
info.huffcodes = huffcodes;
info.ratio = cols ./ length(vector);
info.length = length(vector);
info.maxcodelen = maxcodelen;
end

function codeword_new = addnode(codeword_old, item)
%ADDNODE Prepend the bit ITEM to every code word in the cell array.
codeword_new = cell(size(codeword_old));
for index = 1:length(codeword_old)
    codeword_new{index} = [item codeword_old{index}];
end
end

% ------------------------------------------------------------------------
% Huffman decoding
% ------------------------------------------------------------------------
function vector = huff2norm(zipped, info)
%HUFF2NORM Decode a bit stream produced by NORM2HUFF.
%   VECTOR = HUFF2NORM(ZIPPED, INFO) returns the original data as a uint8 row
%   vector of INFO.length elements.  An error is raised when ZIPPED is not
%   of class uint8.
if ~isa(zipped, 'uint8')
    error('input argument must be a uint8 vector');
end

% Unpack every byte into 8 bits (LSB first, mirroring the encoder).
len = length(zipped);
string = repmat(uint8(0), 1, len .* 8);
bitindex = 1:8;
for index = 1:len
    string(bitindex + 8 .* (index - 1)) = uint8(bitget(zipped(index), bitindex));
end

% Drop the padding bits appended by the encoder.
string = logical(string(:)');
len = length(string);
string((len - info.pad + 1):end) = [];
len = length(string);

% Accumulate bits until the value (with marker bit) is a known code word.
vector = repmat(uint8(0), 1, info.length);
vectorindex = 1;
codeindex = 1;
code = 0;
for index = 1:len
    code = bitset(code, codeindex, string(index));
    codeindex = codeindex + 1;
    byte = decode(bitset(code, codeindex), info);
    if byte > 0                         % a complete code word was found
        vector(vectorindex) = byte - 1;
        codeindex = 1;
        code = 0;
        vectorindex = vectorindex + 1;
    end
end
end

function byte = decode(code, info)
%DECODE Look up a code value; returns 0 when it is not a complete code word.
byte = info.huffcodes(code);
end

% ------------------------------------------------------------------------
% Relative frequency of each byte value
% ------------------------------------------------------------------------
function f = frequency(vector)
%FREQUENCY Relative frequency of the byte values 0..255 in VECTOR.
%   F(k) is the fraction of elements equal to k-1.
if ~isa(vector, 'uint8')
    error('input argument must be a uint8 vector');
end
f = zeros(1, 256);
len = length(vector);
for index = 0:255                       % byte value 0 must be counted too
    f(index + 1) = sum(vector == uint8(index));
end
f = f ./ len;
end
huffman编码译码实现文件的压缩与解压.
数据结构课程设计题目名称:huffman编码与解码实现文件的压缩与解压专业年级:组长:小组成员:指导教师:二〇一二年十二月二十六日目录一、目标任务与问题分析 (2)1.1目标任务 (2)1.2问题分析 (2)二、算法分析 (2)2.1构造huffman树 (2)2.1.1 字符的统计 (2)2.1.2 huffman树节点的设计 (2)2.2构造huffman编码 (3)2.2.1 huffman编码的设计 (3)2.3 压缩文件与解压文件的实现 (3)三、执行效果 (4)3.1界面 (4)3.2每个字符的编码 (4)3.3操作部分 (5)3.4文件效果 (6)四、源程序 (7)五、参考文献 (16)huffman编码与解码实现文件的压缩与解压一、目标任务与问题分析1.1目标任务采用huffman编码思想实现文件的压缩和解压功能,可以将任意文件压缩,压缩后也可以解压出来。
这样既节约了存储空间,也不会破坏文件的完整性。
1.2问题分析 本问题首先应该利用哈夫曼思想,对需要压缩的文件中的各个字符进行频率统计。为了能对任意的文件进行处理,应该将所有的文件以二进制的方式进行处理,即对文件(不管包含的是字母还是汉字)逐个字节地处理;然后根据统计的频率结果构造哈夫曼树,再对每个字符进行哈夫曼编码,最后逐一将被压缩文件中每个字符对应的哈夫曼编码存入新的文件中,即得到压缩文件。
解压过程则利用相应的哈夫曼树及压缩文件中的二进制码将编码序列译码,对文件进行解压,得到解压文件。
二、算法分析2.1构造huffman树要利用哈夫曼编码对文本文件进行压缩,首先必须知道其字符相应的哈夫曼编码。
为了得到文件中字符的频率,一般的做法是扫描整个文本进行统计,编写程序统计文件中各个字符出现的频率。
由于一个字符的范围在[0-255]之间,即共256个状态,所以可以直接用256个哈夫曼树节点即数组(后面有节点的定义)空间来存储整个文件的信息,节点中包括对应字符信息,其中包括频率。
哈夫曼树与文件解压压缩C语言代码
/*
 * 1. Problem statement -- Huffman tree encoding and decoding
 *    - Function: compress and decompress files of any type.
 *    - Input:    the source file / the compressed file.
 *    - Output:   verification that decoding is correct, plus statistics on the
 *                compression ratio and the encoding/decoding speed.
 *    - Requirement (as stated by the assignment): use both adaptive Huffman
 *      coding (symbol probabilities gathered while encoding) and static
 *      Huffman coding (probabilities gathered beforehand).  The listing below
 *      implements the static variant.
 *
 * 2.1 Program listing.  Bookmarks: 1. main  2. compress  3. select_min
 *     4. encode  5. uncompress
 *
 * Compressed file layout (multi-byte fields are 4 bytes, host byte order):
 *   offset 0          original file length in bytes
 *   offset 4          sumlength = offset of the code table
 *   offset 8          encoded bit stream, MSB first, zero padded
 *   offset sumlength  n = number of distinct byte values, followed by n
 *                     records: byte value (1), code length in bits (1),
 *                     code bits packed MSB first (ceil(length / 8) bytes)
 */
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define SYMBOL_COUNT 256   /* number of distinct byte values */
#define NAME_LEN 260       /* file name buffer; scanf width below must be NAME_LEN-1 */
#define BIT_BUF_LEN 280    /* >= 7 left-over bits + 255 code bits + 8 + NUL */

struct node {
    long weight;                 /* occurrence count (leaf) or sum of the children */
    unsigned char ch;            /* byte value represented by a leaf */
    int parent, lchild, rchild;  /* node indices, -1 when absent */
    char code[256];              /* code as a '0'/'1' string, at most 255 bits */
    int CodeLength;              /* number of bits in code */
} hfmnode[512];

void compress(void);
void uncompress(void);
void encode(struct node *nodep, int n);
/* Named select_min because plain "select" clashes with POSIX select(). */
int select_min(struct node *nodep, int pose);

/* Packs up to eight '0'/'1' characters into one byte, MSB first; bits beyond
 * avail are taken as 0. */
static unsigned char pack_bits(const char *bits, size_t avail)
{
    unsigned char out = 0;
    size_t k;

    for (k = 0; k < 8; k++) {
        out = (unsigned char)(out << 1);
        if (k < avail && bits[k] == '1')
            out |= 1;
    }
    return out;
}

/* Appends the eight bits of value, MSB first, to the NUL-terminated string dst. */
static void append_bits(char *dst, unsigned char value)
{
    size_t len = strlen(dst);
    int k;

    for (k = 7; k >= 0; k--)
        dst[len++] = ((value >> k) & 1) ? '1' : '0';
    dst[len] = '\0';
}

/* Removes the first count characters of s; memmove is used because source and
 * destination overlap. */
static void drop_prefix(char *s, size_t count)
{
    memmove(s, s + count, strlen(s + count) + 1);
}

/* Writes value as a 4-byte field; returns 1 on success. */
static int write_i32(FILE *fp, long value)
{
    int32_t field = (int32_t)value;

    return fwrite(&field, sizeof field, 1, fp) == 1;
}

/* Reads a 4-byte field into *value; returns 1 on success. */
static int read_i32(FILE *fp, long *value)
{
    int32_t field;

    if (fread(&field, sizeof field, 1, fp) != 1)
        return 0;
    *value = field;
    return 1;
}

/* Asks for the input and output file names and opens both files.
 * Returns 1 on success; on failure prints a message, leaves no file open and
 * returns 0. */
static int open_files(const char *prompt, FILE **ifp, FILE **ofp)
{
    char infile[NAME_LEN], outfile[NAME_LEN];

    printf("%s", prompt);
    if (scanf("%259s", infile) != 1 || (*ifp = fopen(infile, "rb")) == NULL) {
        printf("文件名输入错误,文件不存在!\n");
        return 0;
    }
    printf("请输入目标文件名:");
    if (scanf("%259s", outfile) != 1 || (*ofp = fopen(outfile, "wb")) == NULL) {
        printf("文件名输入错误,文件不存在!\n");
        fclose(*ifp);
        return 0;
    }
    return 1;
}

/* Reads the n code-table records at the current position of ifp into
 * hfmnode[0..n-1]; returns 1 on success, 0 when the file ends early. */
static int read_code_table(FILE *ifp, int n)
{
    unsigned char c;
    int i, j, k, nbytes;

    for (i = 0; i < n; i++) {
        if (fread(&hfmnode[i].ch, 1, 1, ifp) != 1 || fread(&c, 1, 1, ifp) != 1)
            return 0;
        hfmnode[i].CodeLength = c;
        nbytes = (hfmnode[i].CodeLength + 7) / 8;
        for (j = 0; j < nbytes; j++) {
            if (fread(&c, 1, 1, ifp) != 1)
                return 0;
            /* Keep only the real code bits; the zero padding is dropped. */
            for (k = 0; k < 8 && j * 8 + k < hfmnode[i].CodeLength; k++)
                hfmnode[i].code[j * 8 + k] = ((c >> (7 - k)) & 1) ? '1' : '0';
        }
        hfmnode[i].code[hfmnode[i].CodeLength] = '\0';
    }
    return 1;
}

/* Entry point: lets the user choose between compressing, decompressing and
 * quitting. */
int main(void)
{
    int choice;

    printf("请选择1~3:\n");
    printf("1.压缩文件\n");
    printf("2.解压文件\n");
    printf("3.退出!\n");
    if (scanf("%d", &choice) != 1) {
        printf("输入错误!");
        return 1;
    }
    if (choice == 1)
        compress();
    else if (choice == 2)
        uncompress();
    else if (choice != 3)
        printf("输入错误!");
    return 0;
}

/* Compression: counts byte frequencies, builds the Huffman tree, writes the
 * encoded stream followed by the code table and prints timing statistics. */
void compress(void)
{
    FILE *ifp, *ofp;
    int leaf_of_byte[SYMBOL_COUNT];  /* byte value -> index of its leaf */
    char codes[BIT_BUF_LEN];         /* bits not yet written, as '0'/'1' text */
    unsigned char c;
    char answer;
    long FileLength = 0, filelength = 0, sumlength = 0;
    int i, j;
    int n, m;                        /* number of leaves / number of tree nodes */
    int s1, s2;                      /* the two lightest parentless nodes */
    int count = 0;                   /* bytes used by the packed code words */
    float rate, speed;
    clock_t start1, start2, finish1, finish2;
    double duration1, duration2;

    if (!open_files("请输入要压缩的文件名:", &ifp, &ofp))
        return;

    start1 = clock();                /* timer 1: analysis and tree building */

    /* Count occurrences, using the byte value itself as the node index.
     * Checking fread's result avoids the extra iteration of a feof() loop. */
    while (fread(&c, 1, 1, ifp) == 1) {
        hfmnode[c].weight++;
        FileLength++;
    }
    if (FileLength == 0) {
        /* No symbols means no tree; there is nothing to encode. */
        printf("源文件为空,无需压缩!\n");
        fclose(ifp);
        fclose(ofp);
        return;
    }

    /* Turn every byte value that occurs into a leaf and move the leaves to
     * the front of the array (slot n is never ahead of slot i). */
    n = 0;
    for (i = 0; i < SYMBOL_COUNT; i++) {
        if (hfmnode[i].weight == 0)
            continue;
        hfmnode[i].ch = (unsigned char)i;
        hfmnode[i].lchild = hfmnode[i].rchild = hfmnode[i].parent = -1;
        hfmnode[n] = hfmnode[i];
        leaf_of_byte[i] = n;
        n++;
    }
    m = 2 * n - 1;

    /* Build the tree: each new node adopts the two lightest parentless nodes. */
    for (i = n; i < m; i++) {
        hfmnode[i].parent = -1;
        s1 = select_min(hfmnode, i - 1);
        hfmnode[i].lchild = s1;
        hfmnode[s1].parent = i;      /* removes s1 from the next selection */
        s2 = select_min(hfmnode, i - 1);
        hfmnode[i].rchild = s2;
        hfmnode[s2].parent = i;
        hfmnode[i].weight = hfmnode[s1].weight + hfmnode[s2].weight;
    }

    encode(hfmnode, n);

    finish1 = clock();
    duration1 = (double)(finish1 - start1) / CLOCKS_PER_SEC;

    printf("编码完成,是否查看编码信息: y or n?\n");
    if (scanf(" %c", &answer) == 1 && answer == 'y') {
        printf("\n");
        printf("叶子数为%d,结点数为%d\n", n, m);
        for (i = 0; i < n; i++)
            printf("%d号叶子结点的权值为:%ld,双亲为:%d,左右孩子:%d,编码为:%s\n",
                   i, hfmnode[i].weight, hfmnode[i].parent, hfmnode[i].lchild,
                   hfmnode[i].code);
    }

    start2 = clock();                /* timer 2: writing the output file */

    fseek(ifp, 0, SEEK_SET);
    write_i32(ofp, FileLength);      /* bytes 0-3: original length */
    fseek(ofp, 8, SEEK_SET);         /* bytes 4-7 are filled in below */

    /* Encode the input, flushing complete groups of 8 bits. */
    codes[0] = '\0';
    while (filelength < FileLength && fread(&c, 1, 1, ifp) == 1) {
        filelength++;
        strcat(codes, hfmnode[leaf_of_byte[c]].code);
        while (strlen(codes) >= 8) {
            c = pack_bits(codes, 8);
            fwrite(&c, 1, 1, ofp);
            sumlength++;
            drop_prefix(codes, 8);
        }
    }
    /* Flush the remaining (fewer than 8) bits, zero padded on the right. */
    if (codes[0] != '\0') {
        c = pack_bits(codes, strlen(codes));
        fwrite(&c, 1, 1, ofp);
        sumlength++;
    }

    sumlength += 8;                  /* now the offset of the code table */
    printf("编码区总长为:%ld个字节\n", sumlength - 8);

    fseek(ofp, 4, SEEK_SET);
    write_i32(ofp, sumlength);       /* bytes 4-7: offset of the code table */
    fseek(ofp, sumlength, SEEK_SET);
    write_i32(ofp, n);               /* number of leaves */

    /* Code table: byte value, code length, packed code bits. */
    for (i = 0; i < n; i++) {
        fwrite(&hfmnode[i].ch, 1, 1, ofp);
        c = (unsigned char)hfmnode[i].CodeLength;  /* at most 255, fits a byte */
        fwrite(&c, 1, 1, ofp);
        for (j = 0; j < hfmnode[i].CodeLength; j += 8) {
            c = pack_bits(hfmnode[i].code + j,
                          (size_t)(hfmnode[i].CodeLength - j));
            fwrite(&c, 1, 1, ofp);
            count++;
        }
    }
    printf("\n");

    finish2 = clock();
    duration2 = (double)(finish2 - start2) / CLOCKS_PER_SEC;
    printf("压缩用时为:%f seconds\n", duration1 + duration2);
    speed = (float)FileLength / (duration1 + duration2) / 1000;
    printf("\n压缩速率为:%5.2f KB/S\n", speed);
    printf("\n");
    printf("源文件长度为:%ld个字节\n", FileLength);
    sumlength = sumlength + 4 + n * 2 + count;  /* total size of the output */
    printf("压缩后文件长度为:%ld个字节\n", sumlength);
    rate = (float)sumlength / (float)FileLength;
    printf("压缩率(百分比)为:%4.2f%%\n", rate * 100);

    fclose(ifp);
    fclose(ofp);
}

/* Returns the index of the lightest node among nodep[0..pose] that has no
 * parent yet, or -1 when every node already has one. */
int select_min(struct node *nodep, int pose)
{
    int i;
    int s1 = -1;
    long min = LONG_MAX;

    for (i = 0; i <= pose; i++) {
        if (nodep[i].parent != -1)
            continue;
        if (nodep[i].weight < min) {
            min = nodep[i].weight;
            s1 = i;
        }
    }
    return s1;
}

/* Derives the code of each of the n leaves by walking from the leaf up to the
 * root and filling a scratch buffer from its end. */
void encode(struct node *nodep, int n)
{
    char codes[256];
    int start;
    int i, f, c;

    if (n <= 0)
        return;
    codes[n - 1] = '\0';             /* a code has at most n-1 bits */
    for (i = 0; i < n; i++) {
        start = n - 1;
        for (c = i, f = nodep[i].parent; f != -1; c = f, f = nodep[f].parent) {
            start--;
            codes[start] = (nodep[f].lchild == c) ? '0' : '1';
        }
        strcpy(nodep[i].code, &codes[start]);
        nodep[i].CodeLength = (int)strlen(nodep[i].code);
    }
}

/* Decompression: reads the header and the code table, then decodes the bit
 * stream by matching its front against every code word. */
void uncompress(void)
{
    FILE *ifp, *ofp;
    char codes[BIT_BUF_LEN];         /* undecoded bits as '0'/'1' text */
    unsigned char c;
    long FileLength, sumlength, filelength, leaves;
    int n, i, maxlength;
    int ok = 1;
    float speed;
    clock_t start, finish;
    double duration;

    if (!open_files("请输入要解压的文件名:", &ifp, &ofp))
        return;

    start = clock();

    /* Header: original length, table offset, then the leaf count at that offset. */
    if (!read_i32(ifp, &FileLength) || !read_i32(ifp, &sumlength)
        || FileLength < 0 || sumlength < 8
        || fseek(ifp, sumlength, SEEK_SET) != 0
        || !read_i32(ifp, &leaves) || leaves < 1 || leaves > SYMBOL_COUNT) {
        printf("压缩文件格式错误!\n");
        fclose(ifp);
        fclose(ofp);
        return;
    }
    n = (int)leaves;
    printf("\n解码信息:源文件长度为%ld个字节,字符种类n=%d\n", FileLength, n);

    if (!read_code_table(ifp, n)) {
        printf("压缩文件格式错误!\n");
        fclose(ifp);
        fclose(ofp);
        return;
    }

    maxlength = 0;
    for (i = 0; i < n; i++)
        if (hfmnode[i].CodeLength > maxlength)
            maxlength = hfmnode[i].CodeLength;

    fseek(ifp, 8, SEEK_SET);         /* start of the encoded bit stream */
    filelength = 0;
    codes[0] = '\0';
    while (filelength < FileLength) {
        /* Keep at least maxlength bits buffered so any code word can match. */
        while ((int)strlen(codes) < maxlength) {
            if (fread(&c, 1, 1, ifp) != 1)
                c = 0;               /* past the end of the file: pad with zeros */
            append_bits(codes, c);
        }
        for (i = 0; i < n; i++)
            if (memcmp(hfmnode[i].code, codes,
                       (size_t)hfmnode[i].CodeLength) == 0)
                break;
        if (i == n) {                /* no code word matches: corrupt input */
            printf("压缩文件格式错误!\n");
            ok = 0;
            break;
        }
        drop_prefix(codes, (size_t)hfmnode[i].CodeLength);
        c = hfmnode[i].ch;
        fwrite(&c, 1, 1, ofp);
        filelength++;
    }

    if (ok) {
        finish = clock();
        duration = (double)(finish - start) / CLOCKS_PER_SEC;
        printf("\n解压完成,解压用时为:%f seconds\n", duration);

        /* The speed is measured against the size of the compressed file. */
        fseek(ifp, 0, SEEK_END);
        FileLength = ftell(ifp);
        speed = (float)FileLength / duration / 1000;
        printf("\n解压速度为:%5.2fKB/S\n", speed);
    }

    fclose(ifp);
    fclose(ofp);
}

/*
 * 2.2 Program run results:
 * 1. Compressing the file "测试.txt" into the file "目标.doc": compression
 *    speed 2055.00 KB/S, compression ratio 64.92%.
 */
哈夫曼编码译码代码
哈夫曼编码译码代码哈夫曼编码(Huffman Coding)是一种用于数据压缩的编码方法,通过对出现频率较高的字符使用较短的码字,对出现频率较低的字符使用较长的码字,从而实现数据的有效压缩。
// The following is a sample Java implementation of Huffman encoding and decoding.
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.PriorityQueue;

/**
 * Huffman encoder/decoder that works on strings.
 *
 * <p>The string returned by {@link #encode(String)} is self-describing: it starts
 * with the code tree in pre-order ({@code '0'} introduces an internal node and is
 * followed by its two subtrees, {@code '1'} introduces a leaf and is followed by
 * the leaf's character) and continues with the {@code '0'}/{@code '1'} code words
 * of the message. {@link #decode(String)} therefore needs no other input.
 */
public class HuffmanCoding {
    private static final char INTERNAL_MARK = '0';
    private static final char LEAF_MARK = '1';

    public static void main(String[] args) {
        String inputString = "This is a sample string";
        String encodedString = encode(inputString);
        String decodedString = decode(encodedString);
        System.out.println("Encoded String: " + encodedString);
        System.out.println("Decoded String: " + decodedString);
    }

    /**
     * Huffman-encodes a string.
     *
     * @param inputString text to encode; {@code null} is treated like the empty string
     * @return the serialized code tree followed by the code bits, or {@code ""} for
     *         empty input
     */
    public static String encode(String inputString) {
        if (inputString == null || inputString.isEmpty()) {
            return "";
        }

        // Count occurrences; insertion order is kept so the tree is deterministic.
        Map<Character, Integer> frequencies = new LinkedHashMap<>();
        for (char character : inputString.toCharArray()) {
            frequencies.merge(character, 1, Integer::sum);
        }

        // Min-heap ordered by frequency.
        PriorityQueue<CharacterFrequency> minHeap =
                new PriorityQueue<>(new CharacterFrequencyComparator());
        for (Map.Entry<Character, Integer> entry : frequencies.entrySet()) {
            minHeap.add(new CharacterFrequency(entry.getKey(), entry.getValue()));
        }

        // Build the Huffman tree by repeatedly merging the two rarest nodes.
        while (minHeap.size() > 1) {
            CharacterFrequency first = minHeap.poll();
            CharacterFrequency second = minHeap.poll();
            CharacterFrequency combined =
                    new CharacterFrequency(null, first.frequency + second.frequency);
            combined.left = first;
            combined.right = second;
            minHeap.add(combined);
        }
        CharacterFrequency root = minHeap.poll();

        Map<Character, String> codes = new HashMap<>();
        generateEncoding(root, "", codes);

        StringBuilder encodedString = new StringBuilder();
        writeTree(root, encodedString);
        for (char character : inputString.toCharArray()) {
            encodedString.append(codes.get(character));
        }
        return encodedString.toString();
    }

    /** Appends the pre-order serialization of the subtree rooted at {@code node}. */
    private static void writeTree(CharacterFrequency node, StringBuilder out) {
        if (node.isLeaf()) {
            out.append(LEAF_MARK).append(node.character.charValue());
            return;
        }
        out.append(INTERNAL_MARK);
        writeTree(node.left, out);
        writeTree(node.right, out);
    }

    /**
     * Walks the tree and records the code word of every leaf: going left appends
     * '0', going right appends '1'.
     */
    private static void generateEncoding(CharacterFrequency node, String prefix,
                                         Map<Character, String> codes) {
        if (node.isLeaf()) {
            // A tree consisting of a single leaf would give an empty code word,
            // which could not be decoded; use "0" instead.
            codes.put(node.character, prefix.isEmpty() ? "0" : prefix);
            return;
        }
        generateEncoding(node.left, prefix + '0', codes);
        generateEncoding(node.right, prefix + '1', codes);
    }

    /**
     * Decodes a string produced by {@link #encode(String)}.
     *
     * @param encodedString serialized tree followed by code bits; {@code null} is
     *                      treated like the empty string
     * @return the original text
     * @throws IllegalArgumentException if the input is not a valid encoding
     */
    public static String decode(String encodedString) {
        if (encodedString == null || encodedString.isEmpty()) {
            return "";
        }

        int[] position = {0};
        CharacterFrequency root = decodeNode(position, encodedString);
        StringBuilder decodedString = new StringBuilder();

        CharacterFrequency current = root;
        for (int i = position[0]; i < encodedString.length(); i++) {
            char bit = encodedString.charAt(i);
            if (bit != '0' && bit != '1') {
                throw new IllegalArgumentException("Unexpected character in code bits: " + bit);
            }
            if (root.isLeaf()) {
                // Single-symbol alphabet: every "0" stands for that symbol.
                if (bit != '0') {
                    throw new IllegalArgumentException("Invalid code bit for single-symbol tree");
                }
                decodedString.append(root.character.charValue());
                continue;
            }
            current = (bit == '0') ? current.left : current.right;
            if (current.isLeaf()) {
                decodedString.append(current.character.charValue());
                current = root;
            }
        }
        if (current != root) {
            throw new IllegalArgumentException("Code bits end in the middle of a code word");
        }
        return decodedString.toString();
    }

    /**
     * Reads one serialized subtree starting at {@code position[0]} and advances
     * {@code position[0]} past it.
     */
    private static CharacterFrequency decodeNode(int[] position, String encodedString) {
        if (position[0] >= encodedString.length()) {
            throw new IllegalArgumentException("Truncated code tree");
        }
        char mark = encodedString.charAt(position[0]++);
        if (mark == LEAF_MARK) {
            if (position[0] >= encodedString.length()) {
                throw new IllegalArgumentException("Truncated code tree");
            }
            return new CharacterFrequency(encodedString.charAt(position[0]++), 0);
        }
        if (mark != INTERNAL_MARK) {
            throw new IllegalArgumentException("Invalid code tree marker: " + mark);
        }
        CharacterFrequency node = new CharacterFrequency(null, 0);
        node.left = decodeNode(position, encodedString);
        node.right = decodeNode(position, encodedString);
        return node;
    }

    /** Tree node: a character with its frequency, or an internal node (character is null). */
    private static class CharacterFrequency {
        Character character;
        int frequency;
        CharacterFrequency left;
        CharacterFrequency right;

        public CharacterFrequency(Character character, int frequency) {
            this.character = character;
            this.frequency = frequency;
        }

        boolean isLeaf() {
            return left == null && right == null;
        }
    }

    /** Orders nodes by ascending frequency; used to build the min-heap. */
    private static class CharacterFrequencyComparator implements Comparator<CharacterFrequency> {
        @Override
        public int compare(CharacterFrequency characterFrequency1,
                           CharacterFrequency characterFrequency2) {
            // Integer.compare cannot overflow, unlike subtracting the two values.
            return Integer.compare(characterFrequency1.frequency, characterFrequency2.frequency);
        }
    }
}
// This code implements Huffman encoding and decoding.
哈夫曼树压缩文件代码
哈夫曼树压缩文件代码哈夫曼树压缩文件是一种常用的数据压缩技术,可以将大量重复出现的数据压缩成较小的文件,从而节省存储空间和传输带宽。
# Below is a simple example of Huffman-tree based file compression.
import heapq
import os


class HuffmanCoding:
    """Compress and decompress a text file with a Huffman code.

    The code table is kept only in memory (``codes`` / ``reverse_codes``), so
    ``decompress`` must be called on the same object that ran ``compress``.
    """

    def __init__(self, path):
        """Remember the path of the text file to compress."""
        self.path = path
        self.heap = []
        self.codes = {}
        self.reverse_codes = {}

    class HeapNode:
        """Tree node; ``char`` is None for internal nodes."""

        def __init__(self, char, freq):
            self.char = char
            self.freq = freq
            self.left = None
            self.right = None

        def __lt__(self, other):
            # heapq only needs "<"; nodes are ordered by frequency.
            return self.freq < other.freq

    def make_frequency_dict(self, text):
        """Return a dict mapping each character of ``text`` to its count."""
        frequency = {}
        for character in text:
            frequency[character] = frequency.get(character, 0) + 1
        return frequency

    def make_heap(self, frequency):
        """Push one leaf node per character onto the min-heap."""
        for key in frequency:
            heapq.heappush(self.heap, self.HeapNode(key, frequency[key]))

    def merge_codes(self):
        """Merge the two rarest nodes until a single tree root remains."""
        while len(self.heap) > 1:
            node1 = heapq.heappop(self.heap)
            node2 = heapq.heappop(self.heap)
            merged = self.HeapNode(None, node1.freq + node2.freq)
            merged.left = node1
            merged.right = node2
            heapq.heappush(self.heap, merged)

    def make_codes_helper(self, root, current_code):
        """Record the code of every leaf below ``root`` (left = '0', right = '1')."""
        if root is None:
            return
        if root.char is not None:
            self.codes[root.char] = current_code
            self.reverse_codes[current_code] = root.char
            return
        self.make_codes_helper(root.left, current_code + '0')
        self.make_codes_helper(root.right, current_code + '1')

    def make_codes(self):
        """Derive the code table from the tree left on the heap."""
        if not self.heap:
            # Empty text: there is no tree and therefore no code.
            return
        root = heapq.heappop(self.heap)
        # A tree made of a single leaf would yield the empty code, which cannot
        # be decoded; give the only symbol the one-bit code '0'.
        start_code = '0' if root.char is not None else ''
        self.make_codes_helper(root, start_code)

    def get_encoded_text(self, text):
        """Return the concatenated code words of all characters in ``text``."""
        return ''.join(self.codes[character] for character in text)

    def pad_encoded_text(self, encoded_text):
        """Pad to a multiple of 8 bits and prefix the padding length as one byte.

        The padding length is always 1..8: an already aligned bit string gets
        a full extra byte of zeros.
        """
        padding = 8 - len(encoded_text) % 8
        padded_info = '{0:08b}'.format(padding)
        return padded_info + encoded_text + '0' * padding

    def get_byte_array(self, padded_encoded_text):
        """Convert a '0'/'1' string whose length is a multiple of 8 into bytes.

        Raises:
            ValueError: if the length is not a multiple of 8.
        """
        if len(padded_encoded_text) % 8 != 0:
            raise ValueError('Encoded text not padded properly')
        b = bytearray()
        for i in range(0, len(padded_encoded_text), 8):
            b.append(int(padded_encoded_text[i:i + 8], 2))
        return b

    def compress(self):
        """Compress ``self.path`` into ``<name>.bin`` and return that path."""
        filename, _ = os.path.splitext(self.path)
        output_path = filename + '.bin'

        # Start from a clean state so the object can be reused.
        self.heap = []
        self.codes = {}
        self.reverse_codes = {}

        # newline='' keeps line endings untouched; the text is deliberately not
        # stripped, otherwise trailing whitespace would be lost.
        with open(self.path, 'r', newline='') as file, \
                open(output_path, 'wb') as output:
            text = file.read()
            frequency = self.make_frequency_dict(text)
            self.make_heap(frequency)
            self.merge_codes()
            self.make_codes()
            encoded_text = self.get_encoded_text(text)
            padded_encoded_text = self.pad_encoded_text(encoded_text)
            b = self.get_byte_array(padded_encoded_text)
            output.write(bytes(b))
        print('Compressed')
        return output_path

    def remove_padding(self, padded_encoded_text):
        """Strip the 8-bit padding header and the trailing padding bits."""
        padding = int(padded_encoded_text[:8], 2)
        body = padded_encoded_text[8:]
        # Slicing by explicit length also works when the padding is 0.
        return body[:len(body) - padding]

    def decode_text(self, encoded_text):
        """Translate a '0'/'1' string back into text using ``reverse_codes``."""
        current_code = ''
        decoded = []
        for bit in encoded_text:
            current_code += bit
            if current_code in self.reverse_codes:
                decoded.append(self.reverse_codes[current_code])
                current_code = ''
        return ''.join(decoded)

    def decompress(self, input_path):
        """Decompress ``input_path`` into ``<name>_decompressed.txt`` and return that path."""
        filename, _ = os.path.splitext(self.path)
        output_path = filename + '_decompressed' + '.txt'
        with open(input_path, 'rb') as file, \
                open(output_path, 'w', newline='') as output:
            bit_string = ''.join(format(byte, '08b') for byte in file.read())
            encoded_text = self.remove_padding(bit_string)
            decoded_text = self.decode_text(encoded_text)
            output.write(decoded_text)
        print('Decompressed')
        return output_path
# With this code you can compress a text file into a binary file and
# decompress it back to the original text.
java源程序 哈夫曼编码
java源程序哈夫曼编码实现Huffman编码需要编写一些代码,因此我将提供一个简单的Java程序,演示如何使用Huffman编码对文本进行压缩和解压缩。
这个例子仅用于演示基本概念,实际使用中可能需要进行更多的优化和错误处理。
```javaimport java.util.PriorityQueue;import java.util.HashMap;import java.util.Map;class HuffmanNode implements Comparable<HuffmanNode> {char data;int frequency;HuffmanNode left, right;public HuffmanNode(char data, int frequency) {this.data = data;this.frequency = frequency;}@Overridepublic int compareTo(HuffmanNode o) {return this.frequency - o.frequency;}}public class HuffmanCoding {private static Map<Character, String> huffmanCodes = new HashMap<>();public static void main(String[] args) {String inputText = "hello world";Map<Character, Integer> frequencyMap = buildFrequencyMap(inputText);HuffmanNode root = buildHuffmanTree(frequencyMap);generateHuffmanCodes(root, "", huffmanCodes);System.out.println("Original Text: " + inputText);String encodedText = encode(inputText);System.out.println("Encoded Text: " + encodedText);String decodedText = decode(encodedText, root);System.out.println("Decoded Text: " + decodedText);}private static Map<Character, Integer> buildFrequencyMap(String text) {Map<Character, Integer> frequencyMap = new HashMap<>();for (char c : text.toCharArray()) {frequencyMap.put(c, frequencyMap.getOrDefault(c, 0) + 1);}return frequencyMap;}private static HuffmanNode buildHuffmanTree(Map<Character, Integer> frequencyMap) { PriorityQueue<HuffmanNode> priorityQueue = new PriorityQueue<>();for (Map.Entry<Character, Integer> entry : frequencyMap.entrySet()) {priorityQueue.add(new HuffmanNode(entry.getKey(), entry.getValue()));}while (priorityQueue.size() > 1) {HuffmanNode left = priorityQueue.poll();HuffmanNode right = priorityQueue.poll();HuffmanNode mergedNode = new HuffmanNode('\0', left.frequency + right.frequency);mergedNode.left = left;mergedNode.right = right;priorityQueue.add(mergedNode);}return priorityQueue.poll();}private static void generateHuffmanCodes(HuffmanNode root, String code, Map<Character, String> huffmanCodes) {if (root != null) {if (root.left == null && root.right == null) {huffmanCodes.put(root.data, 
code);}generateHuffmanCodes(root.left, code + "0", huffmanCodes);generateHuffmanCodes(root.right, code + "1", huffmanCodes);}}private static String encode(String text) {StringBuilder encodedText = new StringBuilder();for (char c : text.toCharArray()) {encodedText.append(huffmanCodes.get(c));}return encodedText.toString();}private static String decode(String encodedText, HuffmanNode root) {StringBuilder decodedText = new StringBuilder();HuffmanNode current = root;for (char bit : encodedText.toCharArray()) {if (bit == '0') {current = current.left;} else if (bit == '1') {current = current.right;}if (current.left == null && current.right == null) {decodedText.append(current.data);current = root;}}return decodedText.toString();}}```请注意,这只是一个简单的示例,实际上Huffman编码可能涉及到更多的细节和考虑因素。
使用huffman编码压缩与解压缩(python)
使用huffman编码压缩与解压缩(python)一、huffman 编码 1.1 huffman 编码介绍 哈夫曼编码(Huffman Coding),又称霍夫曼编码,是一种编码方式,哈夫曼编码是可变字长编码(VLC)的一种。
Huffman于1952年提出一种编码方法,该方法完全依据字符出现概率来构造异字头的平均长度最短的码字,有时称之为最佳编码,一般就叫做Huffman编码(有时也称为霍夫曼编码)。huffman 编码是最优码,也是即时码(证明较为复杂,在此不给出证明)。1.2 huffman 编码 此处介绍二元 huffman 编码。给定一个字符集合 $S=\{s_0,s_1,\cdots,s_q\}$,每个字符的概率为 $P=\{p_0,p_1,\cdots,p_q\}$。(1)将字符集合按概率由大到小重新排列,即使得 $p_i \ge p_{i+1}$;(2)将最末尾的两个字符 $s_{q-1}$ 和 $s_q$ 合并,记为 $s'$,$s'$ 的概率为 $p'=p_{q-1}+p_q$;(3)如果剩余的字符数不为 1,则回到第 1 步。huffman 编码的过程大致就上述三步。当编码完毕后,会构建出一棵码树,每个码字的编码可以从码树中获得。举个简单的例子,对于字符集合 $S=\{A,B,C\}$,概率为 $P=\{0.5,0.3,0.2\}$:将 B, C 合并,记为 BC,$p(BC)=p(B)+p(C)=0.3+0.2=0.5$;然后将 A, BC 合并,记为 ABC,$p(ABC)=p(A)+p(BC)=0.5+0.5=1$。记根节点 ABC 码字为空,从根节点向下遍历,往左走码字末尾添 0,往右走添 1,那么 A, B, C 的码字分别为 0, 10, 11,编码表就是 {A:0,B:10,C:11}。1.3 huffman 编程实现 见后文的代码实现。1.4 测试 首先在代码目录下新建一个 newfile.txt,里边写入 ABCDEFG。下面的代码实现了读取 newfile.txt 并对其压缩,将结果输出至 output.enc,然后对 output.enc 进行解压缩,将解压缩结果输出至 output.dec:# 读入文件with open('newfile.txt', 'rb') as fp_in:str_bytes = fp_in.read()# 构建huffman编码fre_dic = bytes_fre(str_bytes)huffman_dic = build(fre_dic)# 对文本进行编码str_enc, padding = encode(str_bytes, huffman_dic, False)# 输出至文件 output.encwith open('output.enc', 'wb') as fp_out:fp_out.write(str_enc)# 对编码后的文本进行解码str_dec = decode(str_enc, huffman_dic, padding, False)# 输出至文件 output.decwith open('output.dec', 'wb') as fp_out:fp_out.write(str_dec)# 打印huffman字典和填充位print('huffman_dic:', huffman_dic)print('padding:', padding) 观察压缩前和压缩后的文件,发现经过我们的程序压缩之后,文件小了很多。使用 winhex 打开 newfile.txt, output.enc, output.dec 查看对应的十六进制数值。观察 newfile.txt 和 output.dec 的十六进制数值,发现一模一样,说明我们的压缩和解压并没有问题,程序正确。接下来分析 output.enc 文件的内容:output.enc 中的数值为 C1 4E 50,转化成二进制为 11000001 01001110 01010000。我们将中间构造出的编码表打印出来,可以得到编码表:字符 A、B、C、D、E、F、G 的编码依次为 11、000、001、010、011、100、101,以及填充位 padding 的值为 4。我们对 newfile.txt 中的字符 ABCDEFG 进行编码,结果为 11 000 001 010 011 100 101。
哈夫曼压缩解压算法源码
_inc = 1L << 24; _inc >>= _maxlev; _mask.lint.lng = 0; for (i=_maxlev; i>0; i--) {
/* NOTE(review): the role of _n_ is not visible in this excerpt -- presumably
 * the number of entries currently held in _heap; confirm against its users. */
int _n_;
/* Array of END+2 records; each record pairs a long count with an int node
 * value.  END is defined outside this excerpt. */
struct _heap { long count; int node;
} _heap[ END+2 ];
/* hmove(a,b): copies both fields (count and node) of record a into record b. */
#define hmove(a,b) {(b).count = (a).count; (b).node = (a).node;}
int ntEncode( outbuff,inbuff,buflen ) char *outbuff; char *inbuff; int buflen; {
outbuff = malloc( len ); if( outbuff == NULL ){
puts( "Not enough memory" ); exit( 1 ); } printf( "Packing %s to ",fname ); ch = *fname; *fname = '$'; printf( "%s... ",fname ); arc = ntEncode( outbuff,inbuff,len ); if( arc == 0 ){ puts( "Encode failure" ); exit( 1 ); } f = _creat( fname,FA_ARCH ); if( f < 0 ){ puts( "Create error" ); exit( 1 ); } er = _write( f,&ch,1 ); er = _write( f,outbuff,arc ); if( er < 0 ){ puts( "Write error" ); exit( 1 ); } _close( f ); printf( "Success. %.2f%%\n",((float)(len-arc)/(float)len)*100.0 ); } else{ len = ntGetorig( inbuff+1 ); outbuff = malloc( len ); if( outbuff == NULL ){ puts( "Not enough memory" ); exit( 1 ); } printf( "Unpacking %s to ",fname ); *fname = *inbuff; printf( "%s... ",fname ); arc = ntDecode( outbuff,inbuff+1 ); if( arc == 0 ){ puts( "Decode failure" ); exit( 1 ); } f = _creat( fname,FA_ARCH );
哈夫曼树与文件解压压缩C言代码
哈夫曼树与文件解压压缩C言代码1.问题描述哈弗曼树的编码与译码—功能:实现对任何类型文件的压缩与解码—输入:源文件,压缩文件—输出:解码正确性判定,统计压缩率、编码与解码速度—要求:使用边编码边统计符号概率的方法(自适应Huffman编码)和事先统计概率的方法(静态Huffman编码)2.1程序清单程序书签:1.main函数2.压缩函数3.select函数4.encode函数5.解压函数#include <stdio.h>#include <string.h>#include <stdlib.h>#include <conio.h>#include <time.h>struct node{long weight; //权值unsigned char ch;//字符int parent,lchild,rchild;char code[256];//编码的位数最多为256位int CodeLength;//编码长度}hfmnode[512];void compress();void uncompress();//主函数void main(){int choice;printf("请选择1~3:\n");printf("1.压缩文件\n");printf("2.解压文件\n");printf("3.退出!\n");scanf("%d",&choice);if(choice==1)compress();else if(choice==2)uncompress();else if(choice==3)return;else printf("输入错误!");}//压缩函数void compress(){int i,j;char infile[20],outfile[20];FILE *ifp,*ofp;unsigned char c;//long FileLength,filelength=0;int n,m;//叶子数和结点数int s1,s2; //权值最小的两个结点的标号char codes[256];long sumlength=0;float rate,speed;int count=0;clock_t start1, start2,finish1,finish2;double duration1,duration2;void encode(struct node *nodep,int n);//编码函数int select(struct node *nodep,int pose);//用于建哈弗曼树中选择权值最小的结点的函数printf("请输入要压缩的文件名:");scanf("%s",infile);ifp=fopen(infile,"rb");if(ifp==NULL){printf("文件名输入错误,文件不存在!\n");return;}printf("请输入目标文件名:");scanf("%s",outfile);ofp=fopen(outfile,"wb");if(ofp==NULL){printf("文件名输入错误,文件不存在!\n");return;}start1=clock() ;//开始计时1//统计文件中字符的种类以及各类字符的个数//先用字符的ASCII码值代替结点下标FileLength=0;while(!feof(ifp)){fread(&c,1,1,ifp);hfmnode[c].weight++;FileLength++;}FileLength--; //文件中最后一个字符的个数会多统计一次,所以要减一hfmnode[c].weight--;//再将ASCII转换为字符存入到结点的ch成员里,同时给双亲、孩子赋初值-1n=0;for(i=0;i<256;i++)if(hfmnode[i].weight!=0){hfmnode[i].ch=(unsigned 
char)i;n++;//叶子数hfmnode[i].lchild=hfmnode[i].rchild=hfmnode[i].parent=-1;}m=2*n-1;//哈弗曼树结点总数j=0;for(i=0;i<256;i++)//去掉权值为0的结点if(hfmnode[i].weight!=0){hfmnode[j]=hfmnode[i];j++;}for(i=n;i<m;i++)//初始化根结点{hfmnode[i].lchild=hfmnode[i].rchild=-1;hfmnode[i].parent=-1;}//建立哈弗曼树for(i=n;i<m;i++){s1=select(hfmnode,i-1);hfmnode[i].lchild=s1;hfmnode[s1].parent=i;s2=select(hfmnode,i-1);hfmnode[i].rchild=s2;hfmnode[s2].parent=i;hfmnode[i].weight=hfmnode[s1].weight+hfmnode[s2].weight;}//编码encode(hfmnode,n);finish1=clock();duration1=(double)(finish1- start1) / CLOCKS_PER_SEC;/*printf( "哈弗曼树编码用时为:%f seconds\n", duration1 );*/printf("编码完成,是否查看编码信息: y or n?\n");c=getch();if(c=='y'){ printf("\n");printf("叶子数为%d,结点数为%d\n",n,m);for(i=0;i<n;i++)printf("%d号叶子结点的权值为:%ld,双亲为:%d,左右孩子:%d,编码为:%s\n",i,hfmnode[i].weight,hfmnode[i].parent,hfmnode[i].lchild,hfmnode[i].code);}start2=clock() ;//开始计时2fseek(ifp,0,SEEK_SET);//将ifp指针移到文件开头位置fwrite(&FileLength,4,1,ofp);//将FileLength写入目标文件的前4个字节的位置fseek(ofp,8,SEEK_SET);//再将目标文件指针ofp移到距文件开头8个字节位置codes[0]=0;//将编码信息写入目标文件while(!feof(ifp)){fread(&c,1,1,ifp);filelength++;for(i=0;i<n;i++)if(c==hfmnode[i].ch) break; //ch必须也为unsigned 型strcat(codes,hfmnode[i].code);while(strlen(codes)>=8){for(i=0;i<8;i++)//将codes的前8位01代码表示的字符存入c{if(codes[i]=='1')c=(c<<1)|1;else c=c<<1;}fwrite(&c,1,1,ofp); //将新的字符写入目标文件sumlength++;strcpy(codes,codes+8);//更新codes的值}if(filelength==FileLength) break;}//再将剩余的不足8位的01代码补全8位,继续写入if(strlen(codes)>0){strcat(codes,"00000000");for(i=0;i<8;i++){if(codes[i]=='1')c=(c<<1)|1;else 
c=c<<1;}fwrite(&c,1,1,ofp);sumlength++;}sumlength+=8;printf("编码区总长为:%ld个字节\n",sumlength-8);//将sumlength和n的值写入目标文件,为的是方便解压fseek(ofp,4,SEEK_SET);fwrite(&sumlength,4,1,ofp);//把sumlength写进目标文件的第5-8个字节里fseek(ofp,sumlength,SEEK_SET);fwrite(&n,4,1,ofp);//把叶子数n写进编码段后面的4个字节的位置//为方便解压,把编码信息存入n后面的位置//存储方式为:n*(字符值(1个字节)+该字符的01编码的位数(1个字节)+编码(字节数不确定,用count来计算总值))for(i=0;i<n;i++){fwrite(&(hfmnode[i].ch),1,1,ofp);c=hfmnode[i].CodeLength;//编码最长为256位,因此只需用一个字节存储fwrite(&c,1,1,ofp);//写入字符的编码if(hfmnode[i].CodeLength%8!=0)for(j=hfmnode[i].CodeLength%8;j<8;j++)//把编码不足8位的在低位补0,赋值给C,再把C写入strcat(hfmnode[i].code,"0");while(hfmnode[i].code[0]!=0)//开始存入编码,每8位二进制数存入一个字节{c=0;for(j=0;j<8;j++){if(hfmnode[i].code[j]=='1')c=(c<<1)|1;else c=c<<1;}strcpy(hfmnode[i].code,hfmnode[i].code+8);//编码前移8位,继续存入编码count++; //编码占的字节数的总值fwrite(&c,1,1,ofp);}}printf("\n");finish2=clock();duration2=(double)(finish2- start2) / CLOCKS_PER_SEC;/*printf( "写入目标文件用时为:%f seconds\n", duration2);*/printf( "压缩用时为:%f seconds\n", duration1+duration2);speed=(float)FileLength/(duration1+duration2)/1000;printf("\n压缩速率为:%5.2f KB/S\n",speed);printf("\n");printf("源文件长度为:%ld个字节\n",FileLength);sumlength=sumlength+4+n*2+count; //计算压缩后文件的长度printf("压缩后文件长度为:%ld个字节\n",sumlength);rate=(float)sumlength/(float)FileLength;printf("压缩率(百分比)为:%4.2f%%%\n",rate*100);fclose(ifp);fclose(ofp);return;}//返回书签//建立哈弗曼树中用于选择最小权值结点的函数int select(struct node *nodep,int pose){int i;int s1;long min=2147483647;//s初值为long型的最大值for(i=0;i<=pose;i++){if(nodep[i].parent!=-1)continue;if(nodep[i].weight<min){min=nodep[i].weight;s1=i;}}return s1;}//返回书签//哈弗曼编码函数void encode(struct node *nodep,int n){ //从叶子向根求每个字符的哈弗曼编码int start;int i,f,c;char codes[256];codes[n-1]='\0'; //编码结束符for(i=0;i<n;i++) //逐个字符求哈弗曼编码{start=n-1;for(c=i,f=nodep[i].parent;f!=-1;c=f,f=nodep[f].parent){start--;if(nodep[f].lchild==c)codes[start]='0';else codes[start]='1';}strcpy(nodep[i].code,&codes[start]);nodep[i].CodeLength=strlen(nodep[i].code);}}//返回书签//解压函数void uncompress() //解压文件{clock_t start, 
finish;double duration;FILE *ifp,*ofp;char infile[20],outfile[20];long FileLength,sumlength,filelength;int n,m;int i,j,k;char buf[256],codes[256];unsigned char c;int maxlength;float speed;printf("请输入要解压的文件名:");scanf("%s",infile);ifp=fopen(infile,"rb");if(ifp==NULL){printf("文件名输入错误,文件不存在!\n");return;}printf("请输入目标文件名:");scanf("%s",outfile);ofp=fopen(outfile,"wb");if(ofp==NULL){printf("文件名输入错误,文件不存在!\n");return;}start=clock() ;//开始计时fread(&FileLength,4,1,ifp);//从压缩文件读出FileLength、sumlengthfread(&sumlength,4,1,ifp);fseek(ifp,sumlength,SEEK_SET); //利用sumlength读出n的值fread(&n,4,1,ifp);printf("\n解码信息:源文件长度为%d个字节,字符种类n=%d\n",FileLength,n);for(i=0;i<n;i++)//读结点信息{fread(&hfmnode[i].ch,1,1,ifp);//字符fread(&c,1,1,ifp);//编码长度hfmnode[i].CodeLength=c;hfmnode[i].code[0]=0;if(hfmnode[i].CodeLength%8>0) m=hfmnode[i].CodeLength/8+1;//m为编码占的字节数else m=hfmnode[i].CodeLength/8;for(j=0;j<m;j++)//根据字节长度m读出编码{fread(&c,1,1,ifp);//此处c为01编码转换成的字符itoa(c,buf,2);//字符型编码转换成二进制型(首位为1)//如果编码不够8位,则说明缺少了8-k位0,因此应先在前面空缺位写0for(k=8;k>strlen(buf);k--){strcat(hfmnode[i].code,"0");}//再把二进制编码存进hfmnode.code中strcat(hfmnode[i].code,buf);}hfmnode[i].code[hfmnode[i].CodeLength]=0;//去掉编码中多余的0 }//找出编码长度的最大值maxlength=0;for(i=0;i<n;i++)if(hfmnode[i].CodeLength>maxlength)maxlength=hfmnode[i].CodeLength;//开始写入目标文件fseek(ifp,8,SEEK_SET); //指针指向编码区,开始解码filelength=0;codes[0]=0;buf[0]=0;while(1){while(strlen(codes)<maxlength)//codes小于编码长度的最大值时,继续读码{fread(&c,1,1,ifp);itoa(c,buf,2);//还原编码for(k=8;k>strlen(buf);k--){strcat(codes,"0");//把缺掉的0补上}strcat(codes,buf);//codes中此时存的为一串01编码}for(i=0;i<n;i++){ //在codes中查找能使其前weight位和hfmnode.code相同的i值,weight 即为codelengthif(memcmp(hfmnode[i].code,codes,(unsignedint)hfmnode[i].CodeLength)==0) break;}strcpy(codes,codes+hfmnode[i].CodeLength);//更新codes的值c=hfmnode[i].ch;fwrite(&c,1,1,ofp);filelength++;if(filelength==FileLength) break;//写入结束}finish = clock();duration = (double)(finish - start) / CLOCKS_PER_SEC;printf( "\n解压完成,解压用时为:%f seconds\n", duration 
);fseek(ifp,0,SEEK_SET);FileLength=0;while(!feof(ifp)){fread(&c,1,1,ifp);FileLength++;}FileLength--;speed=(float)FileLength/duration/1000;/*printf("此文件长度为:%ld个字节\n",FileLength);*/printf("\n解压速度为:%5.2fKB/S\n",speed);fclose(ifp);fclose(ofp);return;}2.2程序运行结果:1.对文件”测试.txt”进行压缩,压缩后存储在文件”目标.doc”中,压缩速率为:2055.00KB/S,压缩率为64.92%。
huff算法的演示程序java语言实现
huff算法的演示程序java语言实现下面是一个使用Java语言实现的Huffman算法演示程序,可以实现压缩和解压数据的功能:```javapackage com.wqc.tree.huffmancode;import java.io.*;import java.util.*;import java.util.regex.Matcher;import java.util.regex.Pattern;/*** @author 高瞻远瞩* @version 1.0* @motto 算法并不可怕,可怕的是你不敢面对它,加油!别浮躁~冲击大厂* 哈夫曼编码的压缩数据:1,将发送的数据转换成哈夫曼树2 将生成的哈夫曼树转换成哈夫曼编码表规定向左为0 向右为1 是按照补码进行编码* 比如10101000是补码形式对应的原码(符号位不变其他位取反末尾+1)--》11011000 对应的十进制-->-883,把发送的数据根据哈夫曼表转换得到成二进制的数据然后将二进制数据每8位作为一个byte位存放到一个字节数组进行发送* 根据收到的字节数组进行哈夫曼编码解压:1,将字节数组根据哈夫曼编码表得到二进制的字符串序列2,将哈夫曼编码表的k和v交换3,扫描得到的二进制字符串序列对照交换后的哈夫曼编码表根据扫描到的字符串作为key得到对应的value(ascii码) 存放到字节数组返回* 注意事项:如果只是读取文件内容进行编码和解码在控制台输出的化需要考虑最后一位byte位的情况即需要190~192行的代码* 如果是解压文件则不能加上那段代码会报内存不足的异常*/public class HuffmanCode {private static HashMap<Byte, String> hm = new HashMap<>();// k-->字符 v-->哈夫曼编码比如:32=01。
三进制huffman编码代码
一、引言三进制Huffman编码是一种用于数据压缩的技术,它可以将数据以更高效的方式进行存储和传输。
本文将对三进制Huffman编码的原理和实现代码进行详细介绍。
二、Huffman编码原理1. Huffman编码的基本概念Huffman编码是一种变长编码方式,它根据信息的统计特性来构建不同长度的编码,以实现数据的高效压缩。
在Huffman编码中,出现频率高的字符用较短的编码表示,而出现频率低的字符用较长的编码表示,这样可以减少数据的传输量,从而实现数据压缩的目的。
2. 三进制Huffman编码的特点与传统的二进制Huffman编码不同,三进制Huffman编码适用于对三进制数据进行编码压缩。
在三进制Huffman编码中,数据由0、1、2三种符号组成,因此需要重新设计Huffman编码树和编码表来适应这种特殊情况。
三、三进制Huffman编码的实现1. 构建Huffman编码树我们需要根据数据的统计特点来构建Huffman编码树。
对于三进制数据,我们可以统计每种符号出现的频率,并将其构建成一个优先队列(或最小堆);若符号个数为偶数,需先补充一个频率为0的虚拟符号,使符号个数 n 满足 (n−1) mod 2 = 0,然后通过不断合并权值最小的三个节点(而不是二进制Huffman编码中的两个节点)来构建Huffman编码树。
2. 生成编码表在构建好Huffman编码树之后,我们可以通过遍历这棵树来生成每个符号对应的编码。
在遍历的过程中,我们可以使用递归或者迭代的方法来获得每个符号的编码,并将其存储在一个编码表中以便后续的编码和解码操作。
3. 编码和解码操作通过生成的编码表,我们可以将原始的三进制数据编码成对应的Huffman编码,或者将Huffman编码解码成原始的三进制数据。
在编码和解码的过程中,需要注意处理边界情况和错误输入,并保证编码和解码的正确性和高效性。
四、三进制Huffman编码的应用三进制Huffman编码在实际中有着广泛的应用,特别是在存储和传输三进制数据时。
通过三进制Huffman编码,可以大大减少数据的传输量,提高数据传输的效率,从而节省存储和带宽成本。
c++哈夫曼树的文件压缩解压程序全部代码及设计报告
#include <iostream>#include <fstream>#include <queue> //队列容器using namespace std;const int leaf = 256; //最多可能出现的不同字符数const long MAX = 99999999; //表示无穷大typedef struct HTnode{long weight; //记录结点的权值int parent; //记录结点的双亲结点位置int lchild; //结点的左孩子int rchild; //结点的右孩子int *code; //记录该结点的huffman编码int codelen; //记录该结点huffman编码的长度HTnode(){weight = MAX;parent = -1;lchild = -1;rchild = -1;codelen = 0;}}HTnode;class huffmanTree{public:huffmanTree();virtual ~huffmanTree();bool count(char *input); //统计各字符出现的次数,将其写入对应结点的权值void create(); //构造huffman树void code(); //计算每个字符的huffman编码void addbit(int bit); //压缩时对一个未满8个bit的byte中加入一个bitvoid resetbyte(); //将byte清空bool compress(char *input, char *output); //压缩函数成功执行返回true 失败falsebool decompress(char *input, char *output); //解压函数成功执行返回true 失败falsevoid compare(char *input, char *output); //将原文件与压缩后的文件比较private:int root; //记录根结点的位置int leafnum; //记录不同字符的个数HTnode HT[leaf*2-1]; //HTnode结构的数组,用来表示huffman树,树的最大结点个数不会超过leaf*2-1char byte; //压缩文件时用来缓冲bit的变量int bitsnum; //byte中bit的个数int lacknum; //压缩到最后byte中的bit不满8个时填充的0的个数};huffmanTree::huffmanTree(){//初始化成员变量root = 0;leafnum = 0;byte = 0;bitsnum = 0;lacknum = 0;}huffmanTree::~huffmanTree(){for(int i=0; i<leaf; i++){if(HT[i].codelen != 0)delete []HT[i].code;}}//统计各字符出现的次数bool huffmanTree::count(char *input){ifstream ifs;char c;ifs.open(input,ios::binary);if(!ifs){cout << "无法打开文件" << input << '!' 
<< endl;return false;}while(ifs.get(c)){if(HT[c+128].weight==MAX){ //若该字符是第一次出现,先初始化权值HT[c+128].weight = 0;leafnum++;}HT[c+128].weight++; //权值+1}ifs.close();return true;}//选权值最小的两棵树组成新的数void huffmanTree::create(){for(int i=leaf; i<2*leaf-1; i++){int loc1=-1, loc2=-1;for(int j=0; j<i; j++){if(HT[j].parent != -1)continue;if(loc1==-1 || HT[j].weight < HT[loc1].weight){loc2 = loc1;loc1 = j;}else if(loc2==-1 || HT[j].weight < HT[loc2].weight)loc2 = j;}if(HT[loc1].weight==MAX || HT[loc2].weight==MAX || loc2==-1) //只剩一棵树,结束break;HT[i].weight = HT[loc1].weight + HT[loc2].weight;//为了减少压缩文件中需要写入的huffman树的信息,约定小标小的结点做为双亲结点的左孩子HT[i].lchild = loc1>loc2 ? loc2 : loc1;HT[i].rchild = loc1>loc2 ? loc1 : loc2;HT[loc1].parent = i; HT[loc2].parent = i;root = i;}}//计算每个字符的huffman编码void huffmanTree::code(){for(int i=0; i<leaf; i++){int len=0;int loc=i;while(HT[loc].parent!=-1){ //计算huffman编码长度len++;loc = HT[loc].parent;}HT[i].codelen = len;HT[i].code = new int[len];loc = i;for(int j=len-1; j>=0; j--){ //从后往前找,记录结点的huffman编码if(loc==HT[HT[loc].parent].lchild)HT[i].code[j] = 0;elseHT[i].code[j] = 1;loc = HT[loc].parent;}}}//压缩时对一个未满8个bit的byte中加入一个bitvoid huffmanTree::addbit(int bit){if(bit == 0)byte = byte << 1; //若新增的bit为0,则直接将byte按位左移elsebyte = ((byte << 1) | 1); //若新增的bit为1,先将byte按位左移,再与1按位或运算bitsnum++;}//将byte清空void huffmanTree::resetbyte(){byte = 0;bitsnum = 0;}//压缩函数成功执行返回true 失败falsebool huffmanTree::compress(char *input, char *output){if( !count(input) )return false;create();code();ifstream ifs;ofstream ofs;ifs.open(input,ios::binary);ofs.open(output,ios::binary);char c;if(!ifs){cout << "无法打开文件" << input << '!' << endl;return false;}if(!ofs){cout << "无法打开文件" << output << '!' 
<< endl;return false;}ofs.put(0); //预留一个字符,等压缩完后在该位置写入不足一个byte的bit个数ofs.put(root-384); //将根节点的位置-384写入(为使该值不超过char的最大表示范围)for(int i=0; i<leaf*2-1; i++){ //写入每个结点的双亲结点位置if(HT[i].parent==-1) //若该节点没有双亲结点,则写入127(一个字节所能表示的最大值)ofs.put(127);else //否则将双亲结点的位置-384再写入(为使该值不超过char的最大表示范围)ofs.put(HT[i].parent-384);}while(ifs.get(c)){ //将字符的huffman编码并加入byte中int tmp = c+128;for(int i=0; i<HT[tmp].codelen; i++){addbit(HT[tmp].code[i]);if(bitsnum==8){ //若byte已满8位,则输出该byte并将byte清空ofs.put(byte);resetbyte();}}}if(bitsnum!=0){ //处理最后未满8个字符的byte,用0填充并记录填充的个数for(int i=bitsnum; i<8; i++){addbit(0);lacknum++;}ofs.put(byte);resetbyte();}ofs.seekp(0,ios::beg); //将写指针移动到文件开头ofs.put(lacknum); //写入最后一个字节缺失的bit个数ifs.close();ofs.close();return true;}//解压函数成功执行返回true 失败falsebool huffmanTree::decompress(char *input, char *output){queue<char> q;char c;ifstream ifs;ofstream ofs;ifs.open(input,ios::binary);ofs.open(output,ios::binary);if(!ifs){cout << "无法打开文件" << input << '!' << endl;return true;}if(!ofs){cout << "无法打开文件" << output << '!' 
<< endl;return false;}ifs.get(c);lacknum = c; //读出最后一个字节缺失的bit个数ifs.get(c);root = c+384; //读出根结点的位置for(int i=0; i<leaf*2-1; i++){ //建立各结点之间的双亲孩子关系ifs.get(c);if(c==127)continue;else{HT[i].parent = c+384;if(HT[c+384].lchild==-1)HT[c+384].lchild = i;elseHT[c+384].rchild = i;}}int point = root;//为了方便处理最后一个可能有缺失bit的字节,先将读出的数据放入队列while(ifs.get(c))q.push(c);//还原文件过程while(q.size()>1){ //还未到最后一个字节c = q.front();for(int i=0; i<8; i++){if(int(c&128)==0){point = HT[point].lchild;if(HT[point].lchild==-1 && HT[point].rchild==-1){ofs.put(char(point-128));point = root;}c = c << 1;}else{point = HT[point].rchild;if(HT[point].lchild==-1 && HT[point].rchild==-1){ofs.put(char(point-128));point = root;}c = c << 1;}}q.pop();}c = q.front(); //最后一个字节for(i=0; i<8-lacknum; i++){if(int(c&128)==0){point = HT[point].lchild;if(HT[point].lchild==-1 && HT[point].rchild==-1){ofs.put(char(point-128));point = root;}c = c << 1;}else{point = HT[point].rchild;if(HT[point].lchild==-1 && HT[point].rchild==-1){ofs.put(char(point-128));point = root;}c = c << 1;}}q.pop();ifs.close();ofs.close();return true;}//将原文件与压缩后的文件比较void huffmanTree::compare(char *input, char *output){ifstream origin, compress;origin.open(input,ios::binary);compress.open(output,ios::binary);if(!origin){cout << "无法打开文件" << input << '!' << endl;return;}if(!compress){cout << "无法打开文件" << output << '!' 
<< endl;return;}double total1=0, total2=0;char c;while(origin.get(c))total1++;while(compress.get(c))total2++;cout << "原文件大小:" << total1 << " Byte" << endl;cout << "压缩后大小:" << total2 << " Byte" << endl;cout << "压缩率:" << total2/total1*100 << '%' << endl;origin.close();compress.close();}void main(){int choice = 1;char input[255], output[255];huffmanTree h;while(choice){cout<<" ****************************************************"<<endl;cout<<" * 基于哈夫曼树的文件压缩/解压程序*"<<endl;cout<<" * *"<<endl;cout<<" * 1) 压缩*"<<endl;cout<<" * *"<<endl;cout<<" * 2) 解压*"<<endl;cout<<" * *"<<endl;cout<<" * 0) 退出*"<<endl;cout<<" * *"<<endl;cout<<" * 说明:请输入相应的操作序号*"<<endl;cout<<" ****************************************************"<<endl;cout<<"请选择:";cin >> choice;switch(choice){case 1:{cout << "请输入待压缩的文件名:";cin >> input;cout << "请输入压缩后的文件名:";cin >> output;if( press(input,output)){pare(input,output);cout<<"文件压缩成功!"<<endl;}else{cout<<"文件压缩失败!"<<endl;}}break;case 2:{cout << "请输入待解压的文件名:";cin >> input;cout << "请输入解压后的文件名:";cin >> output;if (h.decompress(input,output))cout<<"文件解压成功!"<<endl;elsecout<<"文件解压失败!"<<endl;}break;case 0:break;default:cout << "参数错误!请重新输入" << endl;}cout << endl;}}韶关学院计算机科学学院数据结构课程设计题目:基于哈夫曼树的文件压缩/解压程序学生姓名:曹键明学号:11115011018专业:计算机科学与技术班级:11级(1)班指导教师姓名及职称:陈正铭讲师起止时间:2013 年3 月——2013 年4 月1 需求分析1.1课题背景及意义近年来,随着计算机技术的发展,多媒体计算机技术、计算机网络技术以及现代多媒体通信技术正在向着信息化、高速化、智能化迅速发展。
huffman压缩存储和解压缩源代码(精品文档)
fst.open(filename, ifstream::in|ifstream::binary); wfile(fst, pssfile, Bytecode); fst.close(); pssfile.close(); system("pause"); }
void HuffmanCoding(array<huffman, m> &HT, array<string, n> &code, array<double, n> freq) { int j; unsigned char cj = 0; for (j = 0; j < n; ++j) { HT[cj].freq = freq[cj]; HT[cj].lchild = 0; HT[cj].rchild = 0; HT[cj].parent = 0; ++cj; } int i,s1, s2; for (i= n ; i <m; ++i) { myselect(HT, i, s1, s2); HT[s1].parent = i; HT[s2].parent = i; HT[i].lchild = s1; HT[i].rchild = s2; HT[i].freq = HT[s1].freq + HT[s2].freq; } for (i = 0; i < n; ++i) { string cd(n-1, '0'); int start = n-1; int k, f; for (k = i, f = HT[i].parent; f != 0; k = f, f = HT[f].parent) if (HT[f].lchild == k) cd[--start] = '0'; else cd[--start] = '1'; string cds(cd.substr(start, (n-1) - start)); code[i] = cds; }
哈夫曼压缩解压算法源码
哈夫曼压缩解压算法源码HUFFMAN.C
/*
 * Command-line driver: reads the file named by argv[1] into memory and either
 * packs it (when the data after the first byte does not start with the US/RS
 * signature) or unpacks it. When packing, the output name is the input name
 * with its first character replaced by '$', and the replaced character is
 * stored as the first byte of the output; unpacking restores it from there.
 * NOTE(review): the pack buffer is malloc(len) but ntEncode writes a header
 * and table as well, so output larger than the input would overrun it - confirm.
 */
#include <alloc.h>
#include <dos.h>
#include <fcntl.h>
main(argc, argv)
int argc;
char *argv[];
{
    int f,er;
    int len,arc;
    char *inbuff;
    char *outbuff;
    char fname[80];
    char ch;
    if( argc < 2 ){
        puts( "Get me params !" );
        exit( 1 );
    }
    strcpy( fname,argv[1] );
    f = _open( fname,O_RDONLY );
    if( f < 0 ){
        puts( "Open error" );
        exit( 1 );
    }
    len = (int)filelength( f );
    if( len < 0 ){
        puts( "File too large" );
        exit( 1 );
    }
    inbuff = malloc( len );
    if( inbuff == NULL ){
        puts( "Not enough memory" );
        exit( 1 );
    }
    len = _read( f,inbuff,len );
    if( len < 0 ){
        puts( "Read error" );
        exit( 1 );
    }
    _close( f );
    /* The signature is looked for at offset 1: byte 0 of a packed file holds
       the saved first character of the original file name. */
    if( !ntIsarc( inbuff+1 )){
        outbuff = malloc( len );
        if( outbuff == NULL ){
            puts( "Not enough memory" );
            exit( 1 );
        }
        printf( "Packing %s to ",fname );
        ch = *fname;
        *fname = '$';
        printf( "%s... ",fname );
        arc = ntEncode( outbuff,inbuff,len );
        if( arc == 0 ){
            puts( "Encode failure" );
            exit( 1 );
        }
        f = _creat( fname,FA_ARCH );
        if( f < 0 ){
            puts( "Create error" );
            exit( 1 );
        }
        /* Only the result of the second write is checked below. */
        er = _write( f,&ch,1 );
        er = _write( f,outbuff,arc );
        if( er < 0 ){
            puts( "Write error" );
            exit( 1 );
        }
        _close( f );
        printf( "Success. %.2f%%\n",((float)(len-arc)/(float)len)*100.0 );
    }
    else{
        len = ntGetorig( inbuff+1 );
        outbuff = malloc( len );
        if( outbuff == NULL ){
            puts( "Not enough memory" );
            exit( 1 );
        }
        printf( "Unpacking %s to ",fname );
        *fname = *inbuff;
        printf( "%s... ",fname );
        arc = ntDecode( outbuff,inbuff+1 );
        if( arc == 0 ){
            puts( "Decode failure" );
            exit( 1 );
        }
        f = _creat( fname,FA_ARCH );
        if( f < 0 ){
            puts( "Create error" );
            exit( 1 );
        }
        er = _write( f,outbuff,len );
        if( er < 0 ){
            puts( "Write error" );
            exit( 1 );
        }
        _close( f );
        puts( "Success." );
    }
}
NTDECODE.C
/* Signature bytes expected at the start of packed data. */
#define US 037
#define RS 036
char *_inp;               /* read cursor in the packed input              */
char *_outp;              /* write cursor in the output buffer            */
int _origsize;            /* original size read from the header           */
short _dmaxlev;           /* number of tree levels read from the header   */
short _intnodes[25];      /* per level: leaf count, later internal nodes  */
char *_tree[25];          /* per level: first character of that level     */
char _characters[256];    /* characters in level order                    */
char *_eof;               /* end of _characters; doubles as the EOF leaf  */
/*
 * Decode packed data from inbuff into outbuff.
 * Header read here: US, RS, 4-byte size (most significant byte first),
 * level count, one leaf count per level, then the characters of each level.
 * Returns 0 on any header inconsistency, otherwise the result of _decode.
 */
int ntDecode( outbuff,inbuff )
char *outbuff;
char *inbuff;
{
    int c, i, nchildren;
    int inleft;
    _eof = &_characters[0];
    if (inbuff[0] != US) return( 0 );
    if (inbuff[1] != RS) return( 0 );
    _inp = &inbuff[2];
    _origsize = 0;
    for (i=0; i<4; i++)
        _origsize = _origsize*256 + ((*_inp++) & 0377);
    /* NOTE(review): inleft is later used as a count of remaining INPUT bytes
       but is seeded from the ORIGINAL size - confirm this is intended. */
    inleft = _origsize;
    _dmaxlev = *_inp++ & 0377;
    if (_dmaxlev > 24) return( 0 );
    for (i=1; i<=_dmaxlev; i++)
        _intnodes[i] = *_inp++ & 0377;
    for (i=1; i<=_dmaxlev; i++) {
        _tree[i] = _eof;
        for (c=_intnodes[i]; c>0; c--) {
            if (_eof >= &_characters[255]) return( 0 );
            *_eof++ = *_inp++;
        }
    }
    /* One more character is read for the last level; the stored count of
       that level is 2 less than the real one (see the += 2 below). */
    *_eof++ = *_inp++;
    _intnodes[_dmaxlev] += 2;
    inleft -= _inp - &inbuff[0];
    if (inleft < 0) return( 0 );
    /* Convert leaf counts into the number of internal nodes per level,
       working from the deepest level upwards. */
    nchildren = 0;
    for (i=_dmaxlev; i>=1; i--) {
        c = _intnodes[i];
        _intnodes[i] = nchildren /= 2;
        nchildren += c;
    }
    return ( _decode( inleft,outbuff ));
}
/*
 * Walk the bit stream: i is the position within level lev; once i reaches
 * the internal-node count of the level, i - _intnodes[lev] selects a leaf.
 * Returns 1 when the EOF leaf is reached and the decoded length equals
 * _origsize, 0 otherwise.
 */
_decode( inleft,outbuff )
int inleft;
char *outbuff;
{
    int bitsleft, c, i;
    int j, lev;
    char *p;
    _outp = &outbuff[0];
    lev = 1;
    i = 0;
    while (1) {
        if (--inleft < 0) return( 0 );
        c = *_inp++;
        bitsleft = 8;
        while (--bitsleft >= 0) {
            i *= 2;
            if (c & 0200)
                i++;
            c <<= 1;
            if ((j = i - _intnodes[lev]) >= 0) {
                p = &_tree[lev][j];
                if (p == _eof){
                    c = _outp - &outbuff[0];
                    _origsize -= c;
                    if (_origsize != 0) return( 0 );
                    return (1);
                }
                *_outp++ = *p;
                lev = 1;
                i = 0;
            }
            else
                lev++;
        }
    }
}
NTENCODE.C
/* Index of the end-of-data pseudo symbol (one past the 256 byte values). */
#define END 256
/* A long that can also be addressed as four individual chars. */
union FOUR {
    struct { long lng; } lint;
    struct { char c0, c1, c2, c3; } chars;
};
long _count[ END+1 ];       /* doubled occurrence count per symbol         */
union FOUR _insize;         /* input size written into the header          */
int _diffbytes;             /* number of distinct symbols, minus one       */
int _maxlev;                /* longest code length                         */
int _levcount[ 25 ];        /* number of symbols per code length           */
int _lastnode;              /* index of the most recent internal node      */
int _parent[ 2*END+1 ];     /* parent link per node, 0 = none              */
char _length[ END+1 ];      /* code length per symbol                      */
long _bits[ END+1 ];        /* code bits per symbol                        */
union FOUR _mask;           /* scratch word used while emitting bits       */
long _inc;
/* Bytes of _mask in the order they are emitted by _output.
   NOTE(review): presumably assumes c3 is the most significant byte of lng
   (little-endian, 4-byte long) - confirm for the target platform. */
char *_maskshuff[4] = {&(_mask.chars.c3), &(_mask.chars.c2),&(_mask.chars.c1), &(_mask.chars.c0)};
int _n_;                    /* number of entries currently in _heap        */
struct _heap {
    long count;
    int node;
} _heap[ END+2 ];           /* 1-based min-heap ordered by count           */
#define hmove(a,b) {(b).count = (a).count; (b).node = (a).node;}
/*
 * Encode buflen bytes of inbuff into outbuff.
 * Returns the number of bytes produced by _output, or 0 when encoding is
 * refused (only one distinct byte value, or a code longer than 24 bits).
 */
int ntEncode( outbuff,inbuff,buflen )
char *outbuff;
char *inbuff;
int buflen;
{
    register int c, i, p;
    long bitsout;
    _input( inbuff,buflen );
    _diffbytes = -1;
    /* _input adds 2 per occurrence, so a count of 1 makes END lighter than
       every symbol that actually occurs. */
    _count[ END ] = 1;
    _insize.lint.lng = 0L;
    _n_ = 0;
    for (i=END; i>=0; i--) {
        _parent[i] = 0;
        if (_count[i] > 0) {
            _diffbytes++;
            _insize.lint.lng += _count[i];
            _heap[++_n_].count = _count[i];
            _heap[_n_].node = i;
        }
    }
    if (_diffbytes == 1) return( 0 );
    /* Undo the doubling to get the byte count. */
    _insize.lint.lng >>= 1;
    for (i=_n_/2; i>=1; i--)
        _heapify(i);
    /* Build the tree: take the two lightest entries, give both the same new
       parent, and put the parent back with the combined count. */
    _lastnode = END;
    while (_n_ > 1) {
        _parent[_heap[1].node] = ++_lastnode;
        _inc = _heap[1].count;
        hmove (_heap[_n_], _heap[1]);
        _n_--;
        _heapify(1);
        _parent[_heap[1].node] = _lastnode;
        _heap[1].node = _lastnode;
        _heap[1].count += _inc;
        _heapify(1);
    }
    _parent[_lastnode] = 0;
    /* Code length of a symbol = number of parent links up to the root. */
    bitsout = _maxlev = 0;
    for (i=1; i<=24; i++)
        _levcount[i] = 0;
    for (i=0; i<=END; i++) {
        c = 0;
        for (p=_parent[i]; p!=0; p=_parent[p])
            c++;
        _levcount[c]++;
        _length[i] = c;
        if (c > _maxlev)
            _maxlev = c;
        bitsout += c*(_count[i]>>1);
    }
    if (_maxlev > 24) return( 0 );
    /* Assign code values level by level, deepest level first, in symbol order. */
    _inc = 1L << 24;
    _inc >>= _maxlev;
    _mask.lint.lng = 0;
    for (i=_maxlev; i>0; i--) {
        for (c=0; c<=END; c++)
            if (_length[c] == i) {
                _bits[c] = _mask.lint.lng;
                _mask.lint.lng += _inc;
            }
        _mask.lint.lng &= ~_inc;
        _inc <<= 1;
    }
    return( _output( outbuff,inbuff,buflen ));
}
/* Clear the counts of the 256 byte values and add 2 per occurrence. */
_input ( inbuff,buflen )
char *inbuff;
int buflen;
{
    register int i;
    for (i=0; i<END; i++)
        _count[i] = 0;
    while (buflen > 0)
        _count[inbuff[--buflen]&0377] += 2;
}
/*
 * Write header, level table, characters and the bit stream to outbuff.
 * Returns the number of bytes written.
 */
int _output( outbuff,inbuff,buflen )
char *outbuff;
char *inbuff;
int buflen;
{
    int c, i;
    char *inp;
    register char **q, *outp;
    register int bitsleft;
    long temp;
    outbuff[0] = 037; /* ascii US */
    outbuff[1] = 036; /* ascii RS */
    /* Input size in bytes 2..5, most significant byte first. */
    temp = _insize.lint.lng;
    for (i=5; i>=2; i--) {
        outbuff[i] = (char) (temp & 0377);
        temp >>= 8;
    }
    outp = outbuff+6;
    *outp++ = _maxlev;
    for (i=1; i<_maxlev; i++)
        *outp++ = _levcount[i];
    /* The deepest level is stored as its count minus 2. */
    *outp++ = _levcount[_maxlev]-2;
    /* Characters grouped by code length; END itself is not written. */
    for (i=1; i<=_maxlev; i++)
        for (c=0; c<END; c++)
            if (_length[c] == i)
                *outp++ = c;
    inp = inbuff;
    bitsleft = 8;
    /* Emit one code per input byte, then the code of END as terminator. */
    do {
        c = (--buflen < 0) ? END : (*inp++ & 0377);
        _mask.lint.lng = _bits[c]<<bitsleft;
        q = &_maskshuff[0];
        if (bitsleft == 8)
            *outp = **q++;
        else
            *outp |= **q++;
        bitsleft -= _length[c];
        while (bitsleft < 0) {
            *++outp = **q++;
            bitsleft += 8;
        }
    } while (c != END);
    if (bitsleft < 8)
        outp++;
    c = outp-outbuff;
    return (c);
}
/* Sift entry i of the 1-based min-heap _heap down to its place. */
_heapify( i )
int i;
{
    register int k;
    int lastparent;
    struct _heap heapsubi;
    hmove (_heap[i], heapsubi);
    lastparent = _n_/2;
    while (i <= lastparent) {
        k = 2*i;
        /* Pick the lighter child; k < _n_ guards the right child's validity. */
        if (_heap[k].count > _heap[k+1].count && k < _n_)
            k++;
        if (heapsubi.count < _heap[k].count)
            break;
        hmove (_heap[k], _heap[i]);
        i = k;
    }
    hmove (heapsubi, _heap[i]);
}
NTGETORI.C
/* Return the 4-byte size stored after the two signature bytes of inbuff
   (most significant byte first). */
int ntGetorig( inbuff )
char *inbuff;
{
    int i;
    char *in;
    int size;
    in = inbuff+2;
    size = 0;
    for (i=0; i<4; i++)
        size = size*256 + ((*in++) & 0377);
    return( size );
}
NTISARC.C
#define US 037
#define RS 036
/* Return 1 when inbuff starts with the US RS signature, else 0. */
int ntIsarc( inbuff )
char *inbuff;
{
    if( inbuff[0] == US && inbuff[1] == RS )
        return( 1 );
    else
        return( 0 );
}
。
霍夫曼编码代码
霍夫曼编码代码霍夫曼编码是一种用于数据压缩的无损编码算法,它通过将出现频率较高的字符用较短的编码表示,而将出现频率较低的字符用较长的编码表示,从而实现对数据的压缩。
下面是一个用Python实现霍夫曼编码的参考代码:```pythonfrom heapq import heappop, heappushfrom collections import defaultdictdef build_frequency_table(data):frequency_table = defaultdict(int)for char in data:frequency_table[char] += 1return frequency_tabledef build_huffman_tree(frequency_table):heap = [[weight, [char, ""]] for char, weight infrequency_table.items()]while len(heap) > 1:lo = heappop(heap)hi = heappop(heap)for pair in lo[1:]:pair[1] = '0' + pair[1]for pair in hi[1:]:pair[1] = '1' + pair[1]heappush(heap, [lo[0] + hi[0]] + lo[1:] + hi[1:])return heap[0]def build_encoding_table(huffman_tree):encoding_table = {}for char, code in huffman_tree[1:]:encoding_table[char] = codereturn encoding_tabledef huffman_encode(data):frequency_table = build_frequency_table(data)huffman_tree = build_huffman_tree(frequency_table)encoding_table = build_encoding_table(huffman_tree)encoded_data = ''.join(encoding_table[char] for char in data)return encoded_datadef huffman_decode(encoded_data, huffman_tree):decoding_table = {code: char for char, code in huffman_tree[1:]} current_code = ''decoded_data = ''for bit in encoded_data:current_code += bitif current_code in decoding_table:char = decoding_table[current_code]decoded_data += charcurrent_code = ''return decoded_data# 示例用法data = 'hello world'encoded_data = huffman_encode(data)print(encoded_data)decoded_data = huffman_decode(encoded_data,build_huffman_tree(build_frequency_table(data)))print(decoded_data)```以上代码中,`build_frequency_table()`函数用于构建字符出现频率的字典,`build_huffman_tree()`函数构建霍夫曼树,`build_encoding_table()`函数构建字符编码表,`huffman_encode()`函数对数据进行编码,`huffman_decode()`函数对编码后的数据进行解码。
用哈夫曼树实现压缩解压
用哈夫曼树实现压缩解压程序是用VC++6.0编译完成的,可以完成对任意文件的压缩解压(为方便寻找,压缩出的文件与待压缩文件在同一文件夹中),但压缩文件夹还不可以,另外该程序还能打印出压缩时所建立的哈夫曼树及哈夫曼编码。
源代码如下:#include <stdio.h>#include <string.h>#include <stdlib.h>#include <windows.h>typedef struct node{long w;short p,l,r;}htnode,*htnp;typedef struct huffman_code{unsigned char len;unsigned char *codestr;}hufcode;typedef char **huffmancode;int initial_files(char *source_filename,FILE **inp,char *obj_filename,FILE **outp);char *create_filename(char *source_filename,char* obj_filename);int compress(char *source_filename,char *obj_filename);long frequency_data(FILE *in,long fre[]);int search_set(htnp ht,int n,int *s1, int *s2);int create_hftree(long w[],int n,htnode ht[]);int encode_hftree(htnp htp,int n,hufcode hc[]);unsigned char chars_to_bits(const unsigned char chars[8]);int write_compress_file(FILE *in,FILE *out,htnp ht,hufcode hc[],char* source_filename,long source_filesize);int decompress(char *source_filename,char *obj_filename);void get_mini_huffmantree(FILE* in,short mini_ht[][2]);int write_decompress_file(FILE *in,FILE* out,short mini_ht[][2],long bits_pos,long obj_filesize);int d_initial_files(char *source_filename,FILE **inp,char *obj_filename,FILE **outp);main(){int s;char filename[10];system("color 3F");printf("***************************************\n");printf(" * 菜单:*\n");printf(" * 1.——————压缩——————*\n");printf(" * 2.—————-解压缩—————- *\n");printf(" * 0.——————退出——————*\n");printf("***************************************\n");scanf("%d",&s);while(s!=0){getchar();switch(s){case 1:puts("请输入待压缩文件路径:");gets(filename);compress(filename,NULL);break;case 2:puts("请输入待解压文件路径:");gets(filename);decompress(filename,NULL);break;default :printf("指令错误!请重新输入指令:\n");}puts(" ");printf("***************************************\n");printf(" * 菜单:*\n");printf(" * 1.——————压缩——————*\n");printf(" * 2.—————-解压缩—————- *\n");printf(" * 0.——————退出——————*\n");printf("***************************************\n");scanf("%d",&s);}}int initial_files(char *source_filename,FILE **inp,char *obj_filename,FILE **outp){if(fopen(source_filename,"rb")==NULL){return 
-1;}if(obj_filename==NULL){if((obj_filename=(char*)malloc(256*sizeof(char)))==NULL){return -1;}create_filename(source_filename,obj_filename);}if(strcmp(source_filename,obj_filename)==0){return -1;}printf("待压缩文件:%s,压缩文件:%s\n",source_filename,obj_filename);if((*outp=fopen(obj_filename,"wb"))==NULL){return -1;}if((*inp=fopen(source_filename,"rb"))==NULL){return -1;}free(obj_filename);return 0;}char *create_filename(char *source_filename,char* obj_filename) {char *temp;if((temp=strrchr(source_filename,'.'))==NULL){strcpy(obj_filename,source_filename);strcat(obj_filename,".zip");}else{strncpy(obj_filename,source_filename,temp-source_filename);obj_filename[temp-source_filename]='\0';strcat(obj_filename,".zip");}return obj_filename;}int compress(char *source_filename,char *obj_filename){FILE *in,*out;char ch;int error_code,i,j;float compress_rate;hufcode hc[256];htnode ht[256*2-1];long frequency[256],source_filesize,obj_filesize=0;error_code=initial_files(source_filename,&in,obj_filename,&out);if(error_code!=0){puts("文件打开失败!请重新输入文件路径:");return error_code;source_filesize=frequency_data(in,frequency);printf("文件大小%ld 字节\n",source_filesize);error_code=create_hftree(frequency,256,ht);if(error_code!=0){puts("建立哈夫曼树失败!");return error_code;}error_code=encode_hftree(ht,256,hc);if(error_code!=0){puts("建立哈夫曼编码失败!");return error_code;}for(i=0;i<256;i++)obj_filesize+=frequency[i]*hc[i].len;obj_filesize=obj_filesize%8==0obj_filesize/8:obj_filesize/8+1; for(i=0;i<256-1;i++)obj_filesize+=2*sizeof(short);obj_filesize+=strlen(source_filename)+1;obj_filesize+=sizeof(long);obj_filesize+=sizeof(unsigned int);compress_rate=(float)obj_filesize/source_filesize;printf("压缩文件大小:%ld字节,压缩比例:%.2lf%%\n",obj_filesize,compress_rate*100);error_code=write_compress_file(in,out,ht,hc,source_filename,source_file size);if(error_code!=0){puts("写入文件失败!");return error_code;}puts("压缩完成!");puts(" ");puts("是否打印该文件中字符对应的huffman树及编码");puts(" Please input Y OR N");do{scanf("%s",&ch);switch(ch){case 
'Y':puts("以下是哈夫曼树:");for(i=256;i<256*2-2;i++){if(ht[i].w>0)printf("%-10d%-10d%-10d%-10d%-10d\n",i,ht[i].w,ht[i].p,ht[i].l,ht[i] .r);}puts("以下是哈夫曼编码:");for(i=0;i<256;i++){if(frequency[i]==0)i++;else{printf("%d\t",frequency[i]);for(j=0;j<hc[i].len;j++)printf(" %d",hc[i].codestr[j]);printf("\n");}}break;case 'N': break;default :printf("指令错误!请重新输入指令:\n");}}while(ch!='Y'&&ch!='N');fclose(in);fclose(out);for(i=0;i<256;i++){free(hc[i].codestr);}return 0;}long frequency_data(FILE *in,long frequency[]){int i,read_len;unsigned char buf[256];long filesize;for(i=0;i<256;i++){frequency[i]=0;}fseek(in,0L,SEEK_SET);read_len=256;while(read_len==256){read_len=fread(buf,1,256,in);for(i=0;i<read_len;i++){frequency[*(buf+i)]++;}}for(i=0,filesize=0;i<256;i++){filesize+=frequency[i];}return filesize;}int search_set(htnp ht,int n,int *s1, int *s2) {int i,x;long minValue = 1000000,min = 0;for(x=0;x<n;x++){if(ht[x].p==-1) break;}for(i=0;i<n;i++){if(ht[i].p==-1 && ht[i].w < minValue){minValue = ht[i].w;min=i;}}*s1 = min;minValue = 1000000,min = 0;for(i=0;i<n;i++){if(ht[i].p==-1 && ht[i].w < minValue && i != *s1){minValue = ht[i].w;min=i;}}*s2 = min;return 1;}int create_hftree(long w[],int n,htnode ht[]) {int m,i,s1,s2;if (n<1) return -1;m=2*n-1;if (ht==NULL) return -1;for(i=0;i<n;i++){ht[i].w=w[i];ht[i].p=ht[i].l=ht[i].r=-1;}for(;i<m;i++){ht[i].w=ht[i].p=ht[i].l=ht[i].r=-1;}for(i=n;i<m;i++){search_set(ht,i,&s1,&s2);ht[s1].p = ht[s2].p = i;ht[i].l = s1;ht[i].r = s2;ht[i].w = ht[s1].w + ht[s2].w;}return 0;}int encode_hftree(htnp htp,int n,hufcode hc[]){int i,j,p,codelen;unsigned char *code=(unsigned char*)malloc(n*sizeof(unsigned char));if (code==NULL) return -1;for(i=0;i<n;i++){for(p=i,codelen=0;p!=2*n-2;p=htp[p].p,codelen++){code[codelen]=(htp[htp[p].p].l==p0:1);}if((hc[i].codestr=(unsigned char *)malloc((codelen)*sizeof(unsigned char)))==NULL){return -1;}hc[i].len=codelen;for(j=0;j<codelen;j++){hc[i].codestr[j]=code[codelen-j-1];}}free(code);return 0;}unsigned char chars_to_bits(const 
unsigned char chars[8]){int i;unsigned char bits=0;bits|=chars[0];for(i=1;i<8;++i){bits<<=1;bits|=chars[i];}return bits;}int write_compress_file(FILE *in,FILE *out,htnp ht,hufcode hc[],char* source_filename,long source_filesize){unsigned int i,read_counter,write_counter,zip_head=0xFFFFFFFF;unsigned char write_char_counter,code_char_counter,copy_char_counter,read_buf[256],write_buf[256],write_chars[8],filename_size=strlen(source _filename);hufcode *cur_hufcode;fseek(in,0L,SEEK_SET);fseek(out,0L,SEEK_SET);fwrite(&zip_head,sizeof(unsigned int),1,out);fwrite(&filename_size,sizeof(unsigned char),1,out);fwrite(source_filename,sizeof(char),filename_size,out);fwrite(&source_filesize,sizeof(long),1,out);for(i=256;i<256*2-1;i++){fwrite(&(ht[i].l),sizeof(ht[i].l),1,out);fwrite(&(ht[i].r),sizeof(ht[i].r),1,out);}write_counter=write_char_counter=0;read_counter=256;while(read_counter==256){read_counter=fread(read_buf,1,256,in);for(i=0;i<read_counter;i++){cur_hufcode=&hc[read_buf[i]];code_char_counter=0;while(code_char_counter!=cur_hufcode->len){copy_char_counter= (8-write_char_counter > cur_hufcode->len-code_char_countercur_hufcode->len-code_char_counter : 8-write_char_counter);memcpy(write_chars+write_char_counter,cur_hufcode->codestr+code_c har_counter,copy_char_counter);write_char_counter+=copy_char_counter;code_char_counter+=copy_char_counter;if(write_char_counter==8){write_char_counter=0;write_buf[write_counter++]=chars_to_bits(write_chars);if(write_counter==256){fwrite(write_buf,1,256,out);write_counter=0;}}}}}fwrite(write_buf,1,write_counter,out);if(write_char_counter!=0){write_char_counter=chars_to_bits(write_chars);fwrite(&write_char_counter,1,1,out);}return 0;}void get_mini_huffmantree(FILE* in,short mini_ht[][2]) {int i;for(i=0;i<256;i++){mini_ht[i][0]=mini_ht[i][1]=-1;}fread(mini_ht[i],sizeof(short),2*(256-1),in);}int write_decompress_file(FILE *in,FILE* out,short mini_ht[][2],long bits_pos,long obj_filesize){long cur_size;unsigned char 
read_buf[256],write_buf[256],convert_bit;unsigned int read_counter,write_counter,cur_pos;fseek(in,bits_pos,SEEK_SET);fseek(out,0L,SEEK_SET);read_counter=256-1;cur_size=write_counter=0;cur_pos=256*2-2;while(cur_size!=obj_filesize){if(++read_counter==256){fread(read_buf,1,256,in);read_counter=0;}for(convert_bit=128;convert_bit!=0;convert_bit>>=1){cur_pos=((read_buf[read_counter]&convert_bit)==0mini_ht[cur_pos][0]: mini_ht[cur_pos][1]);if(cur_pos<256){write_buf[write_counter]=(unsignedchar)cur_pos;if(++write_counter==256){fwrite(write_buf,1,256,out);write_counter=0;}cur_pos=256*2-2;if(++cur_size==obj_filesize){break;}}}}fwrite(write_buf,1,write_counter,out);return 0;}int decompress(char *source_filename,char *obj_filename){int error_code;FILE *in,*out;short mini_ht[256*2-1][2];long obj_filesize;error_code=d_initial_files(source_filename,&in,obj_filename,&out);if(error_code!=0){puts("打开文件失败!请重新输入文件路径:");return error_code;}fread(&obj_filesize,sizeof(long),1,in);printf("解压文件大小:%ld字节\n",obj_filesize);get_mini_huffmantree(in,mini_ht);error_code=write_decompress_file(in,out,mini_ht,ftell(in),obj_filesize);if(error_code!=0){puts("解压缩失败!");return error_code;}puts("解压缩完成!");fclose(in);fclose(out);return 0;}int d_initial_files(char *source_filename,FILE **inp,char *obj_filename,FILE **outp){unsigned int zip_head;unsigned char filename_size;if ((*inp=fopen(source_filename,"rb"))==NULL){return -1;}printf("待解压缩文件:%s,",source_filename);fread(&zip_head,sizeof(unsigned int),1,*inp);if(zip_head!=0xFFFFFFFF){return -1;}if(obj_filename==NULL){if((obj_filename=(char*)malloc(256*sizeof(char)))==NULL){return -1;}fread(&filename_size,sizeof(unsigned char),1,*inp);fread(obj_filename,sizeof(char),filename_size,*inp);obj_filename[filename_size]='\0';}else{fread(&filename_size,sizeof(unsigned char),1,*inp);fseek(*inp,filename_size,SEEK_CUR);}printf("解压缩文件:%s\n",obj_filename);if((*outp=fopen(obj_filename,"wb"))==NULL){return -1;}free(obj_filename);return 0;}运行结果:待压缩文件位置:运行结果:压缩出的文件:。
用哈夫曼树算法编写文件压缩与解压缩的代码
/*
 * Question: how do you write a program that compresses and decompresses
 * files with the Huffman-tree algorithm?
 * Basic requirements:
 *   (1) the file name may be given on the command line or through the
 *       program's own prompts;
 *   (2) the choice between compressing and decompressing may likewise be
 *       given on the command line or through the program's prompts;
 *   (3) report a metric after compression:
 *       compression ratio = size after compression / size before compression.
 *
 * Best answer:
 *
 * Container layout produced by compress() and consumed by uncompress()
 * (every integer is 32 bits wide, host byte order):
 *   offset 0   : number of bytes in the original file
 *   offset 4   : file offset of the code dictionary
 *   offset 8   : packed code bits, most significant bit first
 *   dictionary : symbol count, then for each symbol its byte value, its code
 *                length in bits, and ceil(length / 8) bytes of code bits
 *                padded with zero bits on the right
 * The fields are fixed-width on purpose: writing them with sizeof(int) and
 * reading them back with sizeof(long) only works where both are 4 bytes.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SYMBOL_COUNT 256 /* distinct byte values */
#define NAME_CAP 255     /* capacity of the file-name buffers */
#define DATA_OFFSET 8L   /* first byte of the packed code bits */

struct head {
    unsigned char b;       /* the character */
    long count;            /* frequency while compressing; code length in bits while decompressing */
    long parent, lch, rch; /* tree links; -1 means "none" */
    char bits[256];        /* the Huffman code as '0'/'1' text (at most 255 bits) */
} header[512], tmp;

/* Print a prompt and read one line into dst without its line terminator. */
static int read_line(const char *prompt, char *dst, size_t cap)
{
    printf("%s", prompt);
    fflush(stdout);
    if (fgets(dst, (int)cap, stdin) == NULL)
        return -1;
    dst[strcspn(dst, "\r\n")] = '\0';
    return 0;
}

/* Ask for both file names and open them; on failure nothing stays open. */
static int open_files(FILE **in, FILE **out)
{
    char filename[NAME_CAP], outputfile[NAME_CAP];

    *in = *out = NULL;
    if (read_line("source filename:", filename, sizeof filename) != 0)
        return -1;
    *in = fopen(filename, "rb");
    if (*in == NULL) {
        printf("source file open error!\n");
        return -1;
    }
    if (read_line("destination filename:", outputfile, sizeof outputfile) != 0) {
        fclose(*in);
        return -1;
    }
    *out = fopen(outputfile, "wb");
    if (*out == NULL) {
        printf("destination file open error!\n");
        fclose(*in);
        return -1;
    }
    return 0;
}

/* Fold eight '0'/'1' characters into one byte, first character = top bit. */
static unsigned char pack_byte(const char *bits)
{
    unsigned char value = 0;
    int k;

    for (k = 0; k < 8; k++)
        value = (unsigned char)((value << 1) | (bits[k] == '1'));
    return value;
}

/* Expand one byte into eight '0'/'1' characters (no terminator written). */
static void unpack_byte(unsigned char value, char *dst)
{
    int k;

    for (k = 0; k < 8; k++)
        dst[k] = (value & (0x80 >> k)) ? '1' : '0';
}

/* qsort order: most frequent symbol first, byte value breaks ties. */
static int by_count_desc(const void *lhs, const void *rhs)
{
    const struct head *a = (const struct head *)lhs;
    const struct head *b = (const struct head *)rhs;

    if (a->count != b->count)
        return a->count < b->count ? 1 : -1;
    return (int)a->b - (int)b->b;
}

/* Index of the lightest node below `limit` that has no parent yet, or -1. */
static long lightest_root(long limit)
{
    long best = -1, k;

    for (k = 0; k < limit; k++) {
        if (header[k].parent != -1)
            continue;
        if (best < 0 || header[k].count < header[best].count)
            best = k;
    }
    return best;
}

/*
 * Prompt for a source and a destination file and write the Huffman-coded
 * form of the source to the destination.
 */
void compress()
{
    char buf[512];             /* pending code bits as text; holds < 8 + 255 characters */
    int slot_of[SYMBOL_COUNT]; /* byte value -> index in the sorted header table */
    uint32_t flength = 0, pt1 = DATA_OFFSET, n32, coded;
    size_t pending = 0;
    long i, n, m;
    int ch, ok = 1;
    FILE *ifp, *ofp;

    if (open_files(&ifp, &ofp) != 0)
        return;

    /* Start from a clean table so a second call does not inherit old counts. */
    memset(header, 0, sizeof header);
    for (i = 0; i < 512; i++)
        header[i].parent = header[i].lch = header[i].rch = -1;
    for (i = 0; i < SYMBOL_COUNT; i++)
        slot_of[i] = -1;

    /* Pass 1: frequencies. Testing fgetc's result avoids the feof() off-by-one. */
    while ((ch = fgetc(ifp)) != EOF) {
        if (flength == INT32_MAX) {
            printf("source file too large!\n");
            fclose(ifp);
            fclose(ofp);
            return;
        }
        header[ch].b = (unsigned char)ch;
        header[ch].count++;
        flength++;
    }

    qsort(header, SYMBOL_COUNT, sizeof header[0], by_count_desc);
    for (n = 0; n < SYMBOL_COUNT && header[n].count != 0; n++)
        slot_of[header[n].b] = (int)n;

    /* Leaves occupy [0, n); each new internal node joins the two lightest roots. */
    m = 2 * n - 1;
    for (i = n; i < m; i++) {
        long left, right;

        left = lightest_root(i);
        header[left].parent = i;
        right = lightest_root(i);
        header[right].parent = i;
        header[i].lch = left;
        header[i].rch = right;
        header[i].count = header[left].count + header[right].count;
    }

    /* A code is the leaf-to-root path read backwards: left edge '0', right edge '1'. */
    for (i = 0; i < n; i++) {
        char reversed[256];
        long len = 0, node = i, k;

        while (header[node].parent != -1) {
            long up = header[node].parent;

            reversed[len++] = (header[up].lch == node) ? '0' : '1';
            node = up;
        }
        for (k = 0; k < len; k++)
            header[i].bits[k] = reversed[len - 1 - k];
        header[i].bits[len] = '\0';
    }

    /* Header; the dictionary offset is patched once the data size is known. */
    fwrite(&flength, sizeof flength, 1, ofp);
    fwrite(&pt1, sizeof pt1, 1, ofp);

    /* Pass 2: emit the codes, flushing every complete group of eight bits. */
    rewind(ifp);
    buf[0] = '\0';
    for (coded = 0; coded < flength; coded++) {
        int slot;
        size_t len, done;

        ch = fgetc(ifp);
        slot = (ch == EOF) ? -1 : slot_of[ch];
        if (slot < 0) { /* the source changed between the two passes */
            ok = 0;
            break;
        }
        len = strlen(header[slot].bits);
        memcpy(buf + pending, header[slot].bits, len + 1);
        pending += len;
        for (done = 0; pending - done >= 8; done += 8) {
            fputc(pack_byte(buf + done), ofp);
            pt1++;
        }
        pending -= done;
        memmove(buf, buf + done, pending + 1); /* regions overlap: strcpy would be undefined */
    }
    if (pending > 0) {
        memcpy(buf + pending, "0000000", 8); /* zero-fill the last byte */
        fputc(pack_byte(buf), ofp);
        pt1++;
    }

    /* Dictionary, written directly after the data (the stream is already at pt1). */
    n32 = (uint32_t)n;
    fwrite(&n32, sizeof n32, 1, ofp);
    for (i = 0; i < n; i++) {
        size_t len = strlen(header[i].bits), k;

        fputc(header[i].b, ofp);
        fputc((int)len, ofp);
        for (k = 0; k < len; k += 8) {
            char chunk[9] = "00000000";

            memcpy(chunk, header[i].bits + k, len - k < 8 ? len - k : 8);
            fputc(pack_byte(chunk), ofp);
        }
    }

    fseek(ofp, 4L, SEEK_SET);
    fwrite(&pt1, sizeof pt1, 1, ofp);

    if (ferror(ifp) || ferror(ofp))
        ok = 0;
    fclose(ifp);
    if (fclose(ofp) != 0)
        ok = 0;
    printf("%s", ok ? "compress successfully!\n" : "compress failed!\n");
    return;
}

/*
 * Prompt for a compressed file and a destination file and restore the
 * original bytes. Truncated or inconsistent input is reported instead of
 * being decoded past the end of the tables.
 */
void uncompress()
{
    char window[272]; /* undecoded bits as text; holds < 255 + 8 characters */
    uint32_t flength = 0, dict_off = 0, n32 = 0, emitted, next = DATA_OFFSET;
    size_t have = 0;    /* characters currently in window */
    size_t real = 0;    /* how many of them came from the data section */
    size_t longest = 0; /* longest code in the dictionary */
    long i, n = 0;
    int ok;
    FILE *ifp, *ofp;

    if (open_files(&ifp, &ofp) != 0)
        return;

    ok = fread(&flength, sizeof flength, 1, ifp) == 1
      && fread(&dict_off, sizeof dict_off, 1, ifp) == 1
      && fseek(ifp, (long)dict_off, SEEK_SET) == 0
      && fread(&n32, sizeof n32, 1, ifp) == 1
      && n32 <= SYMBOL_COUNT;
    if (ok)
        n = (long)n32;

    for (i = 0; ok && i < n; i++) {
        int value = fgetc(ifp);
        int len = fgetc(ifp);
        int k;

        if (value == EOF || len == EOF) {
            ok = 0;
            break;
        }
        header[i].b = (unsigned char)value;
        header[i].count = len; /* count doubles as the code length here */
        for (k = 0; k < len; k += 8) {
            int packed = fgetc(ifp);

            if (packed == EOF) {
                ok = 0;
                break;
            }
            unpack_byte((unsigned char)packed, header[i].bits + k);
        }
        header[i].bits[len] = '\0'; /* drop the padding bits */
        if ((size_t)len > longest)
            longest = (size_t)len;
    }

    if (ok && fseek(ifp, DATA_OFFSET, SEEK_SET) != 0)
        ok = 0;

    for (emitted = 0; ok && emitted < flength; emitted++) {
        /* Keep at least one longest-code worth of bits in view. */
        while (have < longest) {
            int packed = 0;

            if (next < dict_off && (packed = fgetc(ifp)) != EOF) {
                next++;
                real += 8;
            } else {
                packed = 0; /* past the data section: look-ahead padding only */
            }
            unpack_byte((unsigned char)packed, window + have);
            have += 8;
        }
        /* Codes are prefix-free, so at most one entry matches the window. */
        for (i = 0; i < n; i++) {
            if (memcmp(header[i].bits, window, (size_t)header[i].count) == 0)
                break;
        }
        if (i == n || (size_t)header[i].count > real) {
            ok = 0;
            break;
        }
        fputc(header[i].b, ofp);
        have -= (size_t)header[i].count;
        real -= (size_t)header[i].count;
        memmove(window, window + header[i].count, have);
    }

    if (ferror(ofp))
        ok = 0;
    fclose(ifp);
    if (fclose(ofp) != 0)
        ok = 0;
    printf("%s", ok ? "Uncompress successfully!\n" : "uncompress failed!\n");
    return;
}

/* Ask whether to compress or uncompress and run the chosen operation. */
int main()
{
    char line[16];

    printf("1--Compress file\n");
    printf("2--Uncompress file\n");
    printf("Select 1 or 2:");
    fflush(stdout);
    /* Read the whole line so the newline is not taken as the first file name. */
    if (fgets(line, sizeof line, stdin) == NULL)
        return 0;
    if (line[0] == '1')
        compress();
    else if (line[0] == '2')
        uncompress();
    return 0;
}
英文文章压缩(Huffman)C++程序
// Huffman coding of a text file.
//
// The code table lives only in memory: a file can be decompressed only in the
// same session that compressed it. The archive is always "output.txt", the
// restored text "result.txt" and the printable code table "show.txt".
#include <algorithm>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <limits>
#include <string>
#include <vector>
using namespace std;

typedef struct HNode
{
    char data;      // the symbol
    char code[256]; // its code as '0'/'1' text; a leaf is at most 255 levels deep
    int length;     // code length in bits
} *HList, HNode;

typedef struct HTree
{
    char data; // the symbol (leaves only)
    int lchild, rchild, parent; // -1 means "none"
    int weight; // occurrence count
} HTree, *HTList;

string s_Vector; // distinct symbols of the input, in ascending order

static const char kRule[] = "--------------------------------------------------------------------\n";

// Orders code-table entries by their code text.
static bool CodeLess(const HNode &lhs, const HNode &rhs)
{
    return strcmp(lhs.code, rhs.code) < 0;
}

// Binary search in a table sorted with CodeLess; returns the index or -1.
static int FindCode(const HNode *table, int count, const char *code)
{
    int lo = 0, hi = count;
    while (lo < hi)
    {
        const int mid = lo + (hi - lo) / 2;
        if (strcmp(table[mid].code, code) < 0)
            lo = mid + 1;
        else
            hi = mid;
    }
    return (lo < count && strcmp(table[lo].code, code) == 0) ? lo : -1;
}

// Index of the lightest node below `limit` that is neither taken nor `skip`.
static int LightestFree(const HTree *nodes, const vector<char> &taken, int limit, int skip)
{
    int best = -1;
    for (int i = 0; i < limit; ++i)
    {
        if (i == skip || taken[i])
            continue;
        if (best < 0 || nodes[i].weight < nodes[best].weight)
            best = i;
    }
    return best;
}

// Returns the index of `key` in s_Vector, or -1 when the symbol is unknown.
int Cal(char key)
{
    const string::const_iterator pos = lower_bound(s_Vector.begin(), s_Vector.end(), key);
    if (pos == s_Vector.end() || *pos != key)
        return -1;
    return static_cast<int>(pos - s_Vector.begin());
}

// Reads the file, counts every symbol, and allocates the tree (HT) and the
// code table (H). s_Vector is rebuilt from scratch on every call. For an
// empty file both pointers are set to null and s_Vector stays empty.
void ReadFile(string filename, HTList &HT, HList &H)
{
    ifstream file(filename);
    if (!file)
    {
        cout << "文件打开失败\n";
        exit(0);
    }

    // One counter per possible byte value, so no input can overflow the table.
    int counts[UCHAR_MAX + 1] = {};
    char ch;
    while (file.get(ch))
        ++counts[static_cast<unsigned char>(ch)];

    // Walking the char range in order yields the symbols already sorted.
    s_Vector.clear();
    for (int value = CHAR_MIN; value <= CHAR_MAX; ++value)
    {
        if (counts[static_cast<unsigned char>(value)] != 0)
            s_Vector.push_back(static_cast<char>(value));
    }

    const int length = static_cast<int>(s_Vector.length());
    if (length == 0)
    {
        HT = nullptr;
        H = nullptr;
        return;
    }

    HT = new HTree[2 * length - 1];
    H = new HNode[length];
    for (int i = 0; i < 2 * length - 1; ++i)
        HT[i].lchild = HT[i].rchild = HT[i].parent = -1;
    for (int i = 0; i < length; ++i)
    {
        HT[i].data = H[i].data = s_Vector[i];
        HT[i].weight = counts[static_cast<unsigned char>(s_Vector[i])];
        H[i].code[0] = '\0';
        H[i].length = 0;
    }
}

// Builds the Huffman tree: leaves are HT[0 .. length), internal nodes follow.
void Build_Huffman(HTList HT)
{
    const int leaves = static_cast<int>(s_Vector.length());
    if (HT == nullptr || leaves == 0)
        return;

    const int total = 2 * leaves - 1;
    vector<char> taken(total, 0); // nodes that already have a parent
    for (int next = leaves; next < total; ++next)
    {
        const int pos1 = LightestFree(HT, taken, next, -1);
        const int pos2 = LightestFree(HT, taken, next, pos1);
        HT[next].lchild = pos1;
        HT[next].rchild = pos2;
        HT[next].weight = HT[pos1].weight + HT[pos2].weight;
        HT[pos1].parent = HT[pos2].parent = next;
        taken[pos1] = taken[pos2] = 1;
    }
}

// Derives every symbol's code from the tree and writes the table to show.txt.
void Build_Huffman_Code(HTList HT, HList H)
{
    if (HT == nullptr || H == nullptr)
        return;

    const int length = static_cast<int>(s_Vector.length());
    for (int i = 0; i < length; ++i)
    {
        // Collect the leaf-to-root path, then reverse it into root-to-leaf order.
        string path;
        for (int child = i; HT[child].parent != -1; child = HT[child].parent)
            path += (HT[HT[child].parent].lchild == child) ? '0' : '1';
        if (path.empty())
            path = "0"; // a lone symbol still needs one bit per occurrence
        reverse(path.begin(), path.end());

        H[i].length = static_cast<int>(path.length());
        memcpy(H[i].code, path.c_str(), path.length() + 1);
    }

    ofstream file("show.txt");
    for (int i = 0; i < length; ++i)
    {
        file.setf(ios::left);
        file.width(4);
        file << H[i].data;
        file.width(20);
        file << H[i].code;
        file.width(10);
        file << HT[i].weight << endl;
    }
}

// Encodes the file into output.txt. The last byte of the archive holds
// 8 minus the number of leftover bits; when there are leftover bits, the byte
// before it carries them, zero-padded on the right.
void Compress(string filename, HList H)
{
    if (H == nullptr)
        return;

    ifstream file(filename);
    ofstream f("output.txt", ios::binary);

    unsigned char packed = 0;
    int filled = 0; // bits collected in `packed`
    char data;
    while (file.get(data))
    {
        const int locate = Cal(data);
        if (locate < 0)
            continue; // symbol missing from the table: nothing to emit
        for (const char *bit = H[locate].code; *bit != '\0'; ++bit)
        {
            packed = static_cast<unsigned char>((packed << 1) | (*bit == '1'));
            if (++filled == 8)
            {
                f.put(static_cast<char>(packed));
                packed = 0;
                filled = 0;
            }
        }
    }

    if (filled != 0)
        f.put(static_cast<char>(packed << (8 - filled)));
    f.put(static_cast<char>(8 - filled));
}

// Looks up a code in a table sorted by Sort(); returns the symbol, or '\0'
// when no entry has exactly this code.
char Search(HList H, char S[])
{
    if (H == nullptr)
        return '\0';
    const int found = FindCode(H, static_cast<int>(s_Vector.length()), S);
    return found < 0 ? '\0' : H[found].data;
}

// Sorts the code table by code text (the order Search relies on).
void Sort(HList H)
{
    if (H == nullptr)
        return;
    sort(H, H + s_Vector.length(), CodeLess);
}

// Decodes the archive into result.txt using the in-memory code table.
void Uncompress(string filename, HList H)
{
    ifstream file(filename, ios::binary);
    if (!file)
    {
        cout << "文件打开失败\n";
        exit(0);
    }
    ofstream f("result.txt");

    const int count = static_cast<int>(s_Vector.length());
    if (H == nullptr || count == 0)
        return;

    // Search a sorted copy: H itself must stay aligned with s_Vector for Cal().
    vector<HNode> table(H, H + count);
    sort(table.begin(), table.end(), CodeLess);

    const string bytes((istreambuf_iterator<char>(file)), istreambuf_iterator<char>());
    if (bytes.empty())
        return;

    // The trailer byte says how many padding bits end the payload (8 = none).
    const int figure = static_cast<unsigned char>(bytes[bytes.size() - 1]);
    size_t bits = (bytes.size() - 1) * 8;
    if (figure != 8)
    {
        if (figure < 1 || figure > 7 || bits < 8)
        {
            cout << "压缩文件已损坏\n";
            return;
        }
        bits -= static_cast<size_t>(figure);
    }

    // Grow the candidate one bit at a time; codes are prefix-free, so the
    // first hit is the symbol.
    string candidate;
    for (size_t i = 0; i < bits; ++i)
    {
        const unsigned char byte = static_cast<unsigned char>(bytes[i / 8]);
        candidate += ((byte >> (7 - i % 8)) & 1) ? '1' : '0';

        const int found = FindCode(table.data(), count, candidate.c_str());
        if (found >= 0)
        {
            f.put(table[found].data);
            candidate.clear();
        }
        else if (candidate.size() >= 255)
        {
            cout << "压缩文件已损坏\n"; // longer than any possible code
            return;
        }
    }
}

// Prints the code table that Build_Huffman_Code stored in show.txt.
void show()
{
    ifstream file("show.txt");
    cout.setf(ios::left);
    cout << kRule;
    cout.width(5);
    cout << "字符";
    cout.width(20);
    cout << "编码";
    cout.width(6);
    cout << "字符数\n";

    string line;
    while (getline(file, line)) // also ends cleanly when show.txt is missing
        cout << line << endl;
}

// Interactive menu loop.
void menu()
{
    HTList HT = nullptr; // Huffman tree
    HList H = nullptr;   // code table
    int choice;
    string filename;

    cout << " 菜单\n";
    cout << kRule << "\n";
    cout << " 1.压缩文件 2.解压缩文件\n";
    cout << " 3.查看符号出现的次数和编码\n\n";
    cout << " 0.退出\n";
    cout << kRule;

    while (true)
    {
        cout << "输入选项:";
        if (!(cin >> choice))
        {
            if (cin.eof())
                choice = 0; // input closed: leave instead of spinning
            else
            {
                // Discard the bad token; otherwise the same input fails forever.
                cin.clear();
                cin.ignore(numeric_limits<streamsize>::max(), '\n');
                continue;
            }
        }
        if (choice == 0)
        {
            delete[] HT;
            delete[] H;
            exit(0);
        }

        switch (choice)
        {
        case 1:
            cout << "输入文件名:";
            cin >> filename;
            // Release the tables of the previous run before building new ones.
            delete[] HT;
            delete[] H;
            HT = nullptr;
            H = nullptr;
            ReadFile(filename, HT, H);
            if (s_Vector.empty())
            {
                cout << "文件为空,无法压缩\n";
                filename.clear(); // nothing was compressed, so option 2 stays locked
                break;
            }
            Build_Huffman(HT);
            Build_Huffman_Code(HT, H);
            Compress(filename, H); // compress
            cout << "压缩成功!\n";
            break;
        case 2:
            if (!filename.length())
            {
                cout << "请先输入压缩的文件名!!";
                break;
            }
            Uncompress("output.txt", H);
            cout << "解压成功,文件被放置result.txt中!!\n";
            break;
        case 3:
            show();
            break;
        default:
            break;
        }
        cout << kRule;
    }
}

// Program entry point.
int main()
{
    menu();
    return 0;
}
- 1、下载文档前请自行甄别文档内容的完整性,平台不提供额外的编辑、内容补充、找答案等附加服务。
- 2、"仅部分预览"的文档,不可在线预览部分如存在完整性等问题,可反馈申请退款(可完整预览的文档不适用该条件!)。
- 3、如文档侵犯您的权益,请联系客服反馈,我们会尽快为您处理(人工客服工作时间:9:00-18:30)。
fst.close();
pssfile.close();
system("pause");
}
void HuffmanCoding(array<huffman, m> &HT, array<string, n> &code, array<double, n> freq) {
char tempc;
unsigned char bitlen;
string temps;
unsigned char pen = 0, lenpen = 0, nextpen = 0, tempchar = 0, lencode = 0;
unsigned char i;
while (fst.get(tempc)) {
string cd(n-1, '0');
int start = n-1;
int k, f;
for (k = i, f = HT[i].parent; f != 0; k = f, f = HT[f].parent)
if (HT[f].lchild == k)
cd[--start] = '0';
else
pssfile.put(pen);
}
void createfreq(ifstream& fst,array<double,n> &freq,double &sumchar) {
char tempc;
while (fst.read((char *)(&tempc),sizeof(tempc)) ){
++sumchar;
for (i = 0; i < end; ++i) {
if (HT[i].parent == 0) {
if (HT[i].freq < min1) {
s2 = s1;
s1 = i;
min2 = min1;
min1 = HT[i].freq;
}
else if (HT[i].freq < min2) {
s2 = i;
min2 = HT[i].freq;
}
}
}
}
void stoByte(array<string, n> code, array<string, n> &Bytecode) {
int j;
unsigned char cj=0;
string curByte;
unsigned char lencurByte, index = 0, pen=0, temp;
fst.close();
HuffmanCoding(HT,code,freq); //编码
//for (int p = 0; p < n; ++p)
//cout << code[p] <<"\t"<< p <<endl;
stoByte(code, Bytecode);//将字符串码转化为字节码
ofstream pssfile(pssname, ofstream::binary |ofstream::out);
};
int main() {
void HuffmanCoding(array<huffman, m> &HT, array<string, n> &code, array<double, n> freq);
void myselect(array<huffman, m> &HT, int end,int& s1, int& s2);
pen = pen | tempchar;
pssfile.put(pen);
pen = nextpen;
bitlen -= sb;//bitlen减去这次写入pen和nextpen的值
lenpen = sb - (8 - lenpen);//当前len改成nextpen的长度
++i;
}
}
}
if (lenpen)
++freq[static_cast<unsigned char>(tempc)];
}
int j;
unsigned char cj = 0;
for (j = 0; j < n; j++) {
freq[cj] = freq[cj] / sumchar;
cj++;
}
}
void dictofile(ofstream &pssfile, array<string, n> huffcode) {//将字典写入文件
bitlen = 0;//还剩没写入的位数发生变化
break;
}
else {
unsigned char sb = static_cast<unsigned char>(min(8, static_cast<int>(bitlen)));//可能读入的一个字节够一个pen但是不足8位
tempchar = temps[i];
nextpen = 255;
nextpen = nextpen >> (8 - lenpen);//这个相当于nextpen的长度
nextpen = nextpen & tempchar;
nextpen = nextpen << (8 - lenpen);
tempchar = tempchar >> lenpen;
if (bitlen + lenpen < cst) {//如果这一次读入的编码还不够写满
tempchar = temps[i];
tempchar = tempchar >> lenpen;//移位后给pen
pen = pen | tempchar;
lenpen += bitlen;//pen的长度发生变化
while (lencurByte > index) {
if (lencurByte - index < cst) {
curstr = curByte.substr(index, lencurByte - index);
string t(cst - (lencurByte - index),'0');
int j;
unsigned char cj = 0;
for (j = 0; j < n; ++j) {
HT[cj].freq = freq[cj];
HT[cj].lchild = 0;
HT[cj].rchild = 0;
HT[cj].parent = 0;
++cj;
}
int i,s1, s2;
for (i= n ; i <m; ++i) {
double freq;
int parent;
int lchild;
int rchild;
public:
huffman() {
freq = 0;
parent = 0;
lchild = 0;
rchild = 0;
}
huffman(double sfreq,int sparent,int slchild,int srchild):freq(sfreq),parent(sparent),lchild(slchild),rchild(srchild){}
string curstr;
for (j = 0; j < n; ++j) {
index = 0;
curByte = code[cj];
lencurByte = curByte.length();
Bytecode[cj] = Bytecode[cj] + static_cast<char>(lencurByte); //在字典里加入位的长度
curstr = curstr + t;
index = lencurByte;//保证curstr始终有8位长度
}
else {
curstr = curByte.substr(index, cst);
index += cst;
}
for (int i = 0; i < 8; i++) {
temp = curstr[i] - 48;
temps = Bytecode[static_cast<unsigned char>(tempc)];
bitlen = temps[0];//bitlen存储的是编码有多少位
lencode = temps.length() - 1;//lencode存储的是编码以 字节形式有多少字节
i = 1;
while (lencode--) {
pssname = pssname + ".bin";
ifstream fst(filename,ifstream::binary);
if (!fst) {
cout << "文件打开失败" << endl;
system("pause");
exit(0);
}
createfreq(fst,freq,sumchar);