用C++实现数据无损压缩、解压(使用LZW算法)

合集下载

1、下载文档前请自行甄别文档内容的完整性，平台不提供额外的编辑、内容补充、找答案等附加服务。
2、"仅部分预览"的文档,不可在线预览部分如存在完整性等问题,可反馈申请退款(可完整预览的文档不适用该条件!)。
3、如文档侵犯您的权益，请联系客服反馈,我们会尽快为您处理(人工客服工作时间：9:00-18:30)。

用C++实现数据无损压缩、解压（使用LZW算法）
小俊发表于 2008-9-10 14:50:00
推荐
LZW压缩算法由Lempel、Ziv和Welch三人共同创造，并以他们姓氏的首字母命名。

LZW就是通过建立一个字符串表，用较短的代码来表示较长的字符串来实现压缩。

LZW压缩算法曾是Unisys的专利，该专利在美国已于2003年到期；在到期之前，对它的使用是有限制的。

字符串和编码的对应关系是在压缩过程中动态生成的，并且隐含在压缩数据中，解压的时候根据表来进行恢复，算是一种无损压缩。

个人认为LZW很适用于嵌入式系统上。

因为：1、压缩和解压速度比较快，尤其是解压速度；2、占用资源少；3、压缩比也比较理想；4、适用于文本和图像等出现连续重复字节串的数据流。

LZW算法有一点比较特别，就是压缩过程中产生的字符串对应表，不需要保存到压缩数据中，因为这个表在解压过程中能自动生成回来。

LZW算法比较简单，我是按照这本书上写的算法来编程的：
以下是源代码：
/**
 * LZW (Lempel-Ziv-Welch) compressor/decompressor operating on caller-supplied
 * byte buffers.
 *
 * Code layout used by this implementation:
 *   - 0..255 : the single byte with that value
 *   - 256    : written at the start of the stream and whenever the encoder
 *              resets its string table
 *   - 257    : end-of-data marker
 *   - 258..  : strings added while encoding/decoding
 * Codes start 9 bits wide and grow up to 12 bits.
 *
 * The object owns the buffers referenced by StrTable (allocated with realloc,
 * released in the destructor), so it must not be copied; copying is disabled.
 */
class LZWCoder
{
private:
    /** A length-prefixed byte string (not NUL-terminated). */
    struct TStr
    {
        char *string;       // heap buffer managed with realloc/free, or NULL
        unsigned int len;   // number of valid bytes in `string`
    };

    TStr StrTable[4097];    // string table, indexed by code
    unsigned int ItemPt;    // index of the last table entry in use
    unsigned int BytePt;    // current byte offset in the compressed buffer
    unsigned char BitPt;    // current bit offset inside that byte (0..7)
    unsigned char Bit[8];   // bits collected by WriteCode for the next output byte
    unsigned char Bits;     // current code width in bits
    unsigned int OutBytes;  // bytes written to the output buffer by WriteString

    // Copying would make two objects free the same StrTable buffers.
    // Declared private and intentionally left undefined.
    LZWCoder(const LZWCoder &);
    LZWCoder &operator=(const LZWCoder &);

    void InitStrTable();
    void CopyStr(TStr *d, TStr s);
    void StrJoinChar(TStr *s, char c);
    unsigned int InStrTable(TStr s);
    void AddTableEntry(TStr s);
    void WriteCode(char *dest, unsigned int b);
    unsigned int GetNextCode(char *src);
    void StrFromCode(TStr *s, unsigned int c);
    void WriteString(char *dest, TStr s);

public:
    /**
     * Compresses `len` bytes from `src` into `dest`.
     * @return number of bytes written to `dest`.
     */
    unsigned int Encode(char *src, unsigned int len, char *dest);

    /**
     * Decompresses the stream in `src` into `dest`.
     * @param len receives the number of bytes consumed from `src`.
     * @return number of bytes written to `dest`.
     */
    unsigned int Decode(char *src, unsigned int *len, char *dest);

    LZWCoder();
    ~LZWCoder();
};
// Puts the string table back into its starting state: codes 0..255 hold the
// matching single byte, codes 256 and 257 hold no string, the last used entry
// is 257 and codes are 9 bits wide.
void LZWCoder::InitStrTable()
{
    unsigned int code = 0;

    // One-byte strings for every possible byte value.
    while(code < 256)
    {
        char *byteStr = (char *)realloc(StrTable[code].string, 1);
        byteStr[0] = code;
        StrTable[code].string = byteStr;
        StrTable[code].len = 1;
        ++code;
    }

    // The two special codes (256 and 257) carry no string data.
    for(; code <= 257; ++code)
    {
        StrTable[code].string = NULL;
        StrTable[code].len = 0;
    }

    Bits = 9;
    ItemPt = 257;
}
// Makes *d an independent copy of s: resizes d's buffer to s.len bytes,
// copies the bytes across and updates d's length.
void LZWCoder::CopyStr(TStr *d, TStr s)
{
    const unsigned int count = s.len;
    char *out = (char *)realloc(d->string, count);
    d->string = out;

    const char *in = s.string;
    for(unsigned int left = count; left != 0; --left)
        *out++ = *in++;

    d->len = count;
}
// Appends the byte c to *s, growing its buffer by one byte.
void LZWCoder::StrJoinChar(TStr *s, char c)
{
    const unsigned int oldLen = s->len;
    s->string = (char *)realloc(s->string, oldLen + 1);
    s->string[oldLen] = c;
    s->len = oldLen + 1;
}
// Linear search of table entries 0..ItemPt for a string equal to s.
// Returns the index of the first match, or 65535 when s is not in the table.
unsigned int LZWCoder::InStrTable(TStr s)
{
    for(unsigned int code = 0; code <= ItemPt; ++code)
    {
        // Strings of different length can never be equal.
        if(StrTable[code].len != s.len)
            continue;

        const char *candidate = StrTable[code].string;
        unsigned int matched = 0;
        while(matched < s.len && candidate[matched] == s.string[matched])
            ++matched;

        if(matched == s.len)
            return code;
    }
    return 65535;
}
// Appends a copy of s to the string table as the next entry (ItemPt + 1).
// If the table is already full the entry is dropped; this can only happen
// while decoding a malformed stream, and prevents writing past StrTable.
void LZWCoder::AddTableEntry(TStr s)
{
    if(ItemPt + 1 >= sizeof(StrTable) / sizeof(StrTable[0]))
        return;
    CopyStr(&StrTable[++ItemPt], s);
}
// Appends the low `Bits` bits of b to the output bit stream, most significant
// bit first. Bits are buffered in Bit[]; each time eight have accumulated they
// are packed (Bit[0] is the top bit) and stored at dest[BytePt].
void LZWCoder::WriteCode(char *dest, unsigned int b)
{
    for(int shift = Bits - 1; shift >= 0; --shift)
    {
        Bit[BitPt] = ((b >> shift) & 1u) ? 1 : 0;
        ++BitPt;
        if(BitPt != 8)
            continue;

        // A full byte is ready: fold the eight buffered bits together.
        unsigned char packed = 0;
        for(unsigned int k = 0; k < 8; ++k)
            packed = static_cast<unsigned char>((packed << 1) | Bit[k]);

        dest[BytePt] = packed;
        ++BytePt;
        BitPt = 0;
    }
}
// Reads the next code, `Bits` bits wide, from the bit stream in src.
// Bits are consumed most significant first within each byte, starting at
// byte BytePt / bit BitPt; both cursors are advanced past the code.
// The length of src is unknown here, so no bounds check is performed.
unsigned int LZWCoder::GetNextCode(char *src)
{
    unsigned int c = 0;
    for(unsigned char i = 0; i < Bits; i ++)
    {
        // BitPt == 0 selects bit 7 of the current byte, BitPt == 7 selects bit 0.
        const unsigned int bit = (src[BytePt] & (1 << (7 - BitPt))) != 0;
        c = (c << 1) + bit;
        if(++BitPt == 8)
        {
            BitPt = 0;
            BytePt ++;
        }
    }
    return c;
}
// Loads *s with a copy of the table string stored under code c.
void LZWCoder::StrFromCode(TStr *s, unsigned int c)
{
    const TStr &entry = StrTable[c];
    CopyStr(s, entry);
}
// Appends the bytes of s to dest at offset OutBytes and advances OutBytes
// by s.len.
void LZWCoder::WriteString(char *dest, TStr s)
{
    const char *cursor = s.string;
    for(unsigned int left = s.len; left != 0; --left)
    {
        dest[OutBytes] = *cursor++;
        ++OutBytes;
    }
}
// Compresses len bytes from src into dest using LZW.
//
// The stream starts with code 256, then carries one code per longest string
// already in the table, and ends with code 257 followed by zero padding up to
// a byte boundary. Code width grows from 9 to 12 bits as the table fills; when
// the table reaches entry 4096 a code 256 is emitted and the table is reset.
//
// dest must be large enough for the result; its capacity is not checked here.
// Returns the number of bytes written to dest.
unsigned int LZWCoder::Encode(char *src, unsigned int len, char *dest)
{
    TStr Omega, t;
    char k;
    unsigned int i;

    BytePt = 0;
    BitPt = 0;
    InitStrTable();
    WriteCode(dest, 256);

    Omega.string = NULL;
    Omega.len = 0;
    t.string = NULL;
    t.len = 0;

    for(i = 0; i < len; i ++)
    {
        k = src[i];

        // t = Omega + k: the current match extended by the next input byte.
        CopyStr(&t, Omega);
        StrJoinChar(&t, k);

        if(InStrTable(t) != 65535)
        {
            // Still a known string: keep extending the match.
            CopyStr(&Omega, t);
        }
        else
        {
            // Emit the longest known prefix and remember the new string.
            WriteCode(dest, InStrTable(Omega));
            AddTableEntry(t);

            // Widen the code when the table crosses a power of two; the
            // decoder switches one entry earlier because its table lags by one.
            switch(ItemPt)
            {
                case 512:  Bits = 10; break;
                case 1024: Bits = 11; break;
                case 2048: Bits = 12; break;
                case 4096:
                    // Table full: tell the decoder to reset, then start over.
                    WriteCode(dest, 256);
                    InitStrTable();
                    break;
            }

            // Restart the match with the byte that broke it.
            Omega.string = (char *)realloc(Omega.string, 1);
            Omega.string[0] = k;
            Omega.len = 1;
        }
    }

    // Flush the pending match and terminate the stream.
    WriteCode(dest, InStrTable(Omega));
    WriteCode(dest, 257);

    // Seven zero bits are enough to push out any partially filled byte
    // without producing an extra byte when the stream is already aligned.
    Bits = 7;
    WriteCode(dest, 0);

    free(Omega.string);
    free(t.string);
    return BytePt;
}
unsigned int LZWCoder::Decode(char *src, unsigned int *len, char *dest)
{
unsigned int code, oldcode;
TStr t, s;
BytePt = 0;
BitPt = 0;
OutBytes = 0;
t.string = NULL;
t.len = 0;
s.string = NULL;
s.len = 0;
InitStrTable();
while((code = GetNextCode(src)) != 257)
{
if(code == 256)
{
InitStrTable();
code = GetNextCode(src);
if(code == 257) break;
StrFromCode(&s, code);
WriteString(dest, s);
oldcode = code;
}
else
{
if(code <= ItemPt)
{
StrFromCode(&s, code);
WriteString(dest, s);
StrFromCode(&t, oldcode);
StrJoinChar(&t, s.string[0]);
AddTableEntry(t);
switch(ItemPt)
{
case 511: Bit s = 10; break;
case 1023: Bi ts = 11; break;
case 2047: Bi ts = 12; break;
}
oldcode = code;
}
else
{
StrFromCode(&s, oldcode);
StrJoinChar(&s, s.string[0]);
WriteString(dest, s);
AddTableEntry(s);
switch(ItemPt)
{
case 511: Bit s = 10; break;
case 1023: Bi ts = 11; break;
case 2047: Bi ts = 12; break;
}
oldcode = code;
}
}
}
free(t.string);
free(s.string);
*len = BytePt + (BitPt != 0);
return OutBytes;
}
// Starts with every table slot empty (NULL buffer, zero length) so that the
// later realloc/free calls on the slots are well defined.
LZWCoder::LZWCoder()
{
    unsigned int slot = 4097;
    while(slot-- > 0)
    {
        StrTable[slot].len = 0;
        StrTable[slot].string = NULL;
    }
}
// Releases the buffer of every table slot; slots that were never used hold
// NULL, which free accepts.
LZWCoder::~LZWCoder()
{
    unsigned int slot = 0;
    while(slot < 4097)
    {
        free(StrTable[slot].string);
        ++slot;
    }
}
用法：
LZWCoder *Coder;
Coder = new LZWCoder();
然后用
Coder->Encode(char *src, unsigned int len, char *dest);
Coder->Decode(char *src, unsigned int *len, char *dest);
进行压缩或解压。

Encode函数中，src是输入数据的指针，len是输入数据的长度，dest是输出数据的指针。

函数返回输出数据的长度。

Decode函数中各参数与Encode类似，但*len会返回解压过程使用了输入数据的字节数（一般等于压缩时输出数据的长度）。

使用时把要压缩的数据分成每段8K来处理，效果会比较好。

算法实现没做优化，速度比较慢。