文章目录
  1. 1. 压缩操作
    1. 1.1. 核心思想
    2. 1.2. 压缩代码
    3. 1.3. 解压缩

可能有人要问了,不把压缩与解压缩分开呢?清晰明了,其实我自己也尝试过分开来写,自我感觉,这是自己给自己增加难度,因为两者有着紧密的联系,分开写,一是传参难,二是不好理解。

压缩操作

核心思想

压缩是利用字符的huffman编码进行操作,每一个字符都有一个huffman编码,那么这个huffman编码怎么来的,首先读整个文件,统计出每个字符出现的次数,然后利用这些_count建huffman树,根据所建的huffman树,出现次数多的,他的huffman编码短,出现次数少的,huffman编码可能会长一点,但是他个数少,所以整体来说,达到了压缩的目的。

One Quession:这时有人疑问了,说,我拿出一个字符,结果你替换成了一堆编码,不仅没有压缩反而增容了,而且改变了原先的内容。我的回答是 是的,从目前来讲是这样的。所以从这引出了下一个问题。
解决上述问题的解决办法就是,将编码按位存储,如图:

  1. 压缩文件:顾名思义就是对文件的操作,而文件由字符组成,所以要对文件信息进行提取,最重要的信息有:
    //文件信息结构体做堆成员
    struct FileInfo
    {

    unsigned char _ch;   //字符
    LongType _count;     //字符出现的次数,权值
    string _code;        //Huffman编码
    
    FileInfo(unsigned char ch = 0)
        :_ch(ch)
        , _count(0)
        , _code("")
    {}
    
    //以_count做权值,所以需重载 < 
    bool operator < (const FileInfo& info)
    {
        return this->_count < info._count;
    }
    
    bool operator != (const int& illegal)const
    {
        if (this->_count != illegal)
            return true;
        return false;
    }
    
    FileInfo operator + (const FileInfo& info)
    {
        FileInfo FI;
        FI._count = this->_count + info._count;
        return FI;
    }
    

    };

对于重载的这些运算符,因为——weight现在变成了结构体类型,所以需要重载这些。

压缩代码

bool Compress(const char* filename)
{
    //1.打开文件,统计字符出现的次数
    //2.生成对应的HUffman编码
    //3.压缩文件
    //4.写配置文件,方便后续的解压缩

    assert(filename);
    FILE* fOut = fopen(filename, "rb");
    assert(fOut);

    //统计字符出现的次数
    char ch = fgetc(fOut);
    while (ch != EOF)
    {
        _infors[(unsigned char)ch]._count++;
        ch = fgetc(fOut);
    }

    //生成对应的HuffmanCode
    HuffmanTree<FileInfo> HT;
    HuffmanTreeNode<FileInfo>* root = HT.CreateTree(_infors, 256, 0);
    _GenerateHuffmanCode(root);

    //3.压缩文件
    CompressFile(filename);

    //4.写配置文件,方便后续的解压缩
    ConfigFile(filename);

    return true;
}

//解压缩
bool  NoCompress(const char* filename)
{
    //读配置文件,依照次数建树
    assert(filename);

    //读配置文件
    string ConfigFile = filename;
    ConfigFile = "config." + ConfigFile;
    FILE* fOutConfig = fopen(ConfigFile.c_str(), "rb");
    assert(fOutConfig);

    //读压缩文件
    string CompressFile = filename;
    CompressFile = "Compress." + CompressFile;
    FILE* fOutCompress = fopen(CompressFile.c_str(), "rb");
    assert(fOutCompress);

    //读压缩文件
    string NoCompressFile = filename;
    NoCompressFile = "NoCompress." + NoCompressFile;
    FILE* fInNoCompress = fopen(NoCompressFile.c_str(), "wb");
    assert(fInNoCompress);

    //读配置文件,依照次数建树
    //每次读一行
    //long long size = 0;  atoi ()    ==>  int 
    //size >> 32  
    //size |= int
    //size <<= 32

    //用文件个数作为终止条件
    long long size = 0;
    string str1 = GetLine(fOutConfig);
    int tmp = atoi(str1.c_str());
    size >>= 32;
    size |= tmp;
    size <<= 32;
    string str2 = GetLine(fOutConfig);
    tmp = atoi(str2.c_str());
    size |= tmp;

    str1.clear();
    str2.clear();

    string str = GetLine(fOutConfig);
    while (!str.empty())
    {
        string str2;
        str2 = str.substr(2);
        _infors[str[0]]._count = atoi(str2.c_str());
        str.clear();
        str = GetLine(fOutConfig);
    }

    //创建哈弗曼树
    HuffmanTree<FileInfo> HT;
    HuffmanTreeNode<FileInfo>* root = HT.CreateTree(_infors, 256, 0);
    HuffmanTreeNode<FileInfo>* cur = root;

    //解压缩文件
    int flag = 0;
    char ch = fgetc(fOutCompress);
    while (cur && !feof(fOutCompress))
    {
        //拿出一个字符然后进行操作
        int index = 0;
        while (cur && index < 8)
        {
            char CHAR = 1;
            CHAR = (CHAR << index) & ch;

            if (CHAR)
                cur = cur->_right;
            else
                cur = cur->_left;

            if (cur && cur->_left == NULL && cur->_right == NULL)
            {
                fputc((unsigned char)cur->_weight._ch, fInNoCompress);
                ++flag;
                if (flag == size)
                {
                    break;
                }
                cur = root;
            }
            ++index;
        }

        ch = fgetc(fOutCompress);
    }

    fclose(fOutConfig);
    fclose(fOutCompress);
    fclose(fInNoCompress);
    return true;
}

void _GenerateHuffmanCode(HuffmanTreeNode<FileInfo>* root)
{
    if (NULL == root)
        return;

    _GenerateHuffmanCode(root->_left);
    _GenerateHuffmanCode(root->_right);

    //如果当前节点是叶子节点,则生成对应的哈夫曼编码
    if (root->_left == NULL && root->_right == NULL)
    {
        HuffmanTreeNode<FileInfo>* cur = root;
        HuffmanTreeNode<FileInfo>* parent = cur->_parent;
        string& code = _infors[cur->_weight._ch]._code;

        while (parent)
        {
            if (parent->_left == cur)
            {
                code += '0';
            }
            else
            {
                code += '1';
            }
            cur = cur->_parent;
            parent = cur->_parent;
        }

        //逆置huffamanCode
        reverse(code.begin() , code.end());
        //cout << root->_weight._ch << ":" << code << " "<<endl;
    }
}
void CompressFile(const char* filename)
{
    assert(filename);

    FILE* fOut = fopen(filename, "rb");
    assert(fOut);

    string CompressFile = filename;
    CompressFile = "Compress." + CompressFile;
    FILE* fInCompress = fopen(CompressFile.c_str(), "wb");
    assert(fInCompress);

    int flag = 0;  //一个code操作完后的所在位置
    unsigned char CHAR = 0;    //要存入的字符

    char ch = fgetc(fOut);
    while (ch != EOF)
    {
        int index = 0;      //八个一组的下标

        string code = _infors[(unsigned char)ch]._code;
        while (index < code.size())
        {
            unsigned char tmp = code[index] - '0';
            tmp <<= flag;
            CHAR = CHAR | tmp;
            if (flag == 7)
            {
                //将ch写入文件
                fputc(CHAR, fInCompress);
                CHAR = 0;
            }
            ++index;
            ++flag;
            if (flag == 8)
            {
                flag -= 8;
            }
        }

        ch = fgetc(fOut);
    }

    //文件结束后加0
    if (flag != 0)
    {
        fputc(CHAR, fInCompress);
    }

    fclose(fOut);
    fclose(fInCompress);
}


//配置文件格式如:a,2
void ConfigFile(const char* filename)
{
    assert(filename);

    FILE* fOut = fopen(filename, "rb");
    assert(fOut);

    string NoCompressFile = filename;
    NoCompressFile ="config." + NoCompressFile;
    FILE* fInNoCompress = fopen(NoCompressFile.c_str(), "wb");
    assert(fInNoCompress);

    int Height = 0;       //高32位
    int Low = 0;          //低32位
    long long size = GetSize(fOut);
    Low |= (size & 0xffffffff);
    Height |= size >> 32;

    char str1[128];
    _itoa_s(Low, str1, 10);  //低位存储在第一行
    fputs(str1, fInNoCompress);
    fputc('\n', fInNoCompress);
    //

    char str2[128];
    _itoa_s(Height, str2, 10); //高位存储在第二行
    fputs(str2, fInNoCompress);
    fputc('\n', fInNoCompress);
    //delete[] str2;

    //利用infors
    for (int i = 0; i < 256; ++i)
    {
        if (_infors[i]._count)
        {
            char buffer[128];
            fputc(_infors[i]._ch, fInNoCompress);
            fputc(',', fInNoCompress);

            _itoa_s(_infors[i]._count, buffer, 10);
            fputs(buffer, fInNoCompress);

            //delete[] buffer;
            fputc('\n', fInNoCompress);
        }
    }

    fclose(fOut);
    fclose(fInNoCompress);
    //delete[] str1;
    //delete[] str2;
    //delete[] buffer;
}

解压缩

//统计源文件字符的总个数,原因:压缩文件是按8 位存储,最后八位可能没存储满,(我这里补零)
//如果不统计源文件次数,最后的0也会解压缩(一般解压为堆顶元素),结果就会多出几个字符
long long GetSize(FILE* fOut)
{
assert(fOut);
long long size = 0;
for (int i = 0; i < 256; ++i)
{
if (_infors[i]._count)
{
size = size + _infors[i]._count;
}
}
return size;
}

//解压缩
bool NoCompress(const char* filename)
{
//读配置文件,依照次数建树
assert(filename);

    //读配置文件
    string ConfigFile = filename;
    ConfigFile = "config." + ConfigFile;
    FILE* fOutConfig = fopen(ConfigFile.c_str(), "rb");
    assert(fOutConfig);

    //读压缩文件
    string CompressFile = filename;
    CompressFile = "Compress." + CompressFile;
    FILE* fOutCompress = fopen(CompressFile.c_str(), "rb");
    assert(fOutCompress);

    //读压缩文件
    string NoCompressFile = filename;
    NoCompressFile = "NoCompress." + NoCompressFile;
    FILE* fInNoCompress = fopen(NoCompressFile.c_str(), "wb");
    assert(fInNoCompress);

    //读配置文件,依照次数建树
    //每次读一行
    //long long size = 0;  atoi ()    ==>  int 
    //size >> 32  
    //size |= int
    //size <<= 32

    //用文件个数作为终止条件
    long long size = 0;
    string str1 = GetLine(fOutConfig);
    int tmp = atoi(str1.c_str());
    size >>= 32;
    size |= tmp;
    size <<= 32;
    string str2 = GetLine(fOutConfig);
    tmp = atoi(str2.c_str());
    size |= tmp;

    str1.clear();
    str2.clear();

    string str = GetLine(fOutConfig);
    while (!str.empty())
    {
        string str2;
        str2 = str.substr(2);
        _infors[str[0]]._count = atoi(str2.c_str());
        str.clear();
        str = GetLine(fOutConfig);
    }

    //创建哈弗曼树
    HuffmanTree<FileInfo> HT;
    HuffmanTreeNode<FileInfo>* root = HT.CreateTree(_infors, 256, 0);
    HuffmanTreeNode<FileInfo>* cur = root;

    //解压缩文件
    int flag = 0;
    char ch = fgetc(fOutCompress);
    while (cur && !feof(fOutCompress))
    {
        //拿出一个字符然后进行操作
        int index = 0;
        while (cur && index < 8)
        {
            char CHAR = 1;
            CHAR = (CHAR << index) & ch;

            if (CHAR)
                cur = cur->_right;
            else
                cur = cur->_left;

            if (cur && cur->_left == NULL && cur->_right == NULL)
            {
                fputc((unsigned char)cur->_weight._ch, fInNoCompress);
                ++flag;
                if (flag == size)
                {
                    break;
                }
                cur = root;
            }
            ++index;
        }

        ch = fgetc(fOutCompress);
    }

    fclose(fOutConfig);
    fclose(fOutCompress);
    fclose(fInNoCompress);
    return true;
}
文章目录
  1. 1. 压缩操作
    1. 1.1. 核心思想
    2. 1.2. 压缩代码
    3. 1.3. 解压缩