700字范文 > 数据结构第二次实验-赫夫曼编码及其应用

数据结构第二次实验-赫夫曼编码及其应用

时间：2021-05-27 20:55:50

相关推荐

数据结构第二次实验-赫夫曼编码及其应用

一、实验目的

1．目的：掌握赫夫曼（Huffman）树和赫夫曼编码的基本思想和应用。

2．任务：实现文件中数据的加解密与压缩。

二、实验内容及要求

1、任务描述

实验内容：将硬盘上的一个文本文件进行加密，比较加密文件和原始文件的大小差别；

对加密文件进行解密，比较原始文件和解码文件的内容是否一致。

输入和输出：

（1）输入：硬盘上给定的原始文件及文件路径。

（2）输出：

◆ 硬盘上的加密文件及文件路径；

◆ 硬盘上的解码文件及文件路径；

◆ 原始文件和解码文件的比对结果。

实验要求：

◆ 提取原始文件中的数据（包括中文、英文或其他字符），根据数据出现的频率为权

重，构建 Huffman 编码表；

◆ 根据 Huffman 编码表对原始文件进行加密，得到加密文件并保存到硬盘上；

◆ 将加密文件进行解密，得到解码文件并保存点硬盘上；

◆ 比对原始文件和解码文件的一致性，得出是否一致的结论。

2、主要数据类型与变量

通过从头遍历链表中的元素并依次与待插入的结点的权值进行比较，从而确定待插入结点的位置，使之从后形成的链表中的元素都按权值从大到小排列。

Find_and_insert函数：

将待插入结点的high值和low值依次与head链表中的元素进行比较，如果与之相同则代表是相同的字符。将该结点的权值加一，然后弹出链表重新比较权值的大小再将其插入。

Output函数：

遍历链表中的每一个元素，并将信息输出。

tree_node_init函数：

初始化树的每一个结点。

build_tree函数：

通过遍历链表将其每一个元素依次插入到树中。

print_huffman_pre函数：

按照参数flag的值对树进行遍历输出。

update_tree函数：

通过层级遍历的顺序进行编码，如果从该节点向左，则左孩子的编码为该节点编码 + '0’；如果从该节点向右，则右孩子的编码为该节点编码 + '1’；

Coding函数：

从已有的树的结点中依次取出结点，通过insert算法小将其按照权值的大小插入到树中，直到权值为1，则最后的这颗树就是所求的哈夫曼树。

Decoding函数：

对文件进行解码，对比输入输出文件的名称，然后以读、写方式打开文件对字符进行对比，统计中文字符和非中文字符。

三、测试

1、方案

创建测试记事本，输入文字并以ANSI的编码格式保存。

2、结果

1）统计信息：

2）词频统计

3）编码详细

4）文件输出信息

编码与解码文件：

四、总结与讨论

本次实验涉及到了很多方面的知识点，包括编码格式Ascii或GBK的选择、文件的读写、赫夫曼树的建立、赫夫曼编码及解码等，通过本次实验，我进一步掌握了赫夫曼树的关键-最优二叉树，即带权路径长度最小的二叉树，而赫夫曼编码使用的编码表是根据每一个字符出现的估算概率而建立起来的。赫夫曼编码可以将输入字符串编译成二进制代码，输入二进制代码也可以将其解码为字符串。

同时我也认识到根据不同的需求，采用不同的数据存储方式，不一定要用栈、二叉树等等高级类型，有时用基本的一维数组，只要运用得当也能达到相同的效果甚至更佳。

总之本次实验确实很有难度，虽然书上有关赫夫曼树的每一个知识点我都看了并且代码也基本都理解，但是实际利用赫夫曼树的思想进行上机实验还是感觉有些吃力，尤其是赫夫曼最小路径的存取，赫夫曼编码及译码的应用范围等一系列问题使我对数据结构更加树立起了学习之心，今后还要及时发现自己平时学习的不足和薄弱环节，从而加以弥补。

附：程序的源代码

#include <stdio.h>#include<string.h>#include<stdlib.h>typedef struct node* LIST;typedef struct node* TREE;struct node{int high;//存储Ascii码的所有字符int low;//协助存储一个汉字int weight;//统计字符个数int code_index;//编码指针char code[100];//编码数组TREE Left;//哈夫曼树左孩子TREE Right;//哈夫曼树右孩子LIST NEXT;//下一个结点};void insert(LIST head, LIST tmp);//有序插入结点LIST find_and_insert(LIST head, LIST tmp);//弹出内部的结点，然后调用insert函数void output(LIST head);//输出这个读取文件的所有字符统计情况LIST init_LIST(int high, int low, int weight);//初始化链表信息TREE tree_node_init(int high, int low, int weight);//初始化哈夫曼树各个结点TREE build_tree(LIST head);//建立哈夫曼树void print_huffman_pre(TREE Tree, int flag);//前序输出,flag 控制是否打印信息void update_tree(TREE Tree);//更新树的信息，也即更新编码信息void save_file(TREE* a, int right, TREE Tree);//保存文件void to_free_tree(TREE Tree);//释放树void to_free_list(LIST head);//释放链表void coding();//编码void decoding();//解码TREE queue[1000];//树int queue_index = 0;//索引int sum_bit_coding = 0;//编码bitint sum_bit_decoding = 0;//解码bitchar file_in[100] = "";//文件默认打开路径char file_out[100];void init_main();void update_file_out(char file_in[]){int i;for (i = strlen(file_in) - 1; i >= 0; i--)if (file_in[i] == '\\')break;int j;for (j = 0; j <= i; j++)file_out[j] = file_in[j];}int main(){init_main();return 0;}void init_main(){LIST head = init_LIST(0, 0, 0);FILE* P;while (1){printf("请输入文件的路径\n");gets(file_in);P = fopen(file_in, "r");if (P == NULL){printf("文件打开失败\n请重新输入\n");continue;}else{printf("打开成功，请选择操作命令：\n");break;}}update_file_out(file_in);int ch;while ((ch = fgetc(P)) != EOF){LIST tmp = init_LIST(ch, -1, 1);if (ch > 128)tmp->low = fgetc(P);insert(head, find_and_insert(head, tmp));}//output(head);TREE Tree = build_tree(head);coding(Tree);print_huffman_pre(Tree->NEXT, 0);update_tree(Tree);queue_index = 0;print_huffman_pre(Tree->NEXT, 0);decoding();fclose(P);while (1){int flag;printf("---------------操作命令集---------------\n");printf("****************************************\n");printf("* 1.统计信息 *\n");printf("* 2.词频统计 *\n");printf("* 3.编码详细 *\n");printf("* 4.文件输出信息 *\n");printf("* 5.Return *\n");printf("****************************************\n");scanf("%d", &flag);switch (flag){case 1:printf("-----------------------------------\n");printf("文件已经保存，共写入%d比特\n", sum_bit_coding);printf("从文件读取%d比特\n", 8 * (Tree->high + Tree->low * 2));printf("压缩率是%.3f\n", 1.0 * sum_bit_coding / 8 / (Tree->high + Tree->low * 2));printf("共写入解码文件%d比特\n", sum_bit_decoding);printf("-----------------------------------\n");break;case 2:output(head);break;case 3:print_huffman_pre(Tree->NEXT, 1);break;case 4:{//路径codingchar coding_file_name[100];strcpy(coding_file_name, file_out);strcat(coding_file_name, "coding.txt");//路径encodingchar encoding_file_name[100];strcpy(encoding_file_name, file_out);strcat(encoding_file_name, "encoding.txt");printf("输入文件的路径为%s\n", file_in);printf("加密文件的路径为%s\n", coding_file_name);printf("解码文件的路径为%s\n", encoding_file_name);break;}case 5:return;}}to_free_tree(Tree);to_free_list(head);}void decoding(){sum_bit_decoding = 0;//路径codingchar coding_file_name[100];strcpy(coding_file_name, file_out);strcat(coding_file_name, "coding.txt");//路径encodingchar encoding_file_name[100];strcpy(encoding_file_name, file_out);strcat(encoding_file_name, "encoding.txt");FILE* in = fopen(coding_file_name, "r");FILE* out = fopen(encoding_file_name, "wb");int ch;int str_index = 0, left;char str[100];while ((ch = fgetc(in)) != EOF){str[str_index++] = ch;str[str_index] = '\0';for (left = 0; left < queue_index; left++){if (strcmp(queue[left]->code, str) == 0){//bitsif (queue[left]->high > 128) sum_bit_decoding += 16;else sum_bit_decoding += 8;if ((char)queue[left]->high == '\n'){fprintf(out, "\r\n");}else{fprintf(out, "%c", queue[left]->high);if (queue[left]->high > 128) fprintf(out, "%c", queue[left]->low);}str_index = 0;break;}}}fclose(in);fclose(out);}void to_free_list(LIST head){LIST P = head;while (head->NEXT){P = head->NEXT;head->NEXT = head->NEXT->NEXT;free(P);}free(head);}void to_free_tree(TREE Tree){if (!Tree) return;to_free_tree(Tree->Left);to_free_tree(Tree->Right);free(Tree);}void save_file(TREE* a, int right, TREE Tree){int left;sum_bit_coding = 0;FILE* P = fopen(file_in, "r");//路径char coding_file_name[100];strcpy(coding_file_name, file_out);strcat(coding_file_name, "coding.txt");FILE* out = fopen(coding_file_name, "wb");if (P == NULL)printf("文件打开失败\n");int ch;while ((ch = fgetc(P)) != EOF){LIST tmp = init_LIST(ch, -1, 1);if (ch > 128)tmp->low = fgetc(P);// 查找for (left = 0; left < right; left++){if (a[left]->high == tmp->high){if (tmp->high > 128 && tmp->low == a[left]->low){fprintf(out, "%s", a[left]->code);sum_bit_coding += strlen(a[left]->code);}if (tmp->high <= 128){fprintf(out, "%s", a[left]->code);sum_bit_coding += strlen(a[left]->code);}}}free(tmp);}fclose(P);fclose(out);}void update_tree(TREE Tree){TREE a[1000];int left = 0, right = 0;if (!Tree) return;a[right++] = Tree->NEXT;while (left < right){//左if (a[left]->Left){a[right++] = a[left]->Left;strcpy(a[left]->Left->code, a[left]->code);a[left]->Left->code_index = strlen(a[left]->code);a[left]->Left->code[a[left]->Left->code_index++] = '0';}//右if (a[left]->Right){a[right++] = a[left]->Right;strcpy(a[left]->Right->code, a[left]->code);a[left]->Right->code_index = strlen(a[left]->code);a[left]->Right->code[a[left]->Right->code_index++] = '1';}left++;}save_file(a, right, Tree);}TREE tree_node_init(int high, int low, int weight){TREE tmp = (TREE)malloc(sizeof(struct node));tmp->high = high;tmp->low = low;tmp->weight = weight;strcpy(tmp->code, "\0");tmp->code_index = 0;tmp->Right = NULL;tmp->Left = NULL;tmp->NEXT = NULL;return tmp;}TREE build_tree(LIST head){TREE Tree = tree_node_init(head->high, head->low, 0);TREE T = Tree;LIST P = head->NEXT;while (P){T->NEXT = tree_node_init(P->high, P->low, P->weight);T = T->NEXT;//结点数Tree->weight++;P = P->NEXT;}return Tree;}void coding(TREE Tree){while (Tree->weight > 1){TREE t1 = Tree->NEXT;Tree->NEXT = Tree->NEXT->NEXT;TREE t2 = Tree->NEXT;Tree->NEXT = Tree->NEXT->NEXT;//add t1 and t2 to t; 合并树TREE t = tree_node_init(-1, -1, t1->weight + t2->weight);//左树t->Left = t1;//右树t->Right = t2;insert(Tree, t);Tree->weight--;}}void print_huffman_pre(TREE Tree, int flag){//遍历if (!Tree) return;if ((char)Tree->high == '\n'){queue[queue_index++] = Tree;if (flag)printf("\\n weight == %d coding = %s\n", Tree->weight, Tree->code);}else if (Tree->high > 128){queue[queue_index++] = Tree;if (flag){putchar(Tree->high);putchar(Tree->low);printf(" weight == %d coding = %s\n", Tree->weight, Tree->code);}}else if (Tree->high != -1){queue[queue_index++] = Tree;if (flag){putchar(Tree->high);printf(" weight == %d coding = %s\n", Tree->weight, Tree->code);}}print_huffman_pre(Tree->Left, flag);print_huffman_pre(Tree->Right, flag);}LIST find_and_insert(LIST head, LIST tmp){//统计汉字和其它字符的不重复个数if (tmp->low != -1) head->low++;//tem_>low不为1代表其为中文字符else head->high++;LIST P = head;while (P->NEXT){//这个字符相同if (P->NEXT->high == tmp->high){//汉字相同情况if (P->NEXT->low != -1 && tmp->low != -1 && P->NEXT->low == tmp->low){//取出当前结点LIST found = init_LIST(P->NEXT->high, P->NEXT->low, P->NEXT->weight + 1);//删除LIST del = P->NEXT;P->NEXT = P->NEXT->NEXT;del->NEXT = NULL;free(del);return found;}if (P->NEXT->low == -1 && tmp->low == -1){//取出当前结点LIST found = init_LIST(P->NEXT->high, P->NEXT->low, P->NEXT->weight + 1);//删除LIST del = P->NEXT;P->NEXT = P->NEXT->NEXT;del->NEXT = NULL;free(del);return found;}}P = P->NEXT;}return tmp;}void insert(LIST head, LIST tmp){LIST P = head;while (P->NEXT){if (tmp->weight < P->NEXT->weight)break;P = P->NEXT;}//找不到位置if (!P->NEXT){P->NEXT = tmp;return;}//inserttmp->NEXT = P->NEXT;P->NEXT = tmp;}void output(LIST head){LIST P = head->NEXT;while (P){if ((char)P->high == '\n'){printf("字符 \\n 个数是%d\t占用字节为%d\t总字节为%d\n", P->weight, P->low == -1 ? 1 : 2, P->weight * (P->low == -1 ? 1 : 2));P = P->NEXT;continue;}printf("字符 ");putchar(P->high);if (P->high > 128)putchar(P->low);printf(" 个数是%d\t占用字节为%d\t总字节为%d\n", P->weight, P->low == -1 ? 1 : 2, P->weight * (P->low == -1 ? 1 : 2));P = P->NEXT;}printf("总字节数为%d\n", head->high + head->low * 2);}LIST init_LIST(int high, int low, int weight){LIST tmp = (LIST)malloc(sizeof(struct node));tmp->high = high;tmp->low = low;tmp->weight = weight;tmp->NEXT = NULL;return tmp;}

本内容不代表本网观点和政治立场，如有侵犯你的权益请联系我们处理。

网友评论

网友评论仅供其表达个人看法，并不表明网站立场。