哈夫曼编/译码器
- 建立哈夫曼树:读入文件(*.source),统计文件中字符出现的频度,并以这些字符的频度作为权值,建立哈夫曼树。
- 编码:利用已建立好的哈夫曼树,获得各个字符的哈夫曼编码,并对正文进行编码,然后输出编码结果,并存入文件(*.code)中。
- 译码:利用已建立好的哈夫曼树对文件(*.code)中的代码进行译码,输出译码结果,并存入文件(*.decode)中。
- 以下代码可以实现对大部分中文和英文的编码和译码
代码如下:
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* Capacity constant: HuffmanCode holds N code pointers.  The code table is
 * filled from index 1, so at most N - 1 distinct symbols fit. */
#define N 100
/* Node count of a Huffman tree with N leaves.  Parenthesised so the macro
 * expands correctly inside larger expressions such as M * 2 or M + 1. */
#define M (2 * N - 1)

/* One Huffman tree node.  Slot 0 of a HuffmanTree is not used as a node;
 * a link value of 0 in Parent/Lchild/Rchild means "none". */
typedef struct
{
    char ch[3];                 /* symbol bytes; two-byte symbols end with ch[2] == '\0' */
    int weight;                 /* occurrence count (sum of children for inner nodes) */
    int Parent, Lchild, Rchild; /* indices into the same HuffmanTree array */
}HTNode, HuffmanTree[M + 1];

/* One symbol together with its occurrence count.  Element 0 of a weighting
 * table stores the number of symbols in its WEI field. */
typedef struct
{
    char ch[3]; /* symbol bytes, same layout as HTNode.ch */
    int WEI;    /* occurrence count */
}weighting;

/* Entry of the encoded output: s points at a code string.  Coding() keeps
 * the number of entries in the len field of element 0. */
typedef struct
{
    char* s;
    int len;
}HString;

/* Table of code strings, indexed by symbol number starting at 1. */
typedef char* HuffmanCode[N];
/* Reads a file name from standard input, loads that file's text into str
 * and echoes it to standard output. */
void read(FILE* fp, char str[]);
/* Splits str into symbols and counts how often each one occurs; the number
 * of symbols is stored in the WEI field of element 0 of the result. */
weighting* getweight(char str[], weighting w[]);
/* Prints the symbol/weight table; the entry count is read from w[0].WEI. */
void Printvalue(weighting w[]);
/* Builds the Huffman tree in ht from the n weights w[1..n] and prints it. */
void CrtHuffmanTree(HuffmanTree ht, weighting w[], int n);
/* Picks, among ht[1..k], two parentless nodes with the smallest weights. */
void select(HuffmanTree ht, int k, int* s1, int* s2);
/* Prints nodes 1..n of the tree as a table. */
void PrintTree(HuffmanTree ht, int n);
/* Derives the code string of every leaf 1..n and stores it in hc[1..n]. */
void CrtHuffmanCode(HuffmanTree ht, HuffmanCode hc, int n);
/* Prints the code strings hc[1..n], one per line. */
void PrintCode(HuffmanCode hc, int n);
/* Encodes str with the symbol table w and the codes hc, prints the result
 * and writes it to a file whose name is read from standard input. */
HString* Coding(char str[], weighting w[], HuffmanCode hc);
/* Decodes the '0'/'1' text in code using the tree whose root index is n;
 * the result is printed and written to Decode.txt. */
void Decoding(char code[], HuffmanTree ht, int n);
int main()
{
HuffmanTree H;
HuffmanCode T;
weighting WEI[10000];
HString* STR;
FILE* fp = NULL;
char str[10000];
char code[10000];
weighting* W;
printf("请输入获取电文的文件:");
read(fp, str);
W = getweight(str, WEI);
int len = W[0].WEI;
CrtHuffmanTree(H, W, len);
CrtHuffmanCode(H, T, len);
PrintCode(T, len);
STR = Coding(str, W, T);
printf("请输入获取编码的文件:");
read(fp, code);
Decoding(code, H, 2 * len - 1);
return 0;
}
/*
 * Reads a file name from standard input, loads that file into str as a
 * NUL-terminated string, echoes the content to standard output and closes
 * the file.
 *
 * fp  - ignored on entry; used only as the local stream handle (kept for
 *       interface compatibility).
 * str - destination buffer.  The callers in this file pass 10000-byte
 *       arrays, so at most 9999 bytes are stored; longer files are
 *       truncated with a warning on stderr.
 *
 * Exits with EXIT_FAILURE when the name cannot be read or is too long,
 * when the file cannot be opened, or when the file is empty.
 */
void read(FILE* fp, char str[])
{
    enum { READ_CAPACITY = 10000 }; /* size of the buffers passed by main */
    char filename[40];

    /* fgets is the portable, bounded replacement for gets_s. */
    if (fgets(filename, sizeof filename, stdin) == NULL)
    {
        printf("\nERROR!\n");
        exit(EXIT_FAILURE);
    }
    size_t name_len = strcspn(filename, "\r\n");
    if (filename[name_len] == '\0' && name_len == sizeof filename - 1)
    {
        /* No line terminator seen: the name did not fit into the buffer. */
        printf("\nERROR!\n");
        exit(EXIT_FAILURE);
    }
    filename[name_len] = '\0';

    fp = fopen(filename, "r");
    if (fp == NULL)
    {
        printf("\nERROR!\n");
        exit(EXIT_FAILURE);
    }

    int i = 0;
    int c = EOF; /* int, so that EOF is distinguishable from every byte value */
    while (i < READ_CAPACITY - 1 && (c = fgetc(fp)) != EOF)
    {
        str[i] = (char)c;
        i++;
    }
    str[i] = '\0';

    if (i == 0)
    {
        printf("文件为空!!!");
        fclose(fp);
        exit(EXIT_FAILURE);
    }
    if (c != EOF && fgetc(fp) != EOF)
    {
        fprintf(stderr, "WARNING: input truncated to %d bytes\n", READ_CAPACITY - 1);
    }

    printf("文件中读出内容为:\n");
    puts(str);
    fclose(fp);
}
/*
 * Builds a Huffman tree in ht from the n symbols w[1..n] and prints it.
 *
 * ht - tree storage; nodes 1..n become the leaves, nodes n+1..2n-1 the
 *      inner nodes, and node 2n-1 ends up as the root.
 * w  - symbol table; w[i].ch and w[i].WEI are read for i in 1..n.
 * n  - number of symbols.
 */
void CrtHuffmanTree(HuffmanTree ht, weighting w[], int n)
{
    int m = 2 * n - 1;
    int i;

    /* Leaves: copy the fixed-size symbol field byte for byte.  The field is
     * not guaranteed to be NUL-terminated, so strcpy could overrun it. */
    for (i = 1; i <= n; i++)
    {
        memcpy(ht[i].ch, w[i].ch, sizeof ht[i].ch);
        ht[i].weight = w[i].WEI;
        ht[i].Rchild = 0;
        ht[i].Lchild = 0;
        ht[i].Parent = 0;
    }

    /* Inner nodes start empty; an all-zero ch marks "no symbol". */
    for (i = n + 1; i <= m; i++)
    {
        memset(ht[i].ch, 0, sizeof ht[i].ch);
        ht[i].weight = 0;
        ht[i].Rchild = 0;
        ht[i].Lchild = 0;
        ht[i].Parent = 0;
    }

    /* Repeatedly join the two lightest parentless nodes under node i. */
    for (i = n + 1; i <= m; i++)
    {
        int s1 = 0;
        int s2 = 0;
        select(ht, i - 1, &s1, &s2);
        ht[i].weight = ht[s1].weight + ht[s2].weight;
        ht[i].Lchild = s1;
        ht[i].Rchild = s2;
        ht[s1].Parent = i;
        ht[s2].Parent = i;
    }

    /* Print exactly the nodes that were built above. */
    PrintTree(ht, m);
}
/*
 * Finds, among ht[1..k], the two nodes without a parent that have the
 * smallest weights.
 *
 * s1 - receives the index of the lightest node.
 * s2 - receives the index of the lightest node other than *s1.
 *
 * On equal weights the node with the highest index wins, in both passes.
 * An output is set to 0 when no matching node exists.  No fixed sentinel
 * weight is used, so weights of any magnitude are handled.
 */
void select(HuffmanTree ht, int k, int* s1, int* s2)
{
    int first = 0;
    int second = 0;
    int i;

    for (i = 1; i <= k; i++)
    {
        if (ht[i].Parent != 0)
        {
            continue;
        }
        if (first == 0 || ht[i].weight <= ht[first].weight)
        {
            first = i;
        }
    }

    for (i = 1; i <= k; i++)
    {
        if (i == first || ht[i].Parent != 0)
        {
            continue;
        }
        if (second == 0 || ht[i].weight <= ht[second].weight)
        {
            second = i;
        }
    }

    *s1 = first;
    *s2 = second;
}
/*
 * Derives the code string of every leaf by walking from the leaf up to the
 * root: a step up from a right child contributes '1', any other step '0'.
 *
 * ht - tree whose leaves are nodes 1..n.
 * hc - receives one heap-allocated, NUL-terminated string per leaf in
 *      hc[1..n]; hc[0] is not touched.  The strings are owned by the
 *      caller.
 * n  - number of leaves; nothing is done when n < 1.
 *
 * Exits with EXIT_FAILURE when memory cannot be allocated.
 */
void CrtHuffmanCode(HuffmanTree ht, HuffmanCode hc, int n)
{
    char* cd;
    int i;

    if (n < 1)
    {
        return;
    }

    /* A code for n leaves has at most n - 1 bits, plus the terminator. */
    cd = malloc((size_t)n);
    if (cd == NULL)
    {
        printf("\nERROR!\n");
        exit(EXIT_FAILURE);
    }
    cd[n - 1] = '\0';

    for (i = 1; i <= n; i++)
    {
        int start = n - 1;
        int c = i;
        int p = ht[i].Parent;

        /* The code is produced back to front; start > 0 keeps a malformed
         * tree from running past the beginning of the scratch buffer. */
        while (p != 0 && start > 0)
        {
            start--;
            cd[start] = (ht[p].Rchild == c) ? '1' : '0';
            c = p;
            p = ht[p].Parent;
        }

        hc[i] = malloc((size_t)(n - start));
        if (hc[i] == NULL)
        {
            printf("\nERROR!\n");
            exit(EXIT_FAILURE);
        }
        strcpy(hc[i], &cd[start]);
    }
    free(cd);
}
/*
 * Prints nodes 1..n of the tree as a tab-separated table with the columns
 * symbol, weight, Parent, Lchild and Rchild.
 *
 * Symbol column: newline is shown as \n, an empty symbol (first byte
 * '\0') as NULL, a space as ' '.  Otherwise the symbol is printed as a
 * string when ch[2] is '\0' and as its first byte alone when it is not.
 */
void PrintTree(HuffmanTree ht, int n)
{
    int i;
    printf("\nPrinthuffmantree:\n");
    printf("字符\tweight\tParent\tLchild\tRchild\n");
    for (i = 1; i <= n; i++)
    {
        const HTNode* node = &ht[i];

        if (node->ch[0] == '\n')
            printf("\\n");
        else if (node->ch[0] == '\0') /* a char is compared with '\0', not with NULL */
            printf("NULL");
        else if (node->ch[0] == ' ')
            printf("' '");
        else if (node->ch[2] == '\0')
            printf("%s", node->ch);
        else
            printf("%c", node->ch[0]);

        printf("\t%d\t%d\t%d\t%d\n", node->weight, node->Parent, node->Lchild, node->Rchild);
    }
    printf("\n\n");
}
/*
 * Writes the code strings hc[1] .. hc[n] to standard output, one per line,
 * framed by a heading line and two trailing newlines.
 */
void PrintCode(HuffmanCode hc, int n)
{
    int slot = 1;
    int remaining;

    fputs("\nPrintcode:\n", stdout);
    for (remaining = n; remaining > 0; remaining--)
    {
        puts(hc[slot]);
        slot++;
    }
    fputs("\n\n", stdout);
}
/* Returns nonzero when byte c forms a symbol on its own: an ASCII letter,
 * a space, a newline or the terminating NUL.  Every other byte is treated
 * as the first byte of a two-byte symbol. */
static int gw_is_single_byte(char c)
{
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
           c == ' ' || c == '\n' || c == '\0';
}

/*
 * Splits str into symbols and counts how often each one occurs.
 *
 * str - NUL-terminated input text.
 * w   - caller-provided table that receives the result: w[1..count] hold
 *       the distinct symbols in order of first appearance with their
 *       counts in WEI, and w[0].WEI holds count.  It must have room for
 *       one entry per distinct symbol plus element 0.
 *
 * Every stored symbol is NUL-padded to three bytes, so ch is always a
 * valid string and ch[2] is always '\0'.
 *
 * Prints the table through Printvalue() and returns w.  The result lives
 * in the caller's array, so the returned pointer stays valid after the
 * call.
 */
weighting* getweight(char str[], weighting* w)
{
    int len = (int)strlen(str);
    int count = 0;
    int i;

    for (i = 0; i < len; i++)
    {
        char sym[3] = { str[i], '\0', '\0' };
        int j;

        if (!gw_is_single_byte(str[i]))
        {
            /* Second byte of the pair; for a lone trailing byte this is
             * the terminating NUL of str. */
            sym[1] = str[i + 1];
            i++;
        }

        /* Look the symbol up among those already recorded. */
        for (j = 1; j <= count; j++)
        {
            if (memcmp(w[j].ch, sym, sizeof sym) == 0)
            {
                break;
            }
        }
        if (j > count)
        {
            /* First occurrence: append a new entry at index j. */
            count = j;
            memcpy(w[j].ch, sym, sizeof sym);
            w[j].WEI = 0;
        }
        w[j].WEI++;
    }

    memset(w[0].ch, 0, sizeof w[0].ch);
    w[0].WEI = count;
    Printvalue(w);
    return w;
}
/*
 * Prints the symbol table as two tab-separated columns (symbol, weight).
 * The number of rows is taken from w[0].WEI and the rows are w[1] onwards.
 * Newline is shown as \n and space as ' '; any other symbol is printed as
 * a string when its third byte is '\0', otherwise as its first byte only.
 */
void Printvalue(weighting w[])
{
    const int rows = w[0].WEI;

    printf("\nPrintvalue:\n");
    printf("字符\tweight\n");
    for (int row = 1; row <= rows; row++)
    {
        const weighting* entry = &w[row];
        const char first = entry->ch[0];

        if (first == '\n')
        {
            printf("\\n\t%d\n", entry->WEI);
            continue;
        }
        if (first == ' ')
        {
            printf("' '\t%d\n", entry->WEI);
            continue;
        }
        if (entry->ch[2] != '\0')
        {
            printf("%c\t%d\n", first, entry->WEI);
        }
        else
        {
            printf("%s\t%d\n", entry->ch, entry->WEI);
        }
    }
    printf("\n\n");
}
/* Returns nonzero when byte c forms a symbol on its own: an ASCII letter,
 * a space, a newline or the terminating NUL.  Every other byte is treated
 * as the first byte of a two-byte symbol. */
static int coding_is_single_byte(char c)
{
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
           c == ' ' || c == '\n' || c == '\0';
}

/*
 * Encodes str symbol by symbol, prints the concatenated codes, and writes
 * them to a file whose name is read from standard input.
 *
 * str - NUL-terminated text to encode.
 * w   - symbol table; w[0].WEI is the number of symbols, w[j].ch the
 *       bytes of symbol j.
 * hc  - code table; hc[j] is the code string of symbol j.
 *
 * Returns a table with one entry per encoded symbol: entry k's s field
 * points at the code string (shared with hc, not copied) and the len
 * field of entry 0 holds the number of entries.  The table is static
 * storage inside this function: it stays valid until the next call and
 * must not be freed.  Symbols that are missing from w are skipped.
 *
 * Exits with EXIT_FAILURE when the text has more symbols than the table
 * can hold, when the file name cannot be read or is too long, or when the
 * output file cannot be opened or closed.
 */
HString* Coding(char str[], weighting w[], HuffmanCode hc)
{
    enum { CODING_CAPACITY = 10000 }; /* matches the text buffers used by main */
    static HString HS[CODING_CAPACITY];
    FILE* fp;
    char filename[40];
    int len = (int)strlen(str);
    int length = w[0].WEI;
    int count = 0;
    int i;
    int j;

    HS[0].len = 0;
    for (i = 0; i < len; i++)
    {
        int two_byte = !coding_is_single_byte(str[i]);
        char s[3] = { str[i], '\0', '\0' };

        if (two_byte)
        {
            s[1] = str[i + 1];
            i++;
        }

        for (j = 1; j <= length; j++)
        {
            /* Two-byte symbols must match as a whole (bounded to the field
             * size); single-byte symbols match on the first byte. */
            int match = two_byte ? (strncmp(s, w[j].ch, sizeof s) == 0)
                                 : (w[j].ch[0] == s[0]);
            if (!match)
            {
                continue;
            }
            if (count == CODING_CAPACITY)
            {
                printf("\nERROR!\n");
                exit(EXIT_FAILURE);
            }
            HS[count].s = hc[j];
            count++;
            break;
        }
    }
    HS[0].len = count;

    printf("编码结果:");
    for (i = 0; i < count; i++)
    {
        printf("%s", HS[i].s);
    }
    printf("\n");

    printf("请输入要保存编码的文件:");
    /* fgets is the portable, bounded replacement for gets_s. */
    if (fgets(filename, sizeof filename, stdin) == NULL)
    {
        printf("\nERROR!\n");
        exit(EXIT_FAILURE);
    }
    size_t name_len = strcspn(filename, "\r\n");
    if (filename[name_len] == '\0' && name_len == sizeof filename - 1)
    {
        /* No line terminator seen: the name did not fit into the buffer. */
        printf("\nERROR!\n");
        exit(EXIT_FAILURE);
    }
    filename[name_len] = '\0';
    printf("\n");

    fp = fopen(filename, "w");
    if (fp == NULL)
    {
        printf("\nERROR!\n");
        exit(EXIT_FAILURE);
    }
    for (i = 0; i < count; i++)
    {
        fprintf(fp, "%s", HS[i].s);
    }
    if (fclose(fp) != 0)
    {
        printf("\nERROR!\n");
        exit(EXIT_FAILURE);
    }
    return HS;
}
/*
 * Decodes a string of '0'/'1' characters with the Huffman tree and writes
 * the recovered text to standard output and to Decode.txt.
 *
 * code - NUL-terminated bit text; characters other than '0' and '1' are
 *        ignored.
 * ht   - Huffman tree.
 * n    - index of the root node.
 *
 * Walking starts at the root: '1' follows Rchild, '0' follows Lchild.
 * When a node without a left child is reached its symbol is emitted and
 * the walk restarts at the root.  Bits left over after the last complete
 * code are dropped.  A leaf symbol is written as a string when ch[2] is
 * '\0', otherwise only its first byte is written.
 *
 * Exits with EXIT_FAILURE when Decode.txt cannot be opened.
 */
void Decoding(char code[], HuffmanTree ht, int n)
{
    printf("译码结果将保存至Decode.txt文件\n");
    FILE* fp = fopen("Decode.txt", "w");
    if (fp == NULL)
    {
        printf("\nERROR!\n");
        exit(EXIT_FAILURE);
    }

    /* A root without both children has no edges to follow, so no bit can
     * be decoded; stop here instead of indexing outside the tree. */
    if (n < 1 || ht[n].Lchild == 0 || ht[n].Rchild == 0)
    {
        fclose(fp);
        return;
    }

    int node = n;
    for (const char* bit = code; *bit != '\0'; bit++)
    {
        int next;

        if (*bit == '1')
        {
            next = ht[node].Rchild;
        }
        else if (*bit == '0')
        {
            next = ht[node].Lchild;
        }
        else
        {
            continue;
        }
        if (next == 0)
        {
            /* Malformed tree: no child in that direction; skip the bit. */
            continue;
        }
        node = next;

        if (ht[node].Lchild == 0)
        {
            if (ht[node].ch[2] == '\0')
            {
                printf("%s", ht[node].ch);
                fprintf(fp, "%s", ht[node].ch);
            }
            else
            {
                printf("%c", ht[node].ch[0]);
                fprintf(fp, "%c", ht[node].ch[0]);
            }
            node = n;
        }
    }
    fclose(fp);
}