我正试图为patricia/radix树的大学应用程序编写代码,该树将从txt文件中的每行读取的单词插入树中,因此如果我读取一个文件,该文件说
roman
romance
romantic
其建议被实现为
(root)
|
roman$
/ \
ce$ tic$
我还添加了检查点,这样我就可以测试程序在死亡之前能走多远,所以我的结构被设置为这样:
typedef struct no no; // declaration of the struct no
typedef struct no {
char *str; // pointer to store a string
struct no *child[26]; // Array of pointers to all possible children of the tree
int isword; // Flag to know if its a full word
int isroot; // Flag to know if its the root of the tree
} tree;
我的insertintree声明:
no* InsertWord(no *r, char *str){
printf("checkpoint 5 %s\n", str);
//caso 1, we are at the root
if(r->isroot){
r->child[r->str[0] - 'a'] = InsertWord(r->str[r->str[0] - 'a'], str);
return r;
}
//caso 2, found an empty node
if(r->str == NULL){
strcpy(r->str, str);
r->isword = 1;
printf("word %s inserted\n", str);
return r;
}
//caso 3, word is a perfect fit for the prefix
if(strcmp(r->str, str)){
r->isword = 1;
printf("word %s inserted\n", str);
return r;
}
int i;
for(i = 0; str[i] == r->str[i]; i++);
//caso 4, the prefix fits the first part of the word, but there is still more to be inserted
if(i == strlen(r->str)){
r->child[str[i] - 'a'] = InsertWord(r->child[str[i] - 'a'], str + i);
return r;
}
//caso 5, the word fits the prefix, but there is still more prefix
if(i == strlen(str)){
char *aux1;
char *aux2;
strncpy(aux1, r->str, i);
strcpy(aux2, r->str + i);
no *aux = r;
for(int j = 0; j < 26; j++){
free(r->filhos[j]);
}
r->child[aux2[0] - 'a'] = aux;
strcpy(r->str, aux1);
strcpy(r->child[aux2[0] - 'a'], aux2);
return r;
}
//caso 6, the first part of the word fits with the first part of the preix, but then they differ
if(i < strlen(str) && i < strlen(r->str)){
char *aux1;
char *aux2;
strncpy(aux1, r->str, i);
strcpy(aux2, r->str + i);
no *aux = r;
for(int j = 0; j < 26; j++){
free(r->child[j]);
}
r->child[aux2[0] - 'a'] = aux;
strcpy(r->str, aux1);
strcpy(r->child[aux2[0] - 'a'], aux2);
r->child[str[i] - 'a'] = InsertWord(r->child[str[i] - 'a'], str + i);
return r;
}
}
我现在的输出看起来像这样
checkpoint 1
checkpoint 2
checkpoint 3 roman
checkpoint 4 roman
checkpoint 5
checkpoint 5
然后程序死亡,检查点1到4都与读取文件和验证字的函数相关,它们似乎都运行良好
我试着添加一个动态分配到程序,似乎没有帮助的问题,我知道我应该提供一个最小的可复制的例子,但我不知道问题在哪里(代码不是原来在英语我尽力提供翻译,但如果任何部分仍然是葡萄牙语,我没有注意到我很抱歉)
由于它是我的要求,我将添加其余的函数,调用这个函数,它有点长,但在这里你去
no *ValidateWord(no *r, char *str) {
int i;
printf("checkpoint 3 %s\n", str);
for (i = 0; str[i] != '\0'; i++) {
if (str[i] < 'A' || (str[i] > 'Z' && str[i] < 'a') || str[i] > 'z') {
return r;
} else if (str[i] >= 'A' && str[i] <= 'Z') {
str[i] = str[i] + 32;
}
}
printf("checkpoint 4 %s\n", str);
return InsertWord(r, str);
}
no *LoadFile(no *r) {
printf("checkpoint 1");
printf("Insert the name of the chosen file: ");
char namefile[100];
scanf("%s", namefile);
FILE *fp1;
fp1 = fopen(namefile, "r");
if (fp1 == NULL) {
printf("couldnt open the file.\n");
return r;
}
char str[100];
while (fgets(str, sizeof(str), fp1)) {
printf("checkpoint 2 %s\n", str);
str[strcspn(str, "\n")] = '\0';
r = ValidateWord(r, str);
}
fclose(fp1);
return r;
}
这是调用所有这些的主文件
void menu(no* r) {
int c = 0;
printf("choose an operation:\n");
printf("Load File - 1\nCheck Words - 2\nPrint Dictionary - 3\nLoad Stopwords File - 4\nExit - 0\n\n");
scanf("%d", &c);
while (c != 0) {
if (c == 1)
r = LoadFile(r); // Calls the LoadFile function
else if (c == 2)
CheckWords(r); // calls the function CheckWords
else if (c == 3)
PrintDictionary(r); // Calls the fucntion PrintDictionary
else if (c == 4)
r = LoadStopwordsFile(r); // calls the LoadStopwordsFile function
printf("choose an operation:\n");
printf("Load File - 1\nCheck Words - 2\nPrint Dictionary - 3\nLoad Stopwords File - 4\nExit - 0\n\n");
scanf("%d", &c);
}
}
int main() {
no* r = (no*) malloc(sizeof(no)); // alocates memory for the root
if (r == NULL) {
printf("Error alocating memory.\n");
return 1; // closes the aplication
}
r->isroot = 1; // defines the isroot flag to true
menu(&r); // calls the menu function
free(r); // frees the memory alocated for the root
return 0;
}
1条答案
按热度按时间nuypyhwy1#
Patrica树是隐式二进制基数树的一种特殊情况,它比这里的树更紧凑(当然是学究式的)。
我不确定你是否需要
isroot
;是做什么用的?空树?isword
是$
的带外符号;你不需要(或想要)两个符号来表示同一件事。(对于以null结尾的字符串,自然的trie条目是使用'\0'
作为结尾后缀$
。Morin, Patrick. "Data Structures for Strings"
作为一种优化,边缘标签可以通过使用两个指向字符串的指针(用于第一个和最后一个元素)以恒定大小存储
另一种选择是不复制字符串,而只是指向子字符串,而不是复制和操作字符串。
编辑:在您跟踪的Java code in the tutorial中,它使用带外
isword
并复制子字符串。这看起来像是在分配节点,而不是字符串的副本,Java会自动处理这些副本。我认为以下是一个很好的类比:子串存储在
struct
末尾的flexible array member中;这样,节点和串的副本处于连续的存储器分配中,这减少了碎片和对malloc
的调用。