C中链表程序计数和排序的问题

ryoqjall  于 2023-04-19  发布在  其他
关注(0)|答案(1)|浏览(93)

这个C程序要解决的问题是:统计HTML文件(通过输入重定向读入)中的标签数量,然后显示所有不重复的标签及其计数。标签的定义有如下限制:假设HTML标签在'<'之后开始,以空格或'>'结束,并且标签名只能由字母和数字组成。例如,<h1>、<span 是HTML标签,而 <!--、</p> 不是。程序还从命令行接受用户的参数:如果用户输入-a,则标签按字母顺序打印;如果输入-n,则按计数降序打印;如果没有输入参数,则标签连同计数按存储顺序打印。其他输入类似。
我的代码面临的问题是:有时候同一个标签会被打印两次,比如 span: 1 span: 1,而正确的结果应该是 span: 2,不应把每次出现分开打印。其他标签打印正常。我找不出为什么只有某些标签会这样被打印/计数,也看不出其中的规律。我的另一个问题是,程序还会打印一个形如 p 或 P、后面带着某些奇怪符号的特殊标签,看起来不对(见错误输出)。最后一个问题是按降序排序的方法有问题:它没有正确排序,而且当我输入不同的HTML文件时,它还会导致段错误。
所需输出(无排序):

HTML Tags Found:
body: 1
div: 1
p: 2
b: 2
span: 2

故障输出(无排序和排序):

$ ./countTags < A1.html
HTML Tags Found:
P/3: 307
body: 1
div: 1
p: 2
b: 2
span: 1
span: 1

$ ./countTags -a < A1.html
HTML Tags Found:
Pz▒▒: 509
b: 2
body: 1
div: 1
p: 2
span: 1
span: 1

$ ./countTags -n < A1.html
HTML Tags Found:
P: 713

我的代码:

#include "list.h"
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

#define MAX_TAG_LEN 10

/*
 * Append a new node holding a copy of newData, with count set to 1, to the
 * end of the list that starts at head_ref.
 *
 * head_ref  First node of an existing list. The pointer is passed by value,
 *           so this function has no way to hand a new head back to the
 *           caller; a NULL head_ref is therefore rejected and nothing is
 *           inserted.
 * newData   NUL-terminated tag name. At most MAX_TAG_LEN - 1 characters are
 *           stored; a longer name is truncated so the node's buffer is never
 *           overrun.
 *
 * On allocation failure a message is written to stderr and the list is left
 * unchanged.
 */
void insertNode(Node * head_ref, char newData[MAX_TAG_LEN])
{
    /* Validate before allocating so the early return cannot leak a node. */
    if (head_ref == NULL || newData == NULL)
    {
        return;
    }

    Node *new_node = malloc(sizeof *new_node);
    if (new_node == NULL)
    {
        fprintf(stderr, "insertNode: out of memory\n");
        return;
    }

    /* Bounded copy: always NUL-terminates within the data array. */
    snprintf(new_node->data, sizeof new_node->data, "%s", newData);
    new_node->count = 1;
    new_node->next = NULL;

    /* Walk to the current tail and hook the new node on after it. */
    Node *last = head_ref;
    while (last->next != NULL)
    {
        last = last->next;
    }
    last->next = new_node;
}

/*
 * Reorder the list at *head into ascending strcmp() order of each node's
 * data string, relinking the existing nodes (insertion sort).
 * Nodes whose strings compare equal keep their original relative order.
 * On return *head points at the first node of the sorted list.
 */
void sortLinkedListAlphabetically(Node** head) {
  Node *sorted = NULL;
  Node *pending = *head;

  while (pending != NULL) {
    /* Detach the next unsorted node. */
    Node *node = pending;
    pending = node->next;

    /* Advance past every sorted node that is <= the detached one; `slot`
       ends up addressing the link the node must be spliced into. */
    Node **slot = &sorted;
    while (*slot != NULL && strcmp(node->data, (*slot)->data) >= 0) {
      slot = &(*slot)->next;
    }

    node->next = *slot;
    *slot = node;
  }

  *head = sorted;
}

/*
 * Reorder the list at *head so that nodes appear in descending order of
 * their count field, relinking the existing nodes (insertion sort).
 * Nodes with equal counts keep their original relative order.
 *
 * head  Address of the list's head pointer; updated to point at the node
 *       with the largest count. A NULL head or an empty list is a no-op.
 */
void sortListDescending(Node** head) {
    if (head == NULL) {
        return;
    }

    Node *sorted = NULL;
    Node *pending = *head;

    while (pending != NULL) {
        /* Detach the next unsorted node. */
        Node *node = pending;
        pending = node->next;

        /* Compare the numeric counts (not the data arrays) and skip every
           sorted node whose count is >= this one, which keeps ties stable. */
        Node **slot = &sorted;
        while (*slot != NULL && (*slot)->count >= node->count) {
            slot = &(*slot)->next;
        }

        node->next = *slot;
        *slot = node;
    }

    /* Publish the new first node so the caller never loses part of the list. */
    *head = sorted;
}

/*
 * Write every node of the list starting at head to stdout, one per line in
 * the form "<data>: <count>", followed by a single blank line.
 */
void printLinkedList(Node* head) {
  for (Node *node = head; node != NULL; node = node->next) {
    printf("%s: %d\n", node->data, node->count);
  }

  printf("\n");
}

void countTags(char input[])
{   
    Node * head = NULL;
    head = (Node *)malloc(sizeof(Node));

    int maxTags = 0;
    int c;
    int within_tag = 0;
    char tagName[MAX_TAG_LEN];
    int tagNameLen = 0;

    while((c = getchar()) != EOF)
    {   
        if(c == '<')
        {   
            within_tag = 1;
            tagNameLen = 0;
        }
        else if(c == '>' || c == ' ')
        {   
            within_tag = 0;
            tagName[tagNameLen] = '\0';
            
                if(c == '>')
                {   Node *last = head; 
                    int found = 0;
                    //printf("%s ~", tagName);
                    //putchar('\n');
                    while(last->next != NULL)
                    {   //printf("%s *", last->data);
                        //putchar('\n');
                        if(strcmp(last->data, tagName) == 0)
                        {   //printf("%s - %s*",last->data, tagName);
                            //putchar('\n');
                            found = 1;
                            last->count++;
                        }
                        
                        last = last->next;
                    }

                    if(found == 0)
                    {   
                        int k;
                        int alpha1 = 1;
                       
                            //printf("%s -", tagName);
                            //putchar('\n');
                            for(k=0; k<tagNameLen; k++)
                            {   
                                if(isalnum(tagName[k]) == 0)
                                {  
                                    alpha1 = 0;
                                }
                                
                            }
                            if(alpha1 == 1)
                            {   
                                insertNode(head, tagName);
                            }
                        
                    }
                }

        }
        else if(within_tag)
        {  
            if(tagNameLen < MAX_TAG_LEN )
            {   
                tagName[tagNameLen] = c;
                tagNameLen++;
            }
        }
    }
    

    if((strcmp(input, "") == 0))
    {   printf("HTML Tags Found:\n");
        printLinkedList(head);
    }
    else if((strcmp(input, "-a") == 0) || (strcmp(input, "-a-n") == 0) || (strcmp(input, "-n-a") == 0))
    {
        sortLinkedListAlphabetically(&(head->next));
        printf("HTML Tags Found:\n");
        printLinkedList(head);

    }
    else if((strcmp(input, "-n") == 0))
    {  
        sortListDescending(&head);
        printf("HTML Tags Found:\n");
        printLinkedList(head);
    }

}

/*
 * Program entry point.
 *
 * With no arguments countTags("") is called; with one argument that argument
 * is passed through unchanged; with two or more, the first two arguments are
 * concatenated (e.g. "-a" "-n" becomes "-a-n") and passed as a single option
 * string. Arguments of any length are handled without fixed-size buffers.
 *
 * Returns 0 on success, EXIT_FAILURE if the option string cannot be
 * allocated.
 */
int main(int argc, char *argv[])
{
    if (argc < 2)
    {
        countTags("");
        return 0;
    }

    if (argc == 2)
    {
        countTags(argv[1]);
        return 0;
    }

    /* Room for both arguments plus the terminating NUL. */
    size_t joinedSize = strlen(argv[1]) + strlen(argv[2]) + 1;
    char *joined = malloc(joinedSize);
    if (joined == NULL)
    {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    snprintf(joined, joinedSize, "%s%s", argv[1], argv[2]);
    countTags(joined);
    free(joined);

    return 0;
}

结构的头文件:

#ifndef LIST_H
#define LIST_H

/* Size in bytes of the buffer that stores a tag name in each node. */
#define MAX_TAG_LEN 10

struct listNode;
typedef struct listNode Node;

/* One entry of the singly linked tag list. */
struct listNode
{
    char data[MAX_TAG_LEN];  /* tag name, stored as a C string */
    int count;               /* occurrence counter for this tag */
    Node *next;              /* following node, or NULL at the end of the list */
};

#endif

通过输入重定向输入的HTML文件:

<body lang=EN-CA link=blue vlink="#954F72">
<div class=WordSection1>
<p class=MsoNormal><b><span lang=EN-US style='font-size:14.0pt;font-family: "Times New Roman",serif'>Sample</span></b></p>
<p class=MsoNormal><b><span lang=EN-US style='font-size:14.0pt;font-family: "Times New Roman",serif'>Problem 0</span></b></p>
vbkedwbf

vbkedwbf1#

这里有两个bug:

1.未初始化的head节点

对于下面这段代码:

Node * head = NULL;
head = (Node *)malloc(sizeof(Node));

你创建了一个head节点,但没有初始化它。这可以解释为什么输出中的第一个标记是“奇怪的”。
也就是说,当您以后使用这些未初始化的值时,您的程序可能会崩溃。
在找到第一个标记之前,您可能不应该创建head节点。

2.不比较最后一个节点的tag-name

要检查标签是否已在列表中,请执行以下操作:

while(last->next != NULL)
                {
                    if(strcmp(last->data, tagName) == 0)
                    {
                        found = 1;
                        last->count++;
                    }
                    
                    last = last->next;
                }

由于循环条件是while(last->next != NULL),循环在到达最后一个节点时就结束了,最后一个节点永远不会参与字符串比较。这就是span在输出中出现两次的原因。
请尝试使用while(last != NULL)

相关问题