这个 C 程序要解决的问题是:统计 HTML 文件(通过输入重定向读入)中的标签数量,然后显示所有不重复的标签及其计数。这里对标签采用一个限制性的定义:假设 HTML 标签在 '<' 之后开始,以空格或 '>' 结束,并且标签名只能由字母和数字组成。例如,<h1> 和 <span 是 HTML 标签,而 <!-- 和 </p> 不是。程序还会从命令行接受用户的选项:如果用户输入 -a,则标签按字母顺序打印;如果输入 -n,则按计数降序打印;如果没有输入任何选项,则标签按存储顺序连同计数一起打印。其他输入类似。
我的代码遇到的问题是:有时同一个标签会被打印两次,例如输出 span: 1 span: 1,而正确结果应该是 span: 2,不应把每次出现单独打印。其他标签打印正常。我找不出哪些标签会被这样打印/计数的规律。我的另一个问题是,程序还打印了一个形如 p 或 P、带有某种奇怪符号的特殊标签,看起来不正确(见故障输出)。最后一个问题是我的降序排序方法有问题:它没有正确排序,而且当我输入其他 HTML 文件时还会导致段错误(segmentation fault)。
所需输出(无排序):
HTML Tags Found:
body: 1
div: 1
p: 2
b: 2
span: 2
故障输出(无排序和排序):
$ ./countTags < A1.html
HTML Tags Found:
P/3: 307
body: 1
div: 1
p: 2
b: 2
span: 1
span: 1
$ ./countTags -a < A1.html
HTML Tags Found:
Pz▒▒: 509
b: 2
body: 1
div: 1
p: 2
span: 1
span: 1
$ ./countTags -n < A1.html
HTML Tags Found:
P: 713
我的代码:
#include "list.h"
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#define MAX_TAG_LEN 10
/*
 * Append a new tag node holding a copy of newData to the end of a list.
 *
 * head_ref: first node of an existing list. The pointer is passed by value,
 *           so this function cannot create a list out of NULL; a NULL
 *           head_ref is therefore treated as a no-op instead of being
 *           dereferenced.
 * newData:  NUL-terminated tag name. At most MAX_TAG_LEN - 1 characters are
 *           stored; longer names are truncated so the node buffer is never
 *           overrun.
 *
 * The new node starts with count = 1 and next = NULL. If memory cannot be
 * allocated the list is left unchanged.
 */
void insertNode(Node * head_ref, char newData[MAX_TAG_LEN])
{
    if (head_ref == NULL || newData == NULL) {
        return;
    }

    Node *new_node = malloc(sizeof *new_node);
    if (new_node == NULL) {
        return;
    }

    /* Bounded copy: snprintf always NUL-terminates within the buffer. */
    snprintf(new_node->data, sizeof new_node->data, "%s", newData);
    new_node->count = 1;
    new_node->next = NULL;

    /* Walk to the current tail and hook the new node behind it. */
    Node *last = head_ref;
    while (last->next != NULL) {
        last = last->next;
    }
    last->next = new_node;
}
/*
 * Re-link the list at *head so that node names are in ascending strcmp()
 * order. Nodes are moved, not copied; nodes whose names compare equal keep
 * their original relative order. On return *head points at the new first
 * node.
 */
void sortLinkedListAlphabetically(Node** head) {
    Node *ordered = NULL;
    Node *pending = *head;

    while (pending != NULL) {
        /* Detach the front node of the not-yet-sorted remainder. */
        Node *moving = pending;
        pending = pending->next;

        /* Advance past every sorted node that does not come after it. */
        Node **slot = &ordered;
        while (*slot != NULL && strcmp(moving->data, (*slot)->data) >= 0) {
            slot = &(*slot)->next;
        }

        /* Splice the node in at the position that was found. */
        moving->next = *slot;
        *slot = moving;
    }

    *head = ordered;
}
/*
 * Re-link the list at *head so that nodes are ordered by count, highest
 * first. Nodes with equal counts keep their original relative order.
 *
 * head: address of the pointer to the first node. It is updated to point at
 *       the new first node. A NULL head or an empty list is a no-op.
 *
 * The previous version compared the addresses of the data arrays instead of
 * the counts and never updated *head when the first node moved, which lost
 * nodes and produced a wrong order.
 */
void sortListDescending(Node** head) {
    if (head == NULL || *head == NULL) {
        return;
    }

    Node *sorted = NULL;
    Node *rest = *head;

    while (rest != NULL) {
        /* Detach the front node of the unsorted remainder. */
        Node *node = rest;
        rest = rest->next;

        /* Skip sorted nodes whose count is at least as large (keeps ties stable). */
        Node **slot = &sorted;
        while (*slot != NULL && (*slot)->count >= node->count) {
            slot = &(*slot)->next;
        }

        node->next = *slot;
        *slot = node;
    }

    *head = sorted;
}
/*
 * Write every node from head onwards to stdout as "<name>: <count>", one
 * node per line, followed by a single blank line. A NULL head prints only
 * the blank line.
 */
void printLinkedList(Node* head) {
    for (Node *node = head; node != NULL; node = node->next) {
        printf("%s: %d\n", node->data, node->count);
    }
    printf("\n");
}
void countTags(char input[])
{
Node * head = NULL;
head = (Node *)malloc(sizeof(Node));
int maxTags = 0;
int c;
int within_tag = 0;
char tagName[MAX_TAG_LEN];
int tagNameLen = 0;
while((c = getchar()) != EOF)
{
if(c == '<')
{
within_tag = 1;
tagNameLen = 0;
}
else if(c == '>' || c == ' ')
{
within_tag = 0;
tagName[tagNameLen] = '\0';
if(c == '>')
{ Node *last = head;
int found = 0;
//printf("%s ~", tagName);
//putchar('\n');
while(last->next != NULL)
{ //printf("%s *", last->data);
//putchar('\n');
if(strcmp(last->data, tagName) == 0)
{ //printf("%s - %s*",last->data, tagName);
//putchar('\n');
found = 1;
last->count++;
}
last = last->next;
}
if(found == 0)
{
int k;
int alpha1 = 1;
//printf("%s -", tagName);
//putchar('\n');
for(k=0; k<tagNameLen; k++)
{
if(isalnum(tagName[k]) == 0)
{
alpha1 = 0;
}
}
if(alpha1 == 1)
{
insertNode(head, tagName);
}
}
}
}
else if(within_tag)
{
if(tagNameLen < MAX_TAG_LEN )
{
tagName[tagNameLen] = c;
tagNameLen++;
}
}
}
if((strcmp(input, "") == 0))
{ printf("HTML Tags Found:\n");
printLinkedList(head);
}
else if((strcmp(input, "-a") == 0) || (strcmp(input, "-a-n") == 0) || (strcmp(input, "-n-a") == 0))
{
sortLinkedListAlphabetically(&(head->next));
printf("HTML Tags Found:\n");
printLinkedList(head);
}
else if((strcmp(input, "-n") == 0))
{
sortListDescending(&head);
printf("HTML Tags Found:\n");
printLinkedList(head);
}
}
/*
 * Entry point: forward the command-line options to countTags().
 *
 *   no option       countTags("")
 *   one option      countTags(argv[1])
 *   two or more     the first two options are concatenated (e.g. "-a" and
 *                   "-n" become "-a-n") and passed as one string
 *
 * Arguments are never copied into fixed-size buffers, so options of any
 * length are safe. Returns 0 on success, EXIT_FAILURE if the combined option
 * string cannot be allocated.
 */
int main(int argc, char *argv[])
{
    if (argc < 2) {
        countTags("");
        return 0;
    }

    if (argc == 2) {
        countTags(argv[1]);
        return 0;
    }

    /* Buffer sized for both options plus the terminating NUL. */
    size_t size = strlen(argv[1]) + strlen(argv[2]) + 1;
    char *options = malloc(size);
    if (options == NULL) {
        fprintf(stderr, "main: out of memory\n");
        return EXIT_FAILURE;
    }

    snprintf(options, size, "%s%s", argv[1], argv[2]);
    countTags(options);
    free(options);
    return 0;
}
结构的头文件:
#ifndef LIST_H
#define LIST_H
#define MAX_TAG_LEN 10
/* One entry of a singly linked list of tag names and their counts. */
typedef struct listNode
{
char data[MAX_TAG_LEN]; /* tag name stored as a C string; buffer is MAX_TAG_LEN bytes including the NUL */
int count; /* occurrence counter for this tag */
struct listNode * next; /* following node, or NULL at the end of the list */
} Node;
#endif
通过输入重定向输入的HTML文件:
<body lang=EN-CA link=blue vlink="#954F72">
<div class=WordSection1>
<p class=MsoNormal><b><span lang=EN-US style='font-size:14.0pt;font-family: "Times New Roman",serif'>Sample</span></b></p>
<p class=MsoNormal><b><span lang=EN-US style='font-size:14.0pt;font-family: "Times New Roman",serif'>Problem 0</span></b></p>
1条答案
按热度按时间vbkedwbf1#
这里有两个 bug:
1. 未初始化的 head 节点
你创建了一个 head 节点,但没有初始化它。这可以解释为什么输出中的第一个标签是“奇怪的”。此外,之后使用这些未初始化的值时,你的程序可能会崩溃。
在找到第一个标签之前,你可能不应该创建 head 节点。
2. 没有比较最后一个节点的标签名
要检查标签是否已在列表中,你使用了循环 while(last->next != NULL)。由于循环条件是 last->next != NULL,循环会在到达最后一个节点时结束,你永远不会对最后一个节点做字符串比较。这就是 span 在输出中出现两次的原因。请尝试改用 while(last != NULL)。