需要帮助从c文件中提取注解

plicqrtu 于 2023-04-11 发布在其他

关注(0)|答案(3)|浏览(126)

我只需要一些指导：如何在 Unix 命令行下用 C 语言从输入文本文件中提取注释，并把它们写入输出文件。我不需要现成的代码，只希望有人能给我一些思路，谢谢。下面是我想要的效果。

输入：

If the input file input_0.txt contains 
/* This is a single-line C comment */ 
#include <stdio.h>
/******  
* This is a nicely formatted  
* multi-line comment.  
******/ 
int main(int argc, char **argv) 
{   
  // This is a C++ comment. 
}

输出：

Then the execution of the program would be as follows. 
$ ./Comments < input_0.txt 
This is a single-line C comment 
This is a nicely formatted 
multi-line comment. 
This is a C++ comment.

这是我的代码，是我在 David C. Rankin 的代码基础上修改的。

#include <stdio.h>
#include <string.h>
#include <ctype.h>

#define MAXC 1024

/*
 * Print to stdout every line of fp that contains, or lies inside, a C or
 * C++ comment.  Lines longer than MAXC - 1 characters are read (and printed)
 * in several pieces.
 *
 * Limitations: comment delimiters inside string or character literals are
 * not recognised as such, and a delimiter split across two pieces of an
 * over-long line is missed.
 */
static void print_comment_lines (FILE *fp)
{
    int inmulti = 0,    /* inside a multi-line comment */
        insingle = 0,   /* inside a single-line comment */
        longline = 0;   /* previous read stopped before the end of its line */
    char buf[MAXC] = "";

    while (fgets (buf, MAXC, fp)) {     /* read up to MAXC - 1 chars into buf */
        char *p = buf;
        size_t len = strlen (buf);
        int show = inmulti;     /* text inside a multi-line comment is shown */

        if (!longline)          /* a single-line comment ends with its line */
            insingle = 0;

        if (insingle)           /* continuation of an over-long // comment */
            show = 1;
        else {
            while (*p) {        /* look at every delimiter in this piece */
                if (inmulti) {
                    char *end = strstr (p, "*/");
                    if (!end)
                        break;          /* comment continues on next read */
                    inmulti = 0;
                    p = end + 2;
                }
                else {
                    p = strchr (p, '/');
                    if (!p)
                        break;          /* no further candidate */
                    if (p[1] == '/') {  /* rest of the line is a comment */
                        insingle = 1;
                        show = 1;
                        break;
                    }
                    if (p[1] == '*') {  /* start of a multi-line comment */
                        inmulti = 1;
                        show = 1;
                        p += 2;
                    }
                    else
                        p++;            /* a lone '/', e.g. a division */
                }
            }
        }

        if (show)
            printf ("%s", buf);

        /* remember whether this read reached the end of the line */
        longline = len && buf[len - 1] != '\n';
    }
}

/*
 * Copy in to out with all C and C++ comments removed.  String and character
 * literals are copied verbatim, so delimiters inside them are left alone.
 * The newline that terminates a // comment is kept, so lines are not joined.
 */
static void strip_comments (FILE *in, FILE *out)
{
    enum { CODE, SLASH, LINE, BLOCK, BLOCK_STAR, QUOTE, QUOTE_ESC } state = CODE;
    int quote = 0,      /* the character that opened the current literal */
        ch;

    while ((ch = fgetc (in)) != EOF) {
        switch (state) {
            case CODE:
                if (ch == '/')
                    state = SLASH;      /* hold the '/' until we know more */
                else {
                    if (ch == '"' || ch == '\'') {
                        quote = ch;
                        state = QUOTE;
                    }
                    fputc (ch, out);
                }
                break;
            case SLASH:
                if (ch == '/')
                    state = LINE;
                else if (ch == '*')
                    state = BLOCK;
                else {                  /* the held '/' was ordinary code */
                    fputc ('/', out);
                    fputc (ch, out);
                    if (ch == '"' || ch == '\'') {
                        quote = ch;
                        state = QUOTE;
                    }
                    else
                        state = CODE;
                }
                break;
            case LINE:
                if (ch == '\n') {
                    fputc (ch, out);
                    state = CODE;
                }
                break;
            case BLOCK:
                if (ch == '*')
                    state = BLOCK_STAR;
                break;
            case BLOCK_STAR:            /* previous char was '*' */
                if (ch == '/')
                    state = CODE;
                else if (ch != '*')
                    state = BLOCK;
                break;
            case QUOTE:
                fputc (ch, out);
                if (ch == '\\')
                    state = QUOTE_ESC;
                else if (ch == quote || ch == '\n')  /* '\n': unterminated */
                    state = CODE;
                break;
            case QUOTE_ESC:             /* escaped char cannot end a literal */
                fputc (ch, out);
                state = QUOTE;
                break;
        }
    }

    if (state == SLASH)                 /* input ended on a held '/' */
        fputc ('/', out);
}

/*
 * Usage: prog [file]
 *
 * Reads the named file (or stdin when no argument is given), prints the
 * lines that contain comments to stdout, then writes a comment-free copy of
 * the input to "temp.txt".  The second pass re-reads the input, so stdin
 * must be seekable (e.g. redirected from a file, not a pipe).
 *
 * Returns 0 on success and 1 on any failure.
 */
int main (int argc, char **argv) {

    char temp[] = "temp.txt";
    int status = 0;
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
    FILE *fp2;

    /* validate file open for reading */
    if (!fp) {
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    /* open the temporary file in write mode */
    fp2 = fopen (temp, "w");
    if (!fp2) {
        fprintf (stderr, "Unable to open temporary file!!\n");
        if (fp != stdin) fclose (fp);
        return 1;
    }

    print_comment_lines (fp);

    /* fseek, unlike rewind, reports failure on a non-seekable stream */
    if (fseek (fp, 0L, SEEK_SET) != 0) {
        fprintf (stderr, "error: input cannot be re-read for comment removal.\n");
        status = 1;
    }
    else
        strip_comments (fp, fp2);

    if (fp != stdin) fclose (fp);   /* close file if not stdin */

    if (fclose (fp2) == EOF) {      /* a failed close can mean lost output */
        fprintf (stderr, "error: writing '%s' failed.\n", temp);
        status = 1;
    }

    return status;
}

来源：https://stackoverflow.com/questions/47565090/need-help-to-extract-comments-from-c-file

3条答案

按热度按时间

uujelgoq1#

注意，要正确地做到这一点，需要检查更多的条件（例如"//"，"/*"或"*/"作为路径的一部分出现，或者在字符串中出现）。使用正则表达式也是另一种方法。
如果我理解正确，并且您希望使用基本C解析源文件的注解行，那么下面是一个快速阅读文件中所有行的示例（作为第一个参数提供，或在stdin上）并查找单行或多行注解分隔符。
这段代码并不打算做到完整，也没有涵盖所有边界情况（例如分隔符出现在字面量、宏定义等位置的情况），但我已在代码注释中标出了需要补充额外代码来处理这些问题的地方。
基本思路是以 MAXC（1024 字节）为块大小逐行读取，并维护 3 个标志：longline 表示该行超过了 MAXC 个字符，当前读入的是这一行的第二个（或第三个、第四个……）缓冲区；inmulti 记录当前是否处于多行注释之中；insingle 表示当前处于单行注释之中——这条注释的长度可能超过 MAXC 个字符。读取循环根据这些标志的状态进行检查并采取相应操作，同时（如果正处于多行注释之中）查找多行注释的结束位置。代码还会检查起始和结束都位于同一行内的多行注释。
考虑到这些条件，您可以从以下内容开始：

#include <stdio.h>
#include <string.h>
#include <ctype.h>

#define MAXC 1024

/*
 * Usage: prog [file]
 *
 * Reads the named file (or stdin when no argument is given) and prints to
 * stdout every line that contains, or lies inside, a C or C++ comment.
 * Lines longer than MAXC - 1 characters are read and printed in pieces.
 *
 * Limitations: comment delimiters inside string or character literals are
 * not recognised as such, and a delimiter split across two pieces of an
 * over-long line is missed.
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main (int argc, char **argv) {

    int inmulti = 0,    /* inside a multi-line comment */
        insingle = 0,   /* inside a single-line comment */
        longline = 0;   /* previous read stopped before the end of its line */
    char buf[MAXC] = "";
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    while (fgets (buf, MAXC, fp)) {     /* read up to MAXC - 1 chars into buf */
        char *p = buf;                  /* scan position within buf */
        size_t len = strlen (buf);      /* get length */
        int show = inmulti;     /* text inside a multi-line comment is shown */

        if (!longline)          /* a single-line comment ends with its line, */
            insingle = 0;       /* so the flag must not leak into the next   */

        if (insingle)           /* continuation of an over-long // comment */
            show = 1;
        else {
            while (*p) {        /* look at every delimiter, not just the 1st */
                if (inmulti) {
                    char *end = strstr (p, "*/");
                    if (!end)
                        break;          /* comment continues on next read */
                    inmulti = 0;
                    p = end + 2;        /* more comments may follow */
                }
                else {
                    p = strchr (p, '/');
                    if (!p)
                        break;          /* no further candidate */
                    if (p[1] == '/') {  /* rest of the line is a comment */
                        insingle = 1;
                        show = 1;
                        break;
                    }
                    if (p[1] == '*') {  /* start of a multi-line comment */
                        inmulti = 1;
                        show = 1;
                        p += 2;
                    }
                    else
                        p++;            /* a lone '/', e.g. a division */
                }
            }
        }

        if (show)
            printf ("%s", buf);         /* print the line (or piece of it) */

        /* updated on every path so the next read knows whether it continues
           this line */
        longline = len && buf[len - 1] != '\n';
    }

    if (fp != stdin) fclose (fp);   /* close file if not stdin */

    return 0;
}

输入文件示例

$ cat dat/comments.txt
/* This is a single-line C comment */
#include <stdio.h>
/******
* This is a nicely formatted
* multi-line comment.
******/
int main(int argc, char **argv)
{
// This is a C++ comment.
}

示例使用/输出

$ ./bin/comments <dat/comments.txt
/* This is a single-line C comment */
/******
* This is a nicely formatted
* multi-line comment.
******/
  // This is a C++ comment.

**注意：**这类练习的价值在于学习：如何逐个字符地扫描一个长字符串并识别其中的特定字符，以及如何在遍历文件的过程中维护各种标志和程序状态。
逐字符读取

从*面向行*的方法切换到*面向字符*的方法（并根据 chux 的评论增加了几个状态）：先读取第一个字符并保存，然后逐个读取文件中其余的字符。这样就可以比较前一个字符（previous）和当前字符（current），从而判断当前是否位于*单行*注释、*多行*注释，或者*单*引号、*双*引号之内。
同样，这并不是为了捕捉每一个角落的情况，但输出被更新为不打印开始或结束注解分隔符。（您需要调整多行注解中*的打印和注解中的引号）。
从fgets阅读更改为fgetc，您可以执行类似以下操作：

#include <stdio.h>

/*
 * Usage: prog [file]
 *
 * Reads the named file (or stdin when no argument is given) one character
 * at a time and prints the text of every C and C++ comment to stdout,
 * without the comment delimiters.  Each line of comment text that produced
 * output is terminated by exactly one newline.
 *
 * Comment delimiters inside string or character literals are ignored, and
 * quotes inside comments are printed as ordinary text.
 *
 * Limitation: '*' characters inside a multi-line comment are never printed
 * (this drops decorative asterisks, but also literal ones).
 *
 * Returns 0 on success (including empty input), 1 if the file cannot be
 * opened.
 */
int main (int argc, char **argv) {

    int inmulti = 0,    /* in multi-line comment flag */
        insingle = 0,   /* in single-line comment flag */
        insquo = 0,     /* within single-quotes */
        indquo = 0,     /* within double-quotes */
        pending = 0,    /* current output line has text but no newline yet */
        c, prev = 0;
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    if ((prev = fgetc (fp)) == EOF) {   /* read 1st char */
        if (fp != stdin) fclose (fp);   /* empty input is not an error */
        return 0;
    }
    insquo = (prev == '\'');            /* the 1st char may open a literal */
    indquo = (prev == '"');

    while ((c = fgetc (fp)) != EOF) {   /* read remaining */
        if (insingle) {                 /* inside a // comment */
            if (c == '\n') {
                insingle = 0;
                if (pending)
                    putchar ('\n');
                pending = 0;
            }
            else {
                putchar (c);
                pending = 1;
            }
        }
        else if (inmulti) {             /* inside a multi-line comment */
            if (c == '/' && prev == '*') {      /* closing delimiter */
                inmulti = 0;
                if (pending)
                    putchar ('\n');
                pending = 0;
                c = 0;  /* consumed: this '/' must not start a new comment */
            }
            else if (c == '\n') {
                if (pending)            /* skip lines with no printed text */
                    putchar ('\n');
                pending = 0;
            }
            else if (c != '*') {
                putchar (c);
                pending = 1;
            }
        }
        else if (insquo || indquo) {    /* inside a literal: print nothing */
            if (prev == '\\')
                c = 0;                  /* escaped char cannot end a literal */
            else if (c == '\n')
                insquo = indquo = 0;    /* unterminated literal: resync */
            else if (insquo && c == '\'')
                insquo = 0;
            else if (indquo && c == '"')
                indquo = 0;
        }
        else {                          /* ordinary code */
            switch (c) {
                case '/':
                    if (prev == '/')
                        insingle = 1;
                    break;
                case '*':
                    if (prev == '/') {
                        inmulti = 1;
                        c = 0;  /* consumed: this '*' cannot also close */
                    }
                    break;
                case '\'':
                    insquo = 1;
                    break;
                case '"':
                    indquo = 1;
                    break;
                default:
                    break;
            }
        }
        prev = c;
    }

    if (pending)            /* input ended inside a comment: tidy up */
        putchar ('\n');

    if (fp != stdin) fclose (fp);   /* close file if not stdin */

    return 0;
}

示例使用/输出

$ ./bin/commentsfgetc <dat/comments.txt
 This is a single-line C comment
 This is a nicely formatted
 multi-line comment.
 This is a C++ comment.

请仔细阅读以上代码；如果您对字符是如何被识别的，或者代码是如何通过流程控制来定位注释块的起止位置有疑问，请告诉我。

赞(0）回复(0）举报 2023-04-11

mo49yndu2#

您可以使用下面这条 shell 命令来完成此操作，它会把提取出的注释保存到文件 comments.txt 中：

# Copy every line of generic.c that contains a // comment, or that lies
# between an opening /* and the next line containing */, into comments.txt.
# "aux" is 1 while inside a block comment; each matching line is printed
# exactly once, even when it matches both patterns.
awk '/\/\*/ {aux=1}; aux || /\/\// {print}; /\*\// {aux=0}' generic.c > comments.txt

好运

赞(0）回复(0）举报 2023-04-11

ktca8awb3#

我修改了 David 提出的算法，改为用一个缓冲区保存一行的内容。

#include <stdio.h>
#include <wchar.h>

#include <iostream>
#include <string>

/*
 * Usage: prog [file]
 *
 * Reads the named file (or stdin when no argument is given) as wide
 * characters and writes every C and C++ comment, delimiters included, to
 * std::wcout.  A newline is added after the end of each block comment; a
 * line comment keeps its own newline.  Comment text is buffered one line at
 * a time.
 *
 * Comment delimiters inside string or character literals are ignored.
 *
 * Returns 0 on success (including empty input), 1 if the file cannot be
 * opened.
 */
int main(int argc, char** argv) {

    /* Where the character being examined sits in the source text. */
    enum State {
        CODE,           /* ordinary code */
        SLASH,          /* just saw a '/' in code: may start a comment */
        LINE_COMMENT,   /* after the two slashes, up to end of line */
        BLOCK_COMMENT,  /* inside a block comment */
        BLOCK_STAR,     /* inside a block comment, previous char was '*' */
        LITERAL         /* inside a string or character literal */
    };

    FILE* fp = argc > 1 ? fopen(argv[1], "r,ccs=UTF-8") : stdin;

    if (!fp) {  /* validate file open for reading */
        fprintf(stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    State state = CODE;
    wchar_t quote = L'\0';  /* character that opened the current literal */
    bool escaped = false;   /* previous literal char was an unescaped '\' */
    std::wstring str;       /* comment text collected for the current line */
    wint_t c;

    while ((c = fgetwc(fp)) != WEOF) {
        const wchar_t ch = static_cast<wchar_t>(c);

        switch (state) {
        case CODE:
            if (ch == L'/') {
                state = SLASH;
            } else if (ch == L'\'' || ch == L'"') {
                quote = ch;
                escaped = false;
                state = LITERAL;
            }
            break;

        case SLASH:
            if (ch == L'/') {
                str += L"//";
                state = LINE_COMMENT;
            } else if (ch == L'*') {
                str += L"/*";
                state = BLOCK_COMMENT;
            } else if (ch == L'\'' || ch == L'"') {
                quote = ch;
                escaped = false;
                state = LITERAL;
            } else {
                state = CODE;   /* the '/' was an operator */
            }
            break;

        case LINE_COMMENT:
            str += ch;
            if (ch == L'\n') {  /* the comment ends with its line */
                std::wcout << str;
                str.clear();
                state = CODE;
            }
            break;

        case BLOCK_COMMENT:
            str += ch;
            if (ch == L'*') {
                state = BLOCK_STAR;
            } else if (ch == L'\n') {   /* emit the finished line */
                std::wcout << str;
                str.clear();
            }
            break;

        case BLOCK_STAR:
            str += ch;
            if (ch == L'/') {           /* closing delimiter */
                str += L'\n';
                std::wcout << str;
                str.clear();
                state = CODE;
            } else if (ch == L'\n') {
                std::wcout << str;
                str.clear();
                state = BLOCK_COMMENT;
            } else if (ch != L'*') {    /* a run of '*' stays in this state */
                state = BLOCK_COMMENT;
            }
            break;

        case LITERAL:
            if (escaped) {
                escaped = false;        /* escaped char cannot end a literal */
            } else if (ch == L'\\') {
                escaped = true;
            } else if (ch == quote || ch == L'\n') {
                state = CODE;           /* '\n': unterminated, resync */
            }
            break;
        }
    }

    std::wcout << str;  /* text of a comment still open at end of input */

    if (fp != stdin) fclose(fp);   /* close file if not stdin */

    return 0;
}

输入文件示例

/* This is a single-line C comment */ /* Next multiline 
   comment */
#include <stdio.h>
/******  
* This is a nicely formatted  
* multi-line comment.  
******/ 
int main(int argc, char **argv) 
{   
  // This is a C++ comment.
}

使用/输出示例：

/* This is a single-line C comment */
/* Next multiline 
   comment */
/******  
* This is a nicely formatted  
* multi-line comment.  
******/
// This is a C++ comment.

以上代码在 Visual Studio 中编译通过，不过其思路很容易移植到 GNU C++。

赞(0）回复(0）举报 2023-04-11

我来回答

需要帮助从c文件中提取注解

3条答案

相关问题

热门标签

最新问答