C语言 解析命令行输入文件名以检查内容的正确性

xytpbqjk  于 11个月前  发布在  其他
关注(0)|答案(2)|浏览(117)

我想从命令行解析文件名并检查其格式是否正确,例如:(1) 总长度,(2) 预期的扩展名,(3) '_' 的位置以及其他字段的取值。
调用方式如下:

$check.exe input_file  L2A30000_0102051303042026_0001.dat

程序应检查输出文件名(L2A30000_0102051303042026_0001.dat)的格式是否符合预期(不要求取值完全一致,只检查各字段的类型和长度)。

/*
 * Check whether a string consists only of decimal digits.
 *
 * str: NUL-terminated string to inspect (must not be NULL).
 * Returns 1 if every character is a digit, 0 otherwise.
 * An empty string contains no non-digit character and therefore yields 1;
 * callers that need a minimum length must check it separately.
 */
int isNumeric(const char *str) {
    while (*str) {
        // <ctype.h> functions require a value representable as unsigned char
        // (or EOF); passing a negative plain char is undefined behavior.
        if (!isdigit((unsigned char)*str)) {
            return 0;  // Not a digit
        }
        str++;
    }
    return 1;  // All characters are digits
}

/*
 * Entry point as posted in the question: takes the output file name from
 * argv[2], splits it with a single sscanf() call, prints every extracted
 * field, and then checks the fields against the expected format.
 * Returns 1 on a usage error or a failed format check, 0 otherwise.
 *
 * NOTE(review): the code is kept exactly as posted; its defects are marked
 * inline below.
 */
int main(int argc, char *argv[]) {
    // Check if the correct number of command line arguments is  
    provided  // NOTE(review): looks like the tail of the comment above; as written it is a stray token
    if (argc != 3) {
        printf("Usage: %s inputfile outputfile\n", argv[0]);
        return 1;
    }

   // Extract the output file name from the command line arguments
   const char *outputFileName = argv[2];

   // Define the expected format
   // One variable per field; the arrays hold digit runs plus a terminator.
   char asciiChar1, numChar1, asciiChar2, numChar2, numChar3[5],      
   underscore1, numChar4[17], underscore2, numChar5[5],  
   numChar6[4], extension[4];

   // NOTE(review): the format string contains 12 conversion specifications
   // but only 11 pointer arguments follow.
   // NOTE(review): "%1[0-9]" stores at most one digit in numChar5, while the
   // check further down requires strlen(numChar5) == 4.
   // NOTE(review): "%4[.dat]" may store 4 characters plus the terminator,
   // which is one byte more than extension[4] can hold.
   int result = sscanf(outputFileName, 
   "%c%c%c%c%4[0-9]%c%16[0-9]%c%1[0-9]%3[0-9]_%3[0-9]%4[.dat]",
                    &asciiChar1, &numChar1, &asciiChar2, 
   &numChar2, numChar3, &underscore1, numChar4, &underscore2, 
   numChar5, numChar6, extension);

  // Debugging print statement
  printf("Debug: sscanf result: %d\n", result);

  // NOTE(review): the fields are printed regardless of `result`; any field
  // sscanf() did not assign is still uninitialized at this point.
  printf("Debug: asciiChar1: %c\n", asciiChar1);
  printf("Debug: numChar1: %c\n", numChar1);
  printf("Debug: asciiChar2: %c\n", asciiChar2);
  printf("Debug: numChar2: %c\n", numChar2);
  printf("Debug: numChar3: %s\n", numChar3);
  printf("Debug: underscore1: %c\n", underscore1);
  printf("Debug: numChar4: %s\n", numChar4);
  printf("Debug: underscore2: %c\n", underscore2);
  printf("Debug: numChar5: %s\n", numChar5);
  printf("Debug: numChar6: %s\n", numChar6);
  printf("Debug: extension: %s\n", extension);

 // Check if the extracted values match the expected format
 // NOTE(review): underscore1 and underscore2 are never compared with '_'.
 // NOTE(review): the last line requires strlen(extension) == 3 and
 // extension equal to ".dat" (4 characters) at the same time, so this
 // condition can never be false.
 if (result != 12 || !isalpha(asciiChar1) || !isdigit(numChar1) || 
    !isalpha(asciiChar2) || !isdigit(numChar2) ||
    strlen(numChar3) != 4 || !isNumeric(numChar3) ||    
    strlen(numChar4) != 16 || !isNumeric(numChar4) ||
    strlen(numChar5) != 4 || !isNumeric(numChar5) || 
    strlen(numChar6) != 3 || !isNumeric(numChar6) ||
    strlen(extension) != 3 || strcmp(extension, ".dat") != 0) {

    printf("Error: Output file format is incorrect.\n");
    return 1;
}

// If all checks pass, the output file format is correct
 printf("Output file format is correct.\n");

 return 0;
}


命令行输入:

.\check.exe inputfile L2A30000_0102051303042026_0001.dat


这是我得到的输出:

Debug: sscanf result: 9
...
Debug: numChar5: 0001
Debug: extension:
Error: Output file format is incorrect.


这是我期待的输出:

Debug: extension:.dat


扩展名这一部分不起作用,其他部分都正常。我想检查文件名的扩展名是否为 .dat;如果不是,程序应打印错误消息并退出。

sq1bmfud

sq1bmfud1#

我建议在格式字符串和对应的参数中加入一些空白(换行和缩进),使二者逐项对齐,大致如下:

// The question's sscanf() call, split into one conversion group per line so
// the format can be compared with the argument list item by item.
// NOTE(review): the format has 12 conversions but only 11 arguments follow;
// by position, the second "%3[0-9]" takes `extension` and "%4[.dat]" has no
// argument left.
int result = sscanf(outputFileName, 
        "%c%c"     // asciiChar1, numChar1
        "%c%c"     // asciiChar2, numChar2
        "%4[0-9]"  // numChar3
        "%c"       // underscore1
        "%16[0-9]" // numChar4
        "%c" // underscore2
        "%1[0-9]"  // numChar5: at most 1 digit is stored, although the array is char[5]
        "%3[0-9]_%3[0-9]%4[.dat]", // numChar6, a literal '_', then two more conversions
        &asciiChar1, &numChar1,
        &asciiChar2, &numChar2,
        numChar3,
        &underscore1,
        numChar4,
        &underscore2,
        numChar5,
        numChar6,
        extension
    );

可以看到,直到第二个下划线为止都是正确的。接下来格式只读取 1 个数字("%1[0-9]",存入 char numChar5[5]),这与变量的大小不相符;随后的 3 个数字("%3[0-9]",存入 char numChar6[4])没有问题。再往后是第三个下划线,而输入中并不存在;接着又是 3 个数字,却没有与之对应的参数。"%4[.dat]" 会导致缓冲区溢出,因为 extension 变量是 char extension[4]。总共有 12 个格式指令,却只有 11 个参数,这是未定义行为。
您可以把固定的字符串直接硬编码到格式串中来简化它:

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/*
 * Check whether a string consists only of decimal digits.
 *
 * str: NUL-terminated string to inspect (must not be NULL).
 * Returns 1 if the first non-digit character is the terminator (which
 * includes the empty string), 0 otherwise.
 */
int isNumeric(const char *str) {
    // The cast is required: <ctype.h> functions have undefined behavior for
    // negative values other than EOF, and plain char may be signed.
    while (isdigit((unsigned char)*str)) {
        str++;
    }
    return *str == '\0';
}

/*
 * Validate the output file name passed as the second command-line argument.
 *
 * Expected layout: letter, digit, letter, digit, 4 digits, '_', 16 digits,
 * '_', 4 digits, ".dat" (e.g. L2A30000_0102051303042026_0001.dat), with
 * nothing after the extension.
 *
 * Returns 0 when the name matches, 1 on a usage error or a malformed name.
 */
int main(int argc, char *argv[]) {
    if (argc != 3) {
        printf("Usage: %s inputfile outputfile\n", argv[0]);
        return 1;
    }
    const char *outputFileName = argv[2];
    char asciiChar1, numChar1, asciiChar2, numChar2;
    char numChar3[5], numChar4[17], numChar5[5];
    // sscanf() only counts assigned conversions, so its return value is 7
    // even when the literal ".dat" does not match. "%n" is reached only after
    // that literal matched; `end` keeps -1 otherwise.
    int end = -1;
    int result = sscanf(outputFileName,
        "%c%c"
        "%c%c"
        "%4[0-9]"
        "_"
        "%16[0-9]"
        "_"
        "%4[0-9]"
        ".dat%n",
        &asciiChar1, &numChar1,
        &asciiChar2, &numChar2,
        numChar3,
        numChar4,
        numChar5,
        &end
    );
    printf("Debug: sscanf result: %d\n", result);
    // The fields are all assigned only when the 7 conversions succeeded;
    // printing them otherwise would read indeterminate values.
    if (result == 7) {
        printf("Debug: asciiChar1: %c\n", asciiChar1);
        printf("Debug: numChar1: %c\n", numChar1);
        printf("Debug: asciiChar2: %c\n", asciiChar2);
        printf("Debug: numChar2: %c\n", numChar2);
        printf("Debug: numChar3: %s\n", numChar3);
        printf("Debug: numChar4: %s\n", numChar4);
        printf("Debug: numChar5: %s\n", numChar5);
    }
    // `end < 0` rejects a wrong extension; the terminator test rejects
    // trailing characters after ".dat". The casts keep the <ctype.h> calls
    // defined when plain char is signed.
    if (result != 7 || end < 0 || outputFileName[end] != '\0' ||
        !isalpha((unsigned char)asciiChar1) || !isdigit((unsigned char)numChar1) ||
        !isalpha((unsigned char)asciiChar2) || !isdigit((unsigned char)numChar2) ||
        strlen(numChar3) != 4 || !isNumeric(numChar3) ||
        strlen(numChar4) != 16 || !isNumeric(numChar4) ||
        strlen(numChar5) != 4 || !isNumeric(numChar5)
    ) {

        printf("Error: Output file format is incorrect.\n");
        return 1;
    }
    printf("Output file format is correct.\n");
    return 0;
}


示例运行:

./a.out  input_file L2A30000_0102051303042026_0001.dat
Debug: sscanf result: 7
Debug: asciiChar1: L
Debug: numChar1: 2
Debug: asciiChar2: A
Debug: numChar2: 3
Debug: numChar3: 0000
Debug: numChar4: 0102051303042026
Debug: numChar5: 0001
Output file format is correct.


另一种方法是逐段解析文件名(is_valid_filename()),也可以借助一个小型解释器来完成(is_valid_filename2()):

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/*
 * Consume one alphabetic character.
 *
 * s: current parse position, or NULL if an earlier step already failed.
 * Returns the position just past the letter, or NULL when s is NULL or the
 * character at s is not alphabetic (the terminator is not alphabetic, so an
 * exhausted string fails too).
 */
const char *alpha(const char *s) {
    // Cast first: isalpha() is undefined for negative values other than EOF,
    // and plain char may be signed.
    if (s == NULL || !isalpha((unsigned char)*s)) {
        return NULL;
    }
    return s + 1;
}

/*
 * Consume exactly n decimal digits.
 *
 * s: current parse position, or NULL if an earlier step already failed.
 * n: number of digits required.
 * Returns the position just past the n digits, or NULL when s is NULL or
 * any of the n characters is not a digit. The scan stops at the first
 * non-digit, so it never reads beyond the string's terminator.
 */
const char *digits(const char *s, size_t n) {
    if (s == NULL) {
        return NULL;
    }
    for (size_t i = 0; i < n; i++) {
        // Cast first: isdigit() is undefined for negative values other than
        // EOF, and plain char may be signed.
        if (!isdigit((unsigned char)s[i])) {
            return NULL;
        }
    }
    return s + n;
}
/*
 * Consume the literal text s2.
 *
 * s:  current parse position, or NULL if an earlier step already failed.
 * s2: NUL-terminated text that must appear at s.
 * Returns the position just past the matched text, or NULL when s is NULL
 * or the text at s differs from s2.
 */
const char *str(const char *s, const char *s2) {
    if (s == NULL) {
        return NULL;
    }
    size_t matched = 0;
    // Walk s2 to its terminator; a shorter s fails on its own terminator
    // before anything past it is read.
    while (s2[matched] != '\0') {
        if (s[matched] != s2[matched]) {
            return NULL;
        }
        matched++;
    }
    return s + matched;
}

/*
 * Check a file name against the fixed layout
 * letter, 1 digit, letter, 5 digits, "_", 16 digits, "_", 4 digits, ".dat".
 *
 * Each helper returns NULL on a mismatch and passes NULL through, so the
 * steps can be chained without checking after every call.
 *
 * Returns 1 when every step matched and the name ends right after ".dat",
 * 0 otherwise.
 */
int is_valid_filename(const char *s) {
    const char *pos = s;

    pos = alpha(pos);
    pos = digits(pos, 1);
    pos = alpha(pos);
    pos = digits(pos, 5);
    pos = str(pos, "_");
    pos = digits(pos, 16);
    pos = str(pos, "_");
    pos = digits(pos, 4);
    pos = str(pos, ".dat");

    if (pos == NULL) {
        return 0;
    }
    return *pos == '\0';
}

/*
 * Table-driven variant of the file name check: the layout is described as a
 * list of steps (one letter, n digits, or a literal string) that is
 * interpreted in order.
 *
 * Returns 1 when every step matched and the name ends right after the last
 * step, 0 otherwise.
 */
int is_valid_filename2(const char *s) {
    struct step {
        enum { ALPHA, DIGITS, STR } type;
        union {
            int n;          /* digit count, used by DIGITS */
            const char *s;  /* literal text, used by STR */
        };
    };
    const struct step format[] = {
        { ALPHA },
        { DIGITS, .n = 1 },
        { ALPHA },
        { DIGITS, .n = 5 },
        { STR, .s = "_" },
        { DIGITS, .n = 16 },
        { STR, .s = "_" },
        { DIGITS, .n = 4 },
        { STR, .s = ".dat" },
    };
    const struct step *const last = format + sizeof format / sizeof *format;

    /* Stop at the first step that fails (s becomes NULL). */
    for (const struct step *cur = format; s != NULL && cur != last; cur++) {
        if (cur->type == ALPHA) {
            s = alpha(s);
        } else if (cur->type == DIGITS) {
            s = digits(s, cur->n);
        } else if (cur->type == STR) {
            s = str(s, cur->s);
        }
    }
    return s != NULL && *s == '\0';
}

/*
 * Run both validators on the output file name (second command-line
 * argument) and print "valid" or "invalid" for each, one per line.
 * Returns 1 on a usage error, 0 otherwise.
 */
int main(int argc, char *argv[]) {
    if (argc == 3) {
        const char *name = argv[2];
        printf("%s\n", is_valid_filename(name) ? "valid" : "invalid");
        printf("%s\n", is_valid_filename2(name) ? "valid" : "invalid");
        return 0;
    }
    printf("Usage: %s inputfile outputfile\n", argv[0]);
    return 1;
}

3lxsmp7m

3lxsmp7m2#

考虑一个简化:

  • 使用 "%[...]" 限制可接受的输入字符,并将结果保存到 char 数组中。
  • 使用 "%n" 保存扫描偏移量,用来判断是否成功扫描到了字符串末尾。
  • 利用字符串字面量的拼接,让复杂的格式串更清晰。
  • 将 asciiChar1 改名为 alphaChar1,因为代码在这里查找的是 A-Z 和 a-z。
  • 在 *先* 确认扫描成功之前,不要尝试打印字符串。

请注意,只有当所有 "%[...]" 字符串都成功扫描到各自的最大宽度,并且下一个待扫描的字符是 *空字符* 时,扫描长度才会正确。
这大大简化了检查。

// Single-character fields are read with a width-1 scanset, so each array
// holds one character plus the terminating null.
char alphaChar1[2], numChar1[2], alphaChar2[2], numChar2[2];
  char numChar3[5], numChar4[17], numChar5[5];
  /* numChar6[4] is left out: apparently not in OP's sample filename */

  #define FMT_ALPHA "%1[A-Za-z]"
  #define FMT_DIGIT "%1[0-9]"
  #define FMT_EXT ".dat"
  // The sample name is used only for its length.
  char sample[] = "L2A30000_0102051303042026_0001.dat";
  #define FMT_N (sizeof sample - 1)

  // "%n" is stored only if everything before it matched, so n stays 0 on
  // any mismatch.
  int n = 0;
  sscanf(outputFileName,
      FMT_ALPHA FMT_DIGIT FMT_ALPHA FMT_DIGIT
      "%4[0-9]"
      "_"
      "%16[0-9]"
      "_"
      "%4[0-9]"
      FMT_EXT
      "%n",
      alphaChar1, numChar1, alphaChar2, numChar2,
      numChar3, numChar4, numChar5,
      &n);

  // One test covers everything: the scan consumed exactly FMT_N characters
  // and the name ends there.
  if (n != FMT_N || outputFileName[FMT_N] != '\0') {
    puts("Failure");
  } else {
    // Success: every field was assigned, so printing is safe.
    printf("Debug: alphaChar1: %c\n", alphaChar1[0]);
    printf("Debug: numChar1: %c\n", numChar1[0]);
    printf("Debug: alphaChar2: %c\n", alphaChar2[0]);
    printf("Debug: numChar2: %c\n", numChar2[0]);
    printf("Debug: numChar3: %s\n", numChar3);
    printf("Debug: numChar4: %s\n", numChar4);
    printf("Debug: numChar5: %s\n", numChar5);
  }

输出:

Debug: alphaChar1: L
Debug: numChar1: 2
Debug: alphaChar2: A
Debug: numChar2: 3
Debug: numChar3: 0000
Debug: numChar4: 0102051303042026
Debug: numChar5: 0001

相关问题