c++从dbc条目到数组的多个正则表达式提取

holgip5t  于 2023-02-10  发布在  其他
关注(0)|答案(3)|浏览(126)

你好,我想从以下字符串中提取参数:VAL_ 234 State1 123 "Description 1" 0 "Description 2 with \n new line" 90903489 "Big value and special characters &$§())!" ;
所需的匹配项为

  • 234
  • State1
  • 然后是无符号整数和字符串组合的数组
  • 123 "Description 1"
  • 0 "Description 2 with \n new line"
  • 90903489 "Big value and special characters &$§())!"

如果不能直接拆分数组,那么在第二步拆分数组。使用下面的正则表达式,我总是得到数组90903489 "Big value and special characters &$§())!"的最后一个匹配项
^VAL_ ([0-9]+) ([A-Za-z_][A-Za-z_0-9]*) ([0-9]*\\s\"[^\"]*\"\\s)+
是否有办法提取出所有这些值?我找到了下面这段代码:

// Collects every run of decimal digits found in `blah` into `values`.
auto blah = std::string{"5001 | 5002 | 5003"};
// The pattern must be a named object: the token iterator keeps a pointer to
// the regex, and its constructor taking a temporary regex is deleted.
const auto number_pattern = std::regex{R"(\d+)"};
auto values = std::vector<std::string>(
    std::sregex_token_iterator{blah.cbegin(), blah.cend(), number_pattern},
    std::sregex_token_iterator{});

它出自this post,但只返回完整匹配的字符串。是否可以迭代各个子匹配?

0dxa2lsx

0dxa2lsx1#

不确定您是否对如何分隔匹配有任何特定的要求,但是您可以使用the following regular expression来匹配其中一种模式:

(?:^VAL_\s(\d+)\s(\w+)|\s(\d+\s".+?"))

样本代码:

// Walks the VAL_ line with a single alternation pattern and prints whichever
// capture groups took part in each match.
//
// The input literal has to stay on one line: a line break inside the raw
// string becomes part of the text, and `.` does not match line terminators,
// so a description containing one would never be matched.
const std::string input{ R"(VAL_ 234 State1 123 "Description 1" 0 "Description 2 with \n new line" 90903489 "Big value and special characters &$§())!")" };
// Either the leading `VAL_ <number> <word>` header (groups 1 and 2) or one
// ` <number> "<text>"` pair (group 3).
const std::regex regex{ R"((?:^VAL_\s(\d+)\s(\w+)|\s(\d+\s".+?")))" };
const std::sregex_iterator end{};
for (auto it = std::sregex_iterator{ std::cbegin(input), std::cend(input), regex };
    it != end; ++it) {
    // The iterator only yields successful matches, so no "nothing matched"
    // branch is needed; bind by reference to avoid copying the match results.
    const auto& match = *it;
    if (match[1].matched) {
        std::cout << "Val match: " << match[1].str() << '\n';
    }
    if (match[2].matched) {
        std::cout << "State match: " << match[2].str() << '\n';
    }
    if (match[3].matched) {
        std::cout << "Etc match: " << match[3].str() << '\n';
    }
}
8cdiaqws

8cdiaqws2#

基于@rustyx link,我创建了自己的解析器

// Parser states for a VAL_ statement, listed in the order the fields appear.
enum VALToken {
    Identifier  = 0,  // the leading "VAL_ " keyword
    CANId       = 1,  // decimal message id
    SignalName  = 2,  // signal identifier
    Value       = 3,  // numeric part of a value/description pair
    Description = 4   // quoted text part of a value/description pair
};

// One value/description pair; both parts are kept as text.
struct ValueDescription {
    std::string value{};        // the digits that precede the quoted text
    std::string description{};  // the text between the double quotes
};

// Parses a hard-coded DBC `VAL_` statement with a small state machine.
//
// Expected layout:
//   VAL_ <digits> <name> { <digits> "<text>" } ;
// with exactly one space between the fields. The parsed id, signal name and
// value/description pairs end up in `can_id`, `signal_name` and `vds`.
//
// Returns 0 once the terminating ';' has been reached and 1 as soon as the
// input deviates from the expected layout.
int main(int argc, char *argv[])
{
    (void)argc; // the input is hard-coded; command-line arguments are not used
    (void)argv;

    const std::string s = R"(VAL_ 234 State1 123 "Description 1" 0 "Description 2 with \n new line" 90903489 "Big value and special characters &$§())!" ;)";

    const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
    const auto is_name_start = [](char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
    };

    auto state = Identifier;
    // c_str() is NUL-terminated and the cursor only advances past characters
    // that were just checked to be non-NUL, so every *a below is in bounds.
    const char* a = s.c_str();
    std::string can_id;
    std::string signal_name;
    std::vector<ValueDescription> vds;
    ValueDescription vd;
    bool finished = false;
    while (!finished) {
        switch (state) {
        case Identifier: {
            // The keyword and the single space that follows it.
            for (const char* expected = "VAL_ "; *expected != 0; ++expected) {
                if (*a != *expected)
                    return 1;
                a++;
            }
            state = CANId;
            break;
        }
        case CANId: {
            while (is_digit(*a)) {
                can_id += *a;
                a++;
            }
            if (can_id.empty())
                return 1;
            if (*a != ' ')
                return 1;
            a++; // skip whitespace
            state = SignalName;
            break;
        }
        case SignalName: {
            // First character: letter or underscore; afterwards digits too.
            if (!is_name_start(*a))
                return 1;
            while (is_name_start(*a) || is_digit(*a)) {
                signal_name += *a;
                a++;
            }
            if (*a != ' ')
                return 1;
            a++; // skip whitespace
            state = Value;
            break;
        }
        case Value: {
            // A ';' where the next value would start ends the statement.
            if (*a == ';') {
                finished = true;
                break;
            }
            std::string value_str;
            while (is_digit(*a)) {
                value_str += *a;
                a++;
            }
            if (value_str.empty())
                return 1;
            if (*a != ' ')
                return 1;
            a++; // skip whitespace
            vd.value = value_str;
            state = Description;
            break;
        }
        case Description: {
            std::string desc;
            if (*a != '"')
                return 1;
            a++;
            while (*a != '"' && *a != 0) {
                desc += *a;
                a++;
            }
            if (*a == 0)
                return 1; // closing quote is missing
            a++;
            if (*a != ' ')
                return 1;
            a++; // skip whitespace

            vd.description = desc;
            vds.push_back(vd);

            state = Value; // another pair or the terminating ';' follows
            break;
        }
        }
    }

    // can_id, signal_name and vds now hold the parsed statement.
    return 0;
}
u3r8eeie

u3r8eeie3#

我将执行regex_match,然后使用sregex_iterator执行循环。
Demo(https://godbolt.org/z/zYPWv4aP6)

#include <fmt/core.h>
#include <regex>
#include <string>

// Two-step extraction: regex_match splits the line into the leading fields
// and the remaining text, then a regex iterator walks that remaining text
// one number/quoted-text pair at a time and prints each pair.
int main() {
    const std::string text{ "VAL_ 234 State1"
        " 123 \"Description 1\""
        " 0 \"Description 2 with \\n new line\""
        " 90903489 \"Big value and special characters &$§())!\""
    };
    const std::regex pattern{ R"(VAL_ (\d+) \w+(\d+)(.*))" };

    std::smatch header;
    if (!std::regex_match(text, header, pattern)) {
        return 0; // the line does not have the expected shape; print nothing
    }
    fmt::print("{}\n{}\n", header[1].str(), header[2].str());

    // Group 3 of the header match is everything after the leading fields.
    const std::regex pair_pattern{ R"(\s+(\d+)\s+\"([^"]+)\")" };
    const std::string tail = header[3].str();

    const std::sregex_iterator last{};
    std::sregex_iterator current{ tail.begin(), tail.end(), pair_pattern };
    while (current != last) {
        const std::smatch& pair = *current;
        fmt::print("\t'{}', '{}'\n", pair[1].str(), pair[2].str());
        ++current;
    }
    return 0;
}

// Outputs:
//
// 234
// 1
//    '123', 'Description 1'
//    '0', 'Description 2 with \n new line'
//    '90903489', 'Big value and special characters &$§())!'

相关问题