Swift:通过正则表达式(regex)拆分字符串并获取各子串的范围

ubof19bj  于 2023-02-11  发布在  Swift
关注(0)|答案(1)|浏览(148)
extension String {
    /// Splits the string around every match of a regular expression.
    ///
    /// The text between consecutive matches is returned in order, followed by
    /// the text after the last match. Empty pieces are kept, so a string that
    /// begins or ends with a match yields a leading or trailing `""`.
    ///
    /// - Parameter regexStr: An `NSRegularExpression` pattern describing the separator.
    /// - Returns: The substrings found between separator matches, or an empty
    ///   array when `regexStr` is not a valid pattern.
    func splitWithRegex(by regexStr: String) -> [String] {
        guard let regex = try? NSRegularExpression(pattern: regexStr) else { return [] }
        let matches = regex.matches(in: self, range: NSRange(startIndex..., in: self))

        var pieces: [String] = []
        pieces.reserveCapacity(matches.count + 1)
        var cursor = startIndex
        for match in matches {
            // The NSRange -> Range<String.Index> conversion is failable; skip a
            // match that cannot be mapped instead of crashing on a force-unwrap.
            guard let separator = Range(match.range, in: self) else { continue }
            pieces.append(String(self[cursor..<separator.lowerBound]))
            cursor = separator.upperBound
        }
        // Whatever follows the last separator (possibly empty) is the final piece.
        pieces.append(String(self[cursor...]))
        return pieces
    }
}

我使用上面的代码,通过正则表达式(regex)模式在 Swift 中拆分字符串,并返回一个字符串数组。
如何改为返回一个 [(String, Range<String.Index>)] 数组,这样我就可以同时得到分割后的每个子字符串以及它在原字符串中的范围?

例如:

// Sample input: four words separated by single spaces.
let string = "This is a string"
// Split on runs of whitespace (the pattern `\s+`).
let stringRange = string.splitWithRegex(by: "\\s+")

结果:

(This, 0...4)
(is, 5...7)
(a, 8...9)
(string, 10...16)
bvjveswy

bvjveswy1#

我想应该可以了,但我不太确定:

extension String {
    /// Splits the string around every match of a regular expression and
    /// reports where each piece sits in the original string.
    ///
    /// Empty pieces are dropped, so a separator at the very start or end of the
    /// string, or two adjacent separators, produce no entry.
    ///
    /// - Parameter regexStr: An `NSRegularExpression` pattern describing the separator.
    /// - Returns: The non-empty pieces in order, each paired with its range in
    ///   `self`; an empty array when `regexStr` is not a valid pattern.
    func splitWithRegex2(by regexStr: String) -> [(String, Range<String.Index>)] {
        guard let regex = try? NSRegularExpression(pattern: regexStr) else { return [] }
        let matches = regex.matches(in: self, range: NSRange(startIndex..., in: self))

        var pieces: [(String, Range<String.Index>)] = []
        var cursor = startIndex
        for match in matches {
            // Skip a match whose NSRange cannot be mapped onto String.Index bounds.
            guard let separator = Range(match.range, in: self) else { continue }
            let pieceRange = cursor..<separator.lowerBound
            cursor = separator.upperBound
            // An empty range means the separator started the string or directly
            // followed another separator; only build a String for real pieces.
            if !pieceRange.isEmpty {
                pieces.append((String(self[pieceRange]), pieceRange))
            }
        }

        // Text after the last separator, unless the string ended with a separator.
        if cursor < endIndex {
            pieces.append((String(self[cursor...]), cursor..<endIndex))
        }
        return pieces
    }
}

在 Playground 中测试:

// The four leading spaces and the double space exercise the empty-piece
// filtering: neither may produce an entry in the result.
let stringReg1 = "    This  is a string"
let stringRange1 = stringReg1.splitWithRegex2(by: "\\s+")
print(stringRange1)
// A raw Range<String.Index> prints as opaque `_rawBits` values, so convert each
// range to UTF-16 offsets for a readable "piece, start...end" line.
for (piece, range) in stringRange1 {
    let nsRange = NSRange(range, in: stringReg1)
    print("\(piece), \(nsRange.location)...\(nsRange.location + nsRange.length)")
}

输出:

$>[("This", Range(Swift.String.Index(_rawBits: 262144)..<Swift.String.Index(_rawBits: 524288))), ("is", Range(Swift.String.Index(_rawBits: 655360)..<Swift.String.Index(_rawBits: 786432))), ("a", Range(Swift.String.Index(_rawBits: 851968)..<Swift.String.Index(_rawBits: 917504))), ("string", Range(Swift.String.Index(_rawBits: 983040)..<Swift.String.Index(_rawBits: 1376256)))]
$>This, 4...8
$>is, 10...12
$>a, 13...14
$>string, 15...21

相关问题