regex 特殊字符的JavaScript正则表达式

sulc1iza  于 2023-08-08  发布在  Java
关注(0)|答案(2)|浏览(109)

我正在做一些代码编辑器,我使用正则表达式来给一些单词着色。我想接受所有字符,但我想划分一些字符,如数字,{},(),关键字(int,bool,...)。所以我尝试了这个正则表达式:

/([a-zA-Z_]\w*|\d+|[(){}#\[\]!"$%&/=?*+-;,:.<>@#\\|€÷פ߸¨'˝´˙`˛°˘^ˇ~])|(\s+)/g

字符串
它的输出是:

["#", "include", " ", "int", "(", "bool", ")", "01", "{", "while", "}", "0", "/", "/"]


就像我说的,我想把所有的数字都分开:["0", "1"]
关键词:["int", "bool"]
括号:["(", ")", "{", "}"]
包含:["#include", "#ifdef", "#ifndef", "#define"]
#include 这类指令的第一个问题是:正则表达式可能无法把 #include 作为一个完整的单词匹配出来。如果有人能告诉我如何一次性获取 # 之后的全部文本(例如 include),欢迎指正(prove me wrong);
如果做不到,我就把它拆成 ["#", "include"] 分别着色。
最后,第二个问题是注释:我不想把单个 / 标记为注释,因为它也是除号(division),我希望按下面这样的记号来检测:

["//", "/*", "*/"]


我想为 // 之后的整行着色,并为块注释从 /* 开始直到 */ 结束的全部文本着色,如下所示:

/*text text text
  text text text
  text text text*/

同样重要的是,它需要保留空格(空白字符)。这是另一个话题!

正则表达式:
(此处原为图片,抓取时已丢失)
预期:



JavaScript、HTML、CSS、cpp.ge.js:

// The element with id "editor" that hosts the text being edited.
var editor = document.getElementById("editor");
// Run handleInput on every "input" event fired by the editor element.
editor.addEventListener("input", handleInput);

/**
 * Measure how many characters of the editor's text precede the caret.
 *
 * A copy of the first selection range is stretched from the start of the
 * editor to the selection's focus point; the length of the text it covers is
 * the caret offset.
 *
 * @returns {number} Character offset of the caret, or 0 when there is no
 *     selection range at all.
 */
function getCaretPosition() {
    var sel = window.getSelection();
    if (!(sel.rangeCount > 0)) {
        return 0;
    }

    // Work on a clone so the live selection is never modified.
    var probe = sel.getRangeAt(0).cloneRange();
    probe.selectNodeContents(editor);
    probe.setEnd(sel.focusNode, sel.focusOffset);
    return probe.toString().length;
}

/**
 * Place a collapsed caret `position` characters into the editor's text.
 *
 * The editor's text nodes are walked in document order while their lengths
 * are accumulated; the caret goes into the first node whose end is at or
 * beyond `position`. A position exactly on a node boundary therefore lands
 * at the end of the earlier node.
 *
 * Out-of-range positions are clamped instead of being ignored or throwing:
 * a position past the end of the text puts the caret at the very end, and a
 * negative position puts it at the very start. When the editor contains no
 * text node the selection is left untouched.
 *
 * @param {number} position Character offset measured from the start of the
 *     editor's text.
 * @returns {void}
 */
function setCaretPosition(position) {
    var textNodes = getTextNodes(editor);
    if (textNodes.length === 0) {
        // Nothing to anchor a caret in.
        return;
    }

    var target = null;
    var charCount = 0;
    for (var i = 0; i < textNodes.length; i++) {
        var nodeLength = textNodes[i].textContent.length;
        if (position <= charCount + nodeLength) {
            target = textNodes[i];
            break;
        }
        charCount += nodeLength;
    }

    var offset;
    if (target) {
        // Guard against a negative offset, which Range.setStart rejects.
        offset = Math.max(0, position - charCount);
    } else {
        // Position lies beyond the available text: clamp to the end.
        target = textNodes[textNodes.length - 1];
        offset = target.textContent.length;
    }

    var range = document.createRange();
    range.setStart(target, offset);
    range.collapse(true);

    var selection = window.getSelection();
    selection.removeAllRanges();
    selection.addRange(range);
}

/**
 * Collect every text node found at or below `node`, in document order.
 *
 * @param {Node} node Root of the subtree to scan; if it is itself a text
 *     node, it is the only entry in the result.
 * @returns {Node[]} The text nodes, in depth-first, left-to-right order.
 */
function getTextNodes(node) {
    var collected = [];
    var pending = [node];

    while (pending.length > 0) {
        var current = pending.pop();
        if (current.nodeType === Node.TEXT_NODE) {
            collected.push(current);
            continue;
        }
        // Push children right-to-left so the leftmost child is popped first.
        var children = current.childNodes;
        for (var k = children.length - 1; k >= 0; k--) {
            pending.push(children[k]);
        }
    }

    return collected;
}

/**
 * "input" event handler: re-render the editor's text with highlighting.
 *
 * The caret offset is captured before the editor's markup is replaced and
 * re-applied afterwards, because assigning innerHTML rebuilds the nodes the
 * old selection pointed into.
 */
function handleInput() {
    var rawText = editor.textContent;
    var savedCaret = getCaretPosition();
    editor.innerHTML = formatContent(rawText);
    setCaretPosition(savedCaret);
}

/**
 * Turn plain source text into HTML with highlighting spans.
 *
 * The text is split into tokens; each token found in one of the global
 * lookup arrays (`keywords`, `brackets`, `comNum`, `defs`, checked in that
 * order) is wrapped in `<span class='NAME'>`. All other tokens are emitted
 * as plain text. Every token is HTML-escaped, so characters such as `<` and
 * `&` typed by the user are displayed literally instead of being parsed as
 * markup, and stripping the tags from the result gives back `input`.
 *
 * Tokens, in matching priority:
 *   1. `#` immediately followed by an identifier (e.g. `#include`), kept
 *      whole so it can match `defs`;
 *   2. the comment markers `//`, `/*` and `*` + `/`, kept whole so they can
 *      match `comNum`;
 *   3. identifiers;
 *   4. single digits (the `comNum` table lists digits one by one, so a
 *      number is coloured digit by digit);
 *   5. runs of whitespace;
 *   6. any other single character, so that nothing from `input` is lost.
 *
 * @param {string} input Raw text of the editor.
 * @returns {string} HTML markup; the empty string for empty or missing input.
 */
function formatContent(input) {
    // Escape the characters that are significant to the HTML parser.
    function escapeHtml(text) {
        return text
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#39;");
    }

    // Name of the CSS class for a token, or null when it is not highlighted.
    function classOf(token) {
        if (keywords.indexOf(token) > -1) {
            return "keywords";
        }
        if (brackets.indexOf(token) > -1) {
            return "brackets";
        }
        if (comNum.indexOf(token) > -1) {
            return "comNum";
        }
        if (defs.indexOf(token) > -1) {
            return "defs";
        }
        return null;
    }

    if (input == null || input === "") {
        return "";
    }

    // Alternatives are tried left to right, so the multi-character tokens
    // must come before the single-character catch-all at the end.
    var regex = /#[a-zA-Z_]\w*|\/\/|\/\*|\*\/|[a-zA-Z_]\w*|\d|\s+|[\s\S]/g;
    var tokens = String(input).match(regex) || [];

    var formattedHTML = "";
    for (var i = 0; i < tokens.length; i++) {
        var token = tokens[i];
        var cssClass = classOf(token);
        var safeText = escapeHtml(token);
        if (cssClass) {
            formattedHTML += "<span class='" + cssClass + "'>" + safeText + "</span>";
        } else {
            formattedHTML += safeText;
        }
    }
    return formattedHTML;
}
<!DOCTYPE html>
<html>
    <head>
        <!-- The scripts contain non-ASCII characters, so declare the encoding explicitly. -->
        <meta charset="utf-8">
        <title>ge - The Graphical Editor</title>
        <link rel="stylesheet" href="style.css">
    </head>

    <body>
        <div id="editor" contenteditable></div>

        <!--
            Scripts belong inside <body>, placed after #editor so the element
            already exists when a script looks it up at load time.
        -->
        <script src="cpp.ge.js"></script>
        <script src="script.js"></script>
    </body>
</html>
/* Full-window editing surface. */
#editor {
    position: fixed;
    padding: 0;
    margin: 0;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background-color: #1e1e1e;
    color: white;
    font-size: 14px;
    font-family: monospace;
    /* Keep runs of spaces, tabs and newlines in the text visible instead of
       letting the browser collapse them into a single space. */
    white-space: pre-wrap;
}

/* Token classes: each name matches a class attribute written by formatContent. */
.keywords {
    color: #569cd6;
}

.brackets {
    color: #ffd700;
}

.comNum {
    color: #608b4e;
}

.defs {
    color: #da70d6;
}
//START CPP
// Words that formatContent wraps in <span class='keywords'>. A token is
// highlighted only when it equals one of these entries exactly.
// NOTE(review): besides standard C++ keywords the list appears to contain
// vendor extensions (e.g. 'gcnew', '__declspec') — confirm the intended dialect.
var keywords = [
    'abstract',
    'amp',
    'array',
    'auto',
    'bool',
    'break',
    'case',
    'catch',
    'char',
    'class',
    'const',
    'constexpr',
    'const_cast',
    'continue',
    'cpu',
    'decltype',
    'default',
    'delegate',
    'delete',
    'do',
    'double',
    'dynamic_cast',
    'each',
    'else',
    'enum',
    'event',
    'explicit',
    'export',
    'extern',
    'false',
    'final',
    'finally',
    'float',
    'for',
    'friend',
    'gcnew',
    'generic',
    'goto',
    'if',
    'in',
    'initonly',
    'inline',
    'int',
    'interface',
    'interior_ptr',
    'internal',
    'literal',
    'long',
    'mutable',
    'namespace',
    'new',
    'noexcept',
    'nullptr',
    '__nullptr',
    'operator',
    'override',
    'partial',
    'pascal',
    'pin_ptr',
    'private',
    'property',
    'protected',
    'public',
    'ref',
    'register',
    'reinterpret_cast',
    'restrict',
    'return',
    'safe_cast',
    'sealed',
    'short',
    'signed',
    'sizeof',
    'static',
    'static_assert',
    'static_cast',
    'struct',
    'switch',
    'template',
    'this',
    'thread_local',
    'throw',
    'tile_static',
    'true',
    'try',
    'typedef',
    'typeid',
    'typename',
    'union',
    'unsigned',
    'using',
    'virtual',
    'void',
    'volatile',
    'wchar_t',
    'where',
    'while',

    '_asm', // reserved words with a single leading underscore
    '_based',
    '_cdecl',
    '_declspec',
    '_fastcall',
    '_if_exists',
    '_if_not_exists',
    '_inline',
    '_multiple_inheritance',
    '_pascal',
    '_single_inheritance',
    '_stdcall',
    '_virtual_inheritance',
    '_w64',

    '__abstract', // reserved words with two leading underscores
    '__alignof',
    '__asm',
    '__assume',
    '__based',
    '__box',
    '__builtin_alignof',
    '__cdecl',
    '__clrcall',
    '__declspec',
    '__delegate',
    '__event',
    '__except',
    '__fastcall',
    '__finally',
    '__forceinline',
    '__gc',
    '__hook',
    '__identifier',
    '__if_exists',
    '__if_not_exists',
    '__inline',
    '__int128',
    '__int16',
    '__int32',
    '__int64',
    '__int8',
    '__interface',
    '__leave',
    '__m128',
    '__m128d',
    '__m128i',
    '__m256',
    '__m256d',
    '__m256i',
    '__m512',
    '__m512d',
    '__m512i',
    '__m64',
    '__multiple_inheritance',
    '__newslot',
    '__nogc',
    '__noop',
    '__nounwind',
    '__novtordisp',
    '__pascal',
    '__pin',
    '__pragma',
    '__property',
    '__ptr32',
    '__ptr64',
    '__raise',
    '__restrict',
    '__resume',
    '__sealed',
    '__single_inheritance',
    '__stdcall',
    '__super',
    '__thiscall',
    '__try',
    '__try_cast',
    '__typeof',
    '__unaligned',
    '__unhook',
    '__uuidof',
    '__value',
    '__virtual_inheritance',
    '__w64',
    '__wchar_t'
];
// Tokens that formatContent wraps in <span class='brackets'>.
var brackets = ["(", ")", "()", "{", "}", "{}"];
// Comment markers and single digits, wrapped in <span class='comNum'>.
// (The stray double comma that left an empty slot in this array is removed.)
var comNum = ["//", "/*", "*/", "/**/", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"];
// Preprocessor directives, wrapped in <span class='defs'>.
var defs = ["#include", "#ifdef", "#ifndef", "#define", "#endif"];
//END CPP
5ssjco0h

5ssjco0h1#

若要在代码编辑器中实现所需的着色和格式,可以更新 formatContent 函数,使其也能处理注释和特殊字符。此外,您还可以修改正则表达式,使其匹配注释记号和特殊字符。下面是 formatContent 函数的更新版本:

/**
 * Turn plain source text into HTML with highlighting spans, including
 * whole comments.
 *
 * Comments are recognised first and wrapped in `<span class='comment'>`:
 * a line comment runs from `//` to the end of the line, a block comment
 * from the opening marker to the closing one (or to the end of the input
 * when it is not closed yet). Outside comments, a token found in one of the
 * global lookup arrays (`keywords`, `brackets`, `comNum`, `defs`, checked in
 * that order) is wrapped in `<span class='NAME'>`; anything else is emitted
 * as plain text.
 *
 * Every piece of text is HTML-escaped, and every character of `input` is
 * part of some token, so stripping the tags from the result gives back
 * `input` unchanged.
 *
 * Limitation: string literals are not tokenized, so `//` inside a string is
 * still treated as the start of a comment.
 *
 * @param {string} input Raw text of the editor.
 * @returns {string} HTML markup; the empty string for empty or missing input.
 */
function formatContent(input) {
  // Escape the characters that are significant to the HTML parser.
  function escapeHtml(text) {
    return text
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");
  }

  // Name of the CSS class for a non-comment token, or null for plain text.
  function classOf(token) {
    if (keywords.indexOf(token) > -1) {
      return "keywords";
    }
    if (brackets.indexOf(token) > -1) {
      return "brackets";
    }
    if (comNum.indexOf(token) > -1) {
      return "comNum";
    }
    if (defs.indexOf(token) > -1) {
      return "defs";
    }
    return null;
  }

  if (input == null || input === "") {
    return "";
  }

  // Alternatives are tried left to right. Comments (capture group 1) must
  // come first, otherwise "/" and "*" would be consumed one at a time by the
  // single-character catch-all at the end and never form a comment.
  var regex = /(\/\/[^\r\n]*|\/\*[\s\S]*?(?:\*\/|$))|#[a-zA-Z_]\w*|[a-zA-Z_]\w*|\d|\s+|[\s\S]/g;

  // The catch-all guarantees the matches tile the whole input, so replacing
  // each match by its markup yields the complete result.
  return String(input).replace(regex, function (token, comment) {
    var cssClass = comment !== undefined ? "comment" : classOf(token);
    var safeText = escapeHtml(token);
    if (cssClass) {
      return "<span class='" + cssClass + "'>" + safeText + "</span>";
    }
    return safeText;
  });
}

字符串
此版本可以处理单行注释(//)和多行注释(/* ... */)。当编辑器遇到 // 时,它会应用 comment 类为整行上色。类似地,当遇到 /* 时,它会持续标记文本,直到找到 */,并把这一段视为多行注释。
请记住更新您的 CSS,为新的 comment 类添加相应的样式:

/* Style for the spans that formatContent emits with class='comment'. */
.comment {
  color: #808080; /* Grey text for comments */
}


通过这些更改,代码编辑器现在应该可以正确处理注释、特殊字符和关键字,并按所需的颜色和格式显示。

bkhjykvo

bkhjykvo2#

感谢 Debby Sinkalu 的回答,我采用了他对注释的分类处理方式。正则表达式看起来像这样:

/(#include|#ifdef|#ifndef|#define)|\/\/|\/\*|\*\/|([a-zA-Z_]\w*|\d+|[(){}#$%&.:/*\[\]!=";\\+])|(\s+)|([\s\S])+/g

字符串

相关问题