如何使用JavaScript将HTML字符串转换为WhatsApp消息格式

ttvkxqim  于 2023-04-04  发布在  Java
关注(0)|答案(3)|浏览(99)

我想使用JavaScript将下面的HTML字符串转换成WhatsApp推荐的消息格式:

// Sample HTML input for the conversion. Note that the literal ends with a `"`
// right before the closing `'`, so that double quote is part of the string.
let htmlText = '<b>TEST </b> BODY <i><b>WITH </b></i>SAMPLE <strike style="font-weight: bold; font-style: italic;">FORMAT&nbsp;</strike>&nbsp; &nbsp; HERE"';

转换为以下格式

*TEST*  BODY _*WITH *_ SAMPLE *_~FORMAT~_* HERE
omtl5h9j

omtl5h9j1#

我建议使用DOM解析器来解析HTML,然后遍历解析得到的DOM。这样,结果文本中的所有HTML实体都会被解析为普通文本,所有HTML注释都会被去掉,而且即使HTML标签或其属性中的空白与预期不同,也不会出错。
我还会确保格式化字符紧贴它们所作用的单词,因此会把两侧的空白排除在格式标记之外:

/**
 * Convert an HTML fragment into WhatsApp's inline formatting syntax.
 *
 * The markup is parsed with DOMParser and the resulting tree is walked
 * depth-first. <b>, <i> and <strike> elements are wrapped in `*`, `_` and `~`
 * respectively; any other element contributes only the text of its children.
 * Markers are placed next to the first/last non-whitespace character so that
 * leading and trailing whitespace stays outside of them.
 *
 * @param {string} html - HTML markup to convert.
 * @returns {string} The text with WhatsApp formatting markers applied.
 */
function htmlToFormat(html) {
    const ELEMENT_NODE = 1;
    const TEXT_NODE = 3;
    const codes = { B: "*", I: "_", STRIKE: "~" };
    // Parse the argument itself rather than some outer `htmlText` variable.
    const {body} = new DOMParser().parseFromString(html, "text/html");
    const dfs = ({childNodes}) => Array.from(childNodes, node => {
        if (node.nodeType !== ELEMENT_NODE) {
            // Only text nodes carry message text; comment nodes would
            // otherwise leak their content through `textContent`.
            return node.nodeType === TEXT_NODE ? node.textContent : "";
        }
        const s = dfs(node);
        const code = codes[node.tagName];
        // First alternative inserts the marker after leading whitespace,
        // second one inserts it before trailing whitespace. A string without
        // any non-whitespace character matches neither and stays unmarked.
        return code ? s.replace(/^(\s*)(?=\S)|(?<=\S)(\s*)$/g, `$1${code}$2`) : s;
    }).join("");

    return dfs(body);
}

// Demo: convert the sample markup and log the resulting WhatsApp text.
const htmlText = '<b>TEST </b> BODY <i><b>WITH </b></i>SAMPLE <strike style="font-weight: bold; font-style: italic;">FORMAT&nbsp;</strike>&nbsp; &nbsp; HERE"';
const converted = htmlToFormat(htmlText);
console.log(converted);
fykwrbwg

fykwrbwg2#

如果你确定这是你得到的htmlText的确切格式,这很容易。这里的技巧是使用正则表达式并替换字符以获得所需的格式:

/**
 * Convert simple HTML markup to WhatsApp formatting using regex substitutions.
 *
 * Attribute-less <b>/<i> tags become `*`/`_`, <strike> tags (with or without
 * attributes) become `~`, every remaining tag is stripped and `&nbsp;`
 * entities are removed.
 *
 * @param {string} htmlText - HTML markup to convert.
 * @returns {string} The converted text.
 */
function convertToWhatsAppFormat(htmlText) {
  // Applied in order: the generic tag stripper must run after the
  // tag-specific rules have consumed the tags they care about.
  const substitutions = [
    [/<\/?b>/g, "*"],
    [/<\/?i>/g, "_"],
    [/<strike[^>]*>/g, "~"],
    [/<\/strike>/g, "~"],
    [/<[^>]+>/g, ""],
    [/&nbsp;/g, ""],
  ];

  return substitutions.reduce(
    (text, [pattern, marker]) => text.replace(pattern, marker),
    htmlText,
  );
}

// Demo: run the sample markup through the converter and print the result.
const htmlText = '<b>TEST </b> BODY <i><b>WITH </b></i>SAMPLE <strike style="font-weight: bold; font-style: italic;">FORMAT&nbsp;</strike>&nbsp; &nbsp; HERE"';

console.log(convertToWhatsAppFormat(htmlText));

在这种情况下,replace()方法会替换给定正则表达式的所有匹配项。希望这对你有帮助。

aor9mmx1

aor9mmx13#

基于@trincot的解决方案,并添加了一些额外的改动:

/**
 * Rewrite every `<i><b>…</b></i>` as `<b><i>…</i></b>`.
 * Pairs with empty content are left untouched.
 */
function swapItalicBoldNesting(html) {
    // A replacer function keeps `$` sequences in the captured text literal.
    return html.replace(/<i><b>(.*?)<\/b><\/i>/g, (whole, inner) =>
        (inner ? `<b><i>${inner}</i></b>` : whole));
}

/**
 * Turn bold/italic inline styling on <strike> elements into <b>/<i> wrappers.
 * Elements without a trailing `style="…"` attribute are left untouched.
 */
function liftStrikeStyles(html) {
    return html.replace(/<strike(.*?)<\/strike>/g, (element) => {
        const styleAttr = element.match(/style="(.*?)">/);
        if (!styleAttr) {
            return element;
        }
        // Drop all `name="value"` attributes so only the bare tag remains.
        let rewritten = element.replace(/\s*\S*\="[^"]+"\s*/gm, "");
        if (styleAttr[0].includes("italic")) {
            rewritten = `<i>${rewritten}</i>`;
        }
        if (styleAttr[0].includes("bold")) {
            rewritten = `<b>${rewritten}</b>`;
        }
        return rewritten;
    });
}

/**
 * Convert an HTML fragment into WhatsApp's inline formatting syntax.
 *
 * Before parsing, `<i><b>` nesting is swapped to `<b><i>` and bold/italic
 * styles declared on <strike> elements are turned into real <b>/<i> wrappers.
 * The markup is then parsed with DOMParser and walked depth-first: <b>, <i>
 * and <strike> are wrapped in `*`, `_` and `~`; other elements contribute
 * only the text of their children.
 *
 * @param {string} htmlText - HTML markup to convert.
 * @returns {string} The text with WhatsApp formatting markers applied.
 */
function htmlToFormat(htmlText) {
    const ELEMENT_NODE = 1;
    const TEXT_NODE = 3;
    const codes = { B: "*", I: "_", STRIKE: "~" };
    const normalized = liftStrikeStyles(swapItalicBoldNesting(htmlText));
    const {body} = new DOMParser().parseFromString(normalized, "text/html");
    const dfs = ({childNodes}) => Array.from(childNodes, node => {
        if (node.nodeType !== ELEMENT_NODE) {
            // Only text nodes carry message text; comment nodes would
            // otherwise leak their content through `textContent`.
            return node.nodeType === TEXT_NODE ? node.textContent : "";
        }
        const s = dfs(node);
        const code = codes[node.tagName];
        // Markers go next to the first/last non-whitespace character so
        // surrounding whitespace stays outside of them.
        return code ? s.replace(/^(\s*)(?=\S)|(?<=\S)(\s*)$/g, `$1${code}$2`) : s;
    }).join("");

    return dfs(body);
}
    // Demo: feed the sample markup to the converter and log the output.
    const htmlText = '<b>TEST </b> BODY <i><b>WITH </b></i>SAMPLE <strike style="font-weight: bold; font-style: italic;">FORMAT&nbsp;</strike>&nbsp; &nbsp; HERE"';
    const whatsAppMessage = htmlToFormat(htmlText);
    console.log(whatsAppMessage);

相关问题