JavaScript String.prototype.contains() with locale

holgip5t  于 2023-09-29  发布在  Java
关注(0)|答案(6)|浏览(102)

是否可以检查一个字符串是否包含一个具有区域设置支持的子字符串?
'Ábc'.contains('A')应该为true。
JavaScript现在有string.prototype.localeCompare()用于字符串比较,并支持本地化,但我看不到对应的localeContains()

polkgigr

polkgigr1#

你可以这样做:

/**
 * Locale-aware single-character containment check, installed on
 * String.prototype.
 *
 * Returns true when any character of the receiver compares equal to
 * `charToCheck` under English collation at "base" sensitivity, i.e. ignoring
 * case and diacritics ('Ábc'.contains('a') is true).
 *
 * @param {string} charToCheck - The single character to look for.
 * @returns {boolean} Whether some character of this string matches.
 */
String.prototype.contains = function contains(charToCheck) {
  // Build the collator once instead of re-resolving locale and options for
  // every character of the receiver.
  const collator = new Intl.Collator('en', { sensitivity: 'base' });
  // Spreading iterates by code point, so astral characters (e.g. emoji) stay
  // whole instead of being split into two lone surrogates as split('') does.
  return [...String(this)].some((char) => collator.compare(char, charToCheck) === 0);
};

// Demo: the annotated results are those given in the original answer for
// 'base' sensitivity, where case and diacritics are ignored.
console.log('Ábc'.contains('A')) // true  ('Á' matches 'A')
console.log('Ábc'.contains('B')) // true  ('b' matches 'B')
console.log('Ábc'.contains('b')) // true
console.log('Ábc'.contains('u')) // false (no character with base letter 'u')
console.log('coté'.contains('e')) // true  ('é' matches 'e')

参见localeCompare的文档。灵敏度base的含义是:
“base”:只有基本字母不同的字符串才比较为不相等。例如:a ≠ b,a = á,a = A。

8ehkhllq

8ehkhllq2#

contains()有一个更快的替代方法,可以对字符串进行locale检查

看起来先去掉变音符号、再直接比较字符串要快得多:在我的机器上,比@chickens或@dag0310的解决方案快了近10倍,您可以通过原文中的链接(here)自行测试。为了与String.includes保持一致,搜索空字符串时返回true。

/**
 * Case- and diacritic-insensitive substring test, installed on
 * String.prototype.
 *
 * Both strings are decomposed with NFKD, stripped of combining marks in the
 * U+0300-U+036F range and lower-cased before a plain `includes` check.
 *
 * @param {*} sub - Value to search for. The empty string always matches;
 *   numbers are searched by their decimal text; other falsy values
 *   (false, null, undefined) never match.
 * @returns {boolean} Whether the folded receiver contains the folded `sub`.
 */
String.prototype.localeContains = function(sub) {
  // Mirror String.prototype.includes: "" is contained in every string.
  if (sub === "") return true;
  // Convert numbers before the falsy check so that 0 is searched like any
  // other number instead of being rejected.
  const needle = typeof sub === "number" ? String(sub) : sub;
  if (!needle || this.length === 0) return false;
  const fold = (s) => s.normalize("NFKD").replace(/[\u0300-\u036f]/g, "").toLowerCase();
  // No length pre-check on the raw inputs: normalization changes lengths
  // ("é" vs "e\u0301", "ﬁ" vs "fi"), so comparing raw lengths would reject
  // valid matches. `includes` already handles a needle longer than the text.
  return fold(String(this)).includes(fold("" + needle));
};

// Demo of String.prototype.localeContains on a Czech sample string.
var str = "142 Rozmočených Kříd";
console.log(str.localeContains("kŘi")); // true: case and diacritics are folded away
console.log(str.localeContains(42));    // true: the number is converted to "42"
console.log(str.localeContains(""));    // true: empty string always matches
console.log(str.localeContains(false)); // false: falsy non-string values never match

**NFKD**

NFKD修饰符将所有precomposed characters分解为它们的基本字符和组合标记,随后通过replace调用将其删除。

重要提示

这种方法在某些情况下不起作用,例如"Straße".contains("SS"),这取决于您是否想把ß视为SS的等价写法。要了解更多信息,请查看MDN上关于规范等价与兼容等价转换的说明,其中还提到了NFKD以外的其他规范化形式。
感谢@LukasKalbertodt在评论中提到边缘情况。

ahy6op9u

ahy6op9u3#

如果你要查找的不止一个字符,下面是一个效率不高但可用的方案:

/**
 * Multi-character, case- and diacritic-insensitive containment test.
 *
 * Slides a window of `b`'s length over `a` and compares each window to `b`
 * with an English collator at "base" sensitivity.
 *
 * @param {string} a - The string to search in.
 * @param {string} b - The substring to look for.
 * @returns {boolean} Whether some window of `a` collates equal to `b`.
 */
const localeContains = (a, b) => {
  const collator = new Intl.Collator("en", { sensitivity: 'base' });
  // Compose both sides first so that precomposed and decomposed spellings of
  // the same text have the same length and the fixed-size window lines up.
  const haystack = String(a).normalize();
  const needle = String(b).normalize();
  for (let start = 0; start <= haystack.length - needle.length; start++) {
    // The window end must move with the start (start + needle.length); a
    // fixed end index would only ever find matches at the very beginning.
    if (collator.compare(haystack.slice(start, start + needle.length), needle) === 0) {
      return true;
    }
  }
  return false;
};

// Demo inputs are declared explicitly; bare assignments would create implicit
// globals and throw in strict mode / ES modules.
const a = "RESERVE ME";
const b = "réservé";

console.log(localeContains(a,b));
ix0qys7i

ix0qys7i4#

如果搜索的字符串不在主字符串的开头,则chickens的答案不起作用。
请使用此包:https://www.npmjs.com/package/locale-includes

// Direct call; `localeIncludes` is presumably the export of the
// locale-includes npm package linked above. The needle does not start at
// index 0 of the haystack, and the answer reports the result shown below.
localeIncludes('RESERVE ME', 'éservé', {usage: 'search', sensitivity: 'base'});
// true

为了使它更好地用作字符串原型函数:

/**
 * Prototype convenience wrapper: forwards the receiver and `str` to
 * `localeIncludes` with search usage and base sensitivity.
 *
 * @param {string} str - The substring to look for.
 * @returns {*} Whatever `localeIncludes` returns for these arguments.
 */
String.prototype.localeIncludes = function(str) {
  const searchOptions = {usage: 'search', sensitivity: 'base'};
  return localeIncludes(this, str, searchOptions);
};

// Method-style call; requires the String.prototype.localeIncludes wrapper to
// have been installed first.
'RESERVE ME'.localeIncludes('éservé');
// true
dy2hfwbg

dy2hfwbg5#

您可以规范化字符串并使用string.includes

// inspired by https://stackoverflow.com/a/47368424/5734961
/**
 * Locale-aware counterpart of String.prototype.includes.
 *
 * Both strings are folded (case and/or accents, depending on the collator's
 * resolved sensitivity) and then compared with a plain `includes`. A letter
 * is only replaced by its accent-stripped form when the collator itself
 * considers the two equal, so locale-specific distinctions are preserved.
 * Options default to { usage: 'search', sensitivity: 'base' }.
 *
 * @param {string} string The string to search in.
 * @param {string} searchString The substring to look for.
 * @param {string|string[]=} locales A BCP 47 locale tag or array of tags in descending priority. If omitted, the default locale of the JavaScript runtime is used; see Intl.Collator for details.
 * @param {Intl.CollatorOptions=} options Comparison options, merged over the defaults; see Intl.Collator for details.
 * @param {number=} position UTF-16 index in `string` at which to start searching; defaults to 0, negative values are treated as 0.
 * @returns {boolean} true if `searchString` is found at or after `position`.
 */
function localeIncludes(string, searchString, locales, options, position = 0) {
  // Spreading null/undefined is a no-op, so caller options simply override the defaults.
  const collatorOptions = { usage: 'search', sensitivity: 'base', ...options };
  const collator = new Intl.Collator(locales, collatorOptions);
  const { sensitivity, ignorePunctuation } = collator.resolvedOptions();
  // 'base' and 'accent' sensitivities both treat case differences as equal.
  const foldsCase = sensitivity === 'base' || sensitivity === 'accent';

  function localeNormalize(value) {
    // Coerce like localeCompare does, and compose first so a base letter
    // followed by a combining mark is handled as one accented letter rather
    // than as a bare base letter plus a stray mark.
    let text = String(value).normalize('NFC');
    if (foldsCase) text = text.toLocaleLowerCase(locales);
    // The `u` flag makes `.` match whole code points, so astral characters
    // are not handed to the collator as lone surrogates.
    return text.replace(/./gu, (letter) => {
      // Drop characters the collator ignores entirely (e.g. punctuation).
      if (ignorePunctuation && collator.compare(letter, '') === 0) return '';
      const stripped = letter.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
      // Only strip the accent when the collator agrees the two are equal.
      return stripped !== letter && collator.compare(letter, stripped) === 0 ? stripped : letter;
    });
  }

  // Honour `position` the way includes() does: clamp into [0, length] and
  // search only the remainder of the haystack.
  const haystack = String(string);
  const start = Math.min(Math.max(Math.trunc(position) || 0, 0), haystack.length);
  return localeNormalize(haystack.slice(start)).includes(localeNormalize(searchString));
}

或尝试查找匹配的子字符串

/**
 * Locale-aware substring search using a sliding window and Intl.Collator.
 *
 * Both inputs are converted to strings and normalized (default NFC), then
 * every window of the search string's length, starting at `position`, is
 * compared against the search string with the collator. Because the window
 * has a fixed length, collators whose equal strings may differ in length
 * (`numeric`, `ignorePunctuation`) are not supported.
 *
 * @param {string} string The string to search in.
 * @param {string} searchString The substring to look for.
 * @param {string|string[]=} locales A BCP 47 locale tag or array of tags in descending priority. If omitted, the default locale of the JavaScript runtime is used; see Intl.Collator for details.
 * @param {Intl.CollatorOptions=} options Comparison options passed to Intl.Collator unchanged.
 * @param {number=} position Index in the normalized string at which to start; defaults to 0.
 * @returns {boolean} true if some window collates equal to `searchString`.
 */
function localeIncludes(string, searchString, locales, options, position = 0) {
  // Coerce like localeCompare does, and normalize so a composed letter and
  // its base-plus-mark spelling occupy the same number of code units.
  const haystack = String(string).normalize();
  const needle = String(searchString).normalize();
  const collator = new Intl.Collator(locales, options);

  const windowSize = needle.length;
  const lastStart = haystack.length - windowSize;

  // Fixed-size window: valid only when strings that collate as equal have
  // equal length. Variable-length matching would need to test every
  // substring, which is quadratic.
  let start = position;
  while (start <= lastStart) {
    const candidate = haystack.substring(start, start + windowSize);
    if (collator.compare(candidate, needle) === 0) {
      return true;
    }
    start++;
  }
  return false;
}

或者你可以使用一个借助window.find的hack,它的行为与在页面上用Ctrl-F搜索*完全*一致

/**
 * Browser-only hack: checks whether `searchString` occurs in `string` by
 * rendering the text into a temporary iframe and calling the iframe
 * window's non-standard `find()` (the same search Ctrl-F performs).
 *
 * @param {string} string The text to search in.
 * @param {string} searchString The text to look for.
 * @param {string} locale Language tag written to the iframe document's `lang` attribute.
 * @returns {boolean} The result of `contentWindow.find(searchString)`.
 */
function iframeIncludes(string, searchString, locale) {
  const iframe = document.createElement('iframe');
  iframe.style = 'position: fixed; top: 0; left: 0;';
  document.body.append(iframe);
  try {
    // Must be read from the iframe created above.
    const iframeDoc = iframe.contentDocument;
    iframeDoc.open();
    // you MUST use <pre> otherwise it doesn't work
    iframeDoc.write(`
    <html lang="${locale}">
      <body>
        <pre></pre>
      </body>
    </html>
  `);
    iframeDoc.close();
    const pre = iframeDoc.querySelector('pre');
    pre.innerText = string;
    return iframe.contentWindow.find(searchString);
  } finally {
    // Always detach the helper iframe, even if one of the steps above throws.
    iframe.remove();
  }
}
ijnw1ujt

ijnw1ujt6#

/**
 * Substring test that is insensitive to how accented characters are encoded
 * (precomposed vs. base letter + combining mark).
 *
 * Both inputs are decomposed with NFD and then searched with a plain
 * `indexOf`. No accents are stripped and no case folding is done, so the
 * comparison stays case-sensitive. Because NFD places combining marks after
 * their base letter, a needle ending in a bare base letter can still match
 * where the haystack has that letter with a diacritic (e.g. 'zmoc' in 'zmoč').
 *
 * @param {string} mainString The string to search in.
 * @param {string} substring The string to look for.
 * @returns {boolean} true if the decomposed substring occurs in the decomposed main string.
 */
function localeContains(mainString, substring) {
  // Decompose both sides so equivalent spellings share one code-unit sequence.
  const [haystack, needle] = [mainString, substring].map((text) => text.normalize('NFD'));
  // indexOf yields the match position, or -1 when there is none.
  return haystack.indexOf(needle) >= 0;
}
    
    // Example usage on a Czech sample string:
    const stringToCheck = "142 Rozmočených Kříd";
    console.log(localeContains(stringToCheck, 'ří')); // true: same accented letters are present
    console.log(localeContains(stringToCheck, 'a')); // false: no 'a' in the string
    console.log(localeContains(stringToCheck, 'yc')); // false: after NFD, 'ý' is 'y' + combining acute, so 'y' is not directly followed by 'c'
    console.log(localeContains(stringToCheck, 'zmoč')); // true

这段代码提供了一种与字符编码形式无关的子字符串检查,如下所示:

  • 标准化:首先使用normalize('NFD')方法对主字符串和子字符串进行规范化。规范化将带有变音符号的字符分解为基本字符加组合标记,使同一字符的不同编码写法可以直接比较。注意:这段代码本身并不去除重音,也不转换大小写,因此比较仍然区分大小写和重音。
  • 子字符串检查:最后,使用indexOf检查规范化后的子字符串是否存在于规范化后的主字符串中。如果存在,indexOf返回子字符串的起始位置,该值不等于-1。

相关问题