
x33g5p2x  于2022-01-17 转载在 其他  



[英]Calculates the number of char values required to represent the specified Unicode code point. This method checks if the codePoint is greater than or equal to 0x10000, in which case 2 is returned, otherwise 1. To test if the code point is valid, use the #isValidCodePoint(int) method.



// Walk the string one Unicode code point at a time instead of one char at a time.
final int length = s.length();
for (int offset = 0; offset < length; ) {
  final int codepoint = s.codePointAt(offset);

  // do something with the codepoint

  // Advance by the number of chars this code point occupies (1 or 2).
  offset += Character.charCount(codepoint);
}

代码示例来源:origin: stanfordnlp/CoreNLP

public static boolean langIndependentPuncCheck(String token) {
 boolean isNotWord = true;
 for (int offset = 0; offset < token.length(); ) {
   final int codepoint = token.codePointAt(offset);
   if (Character.isLetterOrDigit(codepoint)) {
    isNotWord = false;
   offset += Character.charCount(codepoint);
 return isNotWord;

代码示例来源:origin: prestodb/presto

static int codePointIndexToCharIndex(String s, int codePointCount) {
 for (int i = 0, j = 0, length = s.length(), c; i < length; i += Character.charCount(c)) {
  if (j == codePointCount) {
   return i;
  c = s.codePointAt(i);
  if ((Character.isISOControl(c) && c != '\n' && c != '\r')
    || c == Buffer.REPLACEMENT_CHARACTER) {
   return -1;
 return s.length();


// Iterate over the code points of a string using a char offset.
String str = "....";
int offset = 0, strLen = str.length();
while (offset < strLen) {
  int curChar = str.codePointAt(offset);
  // Move past every char that makes up the current code point.
  offset += Character.charCount(curChar);
  // do something with curChar
}

代码示例来源:origin: neo4j/neo4j

private int ltrimIndex( String value )
  int start = 0, length = value.length();
  while ( start < length )
    int codePoint = value.codePointAt( start );
    if ( !Character.isWhitespace( codePoint ) )
    start += Character.charCount( codePoint );
  return start;

代码示例来源:origin: square/okhttp

/** Returns {@code s} with control characters and non-ASCII characters replaced with '?'. */
private static String toHumanReadableAscii(String s) {
 for (int i = 0, length = s.length(), c; i < length; i += Character.charCount(c)) {
  c = s.codePointAt(i);
  if (c > '\u001f' && c < '\u007f') continue;
  Buffer buffer = new Buffer();
  buffer.writeUtf8(s, 0, i);
  for (int j = i + Character.charCount(c); j < length; j += Character.charCount(c)) {
   c = s.codePointAt(j);
   buffer.writeUtf8CodePoint(c > '\u001f' && c < '\u007f' ? c : '?');
  return buffer.readUtf8();
 return s;

代码示例来源:origin: neo4j/neo4j

public int computeHash()
  //NOTE that we are basing the hash code on code points instead of char[] values.
  if ( value.isEmpty() )
    return 0;
  int h = 1, length = value.length();
  for ( int offset = 0, codePoint; offset < length; offset += Character.charCount( codePoint ) )
    codePoint = value.codePointAt( offset );
    h = 31 * h + codePoint;
  return h;

代码示例来源:origin: redisson/redisson

public static boolean isPrintable(final String data) {
  final int length = data.length();
  for (int offset = 0; offset < length; ) {
    final int codePoint = data.codePointAt(offset);
    if (!isPrintable(codePoint)) {
      return false;
    offset += Character.charCount(codePoint);
  return true;

代码示例来源:origin: redisson/redisson

private static int[] toCodePoints(char[] str) {
  int[] codePoints = new int[Character.codePointCount(str, 0, str.length)];
  for (int i = 0, c = 0; i < str.length; c++) {
    int cp = Character.codePointAt(str, i);
    codePoints[c] = cp;
    i += Character.charCount(cp);
  return codePoints;

代码示例来源:origin: square/javapoet

public static String toJavaIdentifier(String suggestion) {
 StringBuilder result = new StringBuilder();
 for (int i = 0; i < suggestion.length(); ) {
  int codePoint = suggestion.codePointAt(i);
  if (i == 0
    && !Character.isJavaIdentifierStart(codePoint)
    && Character.isJavaIdentifierPart(codePoint)) {
  int validCodePoint = Character.isJavaIdentifierPart(codePoint) ? codePoint : '_';
  i += Character.charCount(codePoint);
 return result.toString();

代码示例来源:origin: apache/incubator-druid

private int compareNonNumeric(String str0, String str1, int[] pos)
 // find the end of both non-numeric substrings
 int start0 = pos[0];
 int ch0 = str0.codePointAt(pos[0]);
 pos[0] += Character.charCount(ch0);
 while (pos[0] < str0.length() && !isDigit(ch0 = str0.codePointAt(pos[0]))) {
  pos[0] += Character.charCount(ch0);
 int start1 = pos[1];
 int ch1 = str1.codePointAt(pos[1]);
 pos[1] += Character.charCount(ch1);
 while (pos[1] < str1.length() && !isDigit(ch1 = str1.codePointAt(pos[1]))) {
  pos[1] += Character.charCount(ch1);
 // compare the substrings
 return, pos[0]), str1.substring(start1, pos[1]));

代码示例来源:origin: square/retrofit

private static String canonicalizeForPath(String input, boolean alreadyEncoded) {
 int codePoint;
 for (int i = 0, limit = input.length(); i < limit; i += Character.charCount(codePoint)) {
  codePoint = input.codePointAt(i);
  if (codePoint < 0x20 || codePoint >= 0x7f
    || PATH_SEGMENT_ALWAYS_ENCODE_SET.indexOf(codePoint) != -1
    || (!alreadyEncoded && (codePoint == '/' || codePoint == '%'))) {
   // Slow path: the character at i requires encoding!
   Buffer out = new Buffer();
   out.writeUtf8(input, 0, i);
   canonicalizeForPath(out, input, i, limit, alreadyEncoded);
   return out.readUtf8();
 // Fast path: no characters required encoding.
 return input;

代码示例来源:origin: prestodb/presto

/** Returns {@code s} with control characters and non-ASCII characters replaced with '?'. */
public static String toHumanReadableAscii(String s) {
 for (int i = 0, length = s.length(), c; i < length; i += Character.charCount(c)) {
  c = s.codePointAt(i);
  if (c > '\u001f' && c < '\u007f') continue;
  Buffer buffer = new Buffer();
  buffer.writeUtf8(s, 0, i);
  for (int j = i; j < length; j += Character.charCount(c)) {
   c = s.codePointAt(j);
   buffer.writeUtf8CodePoint(c > '\u001f' && c < '\u007f' ? c : '?');
  return buffer.readUtf8();
 return s;

代码示例来源:origin: square/okhttp

static void percentDecode(Buffer out, String encoded, int pos, int limit, boolean plusIsSpace) {
 int codePoint;
 for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
  codePoint = encoded.codePointAt(i);
  if (codePoint == '%' && i + 2 < limit) {
   int d1 = decodeHexDigit(encoded.charAt(i + 1));
   int d2 = decodeHexDigit(encoded.charAt(i + 2));
   if (d1 != -1 && d2 != -1) {
    out.writeByte((d1 << 4) + d2);
    i += 2;
  } else if (codePoint == '+' && plusIsSpace) {
   out.writeByte(' ');

代码示例来源:origin: square/okhttp

@Nullable Charset charset) {
int codePoint;
for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
 codePoint = input.codePointAt(i);
 if (codePoint < 0x20

代码示例来源:origin: prestodb/presto

static void percentDecode(Buffer out, String encoded, int pos, int limit, boolean plusIsSpace) {
 int codePoint;
 for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
  codePoint = encoded.codePointAt(i);
  if (codePoint == '%' && i + 2 < limit) {
   int d1 = decodeHexDigit(encoded.charAt(i + 1));
   int d2 = decodeHexDigit(encoded.charAt(i + 2));
   if (d1 != -1 && d2 != -1) {
    out.writeByte((d1 << 4) + d2);
    i += 2;
  } else if (codePoint == '+' && plusIsSpace) {
   out.writeByte(' ');

代码示例来源:origin: square/okhttp

for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
 codePoint = input.codePointAt(i);
 if (alreadyEncoded
  } else {
   encodedCharBuffer.writeString(input, i, i + Character.charCount(codePoint), charset);

代码示例来源:origin: google/guava

@AndroidIncompatible // slow
@GwtIncompatible // Doubles.tryParse
public void testTryParseAllCodePoints() {
 // Exercise non-ASCII digit test cases and the like.
 char[] tmp = new char[2];
 for (int i = Character.MIN_CODE_POINT; i < Character.MAX_CODE_POINT; i++) {
  Character.toChars(i, tmp, 0);
  checkTryParse(String.copyValueOf(tmp, 0, Character.charCount(i)));

代码示例来源:origin: google/guava

@AndroidIncompatible // slow
@GwtIncompatible // Floats.tryParse
public void testTryParseAllCodePoints() {
 // Exercise non-ASCII digit test cases and the like.
 char[] tmp = new char[2];
 for (int i = Character.MIN_CODE_POINT; i < Character.MAX_CODE_POINT; i++) {
  Character.toChars(i, tmp, 0);
  checkTryParse(String.copyValueOf(tmp, 0, Character.charCount(i)));

代码示例来源:origin: square/retrofit

private static void canonicalizeForPath(Buffer out, String input, int pos, int limit,
  boolean alreadyEncoded) {
 Buffer utf8Buffer = null; // Lazily allocated.
 int codePoint;
 for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
  codePoint = input.codePointAt(i);
  if (alreadyEncoded
    && (codePoint == '\t' || codePoint == '\n' || codePoint == '\f' || codePoint == '\r')) {
   // Skip this character.
  } else if (codePoint < 0x20 || codePoint >= 0x7f
    || PATH_SEGMENT_ALWAYS_ENCODE_SET.indexOf(codePoint) != -1
    || (!alreadyEncoded && (codePoint == '/' || codePoint == '%'))) {
   // Percent encode this character.
   if (utf8Buffer == null) {
    utf8Buffer = new Buffer();
   while (!utf8Buffer.exhausted()) {
    int b = utf8Buffer.readByte() & 0xff;
    out.writeByte(HEX_DIGITS[(b >> 4) & 0xf]);
    out.writeByte(HEX_DIGITS[b & 0xf]);
  } else {
   // This character doesn't need encoding. Just copy it over.

