- mysql
在使用mysql时候,某些字段会存储中文字符复,或是包含中文字符的串,查询出来的方制法是:
SELECT col FROM table WHERE length(col)!知=char_length(col)
当字符集为UTF-8,并且字符为中文时,length() 和 char_length() 两个方法返回的结果是不相道同的。
- Java
Java
// GENERAL_PUNCTUATION 判断中文的“号
// CJK_SYMBOLS_AND_PUNCTUATION 判断中文的。号
// HALFWIDTH_AND_FULLWIDTH_FORMS 判断中文的,号
private static final boolean isChinese(char c) {
Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
if (ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
|| ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
|| ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
|| ub == Character.UnicodeBlock.GENERAL_PUNCTUATION
|| ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
|| ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
return true;
}
return false;
}
public static final boolean isChinese(String strName) {
char[] ch = strName.toCharArray();
for (int i = 0; i < ch.length; i++) {
char c = ch[i];
if (isChinese(c)) {
return true;
}
}
return false;
}
2. kotlin
// CJK_SYMBOLS_AND_PUNCTUATION 判断中文的。号
// HALFWIDTH_AND_FULLWIDTH_FORMS 判断中文的,号
private fun isChinese(c: Char): Boolean {
val ub = UnicodeBlock.of(c)
return ub === UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS || ub === UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS || ub === UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A || ub === UnicodeBlock.GENERAL_PUNCTUATION || ub === UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION || ub === UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
}
fun isChinese(strName: String): Boolean {
val ch = strName.toCharArray()
for (i in ch.indices) {
val c = ch[i]
if (isChinese(c)) {
return true
}
}
return false
}