2017年8月11日
UTF-8中文字符编码和解码,用位运算实现
/**
 * Encodes and decodes a single character as a three-byte UTF-8 sequence using bit operations.
 *
 * <p>Chinese characters occupy three bytes in UTF-8, laid out as
 * {@code 1110XXXX 10XXXXXX 10XXXXXX}. Both methods in this class handle that three-byte
 * form only.
 *
 * @author FreeDroid
 */
public class Utf8codeANDdecode {

    /** Demo: encodes one Chinese character, decodes it again and prints the result. */
    public static void main(String[] args) {
        int ch = '我';
        byte[] bytes = codeUTF8(ch);
        char ch2 = decodeUTF8(bytes);
        System.out.println(ch2);
    }

    /**
     * Decodes a three-byte UTF-8 sequence into the character it represents.
     *
     * <p>Only the payload bits are read (low 4 bits of the first byte, low 6 bits of the
     * second and third bytes); the prefix bits are not validated.
     *
     * @param bytes array whose first three elements are the UTF-8 bytes of one character
     * @return the decoded character
     * @throws NullPointerException if {@code bytes} is {@code null}
     * @throws ArrayIndexOutOfBoundsException if {@code bytes} has fewer than three elements
     */
    public static char decodeUTF8(byte[] bytes) {
        // Mask each byte BEFORE shifting: bytes are sign-extended to int, and masking first
        // strips both the sign extension and the UTF-8 prefix bits. Masking the second byte
        // after the shift with 0x3fff would keep the continuation marker bit (always 1) at
        // bit 13 and corrupt every character whose bit 13 is 0 (e.g. U+4E2D -> U+6E2D).
        int high = (bytes[0] & 0x0f) << 12; // 1110XXXX -> bits 12..15
        int middle = (bytes[1] & 0x3f) << 6; // 10XXXXXX -> bits 6..11
        int low = bytes[2] & 0x3f; // 10XXXXXX -> bits 0..5
        return (char) (high | middle | low);
    }

    /**
     * Encodes a character as a three-byte UTF-8 sequence.
     *
     * <p>Only the low 16 bits of {@code ch} are used, and three bytes are always produced
     * regardless of the value of {@code ch}.
     *
     * @param ch the character value to encode
     * @return a new three-element array: lead byte {@code 1110XXXX} followed by two
     *     continuation bytes {@code 10XXXXXX}
     */
    public static byte[] codeUTF8(int ch) {
        int b1 = ((ch >>> 12) & 0x0f) | 0xe0; // bits 12..15 with lead prefix 1110
        int b2 = ((ch >>> 6) & 0x3f) | 0x80; // bits 6..11 with continuation prefix 10
        int b3 = (ch & 0x3f) | 0x80; // bits 0..5 with continuation prefix 10
        return new byte[] { (byte) b1, (byte) b2, (byte) b3 };
    }
}
本文地址:https://wizzer.cn/archives/3362 , 转载请保留.