2017年8月11日
UTF-8中文字符编码和解码,用位运算实现
/**
 * Hand-rolled UTF-8 encoding and decoding of a single character using bit operations.
 *
 * <p>Only the three-byte UTF-8 form is implemented. That form has the byte layout
 * {@code 1110xxxx 10xxxxxx 10xxxxxx} and carries 16 payload bits (4 + 6 + 6); it is the
 * form UTF-8 uses for code points U+0800..U+FFFF, which is where Chinese characters in
 * the Basic Multilingual Plane live.
 *
 * @author FreeDroid
 */
public class Utf8codeANDdecode {

    /**
     * Demo entry point: encodes the character U+6211, decodes the resulting bytes and
     * prints the decoded character.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // U+6211, written as an escape so the source does not depend on the file encoding.
        int ch = '\u6211';
        byte[] bytes = codeUTF8(ch);
        char ch2 = decodeUTF8(bytes);
        System.out.println(ch2);
    }

    /**
     * Decodes one three-byte UTF-8 sequence into a character.
     *
     * <p>The prefix bits of each byte are masked off <em>before</em> shifting, so neither
     * the sign-extension of the {@code byte} operands nor the {@code 10} marker of the
     * continuation bytes can leak into the result.
     *
     * @param bytes the sequence to decode; the first three elements are read as
     *              {@code 1110xxxx 10xxxxxx 10xxxxxx}. The prefix bits are not validated.
     * @return the character assembled from the 16 payload bits
     */
    public static char decodeUTF8(byte[] bytes) {
        int high = (bytes[0] & 0x0f) << 12; // 4 payload bits of the lead byte -> bits 12..15
        int mid = (bytes[1] & 0x3f) << 6;   // 6 payload bits -> bits 6..11
        int low = bytes[2] & 0x3f;          // 6 payload bits -> bits 0..5
        return (char) (high | mid | low);
    }

    /**
     * Encodes a character into the three-byte UTF-8 form.
     *
     * <p>The three-byte form is emitted unconditionally. Only the low 16 bits of
     * {@code ch} are used, and values below U+0800 are not shortened to the one- or
     * two-byte form, so the result is standard UTF-8 only for U+0800..U+FFFF.
     *
     * @param ch the character value to encode
     * @return a new three-element array {@code {1110xxxx, 10xxxxxx, 10xxxxxx}}
     */
    public static byte[] codeUTF8(int ch) {
        int b1 = ch >>> 12 & 0xf | 0xe0; // bits 12..15 behind the 1110 lead prefix
        int b2 = ch >>> 6 & 0x3f | 0x80; // bits 6..11 behind the 10 continuation prefix
        int b3 = ch & 0x3f | 0x80;       // bits 0..5 behind the 10 continuation prefix
        return new byte[] { (byte) b1, (byte) b2, (byte) b3 };
    }
}