Base16 编码的 ECMAScript 语言实现如下:
/**
 * Encodes an ECMAScript string as UTF-8 and returns the result as a
 * "byte string": one character per UTF-8 byte, each with a char code in
 * the range 0x00-0xFF.
 *
 * ECMAScript strings are sequences of UTF-16 code units, so a valid
 * surrogate pair is first combined into a single code point
 * (U+10000 - U+10FFFF) and emitted as one four-byte sequence. An unpaired
 * surrogate is emitted as a three-byte sequence built from its code unit
 * value. U+0000 is emitted as the single byte 0x00.
 *
 * @param {string} str - Text to encode.
 * @returns {string} Byte string holding the UTF-8 encoding of `str`.
 */
function codePoint2UTF8(str) {
    var strLength = str.length;
    var utf8Bytes = [];
    for (var i = 0; i < strLength; i++) {
        var code = str.charCodeAt(i);

        // A high surrogate followed by a low surrogate encodes one code point
        // above U+FFFF; merge the pair and consume the second code unit.
        if (code >= 0xD800 && code <= 0xDBFF && i + 1 < strLength) {
            var lowSurrogate = str.charCodeAt(i + 1);
            if (lowSurrogate >= 0xDC00 && lowSurrogate <= 0xDFFF) {
                code = 0x10000 + ((code - 0xD800) << 10) + (lowSurrogate - 0xDC00);
                i++;
            }
        }

        if (code <= 0x007F) {
            // One byte.
            // U+0000 - U+007F   0xxxxxxx
            utf8Bytes.push(String.fromCharCode(code));
        } else if (code <= 0x07FF) {
            // Two bytes.
            // U+0080 - U+07FF   110xxxxx 10xxxxxx
            utf8Bytes.push(String.fromCharCode(
                0xC0 | ((code >> 6) & 0x1F),
                0x80 | (code & 0x3F)
            ));
        } else if (code <= 0xFFFF) {
            // Three bytes.
            // U+0800 - U+FFFF   1110xxxx 10xxxxxx 10xxxxxx
            utf8Bytes.push(String.fromCharCode(
                0xE0 | ((code >> 12) & 0x0F),
                0x80 | ((code >> 6) & 0x3F),
                0x80 | (code & 0x3F)
            ));
        } else {
            // Four bytes.
            // U+10000 - U+10FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            // A combined surrogate pair can never exceed U+10FFFF, so the
            // historical five- and six-byte forms are not needed.
            utf8Bytes.push(String.fromCharCode(
                0xF0 | ((code >> 18) & 0x07),
                0x80 | ((code >> 12) & 0x3F),
                0x80 | ((code >> 6) & 0x3F),
                0x80 | (code & 0x3F)
            ));
        }
    }
    return utf8Bytes.join('');
}
// Base16 alphabet: the array index is a 4-bit value, the entry is its
// uppercase hexadecimal digit.
var base16EncodeTable = '0123456789ABCDEF'.split('');

/**
 * Encodes a string as Base16 (uppercase hexadecimal).
 *
 * The string is first converted to a UTF-8 byte string by codePoint2UTF8;
 * every byte is then written as two hex digits, high nibble first.
 *
 * @param {string} str - Text to encode.
 * @returns {string} Base16 text, two digits per UTF-8 byte.
 */
function base16_encode(str) {
    var bytes = codePoint2UTF8(str);
    var total = bytes.length;
    var encoded = "";
    for (var pos = 0; pos < total; pos++) {
        var byteValue = bytes.charCodeAt(pos);
        // Upper four bits select the first digit, lower four bits the second.
        var highDigit = base16EncodeTable[byteValue >> 4];
        var lowDigit = base16EncodeTable[byteValue & 0x0F];
        encoded = encoded + highDigit + lowDigit;
    }
    return encoded;
}
Base16 解码的 ECMAScript 语言实现如下:
/**
 * Decodes a UTF-8 byte string into an ordinary ECMAScript (UTF-16) string.
 *
 * The input is expected to hold one byte per character (char codes
 * 0x00-0xFF). One- to four-byte UTF-8 sequences are decoded; four-byte
 * sequences are emitted as a UTF-16 surrogate pair. Bytes that do not start
 * a supported sequence (stray continuation bytes, the obsolete five- and
 * six-byte lead bytes) are skipped, as are four-byte sequences that decode
 * above U+10FFFF. A continuation byte missing at the end of the input
 * contributes zero bits.
 *
 * @param {string} str - UTF-8 byte string.
 * @returns {string} Decoded text.
 */
function ascii2UTF16(str) {
    var strLength = str.length;
    var utf16Chars = [];
    for (var i = 0; i < strLength; i++) {
        var ascii = str.charCodeAt(i);
        // Classify the lead byte by its high-order bits.
        if (((ascii >> 7) & 0xFF) === 0x0) {
            // One byte.
            // 0xxxxxxx
            utf16Chars.push(str.charAt(i));
        } else if (((ascii >> 5) & 0xFF) === 0x6) {
            // Two bytes.
            // 110xxxxx 10xxxxxx
            var twoByteTail = str.charCodeAt(++i);
            utf16Chars.push(String.fromCharCode(
                ((ascii & 0x1F) << 6) | (twoByteTail & 0x3F)
            ));
        } else if (((ascii >> 4) & 0xFF) === 0xE) {
            // Three bytes.
            // 1110xxxx 10xxxxxx 10xxxxxx
            var threeByteMid = str.charCodeAt(++i);
            var threeByteTail = str.charCodeAt(++i);
            utf16Chars.push(String.fromCharCode(
                ((ascii & 0x0F) << 12) |
                ((threeByteMid & 0x3F) << 6) |
                (threeByteTail & 0x3F)
            ));
        } else if (((ascii >> 3) & 0xFF) === 0x1E) {
            // Four bytes.
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            var fourByte2 = str.charCodeAt(++i);
            var fourByte3 = str.charCodeAt(++i);
            var fourByte4 = str.charCodeAt(++i);
            var codePoint = ((ascii & 0x07) << 18) |
                ((fourByte2 & 0x3F) << 12) |
                ((fourByte3 & 0x3F) << 6) |
                (fourByte4 & 0x3F);
            if (codePoint < 0x10000) {
                // Overlong form of a BMP character: fits in one code unit.
                utf16Chars.push(String.fromCharCode(codePoint));
            } else if (codePoint <= 0x10FFFF) {
                // Split into a high/low surrogate pair.
                var offset = codePoint - 0x10000;
                utf16Chars.push(String.fromCharCode(
                    0xD800 + (offset >> 10),
                    0xDC00 + (offset & 0x3FF)
                ));
            }
            // Values above U+10FFFF cannot be represented in UTF-16; skipped.
        }
        // Any other byte is not a supported lead byte and is skipped.
    }
    return utf16Chars.join('');
}
/**
 * Converts the character code of one hexadecimal digit to its numeric value.
 *
 * Relevant character codes (see `man ascii`):
 *   '0' => 48, '9' => 57
 *   'a' => 97, 'f' => 102
 *   'A' => 65, 'F' => 70
 *
 * @param {number} asciiCode - Character code of the digit.
 * @returns {number} Value 0-15, or 0 when the code is not a hex digit.
 */
function hex2dec(asciiCode) {
    var isDecimalDigit = asciiCode >= 48 && asciiCode <= 57;
    if (isDecimalDigit) {
        return asciiCode - 48;
    }

    var isLowerHexLetter = asciiCode >= 97 && asciiCode <= 102;
    if (isLowerHexLetter) {
        return asciiCode - 97 + 10;
    }

    var isUpperHexLetter = asciiCode >= 65 && asciiCode <= 70;
    if (isUpperHexLetter) {
        return asciiCode - 65 + 10;
    }

    // Anything else is not a hexadecimal digit.
    return 0;
}
/**
 * Decodes Base16 (hexadecimal) text back into a string.
 *
 * Each pair of hex digits is turned into one byte via hex2dec, and the
 * resulting byte string is handed to ascii2UTF16 for UTF-8 decoding.
 * Because hex2dec returns 0 for anything that is not a hex digit, invalid
 * characters and the missing partner of a trailing odd digit count as 0.
 *
 * @param {string} str - Base16 text, upper- or lowercase.
 * @returns {string} Decoded text.
 */
function base16_decode(str) {
    var total = str.length;
    var decodedBytes = [];
    // Walk the input two digits at a time: high nibble, then low nibble.
    for (var pos = 0; pos < total; pos += 2) {
        var highNibble = hex2dec(str.charCodeAt(pos));
        var lowNibble = hex2dec(str.charCodeAt(pos + 1));
        decodedBytes.push(String.fromCharCode(highNibble * 16 + lowNibble));
    }
    return ascii2UTF16(decodedBytes.join(''));
}