// utilities.js (4.3 KB)
  /**
   * UTF-8 encoder/decoder for JavaScript (UTF-16) strings.
   *
   * Make sure the charset of the page using this script is
   * set to utf-8 or you will not get the correct results.
   *
   * Exposes:
   *   utf8.stringToBytes(str)   -> Array of UTF-8 byte values
   *   utf8.bytesToString(bytes) -> JavaScript String
   *
   * Both functions throw Error('Invalid format') on malformed input.
   */
  var utf8 = (function () {
    var highSurrogateMin = 0xd800,
      highSurrogateMax = 0xdbff,
      lowSurrogateMin = 0xdc00,
      lowSurrogateMax = 0xdfff,
      surrogateBase = 0x10000,
      maxCodePoint = 0x10ffff;

    /** True when charCode is a UTF-16 high (leading) surrogate. */
    function isHighSurrogate(charCode) {
      return highSurrogateMin <= charCode && charCode <= highSurrogateMax;
    }

    /** True when charCode is a UTF-16 low (trailing) surrogate. */
    function isLowSurrogate(charCode) {
      return lowSurrogateMin <= charCode && charCode <= lowSurrogateMax;
    }

    /** Combine a high/low surrogate pair into a single code point. */
    function combineSurrogate(high, low) {
      return ((high - highSurrogateMin) << 10) + (low - lowSurrogateMin) + surrogateBase;
    }

    /**
     * Convert a code point to a JavaScript String,
     * emitting a UTF-16 surrogate pair for code points >= 0x10000.
     */
    function chr(charCode) {
      var offset, high, low;
      if (charCode < surrogateBase) {
        return String.fromCharCode(charCode);
      }
      // convert to UTF16 surrogate pair: 10 high bits + 10 low bits
      offset = charCode - surrogateBase;
      high = (offset >> 10) + highSurrogateMin;
      low = (offset & 0x3ff) + lowSurrogateMin;
      return String.fromCharCode(high, low);
    }

    /**
     * Return the 6 payload bits of the continuation byte at bytes[index].
     * Throws when the byte is missing (truncated input) or is not of the
     * form 10xxxxxx.
     */
    function continuationBits(bytes, index) {
      var value = bytes[index];
      // undefined >> 6 is 0, so reading past the end is rejected here too
      if ((value >> 6) !== 0x02) {
        throw new Error('Invalid format');
      }
      return value & 0x3f;
    }

    /**
     * Convert JavaScript String to an Array of
     * UTF8 bytes
     * @param {string} str String to encode.
     * @returns {number[]} UTF-8 byte values (0-255).
     * @throws {Error} 'Invalid format' when str contains an unpaired
     *     surrogate (high or low).
     * @export
     */
    function stringToBytes(str) {
      var bytes = [],
        strLength = str.length,
        strIndex = 0,
        charCode, charCode2;
      while (strIndex < strLength) {
        charCode = str.charCodeAt(strIndex++);
        // handle surrogate pair
        if (isHighSurrogate(charCode)) {
          if (strIndex === strLength) {
            throw new Error('Invalid format');
          }
          charCode2 = str.charCodeAt(strIndex++);
          if (!isLowSurrogate(charCode2)) {
            throw new Error('Invalid format');
          }
          charCode = combineSurrogate(charCode, charCode2);
        }
        else if (isLowSurrogate(charCode)) {
          // a low surrogate with no preceding high surrogate is just as
          // malformed as a dangling high surrogate
          throw new Error('Invalid format');
        }
        // convert charCode to UTF8 bytes
        if (charCode < 0x80) {
          // one byte
          bytes.push(charCode);
        }
        else if (charCode < 0x800) {
          // two bytes
          bytes.push(0xc0 | (charCode >> 6));
          bytes.push(0x80 | (charCode & 0x3f));
        }
        else if (charCode < 0x10000) {
          // three bytes
          bytes.push(0xe0 | (charCode >> 12));
          bytes.push(0x80 | ((charCode >> 6) & 0x3f));
          bytes.push(0x80 | (charCode & 0x3f));
        }
        else {
          // four bytes
          bytes.push(0xf0 | (charCode >> 18));
          bytes.push(0x80 | ((charCode >> 12) & 0x3f));
          bytes.push(0x80 | ((charCode >> 6) & 0x3f));
          bytes.push(0x80 | (charCode & 0x3f));
        }
      }
      return bytes;
    }

    /**
     * Convert an Array of UTF8 bytes to
     * a JavaScript String
     * @param {ArrayLike<number>} bytes UTF-8 byte values (0-255).
     * @returns {string} Decoded string.
     * @throws {Error} 'Invalid format' on an invalid lead byte, a missing
     *     or malformed continuation byte, an overlong encoding, an encoded
     *     surrogate, or a code point above 0x10FFFF.
     * @export
     */
    function bytesToString(bytes) {
      var str = '',
        length = bytes.length,
        index = 0,
        lead,
        charCode,
        minCode;
      while (index < length) {
        // first byte
        lead = bytes[index++];
        if (lead >= 0 && lead < 0x80) {
          // one byte
          charCode = lead;
          minCode = 0;
        }
        else if ((lead >> 5) === 0x06) {
          // two bytes
          charCode = ((lead & 0x1f) << 6) | continuationBits(bytes, index++);
          minCode = 0x80;
        }
        else if ((lead >> 4) === 0x0e) {
          // three bytes
          charCode = ((lead & 0x0f) << 12) | (continuationBits(bytes, index++) << 6) | continuationBits(bytes, index++);
          minCode = 0x800;
        }
        else if ((lead >> 3) === 0x1e) {
          // four bytes
          charCode = ((lead & 0x07) << 18) | (continuationBits(bytes, index++) << 12) | (continuationBits(bytes, index++) << 6) | continuationBits(bytes, index++);
          minCode = 0x10000;
        }
        else {
          // stray continuation byte (10xxxxxx) or a byte that can never
          // start a UTF-8 sequence (11111xxx, or not a byte value at all)
          throw new Error('Invalid format');
        }
        // reject overlong encodings, encoded surrogates and values
        // beyond the Unicode range
        if (charCode < minCode || charCode > maxCodePoint ||
            isHighSurrogate(charCode) || isLowSurrogate(charCode)) {
          throw new Error('Invalid format');
        }
        str += chr(charCode);
      }
      return str;
    }

    return {
      stringToBytes: stringToBytes,
      bytesToString: bytesToString
    };
  }());