671 lines
16 KiB
JavaScript
671 lines
16 KiB
JavaScript
/**
 * Unidecode takes UTF-8 data and tries to represent it in US-ASCII characters (i.e., the universally displayable characters between 0x00 and 0x7F).
 * The representation is almost always an attempt at transliteration -- i.e., conveying, in Roman letters, the pronunciation expressed by the text in
 * some other writing system.
 *
 * The tables used (in data) are converted from the tables provided in the Perl library Text::Unidecode (http://search.cpan.org/dist/Text-Unidecode/lib/Text/Unidecode.pm)
 * and are distributed under the Perl license.
 *
 * @author Francois-Guillaume Ribreau
 *
 * Based on the port of unidecode for PHP.
 */
|
|
|
|
'use strict';
|
|
|
|
var tr = {};
|
|
var utf8_rx = /(?![\x00-\x7F]|[\xC0-\xDF][\x80-\xBF]|[\xE0-\xEF][\x80-\xBF]{2}|[\xF0-\xF7][\x80-\xBF]{3})./g;
|
|
|
|
module.exports = function (str) {
|
|
return str.replace(utf8_rx, unidecode_internal_replace);
|
|
};
|
|
|
|
/**
 * Replacement callback for String.prototype.replace: maps one matched
 * character to its ASCII transliteration.
 *
 * The character code is split into a high byte `h`, which selects a table
 * module under ./data, and a low byte `l`, which indexes into that table.
 * Tables are loaded on first use and cached in `tr`.
 *
 * NOTE(review): every require() below uses a literal path, presumably so
 * that bundlers can resolve the data files statically. Confirm before
 * replacing the switch with a computed path.
 *
 * @param {string} match The matched character.
 * @returns {string} '_' for codes above 0xFFFF, '' for pages without a
 *   table or for table slots that hold no value, otherwise the table entry.
 */
function unidecode_internal_replace(match) {
  var utf16 = utf8_to_utf16(match);

  // Codes above 0xFFFF have no table; emit a placeholder.
  if (utf16 > 0xFFFF) {
    return '_';
  }

  var h = utf16 >> 8;
  var l = utf16 & 0xFF;

  // Pages 0x19-0x1d (0x18 < h < 0x1e) are not supported.
  if (h > 24 && h < 30) return '';

  // Pages 0xd8-0xf8 (0xd7 < h < 0xf9) are not supported.
  if (h > 215 && h < 249) return '';

  if (!tr[h]) {
    switch (dec2hex(h)) {
      case '00': tr[h] = require('./data/x00'); break;
      case '01': tr[h] = require('./data/x01'); break;
      case '02': tr[h] = require('./data/x02'); break;
      case '03': tr[h] = require('./data/x03'); break;
      case '04': tr[h] = require('./data/x04'); break;
      case '05': tr[h] = require('./data/x05'); break;
      case '06': tr[h] = require('./data/x06'); break;
      case '07': tr[h] = require('./data/x07'); break;
      case '09': tr[h] = require('./data/x09'); break;
      case '0a': tr[h] = require('./data/x0a'); break;
      case '0b': tr[h] = require('./data/x0b'); break;
      case '0c': tr[h] = require('./data/x0c'); break;
      case '0d': tr[h] = require('./data/x0d'); break;
      case '0e': tr[h] = require('./data/x0e'); break;
      case '0f': tr[h] = require('./data/x0f'); break;
      case '10': tr[h] = require('./data/x10'); break;
      case '11': tr[h] = require('./data/x11'); break;
      case '12': tr[h] = require('./data/x12'); break;
      case '13': tr[h] = require('./data/x13'); break;
      case '14': tr[h] = require('./data/x14'); break;
      case '15': tr[h] = require('./data/x15'); break;
      case '16': tr[h] = require('./data/x16'); break;
      case '17': tr[h] = require('./data/x17'); break;
      case '18': tr[h] = require('./data/x18'); break;
      case '1e': tr[h] = require('./data/x1e'); break;
      case '1f': tr[h] = require('./data/x1f'); break;
      case '20': tr[h] = require('./data/x20'); break;
      case '21': tr[h] = require('./data/x21'); break;
      case '22': tr[h] = require('./data/x22'); break;
      case '23': tr[h] = require('./data/x23'); break;
      case '24': tr[h] = require('./data/x24'); break;
      case '25': tr[h] = require('./data/x25'); break;
      case '26': tr[h] = require('./data/x26'); break;
      case '27': tr[h] = require('./data/x27'); break;
      case '28': tr[h] = require('./data/x28'); break;
      case '2e': tr[h] = require('./data/x2e'); break;
      case '2f': tr[h] = require('./data/x2f'); break;
      case '30': tr[h] = require('./data/x30'); break;
      case '31': tr[h] = require('./data/x31'); break;
      case '32': tr[h] = require('./data/x32'); break;
      case '33': tr[h] = require('./data/x33'); break;
      case '4d': tr[h] = require('./data/x4d'); break;
      case '4e': tr[h] = require('./data/x4e'); break;
      case '4f': tr[h] = require('./data/x4f'); break;
      case '50': tr[h] = require('./data/x50'); break;
      case '51': tr[h] = require('./data/x51'); break;
      case '52': tr[h] = require('./data/x52'); break;
      case '53': tr[h] = require('./data/x53'); break;
      case '54': tr[h] = require('./data/x54'); break;
      case '55': tr[h] = require('./data/x55'); break;
      case '56': tr[h] = require('./data/x56'); break;
      case '57': tr[h] = require('./data/x57'); break;
      case '58': tr[h] = require('./data/x58'); break;
      case '59': tr[h] = require('./data/x59'); break;
      case '5a': tr[h] = require('./data/x5a'); break;
      case '5b': tr[h] = require('./data/x5b'); break;
      case '5c': tr[h] = require('./data/x5c'); break;
      case '5d': tr[h] = require('./data/x5d'); break;
      case '5e': tr[h] = require('./data/x5e'); break;
      case '5f': tr[h] = require('./data/x5f'); break;
      case '60': tr[h] = require('./data/x60'); break;
      case '61': tr[h] = require('./data/x61'); break;
      case '62': tr[h] = require('./data/x62'); break;
      case '63': tr[h] = require('./data/x63'); break;
      case '64': tr[h] = require('./data/x64'); break;
      case '65': tr[h] = require('./data/x65'); break;
      case '66': tr[h] = require('./data/x66'); break;
      case '67': tr[h] = require('./data/x67'); break;
      case '68': tr[h] = require('./data/x68'); break;
      case '69': tr[h] = require('./data/x69'); break;
      case '6a': tr[h] = require('./data/x6a'); break;
      case '6b': tr[h] = require('./data/x6b'); break;
      case '6c': tr[h] = require('./data/x6c'); break;
      case '6d': tr[h] = require('./data/x6d'); break;
      case '6e': tr[h] = require('./data/x6e'); break;
      case '6f': tr[h] = require('./data/x6f'); break;
      case '70': tr[h] = require('./data/x70'); break;
      case '71': tr[h] = require('./data/x71'); break;
      case '72': tr[h] = require('./data/x72'); break;
      case '73': tr[h] = require('./data/x73'); break;
      case '74': tr[h] = require('./data/x74'); break;
      case '75': tr[h] = require('./data/x75'); break;
      case '76': tr[h] = require('./data/x76'); break;
      case '77': tr[h] = require('./data/x77'); break;
      case '78': tr[h] = require('./data/x78'); break;
      case '79': tr[h] = require('./data/x79'); break;
      case '7a': tr[h] = require('./data/x7a'); break;
      case '7b': tr[h] = require('./data/x7b'); break;
      case '7c': tr[h] = require('./data/x7c'); break;
      case '7d': tr[h] = require('./data/x7d'); break;
      case '7e': tr[h] = require('./data/x7e'); break;
      case '7f': tr[h] = require('./data/x7f'); break;
      case '80': tr[h] = require('./data/x80'); break;
      case '81': tr[h] = require('./data/x81'); break;
      case '82': tr[h] = require('./data/x82'); break;
      case '83': tr[h] = require('./data/x83'); break;
      case '84': tr[h] = require('./data/x84'); break;
      case '85': tr[h] = require('./data/x85'); break;
      case '86': tr[h] = require('./data/x86'); break;
      case '87': tr[h] = require('./data/x87'); break;
      case '88': tr[h] = require('./data/x88'); break;
      case '89': tr[h] = require('./data/x89'); break;
      case '8a': tr[h] = require('./data/x8a'); break;
      case '8b': tr[h] = require('./data/x8b'); break;
      case '8c': tr[h] = require('./data/x8c'); break;
      case '8d': tr[h] = require('./data/x8d'); break;
      case '8e': tr[h] = require('./data/x8e'); break;
      case '8f': tr[h] = require('./data/x8f'); break;
      case '90': tr[h] = require('./data/x90'); break;
      case '91': tr[h] = require('./data/x91'); break;
      case '92': tr[h] = require('./data/x92'); break;
      case '93': tr[h] = require('./data/x93'); break;
      case '94': tr[h] = require('./data/x94'); break;
      case '95': tr[h] = require('./data/x95'); break;
      case '96': tr[h] = require('./data/x96'); break;
      case '97': tr[h] = require('./data/x97'); break;
      case '98': tr[h] = require('./data/x98'); break;
      case '99': tr[h] = require('./data/x99'); break;
      case '9a': tr[h] = require('./data/x9a'); break;
      case '9b': tr[h] = require('./data/x9b'); break;
      case '9c': tr[h] = require('./data/x9c'); break;
      case '9d': tr[h] = require('./data/x9d'); break;
      case '9e': tr[h] = require('./data/x9e'); break;
      case '9f': tr[h] = require('./data/x9f'); break;
      case 'a0': tr[h] = require('./data/xa0'); break;
      case 'a1': tr[h] = require('./data/xa1'); break;
      case 'a2': tr[h] = require('./data/xa2'); break;
      case 'a3': tr[h] = require('./data/xa3'); break;
      case 'a4': tr[h] = require('./data/xa4'); break;
      case 'ac': tr[h] = require('./data/xac'); break;
      case 'ad': tr[h] = require('./data/xad'); break;
      case 'ae': tr[h] = require('./data/xae'); break;
      case 'af': tr[h] = require('./data/xaf'); break;
      case 'b0': tr[h] = require('./data/xb0'); break;
      case 'b1': tr[h] = require('./data/xb1'); break;
      case 'b2': tr[h] = require('./data/xb2'); break;
      case 'b3': tr[h] = require('./data/xb3'); break;
      case 'b4': tr[h] = require('./data/xb4'); break;
      case 'b5': tr[h] = require('./data/xb5'); break;
      case 'b6': tr[h] = require('./data/xb6'); break;
      case 'b7': tr[h] = require('./data/xb7'); break;
      case 'b8': tr[h] = require('./data/xb8'); break;
      case 'b9': tr[h] = require('./data/xb9'); break;
      case 'ba': tr[h] = require('./data/xba'); break;
      case 'bb': tr[h] = require('./data/xbb'); break;
      case 'bc': tr[h] = require('./data/xbc'); break;
      case 'bd': tr[h] = require('./data/xbd'); break;
      case 'be': tr[h] = require('./data/xbe'); break;
      case 'bf': tr[h] = require('./data/xbf'); break;
      case 'c0': tr[h] = require('./data/xc0'); break;
      case 'c1': tr[h] = require('./data/xc1'); break;
      case 'c2': tr[h] = require('./data/xc2'); break;
      case 'c3': tr[h] = require('./data/xc3'); break;
      case 'c4': tr[h] = require('./data/xc4'); break;
      case 'c5': tr[h] = require('./data/xc5'); break;
      case 'c6': tr[h] = require('./data/xc6'); break;
      case 'c7': tr[h] = require('./data/xc7'); break;
      case 'c8': tr[h] = require('./data/xc8'); break;
      case 'c9': tr[h] = require('./data/xc9'); break;
      case 'ca': tr[h] = require('./data/xca'); break;
      case 'cb': tr[h] = require('./data/xcb'); break;
      case 'cc': tr[h] = require('./data/xcc'); break;
      case 'cd': tr[h] = require('./data/xcd'); break;
      case 'ce': tr[h] = require('./data/xce'); break;
      case 'cf': tr[h] = require('./data/xcf'); break;
      case 'd0': tr[h] = require('./data/xd0'); break;
      case 'd1': tr[h] = require('./data/xd1'); break;
      case 'd2': tr[h] = require('./data/xd2'); break;
      case 'd3': tr[h] = require('./data/xd3'); break;
      case 'd4': tr[h] = require('./data/xd4'); break;
      case 'd5': tr[h] = require('./data/xd5'); break;
      case 'd6': tr[h] = require('./data/xd6'); break;
      case 'd7': tr[h] = require('./data/xd7'); break;
      case 'f9': tr[h] = require('./data/xf9'); break;
      case 'fa': tr[h] = require('./data/xfa'); break;
      case 'fb': tr[h] = require('./data/xfb'); break;
      case 'fc': tr[h] = require('./data/xfc'); break;
      case 'fd': tr[h] = require('./data/xfd'); break;
      case 'fe': tr[h] = require('./data/xfe'); break;
      case 'ff': tr[h] = require('./data/xff'); break;
      default:
        // No table exists for this page: drop the character.
        return '';
    }
  }

  // A slot with no value would otherwise be stringified by replace() as
  // the literal text "undefined"; drop the character instead.
  var replacement = tr[h][l];
  return replacement == null ? '' : replacement;
}
|
|
|
|
/**
 * Formats the low byte of an integer as two lowercase hex digits.
 *
 * @param {number} i Integer to format (callers pass a value in 0-255).
 * @returns {string} Two-character, zero-padded hex string, e.g. 10 -> '0a'.
 */
function dec2hex(i) {
  // Adding 0x100 guarantees at least three hex digits, so taking the last
  // two yields a zero-padded result. slice() replaces the deprecated substr().
  return (i + 0x100).toString(16).slice(-2);
}
|
|
|
|
/**
 * Converts one character to its numeric code.
 *
 * A string of length 1 is returned as its char code. Longer strings are
 * treated as a UTF-8 byte sequence stored one byte per character (a
 * "binary string") and decoded to the code point they encode.
 * See http://en.wikipedia.org/wiki/UTF-8 for the bit layout.
 *
 * @param {string|Array} raw The character, or (possibly nested) arrays
 *   whose first element is the character.
 * @returns {number} The character code / decoded code point.
 */
function utf8_to_utf16(raw) {
  var b1, b2, b3, b4,
      x, y, z;

  // Unwrap nested arrays down to their first element.
  while (Array.isArray(raw)) raw = raw[0];

  switch (raw.length) {
    case 1:
      return raw.charCodeAt(0);

    case 2:
      // 110yyyxx 10xxxxxx
      b1 = raw.charCodeAt(0);
      b2 = raw.charCodeAt(1);

      x = ((b1 & 0x03) << 6) | (b2 & 0x3F);
      y = (b1 & 0x1C) >> 2;

      return (y << 8) | x;

    case 3:
      // 1110yyyy 10yyyyxx 10xxxxxx
      b1 = raw.charCodeAt(0);
      b2 = raw.charCodeAt(1);
      b3 = raw.charCodeAt(2);

      x = ((b2 & 0x03) << 6) | (b3 & 0x3F);
      y = ((b1 & 0x0F) << 4) | ((b2 & 0x3C) >> 2);

      return (y << 8) | x;

    default:
      // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
      b1 = raw.charCodeAt(0);
      b2 = raw.charCodeAt(1);
      b3 = raw.charCodeAt(2);
      b4 = raw.charCodeAt(3);

      x = ((b3 & 0x03) << 6) | (b4 & 0x3F);
      y = ((b2 & 0x0F) << 4) | ((b3 & 0x3C) >> 2);
      // The three bits taken from b1 sit directly above the two bits taken
      // from b2, so they are shifted by 2 (not 5).
      z = ((b1 & 0x07) << 2) | ((b2 & 0x30) >> 4);

      return (z << 16) | (y << 8) | x;
  }
}
|
|
|
|
/* From php.js */
|
|
|
|
/**
 * Returns the code point value of the first character of a string.
 *
 * Adapted from php.js (version 1109.2015), http://phpjs.org/functions/ord
 *   original by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
 *   bugfixed by: Onno Marsman
 *   improved by: Brett Zamir (http://brett-zamir.me)
 *   input by: incidence
 *
 * Examples:
 *   ord('K')            -> 75
 *   ord('\uD800\uDC00') -> 65536 (surrogate pair forming one character)
 *
 * @param {*} string Value to inspect; it is converted to a string first.
 * @returns {number} The code point of the first character. A lone
 *   surrogate (high or low) is returned as its own code unit value. An
 *   empty string yields NaN.
 */
function ord(string) {
  var str = string + '',
      code = str.charCodeAt(0);

  // High surrogate: combine with the following low surrogate, if any.
  if (0xD800 <= code && code <= 0xDBFF) {
    var low = str.charCodeAt(1);

    // Only a genuine low surrogate completes the pair; otherwise (missing
    // or unrelated next unit) the high surrogate stands alone.
    if (0xDC00 <= low && low <= 0xDFFF) {
      return ((code - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
    }
  }

  return code;
}
|