// The translations dictionary below is unusual. The linter requires that
// dictionary keys are left unquoted whenever quoting is not required.
// Following that rule here would be inconsistent and confusing, so quote-props
// is disabled. The linter also requires comments to start with a capital
// letter. Many comments in the translations dictionary start with a lower case
// letter because it is the literal symbol being translated, which cannot be
// made upper case because that would change its meaning; therefore
// capitalized-comments is disabled. Sort-keys is disabled because the linter
// doesn't understand the sort order of unicode characters correctly.
/* eslint-disable quote-props */
/* eslint-disable capitalized-comments */
/* eslint-disable sort-keys */

export class FilterStandardization {
  private static translations = {
    "\u0009": " ", // Horizontal Tab
    "\u000A": " ", // Line Feed
    "\u000B": " ", // Vertical Tab
    "\u000C": " ", // Form Feed
    "\u000D": " ", // Carriage Return
    // \000E through \001A are control characters that will not occur in text
    "\u001B": " ", // Escape
    // \001C through \001F are control characters that will not occur in text
    // Code \0020 is a space
    // Code \0021 is !
    "\u0022": "'", // Quotation Mark
    // Codes \0023 through \002F are standard symbols
    // Codes \0030 through \0039 are the standard digits
    // Codes \003A and \003B are Colon and Semicolon
    "\u003C": "(", // Less than sign
    // Code \003D is Equals Sign
    "\u003E": ")", // Greater than sign
    // Code \003F is Question Mark
    // Code \0040 is Commercial at
    // Codes \0041 through \005A are the standard uppercase alphas (handled by "lower")
    "\u005B": "(", // Left Square Bracket
    "\u005C": "\\", // Reverse Solidus
    "\u005D": ")", // Right Square Bracket
    // Code \005E is Circumflex Accent
    // Code \005F is Low Line
    "\u0060": "'", // Grave Accent
    // Codes \0061 through \007A are the standard lowercase alphas
    "\u007B": "(", // Left Curly Bracket
    // Code \007C is Vertical Line
    "\u007D": ")", // Right Curly Bracket
    // Code \007E is tilde
    // Codes \007F through \0081 are control characters that will not occur in text
    "\u0082": " ", // break allowed here
    "\u0083": " ", // no break here
    // \0084 is a control character that will not occur in text
    "\u0085": " ", // Next Line
    "\u0086": " ", // Start of Selected Area
    "\u0087": " ", // End of Selected Area
    // \0088 through \008A are control characters that will not occur in text
    "\u008B": " ", // Partial Line Forward
    "\u008C": " ", // Partial Line Backward
    "\u008D": " ", // Reverse Line Feed
    // \008E through \0095 are control characters that will not occur in text
    "\u0096": " ", // Start of Guarded Area
    "\u0097": " ", // End of Guarded Area
    // \0098 through \009F are control characters that will not occur in text
    "\u00A0": " ", // Non-breaking Space
    "\u00A1": "!", // inverted !
    // Codes \00A2 through \00A5 are currency symbols
    // Codes \00A6 through \00AA are standard symbols
    "\u00AB": "'", // Left-Pointing Double Angle Quotation Mark
    // Codes \00AC through \00AE are standard symbols
    "\u00AF": "-", // Macron
    // Code \00B0 is Degree Sign
    // Code \00B1 is Plus minus symbol
    "\u00B2": "2", // Superscript 2
    "\u00B3": "3", // Superscript 3
    "\u00B4": "'", // Acute Accent
    // Codes \00B5 through \00B8 are standard symbols
    "\u00B9": "1", // Superscript 1
    // Code \00BA is Masculine Ordinal Indicator
    "\u00BB": "'", // Right-Pointing Double Angle Quotation Mark
    // Codes \00BC through \00BE are Vulgar Fractions
    "\u00BF": "?", // Inverted ?
    // \00C0 through \00D6 are capital letters of \00E0 through \00F6
    // Code \00D7 is Multiplication Sign
    // \00D8 through \00DE are capital letters of \00F8 through \00FE
    "\u00DF": "s", // Sharp s
    "\u00E0": "a", // a with Grave
    "\u00E1": "a", // a with Acute
    "\u00E2": "a", // a with Circumflex
    "\u00E3": "a", // a with Tilde
    "\u00E4": "a", // a with Diaeresis
    "\u00E5": "a", // a with Ring Above
    "\u00E6": "e", // ae
    "\u00E7": "c", // c with Cedilla
    "\u00E8": "e", // e with Grave
    "\u00E9": "e", // e with Acute
    "\u00EA": "e", // e with Circumflex
    "\u00EB": "e", // e with Diaeresis
    "\u00EC": "i", // i with Grave
    "\u00ED": "i", // i with Acute
    "\u00EE": "i", // i with Circumflex
    "\u00EF": "i", // i with Diaeresis
    "\u00F0": "th", // eth
    "\u00F1": "n", // n with Tilde
    "\u00F2": "o", // o with Grave
    "\u00F3": "o", // o with Acute
    "\u00F4": "o", // o with Circumflex
    "\u00F5": "o", // o with Tilde
    "\u00F6": "o", // o with Diaeresis
    // Code \00F7 is Division Sign
    "\u00F8": "o", // o with Stroke
    "\u00F9": "u", // u with Grave
    "\u00FA": "u", // u with Acute
    "\u00FB": "u", // u with Circumflex
    "\u00FC": "u", // u with Diaeresis
    "\u00FD": "y", // y with Acute
    "\u00FE": "th", // Thorn
    "\u00FF": "y", // y with Diaeresis
    // \0100 is the capital letter of \0101
    "\u0101": "a", // a with Macron
    // \0102 is the capital letter of \0103
    "\u0103": "a", // a with Breve
    // \0104 is the capital letter of \0105
    "\u0105": "a", // a with Ogonek
    // \0106 is the capital letter of \0107
    "\u0107": "c", // c with Acute
    // \0108 is the capital letter of \0109
    "\u0109": "c", // c with Circumflex
    // \010A is the capital letter of \010B
    "\u010B": "c", // c with Dot Above
    // \010C is the capital letter of \010D
    "\u010D": "c", // c with Caron
    // \010E is the capital letter of \010F
    "\u010F": "d", // d with Caron
    // \0110 is the capital letter of \0111
    "\u0111": "d", // d with Stroke
    // \0112 is the capital letter of \0113
    "\u0113": "e", // e with Macron
    // \0114 is the capital letter of \0115
    "\u0115": "e", // e with Breve
    // \0116 is the capital letter of \0117
    "\u0117": "e", // e with Dot Above
    // \0118 is the capital letter of \0119
    "\u0119": "e", // e with Ogonek
    // \011A is the capital letter of \011B
    "\u011B": "e", // e with Caron
    // \011C is the capital letter of \011D
    "\u011D": "g", // g with Circumflex
    // \011E is the capital letter of \011F
    "\u011F": "g", // g with Breve
    // \0120 is the capital letter of \0121
    "\u0121": "g", // g with Dot Above
    // \0122 is the capital letter of \0123
    "\u0123": "g", // g with Cedilla
    // \0124 is the capital letter of \0125
    "\u0125": "h", // h with Circumflex
    // \0126 is the capital letter of \0127
    "\u0127": "h", // h with Stroke
    // \0128 is the capital letter of \0129
    "\u0129": "i", // i with Tilde
    // \012A is the capital letter of \012B
    "\u012B": "i", // i with Macron
    // \012C is the capital letter of \012D
    "\u012D": "i", // i with Breve
    // \012E is the capital letter of \012F
    "\u012F": "i", // i with Ogonek
    // \0130 (I with Dot Above) has regular i as its lowercase
    "\u0131": "i", // i with no Dot Above
    // \0132 is the capital letter of \0133
    "\u0133": "ij", // ij
    // \0134 is the capital letter of \0135
    "\u0135": "j", // j with Circumflex
    // \0136 is the capital letter of \0137
    "\u0137": "k", // k with Cedilla
    "\u0138": "k", // kra
    // \0139 is the capital letter of \013A
    "\u013A": "l", // l with Acute
    // \013B is the capital letter of \013C
    "\u013C": "l", // l with Cedilla
    // \013D is the capital letter of \013E
    "\u013E": "l", // l with Caron
    // \013F is the capital letter of \0140
    "\u0140": "l", // l with Middle Dot
    // \0141 is the capital letter of \0142
    "\u0142": "l", // l with Stroke
    // \0143 is the capital letter of \0144
    "\u0144": "n", // n with Acute
    // \0145 is the capital letter of \0146
    "\u0146": "n", // n with Cedilla
    // \0147 is the capital letter of \0148
    "\u0148": "n", // n with Caron
    "\u0149": "n", // n preceded by Apostrophe
    // \014A is the capital letter of \014B
    "\u014B": "n", // eng
    // \014C is the capital letter of \014D
    "\u014D": "o", // o with Macron
    // \014E is the capital letter of \014F
    "\u014F": "o", // o with Breve
    // \0150 is the capital letter of \0151
    "\u0151": "o", // o with Double Acute
    // \0152 is the capital letter of \0153
    "\u0153": "e", // oe
    // \0154 is the capital letter of \0155
    "\u0155": "r", // r with Acute
    // \0156 is the capital letter of \0157
    "\u0157": "r", // r with Cedilla
    // \0158 is the capital letter of \0159
    "\u0159": "r", // r with Caron
    // \015A is the capital letter of \015B
    "\u015B": "s", // s with Acute
    // \015C is the capital letter of \015D
    "\u015D": "s", // s with Circumflex
    // \015E is the capital letter of \015F
    "\u015F": "s", // s with Cedilla
    // \0160 is the capital letter of \0161
    "\u0161": "s", // s with Caron
    // \0162 is the capital letter of \0163
    "\u0163": "t", // t with Cedilla
    // \0164 is the capital letter of \0165
    "\u0165": "t", // t with Caron
    // \0166 is the capital letter of \0167
    "\u0167": "t", // t with Stroke
    // \0168 is the capital letter of \0169
    "\u0169": "u", // u with Tilde
    // \016A is the capital letter of \016B
    "\u016B": "u", // u with Macron
    // \016C is the capital letter of \016D
    "\u016D": "u", // u with Breve
    // \016E is the capital letter of \016F
    "\u016F": "u", // u with Ring Above
    // \0170 is the capital letter of \0171
    "\u0171": "u", // u with Double Acute
    // \0172 is the capital letter of \0173
    "\u0173": "u", // u with Ogonek
    // \0174 is the capital letter of \0175
    "\u0175": "w", // w with Circumflex
    // \0176 is the capital letter of \0177
    "\u0177": "y", // y with Circumflex
    // \0178 is the capital letter of \00FF
    // \0179 is the capital letter of \017A
    "\u017A": "z", // z with Acute
    // \017B is the capital letter of \017C
    "\u017C": "z", // z with Dot Above
    // \017D is the capital letter of \017E
    "\u017E": "z", // z with Caron
    "\u017F": "s", // Long s
    "\u0180": "b", // b with Stroke
    // \0181 is the capital letter of \0253
    // \0182 is the capital letter of \0183
    "\u0183": "b", // b with Topbar
    // \0184 is the capital letter of \0185
    "\u0185": "b", // Tone 6
    // \0186 is the capital letter of \0254
    // \0187 is the capital letter of \0188
    "\u0188": "c", // c with Hook
    // \0189 through \018A are the capital letters of \0256 through \0257
    // \018B is the capital letter of \018C
    "\u018C": "d", // d with Topbar
    "\u018D": "d", // Turned delta
    // \018E is the capital letter of \01DD
    // \018F is the capital letter of \0259
    // \0190 is the capital letter of \025B
    // \0191 is the capital letter of \0192
    "\u0192": "f", // f with Hook
    // \0193 is the capital letter of \0260
    // \0194 is the capital letter of \0263
    "\u0195": "hv", // hv
    // \0196 is the capital letter of \0269
    // \0197 is the capital letter of \0268
    // \0198 is the capital letter of \0199
    "\u0199": "k", // k with Hook
    "\u019A": "l", // l with Bar
    "\u019B": "l", // lambda with Stroke
    // \019C is the capital letter of \026F
    // \019D is the capital letter of \0272
    "\u019E": "n", // n with Long Right Leg
    // \019F is the capital letter of \0275
    // \01A0 is the capital letter of \01A1
    "\u01A1": "o", // o with Horn
    // \01A2 is the capital letter of \01A3
    "\u01A3": "oi", // oi
    // \01A4 is the capital letter of \01A5
    "\u01A5": "p", // p with Hook
    // \01A6 is the capital letter of \0280
    // \01A7 is the capital letter of \01A8
    "\u01A8": "s", // Tone 2
    // \01A9 is the capital letter of \0283
    "\u01AA": "s", // reversed esh loop
    "\u01AB": "t", // t with Palatal Hook
    // \01AC is the capital letter of \01AD
    "\u01AD": "t", // t with Hook
    // \01AE is the capital letter of \0288
    // \01AF is the capital letter of \01B0
    "\u01B0": "u", // u with Horn
    // \01B1 through \01B2 are the capital letters of \028A through \028B
    // \01B3 is the capital letter of \01B4
    "\u01B4": "y", // y with Hook
    // \01B5 is the capital letter of \01B6
    "\u01B6": "z", // z with Stroke
    // \01B7 is the capital letter of \0292
    // \01B8 is the capital letter of \01B9
    "\u01B9": "zh", // zh Reversed
    "\u01BA": "zh", // zh with Tail
    "\u01BB": "dz", // 2 with Stroke
    // \01BC is the capital letter of \01BD
    "\u01BD": "b", // Tone 5
    "\u01BE": "ts", // ts
    "\u01BF": "w", // wynn
    // \01C0 through \01C3 are African clicks
    // \01C4 and \01C5 are both capital letters of \01C6
    "\u01C6": "dz", // dz
    // \01C7 and \01C8 are both capital letters of \01C9
    "\u01C9": "lj", // lj
    // \01CA and \01CB are both capital letters of \01CC
    "\u01CC": "nj", // nj
    // \01CD is the capital letter of \01CE
    "\u01CE": "a", // a with Caron
    // \01CF is the capital letter of \01D0
    "\u01D0": "i", // i with Caron
    // \01D1 is the capital letter of \01D2
    "\u01D2": "o", // o with Caron
    // \01D3 is the capital letter of \01D4
    "\u01D4": "u", // u with Caron
    // \01D5 is the capital letter of \01D6
    "\u01D6": "u", // u with Diaerisis and Macron
    // \01D7 is the capital letter of \01D8
    "\u01D8": "u", // u with Diaeresis and Acute
    // \01D9 is the capital letter of \01DA
    "\u01DA": "u", // u with Diaeresis and Caron
    // \01DB is the capital letter of \01DC
    "\u01DC": "u", // u with Diaeresis and Grave
    "\u01DD": "e", // Turned e
    // \01DE is the capital letter of \01DF
    "\u01DF": "a", // a with Diaeresis and Macron
    // \01E0 is the capital letter of \01E1
    "\u01E1": "a", // a with Dot Above and Macron
    // \01E2 is the capital letter of \01E3
    "\u01E3": "e", // ae with Macron
    // \01E4 is the capital letter of \01E5
    "\u01E5": "g", // g with Stroke
    // \01E6 is the capital letter of \01E7
    "\u01E7": "g", // g with Caron
    // \01E8 is the capital letter of \01E9
    "\u01E9": "k", // k with Caron
    // \01EA is the capital letter of \01EB
    "\u01EB": "o", // o with Ogonek
    // \01EC is the capital letter of \01ED
    "\u01ED": "o", // o with Ogonek and Macron
    // \01EE is the capital letter of \01EF
    "\u01EF": "zh", // zh with Caron
    "\u01F0": "j", // j with Caron
    // \01F1 and \01F2 are both capital letters of \01F3
    "\u01F3": "dz", // dz
    // \01F4 is the capital letter of \01F5
    "\u01F5": "g", // g with Acute
    // \01F6 is a capital letter of \0195
    // \01F7 is the capital letter of \01BF
    // \01F8 is the capital letter of \01F9
    "\u01F9": "n", // n with Grave
    // \01FA is the capital letter of \01FB
    // \01FB is a with Ring Above and Acute
    "\u01FB": "a",
    // \01FC is the capital letter of \01FD
    "\u01FD": "e", // ae with Acute
    // \01FE is the capital letter of \01FF
    "\u01FF": "i", // o with Stroke and Acute
    // \0200 is the capital letter of \0201
    "\u0201": "a", // a with Double Grave
    // \0202 is the capital letter of \0203
    "\u0203": "a", // a with Inverted Grave
    // \0204 is the capital letter of \0205
    "\u0205": "e", // e with Double Grave
    // \0206 is the capital letter of \0207
    "\u0207": "e", // e with Inverted Breve
    // \0208 is the capital letter of \0209
    "\u0209": "i", // i with Double Grave
    // \020A is the capital letter of \020B
    "\u020B": "i", // i with Inverted Breve
    // \020C is the capital letter of \020D
    "\u020D": "o", // o with Double Grave
    // \020E is the capital letter of \020F
    "\u020F": "o", // o with Inverted Breve
    // \0210 is the capital letter of \0211
    "\u0211": "r", // r with Double Grave
    // \0212 is the capital letter of \0213
    "\u0213": "r", // r with Inverted Breve
    // \0214 is the capital letter of \0215
    "\u0215": "u", // u with Double Grave
    // \0216 is the capital letter of \0217
    "\u0217": "u", // u with Inverted Breve
    // \0218 is the capital letter of \0219
    "\u0219": "s", // s with Comma Below
    // \021A is the capital letter of \021B
    "\u021B": "t", // t with Comma Below
    // \021C is the capital letter of \021D
    "\u021D": "g", // yogh
    // \021E is the capital letter of \021F
    "\u021F": "h", // h with Caron
    // \0220 is the capital letter of \019E
    "\u0221": "d", // d with Curl
    // \0222 is the capital letter of \0223
    "\u0223": "ou", // ou
    // \0224 is the capital letter of \0225
    "\u0225": "z", // z with Hook
    // \0226 is the capital letter of \0227
    "\u0227": "a", // a with Dot Above
    // \0228 is the capital letter of \0229
    "\u0229": "e", // e with Cedilla
    // \022A is the capital letter of \022B
    "\u022B": "o", // o with Diaeresis and Macron
    // \022C is the capital letter of \022D
    "\u022D": "o", // o with Tilde and Macron
    // \022E is the capital letter of \022F
    "\u022F": "o", // o with Dot Above
    // \0230 is the capital letter of \0231
    "\u0231": "o", // o with Dot Above and Macron
    // \0232 is the capital letter of \0233
    "\u0233": "y", // y with Macron
    "\u0234": "l", // l with Curl
    "\u0235": "n", // n with Curl
    "\u0236": "t", // t with Curl
    "\u0237": "j", // Dotless j
    "\u0238": "db", // db
    "\u0239": "qp", // qp
    // \023A is the capital letter of \2C65, but Oracle doesn't translate it
    "\u023A": "a", // A with Stroke
    // \023B is the capital letter of \023C, but Oracle doesn't translate it
    "\u023B": "c", // C with Stroke
    "\u023C": "c", // c with Stroke
    // \023D is the capital letter of \019A, but Oracle doesn't translate it
    "\u023D": "l", // L with Bar
    // \023E is the capital letter of \2C66, but Oracle doesn't translate it
    "\u023E": "t", // T with Diagonal Stroke
    "\u023F": "s", // s with Swash Tail
    "\u0240": "z", // z with Swash Tail
    // \0241 is the capital letter of \0242, but Oracle doesn't translate it
    "\u0241": "g", // Capital Glottal Stop
    "\u0242": "g", // Small Glottal Stop
    // \0243 is the capital letter of \0180, but Oracle doesn't translate it
    "\u0243": "b", // B with Stroke
    // \0244 is the capital letter of \0289, but Oracle doesn't translate it
    "\u0244": "u", // U Bar
    // \0245 is the capital letter of \028C, but Oracle doesn't translate it
    "\u0245": "a", // Turned V
    // In \0246 through \024F, Oracle may not translate upper to lower case,
    // so we have to do it here
    "\u0246": "e", // E with Stroke
    "\u0247": "e", // e with Stroke
    "\u0248": "j", // J with Stroke
    "\u0249": "j", // j with Stroke
    "\u024A": "q", // Q with Hook Tail
    "\u024B": "q", // q with Hook Tail
    "\u024C": "r", // R with Stroke
    "\u024D": "r", // r with Stroke
    "\u024E": "y", // Y with Stroke
    "\u024F": "y", // y with Stroke
    // \0250 through \02AF are all considered lower-case
    "\u0250": "a", // Turned a
    "\u0251": "a", // alpha
    "\u0252": "a", // Turned alpha
    "\u0253": "b", // b with Hook
    "\u0254": "o", // open o
    "\u0255": "c", // c with Curl
    "\u0256": "d", // d with Tail
    "\u0257": "d", // d with Hook
    "\u0258": "e", // Reversed e
    "\u0259": "e", // schwa
    "\u025A": "e", // schwa with Hook
    "\u025B": "e", // Open e
    "\u025C": "e", // Reversed Open e
    "\u025D": "e", // Reversed Open e with Hook
    "\u025E": "e", // Closed Reversed Open e
    "\u025F": "j", // Dotless j with Stroke
    "\u0260": "e", // g with Hook
    "\u0261": "g", // Script g
    "\u0262": "g", // Small Capital G
    "\u0263": "e", // gamma
    "\u0264": "g", // Baby gamma (Rams Horn)
    "\u0265": "h", // Turned h
    "\u0266": "h", // h with Hook
    "\u0267": "h", // heng with Hook
    "\u0268": "i", // i with Stroke
    "\u0269": "i", // iota
    "\u026A": "i", // Small Capital I
    "\u026B": "l", // l with Middle Tilde
    "\u026C": "l", // l with Belt
    "\u026D": "l", // l with Retroflex Hook
    "\u026E": "lzh", // lezh
    "\u026F": "w", // Turned m
    "\u0270": "w", // Turned m with Long Leg
    "\u0271": "m", // m with Hook
    "\u0272": "n", // n with Left Hook
    "\u0273": "n", // n with Retroflex Hook
    "\u0274": "n", // Small Capital N
    "\u0275": "o", // Barred o
    "\u0276": "e", // oe
    "\u0277": "u", // Closed omega
    "\u0278": "f", // phi
    "\u0279": "r", // Turned r
    "\u027A": "r", // Turned r with Long Leg
    "\u027B": "r", // Turned r with Hook
    "\u027C": "r", // r with Long Leg
    "\u027D": "r", // r with Tail
    "\u027E": "r", // r with Fishhook
    "\u027F": "r", // Reversed r with Fishhook
    "\u0280": "r", // Small Capital R
    "\u0281": "r", // Small Capital Inverted R
    "\u0282": "s", // s with Hook
    "\u0283": "s", // sigma
    "\u0284": "j", // Dotless j with Stroke
    "\u0285": "s", // Squat Reversed esh (sigma)
    "\u0286": "s", // esh with Curl
    "\u0287": "t", // Turned t
    "\u0288": "t", // t with Retroflex Hook
    "\u0289": "u", // u Bar
    "\u028A": "u", // upsilon
    "\u028B": "v", // v with Hook
    "\u028C": "a", // Turned v
    "\u028D": "w", // Turned w
    "\u028E": "y", // Turned y
    "\u028F": "y", // Small Capital Y
    "\u0290": "z", // z with Retroflex Hook
    "\u0291": "z", // z with Curl
    "\u0292": "zh", // zh
    "\u0293": "zh", // zh with Curl
    "\u0294": "g", // Glottal Stop
    "\u0295": "g", // Reversed Glottal Stop
    "\u0296": "g", // Inverted Glottal Stop
    "\u0297": "c", // Stretched c
    "\u0298": "b", // Bilabial Click
    "\u0299": "b", // Small Capital B
    "\u029A": "e", // Closed Open e
    "\u029B": "g", // Capital G with Hook
    "\u029C": "h", // Small Capital H
    "\u029D": "j", // j with Crossed Tail
    "\u029E": "k", // Turned k
    "\u029F": "l", // Small Capital L
    "\u02A0": "q", // q with Hook
    "\u02A1": "g", // Glottal Stop with Stroke
    "\u02A2": "g", // Reversed Glottal Stop with Stroke
    "\u02A3": "dz", // dz
    "\u02A4": "dzh", // dzh
    "\u02A5": "dz", // dz with Curl
    "\u02A6": "ts", // ts
    "\u02A7": "ts", // t with sigma
    "\u02A8": "tc", // tc with Curl
    // \02A9 through \02AD are for disordered speech
    "\u02AE": "h", // Turned h with Fishhook
    "\u02AF": "h", // Turned h with Fishhook and Tail
    // \02B0 through \02B8 are superscripts
    // \02B9 through \02FF are Spacing Modifier Letters
    // \0300 through \036F are Combining Diacritical Marks
    // \0370 through \03FF are Greek and Coptic
    // \0400 through \052F are Cyrillic
    // \0530 through \058F are Armenian
    // \0590 through \05FF are Hebrew
    // \0600 through \06FF are Arabic
    // \0700 through \074F are Syriac
    // \0750 through \077F are Arabic Supplement
    // \0780 through \07BF are Thaana
    // \07C0 through \07FF are NKo
    // \0800 through \083F are Samaritan
    // \0840 through \085F are Mandaic
    // \0860 through \089F are unused
    // \08A0 through \08FF are Arabic Extended-A
    // \0900 through \097F are Devanagari
    // \0980 through \09FF are Bengali and Assamese
    // \0A00 through \0A7F are Gurmukhi
    // \0A80 through \0AFF are Gujarati
    // \0B00 through \0B7F are Oriya
    // \0B80 through \0BFF are Tamil
    // \0C00 through \0C7F are Telugu
    // \0C80 through \0CFF are Kannada
    // \0D00 through \0D7F are Malayalam
    // \0D80 through \0DFF are Sinhala
    // \0E00 through \0E7F are Thai
    // \0E80 through \0EFF are Lao
    // \0F00 through \0FFF are Tibetan
    // \1000 through \109F are Myanmar
    // \10A0 through \10FF are Georgian
    // \1100 through \11FF are Hangul Jamo
    // \1200 through \12BF are Ethiopic
    // \12C0 through \137F are unused
    // \1380 through \139F are Ethiopic Supplement
    // \13A0 through \13FF are Cherokee
    // \1400 through \167F are Unified Canadian Aboriginal Syllabics
    // \1680 through \169F are Ogham
    // \16A0 through \16FF are Runic
    // \1700 through \171F are Tagalog
    // \1720 through \173F are Hanunoo
    // \1740 through \175F are Buhid
    // \1760 through \177F are Tagbanwa
    // \1780 through \17FF are Khmer
    // \1800 through \18AF are Mongolian
    // \18B0 through \18FF are UCAS Extended
    // \1900 through \194F are Limbu
    // \1950 through \197F are Tai Le
    // \1980 through \19DF are New Tai Lue
    // \19E0 through \19FF are Khmer Symbols
    // \1A00 through \1A1F are Buginese
    // \1A20 through \1AAF are Tai Tham
    // \1B00 through \1B7F are Balinese
    // \1B80 through \1BBF are Sundanese
    // \1BC0 through \1BFF are Batak
    // \1C00 through \1C4F are Lepcha
    // \1C50 through \1C7F are Ol Chiki
    // \1CC0 through \1CCF are Sundanese Supplement
    // \1CD0 through \1CFF are Vedic Extensions
    // \1D00 through \1D7F are Phonetic Extensions
    "\u1D7D": "p", // p with Stroke
    // \1D80 through \1DBF are Phonetic Extensions Supplement
    // \1DC0 through \1DFF are Combining Diacritical Marks Supplement
    // \1E00 through \1EFF are Latin Extended Additional
    // \1F00 through \1FFF are Greek Extended
    "\u2000": " ", // en Quad
    "\u2001": " ", // em Quad
    "\u2002": " ", // en Space
    "\u2003": " ", // em Space
    "\u2004": " ", // 3-per-em Space
    "\u2005": " ", // 4-per-em Space
    "\u2006": " ", // 6-per-em Space
    "\u2007": " ", // Figure Space
    "\u2008": " ", // Punctuation Space
    "\u2009": " ", // Thin Space
    "\u200A": " ", // Hair Space
    "\u200B": " ", // Zero Width Space
    "\u200C": " ", // Zero Width Non-Joiner
    "\u200D": " ", // Zero Width Joiner
    "\u200E": " ", // Left-to-Right Mark
    "\u200F": " ", // Right-to-Left Mark
    "\u2010": "-", // Hyphen
    "\u2011": "-", // Non-breaking Hyphen
    "\u2012": "-", // Figure Dash
    "\u2013": "-", // en Dash
    "\u2014": "-", // em Dash
    "\u2015": "-", // Horizontal Bar
    // \2016 and \2017 are double lines
    "\u2018": "'", // Left Single Quotation Mark
    "\u2019": "'", // Right Single Quotation Mark
    "\u201A": "'", // Single Low-9 Quotation Mark
    "\u201B": "'", // Single High-Reversed-9 Quotation Mark
    "\u201C": "'", // Left Double Quotation Mark
    "\u201D": "'", // Right Double Quotation Mark
    "\u201E": "'", // Double Low-9 Quotation Mark
    "\u201F": "'", // Double High-Reversed-9 Quotation Mark
    // \2020 through \2027 are daggers and bullets
    "\u2028": " ", // Line Separator
    "\u2029": " ", // Paragraph Separator
    "\u202A": " ", // Left-to-Right Embedding
    "\u202B": " ", // Right-to-Left Embedding
    "\u202C": " ", // Pop Directional Formatting
    "\u202D": " ", // Left-to-Right Override
    "\u202E": " ", // Right-to-Left Override
    "\u202F": " ", // Narrow No-Break Space
    // \2030 is Per mille Sign
    // \2031 is Per 10000 Sign
    "\u2032": "'", // Prime
    "\u2033": "'", // Double Prime
    "\u2034": "'", // Triple Prime
    "\u2035": "'", // Reversed Prime
    "\u2036": "'", // Reversed Double Prime
    "\u2037": "'", // Reversed Triple Prime
    "\u2038": "^", // Caret
    "\u2039": "'", // Single Left-Pointing Angle Quotation Mark
    "\u203A": "'", // Single Right-Pointing Angle Quotation Mark
    // \203B is Reference Mark
    "\u203C": "!", // Double Exclamation Mark
    "\u203D": "!?", // InterroBang
    "\u203E": "-", // Overline
    // \203F through \2042 are various symbols
    "\u2043": "-", // Hyphen Bullet
    "\u2044": "/", // Fraction Slash
    "\u2045": "(", // Left Square Bracket with Quill
    "\u2046": ")", // Right Square Bracket with Quill
    "\u2047": "?", // Double Question Mark
    "\u2048": "?!", // Question Exclamation Mark
    "\u2049": "!?", // Exclamation Question Mark
    // \204A through \204D are various symbols
    "\u204E": "*", // Low Asterisk
    "\u204F": ", ", // Reversed Semicolon
    // \2050 is Close Up
    "\u2051": "*", // Two Asterisks Aligned Vertically
    // \2052 is Commercial Minus Sign
    "\u2053": "-", // Swung Dash
    // \2054 is Inverted Undertie
    "\u2055": "*", // Flower Punctuation Mark
    // \2056 is 3 Dot Punctuation
    "\u2057": "'", // Quadruple Prime
    // \2058 through \205E are Dot Punctuation Marks
    "\u205F": " ", // Medium Mathematical Space
    "\u2060": " ", // Word Joiner
    "\u2061": " ", // Function Application
    "\u2062": " ", // Invisible Times
    "\u2063": " ", // Invisible Separator
    "\u2064": " ", // Invisible Plus
    // \2065 through \2069 are unused
    "\u206A": " ", // Inhibit Symmetric Swapping
    "\u206B": " ", // Activate Symmetric Swapping
    "\u206C": " ", // Inhibit Arabic Form Shaping
    "\u206D": " ", // Activate Arabic Form Shaping
    "\u206E": " ", // National Digit Shapes
    "\u206F": " ", // Nominal Digit Shapes
    "\u2070": "0", // Superscript 0
    "\u2071": "i", // Superscript i
    "\u2072": " ", // Empty Cell
    "\u2073": " ", // Empty Cell
    "\u2074": "4", // Superscript 4
    "\u2075": "5", // Superscript 5
    "\u2076": "6", // Superscript 6
    "\u2077": "7", // Superscript 7
    "\u2078": "8", // Superscript 8
    "\u2079": "9", // Superscript 9
    "\u207A": "+", // Superscript +
    "\u207B": "-", // Superscript -
    "\u207C": "=", // Superscript =
    "\u207D": "(", // Superscript (
    "\u207E": ")", // Superscript )
    "\u207F": "n", // Superscript n
    "\u2080": "0", // Subscript 0
    "\u2081": "1", // Subscript 1
    "\u2082": "2", // Subscript 2
    "\u2083": "3", // Subscript 3
    "\u2084": "4", // Subscript 4
    "\u2085": "5", // Subscript 5
    "\u2086": "6", // Subscript 6
    "\u2087": "7", // Subscript 7
    "\u2088": "8", // Subscript 8
    "\u2089": "9", // Subscript 9
    "\u208A": "+", // Subscript +
    "\u208B": "-", // Subscript -
    "\u208C": "=", // Subscript =
    "\u208D": "(", // Subscript (
    "\u208E": ")", // Subscript )
    "\u208F": " ", // Empty Cell
    "\u2090": "a", // Subscript a
    "\u2091": "e", // Subscript e
    "\u2092": "o", // Subscript o
    "\u2093": "x", // Subscript x
    "\u2094": "e", // Latin Subscript small letter schwa
    "\u2095": "h", // Subscript h
    "\u2096": "k", // Subscript k
    "\u2097": "l", // Subscript l
    "\u2098": "m", // Subscript m
    "\u2099": "n", // Subscript n
    "\u209A": "p", // Subscript p
    "\u209B": "s", // Subscript s
    "\u209C": "t", // Subscript t
    "\u209D": " ", // Empty Cell
    "\u209E": " ", // Empty Cell
    "\u209F": " ", // Empty Cell
    // \20A0 through \20CF are Currency Symbols
    // \20D0 through \20FF are Combining Diacritical Marks for Symbols
    // \2100 through \214F are Letterlike Symbols (Additional Squared Symbols)
    // \2150 through \218F are Number Forms
    // \2190 through \21FF are Arrows
    // \2200 through \22FF are Mathematical Operators
    // \2300 through \2335 are Miscellaneous Technical
    // \2336 through \237A are APL Symbols
    // \237B through \23FF are Miscellaneous Technical
    // \2400 through \243F are Control Pictures
    // \2440 through \245F are Optical Character Recognition
    // \2460 through \24FF are Enclosed Alphanumerics
    // \2500 through \257F are Box Drawing
    // \2580 through \259F are Block Elements
    // \25A0 through \25FF are Geometric Shapes
    // \2600 through \2615 are Miscellaneous Symbols
    // \2616 through \2617 are Japanese Chess
    // \2618 through \262F are Miscellaneous Symbols
    // \2630 through \2637 are Yijing Mono-, Di-, and Trigrams
    // \2638 through \2653 are Miscellaneous Symbols
    // \2654 through \265F are Chess, Checkers/Draughts
    // \2660 through \2667 are Card Suits
    // \2668 through \2689 are Miscellaneous Symbols
    // \268A through \268F are Yijing Mono-, Di-, and Trigrams
    // \2690 through \26BF are Miscellaneous Symbols
    // \26C0 through \26C3 are Chess, Checkers/Draughts
    // \26C4 through \26FF are Miscellaneous Symbols
    // \2700 through \27BF are Dingbats
    // \27C0 through \27EF are Miscellaneous Mathematical Symbols-A
    // \27F0 through \27FF are Supplemental Arrows-A
    // \2800 through \28FF are Braille Patterns
    // \2900 through \297F are Supplemental Arrows-B
    // \2980 through \29FF are Miscellaneous Mathematical Symbols-B
    // \2A00 through \2AFF are Supplemental Mathematical Operators
    // \2B00 through \2BFF are Additional Arrows (Additional Shapes)
    // \2C00 through \2C5F are Glagolitic
    // In \2C60 through \2C7F, Oracle does not translate upper to lower case,
    // so we have to do it here
    "\u2C60": "l", // L with Double Bar
    "\u2C61": "l", // l with Double Bar
    "\u2C62": "l", // L with Middle Tilde
    "\u2C63": "p", // P with Stroke
    "\u2C64": "r", // R with Tail
    "\u2C65": "a", // A with Struke
    "\u2C66": "t", // t with Diagonal Stroke
    "\u2C67": "h", // H with Descender
    "\u2C68": "h", // h with Descender
    "\u2C69": "k", // K with Descender
    "\u2C6A": "k", // k with Descender
    "\u2C6B": "z", // Z with Descender
    "\u2C6C": "z", // z with Descender
    "\u2C6D": "a", // Alpha
    "\u2C6E": "m", // M with Hook
    "\u2C6F": "a", // Turned A
    "\u2C70": "a", // Turned Alpha
    "\u2C71": "v", // v with Right Hook
    "\u2C72": "w", // W with Hook
    "\u2C73": "w", // w with Hook
    "\u2C74": "v", // v with Curl
    "\u2C75": "h", // Half H
    "\u2C76": "h", // Half h
    "\u2C77": "o", // Tailless phi
    "\u2C78": "e", // e with Notch
    "\u2C79": "r", // Turned r with Tail
    "\u2C7A": "o", // o with Low Ring Inside
    "\u2C7B": "e", // Small Capital Turned E
    "\u2C7C": "j", // Subscript j
    "\u2C7D": "v", // Modifier V
    "\u2C7E": "s", // S with Swash Tail
    "\u2C7F": "z", // Z with Swash Tail
    // \2C80 through \2CFF are Coptic
    // \2D00 through \2D2F are Georgian Supplement
    // \2D30 through \2D7F are Tifinagh
    // \2D80 through \2DDF are Ethiopic Extended
    // \2DE0 through \2DFF are Cyrillic Extended-A
    // \2E00 through \2E0D are New testament editorial symbols
    // \2E0E through \2E16 are Ancient Greek textual symbols
    "\u2E17": "'", // Double Oblique Hyphen
    "\u2E18": "!?", // Inverted Interrobang
    // \2E19 through \2E2D are various symbols
    "\u2E2E": "?", // Reversed ?
    "\u2E2F": "~", // Vertical Tilde
    // \2E30 and \2E31 are symbols
    "\u2E32": ",", // Turned ,
    // \2E33 is Raised Dot
    "\u2E34": ",", // Raised ,
    "\u2E35": ",", // Turned ;
    // \2E36 through \2E39 are various symbols
    "\u2E3A": "-", // 2-em Dash
    "\u2E3B": "-", // 3-em Dash
    // \2E80 through \2EFF are CJK Radicals Supplement
    // \2F00 through \2FDF are CJK Radicals / KangXi Radicals
    // \2FF0 through \2FFF are Ideographic Description Characters
    // \3000 through \303F are CJK Symbols and Punctuation
    // \3040 through \309F are Hiragana
    // \30A0 through \30FF are Katakana
    // \3100 through \312F are Bopomofo
    // \3130 through \318F are Hangul Compatibility Jamo
    // \3190 through \319F are Kanbun
    // \31A0 through \31BF are Bopomofo Extended
    // \31C0 through \31EF are CJK Strokes
    // \31F0 through \31FF are Katakana Phonetic Extensions
    // \3200 through \32FF are Enclosed CJK Letters and Months
    // \3300 through \33FF are CJK Compatibility
    // \3400 through \4DBF are CJK Extension-A
    // \4DC0 through \4DFF are Yijing Hexagram Symbols
    // \4E00 through \9FCF are CJK Unified Ideographs (Han)
    // \A000 through \A48F are Yi Syllables
    // \A490 through \A4CF are Yi Radicals
    // \A4D0 through \A4FF are Lisu
    // \A500 through \A63F are Vai
    // \A640 through \A69F are Cyrillic Extended-B
    // \A6A0 through \A6FF are Bamum
    // \A700 through \A71F are Modifier Tone Letters
    // \A720 through \A7FF are Latin Extended-D
    // \A800 through \A82F are Syloti Nagri
    // \A830 through \A83F are Common Indic Number Forms
    // \A840 through \A87F are Phags-Pa
    // \A880 through \A8DF are Saurashtra
    // \A8E0 through \A8FF are Devanagari Extended
    // \A900 through \A92F are Kayah Li
    // \A930 through \A95F are Rejang
    // \A960 through \A97F are Hangul Jamo Extended-A
    // \A980 through \A9DF are Javanese
    // \AA00 through \AA5F are Cham
    // \AA60 through \AA7F are Myanmar Extended-A
    // \AA80 through \AADF are Tai Viet
    // \AAE0 through \AAFF are Meetei Mayek Extensions
    // \AB00 through \AB2F are Ethiopic Extended-A
    // \ABC0 through \ABFF are Meetei Mayek
    // \AC00 through \D7AF are Hangul Syllables
    // \D7B0 through \D7FF are Hangul Jamo Extended-B
    // \D800 through \DBFF are High Surrogates
    // \DC00 through \DFFF are Low Surrogates
    // \E000 through \F8FF are Private Use Area
    // \F900 through \FAFF are CJK Compatibility Ideographs
    // \FB00 through \FB1C are Alphabetic Presentation Forms
    // \FB1D through \FB4F are Hebrew Presentation Forms
    // \FB50 through \FDFF are Arabic Presentation Forms-A
    // \FE00 through \FE0F are Variation Selectors
    "\uFE10": ",", // Vertical ,
    "\uFE11": ",", // Vertical Ideographic ,
    // \FE12 is Presentation form for Vertical Ideographic Full Stop
    "\uFE13": ":", // Vertical :
    "\uFE14": ",", // Vertical ;
    "\uFE15": "1", // Vertical !
    "\uFE16": "?", // Vertical ?
    // \FE17 through \FE19 are various symbols
    // \FE20 through \FE2F are Combining Half Marks
    // \FE30 through \FE4F are CJK Compatibility Forms
    "\uFE50": ",", // Small ,
    "\uFE51": ",", // Small Ideographic ,
    "\uFE52": ".", // Small
    // \FE53 is reserved
    "\uFE54": ",", // Small ;
    "\uFE55": ":", // Small :
    "\uFE56": "?", // Small ?
    "\uFE57": "!", // Small !
    "\uFE58": "-", // Small em Dash
    "\uFE59": "(", // Small (
    "\uFE5A": ")", // Small )
    "\uFE5B": "(", // Small {
    "\uFE5C": ")", // Small }
    "\uFE5D": "(", // Small [
    "\uFE5E": ")", // Small ]
    "\uFE5F": "#", // Small #
    "\uFE60": "&", // Small &
    "\uFE61": "*", // Small *
    "\uFE62": "+", // Small +
    "\uFE63": "-", // Small -
    "\uFE64": "(", // Small <
    "\uFE65": ")", // Small >
    "\uFE66": "=", // Small =
    // \FE67 is reserved
    "\uFE68": "\\", // Small \
    "\uFE69": "$", // Small $
    "\uFE6A": "%", // Small %
    "\uFE6B": "@", // Small @
    // \FE70 through \FEFF are Arabic Presentation Forms-B
    // \FF00 is unused
    "\uFF01": "!", // ! (full width)
    "\uFF02": "'", // " (full width)
    "\uFF03": "#", // # (full width)
    "\uFF04": "$", // $ (full width)
    "\uFF05": "%", // % (full width)
    "\uFF06": "&", // & (full width)
    "\uFF07": "'", // ' (full width)
    "\uFF08": "(", // ( (full width)
    "\uFF09": ")", // ) (full width)
    "\uFF0A": "*", // * (full width)
    "\uFF0B": "+", // + (full width)
    "\uFF0C": ",", // , (full width)
    "\uFF0D": "-", // - (full width)
    "\uFF0E": ".", // (full width)
    "\uFF0F": "/", // / (full width)
    "\uFF10": "0", // 0 (full width)
    "\uFF11": "1", // 1 (full width)
    "\uFF12": "2", // 2 (full width)
    "\uFF13": "3", // 3 (full width)
    "\uFF14": "4", // 4 (full width)
    "\uFF15": "5", // 5 (full width)
    "\uFF16": "6", // 6 (full width)
    "\uFF17": "7", // 7 (full width)
    "\uFF18": "8", // 8 (full width)
    "\uFF19": "9", // 9 (full width)
    "\uFF1A": ":", // : (full width)
    "\uFF1B": ",", // ; (full width)
    "\uFF1C": "(", // < (full width)
    "\uFF1D": "=", // = (full width)
    "\uFF1E": ")", // > (full width)
    "\uFF1F": "?", // ? (full width)
    "\uFF20": "@", // @ (full width)
    // \FF21 through \FF3A are upper-case of \FF41 through \FF5A
    "\uFF3B": "(", // [ (full width)
    "\uFF3C": "\\", // \ (full width)
    "\uFF3D": ")", // ] (full width)
    "\uFF3E": "^", // ^ (full width)
    "\uFF3F": "_", // _ (full width)
    "\uFF40": "'", // ` (full width)
    "\uFF41": "a", // a (full width)
    "\uFF42": "b", // b (full width)
    "\uFF43": "c", // c (full width)
    "\uFF44": "d", // d (full width)
    "\uFF45": "e", // e (full width)
    "\uFF46": "f", // f (full width)
    "\uFF47": "g", // g (full width)
    "\uFF48": "h", // h (full width)
    "\uFF49": "i", // i (full width)
    "\uFF4A": "j", // j (full width)
    "\uFF4B": "k", // k (full width)
    "\uFF4C": "l", // l (full width)
    "\uFF4D": "m", // m (full width)
    "\uFF4E": "n", // n (full width)
    "\uFF4F": "o", // o (full width)
    "\uFF50": "p", // p (full width)
    "\uFF51": "q", // q (full width)
    "\uFF52": "r", // r (full width)
    "\uFF53": "s", // s (full width)
    "\uFF54": "t", // t (full width)
    "\uFF55": "u", // u (full width)
    "\uFF56": "v", // v (full width)
    "\uFF57": "w", // w (full width)
    "\uFF58": "x", // x (full width)
    "\uFF59": "y", // y (full width)
    "\uFF5A": "z", // z (full width)
    "\uFF5B": "(", // { (full width)
    "\uFF5C": "|", // | (full width)
    "\uFF5D": ")", // } (full width)
    "\uFF5E": "~", // ~ (full width)
    "\uFF5F": "(", // (( (full width)
    "\uFF60": ")", // )) (full width)
    // \FF61 through \FF64 are halfwidth CJK Punctuation
    // \FF65 through \FF9F are Halfwidth Katakana
    // \FFA0 through \FFDC are Halfwidth Jamo
    // \FFDD through \FFDF are unused
    // \FFE0 through \FFE7 are currency symbols
    // \FFE8 through \FFEF are various symbols
    // \FFF0 through \FFFF are Specials (see below for \FFFD)
    // \10000 through \1007F are Linear B Syllabary
    // \10080 through \100FF are Linear B Ideograms
    // \10100 through \1013F are Aegean Numbers
    // \10140 through \1018F are Ancient Greek Numbers
    // \10190 through \101CF are Roman Symbols (Ancient Symbols)
    // \101D0 through \101FF are Phaistos Disc
    // \10280 through \1029F are Lycian
    // \102A0 through \102DF are Carian
    // \10300 through \1032F are Old Italic
    // \10330 through \1034F are Gothic
    // \10380 through \1039F are Ugaritic
    // \103A0 through \103DF are Old Persian
    // \10400 through \1044F are Deseret
    // \10450 through \1047F are Shavian
    // \10480 through \104AF are Osmanya
    // \10800 through \1083F are Cypriot Syllabary
    // \10840 through \1085F are Aramaic, Imperial
    // \10900 through \1091F are Phoenician
    // \10920 through \1093F are Lydian
    // \10980 through \1099F are Meroitic Hieroglyphs
    // \109A0 through \109FF are Meroitic Cursive
    // \10A00 through \10A5F are Kharoshthi
    // \10A60 through \10A7F are Old South Arabian
    // \10B00 through \10B3F are Avestan
    // \10B40 through \10B5F are Parthian, Inscriptional
    // \10B60 through \10B7F are Pahlavi, Inscriptional
    // \10C00 through \10C4F are Old Turkic
    // \10E60 through \10E7F are Rumi Numeral Symbols
    // \11000 through \1107F are Brahmi
    // \11080 through \110CF are Kaithi
    // \110D0 through \110FF are Sora Sompeng
    // \11100 through \1114F are Chakma
    // \11180 through \111DF are Sharada
    // \11680 through \116CF are Takri
    // \12000 through \123FF are Cuneiform
    // \12400 through \1247F are Cuneiform Numbers and Punctuation
    // \13000 through \1342F are Egyptian Hieroglyphs
    // \16800 through \16A3F are Bamum Supplement
    // \16F00 through \16F9F are Miao
    // \1B000 through \1B0FF are Kana Supplement
    // \1D000 through \1D0FF are Byzantine Musical Symbols
    // \1D100 through \1D1FF are Musical Symbols
    // \1D200 through \1D24F are Ancient Greek Musical Notation
    // \1D300 through \1D35F are Tai Xuan Jing Symbols
    // \1D360 through \1D37F are Counting Rod Numerals
    // \1D400 through \1D7FF are Mathematical Alphanumeric Symbols
    // \1EE00 through \1EEFF are Arabic Mathematical Alphabetic Symbols
    // \1F000 through \1F02F are Mahjong Tiles
    // \1F030 through \1F09F are Domino Tiles
    // \1F0A0 through \1F0FF are Playing Cards
    // \1F100 through \1F1FF are Enclosed Alphanumeric Supplement
    // \1F200 through \1F2FF are Enclosed Ideographic Supplement
    // \1F300 through \1F5FF are Miscellaneous Symbols and Pictographs
    // \1F600 through \1F64F are Emoticons
    // \1F680 through \1F6FF are Transport and Map Symbols
    // \1F700 through \1F77F are Alchemical Symbols
    // \20000 through \2A6DF are CJK Extension B
    // \2A700 through \2B73F are CJK Extension C
    // \2B740 through \2B81F are CJK Extension D
    // \2F800 through \2FA1F are CJK Compatibility Ideographs Supplement
    // \E0000 through \E007F are Tags
    // \E0100 through \E01EF are Variation Selectors Supplement
    // \F0000 through \FFFFD are Supplementary Private Use Area-A
    // \100000 through \10FFFD are Supplementary Private Use Area-B
  };

  /**
   * Produces the standardized form of a filter string.
   *
   * The input is lower-cased first; then every UTF-16 code unit that has an
   * entry in the translations table is replaced by its mapped text, and every
   * other code unit is kept as-is. A replacement is not necessarily a single
   * character (for example, the Inverted Interrobang maps to "!?"), so the
   * result may differ in length from the input.
   *
   * @param value - The text to standardize.
   * @returns The standardized text. A `null` or `undefined` input is returned
   * unchanged.
   */
  public static Standardize(value: string): string {
    // Callers may hand in null/undefined despite the declared type; pass it
    // through untouched rather than failing on toLowerCase().
    if (value === null || value === undefined) {
      return value;
    }

    // Reach the table through the class name instead of `this`, so the method
    // keeps working when it is used unbound (e.g. passed directly to `map`).
    // The Record view allows indexing by an arbitrary string under `strict`.
    const table: Readonly<Record<string, string>> =
      FilterStandardization.translations;

    return value
      .toLowerCase()
      // split("") yields individual UTF-16 code units, matching the
      // single-code-unit keys of the translations table.
      .split("")
      .map((char) =>
        // Own-property check: never pick up anything from the prototype chain.
        Object.prototype.hasOwnProperty.call(table, char) ? table[char] : char
      )
      .join("");
  }
}
