Fossil

Check-in [e217b8b9]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:(cherry-pick): Update internal Unicode character tables, used in regular expression handling, from version 11.0 to 12.0. In "fossil regexp", "fossil grep" and the TH1 "regexp" command, the -nocase option now removes multiple diacritics from the same character (derived from SQLite's remove_diacritics=2)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | branch-2.8
Files: files | file ages | folders
SHA3-256: e217b8b988b77edbd7e9cb2aecadcfceb69025016775f279b91e81c3b52d31ef
User & Date: jan.nijtmans 2019-03-01 10:30:44
Context
2019-03-21
23:51
(cherry-pick): Add the "New Era" or "元号" placeholder to the (regexp) Unicode tables. So fossil is ready for the expected japanese May 1 event. See: http://blog.unicode.org/2018/09/new-japanese-era.html check-in: 31aefc3b user: jan.nijtmans tags: branch-2.8
2019-03-01
10:30
(cherry-pick): Update internal Unicode character tables, used in regular expression handling, from version 11.0 to 12.0. In "fossil regexp", "fossil grep" and the TH1 "regexp" command, the -nocase option now removes multiple diacritics from the same character (derived from SQLite's remove_diacritics=2) check-in: e217b8b9 user: jan.nijtmans tags: branch-2.8
09:38
Update internal Unicode character tables, used in regular expression handling, from version 11.0 to 12.0. In "fossil regexp", "fossil grep" and the TH1 "regexp" command, the -nocase option now removes multiple diacritics from the same character (derived from SQLite's remove_diacritics=2) check-in: b2c424ad user: jan.nijtmans tags: trunk
2019-02-27
19:12
(cherry-pick): Fix a mysterious bug in is_ticket() that was preventing me from updating the TCL repository. check-in: c460f943 user: jan.nijtmans tags: branch-2.8
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/regexp.c.

11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
..
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
...
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
...
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
...
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
...
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
...
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
...
660
661
662
663
664
665
666

667
668
669
670
671
672
673

674
675
676
677
678
679
680
681
682
683
684
685
686



687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
**
** Author contact information:
**   drh@hwaci.com
**   http://www.hwaci.com/drh/
**
*******************************************************************************
**
** This file was adapted from the test_regexp.c file in SQLite3.  That
** file is in the public domain.
**
** See ../www/grep.md for details of the algorithm and RE dialect.
*/
#include "config.h"
#include "regexp.h"

................................................................................
};
#endif

/* Insert newState into the state set pSet.  The set is scanned first and
** nothing is stored if newState is already one of its members. */
static void re_add_state(ReStateSet *pSet, int newState){
  unsigned k = 0;
  while( k<pSet->nState ){
    if( pSet->aState[k]==newState ) return;   /* already present */
    k++;
  }
  pSet->aState[pSet->nState] = newState;
  pSet->nState++;
}

/* Extract the next unicode character from *pzIn and return it.  Advance
** *pzIn to the first byte past the end of the character returned.  To
** be clear:  this routine converts utf8 to unicode.  This routine is
** optimized for the common case where the next character is a single byte.
*/
................................................................................
      c = 0xfffd;
    }
  }
  return c;
}
/* Case-insensitive variant of re_next_char(): fetch the next character
** from p and return it after passing it through unicode_fold() with the
** bRemoveDiacritic argument set to 1. */
static unsigned re_next_char_nocase(ReInput *p){
  return unicode_fold(re_next_char(p), 1);
}

/* Return 1 if c is a "word" character, 0 otherwise.  A word character is
** anything accepted by unicode_isalnum(), or the underscore '_'. */
static int re_word_char(int c){
  if( unicode_isalnum(c) ) return 1;
  return c=='_';
}

................................................................................
  int c = RE_EOF+1;
  int cPrev = 0;
  int rc = 0;
  ReInput in;

  in.z = zIn;
  in.i = 0;
  in.mx = nIn>=0 ? nIn : strlen((const char*)zIn);

  /* Look for the initial prefix match, if there is one. */
  if( pRe->nInit ){
    unsigned char x = pRe->zInit[0];
    while( in.i+pRe->nInit<=in.mx
     && (zIn[in.i]!=x ||
         strncmp((const char*)zIn+in.i, (const char*)pRe->zInit, pRe->nInit)!=0)
    ){
      in.i++;
    }
    if( in.i+pRe->nInit>in.mx ) return 0;
  }

  if( pRe->nState<=count(aSpace)*2 ){
    pToFree = 0;
    aStateSet[0].aState = aSpace;
  }else{
    pToFree = fossil_malloc( sizeof(ReStateNumber)*2*pRe->nState );
    if( pToFree==0 ) return -1;
    aStateSet[0].aState = pToFree;
  }
................................................................................
  int i;
  if( p->nAlloc<=p->nState && re_resize(p, p->nAlloc*2) ) return 0;
  for(i=p->nState; i>iBefore; i--){
    p->aOp[i] = p->aOp[i-1];
    p->aArg[i] = p->aArg[i-1];
  }
  p->nState++;
  p->aOp[iBefore] = op;
  p->aArg[iBefore] = arg;
  return iBefore;
}

/* Append a new opcode and argument to the end of the RE under construction.
*/
static int re_append(ReCompiled *p, int op, int arg){
................................................................................
  if( zIn[0]=='^' ){
    zIn++;
  }else{
    re_append(pRe, RE_OP_ANYSTAR, 0);
  }
  pRe->sIn.z = (unsigned char*)zIn;
  pRe->sIn.i = 0;
  pRe->sIn.mx = strlen(zIn);
  zErr = re_subcompile_re(pRe);
  if( zErr ){
    re_free(pRe);
    return zErr;
  }
  if( rePeek(pRe)=='$' && pRe->sIn.i+1>=pRe->sIn.mx ){
    re_append(pRe, RE_OP_MATCH, RE_EOF);
................................................................................
  ** regex engine over the string.  Do not worry about trying to match
  ** unicode characters beyond plane 0 - those are very rare and this is
  ** just an optimization. */
  if( pRe->aOp[0]==RE_OP_ANYSTAR ){
    for(j=0, i=1; j<sizeof(pRe->zInit)-2 && pRe->aOp[i]==RE_OP_MATCH; i++){
      unsigned x = pRe->aArg[i];
      if( x<=127 ){
        pRe->zInit[j++] = x;
      }else if( x<=0xfff ){
        pRe->zInit[j++] = 0xc0 | (x>>6);
        pRe->zInit[j++] = 0x80 | (x&0x3f);
      }else if( x<=0xffff ){
        pRe->zInit[j++] = 0xd0 | (x>>12);
        pRe->zInit[j++] = 0x80 | ((x>>6)&0x3f);
        pRe->zInit[j++] = 0x80 | (x&0x3f);
      }else{
        break;
      }
    }
    if( j>0 && pRe->zInit[j-1]==0 ) j--;
................................................................................
  int argc,
  sqlite3_value **argv
){
  ReCompiled *pRe;          /* Compiled regular expression */
  const char *zPattern;     /* The regular expression */
  const unsigned char *zStr;/* String being searched */
  const char *zErr;         /* Compile error message */


  pRe = sqlite3_get_auxdata(context, 0);
  if( pRe==0 ){
    zPattern = (const char*)sqlite3_value_text(argv[0]);
    if( zPattern==0 ) return;
    zErr = re_compile(&pRe, zPattern, 0);
    if( zErr ){

      sqlite3_result_error(context, zErr, -1);
      return;
    }
    if( pRe==0 ){
      sqlite3_result_error_nomem(context);
      return;
    }
    sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free);
  }
  zStr = (const unsigned char*)sqlite3_value_text(argv[1]);
  if( zStr!=0 ){
    sqlite3_result_int(context, re_match(pRe, zStr, -1));
  }



}

/*
** Register the two-argument SQL function "regexp" on database connection
** db.  The function is implemented by re_sql_func() and is declared with
** the SQLITE_UTF8 text encoding.
**
** The return value is whatever sqlite3_create_function() returns.
**
** NOTE(review): an earlier revision of this comment suggested installing
** the function on every new connection through sqlite3_auto_extension(),
** but the routine it named (sqlite3_add_regexp_func) does not appear in
** the code shown here - confirm the correct entry point before relying
** on that.
*/
int re_add_sql_func(sqlite3 *db){
  int rc;
  rc = sqlite3_create_function(db, "regexp", 2, SQLITE_UTF8, 0,
                               re_sql_func, 0, 0);
  return rc;
}

/*







|







 







|







 







|







 







|













|







 







|







 







|







 







|

|


|







 







>







>







|





>
>
>



|

<
<
<
<
<
<
<







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
..
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
...
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
...
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
...
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
...
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
...
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
...
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696







697
698
699
700
701
702
703
**
** Author contact information:
**   drh@hwaci.com
**   http://www.hwaci.com/drh/
**
*******************************************************************************
**
** This file was adapted from the ext/misc/regexp.c file in SQLite3.  That
** file is in the public domain.
**
** See ../www/grep.md for details of the algorithm and RE dialect.
*/
#include "config.h"
#include "regexp.h"

................................................................................
};
#endif

/* Insert newState into the state set pSet.  The set is scanned first and
** nothing is stored if newState is already one of its members. */
static void re_add_state(ReStateSet *pSet, int newState){
  unsigned k = 0;
  while( k<pSet->nState ){
    if( pSet->aState[k]==newState ) return;   /* already present */
    k++;
  }
  pSet->aState[pSet->nState] = (ReStateNumber)newState;
  pSet->nState++;
}

/* Extract the next unicode character from *pzIn and return it.  Advance
** *pzIn to the first byte past the end of the character returned.  To
** be clear:  this routine converts utf8 to unicode.  This routine is
** optimized for the common case where the next character is a single byte.
*/
................................................................................
      c = 0xfffd;
    }
  }
  return c;
}
/* Case-insensitive variant of re_next_char(): fetch the next character
** from p and return it after passing it through unicode_fold() with the
** bRemoveDiacritic argument set to 2. */
static unsigned re_next_char_nocase(ReInput *p){
  return unicode_fold(re_next_char(p), 2);
}

/* Return 1 if c is a "word" character, 0 otherwise.  A word character is
** anything accepted by unicode_isalnum(), or the underscore '_'. */
static int re_word_char(int c){
  if( unicode_isalnum(c) ) return 1;
  return c=='_';
}

................................................................................
  int c = RE_EOF+1;
  int cPrev = 0;
  int rc = 0;
  ReInput in;

  in.z = zIn;
  in.i = 0;
  in.mx = nIn>=0 ? nIn : (int)strlen((char const*)zIn);

  /* Look for the initial prefix match, if there is one. */
  if( pRe->nInit ){
    unsigned char x = pRe->zInit[0];
    while( in.i+pRe->nInit<=in.mx
     && (zIn[in.i]!=x ||
         strncmp((const char*)zIn+in.i, (const char*)pRe->zInit, pRe->nInit)!=0)
    ){
      in.i++;
    }
    if( in.i+pRe->nInit>in.mx ) return 0;
  }

  if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){
    pToFree = 0;
    aStateSet[0].aState = aSpace;
  }else{
    pToFree = fossil_malloc( sizeof(ReStateNumber)*2*pRe->nState );
    if( pToFree==0 ) return -1;
    aStateSet[0].aState = pToFree;
  }
................................................................................
  int i;
  if( p->nAlloc<=p->nState && re_resize(p, p->nAlloc*2) ) return 0;
  for(i=p->nState; i>iBefore; i--){
    p->aOp[i] = p->aOp[i-1];
    p->aArg[i] = p->aArg[i-1];
  }
  p->nState++;
  p->aOp[iBefore] = (char)op;
  p->aArg[iBefore] = arg;
  return iBefore;
}

/* Append a new opcode and argument to the end of the RE under construction.
*/
static int re_append(ReCompiled *p, int op, int arg){
................................................................................
  if( zIn[0]=='^' ){
    zIn++;
  }else{
    re_append(pRe, RE_OP_ANYSTAR, 0);
  }
  pRe->sIn.z = (unsigned char*)zIn;
  pRe->sIn.i = 0;
  pRe->sIn.mx = (int)strlen(zIn);
  zErr = re_subcompile_re(pRe);
  if( zErr ){
    re_free(pRe);
    return zErr;
  }
  if( rePeek(pRe)=='$' && pRe->sIn.i+1>=pRe->sIn.mx ){
    re_append(pRe, RE_OP_MATCH, RE_EOF);
................................................................................
  ** regex engine over the string.  Do not worry about trying to match
  ** unicode characters beyond plane 0 - those are very rare and this is
  ** just an optimization. */
  if( pRe->aOp[0]==RE_OP_ANYSTAR ){
    for(j=0, i=1; j<sizeof(pRe->zInit)-2 && pRe->aOp[i]==RE_OP_MATCH; i++){
      unsigned x = pRe->aArg[i];
      if( x<=127 ){
        pRe->zInit[j++] = (unsigned char)x;
      }else if( x<=0xfff ){
        pRe->zInit[j++] = (unsigned char)(0xc0 | (x>>6));
        pRe->zInit[j++] = 0x80 | (x&0x3f);
      }else if( x<=0xffff ){
        pRe->zInit[j++] = (unsigned char)(0xd0 | (x>>12));
        pRe->zInit[j++] = 0x80 | ((x>>6)&0x3f);
        pRe->zInit[j++] = 0x80 | (x&0x3f);
      }else{
        break;
      }
    }
    if( j>0 && pRe->zInit[j-1]==0 ) j--;
................................................................................
  int argc,
  sqlite3_value **argv
){
  ReCompiled *pRe;          /* Compiled regular expression */
  const char *zPattern;     /* The regular expression */
  const unsigned char *zStr;/* String being searched */
  const char *zErr;         /* Compile error message */
  int setAux = 0;           /* True to invoke sqlite3_set_auxdata() */

  pRe = sqlite3_get_auxdata(context, 0);
  if( pRe==0 ){
    zPattern = (const char*)sqlite3_value_text(argv[0]);
    if( zPattern==0 ) return;
    zErr = re_compile(&pRe, zPattern, 0);
    if( zErr ){
      re_free(pRe);
      sqlite3_result_error(context, zErr, -1);
      return;
    }
    if( pRe==0 ){
      sqlite3_result_error_nomem(context);
      return;
    }
    setAux = 1;
  }
  zStr = (const unsigned char*)sqlite3_value_text(argv[1]);
  if( zStr!=0 ){
    sqlite3_result_int(context, re_match(pRe, zStr, -1));
  }
  if( setAux ){
    sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free);
  }
}

/*
** Register the two-argument SQL function "regexp" on database connection
** db.  The function is implemented by re_sql_func() and is declared with
** the SQLITE_UTF8 text encoding.
**
** The return value is whatever sqlite3_create_function() returns.
*/
int re_add_sql_func(sqlite3 *db){
  int rc;
  rc = sqlite3_create_function(db, "regexp", 2, SQLITE_UTF8, 0,
                               re_sql_func, 0, 0);
  return rc;
}

/*

Changes to src/unicode.c.

57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124

125
126
127
128
129

130

131
132
133
134

135
136
137
138
139
140
141
142
143



144
145
146
147
148
149
150
...
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189

190
191
192
193


194
195


196

197
198
199
200
201
202
203
204
205
206
















207
208
209
210
211
212
213
...
216
217
218
219
220
221
222

223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
...
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333

334
335
336
337
338
339
340
341
342
343
344
345
346
347
348

349
350
351
352
353
354
355
...
375
376
377
378
379
380
381

382

383
384
385
386
387
388
389
    0x0027E802, 0x0027F402, 0x00280403, 0x0028F001, 0x0028F805,
    0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D402,
    0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
    0x002B8802, 0x002BC002, 0x002BE806, 0x002C0403, 0x002CF001,
    0x002CF807, 0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802,
    0x002DC001, 0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804,
    0x002F5C01, 0x002FCC08, 0x00300005, 0x0030F807, 0x00311803,
    0x00312804, 0x00315402, 0x00318802, 0x0031FC01, 0x00320404,
    0x0032F001, 0x0032F807, 0x00331803, 0x00332804, 0x00335402,
    0x00338802, 0x00340004, 0x0034EC02, 0x0034F807, 0x00351803,
    0x00352804, 0x00353C01, 0x00355C01, 0x00358802, 0x0035E401,
    0x00360802, 0x00372801, 0x00373C06, 0x00375801, 0x00376008,
    0x0037C803, 0x0038C401, 0x0038D007, 0x0038FC01, 0x00391C09,
    0x00396802, 0x003AC401, 0x003AD006, 0x003AEC02, 0x003B2006,
    0x003C041F, 0x003CD00C, 0x003DC417, 0x003E340B, 0x003E6424,
    0x003EF80F, 0x003F380D, 0x0040AC14, 0x00412806, 0x00415804,
    0x00417803, 0x00418803, 0x00419C07, 0x0041C404, 0x0042080C,
    0x00423C01, 0x00426806, 0x0043EC01, 0x004D740C, 0x004E400A,
    0x00500001, 0x0059B402, 0x005A0001, 0x005A6C02, 0x005BAC03,
    0x005C4803, 0x005CC805, 0x005D4802, 0x005DC802, 0x005ED023,
    0x005F6004, 0x005F7401, 0x0060000F, 0x00621402, 0x0062A401,
    0x0064800C, 0x0064C00C, 0x00650001, 0x00651002, 0x00677822,
    0x00685C05, 0x00687802, 0x0069540A, 0x0069801D, 0x0069FC01,
    0x006A8007, 0x006AA006, 0x006AC00F, 0x006C0005, 0x006CD011,
    0x006D6823, 0x006E0003, 0x006E840D, 0x006F980E, 0x006FF004,
    0x00709014, 0x0070EC05, 0x0071F802, 0x00730008, 0x00734019,
    0x0073B401, 0x0073C803, 0x0073DC03, 0x0077003A, 0x0077EC05,
    0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403, 0x007FB403,
    0x007FF402, 0x00800065, 0x0081980A, 0x0081E805, 0x00822805,
    0x00828020, 0x00834021, 0x00840002, 0x00840C04, 0x00842002,
    0x00845001, 0x00845803, 0x00847806, 0x00849401, 0x00849C01,
    0x0084A401, 0x0084B801, 0x0084E802, 0x00850005, 0x00852804,
    0x00853C01, 0x00862802, 0x00864297, 0x0091000B, 0x0092704E,
    0x00940276, 0x009E53E0, 0x00ADD820, 0x00AE6031, 0x00AF2835,
    0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
    0x00B5FC01, 0x00B7804F, 0x00B8C01F, 0x00BA001A, 0x00BA6C59,
    0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
    0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
    0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
    0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
    0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
    0x029A7802, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
    0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
    0x02A1D004, 0x02A20002, 0x02A2D012, 0x02A33802, 0x02A38012,
    0x02A3E003, 0x02A3F001, 0x02A3FC01, 0x02A4980A, 0x02A51C0D,
    0x02A57C01, 0x02A60004, 0x02A6CC1B, 0x02A77802, 0x02A79401,
    0x02A8A40E, 0x02A90C01, 0x02A93002, 0x02A97004, 0x02A9DC03,
    0x02A9EC03, 0x02AAC001, 0x02AAC803, 0x02AADC02, 0x02AAF802,
    0x02AB0401, 0x02AB7802, 0x02ABAC07, 0x02ABD402, 0x02AD6C01,
    0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02, 0x037FFC01,
    0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802, 0x03F7F002,
    0x03F8001A, 0x03F88033, 0x03F95013, 0x03F9A004, 0x03FBFC01,
    0x03FC040F, 0x03FC6807, 0x03FCEC06, 0x03FD6C0B, 0x03FF8007,
    0x03FFA007, 0x03FFE405, 0x04040003, 0x0404DC09, 0x0405E411,
    0x04063003, 0x0406400C, 0x04068001, 0x0407402E, 0x040B8001,
    0x040DD805, 0x040E7C01, 0x040F4001, 0x0415BC01, 0x04215C01,
    0x0421DC02, 0x04247C01, 0x0424FC01, 0x04280403, 0x04281402,
    0x04283004, 0x0428E003, 0x0428FC01, 0x04294009, 0x0429FC01,
    0x042B2001, 0x042B9402, 0x042BC007, 0x042CE407, 0x042E6404,
    0x04349004, 0x043D180B, 0x043D5405, 0x04400003, 0x0440E016,
    0x0441FC04, 0x0442C012, 0x04433401, 0x04440003, 0x04449C0E,
    0x04450004, 0x04451402, 0x0445CC03, 0x04460003, 0x0446CC0E,
    0x04471409, 0x04476C01, 0x04477403, 0x0448B013, 0x044AA401,
    0x044B7C0C, 0x044C0004, 0x044CEC02, 0x044CF807, 0x044D1C02,
    0x044D2C03, 0x044D5C01, 0x044D8802, 0x044D9807, 0x044DC005,
    0x0450D412, 0x04512C05, 0x04516C01, 0x04517402, 0x0452C014,
    0x04531801, 0x0456BC07, 0x0456E020, 0x04577002, 0x0458C014,
    0x0459800D, 0x045AAC0D, 0x045C740F, 0x045CF004, 0x0460B010,

    0x0468040A, 0x0468CC07, 0x0468EC0D, 0x0469440B, 0x046A2813,
    0x046A7805, 0x0470BC08, 0x0470E008, 0x04710405, 0x0471C002,
    0x04724816, 0x0472A40E, 0x0474C406, 0x0474E801, 0x0474F002,
    0x0474FC07, 0x04751C01, 0x04762805, 0x04764002, 0x04764C05,
    0x047BCC06, 0x0491C005, 0x05A9B802, 0x05ABC006, 0x05ACC010,

    0x05AD1002, 0x05BA5C04, 0x05BD442E, 0x05BE3C04, 0x06F27008,

    0x074000F6, 0x07440027, 0x0744A4C0, 0x07480046, 0x074C0057,
    0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401, 0x075CD401,
    0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401, 0x075F0C01,
    0x0760028C, 0x076A6C05, 0x076A840F, 0x07800007, 0x07802011,

    0x07806C07, 0x07808C02, 0x07809805, 0x07A34007, 0x07A51007,
    0x07A57802, 0x07B2B001, 0x07B2C001, 0x07BBC002, 0x07C0002C,
    0x07C0C064, 0x07C2800F, 0x07C2C40F, 0x07C3040F, 0x07C34425,
    0x07C4405C, 0x07C5C03D, 0x07C7981D, 0x07C8402C, 0x07C90009,
    0x07C94002, 0x07C98006, 0x07CC03D5, 0x07DB800D, 0x07DBC00A,
    0x07DC0074, 0x07DE0059, 0x07E0000C, 0x07E04038, 0x07E1400A,
    0x07E18028, 0x07E2401E, 0x07E4000C, 0x07E4402F, 0x07E50031,
    0x07E5CC04, 0x07E5E801, 0x07E5F027, 0x07E6C00A, 0x07E70003,
    0x07E74030, 0x07E9800E, 0x38000401, 0x38008060, 0x380400F0,



  };
  static const unsigned int aAscii[4] = {
    0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
  };

  if( (unsigned int)c<128 ){
    return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
................................................................................
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
** E"). The results of passing a codepoint that corresponds to an
** uppercase letter are undefined.
*/
static int unicode_remove_diacritic(int c){
  static const unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
     3456,  3696,  3712,  3728,  3744,  3896,  3912,  3928,
     3968,  4008,  4040,  4106,  4138,  4170,  4202,  4234,
     4266,  4296,  4312,  4344,  4408,  4424,  4472,  4504,

     6148,  6198,  6264,  6280,  6360,  6429,  6505,  6529,
    61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726,
    61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122,
    62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536,


    62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730,
    62924, 63050, 63082, 63274, 63390,


  };

  static const char aChar[] = {
    '\0', 'a',  'c',  'e',  'i',  'n',  'o',  'u',  'y',  'y',  'a',  'c',
    'd',  'e',  'e',  'g',  'h',  'i',  'j',  'k',  'l',  'n',  'o',  'r',
    's',  't',  'u',  'u',  'w',  'y',  'z',  'o',  'u',  'a',  'i',  'o',
    'u',  'g',  'k',  'o',  'j',  'g',  'n',  'a',  'e',  'i',  'o',  'r',
    'u',  's',  't',  'h',  'a',  'e',  'o',  'y',  '\0', '\0', '\0', '\0',
    '\0', '\0', '\0', '\0', 'a',  'b',  'd',  'd',  'e',  'f',  'g',  'h',
    'h',  'i',  'k',  'l',  'l',  'm',  'n',  'p',  'r',  'r',  's',  't',
    'u',  'v',  'w',  'w',  'x',  'y',  'z',  'h',  't',  'w',  'y',  'a',
    'e',  'i',  'o',  'u',  'y',
















  };

  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
................................................................................
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );

  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
}


/*
** Return true (non-zero) if the argument interpreted as a unicode codepoint
** is a diacritical modifier character, or 0 if it is not.
**
** Only codepoints 768 through 817 (U+0300..U+0331) are considered; every
** other value yields 0.  Membership is looked up in two 32-bit bitmaps:
** bit (c-768) of mask0 covers 768..799 and bit (c-800) of mask1 covers
** 800..817.
*/
int unicode_is_diacritic(int c){
  const unsigned int mask0 = 0x08029FDF;
  const unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;
  /* Shift an unsigned 1: for c==799 the shift count is 31, and shifting a
  ** signed 1 into the sign bit of a 32-bit int is undefined behavior. */
  return (c < 768+32) ?
      (int)(mask0 & ((unsigned int)1 << (c-768))) :
      (int)(mask1 & ((unsigned int)1 << (c-768-32)));
}


/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int unicode_fold(int c, int bRemoveDiacritic){
  /* Each entry in the following array defines a rule for folding a range
  ** of codepoints to lower case. The rule applies to a range of nRange
  ** codepoints starting at codepoint iCode.
  **
  ** If the least significant bit in flags is clear, then the rule applies
  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
  ** need to be folded). Or, if it is set, then the rule only applies to
................................................................................
  static const struct TableEntry {
    unsigned short iCode;
    unsigned char flags;
    unsigned char nRange;
  } aEntry[] = {
    {65, 14, 26},          {181, 66, 1},          {192, 14, 23},
    {216, 14, 7},          {256, 1, 48},          {306, 1, 6},
    {313, 1, 16},          {330, 1, 46},          {376, 152, 1},
    {377, 1, 6},           {383, 140, 1},         {385, 52, 1},
    {386, 1, 4},           {390, 46, 1},          {391, 0, 1},
    {393, 44, 2},          {395, 0, 1},           {398, 34, 1},
    {399, 40, 1},          {400, 42, 1},          {401, 0, 1},
    {403, 44, 1},          {404, 48, 1},          {406, 54, 1},
    {407, 50, 1},          {408, 0, 1},           {412, 54, 1},
    {413, 56, 1},          {415, 58, 1},          {416, 1, 6},
    {422, 62, 1},          {423, 0, 1},           {425, 62, 1},
    {428, 0, 1},           {430, 62, 1},          {431, 0, 1},
    {433, 60, 2},          {435, 1, 4},           {439, 64, 1},
    {440, 0, 1},           {444, 0, 1},           {452, 2, 1},
    {453, 0, 1},           {455, 2, 1},           {456, 0, 1},
    {458, 2, 1},           {459, 1, 18},          {478, 1, 18},
    {497, 2, 1},           {498, 1, 4},           {502, 158, 1},
    {503, 170, 1},         {504, 1, 40},          {544, 146, 1},
    {546, 1, 18},          {570, 74, 1},          {571, 0, 1},
    {573, 144, 1},         {574, 72, 1},          {577, 0, 1},
    {579, 142, 1},         {580, 30, 1},          {581, 32, 1},
    {582, 1, 10},          {837, 38, 1},          {880, 1, 4},
    {886, 0, 1},           {895, 38, 1},          {902, 20, 1},
    {904, 18, 3},          {908, 28, 1},          {910, 26, 2},
    {913, 14, 17},         {931, 14, 9},          {962, 0, 1},
    {975, 4, 1},           {976, 176, 1},         {977, 178, 1},
    {981, 182, 1},         {982, 180, 1},         {984, 1, 24},
    {1008, 172, 1},        {1009, 174, 1},        {1012, 166, 1},
    {1013, 164, 1},        {1015, 0, 1},          {1017, 188, 1},
    {1018, 0, 1},          {1021, 146, 3},        {1024, 36, 16},
    {1040, 14, 32},        {1120, 1, 34},         {1162, 1, 54},
    {1216, 6, 1},          {1217, 1, 14},         {1232, 1, 96},
    {1329, 24, 38},        {4256, 70, 38},        {4295, 70, 1},
    {4301, 70, 1},         {5112, 186, 6},        {7296, 122, 1},
    {7297, 124, 1},        {7298, 126, 1},        {7299, 130, 2},
    {7301, 128, 1},        {7302, 132, 1},        {7303, 134, 1},
    {7304, 96, 1},         {7312, 138, 43},       {7357, 138, 3},
    {7680, 1, 150},        {7835, 168, 1},        {7838, 116, 1},
    {7840, 1, 96},         {7944, 186, 8},        {7960, 186, 6},
    {7976, 186, 8},        {7992, 186, 8},        {8008, 186, 6},
    {8025, 187, 8},        {8040, 186, 8},        {8072, 186, 8},
    {8088, 186, 8},        {8104, 186, 8},        {8120, 186, 2},
    {8122, 162, 2},        {8124, 184, 1},        {8126, 120, 1},
    {8136, 160, 4},        {8140, 184, 1},        {8152, 186, 2},
    {8154, 156, 2},        {8168, 186, 2},        {8170, 154, 2},
    {8172, 188, 1},        {8184, 148, 2},        {8186, 150, 2},
    {8188, 184, 1},        {8486, 118, 1},        {8490, 112, 1},
    {8491, 114, 1},        {8498, 12, 1},         {8544, 8, 16},
    {8579, 0, 1},          {9398, 10, 26},        {11264, 24, 47},
    {11360, 0, 1},         {11362, 108, 1},       {11363, 136, 1},
    {11364, 110, 1},       {11367, 1, 6},         {11373, 104, 1},
    {11374, 106, 1},       {11375, 100, 1},       {11376, 102, 1},
    {11378, 0, 1},         {11381, 0, 1},         {11390, 98, 2},
    {11392, 1, 100},       {11499, 1, 4},         {11506, 0, 1},
    {42560, 1, 46},        {42624, 1, 28},        {42786, 1, 14},
    {42802, 1, 62},        {42873, 1, 4},         {42877, 94, 1},
    {42878, 1, 10},        {42891, 0, 1},         {42893, 86, 1},
    {42896, 1, 4},         {42902, 1, 20},        {42922, 80, 1},
    {42923, 76, 1},        {42924, 78, 1},        {42925, 82, 1},
    {42926, 80, 1},        {42928, 90, 1},        {42929, 84, 1},
    {42930, 88, 1},        {42931, 68, 1},        {42932, 1, 6},

    {43888, 92, 80},       {65313, 14, 26},
  };
  static const unsigned short aiOff[] = {
   1,     2,     8,     15,    16,    26,    28,    32,
   34,    37,    38,    40,    48,    63,    64,    69,
   71,    79,    80,    116,   202,   203,   205,   206,
   207,   209,   210,   211,   213,   214,   217,   218,
   219,   775,   928,   7264,  10792, 10795, 23217, 23221,
   23228, 23231, 23254, 23256, 23275, 23278, 26672, 30204,
   35267, 54721, 54753, 54754, 54756, 54787, 54793, 54809,
   57153, 57274, 57921, 58019, 58363, 59314, 59315, 59324,
   59325, 59326, 59332, 59356, 61722, 62528, 65268, 65341,
   65373, 65406, 65408, 65410, 65415, 65424, 65436, 65439,
   65450, 65462, 65472, 65476, 65478, 65480, 65482, 65488,
   65506, 65511, 65514, 65521, 65527, 65528, 65529,

  };

  int ret = c;

  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  if( c<128 ){
................................................................................
    assert( iRes>=0 && c>=aEntry[iRes].iCode );
    p = &aEntry[iRes];
    if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
      ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
      assert( ret>0 );
    }


    if( bRemoveDiacritic ) ret = unicode_remove_diacritic(ret);

  }

  else if( c>=66560 && c<66600 ){
    ret = c + 40;
  }
  else if( c>=66736 && c<66772 ){
    ret = c + 40;







|
|
|
|
|
|
|












|






|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
|
|
|
|
<
>
|
>
|
|
|
|
>
|
|
|
|
|
|
|
<
<
>
>
>







 







|





|
|
|
>
|
<
|
|
>
>
|
<
>
>

>
|
<
<
<
<
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
|












|
|












|







 







|
|












|
|

|
|




|
|
|
|
|



|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
|
|
|


|
|

|
|
|
>
|







|
|
|
|
|
|
|
>







 







>
|
>







57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

130
131
132
133
134
135
136
137
138
139
140
141
142
143
144


145
146
147
148
149
150
151
152
153
154
...
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195

196
197
198
199
200

201
202
203
204
205




206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
...
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
...
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
...
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
    0x0027E802, 0x0027F402, 0x00280403, 0x0028F001, 0x0028F805,
    0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D402,
    0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
    0x002B8802, 0x002BC002, 0x002BE806, 0x002C0403, 0x002CF001,
    0x002CF807, 0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802,
    0x002DC001, 0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804,
    0x002F5C01, 0x002FCC08, 0x00300005, 0x0030F807, 0x00311803,
    0x00312804, 0x00315402, 0x00318802, 0x0031DC01, 0x0031FC01,
    0x00320404, 0x0032F001, 0x0032F807, 0x00331803, 0x00332804,
    0x00335402, 0x00338802, 0x00340004, 0x0034EC02, 0x0034F807,
    0x00351803, 0x00352804, 0x00353C01, 0x00355C01, 0x00358802,
    0x0035E401, 0x00360802, 0x00372801, 0x00373C06, 0x00375801,
    0x00376008, 0x0037C803, 0x0038C401, 0x0038D007, 0x0038FC01,
    0x00391C09, 0x00396802, 0x003AC401, 0x003AD009, 0x003B2006,
    0x003C041F, 0x003CD00C, 0x003DC417, 0x003E340B, 0x003E6424,
    0x003EF80F, 0x003F380D, 0x0040AC14, 0x00412806, 0x00415804,
    0x00417803, 0x00418803, 0x00419C07, 0x0041C404, 0x0042080C,
    0x00423C01, 0x00426806, 0x0043EC01, 0x004D740C, 0x004E400A,
    0x00500001, 0x0059B402, 0x005A0001, 0x005A6C02, 0x005BAC03,
    0x005C4803, 0x005CC805, 0x005D4802, 0x005DC802, 0x005ED023,
    0x005F6004, 0x005F7401, 0x0060000F, 0x00621402, 0x0062A401,
    0x0064800C, 0x0064C00C, 0x00650001, 0x00651002, 0x00677822,
    0x00685C05, 0x00687802, 0x0069540A, 0x0069801D, 0x0069FC01,
    0x006A8007, 0x006AA006, 0x006AC00F, 0x006C0005, 0x006CD011,
    0x006D6823, 0x006E0003, 0x006E840D, 0x006F980E, 0x006FF004,
    0x00709014, 0x0070EC05, 0x0071F802, 0x00730008, 0x00734019,
    0x0073B401, 0x0073D001, 0x0073DC03, 0x0077003A, 0x0077EC05,
    0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403, 0x007FB403,
    0x007FF402, 0x00800065, 0x0081980A, 0x0081E805, 0x00822805,
    0x00828020, 0x00834021, 0x00840002, 0x00840C04, 0x00842002,
    0x00845001, 0x00845803, 0x00847806, 0x00849401, 0x00849C01,
    0x0084A401, 0x0084B801, 0x0084E802, 0x00850005, 0x00852804,
    0x00853C01, 0x00862802, 0x00864297, 0x0091000B, 0x0092704E,
    0x00940276, 0x009E53E0, 0x00ADD820, 0x00AE6068, 0x00B39406,
    0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001, 0x00B5FC01,
    0x00B7804F, 0x00B8C020, 0x00BA001A, 0x00BA6C59, 0x00BC00D6,
    0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807, 0x00C0D802,
    0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01, 0x00C64002,
    0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E, 0x00C94001,
    0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100, 0x01370040,
    0x02924037, 0x0293F802, 0x02983403, 0x0299BC10, 0x029A7802,
    0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402, 0x02A00801,
    0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804, 0x02A1D004,
    0x02A20002, 0x02A2D012, 0x02A33802, 0x02A38012, 0x02A3E003,
    0x02A3F001, 0x02A3FC01, 0x02A4980A, 0x02A51C0D, 0x02A57C01,
    0x02A60004, 0x02A6CC1B, 0x02A77802, 0x02A79401, 0x02A8A40E,
    0x02A90C01, 0x02A93002, 0x02A97004, 0x02A9DC03, 0x02A9EC03,
    0x02AAC001, 0x02AAC803, 0x02AADC02, 0x02AAF802, 0x02AB0401,
    0x02AB7802, 0x02ABAC07, 0x02ABD402, 0x02AD6C01, 0x02AF8C0B,
    0x03600001, 0x036DFC02, 0x036FFC02, 0x037FFC01, 0x03EC7801,
    0x03ECA401, 0x03EEC810, 0x03F4F802, 0x03F7F002, 0x03F8001A,
    0x03F88033, 0x03F95013, 0x03F9A004, 0x03FBFC01, 0x03FC040F,
    0x03FC6807, 0x03FCEC06, 0x03FD6C0B, 0x03FF8007, 0x03FFA007,
    0x03FFE405, 0x04040003, 0x0404DC09, 0x0405E411, 0x04063003,
    0x0406400C, 0x04068001, 0x0407402E, 0x040B8001, 0x040DD805,
    0x040E7C01, 0x040F4001, 0x0415BC01, 0x04215C01, 0x0421DC02,
    0x04247C01, 0x0424FC01, 0x04280403, 0x04281402, 0x04283004,
    0x0428E003, 0x0428FC01, 0x04294009, 0x0429FC01, 0x042B2001,
    0x042B9402, 0x042BC007, 0x042CE407, 0x042E6404, 0x04349004,
    0x043D180B, 0x043D5405, 0x04400003, 0x0440E016, 0x0441FC04,
    0x0442C012, 0x04433401, 0x04440003, 0x04449C0E, 0x04450004,
    0x04451402, 0x0445CC03, 0x04460003, 0x0446CC0E, 0x04471409,
    0x04476C01, 0x04477403, 0x0448B013, 0x044AA401, 0x044B7C0C,
    0x044C0004, 0x044CEC02, 0x044CF807, 0x044D1C02, 0x044D2C03,
    0x044D5C01, 0x044D8802, 0x044D9807, 0x044DC005, 0x0450D412,
    0x04512C05, 0x04516C01, 0x04517402, 0x0452C014, 0x04531801,
    0x0456BC07, 0x0456E020, 0x04577002, 0x0458C014, 0x0459800D,
    0x045AAC0D, 0x045C740F, 0x045CF004, 0x0460B010, 0x04674407,
    0x04676807, 0x04678801, 0x04679001, 0x0468040A, 0x0468CC07,
    0x0468EC0D, 0x0469440B, 0x046A2813, 0x046A7805, 0x0470BC08,
    0x0470E008, 0x04710405, 0x0471C002, 0x04724816, 0x0472A40E,
    0x0474C406, 0x0474E801, 0x0474F002, 0x0474FC07, 0x04751C01,
    0x04762805, 0x04764002, 0x04764C05, 0x047BCC06, 0x047F541D,

    0x047FFC01, 0x0491C005, 0x04D0C009, 0x05A9B802, 0x05ABC006,
    0x05ACC010, 0x05AD1002, 0x05BA5C04, 0x05BD3C01, 0x05BD4437,
    0x05BE3C04, 0x05BF8801, 0x06F27008, 0x074000F6, 0x07440027,
    0x0744A4C0, 0x07480046, 0x074C0057, 0x075B0401, 0x075B6C01,
    0x075BEC01, 0x075C5401, 0x075CD401, 0x075D3C01, 0x075DBC01,
    0x075E2401, 0x075EA401, 0x075F0C01, 0x0760028C, 0x076A6C05,
    0x076A840F, 0x07800007, 0x07802011, 0x07806C07, 0x07808C02,
    0x07809805, 0x0784C007, 0x07853C01, 0x078BB004, 0x078BFC01,
    0x07A34007, 0x07A51007, 0x07A57802, 0x07B2B001, 0x07B2C001,
    0x07B4B801, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F,
    0x07C2C40F, 0x07C3040F, 0x07C34425, 0x07C4405D, 0x07C5C03D,
    0x07C7981D, 0x07C8402C, 0x07C90009, 0x07C94002, 0x07C98006,
    0x07CC03D6, 0x07DB800D, 0x07DBC00B, 0x07DC0074, 0x07DE0059,
    0x07DF800C, 0x07E0000C, 0x07E04038, 0x07E1400A, 0x07E18028,
    0x07E2401E, 0x07E4000C, 0x07E43465, 0x07E5CC04, 0x07E5E829,


    0x07E69406, 0x07E6B81D, 0x07E73487, 0x07E9800E, 0x07E9C004,
    0x07E9E003, 0x07EA0003, 0x07EA4006, 0x38000401, 0x38008060,
    0x380400F0,
  };
  static const unsigned int aAscii[4] = {
    0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
  };

  if( (unsigned int)c<128 ){
    return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
................................................................................
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
** E"). The results of passing a codepoint that corresponds to an
** uppercase letter are undefined.
*/
static int unicode_remove_diacritic(int c, int bComplex){
  static const unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
     3456,  3696,  3712,  3728,  3744,  3766,  3832,  3896,
     3912,  3928,  3944,  3968,  4008,  4040,  4056,  4106,
     4138,  4170,  4202,  4234,  4266,  4296,  4312,  4344,
     4408,  4424,  4442,  4472,  4488,  4504,  6148,  6198,
     6264,  6280,  6360,  6429,  6505,  6529, 61448, 61468,

    61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704,
    61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914,
    61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218,
    62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554,
    62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766,

    62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118,
    63182, 63242, 63274, 63310, 63368, 63390,
  };
#define HIBIT ((unsigned char)0x80)
  static const unsigned char aChar[] = {




    '\0',      'a',       'c',       'e',       'i',       'n',
    'o',       'u',       'y',       'y',       'a',       'c',
    'd',       'e',       'e',       'g',       'h',       'i',
    'j',       'k',       'l',       'n',       'o',       'r',
    's',       't',       'u',       'u',       'w',       'y',
    'z',       'o',       'u',       'a',       'i',       'o',
    'u',       'u'|HIBIT, 'a'|HIBIT, 'g',       'k',       'o',
    'o'|HIBIT, 'j',       'g',       'n',       'a'|HIBIT, 'a',
    'e',       'i',       'o',       'r',       'u',       's',
    't',       'h',       'a',       'e',       'o'|HIBIT, 'o',
    'o'|HIBIT, 'y',       '\0',      '\0',      '\0',      '\0',
    '\0',      '\0',      '\0',      '\0',      'a',       'b',
    'c'|HIBIT, 'd',       'd',       'e'|HIBIT, 'e',       'e'|HIBIT,
    'f',       'g',       'h',       'h',       'i',       'i'|HIBIT,
    'k',       'l',       'l'|HIBIT, 'l',       'm',       'n',
    'o'|HIBIT, 'p',       'r',       'r'|HIBIT, 'r',       's',
    's'|HIBIT, 't',       'u',       'u'|HIBIT, 'v',       'w',
    'w',       'x',       'y',       'z',       'h',       't',
    'w',       'y',       'a',       'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT,
    'e',       'e'|HIBIT, 'e'|HIBIT, 'i',       'o',       'o'|HIBIT,
    'o'|HIBIT, 'o'|HIBIT, 'u',       'u'|HIBIT, 'u'|HIBIT, 'y',
  };

  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
................................................................................
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );
  if( bComplex==0 && (aChar[iRes] & 0x80) ) return c;
  return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F);
}


/*
** Test whether codepoint c is treated as a diacritical modifier
** character by this module.
**
** Only codepoints 768 through 817 (U+0300..U+0331) are candidates; each
** one corresponds to a single bit in a two-word bitmap. The result is
** zero when c lies outside that range or its bit is clear. Otherwise the
** selected (non-zero) bit itself is returned, so callers should test the
** result for truth rather than compare it against 1.
*/
int unicode_is_diacritic(int c){
  enum { DIA_FIRST = 768, DIA_LAST = 817, DIA_WORD_BITS = 32 };
  static const unsigned int aMask[2] = {
    0x08029FDF,   /* codepoints 768..799 */
    0x000361F8,   /* codepoints 800..817 */
  };
  int iOff;

  if( c<DIA_FIRST || c>DIA_LAST ){
    return 0;
  }
  iOff = c - DIA_FIRST;
  return aMask[iOff / DIA_WORD_BITS]
       & ((unsigned int)1 << (iOff % DIA_WORD_BITS));
}


/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int unicode_fold(int c, int eRemoveDiacritic){
  /* Each entry in the following array defines a rule for folding a range
  ** of codepoints to lower case. The rule applies to a range of nRange
  ** codepoints starting at codepoint iCode.
  **
  ** If the least significant bit in flags is clear, then the rule applies
  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
  ** need to be folded). Or, if it is set, then the rule only applies to
................................................................................
  static const struct TableEntry {
    unsigned short iCode;
    unsigned char flags;
    unsigned char nRange;
  } aEntry[] = {
    {65, 14, 26},          {181, 66, 1},          {192, 14, 23},
    {216, 14, 7},          {256, 1, 48},          {306, 1, 6},
    {313, 1, 16},          {330, 1, 46},          {376, 156, 1},
    {377, 1, 6},           {383, 144, 1},         {385, 52, 1},
    {386, 1, 4},           {390, 46, 1},          {391, 0, 1},
    {393, 44, 2},          {395, 0, 1},           {398, 34, 1},
    {399, 40, 1},          {400, 42, 1},          {401, 0, 1},
    {403, 44, 1},          {404, 48, 1},          {406, 54, 1},
    {407, 50, 1},          {408, 0, 1},           {412, 54, 1},
    {413, 56, 1},          {415, 58, 1},          {416, 1, 6},
    {422, 62, 1},          {423, 0, 1},           {425, 62, 1},
    {428, 0, 1},           {430, 62, 1},          {431, 0, 1},
    {433, 60, 2},          {435, 1, 4},           {439, 64, 1},
    {440, 0, 1},           {444, 0, 1},           {452, 2, 1},
    {453, 0, 1},           {455, 2, 1},           {456, 0, 1},
    {458, 2, 1},           {459, 1, 18},          {478, 1, 18},
    {497, 2, 1},           {498, 1, 4},           {502, 162, 1},
    {503, 174, 1},         {504, 1, 40},          {544, 150, 1},
    {546, 1, 18},          {570, 74, 1},          {571, 0, 1},
    {573, 148, 1},         {574, 72, 1},          {577, 0, 1},
    {579, 146, 1},         {580, 30, 1},          {581, 32, 1},
    {582, 1, 10},          {837, 38, 1},          {880, 1, 4},
    {886, 0, 1},           {895, 38, 1},          {902, 20, 1},
    {904, 18, 3},          {908, 28, 1},          {910, 26, 2},
    {913, 14, 17},         {931, 14, 9},          {962, 0, 1},
    {975, 4, 1},           {976, 180, 1},         {977, 182, 1},
    {981, 186, 1},         {982, 184, 1},         {984, 1, 24},
    {1008, 176, 1},        {1009, 178, 1},        {1012, 170, 1},
    {1013, 168, 1},        {1015, 0, 1},          {1017, 192, 1},
    {1018, 0, 1},          {1021, 150, 3},        {1024, 36, 16},
    {1040, 14, 32},        {1120, 1, 34},         {1162, 1, 54},
    {1216, 6, 1},          {1217, 1, 14},         {1232, 1, 96},
    {1329, 24, 38},        {4256, 70, 38},        {4295, 70, 1},
    {4301, 70, 1},         {5112, 190, 6},        {7296, 126, 1},
    {7297, 128, 1},        {7298, 130, 1},        {7299, 134, 2},
    {7301, 132, 1},        {7302, 136, 1},        {7303, 138, 1},
    {7304, 100, 1},        {7312, 142, 43},       {7357, 142, 3},
    {7680, 1, 150},        {7835, 172, 1},        {7838, 120, 1},
    {7840, 1, 96},         {7944, 190, 8},        {7960, 190, 6},
    {7976, 190, 8},        {7992, 190, 8},        {8008, 190, 6},
    {8025, 191, 8},        {8040, 190, 8},        {8072, 190, 8},
    {8088, 190, 8},        {8104, 190, 8},        {8120, 190, 2},
    {8122, 166, 2},        {8124, 188, 1},        {8126, 124, 1},
    {8136, 164, 4},        {8140, 188, 1},        {8152, 190, 2},
    {8154, 160, 2},        {8168, 190, 2},        {8170, 158, 2},
    {8172, 192, 1},        {8184, 152, 2},        {8186, 154, 2},
    {8188, 188, 1},        {8486, 122, 1},        {8490, 116, 1},
    {8491, 118, 1},        {8498, 12, 1},         {8544, 8, 16},
    {8579, 0, 1},          {9398, 10, 26},        {11264, 24, 47},
    {11360, 0, 1},         {11362, 112, 1},       {11363, 140, 1},
    {11364, 114, 1},       {11367, 1, 6},         {11373, 108, 1},
    {11374, 110, 1},       {11375, 104, 1},       {11376, 106, 1},
    {11378, 0, 1},         {11381, 0, 1},         {11390, 102, 2},
    {11392, 1, 100},       {11499, 1, 4},         {11506, 0, 1},
    {42560, 1, 46},        {42624, 1, 28},        {42786, 1, 14},
    {42802, 1, 62},        {42873, 1, 4},         {42877, 98, 1},
    {42878, 1, 10},        {42891, 0, 1},         {42893, 88, 1},
    {42896, 1, 4},         {42902, 1, 20},        {42922, 80, 1},
    {42923, 76, 1},        {42924, 78, 1},        {42925, 84, 1},
    {42926, 80, 1},        {42928, 92, 1},        {42929, 86, 1},
    {42930, 90, 1},        {42931, 68, 1},        {42932, 1, 12},
    {42946, 0, 1},         {42948, 178, 1},       {42949, 82, 1},
    {42950, 96, 1},        {43888, 94, 80},       {65313, 14, 26},
  };
  static const unsigned short aiOff[] = {
   1,     2,     8,     15,    16,    26,    28,    32,
   34,    37,    38,    40,    48,    63,    64,    69,
   71,    79,    80,    116,   202,   203,   205,   206,
   207,   209,   210,   211,   213,   214,   217,   218,
   219,   775,   928,   7264,  10792, 10795, 23217, 23221,
   23228, 23229, 23231, 23254, 23256, 23275, 23278, 26672,
   30152, 30204, 35267, 54721, 54753, 54754, 54756, 54787,
   54793, 54809, 57153, 57274, 57921, 58019, 58363, 59314,
   59315, 59324, 59325, 59326, 59332, 59356, 61722, 62528,
   65268, 65341, 65373, 65406, 65408, 65410, 65415, 65424,
   65436, 65439, 65450, 65462, 65472, 65476, 65478, 65480,
   65482, 65488, 65506, 65511, 65514, 65521, 65527, 65528,
   65529,
  };

  int ret = c;

  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  if( c<128 ){
................................................................................
    assert( iRes>=0 && c>=aEntry[iRes].iCode );
    p = &aEntry[iRes];
    if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
      ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
      assert( ret>0 );
    }

    if( eRemoveDiacritic ){
      ret = unicode_remove_diacritic(ret, eRemoveDiacritic==2);
    }
  }

  else if( c>=66560 && c<66600 ){
    ret = c + 40;
  }
  else if( c>=66736 && c<66772 ){
    ret = c + 40;