Implemented server main loop and started with import command.
[gnupg.git] / util / strgutil.c
1 /* strgutil.c -  string utilities
2  *      Copyright (C) 1998, 2000 Free Software Foundation, Inc.
3  *
4  * This file is part of GnuPG.
5  *
6  * GnuPG is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * GnuPG is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
19  */
20
21 #include <config.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <ctype.h>
25 #include <gcrypt.h>
26 #include "types.h"
27 #include "util.h"
28 #include "memory.h"
29
30
/* Unicode code points for the upper half (bytes 0x80..0xff) of the
 * "koi8-r" charset.  The table is indexed with (byte & 0x7f); it is
 * installed as active_charset by set_native_charset(). */
static ushort koi8_unicode[128] = {
    0x2500,0x2502,0x250c,0x2510,0x2514,0x2518,0x251c,0x2524,
    0x252c,0x2534,0x253c,0x2580,0x2584,0x2588,0x258c,0x2590,
    0x2591,0x2592,0x2593,0x2320,0x25a0,0x2219,0x221a,0x2248,
    0x2264,0x2265,0x00a0,0x2321,0x00b0,0x00b2,0x00b7,0x00f7,
    0x2550,0x2551,0x2552,0x0451,0x2553,0x2554,0x2555,0x2556,
    0x2557,0x2558,0x2559,0x255a,0x255b,0x255c,0x255d,0x255e,
    0x255f,0x2560,0x2561,0x0401,0x2562,0x2563,0x2564,0x2565,
    0x2566,0x2567,0x2568,0x2569,0x256a,0x256b,0x256c,0x00a9,
    0x044e,0x0430,0x0431,0x0446,0x0434,0x0435,0x0444,0x0433,
    0x0445,0x0438,0x0439,0x043a,0x043b,0x043c,0x043d,0x043e,
    0x043f,0x044f,0x0440,0x0441,0x0442,0x0443,0x0436,0x0432,
    0x044c,0x044b,0x0437,0x0448,0x044d,0x0449,0x0447,0x044a,
    0x042e,0x0410,0x0411,0x0426,0x0414,0x0415,0x0424,0x0413,
    0x0425,0x0418,0x0419,0x041a,0x041b,0x041c,0x041d,0x041e,
    0x041f,0x042f,0x0420,0x0421,0x0422,0x0423,0x0416,0x0412,
    0x042c,0x042b,0x0417,0x0428,0x042d,0x0429,0x0427,0x042a
};
49
/* Unicode code points for the upper half (bytes 0x80..0xff) of the
 * "iso-8859-2" charset.  The table is indexed with (byte & 0x7f); it is
 * installed as active_charset by set_native_charset(). */
static ushort latin2_unicode[128] = {
    0x0080,0x0081,0x0082,0x0083,0x0084,0x0085,0x0086,0x0087,
    0x0088,0x0089,0x008A,0x008B,0x008C,0x008D,0x008E,0x008F,
    0x0090,0x0091,0x0092,0x0093,0x0094,0x0095,0x0096,0x0097,
    0x0098,0x0099,0x009A,0x009B,0x009C,0x009D,0x009E,0x009F,
    0x00A0,0x0104,0x02D8,0x0141,0x00A4,0x013D,0x015A,0x00A7,
    0x00A8,0x0160,0x015E,0x0164,0x0179,0x00AD,0x017D,0x017B,
    0x00B0,0x0105,0x02DB,0x0142,0x00B4,0x013E,0x015B,0x02C7,
    0x00B8,0x0161,0x015F,0x0165,0x017A,0x02DD,0x017E,0x017C,
    0x0154,0x00C1,0x00C2,0x0102,0x00C4,0x0139,0x0106,0x00C7,
    0x010C,0x00C9,0x0118,0x00CB,0x011A,0x00CD,0x00CE,0x010E,
    0x0110,0x0143,0x0147,0x00D3,0x00D4,0x0150,0x00D6,0x00D7,
    0x0158,0x016E,0x00DA,0x0170,0x00DC,0x00DD,0x0162,0x00DF,
    0x0155,0x00E1,0x00E2,0x0103,0x00E4,0x013A,0x0107,0x00E7,
    0x010D,0x00E9,0x0119,0x00EB,0x011B,0x00ED,0x00EE,0x010F,
    0x0111,0x0144,0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7,
    0x0159,0x016F,0x00FA,0x0171,0x00FC,0x00FD,0x0163,0x02D9
};
68
/* Unicode code points for the upper half (bytes 0x80..0xff) of the
 * "ibm850" charset.  The table is indexed with (byte & 0x7f); it is
 * installed as active_charset by set_native_charset(), which selects
 * this same table for the name "ibm437" as well. */
static ushort ibm850_unicode[128] = {
    0x00c7,0x00fc,0x00e9,0x00e2,0x00e4,0x00e0,0x00e5,0x00e7,
    0x00ea,0x00eb,0x00e8,0x00ef,0x00ee,0x00ec,0x00c4,0x00c5,
    0x00c9,0x00e6,0x00c6,0x00f4,0x00f6,0x00f2,0x00fb,0x00f9,
    0x00ff,0x00d6,0x00dc,0x00f8,0x00a3,0x00d8,0x00d7,0x0192,
    0x00e1,0x00ed,0x00f3,0x00fa,0x00f1,0x00d1,0x00aa,0x00ba,
    0x00bf,0x00ae,0x00ac,0x00bd,0x00bc,0x00a1,0x00ab,0x00bb,
    0x2591,0x2592,0x2593,0x2502,0x2524,0x00c1,0x00c2,0x00c0,
    0x00a9,0x2563,0x2551,0x2557,0x255d,0x00a2,0x00a5,0x2510,
    0x2514,0x2534,0x252c,0x251c,0x2500,0x253c,0x00e3,0x00c3,
    0x255a,0x2554,0x2569,0x2566,0x2560,0x2550,0x256c,0x00a4,
    0x00f0,0x00d0,0x00ca,0x00cb,0x00c8,0x0131,0x00cd,0x00ce,
    0x00cf,0x2518,0x250c,0x2588,0x2584,0x00a6,0x00cc,0x2580,
    0x00d3,0x00df,0x00d4,0x00d2,0x00f5,0x00d5,0x00b5,0x00fe,
    0x00de,0x00da,0x00db,0x00d9,0x00fd,0x00dd,0x00af,0x00b4,
    0x00ad,0x00b1,0x2017,0x00be,0x00b6,0x00a7,0x00f7,0x00b8,
    0x00b0,0x00a8,0x00b7,0x00b9,0x00b3,0x00b2,0x25a0,0x00a0,
};
87
/* Set to 1 once the native charset has been queried or explicitly
 * selected, so that get_native_charset() runs the query at most once. */
static int query_native_charset_done = 0;
/* Name of the charset currently in effect; iso-8859-1 until changed. */
static const char *active_charset_name = "iso-8859-1";
/* Translation table of the charset in effect (indexed by byte & 0x7f),
 * or NULL for iso-8859-1 where the byte value is the code point. */
static ushort *active_charset = NULL;
91
92
93 void
94 free_strlist( STRLIST sl )
95 {
96     STRLIST sl2;
97
98     for(; sl; sl = sl2 ) {
99         sl2 = sl->next;
100         gcry_free(sl);
101     }
102 }
103
104
105 STRLIST
106 add_to_strlist( STRLIST *list, const char *string )
107 {
108     STRLIST sl;
109
110     sl = gcry_xmalloc( sizeof *sl + strlen(string));
111     sl->flags = 0;
112     strcpy(sl->d, string);
113     sl->next = *list;
114     *list = sl;
115     return sl;
116 }
117
118 /****************
119  * ame as add_to_strlist() but if is_utf8 is *not* set a conversion
120  * to UTF8 is done
121  */
122 STRLIST
123 add_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
124 {
125     STRLIST sl;
126
127     if( is_utf8 )
128         sl = add_to_strlist( list, string );
129     else {
130         char *p = native_to_utf8( string );
131         sl = add_to_strlist( list, p );
132         gcry_free( p );
133     }
134     return sl;
135 }
136
137 STRLIST
138 append_to_strlist( STRLIST *list, const char *string )
139 {
140     STRLIST r, sl;
141
142     sl = gcry_xmalloc( sizeof *sl + strlen(string));
143     sl->flags = 0;
144     strcpy(sl->d, string);
145     sl->next = NULL;
146     if( !*list )
147         *list = sl;
148     else {
149         for( r = *list; r->next; r = r->next )
150             ;
151         r->next = sl;
152     }
153     return sl;
154 }
155
156 STRLIST
157 append_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
158 {
159     STRLIST sl;
160
161     if( is_utf8 )
162         sl = append_to_strlist( list, string );
163     else {
164         char *p = native_to_utf8( string );
165         sl = append_to_strlist( list, p );
166         gcry_free( p );
167     }
168     return sl;
169 }
170
171
172 STRLIST
173 strlist_prev( STRLIST head, STRLIST node )
174 {
175     STRLIST n;
176
177     for(n=NULL; head && head != node; head = head->next )
178         n = head;
179     return n;
180 }
181
182 STRLIST
183 strlist_last( STRLIST node )
184 {
185     if( node )
186         for( ; node->next ; node = node->next )
187             ;
188     return node;
189 }
190
191
192
/* Count how often the character C occurs in the NUL terminated
 * STRING.  The terminating NUL itself is never counted. */
int
string_count_chr( const char *string, int c )
{
    const char *cursor = string;
    int hits = 0;

    while( *cursor ) {
        if( *cursor == c )
            hits++;
        cursor++;
    }
    return hits;
}
202
203
/****************
 * Try to find out which charset the console uses.
 * Returns a name which set_native_charset() understands or NULL if
 * the charset could not be determined.  The query is only implemented
 * for __MINGW32__ builds; all other builds always return NULL.
 */
static const char*
query_native_charset(void)
{
  #ifdef __MINGW32__
    unsigned int cp;

    cp = GetConsoleOutputCP();
    if( cp != GetConsoleCP() ) {
        /* The input charset is not equal to the output charset.
         * Our system depends on both being the same and therefore
         * we set the input code page to the output code page
         * (this won't work on Windows 95) */
        if( !SetConsoleCP( cp ) )
            log_info("can't set Input-CP to Output-CP: %d\n",
                                                    (int)GetLastError() );
    }
    /* we could read the registry, but this seems to be too much work */
    switch( cp ) {
      case 850:  return "ibm850";
      case 437:  return "ibm437";
      case 1252: return "iso-8859-1";
      default:
        log_info("unknown MS-Windows CodePage %u "
                 "- trying to switch to Latin-1\n", cp );
        /* Try to set latin-1.  Both calls return nonzero on success,
         * so Latin-1 may only be reported if both of them succeeded. */
        if( SetConsoleOutputCP( 1252 ) ) {
            if( SetConsoleCP( 1252 ) )
                return "iso-8859-1";
            /* input could not be switched: back off */
            SetConsoleOutputCP( cp );
        }
        log_info("no information about MS-Windows CodePage %u\n", cp );
        return NULL;
    }
  #else
    return NULL; /* unknown */
  #endif
}
241
242
243 const char*
244 get_native_charset()
245 {
246     if( !query_native_charset_done ) {
247         const char *s;
248
249         query_native_charset_done = 1;
250         s = query_native_charset();
251         if( s )
252             set_native_charset(s);
253     }
254
255     return active_charset_name;
256 }
257
258
259 int
260 set_native_charset( const char *newset )
261 {
262     query_native_charset_done = 1; /* don't do this when we want to set one*/
263     if( !stricmp( newset, "iso-8859-1" ) ) {
264         active_charset_name = "iso-8859-1";
265         active_charset = NULL;
266     }
267     else if( !stricmp( newset, "iso-8859-2" ) ) {
268         active_charset_name = "iso-8859-2";
269         active_charset = latin2_unicode;
270     }
271     else if( !stricmp( newset, "koi8-r" ) ) {
272         active_charset_name = "koi8-r";
273         active_charset = koi8_unicode;
274     }
275     else if( !stricmp( newset, "ibm850" ) || !stricmp( newset, "ibm437" ) ) {
276         active_charset_name = "ibm850";
277         active_charset = ibm850_unicode;
278     }
279     else
280         return GPGERR_GENERAL;
281     return 0;
282 }
283
284
285 /****************
286  * Convert string, which is in native encoding to UTF8 and return the
287  * new allocated UTF8 string.
288  */
289 char *
290 native_to_utf8( const char *string )
291 {
292     const byte *s;
293     char *buffer;
294     byte *p;
295     size_t length=0;
296
297     if( active_charset ) {
298         for(s=string; *s; s++ ) {
299             length++;
300             if( *s & 0x80 )
301                 length += 2; /* we may need 3 bytes */
302         }
303         buffer = gcry_xmalloc( length + 1 );
304         for(p=buffer, s=string; *s; s++ ) {
305             if( *s & 0x80 ) {
306                 ushort val = active_charset[ *s & 0x7f ];
307                 if( val < 0x0800 ) {
308                     *p++ = 0xc0 | ( (val >> 6) & 0x1f );
309                     *p++ = 0x80 | (  val & 0x3f );
310                 }
311                 else {
312                     *p++ = 0xe0 | ( (val >> 12) & 0x0f );
313                     *p++ = 0x80 | ( (val >>  6) & 0x3f );
314                     *p++ = 0x80 | (  val & 0x3f );
315                 }
316             }
317             else
318                 *p++ = *s;
319         }
320         *p = 0;
321     }
322     else {
323         for(s=string; *s; s++ ) {
324             length++;
325             if( *s & 0x80 )
326                 length++;
327         }
328         buffer = gcry_xmalloc( length + 1 );
329         for(p=buffer, s=string; *s; s++ ) {
330             if( *s & 0x80 ) {
331                 *p++ = 0xc0 | ((*s >> 6) & 3);
332                 *p++ = 0x80 | ( *s & 0x3f );
333             }
334             else
335                 *p++ = *s;
336         }
337         *p = 0;
338     }
339     return buffer;
340 }
341
342
343 /****************
344  * Convert string, which is in UTF8 to native encoding.
345  * illegal encodings by some "\xnn" and quote all control characters
346  */
347 char *
348 utf8_to_native( const char *string, size_t length )
349 {
350     int nleft;
351     int i;
352     byte encbuf[7];
353     int encidx;
354     const byte *s;
355     size_t n;
356     byte *buffer = NULL, *p = NULL;
357     unsigned long val = 0;
358     size_t slen;
359     int resync = 0;
360
361     /* 1. pass (p==NULL): count the extended utf-8 characters */
362     /* 2. pass (p!=NULL): create string */
363     for( ;; ) {
364         for( slen=length, nleft=encidx=0, n=0, s=string; slen; s++, slen-- ) {
365             if( resync ) {
366                 if( !(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)) ) {
367                     /* still invalid */
368                     if( p ) {
369                         sprintf(p, "\\x%02x", *s );
370                         p += 4;
371                     }
372                     n += 4;
373                     continue;
374                 }
375                 resync = 0;
376             }
377             if( !nleft ) {
378                 if( !(*s & 0x80) ) { /* plain ascii */
379                     if( iscntrl( *s ) ) {
380                         n++;
381                         if( p )
382                             *p++ = '\\';
383                         switch( *s ) {
384                           case '\n': n++; if( p ) *p++ = 'n'; break;
385                           case '\r': n++; if( p ) *p++ = 'r'; break;
386                           case '\f': n++; if( p ) *p++ = 'f'; break;
387                           case '\v': n++; if( p ) *p++ = 'v'; break;
388                           case '\b': n++; if( p ) *p++ = 'b'; break;
389                           case   0 : n++; if( p ) *p++ = '0'; break;
390                           default: n += 3;
391                                    sprintf( p, "x%02x", *s );
392                                    if ( p )
393                                        p += 3;
394                                    break;
395                         }
396                     }
397                     else {
398                         if( p ) *p++ = *s;
399                         n++;
400                     }
401                 }
402                 else if( (*s & 0xe0) == 0xc0 ) { /* 110x xxxx */
403                     val = *s & 0x1f;
404                     nleft = 1;
405                     encbuf[encidx=0] = *s;
406                 }
407                 else if( (*s & 0xf0) == 0xe0 ) { /* 1110 xxxx */
408                     val = *s & 0x0f;
409                     nleft = 2;
410                     encbuf[encidx=0] = *s;
411                 }
412                 else if( (*s & 0xf8) == 0xf0 ) { /* 1111 0xxx */
413                     val = *s & 0x07;
414                     nleft = 3;
415                     encbuf[encidx=0] = *s;
416                 }
417                 else if( (*s & 0xfc) == 0xf8 ) { /* 1111 10xx */
418                     val = *s & 0x03;
419                     nleft = 4;
420                     encbuf[encidx=0] = *s;
421                 }
422                 else if( (*s & 0xfe) == 0xfc ) { /* 1111 110x */
423                     val = *s & 0x01;
424                     nleft = 5;
425                     encbuf[encidx=0] = *s;
426                 }
427                 else {  /* invalid encoding: print as \xnn */
428                     if( p ) {
429                         sprintf(p, "\\x%02x", *s );
430                         p += 4;
431                     }
432                     n += 4;
433                     resync = 1;
434                 }
435             }
436             else if( *s < 0x80 || *s >= 0xc0 ) { /* invalid */
437                 if( p ) {
438                     sprintf(p, "\\x%02x", *s );
439                     p += 4;
440                 }
441                 n += 4;
442                 nleft = 0;
443                 resync = 1;
444             }
445             else {
446                 encbuf[++encidx] = *s;
447                 val <<= 6;
448                 val |= *s & 0x3f;
449                 if( !--nleft ) { /* ready */
450                     if( active_charset ) { /* table lookup */
451                         for(i=0; i < 128; i++ ) {
452                             if( active_charset[i] == val )
453                                 break;
454                         }
455                         if( i < 128 ) { /* we can print this one */
456                             if( p ) *p++ = i+128;
457                             n++;
458                         }
459                         else { /* we do not have a translation: print utf8 */
460                             if( p ) {
461                                 for(i=0; i < encidx; i++ ) {
462                                     sprintf(p, "\\x%02x", encbuf[i] );
463                                     p += 4;
464                                 }
465                             }
466                             n += encidx*4;
467                         }
468                     }
469                     else { /* native set */
470                         if( val >= 0x80 && val < 256 ) {
471                             n++;    /* we can simply print this character */
472                             if( p ) *p++ = val;
473                         }
474                         else { /* we do not have a translation: print utf8 */
475                             if( p ) {
476                                 for(i=0; i < encidx; i++ ) {
477                                     sprintf(p, "\\x%02x", encbuf[i] );
478                                     p += 4;
479                                 }
480                             }
481                             n += encidx*4;
482                         }
483                     }
484
485                 }
486
487             }
488         }
489         if( !buffer ) { /* allocate the buffer after the first pass */
490             buffer = p = gcry_xmalloc( n + 1 );
491         }
492         else {
493             *p = 0; /* make a string */
494             return buffer;
495         }
496     }
497 }
498
499
500
501
502
503