See ChangeLog: Tue Oct 26 14:10:21 CEST 1999 Werner Koch
[gnupg.git] / util / strgutil.c
1 /* strgutil.c -  string utilities
2  *      Copyright (C) 1998 Free Software Foundation, Inc.
3  *
4  * This file is part of GnuPG.
5  *
6  * GnuPG is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * GnuPG is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
19  */
20
21 #include <config.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <ctype.h>
25 #include "types.h"
26 #include "util.h"
27 #include "memory.h"
28
29
30 static ushort koi8_unicode[128] = {
31     0x2500,0x2502,0x250c,0x2510,0x2514,0x2518,0x251c,0x2524,
32     0x252c,0x2534,0x253c,0x2580,0x2584,0x2588,0x258c,0x2590,
33     0x2591,0x2592,0x2593,0x2320,0x25a0,0x2219,0x221a,0x2248,
34     0x2264,0x2265,0x00a0,0x2321,0x00b0,0x00b2,0x00b7,0x00f7,
35     0x2550,0x2551,0x2552,0x0451,0x2553,0x2554,0x2555,0x2556,
36     0x2557,0x2558,0x2559,0x255a,0x255b,0x255c,0x255d,0x255e,
37     0x255f,0x2560,0x2561,0x0401,0x2562,0x2563,0x2564,0x2565,
38     0x2566,0x2567,0x2568,0x2569,0x256a,0x256b,0x256c,0x00a9,
39     0x044e,0x0430,0x0431,0x0446,0x0434,0x0435,0x0444,0x0433,
40     0x0445,0x0438,0x0439,0x043a,0x043b,0x043c,0x043d,0x043e,
41     0x043f,0x044f,0x0440,0x0441,0x0442,0x0443,0x0436,0x0432,
42     0x044c,0x044b,0x0437,0x0448,0x044d,0x0449,0x0447,0x044a,
43     0x042e,0x0410,0x0411,0x0426,0x0414,0x0415,0x0424,0x0413,
44     0x0425,0x0418,0x0419,0x041a,0x041b,0x041c,0x041d,0x041e,
45     0x041f,0x042f,0x0420,0x0421,0x0422,0x0423,0x0416,0x0412,
46     0x042c,0x042b,0x0417,0x0428,0x042d,0x0429,0x0427,0x042a
47 };
48
49 static ushort latin2_unicode[128] = {
50     0x0080,0x0081,0x0082,0x0083,0x0084,0x0085,0x0086,0x0087,
51     0x0088,0x0089,0x008A,0x008B,0x008C,0x008D,0x008E,0x008F,
52     0x0090,0x0091,0x0092,0x0093,0x0094,0x0095,0x0096,0x0097,
53     0x0098,0x0099,0x009A,0x009B,0x009C,0x009D,0x009E,0x009F,
54     0x00A0,0x0104,0x02D8,0x0141,0x00A4,0x013D,0x015A,0x00A7,
55     0x00A8,0x0160,0x015E,0x0164,0x0179,0x00AD,0x017D,0x017B,
56     0x00B0,0x0105,0x02DB,0x0142,0x00B4,0x013E,0x015B,0x02C7,
57     0x00B8,0x0161,0x015F,0x0165,0x017A,0x02DD,0x017E,0x017C,
58     0x0154,0x00C1,0x00C2,0x0102,0x00C4,0x0139,0x0106,0x00C7,
59     0x010C,0x00C9,0x0118,0x00CB,0x011A,0x00CD,0x00CE,0x010E,
60     0x0110,0x0143,0x0147,0x00D3,0x00D4,0x0150,0x00D6,0x00D7,
61     0x0158,0x016E,0x00DA,0x0170,0x00DC,0x00DD,0x0162,0x00DF,
62     0x0155,0x00E1,0x00E2,0x0103,0x00E4,0x013A,0x0107,0x00E7,
63     0x010D,0x00E9,0x0119,0x00EB,0x011B,0x00ED,0x00EE,0x010F,
64     0x0111,0x0144,0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7,
65     0x0159,0x016F,0x00FA,0x0171,0x00FC,0x00FD,0x0163,0x02D9
66 };
67
68 static ushort ibm850_unicode[128] = {
69     0x00c7,0x00fc,0x00e9,0x00e2,0x00e4,0x00e0,0x00e5,0x00e7,
70     0x00ea,0x00eb,0x00e8,0x00ef,0x00ee,0x00ec,0x00c4,0x00c5,
71     0x00c9,0x00e6,0x00c6,0x00f4,0x00f6,0x00f2,0x00fb,0x00f9,
72     0x00ff,0x00d6,0x00dc,0x00f8,0x00a3,0x00d8,0x00d7,0x0192,
73     0x00e1,0x00ed,0x00f3,0x00fa,0x00f1,0x00d1,0x00aa,0x00ba,
74     0x00bf,0x00ae,0x00ac,0x00bd,0x00bc,0x00a1,0x00ab,0x00bb,
75     0x2591,0x2592,0x2593,0x2502,0x2524,0x00c1,0x00c2,0x00c0,
76     0x00a9,0x2563,0x2551,0x2557,0x255d,0x00a2,0x00a5,0x2510,
77     0x2514,0x2534,0x252c,0x251c,0x2500,0x253c,0x00e3,0x00c3,
78     0x255a,0x2554,0x2569,0x2566,0x2560,0x2550,0x256c,0x00a4,
79     0x00f0,0x00d0,0x00ca,0x00cb,0x00c8,0x0131,0x00cd,0x00ce,
80     0x00cf,0x2518,0x250c,0x2588,0x2584,0x00a6,0x00cc,0x2580,
81     0x00d3,0x00df,0x00d4,0x00d2,0x00f5,0x00d5,0x00b5,0x00fe,
82     0x00de,0x00da,0x00db,0x00d9,0x00fd,0x00dd,0x00af,0x00b4,
83     0x00ad,0x00b1,0x2017,0x00be,0x00b6,0x00a7,0x00f7,0x00b8,
84     0x00b0,0x00a8,0x00b7,0x00b9,0x00b3,0x00b2,0x25a0,0x00a0,
85 };
86
87 static int query_native_charset_done = 0;
88 static const char *active_charset_name = "iso-8859-1";
89 static ushort *active_charset = NULL;
90
91
92 void
93 free_strlist( STRLIST sl )
94 {
95     STRLIST sl2;
96
97     for(; sl; sl = sl2 ) {
98         sl2 = sl->next;
99         m_free(sl);
100     }
101 }
102
103
104 STRLIST
105 add_to_strlist( STRLIST *list, const char *string )
106 {
107     STRLIST sl;
108
109     sl = m_alloc( sizeof *sl + strlen(string));
110     sl->flags = 0;
111     strcpy(sl->d, string);
112     sl->next = *list;
113     *list = sl;
114     return sl;
115 }
116
117 /****************
118  * ame as add_to_strlist() but if is_utf8 is *not* set a conversion
119  * to UTF8 is done
120  */
121 STRLIST
122 add_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
123 {
124     STRLIST sl;
125
126     if( is_utf8 )
127         sl = add_to_strlist( list, string );
128     else {
129         char *p = native_to_utf8( string );
130         sl = add_to_strlist( list, p );
131         m_free( p );
132     }
133     return sl;
134 }
135
136 STRLIST
137 append_to_strlist( STRLIST *list, const char *string )
138 {
139     STRLIST r, sl;
140
141     sl = m_alloc( sizeof *sl + strlen(string));
142     sl->flags = 0;
143     strcpy(sl->d, string);
144     sl->next = NULL;
145     if( !*list )
146         *list = sl;
147     else {
148         for( r = *list; r->next; r = r->next )
149             ;
150         r->next = sl;
151     }
152     return sl;
153 }
154
155 STRLIST
156 append_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
157 {
158     STRLIST sl;
159
160     if( is_utf8 )
161         sl = append_to_strlist( list, string );
162     else {
163         char *p = native_to_utf8( string );
164         sl = append_to_strlist( list, p );
165         m_free( p );
166     }
167     return sl;
168 }
169
170
171 STRLIST
172 strlist_prev( STRLIST head, STRLIST node )
173 {
174     STRLIST n;
175
176     for(n=NULL; head && head != node; head = head->next )
177         n = head;
178     return n;
179 }
180
181 STRLIST
182 strlist_last( STRLIST node )
183 {
184     if( node )
185         for( ; node->next ; node = node->next )
186             ;
187     return node;
188 }
189
190
191
/****************
 * Look for the substring SUB in the buffer BUF of length BUFLEN and
 * return a pointer to the first occurrence in BUF or NULL if not found.
 * Comparison is case-insensitive.
 * BUF may contain 0 bytes; SUB is a 0 terminated string and is never
 * read beyond its terminator.  An empty SUB is never found.
 */
const char *
memistr( const char *buf, size_t buflen, const char *sub )
{
    const unsigned char *hay = (const unsigned char *)buf;
    const unsigned char *needle = (const unsigned char *)sub;
    size_t sublen = strlen(sub);
    size_t pos, i;

    if( !sublen || sublen > buflen )
        return NULL;

    /* try every start position which leaves room for the whole of SUB */
    for( pos = 0; pos <= buflen - sublen; pos++ ) {
        for( i = 0; i < sublen; i++ )
            if( toupper(hay[pos+i]) != toupper(needle[i]) )
                break;
        if( i == sublen )
            return buf + pos;
    }

    return NULL ;
}
215
/****************
 * Like strncpy(), but at most N-1 characters are copied and a '\0' is
 * appended.  If N is 0 nothing is done.  If DEST is NULL the memory is
 * obtained via m_alloc; if there is not enough memory available then,
 * the function aborts.
 */
char *
mem2str( char *dest , const void *src , size_t n )
{
    const char *in = src;
    char *out;
    size_t room;

    if( !n )
        return dest;
    if( !dest )
        dest = m_alloc( n );

    out = dest;
    /* one byte of the N is reserved for the terminator */
    for( room = n - 1; room && *in; room-- )
        *out++ = *in++;
    *out = '\0';

    return dest;
}
240
241
/****************
 * Remove leading and trailing white spaces from STR.  This is done
 * in place; STR is returned.
 */
char *
trim_spaces( char *str )
{
    char *src = str;
    char *dst = str;
    char *tail = NULL;  /* start of the run of spaces last copied, if any */

    /* skip the leading spaces */
    while( *src && isspace( (unsigned char)*src ) )
        src++;

    /* move the rest to the front and remember where trailing spaces begin */
    while( (*dst = *src) != '\0' ) {
        if( !isspace( (unsigned char)*src ) )
            tail = NULL;
        else if( !tail )
            tail = dst;
        dst++;
        src++;
    }
    if( tail )
        *tail = '\0';   /* cut off the trailing spaces */

    return str;
}
267
268
269
270 unsigned
271 trim_trailing_chars( byte *line, unsigned len, const char *trimchars )
272 {
273     byte *p, *mark;
274     unsigned n;
275
276     for(mark=NULL, p=line, n=0; n < len; n++, p++ ) {
277         if( strchr(trimchars, *p ) ) {
278             if( !mark )
279                 mark = p;
280         }
281         else
282             mark = NULL;
283     }
284
285     if( mark ) {
286         *mark = 0;
287         return mark - line;
288     }
289     return len;
290 }
291
292 /****************
293  * remove trailing white spaces and return the length of the buffer
294  */
295 unsigned
296 trim_trailing_ws( byte *line, unsigned len )
297 {
298     return trim_trailing_chars( line, len, " \t\r\n" );
299 }
300
301
302
/****************
 * Return the number of occurrences of the character C in STRING.
 * As with strchr(), C is converted to a char before the comparison, so
 * that characters with the high bit set are also found when they are
 * passed as values in the range 0x80..0xff.  The terminating 0 is not
 * counted.
 */
int
string_count_chr( const char *string, int c )
{
    const char wanted = (char)c;
    int count = 0;

    for( ; *string; string++ )
        if( *string == wanted )
            count++;
    return count;
}
312
313
/****************
 * Ask the system for the charset used by the console.
 * Returns the name of the charset or NULL if it is not known.  Only
 * the MingW32 build queries anything; all other builds return NULL.
 */
static const char*
query_native_charset(void)
{
  #ifdef __MINGW32__
    unsigned int cp;

    cp = GetConsoleOutputCP();
    if( cp != GetConsoleCP() ) {
        /* The input charset is not equal to the output charset.
         * Our system depends on both being the same and therefore we
         * set the input code page to the output one (this won't work
         * on Windows 95) */
        if( !SetConsoleCP( cp ) )
            log_info("can't set Input-CP to Output-CP: %d\n",
                                                    (int)GetLastError() );
    }
    /* we could read the registry, but this seems to be too much work */
    switch( cp ) {
      case 850:  return "ibm850";
      case 437:  return "ibm437";
      case 1252: return "iso-8859-1";
      default:
        log_info("unknown MS-Windows CodePage %u "
                 "- trying to switch to Latin-1\n", cp );
        /* Try to set latin-1.  Both functions return nonzero on
         * success, so we may only claim latin-1 if both succeeded. */
        if( SetConsoleOutputCP( 1252 ) ) {
            if( SetConsoleCP( 1252 ) )
                return "iso-8859-1";
            /* the input side failed: back off to the old output CP */
            SetConsoleOutputCP( cp );
        }
        log_info("no information about MS-Windows CodePage %u\n", cp );
        return NULL;
    }
  #else
    return NULL; /* unknown */
  #endif
}
351
352
353 const char*
354 get_native_charset()
355 {
356     if( !query_native_charset_done ) {
357         const char *s;
358
359         query_native_charset_done = 1;
360         s = query_native_charset();
361         if( s )
362             set_native_charset(s);
363     }
364
365     return active_charset_name;
366 }
367
368
369 int
370 set_native_charset( const char *newset )
371 {
372     query_native_charset_done = 1; /* don't do this when we want to set one*/
373     if( !stricmp( newset, "iso-8859-1" ) ) {
374         active_charset_name = "iso-8859-1";
375         active_charset = NULL;
376     }
377     else if( !stricmp( newset, "iso-8859-2" ) ) {
378         active_charset_name = "iso-8859-2";
379         active_charset = latin2_unicode;
380     }
381     else if( !stricmp( newset, "koi8-r" ) ) {
382         active_charset_name = "koi8-r";
383         active_charset = koi8_unicode;
384     }
385     else if( !stricmp( newset, "ibm850" ) || !stricmp( newset, "ibm437" ) ) {
386         active_charset_name = "ibm850";
387         active_charset = ibm850_unicode;
388     }
389     else
390         return G10ERR_GENERAL;
391     return 0;
392 }
393
394
395 /****************
396  * Convert string, which is in native encoding to UTF8 and return the
397  * new allocated UTF8 string.
398  */
399 char *
400 native_to_utf8( const char *string )
401 {
402     const byte *s;
403     char *buffer;
404     byte *p;
405     size_t length=0;
406
407     if( active_charset ) {
408         for(s=string; *s; s++ ) {
409             length++;
410             if( *s & 0x80 )
411                 length += 2; /* we may need 3 bytes */
412         }
413         buffer = m_alloc( length + 1 );
414         for(p=buffer, s=string; *s; s++ ) {
415             if( *s & 0x80 ) {
416                 ushort val = active_charset[ *s & 0x7f ];
417                 if( val < 0x0800 ) {
418                     *p++ = 0xc0 | ( (val >> 6) & 0x1f );
419                     *p++ = 0x80 | (  val & 0x3f );
420                 }
421                 else {
422                     *p++ = 0xe0 | ( (val >> 12) & 0x0f );
423                     *p++ = 0x80 | ( (val >>  6) & 0x3f );
424                     *p++ = 0x80 | (  val & 0x3f );
425                 }
426             }
427             else
428                 *p++ = *s;
429         }
430         *p = 0;
431     }
432     else {
433         for(s=string; *s; s++ ) {
434             length++;
435             if( *s & 0x80 )
436                 length++;
437         }
438         buffer = m_alloc( length + 1 );
439         for(p=buffer, s=string; *s; s++ ) {
440             if( *s & 0x80 ) {
441                 *p++ = 0xc0 | ((*s >> 6) & 3);
442                 *p++ = 0x80 | ( *s & 0x3f );
443             }
444             else
445                 *p++ = *s;
446         }
447         *p = 0;
448     }
449     return buffer;
450 }
451
452
453 /****************
454  * Convert string, which is in UTF8 to native encoding.
455  * illegal encodings by some "\xnn" and quote all control characters
456  */
457 char *
458 utf8_to_native( const char *string, size_t length )
459 {
460     int nleft;
461     int i;
462     byte encbuf[7];
463     int encidx;
464     const byte *s;
465     size_t n;
466     byte *buffer = NULL, *p = NULL;
467     unsigned long val = 0;
468     size_t slen;
469     int resync = 0;
470
471     /* 1. pass (p==NULL): count the extended utf-8 characters */
472     /* 2. pass (p!=NULL): create string */
473     for( ;; ) {
474         for( slen=length, nleft=encidx=0, n=0, s=string; slen; s++, slen-- ) {
475             if( resync ) {
476                 if( !(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)) ) {
477                     /* still invalid */
478                     if( p ) {
479                         sprintf(p, "\\x%02x", *s );
480                         p += 4;
481                     }
482                     n += 4;
483                     continue;
484                 }
485                 resync = 0;
486             }
487             if( !nleft ) {
488                 if( !(*s & 0x80) ) { /* plain ascii */
489                     if( iscntrl( *s ) ) {
490                         n++;
491                         if( p )
492                             *p++ = '\\';
493                         switch( *s ) {
494                           case '\n': n++; if( p ) *p++ = 'n'; break;
495                           case '\r': n++; if( p ) *p++ = 'r'; break;
496                           case '\f': n++; if( p ) *p++ = 'f'; break;
497                           case '\v': n++; if( p ) *p++ = 'v'; break;
498                           case '\b': n++; if( p ) *p++ = 'b'; break;
499                           case   0 : n++; if( p ) *p++ = '0'; break;
500                           default: n += 3;
501                                    sprintf( p, "x%02x", *s );
502                                    p += 3;
503                                    break;
504                         }
505                     }
506                     else {
507                         if( p ) *p++ = *s;
508                         n++;
509                     }
510                 }
511                 else if( (*s & 0xe0) == 0xc0 ) { /* 110x xxxx */
512                     val = *s & 0x1f;
513                     nleft = 1;
514                     encbuf[encidx=0] = *s;
515                 }
516                 else if( (*s & 0xf0) == 0xe0 ) { /* 1110 xxxx */
517                     val = *s & 0x0f;
518                     nleft = 2;
519                     encbuf[encidx=0] = *s;
520                 }
521                 else if( (*s & 0xf8) == 0xf0 ) { /* 1111 0xxx */
522                     val = *s & 0x07;
523                     nleft = 3;
524                     encbuf[encidx=0] = *s;
525                 }
526                 else if( (*s & 0xfc) == 0xf8 ) { /* 1111 10xx */
527                     val = *s & 0x03;
528                     nleft = 4;
529                     encbuf[encidx=0] = *s;
530                 }
531                 else if( (*s & 0xfe) == 0xfc ) { /* 1111 110x */
532                     val = *s & 0x01;
533                     nleft = 5;
534                     encbuf[encidx=0] = *s;
535                 }
536                 else {  /* invalid encoding: print as \xnn */
537                     if( p ) {
538                         sprintf(p, "\\x%02x", *s );
539                         p += 4;
540                     }
541                     n += 4;
542                     resync = 1;
543                 }
544             }
545             else if( *s < 0x80 || *s >= 0xc0 ) { /* invalid */
546                 if( p ) {
547                     sprintf(p, "\\x%02x", *s );
548                     p += 4;
549                 }
550                 n += 4;
551                 nleft = 0;
552                 resync = 1;
553             }
554             else {
555                 encbuf[++encidx] = *s;
556                 val <<= 6;
557                 val |= *s & 0x3f;
558                 if( !--nleft ) { /* ready */
559                     if( active_charset ) { /* table lookup */
560                         for(i=0; i < 128; i++ ) {
561                             if( active_charset[i] == val )
562                                 break;
563                         }
564                         if( i < 128 ) { /* we can print this one */
565                             if( p ) *p++ = i+128;
566                             n++;
567                         }
568                         else { /* we do not have a translation: print utf8 */
569                             if( p ) {
570                                 for(i=0; i < encidx; i++ ) {
571                                     sprintf(p, "\\x%02x", encbuf[i] );
572                                     p += 4;
573                                 }
574                             }
575                             n += encidx*4;
576                         }
577                     }
578                     else { /* native set */
579                         if( val >= 0x80 && val < 256 ) {
580                             n++;    /* we can simply print this character */
581                             if( p ) *p++ = val;
582                         }
583                         else { /* we do not have a translation: print utf8 */
584                             if( p ) {
585                                 for(i=0; i < encidx; i++ ) {
586                                     sprintf(p, "\\x%02x", encbuf[i] );
587                                     p += 4;
588                                 }
589                             }
590                             n += encidx*4;
591                         }
592                     }
593
594                 }
595
596             }
597         }
598         if( !buffer ) { /* allocate the buffer after the first pass */
599             buffer = p = m_alloc( n + 1 );
600         }
601         else {
602             *p = 0; /* make a string */
603             return buffer;
604         }
605     }
606 }
607
608
609
610 /*********************************************
611  ********** missing string functions *********
612  *********************************************/
613
#ifndef HAVE_STPCPY
/****************
 * Copy the string B to A and return a pointer to the terminating 0
 * written to A.
 */
char *
stpcpy(char *a,const char *b)
{
    for( ; *b; a++, b++ )
        *a = *b;
    *a = 0;

    return a;
}
#endif
625
#ifndef HAVE_STRLWR
/****************
 * Convert all characters of S to lower case.  This is done in place;
 * S is returned.
 */
char *
strlwr(char *s)
{
    char *p;

    /* tolower() must not be called with a negative value other than
     * EOF, thus the character is passed as unsigned char */
    for(p=s; *p; p++ )
        *p = tolower( (unsigned char)*p );
    return s;
}
#endif
636
/****************
 * mingw32/cpd has a memicmp(); this replacement is only compiled if
 * HAVE_MEMICMP is not defined.
 * Compare the N bytes at A and B without regard to case.  Returns 0 if
 * they are equal; otherwise the difference of the first two bytes which
 * differ, taken after both have been converted to upper case, so that
 * the sign of the result does not depend on the case of the input.
 */
#ifndef HAVE_MEMICMP
int
memicmp( const char *a, const char *b, size_t n )
{
    for( ; n; n--, a++, b++ ) {
        int ca = toupper( (unsigned char)*a );
        int cb = toupper( (unsigned char)*b );

        if( ca != cb )
            return ca - cb;
    }
    return 0;
}
#endif
650
651