See ChangeLog: Tue Aug 31 17:20:44 CEST 1999 Werner Koch
[gnupg.git] / util / strgutil.c
1 /* strgutil.c -  string utilities
2  *      Copyright (C) 1998 Free Software Foundation, Inc.
3  *
4  * This file is part of GnuPG.
5  *
6  * GnuPG is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * GnuPG is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
19  */
20
21 #include <config.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <ctype.h>
25 #include "types.h"
26 #include "util.h"
27 #include "memory.h"
28
29
30 static ushort koi8_unicode[128] = {
31     0x2500,0x2502,0x250c,0x2510,0x2514,0x2518,0x251c,0x2524,
32     0x252c,0x2534,0x253c,0x2580,0x2584,0x2588,0x258c,0x2590,
33     0x2591,0x2592,0x2593,0x2320,0x25a0,0x2219,0x221a,0x2248,
34     0x2264,0x2265,0x00a0,0x2321,0x00b0,0x00b2,0x00b7,0x00f7,
35     0x2550,0x2551,0x2552,0x0451,0x2553,0x2554,0x2555,0x2556,
36     0x2557,0x2558,0x2559,0x255a,0x255b,0x255c,0x255d,0x255e,
37     0x255f,0x2560,0x2561,0x0401,0x2562,0x2563,0x2564,0x2565,
38     0x2566,0x2567,0x2568,0x2569,0x256a,0x256b,0x256c,0x00a9,
39     0x044e,0x0430,0x0431,0x0446,0x0434,0x0435,0x0444,0x0433,
40     0x0445,0x0438,0x0439,0x043a,0x043b,0x043c,0x043d,0x043e,
41     0x043f,0x044f,0x0440,0x0441,0x0442,0x0443,0x0436,0x0432,
42     0x044c,0x044b,0x0437,0x0448,0x044d,0x0449,0x0447,0x044a,
43     0x042e,0x0410,0x0411,0x0426,0x0414,0x0415,0x0424,0x0413,
44     0x0425,0x0418,0x0419,0x041a,0x041b,0x041c,0x041d,0x041e,
45     0x041f,0x042f,0x0420,0x0421,0x0422,0x0423,0x0416,0x0412,
46     0x042c,0x042b,0x0417,0x0428,0x042d,0x0429,0x0427,0x042a
47 };
48
49 static ushort latin2_unicode[128] = {
50     0x0080,0x0081,0x0082,0x0083,0x0084,0x0085,0x0086,0x0087,
51     0x0088,0x0089,0x008A,0x008B,0x008C,0x008D,0x008E,0x008F,
52     0x0090,0x0091,0x0092,0x0093,0x0094,0x0095,0x0096,0x0097,
53     0x0098,0x0099,0x009A,0x009B,0x009C,0x009D,0x009E,0x009F,
54     0x00A0,0x0104,0x02D8,0x0141,0x00A4,0x013D,0x015A,0x00A7,
55     0x00A8,0x0160,0x015E,0x0164,0x0179,0x00AD,0x017D,0x017B,
56     0x00B0,0x0105,0x02DB,0x0142,0x00B4,0x013E,0x015B,0x02C7,
57     0x00B8,0x0161,0x015F,0x0165,0x017A,0x02DD,0x017E,0x017C,
58     0x0154,0x00C1,0x00C2,0x0102,0x00C4,0x0139,0x0106,0x00C7,
59     0x010C,0x00C9,0x0118,0x00CB,0x011A,0x00CD,0x00CE,0x010E,
60     0x0110,0x0143,0x0147,0x00D3,0x00D4,0x0150,0x00D6,0x00D7,
61     0x0158,0x016E,0x00DA,0x0170,0x00DC,0x00DD,0x0162,0x00DF,
62     0x0155,0x00E1,0x00E2,0x0103,0x00E4,0x013A,0x0107,0x00E7,
63     0x010D,0x00E9,0x0119,0x00EB,0x011B,0x00ED,0x00EE,0x010F,
64     0x0111,0x0144,0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7,
65     0x0159,0x016F,0x00FA,0x0171,0x00FC,0x00FD,0x0163,0x02D9
66 };
67
68 static const char *active_charset_name = "iso-8859-1";
69 static ushort *active_charset = NULL;
70
71
72 void
73 free_strlist( STRLIST sl )
74 {
75     STRLIST sl2;
76
77     for(; sl; sl = sl2 ) {
78         sl2 = sl->next;
79         m_free(sl);
80     }
81 }
82
83
84 STRLIST
85 add_to_strlist( STRLIST *list, const char *string )
86 {
87     STRLIST sl;
88
89     sl = m_alloc( sizeof *sl + strlen(string));
90     sl->flags = 0;
91     strcpy(sl->d, string);
92     sl->next = *list;
93     *list = sl;
94     return sl;
95 }
96
97 /****************
98  * ame as add_to_strlist() but if is_utf8 is *not* set a conversion
99  * to UTF8 is done
100  */
101 STRLIST
102 add_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
103 {
104     STRLIST sl;
105
106     if( is_utf8 )
107         sl = add_to_strlist( list, string );
108     else {
109         char *p = native_to_utf8( string );
110         sl = add_to_strlist( list, p );
111         m_free( p );
112     }
113     return sl;
114 }
115
116 STRLIST
117 append_to_strlist( STRLIST *list, const char *string )
118 {
119     STRLIST r, sl;
120
121     sl = m_alloc( sizeof *sl + strlen(string));
122     sl->flags = 0;
123     strcpy(sl->d, string);
124     sl->next = NULL;
125     if( !*list )
126         *list = sl;
127     else {
128         for( r = *list; r->next; r = r->next )
129             ;
130         r->next = sl;
131     }
132     return sl;
133 }
134
135 STRLIST
136 append_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
137 {
138     STRLIST sl;
139
140     if( is_utf8 )
141         sl = append_to_strlist( list, string );
142     else {
143         char *p = native_to_utf8( string );
144         sl = append_to_strlist( list, p );
145         m_free( p );
146     }
147     return sl;
148 }
149
150
151 STRLIST
152 strlist_prev( STRLIST head, STRLIST node )
153 {
154     STRLIST n;
155
156     for(n=NULL; head && head != node; head = head->next )
157         n = head;
158     return n;
159 }
160
161 STRLIST
162 strlist_last( STRLIST node )
163 {
164     if( node )
165         for( ; node->next ; node = node->next )
166             ;
167     return node;
168 }
169
170
171
/****************
 * Look for the string SUB in the first BUFLEN bytes of BUF and return a
 * pointer to its first occurrence in BUF, or NULL if it is not found.
 * The comparison is case-insensitive (using toupper()).
 * BUF does not need to be NUL terminated and may contain NUL bytes.
 * An empty SUB never matches.
 */
const char *
memistr( const char *buf, size_t buflen, const char *sub )
{
    const unsigned char *hay = (const unsigned char *)buf;
    const unsigned char *needle = (const unsigned char *)sub;
    size_t sublen = strlen( sub );
    size_t i, j;

    if( !sublen || sublen > buflen )
        return NULL;

    /* Only start positions which leave room for all of SUB are tried,
     * thus neither BUF nor SUB is ever read beyond its end. */
    for( i = 0; i <= buflen - sublen; i++ ) {
        for( j = 0; j < sublen; j++ )
            if( toupper( hay[i+j] ) != toupper( needle[j] ) )
                break;
        if( j == sublen )
            return buf + i;
    }

    return NULL;
}
195
/****************
 * Like strncpy(), but at most N-1 characters are copied and a '\0' is
 * always appended.  If N is 0 nothing is done and DEST is returned
 * unchanged.  If DEST is NULL, a buffer of N bytes is obtained from
 * m_alloc() and returned.
 * NOTE(review): the original comment says that the function aborts if
 * not enough memory is available; that is presumably done by m_alloc().
 */
char *
mem2str( char *dest , const void *src , size_t n )
{
    const char *from = src;
    char *to;
    size_t room;

    if( !n )
        return dest;

    if( !dest )
        dest = m_alloc( n );
    to = dest;
    /* one byte is reserved for the terminator */
    for( room = n - 1; room && *from; room-- )
        *to++ = *from++;
    *to = '\0';

    return dest;
}
220
221
/****************
 * Remove leading and trailing white space (as defined by isspace())
 * from STR.  This is done in place; STR is returned.
 */
char *
trim_spaces( char *str )
{
    char *src = str;
    char *dst = str;
    char *tail = NULL;  /* start of the current run of white space */

    /* skip the leading white space */
    while( *src && isspace( (unsigned char)*src ) )
        src++;

    /* shift the rest to the front and remember where the last run
     * of white space starts */
    for( ;; ) {
        char c = *src++;

        *dst = c;
        if( !c )
            break;
        if( !isspace( (unsigned char)c ) )
            tail = NULL;
        else if( !tail )
            tail = dst;
        dst++;
    }

    if( tail )
        *tail = '\0';  /* cut off the trailing white space */

    return str;
}
247
248
249
250 unsigned
251 trim_trailing_chars( byte *line, unsigned len, const char *trimchars )
252 {
253     byte *p, *mark;
254     unsigned n;
255
256     for(mark=NULL, p=line, n=0; n < len; n++, p++ ) {
257         if( strchr(trimchars, *p ) ) {
258             if( !mark )
259                 mark = p;
260         }
261         else
262             mark = NULL;
263     }
264
265     if( mark ) {
266         *mark = 0;
267         return mark - line;
268     }
269     return len;
270 }
271
272 /****************
273  * remove trailing white spaces and return the length of the buffer
274  */
275 unsigned
276 trim_trailing_ws( byte *line, unsigned len )
277 {
278     return trim_trailing_chars( line, len, " \t\r\n" );
279 }
280
281
282
/****************
 * Return the number of characters in STRING which compare equal to C.
 * The terminating NUL is not looked at.
 */
int
string_count_chr( const char *string, int c )
{
    const char *p = string;
    int hits = 0;

    while( *p ) {
        if( *p == c )
            hits++;
        p++;
    }
    return hits;
}
292
293
294 int
295 set_native_charset( const char *newset )
296 {
297     if( !stricmp( newset, "iso-8859-1" ) ) {
298         active_charset_name = "iso-8859-1";
299         active_charset = NULL;
300     }
301     else if( !stricmp( newset, "iso-8859-2" ) ) {
302         active_charset_name = "iso-8859-2";
303         active_charset = latin2_unicode;
304     }
305     else if( !stricmp( newset, "koi8-r" ) ) {
306         active_charset_name = "koi8-r";
307         active_charset = koi8_unicode;
308     }
309     else
310         return G10ERR_GENERAL;
311     return 0;
312 }
313
314 const char*
315 get_native_charset()
316 {
317     return active_charset_name;
318 }
319
320 /****************
321  * Convert string, which is in native encoding to UTF8 and return the
322  * new allocated UTF8 string.
323  */
324 char *
325 native_to_utf8( const char *string )
326 {
327     const byte *s;
328     char *buffer;
329     byte *p;
330     size_t length=0;
331
332     if( active_charset ) {
333         for(s=string; *s; s++ ) {
334             length++;
335             if( *s & 0x80 )
336                 length += 2; /* we may need 3 bytes */
337         }
338         buffer = m_alloc( length + 1 );
339         for(p=buffer, s=string; *s; s++ ) {
340             if( *s & 0x80 ) {
341                 ushort val = active_charset[ *s & 0x7f ];
342                 if( val < 0x0800 ) {
343                     *p++ = 0xc0 | ( (val >> 6) & 0x1f );
344                     *p++ = 0x80 | (  val & 0x3f );
345                 }
346                 else {
347                     *p++ = 0xe0 | ( (val >> 12) & 0x0f );
348                     *p++ = 0x80 | ( (val >>  6) & 0x3f );
349                     *p++ = 0x80 | (  val & 0x3f );
350                 }
351             }
352             else
353                 *p++ = *s;
354         }
355         *p = 0;
356     }
357     else {
358         for(s=string; *s; s++ ) {
359             length++;
360             if( *s & 0x80 )
361                 length++;
362         }
363         buffer = m_alloc( length + 1 );
364         for(p=buffer, s=string; *s; s++ ) {
365             if( *s & 0x80 ) {
366                 *p++ = 0xc0 | ((*s >> 6) & 3);
367                 *p++ = 0x80 | ( *s & 0x3f );
368             }
369             else
370                 *p++ = *s;
371         }
372         *p = 0;
373     }
374     return buffer;
375 }
376
377
378 /****************
379  * Convert string, which is in UTF8 to native encoding.
380  * illegal encodings by some "\xnn" and quote all control characters
381  */
382 char *
383 utf8_to_native( const char *string, size_t length )
384 {
385     int nleft;
386     int i;
387     byte encbuf[7];
388     int encidx;
389     const byte *s;
390     size_t n;
391     byte *buffer = NULL, *p = NULL;
392     unsigned long val = 0;
393     size_t slen;
394     int resync = 0;
395
396     /* 1. pass (p==NULL): count the extended utf-8 characters */
397     /* 2. pass (p!=NULL): create string */
398     for( ;; ) {
399         for( slen=length, nleft=encidx=0, n=0, s=string; slen; s++, slen-- ) {
400             if( resync ) {
401                 if( !(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)) ) {
402                     /* still invalid */
403                     if( p ) {
404                         sprintf(p, "\\x%02x", *s );
405                         p += 4;
406                     }
407                     n += 4;
408                     continue;
409                 }
410                 resync = 0;
411             }
412             if( !nleft ) {
413                 if( !(*s & 0x80) ) { /* plain ascii */
414                     if( iscntrl( *s ) ) {
415                         n++;
416                         if( p )
417                             *p++ = '\\';
418                         switch( *s ) {
419                           case '\n': n++; if( p ) *p++ = 'n'; break;
420                           case '\r': n++; if( p ) *p++ = 'r'; break;
421                           case '\f': n++; if( p ) *p++ = 'f'; break;
422                           case '\v': n++; if( p ) *p++ = 'v'; break;
423                           case '\b': n++; if( p ) *p++ = 'b'; break;
424                           case   0 : n++; if( p ) *p++ = '0'; break;
425                           default: n += 3;
426                                    sprintf( p, "x%02x", *s );
427                                    p += 3;
428                                    break;
429                         }
430                     }
431                     else {
432                         if( p ) *p++ = *s;
433                         n++;
434                     }
435                 }
436                 else if( (*s & 0xe0) == 0xc0 ) { /* 110x xxxx */
437                     val = *s & 0x1f;
438                     nleft = 1;
439                     encbuf[encidx=0] = *s;
440                 }
441                 else if( (*s & 0xf0) == 0xe0 ) { /* 1110 xxxx */
442                     val = *s & 0x0f;
443                     nleft = 2;
444                     encbuf[encidx=0] = *s;
445                 }
446                 else if( (*s & 0xf8) == 0xf0 ) { /* 1111 0xxx */
447                     val = *s & 0x07;
448                     nleft = 3;
449                     encbuf[encidx=0] = *s;
450                 }
451                 else if( (*s & 0xfc) == 0xf8 ) { /* 1111 10xx */
452                     val = *s & 0x03;
453                     nleft = 4;
454                     encbuf[encidx=0] = *s;
455                 }
456                 else if( (*s & 0xfe) == 0xfc ) { /* 1111 110x */
457                     val = *s & 0x01;
458                     nleft = 5;
459                     encbuf[encidx=0] = *s;
460                 }
461                 else {  /* invalid encoding: print as \xnn */
462                     if( p ) {
463                         sprintf(p, "\\x%02x", *s );
464                         p += 4;
465                     }
466                     n += 4;
467                     resync = 1;
468                 }
469             }
470             else if( *s < 0x80 || *s >= 0xc0 ) { /* invalid */
471                 if( p ) {
472                     sprintf(p, "\\x%02x", *s );
473                     p += 4;
474                 }
475                 n += 4;
476                 nleft = 0;
477                 resync = 1;
478             }
479             else {
480                 encbuf[++encidx] = *s;
481                 val <<= 6;
482                 val |= *s & 0x3f;
483                 if( !--nleft ) { /* ready */
484                     if( active_charset ) { /* table lookup */
485                         for(i=0; i < 128; i++ ) {
486                             if( active_charset[i] == val )
487                                 break;
488                         }
489                         if( i < 128 ) { /* we can print this one */
490                             if( p ) *p++ = i+128;
491                             n++;
492                         }
493                         else { /* we do not have a translation: print utf8 */
494                             if( p ) {
495                                 for(i=0; i < encidx; i++ ) {
496                                     sprintf(p, "\\x%02x", encbuf[i] );
497                                     p += 4;
498                                 }
499                             }
500                             n += encidx*4;
501                         }
502                     }
503                     else { /* native set */
504                         if( val >= 0x80 && val < 256 ) {
505                             n++;    /* we can simply print this character */
506                             if( p ) *p++ = val;
507                         }
508                         else { /* we do not have a translation: print utf8 */
509                             if( p ) {
510                                 for(i=0; i < encidx; i++ ) {
511                                     sprintf(p, "\\x%02x", encbuf[i] );
512                                     p += 4;
513                                 }
514                             }
515                             n += encidx*4;
516                         }
517                     }
518
519                 }
520
521             }
522         }
523         if( !buffer ) { /* allocate the buffer after the first pass */
524             buffer = p = m_alloc( n + 1 );
525         }
526         else {
527             *p = 0; /* make a string */
528             return buffer;
529         }
530     }
531 }
532
533
534
535 /*********************************************
536  ********** missing string functions *********
537  *********************************************/
538
#ifndef HAVE_STPCPY
/****************
 * Copy the string B including its terminating NUL to A and return a
 * pointer to the NUL which was stored in A.
 */
char *
stpcpy(char *a,const char *b)
{
    for( ; (*a = *b) != '\0'; a++, b++ )
        ;
    return a;
}
#endif
550
#ifndef HAVE_STRLWR
/****************
 * Convert the string S in place to lower case (using tolower()) and
 * return S.
 */
char *
strlwr(char *s)
{
    char *p;

    /* tolower() is only defined for EOF and values which fit into an
     * unsigned char; a plain char may be negative */
    for(p=s; *p; p++ )
        *p = tolower( (unsigned char)*p );
    return s;
}
#endif
561
/****************
 * Some platforms (e.g. mingw32) already have a memicmp(); this version
 * is only used if HAVE_MEMICMP is not defined.
 *
 * Compare the first N bytes of A and B without regard to case (using
 * toupper()).  Returns 0 if they are equal; otherwise the difference of
 * the first two differing bytes after case folding, so that the sign
 * of the result reflects the case-insensitive order.
 */
#ifndef HAVE_MEMICMP
int
memicmp( const char *a, const char *b, size_t n )
{
    const unsigned char *pa = (const unsigned char *)a;
    const unsigned char *pb = (const unsigned char *)b;

    for( ; n; n--, pa++, pb++ ) {
        int ca = toupper( *pa );
        int cb = toupper( *pb );

        if( ca != cb )
            return ca - cb;
    }
    return 0;
}
#endif
575
576