1 /* strgutil.c - string utilities
2 * Copyright (C) 1998 Free Software Foundation, Inc.
4 * This file is part of GnuPG.
6 * GnuPG is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * GnuPG is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
/* Unicode code points for the KOI8-R charset, 128 entries.
 * set_native_charset("koi8-r") makes this the active table and
 * native_to_utf8() indexes the active table with (byte & 0x7f), so
 * entry i presumably belongs to byte value 0x80+i -- TODO confirm
 * against the guarding condition in native_to_utf8(). */
30 static ushort koi8_unicode[128] = {
31 0x2500,0x2502,0x250c,0x2510,0x2514,0x2518,0x251c,0x2524,
32 0x252c,0x2534,0x253c,0x2580,0x2584,0x2588,0x258c,0x2590,
33 0x2591,0x2592,0x2593,0x2320,0x25a0,0x2219,0x221a,0x2248,
34 0x2264,0x2265,0x00a0,0x2321,0x00b0,0x00b2,0x00b7,0x00f7,
35 0x2550,0x2551,0x2552,0x0451,0x2553,0x2554,0x2555,0x2556,
36 0x2557,0x2558,0x2559,0x255a,0x255b,0x255c,0x255d,0x255e,
37 0x255f,0x2560,0x2561,0x0401,0x2562,0x2563,0x2564,0x2565,
38 0x2566,0x2567,0x2568,0x2569,0x256a,0x256b,0x256c,0x00a9,
39 0x044e,0x0430,0x0431,0x0446,0x0434,0x0435,0x0444,0x0433,
40 0x0445,0x0438,0x0439,0x043a,0x043b,0x043c,0x043d,0x043e,
41 0x043f,0x044f,0x0440,0x0441,0x0442,0x0443,0x0436,0x0432,
42 0x044c,0x044b,0x0437,0x0448,0x044d,0x0449,0x0447,0x044a,
43 0x042e,0x0410,0x0411,0x0426,0x0414,0x0415,0x0424,0x0413,
44 0x0425,0x0418,0x0419,0x041a,0x041b,0x041c,0x041d,0x041e,
45 0x041f,0x042f,0x0420,0x0421,0x0422,0x0423,0x0416,0x0412,
46 0x042c,0x042b,0x0417,0x0428,0x042d,0x0429,0x0427,0x042a
/* Unicode code points for the ISO-8859-2 (Latin-2) charset, 128 entries.
 * set_native_charset("iso-8859-2") makes this the active table and
 * native_to_utf8() indexes the active table with (byte & 0x7f).  The
 * first 32 entries are 0x0080..0x009f, i.e. they map onto themselves,
 * which suggests entry i belongs to byte value 0x80+i -- TODO confirm. */
49 static ushort latin2_unicode[128] = {
50 0x0080,0x0081,0x0082,0x0083,0x0084,0x0085,0x0086,0x0087,
51 0x0088,0x0089,0x008A,0x008B,0x008C,0x008D,0x008E,0x008F,
52 0x0090,0x0091,0x0092,0x0093,0x0094,0x0095,0x0096,0x0097,
53 0x0098,0x0099,0x009A,0x009B,0x009C,0x009D,0x009E,0x009F,
54 0x00A0,0x0104,0x02D8,0x0141,0x00A4,0x013D,0x015A,0x00A7,
55 0x00A8,0x0160,0x015E,0x0164,0x0179,0x00AD,0x017D,0x017B,
56 0x00B0,0x0105,0x02DB,0x0142,0x00B4,0x013E,0x015B,0x02C7,
57 0x00B8,0x0161,0x015F,0x0165,0x017A,0x02DD,0x017E,0x017C,
58 0x0154,0x00C1,0x00C2,0x0102,0x00C4,0x0139,0x0106,0x00C7,
59 0x010C,0x00C9,0x0118,0x00CB,0x011A,0x00CD,0x00CE,0x010E,
60 0x0110,0x0143,0x0147,0x00D3,0x00D4,0x0150,0x00D6,0x00D7,
61 0x0158,0x016E,0x00DA,0x0170,0x00DC,0x00DD,0x0162,0x00DF,
62 0x0155,0x00E1,0x00E2,0x0103,0x00E4,0x013A,0x0107,0x00E7,
63 0x010D,0x00E9,0x0119,0x00EB,0x011B,0x00ED,0x00EE,0x010F,
64 0x0111,0x0144,0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7,
65 0x0159,0x016F,0x00FA,0x0171,0x00FC,0x00FD,0x0163,0x02D9
/* Name of the currently selected native charset; replaced by
 * set_native_charset().  Starts out as "iso-8859-1". */
68 static const char *active_charset_name = "iso-8859-1";
/* Conversion table of the selected charset, or NULL when no table is
 * in use (set_native_charset() stores NULL for "iso-8859-1").
 * native_to_utf8() branches on whether this pointer is set. */
69 static ushort *active_charset = NULL;
/* Walk the list SL node by node.  The loop steps to sl2 instead of
 * sl->next, so the successor has to be stored in sl2 inside the loop
 * body before the step expression runs.
 * NOTE(review): presumably each visited node is released in the body --
 * confirm. */
73 free_strlist( STRLIST sl )
77 for(; sl; sl = sl2 ) {
/* Allocate a list node with m_alloc() and store a copy of STRING in its
 * d member.
 * The size is sizeof *sl plus strlen(string) with no extra byte for the
 * terminating NUL, so the declaration of d presumably already accounts
 * for one byte -- TODO confirm against the STRLIST definition.
 * NOTE(review): how the node is linked into *LIST should be checked
 * against append_to_strlist(), which walks to the tail. */
85 add_to_strlist( STRLIST *list, const char *string )
89 sl = m_alloc( sizeof *sl + strlen(string));
/* copy the text together with its NUL into the node */
91 strcpy(sl->d, string);
98 * Same as add_to_strlist() but if is_utf8 is *not* set a conversion
/* Variant of add_to_strlist() with an IS_UTF8 flag.  STRING is either
 * handed to add_to_strlist() unchanged, or it is first converted with
 * native_to_utf8() and the converted text P is added instead.
 * NOTE(review): native_to_utf8() returns a buffer obtained from
 * m_alloc() and add_to_strlist() stores its own copy (strcpy), so P has
 * to be released after the call -- confirm that this happens. */
102 add_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
107 sl = add_to_strlist( list, string );
109 char *p = native_to_utf8( string );
110 sl = add_to_strlist( list, p );
/* Allocate a list node with m_alloc(), store a copy of STRING in its d
 * member and locate the tail of *LIST.
 * The allocation size has no extra byte for the NUL, same as in
 * add_to_strlist() -- TODO confirm d already reserves it. */
117 append_to_strlist( STRLIST *list, const char *string )
121 sl = m_alloc( sizeof *sl + strlen(string));
123 strcpy(sl->d, string);
/* walk to the last node; r is dereferenced right away, so this loop
 * may only be reached when *list is not NULL */
128 for( r = *list; r->next; r = r->next )
/* Variant of append_to_strlist() with an IS_UTF8 flag.  STRING is
 * either handed to append_to_strlist() unchanged, or it is first
 * converted with native_to_utf8() and the converted text P is appended.
 * NOTE(review): P comes from m_alloc() inside native_to_utf8() and
 * append_to_strlist() stores its own copy, so P has to be released
 * after the call -- confirm that this happens. */
136 append_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
141 sl = append_to_strlist( list, string );
143 char *p = native_to_utf8( string );
144 sl = append_to_strlist( list, p );
/* Scan the list from HEAD towards NODE.  The scan ends when NODE is
 * reached or the list runs out; n starts as NULL.
 * NOTE(review): presumably n tracks the node visited just before the
 * current one and is the result -- confirm. */
152 strlist_prev( STRLIST head, STRLIST node )
156 for(n=NULL; head && head != node; head = head->next )
/* Follow the next links starting at NODE until the node that has no
 * successor is reached. */
162 strlist_last( STRLIST node )
165 for( ; node->next ; node = node->next )
173 * look for the substring SUB in buffer and return a pointer to that
174 * substring in BUF or NULL if not found.
175 * Comparison is case-insensitive.
178 memistr( const char *buf, size_t buflen, const char *sub )
/* t scans the buffer, n counts the bytes still available, s points at
 * the character of SUB to compare next */
183 for( t=buf, n=buflen, s=sub ; n ; t++, n-- )
184 if( toupper(*t) == toupper(*s) ) {
/* first character matched: remember the candidate start in buf/buflen
 * (the parameters are reused as scratch) and compare the following
 * characters while input remains */
185 for( buf=t++, buflen = n--, s++;
186 n && toupper(*t) == toupper(*s); t++, s++, n-- )
/* go back to the remembered candidate and rewind SUB; the outer loop's
 * step then moves one byte past that candidate */
190 t = buf; n = buflen; s = sub ;
197 * Like strncpy(), but at most n-1 characters are copied and a
198 * '\0' is appended. If n = 0 nothing is done; if the destination
199 * is NULL, memory is obtained via m_alloc, and if there is not
200 * enough memory available the function aborts.
/* Bounded string copy from SRC into DEST; N is the size of the
 * destination including the terminating NUL. */
203 mem2str( char *dest , const void *src , size_t n )
/* obtain an N byte destination from m_alloc() */
210 dest = m_alloc( n ) ;
/* n-- keeps one byte back, so at most N-1 bytes are copied; the copy
 * also stops at the first NUL in the source */
213 for(n--; n && *s; n-- )
223 * remove leading and trailing white spaces
/* Works in place on STR: p runs ahead over the leading white space and
 * the remaining text is then copied through the string pointer. */
226 trim_spaces( char *str )
228 char *string, *p, *mark;
231 /* find first non space character */
232 for( p=string; *p && isspace( *(byte*)p ) ; p++ )
234 /* move characters */
/* the assignment in the loop condition copies one character per round
 * and ends the loop after the terminating NUL has been copied; mark
 * starts as NULL for every call */
235 for( (mark = NULL); (*string = *p); string++, p++ )
/* the byte cast presumably makes the value unsigned before it is
 * passed to isspace() -- TODO confirm the byte typedef */
236 if( isspace( *(byte*)p ) ) {
243 *mark = '\0' ; /* remove trailing spaces */
/* Scan the first LEN bytes of LINE and test each byte for membership
 * in the NUL terminated set TRIMCHARS.  LINE is treated as a counted
 * buffer, not as a C string. */
251 trim_trailing_chars( byte *line, unsigned len, const char *trimchars )
/* mark starts as NULL, n counts the bytes examined so far */
256 for(mark=NULL, p=line, n=0; n < len; n++, p++ ) {
/* strchr() also matches the terminating NUL of TRIMCHARS, so a 0 byte
 * inside LINE is treated like a trim character */
257 if( strchr(trimchars, *p ) ) {
273 * remove trailing white spaces and return the length of the buffer
/* Convenience wrapper: calls trim_trailing_chars() on the first LEN
 * bytes of LINE with the fixed set space, tab, carriage return and
 * line feed, and returns whatever that call returns. */
276 trim_trailing_ws( byte *line, unsigned len )
278 return trim_trailing_chars( line, len, " \t\r\n" );
/* Walk STRING up to its terminating NUL with count starting at 0.
 * NOTE(review): presumably count is raised for every character equal
 * to C and returned -- confirm. */
284 string_count_chr( const char *string, int c )
287 for(count=0; *string; string++ )
/* Select the native charset by name.  NEWSET is compared with
 * stricmp() against "iso-8859-1", "iso-8859-2" and "koi8-r"; on a match
 * active_charset_name and active_charset are updated together.
 * Any other name yields G10ERR_GENERAL. */
295 set_native_charset( const char *newset )
297 if( !stricmp( newset, "iso-8859-1" ) ) {
298 active_charset_name = "iso-8859-1";
/* no table for this charset; native_to_utf8() takes its table-less path */
299 active_charset = NULL;
301 else if( !stricmp( newset, "iso-8859-2" ) ) {
302 active_charset_name = "iso-8859-2";
303 active_charset = latin2_unicode;
305 else if( !stricmp( newset, "koi8-r" ) ) {
306 active_charset_name = "koi8-r";
307 active_charset = koi8_unicode;
/* unsupported charset name */
310 return G10ERR_GENERAL;
317 return active_charset_name;
321 * Convert string, which is in native encoding to UTF8 and return the
322 * new allocated UTF8 string.
/* Works in two passes per branch: a first loop over STRING adjusts
 * length, then a buffer of length + 1 bytes is taken from m_alloc()
 * and a second loop writes the UTF-8 bytes through p.
 * NOTE(review): the result comes from m_alloc(), so the caller
 * presumably owns it and has to release it -- confirm. */
325 native_to_utf8( const char *string )
/* a conversion table is selected: bytes are mapped through it */
332 if( active_charset ) {
333 for(s=string; *s; s++ ) {
336 length += 2; /* we may need 3 bytes */
338 buffer = m_alloc( length + 1 );
339 for(p=buffer, s=string; *s; s++ ) {
/* table lookup with the low seven bits of the byte */
341 ushort val = active_charset[ *s & 0x7f ];
/* two byte form: 110xxxxx 10xxxxxx (carries 11 bits of val) */
343 *p++ = 0xc0 | ( (val >> 6) & 0x1f );
344 *p++ = 0x80 | ( val & 0x3f );
/* three byte form: 1110xxxx 10xxxxxx 10xxxxxx (carries 16 bits of val) */
347 *p++ = 0xe0 | ( (val >> 12) & 0x0f );
348 *p++ = 0x80 | ( (val >> 6) & 0x3f );
349 *p++ = 0x80 | ( val & 0x3f );
/* second sizing loop, followed by its own allocation and output loop;
 * presumably the path taken when active_charset is NULL -- confirm */
358 for(s=string; *s; s++ ) {
363 buffer = m_alloc( length + 1 );
364 for(p=buffer, s=string; *s; s++ ) {
/* the byte value itself is encoded in two bytes: its top two bits go
 * into the lead byte, its low six bits into the continuation byte */
366 *p++ = 0xc0 | ((*s >> 6) & 3);
367 *p++ = 0x80 | ( *s & 0x3f );
379 * Convert string, which is in UTF8 to native encoding. Replace
380 * illegal encodings by some "\xnn".
/* Steps visible below: a quick scan that returns a plain m_strdup()
 * copy when it reaches the end of STRING, a sizing pass that classifies
 * lead bytes, an m_alloc() of n bytes and a decoding loop. */
383 utf8_to_native( const char *string )
390 /* quick check whether we actually have characters with bit 8 set */
391 for( s=string; *s; s++ )
394 if( !*s ) /* that is easy */
395 return m_strdup(string);
397 /* count the extended utf-8 characters */
/* n starts at 1, presumably to leave room for the terminating NUL --
 * confirm */
401 for( n=1, s=string; *s; s++ ) {
/* lead byte classes: 110xxxxx opens a two byte sequence, 1110xxxx a
 * three byte sequence, 11110xxx a four byte sequence */
404 else if( (*s & 0xe0) == 0xc0 )
406 else if( (*s & 0xf0) == 0xe0 )
408 else if( (*s & 0xf8) == 0xf0 )
411 n++; /* invalid encoding */
414 buffer = p = m_alloc( n );
/* s is advanced inside the loop body, not in the loop header */
415 for( s=string; *s; ) {
418 else if( (*s & 0xe0) == 0xc0 ) {
/* the second byte has to be a continuation byte (10xxxxxx) */
420 if( (s[1] & 0xc0) != 0x80 )
/* NOTE(review): the lead byte is shifted without being masked with
 * 0x1f first, so its 110 marker bits end up in val unless they are
 * cleared elsewhere -- confirm */
422 val = (*s << 6) | (s[1] & 0x3f);
424 else if( (*s & 0xf0) == 0xe0 )
426 else if( (*s & 0xf8) == 0xf0 )
429 n++; /* invalid encoding */
/* NOTE(review): this return hands back an unconverted copy of STRING;
 * verify whether the decoding above is actually in effect and whether
 * buffer is released on this path */
432 return m_strdup(string);
438 * check whether string is a valid UTF8 string.
441 * 2 = invalid encoding
444 check_utf8_string( const char *string )
451 /*********************************************
452 ********** missing string functions *********
453 *********************************************/
457 stpcpy(char *a,const char *b)
479 * mingw32/cpd has a memicmp()
/* Compare the first N bytes of A and B without regard to case.
 * At the first position where the bytes differ even after toupper(),
 * the difference of the two original (not upper-cased) byte values is
 * returned. */
483 memicmp( const char *a, const char *b, size_t n )
485 for( ; n; n--, a++, b++ )
/* the cheap *a != *b test skips the toupper() calls for identical
 * bytes; the byte casts presumably make the values unsigned for
 * toupper() -- TODO confirm the byte typedef */
486 if( *a != *b && toupper(*(const byte*)a) != toupper(*(const byte*)b) )
487 return *(const byte *)a - *(const byte*)b;