* gpgkeys_hkp.c (parse_hkp_index, dehtmlize): Move HTML functionality into
author David Shaw <dshaw@jabberwocky.com>
Mon, 26 Aug 2002 19:22:48 +0000 (19:22 +0000)
committer David Shaw <dshaw@jabberwocky.com>
Mon, 26 Aug 2002 19:22:48 +0000 (19:22 +0000)
new "dehtmlize" function.  Remove HTML before trying to parse each line
from the keyserver.  If the keyserver provides key type information in the
listing, use it.  (Copy over from g10/hkp.c).

keyserver/ChangeLog
keyserver/gpgkeys_hkp.c

index 5362dc6..facf9b2 100644 (file)
@@ -1,3 +1,11 @@
+2002-08-26  David Shaw  <dshaw@jabberwocky.com>
+
+       * gpgkeys_hkp.c (parse_hkp_index, dehtmlize): Move HTML
+       functionality into new "dehtmlize" function.  Remove HTML before
+       trying to parse each line from the keyserver.  If the keyserver
+       provides key type information in the listing, use it.  (Copy over
+       from g10/hkp.c).
+
 2002-08-19  David Shaw  <dshaw@jabberwocky.com>
 
        * gpgkeys_hkp.c (get_key, parse_hkp_index): Bring over latest code
index 455dd00..2cb27be 100644 (file)
@@ -391,6 +391,77 @@ unsigned int scan_isodatestr( const char *string )
   return stamp;
 }
 
+/* Strip anything <between brackets> from LINE and decode the HTML
+   entities &lt;, &gt; and &amp; (matched case-insensitively), all in
+   place.  A bracket left open runs to the end of the line, so tags
+   must be closed on the line they start on.  The result is never
+   longer than the input.  Trailing whitespace is removed as well. */
+static void
+dehtmlize(char *line)
+{
+  int parsedindex=0;
+  char *parsed=line;
+
+  while(*line!='\0')
+    {
+      switch(*line)
+       {
+       case '<':
+         while(*line!='>' && *line!='\0')
+           line++;
+
+         if(*line!='\0')
+           line++;
+         break;
+
+       case '&':
+         /* The && chains stop at the first mismatch, which includes
+            the terminating NUL, so nothing past the end is read.
+            The casts keep tolower() defined for bytes >= 0x80. */
+         if(tolower((unsigned char)line[1])=='l' &&
+            tolower((unsigned char)line[2])=='t' &&
+            line[3]==';')
+           {
+             parsed[parsedindex++]='<';
+             line+=4;
+             break;
+           }
+         else if(tolower((unsigned char)line[1])=='g' &&
+                 tolower((unsigned char)line[2])=='t' &&
+                 line[3]==';')
+           {
+             parsed[parsedindex++]='>';
+             line+=4;
+             break;
+           }
+         else if(tolower((unsigned char)line[1])=='a' &&
+                 tolower((unsigned char)line[2])=='m' &&
+                 tolower((unsigned char)line[3])=='p' &&
+                 line[4]==';')
+           {
+             parsed[parsedindex++]='&';
+             line+=5;
+             break;
+           }
+         /* Not an entity we know: copy the '&' as is (fall through). */
+
+       default:
+         parsed[parsedindex++]=*line;
+         line++;
+         break;
+       }
+    }
+
+  /* Chop off any trailing whitespace.  Note that the HKP servers have
+     \r\n as line endings, and the NAI HKP servers have just \n.  The
+     index is checked before each read so that a line consisting only
+     of whitespace cannot make it run off the front of the buffer. */
+  while(parsedindex>0 && isspace((unsigned char)parsed[parsedindex-1]))
+    parsedindex--;
+
+  parsed[parsedindex]='\0';
+}
+
 /* pub  2048/<a href="/pks/lookup?op=get&search=0x3CB3B415">3CB3B415</a> 1998/04/03 David M. Shaw &lt;<a href="/pks/lookup?op=get&search=0x3CB3B415">dshaw@jabberwocky.com</a>&gt; */
 
 /* Luckily enough, both the HKP server and NAI HKP interface to their
@@ -400,17 +471,21 @@ unsigned int scan_isodatestr( const char *string )
 int parse_hkp_index(char *line,char **buffer)
 {
   static int open=0,revoked=0;
-  static char *key=NULL,*uid=NULL;
+  static char *key=NULL,*uid=NULL,*type=NULL;
   static unsigned int bits,createtime;
   int ret=0;
 
-  /*  printf("Open %d, LINE: %s, uid: %s\n",open,line,uid); */
+  /* printf("Open %d, LINE: %s, uid: %s\n",open,line,uid); */
+
+  dehtmlize(line);
+
+  /* printf("Now open %d, LINE: \"%s\", uid: %s\n",open,line,uid); */
 
   /* Try and catch some bastardization of HKP.  If we don't have
      certain unchanging landmarks, we can't reliably parse the
-     response. */
-
-  if(open && strncasecmp(line,"</pre>",6)!=0 &&
+     response.  This only complains about problems within the key
+     section itself.  Headers and footers should not matter. */
+  if(open && line[0]!='\0' &&
      strncasecmp(line,"pub ",4)!=0 &&
      strncasecmp(line,"    ",4)!=0)
     {
@@ -420,8 +495,6 @@ int parse_hkp_index(char *line,char **buffer)
       return -1;
     }
 
-  /* printf("Open %d, LINE: %s\n",open,line); */
-
   /* For multiple UIDs */
   if(open && uid!=NULL)
     {
@@ -456,7 +529,10 @@ int parse_hkp_index(char *line,char **buffer)
          append_quoted(*buffer,revoked?"1:":":",0);
          sprintf(intstr,"%u",createtime);
          append_quoted(*buffer,intstr,':');
-         append_quoted(*buffer,"::::",0);
+         append_quoted(*buffer,":::",0);
+         if(type)
+           append_quoted(*buffer,type,':');
+         append_quoted(*buffer,":",0);
          sprintf(intstr,"%u",bits);
          append_quoted(*buffer,intstr,':');
          append_quoted(*buffer,"\n",0);
@@ -486,13 +562,16 @@ int parse_hkp_index(char *line,char **buffer)
       if(tok==NULL)
        return ret;
 
-      bits=atoi(tok);
+      if(tok[strlen(tok)-1]=='R')
+       type="RSA";
+      else if(tok[strlen(tok)-1]=='D')
+       type="DSA";
+      else
+       type=NULL;
 
-      tok=strsep(&line,">");
-      if(tok==NULL)
-       return ret;
+      bits=atoi(tok);
 
-      tok=strsep(&line,"<");
+      tok=strsep(&line," ");
       if(tok==NULL)
        {
          key=strdup("00000000");
@@ -505,10 +584,6 @@ int parse_hkp_index(char *line,char **buffer)
       if(tok==NULL)
        return ret;
 
-      tok=strsep(&line," ");
-      if(tok==NULL)
-       return ret;
-  
       /* The date parser wants '-' instead of '/', so... */
       temp=tok;
       while(*temp!='\0')
@@ -524,77 +599,25 @@ int parse_hkp_index(char *line,char **buffer)
 
   if(open)
     {
-      int uidindex=0;
-
       if(line==NULL)
        {
          uid=strdup("Key index corrupted");
          return ret;
        }
 
-      /* All that's left is the user name.  Strip off anything
-        <between brackets> and de-urlencode it. */
-
       while(*line==' ' && *line!='\0')
        line++;
 
+      if(*line=='\0')
+       return ret;
+
       if(strncmp(line,"*** KEY REVOKED ***",19)==0)
        {
          revoked=1;
          return ret;
        }
 
-      uid=malloc(strlen(line)+1);
-
-      while(*line!='\0')
-       {
-         switch(*line)
-           {
-           case '<':
-             while(*line!='>' && *line!='\0')
-               line++;
-
-             if(*line!='\0')
-               line++;
-             break;
-
-           case '&':
-             if((*(line+1)!='\0' && tolower(*(line+1))=='l') &&
-                (*(line+2)!='\0' && tolower(*(line+2))=='t') &&
-                (*(line+3)!='\0' && *(line+3)==';'))
-               {
-                 uid[uidindex++]='<';
-                 line+=4;
-                 break;
-               }
-
-             if((*(line+1)!='\0' && tolower(*(line+1))=='g') &&
-                (*(line+2)!='\0' && tolower(*(line+2))=='t') &&
-                (*(line+3)!='\0' && *(line+3)==';'))
-               {
-                 uid[uidindex++]='>';
-                 line+=4;
-                 break;
-               }
-
-           default:
-             uid[uidindex++]=*line;
-             line++;
-             break;
-           }
-       }
-
-      uid[uidindex]='\0';
-
-      /* Chop off the trailing \r, \n, or both. This is fussy as the
-         true HKP servers have \r\n, and the NAI HKP servers have just
-         \n. */
-
-      if(isspace(uid[uidindex-1]))
-       uid[uidindex-1]='\0';
-
-      if(isspace(uid[uidindex-2]))
-       uid[uidindex-2]='\0';
+      uid=strdup(line);
     }
 
   return ret;