Logo Search packages:      
Sourcecode: echoping version File versions  Download package

HTParse.c

/*              Parse HyperText Document Address                HTParse.c
   **           ================================
 */

#include "HTParse.h"
#define TRACE 0

#define FREE(x) if (x) {free(x); x = NULL;}

struct struct_parts
  {
    char *access;
    char *host;
    char *absolute;
    char *relative;
/*      char * search;          no - treated as part of path */
    char *anchor;
  };

/*      Strings of any length
   **      ---------------------
 */
PUBLIC int strcasecomp
ARGS2 (
      CONST char *, a,
      CONST char *, b)
{
  CONST char *p = a;
  CONST char *q = b;

  for (p = a, q = b; *p && *q; p++, q++)
    {
      int diff = TOLOWER (*p) - TOLOWER (*q);
      if (diff)
      return diff;
    }
  if (*p)
    return 1;                 /* p was longer than q */
  if (*q)
    return -1;                /* p was shorter than q */
  return 0;             /* Exact match */
}

/*      With count limit
   **      ----------------
 */
PUBLIC int strncasecomp
ARGS3 (
      CONST char *, a,
      CONST char *, b,
      int, n)
{
  CONST char *p = a;
  CONST char *q = b;

  for (p = a, q = b;;
       p++, q++)
    {
      int diff;
      if (p == (a + n))
      return 0;         /*   Match up to n characters */
      if (!(*p && *q))
      return (*p - *q);
      diff = TOLOWER (*p) - TOLOWER (*q);
      if (diff)
      return diff;
    }
  /*NOTREACHED */
}

/*      Allocate a new copy of a string, and returns it
 */
PUBLIC char *HTSACopy
ARGS2 (
      char **, dest,
      CONST char *, src)
{
  FREE (*dest);
  if (src)
    {
      *dest = (char *) malloc (strlen (src) + 1);
      if (*dest == NULL)
      outofmem (__FILE__, "HTSACopy");
      strcpy (*dest, src);
    }
  return *dest;
}
/*      String Allocate and Concatenate
 */
PUBLIC char *HTSACat
ARGS2 (
      char **, dest,
      CONST char *, src)
{
  if (src && *src)
    {
      if (*dest)
      {
        int length = strlen (*dest);
        *dest = (char *) realloc (*dest, length + strlen (src) + 1);
        if (*dest == NULL)
          outofmem (__FILE__, "HTSACat");
        strcpy (*dest + length, src);
      }
      else
      {
        *dest = (char *) malloc (strlen (src) + 1);
        if (*dest == NULL)
          outofmem (__FILE__, "HTSACat");
        strcpy (*dest, src);
      }
    }
  return *dest;
}



/*      Strip white space off a string.                         HTStrip()
   **   -------------------------------
   **
   ** On exit,
   **   Return value points to first non-white character, or to 0 if none.
   **   All trailing white space is OVERWRITTEN with zero.
 */
PUBLIC char *HTStrip
ARGS1 (
      char *, s)
{
#define SPACE(c) ((c == ' ') || (c == '\t') || (c == '\n'))
  char *p = s;
  for (p = s; *p; p++)
    ;                   /* Find end of string */
  for (p--; p >= s; p--)
    {
      if (SPACE (*p))
      *p = '\0';        /* Zap trailing blanks */
      else
      break;
    }
  while (SPACE (*s))
    s++;                /* Strip leading blanks */
  return s;
}

/*      Scan a filename for its consituents.                    scan()
   **   ------------------------------------
   **
   ** On entry,
   **   name    points to a document name which may be incomplete.
   ** On exit,
   **      absolute or relative may be nonzero (but not both).
   **   host, anchor and access may be nonzero if they were specified.
   **   Any which are nonzero point to zero terminated strings.
 */
PRIVATE void scan
ARGS2 (
      char *, name,
      struct struct_parts *, parts)
{
  char *after_access;
  char *p;
  /* int length = strlen (name); */

  parts->access = NULL;
  parts->host = NULL;
  parts->absolute = NULL;
  parts->relative = NULL;
  parts->anchor = NULL;

  /*
     **  Scan left-to-right for a scheme (access).
   */
  after_access = name;
  for (p = name; *p; p++)
    {
      if (*p == ':')
      {
        *p = '\0';
        parts->access = name; /* Access name has been specified */
        after_access = (p + 1);
        break;
      }
      if (*p == '/' || *p == '#' || *p == ';' || *p == '?')
      break;
    }

#ifdef NOTDEFINED
  for (p = (name + length - 1); p >= name; p--)
    {
#endif /* NOTDEFINED */
      /*
         **  Scan left-to-right for a fragment (anchor).
       */
      for (p = after_access; *p; p++)
      {
        if (*p == '#')
          {
            parts->anchor = (p + 1);
            *p = '\0';  /* terminate the rest */
          }
      }

      /*
         **  Scan left-to-right for a host or absolute path.
       */
      p = after_access;
      if (*p == '/')
      {
        if (p[1] == '/')
          {
            parts->host = (p + 2);  /* host has been specified    */
            *p = '\0';  /* Terminate access           */
            p = strchr (parts->host, '/');      /* look for end of host name if any */
            if (p != NULL)
            {
              *p = '\0';      /* Terminate host */
              parts->absolute = (p + 1);  /* Root has been found */
            }
          }
        else
          {
            parts->absolute = (p + 1);    /* Root found but no host */
          }
      }
      else
      {
        parts->relative = (*after_access) ?
          after_access : NULL;      /* NULL for "" */
      }

      /*
         **  Check schemes that commonly have unescaped hashes.
       */
      if (parts->access && parts->anchor)
      {
        if ((!parts->host && strcasecomp (parts->access, "lynxcgi")) ||
            !strcasecomp (parts->access, "nntp") ||
            !strcasecomp (parts->access, "snews") ||
            !strcasecomp (parts->access, "news") ||
            !strcasecomp (parts->access, "data"))
          {
            /* 
             *  Access specified but no host and not a lynxcgi URL, so the
             *  anchor may not really be one, e.g., news:j462#36487@foo.bar,
             *  or it's an nntp or snews URL, or news URL with a host.
             *  Restore the '#' in the address.
             */
            *(parts->anchor - 1) = '#';
            parts->anchor = NULL;
          }
      }

#ifdef NOT_DEFINED            /* search is just treated as part of path */
      {
      char *p = (relative ? relative : absolute);
      if (p != NULL)
        {
          char *q = strchr (p, '?');      /* Any search string? */
          if (q != NULL)
            {
            *q = '\0';  /* If so, chop that off. */
            parts->search = (q + 1);
            }
        }
      }
#endif /* NOT_DEFINED */
    }                   /*scan */


/*      Parse a Name relative to another name.                  HTParse()
   **   --------------------------------------
   **
   **   This returns those parts of a name which are given (and requested)
   **   substituting bits from the related name where necessary.
   **
   ** On entry,
   **   aName           A filename given
   **      relatedName     A name relative to which aName is to be parsed
   **      wanted          A mask for the bits which are wanted.
   **
   ** On exit,
   **   returns         A pointer to a malloc'd string which MUST BE FREED
 */
  PUBLIC char *HTParse ARGS3 (
                         CONST char *, aName,
                         CONST char *, relatedName,
                         int, wanted)
  {
    char *result = NULL;
    char *return_value = NULL;
    int len;
    char *name = NULL;
    char *rel = NULL;
    char *p;
    char *access;
    struct struct_parts given, related;

    if (TRACE)
        fprintf (stderr,
             "HTParse: aName:%s   relatedName:%s\n", aName, relatedName);

    /*
       **  Allocate the output string.
     */
      len = strlen (aName) + strlen (relatedName) + 10;
      result = (char *) malloc (len);     /* Lots of space: more than enough */
    if (result == NULL)
        outofmem (__FILE__, "HTParse");
      result[0] = '\0';       /* Clear string  */

    /*
       **  Make working copies of the input strings to cut up.
     */
      StrAllocCopy (name, aName);
      StrAllocCopy (rel, relatedName);

    /*
       **  Cut up the strings into URL fields.
     */
      scan (name, &given);
      scan (rel, &related);

    /*
       **  Handle the scheme (access) field.
     */
    if (given.access && given.host && !given.relative && !given.absolute)
      {
      if (!strcmp (given.access, "http") ||
          !strcmp (given.access, "https") ||
          !strcmp (given.access, "ftp"))
        /*
           **  Assume root.
         */
        given.absolute = "";
      }
    access = given.access ? given.access : related.access;
    if (wanted & PARSE_ACCESS)
      {
      if (access)
        {
          strcat (result, access);
          if (wanted & PARSE_PUNCTUATION)
            strcat (result, ":");
        }
      }

    /*
       **  If different schemes, inherit nothing.
       **
       **  We'll try complying with RFC 1808 and
       **  the Fielding draft, and inherit nothing
       **  if both schemes are given, rather than
       **  only when they differ, except for
       **  file URLs - FM
       **
       **  After trying it for a while, it's still
       **  premature, IHMO, to go along with it, so
       **  this is back to inheriting for identical
       **  schemes whether or not they are "file".
       **  If you want to try it again yourself,
       **  uncomment the strncasecomp() below. - FM
     */
    if ((given.access && related.access) &&
      (                 /* strcasecomp(given.access, "file") || */
        strcmp (given.access, related.access)))
      {
      related.host = NULL;
      related.absolute = NULL;
      related.relative = NULL;
      related.anchor = NULL;
      }

    /*
       **  Handle the host field.
     */
    if (wanted & PARSE_HOST)
      if (given.host || related.host)
      {
        char *tail = result + strlen (result);
        if (wanted & PARSE_PUNCTUATION)
          strcat (result, "//");
        strcat (result, given.host ? given.host : related.host);
#define CLEAN_URLS
#ifdef CLEAN_URLS
        /*
           **  Ignore default port numbers, and trailing dots on FQDNs,
           **  which will only cause identical addresses to look different.
         */
        {
          char *p, *h;
          p = strchr (tail, ':');
          if (p != NULL && !isdigit ((unsigned char) p[1]))
            /*
               **  Colon not followed by a port number.
             */
            *p = '\0';
          if (p != NULL && p != '\0' && access != NULL)
            {
            /*
               **  Port specified.
             */
            if ((!strcmp (access, "http") && !strcmp (p, ":80")) ||
                (!strcmp (access, "gopher") && !strcmp (p, ":70")) ||
                (!strcmp (access, "ftp") && !strcmp (p, ":21")) ||
                (!strcmp (access, "wais") && !strcmp (p, ":210")) ||
                (!strcmp (access, "nntp") && !strcmp (p, ":119")) ||
                (!strcmp (access, "news") && !strcmp (p, ":119")) ||
                (!strcmp (access, "snews") && !strcmp (p, ":563")) ||
                (!strcmp (access, "finger") && !strcmp (p, ":79")) ||
                (!strcmp (access, "cso") && !strcmp (p, ":105")))
              *p = '\0';      /* It is the default: ignore it */
            }
          if (p == NULL)
            {
            int len = strlen (tail);

            if (len > 0)
              {
                h = tail + len - 1;       /* last char of hostname */
                if (*h == '.')
                  *h = '\0';  /* chop final . */
              }
            }
          else
            {
            h = p;
            h--;        /* End of hostname */
            if (*h == '.')
              {
                /*
                   **  Slide p over h.
                 */
                while (*p != '\0')
                  *h++ = *p++;
                *h = '\0';    /* terminate */
              }
            }
        }
#endif /* CLEAN_URLS */
      }

    /*
       **  If different hosts, inherit no path.
     */
    if (given.host && related.host)
      if (strcmp (given.host, related.host) != 0)
      {
        related.absolute = NULL;
        related.relative = NULL;
        related.anchor = NULL;
      }

    /*
       **  Handle the path.
     */
    if (wanted & PARSE_PATH)
      {
      if (access && !given.absolute && given.relative)
        {
          if (!strcasecomp (access, "nntp") ||
            !strcasecomp (access, "snews") ||
            (!strcasecomp (access, "news") &&
             !strncasecomp (result, "news://", 7)))
            {
            /*
             *  Treat all given nntp or snews paths,
             *  or given paths for news URLs with a host,
             *  as absolute.
             */
            given.absolute = given.relative;
            given.relative = NULL;
            }
        }
      if (given.absolute)
        {               /* All is given */
          if (wanted & PARSE_PUNCTUATION)
            strcat (result, "/");
          strcat (result, given.absolute);
          if (TRACE)
            fprintf (stderr, "1\n");
        }
      else if (related.absolute)
        {               /* Adopt path not name */
          strcat (result, "/");
          strcat (result, related.absolute);
          if (given.relative)
            {
            p = strchr (result, '?');     /* Search part? */
            if (p == NULL)
              p = (result + strlen (result) - 1);
            for (; *p != '/'; p--)
              ;         /* last / */
            p[1] = '\0';      /* Remove filename */
            strcat (result, given.relative);    /* Add given one */
            HTSimplify (result);
            }
          if (TRACE)
            fprintf (stderr, "2\n");
        }
      else if (given.relative)
        {
          strcat (result, given.relative);      /* what we've got */
          if (TRACE)
            fprintf (stderr, "3\n");
        }
      else if (related.relative)
        {
          strcat (result, related.relative);
          if (TRACE)
            fprintf (stderr, "4\n");
        }
      else
        {               /* No inheritance */
          if (strncasecomp (aName, "lynxcgi:", 8) &&
            strncasecomp (aName, "lynxexec:", 9) &&
            strncasecomp (aName, "lynxprog:", 9))
            {
            strcat (result, "/");
            }
          if (!strcmp (result, "news:/"))
            result[5] = '*';
          if (TRACE)
            fprintf (stderr, "5\n");
        }
      }

    /*
       **  Handle the fragment (anchor).
     */
    if (wanted & PARSE_ANCHOR)
      if ((given.anchor && *given.anchor) ||
        (!given.anchor && related.anchor))
      {
        if (wanted & PARSE_PUNCTUATION)
          strcat (result, "#");
        strcat (result, (given.anchor) ?
              given.anchor : related.anchor);
      }
    if (TRACE)
      fprintf (stderr, "HTParse: result:%s\n", result);
    FREE (rel);
    FREE (name);

    StrAllocCopy (return_value, result);
    FREE (result);

    return return_value;      /* exactly the right length */
  }

/*      Simplify a filename.                            HTSimplify()
   **   --------------------
   **
   **  A unix-style file is allowed to contain the seqeunce xxx/../ which may
   **  be replaced by "" , and the seqeunce "/./" which may be replaced by "/".
   **  Simplification helps us recognize duplicate filenames.
   **
   **   Thus,   /etc/junk/../fred       becomes /etc/fred
   **           /etc/junk/./fred        becomes /etc/junk/fred
   **
   **      but we should NOT change
   **           http://fred.xxx.edu/../..
   **
   **   or      ../../albert.html
 */
  PUBLIC void HTSimplify ARGS1 (
                         char *, filename)
  {
    char *p;
    char *q, *q1;

    if (filename == NULL)
        return;

    if ((filename[0] && filename[1]) && strchr (filename, '/') != NULL)
      {
      for (p = (filename + 2); *p; p++)
        {
          if (*p == '/')
            {
            if ((p[1] == '.') && (p[2] == '.') &&
                (p[3] == '/' || p[3] == '\0'))
              {
                /*
                   **  Handle "/../" or "/..".
                 */
                for (q = (p - 1); (q >= filename) && (*q != '/'); q--)
                  /*
                     **  Back up to previous slash or beginning of string.
                   */
                  ;
                if ((q[0] == '/') && strncmp (q, "/../", 4) &&
                  !((q - 1) > filename && q[-1] == '/'))
                  {
                  /*
                     **  Not at beginning of string or in a
                     **  host field, so remove the "/xxx/..".
                   */
                  q1 = (p + 3);
                  p = q;
                  while (*q1 != '\0')
                    *p++ = *q1++;
                  *p = '\0';  /* terminate */
#ifdef NOTDEFINED
                  /*
                     **  Make sure filename has at least one slash.
                   */
                  if (*filename == '\0')
                    {
                      *filename = '/';
                      *(filename + 1) = '\0';
                    }
#endif                        /* NOTDEFINED */
                  /*
                     **  Start again with previous slash.
                   */
                  p = (q - 1);
                  }
              }
            else if (p[1] == '.' && p[2] == '/')
              {
                /*
                   **  Handle "./" by removing the characters.
                 */
                q = p;
                q1 = (p + 2);
                while (*q1 != '\0')
                  *q++ = *q1++;
                *q = '\0';    /* terminate */
                p--;
              }
            else if (p[1] == '.' && p[2] == '\0')
              {
                /*
                   **  Handle terminal "." by removing the character.
                 */
                p[1] = '\0';
              }
            }
        }
      }
  }

/*      Make Relative Name.                                     HTRelative()
   **   -------------------
   **
   ** This function creates and returns a string which gives an expression of
   ** one address as related to another. Where there is no relation, an absolute
   ** address is retured.
   **
   **  On entry,
   **   Both names must be absolute, fully qualified names of nodes
   **   (no anchor bits)
   **
   **  On exit,
   **   The return result points to a newly allocated name which, if
   **   parsed by HTParse relative to relatedName, will yield aName.
   **   The caller is responsible for freeing the resulting name later.
   **
 */
  PUBLIC char *HTRelative ARGS2 (
                          CONST char *, aName,
                          CONST char *, relatedName)
  {
    char *result = NULL;
    CONST char *p = aName;
    CONST char *q = relatedName;
    CONST char *after_access = NULL;
    CONST char *path = NULL;
    CONST char *last_slash = NULL;
    int slashes = 0;

    for (; *p; p++, q++)
      {                       /* Find extent of match */
      if (*p != *q)
        break;
      if (*p == ':')
        after_access = p + 1;
      if (*p == '/')
        {
          last_slash = p;
          slashes++;
          if (slashes == 3)
            path = p;
        }
      }

    /* q, p point to the first non-matching character or zero */

    if (!after_access)
      {                       /* Different access */
      StrAllocCopy (result, aName);
      }
    else if (slashes < 3)
      {                       /* Different nodes */
      StrAllocCopy (result, after_access);
      }
    else if (slashes == 3)
      {                       /* Same node, different path */
      StrAllocCopy (result, path);
      }
    else
      {                       /* Some path in common */
      int levels = 0;
      for (; *q && (*q != '#'); q++)
        if (*q == '/')
          levels++;
      result = (char *) malloc (3 * levels + strlen (last_slash) + 1);
      if (result == NULL)
        outofmem (__FILE__, "HTRelative");
      result[0] = '\0';
      for (; levels; levels--)
        strcat (result, "../");
      strcat (result, last_slash + 1);
      }
    if (TRACE)
      fprintf (stderr, "HT: `%s' expressed relative to\n    `%s' is\n   `%s'.",
             aName, relatedName, result);
    return result;
  }

Generated by  Doxygen 1.6.0   Back to index