[svn] Minor fixes prompted by `lint'.

[wget] / src / utils.c
diff --git a/src/utils.c b/src/utils.c

index a8e30b9fa5d61dde680623715baf4b1c32a2a82e..32c17b4bfd9026e4b8fb8240c62bb580edf0cfc4 100644 (file)
--- a/src/utils.c
+++ b/src/utils.c
@@ -1,20 +1,21 @@
  /* Various functions of utilitarian nature.
-   Copyright (C) 1995, 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
+   Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
+   Free Software Foundation, Inc.
  
-This file is part of Wget.
+This file is part of GNU Wget.
  
-This program is free software; you can redistribute it and/or modify
+GNU Wget is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.
  
-This program is distributed in the hope that it will be useful,
+GNU Wget is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
+along with Wget; if not, write to the Free Software
  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  
  #include <config.h>
@@ -50,6 +51,14 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  #include <fcntl.h>
  #include <assert.h>
  
+/* For TIOCGWINSZ and friends: */
+#ifdef HAVE_SYS_IOCTL_H
+# include <sys/ioctl.h>
+#endif
+#ifdef HAVE_TERMIOS_H
+# include <termios.h>
+#endif
+
  #include "wget.h"
  #include "utils.h"
  #include "fnmatch.h"
@@ -303,6 +312,31 @@ xstrdup_debug (const char *s, const char *source_file, int source_line)
  
  #endif /* DEBUG_MALLOC */
  \f
+/* Return a freshly allocated duplicate of S (made with xstrdup())
+   in which every character has been passed through TOLOWER.  */
+char *
+xstrdup_lower (const char *s)
+{
+  char *lowered = xstrdup (s);
+  char *cursor;
+  for (cursor = lowered; *cursor != '\0'; ++cursor)
+    *cursor = TOLOWER (*cursor);
+  return lowered;
+}
+
+/* Count the occurrences of CHR in the NUL-terminated STRING; the
+   terminating NUL itself is never examined as a candidate.  */
+
+int
+count_char (const char *string, char chr)
+{
+  int hits = 0;
+  const char *scan = string;
+  while (*scan != '\0')
+    hits += (*scan++ == chr);
+  return hits;
+}
+
  /* Copy the string formed by two pointers (one on the beginning, other
     on the char after the last char) to a new, malloc-ed location.
     0-terminate it.  */
@@ -403,59 +437,11 @@ datetime_str (time_t *tm)
            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
    return output;
  }
-
-/* Returns an error message for ERRNUM.  #### This requires more work.
-   This function, as well as the whole error system, is very
-   ill-conceived.  */
-const char *
-uerrmsg (uerr_t errnum)
-{
-  switch (errnum)
-    {
-    case URLUNKNOWN:
-      return _("Unknown/unsupported protocol");
-      break;
-    case URLBADPORT:
-      return _("Invalid port specification");
-      break;
-    case URLBADHOST:
-      return _("Invalid host name");
-      break;
-    default:
-      abort ();
-      /* $@#@#$ compiler.  */
-      return NULL;
-    }
-}
  \f
  /* The Windows versions of the following two functions are defined in
     mswindows.c.  */
  
-/* A cuserid() immitation using getpwuid(), to avoid hassling with
-   utmp.  Besides, not all systems have cuesrid().  Under Windows, it
-   is defined in mswindows.c.
-
-   If WHERE is non-NULL, the username will be stored there.
-   Otherwise, it will be returned as a static buffer (as returned by
-   getpwuid()).  In the latter case, the buffer should be copied
-   before calling getpwuid() or pwd_cuserid() again.  */
  #ifndef WINDOWS
-char *
-pwd_cuserid (char *where)
-{
-  struct passwd *pwd;
-
-  if (!(pwd = getpwuid (getuid ())) || !pwd->pw_name)
-    return NULL;
-  if (where)
-    {
-      strcpy (where, pwd->pw_name);
-      return where;
-    }
-  else
-    return pwd->pw_name;
-}
-
  void
  fork_to_background (void)
  {
@@ -487,6 +473,17 @@ fork_to_background (void)
  }
  #endif /* not WINDOWS */
  \f
+#if 0
+/* Debugging aid: path_simplify() applied to a fresh copy of ORIG.  */
+char *
+ps (char *orig)
+{
+  char *scratch = xstrdup (orig);
+  (void) path_simplify (scratch);
+  return scratch;
+}
+#endif
+
  /* Canonicalize PATH, and return a new path.  The new path differs from PATH
     in that:
         Multple `/'s are collapsed to a single `/'.
@@ -495,53 +492,43 @@ fork_to_background (void)
         Non-leading `../'s and trailing `..'s are handled by removing
         portions of the path.
  
-   E.g. "a/b/c/./../d/.." will yield "a/b".  This function originates
-   from GNU Bash.
+   E.g. "a/b/c/./../d/.." will yield "a/b/".  This function originates
+   from GNU Bash and has been mutilated beyond recognition for use in
+   Wget.
  
     Changes for Wget:
         Always use '/' as stub_char.
         Don't check for local things using canon_stat.
         Change the original string instead of strdup-ing.
-       React correctly when beginning with `./' and `../'.  */
-void
+       React correctly when beginning with `./' and `../'.
+       Don't zip out trailing slashes.
+       Return a value indicating whether any modifications took place.
+
+   If you dare change this function, take a careful look at the test
+   cases below, and make sure that they pass.  */
+
+int
  path_simplify (char *path)
  {
-  register int i, start, ddot;
-  char stub_char;
+  register int i, start;
+  int changes = 0;
  
    if (!*path)
-    return;
-
-  /*stub_char = (*path == '/') ? '/' : '.';*/
-  stub_char = '/';
+    return 0;
  
-  /* Addition: Remove all `./'-s preceding the string.  If `../'-s
-     precede, put `/' in front and remove them too.  */
-  i = 0;
-  ddot = 0;
-  while (1)
-    {
-      if (path[i] == '.' && path[i + 1] == '/')
-       i += 2;
-      else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
-       {
-         i += 3;
-         ddot = 1;
-       }
-      else
-       break;
-    }
-  if (i)
-    strcpy (path, path + i - ddot);
+  if (path[0] == '/')
+    /* Preserve initial '/'. */
+    ++path;
  
-  /* Replace single `.' or `..' with `/'.  */
+  /* Nix out a path that consists solely of `.' or `..'.  */
    if ((path[0] == '.' && path[1] == '\0')
        || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
      {
-      path[0] = stub_char;
-      path[1] = '\0';
-      return;
+      path[0] = '\0';
+      changes = 1;
+      return changes;
      }
+
    /* Walk along PATH looking for things to compact.  */
    i = 0;
    while (1)
@@ -566,14 +553,7 @@ path_simplify (char *path)
         {
           strcpy (path + start + 1, path + i);
           i = start + 1;
-       }
-
-      /* Check for trailing `/'.  */
-      if (start && !path[i])
-       {
-       zero_last:
-         path[--i] = '\0';
-         break;
+         changes = 1;
         }
  
        /* Check for `../', `./' or trailing `.' by itself.  */
@@ -581,13 +561,18 @@ path_simplify (char *path)
         {
           /* Handle trailing `.' by itself.  */
           if (!path[i + 1])
-           goto zero_last;
+           {
+             path[--i] = '\0';
+             changes = 1;
+             break;
+           }
  
           /* Handle `./'.  */
           if (path[i + 1] == '/')
             {
               strcpy (path + i, path + i + 1);
               i = (start < 0) ? 0 : start;
+             changes = 1;
               continue;
             }
  
@@ -596,19 +581,55 @@ path_simplify (char *path)
               (path[i + 2] == '/' || !path[i + 2]))
             {
               while (--start > -1 && path[start] != '/');
-             strcpy (path + start + 1, path + i + 2);
+             strcpy (path + start + 1, path + i + 2 + (start == -1 && path[i + 2]));
               i = (start < 0) ? 0 : start;
+             changes = 1;
               continue;
             }
         }       /* path == '.' */
      } /* while */
  
-  if (!*path)
+  /* Addition: Remove all `./'-s and `../'-s preceding the string.  */
+  i = 0;
+  while (1)
+    {
+      if (path[i] == '.' && path[i + 1] == '/')
+       i += 2;
+      else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
+       i += 3;
+      else
+       break;
+    }
+  if (i)
      {
-      *path = stub_char;
-      path[1] = '\0';
+      strcpy (path, path + i - 0);
+      changes = 1;
      }
+
+  return changes;
  }
+
+/* Test cases:
+   ps("")                   -> ""
+   ps("/")                  -> "/"
+   ps(".")                  -> ""
+   ps("..")                 -> ""
+   ps("/.")                 -> "/"
+   ps("/..")                -> "/"
+   ps("foo")                -> "foo"
+   ps("foo/bar")            -> "foo/bar"
+   ps("foo//bar")           -> "foo/bar"             (possibly a bug)
+   ps("foo/../bar")         -> "bar"
+   ps("foo/bar/..")         -> "foo/"
+   ps("foo/bar/../x")       -> "foo/x"
+   ps("foo/bar/../x/")      -> "foo/x/"
+   ps("foo/..")             -> ""
+   ps("/foo/..")            -> "/"
+   ps("a/b/../../c")        -> "c"
+   ps("/a/b/../../c")       -> "/c"
+   ps("./a/../b")           -> "b"
+   ps("/./a/../b")          -> "/b"
+*/
  \f
  /* "Touch" FILE, i.e. make its atime and mtime equal to the time
     specified with TM.  */
@@ -751,6 +772,32 @@ make_directory (const char *directory)
      }
    return 0;
  }
+
+/* Replace the last path component of BASE with FILE and return the
+   result in newly allocated storage.  BASE is kept up to and
+   including its final `/'; with no `/' in BASE, FILE is just copied.
+   (A simpler and gentler cousin of uri_merge_1.)
+
+   file_merge("/foo/bar", "baz")  => "/foo/baz"
+   file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
+   file_merge("foo", "bar")       => "bar"  */
+
+char *
+file_merge (const char *base, const char *file)
+{
+  const char *last_slash = strrchr (base, '/');
+  size_t dir_len;               /* length of BASE's prefix, slash included */
+  char *merged;
+
+  if (last_slash == NULL)
+    return xstrdup (file);
+
+  dir_len = (size_t) (last_slash - base) + 1;
+  merged = (char *)xmalloc (dir_len + strlen (file) + 1);
+  memcpy (merged, base, dir_len);
+  strcpy (merged + dir_len, file);
+  return merged;
+}
  \f
  static int in_acclist PARAMS ((const char *const *, const char *, int));
  
@@ -839,8 +886,8 @@ accdir (const char *directory, enum accd flags)
     match_backwards ("abc", "bc") -> 1
     match_backwards ("abc", "ab") -> 0
     match_backwards ("abc", "abc") -> 1 */
-static int
-match_backwards (const char *string, const char *pattern)
+int
+match_tail (const char *string, const char *pattern)
  {
    int i, j;
  
@@ -855,7 +902,7 @@ match_backwards (const char *string, const char *pattern)
  }
  
  /* Checks whether string S matches each element of ACCEPTS.  A list
-   element are matched either with fnmatch() or match_backwards(),
+   element is matched either with fnmatch() or match_tail(),
     according to whether the element contains wildcards or not.
  
     If the BACKWARD is 0, don't do backward comparison -- just compare
@@ -876,7 +923,7 @@ in_acclist (const char *const *accepts, const char *s, int backward)
         {
           if (backward)
             {
-             if (match_backwards (s, *accepts))
+             if (match_tail (s, *accepts))
                 return 1;
             }
           else
@@ -889,7 +936,7 @@ in_acclist (const char *const *accepts, const char *s, int backward)
    return 0;
  }
  
-/* Return the malloc-ed suffix of STR.  For instance:
+/* Return the location of STR's suffix (file extension).  Examples:
     suffix ("foo.bar")       -> "bar"
     suffix ("foo.bar.baz")   -> "baz"
     suffix ("/foo/bar")      -> NULL
@@ -899,22 +946,28 @@ suffix (const char *str)
  {
    int i;
  
-  for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--);
+  for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
+    ;
+
    if (str[i++] == '.')
-    return xstrdup (str + i);
+    return (char *)str + i;
    else
      return NULL;
  }
  
  /* Read a line from FP.  The function reallocs the storage as needed
     to accomodate for any length of the line.  Reallocs are done
-   storage exponentially, doubling the storage after each overflow to
-   minimize the number of calls to realloc() and fgets().  The newline
+   exponentially, doubling the storage after each overflow to minimize
+   the number of calls to realloc() and fgets().  The newline
     character at the end of line is retained.
  
     After end-of-file is encountered without anything being read, NULL
     is returned.  NULL is also returned on error.  To distinguish
-   between these two cases, use the stdio function ferror().  */
+   between these two cases, use the stdio function ferror().
+
+   A future version of this function will be rewritten to use fread()
+   instead of fgets(), and to return the length of the line, which
+   will make the function usable on files with binary content.  */
  
  char *
  read_whole_line (FILE *fp)
@@ -926,9 +979,14 @@ read_whole_line (FILE *fp)
    while (fgets (line + length, bufsize - length, fp))
      {
        length += strlen (line + length);
-      assert (length > 0);
+      if (length == 0)
+       /* Possible for example when reading from a binary file where
+          a line begins with \0.  */
+       continue;
+
        if (line[length - 1] == '\n')
         break;
+
        /* fgets() guarantees to read the whole line, or to use up the
           space we've given it.  We can double the buffer
           unconditionally.  */
@@ -1230,7 +1288,7 @@ string_set_add (struct hash_table *ht, const char *s)
    /* First check whether the set element already exists.  If it does,
       do nothing so that we don't have to free() the old element and
       then strdup() a new one.  */
-  if (hash_table_exists (ht, s))
+  if (hash_table_contains (ht, s))
      return;
  
    /* We use "1" as value.  It provides us a useful and clear arbitrary
@@ -1240,12 +1298,12 @@ string_set_add (struct hash_table *ht, const char *s)
    hash_table_put (ht, xstrdup (s), "1");
  }
  
-/* Synonym for hash_table_exists... */
+/* Synonym for hash_table_contains... */
  
  int
-string_set_exists (struct hash_table *ht, const char *s)
+string_set_contains (struct hash_table *ht, const char *s)
  {
-  return hash_table_exists (ht, s);
+  return hash_table_contains (ht, s);
  }
  
  static int
@@ -1340,7 +1398,7 @@ legible (long l)
     bytes are sufficient.  Using more might be a good idea.
  
     This function does not go through the hoops that long_to_string
-   goes to because it doesn't need to be fast.  (It's called perhaps
+   goes to because it doesn't aspire to be fast.  (It's called perhaps
     once in a Wget run.)  */
  
  static void
@@ -1383,73 +1441,40 @@ numdigit (long a)
  {
    int res = 1;
    if (a < 0)
-    a = -a;
+    {
+      a = -a;
+      ++res;
+    }
    while ((a /= 10) != 0)
      ++res;
    return res;
  }
  
  #define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
-
-#define DIGITS_1(figure)                       \
-    ONE_DIGIT (figure)
-#define DIGITS_2(figure)                       \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_1 ((figure) / 10)
-#define DIGITS_3(figure)                       \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_2 ((figure) / 10)
-#define DIGITS_4(figure)                       \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_3 ((figure) / 10)
-#define DIGITS_5(figure)                       \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_4 ((figure) / 10)
-#define DIGITS_6(figure)                       \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_5 ((figure) / 10)
-#define DIGITS_7(figure)                       \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_6 ((figure) / 10)
-#define DIGITS_8(figure)                       \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_7 ((figure) / 10)
-#define DIGITS_9(figure)                       \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_8 ((figure) / 10)
-#define DIGITS_10(figure)                      \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_9 ((figure) / 10)
-
-/* DIGITS_<11-20> are only used on 64-bit machines. */
-
-#define DIGITS_11(figure)                      \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_10 ((figure) / 10)
-#define DIGITS_12(figure)                      \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_11 ((figure) / 10)
-#define DIGITS_13(figure)                      \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_12 ((figure) / 10)
-#define DIGITS_14(figure)                      \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_13 ((figure) / 10)
-#define DIGITS_15(figure)                      \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_14 ((figure) / 10)
-#define DIGITS_16(figure)                      \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_15 ((figure) / 10)
-#define DIGITS_17(figure)                      \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_16 ((figure) / 10)
-#define DIGITS_18(figure)                      \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_17 ((figure) / 10)
-#define DIGITS_19(figure)                      \
-    (ONE_DIGIT (figure), n %= (figure));       \
-    DIGITS_18 ((figure) / 10)
+#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))
+
+#define DIGITS_1(figure) ONE_DIGIT (figure)
+#define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
+#define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
+#define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
+#define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
+#define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
+#define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
+#define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
+#define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
+#define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)
+
+/* DIGITS_<11-19> are only used on machines with 64-bit longs. */
+
+#define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
+#define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
+#define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
+#define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
+#define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
+#define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
+#define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
+#define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
+#define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)
  
  /* Print NUMBER to BUFFER in base 10.  This is completely equivalent
     to `sprintf(buffer, "%ld", number)', only much faster.
@@ -1460,9 +1485,9 @@ numdigit (long a)
     slow compared to this function.
  
     BUFFER should accept as many bytes as you expect the number to take
-   up.  On 64-bit machines, the maximum needed size is 24 bytes.  That
-   includes all the digits, as well as the `-' sign for negative
-   numbers and the trailing \0.  */
+   up.  On machines with 64-bit longs the maximum needed size is 24
+   bytes.  That includes the worst-case digits, the optional `-' sign,
+   and the trailing \0.  */
  
  void
  long_to_string (char *buffer, long number)
@@ -1512,6 +1537,8 @@ long_to_string (char *buffer, long number)
  }
  
  #undef ONE_DIGIT
+#undef ONE_DIGIT_ADVANCE
+
  #undef DIGITS_1
  #undef DIGITS_2
  #undef DIGITS_3
@@ -1574,7 +1601,7 @@ struct wget_timer {
  };
  
  /* Allocate a timer.  It is not legal to do anything with a freshly
-   allocated timer, except call wtimer_reset().  */
+   allocated timer, except call wtimer_reset() or wtimer_delete().  */
  
  struct wget_timer *
  wtimer_allocate (void)
@@ -1760,3 +1787,62 @@ html_quote_string (const char *s)
    *p = '\0';
    return res;
  }
+
+/* Report the column count of the terminal attached to stderr, or 0
+   when it cannot be obtained: no TIOCGWINSZ at compile time,
+   opt.lfilename being set, or a failing ioctl().  */
+
+int
+determine_screen_width (void)
+{
+#ifdef TIOCGWINSZ
+  struct winsize size;
+
+  /* opt.lfilename is set -- presumably output goes to a log file, so
+     the terminal width is of no interest.  */
+  if (opt.lfilename)
+    return 0;
+
+  /* A failure here is most likely ENOTTY.  */
+  if (ioctl (fileno (stderr), TIOCGWINSZ, &size) < 0)
+    return 0;
+
+  return size.ws_col;
+#else  /* not TIOCGWINSZ */
+  return 0;
+#endif /* not TIOCGWINSZ */
+}
+
+#if 0
+/* A debugging function for checking whether an MD5 library works. */
+
+#include "gen-md5.h"
+
+/* Feed the NUL-terminated string BUF through the gen_md5_* interface
+   and render the 16 bytes left in RAW by gen_md5_finish() as 32
+   characters produced by XDIGIT_TO_xchar (presumably hex digits).
+   The result lives in a static buffer that each call overwrites.  */
+char *
+debug_test_md5 (char *buf)
+{
+  static char res[33];
+  unsigned char raw[16];
+  int i;
+  ALLOCA_MD5_CONTEXT (ctx);
+
+  gen_md5_init (ctx);
+  gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
+  gen_md5_finish (ctx, raw);
+
+  /* Emit two characters per digest byte, high nibble first.  */
+  for (i = 0; i < 16; i++)
+    {
+      unsigned char octet = raw[i];
+      res[2 * i] = XDIGIT_TO_xchar (octet >> 4);
+      res[2 * i + 1] = XDIGIT_TO_xchar (octet & 0xf);
+    }
+  res[32] = '\0';
+
+  return res;
+}
+#endif