sjero.net Git - wget/blob - src/utils.c

   1 /* Various functions of utilitarian nature.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget; if not, write to the Free Software
  19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  20
  21 #include <config.h>
  22
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #ifdef HAVE_STRING_H
  26 # include <string.h>
  27 #else  /* not HAVE_STRING_H */
  28 # include <strings.h>
  29 #endif /* not HAVE_STRING_H */
  30 #include <sys/types.h>
  31 #ifdef HAVE_UNISTD_H
  32 # include <unistd.h>
  33 #endif
  34 #ifdef HAVE_MMAP
  35 # include <sys/mman.h>
  36 #endif
  37 #ifdef HAVE_PWD_H
  38 # include <pwd.h>
  39 #endif
  40 #include <limits.h>
  41 #ifdef HAVE_UTIME_H
  42 # include <utime.h>
  43 #endif
  44 #ifdef HAVE_SYS_UTIME_H
  45 # include <sys/utime.h>
  46 #endif
  47 #include <errno.h>
  48 #ifdef NeXT
  49 # include <libc.h>              /* for access() */
  50 #endif
  51 #include <fcntl.h>
  52 #include <assert.h>
  53 #ifdef HAVE_SYS_IOCTL_H
  54 # include <sys/ioctl.h>
  55 #endif
  56
  57 #include "wget.h"
  58 #include "utils.h"
  59 #include "fnmatch.h"
  60 #include "hash.h"
  61
  62 #ifndef errno
  63 extern int errno;
  64 #endif
  65
  66 /* This section implements several wrappers around the basic
  67    allocation routines.  This is done for two reasons: first, so that
  68    the callers of these functions need not consistently check for
  69    errors.  If there is not enough virtual memory for running Wget,
  70    something is seriously wrong, and Wget exits with an appropriate
  71    error message.
  72
  73    The second reason why these are useful is that, if DEBUG_MALLOC is
  74    defined, they also provide a handy (if crude) malloc debugging
  75    interface that checks memory leaks.  */
  76
  77 /* Croak the fatal memory error and bail out with non-zero exit
  78    status.  */
  79 static void
  80 memfatal (const char *what)
  81 {
  82   /* HACK: expose save_log_p from log.c, so we can turn it off in
  83      order to prevent saving the log.  Saving the log is dangerous
  84      because logprintf() and logputs() can call malloc(), so this
  85      could infloop.  When logging is turned off, infloop can no longer
  86      happen.
  87
  88      #### This is no longer really necessary because the new routines
  89      in log.c cons only if the line exceeds eighty characters.  But
  90      this can come at the end of a line, so it's OK to be careful.
  91
  92      On a more serious note, it would be good to have a
  93      log_forced_shutdown() routine that exposes this cleanly.  */
  94   extern int save_log_p;
  95
  96   save_log_p = 0;
  97   logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
  98   exit (1);
  99 }
 100
 101 /* These functions end with _real because they need to be
 102    distinguished from the debugging functions, and from the macros.
 103    Explanation follows:
 104
 105    If memory debugging is not turned on, wget.h defines these:
 106
 107      #define xmalloc xmalloc_real
 108      #define xrealloc xrealloc_real
 109      #define xstrdup xstrdup_real
 110      #define xfree free
 111
 112    In case of memory debugging, the definitions are a bit more
 113    complex, because we want to provide more information, *and* we want
 114    to call the debugging code.  (The former is the reason why xmalloc
 115    and friends need to be macros in the first place.)  Then it looks
 116    like this:
 117
 118      #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
 119      #define xfree(a)   xfree_debug (a, __FILE__, __LINE__)
 120      #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
 121      #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
 122
 123    Each of the *_debug function does its magic and calls the real one.  */
 124
 125 #ifdef DEBUG_MALLOC
 126 # define STATIC_IF_DEBUG static
 127 #else
 128 # define STATIC_IF_DEBUG
 129 #endif
 130
 131 STATIC_IF_DEBUG void *
 132 xmalloc_real (size_t size)
 133 {
 134   void *ptr = malloc (size);
 135   if (!ptr)
 136     memfatal ("malloc");
 137   return ptr;
 138 }
 139
 140 STATIC_IF_DEBUG void *
 141 xrealloc_real (void *ptr, size_t newsize)
 142 {
 143   void *newptr;
 144
 145   /* Not all Un*xes have the feature of realloc() that calling it with
 146      a NULL-pointer is the same as malloc(), but it is easy to
 147      simulate.  */
 148   if (ptr)
 149     newptr = realloc (ptr, newsize);
 150   else
 151     newptr = malloc (newsize);
 152   if (!newptr)
 153     memfatal ("realloc");
 154   return newptr;
 155 }
 156
 157 STATIC_IF_DEBUG char *
 158 xstrdup_real (const char *s)
 159 {
 160   char *copy;
 161
 162 #ifndef HAVE_STRDUP
 163   int l = strlen (s);
 164   copy = malloc (l + 1);
 165   if (!copy)
 166     memfatal ("strdup");
 167   memcpy (copy, s, l + 1);
 168 #else  /* HAVE_STRDUP */
 169   copy = strdup (s);
 170   if (!copy)
 171     memfatal ("strdup");
 172 #endif /* HAVE_STRDUP */
 173
 174   return copy;
 175 }
 176
 177 #ifdef DEBUG_MALLOC
 178
 179 /* Crude home-grown routines for debugging some malloc-related
 180    problems.  Featured:
 181
 182    * Counting the number of malloc and free invocations, and reporting
 183      the "balance", i.e. how many times more malloc was called than it
 184      was the case with free.
 185
 186    * Making malloc store its entry into a simple array and free remove
 187      stuff from that array.  At the end, print the pointers which have
 188      not been freed, along with the source file and the line number.
 189      This also has the side-effect of detecting freeing memory that
 190      was never allocated.
 191
 192    Note that this kind of memory leak checking strongly depends on
 193    every malloc() being followed by a free(), even if the program is
 194    about to finish.  Wget is careful to free the data structure it
 195    allocated in init.c.  */
 196
 197 static int malloc_count, free_count;
 198
 199 static struct {
 200   char *ptr;
 201   const char *file;
 202   int line;
 203 } malloc_debug[100000];
 204
 205 /* Both register_ptr and unregister_ptr take O(n) operations to run,
 206    which can be a real problem.  It would be nice to use a hash table
 207    for malloc_debug, but the functions in hash.c are not suitable
 208    because they can call malloc() themselves.  Maybe it would work if
 209    the hash table were preallocated to a huge size, and if we set the
 210    rehash threshold to 1.0.  */
 211
 212 /* Register PTR in malloc_debug.  Abort if this is not possible
 213    (presumably due to the number of current allocations exceeding the
 214    size of malloc_debug.)  */
 215
 216 static void
 217 register_ptr (void *ptr, const char *file, int line)
 218 {
 219   int i;
 220   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 221     if (malloc_debug[i].ptr == NULL)
 222       {
 223         malloc_debug[i].ptr = ptr;
 224         malloc_debug[i].file = file;
 225         malloc_debug[i].line = line;
 226         return;
 227       }
 228   abort ();
 229 }
 230
 231 /* Unregister PTR from malloc_debug.  Abort if PTR is not present in
 232    malloc_debug.  (This catches calling free() with a bogus pointer.)  */
 233
 234 static void
 235 unregister_ptr (void *ptr)
 236 {
 237   int i;
 238   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 239     if (malloc_debug[i].ptr == ptr)
 240       {
 241         malloc_debug[i].ptr = NULL;
 242         return;
 243       }
 244   abort ();
 245 }
 246
 247 /* Print the malloc debug stats that can be gathered from the above
 248    information.  Currently this is the count of mallocs, frees, the
 249    difference between the two, and the dump of the contents of
 250    malloc_debug.  The last part are the memory leaks.  */
 251
 252 void
 253 print_malloc_debug_stats (void)
 254 {
 255   int i;
 256   printf ("\nMalloc:  %d\nFree:    %d\nBalance: %d\n\n",
 257           malloc_count, free_count, malloc_count - free_count);
 258   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 259     if (malloc_debug[i].ptr != NULL)
 260       printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
 261               malloc_debug[i].file, malloc_debug[i].line);
 262 }
 263
 264 void *
 265 xmalloc_debug (size_t size, const char *source_file, int source_line)
 266 {
 267   void *ptr = xmalloc_real (size);
 268   ++malloc_count;
 269   register_ptr (ptr, source_file, source_line);
 270   return ptr;
 271 }
 272
 273 void
 274 xfree_debug (void *ptr, const char *source_file, int source_line)
 275 {
 276   assert (ptr != NULL);
 277   ++free_count;
 278   unregister_ptr (ptr);
 279   free (ptr);
 280 }
 281
 282 void *
 283 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
 284 {
 285   void *newptr = xrealloc_real (ptr, newsize);
 286   if (!ptr)
 287     {
 288       ++malloc_count;
 289       register_ptr (newptr, source_file, source_line);
 290     }
 291   else if (newptr != ptr)
 292     {
 293       unregister_ptr (ptr);
 294       register_ptr (newptr, source_file, source_line);
 295     }
 296   return newptr;
 297 }
 298
 299 char *
 300 xstrdup_debug (const char *s, const char *source_file, int source_line)
 301 {
 302   char *copy = xstrdup_real (s);
 303   ++malloc_count;
 304   register_ptr (copy, source_file, source_line);
 305   return copy;
 306 }
 307
 308 #endif /* DEBUG_MALLOC */
 309 \f
 310 /* Utility function: like xstrdup(), but also lowercases S.  */
 311
 312 char *
 313 xstrdup_lower (const char *s)
 314 {
 315   char *copy = xstrdup (s);
 316   char *p = copy;
 317   for (; *p; p++)
 318     *p = TOLOWER (*p);
 319   return copy;
 320 }
 321
 322 /* Return a count of how many times CHR occurs in STRING. */
 323
 324 int
 325 count_char (const char *string, char chr)
 326 {
 327   const char *p;
 328   int count = 0;
 329   for (p = string; *p; p++)
 330     if (*p == chr)
 331       ++count;
 332   return count;
 333 }
 334
 335 /* Copy the string formed by two pointers (one on the beginning, other
 336    on the char after the last char) to a new, malloc-ed location.
 337    0-terminate it.  */
 338 char *
 339 strdupdelim (const char *beg, const char *end)
 340 {
 341   char *res = (char *)xmalloc (end - beg + 1);
 342   memcpy (res, beg, end - beg);
 343   res[end - beg] = '\0';
 344   return res;
 345 }
 346
 347 /* Parse a string containing comma-separated elements, and return a
 348    vector of char pointers with the elements.  Spaces following the
 349    commas are ignored.  */
 350 char **
 351 sepstring (const char *s)
 352 {
 353   char **res;
 354   const char *p;
 355   int i = 0;
 356
 357   if (!s || !*s)
 358     return NULL;
 359   res = NULL;
 360   p = s;
 361   while (*s)
 362     {
 363       if (*s == ',')
 364         {
 365           res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 366           res[i] = strdupdelim (p, s);
 367           res[++i] = NULL;
 368           ++s;
 369           /* Skip the blanks following the ','.  */
 370           while (ISSPACE (*s))
 371             ++s;
 372           p = s;
 373         }
 374       else
 375         ++s;
 376     }
 377   res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 378   res[i] = strdupdelim (p, s);
 379   res[i + 1] = NULL;
 380   return res;
 381 }
 382 \f
 383 /* Return pointer to a static char[] buffer in which zero-terminated
 384    string-representation of TM (in form hh:mm:ss) is printed.
 385
 386    If TM is non-NULL, the current time-in-seconds will be stored
 387    there.
 388
 389    (#### This is misleading: one would expect TM would be used instead
 390    of the current time in that case.  This design was probably
 391    influenced by the design time(2), and should be changed at some
 392    points.  No callers use non-NULL TM anyway.)  */
 393
 394 char *
 395 time_str (time_t *tm)
 396 {
 397   static char output[15];
 398   struct tm *ptm;
 399   time_t secs = time (tm);
 400
 401   if (secs == -1)
 402     {
 403       /* In case of error, return the empty string.  Maybe we should
 404          just abort if this happens?  */
 405       *output = '\0';
 406       return output;
 407     }
 408   ptm = localtime (&secs);
 409   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 410   return output;
 411 }
 412
 413 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 414
 415 char *
 416 datetime_str (time_t *tm)
 417 {
 418   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 419   struct tm *ptm;
 420   time_t secs = time (tm);
 421
 422   if (secs == -1)
 423     {
 424       /* In case of error, return the empty string.  Maybe we should
 425          just abort if this happens?  */
 426       *output = '\0';
 427       return output;
 428     }
 429   ptm = localtime (&secs);
 430   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 431            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 432            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 433   return output;
 434 }
 435 \f
 436 /* The Windows versions of the following two functions are defined in
 437    mswindows.c.  */
 438
 439 #ifndef WINDOWS
 440 void
 441 fork_to_background (void)
 442 {
 443   pid_t pid;
 444   /* Whether we arrange our own version of opt.lfilename here.  */
 445   int changedp = 0;
 446
 447   if (!opt.lfilename)
 448     {
 449       opt.lfilename = unique_name (DEFAULT_LOGFILE);
 450       changedp = 1;
 451     }
 452   pid = fork ();
 453   if (pid < 0)
 454     {
 455       /* parent, error */
 456       perror ("fork");
 457       exit (1);
 458     }
 459   else if (pid != 0)
 460     {
 461       /* parent, no error */
 462       printf (_("Continuing in background.\n"));
 463       if (changedp)
 464         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 465       exit (0);
 466     }
 467   /* child: keep running */
 468 }
 469 #endif /* not WINDOWS */
 470 \f
 471 #if 0
 472 /* debug */
 473 char *
 474 ps (char *orig)
 475 {
 476   char *r = xstrdup (orig);
 477   path_simplify (r);
 478   return r;
 479 }
 480 #endif
 481
 482 /* Canonicalize PATH, and return a new path.  The new path differs from PATH
 483    in that:
 484         Multple `/'s are collapsed to a single `/'.
 485         Leading `./'s and trailing `/.'s are removed.
 486         Trailing `/'s are removed.
 487         Non-leading `../'s and trailing `..'s are handled by removing
 488         portions of the path.
 489
 490    E.g. "a/b/c/./../d/.." will yield "a/b".  This function originates
 491    from GNU Bash.
 492
 493    Changes for Wget:
 494         Always use '/' as stub_char.
 495         Don't check for local things using canon_stat.
 496         Change the original string instead of strdup-ing.
 497         React correctly when beginning with `./' and `../'.
 498         Don't zip out trailing slashes.  */
 499 int
 500 path_simplify (char *path)
 501 {
 502   register int i, start;
 503   int changes = 0;
 504   char stub_char;
 505
 506   if (!*path)
 507     return 0;
 508
 509   stub_char = '/';
 510
 511   if (path[0] == '/')
 512     /* Preserve initial '/'. */
 513     ++path;
 514
 515   /* Nix out leading `.' or `..' with.  */
 516   if ((path[0] == '.' && path[1] == '\0')
 517       || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
 518     {
 519       path[0] = '\0';
 520       changes = 1;
 521       return changes;
 522     }
 523
 524   /* Walk along PATH looking for things to compact.  */
 525   i = 0;
 526   while (1)
 527     {
 528       if (!path[i])
 529         break;
 530
 531       while (path[i] && path[i] != '/')
 532         i++;
 533
 534       start = i++;
 535
 536       /* If we didn't find any slashes, then there is nothing left to do.  */
 537       if (!path[start])
 538         break;
 539
 540       /* Handle multiple `/'s in a row.  */
 541       while (path[i] == '/')
 542         i++;
 543
 544       if ((start + 1) != i)
 545         {
 546           strcpy (path + start + 1, path + i);
 547           i = start + 1;
 548           changes = 1;
 549         }
 550
 551       /* Check for `../', `./' or trailing `.' by itself.  */
 552       if (path[i] == '.')
 553         {
 554           /* Handle trailing `.' by itself.  */
 555           if (!path[i + 1])
 556             {
 557               path[--i] = '\0';
 558               changes = 1;
 559               break;
 560             }
 561
 562           /* Handle `./'.  */
 563           if (path[i + 1] == '/')
 564             {
 565               strcpy (path + i, path + i + 1);
 566               i = (start < 0) ? 0 : start;
 567               changes = 1;
 568               continue;
 569             }
 570
 571           /* Handle `../' or trailing `..' by itself.  */
 572           if (path[i + 1] == '.' &&
 573               (path[i + 2] == '/' || !path[i + 2]))
 574             {
 575               while (--start > -1 && path[start] != '/');
 576               strcpy (path + start + 1, path + i + 2 + (start == -1 && path[i + 2]));
 577               i = (start < 0) ? 0 : start;
 578               changes = 1;
 579               continue;
 580             }
 581         }       /* path == '.' */
 582     } /* while */
 583
 584   /* Addition: Remove all `./'-s and `../'-s preceding the string.  */
 585   i = 0;
 586   while (1)
 587     {
 588       if (path[i] == '.' && path[i + 1] == '/')
 589         i += 2;
 590       else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
 591         i += 3;
 592       else
 593         break;
 594     }
 595   if (i)
 596     {
 597       strcpy (path, path + i - 0);
 598       changes = 1;
 599     }
 600
 601   return changes;
 602 }
 603 \f
 604 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
 605    specified with TM.  */
 606 void
 607 touch (const char *file, time_t tm)
 608 {
 609 #ifdef HAVE_STRUCT_UTIMBUF
 610   struct utimbuf times;
 611   times.actime = times.modtime = tm;
 612 #else
 613   time_t times[2];
 614   times[0] = times[1] = tm;
 615 #endif
 616
 617   if (utime (file, &times) == -1)
 618     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 619 }
 620
 621 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 622    nothing under MS-Windows.  */
 623 int
 624 remove_link (const char *file)
 625 {
 626   int err = 0;
 627   struct stat st;
 628
 629   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 630     {
 631       DEBUGP (("Unlinking %s (symlink).\n", file));
 632       err = unlink (file);
 633       if (err != 0)
 634         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 635                    file, strerror (errno));
 636     }
 637   return err;
 638 }
 639
 640 /* Does FILENAME exist?  This is quite a lousy implementation, since
 641    it supplies no error codes -- only a yes-or-no answer.  Thus it
 642    will return that a file does not exist if, e.g., the directory is
 643    unreadable.  I don't mind it too much currently, though.  The
 644    proper way should, of course, be to have a third, error state,
 645    other than true/false, but that would introduce uncalled-for
 646    additional complexity to the callers.  */
 647 int
 648 file_exists_p (const char *filename)
 649 {
 650 #ifdef HAVE_ACCESS
 651   return access (filename, F_OK) >= 0;
 652 #else
 653   struct stat buf;
 654   return stat (filename, &buf) >= 0;
 655 #endif
 656 }
 657
 658 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 659    Returns 0 on error.  */
 660 int
 661 file_non_directory_p (const char *path)
 662 {
 663   struct stat buf;
 664   /* Use lstat() rather than stat() so that symbolic links pointing to
 665      directories can be identified correctly.  */
 666   if (lstat (path, &buf) != 0)
 667     return 0;
 668   return S_ISDIR (buf.st_mode) ? 0 : 1;
 669 }
 670
 671 /* Return a unique filename, given a prefix and count */
 672 static char *
 673 unique_name_1 (const char *fileprefix, int count)
 674 {
 675   char *filename;
 676
 677   if (count)
 678     {
 679       filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
 680       sprintf (filename, "%s.%d", fileprefix, count);
 681     }
 682   else
 683     filename = xstrdup (fileprefix);
 684
 685   if (!file_exists_p (filename))
 686     return filename;
 687   else
 688     {
 689       xfree (filename);
 690       return NULL;
 691     }
 692 }
 693
 694 /* Return a unique file name, based on PREFIX.  */
 695 char *
 696 unique_name (const char *prefix)
 697 {
 698   char *file = NULL;
 699   int count = 0;
 700
 701   while (!file)
 702     file = unique_name_1 (prefix, count++);
 703   return file;
 704 }
 705 \f
 706 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 707    are missing, create them first.  In case any mkdir() call fails,
 708    return its error status.  Returns 0 on successful completion.
 709
 710    The behaviour of this function should be identical to the behaviour
 711    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 712 int
 713 make_directory (const char *directory)
 714 {
 715   int quit = 0;
 716   int i;
 717   char *dir;
 718
 719   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 720      function is unsafe if called with a read-only char *argument.  */
 721   STRDUP_ALLOCA (dir, directory);
 722
 723   /* If the first character of dir is '/', skip it (and thus enable
 724      creation of absolute-pathname directories.  */
 725   for (i = (*dir == '/'); 1; ++i)
 726     {
 727       for (; dir[i] && dir[i] != '/'; i++)
 728         ;
 729       if (!dir[i])
 730         quit = 1;
 731       dir[i] = '\0';
 732       /* Check whether the directory already exists.  */
 733       if (!file_exists_p (dir))
 734         {
 735           if (mkdir (dir, 0777) < 0)
 736             return -1;
 737         }
 738       if (quit)
 739         break;
 740       else
 741         dir[i] = '/';
 742     }
 743   return 0;
 744 }
 745
 746 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 747    should be a file name.
 748
 749    file_merge("/foo/bar", "baz")  => "/foo/baz"
 750    file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
 751    file_merge("foo", "bar")       => "bar"
 752
 753    In other words, it's a simpler and gentler version of uri_merge_1.  */
 754
 755 char *
 756 file_merge (const char *base, const char *file)
 757 {
 758   char *result;
 759   const char *cut = (const char *)strrchr (base, '/');
 760
 761   if (!cut)
 762     return xstrdup (file);
 763
 764   result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
 765   memcpy (result, base, cut - base);
 766   result[cut - base] = '/';
 767   strcpy (result + (cut - base) + 1, file);
 768
 769   return result;
 770 }
 771 \f
/* Forward declaration: in_acclist() is called by acceptable() before
   its definition appears further down in this file.  */
static int in_acclist PARAMS ((const char *const *, const char *, int));
 773
 774 /* Determine whether a file is acceptable to be followed, according to
 775    lists of patterns to accept/reject.  */
 776 int
 777 acceptable (const char *s)
 778 {
 779   int l = strlen (s);
 780
 781   while (l && s[l] != '/')
 782     --l;
 783   if (s[l] == '/')
 784     s += (l + 1);
 785   if (opt.accepts)
 786     {
 787       if (opt.rejects)
 788         return (in_acclist ((const char *const *)opt.accepts, s, 1)
 789                 && !in_acclist ((const char *const *)opt.rejects, s, 1));
 790       else
 791         return in_acclist ((const char *const *)opt.accepts, s, 1);
 792     }
 793   else if (opt.rejects)
 794     return !in_acclist ((const char *const *)opt.rejects, s, 1);
 795   return 1;
 796 }
 797
 798 /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
 799    `/something', frontcmp() will return 1 only if S2 begins with
 800    `/something'.  Otherwise, 0 is returned.  */
 801 int
 802 frontcmp (const char *s1, const char *s2)
 803 {
 804   for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
 805   return !*s1;
 806 }
 807
 808 /* Iterate through STRLIST, and return the first element that matches
 809    S, through wildcards or front comparison (as appropriate).  */
 810 static char *
 811 proclist (char **strlist, const char *s, enum accd flags)
 812 {
 813   char **x;
 814
 815   for (x = strlist; *x; x++)
 816     if (has_wildcards_p (*x))
 817       {
 818         if (fnmatch (*x, s, FNM_PATHNAME) == 0)
 819           break;
 820       }
 821     else
 822       {
 823         char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
 824         if (frontcmp (p, s))
 825           break;
 826       }
 827   return *x;
 828 }
 829
 830 /* Returns whether DIRECTORY is acceptable for download, wrt the
 831    include/exclude lists.
 832
 833    If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
 834    and absolute paths may be freely intermixed.  */
 835 int
 836 accdir (const char *directory, enum accd flags)
 837 {
 838   /* Remove starting '/'.  */
 839   if (flags & ALLABS && *directory == '/')
 840     ++directory;
 841   if (opt.includes)
 842     {
 843       if (!proclist (opt.includes, directory, flags))
 844         return 0;
 845     }
 846   if (opt.excludes)
 847     {
 848       if (proclist (opt.excludes, directory, flags))
 849         return 0;
 850     }
 851   return 1;
 852 }
 853
 854 /* Match the end of STRING against PATTERN.  For instance:
 855
 856    match_backwards ("abc", "bc") -> 1
 857    match_backwards ("abc", "ab") -> 0
 858    match_backwards ("abc", "abc") -> 1 */
 859 int
 860 match_tail (const char *string, const char *pattern)
 861 {
 862   int i, j;
 863
 864   for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
 865     if (string[i] != pattern[j])
 866       break;
 867   /* If the pattern was exhausted, the match was succesful.  */
 868   if (j == -1)
 869     return 1;
 870   else
 871     return 0;
 872 }
 873
 874 /* Checks whether string S matches each element of ACCEPTS.  A list
 875    element are matched either with fnmatch() or match_tail(),
 876    according to whether the element contains wildcards or not.
 877
 878    If the BACKWARD is 0, don't do backward comparison -- just compare
 879    them normally.  */
 880 static int
 881 in_acclist (const char *const *accepts, const char *s, int backward)
 882 {
 883   for (; *accepts; accepts++)
 884     {
 885       if (has_wildcards_p (*accepts))
 886         {
 887           /* fnmatch returns 0 if the pattern *does* match the
 888              string.  */
 889           if (fnmatch (*accepts, s, 0) == 0)
 890             return 1;
 891         }
 892       else
 893         {
 894           if (backward)
 895             {
 896               if (match_tail (s, *accepts))
 897                 return 1;
 898             }
 899           else
 900             {
 901               if (!strcmp (s, *accepts))
 902                 return 1;
 903             }
 904         }
 905     }
 906   return 0;
 907 }
 908
 909 /* Return the location of STR's suffix (file extension).  Examples:
 910    suffix ("foo.bar")       -> "bar"
 911    suffix ("foo.bar.baz")   -> "baz"
 912    suffix ("/foo/bar")      -> NULL
 913    suffix ("/foo.bar/baz")  -> NULL  */
 914 char *
 915 suffix (const char *str)
 916 {
 917   int i;
 918
 919   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
 920     ;
 921
 922   if (str[i++] == '.')
 923     return (char *)str + i;
 924   else
 925     return NULL;
 926 }
 927
 928 /* Read a line from FP.  The function reallocs the storage as needed
 929    to accomodate for any length of the line.  Reallocs are done
 930    storage exponentially, doubling the storage after each overflow to
 931    minimize the number of calls to realloc() and fgets().  The newline
 932    character at the end of line is retained.
 933
 934    After end-of-file is encountered without anything being read, NULL
 935    is returned.  NULL is also returned on error.  To distinguish
 936    between these two cases, use the stdio function ferror().  */
 937
 938 char *
 939 read_whole_line (FILE *fp)
 940 {
 941   int length = 0;
 942   int bufsize = 81;
 943   char *line = (char *)xmalloc (bufsize);
 944
 945   while (fgets (line + length, bufsize - length, fp))
 946     {
 947       length += strlen (line + length);
 948       assert (length > 0);
 949       if (line[length - 1] == '\n')
 950         break;
 951       /* fgets() guarantees to read the whole line, or to use up the
 952          space we've given it.  We can double the buffer
 953          unconditionally.  */
 954       bufsize <<= 1;
 955       line = xrealloc (line, bufsize);
 956     }
 957   if (length == 0 || ferror (fp))
 958     {
 959       xfree (line);
 960       return NULL;
 961     }
 962   if (length + 1 < bufsize)
 963     /* Relieve the memory from our exponential greediness.  We say
 964        `length + 1' because the terminating \0 is not included in
 965        LENGTH.  We don't need to zero-terminate the string ourselves,
 966        though, because fgets() does that.  */
 967     line = xrealloc (line, length + 1);
 968   return line;
 969 }
 970 \f
/* Read FILE into memory.  A pointer to `struct file_memory' is
   returned; use struct element `content' to access file contents, and
   the element `length' to know the file length.  `content' is *not*
   zero-terminated, and you should *not* read or write beyond the [0,
   length) range of characters.

   After you are done with the file contents, call read_file_free to
   release the memory.

   Depending on the operating system and the type of file that is
   being read, read_file() either mmap's the file into memory, or
   reads the file into the core using read().

   If file is named "-", fileno(stdin) is used for reading instead.
   If you want to read from a real file named "-", use "./-" instead.  */
 986
 987 struct file_memory *
 988 read_file (const char *file)
 989 {
 990   int fd;
 991   struct file_memory *fm;
 992   long size;
 993   int inhibit_close = 0;
 994
 995   /* Some magic in the finest tradition of Perl and its kin: if FILE
 996      is "-", just use stdin.  */
 997   if (HYPHENP (file))
 998     {
 999       fd = fileno (stdin);
1000       inhibit_close = 1;
1001       /* Note that we don't inhibit mmap() in this case.  If stdin is
1002          redirected from a regular file, mmap() will still work.  */
1003     }
1004   else
1005     fd = open (file, O_RDONLY);
1006   if (fd < 0)
1007     return NULL;
1008   fm = xmalloc (sizeof (struct file_memory));
1009
1010 #ifdef HAVE_MMAP
1011   {
1012     struct stat buf;
1013     if (fstat (fd, &buf) < 0)
1014       goto mmap_lose;
1015     fm->length = buf.st_size;
1016     /* NOTE: As far as I know, the callers of this function never
1017        modify the file text.  Relying on this would enable us to
1018        specify PROT_READ and MAP_SHARED for a marginal gain in
1019        efficiency, but at some cost to generality.  */
1020     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1021                         MAP_PRIVATE, fd, 0);
1022     if (fm->content == (char *)MAP_FAILED)
1023       goto mmap_lose;
1024     if (!inhibit_close)
1025       close (fd);
1026
1027     fm->mmap_p = 1;
1028     return fm;
1029   }
1030
1031  mmap_lose:
1032   /* The most common reason why mmap() fails is that FD does not point
1033      to a plain file.  However, it's also possible that mmap() doesn't
1034      work for a particular type of file.  Therefore, whenever mmap()
1035      fails, we just fall back to the regular method.  */
1036 #endif /* HAVE_MMAP */
1037
1038   fm->length = 0;
1039   size = 512;                   /* number of bytes fm->contents can
1040                                    hold at any given time. */
1041   fm->content = xmalloc (size);
1042   while (1)
1043     {
1044       long nread;
1045       if (fm->length > size / 2)
1046         {
1047           /* #### I'm not sure whether the whole exponential-growth
1048              thing makes sense with kernel read.  On Linux at least,
1049              read() refuses to read more than 4K from a file at a
1050              single chunk anyway.  But other Unixes might optimize it
1051              better, and it doesn't *hurt* anything, so I'm leaving
1052              it.  */
1053
1054           /* Normally, we grow SIZE exponentially to make the number
1055              of calls to read() and realloc() logarithmic in relation
1056              to file size.  However, read() can read an amount of data
1057              smaller than requested, and it would be unreasonably to
1058              double SIZE every time *something* was read.  Therefore,
1059              we double SIZE only when the length exceeds half of the
1060              entire allocated size.  */
1061           size <<= 1;
1062           fm->content = xrealloc (fm->content, size);
1063         }
1064       nread = read (fd, fm->content + fm->length, size - fm->length);
1065       if (nread > 0)
1066         /* Successful read. */
1067         fm->length += nread;
1068       else if (nread < 0)
1069         /* Error. */
1070         goto lose;
1071       else
1072         /* EOF */
1073         break;
1074     }
1075   if (!inhibit_close)
1076     close (fd);
1077   if (size > fm->length && fm->length != 0)
1078     /* Due to exponential growth of fm->content, the allocated region
1079        might be much larger than what is actually needed.  */
1080     fm->content = xrealloc (fm->content, fm->length);
1081   fm->mmap_p = 0;
1082   return fm;
1083
1084  lose:
1085   if (!inhibit_close)
1086     close (fd);
1087   xfree (fm->content);
1088   xfree (fm);
1089   return NULL;
1090 }
1091
1092 /* Release the resources held by FM.  Specifically, this calls
1093    munmap() or xfree() on fm->content, depending whether mmap or
1094    malloc/read were used to read in the file.  It also frees the
1095    memory needed to hold the FM structure itself.  */
1096
1097 void
1098 read_file_free (struct file_memory *fm)
1099 {
1100 #ifdef HAVE_MMAP
1101   if (fm->mmap_p)
1102     {
1103       munmap (fm->content, fm->length);
1104     }
1105   else
1106 #endif
1107     {
1108       xfree (fm->content);
1109     }
1110   xfree (fm);
1111 }
1112 \f
/* Release a NULL-terminated vector of pointers: every element is
   freed first, then the vector itself.  A NULL VEC is ignored.  */
void
free_vec (char **vec)
{
  int i;

  if (vec == NULL)
    return;

  for (i = 0; vec[i] != NULL; i++)
    xfree (vec[i]);
  xfree (vec);
}
1126
/* Append vector V2 to vector V1 and return the merged vector.  Both
   vectors are NULL-terminated arrays of string pointers.  The
   function frees V2 and reallocates V1, so you may not use either
   pointer after the call -- only the returned one.  If V1 is NULL,
   V2 is returned; if V2 is NULL, V1 is returned.  The strings
   themselves are not copied: the pointers held by V2 are moved into
   the result.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 holds no elements (this avoids j == 0 below); just
         dispose of the empty vector.  */
      xfree (v2);
      return v1;
    }
  /* Count v1.  */
  for (i = 0; v1[i]; i++)
    ;
  /* Count v2.  */
  for (j = 0; v2[j]; j++)
    ;
  /* Reallocate v1 to hold both sets of elements plus the terminating
     NULL.  The elements are `char *', so that is the size to use.  */
  v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char *));
  /* Copy V2's elements together with its NULL terminator.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
  xfree (v2);
  return v1;
}
1155
1156 /* A set of simple-minded routines to store strings in a linked list.
1157    This used to also be used for searching, but now we have hash
1158    tables for that.  */
1159
1160 /* It's a shame that these simple things like linked lists and hash
1161    tables (see hash.c) need to be implemented over and over again.  It
1162    would be nice to be able to use the routines from glib -- see
1163    www.gtk.org for details.  However, that would make Wget depend on
1164    glib, and I want to avoid dependencies to external libraries for
1165    reasons of convenience and portability (I suspect Wget is more
1166    portable than anything ever written for Gnome).  */
1167
1168 /* Append an element to the list.  If the list has a huge number of
1169    elements, this can get slow because it has to find the list's
1170    ending.  If you think you have to call slist_append in a loop,
1171    think about calling slist_prepend() followed by slist_nreverse().  */
1172
1173 slist *
1174 slist_append (slist *l, const char *s)
1175 {
1176   slist *newel = (slist *)xmalloc (sizeof (slist));
1177   slist *beg = l;
1178
1179   newel->string = xstrdup (s);
1180   newel->next = NULL;
1181
1182   if (!l)
1183     return newel;
1184   /* Find the last element.  */
1185   while (l->next)
1186     l = l->next;
1187   l->next = newel;
1188   return beg;
1189 }
1190
1191 /* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
1192
1193 slist *
1194 slist_prepend (slist *l, const char *s)
1195 {
1196   slist *newel = (slist *)xmalloc (sizeof (slist));
1197   newel->string = xstrdup (s);
1198   newel->next = l;
1199   return newel;
1200 }
1201
1202 /* Destructively reverse L. */
1203
1204 slist *
1205 slist_nreverse (slist *l)
1206 {
1207   slist *prev = NULL;
1208   while (l)
1209     {
1210       slist *next = l->next;
1211       l->next = prev;
1212       prev = l;
1213       l = next;
1214     }
1215   return prev;
1216 }
1217
1218 /* Is there a specific entry in the list?  */
1219 int
1220 slist_contains (slist *l, const char *s)
1221 {
1222   for (; l; l = l->next)
1223     if (!strcmp (l->string, s))
1224       return 1;
1225   return 0;
1226 }
1227
1228 /* Free the whole slist.  */
1229 void
1230 slist_free (slist *l)
1231 {
1232   while (l)
1233     {
1234       slist *n = l->next;
1235       xfree (l->string);
1236       xfree (l);
1237       l = n;
1238     }
1239 }
1240 \f
1241 /* Sometimes it's useful to create "sets" of strings, i.e. special
1242    hash tables where you want to store strings as keys and merely
1243    query for their existence.  Here is a set of utility routines that
1244    makes that transparent.  */
1245
/* Add string S to the set HT.  Nothing happens if S is already a
   member.  Otherwise a private copy of S becomes the key.  */

void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Checking for membership up front means we never have to free()
     an old element only to strdup() an identical new one.

     The value stored is the literal "1": a clear arbitrary marker
     that costs no memory, because every key-value pair in every
     `set' hash table shares the pointer to the same string.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1261
/* Membership test for string sets: reports whether S is in HT.  This
   is merely hash_table_contains() under a set-flavored name; its
   result is passed through unchanged.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int present = hash_table_contains (ht, s);
  return present;
}
1269
/* Mapper used by string_set_free: releases KEY, the set element.
   The value and the extra argument are not looked at.  Always
   returns 0 (presumably "keep going" to hash_table_map -- see
   hash.c to confirm).  */

static int
string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
{
  xfree (key);
  return 0;
}

/* Destroy the string set HT: first free every key stored in it, then
   destroy the hash table itself.  HT must not be used afterwards.  */

void
string_set_free (struct hash_table *ht)
{
  hash_table_map (ht, string_set_free_mapper, NULL);
  hash_table_destroy (ht);
}
1283
/* Mapper used by free_keys_and_values: releases both KEY and VALUE
   of one table entry.  The extra argument is not looked at.  Always
   returns 0 (presumably "keep going" to hash_table_map -- see
   hash.c to confirm).  */

static int
free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
{
  xfree (key);
  xfree (value);
  return 0;
}

/* Another utility function: call xfree() on all keys and values of
   HT.  The table itself is not destroyed by this function.  */

void
free_keys_and_values (struct hash_table *ht)
{
  hash_table_map (ht, free_keys_and_values_mapper, NULL);
}
1299
1300 \f
/* Engine for legible and legible_very_long; this function works on
   strings.  REPR is the decimal representation of a number, with an
   optional leading `-'.  The result is the same number with a `,'
   inserted between groups of three digits (counted from the right),
   e.g. "1234567" -> "1,234,567".

   The result lives in a static buffer of 128 bytes that is
   overwritten by the next call; no bounds checking is done, so REPR
   must be short enough for the grouped form to fit.  */

static char *
legible_1 (const char *repr)
{
  static char outbuf[128];
  char *out = outbuf;
  const char *in = repr;
  const char *first_digit;
  int remaining;

  /* A minus sign is copied verbatim and does not take part in the
     grouping.  */
  if (*in == '-')
    *out++ = *in++;
  first_digit = in;

  /* REMAINING is the number of digits still to be copied.  A
     separator goes in front of every digit that starts a full group
     of three, except in front of the very first digit.  */
  for (remaining = (int) strlen (in); remaining > 0; remaining--)
    {
      if (remaining % 3 == 0 && in != first_digit)
        *out++ = ',';
      *out++ = *in++;
    }

  /* Zero-terminate the string.  */
  *out = '\0';
  return outbuf;
}
1338
/* Legible -- return a pointer to the legibly printed long L, i.e. its
   decimal form with digits grouped in threes.  The pointer refers to
   the static buffer of legible_1, so the text is only valid until
   the next call.  */
char *
legible (long l)
{
  /* 24 bytes is the size long_to_string's documentation asks for on
     machines with 64-bit longs.  */
  char digits[24];

  long_to_string (digits, l);
  return legible_1 (digits);
}
1348
1349 /* Write a string representation of NUMBER into the provided buffer.
1350    We cannot use sprintf() because we cannot be sure whether the
1351    platform supports printing of what we chose for VERY_LONG_TYPE.
1352
1353    Example: Gcc supports `long long' under many platforms, but on many
1354    of those the native libc knows nothing of it and therefore cannot
1355    print it.
1356
1357    How long BUFFER needs to be depends on the platform and the content
1358    of NUMBER.  For 64-bit VERY_LONG_TYPE (the most common case), 24
1359    bytes are sufficient.  Using more might be a good idea.
1360
1361    This function does not go through the hoops that long_to_string
1362    goes to because it doesn't aspire to be fast.  (It's called perhaps
1363    once in a Wget run.)  */
1364
1365 static void
1366 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1367 {
1368   int i = 0;
1369   int j;
1370
1371   /* Print the number backwards... */
1372   do
1373     {
1374       buffer[i++] = '0' + number % 10;
1375       number /= 10;
1376     }
1377   while (number);
1378
1379   /* ...and reverse the order of the digits. */
1380   for (j = 0; j < i / 2; j++)
1381     {
1382       char c = buffer[j];
1383       buffer[j] = buffer[i - 1 - j];
1384       buffer[i - 1 - j] = c;
1385     }
1386   buffer[i] = '\0';
1387 }
1388
1389 /* The same as legible(), but works on VERY_LONG_TYPE.  See sysdep.h.  */
1390 char *
1391 legible_very_long (VERY_LONG_TYPE l)
1392 {
1393   char inbuf[128];
1394   /* Print the number into the buffer.  */
1395   very_long_to_string (inbuf, l);
1396   return legible_1 (inbuf);
1397 }
1398
/* Count the characters needed to print the (long) integer A in base
   10: the number of its digits, plus one for the `-' sign when A is
   negative.  Zero counts as one digit.  */
int
numdigit (long a)
{
  unsigned long magnitude;
  int res = 1;

  if (a < 0)
    {
      /* Negating A itself would overflow for LONG_MIN, which is
         undefined behavior.  Doing the negation in unsigned
         arithmetic is well defined for every value.  */
      magnitude = 0UL - (unsigned long) a;
      ++res;                    /* room for the sign */
    }
  else
    magnitude = (unsigned long) a;

  while ((magnitude /= 10) != 0)
    ++res;
  return res;
}
1413
/* Helpers for long_to_string.  They operate on its locals: P, the
   output cursor, and N, the (unsigned) magnitude still to be
   printed.  ONE_DIGIT stores the digit at decimal position FIGURE (a
   power of ten); ONE_DIGIT_ADVANCE additionally strips that digit
   from N.  DIGITS_<k> (10^(k-1)) prints exactly K digits.  */

#define ONE_DIGIT(figure) *p++ = (char) (n / (figure) + '0')
#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))

#define DIGITS_1(figure) ONE_DIGIT (figure)
#define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
#define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
#define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
#define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
#define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
#define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
#define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
#define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
#define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)

/* DIGITS_<11-20> are only used on machines with 64-bit longs. */

#define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
#define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
#define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
#define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
#define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
#define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
#define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
#define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
#define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)

/* Print NUMBER to BUFFER in base 10.  This is completely equivalent
   to `sprintf(buffer, "%ld", number)', only much faster.  That holds
   for every value of NUMBER, including LONG_MIN.

   The speedup may make a difference in programs that frequently
   convert numbers to strings.  Some implementations of sprintf,
   particularly the one in GNU libc, have been known to be extremely
   slow compared to this function.

   BUFFER should accept as many bytes as you expect the number to take
   up.  On machines with 64-bit longs the maximum needed size is 24
   bytes.  That includes the worst-case digits, the optional `-' sign,
   and the trailing \0.  */

void
long_to_string (char *buffer, long number)
{
#if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
  /* We are running in a strange or misconfigured environment.  Let
     sprintf cope with it.  */
  sprintf (buffer, "%ld", number);
#else  /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
  char *p = buffer;
  unsigned long n;

  if (number < 0)
    {
      *p++ = '-';
      /* Take the magnitude in unsigned arithmetic: `-number' would
         overflow (undefined behavior) when NUMBER is LONG_MIN.  */
      n = 0UL - (unsigned long) number;
    }
  else
    n = (unsigned long) number;

  if      (n < 10)                   { DIGITS_1 (1); }
  else if (n < 100)                  { DIGITS_2 (10); }
  else if (n < 1000)                 { DIGITS_3 (100); }
  else if (n < 10000)                { DIGITS_4 (1000); }
  else if (n < 100000)               { DIGITS_5 (10000); }
  else if (n < 1000000)              { DIGITS_6 (100000); }
  else if (n < 10000000)             { DIGITS_7 (1000000); }
  else if (n < 100000000)            { DIGITS_8 (10000000); }
  else if (n < 1000000000)           { DIGITS_9 (100000000); }
#if SIZEOF_LONG == 4
  /* ``if (1)'' serves only to preserve editor indentation. */
  else if (1)                        { DIGITS_10 (1000000000); }
#else  /* SIZEOF_LONG != 4 */
  else if (n < 10000000000L)         { DIGITS_10 (1000000000L); }
  else if (n < 100000000000L)        { DIGITS_11 (10000000000L); }
  else if (n < 1000000000000L)       { DIGITS_12 (100000000000L); }
  else if (n < 10000000000000L)      { DIGITS_13 (1000000000000L); }
  else if (n < 100000000000000L)     { DIGITS_14 (10000000000000L); }
  else if (n < 1000000000000000L)    { DIGITS_15 (100000000000000L); }
  else if (n < 10000000000000000L)   { DIGITS_16 (1000000000000000L); }
  else if (n < 100000000000000000L)  { DIGITS_17 (10000000000000000L); }
  else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
  else                               { DIGITS_19 (1000000000000000000L); }
#endif /* SIZEOF_LONG != 4 */

  *p = '\0';
#endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
}

#undef ONE_DIGIT
#undef ONE_DIGIT_ADVANCE

#undef DIGITS_1
#undef DIGITS_2
#undef DIGITS_3
#undef DIGITS_4
#undef DIGITS_5
#undef DIGITS_6
#undef DIGITS_7
#undef DIGITS_8
#undef DIGITS_9
#undef DIGITS_10
#undef DIGITS_11
#undef DIGITS_12
#undef DIGITS_13
#undef DIGITS_14
#undef DIGITS_15
#undef DIGITS_16
#undef DIGITS_17
#undef DIGITS_18
#undef DIGITS_19
1522 \f
/* Support for timers. */

/* Select the timer implementation.  Depending on the OS and
   availability of gettimeofday(), one and only one of
   TIMER_WINDOWS, TIMER_GETTIMEOFDAY and TIMER_TIME ends up defined.
   Virtually all modern Unix systems will define TIMER_GETTIMEOFDAY;
   Windows will use TIMER_WINDOWS.  TIMER_TIME is a catch-all method
   for non-Windows systems without gettimeofday.

   #### Perhaps we should also support ftime(), which exists on old
   BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
   C, if memory serves me.)  */

#undef TIMER_WINDOWS
#undef TIMER_GETTIMEOFDAY
#undef TIMER_TIME

#if defined (WINDOWS)
# define TIMER_WINDOWS
#elif defined (HAVE_GETTIMEOFDAY)
# define TIMER_GETTIMEOFDAY
#else  /* neither WINDOWS nor HAVE_GETTIMEOFDAY */
# define TIMER_TIME
#endif

/* The starting point of a timer, kept in whatever form the selected
   implementation delivers it.  */

struct wget_timer {
#if defined (TIMER_GETTIMEOFDAY)
  long secs;                    /* tv_sec at the last reset */
  long usecs;                   /* tv_usec at the last reset */
#elif defined (TIMER_TIME)
  time_t secs;                  /* time() at the last reset */
#elif defined (TIMER_WINDOWS)
  ULARGE_INTEGER wintime;       /* system file time at the last reset */
#endif
};
1563
1564 /* Allocate a timer.  It is not legal to do anything with a freshly
1565    allocated timer, except call wtimer_reset() or wtimer_delete().  */
1566
1567 struct wget_timer *
1568 wtimer_allocate (void)
1569 {
1570   struct wget_timer *wt =
1571     (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1572   return wt;
1573 }
1574
/* Convenience constructor: allocate a timer with wtimer_allocate(),
   start it with wtimer_reset(), and return it ready for use.  */

struct wget_timer *
wtimer_new (void)
{
  struct wget_timer *timer = wtimer_allocate ();

  wtimer_reset (timer);
  return timer;
}
1584
/* Release timer WT.  The memory holding the timer is freed, so WT
   must not be used in any way after this call.  */

void
wtimer_delete (struct wget_timer *wt)
{
  xfree (wt);
}
1593
/* Reset timer WT by recording the current time in it.  This
   establishes the starting point from which wtimer_elapsed() will
   return the number of elapsed milliseconds.  It is allowed to reset
   a previously used timer.  */

void
wtimer_reset (struct wget_timer *wt)
{
#if defined (TIMER_GETTIMEOFDAY)
  struct timeval now;

  gettimeofday (&now, NULL);
  wt->secs  = now.tv_sec;
  wt->usecs = now.tv_usec;
#elif defined (TIMER_TIME)
  wt->secs = time (NULL);
#elif defined (TIMER_WINDOWS)
  FILETIME file_time;
  SYSTEMTIME system_time;

  /* Fetch the system time and convert it to a file time, whose two
     32-bit halves are stored in the timer.  */
  GetSystemTime (&system_time);
  SystemTimeToFileTime (&system_time, &file_time);
  wt->wintime.HighPart = file_time.dwHighDateTime;
  wt->wintime.LowPart  = file_time.dwLowDateTime;
#endif
}
1621
/* Return the number of milliseconds elapsed since timer WT was last
   reset.  The timer is not modified, so it is allowed to call this
   function more than once to get increasingly higher elapsed
   values.

   Each branch below must match the representation stored by
   wtimer_reset() for the same TIMER_* method.  */

long
wtimer_elapsed (struct wget_timer *wt)
{
#ifdef TIMER_GETTIMEOFDAY
  struct timeval t;
  gettimeofday (&t, NULL);
  /* Seconds are scaled up and microseconds scaled down to
     milliseconds.  */
  return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
#endif

#ifdef TIMER_TIME
  /* time() has one-second resolution, so the result is always a
     multiple of 1000.  */
  time_t now = time (NULL);
  return 1000 * (now - wt->secs);
#endif

#ifdef TIMER_WINDOWS
  FILETIME ft;
  SYSTEMTIME st;
  ULARGE_INTEGER uli;
  GetSystemTime (&st);
  SystemTimeToFileTime (&st, &ft);
  uli.HighPart = ft.dwHighDateTime;
  uli.LowPart = ft.dwLowDateTime;
  /* File times count 100-nanosecond intervals; 10000 of those make
     one millisecond.  */
  return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
#endif
}
1651
/* Return the assessed granularity of the timer implementation, in
   the same unit (milliseconds) that wtimer_elapsed() reports.  This
   is important for certain code that tries to deal with "zero" time
   intervals.  */

long
wtimer_granularity (void)
{
#if defined (TIMER_TIME)
  /* This is clear: time() counts whole seconds.  */
  return 1000;
#elif defined (TIMER_GETTIMEOFDAY)
  /* Granularity of gettimeofday is hugely architecture-dependent.
     However, it appears that on modern machines it is better than
     1ms.  */
  return 1;
#elif defined (TIMER_WINDOWS)
  /* ? */
  return 1;
#endif
}
1676 \f
1677 /* This should probably be at a better place, but it doesn't really
1678    fit into html-parse.c.  */
1679
/* Return a freshly malloc-ed copy of string S in which the special
   characters are replaced by numeric and special graphic entities,
   as per RFC1866:

   `&' -> `&amp;'
   `<' -> `&lt;'
   `>' -> `&gt;'
   `"' -> `&quot;'
   SP  -> `&#32;'

   No other entities are recognized or replaced; all other characters
   are copied as they are.  S itself is not modified.  */
char *
html_quote_string (const char *s)
{
  const char *from;
  char *to, *res;
  int size = 0;

  /* First pass: work out how long the quoted string will be.  */
  for (from = s; *from; from++)
    switch (*from)
      {
      case '&':
        size += 5;              /* `&amp;' */
        break;
      case '<': case '>':
        size += 4;              /* `&lt;' and `&gt;' */
        break;
      case '\"':
        size += 6;              /* `&quot;' */
        break;
      case ' ':
        size += 5;              /* `&#32;' */
        break;
      default:
        ++size;
      }
  res = (char *)xmalloc (size + 1);

  /* Second pass: copy the string, expanding the special characters
     on the way.  */
  to = res;
  for (from = s; *from; from++)
    {
      const char *entity;
      int len;

      if (*from == '&')
        entity = "&amp;";
      else if (*from == '<')
        entity = "&lt;";
      else if (*from == '>')
        entity = "&gt;";
      else if (*from == '\"')
        entity = "&quot;";
      else if (*from == ' ')
        entity = "&#32;";
      else
        {
          /* An ordinary character.  */
          *to++ = *from;
          continue;
        }
      len = strlen (entity);
      memcpy (to, entity, len);
      to += len;
    }
  *to = '\0';
  return res;
}
1751
/* Determine the width, in columns, of the terminal that stderr is
   attached to.  Return 0 if that's not possible: when the system has
   no TIOCGWINSZ ioctl, when opt.lfilename is set (presumably because
   output then goes to a log file rather than the terminal), or when
   the ioctl fails.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  struct winsize wsz;

  if (opt.lfilename != NULL)
    return 0;

  if (ioctl (fileno (stderr), TIOCGWINSZ, &wsz) < 0)
    return 0;                   /* most likely ENOTTY */

  return wsz.ws_col;
#else  /* not TIOCGWINSZ */
  return 0;
#endif /* not TIOCGWINSZ */
}
1776
#if 0
/* A debugging function for checking whether an MD5 library works.
   It computes the MD5 digest of the zero-terminated string BUF and
   returns it as 32 hexadecimal characters in a static buffer, which
   is overwritten by the next call.  (Compiled out.)  */

#include "gen-md5.h"

char *
debug_test_md5 (char *buf)
{
  unsigned char raw[16];
  static char res[33];
  int i;
  ALLOCA_MD5_CONTEXT (ctx);

  gen_md5_init (ctx);
  gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
  gen_md5_finish (ctx, raw);

  /* Each of the 16 digest bytes yields two hex digits, high nibble
     first.  */
  for (i = 0; i < 16; i++)
    {
      res[2 * i]     = XDIGIT_TO_xchar (raw[i] >> 4);
      res[2 * i + 1] = XDIGIT_TO_xchar (raw[i] & 0xf);
    }
  res[32] = '\0';

  return res;
}
#endif