sjero.net Git - wget/blob - src/utils.c

   1 /* Various functions of utilitarian nature.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget; if not, write to the Free Software
  19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  20
  21 #include <config.h>
  22
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #ifdef HAVE_STRING_H
  26 # include <string.h>
  27 #else  /* not HAVE_STRING_H */
  28 # include <strings.h>
  29 #endif /* not HAVE_STRING_H */
  30 #include <sys/types.h>
  31 #ifdef HAVE_UNISTD_H
  32 # include <unistd.h>
  33 #endif
  34 #ifdef HAVE_MMAP
  35 # include <sys/mman.h>
  36 #endif
  37 #ifdef HAVE_PWD_H
  38 # include <pwd.h>
  39 #endif
  40 #include <limits.h>
  41 #ifdef HAVE_UTIME_H
  42 # include <utime.h>
  43 #endif
  44 #ifdef HAVE_SYS_UTIME_H
  45 # include <sys/utime.h>
  46 #endif
  47 #include <errno.h>
  48 #ifdef NeXT
  49 # include <libc.h>              /* for access() */
  50 #endif
  51 #include <fcntl.h>
  52 #include <assert.h>
  53 #ifdef HAVE_SYS_IOCTL_H
  54 # include <sys/ioctl.h>
  55 #endif
  56
  57 #include "wget.h"
  58 #include "utils.h"
  59 #include "fnmatch.h"
  60 #include "hash.h"
  61
  62 #ifndef errno
  63 extern int errno;
  64 #endif
  65
  66 /* This section implements several wrappers around the basic
  67    allocation routines.  This is done for two reasons: first, so that
  68    the callers of these functions need not consistently check for
  69    errors.  If there is not enough virtual memory for running Wget,
  70    something is seriously wrong, and Wget exits with an appropriate
  71    error message.
  72
  73    The second reason why these are useful is that, if DEBUG_MALLOC is
  74    defined, they also provide a handy (if crude) malloc debugging
  75    interface that checks memory leaks.  */
  76
  77 /* Croak the fatal memory error and bail out with non-zero exit
  78    status.  */
  79 static void
  80 memfatal (const char *what)
  81 {
  82   /* HACK: expose save_log_p from log.c, so we can turn it off in
  83      order to prevent saving the log.  Saving the log is dangerous
  84      because logprintf() and logputs() can call malloc(), so this
  85      could infloop.  When logging is turned off, infloop can no longer
  86      happen.
  87
  88      #### This is no longer really necessary because the new routines
  89      in log.c cons only if the line exceeds eighty characters.  But
  90      this can come at the end of a line, so it's OK to be careful.
  91
  92      On a more serious note, it would be good to have a
  93      log_forced_shutdown() routine that exposes this cleanly.  */
  94   extern int save_log_p;
  95
  96   save_log_p = 0;
  97   logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
  98   exit (1);
  99 }
 100
 101 /* These functions end with _real because they need to be
 102    distinguished from the debugging functions, and from the macros.
 103    Explanation follows:
 104
 105    If memory debugging is not turned on, wget.h defines these:
 106
 107      #define xmalloc xmalloc_real
 108      #define xrealloc xrealloc_real
 109      #define xstrdup xstrdup_real
 110      #define xfree free
 111
 112    In case of memory debugging, the definitions are a bit more
 113    complex, because we want to provide more information, *and* we want
 114    to call the debugging code.  (The former is the reason why xmalloc
 115    and friends need to be macros in the first place.)  Then it looks
 116    like this:
 117
 118      #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
 119      #define xfree(a)   xfree_debug (a, __FILE__, __LINE__)
 120      #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
 121      #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
 122
 123    Each of the *_debug function does its magic and calls the real one.  */
 124
 125 #ifdef DEBUG_MALLOC
 126 # define STATIC_IF_DEBUG static
 127 #else
 128 # define STATIC_IF_DEBUG
 129 #endif
 130
 131 STATIC_IF_DEBUG void *
 132 xmalloc_real (size_t size)
 133 {
 134   void *ptr = malloc (size);
 135   if (!ptr)
 136     memfatal ("malloc");
 137   return ptr;
 138 }
 139
 140 STATIC_IF_DEBUG void *
 141 xrealloc_real (void *ptr, size_t newsize)
 142 {
 143   void *newptr;
 144
 145   /* Not all Un*xes have the feature of realloc() that calling it with
 146      a NULL-pointer is the same as malloc(), but it is easy to
 147      simulate.  */
 148   if (ptr)
 149     newptr = realloc (ptr, newsize);
 150   else
 151     newptr = malloc (newsize);
 152   if (!newptr)
 153     memfatal ("realloc");
 154   return newptr;
 155 }
 156
 157 STATIC_IF_DEBUG char *
 158 xstrdup_real (const char *s)
 159 {
 160   char *copy;
 161
 162 #ifndef HAVE_STRDUP
 163   int l = strlen (s);
 164   copy = malloc (l + 1);
 165   if (!copy)
 166     memfatal ("strdup");
 167   memcpy (copy, s, l + 1);
 168 #else  /* HAVE_STRDUP */
 169   copy = strdup (s);
 170   if (!copy)
 171     memfatal ("strdup");
 172 #endif /* HAVE_STRDUP */
 173
 174   return copy;
 175 }
 176
 177 #ifdef DEBUG_MALLOC
 178
 179 /* Crude home-grown routines for debugging some malloc-related
 180    problems.  Featured:
 181
 182    * Counting the number of malloc and free invocations, and reporting
 183      the "balance", i.e. how many times more malloc was called than it
 184      was the case with free.
 185
 186    * Making malloc store its entry into a simple array and free remove
 187      stuff from that array.  At the end, print the pointers which have
 188      not been freed, along with the source file and the line number.
 189      This also has the side-effect of detecting freeing memory that
 190      was never allocated.
 191
 192    Note that this kind of memory leak checking strongly depends on
 193    every malloc() being followed by a free(), even if the program is
 194    about to finish.  Wget is careful to free the data structure it
 195    allocated in init.c.  */
 196
 197 static int malloc_count, free_count;
 198
 199 static struct {
 200   char *ptr;
 201   const char *file;
 202   int line;
 203 } malloc_debug[100000];
 204
 205 /* Both register_ptr and unregister_ptr take O(n) operations to run,
 206    which can be a real problem.  It would be nice to use a hash table
 207    for malloc_debug, but the functions in hash.c are not suitable
 208    because they can call malloc() themselves.  Maybe it would work if
 209    the hash table were preallocated to a huge size, and if we set the
 210    rehash threshold to 1.0.  */
 211
 212 /* Register PTR in malloc_debug.  Abort if this is not possible
 213    (presumably due to the number of current allocations exceeding the
 214    size of malloc_debug.)  */
 215
 216 static void
 217 register_ptr (void *ptr, const char *file, int line)
 218 {
 219   int i;
 220   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 221     if (malloc_debug[i].ptr == NULL)
 222       {
 223         malloc_debug[i].ptr = ptr;
 224         malloc_debug[i].file = file;
 225         malloc_debug[i].line = line;
 226         return;
 227       }
 228   abort ();
 229 }
 230
 231 /* Unregister PTR from malloc_debug.  Abort if PTR is not present in
 232    malloc_debug.  (This catches calling free() with a bogus pointer.)  */
 233
 234 static void
 235 unregister_ptr (void *ptr)
 236 {
 237   int i;
 238   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 239     if (malloc_debug[i].ptr == ptr)
 240       {
 241         malloc_debug[i].ptr = NULL;
 242         return;
 243       }
 244   abort ();
 245 }
 246
 247 /* Print the malloc debug stats that can be gathered from the above
 248    information.  Currently this is the count of mallocs, frees, the
 249    difference between the two, and the dump of the contents of
 250    malloc_debug.  The last part are the memory leaks.  */
 251
 252 void
 253 print_malloc_debug_stats (void)
 254 {
 255   int i;
 256   printf ("\nMalloc:  %d\nFree:    %d\nBalance: %d\n\n",
 257           malloc_count, free_count, malloc_count - free_count);
 258   for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
 259     if (malloc_debug[i].ptr != NULL)
 260       printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
 261               malloc_debug[i].file, malloc_debug[i].line);
 262 }
 263
 264 void *
 265 xmalloc_debug (size_t size, const char *source_file, int source_line)
 266 {
 267   void *ptr = xmalloc_real (size);
 268   ++malloc_count;
 269   register_ptr (ptr, source_file, source_line);
 270   return ptr;
 271 }
 272
 273 void
 274 xfree_debug (void *ptr, const char *source_file, int source_line)
 275 {
 276   assert (ptr != NULL);
 277   ++free_count;
 278   unregister_ptr (ptr);
 279   free (ptr);
 280 }
 281
 282 void *
 283 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
 284 {
 285   void *newptr = xrealloc_real (ptr, newsize);
 286   if (!ptr)
 287     {
 288       ++malloc_count;
 289       register_ptr (newptr, source_file, source_line);
 290     }
 291   else if (newptr != ptr)
 292     {
 293       unregister_ptr (ptr);
 294       register_ptr (newptr, source_file, source_line);
 295     }
 296   return newptr;
 297 }
 298
 299 char *
 300 xstrdup_debug (const char *s, const char *source_file, int source_line)
 301 {
 302   char *copy = xstrdup_real (s);
 303   ++malloc_count;
 304   register_ptr (copy, source_file, source_line);
 305   return copy;
 306 }
 307
 308 #endif /* DEBUG_MALLOC */
 309 \f
 310 /* Utility function: like xstrdup(), but also lowercases S.  */
 311
 312 char *
 313 xstrdup_lower (const char *s)
 314 {
 315   char *copy = xstrdup (s);
 316   char *p = copy;
 317   for (; *p; p++)
 318     *p = TOLOWER (*p);
 319   return copy;
 320 }
 321
 322 /* Copy the string formed by two pointers (one on the beginning, other
 323    on the char after the last char) to a new, malloc-ed location.
 324    0-terminate it.  */
 325 char *
 326 strdupdelim (const char *beg, const char *end)
 327 {
 328   char *res = (char *)xmalloc (end - beg + 1);
 329   memcpy (res, beg, end - beg);
 330   res[end - beg] = '\0';
 331   return res;
 332 }
 333
 334 /* Parse a string containing comma-separated elements, and return a
 335    vector of char pointers with the elements.  Spaces following the
 336    commas are ignored.  */
 337 char **
 338 sepstring (const char *s)
 339 {
 340   char **res;
 341   const char *p;
 342   int i = 0;
 343
 344   if (!s || !*s)
 345     return NULL;
 346   res = NULL;
 347   p = s;
 348   while (*s)
 349     {
 350       if (*s == ',')
 351         {
 352           res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 353           res[i] = strdupdelim (p, s);
 354           res[++i] = NULL;
 355           ++s;
 356           /* Skip the blanks following the ','.  */
 357           while (ISSPACE (*s))
 358             ++s;
 359           p = s;
 360         }
 361       else
 362         ++s;
 363     }
 364   res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
 365   res[i] = strdupdelim (p, s);
 366   res[i + 1] = NULL;
 367   return res;
 368 }
 369 \f
 370 /* Return pointer to a static char[] buffer in which zero-terminated
 371    string-representation of TM (in form hh:mm:ss) is printed.
 372
 373    If TM is non-NULL, the current time-in-seconds will be stored
 374    there.
 375
 376    (#### This is misleading: one would expect TM would be used instead
 377    of the current time in that case.  This design was probably
 378    influenced by the design time(2), and should be changed at some
 379    points.  No callers use non-NULL TM anyway.)  */
 380
 381 char *
 382 time_str (time_t *tm)
 383 {
 384   static char output[15];
 385   struct tm *ptm;
 386   time_t secs = time (tm);
 387
 388   if (secs == -1)
 389     {
 390       /* In case of error, return the empty string.  Maybe we should
 391          just abort if this happens?  */
 392       *output = '\0';
 393       return output;
 394     }
 395   ptm = localtime (&secs);
 396   sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 397   return output;
 398 }
 399
 400 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
 401
 402 char *
 403 datetime_str (time_t *tm)
 404 {
 405   static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
 406   struct tm *ptm;
 407   time_t secs = time (tm);
 408
 409   if (secs == -1)
 410     {
 411       /* In case of error, return the empty string.  Maybe we should
 412          just abort if this happens?  */
 413       *output = '\0';
 414       return output;
 415     }
 416   ptm = localtime (&secs);
 417   sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
 418            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
 419            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 420   return output;
 421 }
 422 \f
 423 /* The Windows versions of the following two functions are defined in
 424    mswindows.c.  */
 425
 426 #ifndef WINDOWS
 427 void
 428 fork_to_background (void)
 429 {
 430   pid_t pid;
 431   /* Whether we arrange our own version of opt.lfilename here.  */
 432   int changedp = 0;
 433
 434   if (!opt.lfilename)
 435     {
 436       opt.lfilename = unique_name (DEFAULT_LOGFILE);
 437       changedp = 1;
 438     }
 439   pid = fork ();
 440   if (pid < 0)
 441     {
 442       /* parent, error */
 443       perror ("fork");
 444       exit (1);
 445     }
 446   else if (pid != 0)
 447     {
 448       /* parent, no error */
 449       printf (_("Continuing in background.\n"));
 450       if (changedp)
 451         printf (_("Output will be written to `%s'.\n"), opt.lfilename);
 452       exit (0);
 453     }
 454   /* child: keep running */
 455 }
 456 #endif /* not WINDOWS */
 457 \f
 458 #if 0
 459 /* debug */
 460 char *
 461 ps (char *orig)
 462 {
 463   char *r = xstrdup (orig);
 464   path_simplify (r);
 465   return r;
 466 }
 467 #endif
 468
 469 /* Canonicalize PATH, and return a new path.  The new path differs from PATH
 470    in that:
 471         Multple `/'s are collapsed to a single `/'.
 472         Leading `./'s and trailing `/.'s are removed.
 473         Trailing `/'s are removed.
 474         Non-leading `../'s and trailing `..'s are handled by removing
 475         portions of the path.
 476
 477    E.g. "a/b/c/./../d/.." will yield "a/b".  This function originates
 478    from GNU Bash.
 479
 480    Changes for Wget:
 481         Always use '/' as stub_char.
 482         Don't check for local things using canon_stat.
 483         Change the original string instead of strdup-ing.
 484         React correctly when beginning with `./' and `../'.
 485         Don't zip out trailing slashes.  */
 486 int
 487 path_simplify (char *path)
 488 {
 489   register int i, start;
 490   int changes = 0;
 491   char stub_char;
 492
 493   if (!*path)
 494     return 0;
 495
 496   stub_char = '/';
 497
 498   if (path[0] == '/')
 499     /* Preserve initial '/'. */
 500     ++path;
 501
 502   /* Nix out leading `.' or `..' with.  */
 503   if ((path[0] == '.' && path[1] == '\0')
 504       || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
 505     {
 506       path[0] = '\0';
 507       changes = 1;
 508       return changes;
 509     }
 510
 511   /* Walk along PATH looking for things to compact.  */
 512   i = 0;
 513   while (1)
 514     {
 515       if (!path[i])
 516         break;
 517
 518       while (path[i] && path[i] != '/')
 519         i++;
 520
 521       start = i++;
 522
 523       /* If we didn't find any slashes, then there is nothing left to do.  */
 524       if (!path[start])
 525         break;
 526
 527       /* Handle multiple `/'s in a row.  */
 528       while (path[i] == '/')
 529         i++;
 530
 531       if ((start + 1) != i)
 532         {
 533           strcpy (path + start + 1, path + i);
 534           i = start + 1;
 535           changes = 1;
 536         }
 537
 538       /* Check for `../', `./' or trailing `.' by itself.  */
 539       if (path[i] == '.')
 540         {
 541           /* Handle trailing `.' by itself.  */
 542           if (!path[i + 1])
 543             {
 544               path[--i] = '\0';
 545               changes = 1;
 546               break;
 547             }
 548
 549           /* Handle `./'.  */
 550           if (path[i + 1] == '/')
 551             {
 552               strcpy (path + i, path + i + 1);
 553               i = (start < 0) ? 0 : start;
 554               changes = 1;
 555               continue;
 556             }
 557
 558           /* Handle `../' or trailing `..' by itself.  */
 559           if (path[i + 1] == '.' &&
 560               (path[i + 2] == '/' || !path[i + 2]))
 561             {
 562               while (--start > -1 && path[start] != '/');
 563               strcpy (path + start + 1, path + i + 2 + (start == -1 && path[i + 2]));
 564               i = (start < 0) ? 0 : start;
 565               changes = 1;
 566               continue;
 567             }
 568         }       /* path == '.' */
 569     } /* while */
 570
 571   /* Addition: Remove all `./'-s and `../'-s preceding the string.  */
 572   i = 0;
 573   while (1)
 574     {
 575       if (path[i] == '.' && path[i + 1] == '/')
 576         i += 2;
 577       else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
 578         i += 3;
 579       else
 580         break;
 581     }
 582   if (i)
 583     {
 584       strcpy (path, path + i - 0);
 585       changes = 1;
 586     }
 587
 588   return changes;
 589 }
 590 \f
 591 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
 592    specified with TM.  */
 593 void
 594 touch (const char *file, time_t tm)
 595 {
 596 #ifdef HAVE_STRUCT_UTIMBUF
 597   struct utimbuf times;
 598   times.actime = times.modtime = tm;
 599 #else
 600   time_t times[2];
 601   times[0] = times[1] = tm;
 602 #endif
 603
 604   if (utime (file, &times) == -1)
 605     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
 606 }
 607
 608 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
 609    nothing under MS-Windows.  */
 610 int
 611 remove_link (const char *file)
 612 {
 613   int err = 0;
 614   struct stat st;
 615
 616   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
 617     {
 618       DEBUGP (("Unlinking %s (symlink).\n", file));
 619       err = unlink (file);
 620       if (err != 0)
 621         logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
 622                    file, strerror (errno));
 623     }
 624   return err;
 625 }
 626
 627 /* Does FILENAME exist?  This is quite a lousy implementation, since
 628    it supplies no error codes -- only a yes-or-no answer.  Thus it
 629    will return that a file does not exist if, e.g., the directory is
 630    unreadable.  I don't mind it too much currently, though.  The
 631    proper way should, of course, be to have a third, error state,
 632    other than true/false, but that would introduce uncalled-for
 633    additional complexity to the callers.  */
 634 int
 635 file_exists_p (const char *filename)
 636 {
 637 #ifdef HAVE_ACCESS
 638   return access (filename, F_OK) >= 0;
 639 #else
 640   struct stat buf;
 641   return stat (filename, &buf) >= 0;
 642 #endif
 643 }
 644
 645 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
 646    Returns 0 on error.  */
 647 int
 648 file_non_directory_p (const char *path)
 649 {
 650   struct stat buf;
 651   /* Use lstat() rather than stat() so that symbolic links pointing to
 652      directories can be identified correctly.  */
 653   if (lstat (path, &buf) != 0)
 654     return 0;
 655   return S_ISDIR (buf.st_mode) ? 0 : 1;
 656 }
 657
 658 /* Return a unique filename, given a prefix and count */
 659 static char *
 660 unique_name_1 (const char *fileprefix, int count)
 661 {
 662   char *filename;
 663
 664   if (count)
 665     {
 666       filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
 667       sprintf (filename, "%s.%d", fileprefix, count);
 668     }
 669   else
 670     filename = xstrdup (fileprefix);
 671
 672   if (!file_exists_p (filename))
 673     return filename;
 674   else
 675     {
 676       xfree (filename);
 677       return NULL;
 678     }
 679 }
 680
 681 /* Return a unique file name, based on PREFIX.  */
 682 char *
 683 unique_name (const char *prefix)
 684 {
 685   char *file = NULL;
 686   int count = 0;
 687
 688   while (!file)
 689     file = unique_name_1 (prefix, count++);
 690   return file;
 691 }
 692 \f
 693 /* Create DIRECTORY.  If some of the pathname components of DIRECTORY
 694    are missing, create them first.  In case any mkdir() call fails,
 695    return its error status.  Returns 0 on successful completion.
 696
 697    The behaviour of this function should be identical to the behaviour
 698    of `mkdir -p' on systems where mkdir supports the `-p' option.  */
 699 int
 700 make_directory (const char *directory)
 701 {
 702   int quit = 0;
 703   int i;
 704   char *dir;
 705
 706   /* Make a copy of dir, to be able to write to it.  Otherwise, the
 707      function is unsafe if called with a read-only char *argument.  */
 708   STRDUP_ALLOCA (dir, directory);
 709
 710   /* If the first character of dir is '/', skip it (and thus enable
 711      creation of absolute-pathname directories.  */
 712   for (i = (*dir == '/'); 1; ++i)
 713     {
 714       for (; dir[i] && dir[i] != '/'; i++)
 715         ;
 716       if (!dir[i])
 717         quit = 1;
 718       dir[i] = '\0';
 719       /* Check whether the directory already exists.  */
 720       if (!file_exists_p (dir))
 721         {
 722           if (mkdir (dir, 0777) < 0)
 723             return -1;
 724         }
 725       if (quit)
 726         break;
 727       else
 728         dir[i] = '/';
 729     }
 730   return 0;
 731 }
 732
 733 /* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
 734    should be a file name.  For example, file_merge("/foo/bar", "baz")
 735    will return "/foo/baz".  file_merge("/foo/bar/", "baz") will return
 736    "foo/bar/baz".
 737
 738    In other words, it's a simpler and gentler version of uri_merge_1.  */
 739
 740 char *
 741 file_merge (const char *base, const char *file)
 742 {
 743   char *result;
 744   const char *cut = (const char *)strrchr (base, '/');
 745
 746   if (!cut)
 747     cut = base + strlen (base);
 748
 749   result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
 750   memcpy (result, base, cut - base);
 751   result[cut - base] = '/';
 752   strcpy (result + (cut - base) + 1, file);
 753
 754   return result;
 755 }
 756 \f
 757 static int in_acclist PARAMS ((const char *const *, const char *, int));
 758
 759 /* Determine whether a file is acceptable to be followed, according to
 760    lists of patterns to accept/reject.  */
 761 int
 762 acceptable (const char *s)
 763 {
 764   int l = strlen (s);
 765
 766   while (l && s[l] != '/')
 767     --l;
 768   if (s[l] == '/')
 769     s += (l + 1);
 770   if (opt.accepts)
 771     {
 772       if (opt.rejects)
 773         return (in_acclist ((const char *const *)opt.accepts, s, 1)
 774                 && !in_acclist ((const char *const *)opt.rejects, s, 1));
 775       else
 776         return in_acclist ((const char *const *)opt.accepts, s, 1);
 777     }
 778   else if (opt.rejects)
 779     return !in_acclist ((const char *const *)opt.rejects, s, 1);
 780   return 1;
 781 }
 782
 783 /* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
 784    `/something', frontcmp() will return 1 only if S2 begins with
 785    `/something'.  Otherwise, 0 is returned.  */
 786 int
 787 frontcmp (const char *s1, const char *s2)
 788 {
 789   for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
 790   return !*s1;
 791 }
 792
 793 /* Iterate through STRLIST, and return the first element that matches
 794    S, through wildcards or front comparison (as appropriate).  */
 795 static char *
 796 proclist (char **strlist, const char *s, enum accd flags)
 797 {
 798   char **x;
 799
 800   for (x = strlist; *x; x++)
 801     if (has_wildcards_p (*x))
 802       {
 803         if (fnmatch (*x, s, FNM_PATHNAME) == 0)
 804           break;
 805       }
 806     else
 807       {
 808         char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
 809         if (frontcmp (p, s))
 810           break;
 811       }
 812   return *x;
 813 }
 814
 815 /* Returns whether DIRECTORY is acceptable for download, wrt the
 816    include/exclude lists.
 817
 818    If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
 819    and absolute paths may be freely intermixed.  */
 820 int
 821 accdir (const char *directory, enum accd flags)
 822 {
 823   /* Remove starting '/'.  */
 824   if (flags & ALLABS && *directory == '/')
 825     ++directory;
 826   if (opt.includes)
 827     {
 828       if (!proclist (opt.includes, directory, flags))
 829         return 0;
 830     }
 831   if (opt.excludes)
 832     {
 833       if (proclist (opt.excludes, directory, flags))
 834         return 0;
 835     }
 836   return 1;
 837 }
 838
 839 /* Match the end of STRING against PATTERN.  For instance:
 840
 841    match_backwards ("abc", "bc") -> 1
 842    match_backwards ("abc", "ab") -> 0
 843    match_backwards ("abc", "abc") -> 1 */
 844 static int
 845 match_backwards (const char *string, const char *pattern)
 846 {
 847   int i, j;
 848
 849   for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
 850     if (string[i] != pattern[j])
 851       break;
 852   /* If the pattern was exhausted, the match was succesful.  */
 853   if (j == -1)
 854     return 1;
 855   else
 856     return 0;
 857 }
 858
 859 /* Checks whether string S matches each element of ACCEPTS.  A list
 860    element are matched either with fnmatch() or match_backwards(),
 861    according to whether the element contains wildcards or not.
 862
 863    If the BACKWARD is 0, don't do backward comparison -- just compare
 864    them normally.  */
 865 static int
 866 in_acclist (const char *const *accepts, const char *s, int backward)
 867 {
 868   for (; *accepts; accepts++)
 869     {
 870       if (has_wildcards_p (*accepts))
 871         {
 872           /* fnmatch returns 0 if the pattern *does* match the
 873              string.  */
 874           if (fnmatch (*accepts, s, 0) == 0)
 875             return 1;
 876         }
 877       else
 878         {
 879           if (backward)
 880             {
 881               if (match_backwards (s, *accepts))
 882                 return 1;
 883             }
 884           else
 885             {
 886               if (!strcmp (s, *accepts))
 887                 return 1;
 888             }
 889         }
 890     }
 891   return 0;
 892 }
 893
 894 /* Return the malloc-ed suffix of STR.  For instance:
 895    suffix ("foo.bar")       -> "bar"
 896    suffix ("foo.bar.baz")   -> "baz"
 897    suffix ("/foo/bar")      -> NULL
 898    suffix ("/foo.bar/baz")  -> NULL  */
 899 char *
 900 suffix (const char *str)
 901 {
 902   int i;
 903
 904   for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--);
 905   if (str[i++] == '.')
 906     return xstrdup (str + i);
 907   else
 908     return NULL;
 909 }
 910
 911 /* Read a line from FP.  The function reallocs the storage as needed
 912    to accomodate for any length of the line.  Reallocs are done
 913    storage exponentially, doubling the storage after each overflow to
 914    minimize the number of calls to realloc() and fgets().  The newline
 915    character at the end of line is retained.
 916
 917    After end-of-file is encountered without anything being read, NULL
 918    is returned.  NULL is also returned on error.  To distinguish
 919    between these two cases, use the stdio function ferror().  */
 920
 921 char *
 922 read_whole_line (FILE *fp)
 923 {
 924   int length = 0;
 925   int bufsize = 81;
 926   char *line = (char *)xmalloc (bufsize);
 927
 928   while (fgets (line + length, bufsize - length, fp))
 929     {
 930       length += strlen (line + length);
 931       assert (length > 0);
 932       if (line[length - 1] == '\n')
 933         break;
 934       /* fgets() guarantees to read the whole line, or to use up the
 935          space we've given it.  We can double the buffer
 936          unconditionally.  */
 937       bufsize <<= 1;
 938       line = xrealloc (line, bufsize);
 939     }
 940   if (length == 0 || ferror (fp))
 941     {
 942       xfree (line);
 943       return NULL;
 944     }
 945   if (length + 1 < bufsize)
 946     /* Relieve the memory from our exponential greediness.  We say
 947        `length + 1' because the terminating \0 is not included in
 948        LENGTH.  We don't need to zero-terminate the string ourselves,
 949        though, because fgets() does that.  */
 950     line = xrealloc (line, length + 1);
 951   return line;
 952 }
 953 \f
  954 /* Read FILE into memory.  A pointer to `struct file_memory' is
  955    returned; use struct element `content' to access file contents, and
 956    the element `length' to know the file length.  `content' is *not*
 957    zero-terminated, and you should *not* read or write beyond the [0,
 958    length) range of characters.
 959
 960    After you are done with the file contents, call read_file_free to
 961    release the memory.
 962
 963    Depending on the operating system and the type of file that is
 964    being read, read_file() either mmap's the file into memory, or
 965    reads the file into the core using read().
 966
 967    If file is named "-", fileno(stdin) is used for reading instead.
 968    If you want to read from a real file named "-", use "./-" instead.  */
 969
 970 struct file_memory *
 971 read_file (const char *file)
 972 {
 973   int fd;
 974   struct file_memory *fm;
 975   long size;
 976   int inhibit_close = 0;
 977
 978   /* Some magic in the finest tradition of Perl and its kin: if FILE
 979      is "-", just use stdin.  */
 980   if (HYPHENP (file))
 981     {
 982       fd = fileno (stdin);
 983       inhibit_close = 1;
 984       /* Note that we don't inhibit mmap() in this case.  If stdin is
 985          redirected from a regular file, mmap() will still work.  */
 986     }
 987   else
 988     fd = open (file, O_RDONLY);
 989   if (fd < 0)
 990     return NULL;
 991   fm = xmalloc (sizeof (struct file_memory));
 992
 993 #ifdef HAVE_MMAP
 994   {
 995     struct stat buf;
 996     if (fstat (fd, &buf) < 0)
 997       goto mmap_lose;
 998     fm->length = buf.st_size;
 999     /* NOTE: As far as I know, the callers of this function never
1000        modify the file text.  Relying on this would enable us to
1001        specify PROT_READ and MAP_SHARED for a marginal gain in
1002        efficiency, but at some cost to generality.  */
1003     fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1004                         MAP_PRIVATE, fd, 0);
1005     if (fm->content == (char *)MAP_FAILED)
1006       goto mmap_lose;
1007     if (!inhibit_close)
1008       close (fd);
1009
1010     fm->mmap_p = 1;
1011     return fm;
1012   }
1013
1014  mmap_lose:
1015   /* The most common reason why mmap() fails is that FD does not point
1016      to a plain file.  However, it's also possible that mmap() doesn't
1017      work for a particular type of file.  Therefore, whenever mmap()
1018      fails, we just fall back to the regular method.  */
1019 #endif /* HAVE_MMAP */
1020
1021   fm->length = 0;
1022   size = 512;                   /* number of bytes fm->contents can
1023                                    hold at any given time. */
1024   fm->content = xmalloc (size);
1025   while (1)
1026     {
1027       long nread;
1028       if (fm->length > size / 2)
1029         {
1030           /* #### I'm not sure whether the whole exponential-growth
1031              thing makes sense with kernel read.  On Linux at least,
1032              read() refuses to read more than 4K from a file at a
1033              single chunk anyway.  But other Unixes might optimize it
1034              better, and it doesn't *hurt* anything, so I'm leaving
1035              it.  */
1036
1037           /* Normally, we grow SIZE exponentially to make the number
1038              of calls to read() and realloc() logarithmic in relation
1039              to file size.  However, read() can read an amount of data
1040              smaller than requested, and it would be unreasonably to
1041              double SIZE every time *something* was read.  Therefore,
1042              we double SIZE only when the length exceeds half of the
1043              entire allocated size.  */
1044           size <<= 1;
1045           fm->content = xrealloc (fm->content, size);
1046         }
1047       nread = read (fd, fm->content + fm->length, size - fm->length);
1048       if (nread > 0)
1049         /* Successful read. */
1050         fm->length += nread;
1051       else if (nread < 0)
1052         /* Error. */
1053         goto lose;
1054       else
1055         /* EOF */
1056         break;
1057     }
1058   if (!inhibit_close)
1059     close (fd);
1060   if (size > fm->length && fm->length != 0)
1061     /* Due to exponential growth of fm->content, the allocated region
1062        might be much larger than what is actually needed.  */
1063     fm->content = xrealloc (fm->content, fm->length);
1064   fm->mmap_p = 0;
1065   return fm;
1066
1067  lose:
1068   if (!inhibit_close)
1069     close (fd);
1070   xfree (fm->content);
1071   xfree (fm);
1072   return NULL;
1073 }
1074
1075 /* Release the resources held by FM.  Specifically, this calls
1076    munmap() or xfree() on fm->content, depending whether mmap or
1077    malloc/read were used to read in the file.  It also frees the
1078    memory needed to hold the FM structure itself.  */
1079
1080 void
1081 read_file_free (struct file_memory *fm)
1082 {
1083 #ifdef HAVE_MMAP
1084   if (fm->mmap_p)
1085     {
1086       munmap (fm->content, fm->length);
1087     }
1088   else
1089 #endif
1090     {
1091       xfree (fm->content);
1092     }
1093   xfree (fm);
1094 }
1095 \f
/* Release a NULL-terminated vector of pointers: each element is
   passed to xfree(), then the vector itself.  A NULL VEC is accepted
   and ignored.  */
void
free_vec (char **vec)
{
  char **cur;

  if (vec == NULL)
    return;

  for (cur = vec; *cur != NULL; cur++)
    xfree (*cur);
  xfree (vec);
}
1109
/* Append the NULL-terminated vector V2 to the NULL-terminated vector
   V1 and return the merged vector.  V1 is reallocated and V2 is
   freed, so neither pointer may be used after the call; the strings
   themselves are moved, not copied.  If V1 is NULL, V2 is returned;
   if V2 is NULL, V1 is returned.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 has no elements: there is nothing to append, but the
         (empty) vector still has to be released.  */
      xfree (v2);
      return v1;
    }
  /* Count the elements of V1.  */
  for (i = 0; v1[i]; i++)
    ;
  /* Count the elements of V2.  */
  for (j = 0; v2[j]; j++)
    ;
  /* Make room in V1 for both sets of elements plus the terminating
     NULL.  The elements are `char *', so that is the size to use.  */
  v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char *));
  /* Copy V2's elements together with its NULL terminator.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
  xfree (v2);
  return v1;
}
1138
1139 /* A set of simple-minded routines to store strings in a linked list.
1140    This used to also be used for searching, but now we have hash
1141    tables for that.  */
1142
1143 /* It's a shame that these simple things like linked lists and hash
1144    tables (see hash.c) need to be implemented over and over again.  It
1145    would be nice to be able to use the routines from glib -- see
1146    www.gtk.org for details.  However, that would make Wget depend on
1147    glib, and I want to avoid dependencies to external libraries for
1148    reasons of convenience and portability (I suspect Wget is more
1149    portable than anything ever written for Gnome).  */
1150
1151 /* Append an element to the list.  If the list has a huge number of
1152    elements, this can get slow because it has to find the list's
1153    ending.  If you think you have to call slist_append in a loop,
1154    think about calling slist_prepend() followed by slist_nreverse().  */
1155
1156 slist *
1157 slist_append (slist *l, const char *s)
1158 {
1159   slist *newel = (slist *)xmalloc (sizeof (slist));
1160   slist *beg = l;
1161
1162   newel->string = xstrdup (s);
1163   newel->next = NULL;
1164
1165   if (!l)
1166     return newel;
1167   /* Find the last element.  */
1168   while (l->next)
1169     l = l->next;
1170   l->next = newel;
1171   return beg;
1172 }
1173
1174 /* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
1175
1176 slist *
1177 slist_prepend (slist *l, const char *s)
1178 {
1179   slist *newel = (slist *)xmalloc (sizeof (slist));
1180   newel->string = xstrdup (s);
1181   newel->next = l;
1182   return newel;
1183 }
1184
1185 /* Destructively reverse L. */
1186
1187 slist *
1188 slist_nreverse (slist *l)
1189 {
1190   slist *prev = NULL;
1191   while (l)
1192     {
1193       slist *next = l->next;
1194       l->next = prev;
1195       prev = l;
1196       l = next;
1197     }
1198   return prev;
1199 }
1200
1201 /* Is there a specific entry in the list?  */
1202 int
1203 slist_contains (slist *l, const char *s)
1204 {
1205   for (; l; l = l->next)
1206     if (!strcmp (l->string, s))
1207       return 1;
1208   return 0;
1209 }
1210
1211 /* Free the whole slist.  */
1212 void
1213 slist_free (slist *l)
1214 {
1215   while (l)
1216     {
1217       slist *n = l->next;
1218       xfree (l->string);
1219       xfree (l);
1220       l = n;
1221     }
1222 }
1223 \f
1224 /* Sometimes it's useful to create "sets" of strings, i.e. special
1225    hash tables where you want to store strings as keys and merely
1226    query for their existence.  Here is a set of utility routines that
1227    makes that transparent.  */
1228
/* Add S to the string set HT.  A private copy of S becomes the key;
   nothing happens if S is already present.  */
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Insert only when the element is missing.  That way an existing
     key never has to be freed and replaced by a fresh copy.

     The value stored is the literal "1": a clear arbitrary marker
     that costs no memory, because the same string is shared by all
     the entries of all such sets.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1244
/* Report whether S is a member of the string set HT.  This simply
   forwards to hash_table_contains and returns its result.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int present = hash_table_contains (ht, s);
  return present;
}
1252
/* Callback handed to hash_table_map by string_set_free: release one
   key of a string set.  The value and the extra argument are not
   used.  Always returns 0 (presumably telling hash_table_map to keep
   going -- confirm against hash.c).  */
static int
string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
{
  xfree (key);

  return 0;
}
1259
1260 void
1261 string_set_free (struct hash_table *ht)
1262 {
1263   hash_table_map (ht, string_set_free_mapper, NULL);
1264   hash_table_destroy (ht);
1265 }
1266
/* Callback handed to hash_table_map by free_keys_and_values: release
   both the key and the value of one entry.  The extra argument is
   not used.  Always returns 0.  */
static int
free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
{
  xfree (key);
  xfree (value);

  return 0;
}
1274
1275 /* Another utility function: call free() on all keys and values of HT.  */
1276
1277 void
1278 free_keys_and_values (struct hash_table *ht)
1279 {
1280   hash_table_map (ht, free_keys_and_values_mapper, NULL);
1281 }
1282
1283 \f
/* Common worker behind legible and legible_very_long.  REPR is the
   textual form of a number, optionally starting with `-'.  The
   result is the same text with a `,' inserted between groups of
   three characters, counted from the right.  It lives in a static
   buffer that is overwritten by the next call.  */

static char *
legible_1 (const char *repr)
{
  static char outbuf[128];
  char *out = outbuf;
  const char *digits = repr;
  int ndigits, pos;

  /* A leading minus sign is copied through and takes no part in the
     grouping.  */
  if (*digits == '-')
    {
      *out++ = '-';
      digits++;
    }

  ndigits = strlen (digits);
  for (pos = 0; pos < ndigits; pos++)
    {
      /* A separator belongs wherever the number of characters still
         to be written is a multiple of three -- except at the very
         beginning.  */
      if (pos > 0 && (ndigits - pos) % 3 == 0)
        *out++ = ',';
      *out++ = digits[pos];
    }
  *out = '\0';
  return outbuf;
}
1321
/* Return L printed in decimal with `,' between groups of three
   digits.  The result points to static storage (see legible_1) and
   is overwritten by the next call.  */
char *
legible (long l)
{
  char digits[24];

  /* Convert to plain decimal first, then let legible_1 insert the
     separators.  */
  long_to_string (digits, l);
  return legible_1 (digits);
}
1331
1332 /* Write a string representation of NUMBER into the provided buffer.
1333    We cannot use sprintf() because we cannot be sure whether the
1334    platform supports printing of what we chose for VERY_LONG_TYPE.
1335
1336    Example: Gcc supports `long long' under many platforms, but on many
1337    of those the native libc knows nothing of it and therefore cannot
1338    print it.
1339
1340    How long BUFFER needs to be depends on the platform and the content
1341    of NUMBER.  For 64-bit VERY_LONG_TYPE (the most common case), 24
1342    bytes are sufficient.  Using more might be a good idea.
1343
1344    This function does not go through the hoops that long_to_string
1345    goes to because it doesn't aspire to be fast.  (It's called perhaps
1346    once in a Wget run.)  */
1347
1348 static void
1349 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1350 {
1351   int i = 0;
1352   int j;
1353
1354   /* Print the number backwards... */
1355   do
1356     {
1357       buffer[i++] = '0' + number % 10;
1358       number /= 10;
1359     }
1360   while (number);
1361
1362   /* ...and reverse the order of the digits. */
1363   for (j = 0; j < i / 2; j++)
1364     {
1365       char c = buffer[j];
1366       buffer[j] = buffer[i - 1 - j];
1367       buffer[i - 1 - j] = c;
1368     }
1369   buffer[i] = '\0';
1370 }
1371
1372 /* The same as legible(), but works on VERY_LONG_TYPE.  See sysdep.h.  */
1373 char *
1374 legible_very_long (VERY_LONG_TYPE l)
1375 {
1376   char inbuf[128];
1377   /* Print the number into the buffer.  */
1378   very_long_to_string (inbuf, l);
1379   return legible_1 (inbuf);
1380 }
1381
/* Return the number of characters needed to print A in decimal:
   the count of its digits, plus one for the `-' sign when A is
   negative.  Zero counts as one digit.  */
int
numdigit (long a)
{
  int res = 1;
  unsigned long magnitude;

  if (a < 0)
    {
      /* Take the absolute value in unsigned arithmetic.  Negating A
         itself would overflow for LONG_MIN, which has no positive
         counterpart in `long'.  */
      magnitude = 0UL - (unsigned long) a;
      ++res;
    }
  else
    magnitude = (unsigned long) a;

  while ((magnitude /= 10) != 0)
    ++res;
  return res;
}
1396
/* Helper macros for long_to_string.  They operate on its locals N
   (the remaining magnitude) and P (the output cursor).

   ONE_DIGIT (F) stores the digit N / F at *P and advances P;
   ONE_DIGIT_ADVANCE (F) additionally reduces N modulo F.
   DIGITS_<k> (F), with F equal to 10 to the power k-1, emits exactly
   k digits by chaining down to DIGITS_1.  */

#define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
#define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))

#define DIGITS_1(figure) ONE_DIGIT (figure)
#define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
#define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
#define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
#define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
#define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
#define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
#define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
#define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
#define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)

/* DIGITS_<11-19> are only used on machines with 64-bit longs. */

#define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
#define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
#define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
#define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
#define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
#define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
#define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
#define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
#define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)

/* Print NUMBER to BUFFER in base 10, followed by a terminating \0.
   The output is the same as that of `sprintf(buffer, "%ld", number)',
   only produced much faster.

   The speedup may make a difference in programs that frequently
   convert numbers to strings.  Some implementations of sprintf,
   particularly the one in GNU libc, have been known to be extremely
   slow compared to this function.

   BUFFER must have room for everything the number can expand to.
   With 64-bit longs, 24 bytes are always enough: that covers the
   worst-case digits, the optional `-' sign, and the trailing \0.

   When SIZEOF_LONG is neither 4 nor 8, the work is simply delegated
   to sprintf.  */

void
long_to_string (char *buffer, long number)
{
#if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
  /* We are running in a strange or misconfigured environment.  Let
     sprintf cope with it.  */
  sprintf (buffer, "%ld", number);
#else  /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
  char *p = buffer;
  unsigned long n;

  /* Work on the magnitude as an unsigned value.  Negating NUMBER
     directly would overflow for LONG_MIN, whose absolute value is
     not representable as a long.  */
  if (number < 0)
    {
      *p++ = '-';
      n = 0UL - (unsigned long) number;
    }
  else
    n = (unsigned long) number;

  /* Pick the DIGITS_<k> macro matching the number of digits in N.  */
  if      (n < 10)                   { DIGITS_1 (1); }
  else if (n < 100)                  { DIGITS_2 (10); }
  else if (n < 1000)                 { DIGITS_3 (100); }
  else if (n < 10000)                { DIGITS_4 (1000); }
  else if (n < 100000)               { DIGITS_5 (10000); }
  else if (n < 1000000)              { DIGITS_6 (100000); }
  else if (n < 10000000)             { DIGITS_7 (1000000); }
  else if (n < 100000000)            { DIGITS_8 (10000000); }
  else if (n < 1000000000)           { DIGITS_9 (100000000); }
#if SIZEOF_LONG == 4
  /* ``if (1)'' serves only to preserve editor indentation. */
  else if (1)                        { DIGITS_10 (1000000000); }
#else  /* SIZEOF_LONG != 4 */
  else if (n < 10000000000L)         { DIGITS_10 (1000000000L); }
  else if (n < 100000000000L)        { DIGITS_11 (10000000000L); }
  else if (n < 1000000000000L)       { DIGITS_12 (100000000000L); }
  else if (n < 10000000000000L)      { DIGITS_13 (1000000000000L); }
  else if (n < 100000000000000L)     { DIGITS_14 (10000000000000L); }
  else if (n < 1000000000000000L)    { DIGITS_15 (100000000000000L); }
  else if (n < 10000000000000000L)   { DIGITS_16 (1000000000000000L); }
  else if (n < 100000000000000000L)  { DIGITS_17 (10000000000000000L); }
  else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
  else                               { DIGITS_19 (1000000000000000000L); }
#endif /* SIZEOF_LONG != 4 */

  *p = '\0';
#endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
}

#undef ONE_DIGIT
#undef ONE_DIGIT_ADVANCE

#undef DIGITS_1
#undef DIGITS_2
#undef DIGITS_3
#undef DIGITS_4
#undef DIGITS_5
#undef DIGITS_6
#undef DIGITS_7
#undef DIGITS_8
#undef DIGITS_9
#undef DIGITS_10
#undef DIGITS_11
#undef DIGITS_12
#undef DIGITS_13
#undef DIGITS_14
#undef DIGITS_15
#undef DIGITS_16
#undef DIGITS_17
#undef DIGITS_18
#undef DIGITS_19
1505 \f
1506 /* Support for timers. */
1507
#undef TIMER_WINDOWS
#undef TIMER_GETTIMEOFDAY
#undef TIMER_TIME

/* Exactly one of the three constants above ends up defined, chosen
   by the OS and by the availability of gettimeofday().  Windows uses
   TIMER_WINDOWS; virtually every modern Unix system defines
   TIMER_GETTIMEOFDAY; TIMER_TIME is the catch-all for non-Windows
   systems that lack gettimeofday.

   #### Perhaps we should also support ftime(), which exists on old
   BSD 4.2-influenced systems?  (It also existed under MS DOS Borland
   C, if memory serves me.)  */

#if defined (WINDOWS)
# define TIMER_WINDOWS
#elif defined (HAVE_GETTIMEOFDAY)
# define TIMER_GETTIMEOFDAY
#else
# define TIMER_TIME
#endif

/* The starting point recorded by wtimer_reset, stored in whatever
   form the selected timer backend works with.  */
struct wget_timer {
#if defined (TIMER_GETTIMEOFDAY)
  /* Seconds and microseconds, as reported by gettimeofday.  */
  long secs;
  long usecs;
#elif defined (TIMER_TIME)
  /* Whole seconds, as reported by time.  */
  time_t secs;
#elif defined (TIMER_WINDOWS)
  /* The system time converted to a FILETIME value.  */
  ULARGE_INTEGER wintime;
#endif
};
1546
1547 /* Allocate a timer.  It is not legal to do anything with a freshly
1548    allocated timer, except call wtimer_reset() or wtimer_delete().  */
1549
1550 struct wget_timer *
1551 wtimer_allocate (void)
1552 {
1553   struct wget_timer *wt =
1554     (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1555   return wt;
1556 }
1557
/* Create a timer that is ready for use: allocate it, reset it so
   that it starts counting now, and return it.  */

struct wget_timer *
wtimer_new (void)
{
  struct wget_timer *fresh = wtimer_allocate ();

  wtimer_reset (fresh);
  return fresh;
}
1567
/* Release the memory occupied by timer WT.  WT must not be used in
   any way after this call.  */

void
wtimer_delete (struct wget_timer *wt)
{
  xfree (wt);
}
1576
/* Record the current time in WT as the starting point from which
   wtimer_elapsed() measures.  A timer that has already been used may
   be reset again.  */

void
wtimer_reset (struct wget_timer *wt)
{
#if defined (TIMER_GETTIMEOFDAY)
  struct timeval now;

  gettimeofday (&now, NULL);
  wt->secs  = now.tv_sec;
  wt->usecs = now.tv_usec;
#elif defined (TIMER_TIME)
  wt->secs = time (NULL);
#elif defined (TIMER_WINDOWS)
  SYSTEMTIME systime;
  FILETIME filetime;

  /* The system time is converted to a FILETIME, whose two 32-bit
     halves are then stored in WT.  */
  GetSystemTime (&systime);
  SystemTimeToFileTime (&systime, &filetime);
  wt->wintime.LowPart  = filetime.dwLowDateTime;
  wt->wintime.HighPart = filetime.dwHighDateTime;
#endif
}
1604
/* Return the number of milliseconds that have passed since WT was
   last reset.  The timer is not modified, so the function may be
   called repeatedly to obtain increasingly higher values.  */

long
wtimer_elapsed (struct wget_timer *wt)
{
#ifdef TIMER_GETTIMEOFDAY
  struct timeval t;
  gettimeofday (&t, NULL);
  /* Whole seconds scaled up to milliseconds, plus the microsecond
     difference scaled down to milliseconds.  */
  return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
#endif

#ifdef TIMER_TIME
  time_t now = time (NULL);
  /* time() only has one-second resolution.  */
  return 1000 * (now - wt->secs);
#endif

  /* Guarded by TIMER_WINDOWS, like the matching branches of struct
     wget_timer, wtimer_reset and wtimer_granularity.  */
#ifdef TIMER_WINDOWS
  FILETIME ft;
  SYSTEMTIME st;
  ULARGE_INTEGER uli;
  GetSystemTime (&st);
  SystemTimeToFileTime (&st, &ft);
  uli.HighPart = ft.dwHighDateTime;
  uli.LowPart = ft.dwLowDateTime;
  /* FILETIME counts 100-nanosecond intervals, i.e. 10000 of them per
     millisecond.  */
  return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
#endif
}
1634
/* Return the estimated resolution of the timer implementation, in
   milliseconds.  Code that has to cope with "zero" time intervals
   needs to know this.  */

long
wtimer_granularity (void)
{
#if defined (TIMER_GETTIMEOFDAY)
  /* The granularity of gettimeofday varies hugely between
     architectures, but on modern machines it appears to be better
     than 1ms.  */
  return 1;
#elif defined (TIMER_TIME)
  /* time() counts whole seconds.  */
  return 1000;
#elif defined (TIMER_WINDOWS)
  /* ? */
  return 1;
#endif
}
1659 \f
1660 /* This should probably be at a better place, but it doesn't really
1661    fit into html-parse.c.  */
1662
/* Return a freshly malloc-ed copy of S in which the characters that
   are special in HTML are replaced with entities, as per RFC1866:

   `&' -> `&amp;'
   `<' -> `&lt;'
   `>' -> `&gt;'
   `"' -> `&quot;'
   SP  -> `&#32;'

   All other characters are copied unchanged.  */
char *
html_quote_string (const char *s)
{
  const char *src;
  char *quoted, *dst;
  int size = 0;

  /* First pass: work out how long the quoted string will be.  */
  for (src = s; *src; src++)
    switch (*src)
      {
      case '&':
        size += 5;              /* `&amp;' */
        break;
      case '<': case '>':
        size += 4;              /* `&lt;' and `&gt;' */
        break;
      case '\"':
        size += 6;              /* `&quot;' */
        break;
      case ' ':
        size += 5;              /* `&#32;' */
        break;
      default:
        size += 1;
      }

  quoted = (char *)xmalloc (size + 1);

  /* Second pass: copy S, substituting the entities.  */
  dst = quoted;
  for (src = s; *src; src++)
    {
      const char *entity = NULL;

      if (*src == '&')
        entity = "&amp;";
      else if (*src == '<')
        entity = "&lt;";
      else if (*src == '>')
        entity = "&gt;";
      else if (*src == '\"')
        entity = "&quot;";
      else if (*src == ' ')
        entity = "&#32;";

      if (entity)
        {
          int len = strlen (entity);
          memcpy (dst, entity, len);
          dst += len;
        }
      else
        *dst++ = *src;
    }
  *dst = '\0';
  return quoted;
}
1734
1735 /* Determine the width of the terminal we're running on.  If that's
1736    not possible, return 0.  */
1737
1738 int
1739 determine_screen_width (void)
1740 {
1741   /* If there's a way to get the terminal size using POSIX
1742      tcgetattr(), somebody please tell me.  */
1743 #ifndef TIOCGWINSZ
1744   return 0;
1745 #else  /* TIOCGWINSZ */
1746   int fd;
1747   struct winsize wsz;
1748
1749   if (opt.lfilename != NULL)
1750     return 0;
1751
1752   fd = fileno (stderr);
1753   if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1754     return 0;                   /* most likely ENOTTY */
1755
1756   return wsz.ws_col;
1757 #endif /* TIOCGWINSZ */
1758 }