sjero.net Git - wget/blob - src/retr.c

   1 /* File retrieval.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or (at
   9 your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <sys/types.h>
  35 #ifdef HAVE_UNISTD_H
  36 # include <unistd.h>
  37 #endif /* HAVE_UNISTD_H */
  38 #include <errno.h>
  39 #ifdef HAVE_STRING_H
  40 # include <string.h>
  41 #else
  42 # include <strings.h>
  43 #endif /* HAVE_STRING_H */
  44 #include <assert.h>
  45
  46 #include "wget.h"
  47 #include "utils.h"
  48 #include "retr.h"
  49 #include "progress.h"
  50 #include "url.h"
  51 #include "recur.h"
  52 #include "ftp.h"
  53 #include "host.h"
  54 #include "connect.h"
  55 #include "hash.h"
  56 #include "convert.h"
  57
  58 #ifdef HAVE_SSL
  59 # include "gen_sslfunc.h"       /* for ssl_iread */
  60 #endif
  61
  62 #ifndef errno
  63 extern int errno;
  64 #endif
  65
/* Incremented once at the end of every retrieve_url() call that runs
   to completion.  See the comment in gethttp() for why this is
   needed.  */
int global_download_count;

/* Total size of downloaded files.  Used to enforce quota:
   retrieve_from_file() stops with QUOTEXC once this exceeds
   opt.quota.  */
LARGE_INT total_downloaded_bytes;
  71
  72 \f
  73 static struct {
  74   long chunk_bytes;
  75   double chunk_start;
  76   double sleep_adjust;
  77 } limit_data;
  78
  79 static void
  80 limit_bandwidth_reset (void)
  81 {
  82   limit_data.chunk_bytes = 0;
  83   limit_data.chunk_start = 0;
  84 }
  85
  86 /* Limit the bandwidth by pausing the download for an amount of time.
  87    BYTES is the number of bytes received from the network, and TIMER
  88    is the timer that started at the beginning of download.  */
  89
  90 static void
  91 limit_bandwidth (long bytes, struct wget_timer *timer)
  92 {
  93   double delta_t = wtimer_read (timer) - limit_data.chunk_start;
  94   double expected;
  95
  96   limit_data.chunk_bytes += bytes;
  97
  98   /* Calculate the amount of time we expect downloading the chunk
  99      should take.  If in reality it took less time, sleep to
 100      compensate for the difference.  */
 101   expected = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;
 102
 103   if (expected > delta_t)
 104     {
 105       double slp = expected - delta_t + limit_data.sleep_adjust;
 106       double t0, t1;
 107       if (slp < 200)
 108         {
 109           DEBUGP (("deferring a %.2f ms sleep (%ld/%.2f).\n",
 110                    slp, limit_data.chunk_bytes, delta_t));
 111           return;
 112         }
 113       DEBUGP (("\nsleeping %.2f ms for %ld bytes, adjust %.2f ms\n",
 114                slp, limit_data.chunk_bytes, limit_data.sleep_adjust));
 115
 116       t0 = wtimer_read (timer);
 117       xsleep (slp / 1000);
 118       wtimer_update (timer);
 119       t1 = wtimer_read (timer);
 120
 121       /* Due to scheduling, we probably slept slightly longer (or
 122          shorter) than desired.  Calculate the difference between the
 123          desired and the actual sleep, and adjust the next sleep by
 124          that amount.  */
 125       limit_data.sleep_adjust = slp - (t1 - t0);
 126     }
 127
 128   limit_data.chunk_bytes = 0;
 129   limit_data.chunk_start = wtimer_read (timer);
 130 }
 131
 132 #ifndef MIN
 133 # define MIN(i, j) ((i) <= (j) ? (i) : (j))
 134 #endif
 135
 136 /* Read the contents of file descriptor FD until it the connection
 137    terminates or a read error occurs.  The data is read in portions of
 138    up to 16K and written to OUT as it arrives.  If opt.verbose is set,
 139    the progress is shown.
 140
 141    TOREAD is the amount of data expected to arrive, normally only used
 142    by the progress gauge.  However, if EXACT is set, no more than
 143    TOREAD octets will be read.
 144
 145    STARTPOS is the position from which the download starts, used by
 146    the progress gauge.  The amount of data read gets stored to
 147    *AMOUNT_READ.  The time it took to download the data (in
 148    milliseconds) is stored to *ELAPSED.
 149
 150    The function exits and returns the amount of data read.  In case of
 151    error while reading data, -1 is returned.  In case of error while
 152    writing data, -2 is returned.  */
 153
 154 int
 155 fd_read_body (int fd, FILE *out, long toread, int exact, long startpos,
 156               long *amount_read, double *elapsed)
 157 {
 158   int ret = 0;
 159
 160   static char dlbuf[16384];
 161   int dlbufsize = sizeof (dlbuf);
 162
 163   struct wget_timer *timer = NULL;
 164   double last_successful_read_tm = 0;
 165
 166   /* The progress gauge, set according to the user preferences. */
 167   void *progress = NULL;
 168
 169   /* Non-zero if the progress gauge is interactive, i.e. if it can
 170      continually update the display.  When true, smaller timeout
 171      values are used so that the gauge can update the display when
 172      data arrives slowly. */
 173   int progress_interactive = 0;
 174
 175   *amount_read = 0;
 176
 177   if (opt.verbose)
 178     {
 179       progress = progress_create (startpos, toread);
 180       progress_interactive = progress_interactive_p (progress);
 181     }
 182
 183   if (opt.limit_rate)
 184     limit_bandwidth_reset ();
 185
 186   /* A timer is needed for tracking progress, for throttling, and for
 187      tracking elapsed time.  If either of these are requested, start
 188      the timer.  */
 189   if (progress || opt.limit_rate || elapsed)
 190     {
 191       timer = wtimer_new ();
 192       last_successful_read_tm = 0;
 193     }
 194
 195   /* Use a smaller buffer for low requested bandwidths.  For example,
 196      with --limit-rate=2k, it doesn't make sense to slurp in 16K of
 197      data and then sleep for 8s.  With buffer size equal to the limit,
 198      we never have to sleep for more than one second.  */
 199   if (opt.limit_rate && opt.limit_rate < dlbufsize)
 200     dlbufsize = opt.limit_rate;
 201
 202   /* Read from FD while there is data to read.  Normally toread==0
 203      means that it is unknown how much data is to arrive.  However, if
 204      EXACT is set, then toread==0 means what it says: that no data
 205      should be read.  */
 206   while (!exact || (*amount_read < toread))
 207     {
 208       int rdsize = exact ? MIN (toread - *amount_read, dlbufsize) : dlbufsize;
 209       double tmout = opt.read_timeout;
 210       if (progress_interactive)
 211         {
 212           double waittm;
 213           /* For interactive progress gauges, always specify a ~1s
 214              timeout, so that the gauge can be updated regularly even
 215              when the data arrives very slowly or stalls.  */
 216           tmout = 0.95;
 217           waittm = (wtimer_read (timer) - last_successful_read_tm) / 1000;
 218           if (waittm + tmout > opt.read_timeout)
 219             {
 220               /* Don't allow waiting time to exceed read timeout. */
 221               tmout = opt.read_timeout - waittm;
 222               if (tmout < 0)
 223                 {
 224                   /* We've already exceeded the timeout. */
 225                   ret = -1, errno = ETIMEDOUT;
 226                   break;
 227                 }
 228             }
 229         }
 230       ret = fd_read (fd, dlbuf, rdsize, tmout);
 231
 232       if (ret == 0 || (ret < 0 && errno != ETIMEDOUT))
 233         break;
 234       else if (ret < 0)
 235         ret = 0;                /* timeout */
 236
 237       if (progress || opt.limit_rate)
 238         {
 239           wtimer_update (timer);
 240           if (ret > 0)
 241             last_successful_read_tm = wtimer_read (timer);
 242         }
 243
 244       if (ret > 0 && out != NULL)
 245         {
 246           fwrite (dlbuf, 1, ret, out);
 247           /* Immediately flush the downloaded data.  This should not
 248              hinder performance: fast downloads will arrive in large
 249              16K chunks (which stdio would write out anyway), and slow
 250              downloads wouldn't be limited by disk speed.  */
 251           fflush (out);
 252           if (ferror (out))
 253             {
 254               ret = -2;
 255               goto out;
 256             }
 257         }
 258
 259       if (opt.limit_rate)
 260         limit_bandwidth (ret, timer);
 261
 262       *amount_read += ret;
 263       if (progress)
 264         progress_update (progress, ret, wtimer_read (timer));
 265 #ifdef WINDOWS
 266       if (toread > 0)
 267         ws_percenttitle (100.0 *
 268                          (startpos + *amount_read) / (startpos + toread));
 269 #endif
 270     }
 271   if (ret < -1)
 272     ret = -1;
 273
 274  out:
 275   if (progress)
 276     progress_finish (progress, wtimer_read (timer));
 277   if (elapsed)
 278     *elapsed = wtimer_read (timer);
 279   if (timer)
 280     wtimer_delete (timer);
 281
 282   return ret;
 283 }
 284 \f
 285 /* Read a hunk of data from FD, up until a terminator.  The terminator
 286    is whatever the TERMINATOR function determines it to be; for
 287    example, it can be a line of data, or the head of an HTTP response.
 288    The function returns the data read allocated with malloc.
 289
 290    In case of error, NULL is returned.  In case of EOF and no data
 291    read, NULL is returned and errno set to 0.  In case of EOF with
 292    data having been read, the data is returned, but it will
 293    (obviously) not contain the terminator.
 294
 295    The idea is to be able to read a line of input, or otherwise a hunk
 296    of text, such as the head of an HTTP request, without crossing the
 297    boundary, so that the next call to fd_read etc. reads the data
 298    after the hunk.  To achieve that, this function does the following:
 299
 300    1. Peek at available data.
 301
 302    2. Determine whether the peeked data, along with the previously
 303       read data, includes the terminator.
 304
 305       2a. If yes, read the data until the end of the terminator, and
 306           exit.
 307
 308       2b. If no, read the peeked data and goto 1.
 309
 310    The function is careful to assume as little as possible about the
 311    implementation of peeking.  For example, every peek is followed by
 312    a read.  If the read returns a different amount of data, the
 313    process is retried until all data arrives safely.
 314
 315    BUFSIZE is the size of the initial buffer expected to read all the
 316    data in the typical case.
 317
 318    This function should be used as a building block for other
 319    functions -- see fd_read_line as a simple example.  */
 320
 321 char *
 322 fd_read_hunk (int fd, hunk_terminator_t hunk_terminator, int bufsize)
 323 {
 324   char *hunk = xmalloc (bufsize);
 325   int tail = 0;                 /* tail position in HUNK */
 326
 327   while (1)
 328     {
 329       const char *end;
 330       int pklen, rdlen, remain;
 331
 332       /* First, peek at the available data. */
 333
 334       pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1);
 335       if (pklen < 0)
 336         {
 337           xfree (hunk);
 338           return NULL;
 339         }
 340       end = hunk_terminator (hunk, tail, pklen);
 341       if (end)
 342         {
 343           /* The data contains the terminator: we'll drain the data up
 344              to the end of the terminator.  */
 345           remain = end - (hunk + tail);
 346           if (remain == 0)
 347             {
 348               /* No more data needs to be read. */
 349               hunk[tail] = '\0';
 350               return hunk;
 351             }
 352           if (bufsize - 1 < tail + remain)
 353             {
 354               bufsize = tail + remain + 1;
 355               hunk = xrealloc (hunk, bufsize);
 356             }
 357         }
 358       else
 359         /* No terminator: simply read the data we know is (or should
 360            be) available.  */
 361         remain = pklen;
 362
 363       /* Now, read the data.  Note that we make no assumptions about
 364          how much data we'll get.  (Some TCP stacks are notorious for
 365          read returning less data than the previous MSG_PEEK.)  */
 366
 367       rdlen = fd_read (fd, hunk + tail, remain, 0);
 368       if (rdlen < 0)
 369         {
 370           xfree_null (hunk);
 371           return NULL;
 372         }
 373       tail += rdlen;
 374       hunk[tail] = '\0';
 375
 376       if (rdlen == 0)
 377         {
 378           if (tail == 0)
 379             {
 380               /* EOF without anything having been read */
 381               xfree (hunk);
 382               errno = 0;
 383               return NULL;
 384             }
 385           else
 386             /* EOF seen: return the data we've read. */
 387             return hunk;
 388         }
 389       if (end && rdlen == remain)
 390         /* The terminator was seen and the remaining data drained --
 391            we got what we came for.  */
 392         return hunk;
 393
 394       /* Keep looping until all the data arrives. */
 395
 396       if (tail == bufsize - 1)
 397         {
 398           bufsize <<= 1;
 399           hunk = xrealloc (hunk, bufsize);
 400         }
 401     }
 402 }
 403
 404 static const char *
 405 line_terminator (const char *hunk, int oldlen, int peeklen)
 406 {
 407   const char *p = memchr (hunk + oldlen, '\n', peeklen);
 408   if (p)
 409     /* p+1 because we want the line to include '\n' */
 410     return p + 1;
 411   return NULL;
 412 }
 413
 414 /* Read one line from FD and return it.  The line is allocated using
 415    malloc.
 416
 417    If an error occurs, or if no data can be read, NULL is returned.
 418    In the former case errno indicates the error condition, and in the
 419    latter case, errno is NULL.  */
 420
 421 char *
 422 fd_read_line (int fd)
 423 {
 424   return fd_read_hunk (fd, line_terminator, 128);
 425 }
 426 \f
 427 /* Return a printed representation of the download rate, as
 428    appropriate for the speed.  If PAD is non-zero, strings will be
 429    padded to the width of 7 characters (xxxx.xx).  */
 430 char *
 431 retr_rate (long bytes, double msecs, int pad)
 432 {
 433   static char res[20];
 434   static char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
 435   int units = 0;
 436
 437   double dlrate = calc_rate (bytes, msecs, &units);
 438   sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);
 439
 440   return res;
 441 }
 442
 443 /* Calculate the download rate and trim it as appropriate for the
 444    speed.  Appropriate means that if rate is greater than 1K/s,
 445    kilobytes are used, and if rate is greater than 1MB/s, megabytes
 446    are used.
 447
 448    UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
 449    GB/s.  */
 450 double
 451 calc_rate (long bytes, double msecs, int *units)
 452 {
 453   double dlrate;
 454
 455   assert (msecs >= 0);
 456   assert (bytes >= 0);
 457
 458   if (msecs == 0)
 459     /* If elapsed time is exactly zero, it means we're under the
 460        granularity of the timer.  This often happens on systems that
 461        use time() for the timer.  */
 462     msecs = wtimer_granularity ();
 463
 464   dlrate = (double)1000 * bytes / msecs;
 465   if (dlrate < 1024.0)
 466     *units = 0;
 467   else if (dlrate < 1024.0 * 1024.0)
 468     *units = 1, dlrate /= 1024.0;
 469   else if (dlrate < 1024.0 * 1024.0 * 1024.0)
 470     *units = 2, dlrate /= (1024.0 * 1024.0);
 471   else
 472     /* Maybe someone will need this, one day. */
 473     *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
 474
 475   return dlrate;
 476 }
 477 \f
 478 /* Maximum number of allowed redirections.  20 was chosen as a
 479    "reasonable" value, which is low enough to not cause havoc, yet
 480    high enough to guarantee that normal retrievals will not be hurt by
 481    the check.  */
 482
 483 #define MAX_REDIRECTIONS 20
 484
 485 #define SUSPEND_POST_DATA do {                  \
 486   post_data_suspended = 1;                      \
 487   saved_post_data = opt.post_data;              \
 488   saved_post_file_name = opt.post_file_name;    \
 489   opt.post_data = NULL;                         \
 490   opt.post_file_name = NULL;                    \
 491 } while (0)
 492
 493 #define RESTORE_POST_DATA do {                          \
 494   if (post_data_suspended)                              \
 495     {                                                   \
 496       opt.post_data = saved_post_data;                  \
 497       opt.post_file_name = saved_post_file_name;        \
 498       post_data_suspended = 0;                          \
 499     }                                                   \
 500 } while (0)
 501
 502 static char *getproxy PARAMS ((struct url *));
 503
 504 /* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
 505    FTP, proxy, etc.  */
 506
 507 /* #### This function should be rewritten so it doesn't return from
 508    multiple points. */
 509
 510 uerr_t
 511 retrieve_url (const char *origurl, char **file, char **newloc,
 512               const char *refurl, int *dt)
 513 {
 514   uerr_t result;
 515   char *url;
 516   int location_changed, dummy;
 517   char *mynewloc, *proxy;
 518   struct url *u, *proxy_url;
 519   int up_error_code;            /* url parse error code */
 520   char *local_file;
 521   int redirection_count = 0;
 522
 523   int post_data_suspended = 0;
 524   char *saved_post_data = NULL;
 525   char *saved_post_file_name = NULL;
 526
 527   /* If dt is NULL, use local storage.  */
 528   if (!dt)
 529     {
 530       dt = &dummy;
 531       dummy = 0;
 532     }
 533   url = xstrdup (origurl);
 534   if (newloc)
 535     *newloc = NULL;
 536   if (file)
 537     *file = NULL;
 538
 539   u = url_parse (url, &up_error_code);
 540   if (!u)
 541     {
 542       logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
 543       xfree (url);
 544       return URLERROR;
 545     }
 546
 547   if (!refurl)
 548     refurl = opt.referer;
 549
 550  redirected:
 551
 552   result = NOCONERROR;
 553   mynewloc = NULL;
 554   local_file = NULL;
 555   proxy_url = NULL;
 556
 557   proxy = getproxy (u);
 558   if (proxy)
 559     {
 560       /* Parse the proxy URL.  */
 561       proxy_url = url_parse (proxy, &up_error_code);
 562       if (!proxy_url)
 563         {
 564           logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
 565                      proxy, url_error (up_error_code));
 566           xfree (url);
 567           RESTORE_POST_DATA;
 568           return PROXERR;
 569         }
 570       if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
 571         {
 572           logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
 573           url_free (proxy_url);
 574           xfree (url);
 575           RESTORE_POST_DATA;
 576           return PROXERR;
 577         }
 578     }
 579
 580   if (u->scheme == SCHEME_HTTP
 581 #ifdef HAVE_SSL
 582       || u->scheme == SCHEME_HTTPS
 583 #endif
 584       || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
 585     {
 586       result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
 587     }
 588   else if (u->scheme == SCHEME_FTP)
 589     {
 590       /* If this is a redirection, we must not allow recursive FTP
 591          retrieval, so we save recursion to oldrec, and restore it
 592          later.  */
 593       int oldrec = opt.recursive;
 594       if (redirection_count)
 595         opt.recursive = 0;
 596       result = ftp_loop (u, dt, proxy_url);
 597       opt.recursive = oldrec;
 598
 599       /* There is a possibility of having HTTP being redirected to
 600          FTP.  In these cases we must decide whether the text is HTML
 601          according to the suffix.  The HTML suffixes are `.html',
 602          `.htm' and a few others, case-insensitive.  */
 603       if (redirection_count && local_file && u->scheme == SCHEME_FTP)
 604         {
 605           if (has_html_suffix_p (local_file))
 606             *dt |= TEXTHTML;
 607         }
 608     }
 609
 610   if (proxy_url)
 611     {
 612       url_free (proxy_url);
 613       proxy_url = NULL;
 614     }
 615
 616   location_changed = (result == NEWLOCATION);
 617   if (location_changed)
 618     {
 619       char *construced_newloc;
 620       struct url *newloc_parsed;
 621
 622       assert (mynewloc != NULL);
 623
 624       if (local_file)
 625         xfree (local_file);
 626
 627       /* The HTTP specs only allow absolute URLs to appear in
 628          redirects, but a ton of boneheaded webservers and CGIs out
 629          there break the rules and use relative URLs, and popular
 630          browsers are lenient about this, so wget should be too. */
 631       construced_newloc = uri_merge (url, mynewloc);
 632       xfree (mynewloc);
 633       mynewloc = construced_newloc;
 634
 635       /* Now, see if this new location makes sense. */
 636       newloc_parsed = url_parse (mynewloc, &up_error_code);
 637       if (!newloc_parsed)
 638         {
 639           logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
 640                      url_error (up_error_code));
 641           url_free (u);
 642           xfree (url);
 643           xfree (mynewloc);
 644           RESTORE_POST_DATA;
 645           return result;
 646         }
 647
 648       /* Now mynewloc will become newloc_parsed->url, because if the
 649          Location contained relative paths like .././something, we
 650          don't want that propagating as url.  */
 651       xfree (mynewloc);
 652       mynewloc = xstrdup (newloc_parsed->url);
 653
 654       /* Check for max. number of redirections.  */
 655       if (++redirection_count > MAX_REDIRECTIONS)
 656         {
 657           logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
 658                      MAX_REDIRECTIONS);
 659           url_free (newloc_parsed);
 660           url_free (u);
 661           xfree (url);
 662           xfree (mynewloc);
 663           RESTORE_POST_DATA;
 664           return WRONGCODE;
 665         }
 666
 667       xfree (url);
 668       url = mynewloc;
 669       url_free (u);
 670       u = newloc_parsed;
 671
 672       /* If we're being redirected from POST, we don't want to POST
 673          again.  Many requests answer POST with a redirection to an
 674          index page; that redirection is clearly a GET.  We "suspend"
 675          POST data for the duration of the redirections, and restore
 676          it when we're done. */
 677       if (!post_data_suspended)
 678         SUSPEND_POST_DATA;
 679
 680       goto redirected;
 681     }
 682
 683   if (local_file)
 684     {
 685       if (*dt & RETROKF)
 686         {
 687           register_download (u->url, local_file);
 688           if (redirection_count && 0 != strcmp (origurl, u->url))
 689             register_redirection (origurl, u->url);
 690           if (*dt & TEXTHTML)
 691             register_html (u->url, local_file);
 692         }
 693     }
 694
 695   if (file)
 696     *file = local_file ? local_file : NULL;
 697   else
 698     xfree_null (local_file);
 699
 700   url_free (u);
 701
 702   if (redirection_count)
 703     {
 704       if (newloc)
 705         *newloc = url;
 706       else
 707         xfree (url);
 708     }
 709   else
 710     {
 711       if (newloc)
 712         *newloc = NULL;
 713       xfree (url);
 714     }
 715
 716   ++global_download_count;
 717   RESTORE_POST_DATA;
 718
 719   return result;
 720 }
 721
 722 /* Find the URLs in the file and call retrieve_url() for each of
 723    them.  If HTML is non-zero, treat the file as HTML, and construct
 724    the URLs accordingly.
 725
 726    If opt.recursive is set, call retrieve_tree() for each file.  */
 727
 728 uerr_t
 729 retrieve_from_file (const char *file, int html, int *count)
 730 {
 731   uerr_t status;
 732   struct urlpos *url_list, *cur_url;
 733
 734   url_list = (html ? get_urls_html (file, NULL, NULL)
 735               : get_urls_file (file));
 736   status = RETROK;             /* Suppose everything is OK.  */
 737   *count = 0;                  /* Reset the URL count.  */
 738
 739   for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
 740     {
 741       char *filename = NULL, *new_file = NULL;
 742       int dt;
 743
 744       if (cur_url->ignore_when_downloading)
 745         continue;
 746
 747       if (opt.quota && total_downloaded_bytes > opt.quota)
 748         {
 749           status = QUOTEXC;
 750           break;
 751         }
 752       if ((opt.recursive || opt.page_requisites)
 753           && cur_url->url->scheme != SCHEME_FTP)
 754         status = retrieve_tree (cur_url->url->url);
 755       else
 756         status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
 757
 758       if (filename && opt.delete_after && file_exists_p (filename))
 759         {
 760           DEBUGP (("Removing file due to --delete-after in"
 761                    " retrieve_from_file():\n"));
 762           logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
 763           if (unlink (filename))
 764             logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
 765           dt &= ~RETROKF;
 766         }
 767
 768       xfree_null (new_file);
 769       xfree_null (filename);
 770     }
 771
 772   /* Free the linked list of URL-s.  */
 773   free_urlpos (url_list);
 774
 775   return status;
 776 }
 777
 778 /* Print `giving up', or `retrying', depending on the impending
 779    action.  N1 and N2 are the attempt number and the attempt limit.  */
 780 void
 781 printwhat (int n1, int n2)
 782 {
 783   logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
 784 }
 785
 786 /* If opt.wait or opt.waitretry are specified, and if certain
 787    conditions are met, sleep the appropriate number of seconds.  See
 788    the documentation of --wait and --waitretry for more information.
 789
 790    COUNT is the count of current retrieval, beginning with 1. */
 791
 792 void
 793 sleep_between_retrievals (int count)
 794 {
 795   static int first_retrieval = 1;
 796
 797   if (first_retrieval)
 798     {
 799       /* Don't sleep before the very first retrieval. */
 800       first_retrieval = 0;
 801       return;
 802     }
 803
 804   if (opt.waitretry && count > 1)
 805     {
 806       /* If opt.waitretry is specified and this is a retry, wait for
 807          COUNT-1 number of seconds, or for opt.waitretry seconds.  */
 808       if (count <= opt.waitretry)
 809         xsleep (count - 1);
 810       else
 811         xsleep (opt.waitretry);
 812     }
 813   else if (opt.wait)
 814     {
 815       if (!opt.random_wait || count > 1)
 816         /* If random-wait is not specified, or if we are sleeping
 817            between retries of the same download, sleep the fixed
 818            interval.  */
 819         xsleep (opt.wait);
 820       else
 821         {
 822           /* Sleep a random amount of time averaging in opt.wait
 823              seconds.  The sleeping amount ranges from 0 to
 824              opt.wait*2, inclusive.  */
 825           double waitsecs = 2 * opt.wait * random_float ();
 826           DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
 827                    opt.wait, waitsecs));
 828           xsleep (waitsecs);
 829         }
 830     }
 831 }
 832
 833 /* Free the linked list of urlpos.  */
 834 void
 835 free_urlpos (struct urlpos *l)
 836 {
 837   while (l)
 838     {
 839       struct urlpos *next = l->next;
 840       if (l->url)
 841         url_free (l->url);
 842       xfree_null (l->local_name);
 843       xfree (l);
 844       l = next;
 845     }
 846 }
 847
 848 /* Rotate FNAME opt.backups times */
 849 void
 850 rotate_backups(const char *fname)
 851 {
 852   int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1;
 853   char *from = (char *)alloca (maxlen);
 854   char *to = (char *)alloca (maxlen);
 855   struct stat sb;
 856   int i;
 857
 858   if (stat (fname, &sb) == 0)
 859     if (S_ISREG (sb.st_mode) == 0)
 860       return;
 861
 862   for (i = opt.backups; i > 1; i--)
 863     {
 864       sprintf (from, "%s.%d", fname, i - 1);
 865       sprintf (to, "%s.%d", fname, i);
 866       rename (from, to);
 867     }
 868
 869   sprintf (to, "%s.%d", fname, 1);
 870   rename(fname, to);
 871 }
 872
 873 static int no_proxy_match PARAMS ((const char *, const char **));
 874
 875 /* Return the URL of the proxy appropriate for url U.  */
 876
 877 static char *
 878 getproxy (struct url *u)
 879 {
 880   char *proxy = NULL;
 881   char *rewritten_url;
 882   static char rewritten_storage[1024];
 883
 884   if (!opt.use_proxy)
 885     return NULL;
 886   if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
 887     return NULL;
 888
 889   switch (u->scheme)
 890     {
 891     case SCHEME_HTTP:
 892       proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
 893       break;
 894 #ifdef HAVE_SSL
 895     case SCHEME_HTTPS:
 896       proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
 897       break;
 898 #endif
 899     case SCHEME_FTP:
 900       proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
 901       break;
 902     case SCHEME_INVALID:
 903       break;
 904     }
 905   if (!proxy || !*proxy)
 906     return NULL;
 907
 908   /* Handle shorthands.  `rewritten_storage' is a kludge to allow
 909      getproxy() to return static storage. */
 910   rewritten_url = rewrite_shorthand_url (proxy);
 911   if (rewritten_url)
 912     {
 913       strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage));
 914       rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
 915       proxy = rewritten_storage;
 916     }
 917
 918   return proxy;
 919 }
 920
 921 /* Should a host be accessed through proxy, concerning no_proxy?  */
 922 int
 923 no_proxy_match (const char *host, const char **no_proxy)
 924 {
 925   if (!no_proxy)
 926     return 1;
 927   else
 928     return !sufmatch (no_proxy, host);
 929 }