sjero.net Git - wget/blob - src/retr.c

   1 /* File retrieval.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or (at
   9 your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <sys/types.h>
  35 #ifdef HAVE_UNISTD_H
  36 # include <unistd.h>
  37 #endif /* HAVE_UNISTD_H */
  38 #include <errno.h>
  39 #ifdef HAVE_STRING_H
  40 # include <string.h>
  41 #else
  42 # include <strings.h>
  43 #endif /* HAVE_STRING_H */
  44 #include <assert.h>
  45
  46 #include "wget.h"
  47 #include "utils.h"
  48 #include "retr.h"
  49 #include "progress.h"
  50 #include "url.h"
  51 #include "recur.h"
  52 #include "ftp.h"
  53 #include "host.h"
  54 #include "connect.h"
  55 #include "hash.h"
  56 #include "convert.h"
  57
  58 #ifdef HAVE_SSL
  59 # include "gen_sslfunc.h"       /* for ssl_iread */
  60 #endif
  61
  62 #ifndef errno
  63 extern int errno;
  64 #endif
  65
  66 /* See the comment in gethttp() why this is needed. */
  67 int global_download_count;
  68
  69 /* Total size of downloaded files.  Used to enforce quota.  */
  70 LARGE_INT total_downloaded_bytes;
  71
  72 \f
  73 static struct {
  74   long chunk_bytes;
  75   double chunk_start;
  76   double sleep_adjust;
  77 } limit_data;
  78
  79 static void
  80 limit_bandwidth_reset (void)
  81 {
  82   limit_data.chunk_bytes = 0;
  83   limit_data.chunk_start = 0;
  84 }
  85
  86 /* Limit the bandwidth by pausing the download for an amount of time.
  87    BYTES is the number of bytes received from the network, and TIMER
  88    is the timer that started at the beginning of download.  */
  89
  90 static void
  91 limit_bandwidth (long bytes, struct wget_timer *timer)
  92 {
  93   double delta_t = wtimer_read (timer) - limit_data.chunk_start;
  94   double expected;
  95
  96   limit_data.chunk_bytes += bytes;
  97
  98   /* Calculate the amount of time we expect downloading the chunk
  99      should take.  If in reality it took less time, sleep to
 100      compensate for the difference.  */
 101   expected = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;
 102
 103   if (expected > delta_t)
 104     {
 105       double slp = expected - delta_t + limit_data.sleep_adjust;
 106       double t0, t1;
 107       if (slp < 200)
 108         {
 109           DEBUGP (("deferring a %.2f ms sleep (%ld/%.2f).\n",
 110                    slp, limit_data.chunk_bytes, delta_t));
 111           return;
 112         }
 113       DEBUGP (("\nsleeping %.2f ms for %ld bytes, adjust %.2f ms\n",
 114                slp, limit_data.chunk_bytes, limit_data.sleep_adjust));
 115
 116       t0 = wtimer_read (timer);
 117       xsleep (slp / 1000);
 118       wtimer_update (timer);
 119       t1 = wtimer_read (timer);
 120
 121       /* Due to scheduling, we probably slept slightly longer (or
 122          shorter) than desired.  Calculate the difference between the
 123          desired and the actual sleep, and adjust the next sleep by
 124          that amount.  */
 125       limit_data.sleep_adjust = slp - (t1 - t0);
 126     }
 127
 128   limit_data.chunk_bytes = 0;
 129   limit_data.chunk_start = wtimer_read (timer);
 130 }
 131
 132 #ifndef MIN
 133 # define MIN(i, j) ((i) <= (j) ? (i) : (j))
 134 #endif
 135
 136 /* Reads the contents of file descriptor FD, until it is closed, or a
 137    read error occurs.  The data is read in 8K chunks, and stored to
 138    stream fp, which should have been open for writing.
 139
 140    The EXPECTED argument is passed to show_progress() unchanged, but
 141    otherwise ignored.
 142
 143    If opt.verbose is set, the progress is also shown.  RESTVAL
 144    represents a value from which to start downloading (which will be
 145    shown accordingly).  If RESTVAL is non-zero, the stream should have
 146    been open for appending.
 147
 148    The function exits and returns codes of 0, -1 and -2 if the
 149    connection was closed, there was a read error, or if it could not
 150    write to the output stream, respectively.  */
 151
 152 int
 153 fd_read_body (int fd, FILE *out, long *len, long restval, long expected,
 154               int use_expected, double *elapsed)
 155 {
 156   int res = 0;
 157
 158   static char dlbuf[16384];
 159   int dlbufsize = sizeof (dlbuf);
 160
 161   struct wget_timer *timer = wtimer_allocate ();
 162   double last_successful_read_tm;
 163
 164   /* The progress gauge, set according to the user preferences. */
 165   void *progress = NULL;
 166
 167   /* Non-zero if the progress gauge is interactive, i.e. if it can
 168      continually update the display.  When true, smaller timeout
 169      values are used so that the gauge can update the display when
 170      data arrives slowly. */
 171   int progress_interactive = 0;
 172
 173   *len = restval;
 174
 175   if (opt.verbose)
 176     {
 177       progress = progress_create (restval, expected);
 178       progress_interactive = progress_interactive_p (progress);
 179     }
 180
 181   if (opt.limit_rate)
 182     limit_bandwidth_reset ();
 183   wtimer_reset (timer);
 184   last_successful_read_tm = 0;
 185
 186   /* Use a smaller buffer for low requested bandwidths.  For example,
 187      with --limit-rate=2k, it doesn't make sense to slurp in 16K of
 188      data and then sleep for 8s.  With buffer size equal to the limit,
 189      we never have to sleep for more than one second.  */
 190   if (opt.limit_rate && opt.limit_rate < dlbufsize)
 191     dlbufsize = opt.limit_rate;
 192
 193   /* Read from fd while there is available data.
 194
 195      Normally, if expected is 0, it means that it is not known how
 196      much data is expected.  However, if use_expected is specified,
 197      then expected being zero means exactly that.  */
 198   while (!use_expected || (*len < expected))
 199     {
 200       int amount_to_read = (use_expected
 201                             ? MIN (expected - *len, dlbufsize) : dlbufsize);
 202       double tmout = opt.read_timeout;
 203       if (progress_interactive)
 204         {
 205           double waittm;
 206           /* For interactive progress gauges, always specify a ~1s
 207              timeout, so that the gauge can be updated regularly even
 208              when the data arrives very slowly or stalls.  */
 209           tmout = 0.95;
 210           waittm = (wtimer_read (timer) - last_successful_read_tm) / 1000;
 211           if (waittm + tmout > opt.read_timeout)
 212             {
 213               /* Don't allow waiting time to exceed read timeout. */
 214               tmout = opt.read_timeout - waittm;
 215               if (tmout < 0)
 216                 {
 217                   /* We've already exceeded the timeout. */
 218                   res = -1, errno = ETIMEDOUT;
 219                   break;
 220                 }
 221             }
 222         }
 223       res = fd_read (fd, dlbuf, amount_to_read, tmout);
 224
 225       if (res == 0 || (res < 0 && errno != ETIMEDOUT))
 226         break;
 227       else if (res < 0)
 228         res = 0;                /* timeout */
 229
 230       wtimer_update (timer);
 231       if (res > 0)
 232         {
 233           fwrite (dlbuf, 1, res, out);
 234           /* Always flush the contents of the network packet.  This
 235              should not hinder performance: fast downloads will be
 236              received in 16K chunks (which stdio would write out
 237              anyway), and slow downloads won't be limited by disk
 238              performance.  */
 239           fflush (out);
 240           if (ferror (out))
 241             {
 242               res = -2;
 243               goto out;
 244             }
 245           last_successful_read_tm = wtimer_read (timer);
 246         }
 247
 248       if (opt.limit_rate)
 249         limit_bandwidth (res, timer);
 250
 251       *len += res;
 252       if (progress)
 253         progress_update (progress, res, wtimer_read (timer));
 254 #ifdef WINDOWS
 255       if (use_expected && expected > 0)
 256         ws_percenttitle (100.0 * (double)(*len) / (double)expected);
 257 #endif
 258     }
 259   if (res < -1)
 260     res = -1;
 261
 262  out:
 263   if (progress)
 264     progress_finish (progress, wtimer_read (timer));
 265   if (elapsed)
 266     *elapsed = wtimer_read (timer);
 267   wtimer_delete (timer);
 268
 269   return res;
 270 }
 271 \f
 272 /* Read a hunk of data from FD, up until a terminator.  The terminator
 273    is whatever the TERMINATOR function determines it to be; for
 274    example, it can be a line of data, or the head of an HTTP response.
 275    The function returns the data read allocated with malloc.
 276
 277    In case of error, NULL is returned.  In case of EOF and no data
 278    read, NULL is returned and errno set to 0.  In case of EOF with
 279    data having been read, the data is returned, but it will
 280    (obviously) not contain the terminator.
 281
 282    The idea is to be able to read a line of input, or otherwise a hunk
 283    of text, such as the head of an HTTP request, without crossing the
 284    boundary, so that the next call to fd_read etc. reads the data
 285    after the hunk.  To achieve that, this function does the following:
 286
 287    1. Peek at available data.
 288
 289    2. Determine whether the peeked data, along with the previously
 290       read data, includes the terminator.
 291
 292       2a. If yes, read the data until the end of the terminator, and
 293           exit.
 294
 295       2b. If no, read the peeked data and goto 1.
 296
 297    The function is careful to assume as little as possible about the
 298    implementation of peeking.  For example, every peek is followed by
 299    a read.  If the read returns a different amount of data, the
 300    process is retried until all data arrives safely.
 301
 302    BUFSIZE is the size of the initial buffer expected to read all the
 303    data in the typical case.
 304
 305    This function should be used as a building block for other
 306    functions -- see fd_read_line as a simple example.  */
 307
 308 char *
 309 fd_read_hunk (int fd, hunk_terminator_t hunk_terminator, int bufsize)
 310 {
 311   char *hunk = xmalloc (bufsize);
 312   int tail = 0;                 /* tail position in HUNK */
 313
 314   while (1)
 315     {
 316       const char *end;
 317       int pklen, rdlen, remain;
 318
 319       /* First, peek at the available data. */
 320
 321       pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1);
 322       if (pklen < 0)
 323         {
 324           xfree (hunk);
 325           return NULL;
 326         }
 327       end = hunk_terminator (hunk, tail, pklen);
 328       if (end)
 329         {
 330           /* The data contains the terminator: we'll drain the data up
 331              to the end of the terminator.  */
 332           remain = end - (hunk + tail);
 333           if (remain == 0)
 334             {
 335               /* No more data needs to be read. */
 336               hunk[tail] = '\0';
 337               return hunk;
 338             }
 339           if (bufsize - 1 < tail + remain)
 340             {
 341               bufsize = tail + remain + 1;
 342               hunk = xrealloc (hunk, bufsize);
 343             }
 344         }
 345       else
 346         /* No terminator: simply read the data we know is (or should
 347            be) available.  */
 348         remain = pklen;
 349
 350       /* Now, read the data.  Note that we make no assumptions about
 351          how much data we'll get.  (Some TCP stacks are notorious for
 352          read returning less data than the previous MSG_PEEK.)  */
 353
 354       rdlen = fd_read (fd, hunk + tail, remain, 0);
 355       if (rdlen < 0)
 356         {
 357           xfree_null (hunk);
 358           return NULL;
 359         }
 360       tail += rdlen;
 361       hunk[tail] = '\0';
 362
 363       if (rdlen == 0)
 364         {
 365           if (tail == 0)
 366             {
 367               /* EOF without anything having been read */
 368               xfree (hunk);
 369               errno = 0;
 370               return NULL;
 371             }
 372           else
 373             /* EOF seen: return the data we've read. */
 374             return hunk;
 375         }
 376       if (end && rdlen == remain)
 377         /* The terminator was seen and the remaining data drained --
 378            we got what we came for.  */
 379         return hunk;
 380
 381       /* Keep looping until all the data arrives. */
 382
 383       if (tail == bufsize - 1)
 384         {
 385           bufsize <<= 1;
 386           hunk = xrealloc (hunk, bufsize);
 387         }
 388     }
 389 }
 390
 391 static const char *
 392 line_terminator (const char *hunk, int oldlen, int peeklen)
 393 {
 394   const char *p = memchr (hunk + oldlen, '\n', peeklen);
 395   if (p)
 396     /* p+1 because we want the line to include '\n' */
 397     return p + 1;
 398   return NULL;
 399 }
 400
 401 /* Read one line from FD and return it.  The line is allocated using
 402    malloc.
 403
 404    If an error occurs, or if no data can be read, NULL is returned.
 405    In the former case errno indicates the error condition, and in the
 406    latter case, errno is NULL.  */
 407
 408 char *
 409 fd_read_line (int fd)
 410 {
 411   return fd_read_hunk (fd, line_terminator, 128);
 412 }
 413 \f
 414 /* Return a printed representation of the download rate, as
 415    appropriate for the speed.  If PAD is non-zero, strings will be
 416    padded to the width of 7 characters (xxxx.xx).  */
 417 char *
 418 retr_rate (long bytes, double msecs, int pad)
 419 {
 420   static char res[20];
 421   static char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
 422   int units = 0;
 423
 424   double dlrate = calc_rate (bytes, msecs, &units);
 425   sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);
 426
 427   return res;
 428 }
 429
 430 /* Calculate the download rate and trim it as appropriate for the
 431    speed.  Appropriate means that if rate is greater than 1K/s,
 432    kilobytes are used, and if rate is greater than 1MB/s, megabytes
 433    are used.
 434
 435    UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
 436    GB/s.  */
 437 double
 438 calc_rate (long bytes, double msecs, int *units)
 439 {
 440   double dlrate;
 441
 442   assert (msecs >= 0);
 443   assert (bytes >= 0);
 444
 445   if (msecs == 0)
 446     /* If elapsed time is exactly zero, it means we're under the
 447        granularity of the timer.  This often happens on systems that
 448        use time() for the timer.  */
 449     msecs = wtimer_granularity ();
 450
 451   dlrate = (double)1000 * bytes / msecs;
 452   if (dlrate < 1024.0)
 453     *units = 0;
 454   else if (dlrate < 1024.0 * 1024.0)
 455     *units = 1, dlrate /= 1024.0;
 456   else if (dlrate < 1024.0 * 1024.0 * 1024.0)
 457     *units = 2, dlrate /= (1024.0 * 1024.0);
 458   else
 459     /* Maybe someone will need this, one day. */
 460     *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
 461
 462   return dlrate;
 463 }
 464 \f
 465 /* Maximum number of allowed redirections.  20 was chosen as a
 466    "reasonable" value, which is low enough to not cause havoc, yet
 467    high enough to guarantee that normal retrievals will not be hurt by
 468    the check.  */
 469
 470 #define MAX_REDIRECTIONS 20
 471
 472 #define SUSPEND_POST_DATA do {                  \
 473   post_data_suspended = 1;                      \
 474   saved_post_data = opt.post_data;              \
 475   saved_post_file_name = opt.post_file_name;    \
 476   opt.post_data = NULL;                         \
 477   opt.post_file_name = NULL;                    \
 478 } while (0)
 479
 480 #define RESTORE_POST_DATA do {                          \
 481   if (post_data_suspended)                              \
 482     {                                                   \
 483       opt.post_data = saved_post_data;                  \
 484       opt.post_file_name = saved_post_file_name;        \
 485       post_data_suspended = 0;                          \
 486     }                                                   \
 487 } while (0)
 488
 489 static char *getproxy PARAMS ((struct url *));
 490
 491 /* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
 492    FTP, proxy, etc.  */
 493
 494 /* #### This function should be rewritten so it doesn't return from
 495    multiple points. */
 496
 497 uerr_t
 498 retrieve_url (const char *origurl, char **file, char **newloc,
 499               const char *refurl, int *dt)
 500 {
 501   uerr_t result;
 502   char *url;
 503   int location_changed, dummy;
 504   char *mynewloc, *proxy;
 505   struct url *u, *proxy_url;
 506   int up_error_code;            /* url parse error code */
 507   char *local_file;
 508   int redirection_count = 0;
 509
 510   int post_data_suspended = 0;
 511   char *saved_post_data = NULL;
 512   char *saved_post_file_name = NULL;
 513
 514   /* If dt is NULL, use local storage.  */
 515   if (!dt)
 516     {
 517       dt = &dummy;
 518       dummy = 0;
 519     }
 520   url = xstrdup (origurl);
 521   if (newloc)
 522     *newloc = NULL;
 523   if (file)
 524     *file = NULL;
 525
 526   u = url_parse (url, &up_error_code);
 527   if (!u)
 528     {
 529       logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
 530       xfree (url);
 531       return URLERROR;
 532     }
 533
 534   if (!refurl)
 535     refurl = opt.referer;
 536
 537  redirected:
 538
 539   result = NOCONERROR;
 540   mynewloc = NULL;
 541   local_file = NULL;
 542   proxy_url = NULL;
 543
 544   proxy = getproxy (u);
 545   if (proxy)
 546     {
 547       /* Parse the proxy URL.  */
 548       proxy_url = url_parse (proxy, &up_error_code);
 549       if (!proxy_url)
 550         {
 551           logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
 552                      proxy, url_error (up_error_code));
 553           xfree (url);
 554           RESTORE_POST_DATA;
 555           return PROXERR;
 556         }
 557       if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
 558         {
 559           logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
 560           url_free (proxy_url);
 561           xfree (url);
 562           RESTORE_POST_DATA;
 563           return PROXERR;
 564         }
 565     }
 566
 567   if (u->scheme == SCHEME_HTTP
 568 #ifdef HAVE_SSL
 569       || u->scheme == SCHEME_HTTPS
 570 #endif
 571       || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
 572     {
 573       result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
 574     }
 575   else if (u->scheme == SCHEME_FTP)
 576     {
 577       /* If this is a redirection, we must not allow recursive FTP
 578          retrieval, so we save recursion to oldrec, and restore it
 579          later.  */
 580       int oldrec = opt.recursive;
 581       if (redirection_count)
 582         opt.recursive = 0;
 583       result = ftp_loop (u, dt, proxy_url);
 584       opt.recursive = oldrec;
 585
 586       /* There is a possibility of having HTTP being redirected to
 587          FTP.  In these cases we must decide whether the text is HTML
 588          according to the suffix.  The HTML suffixes are `.html',
 589          `.htm' and a few others, case-insensitive.  */
 590       if (redirection_count && local_file && u->scheme == SCHEME_FTP)
 591         {
 592           if (has_html_suffix_p (local_file))
 593             *dt |= TEXTHTML;
 594         }
 595     }
 596
 597   if (proxy_url)
 598     {
 599       url_free (proxy_url);
 600       proxy_url = NULL;
 601     }
 602
 603   location_changed = (result == NEWLOCATION);
 604   if (location_changed)
 605     {
 606       char *construced_newloc;
 607       struct url *newloc_parsed;
 608
 609       assert (mynewloc != NULL);
 610
 611       if (local_file)
 612         xfree (local_file);
 613
 614       /* The HTTP specs only allow absolute URLs to appear in
 615          redirects, but a ton of boneheaded webservers and CGIs out
 616          there break the rules and use relative URLs, and popular
 617          browsers are lenient about this, so wget should be too. */
 618       construced_newloc = uri_merge (url, mynewloc);
 619       xfree (mynewloc);
 620       mynewloc = construced_newloc;
 621
 622       /* Now, see if this new location makes sense. */
 623       newloc_parsed = url_parse (mynewloc, &up_error_code);
 624       if (!newloc_parsed)
 625         {
 626           logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
 627                      url_error (up_error_code));
 628           url_free (u);
 629           xfree (url);
 630           xfree (mynewloc);
 631           RESTORE_POST_DATA;
 632           return result;
 633         }
 634
 635       /* Now mynewloc will become newloc_parsed->url, because if the
 636          Location contained relative paths like .././something, we
 637          don't want that propagating as url.  */
 638       xfree (mynewloc);
 639       mynewloc = xstrdup (newloc_parsed->url);
 640
 641       /* Check for max. number of redirections.  */
 642       if (++redirection_count > MAX_REDIRECTIONS)
 643         {
 644           logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
 645                      MAX_REDIRECTIONS);
 646           url_free (newloc_parsed);
 647           url_free (u);
 648           xfree (url);
 649           xfree (mynewloc);
 650           RESTORE_POST_DATA;
 651           return WRONGCODE;
 652         }
 653
 654       xfree (url);
 655       url = mynewloc;
 656       url_free (u);
 657       u = newloc_parsed;
 658
 659       /* If we're being redirected from POST, we don't want to POST
 660          again.  Many requests answer POST with a redirection to an
 661          index page; that redirection is clearly a GET.  We "suspend"
 662          POST data for the duration of the redirections, and restore
 663          it when we're done. */
 664       if (!post_data_suspended)
 665         SUSPEND_POST_DATA;
 666
 667       goto redirected;
 668     }
 669
 670   if (local_file)
 671     {
 672       if (*dt & RETROKF)
 673         {
 674           register_download (u->url, local_file);
 675           if (redirection_count && 0 != strcmp (origurl, u->url))
 676             register_redirection (origurl, u->url);
 677           if (*dt & TEXTHTML)
 678             register_html (u->url, local_file);
 679         }
 680     }
 681
 682   if (file)
 683     *file = local_file ? local_file : NULL;
 684   else
 685     xfree_null (local_file);
 686
 687   url_free (u);
 688
 689   if (redirection_count)
 690     {
 691       if (newloc)
 692         *newloc = url;
 693       else
 694         xfree (url);
 695     }
 696   else
 697     {
 698       if (newloc)
 699         *newloc = NULL;
 700       xfree (url);
 701     }
 702
 703   ++global_download_count;
 704   RESTORE_POST_DATA;
 705
 706   return result;
 707 }
 708
 709 /* Find the URLs in the file and call retrieve_url() for each of
 710    them.  If HTML is non-zero, treat the file as HTML, and construct
 711    the URLs accordingly.
 712
 713    If opt.recursive is set, call retrieve_tree() for each file.  */
 714
 715 uerr_t
 716 retrieve_from_file (const char *file, int html, int *count)
 717 {
 718   uerr_t status;
 719   struct urlpos *url_list, *cur_url;
 720
 721   url_list = (html ? get_urls_html (file, NULL, NULL)
 722               : get_urls_file (file));
 723   status = RETROK;             /* Suppose everything is OK.  */
 724   *count = 0;                  /* Reset the URL count.  */
 725
 726   for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
 727     {
 728       char *filename = NULL, *new_file = NULL;
 729       int dt;
 730
 731       if (cur_url->ignore_when_downloading)
 732         continue;
 733
 734       if (opt.quota && total_downloaded_bytes > opt.quota)
 735         {
 736           status = QUOTEXC;
 737           break;
 738         }
 739       if ((opt.recursive || opt.page_requisites)
 740           && cur_url->url->scheme != SCHEME_FTP)
 741         status = retrieve_tree (cur_url->url->url);
 742       else
 743         status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
 744
 745       if (filename && opt.delete_after && file_exists_p (filename))
 746         {
 747           DEBUGP (("Removing file due to --delete-after in"
 748                    " retrieve_from_file():\n"));
 749           logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
 750           if (unlink (filename))
 751             logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
 752           dt &= ~RETROKF;
 753         }
 754
 755       xfree_null (new_file);
 756       xfree_null (filename);
 757     }
 758
 759   /* Free the linked list of URL-s.  */
 760   free_urlpos (url_list);
 761
 762   return status;
 763 }
 764
 765 /* Print `giving up', or `retrying', depending on the impending
 766    action.  N1 and N2 are the attempt number and the attempt limit.  */
 767 void
 768 printwhat (int n1, int n2)
 769 {
 770   logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
 771 }
 772
 773 /* If opt.wait or opt.waitretry are specified, and if certain
 774    conditions are met, sleep the appropriate number of seconds.  See
 775    the documentation of --wait and --waitretry for more information.
 776
 777    COUNT is the count of current retrieval, beginning with 1. */
 778
 779 void
 780 sleep_between_retrievals (int count)
 781 {
 782   static int first_retrieval = 1;
 783
 784   if (first_retrieval)
 785     {
 786       /* Don't sleep before the very first retrieval. */
 787       first_retrieval = 0;
 788       return;
 789     }
 790
 791   if (opt.waitretry && count > 1)
 792     {
 793       /* If opt.waitretry is specified and this is a retry, wait for
 794          COUNT-1 number of seconds, or for opt.waitretry seconds.  */
 795       if (count <= opt.waitretry)
 796         xsleep (count - 1);
 797       else
 798         xsleep (opt.waitretry);
 799     }
 800   else if (opt.wait)
 801     {
 802       if (!opt.random_wait || count > 1)
 803         /* If random-wait is not specified, or if we are sleeping
 804            between retries of the same download, sleep the fixed
 805            interval.  */
 806         xsleep (opt.wait);
 807       else
 808         {
 809           /* Sleep a random amount of time averaging in opt.wait
 810              seconds.  The sleeping amount ranges from 0 to
 811              opt.wait*2, inclusive.  */
 812           double waitsecs = 2 * opt.wait * random_float ();
 813           DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
 814                    opt.wait, waitsecs));
 815           xsleep (waitsecs);
 816         }
 817     }
 818 }
 819
 820 /* Free the linked list of urlpos.  */
 821 void
 822 free_urlpos (struct urlpos *l)
 823 {
 824   while (l)
 825     {
 826       struct urlpos *next = l->next;
 827       if (l->url)
 828         url_free (l->url);
 829       xfree_null (l->local_name);
 830       xfree (l);
 831       l = next;
 832     }
 833 }
 834
 835 /* Rotate FNAME opt.backups times */
 836 void
 837 rotate_backups(const char *fname)
 838 {
 839   int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1;
 840   char *from = (char *)alloca (maxlen);
 841   char *to = (char *)alloca (maxlen);
 842   struct stat sb;
 843   int i;
 844
 845   if (stat (fname, &sb) == 0)
 846     if (S_ISREG (sb.st_mode) == 0)
 847       return;
 848
 849   for (i = opt.backups; i > 1; i--)
 850     {
 851       sprintf (from, "%s.%d", fname, i - 1);
 852       sprintf (to, "%s.%d", fname, i);
 853       rename (from, to);
 854     }
 855
 856   sprintf (to, "%s.%d", fname, 1);
 857   rename(fname, to);
 858 }
 859
 860 static int no_proxy_match PARAMS ((const char *, const char **));
 861
 862 /* Return the URL of the proxy appropriate for url U.  */
 863
 864 static char *
 865 getproxy (struct url *u)
 866 {
 867   char *proxy = NULL;
 868   char *rewritten_url;
 869   static char rewritten_storage[1024];
 870
 871   if (!opt.use_proxy)
 872     return NULL;
 873   if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
 874     return NULL;
 875
 876   switch (u->scheme)
 877     {
 878     case SCHEME_HTTP:
 879       proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
 880       break;
 881 #ifdef HAVE_SSL
 882     case SCHEME_HTTPS:
 883       proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
 884       break;
 885 #endif
 886     case SCHEME_FTP:
 887       proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
 888       break;
 889     case SCHEME_INVALID:
 890       break;
 891     }
 892   if (!proxy || !*proxy)
 893     return NULL;
 894
 895   /* Handle shorthands.  `rewritten_storage' is a kludge to allow
 896      getproxy() to return static storage. */
 897   rewritten_url = rewrite_shorthand_url (proxy);
 898   if (rewritten_url)
 899     {
 900       strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage));
 901       rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
 902       proxy = rewritten_storage;
 903     }
 904
 905   return proxy;
 906 }
 907
 908 /* Should a host be accessed through proxy, concerning no_proxy?  */
 909 int
 910 no_proxy_match (const char *host, const char **no_proxy)
 911 {
 912   if (!no_proxy)
 913     return 1;
 914   else
 915     return !sufmatch (no_proxy, host);
 916 }