2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
34 #include <sys/types.h>
37 #endif /* HAVE_UNISTD_H */
43 #endif /* HAVE_STRING_H */
59 # include "gen_sslfunc.h" /* for ssl_iread */
66 /* See the comment in gethttp() why this is needed. */
/* Count of downloads completed in this run; incremented at the end of
   retrieve_url.  NOTE(review): gethttp() is not visible in this chunk --
   confirm the referenced comment there.  */
67 int global_download_count;
69 /* Total size of downloaded files. Used to enforce quota. */
/* Compared against opt.quota (see retrieve_from_file) to stop further
   retrievals once the user-specified download quota is exceeded.  */
70 LARGE_INT total_downloaded_bytes;
80 limit_bandwidth_reset (void)
82 limit_data.chunk_bytes = 0;
83 limit_data.chunk_start = 0;
86 /* Limit the bandwidth by pausing the download for an amount of time.
87 BYTES is the number of bytes received from the network, and TIMER
88 is the timer that started at the beginning of download. */
/* NOTE(review): this extract is incomplete -- the function header
   ("static void"), braces, the declarations of `expected', `t0', `t1',
   and the xsleep() call between the t0 and t1 reads appear to be
   missing from the visible text.  Comments below annotate only the
   statements that are visible.  */
91 limit_bandwidth (long bytes, struct wget_timer *timer)
/* Elapsed time since the current accounting chunk began.  (Presumably
   milliseconds, given the 1000.0 factor below -- confirm wtimer_read's
   unit.)  */
93 double delta_t = wtimer_read (timer) - limit_data.chunk_start;
96 limit_data.chunk_bytes += bytes;
98 /* Calculate the amount of time we expect downloading the chunk
99 should take. If in reality it took less time, sleep to
100 compensate for the difference. */
101 expected = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;
/* We received data faster than the requested rate: sleep off the
   surplus.  */
103 if (expected > delta_t)
/* sleep_adjust carries the error of the previous sleep forward so
   scheduling inaccuracies do not accumulate.  */
105 double slp = expected - delta_t + limit_data.sleep_adjust;
/* Tiny sleeps are deferred into the next chunk rather than performed;
   the guard condition for this branch is not visible here.  */
109 DEBUGP (("deferring a %.2f ms sleep (%ld/%.2f).\n",
110 slp, limit_data.chunk_bytes, delta_t));
113 DEBUGP (("\nsleeping %.2f ms for %ld bytes, adjust %.2f ms\n",
114 slp, limit_data.chunk_bytes, limit_data.sleep_adjust));
/* Bracket the (missing) xsleep call to measure the actual sleep.  */
116 t0 = wtimer_read (timer);
118 wtimer_update (timer);
119 t1 = wtimer_read (timer);
121 /* Due to scheduling, we probably slept slightly longer (or
122 shorter) than desired. Calculate the difference between the
123 desired and the actual sleep, and adjust the next sleep by
125 limit_data.sleep_adjust = slp - (t1 - t0);
/* Start a fresh accounting chunk.  */
128 limit_data.chunk_bytes = 0;
129 limit_data.chunk_start = wtimer_read (timer);
/* Classic two-argument minimum.  Beware: evaluates each argument more
   than once, so do not pass expressions with side effects.  */
133 # define MIN(i, j) ((i) <= (j) ? (i) : (j))
136 /* Reads the contents of file descriptor FD, until it is closed, or a
137 read error occurs. The data is read in 8K chunks, and stored to
138 stream fp, which should have been open for writing.
140 The EXPECTED argument is passed to show_progress() unchanged, but
143 If opt.verbose is set, the progress is also shown. RESTVAL
144 represents a value from which to start downloading (which will be
145 shown accordingly). If RESTVAL is non-zero, the stream should have
146 been open for appending.
148 The function exits and returns codes of 0, -1 and -2 if the
149 connection was closed, there was a read error, or if it could not
150 write to the output stream, respectively. */
/* NOTE(review): many lines of this function are missing from the
   extract (return type, braces, the `res' and `waittm' declarations,
   the fwrite error check producing the -2 result, and the final
   return).  Comments below annotate only the visible statements.  */
153 fd_read_body (int fd, FILE *out, long *len, long restval, long expected,
154 int use_expected, double *elapsed)
/* Static download buffer shared across calls -- safe only because wget
   is single-threaded; not reentrant.  */
158 static char dlbuf[16384];
159 int dlbufsize = sizeof (dlbuf);
161 struct wget_timer *timer = wtimer_allocate ();
162 double last_successful_read_tm;
164 /* The progress gauge, set according to the user preferences. */
165 void *progress = NULL;
167 /* Non-zero if the progress gauge is interactive, i.e. if it can
168 continually update the display. When true, smaller timeout
169 values are used so that the gauge can update the display when
170 data arrives slowly. */
171 int progress_interactive = 0;
177 progress = progress_create (restval, expected);
178 progress_interactive = progress_interactive_p (progress);
182 limit_bandwidth_reset ();
183 wtimer_reset (timer);
184 last_successful_read_tm = 0;
186 /* Use a smaller buffer for low requested bandwidths. For example,
187 with --limit-rate=2k, it doesn't make sense to slurp in 16K of
188 data and then sleep for 8s. With buffer size equal to the limit,
189 we never have to sleep for more than one second. */
190 if (opt.limit_rate && opt.limit_rate < dlbufsize)
191 dlbufsize = opt.limit_rate;
193 /* Read from fd while there is available data.
195 Normally, if expected is 0, it means that it is not known how
196 much data is expected. However, if use_expected is specified,
197 then expected being zero means exactly that. */
198 while (!use_expected || (*len < expected))
/* Never read past the expected length when it is known.  */
200 int amount_to_read = (use_expected
201 ? MIN (expected - *len, dlbufsize) : dlbufsize);
202 double tmout = opt.read_timeout;
203 if (progress_interactive)
206 /* For interactive progress gauges, always specify a ~1s
207 timeout, so that the gauge can be updated regularly even
208 when the data arrives very slowly or stalls. */
/* Seconds since the last successful read; the / 1000 suggests
   wtimer_read returns milliseconds -- confirm.  */
210 waittm = (wtimer_read (timer) - last_successful_read_tm) / 1000;
211 if (waittm + tmout > opt.read_timeout)
213 /* Don't allow waiting time to exceed read timeout. */
214 tmout = opt.read_timeout - waittm;
217 /* We've already exceeded the timeout. */
218 res = -1, errno = ETIMEDOUT;
223 res = fd_read (fd, dlbuf, amount_to_read, tmout);
/* EOF (res == 0) or a real, non-timeout error terminates the loop;
   an ETIMEDOUT here is only the short per-iteration gauge timeout.  */
225 if (res == 0 || (res < 0 && errno != ETIMEDOUT))
228 res = 0; /* timeout */
230 wtimer_update (timer);
/* NOTE(review): the original checks fwrite's result a few lines below
   (missing here) to yield the -2 "write error" return.  */
233 fwrite (dlbuf, 1, res, out);
234 /* Always flush the contents of the network packet. This
235 should not hinder performance: fast downloads will be
236 received in 16K chunks (which stdio would write out
237 anyway), and slow downloads won't be limited by disk
245 last_successful_read_tm = wtimer_read (timer);
/* Throttle when --limit-rate was given.  */
249 limit_bandwidth (res, timer);
253 progress_update (progress, res, wtimer_read (timer));
/* Windows-only console-title percentage display.  */
255 if (use_expected && expected > 0)
256 ws_percenttitle (100.0 * (double)(*len) / (double)expected);
264 progress_finish (progress, wtimer_read (timer));
266 *elapsed = wtimer_read (timer);
267 wtimer_delete (timer);
/* Pointer-to-terminator-finder type; same shape as hunk_terminator_t
   used below.  */
272 typedef const char *(*finder_t) PARAMS ((const char *, int, int));
274 /* Read a hunk of data from FD, up until a terminator. The terminator
275 is whatever the TERMINATOR function determines it to be; for
276 example, it can be a line of data, or the head of an HTTP response.
277 The function returns the data read allocated with malloc.
279 In case of error, NULL is returned. In case of EOF and no data
280 read, NULL is returned and errno set to 0. In case of EOF with
281 data having been read, the data is returned, but it will
282 (obviously) not contain the terminator.
284 The idea is to be able to read a line of input, or otherwise a hunk
285 of text, such as the head of an HTTP request, without crossing the
286 boundary, so that the next call to fd_read etc. reads the data
287 after the hunk. To achieve that, this function does the following:
289 1. Peek at available data.
291 2. Determine whether the peeked data, along with the previously
292 read data, includes the terminator.
294 2a. If yes, read the data until the end of the terminator, and
297 2b. If no, read the peeked data and goto 1.
299 The function is careful to assume as little as possible about the
300 implementation of peeking. For example, every peek is followed by
301 a read. If the read returns a different amount of data, the
302 process is retried until all data arrives safely.
304 BUFSIZE is the size of the initial buffer expected to read all the
305 data in the typical case.
307 This function should be used as a building block for other
308 functions -- see fd_read_line as a simple example. */
/* NOTE(review): several implementation lines are missing from this
   extract (the enclosing for(;;) loop, the `end' declaration, error
   checks on pklen/rdlen, the returns, and the buffer-doubling
   arithmetic).  Comments below annotate the visible statements only.  */
311 fd_read_hunk (int fd, hunk_terminator_t hunk_terminator, int bufsize)
313 char *hunk = xmalloc (bufsize);
314 int tail = 0; /* tail position in HUNK */
319 int pklen, rdlen, remain;
321 /* First, peek at the available data. */
/* bufsize - 1 leaves room for a terminating '\0'; the -1 timeout
   presumably means "use the default read timeout" -- confirm against
   fd_peek.  */
323 pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1);
/* Ask the caller-supplied predicate whether the terminator is now
   present; it returns a pointer just past the terminator.  */
329 end = hunk_terminator (hunk, tail, pklen);
332 /* The data contains the terminator: we'll drain the data up
333 to the end of the terminator. */
334 remain = end - (hunk + tail);
337 /* No more data needs to be read. */
/* Grow the buffer if draining `remain' more bytes would not fit.  */
341 if (bufsize - 1 < tail + remain)
343 bufsize = tail + remain + 1;
344 hunk = xrealloc (hunk, bufsize);
348 /* No terminator: simply read the data we know is (or should
352 /* Now, read the data. Note that we make no assumptions about
353 how much data we'll get. (Some TCP stacks are notorious for
354 read returning less data than the previous MSG_PEEK.) */
356 rdlen = fd_read (fd, hunk + tail, remain, 0);
369 /* EOF without anything having been read */
375 /* EOF seen: return the data we've read. */
378 if (end && rdlen == remain)
379 /* The terminator was seen and the remaining data drained --
380 we got what we came for. */
383 /* Keep looping until all the data arrives. */
/* Buffer full and still no terminator: enlarge and continue.  */
385 if (tail == bufsize - 1)
388 hunk = xrealloc (hunk, bufsize);
/* Terminator predicate for fd_read_hunk that looks for a newline.
   HUNK holds OLDLEN previously-seen bytes followed by PEEKLEN newly
   peeked bytes; only the new bytes need to be scanned.  Returns a
   pointer one past the '\n' so the returned line includes it, or NULL
   if no newline is present.  (Reconstructed: the extract was missing
   the function header, braces and returns.)  */
static const char *
line_terminator (const char *hunk, int oldlen, int peeklen)
{
  const char *p = memchr (hunk + oldlen, '\n', peeklen);
  if (p)
    /* p+1 because we want the line to include '\n' */
    return p + 1;
  return NULL;
}
403 /* Read one line from FD and return it. The line is allocated using
406 If an error occurs, or if no data can be read, NULL is returned.
407 In the former case errno indicates the error condition, and in the
408 latter case, errno is NULL. */
411 fd_read_line (int fd)
413 return fd_read_hunk (fd, line_terminator, 128);
/* Return a printed representation of the download rate, as
   appropriate for the speed.  If PAD is non-zero, strings will be
   padded to the width of 7 characters (xxxx.xx).

   Returns a pointer to static storage: the result is overwritten by
   the next call and must not be freed.  (Reconstructed: the extract
   was missing the function header, the result buffer and the return.)  */
char *
retr_rate (long bytes, double msecs, int pad)
{
  static char res[20];
  static char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
  int units = 0;

  double dlrate = calc_rate (bytes, msecs, &units);
  /* Both format strings are literals; the ternary only selects the
     padded variant, so this is not a format-string hazard.  */
  sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);

  return res;
}
/* Calculate the download rate and trim it as appropriate for the
   speed.  Appropriate means that if rate is greater than 1K/s,
   kilobytes are used, and if rate is greater than 1MB/s, megabytes
   are used.

   UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
   GB/s.  Returns the scaled rate.  (Reconstructed: the extract was
   missing the function header, asserts, branches and return.)  */
double
calc_rate (long bytes, double msecs, int *units)
{
  double dlrate;

  assert (msecs >= 0);
  assert (bytes >= 0);

  if (msecs == 0)
    /* If elapsed time is exactly zero, it means we're under the
       granularity of the timer.  This often happens on systems that
       use time() for the timer.  */
    msecs = wtimer_granularity ();

  dlrate = (double)1000 * bytes / msecs;
  if (dlrate < 1024.0)
    *units = 0;
  else if (dlrate < 1024.0 * 1024.0)
    *units = 1, dlrate /= 1024.0;
  else if (dlrate < 1024.0 * 1024.0 * 1024.0)
    *units = 2, dlrate /= (1024.0 * 1024.0);
  else
    /* Maybe someone will need this, one day. */
    *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);

  return dlrate;
}
467 /* Maximum number of allowed redirections. 20 was chosen as a
468 "reasonable" value, which is low enough to not cause havoc, yet
469 high enough to guarantee that normal retrievals will not be hurt by
472 #define MAX_REDIRECTIONS 20
474 #define SUSPEND_POST_DATA do { \
475 post_data_suspended = 1; \
476 saved_post_data = opt.post_data; \
477 saved_post_file_name = opt.post_file_name; \
478 opt.post_data = NULL; \
479 opt.post_file_name = NULL; \
482 #define RESTORE_POST_DATA do { \
483 if (post_data_suspended) \
485 opt.post_data = saved_post_data; \
486 opt.post_file_name = saved_post_file_name; \
487 post_data_suspended = 0; \
491 static char *getproxy PARAMS ((struct url *));
493 /* Retrieve the given URL. Decides which loop to call -- HTTP, FTP,
496 /* #### This function should be rewritten so it doesn't return from
/* NOTE(review): a large number of lines of retrieve_url are missing
   from this extract (the uerr_t `result' declaration, braces, several
   error-path returns, the `redirected:' loop plumbing, SUSPEND/RESTORE
   macro invocations, and the final return).  Comments below annotate
   only the statements that are visible.  */
500 retrieve_url (const char *origurl, char **file, char **newloc,
501 const char *refurl, int *dt)
505 int location_changed, dummy;
506 char *mynewloc, *proxy;
507 struct url *u, *proxy_url;
508 int up_error_code; /* url parse error code */
/* Guards against redirect loops; compared with MAX_REDIRECTIONS.  */
510 int redirection_count = 0;
/* POST state saved/restored across redirects -- a redirect of a POST
   is retrieved with GET (see comment further below).  */
512 int post_data_suspended = 0;
513 char *saved_post_data = NULL;
514 char *saved_post_file_name = NULL;
516 /* If dt is NULL, use local storage. */
/* Work on a heap copy so redirects can replace the URL string.  */
522 url = xstrdup (origurl);
528 u = url_parse (url, &up_error_code);
531 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
/* --referer overrides the caller-supplied referrer.  */
537 refurl = opt.referer;
/* Pick the proxy (if any) appropriate for U's scheme.  */
546 proxy = getproxy (u);
549 /* Parse the proxy URL. */
550 proxy_url = url_parse (proxy, &up_error_code);
553 logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
554 proxy, url_error (up_error_code));
/* Only an HTTP proxy (or one matching the target scheme) is usable.  */
559 if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
561 logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
562 url_free (proxy_url);
/* Dispatch: HTTP(S), or anything going through an HTTP proxy, uses
   http_loop; plain FTP uses ftp_loop.  */
569 if (u->scheme == SCHEME_HTTP
571 || u->scheme == SCHEME_HTTPS
573 || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
575 result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
577 else if (u->scheme == SCHEME_FTP)
579 /* If this is a redirection, we must not allow recursive FTP
580 retrieval, so we save recursion to oldrec, and restore it
582 int oldrec = opt.recursive;
583 if (redirection_count)
585 result = ftp_loop (u, dt, proxy_url);
586 opt.recursive = oldrec;
588 /* There is a possibility of having HTTP being redirected to
589 FTP. In these cases we must decide whether the text is HTML
590 according to the suffix. The HTML suffixes are `.html',
591 `.htm' and a few others, case-insensitive. */
592 if (redirection_count && local_file && u->scheme == SCHEME_FTP)
594 if (has_html_suffix_p (local_file))
601 url_free (proxy_url);
/* NEWLOCATION from the scheme loop signals an HTTP redirect.  */
605 location_changed = (result == NEWLOCATION);
606 if (location_changed)
608 char *construced_newloc;
609 struct url *newloc_parsed;
611 assert (mynewloc != NULL);
616 /* The HTTP specs only allow absolute URLs to appear in
617 redirects, but a ton of boneheaded webservers and CGIs out
618 there break the rules and use relative URLs, and popular
619 browsers are lenient about this, so wget should be too. */
620 construced_newloc = uri_merge (url, mynewloc);
622 mynewloc = construced_newloc;
624 /* Now, see if this new location makes sense. */
625 newloc_parsed = url_parse (mynewloc, &up_error_code);
628 logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
629 url_error (up_error_code));
637 /* Now mynewloc will become newloc_parsed->url, because if the
638 Location contained relative paths like .././something, we
639 don't want that propagating as url. */
641 mynewloc = xstrdup (newloc_parsed->url);
643 /* Check for max. number of redirections. */
644 if (++redirection_count > MAX_REDIRECTIONS)
646 logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
648 url_free (newloc_parsed);
661 /* If we're being redirected from POST, we don't want to POST
662 again. Many requests answer POST with a redirection to an
663 index page; that redirection is clearly a GET. We "suspend"
664 POST data for the duration of the redirections, and restore
665 it when we're done. */
666 if (!post_data_suspended)
/* Record the finished download for link conversion et al.  */
676 register_download (u->url, local_file);
677 if (redirection_count && 0 != strcmp (origurl, u->url))
678 register_redirection (origurl, u->url);
680 register_html (u->url, local_file);
685 *file = local_file ? local_file : NULL;
687 xfree_null (local_file);
691 if (redirection_count)
705 ++global_download_count;
711 /* Find the URLs in the file and call retrieve_url() for each of
712 them. If HTML is non-zero, treat the file as HTML, and construct
713 the URLs accordingly.
715 If opt.recursive is set, call retrieve_tree() for each file. */
/* NOTE(review): lines are missing from this extract (the uerr_t
   return type, the `status'/`dt' declarations, braces, the `continue'
   and quota `break', and the final `return status').  */
718 retrieve_from_file (const char *file, int html, int *count)
721 struct urlpos *url_list, *cur_url;
/* Extract the URL list either from an HTML document or from a plain
   text file with one URL per line.  */
723 url_list = (html ? get_urls_html (file, NULL, NULL)
724 : get_urls_file (file));
725 status = RETROK; /* Suppose everything is OK. */
726 *count = 0; /* Reset the URL count. */
728 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
730 char *filename = NULL, *new_file = NULL;
733 if (cur_url->ignore_when_downloading)
/* Stop the whole run once the --quota limit is exceeded.  */
736 if (opt.quota && total_downloaded_bytes > opt.quota)
/* Recursive/page-requisite retrieval goes through retrieve_tree;
   FTP URLs and plain downloads go through retrieve_url.  */
741 if ((opt.recursive || opt.page_requisites)
742 && cur_url->url->scheme != SCHEME_FTP)
743 status = retrieve_tree (cur_url->url->url);
745 status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
/* --delete-after: remove the local copy once retrieved.  */
747 if (filename && opt.delete_after && file_exists_p (filename))
749 DEBUGP (("Removing file due to --delete-after in"
750 " retrieve_from_file():\n"));
751 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
752 if (unlink (filename))
753 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
757 xfree_null (new_file);
758 xfree_null (filename);
761 /* Free the linked list of URL-s. */
762 free_urlpos (url_list);
767 /* Print `giving up', or `retrying', depending on the impending
768 action. N1 and N2 are the attempt number and the attempt limit. */
770 printwhat (int n1, int n2)
772 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
775 /* If opt.wait or opt.waitretry are specified, and if certain
776 conditions are met, sleep the appropriate number of seconds. See
777 the documentation of --wait and --waitretry for more information.
779 COUNT is the count of current retrieval, beginning with 1. */
/* NOTE(review): this extract is missing the function header, braces,
   the early return for the first retrieval, the linear-backoff
   xsleep(count - 1) branch, the fixed xsleep(opt.wait), the
   xsleep(waitsecs) call, and the trailing `first_retrieval = 0'.  */
782 sleep_between_retrievals (int count)
/* Persists across calls so that only the very first retrieval of the
   entire run skips sleeping.  */
784 static int first_retrieval = 1;
788 /* Don't sleep before the very first retrieval. */
793 if (opt.waitretry && count > 1)
795 /* If opt.waitretry is specified and this is a retry, wait for
796 COUNT-1 number of seconds, or for opt.waitretry seconds. */
797 if (count <= opt.waitretry)
/* Linear backoff capped at opt.waitretry seconds.  */
800 xsleep (opt.waitretry);
804 if (!opt.random_wait || count > 1)
805 /* If random-wait is not specified, or if we are sleeping
806 between retries of the same download, sleep the fixed
811 /* Sleep a random amount of time averaging in opt.wait
812 seconds. The sleeping amount ranges from 0 to
813 opt.wait*2, inclusive. */
814 double waitsecs = 2 * opt.wait * random_float ();
815 DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
816 opt.wait, waitsecs));
822 /* Free the linked list of urlpos. */
824 free_urlpos (struct urlpos *l)
828 struct urlpos *next = l->next;
831 xfree_null (l->local_name);
837 /* Rotate FNAME opt.backups times */
839 rotate_backups(const char *fname)
841 int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1;
842 char *from = (char *)alloca (maxlen);
843 char *to = (char *)alloca (maxlen);
847 if (stat (fname, &sb) == 0)
848 if (S_ISREG (sb.st_mode) == 0)
851 for (i = opt.backups; i > 1; i--)
853 sprintf (from, "%s.%d", fname, i - 1);
854 sprintf (to, "%s.%d", fname, i);
858 sprintf (to, "%s.%d", fname, 1);
862 static int no_proxy_match PARAMS ((const char *, const char **));
864 /* Return the URL of the proxy appropriate for url U. */
867 getproxy (struct url *u)
871 static char rewritten_storage[1024];
875 if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
881 proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
885 proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
889 proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
894 if (!proxy || !*proxy)
897 /* Handle shorthands. `rewritten_storage' is a kludge to allow
898 getproxy() to return static storage. */
899 rewritten_url = rewrite_shorthand_url (proxy);
902 strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage));
903 rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
904 proxy = rewritten_storage;
/* Should a host be accessed through proxy, concerning no_proxy?
   Returns non-zero when HOST should go through the proxy, zero when a
   --no-proxy suffix matches it.  A NULL NO_PROXY list means "proxy
   everything".  (Reconstructed: the extract was missing the function
   header, braces and the NULL-list branch.)  */
static int
no_proxy_match (const char *host, const char **no_proxy)
{
  if (!no_proxy)
    return 1;
  else
    return !sufmatch (no_proxy, host);
}