sjero.net Git - wget/blob - src/retr.c

   1 /* File retrieval.
   2    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
   3    2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3 of the License, or (at
  10 your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  19
  20 Additional permission under GNU GPL version 3 section 7
  21
  22 If you modify this program, or any covered work, by linking or
  23 combining it with the OpenSSL project's OpenSSL library (or a
  24 modified version of that library), containing parts covered by the
  25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
  26 grants you additional permission to convey the resulting work.
  27 Corresponding Source for a non-source form of such a combination
  28 shall include the source code for the parts of OpenSSL used as well
  29 as that of the covered work.  */
  30
  31 #include "wget.h"
  32
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #ifdef HAVE_UNISTD_H
  36 # include <unistd.h>
  37 #endif /* HAVE_UNISTD_H */
  38 #include <errno.h>
  39 #include <string.h>
  40 #include <assert.h>
  41
  42 #include "utils.h"
  43 #include "retr.h"
  44 #include "progress.h"
  45 #include "url.h"
  46 #include "recur.h"
  47 #include "ftp.h"
  48 #include "http.h"
  49 #include "host.h"
  50 #include "connect.h"
  51 #include "hash.h"
  52 #include "convert.h"
  53 #include "ptimer.h"
  54
  55 /* Total size of downloaded files.  Used to enforce quota.  */
  56 SUM_SIZE_INT total_downloaded_bytes;
  57
  58 /* Total download time in seconds. */
  59 double total_download_time;
  60
  61 /* If non-NULL, the stream to which output should be written.  This
  62    stream is initialized when `-O' is used.  */
  63 FILE *output_stream;
  64
  65 /* Whether output_document is a regular file we can manipulate,
  66    i.e. not `-' or a device file. */
  67 bool output_stream_regular;
  68 \f
  69 static struct {
  70   wgint chunk_bytes;
  71   double chunk_start;
  72   double sleep_adjust;
  73 } limit_data;
  74
  75 static void
  76 limit_bandwidth_reset (void)
  77 {
  78   xzero (limit_data);
  79 }
  80
  81 /* Limit the bandwidth by pausing the download for an amount of time.
  82    BYTES is the number of bytes received from the network, and TIMER
  83    is the timer that started at the beginning of download.  */
  84
  85 static void
  86 limit_bandwidth (wgint bytes, struct ptimer *timer)
  87 {
  88   double delta_t = ptimer_read (timer) - limit_data.chunk_start;
  89   double expected;
  90
  91   limit_data.chunk_bytes += bytes;
  92
  93   /* Calculate the amount of time we expect downloading the chunk
  94      should take.  If in reality it took less time, sleep to
  95      compensate for the difference.  */
  96   expected = (double) limit_data.chunk_bytes / opt.limit_rate;
  97
  98   if (expected > delta_t)
  99     {
 100       double slp = expected - delta_t + limit_data.sleep_adjust;
 101       double t0, t1;
 102       if (slp < 0.2)
 103         {
 104           DEBUGP (("deferring a %.2f ms sleep (%s/%.2f).\n",
 105                    slp * 1000, number_to_static_string (limit_data.chunk_bytes),
 106                    delta_t));
 107           return;
 108         }
 109       DEBUGP (("\nsleeping %.2f ms for %s bytes, adjust %.2f ms\n",
 110                slp * 1000, number_to_static_string (limit_data.chunk_bytes),
 111                limit_data.sleep_adjust));
 112
 113       t0 = ptimer_read (timer);
 114       xsleep (slp);
 115       t1 = ptimer_measure (timer);
 116
 117       /* Due to scheduling, we probably slept slightly longer (or
 118          shorter) than desired.  Calculate the difference between the
 119          desired and the actual sleep, and adjust the next sleep by
 120          that amount.  */
 121       limit_data.sleep_adjust = slp - (t1 - t0);
 122       /* If sleep_adjust is very large, it's likely due to suspension
 123          and not clock inaccuracy.  Don't enforce those.  */
 124       if (limit_data.sleep_adjust > 0.5)
 125         limit_data.sleep_adjust = 0.5;
 126       else if (limit_data.sleep_adjust < -0.5)
 127         limit_data.sleep_adjust = -0.5;
 128     }
 129
 130   limit_data.chunk_bytes = 0;
 131   limit_data.chunk_start = ptimer_read (timer);
 132 }
 133
 134 #ifndef MIN
 135 # define MIN(i, j) ((i) <= (j) ? (i) : (j))
 136 #endif
 137
 138 /* Write data in BUF to OUT.  However, if *SKIP is non-zero, skip that
 139    amount of data and decrease SKIP.  Increment *TOTAL by the amount
 140    of data written.  */
 141
 142 static int
 143 write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
 144             wgint *written)
 145 {
 146   if (!out)
 147     return 1;
 148   if (*skip > bufsize)
 149     {
 150       *skip -= bufsize;
 151       return 1;
 152     }
 153   if (*skip)
 154     {
 155       buf += *skip;
 156       bufsize -= *skip;
 157       *skip = 0;
 158       if (bufsize == 0)
 159         return 1;
 160     }
 161
 162   fwrite (buf, 1, bufsize, out);
 163   *written += bufsize;
 164
 165   /* Immediately flush the downloaded data.  This should not hinder
 166      performance: fast downloads will arrive in large 16K chunks
 167      (which stdio would write out immediately anyway), and slow
 168      downloads wouldn't be limited by disk speed.  */
 169   fflush (out);
 170   return !ferror (out);
 171 }
 172
 173 /* Read the contents of file descriptor FD until it the connection
 174    terminates or a read error occurs.  The data is read in portions of
 175    up to 16K and written to OUT as it arrives.  If opt.verbose is set,
 176    the progress is shown.
 177
 178    TOREAD is the amount of data expected to arrive, normally only used
 179    by the progress gauge.
 180
 181    STARTPOS is the position from which the download starts, used by
 182    the progress gauge.  If QTYREAD is non-NULL, the value it points to
 183    is incremented by the amount of data read from the network.  If
 184    QTYWRITTEN is non-NULL, the value it points to is incremented by
 185    the amount of data written to disk.  The time it took to download
 186    the data is stored to ELAPSED.
 187
 188    The function exits and returns the amount of data read.  In case of
 189    error while reading data, -1 is returned.  In case of error while
 190    writing data, -2 is returned.  */
 191
 192 int
 193 fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
 194               wgint *qtyread, wgint *qtywritten, double *elapsed, int flags)
 195 {
 196   int ret = 0;
 197
 198   static char dlbuf[16384];
 199   int dlbufsize = sizeof (dlbuf);
 200
 201   struct ptimer *timer = NULL;
 202   double last_successful_read_tm = 0;
 203
 204   /* The progress gauge, set according to the user preferences. */
 205   void *progress = NULL;
 206
 207   /* Non-zero if the progress gauge is interactive, i.e. if it can
 208      continually update the display.  When true, smaller timeout
 209      values are used so that the gauge can update the display when
 210      data arrives slowly. */
 211   bool progress_interactive = false;
 212
 213   bool exact = !!(flags & rb_read_exactly);
 214   wgint skip = 0;
 215
 216   /* How much data we've read/written.  */
 217   wgint sum_read = 0;
 218   wgint sum_written = 0;
 219
 220   if (flags & rb_skip_startpos)
 221     skip = startpos;
 222
 223   if (opt.verbose)
 224     {
 225       /* If we're skipping STARTPOS bytes, pass 0 as the INITIAL
 226          argument to progress_create because the indicator doesn't
 227          (yet) know about "skipping" data.  */
 228       progress = progress_create (skip ? 0 : startpos, startpos + toread);
 229       progress_interactive = progress_interactive_p (progress);
 230     }
 231
 232   if (opt.limit_rate)
 233     limit_bandwidth_reset ();
 234
 235   /* A timer is needed for tracking progress, for throttling, and for
 236      tracking elapsed time.  If either of these are requested, start
 237      the timer.  */
 238   if (progress || opt.limit_rate || elapsed)
 239     {
 240       timer = ptimer_new ();
 241       last_successful_read_tm = 0;
 242     }
 243
 244   /* Use a smaller buffer for low requested bandwidths.  For example,
 245      with --limit-rate=2k, it doesn't make sense to slurp in 16K of
 246      data and then sleep for 8s.  With buffer size equal to the limit,
 247      we never have to sleep for more than one second.  */
 248   if (opt.limit_rate && opt.limit_rate < dlbufsize)
 249     dlbufsize = opt.limit_rate;
 250
 251   /* Read from FD while there is data to read.  Normally toread==0
 252      means that it is unknown how much data is to arrive.  However, if
 253      EXACT is set, then toread==0 means what it says: that no data
 254      should be read.  */
 255   while (!exact || (sum_read < toread))
 256     {
 257       int rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize;
 258       double tmout = opt.read_timeout;
 259       if (progress_interactive)
 260         {
 261           /* For interactive progress gauges, always specify a ~1s
 262              timeout, so that the gauge can be updated regularly even
 263              when the data arrives very slowly or stalls.  */
 264           tmout = 0.95;
 265           if (opt.read_timeout)
 266             {
 267               double waittm;
 268               waittm = ptimer_read (timer) - last_successful_read_tm;
 269               if (waittm + tmout > opt.read_timeout)
 270                 {
 271                   /* Don't let total idle time exceed read timeout. */
 272                   tmout = opt.read_timeout - waittm;
 273                   if (tmout < 0)
 274                     {
 275                       /* We've already exceeded the timeout. */
 276                       ret = -1, errno = ETIMEDOUT;
 277                       break;
 278                     }
 279                 }
 280             }
 281         }
 282       ret = fd_read (fd, dlbuf, rdsize, tmout);
 283
 284       if (progress_interactive && ret < 0 && errno == ETIMEDOUT)
 285         ret = 0;                /* interactive timeout, handled above */
 286       else if (ret <= 0)
 287         break;                  /* EOF or read error */
 288
 289       if (progress || opt.limit_rate)
 290         {
 291           ptimer_measure (timer);
 292           if (ret > 0)
 293             last_successful_read_tm = ptimer_read (timer);
 294         }
 295
 296       if (ret > 0)
 297         {
 298           sum_read += ret;
 299           if (!write_data (out, dlbuf, ret, &skip, &sum_written))
 300             {
 301               ret = -2;
 302               goto out;
 303             }
 304         }
 305
 306       if (opt.limit_rate)
 307         limit_bandwidth (ret, timer);
 308
 309       if (progress)
 310         progress_update (progress, ret, ptimer_read (timer));
 311 #ifdef WINDOWS
 312       if (toread > 0 && !opt.quiet)
 313         ws_percenttitle (100.0 *
 314                          (startpos + sum_read) / (startpos + toread));
 315 #endif
 316     }
 317   if (ret < -1)
 318     ret = -1;
 319
 320  out:
 321   if (progress)
 322     progress_finish (progress, ptimer_read (timer));
 323
 324   if (elapsed)
 325     *elapsed = ptimer_read (timer);
 326   if (timer)
 327     ptimer_destroy (timer);
 328
 329   if (qtyread)
 330     *qtyread += sum_read;
 331   if (qtywritten)
 332     *qtywritten += sum_written;
 333
 334   return ret;
 335 }
 336 \f
 337 /* Read a hunk of data from FD, up until a terminator.  The hunk is
 338    limited by whatever the TERMINATOR callback chooses as its
 339    terminator.  For example, if terminator stops at newline, the hunk
 340    will consist of a line of data; if terminator stops at two
 341    newlines, it can be used to read the head of an HTTP response.
 342    Upon determining the boundary, the function returns the data (up to
 343    the terminator) in malloc-allocated storage.
 344
 345    In case of read error, NULL is returned.  In case of EOF and no
 346    data read, NULL is returned and errno set to 0.  In case of having
 347    read some data, but encountering EOF before seeing the terminator,
 348    the data that has been read is returned, but it will (obviously)
 349    not contain the terminator.
 350
 351    The TERMINATOR function is called with three arguments: the
 352    beginning of the data read so far, the beginning of the current
 353    block of peeked-at data, and the length of the current block.
 354    Depending on its needs, the function is free to choose whether to
 355    analyze all data or just the newly arrived data.  If TERMINATOR
 356    returns NULL, it means that the terminator has not been seen.
 357    Otherwise it should return a pointer to the charactre immediately
 358    following the terminator.
 359
 360    The idea is to be able to read a line of input, or otherwise a hunk
 361    of text, such as the head of an HTTP request, without crossing the
 362    boundary, so that the next call to fd_read etc. reads the data
 363    after the hunk.  To achieve that, this function does the following:
 364
 365    1. Peek at incoming data.
 366
 367    2. Determine whether the peeked data, along with the previously
 368       read data, includes the terminator.
 369
 370       2a. If yes, read the data until the end of the terminator, and
 371           exit.
 372
 373       2b. If no, read the peeked data and goto 1.
 374
 375    The function is careful to assume as little as possible about the
 376    implementation of peeking.  For example, every peek is followed by
 377    a read.  If the read returns a different amount of data, the
 378    process is retried until all data arrives safely.
 379
 380    SIZEHINT is the buffer size sufficient to hold all the data in the
 381    typical case (it is used as the initial buffer size).  MAXSIZE is
 382    the maximum amount of memory this function is allowed to allocate,
 383    or 0 if no upper limit is to be enforced.
 384
 385    This function should be used as a building block for other
 386    functions -- see fd_read_line as a simple example.  */
 387
 388 char *
 389 fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
 390 {
 391   long bufsize = sizehint;
 392   char *hunk = xmalloc (bufsize);
 393   int tail = 0;                 /* tail position in HUNK */
 394
 395   assert (maxsize >= bufsize);
 396
 397   while (1)
 398     {
 399       const char *end;
 400       int pklen, rdlen, remain;
 401
 402       /* First, peek at the available data. */
 403
 404       pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1);
 405       if (pklen < 0)
 406         {
 407           xfree (hunk);
 408           return NULL;
 409         }
 410       end = terminator (hunk, hunk + tail, pklen);
 411       if (end)
 412         {
 413           /* The data contains the terminator: we'll drain the data up
 414              to the end of the terminator.  */
 415           remain = end - (hunk + tail);
 416           assert (remain >= 0);
 417           if (remain == 0)
 418             {
 419               /* No more data needs to be read. */
 420               hunk[tail] = '\0';
 421               return hunk;
 422             }
 423           if (bufsize - 1 < tail + remain)
 424             {
 425               bufsize = tail + remain + 1;
 426               hunk = xrealloc (hunk, bufsize);
 427             }
 428         }
 429       else
 430         /* No terminator: simply read the data we know is (or should
 431            be) available.  */
 432         remain = pklen;
 433
 434       /* Now, read the data.  Note that we make no assumptions about
 435          how much data we'll get.  (Some TCP stacks are notorious for
 436          read returning less data than the previous MSG_PEEK.)  */
 437
 438       rdlen = fd_read (fd, hunk + tail, remain, 0);
 439       if (rdlen < 0)
 440         {
 441           xfree_null (hunk);
 442           return NULL;
 443         }
 444       tail += rdlen;
 445       hunk[tail] = '\0';
 446
 447       if (rdlen == 0)
 448         {
 449           if (tail == 0)
 450             {
 451               /* EOF without anything having been read */
 452               xfree (hunk);
 453               errno = 0;
 454               return NULL;
 455             }
 456           else
 457             /* EOF seen: return the data we've read. */
 458             return hunk;
 459         }
 460       if (end && rdlen == remain)
 461         /* The terminator was seen and the remaining data drained --
 462            we got what we came for.  */
 463         return hunk;
 464
 465       /* Keep looping until all the data arrives. */
 466
 467       if (tail == bufsize - 1)
 468         {
 469           /* Double the buffer size, but refuse to allocate more than
 470              MAXSIZE bytes.  */
 471           if (maxsize && bufsize >= maxsize)
 472             {
 473               xfree (hunk);
 474               errno = ENOMEM;
 475               return NULL;
 476             }
 477           bufsize <<= 1;
 478           if (maxsize && bufsize > maxsize)
 479             bufsize = maxsize;
 480           hunk = xrealloc (hunk, bufsize);
 481         }
 482     }
 483 }
 484
 485 static const char *
 486 line_terminator (const char *start, const char *peeked, int peeklen)
 487 {
 488   const char *p = memchr (peeked, '\n', peeklen);
 489   if (p)
 490     /* p+1 because the line must include '\n' */
 491     return p + 1;
 492   return NULL;
 493 }
 494
 495 /* The maximum size of the single line we agree to accept.  This is
 496    not meant to impose an arbitrary limit, but to protect the user
 497    from Wget slurping up available memory upon encountering malicious
 498    or buggy server output.  Define it to 0 to remove the limit.  */
 499 #define FD_READ_LINE_MAX 4096
 500
 501 /* Read one line from FD and return it.  The line is allocated using
 502    malloc, but is never larger than FD_READ_LINE_MAX.
 503
 504    If an error occurs, or if no data can be read, NULL is returned.
 505    In the former case errno indicates the error condition, and in the
 506    latter case, errno is NULL.  */
 507
 508 char *
 509 fd_read_line (int fd)
 510 {
 511   return fd_read_hunk (fd, line_terminator, 128, FD_READ_LINE_MAX);
 512 }
 513 \f
 514 /* Return a printed representation of the download rate, along with
 515    the units appropriate for the download speed.  */
 516
 517 const char *
 518 retr_rate (wgint bytes, double secs)
 519 {
 520   static char res[20];
 521   static const char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
 522   int units;
 523
 524   double dlrate = calc_rate (bytes, secs, &units);
 525   /* Use more digits for smaller numbers (regardless of unit used),
 526      e.g. "1022", "247", "12.5", "2.38".  */
 527   sprintf (res, "%.*f %s",
 528            dlrate >= 99.95 ? 0 : dlrate >= 9.995 ? 1 : 2,
 529            dlrate, rate_names[units]);
 530
 531   return res;
 532 }
 533
 534 /* Calculate the download rate and trim it as appropriate for the
 535    speed.  Appropriate means that if rate is greater than 1K/s,
 536    kilobytes are used, and if rate is greater than 1MB/s, megabytes
 537    are used.
 538
 539    UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
 540    GB/s.  */
 541
 542 double
 543 calc_rate (wgint bytes, double secs, int *units)
 544 {
 545   double dlrate;
 546
 547   assert (secs >= 0);
 548   assert (bytes >= 0);
 549
 550   if (secs == 0)
 551     /* If elapsed time is exactly zero, it means we're under the
 552        resolution of the timer.  This can easily happen on systems
 553        that use time() for the timer.  Since the interval lies between
 554        0 and the timer's resolution, assume half the resolution.  */
 555     secs = ptimer_resolution () / 2.0;
 556
 557   dlrate = bytes / secs;
 558   if (dlrate < 1024.0)
 559     *units = 0;
 560   else if (dlrate < 1024.0 * 1024.0)
 561     *units = 1, dlrate /= 1024.0;
 562   else if (dlrate < 1024.0 * 1024.0 * 1024.0)
 563     *units = 2, dlrate /= (1024.0 * 1024.0);
 564   else
 565     /* Maybe someone will need this, one day. */
 566     *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
 567
 568   return dlrate;
 569 }
 570 \f
 571
 572 #define SUSPEND_POST_DATA do {                  \
 573   post_data_suspended = true;                   \
 574   saved_post_data = opt.post_data;              \
 575   saved_post_file_name = opt.post_file_name;    \
 576   opt.post_data = NULL;                         \
 577   opt.post_file_name = NULL;                    \
 578 } while (0)
 579
 580 #define RESTORE_POST_DATA do {                          \
 581   if (post_data_suspended)                              \
 582     {                                                   \
 583       opt.post_data = saved_post_data;                  \
 584       opt.post_file_name = saved_post_file_name;        \
 585       post_data_suspended = false;                      \
 586     }                                                   \
 587 } while (0)
 588
 589 static char *getproxy (struct url *);
 590
 591 /* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
 592    FTP, proxy, etc.  */
 593
 594 /* #### This function should be rewritten so it doesn't return from
 595    multiple points. */
 596
 597 uerr_t
 598 retrieve_url (const char *origurl, char **file, char **newloc,
 599               const char *refurl, int *dt, bool recursive)
 600 {
 601   uerr_t result;
 602   char *url;
 603   bool location_changed;
 604   int dummy;
 605   char *mynewloc, *proxy;
 606   struct url *u, *proxy_url;
 607   int up_error_code;            /* url parse error code */
 608   char *local_file;
 609   int redirection_count = 0;
 610
 611   bool post_data_suspended = false;
 612   char *saved_post_data = NULL;
 613   char *saved_post_file_name = NULL;
 614
 615   /* If dt is NULL, use local storage.  */
 616   if (!dt)
 617     {
 618       dt = &dummy;
 619       dummy = 0;
 620     }
 621   url = xstrdup (origurl);
 622   if (newloc)
 623     *newloc = NULL;
 624   if (file)
 625     *file = NULL;
 626
 627   u = url_parse (url, &up_error_code);
 628   if (!u)
 629     {
 630       logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
 631       xfree (url);
 632       return URLERROR;
 633     }
 634
 635   if (!refurl)
 636     refurl = opt.referer;
 637
 638  redirected:
 639
 640   result = NOCONERROR;
 641   mynewloc = NULL;
 642   local_file = NULL;
 643   proxy_url = NULL;
 644
 645   proxy = getproxy (u);
 646   if (proxy)
 647     {
 648       /* Parse the proxy URL.  */
 649       proxy_url = url_parse (proxy, &up_error_code);
 650       if (!proxy_url)
 651         {
 652           logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
 653                      proxy, url_error (up_error_code));
 654           xfree (url);
 655           RESTORE_POST_DATA;
 656           return PROXERR;
 657         }
 658       if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
 659         {
 660           logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
 661           url_free (proxy_url);
 662           xfree (url);
 663           RESTORE_POST_DATA;
 664           return PROXERR;
 665         }
 666     }
 667
 668   if (u->scheme == SCHEME_HTTP
 669 #ifdef HAVE_SSL
 670       || u->scheme == SCHEME_HTTPS
 671 #endif
 672       || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
 673     {
 674       result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
 675     }
 676   else if (u->scheme == SCHEME_FTP)
 677     {
 678       /* If this is a redirection, temporarily turn off opt.ftp_glob
 679          and opt.recursive, both being undesirable when following
 680          redirects.  */
 681       bool oldrec = recursive, glob = opt.ftp_glob;
 682       if (redirection_count)
 683         oldrec = glob = false;
 684
 685       result = ftp_loop (u, dt, proxy_url, recursive, glob);
 686       recursive = oldrec;
 687
 688       /* There is a possibility of having HTTP being redirected to
 689          FTP.  In these cases we must decide whether the text is HTML
 690          according to the suffix.  The HTML suffixes are `.html',
 691          `.htm' and a few others, case-insensitive.  */
 692       if (redirection_count && local_file && u->scheme == SCHEME_FTP)
 693         {
 694           if (has_html_suffix_p (local_file))
 695             *dt |= TEXTHTML;
 696         }
 697     }
 698
 699   if (proxy_url)
 700     {
 701       url_free (proxy_url);
 702       proxy_url = NULL;
 703     }
 704
 705   location_changed = (result == NEWLOCATION);
 706   if (location_changed)
 707     {
 708       char *construced_newloc;
 709       struct url *newloc_parsed;
 710
 711       assert (mynewloc != NULL);
 712
 713       if (local_file)
 714         xfree (local_file);
 715
 716       /* The HTTP specs only allow absolute URLs to appear in
 717          redirects, but a ton of boneheaded webservers and CGIs out
 718          there break the rules and use relative URLs, and popular
 719          browsers are lenient about this, so wget should be too. */
 720       construced_newloc = uri_merge (url, mynewloc);
 721       xfree (mynewloc);
 722       mynewloc = construced_newloc;
 723
 724       /* Now, see if this new location makes sense. */
 725       newloc_parsed = url_parse (mynewloc, &up_error_code);
 726       if (!newloc_parsed)
 727         {
 728           logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
 729                      url_error (up_error_code));
 730           url_free (u);
 731           xfree (url);
 732           xfree (mynewloc);
 733           RESTORE_POST_DATA;
 734           return result;
 735         }
 736
 737       /* Now mynewloc will become newloc_parsed->url, because if the
 738          Location contained relative paths like .././something, we
 739          don't want that propagating as url.  */
 740       xfree (mynewloc);
 741       mynewloc = xstrdup (newloc_parsed->url);
 742
 743       /* Check for max. number of redirections.  */
 744       if (++redirection_count > opt.max_redirect)
 745         {
 746           logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
 747                      opt.max_redirect);
 748           url_free (newloc_parsed);
 749           url_free (u);
 750           xfree (url);
 751           xfree (mynewloc);
 752           RESTORE_POST_DATA;
 753           return WRONGCODE;
 754         }
 755
 756       xfree (url);
 757       url = mynewloc;
 758       url_free (u);
 759       u = newloc_parsed;
 760
 761       /* If we're being redirected from POST, we don't want to POST
 762          again.  Many requests answer POST with a redirection to an
 763          index page; that redirection is clearly a GET.  We "suspend"
 764          POST data for the duration of the redirections, and restore
 765          it when we're done. */
 766       if (!post_data_suspended)
 767         SUSPEND_POST_DATA;
 768
 769       goto redirected;
 770     }
 771
 772   if (local_file)
 773     {
 774       if (*dt & RETROKF)
 775         {
 776           register_download (u->url, local_file);
 777           if (redirection_count && 0 != strcmp (origurl, u->url))
 778             register_redirection (origurl, u->url);
 779           if (*dt & TEXTHTML)
 780             register_html (u->url, local_file);
 781         }
 782     }
 783
 784   if (file)
 785     *file = local_file ? local_file : NULL;
 786   else
 787     xfree_null (local_file);
 788
 789   url_free (u);
 790
 791   if (redirection_count)
 792     {
 793       if (newloc)
 794         *newloc = url;
 795       else
 796         xfree (url);
 797     }
 798   else
 799     {
 800       if (newloc)
 801         *newloc = NULL;
 802       xfree (url);
 803     }
 804
 805   RESTORE_POST_DATA;
 806
 807   return result;
 808 }
 809
 810 /* Find the URLs in the file and call retrieve_url() for each of them.
 811    If HTML is true, treat the file as HTML, and construct the URLs
 812    accordingly.
 813
 814    If opt.recursive is set, call retrieve_tree() for each file.  */
 815
 816 uerr_t
 817 retrieve_from_file (const char *file, bool html, int *count)
 818 {
 819   uerr_t status;
 820   struct urlpos *url_list, *cur_url;
 821
 822   url_list = (html ? get_urls_html (file, NULL, NULL)
 823               : get_urls_file (file));
 824   status = RETROK;             /* Suppose everything is OK.  */
 825   *count = 0;                  /* Reset the URL count.  */
 826
 827   for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
 828     {
 829       char *filename = NULL, *new_file = NULL;
 830       int dt;
 831
 832       if (cur_url->ignore_when_downloading)
 833         continue;
 834
 835       if (opt.quota && total_downloaded_bytes > opt.quota)
 836         {
 837           status = QUOTEXC;
 838           break;
 839         }
 840       if ((opt.recursive || opt.page_requisites)
 841           && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
 842         {
 843           int old_follow_ftp = opt.follow_ftp;
 844
 845           /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
 846           if (cur_url->url->scheme == SCHEME_FTP)
 847             opt.follow_ftp = 1;
 848
 849           status = retrieve_tree (cur_url->url->url);
 850
 851           opt.follow_ftp = old_follow_ftp;
 852         }
 853       else
 854         status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt, opt.recursive);
 855
 856       if (filename && opt.delete_after && file_exists_p (filename))
 857         {
 858           DEBUGP (("\
 859 Removing file due to --delete-after in retrieve_from_file():\n"));
 860           logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
 861           if (unlink (filename))
 862             logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
 863           dt &= ~RETROKF;
 864         }
 865
 866       xfree_null (new_file);
 867       xfree_null (filename);
 868     }
 869
 870   /* Free the linked list of URL-s.  */
 871   free_urlpos (url_list);
 872
 873   return status;
 874 }
 875
 876 /* Print `giving up', or `retrying', depending on the impending
 877    action.  N1 and N2 are the attempt number and the attempt limit.  */
 878 void
 879 printwhat (int n1, int n2)
 880 {
 881   logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
 882 }
 883
 884 /* If opt.wait or opt.waitretry are specified, and if certain
 885    conditions are met, sleep the appropriate number of seconds.  See
 886    the documentation of --wait and --waitretry for more information.
 887
 888    COUNT is the count of current retrieval, beginning with 1. */
 889
 890 void
 891 sleep_between_retrievals (int count)
 892 {
 893   static bool first_retrieval = true;
 894
 895   if (first_retrieval)
 896     {
 897       /* Don't sleep before the very first retrieval. */
 898       first_retrieval = false;
 899       return;
 900     }
 901
 902   if (opt.waitretry && count > 1)
 903     {
 904       /* If opt.waitretry is specified and this is a retry, wait for
 905          COUNT-1 number of seconds, or for opt.waitretry seconds.  */
 906       if (count <= opt.waitretry)
 907         xsleep (count - 1);
 908       else
 909         xsleep (opt.waitretry);
 910     }
 911   else if (opt.wait)
 912     {
 913       if (!opt.random_wait || count > 1)
 914         /* If random-wait is not specified, or if we are sleeping
 915            between retries of the same download, sleep the fixed
 916            interval.  */
 917         xsleep (opt.wait);
 918       else
 919         {
 920           /* Sleep a random amount of time averaging in opt.wait
 921              seconds.  The sleeping amount ranges from 0.5*opt.wait to
 922              1.5*opt.wait.  */
 923           double waitsecs = (0.5 + random_float ()) * opt.wait;
 924           DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
 925                    opt.wait, waitsecs));
 926           xsleep (waitsecs);
 927         }
 928     }
 929 }
 930
 931 /* Free the linked list of urlpos.  */
 932 void
 933 free_urlpos (struct urlpos *l)
 934 {
 935   while (l)
 936     {
 937       struct urlpos *next = l->next;
 938       if (l->url)
 939         url_free (l->url);
 940       xfree_null (l->local_name);
 941       xfree (l);
 942       l = next;
 943     }
 944 }
 945
 946 /* Rotate FNAME opt.backups times */
 947 void
 948 rotate_backups(const char *fname)
 949 {
 950   int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1;
 951   char *from = (char *)alloca (maxlen);
 952   char *to = (char *)alloca (maxlen);
 953   struct_stat sb;
 954   int i;
 955
 956   if (stat (fname, &sb) == 0)
 957     if (S_ISREG (sb.st_mode) == 0)
 958       return;
 959
 960   for (i = opt.backups; i > 1; i--)
 961     {
 962       sprintf (from, "%s.%d", fname, i - 1);
 963       sprintf (to, "%s.%d", fname, i);
 964       rename (from, to);
 965     }
 966
 967   sprintf (to, "%s.%d", fname, 1);
 968   rename(fname, to);
 969 }
 970
 971 static bool no_proxy_match (const char *, const char **);
 972
 973 /* Return the URL of the proxy appropriate for url U.  */
 974
 975 static char *
 976 getproxy (struct url *u)
 977 {
 978   char *proxy = NULL;
 979   char *rewritten_url;
 980   static char rewritten_storage[1024];
 981
 982   if (!opt.use_proxy)
 983     return NULL;
 984   if (no_proxy_match (u->host, (const char **)opt.no_proxy))
 985     return NULL;
 986
 987   switch (u->scheme)
 988     {
 989     case SCHEME_HTTP:
 990       proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
 991       break;
 992 #ifdef HAVE_SSL
 993     case SCHEME_HTTPS:
 994       proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
 995       break;
 996 #endif
 997     case SCHEME_FTP:
 998       proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
 999       break;
1000     case SCHEME_INVALID:
1001       break;
1002     }
1003   if (!proxy || !*proxy)
1004     return NULL;
1005
1006   /* Handle shorthands.  `rewritten_storage' is a kludge to allow
1007      getproxy() to return static storage. */
1008   rewritten_url = rewrite_shorthand_url (proxy);
1009   if (rewritten_url)
1010     {
1011       strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage));
1012       rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
1013       proxy = rewritten_storage;
1014     }
1015
1016   return proxy;
1017 }
1018
/* Report whether URL would be downloaded through a proxy, i.e. whether
   getproxy() yields a proxy for it.  A URL that url_parse cannot parse
   is reported as not using a proxy.  */

bool
url_uses_proxy (const char *url)
{
  bool proxied = false;
  struct url *parsed = url_parse (url, NULL);

  if (parsed)
    {
      proxied = (getproxy (parsed) != NULL);
      url_free (parsed);
    }

  return proxied;
}
1032
/* Return true if HOST matches the NO_PROXY list, as decided by
   sufmatch().  A NULL list matches nothing.  Note the sense of the
   result: true means the host is on the no-proxy list.  */
static bool
no_proxy_match (const char *host, const char **no_proxy)
{
  return no_proxy != NULL && sufmatch (no_proxy, host);
}