sjero.net Git - wget/blob - src/retr.c

   1 /* File retrieval.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or (at
   9 your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <sys/types.h>
  35 #ifdef HAVE_UNISTD_H
  36 # include <unistd.h>
  37 #endif /* HAVE_UNISTD_H */
  38 #include <errno.h>
  39 #ifdef HAVE_STRING_H
  40 # include <string.h>
  41 #else
  42 # include <strings.h>
  43 #endif /* HAVE_STRING_H */
  44 #include <assert.h>
  45
  46 #include "wget.h"
  47 #include "utils.h"
  48 #include "retr.h"
  49 #include "progress.h"
  50 #include "url.h"
  51 #include "recur.h"
  52 #include "ftp.h"
  53 #include "host.h"
  54 #include "connect.h"
  55 #include "hash.h"
  56 #include "convert.h"
  57 #include "ptimer.h"
  58
  59 #ifndef errno
  60 extern int errno;
  61 #endif
  62
/* Total size of downloaded files.  Used to enforce quota: it is
   compared against opt.quota before each retrieval started from a
   URL list.  */
LARGE_INT total_downloaded_bytes;

/* If non-NULL, the stream to which output should be written.  This
   stream is initialized when `-O' is used.  */
FILE *output_stream;

/* Whether output_document is a regular file we can manipulate,
   i.e. not `-' or a device file.  */
int output_stream_regular;
  73 \f
  74 static struct {
  75   wgint chunk_bytes;
  76   double chunk_start;
  77   double sleep_adjust;
  78 } limit_data;
  79
  80 static void
  81 limit_bandwidth_reset (void)
  82 {
  83   limit_data.chunk_bytes = 0;
  84   limit_data.chunk_start = 0;
  85   limit_data.sleep_adjust = 0;
  86 }
  87
  88 /* Limit the bandwidth by pausing the download for an amount of time.
  89    BYTES is the number of bytes received from the network, and TIMER
  90    is the timer that started at the beginning of download.  */
  91
  92 static void
  93 limit_bandwidth (wgint bytes, struct ptimer *timer)
  94 {
  95   double delta_t = ptimer_read (timer) - limit_data.chunk_start;
  96   double expected;
  97
  98   limit_data.chunk_bytes += bytes;
  99
 100   /* Calculate the amount of time we expect downloading the chunk
 101      should take.  If in reality it took less time, sleep to
 102      compensate for the difference.  */
 103   expected = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;
 104
 105   if (expected > delta_t)
 106     {
 107       double slp = expected - delta_t + limit_data.sleep_adjust;
 108       double t0, t1;
 109       if (slp < 200)
 110         {
 111           DEBUGP (("deferring a %.2f ms sleep (%s/%.2f).\n",
 112                    slp, number_to_static_string (limit_data.chunk_bytes),
 113                    delta_t));
 114           return;
 115         }
 116       DEBUGP (("\nsleeping %.2f ms for %s bytes, adjust %.2f ms\n",
 117                slp, number_to_static_string (limit_data.chunk_bytes),
 118                limit_data.sleep_adjust));
 119
 120       t0 = ptimer_read (timer);
 121       xsleep (slp / 1000);
 122       t1 = ptimer_measure (timer);
 123
 124       /* Due to scheduling, we probably slept slightly longer (or
 125          shorter) than desired.  Calculate the difference between the
 126          desired and the actual sleep, and adjust the next sleep by
 127          that amount.  */
 128       limit_data.sleep_adjust = slp - (t1 - t0);
 129       /* If sleep_adjust is very large, it's likely due to suspension
 130          and not clock inaccuracy.  Don't enforce those.  */
 131       if (limit_data.sleep_adjust > 500)
 132         limit_data.sleep_adjust = 500;
 133       else if (limit_data.sleep_adjust < -500)
 134         limit_data.sleep_adjust = -500;
 135     }
 136
 137   limit_data.chunk_bytes = 0;
 138   limit_data.chunk_start = ptimer_read (timer);
 139 }
 140
 141 #ifndef MIN
 142 # define MIN(i, j) ((i) <= (j) ? (i) : (j))
 143 #endif
 144
 145 /* Write data in BUF to OUT.  However, if *SKIP is non-zero, skip that
 146    amount of data and decrease SKIP.  Increment *TOTAL by the amount
 147    of data written.  */
 148
 149 static int
 150 write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
 151             wgint *written)
 152 {
 153   if (!out)
 154     return 1;
 155   if (*skip > bufsize)
 156     {
 157       *skip -= bufsize;
 158       return 1;
 159     }
 160   if (*skip)
 161     {
 162       buf += *skip;
 163       bufsize -= *skip;
 164       *skip = 0;
 165       if (bufsize == 0)
 166         return 1;
 167     }
 168
 169   fwrite (buf, 1, bufsize, out);
 170   *written += bufsize;
 171
 172   /* Immediately flush the downloaded data.  This should not hinder
 173      performance: fast downloads will arrive in large 16K chunks
 174      (which stdio would write out immediately anyway), and slow
 175      downloads wouldn't be limited by disk speed.  */
 176   fflush (out);
 177   return !ferror (out);
 178 }
 179
 180 /* Read the contents of file descriptor FD until it the connection
 181    terminates or a read error occurs.  The data is read in portions of
 182    up to 16K and written to OUT as it arrives.  If opt.verbose is set,
 183    the progress is shown.
 184
 185    TOREAD is the amount of data expected to arrive, normally only used
 186    by the progress gauge.
 187
 188    STARTPOS is the position from which the download starts, used by
 189    the progress gauge.  If QTYREAD is non-NULL, the value it points to
 190    is incremented by the amount of data read from the network.  If
 191    QTYWRITTEN is non-NULL, the value it points to is incremented by
 192    the amount of data written to disk.  The time it took to download
 193    the data (in milliseconds) is stored to ELAPSED.
 194
 195    The function exits and returns the amount of data read.  In case of
 196    error while reading data, -1 is returned.  In case of error while
 197    writing data, -2 is returned.  */
 198
 199 int
 200 fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
 201               wgint *qtyread, wgint *qtywritten, double *elapsed, int flags)
 202 {
 203   int ret = 0;
 204
 205   static char dlbuf[16384];
 206   int dlbufsize = sizeof (dlbuf);
 207
 208   struct ptimer *timer = NULL;
 209   double last_successful_read_tm = 0;
 210
 211   /* The progress gauge, set according to the user preferences. */
 212   void *progress = NULL;
 213
 214   /* Non-zero if the progress gauge is interactive, i.e. if it can
 215      continually update the display.  When true, smaller timeout
 216      values are used so that the gauge can update the display when
 217      data arrives slowly. */
 218   int progress_interactive = 0;
 219
 220   int exact = flags & rb_read_exactly;
 221   wgint skip = 0;
 222
 223   /* How much data we've read/written.  */
 224   wgint sum_read = 0;
 225   wgint sum_written = 0;
 226
 227   if (flags & rb_skip_startpos)
 228     skip = startpos;
 229
 230   if (opt.verbose)
 231     {
 232       /* If we're skipping STARTPOS bytes, pass 0 as the INITIAL
 233          argument to progress_create because the indicator doesn't
 234          (yet) know about "skipping" data.  */
 235       progress = progress_create (skip ? 0 : startpos, startpos + toread);
 236       progress_interactive = progress_interactive_p (progress);
 237     }
 238
 239   if (opt.limit_rate)
 240     limit_bandwidth_reset ();
 241
 242   /* A timer is needed for tracking progress, for throttling, and for
 243      tracking elapsed time.  If either of these are requested, start
 244      the timer.  */
 245   if (progress || opt.limit_rate || elapsed)
 246     {
 247       timer = ptimer_new ();
 248       last_successful_read_tm = 0;
 249     }
 250
 251   /* Use a smaller buffer for low requested bandwidths.  For example,
 252      with --limit-rate=2k, it doesn't make sense to slurp in 16K of
 253      data and then sleep for 8s.  With buffer size equal to the limit,
 254      we never have to sleep for more than one second.  */
 255   if (opt.limit_rate && opt.limit_rate < dlbufsize)
 256     dlbufsize = opt.limit_rate;
 257
 258   /* Read from FD while there is data to read.  Normally toread==0
 259      means that it is unknown how much data is to arrive.  However, if
 260      EXACT is set, then toread==0 means what it says: that no data
 261      should be read.  */
 262   while (!exact || (sum_read < toread))
 263     {
 264       int rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize;
 265       double tmout = opt.read_timeout;
 266       if (progress_interactive)
 267         {
 268           /* For interactive progress gauges, always specify a ~1s
 269              timeout, so that the gauge can be updated regularly even
 270              when the data arrives very slowly or stalls.  */
 271           tmout = 0.95;
 272           if (opt.read_timeout)
 273             {
 274               double waittm;
 275               waittm = (ptimer_read (timer) - last_successful_read_tm) / 1000;
 276               if (waittm + tmout > opt.read_timeout)
 277                 {
 278                   /* Don't let total idle time exceed read timeout. */
 279                   tmout = opt.read_timeout - waittm;
 280                   if (tmout < 0)
 281                     {
 282                       /* We've already exceeded the timeout. */
 283                       ret = -1, errno = ETIMEDOUT;
 284                       break;
 285                     }
 286                 }
 287             }
 288         }
 289       ret = fd_read (fd, dlbuf, rdsize, tmout);
 290
 291       if (progress_interactive && ret < 0 && errno == ETIMEDOUT)
 292         ret = 0;                /* interactive timeout, handled above */
 293       else if (ret <= 0)
 294         break;                  /* EOF or read error */
 295
 296       if (progress || opt.limit_rate)
 297         {
 298           ptimer_measure (timer);
 299           if (ret > 0)
 300             last_successful_read_tm = ptimer_read (timer);
 301         }
 302
 303       if (ret > 0)
 304         {
 305           sum_read += ret;
 306           if (!write_data (out, dlbuf, ret, &skip, &sum_written))
 307             {
 308               ret = -2;
 309               goto out_;
 310             }
 311         }
 312
 313       if (opt.limit_rate)
 314         limit_bandwidth (ret, timer);
 315
 316       if (progress)
 317         progress_update (progress, ret, ptimer_read (timer));
 318 #ifdef WINDOWS
 319       if (toread > 0 && !opt.quiet)
 320         ws_percenttitle (100.0 *
 321                          (startpos + sum_read) / (startpos + toread));
 322 #endif
 323     }
 324   if (ret < -1)
 325     ret = -1;
 326
 327  out_:
 328   if (progress)
 329     progress_finish (progress, ptimer_read (timer));
 330
 331   if (elapsed)
 332     *elapsed = ptimer_read (timer);
 333   if (timer)
 334     ptimer_destroy (timer);
 335
 336   if (qtyread)
 337     *qtyread += sum_read;
 338   if (qtywritten)
 339     *qtywritten += sum_written;
 340
 341   return ret;
 342 }
 343 \f
 344 /* Read a hunk of data from FD, up until a terminator.  The terminator
 345    is whatever the TERMINATOR function determines it to be; for
 346    example, it can be a line of data, or the head of an HTTP response.
 347    The function returns the data read allocated with malloc.
 348
 349    In case of error, NULL is returned.  In case of EOF and no data
 350    read, NULL is returned and errno set to 0.  In case of EOF with
 351    data having been read, the data is returned, but it will
 352    (obviously) not contain the terminator.
 353
 354    The idea is to be able to read a line of input, or otherwise a hunk
 355    of text, such as the head of an HTTP request, without crossing the
 356    boundary, so that the next call to fd_read etc. reads the data
 357    after the hunk.  To achieve that, this function does the following:
 358
 359    1. Peek at available data.
 360
 361    2. Determine whether the peeked data, along with the previously
 362       read data, includes the terminator.
 363
 364       2a. If yes, read the data until the end of the terminator, and
 365           exit.
 366
 367       2b. If no, read the peeked data and goto 1.
 368
 369    The function is careful to assume as little as possible about the
 370    implementation of peeking.  For example, every peek is followed by
 371    a read.  If the read returns a different amount of data, the
 372    process is retried until all data arrives safely.
 373
 374    SIZEHINT is the buffer size sufficient to hold all the data in the
 375    typical case (it is used as the initial buffer size).  MAXSIZE is
 376    the maximum amount of memory this function is allowed to allocate,
 377    or 0 if no upper limit is to be enforced.
 378
 379    This function should be used as a building block for other
 380    functions -- see fd_read_line as a simple example.  */
 381
 382 char *
 383 fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
 384 {
 385   long bufsize = sizehint;
 386   char *hunk = xmalloc (bufsize);
 387   int tail = 0;                 /* tail position in HUNK */
 388
 389   assert (maxsize >= bufsize);
 390
 391   while (1)
 392     {
 393       const char *end;
 394       int pklen, rdlen, remain;
 395
 396       /* First, peek at the available data. */
 397
 398       pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1.0);
 399       if (pklen < 0)
 400         {
 401           xfree (hunk);
 402           return NULL;
 403         }
 404       end = terminator (hunk, tail, pklen);
 405       if (end)
 406         {
 407           /* The data contains the terminator: we'll drain the data up
 408              to the end of the terminator.  */
 409           remain = end - (hunk + tail);
 410           if (remain == 0)
 411             {
 412               /* No more data needs to be read. */
 413               hunk[tail] = '\0';
 414               return hunk;
 415             }
 416           if (bufsize - 1 < tail + remain)
 417             {
 418               bufsize = tail + remain + 1;
 419               hunk = xrealloc (hunk, bufsize);
 420             }
 421         }
 422       else
 423         /* No terminator: simply read the data we know is (or should
 424            be) available.  */
 425         remain = pklen;
 426
 427       /* Now, read the data.  Note that we make no assumptions about
 428          how much data we'll get.  (Some TCP stacks are notorious for
 429          read returning less data than the previous MSG_PEEK.)  */
 430
 431       rdlen = fd_read (fd, hunk + tail, remain, 0.0);
 432       if (rdlen < 0)
 433         {
 434           xfree_null (hunk);
 435           return NULL;
 436         }
 437       tail += rdlen;
 438       hunk[tail] = '\0';
 439
 440       if (rdlen == 0)
 441         {
 442           if (tail == 0)
 443             {
 444               /* EOF without anything having been read */
 445               xfree (hunk);
 446               errno = 0;
 447               return NULL;
 448             }
 449           else
 450             /* EOF seen: return the data we've read. */
 451             return hunk;
 452         }
 453       if (end && rdlen == remain)
 454         /* The terminator was seen and the remaining data drained --
 455            we got what we came for.  */
 456         return hunk;
 457
 458       /* Keep looping until all the data arrives. */
 459
 460       if (tail == bufsize - 1)
 461         {
 462           /* Double the buffer size, but refuse to allocate more than
 463              MAXSIZE bytes.  */
 464           if (maxsize && bufsize >= maxsize)
 465             {
 466               xfree (hunk);
 467               errno = ENOMEM;
 468               return NULL;
 469             }
 470           bufsize <<= 1;
 471           if (maxsize && bufsize > maxsize)
 472             bufsize = maxsize;
 473           hunk = xrealloc (hunk, bufsize);
 474         }
 475     }
 476 }
 477
 478 static const char *
 479 line_terminator (const char *hunk, int oldlen, int peeklen)
 480 {
 481   const char *p = memchr (hunk + oldlen, '\n', peeklen);
 482   if (p)
 483     /* p+1 because we want the line to include '\n' */
 484     return p + 1;
 485   return NULL;
 486 }
 487
 488 /* The maximum size of the single line we agree to accept.  This is
 489    not meant to impose an arbitrary limit, but to protect the user
 490    from Wget slurping up available memory upon encountering malicious
 491    or buggy server output.  Define it to 0 to remove the limit.  */
 492 #define FD_READ_LINE_MAX 4096
 493
 494 /* Read one line from FD and return it.  The line is allocated using
 495    malloc, but is never larger than FD_READ_LINE_MAX.
 496
 497    If an error occurs, or if no data can be read, NULL is returned.
 498    In the former case errno indicates the error condition, and in the
 499    latter case, errno is NULL.  */
 500
 501 char *
 502 fd_read_line (int fd)
 503 {
 504   return fd_read_hunk (fd, line_terminator, 128, FD_READ_LINE_MAX);
 505 }
 506 \f
 507 /* Return a printed representation of the download rate, as
 508    appropriate for the speed.  If PAD is non-zero, strings will be
 509    padded to the width of 7 characters (xxxx.xx).  */
 510 char *
 511 retr_rate (wgint bytes, double msecs, int pad)
 512 {
 513   static char res[20];
 514   static const char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
 515   int units = 0;
 516
 517   double dlrate = calc_rate (bytes, msecs, &units);
 518   sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);
 519
 520   return res;
 521 }
 522
 523 /* Calculate the download rate and trim it as appropriate for the
 524    speed.  Appropriate means that if rate is greater than 1K/s,
 525    kilobytes are used, and if rate is greater than 1MB/s, megabytes
 526    are used.
 527
 528    UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
 529    GB/s.  */
 530 double
 531 calc_rate (wgint bytes, double msecs, int *units)
 532 {
 533   double dlrate;
 534
 535   assert (msecs >= 0);
 536   assert (bytes >= 0);
 537
 538   if (msecs == 0)
 539     /* If elapsed time is exactly zero, it means we're under the
 540        resolution of the timer.  This can easily happen on systems
 541        that use time() for the timer.  Since the interval lies between
 542        0 and the timer's resolution, assume half the resolution.  */
 543     msecs = ptimer_resolution () / 2.0;
 544
 545   dlrate = 1000.0 * bytes / msecs;
 546   if (dlrate < 1024.0)
 547     *units = 0;
 548   else if (dlrate < 1024.0 * 1024.0)
 549     *units = 1, dlrate /= 1024.0;
 550   else if (dlrate < 1024.0 * 1024.0 * 1024.0)
 551     *units = 2, dlrate /= (1024.0 * 1024.0);
 552   else
 553     /* Maybe someone will need this, one day. */
 554     *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
 555
 556   return dlrate;
 557 }
 558 \f
 559 /* Maximum number of allowed redirections.  20 was chosen as a
 560    "reasonable" value, which is low enough to not cause havoc, yet
 561    high enough to guarantee that normal retrievals will not be hurt by
 562    the check.  */
 563
 564 #define MAX_REDIRECTIONS 20
 565
 566 #define SUSPEND_POST_DATA do {                  \
 567   post_data_suspended = 1;                      \
 568   saved_post_data = opt.post_data;              \
 569   saved_post_file_name = opt.post_file_name;    \
 570   opt.post_data = NULL;                         \
 571   opt.post_file_name = NULL;                    \
 572 } while (0)
 573
 574 #define RESTORE_POST_DATA do {                          \
 575   if (post_data_suspended)                              \
 576     {                                                   \
 577       opt.post_data = saved_post_data;                  \
 578       opt.post_file_name = saved_post_file_name;        \
 579       post_data_suspended = 0;                          \
 580     }                                                   \
 581 } while (0)
 582
 583 static char *getproxy PARAMS ((struct url *));
 584
 585 /* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
 586    FTP, proxy, etc.  */
 587
 588 /* #### This function should be rewritten so it doesn't return from
 589    multiple points. */
 590
 591 uerr_t
 592 retrieve_url (const char *origurl, char **file, char **newloc,
 593               const char *refurl, int *dt)
 594 {
 595   uerr_t result;
 596   char *url;
 597   int location_changed, dummy;
 598   char *mynewloc, *proxy;
 599   struct url *u, *proxy_url;
 600   int up_error_code;            /* url parse error code */
 601   char *local_file;
 602   int redirection_count = 0;
 603
 604   int post_data_suspended = 0;
 605   char *saved_post_data = NULL;
 606   char *saved_post_file_name = NULL;
 607
 608   /* If dt is NULL, use local storage.  */
 609   if (!dt)
 610     {
 611       dt = &dummy;
 612       dummy = 0;
 613     }
 614   url = xstrdup (origurl);
 615   if (newloc)
 616     *newloc = NULL;
 617   if (file)
 618     *file = NULL;
 619
 620   u = url_parse (url, &up_error_code);
 621   if (!u)
 622     {
 623       logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
 624       xfree (url);
 625       return URLERROR;
 626     }
 627
 628   if (!refurl)
 629     refurl = opt.referer;
 630
 631  redirected:
 632
 633   result = NOCONERROR;
 634   mynewloc = NULL;
 635   local_file = NULL;
 636   proxy_url = NULL;
 637
 638   proxy = getproxy (u);
 639   if (proxy)
 640     {
 641       /* Parse the proxy URL.  */
 642       proxy_url = url_parse (proxy, &up_error_code);
 643       if (!proxy_url)
 644         {
 645           logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
 646                      proxy, url_error (up_error_code));
 647           xfree (url);
 648           RESTORE_POST_DATA;
 649           return PROXERR;
 650         }
 651       if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
 652         {
 653           logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
 654           url_free (proxy_url);
 655           xfree (url);
 656           RESTORE_POST_DATA;
 657           return PROXERR;
 658         }
 659     }
 660
 661   if (u->scheme == SCHEME_HTTP
 662 #ifdef HAVE_SSL
 663       || u->scheme == SCHEME_HTTPS
 664 #endif
 665       || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
 666     {
 667       result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
 668     }
 669   else if (u->scheme == SCHEME_FTP)
 670     {
 671       /* If this is a redirection, we must not allow recursive FTP
 672          retrieval, so we save recursion to oldrec, and restore it
 673          later.  */
 674       int oldrec = opt.recursive;
 675       if (redirection_count)
 676         opt.recursive = 0;
 677       result = ftp_loop (u, dt, proxy_url);
 678       opt.recursive = oldrec;
 679
 680       /* There is a possibility of having HTTP being redirected to
 681          FTP.  In these cases we must decide whether the text is HTML
 682          according to the suffix.  The HTML suffixes are `.html',
 683          `.htm' and a few others, case-insensitive.  */
 684       if (redirection_count && local_file && u->scheme == SCHEME_FTP)
 685         {
 686           if (has_html_suffix_p (local_file))
 687             *dt |= TEXTHTML;
 688         }
 689     }
 690
 691   if (proxy_url)
 692     {
 693       url_free (proxy_url);
 694       proxy_url = NULL;
 695     }
 696
 697   location_changed = (result == NEWLOCATION);
 698   if (location_changed)
 699     {
 700       char *construced_newloc;
 701       struct url *newloc_parsed;
 702
 703       assert (mynewloc != NULL);
 704
 705       if (local_file)
 706         xfree (local_file);
 707
 708       /* The HTTP specs only allow absolute URLs to appear in
 709          redirects, but a ton of boneheaded webservers and CGIs out
 710          there break the rules and use relative URLs, and popular
 711          browsers are lenient about this, so wget should be too. */
 712       construced_newloc = uri_merge (url, mynewloc);
 713       xfree (mynewloc);
 714       mynewloc = construced_newloc;
 715
 716       /* Now, see if this new location makes sense. */
 717       newloc_parsed = url_parse (mynewloc, &up_error_code);
 718       if (!newloc_parsed)
 719         {
 720           logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
 721                      url_error (up_error_code));
 722           url_free (u);
 723           xfree (url);
 724           xfree (mynewloc);
 725           RESTORE_POST_DATA;
 726           return result;
 727         }
 728
 729       /* Now mynewloc will become newloc_parsed->url, because if the
 730          Location contained relative paths like .././something, we
 731          don't want that propagating as url.  */
 732       xfree (mynewloc);
 733       mynewloc = xstrdup (newloc_parsed->url);
 734
 735       /* Check for max. number of redirections.  */
 736       if (++redirection_count > MAX_REDIRECTIONS)
 737         {
 738           logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
 739                      MAX_REDIRECTIONS);
 740           url_free (newloc_parsed);
 741           url_free (u);
 742           xfree (url);
 743           xfree (mynewloc);
 744           RESTORE_POST_DATA;
 745           return WRONGCODE;
 746         }
 747
 748       xfree (url);
 749       url = mynewloc;
 750       url_free (u);
 751       u = newloc_parsed;
 752
 753       /* If we're being redirected from POST, we don't want to POST
 754          again.  Many requests answer POST with a redirection to an
 755          index page; that redirection is clearly a GET.  We "suspend"
 756          POST data for the duration of the redirections, and restore
 757          it when we're done. */
 758       if (!post_data_suspended)
 759         SUSPEND_POST_DATA;
 760
 761       goto redirected;
 762     }
 763
 764   if (local_file)
 765     {
 766       if (*dt & RETROKF)
 767         {
 768           register_download (u->url, local_file);
 769           if (redirection_count && 0 != strcmp (origurl, u->url))
 770             register_redirection (origurl, u->url);
 771           if (*dt & TEXTHTML)
 772             register_html (u->url, local_file);
 773         }
 774     }
 775
 776   if (file)
 777     *file = local_file ? local_file : NULL;
 778   else
 779     xfree_null (local_file);
 780
 781   url_free (u);
 782
 783   if (redirection_count)
 784     {
 785       if (newloc)
 786         *newloc = url;
 787       else
 788         xfree (url);
 789     }
 790   else
 791     {
 792       if (newloc)
 793         *newloc = NULL;
 794       xfree (url);
 795     }
 796
 797   RESTORE_POST_DATA;
 798
 799   return result;
 800 }
 801
 802 /* Find the URLs in the file and call retrieve_url() for each of
 803    them.  If HTML is non-zero, treat the file as HTML, and construct
 804    the URLs accordingly.
 805
 806    If opt.recursive is set, call retrieve_tree() for each file.  */
 807
 808 uerr_t
 809 retrieve_from_file (const char *file, int html, int *count)
 810 {
 811   uerr_t status;
 812   struct urlpos *url_list, *cur_url;
 813
 814   url_list = (html ? get_urls_html (file, NULL, NULL)
 815               : get_urls_file (file));
 816   status = RETROK;             /* Suppose everything is OK.  */
 817   *count = 0;                  /* Reset the URL count.  */
 818
 819   for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
 820     {
 821       char *filename = NULL, *new_file = NULL;
 822       int dt;
 823
 824       if (cur_url->ignore_when_downloading)
 825         continue;
 826
 827       if (opt.quota && total_downloaded_bytes > opt.quota)
 828         {
 829           status = QUOTEXC;
 830           break;
 831         }
 832       if ((opt.recursive || opt.page_requisites)
 833           && cur_url->url->scheme != SCHEME_FTP)
 834         status = retrieve_tree (cur_url->url->url);
 835       else
 836         status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
 837
 838       if (filename && opt.delete_after && file_exists_p (filename))
 839         {
 840           DEBUGP (("\
 841 Removing file due to --delete-after in retrieve_from_file():\n"));
 842           logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
 843           if (unlink (filename))
 844             logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
 845           dt &= ~RETROKF;
 846         }
 847
 848       xfree_null (new_file);
 849       xfree_null (filename);
 850     }
 851
 852   /* Free the linked list of URL-s.  */
 853   free_urlpos (url_list);
 854
 855   return status;
 856 }
 857
 858 /* Print `giving up', or `retrying', depending on the impending
 859    action.  N1 and N2 are the attempt number and the attempt limit.  */
 860 void
 861 printwhat (int n1, int n2)
 862 {
 863   logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
 864 }
 865
 866 /* If opt.wait or opt.waitretry are specified, and if certain
 867    conditions are met, sleep the appropriate number of seconds.  See
 868    the documentation of --wait and --waitretry for more information.
 869
 870    COUNT is the count of current retrieval, beginning with 1. */
 871
 872 void
 873 sleep_between_retrievals (int count)
 874 {
 875   static int first_retrieval = 1;
 876
 877   if (first_retrieval)
 878     {
 879       /* Don't sleep before the very first retrieval. */
 880       first_retrieval = 0;
 881       return;
 882     }
 883
 884   if (opt.waitretry && count > 1)
 885     {
 886       /* If opt.waitretry is specified and this is a retry, wait for
 887          COUNT-1 number of seconds, or for opt.waitretry seconds.  */
 888       if (count <= opt.waitretry)
 889         xsleep (count - 1.0);
 890       else
 891         xsleep (opt.waitretry);
 892     }
 893   else if (opt.wait)
 894     {
 895       if (!opt.random_wait || count > 1)
 896         /* If random-wait is not specified, or if we are sleeping
 897            between retries of the same download, sleep the fixed
 898            interval.  */
 899         xsleep (opt.wait);
 900       else
 901         {
 902           /* Sleep a random amount of time averaging in opt.wait
 903              seconds.  The sleeping amount ranges from 0 to
 904              opt.wait*2, inclusive.  */
 905           double waitsecs = 2 * opt.wait * random_float ();
 906           DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
 907                    opt.wait, waitsecs));
 908           xsleep (waitsecs);
 909         }
 910     }
 911 }
 912
 913 /* Free the linked list of urlpos.  */
 914 void
 915 free_urlpos (struct urlpos *l)
 916 {
 917   while (l)
 918     {
 919       struct urlpos *next = l->next;
 920       if (l->url)
 921         url_free (l->url);
 922       xfree_null (l->local_name);
 923       xfree (l);
 924       l = next;
 925     }
 926 }
 927
 928 /* Rotate FNAME opt.backups times */
 929 void
 930 rotate_backups(const char *fname)
 931 {
 932   int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1;
 933   char *from = (char *)alloca (maxlen);
 934   char *to = (char *)alloca (maxlen);
 935   struct_stat sb;
 936   int i;
 937
 938   if (stat (fname, &sb) == 0)
 939     if (S_ISREG (sb.st_mode) == 0)
 940       return;
 941
 942   for (i = opt.backups; i > 1; i--)
 943     {
 944       sprintf (from, "%s.%d", fname, i - 1);
 945       sprintf (to, "%s.%d", fname, i);
 946       rename (from, to);
 947     }
 948
 949   sprintf (to, "%s.%d", fname, 1);
 950   rename(fname, to);
 951 }
 952
 953 static int no_proxy_match PARAMS ((const char *, const char **));
 954
 955 /* Return the URL of the proxy appropriate for url U.  */
 956
 957 static char *
 958 getproxy (struct url *u)
 959 {
 960   char *proxy = NULL;
 961   char *rewritten_url;
 962   static char rewritten_storage[1024];
 963
 964   if (!opt.use_proxy)
 965     return NULL;
 966   if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
 967     return NULL;
 968
 969   switch (u->scheme)
 970     {
 971     case SCHEME_HTTP:
 972       proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
 973       break;
 974 #ifdef HAVE_SSL
 975     case SCHEME_HTTPS:
 976       proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
 977       break;
 978 #endif
 979     case SCHEME_FTP:
 980       proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
 981       break;
 982     case SCHEME_INVALID:
 983       break;
 984     }
 985   if (!proxy || !*proxy)
 986     return NULL;
 987
 988   /* Handle shorthands.  `rewritten_storage' is a kludge to allow
 989      getproxy() to return static storage. */
 990   rewritten_url = rewrite_shorthand_url (proxy);
 991   if (rewritten_url)
 992     {
 993       strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage));
 994       rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
 995       proxy = rewritten_storage;
 996     }
 997
 998   return proxy;
 999 }
1000
/* Decide whether HOST may be accessed through a proxy, given the
   NO_PROXY list.  Returns non-zero if NO_PROXY is NULL or if
   sufmatch() reports no match for HOST in it, and zero otherwise.  */
static int
no_proxy_match (const char *host, const char **no_proxy)
{
  return !no_proxy || !sufmatch (no_proxy, host);
}
1009 }