sjero.net Git - wget/blob - src/retr.c

   1 /* File retrieval.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or (at
   9 your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #ifdef HAVE_UNISTD_H
  35 # include <unistd.h>
  36 #endif /* HAVE_UNISTD_H */
  37 #include <errno.h>
  38 #include <string.h>
  39 #include <assert.h>
  40
  41 #include "wget.h"
  42 #include "utils.h"
  43 #include "retr.h"
  44 #include "progress.h"
  45 #include "url.h"
  46 #include "recur.h"
  47 #include "ftp.h"
  48 #include "http.h"
  49 #include "host.h"
  50 #include "connect.h"
  51 #include "hash.h"
  52 #include "convert.h"
  53 #include "ptimer.h"
  54
/* Total size of downloaded files.  Used to enforce quota:
   retrieve_from_file stops with QUOTEXC once this value exceeds
   opt.quota.  */
SUM_SIZE_INT total_downloaded_bytes;

/* Total download time in milliseconds. */
double total_download_time;

/* If non-NULL, the stream to which output should be written.  This
   stream is initialized when `-O' is used.  */
FILE *output_stream;

/* Whether output_document is a regular file we can manipulate,
   i.e. not `-' or a device file. */
bool output_stream_regular;
  68 \f
  69 static struct {
  70   wgint chunk_bytes;
  71   double chunk_start;
  72   double sleep_adjust;
  73 } limit_data;
  74
  75 static void
  76 limit_bandwidth_reset (void)
  77 {
  78   limit_data.chunk_bytes = 0;
  79   limit_data.chunk_start = 0;
  80   limit_data.sleep_adjust = 0;
  81 }
  82
  83 /* Limit the bandwidth by pausing the download for an amount of time.
  84    BYTES is the number of bytes received from the network, and TIMER
  85    is the timer that started at the beginning of download.  */
  86
  87 static void
  88 limit_bandwidth (wgint bytes, struct ptimer *timer)
  89 {
  90   double delta_t = ptimer_read (timer) - limit_data.chunk_start;
  91   double expected;
  92
  93   limit_data.chunk_bytes += bytes;
  94
  95   /* Calculate the amount of time we expect downloading the chunk
  96      should take.  If in reality it took less time, sleep to
  97      compensate for the difference.  */
  98   expected = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;
  99
 100   if (expected > delta_t)
 101     {
 102       double slp = expected - delta_t + limit_data.sleep_adjust;
 103       double t0, t1;
 104       if (slp < 200)
 105         {
 106           DEBUGP (("deferring a %.2f ms sleep (%s/%.2f).\n",
 107                    slp, number_to_static_string (limit_data.chunk_bytes),
 108                    delta_t));
 109           return;
 110         }
 111       DEBUGP (("\nsleeping %.2f ms for %s bytes, adjust %.2f ms\n",
 112                slp, number_to_static_string (limit_data.chunk_bytes),
 113                limit_data.sleep_adjust));
 114
 115       t0 = ptimer_read (timer);
 116       xsleep (slp / 1000);
 117       t1 = ptimer_measure (timer);
 118
 119       /* Due to scheduling, we probably slept slightly longer (or
 120          shorter) than desired.  Calculate the difference between the
 121          desired and the actual sleep, and adjust the next sleep by
 122          that amount.  */
 123       limit_data.sleep_adjust = slp - (t1 - t0);
 124       /* If sleep_adjust is very large, it's likely due to suspension
 125          and not clock inaccuracy.  Don't enforce those.  */
 126       if (limit_data.sleep_adjust > 500)
 127         limit_data.sleep_adjust = 500;
 128       else if (limit_data.sleep_adjust < -500)
 129         limit_data.sleep_adjust = -500;
 130     }
 131
 132   limit_data.chunk_bytes = 0;
 133   limit_data.chunk_start = ptimer_read (timer);
 134 }
 135
 136 #ifndef MIN
 137 # define MIN(i, j) ((i) <= (j) ? (i) : (j))
 138 #endif
 139
 140 /* Write data in BUF to OUT.  However, if *SKIP is non-zero, skip that
 141    amount of data and decrease SKIP.  Increment *TOTAL by the amount
 142    of data written.  */
 143
 144 static int
 145 write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
 146             wgint *written)
 147 {
 148   if (!out)
 149     return 1;
 150   if (*skip > bufsize)
 151     {
 152       *skip -= bufsize;
 153       return 1;
 154     }
 155   if (*skip)
 156     {
 157       buf += *skip;
 158       bufsize -= *skip;
 159       *skip = 0;
 160       if (bufsize == 0)
 161         return 1;
 162     }
 163
 164   fwrite (buf, 1, bufsize, out);
 165   *written += bufsize;
 166
 167   /* Immediately flush the downloaded data.  This should not hinder
 168      performance: fast downloads will arrive in large 16K chunks
 169      (which stdio would write out immediately anyway), and slow
 170      downloads wouldn't be limited by disk speed.  */
 171   fflush (out);
 172   return !ferror (out);
 173 }
 174
 175 /* Read the contents of file descriptor FD until it the connection
 176    terminates or a read error occurs.  The data is read in portions of
 177    up to 16K and written to OUT as it arrives.  If opt.verbose is set,
 178    the progress is shown.
 179
 180    TOREAD is the amount of data expected to arrive, normally only used
 181    by the progress gauge.
 182
 183    STARTPOS is the position from which the download starts, used by
 184    the progress gauge.  If QTYREAD is non-NULL, the value it points to
 185    is incremented by the amount of data read from the network.  If
 186    QTYWRITTEN is non-NULL, the value it points to is incremented by
 187    the amount of data written to disk.  The time it took to download
 188    the data (in milliseconds) is stored to ELAPSED.
 189
 190    The function exits and returns the amount of data read.  In case of
 191    error while reading data, -1 is returned.  In case of error while
 192    writing data, -2 is returned.  */
 193
 194 int
 195 fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
 196               wgint *qtyread, wgint *qtywritten, double *elapsed, int flags)
 197 {
 198   int ret = 0;
 199
 200   static char dlbuf[16384];
 201   int dlbufsize = sizeof (dlbuf);
 202
 203   struct ptimer *timer = NULL;
 204   double last_successful_read_tm = 0;
 205
 206   /* The progress gauge, set according to the user preferences. */
 207   void *progress = NULL;
 208
 209   /* Non-zero if the progress gauge is interactive, i.e. if it can
 210      continually update the display.  When true, smaller timeout
 211      values are used so that the gauge can update the display when
 212      data arrives slowly. */
 213   bool progress_interactive = false;
 214
 215   bool exact = !!(flags & rb_read_exactly);
 216   wgint skip = 0;
 217
 218   /* How much data we've read/written.  */
 219   wgint sum_read = 0;
 220   wgint sum_written = 0;
 221
 222   if (flags & rb_skip_startpos)
 223     skip = startpos;
 224
 225   if (opt.verbose)
 226     {
 227       /* If we're skipping STARTPOS bytes, pass 0 as the INITIAL
 228          argument to progress_create because the indicator doesn't
 229          (yet) know about "skipping" data.  */
 230       progress = progress_create (skip ? 0 : startpos, startpos + toread);
 231       progress_interactive = progress_interactive_p (progress);
 232     }
 233
 234   if (opt.limit_rate)
 235     limit_bandwidth_reset ();
 236
 237   /* A timer is needed for tracking progress, for throttling, and for
 238      tracking elapsed time.  If either of these are requested, start
 239      the timer.  */
 240   if (progress || opt.limit_rate || elapsed)
 241     {
 242       timer = ptimer_new ();
 243       last_successful_read_tm = 0;
 244     }
 245
 246   /* Use a smaller buffer for low requested bandwidths.  For example,
 247      with --limit-rate=2k, it doesn't make sense to slurp in 16K of
 248      data and then sleep for 8s.  With buffer size equal to the limit,
 249      we never have to sleep for more than one second.  */
 250   if (opt.limit_rate && opt.limit_rate < dlbufsize)
 251     dlbufsize = opt.limit_rate;
 252
 253   /* Read from FD while there is data to read.  Normally toread==0
 254      means that it is unknown how much data is to arrive.  However, if
 255      EXACT is set, then toread==0 means what it says: that no data
 256      should be read.  */
 257   while (!exact || (sum_read < toread))
 258     {
 259       int rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize;
 260       double tmout = opt.read_timeout;
 261       if (progress_interactive)
 262         {
 263           /* For interactive progress gauges, always specify a ~1s
 264              timeout, so that the gauge can be updated regularly even
 265              when the data arrives very slowly or stalls.  */
 266           tmout = 0.95;
 267           if (opt.read_timeout)
 268             {
 269               double waittm;
 270               waittm = (ptimer_read (timer) - last_successful_read_tm) / 1000;
 271               if (waittm + tmout > opt.read_timeout)
 272                 {
 273                   /* Don't let total idle time exceed read timeout. */
 274                   tmout = opt.read_timeout - waittm;
 275                   if (tmout < 0)
 276                     {
 277                       /* We've already exceeded the timeout. */
 278                       ret = -1, errno = ETIMEDOUT;
 279                       break;
 280                     }
 281                 }
 282             }
 283         }
 284       ret = fd_read (fd, dlbuf, rdsize, tmout);
 285
 286       if (progress_interactive && ret < 0 && errno == ETIMEDOUT)
 287         ret = 0;                /* interactive timeout, handled above */
 288       else if (ret <= 0)
 289         break;                  /* EOF or read error */
 290
 291       if (progress || opt.limit_rate)
 292         {
 293           ptimer_measure (timer);
 294           if (ret > 0)
 295             last_successful_read_tm = ptimer_read (timer);
 296         }
 297
 298       if (ret > 0)
 299         {
 300           sum_read += ret;
 301           if (!write_data (out, dlbuf, ret, &skip, &sum_written))
 302             {
 303               ret = -2;
 304               goto out;
 305             }
 306         }
 307
 308       if (opt.limit_rate)
 309         limit_bandwidth (ret, timer);
 310
 311       if (progress)
 312         progress_update (progress, ret, ptimer_read (timer));
 313 #ifdef WINDOWS
 314       if (toread > 0 && !opt.quiet)
 315         ws_percenttitle (100.0 *
 316                          (startpos + sum_read) / (startpos + toread));
 317 #endif
 318     }
 319   if (ret < -1)
 320     ret = -1;
 321
 322  out:
 323   if (progress)
 324     progress_finish (progress, ptimer_read (timer));
 325
 326   if (elapsed)
 327     *elapsed = ptimer_read (timer);
 328   if (timer)
 329     ptimer_destroy (timer);
 330
 331   if (qtyread)
 332     *qtyread += sum_read;
 333   if (qtywritten)
 334     *qtywritten += sum_written;
 335
 336   return ret;
 337 }
 338 \f
 339 /* Read a hunk of data from FD, up until a terminator.  The terminator
 340    is whatever the TERMINATOR function determines it to be; for
 341    example, it can be a line of data, or the head of an HTTP response.
 342    The function returns the data read allocated with malloc.
 343
 344    In case of error, NULL is returned.  In case of EOF and no data
 345    read, NULL is returned and errno set to 0.  In case of EOF with
 346    data having been read, the data is returned, but it will
 347    (obviously) not contain the terminator.
 348
 349    The idea is to be able to read a line of input, or otherwise a hunk
 350    of text, such as the head of an HTTP request, without crossing the
 351    boundary, so that the next call to fd_read etc. reads the data
 352    after the hunk.  To achieve that, this function does the following:
 353
 354    1. Peek at available data.
 355
 356    2. Determine whether the peeked data, along with the previously
 357       read data, includes the terminator.
 358
 359       2a. If yes, read the data until the end of the terminator, and
 360           exit.
 361
 362       2b. If no, read the peeked data and goto 1.
 363
 364    The function is careful to assume as little as possible about the
 365    implementation of peeking.  For example, every peek is followed by
 366    a read.  If the read returns a different amount of data, the
 367    process is retried until all data arrives safely.
 368
 369    SIZEHINT is the buffer size sufficient to hold all the data in the
 370    typical case (it is used as the initial buffer size).  MAXSIZE is
 371    the maximum amount of memory this function is allowed to allocate,
 372    or 0 if no upper limit is to be enforced.
 373
 374    This function should be used as a building block for other
 375    functions -- see fd_read_line as a simple example.  */
 376
 377 char *
 378 fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
 379 {
 380   long bufsize = sizehint;
 381   char *hunk = xmalloc (bufsize);
 382   int tail = 0;                 /* tail position in HUNK */
 383
 384   assert (maxsize >= bufsize);
 385
 386   while (1)
 387     {
 388       const char *end;
 389       int pklen, rdlen, remain;
 390
 391       /* First, peek at the available data. */
 392
 393       pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1);
 394       if (pklen < 0)
 395         {
 396           xfree (hunk);
 397           return NULL;
 398         }
 399       end = terminator (hunk, tail, pklen);
 400       if (end)
 401         {
 402           /* The data contains the terminator: we'll drain the data up
 403              to the end of the terminator.  */
 404           remain = end - (hunk + tail);
 405           if (remain == 0)
 406             {
 407               /* No more data needs to be read. */
 408               hunk[tail] = '\0';
 409               return hunk;
 410             }
 411           if (bufsize - 1 < tail + remain)
 412             {
 413               bufsize = tail + remain + 1;
 414               hunk = xrealloc (hunk, bufsize);
 415             }
 416         }
 417       else
 418         /* No terminator: simply read the data we know is (or should
 419            be) available.  */
 420         remain = pklen;
 421
 422       /* Now, read the data.  Note that we make no assumptions about
 423          how much data we'll get.  (Some TCP stacks are notorious for
 424          read returning less data than the previous MSG_PEEK.)  */
 425
 426       rdlen = fd_read (fd, hunk + tail, remain, 0);
 427       if (rdlen < 0)
 428         {
 429           xfree_null (hunk);
 430           return NULL;
 431         }
 432       tail += rdlen;
 433       hunk[tail] = '\0';
 434
 435       if (rdlen == 0)
 436         {
 437           if (tail == 0)
 438             {
 439               /* EOF without anything having been read */
 440               xfree (hunk);
 441               errno = 0;
 442               return NULL;
 443             }
 444           else
 445             /* EOF seen: return the data we've read. */
 446             return hunk;
 447         }
 448       if (end && rdlen == remain)
 449         /* The terminator was seen and the remaining data drained --
 450            we got what we came for.  */
 451         return hunk;
 452
 453       /* Keep looping until all the data arrives. */
 454
 455       if (tail == bufsize - 1)
 456         {
 457           /* Double the buffer size, but refuse to allocate more than
 458              MAXSIZE bytes.  */
 459           if (maxsize && bufsize >= maxsize)
 460             {
 461               xfree (hunk);
 462               errno = ENOMEM;
 463               return NULL;
 464             }
 465           bufsize <<= 1;
 466           if (maxsize && bufsize > maxsize)
 467             bufsize = maxsize;
 468           hunk = xrealloc (hunk, bufsize);
 469         }
 470     }
 471 }
 472
 473 static const char *
 474 line_terminator (const char *hunk, int oldlen, int peeklen)
 475 {
 476   const char *p = memchr (hunk + oldlen, '\n', peeklen);
 477   if (p)
 478     /* p+1 because we want the line to include '\n' */
 479     return p + 1;
 480   return NULL;
 481 }
 482
 483 /* The maximum size of the single line we agree to accept.  This is
 484    not meant to impose an arbitrary limit, but to protect the user
 485    from Wget slurping up available memory upon encountering malicious
 486    or buggy server output.  Define it to 0 to remove the limit.  */
 487 #define FD_READ_LINE_MAX 4096
 488
 489 /* Read one line from FD and return it.  The line is allocated using
 490    malloc, but is never larger than FD_READ_LINE_MAX.
 491
 492    If an error occurs, or if no data can be read, NULL is returned.
 493    In the former case errno indicates the error condition, and in the
 494    latter case, errno is NULL.  */
 495
 496 char *
 497 fd_read_line (int fd)
 498 {
 499   return fd_read_hunk (fd, line_terminator, 128, FD_READ_LINE_MAX);
 500 }
 501 \f
 502 /* Return a printed representation of the download rate, along with
 503    the units appropriate for the download speed.  */
 504
 505 const char *
 506 retr_rate (wgint bytes, double msecs)
 507 {
 508   static char res[20];
 509   static const char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
 510   int units = 0;
 511
 512   double dlrate = calc_rate (bytes, msecs, &units);
 513   sprintf (res, "%.2f %s", dlrate, rate_names[units]);
 514
 515   return res;
 516 }
 517
 518 /* Calculate the download rate and trim it as appropriate for the
 519    speed.  Appropriate means that if rate is greater than 1K/s,
 520    kilobytes are used, and if rate is greater than 1MB/s, megabytes
 521    are used.
 522
 523    UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
 524    GB/s.  */
 525 double
 526 calc_rate (wgint bytes, double msecs, int *units)
 527 {
 528   double dlrate;
 529
 530   assert (msecs >= 0);
 531   assert (bytes >= 0);
 532
 533   if (msecs == 0)
 534     /* If elapsed time is exactly zero, it means we're under the
 535        resolution of the timer.  This can easily happen on systems
 536        that use time() for the timer.  Since the interval lies between
 537        0 and the timer's resolution, assume half the resolution.  */
 538     msecs = ptimer_resolution () / 2.0;
 539
 540   dlrate = 1000.0 * bytes / msecs;
 541   if (dlrate < 1024.0)
 542     *units = 0;
 543   else if (dlrate < 1024.0 * 1024.0)
 544     *units = 1, dlrate /= 1024.0;
 545   else if (dlrate < 1024.0 * 1024.0 * 1024.0)
 546     *units = 2, dlrate /= (1024.0 * 1024.0);
 547   else
 548     /* Maybe someone will need this, one day. */
 549     *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
 550
 551   return dlrate;
 552 }
 553 \f
 554 /* Maximum number of allowed redirections.  20 was chosen as a
 555    "reasonable" value, which is low enough to not cause havoc, yet
 556    high enough to guarantee that normal retrievals will not be hurt by
 557    the check.  */
 558
 559 #define MAX_REDIRECTIONS 20
 560
 561 #define SUSPEND_POST_DATA do {                  \
 562   post_data_suspended = true;                   \
 563   saved_post_data = opt.post_data;              \
 564   saved_post_file_name = opt.post_file_name;    \
 565   opt.post_data = NULL;                         \
 566   opt.post_file_name = NULL;                    \
 567 } while (0)
 568
 569 #define RESTORE_POST_DATA do {                          \
 570   if (post_data_suspended)                              \
 571     {                                                   \
 572       opt.post_data = saved_post_data;                  \
 573       opt.post_file_name = saved_post_file_name;        \
 574       post_data_suspended = false;                      \
 575     }                                                   \
 576 } while (0)
 577
 578 static char *getproxy (struct url *);
 579
 580 /* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
 581    FTP, proxy, etc.  */
 582
 583 /* #### This function should be rewritten so it doesn't return from
 584    multiple points. */
 585
 586 uerr_t
 587 retrieve_url (const char *origurl, char **file, char **newloc,
 588               const char *refurl, int *dt)
 589 {
 590   uerr_t result;
 591   char *url;
 592   bool location_changed;
 593   int dummy;
 594   char *mynewloc, *proxy;
 595   struct url *u, *proxy_url;
 596   int up_error_code;            /* url parse error code */
 597   char *local_file;
 598   int redirection_count = 0;
 599
 600   bool post_data_suspended = false;
 601   char *saved_post_data = NULL;
 602   char *saved_post_file_name = NULL;
 603
 604   /* If dt is NULL, use local storage.  */
 605   if (!dt)
 606     {
 607       dt = &dummy;
 608       dummy = 0;
 609     }
 610   url = xstrdup (origurl);
 611   if (newloc)
 612     *newloc = NULL;
 613   if (file)
 614     *file = NULL;
 615
 616   u = url_parse (url, &up_error_code);
 617   if (!u)
 618     {
 619       logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
 620       xfree (url);
 621       return URLERROR;
 622     }
 623
 624   if (!refurl)
 625     refurl = opt.referer;
 626
 627  redirected:
 628
 629   result = NOCONERROR;
 630   mynewloc = NULL;
 631   local_file = NULL;
 632   proxy_url = NULL;
 633
 634   proxy = getproxy (u);
 635   if (proxy)
 636     {
 637       /* Parse the proxy URL.  */
 638       proxy_url = url_parse (proxy, &up_error_code);
 639       if (!proxy_url)
 640         {
 641           logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
 642                      proxy, url_error (up_error_code));
 643           xfree (url);
 644           RESTORE_POST_DATA;
 645           return PROXERR;
 646         }
 647       if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
 648         {
 649           logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
 650           url_free (proxy_url);
 651           xfree (url);
 652           RESTORE_POST_DATA;
 653           return PROXERR;
 654         }
 655     }
 656
 657   if (u->scheme == SCHEME_HTTP
 658 #ifdef HAVE_SSL
 659       || u->scheme == SCHEME_HTTPS
 660 #endif
 661       || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
 662     {
 663       result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
 664     }
 665   else if (u->scheme == SCHEME_FTP)
 666     {
 667       /* If this is a redirection, temporarily turn off opt.ftp_glob
 668          and opt.recursive, both being undesirable when following
 669          redirects.  */
 670       bool oldrec = opt.recursive, oldglob = opt.ftp_glob;
 671       if (redirection_count)
 672         opt.recursive = opt.ftp_glob = false;
 673
 674       result = ftp_loop (u, dt, proxy_url);
 675       opt.recursive = oldrec;
 676       opt.ftp_glob = oldglob;
 677
 678       /* There is a possibility of having HTTP being redirected to
 679          FTP.  In these cases we must decide whether the text is HTML
 680          according to the suffix.  The HTML suffixes are `.html',
 681          `.htm' and a few others, case-insensitive.  */
 682       if (redirection_count && local_file && u->scheme == SCHEME_FTP)
 683         {
 684           if (has_html_suffix_p (local_file))
 685             *dt |= TEXTHTML;
 686         }
 687     }
 688
 689   if (proxy_url)
 690     {
 691       url_free (proxy_url);
 692       proxy_url = NULL;
 693     }
 694
 695   location_changed = (result == NEWLOCATION);
 696   if (location_changed)
 697     {
 698       char *construced_newloc;
 699       struct url *newloc_parsed;
 700
 701       assert (mynewloc != NULL);
 702
 703       if (local_file)
 704         xfree (local_file);
 705
 706       /* The HTTP specs only allow absolute URLs to appear in
 707          redirects, but a ton of boneheaded webservers and CGIs out
 708          there break the rules and use relative URLs, and popular
 709          browsers are lenient about this, so wget should be too. */
 710       construced_newloc = uri_merge (url, mynewloc);
 711       xfree (mynewloc);
 712       mynewloc = construced_newloc;
 713
 714       /* Now, see if this new location makes sense. */
 715       newloc_parsed = url_parse (mynewloc, &up_error_code);
 716       if (!newloc_parsed)
 717         {
 718           logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
 719                      url_error (up_error_code));
 720           url_free (u);
 721           xfree (url);
 722           xfree (mynewloc);
 723           RESTORE_POST_DATA;
 724           return result;
 725         }
 726
 727       /* Now mynewloc will become newloc_parsed->url, because if the
 728          Location contained relative paths like .././something, we
 729          don't want that propagating as url.  */
 730       xfree (mynewloc);
 731       mynewloc = xstrdup (newloc_parsed->url);
 732
 733       /* Check for max. number of redirections.  */
 734       if (++redirection_count > MAX_REDIRECTIONS)
 735         {
 736           logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
 737                      MAX_REDIRECTIONS);
 738           url_free (newloc_parsed);
 739           url_free (u);
 740           xfree (url);
 741           xfree (mynewloc);
 742           RESTORE_POST_DATA;
 743           return WRONGCODE;
 744         }
 745
 746       xfree (url);
 747       url = mynewloc;
 748       url_free (u);
 749       u = newloc_parsed;
 750
 751       /* If we're being redirected from POST, we don't want to POST
 752          again.  Many requests answer POST with a redirection to an
 753          index page; that redirection is clearly a GET.  We "suspend"
 754          POST data for the duration of the redirections, and restore
 755          it when we're done. */
 756       if (!post_data_suspended)
 757         SUSPEND_POST_DATA;
 758
 759       goto redirected;
 760     }
 761
 762   if (local_file)
 763     {
 764       if (*dt & RETROKF)
 765         {
 766           register_download (u->url, local_file);
 767           if (redirection_count && 0 != strcmp (origurl, u->url))
 768             register_redirection (origurl, u->url);
 769           if (*dt & TEXTHTML)
 770             register_html (u->url, local_file);
 771         }
 772     }
 773
 774   if (file)
 775     *file = local_file ? local_file : NULL;
 776   else
 777     xfree_null (local_file);
 778
 779   url_free (u);
 780
 781   if (redirection_count)
 782     {
 783       if (newloc)
 784         *newloc = url;
 785       else
 786         xfree (url);
 787     }
 788   else
 789     {
 790       if (newloc)
 791         *newloc = NULL;
 792       xfree (url);
 793     }
 794
 795   RESTORE_POST_DATA;
 796
 797   return result;
 798 }
 799
 800 /* Find the URLs in the file and call retrieve_url() for each of them.
 801    If HTML is true, treat the file as HTML, and construct the URLs
 802    accordingly.
 803
 804    If opt.recursive is set, call retrieve_tree() for each file.  */
 805
 806 uerr_t
 807 retrieve_from_file (const char *file, bool html, int *count)
 808 {
 809   uerr_t status;
 810   struct urlpos *url_list, *cur_url;
 811
 812   url_list = (html ? get_urls_html (file, NULL, NULL)
 813               : get_urls_file (file));
 814   status = RETROK;             /* Suppose everything is OK.  */
 815   *count = 0;                  /* Reset the URL count.  */
 816
 817   for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
 818     {
 819       char *filename = NULL, *new_file = NULL;
 820       int dt;
 821
 822       if (cur_url->ignore_when_downloading)
 823         continue;
 824
 825       if (opt.quota && total_downloaded_bytes > opt.quota)
 826         {
 827           status = QUOTEXC;
 828           break;
 829         }
 830       if ((opt.recursive || opt.page_requisites)
 831           && cur_url->url->scheme != SCHEME_FTP)
 832         status = retrieve_tree (cur_url->url->url);
 833       else
 834         status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
 835
 836       if (filename && opt.delete_after && file_exists_p (filename))
 837         {
 838           DEBUGP (("\
 839 Removing file due to --delete-after in retrieve_from_file():\n"));
 840           logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
 841           if (unlink (filename))
 842             logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
 843           dt &= ~RETROKF;
 844         }
 845
 846       xfree_null (new_file);
 847       xfree_null (filename);
 848     }
 849
 850   /* Free the linked list of URL-s.  */
 851   free_urlpos (url_list);
 852
 853   return status;
 854 }
 855
 856 /* Print `giving up', or `retrying', depending on the impending
 857    action.  N1 and N2 are the attempt number and the attempt limit.  */
 858 void
 859 printwhat (int n1, int n2)
 860 {
 861   logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
 862 }
 863
 864 /* If opt.wait or opt.waitretry are specified, and if certain
 865    conditions are met, sleep the appropriate number of seconds.  See
 866    the documentation of --wait and --waitretry for more information.
 867
 868    COUNT is the count of current retrieval, beginning with 1. */
 869
 870 void
 871 sleep_between_retrievals (int count)
 872 {
 873   static bool first_retrieval = true;
 874
 875   if (first_retrieval)
 876     {
 877       /* Don't sleep before the very first retrieval. */
 878       first_retrieval = false;
 879       return;
 880     }
 881
 882   if (opt.waitretry && count > 1)
 883     {
 884       /* If opt.waitretry is specified and this is a retry, wait for
 885          COUNT-1 number of seconds, or for opt.waitretry seconds.  */
 886       if (count <= opt.waitretry)
 887         xsleep (count - 1);
 888       else
 889         xsleep (opt.waitretry);
 890     }
 891   else if (opt.wait)
 892     {
 893       if (!opt.random_wait || count > 1)
 894         /* If random-wait is not specified, or if we are sleeping
 895            between retries of the same download, sleep the fixed
 896            interval.  */
 897         xsleep (opt.wait);
 898       else
 899         {
 900           /* Sleep a random amount of time averaging in opt.wait
 901              seconds.  The sleeping amount ranges from 0 to
 902              opt.wait*2, inclusive.  */
 903           double waitsecs = 2 * opt.wait * random_float ();
 904           DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
 905                    opt.wait, waitsecs));
 906           xsleep (waitsecs);
 907         }
 908     }
 909 }
 910
 911 /* Free the linked list of urlpos.  */
 912 void
 913 free_urlpos (struct urlpos *l)
 914 {
 915   while (l)
 916     {
 917       struct urlpos *next = l->next;
 918       if (l->url)
 919         url_free (l->url);
 920       xfree_null (l->local_name);
 921       xfree (l);
 922       l = next;
 923     }
 924 }
 925
 926 /* Rotate FNAME opt.backups times */
 927 void
 928 rotate_backups(const char *fname)
 929 {
 930   int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1;
 931   char *from = (char *)alloca (maxlen);
 932   char *to = (char *)alloca (maxlen);
 933   struct_stat sb;
 934   int i;
 935
 936   if (stat (fname, &sb) == 0)
 937     if (S_ISREG (sb.st_mode) == 0)
 938       return;
 939
 940   for (i = opt.backups; i > 1; i--)
 941     {
 942       sprintf (from, "%s.%d", fname, i - 1);
 943       sprintf (to, "%s.%d", fname, i);
 944       rename (from, to);
 945     }
 946
 947   sprintf (to, "%s.%d", fname, 1);
 948   rename(fname, to);
 949 }
 950
 951 static bool no_proxy_match (const char *, const char **);
 952
 953 /* Return the URL of the proxy appropriate for url U.  */
 954
 955 static char *
 956 getproxy (struct url *u)
 957 {
 958   char *proxy = NULL;
 959   char *rewritten_url;
 960   static char rewritten_storage[1024];
 961
 962   if (!opt.use_proxy)
 963     return NULL;
 964   if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
 965     return NULL;
 966
 967   switch (u->scheme)
 968     {
 969     case SCHEME_HTTP:
 970       proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
 971       break;
 972 #ifdef HAVE_SSL
 973     case SCHEME_HTTPS:
 974       proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
 975       break;
 976 #endif
 977     case SCHEME_FTP:
 978       proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
 979       break;
 980     case SCHEME_INVALID:
 981       break;
 982     }
 983   if (!proxy || !*proxy)
 984     return NULL;
 985
 986   /* Handle shorthands.  `rewritten_storage' is a kludge to allow
 987      getproxy() to return static storage. */
 988   rewritten_url = rewrite_shorthand_url (proxy);
 989   if (rewritten_url)
 990     {
 991       strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage));
 992       rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
 993       proxy = rewritten_storage;
 994     }
 995
 996   return proxy;
 997 }
 998
 999 /* Should a host be accessed through proxy, concerning no_proxy?  */
1000 static bool
1001 no_proxy_match (const char *host, const char **no_proxy)
1002 {
1003   if (!no_proxy)
1004     return true;
1005   else
1006     return !sufmatch (no_proxy, host);
1007 }