sjero.net Git - wget/blob - src/retr.c

   1 /* File retrieval.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or (at
   9 your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <sys/types.h>
  35 #ifdef HAVE_UNISTD_H
  36 # include <unistd.h>
  37 #endif /* HAVE_UNISTD_H */
  38 #include <errno.h>
  39 #ifdef HAVE_STRING_H
  40 # include <string.h>
  41 #else
  42 # include <strings.h>
  43 #endif /* HAVE_STRING_H */
  44 #include <assert.h>
  45
  46 #include "wget.h"
  47 #include "utils.h"
  48 #include "retr.h"
  49 #include "progress.h"
  50 #include "url.h"
  51 #include "recur.h"
  52 #include "ftp.h"
  53 #include "host.h"
  54 #include "connect.h"
  55 #include "hash.h"
  56 #include "convert.h"
  57 #include "ptimer.h"
  58
  59 #ifndef errno
  60 extern int errno;
  61 #endif
  62
  63 /* Total size of downloaded files.  Used to enforce quota.  */
  64 LARGE_INT total_downloaded_bytes;
  65
  66 /* If non-NULL, the stream to which output should be written.  This
  67    stream is initialized when `-O' is used.  */
  68 FILE *output_stream;
  69
  70 /* Whether output_document is a regular file we can manipulate,
  71    i.e. not `-' or a device file. */
  72 int output_stream_regular;
  73 \f
/* Bandwidth-throttling state shared by limit_bandwidth_reset and
   limit_bandwidth.  */
static struct {
  wgint chunk_bytes;            /* bytes accumulated in the current chunk */
  double chunk_start;           /* timer reading taken when the current
                                   chunk began */
  double sleep_adjust;          /* requested minus actual duration of the
                                   last sleep; added to the next sleep */
} limit_data;
  79
  80 static void
  81 limit_bandwidth_reset (void)
  82 {
  83   limit_data.chunk_bytes = 0;
  84   limit_data.chunk_start = 0;
  85 }
  86
  87 /* Limit the bandwidth by pausing the download for an amount of time.
  88    BYTES is the number of bytes received from the network, and TIMER
  89    is the timer that started at the beginning of download.  */
  90
  91 static void
  92 limit_bandwidth (wgint bytes, struct ptimer *timer)
  93 {
  94   double delta_t = ptimer_read (timer) - limit_data.chunk_start;
  95   double expected;
  96
  97   limit_data.chunk_bytes += bytes;
  98
  99   /* Calculate the amount of time we expect downloading the chunk
 100      should take.  If in reality it took less time, sleep to
 101      compensate for the difference.  */
 102   expected = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;
 103
 104   if (expected > delta_t)
 105     {
 106       double slp = expected - delta_t + limit_data.sleep_adjust;
 107       double t0, t1;
 108       if (slp < 200)
 109         {
 110           DEBUGP (("deferring a %.2f ms sleep (%s/%.2f).\n",
 111                    slp, number_to_static_string (limit_data.chunk_bytes),
 112                    delta_t));
 113           return;
 114         }
 115       DEBUGP (("\nsleeping %.2f ms for %s bytes, adjust %.2f ms\n",
 116                slp, number_to_static_string (limit_data.chunk_bytes),
 117                limit_data.sleep_adjust));
 118
 119       t0 = ptimer_read (timer);
 120       xsleep (slp / 1000);
 121       t1 = ptimer_measure (timer);
 122
 123       /* Due to scheduling, we probably slept slightly longer (or
 124          shorter) than desired.  Calculate the difference between the
 125          desired and the actual sleep, and adjust the next sleep by
 126          that amount.  */
 127       limit_data.sleep_adjust = slp - (t1 - t0);
 128     }
 129
 130   limit_data.chunk_bytes = 0;
 131   limit_data.chunk_start = ptimer_read (timer);
 132 }
 133
/* Smaller of I and J.  Defined only if no earlier header already
   provided MIN.  Each argument may be evaluated twice, so pass only
   expressions without side effects.  */
#ifndef MIN
# define MIN(i, j) ((i) <= (j) ? (i) : (j))
#endif
 137
 138 /* Write data in BUF to OUT.  However, if *SKIP is non-zero, skip that
 139    amount of data and decrease SKIP.  Increment *TOTAL by the amount
 140    of data written.  */
 141
 142 static int
 143 write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
 144             wgint *written)
 145 {
 146   if (!out)
 147     return 1;
 148   if (*skip > bufsize)
 149     {
 150       *skip -= bufsize;
 151       return 1;
 152     }
 153   if (*skip)
 154     {
 155       buf += *skip;
 156       bufsize -= *skip;
 157       *skip = 0;
 158       if (bufsize == 0)
 159         return 1;
 160     }
 161
 162   fwrite (buf, 1, bufsize, out);
 163   *written += bufsize;
 164
 165   /* Immediately flush the downloaded data.  This should not hinder
 166      performance: fast downloads will arrive in large 16K chunks
 167      (which stdio would write out immediately anyway), and slow
 168      downloads wouldn't be limited by disk speed.  */
 169   fflush (out);
 170   return !ferror (out);
 171 }
 172
 173 /* Read the contents of file descriptor FD until it the connection
 174    terminates or a read error occurs.  The data is read in portions of
 175    up to 16K and written to OUT as it arrives.  If opt.verbose is set,
 176    the progress is shown.
 177
 178    TOREAD is the amount of data expected to arrive, normally only used
 179    by the progress gauge.
 180
 181    STARTPOS is the position from which the download starts, used by
 182    the progress gauge.  If QTYREAD is non-NULL, the value it points to
 183    is incremented by the amount of data read from the network.  If
 184    QTYWRITTEN is non-NULL, the value it points to is incremented by
 185    the amount of data written to disk.  The time it took to download
 186    the data (in milliseconds) is stored to ELAPSED.
 187
 188    The function exits and returns the amount of data read.  In case of
 189    error while reading data, -1 is returned.  In case of error while
 190    writing data, -2 is returned.  */
 191
 192 int
 193 fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
 194               wgint *qtyread, wgint *qtywritten, double *elapsed, int flags)
 195 {
 196   int ret = 0;
 197
 198   static char dlbuf[16384];
 199   int dlbufsize = sizeof (dlbuf);
 200
 201   struct ptimer *timer = NULL;
 202   double last_successful_read_tm = 0;
 203
 204   /* The progress gauge, set according to the user preferences. */
 205   void *progress = NULL;
 206
 207   /* Non-zero if the progress gauge is interactive, i.e. if it can
 208      continually update the display.  When true, smaller timeout
 209      values are used so that the gauge can update the display when
 210      data arrives slowly. */
 211   int progress_interactive = 0;
 212
 213   int exact = flags & rb_read_exactly;
 214   wgint skip = 0;
 215
 216   /* How much data we've read/written.  */
 217   wgint sum_read = 0;
 218   wgint sum_written = 0;
 219
 220   if (flags & rb_skip_startpos)
 221     skip = startpos;
 222
 223   if (opt.verbose)
 224     {
 225       /* If we're skipping STARTPOS bytes, pass 0 as the INITIAL
 226          argument to progress_create because the indicator doesn't
 227          (yet) know about "skipping" data.  */
 228       progress = progress_create (skip ? 0 : startpos, startpos + toread);
 229       progress_interactive = progress_interactive_p (progress);
 230     }
 231
 232   if (opt.limit_rate)
 233     limit_bandwidth_reset ();
 234
 235   /* A timer is needed for tracking progress, for throttling, and for
 236      tracking elapsed time.  If either of these are requested, start
 237      the timer.  */
 238   if (progress || opt.limit_rate || elapsed)
 239     {
 240       timer = ptimer_new ();
 241       last_successful_read_tm = 0;
 242     }
 243
 244   /* Use a smaller buffer for low requested bandwidths.  For example,
 245      with --limit-rate=2k, it doesn't make sense to slurp in 16K of
 246      data and then sleep for 8s.  With buffer size equal to the limit,
 247      we never have to sleep for more than one second.  */
 248   if (opt.limit_rate && opt.limit_rate < dlbufsize)
 249     dlbufsize = opt.limit_rate;
 250
 251   /* Read from FD while there is data to read.  Normally toread==0
 252      means that it is unknown how much data is to arrive.  However, if
 253      EXACT is set, then toread==0 means what it says: that no data
 254      should be read.  */
 255   while (!exact || (sum_read < toread))
 256     {
 257       int rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize;
 258       double tmout = opt.read_timeout;
 259       if (progress_interactive)
 260         {
 261           /* For interactive progress gauges, always specify a ~1s
 262              timeout, so that the gauge can be updated regularly even
 263              when the data arrives very slowly or stalls.  */
 264           tmout = 0.95;
 265           if (opt.read_timeout)
 266             {
 267               double waittm;
 268               waittm = (ptimer_read (timer) - last_successful_read_tm) / 1000;
 269               if (waittm + tmout > opt.read_timeout)
 270                 {
 271                   /* Don't let total idle time exceed read timeout. */
 272                   tmout = opt.read_timeout - waittm;
 273                   if (tmout < 0)
 274                     {
 275                       /* We've already exceeded the timeout. */
 276                       ret = -1, errno = ETIMEDOUT;
 277                       break;
 278                     }
 279                 }
 280             }
 281         }
 282       ret = fd_read (fd, dlbuf, rdsize, tmout);
 283
 284       if (ret == 0 || (ret < 0 && errno != ETIMEDOUT))
 285         break;                  /* read error */
 286       else if (ret < 0)
 287         ret = 0;                /* read timeout */
 288
 289       if (progress || opt.limit_rate)
 290         {
 291           ptimer_measure (timer);
 292           if (ret > 0)
 293             last_successful_read_tm = ptimer_read (timer);
 294         }
 295
 296       if (ret > 0)
 297         {
 298           sum_read += ret;
 299           if (!write_data (out, dlbuf, ret, &skip, &sum_written))
 300             {
 301               ret = -2;
 302               goto out;
 303             }
 304         }
 305
 306       if (opt.limit_rate)
 307         limit_bandwidth (ret, timer);
 308
 309       if (progress)
 310         progress_update (progress, ret, ptimer_read (timer));
 311 #ifdef WINDOWS
 312       if (toread > 0 && !opt.quiet)
 313         ws_percenttitle (100.0 *
 314                          (startpos + sum_read) / (startpos + toread));
 315 #endif
 316     }
 317   if (ret < -1)
 318     ret = -1;
 319
 320  out:
 321   if (progress)
 322     progress_finish (progress, ptimer_read (timer));
 323
 324   if (elapsed)
 325     *elapsed = ptimer_read (timer);
 326   if (timer)
 327     ptimer_destroy (timer);
 328
 329   if (qtyread)
 330     *qtyread += sum_read;
 331   if (qtywritten)
 332     *qtywritten += sum_written;
 333
 334   return ret;
 335 }
 336 \f
 337 /* Read a hunk of data from FD, up until a terminator.  The terminator
 338    is whatever the TERMINATOR function determines it to be; for
 339    example, it can be a line of data, or the head of an HTTP response.
 340    The function returns the data read allocated with malloc.
 341
 342    In case of error, NULL is returned.  In case of EOF and no data
 343    read, NULL is returned and errno set to 0.  In case of EOF with
 344    data having been read, the data is returned, but it will
 345    (obviously) not contain the terminator.
 346
 347    The idea is to be able to read a line of input, or otherwise a hunk
 348    of text, such as the head of an HTTP request, without crossing the
 349    boundary, so that the next call to fd_read etc. reads the data
 350    after the hunk.  To achieve that, this function does the following:
 351
 352    1. Peek at available data.
 353
 354    2. Determine whether the peeked data, along with the previously
 355       read data, includes the terminator.
 356
 357       2a. If yes, read the data until the end of the terminator, and
 358           exit.
 359
 360       2b. If no, read the peeked data and goto 1.
 361
 362    The function is careful to assume as little as possible about the
 363    implementation of peeking.  For example, every peek is followed by
 364    a read.  If the read returns a different amount of data, the
 365    process is retried until all data arrives safely.
 366
 367    SIZEHINT is the buffer size sufficient to hold all the data in the
 368    typical case (it is used as the initial buffer size).  MAXSIZE is
 369    the maximum amount of memory this function is allowed to allocate,
 370    or 0 if no upper limit is to be enforced.
 371
 372    This function should be used as a building block for other
 373    functions -- see fd_read_line as a simple example.  */
 374
 375 char *
 376 fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
 377 {
 378   long bufsize = sizehint;
 379   char *hunk = xmalloc (bufsize);
 380   int tail = 0;                 /* tail position in HUNK */
 381
 382   assert (maxsize >= bufsize);
 383
 384   while (1)
 385     {
 386       const char *end;
 387       int pklen, rdlen, remain;
 388
 389       /* First, peek at the available data. */
 390
 391       pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1);
 392       if (pklen < 0)
 393         {
 394           xfree (hunk);
 395           return NULL;
 396         }
 397       end = terminator (hunk, tail, pklen);
 398       if (end)
 399         {
 400           /* The data contains the terminator: we'll drain the data up
 401              to the end of the terminator.  */
 402           remain = end - (hunk + tail);
 403           if (remain == 0)
 404             {
 405               /* No more data needs to be read. */
 406               hunk[tail] = '\0';
 407               return hunk;
 408             }
 409           if (bufsize - 1 < tail + remain)
 410             {
 411               bufsize = tail + remain + 1;
 412               hunk = xrealloc (hunk, bufsize);
 413             }
 414         }
 415       else
 416         /* No terminator: simply read the data we know is (or should
 417            be) available.  */
 418         remain = pklen;
 419
 420       /* Now, read the data.  Note that we make no assumptions about
 421          how much data we'll get.  (Some TCP stacks are notorious for
 422          read returning less data than the previous MSG_PEEK.)  */
 423
 424       rdlen = fd_read (fd, hunk + tail, remain, 0);
 425       if (rdlen < 0)
 426         {
 427           xfree_null (hunk);
 428           return NULL;
 429         }
 430       tail += rdlen;
 431       hunk[tail] = '\0';
 432
 433       if (rdlen == 0)
 434         {
 435           if (tail == 0)
 436             {
 437               /* EOF without anything having been read */
 438               xfree (hunk);
 439               errno = 0;
 440               return NULL;
 441             }
 442           else
 443             /* EOF seen: return the data we've read. */
 444             return hunk;
 445         }
 446       if (end && rdlen == remain)
 447         /* The terminator was seen and the remaining data drained --
 448            we got what we came for.  */
 449         return hunk;
 450
 451       /* Keep looping until all the data arrives. */
 452
 453       if (tail == bufsize - 1)
 454         {
 455           /* Double the buffer size, but refuse to allocate more than
 456              MAXSIZE bytes.  */
 457           if (maxsize && bufsize >= maxsize)
 458             {
 459               xfree (hunk);
 460               errno = ENOMEM;
 461               return NULL;
 462             }
 463           bufsize <<= 1;
 464           if (maxsize && bufsize > maxsize)
 465             bufsize = maxsize;
 466           hunk = xrealloc (hunk, bufsize);
 467         }
 468     }
 469 }
 470
 471 static const char *
 472 line_terminator (const char *hunk, int oldlen, int peeklen)
 473 {
 474   const char *p = memchr (hunk + oldlen, '\n', peeklen);
 475   if (p)
 476     /* p+1 because we want the line to include '\n' */
 477     return p + 1;
 478   return NULL;
 479 }
 480
/* The maximum size of the single line we agree to accept.  This is
   not meant to impose an arbitrary limit, but to protect the user
   from Wget slurping up available memory upon encountering malicious
   or buggy server output.  Define it to 0 to remove the limit.

   The value is handed to fd_read_hunk as its MAXSIZE argument by
   fd_read_line.  */
#define FD_READ_LINE_MAX 4096
 486
 487 /* Read one line from FD and return it.  The line is allocated using
 488    malloc, but is never larger than FD_READ_LINE_MAX.
 489
 490    If an error occurs, or if no data can be read, NULL is returned.
 491    In the former case errno indicates the error condition, and in the
 492    latter case, errno is NULL.  */
 493
 494 char *
 495 fd_read_line (int fd)
 496 {
 497   return fd_read_hunk (fd, line_terminator, 128, FD_READ_LINE_MAX);
 498 }
 499 \f
 500 /* Return a printed representation of the download rate, as
 501    appropriate for the speed.  If PAD is non-zero, strings will be
 502    padded to the width of 7 characters (xxxx.xx).  */
 503 char *
 504 retr_rate (wgint bytes, double msecs, int pad)
 505 {
 506   static char res[20];
 507   static const char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
 508   int units = 0;
 509
 510   double dlrate = calc_rate (bytes, msecs, &units);
 511   sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);
 512
 513   return res;
 514 }
 515
 516 /* Calculate the download rate and trim it as appropriate for the
 517    speed.  Appropriate means that if rate is greater than 1K/s,
 518    kilobytes are used, and if rate is greater than 1MB/s, megabytes
 519    are used.
 520
 521    UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
 522    GB/s.  */
 523 double
 524 calc_rate (wgint bytes, double msecs, int *units)
 525 {
 526   double dlrate;
 527
 528   assert (msecs >= 0);
 529   assert (bytes >= 0);
 530
 531   if (msecs == 0)
 532     /* If elapsed time is exactly zero, it means we're under the
 533        resolution of the timer.  This can easily happen on systems
 534        that use time() for the timer.  Since the interval lies between
 535        0 and the timer's resolution, assume half the resolution.  */
 536     msecs = ptimer_resolution () / 2.0;
 537
 538   dlrate = 1000.0 * bytes / msecs;
 539   if (dlrate < 1024.0)
 540     *units = 0;
 541   else if (dlrate < 1024.0 * 1024.0)
 542     *units = 1, dlrate /= 1024.0;
 543   else if (dlrate < 1024.0 * 1024.0 * 1024.0)
 544     *units = 2, dlrate /= (1024.0 * 1024.0);
 545   else
 546     /* Maybe someone will need this, one day. */
 547     *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
 548
 549   return dlrate;
 550 }
 551 \f
/* Maximum number of allowed redirections.  20 was chosen as a
   "reasonable" value, which is low enough to not cause havoc, yet
   high enough to guarantee that normal retrievals will not be hurt by
   the check.  */

#define MAX_REDIRECTIONS 20

/* Move opt.post_data and opt.post_file_name into the locals
   saved_post_data and saved_post_file_name, clear both options, and
   set post_data_suspended.  The three locals are not declared by the
   macro; the function that expands it must provide them.  */
#define SUSPEND_POST_DATA do {                  \
  post_data_suspended = 1;                      \
  saved_post_data = opt.post_data;              \
  saved_post_file_name = opt.post_file_name;    \
  opt.post_data = NULL;                         \
  opt.post_file_name = NULL;                    \
} while (0)

/* Counterpart of SUSPEND_POST_DATA: if post_data_suspended is set,
   copy the saved values back into opt and clear the flag.  Does
   nothing when nothing was suspended, so it is safe to expand on
   every exit path.  */
#define RESTORE_POST_DATA do {                          \
  if (post_data_suspended)                              \
    {                                                   \
      opt.post_data = saved_post_data;                  \
      opt.post_file_name = saved_post_file_name;        \
      post_data_suspended = 0;                          \
    }                                                   \
} while (0)

/* Forward declaration; getproxy is used by retrieve_url before its
   definition.  */
static char *getproxy PARAMS ((struct url *));
 577
 578 /* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
 579    FTP, proxy, etc.  */
 580
 581 /* #### This function should be rewritten so it doesn't return from
 582    multiple points. */
 583
 584 uerr_t
 585 retrieve_url (const char *origurl, char **file, char **newloc,
 586               const char *refurl, int *dt)
 587 {
 588   uerr_t result;
 589   char *url;
 590   int location_changed, dummy;
 591   char *mynewloc, *proxy;
 592   struct url *u, *proxy_url;
 593   int up_error_code;            /* url parse error code */
 594   char *local_file;
 595   int redirection_count = 0;
 596
 597   int post_data_suspended = 0;
 598   char *saved_post_data = NULL;
 599   char *saved_post_file_name = NULL;
 600
 601   /* If dt is NULL, use local storage.  */
 602   if (!dt)
 603     {
 604       dt = &dummy;
 605       dummy = 0;
 606     }
 607   url = xstrdup (origurl);
 608   if (newloc)
 609     *newloc = NULL;
 610   if (file)
 611     *file = NULL;
 612
 613   u = url_parse (url, &up_error_code);
 614   if (!u)
 615     {
 616       logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
 617       xfree (url);
 618       return URLERROR;
 619     }
 620
 621   if (!refurl)
 622     refurl = opt.referer;
 623
 624  redirected:
 625
 626   result = NOCONERROR;
 627   mynewloc = NULL;
 628   local_file = NULL;
 629   proxy_url = NULL;
 630
 631   proxy = getproxy (u);
 632   if (proxy)
 633     {
 634       /* Parse the proxy URL.  */
 635       proxy_url = url_parse (proxy, &up_error_code);
 636       if (!proxy_url)
 637         {
 638           logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
 639                      proxy, url_error (up_error_code));
 640           xfree (url);
 641           RESTORE_POST_DATA;
 642           return PROXERR;
 643         }
 644       if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
 645         {
 646           logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
 647           url_free (proxy_url);
 648           xfree (url);
 649           RESTORE_POST_DATA;
 650           return PROXERR;
 651         }
 652     }
 653
 654   if (u->scheme == SCHEME_HTTP
 655 #ifdef HAVE_SSL
 656       || u->scheme == SCHEME_HTTPS
 657 #endif
 658       || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
 659     {
 660       result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
 661     }
 662   else if (u->scheme == SCHEME_FTP)
 663     {
 664       /* If this is a redirection, we must not allow recursive FTP
 665          retrieval, so we save recursion to oldrec, and restore it
 666          later.  */
 667       int oldrec = opt.recursive;
 668       if (redirection_count)
 669         opt.recursive = 0;
 670       result = ftp_loop (u, dt, proxy_url);
 671       opt.recursive = oldrec;
 672
 673       /* There is a possibility of having HTTP being redirected to
 674          FTP.  In these cases we must decide whether the text is HTML
 675          according to the suffix.  The HTML suffixes are `.html',
 676          `.htm' and a few others, case-insensitive.  */
 677       if (redirection_count && local_file && u->scheme == SCHEME_FTP)
 678         {
 679           if (has_html_suffix_p (local_file))
 680             *dt |= TEXTHTML;
 681         }
 682     }
 683
 684   if (proxy_url)
 685     {
 686       url_free (proxy_url);
 687       proxy_url = NULL;
 688     }
 689
 690   location_changed = (result == NEWLOCATION);
 691   if (location_changed)
 692     {
 693       char *construced_newloc;
 694       struct url *newloc_parsed;
 695
 696       assert (mynewloc != NULL);
 697
 698       if (local_file)
 699         xfree (local_file);
 700
 701       /* The HTTP specs only allow absolute URLs to appear in
 702          redirects, but a ton of boneheaded webservers and CGIs out
 703          there break the rules and use relative URLs, and popular
 704          browsers are lenient about this, so wget should be too. */
 705       construced_newloc = uri_merge (url, mynewloc);
 706       xfree (mynewloc);
 707       mynewloc = construced_newloc;
 708
 709       /* Now, see if this new location makes sense. */
 710       newloc_parsed = url_parse (mynewloc, &up_error_code);
 711       if (!newloc_parsed)
 712         {
 713           logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
 714                      url_error (up_error_code));
 715           url_free (u);
 716           xfree (url);
 717           xfree (mynewloc);
 718           RESTORE_POST_DATA;
 719           return result;
 720         }
 721
 722       /* Now mynewloc will become newloc_parsed->url, because if the
 723          Location contained relative paths like .././something, we
 724          don't want that propagating as url.  */
 725       xfree (mynewloc);
 726       mynewloc = xstrdup (newloc_parsed->url);
 727
 728       /* Check for max. number of redirections.  */
 729       if (++redirection_count > MAX_REDIRECTIONS)
 730         {
 731           logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
 732                      MAX_REDIRECTIONS);
 733           url_free (newloc_parsed);
 734           url_free (u);
 735           xfree (url);
 736           xfree (mynewloc);
 737           RESTORE_POST_DATA;
 738           return WRONGCODE;
 739         }
 740
 741       xfree (url);
 742       url = mynewloc;
 743       url_free (u);
 744       u = newloc_parsed;
 745
 746       /* If we're being redirected from POST, we don't want to POST
 747          again.  Many requests answer POST with a redirection to an
 748          index page; that redirection is clearly a GET.  We "suspend"
 749          POST data for the duration of the redirections, and restore
 750          it when we're done. */
 751       if (!post_data_suspended)
 752         SUSPEND_POST_DATA;
 753
 754       goto redirected;
 755     }
 756
 757   if (local_file)
 758     {
 759       if (*dt & RETROKF)
 760         {
 761           register_download (u->url, local_file);
 762           if (redirection_count && 0 != strcmp (origurl, u->url))
 763             register_redirection (origurl, u->url);
 764           if (*dt & TEXTHTML)
 765             register_html (u->url, local_file);
 766         }
 767     }
 768
 769   if (file)
 770     *file = local_file ? local_file : NULL;
 771   else
 772     xfree_null (local_file);
 773
 774   url_free (u);
 775
 776   if (redirection_count)
 777     {
 778       if (newloc)
 779         *newloc = url;
 780       else
 781         xfree (url);
 782     }
 783   else
 784     {
 785       if (newloc)
 786         *newloc = NULL;
 787       xfree (url);
 788     }
 789
 790   RESTORE_POST_DATA;
 791
 792   return result;
 793 }
 794
 795 /* Find the URLs in the file and call retrieve_url() for each of
 796    them.  If HTML is non-zero, treat the file as HTML, and construct
 797    the URLs accordingly.
 798
 799    If opt.recursive is set, call retrieve_tree() for each file.  */
 800
 801 uerr_t
 802 retrieve_from_file (const char *file, int html, int *count)
 803 {
 804   uerr_t status;
 805   struct urlpos *url_list, *cur_url;
 806
 807   url_list = (html ? get_urls_html (file, NULL, NULL)
 808               : get_urls_file (file));
 809   status = RETROK;             /* Suppose everything is OK.  */
 810   *count = 0;                  /* Reset the URL count.  */
 811
 812   for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
 813     {
 814       char *filename = NULL, *new_file = NULL;
 815       int dt;
 816
 817       if (cur_url->ignore_when_downloading)
 818         continue;
 819
 820       if (opt.quota && total_downloaded_bytes > opt.quota)
 821         {
 822           status = QUOTEXC;
 823           break;
 824         }
 825       if ((opt.recursive || opt.page_requisites)
 826           && cur_url->url->scheme != SCHEME_FTP)
 827         status = retrieve_tree (cur_url->url->url);
 828       else
 829         status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
 830
 831       if (filename && opt.delete_after && file_exists_p (filename))
 832         {
 833           DEBUGP (("Removing file due to --delete-after in"
 834                    " retrieve_from_file():\n"));
 835           logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
 836           if (unlink (filename))
 837             logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
 838           dt &= ~RETROKF;
 839         }
 840
 841       xfree_null (new_file);
 842       xfree_null (filename);
 843     }
 844
 845   /* Free the linked list of URL-s.  */
 846   free_urlpos (url_list);
 847
 848   return status;
 849 }
 850
 851 /* Print `giving up', or `retrying', depending on the impending
 852    action.  N1 and N2 are the attempt number and the attempt limit.  */
 853 void
 854 printwhat (int n1, int n2)
 855 {
 856   logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
 857 }
 858
 859 /* If opt.wait or opt.waitretry are specified, and if certain
 860    conditions are met, sleep the appropriate number of seconds.  See
 861    the documentation of --wait and --waitretry for more information.
 862
 863    COUNT is the count of current retrieval, beginning with 1. */
 864
 865 void
 866 sleep_between_retrievals (int count)
 867 {
 868   static int first_retrieval = 1;
 869
 870   if (first_retrieval)
 871     {
 872       /* Don't sleep before the very first retrieval. */
 873       first_retrieval = 0;
 874       return;
 875     }
 876
 877   if (opt.waitretry && count > 1)
 878     {
 879       /* If opt.waitretry is specified and this is a retry, wait for
 880          COUNT-1 number of seconds, or for opt.waitretry seconds.  */
 881       if (count <= opt.waitretry)
 882         xsleep (count - 1);
 883       else
 884         xsleep (opt.waitretry);
 885     }
 886   else if (opt.wait)
 887     {
 888       if (!opt.random_wait || count > 1)
 889         /* If random-wait is not specified, or if we are sleeping
 890            between retries of the same download, sleep the fixed
 891            interval.  */
 892         xsleep (opt.wait);
 893       else
 894         {
 895           /* Sleep a random amount of time averaging in opt.wait
 896              seconds.  The sleeping amount ranges from 0 to
 897              opt.wait*2, inclusive.  */
 898           double waitsecs = 2 * opt.wait * random_float ();
 899           DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
 900                    opt.wait, waitsecs));
 901           xsleep (waitsecs);
 902         }
 903     }
 904 }
 905
 906 /* Free the linked list of urlpos.  */
 907 void
 908 free_urlpos (struct urlpos *l)
 909 {
 910   while (l)
 911     {
 912       struct urlpos *next = l->next;
 913       if (l->url)
 914         url_free (l->url);
 915       xfree_null (l->local_name);
 916       xfree (l);
 917       l = next;
 918     }
 919 }
 920
 921 /* Rotate FNAME opt.backups times */
 922 void
 923 rotate_backups(const char *fname)
 924 {
 925   int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1;
 926   char *from = (char *)alloca (maxlen);
 927   char *to = (char *)alloca (maxlen);
 928   struct_stat sb;
 929   int i;
 930
 931   if (stat (fname, &sb) == 0)
 932     if (S_ISREG (sb.st_mode) == 0)
 933       return;
 934
 935   for (i = opt.backups; i > 1; i--)
 936     {
 937       sprintf (from, "%s.%d", fname, i - 1);
 938       sprintf (to, "%s.%d", fname, i);
 939       rename (from, to);
 940     }
 941
 942   sprintf (to, "%s.%d", fname, 1);
 943   rename(fname, to);
 944 }
 945
 946 static int no_proxy_match PARAMS ((const char *, const char **));
 947
 948 /* Return the URL of the proxy appropriate for url U.  */
 949
 950 static char *
 951 getproxy (struct url *u)
 952 {
 953   char *proxy = NULL;
 954   char *rewritten_url;
 955   static char rewritten_storage[1024];
 956
 957   if (!opt.use_proxy)
 958     return NULL;
 959   if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
 960     return NULL;
 961
 962   switch (u->scheme)
 963     {
 964     case SCHEME_HTTP:
 965       proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
 966       break;
 967 #ifdef HAVE_SSL
 968     case SCHEME_HTTPS:
 969       proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
 970       break;
 971 #endif
 972     case SCHEME_FTP:
 973       proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
 974       break;
 975     case SCHEME_INVALID:
 976       break;
 977     }
 978   if (!proxy || !*proxy)
 979     return NULL;
 980
 981   /* Handle shorthands.  `rewritten_storage' is a kludge to allow
 982      getproxy() to return static storage. */
 983   rewritten_url = rewrite_shorthand_url (proxy);
 984   if (rewritten_url)
 985     {
 986       strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage));
 987       rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
 988       proxy = rewritten_storage;
 989     }
 990
 991   return proxy;
 992 }
 993
 994 /* Should a host be accessed through proxy, concerning no_proxy?  */
 995 int
 996 no_proxy_match (const char *host, const char **no_proxy)
 997 {
 998   if (!no_proxy)
 999     return 1;
1000   else
1001     return !sufmatch (no_proxy, host);
1002 }