sjero.net Git - wget/blob - src/retr.c

   1 /* File retrieval.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or (at
   9 your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <sys/types.h>
  35 #ifdef HAVE_UNISTD_H
  36 # include <unistd.h>
  37 #endif /* HAVE_UNISTD_H */
  38 #include <errno.h>
  39 #ifdef HAVE_STRING_H
  40 # include <string.h>
  41 #else
  42 # include <strings.h>
  43 #endif /* HAVE_STRING_H */
  44 #include <assert.h>
  45
  46 #include "wget.h"
  47 #include "utils.h"
  48 #include "retr.h"
  49 #include "progress.h"
  50 #include "url.h"
  51 #include "recur.h"
  52 #include "ftp.h"
  53 #include "host.h"
  54 #include "connect.h"
  55 #include "hash.h"
  56 #include "convert.h"
  57
  58 #ifdef HAVE_SSL
  59 # include "gen_sslfunc.h"       /* for ssl_iread */
  60 #endif
  61
  62 #ifndef errno
  63 extern int errno;
  64 #endif
  65
/* Total size of downloaded files.  Used to enforce quota: it is
   compared against opt.quota before each retrieval in
   retrieve_from_file.  */
LARGE_INT total_downloaded_bytes;

/* If non-NULL, the stream to which output should be written.  This
   stream is initialized when `-O' is used.  */
FILE *output_stream;

/* Whether output_document is a regular file we can manipulate,
   i.e. not `-' or a device file. */
int output_stream_regular;
  76 \f
  77 static struct {
  78   long chunk_bytes;
  79   double chunk_start;
  80   double sleep_adjust;
  81 } limit_data;
  82
  83 static void
  84 limit_bandwidth_reset (void)
  85 {
  86   limit_data.chunk_bytes = 0;
  87   limit_data.chunk_start = 0;
  88 }
  89
  90 /* Limit the bandwidth by pausing the download for an amount of time.
  91    BYTES is the number of bytes received from the network, and TIMER
  92    is the timer that started at the beginning of download.  */
  93
  94 static void
  95 limit_bandwidth (long bytes, struct wget_timer *timer)
  96 {
  97   double delta_t = wtimer_read (timer) - limit_data.chunk_start;
  98   double expected;
  99
 100   limit_data.chunk_bytes += bytes;
 101
 102   /* Calculate the amount of time we expect downloading the chunk
 103      should take.  If in reality it took less time, sleep to
 104      compensate for the difference.  */
 105   expected = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;
 106
 107   if (expected > delta_t)
 108     {
 109       double slp = expected - delta_t + limit_data.sleep_adjust;
 110       double t0, t1;
 111       if (slp < 200)
 112         {
 113           DEBUGP (("deferring a %.2f ms sleep (%ld/%.2f).\n",
 114                    slp, limit_data.chunk_bytes, delta_t));
 115           return;
 116         }
 117       DEBUGP (("\nsleeping %.2f ms for %ld bytes, adjust %.2f ms\n",
 118                slp, limit_data.chunk_bytes, limit_data.sleep_adjust));
 119
 120       t0 = wtimer_read (timer);
 121       xsleep (slp / 1000);
 122       wtimer_update (timer);
 123       t1 = wtimer_read (timer);
 124
 125       /* Due to scheduling, we probably slept slightly longer (or
 126          shorter) than desired.  Calculate the difference between the
 127          desired and the actual sleep, and adjust the next sleep by
 128          that amount.  */
 129       limit_data.sleep_adjust = slp - (t1 - t0);
 130     }
 131
 132   limit_data.chunk_bytes = 0;
 133   limit_data.chunk_start = wtimer_read (timer);
 134 }
 135
 136 #ifndef MIN
 137 # define MIN(i, j) ((i) <= (j) ? (i) : (j))
 138 #endif
 139
 140 /* Write data in BUF to OUT.  However, if *SKIP is non-zero, skip that
 141    amount of data and decrease SKIP.  Increment *TOTAL by the amount
 142    of data written.  */
 143
 144 static int
 145 write_data (FILE *out, const char *buf, int bufsize, long *skip,
 146             long *written)
 147 {
 148   if (!out)
 149     return 1;
 150   if (*skip > bufsize)
 151     {
 152       *skip -= bufsize;
 153       return 1;
 154     }
 155   if (*skip)
 156     {
 157       buf += *skip;
 158       bufsize -= *skip;
 159       *skip = 0;
 160       if (bufsize == 0)
 161         return 1;
 162     }
 163
 164   fwrite (buf, 1, bufsize, out);
 165   *written += bufsize;
 166
 167   /* Immediately flush the downloaded data.  This should not hinder
 168      performance: fast downloads will arrive in large 16K chunks
 169      (which stdio would write out immediately anyway), and slow
 170      downloads wouldn't be limited by disk speed.  */
 171   fflush (out);
 172   return !ferror (out);
 173 }
 174
 175 /* Read the contents of file descriptor FD until it the connection
 176    terminates or a read error occurs.  The data is read in portions of
 177    up to 16K and written to OUT as it arrives.  If opt.verbose is set,
 178    the progress is shown.
 179
 180    TOREAD is the amount of data expected to arrive, normally only used
 181    by the progress gauge.
 182
 183    STARTPOS is the position from which the download starts, used by
 184    the progress gauge.  If QTYREAD is non-NULL, the value it points to
 185    is incremented by the amount of data read from the network.  If
 186    QTYWRITTEN is non-NULL, the value it points to is incremented by
 187    the amount of data written to disk.  The time it took to download
 188    the data (in milliseconds) is stored to ELAPSED.
 189
 190    The function exits and returns the amount of data read.  In case of
 191    error while reading data, -1 is returned.  In case of error while
 192    writing data, -2 is returned.  */
 193
 194 int
 195 fd_read_body (int fd, FILE *out, long toread, long startpos,
 196               long *qtyread, long *qtywritten, double *elapsed, int flags)
 197 {
 198   int ret = 0;
 199
 200   static char dlbuf[16384];
 201   int dlbufsize = sizeof (dlbuf);
 202
 203   struct wget_timer *timer = NULL;
 204   double last_successful_read_tm = 0;
 205
 206   /* The progress gauge, set according to the user preferences. */
 207   void *progress = NULL;
 208
 209   /* Non-zero if the progress gauge is interactive, i.e. if it can
 210      continually update the display.  When true, smaller timeout
 211      values are used so that the gauge can update the display when
 212      data arrives slowly. */
 213   int progress_interactive = 0;
 214
 215   int exact = flags & rb_read_exactly;
 216   long skip = 0;
 217
 218   /* How much data we've read/written.  */
 219   long sum_read = 0;
 220   long sum_written = 0;
 221
 222   if (flags & rb_skip_startpos)
 223     skip = startpos;
 224
 225   if (opt.verbose)
 226     {
 227       /* If we're skipping STARTPOS bytes, hide it from
 228          progress_create because the indicator can't deal with it.  */
 229       progress = progress_create (skip ? 0 : startpos, toread);
 230       progress_interactive = progress_interactive_p (progress);
 231     }
 232
 233   if (opt.limit_rate)
 234     limit_bandwidth_reset ();
 235
 236   /* A timer is needed for tracking progress, for throttling, and for
 237      tracking elapsed time.  If either of these are requested, start
 238      the timer.  */
 239   if (progress || opt.limit_rate || elapsed)
 240     {
 241       timer = wtimer_new ();
 242       last_successful_read_tm = 0;
 243     }
 244
 245   /* Use a smaller buffer for low requested bandwidths.  For example,
 246      with --limit-rate=2k, it doesn't make sense to slurp in 16K of
 247      data and then sleep for 8s.  With buffer size equal to the limit,
 248      we never have to sleep for more than one second.  */
 249   if (opt.limit_rate && opt.limit_rate < dlbufsize)
 250     dlbufsize = opt.limit_rate;
 251
 252   /* Read from FD while there is data to read.  Normally toread==0
 253      means that it is unknown how much data is to arrive.  However, if
 254      EXACT is set, then toread==0 means what it says: that no data
 255      should be read.  */
 256   while (!exact || (sum_read < toread))
 257     {
 258       int rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize;
 259       double tmout = opt.read_timeout;
 260       if (progress_interactive)
 261         {
 262           double waittm;
 263           /* For interactive progress gauges, always specify a ~1s
 264              timeout, so that the gauge can be updated regularly even
 265              when the data arrives very slowly or stalls.  */
 266           tmout = 0.95;
 267           waittm = (wtimer_read (timer) - last_successful_read_tm) / 1000;
 268           if (waittm + tmout > opt.read_timeout)
 269             {
 270               /* Don't let total idle time exceed read timeout. */
 271               tmout = opt.read_timeout - waittm;
 272               if (tmout < 0)
 273                 {
 274                   /* We've already exceeded the timeout. */
 275                   ret = -1, errno = ETIMEDOUT;
 276                   break;
 277                 }
 278             }
 279         }
 280       ret = fd_read (fd, dlbuf, rdsize, tmout);
 281
 282       if (ret == 0 || (ret < 0 && errno != ETIMEDOUT))
 283         break;                  /* read error */
 284       else if (ret < 0)
 285         ret = 0;                /* read timeout */
 286
 287       if (progress || opt.limit_rate)
 288         {
 289           wtimer_update (timer);
 290           if (ret > 0)
 291             last_successful_read_tm = wtimer_read (timer);
 292         }
 293
 294       if (ret > 0)
 295         {
 296           sum_read += ret;
 297           if (!write_data (out, dlbuf, ret, &skip, &sum_written))
 298             {
 299               ret = -2;
 300               goto out;
 301             }
 302         }
 303
 304       if (opt.limit_rate)
 305         limit_bandwidth (ret, timer);
 306
 307       if (progress)
 308         progress_update (progress, ret, wtimer_read (timer));
 309 #ifdef WINDOWS
 310       if (toread > 0)
 311         ws_percenttitle (100.0 *
 312                          (startpos + sum_read) / (startpos + toread));
 313 #endif
 314     }
 315   if (ret < -1)
 316     ret = -1;
 317
 318  out:
 319   if (progress)
 320     progress_finish (progress, wtimer_read (timer));
 321
 322   if (elapsed)
 323     *elapsed = wtimer_read (timer);
 324   if (timer)
 325     wtimer_delete (timer);
 326
 327   if (qtyread)
 328     *qtyread += sum_read;
 329   if (qtywritten)
 330     *qtywritten += sum_written;
 331
 332   return ret;
 333 }
 334 \f
 335 /* Read a hunk of data from FD, up until a terminator.  The terminator
 336    is whatever the TERMINATOR function determines it to be; for
 337    example, it can be a line of data, or the head of an HTTP response.
 338    The function returns the data read allocated with malloc.
 339
 340    In case of error, NULL is returned.  In case of EOF and no data
 341    read, NULL is returned and errno set to 0.  In case of EOF with
 342    data having been read, the data is returned, but it will
 343    (obviously) not contain the terminator.
 344
 345    The idea is to be able to read a line of input, or otherwise a hunk
 346    of text, such as the head of an HTTP request, without crossing the
 347    boundary, so that the next call to fd_read etc. reads the data
 348    after the hunk.  To achieve that, this function does the following:
 349
 350    1. Peek at available data.
 351
 352    2. Determine whether the peeked data, along with the previously
 353       read data, includes the terminator.
 354
 355       2a. If yes, read the data until the end of the terminator, and
 356           exit.
 357
 358       2b. If no, read the peeked data and goto 1.
 359
 360    The function is careful to assume as little as possible about the
 361    implementation of peeking.  For example, every peek is followed by
 362    a read.  If the read returns a different amount of data, the
 363    process is retried until all data arrives safely.
 364
 365    BUFSIZE is the size of the initial buffer expected to read all the
 366    data in the typical case.
 367
 368    This function should be used as a building block for other
 369    functions -- see fd_read_line as a simple example.  */
 370
 371 char *
 372 fd_read_hunk (int fd, hunk_terminator_t hunk_terminator, int bufsize)
 373 {
 374   char *hunk = xmalloc (bufsize);
 375   int tail = 0;                 /* tail position in HUNK */
 376
 377   while (1)
 378     {
 379       const char *end;
 380       int pklen, rdlen, remain;
 381
 382       /* First, peek at the available data. */
 383
 384       pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1);
 385       if (pklen < 0)
 386         {
 387           xfree (hunk);
 388           return NULL;
 389         }
 390       end = hunk_terminator (hunk, tail, pklen);
 391       if (end)
 392         {
 393           /* The data contains the terminator: we'll drain the data up
 394              to the end of the terminator.  */
 395           remain = end - (hunk + tail);
 396           if (remain == 0)
 397             {
 398               /* No more data needs to be read. */
 399               hunk[tail] = '\0';
 400               return hunk;
 401             }
 402           if (bufsize - 1 < tail + remain)
 403             {
 404               bufsize = tail + remain + 1;
 405               hunk = xrealloc (hunk, bufsize);
 406             }
 407         }
 408       else
 409         /* No terminator: simply read the data we know is (or should
 410            be) available.  */
 411         remain = pklen;
 412
 413       /* Now, read the data.  Note that we make no assumptions about
 414          how much data we'll get.  (Some TCP stacks are notorious for
 415          read returning less data than the previous MSG_PEEK.)  */
 416
 417       rdlen = fd_read (fd, hunk + tail, remain, 0);
 418       if (rdlen < 0)
 419         {
 420           xfree_null (hunk);
 421           return NULL;
 422         }
 423       tail += rdlen;
 424       hunk[tail] = '\0';
 425
 426       if (rdlen == 0)
 427         {
 428           if (tail == 0)
 429             {
 430               /* EOF without anything having been read */
 431               xfree (hunk);
 432               errno = 0;
 433               return NULL;
 434             }
 435           else
 436             /* EOF seen: return the data we've read. */
 437             return hunk;
 438         }
 439       if (end && rdlen == remain)
 440         /* The terminator was seen and the remaining data drained --
 441            we got what we came for.  */
 442         return hunk;
 443
 444       /* Keep looping until all the data arrives. */
 445
 446       if (tail == bufsize - 1)
 447         {
 448           bufsize <<= 1;
 449           hunk = xrealloc (hunk, bufsize);
 450         }
 451     }
 452 }
 453
 454 static const char *
 455 line_terminator (const char *hunk, int oldlen, int peeklen)
 456 {
 457   const char *p = memchr (hunk + oldlen, '\n', peeklen);
 458   if (p)
 459     /* p+1 because we want the line to include '\n' */
 460     return p + 1;
 461   return NULL;
 462 }
 463
 464 /* Read one line from FD and return it.  The line is allocated using
 465    malloc.
 466
 467    If an error occurs, or if no data can be read, NULL is returned.
 468    In the former case errno indicates the error condition, and in the
 469    latter case, errno is NULL.  */
 470
 471 char *
 472 fd_read_line (int fd)
 473 {
 474   return fd_read_hunk (fd, line_terminator, 128);
 475 }
 476 \f
 477 /* Return a printed representation of the download rate, as
 478    appropriate for the speed.  If PAD is non-zero, strings will be
 479    padded to the width of 7 characters (xxxx.xx).  */
 480 char *
 481 retr_rate (long bytes, double msecs, int pad)
 482 {
 483   static char res[20];
 484   static char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
 485   int units = 0;
 486
 487   double dlrate = calc_rate (bytes, msecs, &units);
 488   sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);
 489
 490   return res;
 491 }
 492
 493 /* Calculate the download rate and trim it as appropriate for the
 494    speed.  Appropriate means that if rate is greater than 1K/s,
 495    kilobytes are used, and if rate is greater than 1MB/s, megabytes
 496    are used.
 497
 498    UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
 499    GB/s.  */
 500 double
 501 calc_rate (long bytes, double msecs, int *units)
 502 {
 503   double dlrate;
 504
 505   assert (msecs >= 0);
 506   assert (bytes >= 0);
 507
 508   if (msecs == 0)
 509     /* If elapsed time is exactly zero, it means we're under the
 510        granularity of the timer.  This often happens on systems that
 511        use time() for the timer.  */
 512     msecs = wtimer_granularity ();
 513
 514   dlrate = (double)1000 * bytes / msecs;
 515   if (dlrate < 1024.0)
 516     *units = 0;
 517   else if (dlrate < 1024.0 * 1024.0)
 518     *units = 1, dlrate /= 1024.0;
 519   else if (dlrate < 1024.0 * 1024.0 * 1024.0)
 520     *units = 2, dlrate /= (1024.0 * 1024.0);
 521   else
 522     /* Maybe someone will need this, one day. */
 523     *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
 524
 525   return dlrate;
 526 }
 527 \f
 528 /* Maximum number of allowed redirections.  20 was chosen as a
 529    "reasonable" value, which is low enough to not cause havoc, yet
 530    high enough to guarantee that normal retrievals will not be hurt by
 531    the check.  */
 532
 533 #define MAX_REDIRECTIONS 20
 534
 535 #define SUSPEND_POST_DATA do {                  \
 536   post_data_suspended = 1;                      \
 537   saved_post_data = opt.post_data;              \
 538   saved_post_file_name = opt.post_file_name;    \
 539   opt.post_data = NULL;                         \
 540   opt.post_file_name = NULL;                    \
 541 } while (0)
 542
 543 #define RESTORE_POST_DATA do {                          \
 544   if (post_data_suspended)                              \
 545     {                                                   \
 546       opt.post_data = saved_post_data;                  \
 547       opt.post_file_name = saved_post_file_name;        \
 548       post_data_suspended = 0;                          \
 549     }                                                   \
 550 } while (0)
 551
 552 static char *getproxy PARAMS ((struct url *));
 553
 554 /* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
 555    FTP, proxy, etc.  */
 556
 557 /* #### This function should be rewritten so it doesn't return from
 558    multiple points. */
 559
 560 uerr_t
 561 retrieve_url (const char *origurl, char **file, char **newloc,
 562               const char *refurl, int *dt)
 563 {
 564   uerr_t result;
 565   char *url;
 566   int location_changed, dummy;
 567   char *mynewloc, *proxy;
 568   struct url *u, *proxy_url;
 569   int up_error_code;            /* url parse error code */
 570   char *local_file;
 571   int redirection_count = 0;
 572
 573   int post_data_suspended = 0;
 574   char *saved_post_data = NULL;
 575   char *saved_post_file_name = NULL;
 576
 577   /* If dt is NULL, use local storage.  */
 578   if (!dt)
 579     {
 580       dt = &dummy;
 581       dummy = 0;
 582     }
 583   url = xstrdup (origurl);
 584   if (newloc)
 585     *newloc = NULL;
 586   if (file)
 587     *file = NULL;
 588
 589   u = url_parse (url, &up_error_code);
 590   if (!u)
 591     {
 592       logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
 593       xfree (url);
 594       return URLERROR;
 595     }
 596
 597   if (!refurl)
 598     refurl = opt.referer;
 599
 600  redirected:
 601
 602   result = NOCONERROR;
 603   mynewloc = NULL;
 604   local_file = NULL;
 605   proxy_url = NULL;
 606
 607   proxy = getproxy (u);
 608   if (proxy)
 609     {
 610       /* Parse the proxy URL.  */
 611       proxy_url = url_parse (proxy, &up_error_code);
 612       if (!proxy_url)
 613         {
 614           logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
 615                      proxy, url_error (up_error_code));
 616           xfree (url);
 617           RESTORE_POST_DATA;
 618           return PROXERR;
 619         }
 620       if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
 621         {
 622           logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
 623           url_free (proxy_url);
 624           xfree (url);
 625           RESTORE_POST_DATA;
 626           return PROXERR;
 627         }
 628     }
 629
 630   if (u->scheme == SCHEME_HTTP
 631 #ifdef HAVE_SSL
 632       || u->scheme == SCHEME_HTTPS
 633 #endif
 634       || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
 635     {
 636       result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
 637     }
 638   else if (u->scheme == SCHEME_FTP)
 639     {
 640       /* If this is a redirection, we must not allow recursive FTP
 641          retrieval, so we save recursion to oldrec, and restore it
 642          later.  */
 643       int oldrec = opt.recursive;
 644       if (redirection_count)
 645         opt.recursive = 0;
 646       result = ftp_loop (u, dt, proxy_url);
 647       opt.recursive = oldrec;
 648
 649       /* There is a possibility of having HTTP being redirected to
 650          FTP.  In these cases we must decide whether the text is HTML
 651          according to the suffix.  The HTML suffixes are `.html',
 652          `.htm' and a few others, case-insensitive.  */
 653       if (redirection_count && local_file && u->scheme == SCHEME_FTP)
 654         {
 655           if (has_html_suffix_p (local_file))
 656             *dt |= TEXTHTML;
 657         }
 658     }
 659
 660   if (proxy_url)
 661     {
 662       url_free (proxy_url);
 663       proxy_url = NULL;
 664     }
 665
 666   location_changed = (result == NEWLOCATION);
 667   if (location_changed)
 668     {
 669       char *construced_newloc;
 670       struct url *newloc_parsed;
 671
 672       assert (mynewloc != NULL);
 673
 674       if (local_file)
 675         xfree (local_file);
 676
 677       /* The HTTP specs only allow absolute URLs to appear in
 678          redirects, but a ton of boneheaded webservers and CGIs out
 679          there break the rules and use relative URLs, and popular
 680          browsers are lenient about this, so wget should be too. */
 681       construced_newloc = uri_merge (url, mynewloc);
 682       xfree (mynewloc);
 683       mynewloc = construced_newloc;
 684
 685       /* Now, see if this new location makes sense. */
 686       newloc_parsed = url_parse (mynewloc, &up_error_code);
 687       if (!newloc_parsed)
 688         {
 689           logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
 690                      url_error (up_error_code));
 691           url_free (u);
 692           xfree (url);
 693           xfree (mynewloc);
 694           RESTORE_POST_DATA;
 695           return result;
 696         }
 697
 698       /* Now mynewloc will become newloc_parsed->url, because if the
 699          Location contained relative paths like .././something, we
 700          don't want that propagating as url.  */
 701       xfree (mynewloc);
 702       mynewloc = xstrdup (newloc_parsed->url);
 703
 704       /* Check for max. number of redirections.  */
 705       if (++redirection_count > MAX_REDIRECTIONS)
 706         {
 707           logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
 708                      MAX_REDIRECTIONS);
 709           url_free (newloc_parsed);
 710           url_free (u);
 711           xfree (url);
 712           xfree (mynewloc);
 713           RESTORE_POST_DATA;
 714           return WRONGCODE;
 715         }
 716
 717       xfree (url);
 718       url = mynewloc;
 719       url_free (u);
 720       u = newloc_parsed;
 721
 722       /* If we're being redirected from POST, we don't want to POST
 723          again.  Many requests answer POST with a redirection to an
 724          index page; that redirection is clearly a GET.  We "suspend"
 725          POST data for the duration of the redirections, and restore
 726          it when we're done. */
 727       if (!post_data_suspended)
 728         SUSPEND_POST_DATA;
 729
 730       goto redirected;
 731     }
 732
 733   if (local_file)
 734     {
 735       if (*dt & RETROKF)
 736         {
 737           register_download (u->url, local_file);
 738           if (redirection_count && 0 != strcmp (origurl, u->url))
 739             register_redirection (origurl, u->url);
 740           if (*dt & TEXTHTML)
 741             register_html (u->url, local_file);
 742         }
 743     }
 744
 745   if (file)
 746     *file = local_file ? local_file : NULL;
 747   else
 748     xfree_null (local_file);
 749
 750   url_free (u);
 751
 752   if (redirection_count)
 753     {
 754       if (newloc)
 755         *newloc = url;
 756       else
 757         xfree (url);
 758     }
 759   else
 760     {
 761       if (newloc)
 762         *newloc = NULL;
 763       xfree (url);
 764     }
 765
 766   RESTORE_POST_DATA;
 767
 768   return result;
 769 }
 770
 771 /* Find the URLs in the file and call retrieve_url() for each of
 772    them.  If HTML is non-zero, treat the file as HTML, and construct
 773    the URLs accordingly.
 774
 775    If opt.recursive is set, call retrieve_tree() for each file.  */
 776
 777 uerr_t
 778 retrieve_from_file (const char *file, int html, int *count)
 779 {
 780   uerr_t status;
 781   struct urlpos *url_list, *cur_url;
 782
 783   url_list = (html ? get_urls_html (file, NULL, NULL)
 784               : get_urls_file (file));
 785   status = RETROK;             /* Suppose everything is OK.  */
 786   *count = 0;                  /* Reset the URL count.  */
 787
 788   for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
 789     {
 790       char *filename = NULL, *new_file = NULL;
 791       int dt;
 792
 793       if (cur_url->ignore_when_downloading)
 794         continue;
 795
 796       if (opt.quota && total_downloaded_bytes > opt.quota)
 797         {
 798           status = QUOTEXC;
 799           break;
 800         }
 801       if ((opt.recursive || opt.page_requisites)
 802           && cur_url->url->scheme != SCHEME_FTP)
 803         status = retrieve_tree (cur_url->url->url);
 804       else
 805         status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
 806
 807       if (filename && opt.delete_after && file_exists_p (filename))
 808         {
 809           DEBUGP (("Removing file due to --delete-after in"
 810                    " retrieve_from_file():\n"));
 811           logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
 812           if (unlink (filename))
 813             logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
 814           dt &= ~RETROKF;
 815         }
 816
 817       xfree_null (new_file);
 818       xfree_null (filename);
 819     }
 820
 821   /* Free the linked list of URL-s.  */
 822   free_urlpos (url_list);
 823
 824   return status;
 825 }
 826
 827 /* Print `giving up', or `retrying', depending on the impending
 828    action.  N1 and N2 are the attempt number and the attempt limit.  */
 829 void
 830 printwhat (int n1, int n2)
 831 {
 832   logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
 833 }
 834
 835 /* If opt.wait or opt.waitretry are specified, and if certain
 836    conditions are met, sleep the appropriate number of seconds.  See
 837    the documentation of --wait and --waitretry for more information.
 838
 839    COUNT is the count of current retrieval, beginning with 1. */
 840
 841 void
 842 sleep_between_retrievals (int count)
 843 {
 844   static int first_retrieval = 1;
 845
 846   if (first_retrieval)
 847     {
 848       /* Don't sleep before the very first retrieval. */
 849       first_retrieval = 0;
 850       return;
 851     }
 852
 853   if (opt.waitretry && count > 1)
 854     {
 855       /* If opt.waitretry is specified and this is a retry, wait for
 856          COUNT-1 number of seconds, or for opt.waitretry seconds.  */
 857       if (count <= opt.waitretry)
 858         xsleep (count - 1);
 859       else
 860         xsleep (opt.waitretry);
 861     }
 862   else if (opt.wait)
 863     {
 864       if (!opt.random_wait || count > 1)
 865         /* If random-wait is not specified, or if we are sleeping
 866            between retries of the same download, sleep the fixed
 867            interval.  */
 868         xsleep (opt.wait);
 869       else
 870         {
 871           /* Sleep a random amount of time averaging in opt.wait
 872              seconds.  The sleeping amount ranges from 0 to
 873              opt.wait*2, inclusive.  */
 874           double waitsecs = 2 * opt.wait * random_float ();
 875           DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
 876                    opt.wait, waitsecs));
 877           xsleep (waitsecs);
 878         }
 879     }
 880 }
 881
 882 /* Free the linked list of urlpos.  */
 883 void
 884 free_urlpos (struct urlpos *l)
 885 {
 886   while (l)
 887     {
 888       struct urlpos *next = l->next;
 889       if (l->url)
 890         url_free (l->url);
 891       xfree_null (l->local_name);
 892       xfree (l);
 893       l = next;
 894     }
 895 }
 896
 897 /* Rotate FNAME opt.backups times */
 898 void
 899 rotate_backups(const char *fname)
 900 {
 901   int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1;
 902   char *from = (char *)alloca (maxlen);
 903   char *to = (char *)alloca (maxlen);
 904   struct stat sb;
 905   int i;
 906
 907   if (stat (fname, &sb) == 0)
 908     if (S_ISREG (sb.st_mode) == 0)
 909       return;
 910
 911   for (i = opt.backups; i > 1; i--)
 912     {
 913       sprintf (from, "%s.%d", fname, i - 1);
 914       sprintf (to, "%s.%d", fname, i);
 915       rename (from, to);
 916     }
 917
 918   sprintf (to, "%s.%d", fname, 1);
 919   rename(fname, to);
 920 }
 921
 922 static int no_proxy_match PARAMS ((const char *, const char **));
 923
 924 /* Return the URL of the proxy appropriate for url U.  */
 925
 926 static char *
 927 getproxy (struct url *u)
 928 {
 929   char *proxy = NULL;
 930   char *rewritten_url;
 931   static char rewritten_storage[1024];
 932
 933   if (!opt.use_proxy)
 934     return NULL;
 935   if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
 936     return NULL;
 937
 938   switch (u->scheme)
 939     {
 940     case SCHEME_HTTP:
 941       proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
 942       break;
 943 #ifdef HAVE_SSL
 944     case SCHEME_HTTPS:
 945       proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
 946       break;
 947 #endif
 948     case SCHEME_FTP:
 949       proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
 950       break;
 951     case SCHEME_INVALID:
 952       break;
 953     }
 954   if (!proxy || !*proxy)
 955     return NULL;
 956
 957   /* Handle shorthands.  `rewritten_storage' is a kludge to allow
 958      getproxy() to return static storage. */
 959   rewritten_url = rewrite_shorthand_url (proxy);
 960   if (rewritten_url)
 961     {
 962       strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage));
 963       rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
 964       proxy = rewritten_storage;
 965     }
 966
 967   return proxy;
 968 }
 969
 970 /* Should a host be accessed through proxy, concerning no_proxy?  */
 971 int
 972 no_proxy_match (const char *host, const char **no_proxy)
 973 {
 974   if (!no_proxy)
 975     return 1;
 976   else
 977     return !sufmatch (no_proxy, host);
 978 }