sjero.net Git - wget/blob - src/retr.c

   1 /* File retrieval.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or (at
   9 your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <sys/types.h>
  35 #ifdef HAVE_UNISTD_H
  36 # include <unistd.h>
  37 #endif /* HAVE_UNISTD_H */
  38 #include <errno.h>
  39 #ifdef HAVE_STRING_H
  40 # include <string.h>
  41 #else
  42 # include <strings.h>
  43 #endif /* HAVE_STRING_H */
  44 #include <assert.h>
  45
  46 #include "wget.h"
  47 #include "utils.h"
  48 #include "retr.h"
  49 #include "progress.h"
  50 #include "url.h"
  51 #include "recur.h"
  52 #include "ftp.h"
  53 #include "host.h"
  54 #include "connect.h"
  55 #include "hash.h"
  56 #include "convert.h"
  57
  58 #ifdef HAVE_SSL
  59 # include "gen_sslfunc.h"       /* for ssl_iread */
  60 #endif
  61
  62 #ifndef errno
  63 extern int errno;
  64 #endif
  65
  66 /* Total size of downloaded files.  Used to enforce quota.  */
  67 LARGE_INT total_downloaded_bytes;
  68
  69 /* If non-NULL, the stream to which output should be written.  This
  70    stream is initialized when `-O' is used.  */
  71 FILE *output_stream;
  72
  73 /* Whether output_document is a regular file we can manipulate,
  74    i.e. not `-' or a device file. */
  75 int output_stream_regular;
  76 \f
  77 static struct {
  78   long chunk_bytes;
  79   double chunk_start;
  80   double sleep_adjust;
  81 } limit_data;
  82
  83 static void
  84 limit_bandwidth_reset (void)
  85 {
  86   limit_data.chunk_bytes = 0;
  87   limit_data.chunk_start = 0;
  88 }
  89
  90 /* Limit the bandwidth by pausing the download for an amount of time.
  91    BYTES is the number of bytes received from the network, and TIMER
  92    is the timer that started at the beginning of download.  */
  93
  94 static void
  95 limit_bandwidth (long bytes, struct wget_timer *timer)
  96 {
  97   double delta_t = wtimer_read (timer) - limit_data.chunk_start;
  98   double expected;
  99
 100   limit_data.chunk_bytes += bytes;
 101
 102   /* Calculate the amount of time we expect downloading the chunk
 103      should take.  If in reality it took less time, sleep to
 104      compensate for the difference.  */
 105   expected = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;
 106
 107   if (expected > delta_t)
 108     {
 109       double slp = expected - delta_t + limit_data.sleep_adjust;
 110       double t0, t1;
 111       if (slp < 200)
 112         {
 113           DEBUGP (("deferring a %.2f ms sleep (%ld/%.2f).\n",
 114                    slp, limit_data.chunk_bytes, delta_t));
 115           return;
 116         }
 117       DEBUGP (("\nsleeping %.2f ms for %ld bytes, adjust %.2f ms\n",
 118                slp, limit_data.chunk_bytes, limit_data.sleep_adjust));
 119
 120       t0 = wtimer_read (timer);
 121       xsleep (slp / 1000);
 122       wtimer_update (timer);
 123       t1 = wtimer_read (timer);
 124
 125       /* Due to scheduling, we probably slept slightly longer (or
 126          shorter) than desired.  Calculate the difference between the
 127          desired and the actual sleep, and adjust the next sleep by
 128          that amount.  */
 129       limit_data.sleep_adjust = slp - (t1 - t0);
 130     }
 131
 132   limit_data.chunk_bytes = 0;
 133   limit_data.chunk_start = wtimer_read (timer);
 134 }
 135
 136 #ifndef MIN
 137 # define MIN(i, j) ((i) <= (j) ? (i) : (j))
 138 #endif
 139
 140 /* Write data in BUF to OUT.  However, if *SKIP is non-zero, skip that
 141    amount of data and decrease SKIP.  Increment *TOTAL by the amount
 142    of data written.  */
 143
 144 static int
 145 write_data (FILE *out, const char *buf, int bufsize, long *skip,
 146             long *transferred)
 147 {
 148   if (!out)
 149     return 1;
 150   if (*skip > bufsize)
 151     {
 152       *skip -= bufsize;
 153       return 1;
 154     }
 155   if (*skip)
 156     {
 157       buf += *skip;
 158       bufsize -= *skip;
 159       *skip = 0;
 160       if (bufsize == 0)
 161         return 1;
 162     }
 163   *transferred += bufsize;
 164   fwrite (buf, 1, bufsize, out);
 165
 166   /* Immediately flush the downloaded data.  This should not hinder
 167      performance: fast downloads will arrive in large 16K chunks
 168      (which stdio would write out immediately anyway), and slow
 169      downloads wouldn't be limited by disk speed.  */
 170   fflush (out);
 171   return !ferror (out);
 172 }
 173
 174 /* Read the contents of file descriptor FD until it the connection
 175    terminates or a read error occurs.  The data is read in portions of
 176    up to 16K and written to OUT as it arrives.  If opt.verbose is set,
 177    the progress is shown.
 178
 179    TOREAD is the amount of data expected to arrive, normally only used
 180    by the progress gauge.
 181
 182    STARTPOS is the position from which the download starts, used by
 183    the progress gauge.  The amount of data read gets stored to
 184    *TRANSFERRED.  The time it took to download the data (in
 185    milliseconds) is stored to *ELAPSED.
 186
 187    The function exits and returns the amount of data read.  In case of
 188    error while reading data, -1 is returned.  In case of error while
 189    writing data, -2 is returned.  */
 190
 191 int
 192 fd_read_body (int fd, FILE *out, long toread, long startpos,
 193               long *transferred, double *elapsed, int flags)
 194 {
 195   int ret = 0;
 196
 197   static char dlbuf[16384];
 198   int dlbufsize = sizeof (dlbuf);
 199
 200   struct wget_timer *timer = NULL;
 201   double last_successful_read_tm = 0;
 202
 203   /* The progress gauge, set according to the user preferences. */
 204   void *progress = NULL;
 205
 206   /* Non-zero if the progress gauge is interactive, i.e. if it can
 207      continually update the display.  When true, smaller timeout
 208      values are used so that the gauge can update the display when
 209      data arrives slowly. */
 210   int progress_interactive = 0;
 211
 212   int exact = flags & rb_read_exactly;
 213   long skip = 0;
 214
 215   /* How much data we've read.  This is used internally and is
 216      unaffected by skipping STARTPOS.  */
 217   long total_read = 0;
 218
 219   *transferred = 0;
 220   if (flags & rb_skip_startpos)
 221     skip = startpos;
 222
 223   if (opt.verbose)
 224     {
 225       /* If we're skipping STARTPOS bytes, hide it from
 226          progress_create because the indicator can't deal with it.  */
 227       progress = progress_create (skip ? 0 : startpos, toread);
 228       progress_interactive = progress_interactive_p (progress);
 229     }
 230
 231   if (opt.limit_rate)
 232     limit_bandwidth_reset ();
 233
 234   /* A timer is needed for tracking progress, for throttling, and for
 235      tracking elapsed time.  If either of these are requested, start
 236      the timer.  */
 237   if (progress || opt.limit_rate || elapsed)
 238     {
 239       timer = wtimer_new ();
 240       last_successful_read_tm = 0;
 241     }
 242
 243   /* Use a smaller buffer for low requested bandwidths.  For example,
 244      with --limit-rate=2k, it doesn't make sense to slurp in 16K of
 245      data and then sleep for 8s.  With buffer size equal to the limit,
 246      we never have to sleep for more than one second.  */
 247   if (opt.limit_rate && opt.limit_rate < dlbufsize)
 248     dlbufsize = opt.limit_rate;
 249
 250   /* Read from FD while there is data to read.  Normally toread==0
 251      means that it is unknown how much data is to arrive.  However, if
 252      EXACT is set, then toread==0 means what it says: that no data
 253      should be read.  */
 254   while (!exact || (total_read < toread))
 255     {
 256       int rdsize = exact ? MIN (toread - total_read, dlbufsize) : dlbufsize;
 257       double tmout = opt.read_timeout;
 258       if (progress_interactive)
 259         {
 260           double waittm;
 261           /* For interactive progress gauges, always specify a ~1s
 262              timeout, so that the gauge can be updated regularly even
 263              when the data arrives very slowly or stalls.  */
 264           tmout = 0.95;
 265           waittm = (wtimer_read (timer) - last_successful_read_tm) / 1000;
 266           if (waittm + tmout > opt.read_timeout)
 267             {
 268               /* Don't allow waiting time to exceed read timeout. */
 269               tmout = opt.read_timeout - waittm;
 270               if (tmout < 0)
 271                 {
 272                   /* We've already exceeded the timeout. */
 273                   ret = -1, errno = ETIMEDOUT;
 274                   break;
 275                 }
 276             }
 277         }
 278       ret = fd_read (fd, dlbuf, rdsize, tmout);
 279
 280       if (ret == 0 || (ret < 0 && errno != ETIMEDOUT))
 281         break;
 282       else if (ret < 0)
 283         ret = 0;                /* timeout */
 284
 285       if (progress || opt.limit_rate)
 286         {
 287           wtimer_update (timer);
 288           if (ret > 0)
 289             last_successful_read_tm = wtimer_read (timer);
 290         }
 291
 292       if (ret > 0)
 293         {
 294           total_read += ret;
 295           if (!write_data (out, dlbuf, ret, &skip, transferred))
 296             {
 297               ret = -2;
 298               goto out;
 299             }
 300         }
 301
 302       if (opt.limit_rate)
 303         limit_bandwidth (ret, timer);
 304
 305       if (progress)
 306         progress_update (progress, ret, wtimer_read (timer));
 307 #ifdef WINDOWS
 308       if (toread > 0)
 309         ws_percenttitle (100.0 *
 310                          (startpos + total_read) / (startpos + toread));
 311 #endif
 312     }
 313   if (ret < -1)
 314     ret = -1;
 315
 316  out:
 317   if (progress)
 318     progress_finish (progress, wtimer_read (timer));
 319   if (elapsed)
 320     *elapsed = wtimer_read (timer);
 321   if (timer)
 322     wtimer_delete (timer);
 323
 324   return ret;
 325 }
 326 \f
 327 /* Read a hunk of data from FD, up until a terminator.  The terminator
 328    is whatever the TERMINATOR function determines it to be; for
 329    example, it can be a line of data, or the head of an HTTP response.
 330    The function returns the data read allocated with malloc.
 331
 332    In case of error, NULL is returned.  In case of EOF and no data
 333    read, NULL is returned and errno set to 0.  In case of EOF with
 334    data having been read, the data is returned, but it will
 335    (obviously) not contain the terminator.
 336
 337    The idea is to be able to read a line of input, or otherwise a hunk
 338    of text, such as the head of an HTTP request, without crossing the
 339    boundary, so that the next call to fd_read etc. reads the data
 340    after the hunk.  To achieve that, this function does the following:
 341
 342    1. Peek at available data.
 343
 344    2. Determine whether the peeked data, along with the previously
 345       read data, includes the terminator.
 346
 347       2a. If yes, read the data until the end of the terminator, and
 348           exit.
 349
 350       2b. If no, read the peeked data and goto 1.
 351
 352    The function is careful to assume as little as possible about the
 353    implementation of peeking.  For example, every peek is followed by
 354    a read.  If the read returns a different amount of data, the
 355    process is retried until all data arrives safely.
 356
 357    BUFSIZE is the size of the initial buffer expected to read all the
 358    data in the typical case.
 359
 360    This function should be used as a building block for other
 361    functions -- see fd_read_line as a simple example.  */
 362
 363 char *
 364 fd_read_hunk (int fd, hunk_terminator_t hunk_terminator, int bufsize)
 365 {
 366   char *hunk = xmalloc (bufsize);
 367   int tail = 0;                 /* tail position in HUNK */
 368
 369   while (1)
 370     {
 371       const char *end;
 372       int pklen, rdlen, remain;
 373
 374       /* First, peek at the available data. */
 375
 376       pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1);
 377       if (pklen < 0)
 378         {
 379           xfree (hunk);
 380           return NULL;
 381         }
 382       end = hunk_terminator (hunk, tail, pklen);
 383       if (end)
 384         {
 385           /* The data contains the terminator: we'll drain the data up
 386              to the end of the terminator.  */
 387           remain = end - (hunk + tail);
 388           if (remain == 0)
 389             {
 390               /* No more data needs to be read. */
 391               hunk[tail] = '\0';
 392               return hunk;
 393             }
 394           if (bufsize - 1 < tail + remain)
 395             {
 396               bufsize = tail + remain + 1;
 397               hunk = xrealloc (hunk, bufsize);
 398             }
 399         }
 400       else
 401         /* No terminator: simply read the data we know is (or should
 402            be) available.  */
 403         remain = pklen;
 404
 405       /* Now, read the data.  Note that we make no assumptions about
 406          how much data we'll get.  (Some TCP stacks are notorious for
 407          read returning less data than the previous MSG_PEEK.)  */
 408
 409       rdlen = fd_read (fd, hunk + tail, remain, 0);
 410       if (rdlen < 0)
 411         {
 412           xfree_null (hunk);
 413           return NULL;
 414         }
 415       tail += rdlen;
 416       hunk[tail] = '\0';
 417
 418       if (rdlen == 0)
 419         {
 420           if (tail == 0)
 421             {
 422               /* EOF without anything having been read */
 423               xfree (hunk);
 424               errno = 0;
 425               return NULL;
 426             }
 427           else
 428             /* EOF seen: return the data we've read. */
 429             return hunk;
 430         }
 431       if (end && rdlen == remain)
 432         /* The terminator was seen and the remaining data drained --
 433            we got what we came for.  */
 434         return hunk;
 435
 436       /* Keep looping until all the data arrives. */
 437
 438       if (tail == bufsize - 1)
 439         {
 440           bufsize <<= 1;
 441           hunk = xrealloc (hunk, bufsize);
 442         }
 443     }
 444 }
 445
 446 static const char *
 447 line_terminator (const char *hunk, int oldlen, int peeklen)
 448 {
 449   const char *p = memchr (hunk + oldlen, '\n', peeklen);
 450   if (p)
 451     /* p+1 because we want the line to include '\n' */
 452     return p + 1;
 453   return NULL;
 454 }
 455
 456 /* Read one line from FD and return it.  The line is allocated using
 457    malloc.
 458
 459    If an error occurs, or if no data can be read, NULL is returned.
 460    In the former case errno indicates the error condition, and in the
 461    latter case, errno is NULL.  */
 462
 463 char *
 464 fd_read_line (int fd)
 465 {
 466   return fd_read_hunk (fd, line_terminator, 128);
 467 }
 468 \f
 469 /* Return a printed representation of the download rate, as
 470    appropriate for the speed.  If PAD is non-zero, strings will be
 471    padded to the width of 7 characters (xxxx.xx).  */
 472 char *
 473 retr_rate (long bytes, double msecs, int pad)
 474 {
 475   static char res[20];
 476   static char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
 477   int units = 0;
 478
 479   double dlrate = calc_rate (bytes, msecs, &units);
 480   sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);
 481
 482   return res;
 483 }
 484
 485 /* Calculate the download rate and trim it as appropriate for the
 486    speed.  Appropriate means that if rate is greater than 1K/s,
 487    kilobytes are used, and if rate is greater than 1MB/s, megabytes
 488    are used.
 489
 490    UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
 491    GB/s.  */
 492 double
 493 calc_rate (long bytes, double msecs, int *units)
 494 {
 495   double dlrate;
 496
 497   assert (msecs >= 0);
 498   assert (bytes >= 0);
 499
 500   if (msecs == 0)
 501     /* If elapsed time is exactly zero, it means we're under the
 502        granularity of the timer.  This often happens on systems that
 503        use time() for the timer.  */
 504     msecs = wtimer_granularity ();
 505
 506   dlrate = (double)1000 * bytes / msecs;
 507   if (dlrate < 1024.0)
 508     *units = 0;
 509   else if (dlrate < 1024.0 * 1024.0)
 510     *units = 1, dlrate /= 1024.0;
 511   else if (dlrate < 1024.0 * 1024.0 * 1024.0)
 512     *units = 2, dlrate /= (1024.0 * 1024.0);
 513   else
 514     /* Maybe someone will need this, one day. */
 515     *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
 516
 517   return dlrate;
 518 }
 519 \f
 520 /* Maximum number of allowed redirections.  20 was chosen as a
 521    "reasonable" value, which is low enough to not cause havoc, yet
 522    high enough to guarantee that normal retrievals will not be hurt by
 523    the check.  */
 524
 525 #define MAX_REDIRECTIONS 20
 526
 527 #define SUSPEND_POST_DATA do {                  \
 528   post_data_suspended = 1;                      \
 529   saved_post_data = opt.post_data;              \
 530   saved_post_file_name = opt.post_file_name;    \
 531   opt.post_data = NULL;                         \
 532   opt.post_file_name = NULL;                    \
 533 } while (0)
 534
 535 #define RESTORE_POST_DATA do {                          \
 536   if (post_data_suspended)                              \
 537     {                                                   \
 538       opt.post_data = saved_post_data;                  \
 539       opt.post_file_name = saved_post_file_name;        \
 540       post_data_suspended = 0;                          \
 541     }                                                   \
 542 } while (0)
 543
 544 static char *getproxy PARAMS ((struct url *));
 545
 546 /* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
 547    FTP, proxy, etc.  */
 548
 549 /* #### This function should be rewritten so it doesn't return from
 550    multiple points. */
 551
 552 uerr_t
 553 retrieve_url (const char *origurl, char **file, char **newloc,
 554               const char *refurl, int *dt)
 555 {
 556   uerr_t result;
 557   char *url;
 558   int location_changed, dummy;
 559   char *mynewloc, *proxy;
 560   struct url *u, *proxy_url;
 561   int up_error_code;            /* url parse error code */
 562   char *local_file;
 563   int redirection_count = 0;
 564
 565   int post_data_suspended = 0;
 566   char *saved_post_data = NULL;
 567   char *saved_post_file_name = NULL;
 568
 569   /* If dt is NULL, use local storage.  */
 570   if (!dt)
 571     {
 572       dt = &dummy;
 573       dummy = 0;
 574     }
 575   url = xstrdup (origurl);
 576   if (newloc)
 577     *newloc = NULL;
 578   if (file)
 579     *file = NULL;
 580
 581   u = url_parse (url, &up_error_code);
 582   if (!u)
 583     {
 584       logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
 585       xfree (url);
 586       return URLERROR;
 587     }
 588
 589   if (!refurl)
 590     refurl = opt.referer;
 591
 592  redirected:
 593
 594   result = NOCONERROR;
 595   mynewloc = NULL;
 596   local_file = NULL;
 597   proxy_url = NULL;
 598
 599   proxy = getproxy (u);
 600   if (proxy)
 601     {
 602       /* Parse the proxy URL.  */
 603       proxy_url = url_parse (proxy, &up_error_code);
 604       if (!proxy_url)
 605         {
 606           logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
 607                      proxy, url_error (up_error_code));
 608           xfree (url);
 609           RESTORE_POST_DATA;
 610           return PROXERR;
 611         }
 612       if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
 613         {
 614           logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
 615           url_free (proxy_url);
 616           xfree (url);
 617           RESTORE_POST_DATA;
 618           return PROXERR;
 619         }
 620     }
 621
 622   if (u->scheme == SCHEME_HTTP
 623 #ifdef HAVE_SSL
 624       || u->scheme == SCHEME_HTTPS
 625 #endif
 626       || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
 627     {
 628       result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
 629     }
 630   else if (u->scheme == SCHEME_FTP)
 631     {
 632       /* If this is a redirection, we must not allow recursive FTP
 633          retrieval, so we save recursion to oldrec, and restore it
 634          later.  */
 635       int oldrec = opt.recursive;
 636       if (redirection_count)
 637         opt.recursive = 0;
 638       result = ftp_loop (u, dt, proxy_url);
 639       opt.recursive = oldrec;
 640
 641       /* There is a possibility of having HTTP being redirected to
 642          FTP.  In these cases we must decide whether the text is HTML
 643          according to the suffix.  The HTML suffixes are `.html',
 644          `.htm' and a few others, case-insensitive.  */
 645       if (redirection_count && local_file && u->scheme == SCHEME_FTP)
 646         {
 647           if (has_html_suffix_p (local_file))
 648             *dt |= TEXTHTML;
 649         }
 650     }
 651
 652   if (proxy_url)
 653     {
 654       url_free (proxy_url);
 655       proxy_url = NULL;
 656     }
 657
 658   location_changed = (result == NEWLOCATION);
 659   if (location_changed)
 660     {
 661       char *construced_newloc;
 662       struct url *newloc_parsed;
 663
 664       assert (mynewloc != NULL);
 665
 666       if (local_file)
 667         xfree (local_file);
 668
 669       /* The HTTP specs only allow absolute URLs to appear in
 670          redirects, but a ton of boneheaded webservers and CGIs out
 671          there break the rules and use relative URLs, and popular
 672          browsers are lenient about this, so wget should be too. */
 673       construced_newloc = uri_merge (url, mynewloc);
 674       xfree (mynewloc);
 675       mynewloc = construced_newloc;
 676
 677       /* Now, see if this new location makes sense. */
 678       newloc_parsed = url_parse (mynewloc, &up_error_code);
 679       if (!newloc_parsed)
 680         {
 681           logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
 682                      url_error (up_error_code));
 683           url_free (u);
 684           xfree (url);
 685           xfree (mynewloc);
 686           RESTORE_POST_DATA;
 687           return result;
 688         }
 689
 690       /* Now mynewloc will become newloc_parsed->url, because if the
 691          Location contained relative paths like .././something, we
 692          don't want that propagating as url.  */
 693       xfree (mynewloc);
 694       mynewloc = xstrdup (newloc_parsed->url);
 695
 696       /* Check for max. number of redirections.  */
 697       if (++redirection_count > MAX_REDIRECTIONS)
 698         {
 699           logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
 700                      MAX_REDIRECTIONS);
 701           url_free (newloc_parsed);
 702           url_free (u);
 703           xfree (url);
 704           xfree (mynewloc);
 705           RESTORE_POST_DATA;
 706           return WRONGCODE;
 707         }
 708
 709       xfree (url);
 710       url = mynewloc;
 711       url_free (u);
 712       u = newloc_parsed;
 713
 714       /* If we're being redirected from POST, we don't want to POST
 715          again.  Many requests answer POST with a redirection to an
 716          index page; that redirection is clearly a GET.  We "suspend"
 717          POST data for the duration of the redirections, and restore
 718          it when we're done. */
 719       if (!post_data_suspended)
 720         SUSPEND_POST_DATA;
 721
 722       goto redirected;
 723     }
 724
 725   if (local_file)
 726     {
 727       if (*dt & RETROKF)
 728         {
 729           register_download (u->url, local_file);
 730           if (redirection_count && 0 != strcmp (origurl, u->url))
 731             register_redirection (origurl, u->url);
 732           if (*dt & TEXTHTML)
 733             register_html (u->url, local_file);
 734         }
 735     }
 736
 737   if (file)
 738     *file = local_file ? local_file : NULL;
 739   else
 740     xfree_null (local_file);
 741
 742   url_free (u);
 743
 744   if (redirection_count)
 745     {
 746       if (newloc)
 747         *newloc = url;
 748       else
 749         xfree (url);
 750     }
 751   else
 752     {
 753       if (newloc)
 754         *newloc = NULL;
 755       xfree (url);
 756     }
 757
 758   RESTORE_POST_DATA;
 759
 760   return result;
 761 }
 762
 763 /* Find the URLs in the file and call retrieve_url() for each of
 764    them.  If HTML is non-zero, treat the file as HTML, and construct
 765    the URLs accordingly.
 766
 767    If opt.recursive is set, call retrieve_tree() for each file.  */
 768
 769 uerr_t
 770 retrieve_from_file (const char *file, int html, int *count)
 771 {
 772   uerr_t status;
 773   struct urlpos *url_list, *cur_url;
 774
 775   url_list = (html ? get_urls_html (file, NULL, NULL)
 776               : get_urls_file (file));
 777   status = RETROK;             /* Suppose everything is OK.  */
 778   *count = 0;                  /* Reset the URL count.  */
 779
 780   for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
 781     {
 782       char *filename = NULL, *new_file = NULL;
 783       int dt;
 784
 785       if (cur_url->ignore_when_downloading)
 786         continue;
 787
 788       if (opt.quota && total_downloaded_bytes > opt.quota)
 789         {
 790           status = QUOTEXC;
 791           break;
 792         }
 793       if ((opt.recursive || opt.page_requisites)
 794           && cur_url->url->scheme != SCHEME_FTP)
 795         status = retrieve_tree (cur_url->url->url);
 796       else
 797         status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
 798
 799       if (filename && opt.delete_after && file_exists_p (filename))
 800         {
 801           DEBUGP (("Removing file due to --delete-after in"
 802                    " retrieve_from_file():\n"));
 803           logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
 804           if (unlink (filename))
 805             logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
 806           dt &= ~RETROKF;
 807         }
 808
 809       xfree_null (new_file);
 810       xfree_null (filename);
 811     }
 812
 813   /* Free the linked list of URL-s.  */
 814   free_urlpos (url_list);
 815
 816   return status;
 817 }
 818
 819 /* Print `giving up', or `retrying', depending on the impending
 820    action.  N1 and N2 are the attempt number and the attempt limit.  */
 821 void
 822 printwhat (int n1, int n2)
 823 {
 824   logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
 825 }
 826
 827 /* If opt.wait or opt.waitretry are specified, and if certain
 828    conditions are met, sleep the appropriate number of seconds.  See
 829    the documentation of --wait and --waitretry for more information.
 830
 831    COUNT is the count of current retrieval, beginning with 1. */
 832
 833 void
 834 sleep_between_retrievals (int count)
 835 {
 836   static int first_retrieval = 1;
 837
 838   if (first_retrieval)
 839     {
 840       /* Don't sleep before the very first retrieval. */
 841       first_retrieval = 0;
 842       return;
 843     }
 844
 845   if (opt.waitretry && count > 1)
 846     {
 847       /* If opt.waitretry is specified and this is a retry, wait for
 848          COUNT-1 number of seconds, or for opt.waitretry seconds.  */
 849       if (count <= opt.waitretry)
 850         xsleep (count - 1);
 851       else
 852         xsleep (opt.waitretry);
 853     }
 854   else if (opt.wait)
 855     {
 856       if (!opt.random_wait || count > 1)
 857         /* If random-wait is not specified, or if we are sleeping
 858            between retries of the same download, sleep the fixed
 859            interval.  */
 860         xsleep (opt.wait);
 861       else
 862         {
 863           /* Sleep a random amount of time averaging in opt.wait
 864              seconds.  The sleeping amount ranges from 0 to
 865              opt.wait*2, inclusive.  */
 866           double waitsecs = 2 * opt.wait * random_float ();
 867           DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
 868                    opt.wait, waitsecs));
 869           xsleep (waitsecs);
 870         }
 871     }
 872 }
 873
 874 /* Free the linked list of urlpos.  */
 875 void
 876 free_urlpos (struct urlpos *l)
 877 {
 878   while (l)
 879     {
 880       struct urlpos *next = l->next;
 881       if (l->url)
 882         url_free (l->url);
 883       xfree_null (l->local_name);
 884       xfree (l);
 885       l = next;
 886     }
 887 }
 888
 889 /* Rotate FNAME opt.backups times */
 890 void
 891 rotate_backups(const char *fname)
 892 {
 893   int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1;
 894   char *from = (char *)alloca (maxlen);
 895   char *to = (char *)alloca (maxlen);
 896   struct stat sb;
 897   int i;
 898
 899   if (stat (fname, &sb) == 0)
 900     if (S_ISREG (sb.st_mode) == 0)
 901       return;
 902
 903   for (i = opt.backups; i > 1; i--)
 904     {
 905       sprintf (from, "%s.%d", fname, i - 1);
 906       sprintf (to, "%s.%d", fname, i);
 907       rename (from, to);
 908     }
 909
 910   sprintf (to, "%s.%d", fname, 1);
 911   rename(fname, to);
 912 }
 913
 914 static int no_proxy_match PARAMS ((const char *, const char **));
 915
 916 /* Return the URL of the proxy appropriate for url U.  */
 917
 918 static char *
 919 getproxy (struct url *u)
 920 {
 921   char *proxy = NULL;
 922   char *rewritten_url;
 923   static char rewritten_storage[1024];
 924
 925   if (!opt.use_proxy)
 926     return NULL;
 927   if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
 928     return NULL;
 929
 930   switch (u->scheme)
 931     {
 932     case SCHEME_HTTP:
 933       proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
 934       break;
 935 #ifdef HAVE_SSL
 936     case SCHEME_HTTPS:
 937       proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
 938       break;
 939 #endif
 940     case SCHEME_FTP:
 941       proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
 942       break;
 943     case SCHEME_INVALID:
 944       break;
 945     }
 946   if (!proxy || !*proxy)
 947     return NULL;
 948
 949   /* Handle shorthands.  `rewritten_storage' is a kludge to allow
 950      getproxy() to return static storage. */
 951   rewritten_url = rewrite_shorthand_url (proxy);
 952   if (rewritten_url)
 953     {
 954       strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage));
 955       rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
 956       proxy = rewritten_storage;
 957     }
 958
 959   return proxy;
 960 }
 961
 962 /* Should a host be accessed through proxy, concerning no_proxy?  */
 963 int
 964 no_proxy_match (const char *host, const char **no_proxy)
 965 {
 966   if (!no_proxy)
 967     return 1;
 968   else
 969     return !sufmatch (no_proxy, host);
 970 }