sjero.net Git - wget/blob - src/retr.c

   1 /* File retrieval.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or (at
   9 your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <sys/types.h>
  35 #ifdef HAVE_UNISTD_H
  36 # include <unistd.h>
  37 #endif /* HAVE_UNISTD_H */
  38 #include <errno.h>
  39 #ifdef HAVE_STRING_H
  40 # include <string.h>
  41 #else
  42 # include <strings.h>
  43 #endif /* HAVE_STRING_H */
  44 #include <assert.h>
  45
  46 #include "wget.h"
  47 #include "utils.h"
  48 #include "retr.h"
  49 #include "progress.h"
  50 #include "url.h"
  51 #include "recur.h"
  52 #include "ftp.h"
  53 #include "host.h"
  54 #include "connect.h"
  55 #include "hash.h"
  56 #include "convert.h"
  57
  58 #ifdef HAVE_SSL
  59 # include "gen_sslfunc.h"       /* for ssl_iread */
  60 #endif
  61
  62 #ifndef errno
  63 extern int errno;
  64 #endif
  65
  66 /* Total size of downloaded files.  Used to enforce quota.  */
  67 LARGE_INT total_downloaded_bytes;
  68
  69 /* If non-NULL, the stream to which output should be written.  This
  70    stream is initialized when `-O' is used.  */
  71 FILE *output_stream;
  72
  73 /* Whether output_document is a regular file we can manipulate,
  74    i.e. not `-' or a device file. */
  75 int output_stream_regular;
  76 \f
  77 static struct {
  78   long chunk_bytes;
  79   double chunk_start;
  80   double sleep_adjust;
  81 } limit_data;
  82
  83 static void
  84 limit_bandwidth_reset (void)
  85 {
  86   limit_data.chunk_bytes = 0;
  87   limit_data.chunk_start = 0;
  88 }
  89
  90 /* Limit the bandwidth by pausing the download for an amount of time.
  91    BYTES is the number of bytes received from the network, and TIMER
  92    is the timer that started at the beginning of download.  */
  93
  94 static void
  95 limit_bandwidth (long bytes, struct wget_timer *timer)
  96 {
  97   double delta_t = wtimer_read (timer) - limit_data.chunk_start;
  98   double expected;
  99
 100   limit_data.chunk_bytes += bytes;
 101
 102   /* Calculate the amount of time we expect downloading the chunk
 103      should take.  If in reality it took less time, sleep to
 104      compensate for the difference.  */
 105   expected = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;
 106
 107   if (expected > delta_t)
 108     {
 109       double slp = expected - delta_t + limit_data.sleep_adjust;
 110       double t0, t1;
 111       if (slp < 200)
 112         {
 113           DEBUGP (("deferring a %.2f ms sleep (%ld/%.2f).\n",
 114                    slp, limit_data.chunk_bytes, delta_t));
 115           return;
 116         }
 117       DEBUGP (("\nsleeping %.2f ms for %ld bytes, adjust %.2f ms\n",
 118                slp, limit_data.chunk_bytes, limit_data.sleep_adjust));
 119
 120       t0 = wtimer_read (timer);
 121       xsleep (slp / 1000);
 122       wtimer_update (timer);
 123       t1 = wtimer_read (timer);
 124
 125       /* Due to scheduling, we probably slept slightly longer (or
 126          shorter) than desired.  Calculate the difference between the
 127          desired and the actual sleep, and adjust the next sleep by
 128          that amount.  */
 129       limit_data.sleep_adjust = slp - (t1 - t0);
 130     }
 131
 132   limit_data.chunk_bytes = 0;
 133   limit_data.chunk_start = wtimer_read (timer);
 134 }
 135
/* Smaller of I and J, defined only if the platform headers have not
   already provided MIN.  This is a function-like macro: the argument
   that is selected is evaluated twice, so do not pass expressions
   with side effects.  */
#ifndef MIN
# define MIN(i, j) ((i) <= (j) ? (i) : (j))
#endif
 139
 140 /* Write data in BUF to OUT.  However, if *SKIP is non-zero, skip that
 141    amount of data and decrease SKIP.  Increment *TOTAL by the amount
 142    of data written.  */
 143
 144 static int
 145 write_data (FILE *out, const char *buf, int bufsize, long *skip,
 146             long *written)
 147 {
 148   if (!out)
 149     return 1;
 150   if (*skip > bufsize)
 151     {
 152       *skip -= bufsize;
 153       return 1;
 154     }
 155   if (*skip)
 156     {
 157       buf += *skip;
 158       bufsize -= *skip;
 159       *skip = 0;
 160       if (bufsize == 0)
 161         return 1;
 162     }
 163
 164   fwrite (buf, 1, bufsize, out);
 165   *written += bufsize;
 166
 167   /* Immediately flush the downloaded data.  This should not hinder
 168      performance: fast downloads will arrive in large 16K chunks
 169      (which stdio would write out immediately anyway), and slow
 170      downloads wouldn't be limited by disk speed.  */
 171   fflush (out);
 172   return !ferror (out);
 173 }
 174
 175 /* Read the contents of file descriptor FD until it the connection
 176    terminates or a read error occurs.  The data is read in portions of
 177    up to 16K and written to OUT as it arrives.  If opt.verbose is set,
 178    the progress is shown.
 179
 180    TOREAD is the amount of data expected to arrive, normally only used
 181    by the progress gauge.
 182
 183    STARTPOS is the position from which the download starts, used by
 184    the progress gauge.  If QTYREAD is non-NULL, the value it points to
 185    is incremented by the amount of data read from the network.  If
 186    QTYWRITTEN is non-NULL, the value it points to is incremented by
 187    the amount of data written to disk.  The time it took to download
 188    the data (in milliseconds) is stored to ELAPSED.
 189
 190    The function exits and returns the amount of data read.  In case of
 191    error while reading data, -1 is returned.  In case of error while
 192    writing data, -2 is returned.  */
 193
 194 int
 195 fd_read_body (int fd, FILE *out, long toread, long startpos,
 196               long *qtyread, long *qtywritten, double *elapsed, int flags)
 197 {
 198   int ret = 0;
 199
 200   static char dlbuf[16384];
 201   int dlbufsize = sizeof (dlbuf);
 202
 203   struct wget_timer *timer = NULL;
 204   double last_successful_read_tm = 0;
 205
 206   /* The progress gauge, set according to the user preferences. */
 207   void *progress = NULL;
 208
 209   /* Non-zero if the progress gauge is interactive, i.e. if it can
 210      continually update the display.  When true, smaller timeout
 211      values are used so that the gauge can update the display when
 212      data arrives slowly. */
 213   int progress_interactive = 0;
 214
 215   int exact = flags & rb_read_exactly;
 216   long skip = 0;
 217
 218   /* How much data we've read/written.  */
 219   long sum_read = 0;
 220   long sum_written = 0;
 221
 222   if (flags & rb_skip_startpos)
 223     skip = startpos;
 224
 225   if (opt.verbose)
 226     {
 227       /* If we're skipping STARTPOS bytes, pass 0 as the INITIAL
 228          argument to progress_create because the indicator doesn't
 229          (yet) know about "skipping" data.  */
 230       progress = progress_create (skip ? 0 : startpos, startpos + toread);
 231       progress_interactive = progress_interactive_p (progress);
 232     }
 233
 234   if (opt.limit_rate)
 235     limit_bandwidth_reset ();
 236
 237   /* A timer is needed for tracking progress, for throttling, and for
 238      tracking elapsed time.  If either of these are requested, start
 239      the timer.  */
 240   if (progress || opt.limit_rate || elapsed)
 241     {
 242       timer = wtimer_new ();
 243       last_successful_read_tm = 0;
 244     }
 245
 246   /* Use a smaller buffer for low requested bandwidths.  For example,
 247      with --limit-rate=2k, it doesn't make sense to slurp in 16K of
 248      data and then sleep for 8s.  With buffer size equal to the limit,
 249      we never have to sleep for more than one second.  */
 250   if (opt.limit_rate && opt.limit_rate < dlbufsize)
 251     dlbufsize = opt.limit_rate;
 252
 253   /* Read from FD while there is data to read.  Normally toread==0
 254      means that it is unknown how much data is to arrive.  However, if
 255      EXACT is set, then toread==0 means what it says: that no data
 256      should be read.  */
 257   while (!exact || (sum_read < toread))
 258     {
 259       int rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize;
 260       double tmout = opt.read_timeout;
 261       if (progress_interactive)
 262         {
 263           /* For interactive progress gauges, always specify a ~1s
 264              timeout, so that the gauge can be updated regularly even
 265              when the data arrives very slowly or stalls.  */
 266           tmout = 0.95;
 267           if (opt.read_timeout)
 268             {
 269               double waittm;
 270               waittm = (wtimer_read (timer) - last_successful_read_tm) / 1000;
 271               if (waittm + tmout > opt.read_timeout)
 272                 {
 273                   /* Don't let total idle time exceed read timeout. */
 274                   tmout = opt.read_timeout - waittm;
 275                   if (tmout < 0)
 276                     {
 277                       /* We've already exceeded the timeout. */
 278                       ret = -1, errno = ETIMEDOUT;
 279                       break;
 280                     }
 281                 }
 282             }
 283         }
 284       ret = fd_read (fd, dlbuf, rdsize, tmout);
 285
 286       if (ret == 0 || (ret < 0 && errno != ETIMEDOUT))
 287         break;                  /* read error */
 288       else if (ret < 0)
 289         ret = 0;                /* read timeout */
 290
 291       if (progress || opt.limit_rate)
 292         {
 293           wtimer_update (timer);
 294           if (ret > 0)
 295             last_successful_read_tm = wtimer_read (timer);
 296         }
 297
 298       if (ret > 0)
 299         {
 300           sum_read += ret;
 301           if (!write_data (out, dlbuf, ret, &skip, &sum_written))
 302             {
 303               ret = -2;
 304               goto out;
 305             }
 306         }
 307
 308       if (opt.limit_rate)
 309         limit_bandwidth (ret, timer);
 310
 311       if (progress)
 312         progress_update (progress, ret, wtimer_read (timer));
 313 #ifdef WINDOWS
 314       if (toread > 0)
 315         ws_percenttitle (100.0 *
 316                          (startpos + sum_read) / (startpos + toread));
 317 #endif
 318     }
 319   if (ret < -1)
 320     ret = -1;
 321
 322  out:
 323   if (progress)
 324     progress_finish (progress, wtimer_read (timer));
 325
 326   if (elapsed)
 327     *elapsed = wtimer_read (timer);
 328   if (timer)
 329     wtimer_delete (timer);
 330
 331   if (qtyread)
 332     *qtyread += sum_read;
 333   if (qtywritten)
 334     *qtywritten += sum_written;
 335
 336   return ret;
 337 }
 338 \f
 339 /* Read a hunk of data from FD, up until a terminator.  The terminator
 340    is whatever the TERMINATOR function determines it to be; for
 341    example, it can be a line of data, or the head of an HTTP response.
 342    The function returns the data read allocated with malloc.
 343
 344    In case of error, NULL is returned.  In case of EOF and no data
 345    read, NULL is returned and errno set to 0.  In case of EOF with
 346    data having been read, the data is returned, but it will
 347    (obviously) not contain the terminator.
 348
 349    The idea is to be able to read a line of input, or otherwise a hunk
 350    of text, such as the head of an HTTP request, without crossing the
 351    boundary, so that the next call to fd_read etc. reads the data
 352    after the hunk.  To achieve that, this function does the following:
 353
 354    1. Peek at available data.
 355
 356    2. Determine whether the peeked data, along with the previously
 357       read data, includes the terminator.
 358
 359       2a. If yes, read the data until the end of the terminator, and
 360           exit.
 361
 362       2b. If no, read the peeked data and goto 1.
 363
 364    The function is careful to assume as little as possible about the
 365    implementation of peeking.  For example, every peek is followed by
 366    a read.  If the read returns a different amount of data, the
 367    process is retried until all data arrives safely.
 368
 369    BUFSIZE is the size of the initial buffer expected to read all the
 370    data in the typical case.
 371
 372    This function should be used as a building block for other
 373    functions -- see fd_read_line as a simple example.  */
 374
 375 char *
 376 fd_read_hunk (int fd, hunk_terminator_t hunk_terminator, int bufsize)
 377 {
 378   char *hunk = xmalloc (bufsize);
 379   int tail = 0;                 /* tail position in HUNK */
 380
 381   while (1)
 382     {
 383       const char *end;
 384       int pklen, rdlen, remain;
 385
 386       /* First, peek at the available data. */
 387
 388       pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1);
 389       if (pklen < 0)
 390         {
 391           xfree (hunk);
 392           return NULL;
 393         }
 394       end = hunk_terminator (hunk, tail, pklen);
 395       if (end)
 396         {
 397           /* The data contains the terminator: we'll drain the data up
 398              to the end of the terminator.  */
 399           remain = end - (hunk + tail);
 400           if (remain == 0)
 401             {
 402               /* No more data needs to be read. */
 403               hunk[tail] = '\0';
 404               return hunk;
 405             }
 406           if (bufsize - 1 < tail + remain)
 407             {
 408               bufsize = tail + remain + 1;
 409               hunk = xrealloc (hunk, bufsize);
 410             }
 411         }
 412       else
 413         /* No terminator: simply read the data we know is (or should
 414            be) available.  */
 415         remain = pklen;
 416
 417       /* Now, read the data.  Note that we make no assumptions about
 418          how much data we'll get.  (Some TCP stacks are notorious for
 419          read returning less data than the previous MSG_PEEK.)  */
 420
 421       rdlen = fd_read (fd, hunk + tail, remain, 0);
 422       if (rdlen < 0)
 423         {
 424           xfree_null (hunk);
 425           return NULL;
 426         }
 427       tail += rdlen;
 428       hunk[tail] = '\0';
 429
 430       if (rdlen == 0)
 431         {
 432           if (tail == 0)
 433             {
 434               /* EOF without anything having been read */
 435               xfree (hunk);
 436               errno = 0;
 437               return NULL;
 438             }
 439           else
 440             /* EOF seen: return the data we've read. */
 441             return hunk;
 442         }
 443       if (end && rdlen == remain)
 444         /* The terminator was seen and the remaining data drained --
 445            we got what we came for.  */
 446         return hunk;
 447
 448       /* Keep looping until all the data arrives. */
 449
 450       if (tail == bufsize - 1)
 451         {
 452           bufsize <<= 1;
 453           hunk = xrealloc (hunk, bufsize);
 454         }
 455     }
 456 }
 457
 458 static const char *
 459 line_terminator (const char *hunk, int oldlen, int peeklen)
 460 {
 461   const char *p = memchr (hunk + oldlen, '\n', peeklen);
 462   if (p)
 463     /* p+1 because we want the line to include '\n' */
 464     return p + 1;
 465   return NULL;
 466 }
 467
 468 /* Read one line from FD and return it.  The line is allocated using
 469    malloc.
 470
 471    If an error occurs, or if no data can be read, NULL is returned.
 472    In the former case errno indicates the error condition, and in the
 473    latter case, errno is NULL.  */
 474
 475 char *
 476 fd_read_line (int fd)
 477 {
 478   return fd_read_hunk (fd, line_terminator, 128);
 479 }
 480 \f
 481 /* Return a printed representation of the download rate, as
 482    appropriate for the speed.  If PAD is non-zero, strings will be
 483    padded to the width of 7 characters (xxxx.xx).  */
 484 char *
 485 retr_rate (long bytes, double msecs, int pad)
 486 {
 487   static char res[20];
 488   static const char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
 489   int units = 0;
 490
 491   double dlrate = calc_rate (bytes, msecs, &units);
 492   sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);
 493
 494   return res;
 495 }
 496
 497 /* Calculate the download rate and trim it as appropriate for the
 498    speed.  Appropriate means that if rate is greater than 1K/s,
 499    kilobytes are used, and if rate is greater than 1MB/s, megabytes
 500    are used.
 501
 502    UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
 503    GB/s.  */
 504 double
 505 calc_rate (long bytes, double msecs, int *units)
 506 {
 507   double dlrate;
 508
 509   assert (msecs >= 0);
 510   assert (bytes >= 0);
 511
 512   if (msecs == 0)
 513     /* If elapsed time is exactly zero, it means we're under the
 514        granularity of the timer.  This often happens on systems that
 515        use time() for the timer.  */
 516     msecs = wtimer_granularity ();
 517
 518   dlrate = (double)1000 * bytes / msecs;
 519   if (dlrate < 1024.0)
 520     *units = 0;
 521   else if (dlrate < 1024.0 * 1024.0)
 522     *units = 1, dlrate /= 1024.0;
 523   else if (dlrate < 1024.0 * 1024.0 * 1024.0)
 524     *units = 2, dlrate /= (1024.0 * 1024.0);
 525   else
 526     /* Maybe someone will need this, one day. */
 527     *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
 528
 529   return dlrate;
 530 }
 531 \f
/* Maximum number of allowed redirections.  20 was chosen as a
   "reasonable" value, which is low enough to not cause havoc, yet
   high enough to guarantee that normal retrievals will not be hurt by
   the check.  */

#define MAX_REDIRECTIONS 20

/* Stash opt.post_data and opt.post_file_name in local variables and
   clear them in OPT.  The macro expands to code that uses the locals
   post_data_suspended, saved_post_data and saved_post_file_name, so
   it can only be used in a function that declares them.  */
#define SUSPEND_POST_DATA do {                  \
  post_data_suspended = 1;                      \
  saved_post_data = opt.post_data;              \
  saved_post_file_name = opt.post_file_name;    \
  opt.post_data = NULL;                         \
  opt.post_file_name = NULL;                    \
} while (0)

/* Undo SUSPEND_POST_DATA: put the stashed values back into OPT.  Does
   nothing if the POST data is not currently suspended, so it is safe
   to invoke on every exit path.  Relies on the same locals as
   SUSPEND_POST_DATA.  */
#define RESTORE_POST_DATA do {                          \
  if (post_data_suspended)                              \
    {                                                   \
      opt.post_data = saved_post_data;                  \
      opt.post_file_name = saved_post_file_name;        \
      post_data_suspended = 0;                          \
    }                                                   \
} while (0)
 555
 556 static char *getproxy PARAMS ((struct url *));
 557
 558 /* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
 559    FTP, proxy, etc.  */
 560
 561 /* #### This function should be rewritten so it doesn't return from
 562    multiple points. */
 563
 564 uerr_t
 565 retrieve_url (const char *origurl, char **file, char **newloc,
 566               const char *refurl, int *dt)
 567 {
 568   uerr_t result;
 569   char *url;
 570   int location_changed, dummy;
 571   char *mynewloc, *proxy;
 572   struct url *u, *proxy_url;
 573   int up_error_code;            /* url parse error code */
 574   char *local_file;
 575   int redirection_count = 0;
 576
 577   int post_data_suspended = 0;
 578   char *saved_post_data = NULL;
 579   char *saved_post_file_name = NULL;
 580
 581   /* If dt is NULL, use local storage.  */
 582   if (!dt)
 583     {
 584       dt = &dummy;
 585       dummy = 0;
 586     }
 587   url = xstrdup (origurl);
 588   if (newloc)
 589     *newloc = NULL;
 590   if (file)
 591     *file = NULL;
 592
 593   u = url_parse (url, &up_error_code);
 594   if (!u)
 595     {
 596       logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
 597       xfree (url);
 598       return URLERROR;
 599     }
 600
 601   if (!refurl)
 602     refurl = opt.referer;
 603
 604  redirected:
 605
 606   result = NOCONERROR;
 607   mynewloc = NULL;
 608   local_file = NULL;
 609   proxy_url = NULL;
 610
 611   proxy = getproxy (u);
 612   if (proxy)
 613     {
 614       /* Parse the proxy URL.  */
 615       proxy_url = url_parse (proxy, &up_error_code);
 616       if (!proxy_url)
 617         {
 618           logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
 619                      proxy, url_error (up_error_code));
 620           xfree (url);
 621           RESTORE_POST_DATA;
 622           return PROXERR;
 623         }
 624       if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
 625         {
 626           logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
 627           url_free (proxy_url);
 628           xfree (url);
 629           RESTORE_POST_DATA;
 630           return PROXERR;
 631         }
 632     }
 633
 634   if (u->scheme == SCHEME_HTTP
 635 #ifdef HAVE_SSL
 636       || u->scheme == SCHEME_HTTPS
 637 #endif
 638       || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
 639     {
 640       result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
 641     }
 642   else if (u->scheme == SCHEME_FTP)
 643     {
 644       /* If this is a redirection, we must not allow recursive FTP
 645          retrieval, so we save recursion to oldrec, and restore it
 646          later.  */
 647       int oldrec = opt.recursive;
 648       if (redirection_count)
 649         opt.recursive = 0;
 650       result = ftp_loop (u, dt, proxy_url);
 651       opt.recursive = oldrec;
 652
 653       /* There is a possibility of having HTTP being redirected to
 654          FTP.  In these cases we must decide whether the text is HTML
 655          according to the suffix.  The HTML suffixes are `.html',
 656          `.htm' and a few others, case-insensitive.  */
 657       if (redirection_count && local_file && u->scheme == SCHEME_FTP)
 658         {
 659           if (has_html_suffix_p (local_file))
 660             *dt |= TEXTHTML;
 661         }
 662     }
 663
 664   if (proxy_url)
 665     {
 666       url_free (proxy_url);
 667       proxy_url = NULL;
 668     }
 669
 670   location_changed = (result == NEWLOCATION);
 671   if (location_changed)
 672     {
 673       char *construced_newloc;
 674       struct url *newloc_parsed;
 675
 676       assert (mynewloc != NULL);
 677
 678       if (local_file)
 679         xfree (local_file);
 680
 681       /* The HTTP specs only allow absolute URLs to appear in
 682          redirects, but a ton of boneheaded webservers and CGIs out
 683          there break the rules and use relative URLs, and popular
 684          browsers are lenient about this, so wget should be too. */
 685       construced_newloc = uri_merge (url, mynewloc);
 686       xfree (mynewloc);
 687       mynewloc = construced_newloc;
 688
 689       /* Now, see if this new location makes sense. */
 690       newloc_parsed = url_parse (mynewloc, &up_error_code);
 691       if (!newloc_parsed)
 692         {
 693           logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
 694                      url_error (up_error_code));
 695           url_free (u);
 696           xfree (url);
 697           xfree (mynewloc);
 698           RESTORE_POST_DATA;
 699           return result;
 700         }
 701
 702       /* Now mynewloc will become newloc_parsed->url, because if the
 703          Location contained relative paths like .././something, we
 704          don't want that propagating as url.  */
 705       xfree (mynewloc);
 706       mynewloc = xstrdup (newloc_parsed->url);
 707
 708       /* Check for max. number of redirections.  */
 709       if (++redirection_count > MAX_REDIRECTIONS)
 710         {
 711           logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
 712                      MAX_REDIRECTIONS);
 713           url_free (newloc_parsed);
 714           url_free (u);
 715           xfree (url);
 716           xfree (mynewloc);
 717           RESTORE_POST_DATA;
 718           return WRONGCODE;
 719         }
 720
 721       xfree (url);
 722       url = mynewloc;
 723       url_free (u);
 724       u = newloc_parsed;
 725
 726       /* If we're being redirected from POST, we don't want to POST
 727          again.  Many requests answer POST with a redirection to an
 728          index page; that redirection is clearly a GET.  We "suspend"
 729          POST data for the duration of the redirections, and restore
 730          it when we're done. */
 731       if (!post_data_suspended)
 732         SUSPEND_POST_DATA;
 733
 734       goto redirected;
 735     }
 736
 737   if (local_file)
 738     {
 739       if (*dt & RETROKF)
 740         {
 741           register_download (u->url, local_file);
 742           if (redirection_count && 0 != strcmp (origurl, u->url))
 743             register_redirection (origurl, u->url);
 744           if (*dt & TEXTHTML)
 745             register_html (u->url, local_file);
 746         }
 747     }
 748
 749   if (file)
 750     *file = local_file ? local_file : NULL;
 751   else
 752     xfree_null (local_file);
 753
 754   url_free (u);
 755
 756   if (redirection_count)
 757     {
 758       if (newloc)
 759         *newloc = url;
 760       else
 761         xfree (url);
 762     }
 763   else
 764     {
 765       if (newloc)
 766         *newloc = NULL;
 767       xfree (url);
 768     }
 769
 770   RESTORE_POST_DATA;
 771
 772   return result;
 773 }
 774
 775 /* Find the URLs in the file and call retrieve_url() for each of
 776    them.  If HTML is non-zero, treat the file as HTML, and construct
 777    the URLs accordingly.
 778
 779    If opt.recursive is set, call retrieve_tree() for each file.  */
 780
 781 uerr_t
 782 retrieve_from_file (const char *file, int html, int *count)
 783 {
 784   uerr_t status;
 785   struct urlpos *url_list, *cur_url;
 786
 787   url_list = (html ? get_urls_html (file, NULL, NULL)
 788               : get_urls_file (file));
 789   status = RETROK;             /* Suppose everything is OK.  */
 790   *count = 0;                  /* Reset the URL count.  */
 791
 792   for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
 793     {
 794       char *filename = NULL, *new_file = NULL;
 795       int dt;
 796
 797       if (cur_url->ignore_when_downloading)
 798         continue;
 799
 800       if (opt.quota && total_downloaded_bytes > opt.quota)
 801         {
 802           status = QUOTEXC;
 803           break;
 804         }
 805       if ((opt.recursive || opt.page_requisites)
 806           && cur_url->url->scheme != SCHEME_FTP)
 807         status = retrieve_tree (cur_url->url->url);
 808       else
 809         status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
 810
 811       if (filename && opt.delete_after && file_exists_p (filename))
 812         {
 813           DEBUGP (("Removing file due to --delete-after in"
 814                    " retrieve_from_file():\n"));
 815           logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
 816           if (unlink (filename))
 817             logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
 818           dt &= ~RETROKF;
 819         }
 820
 821       xfree_null (new_file);
 822       xfree_null (filename);
 823     }
 824
 825   /* Free the linked list of URL-s.  */
 826   free_urlpos (url_list);
 827
 828   return status;
 829 }
 830
 831 /* Print `giving up', or `retrying', depending on the impending
 832    action.  N1 and N2 are the attempt number and the attempt limit.  */
 833 void
 834 printwhat (int n1, int n2)
 835 {
 836   logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
 837 }
 838
 839 /* If opt.wait or opt.waitretry are specified, and if certain
 840    conditions are met, sleep the appropriate number of seconds.  See
 841    the documentation of --wait and --waitretry for more information.
 842
 843    COUNT is the count of current retrieval, beginning with 1. */
 844
 845 void
 846 sleep_between_retrievals (int count)
 847 {
 848   static int first_retrieval = 1;
 849
 850   if (first_retrieval)
 851     {
 852       /* Don't sleep before the very first retrieval. */
 853       first_retrieval = 0;
 854       return;
 855     }
 856
 857   if (opt.waitretry && count > 1)
 858     {
 859       /* If opt.waitretry is specified and this is a retry, wait for
 860          COUNT-1 number of seconds, or for opt.waitretry seconds.  */
 861       if (count <= opt.waitretry)
 862         xsleep (count - 1);
 863       else
 864         xsleep (opt.waitretry);
 865     }
 866   else if (opt.wait)
 867     {
 868       if (!opt.random_wait || count > 1)
 869         /* If random-wait is not specified, or if we are sleeping
 870            between retries of the same download, sleep the fixed
 871            interval.  */
 872         xsleep (opt.wait);
 873       else
 874         {
 875           /* Sleep a random amount of time averaging in opt.wait
 876              seconds.  The sleeping amount ranges from 0 to
 877              opt.wait*2, inclusive.  */
 878           double waitsecs = 2 * opt.wait * random_float ();
 879           DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
 880                    opt.wait, waitsecs));
 881           xsleep (waitsecs);
 882         }
 883     }
 884 }
 885
 886 /* Free the linked list of urlpos.  */
 887 void
 888 free_urlpos (struct urlpos *l)
 889 {
 890   while (l)
 891     {
 892       struct urlpos *next = l->next;
 893       if (l->url)
 894         url_free (l->url);
 895       xfree_null (l->local_name);
 896       xfree (l);
 897       l = next;
 898     }
 899 }
 900
 901 /* Rotate FNAME opt.backups times */
 902 void
 903 rotate_backups(const char *fname)
 904 {
 905   int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1;
 906   char *from = (char *)alloca (maxlen);
 907   char *to = (char *)alloca (maxlen);
 908   struct stat sb;
 909   int i;
 910
 911   if (stat (fname, &sb) == 0)
 912     if (S_ISREG (sb.st_mode) == 0)
 913       return;
 914
 915   for (i = opt.backups; i > 1; i--)
 916     {
 917       sprintf (from, "%s.%d", fname, i - 1);
 918       sprintf (to, "%s.%d", fname, i);
 919       rename (from, to);
 920     }
 921
 922   sprintf (to, "%s.%d", fname, 1);
 923   rename(fname, to);
 924 }
 925
 926 static int no_proxy_match PARAMS ((const char *, const char **));
 927
 928 /* Return the URL of the proxy appropriate for url U.  */
 929
 930 static char *
 931 getproxy (struct url *u)
 932 {
 933   char *proxy = NULL;
 934   char *rewritten_url;
 935   static char rewritten_storage[1024];
 936
 937   if (!opt.use_proxy)
 938     return NULL;
 939   if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
 940     return NULL;
 941
 942   switch (u->scheme)
 943     {
 944     case SCHEME_HTTP:
 945       proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
 946       break;
 947 #ifdef HAVE_SSL
 948     case SCHEME_HTTPS:
 949       proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
 950       break;
 951 #endif
 952     case SCHEME_FTP:
 953       proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
 954       break;
 955     case SCHEME_INVALID:
 956       break;
 957     }
 958   if (!proxy || !*proxy)
 959     return NULL;
 960
 961   /* Handle shorthands.  `rewritten_storage' is a kludge to allow
 962      getproxy() to return static storage. */
 963   rewritten_url = rewrite_shorthand_url (proxy);
 964   if (rewritten_url)
 965     {
 966       strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage));
 967       rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
 968       proxy = rewritten_storage;
 969     }
 970
 971   return proxy;
 972 }
 973
 974 /* Should a host be accessed through proxy, concerning no_proxy?  */
 975 int
 976 no_proxy_match (const char *host, const char **no_proxy)
 977 {
 978   if (!no_proxy)
 979     return 1;
 980   else
 981     return !sufmatch (no_proxy, host);
 982 }