sjero.net Git - wget/blob - src/retr.c

   1 /* File retrieval.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or (at
   9 your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <sys/types.h>
  35 #ifdef HAVE_UNISTD_H
  36 # include <unistd.h>
  37 #endif /* HAVE_UNISTD_H */
  38 #include <errno.h>
  39 #ifdef HAVE_STRING_H
  40 # include <string.h>
  41 #else
  42 # include <strings.h>
  43 #endif /* HAVE_STRING_H */
  44 #include <assert.h>
  45
  46 #include "wget.h"
  47 #include "utils.h"
  48 #include "retr.h"
  49 #include "progress.h"
  50 #include "url.h"
  51 #include "recur.h"
  52 #include "ftp.h"
  53 #include "host.h"
  54 #include "connect.h"
  55 #include "hash.h"
  56 #include "convert.h"
  57
  58 #ifdef HAVE_SSL
  59 # include "gen_sslfunc.h"       /* for ssl_iread */
  60 #endif
  61
  62 #ifndef errno
  63 extern int errno;
  64 #endif
  65
/* Total size of downloaded files.  Used to enforce quota: it is
   compared against opt.quota before each retrieval in
   retrieve_from_file.  */
LARGE_INT total_downloaded_bytes;

/* If non-NULL, the stream to which output should be written.  This
   stream is initialized when `-O' is used.  */
FILE *output_stream;

/* Whether output_document is a regular file we can manipulate,
   i.e. not `-' or a device file. */
int output_stream_regular;
  76 \f
  77 static struct {
  78   long chunk_bytes;
  79   double chunk_start;
  80   double sleep_adjust;
  81 } limit_data;
  82
  83 static void
  84 limit_bandwidth_reset (void)
  85 {
  86   limit_data.chunk_bytes = 0;
  87   limit_data.chunk_start = 0;
  88 }
  89
  90 /* Limit the bandwidth by pausing the download for an amount of time.
  91    BYTES is the number of bytes received from the network, and TIMER
  92    is the timer that started at the beginning of download.  */
  93
  94 static void
  95 limit_bandwidth (long bytes, struct wget_timer *timer)
  96 {
  97   double delta_t = wtimer_read (timer) - limit_data.chunk_start;
  98   double expected;
  99
 100   limit_data.chunk_bytes += bytes;
 101
 102   /* Calculate the amount of time we expect downloading the chunk
 103      should take.  If in reality it took less time, sleep to
 104      compensate for the difference.  */
 105   expected = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;
 106
 107   if (expected > delta_t)
 108     {
 109       double slp = expected - delta_t + limit_data.sleep_adjust;
 110       double t0, t1;
 111       if (slp < 200)
 112         {
 113           DEBUGP (("deferring a %.2f ms sleep (%ld/%.2f).\n",
 114                    slp, limit_data.chunk_bytes, delta_t));
 115           return;
 116         }
 117       DEBUGP (("\nsleeping %.2f ms for %ld bytes, adjust %.2f ms\n",
 118                slp, limit_data.chunk_bytes, limit_data.sleep_adjust));
 119
 120       t0 = wtimer_read (timer);
 121       xsleep (slp / 1000);
 122       wtimer_update (timer);
 123       t1 = wtimer_read (timer);
 124
 125       /* Due to scheduling, we probably slept slightly longer (or
 126          shorter) than desired.  Calculate the difference between the
 127          desired and the actual sleep, and adjust the next sleep by
 128          that amount.  */
 129       limit_data.sleep_adjust = slp - (t1 - t0);
 130     }
 131
 132   limit_data.chunk_bytes = 0;
 133   limit_data.chunk_start = wtimer_read (timer);
 134 }
 135
 136 #ifndef MIN
 137 # define MIN(i, j) ((i) <= (j) ? (i) : (j))
 138 #endif
 139
 140 /* Write data in BUF to OUT.  However, if *SKIP is non-zero, skip that
 141    amount of data and decrease SKIP.  Increment *TOTAL by the amount
 142    of data written.  */
 143
 144 static int
 145 write_data (FILE *out, const char *buf, int bufsize, long *skip,
 146             long *written)
 147 {
 148   if (!out)
 149     return 1;
 150   if (*skip > bufsize)
 151     {
 152       *skip -= bufsize;
 153       return 1;
 154     }
 155   if (*skip)
 156     {
 157       buf += *skip;
 158       bufsize -= *skip;
 159       *skip = 0;
 160       if (bufsize == 0)
 161         return 1;
 162     }
 163
 164   fwrite (buf, 1, bufsize, out);
 165   *written += bufsize;
 166
 167   /* Immediately flush the downloaded data.  This should not hinder
 168      performance: fast downloads will arrive in large 16K chunks
 169      (which stdio would write out immediately anyway), and slow
 170      downloads wouldn't be limited by disk speed.  */
 171   fflush (out);
 172   return !ferror (out);
 173 }
 174
 175 /* Read the contents of file descriptor FD until it the connection
 176    terminates or a read error occurs.  The data is read in portions of
 177    up to 16K and written to OUT as it arrives.  If opt.verbose is set,
 178    the progress is shown.
 179
 180    TOREAD is the amount of data expected to arrive, normally only used
 181    by the progress gauge.
 182
 183    STARTPOS is the position from which the download starts, used by
 184    the progress gauge.  If QTYREAD is non-NULL, the value it points to
 185    is incremented by the amount of data read from the network.  If
 186    QTYWRITTEN is non-NULL, the value it points to is incremented by
 187    the amount of data written to disk.  The time it took to download
 188    the data (in milliseconds) is stored to ELAPSED.
 189
 190    The function exits and returns the amount of data read.  In case of
 191    error while reading data, -1 is returned.  In case of error while
 192    writing data, -2 is returned.  */
 193
 194 int
 195 fd_read_body (int fd, FILE *out, long toread, long startpos,
 196               long *qtyread, long *qtywritten, double *elapsed, int flags)
 197 {
 198   int ret = 0;
 199
 200   static char dlbuf[16384];
 201   int dlbufsize = sizeof (dlbuf);
 202
 203   struct wget_timer *timer = NULL;
 204   double last_successful_read_tm = 0;
 205
 206   /* The progress gauge, set according to the user preferences. */
 207   void *progress = NULL;
 208
 209   /* Non-zero if the progress gauge is interactive, i.e. if it can
 210      continually update the display.  When true, smaller timeout
 211      values are used so that the gauge can update the display when
 212      data arrives slowly. */
 213   int progress_interactive = 0;
 214
 215   int exact = flags & rb_read_exactly;
 216   long skip = 0;
 217
 218   /* How much data we've read/written.  */
 219   long sum_read = 0;
 220   long sum_written = 0;
 221
 222   if (flags & rb_skip_startpos)
 223     skip = startpos;
 224
 225   if (opt.verbose)
 226     {
 227       /* If we're skipping STARTPOS bytes, hide it from
 228          progress_create because the indicator can't deal with it.  */
 229       progress = progress_create (skip ? 0 : startpos, toread);
 230       progress_interactive = progress_interactive_p (progress);
 231     }
 232
 233   if (opt.limit_rate)
 234     limit_bandwidth_reset ();
 235
 236   /* A timer is needed for tracking progress, for throttling, and for
 237      tracking elapsed time.  If either of these are requested, start
 238      the timer.  */
 239   if (progress || opt.limit_rate || elapsed)
 240     {
 241       timer = wtimer_new ();
 242       last_successful_read_tm = 0;
 243     }
 244
 245   /* Use a smaller buffer for low requested bandwidths.  For example,
 246      with --limit-rate=2k, it doesn't make sense to slurp in 16K of
 247      data and then sleep for 8s.  With buffer size equal to the limit,
 248      we never have to sleep for more than one second.  */
 249   if (opt.limit_rate && opt.limit_rate < dlbufsize)
 250     dlbufsize = opt.limit_rate;
 251
 252   /* Read from FD while there is data to read.  Normally toread==0
 253      means that it is unknown how much data is to arrive.  However, if
 254      EXACT is set, then toread==0 means what it says: that no data
 255      should be read.  */
 256   while (!exact || (sum_read < toread))
 257     {
 258       int rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize;
 259       double tmout = opt.read_timeout;
 260       if (progress_interactive)
 261         {
 262           /* For interactive progress gauges, always specify a ~1s
 263              timeout, so that the gauge can be updated regularly even
 264              when the data arrives very slowly or stalls.  */
 265           tmout = 0.95;
 266           if (opt.read_timeout)
 267             {
 268               double waittm;
 269               waittm = (wtimer_read (timer) - last_successful_read_tm) / 1000;
 270               if (waittm + tmout > opt.read_timeout)
 271                 {
 272                   /* Don't let total idle time exceed read timeout. */
 273                   tmout = opt.read_timeout - waittm;
 274                   if (tmout < 0)
 275                     {
 276                       /* We've already exceeded the timeout. */
 277                       ret = -1, errno = ETIMEDOUT;
 278                       break;
 279                     }
 280                 }
 281             }
 282         }
 283       ret = fd_read (fd, dlbuf, rdsize, tmout);
 284
 285       if (ret == 0 || (ret < 0 && errno != ETIMEDOUT))
 286         break;                  /* read error */
 287       else if (ret < 0)
 288         ret = 0;                /* read timeout */
 289
 290       if (progress || opt.limit_rate)
 291         {
 292           wtimer_update (timer);
 293           if (ret > 0)
 294             last_successful_read_tm = wtimer_read (timer);
 295         }
 296
 297       if (ret > 0)
 298         {
 299           sum_read += ret;
 300           if (!write_data (out, dlbuf, ret, &skip, &sum_written))
 301             {
 302               ret = -2;
 303               goto out;
 304             }
 305         }
 306
 307       if (opt.limit_rate)
 308         limit_bandwidth (ret, timer);
 309
 310       if (progress)
 311         progress_update (progress, ret, wtimer_read (timer));
 312 #ifdef WINDOWS
 313       if (toread > 0)
 314         ws_percenttitle (100.0 *
 315                          (startpos + sum_read) / (startpos + toread));
 316 #endif
 317     }
 318   if (ret < -1)
 319     ret = -1;
 320
 321  out:
 322   if (progress)
 323     progress_finish (progress, wtimer_read (timer));
 324
 325   if (elapsed)
 326     *elapsed = wtimer_read (timer);
 327   if (timer)
 328     wtimer_delete (timer);
 329
 330   if (qtyread)
 331     *qtyread += sum_read;
 332   if (qtywritten)
 333     *qtywritten += sum_written;
 334
 335   return ret;
 336 }
 337 \f
 338 /* Read a hunk of data from FD, up until a terminator.  The terminator
 339    is whatever the TERMINATOR function determines it to be; for
 340    example, it can be a line of data, or the head of an HTTP response.
 341    The function returns the data read allocated with malloc.
 342
 343    In case of error, NULL is returned.  In case of EOF and no data
 344    read, NULL is returned and errno set to 0.  In case of EOF with
 345    data having been read, the data is returned, but it will
 346    (obviously) not contain the terminator.
 347
 348    The idea is to be able to read a line of input, or otherwise a hunk
 349    of text, such as the head of an HTTP request, without crossing the
 350    boundary, so that the next call to fd_read etc. reads the data
 351    after the hunk.  To achieve that, this function does the following:
 352
 353    1. Peek at available data.
 354
 355    2. Determine whether the peeked data, along with the previously
 356       read data, includes the terminator.
 357
 358       2a. If yes, read the data until the end of the terminator, and
 359           exit.
 360
 361       2b. If no, read the peeked data and goto 1.
 362
 363    The function is careful to assume as little as possible about the
 364    implementation of peeking.  For example, every peek is followed by
 365    a read.  If the read returns a different amount of data, the
 366    process is retried until all data arrives safely.
 367
 368    BUFSIZE is the size of the initial buffer expected to read all the
 369    data in the typical case.
 370
 371    This function should be used as a building block for other
 372    functions -- see fd_read_line as a simple example.  */
 373
 374 char *
 375 fd_read_hunk (int fd, hunk_terminator_t hunk_terminator, int bufsize)
 376 {
 377   char *hunk = xmalloc (bufsize);
 378   int tail = 0;                 /* tail position in HUNK */
 379
 380   while (1)
 381     {
 382       const char *end;
 383       int pklen, rdlen, remain;
 384
 385       /* First, peek at the available data. */
 386
 387       pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1);
 388       if (pklen < 0)
 389         {
 390           xfree (hunk);
 391           return NULL;
 392         }
 393       end = hunk_terminator (hunk, tail, pklen);
 394       if (end)
 395         {
 396           /* The data contains the terminator: we'll drain the data up
 397              to the end of the terminator.  */
 398           remain = end - (hunk + tail);
 399           if (remain == 0)
 400             {
 401               /* No more data needs to be read. */
 402               hunk[tail] = '\0';
 403               return hunk;
 404             }
 405           if (bufsize - 1 < tail + remain)
 406             {
 407               bufsize = tail + remain + 1;
 408               hunk = xrealloc (hunk, bufsize);
 409             }
 410         }
 411       else
 412         /* No terminator: simply read the data we know is (or should
 413            be) available.  */
 414         remain = pklen;
 415
 416       /* Now, read the data.  Note that we make no assumptions about
 417          how much data we'll get.  (Some TCP stacks are notorious for
 418          read returning less data than the previous MSG_PEEK.)  */
 419
 420       rdlen = fd_read (fd, hunk + tail, remain, 0);
 421       if (rdlen < 0)
 422         {
 423           xfree_null (hunk);
 424           return NULL;
 425         }
 426       tail += rdlen;
 427       hunk[tail] = '\0';
 428
 429       if (rdlen == 0)
 430         {
 431           if (tail == 0)
 432             {
 433               /* EOF without anything having been read */
 434               xfree (hunk);
 435               errno = 0;
 436               return NULL;
 437             }
 438           else
 439             /* EOF seen: return the data we've read. */
 440             return hunk;
 441         }
 442       if (end && rdlen == remain)
 443         /* The terminator was seen and the remaining data drained --
 444            we got what we came for.  */
 445         return hunk;
 446
 447       /* Keep looping until all the data arrives. */
 448
 449       if (tail == bufsize - 1)
 450         {
 451           bufsize <<= 1;
 452           hunk = xrealloc (hunk, bufsize);
 453         }
 454     }
 455 }
 456
 457 static const char *
 458 line_terminator (const char *hunk, int oldlen, int peeklen)
 459 {
 460   const char *p = memchr (hunk + oldlen, '\n', peeklen);
 461   if (p)
 462     /* p+1 because we want the line to include '\n' */
 463     return p + 1;
 464   return NULL;
 465 }
 466
 467 /* Read one line from FD and return it.  The line is allocated using
 468    malloc.
 469
 470    If an error occurs, or if no data can be read, NULL is returned.
 471    In the former case errno indicates the error condition, and in the
 472    latter case, errno is NULL.  */
 473
 474 char *
 475 fd_read_line (int fd)
 476 {
 477   return fd_read_hunk (fd, line_terminator, 128);
 478 }
 479 \f
 480 /* Return a printed representation of the download rate, as
 481    appropriate for the speed.  If PAD is non-zero, strings will be
 482    padded to the width of 7 characters (xxxx.xx).  */
 483 char *
 484 retr_rate (long bytes, double msecs, int pad)
 485 {
 486   static char res[20];
 487   static char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
 488   int units = 0;
 489
 490   double dlrate = calc_rate (bytes, msecs, &units);
 491   sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);
 492
 493   return res;
 494 }
 495
 496 /* Calculate the download rate and trim it as appropriate for the
 497    speed.  Appropriate means that if rate is greater than 1K/s,
 498    kilobytes are used, and if rate is greater than 1MB/s, megabytes
 499    are used.
 500
 501    UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
 502    GB/s.  */
 503 double
 504 calc_rate (long bytes, double msecs, int *units)
 505 {
 506   double dlrate;
 507
 508   assert (msecs >= 0);
 509   assert (bytes >= 0);
 510
 511   if (msecs == 0)
 512     /* If elapsed time is exactly zero, it means we're under the
 513        granularity of the timer.  This often happens on systems that
 514        use time() for the timer.  */
 515     msecs = wtimer_granularity ();
 516
 517   dlrate = (double)1000 * bytes / msecs;
 518   if (dlrate < 1024.0)
 519     *units = 0;
 520   else if (dlrate < 1024.0 * 1024.0)
 521     *units = 1, dlrate /= 1024.0;
 522   else if (dlrate < 1024.0 * 1024.0 * 1024.0)
 523     *units = 2, dlrate /= (1024.0 * 1024.0);
 524   else
 525     /* Maybe someone will need this, one day. */
 526     *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
 527
 528   return dlrate;
 529 }
 530 \f
 531 /* Maximum number of allowed redirections.  20 was chosen as a
 532    "reasonable" value, which is low enough to not cause havoc, yet
 533    high enough to guarantee that normal retrievals will not be hurt by
 534    the check.  */
 535
 536 #define MAX_REDIRECTIONS 20
 537
 538 #define SUSPEND_POST_DATA do {                  \
 539   post_data_suspended = 1;                      \
 540   saved_post_data = opt.post_data;              \
 541   saved_post_file_name = opt.post_file_name;    \
 542   opt.post_data = NULL;                         \
 543   opt.post_file_name = NULL;                    \
 544 } while (0)
 545
 546 #define RESTORE_POST_DATA do {                          \
 547   if (post_data_suspended)                              \
 548     {                                                   \
 549       opt.post_data = saved_post_data;                  \
 550       opt.post_file_name = saved_post_file_name;        \
 551       post_data_suspended = 0;                          \
 552     }                                                   \
 553 } while (0)
 554
 555 static char *getproxy PARAMS ((struct url *));
 556
 557 /* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
 558    FTP, proxy, etc.  */
 559
 560 /* #### This function should be rewritten so it doesn't return from
 561    multiple points. */
 562
 563 uerr_t
 564 retrieve_url (const char *origurl, char **file, char **newloc,
 565               const char *refurl, int *dt)
 566 {
 567   uerr_t result;
 568   char *url;
 569   int location_changed, dummy;
 570   char *mynewloc, *proxy;
 571   struct url *u, *proxy_url;
 572   int up_error_code;            /* url parse error code */
 573   char *local_file;
 574   int redirection_count = 0;
 575
 576   int post_data_suspended = 0;
 577   char *saved_post_data = NULL;
 578   char *saved_post_file_name = NULL;
 579
 580   /* If dt is NULL, use local storage.  */
 581   if (!dt)
 582     {
 583       dt = &dummy;
 584       dummy = 0;
 585     }
 586   url = xstrdup (origurl);
 587   if (newloc)
 588     *newloc = NULL;
 589   if (file)
 590     *file = NULL;
 591
 592   u = url_parse (url, &up_error_code);
 593   if (!u)
 594     {
 595       logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
 596       xfree (url);
 597       return URLERROR;
 598     }
 599
 600   if (!refurl)
 601     refurl = opt.referer;
 602
 603  redirected:
 604
 605   result = NOCONERROR;
 606   mynewloc = NULL;
 607   local_file = NULL;
 608   proxy_url = NULL;
 609
 610   proxy = getproxy (u);
 611   if (proxy)
 612     {
 613       /* Parse the proxy URL.  */
 614       proxy_url = url_parse (proxy, &up_error_code);
 615       if (!proxy_url)
 616         {
 617           logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
 618                      proxy, url_error (up_error_code));
 619           xfree (url);
 620           RESTORE_POST_DATA;
 621           return PROXERR;
 622         }
 623       if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
 624         {
 625           logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
 626           url_free (proxy_url);
 627           xfree (url);
 628           RESTORE_POST_DATA;
 629           return PROXERR;
 630         }
 631     }
 632
 633   if (u->scheme == SCHEME_HTTP
 634 #ifdef HAVE_SSL
 635       || u->scheme == SCHEME_HTTPS
 636 #endif
 637       || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
 638     {
 639       result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
 640     }
 641   else if (u->scheme == SCHEME_FTP)
 642     {
 643       /* If this is a redirection, we must not allow recursive FTP
 644          retrieval, so we save recursion to oldrec, and restore it
 645          later.  */
 646       int oldrec = opt.recursive;
 647       if (redirection_count)
 648         opt.recursive = 0;
 649       result = ftp_loop (u, dt, proxy_url);
 650       opt.recursive = oldrec;
 651
 652       /* There is a possibility of having HTTP being redirected to
 653          FTP.  In these cases we must decide whether the text is HTML
 654          according to the suffix.  The HTML suffixes are `.html',
 655          `.htm' and a few others, case-insensitive.  */
 656       if (redirection_count && local_file && u->scheme == SCHEME_FTP)
 657         {
 658           if (has_html_suffix_p (local_file))
 659             *dt |= TEXTHTML;
 660         }
 661     }
 662
 663   if (proxy_url)
 664     {
 665       url_free (proxy_url);
 666       proxy_url = NULL;
 667     }
 668
 669   location_changed = (result == NEWLOCATION);
 670   if (location_changed)
 671     {
 672       char *construced_newloc;
 673       struct url *newloc_parsed;
 674
 675       assert (mynewloc != NULL);
 676
 677       if (local_file)
 678         xfree (local_file);
 679
 680       /* The HTTP specs only allow absolute URLs to appear in
 681          redirects, but a ton of boneheaded webservers and CGIs out
 682          there break the rules and use relative URLs, and popular
 683          browsers are lenient about this, so wget should be too. */
 684       construced_newloc = uri_merge (url, mynewloc);
 685       xfree (mynewloc);
 686       mynewloc = construced_newloc;
 687
 688       /* Now, see if this new location makes sense. */
 689       newloc_parsed = url_parse (mynewloc, &up_error_code);
 690       if (!newloc_parsed)
 691         {
 692           logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
 693                      url_error (up_error_code));
 694           url_free (u);
 695           xfree (url);
 696           xfree (mynewloc);
 697           RESTORE_POST_DATA;
 698           return result;
 699         }
 700
 701       /* Now mynewloc will become newloc_parsed->url, because if the
 702          Location contained relative paths like .././something, we
 703          don't want that propagating as url.  */
 704       xfree (mynewloc);
 705       mynewloc = xstrdup (newloc_parsed->url);
 706
 707       /* Check for max. number of redirections.  */
 708       if (++redirection_count > MAX_REDIRECTIONS)
 709         {
 710           logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
 711                      MAX_REDIRECTIONS);
 712           url_free (newloc_parsed);
 713           url_free (u);
 714           xfree (url);
 715           xfree (mynewloc);
 716           RESTORE_POST_DATA;
 717           return WRONGCODE;
 718         }
 719
 720       xfree (url);
 721       url = mynewloc;
 722       url_free (u);
 723       u = newloc_parsed;
 724
 725       /* If we're being redirected from POST, we don't want to POST
 726          again.  Many requests answer POST with a redirection to an
 727          index page; that redirection is clearly a GET.  We "suspend"
 728          POST data for the duration of the redirections, and restore
 729          it when we're done. */
 730       if (!post_data_suspended)
 731         SUSPEND_POST_DATA;
 732
 733       goto redirected;
 734     }
 735
 736   if (local_file)
 737     {
 738       if (*dt & RETROKF)
 739         {
 740           register_download (u->url, local_file);
 741           if (redirection_count && 0 != strcmp (origurl, u->url))
 742             register_redirection (origurl, u->url);
 743           if (*dt & TEXTHTML)
 744             register_html (u->url, local_file);
 745         }
 746     }
 747
 748   if (file)
 749     *file = local_file ? local_file : NULL;
 750   else
 751     xfree_null (local_file);
 752
 753   url_free (u);
 754
 755   if (redirection_count)
 756     {
 757       if (newloc)
 758         *newloc = url;
 759       else
 760         xfree (url);
 761     }
 762   else
 763     {
 764       if (newloc)
 765         *newloc = NULL;
 766       xfree (url);
 767     }
 768
 769   RESTORE_POST_DATA;
 770
 771   return result;
 772 }
 773
 774 /* Find the URLs in the file and call retrieve_url() for each of
 775    them.  If HTML is non-zero, treat the file as HTML, and construct
 776    the URLs accordingly.
 777
 778    If opt.recursive is set, call retrieve_tree() for each file.  */
 779
 780 uerr_t
 781 retrieve_from_file (const char *file, int html, int *count)
 782 {
 783   uerr_t status;
 784   struct urlpos *url_list, *cur_url;
 785
 786   url_list = (html ? get_urls_html (file, NULL, NULL)
 787               : get_urls_file (file));
 788   status = RETROK;             /* Suppose everything is OK.  */
 789   *count = 0;                  /* Reset the URL count.  */
 790
 791   for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
 792     {
 793       char *filename = NULL, *new_file = NULL;
 794       int dt;
 795
 796       if (cur_url->ignore_when_downloading)
 797         continue;
 798
 799       if (opt.quota && total_downloaded_bytes > opt.quota)
 800         {
 801           status = QUOTEXC;
 802           break;
 803         }
 804       if ((opt.recursive || opt.page_requisites)
 805           && cur_url->url->scheme != SCHEME_FTP)
 806         status = retrieve_tree (cur_url->url->url);
 807       else
 808         status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
 809
 810       if (filename && opt.delete_after && file_exists_p (filename))
 811         {
 812           DEBUGP (("Removing file due to --delete-after in"
 813                    " retrieve_from_file():\n"));
 814           logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
 815           if (unlink (filename))
 816             logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
 817           dt &= ~RETROKF;
 818         }
 819
 820       xfree_null (new_file);
 821       xfree_null (filename);
 822     }
 823
 824   /* Free the linked list of URL-s.  */
 825   free_urlpos (url_list);
 826
 827   return status;
 828 }
 829
 830 /* Print `giving up', or `retrying', depending on the impending
 831    action.  N1 and N2 are the attempt number and the attempt limit.  */
 832 void
 833 printwhat (int n1, int n2)
 834 {
 835   logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
 836 }
 837
 838 /* If opt.wait or opt.waitretry are specified, and if certain
 839    conditions are met, sleep the appropriate number of seconds.  See
 840    the documentation of --wait and --waitretry for more information.
 841
 842    COUNT is the count of current retrieval, beginning with 1. */
 843
 844 void
 845 sleep_between_retrievals (int count)
 846 {
 847   static int first_retrieval = 1;
 848
 849   if (first_retrieval)
 850     {
 851       /* Don't sleep before the very first retrieval. */
 852       first_retrieval = 0;
 853       return;
 854     }
 855
 856   if (opt.waitretry && count > 1)
 857     {
 858       /* If opt.waitretry is specified and this is a retry, wait for
 859          COUNT-1 number of seconds, or for opt.waitretry seconds.  */
 860       if (count <= opt.waitretry)
 861         xsleep (count - 1);
 862       else
 863         xsleep (opt.waitretry);
 864     }
 865   else if (opt.wait)
 866     {
 867       if (!opt.random_wait || count > 1)
 868         /* If random-wait is not specified, or if we are sleeping
 869            between retries of the same download, sleep the fixed
 870            interval.  */
 871         xsleep (opt.wait);
 872       else
 873         {
 874           /* Sleep a random amount of time averaging in opt.wait
 875              seconds.  The sleeping amount ranges from 0 to
 876              opt.wait*2, inclusive.  */
 877           double waitsecs = 2 * opt.wait * random_float ();
 878           DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
 879                    opt.wait, waitsecs));
 880           xsleep (waitsecs);
 881         }
 882     }
 883 }
 884
 885 /* Free the linked list of urlpos.  */
 886 void
 887 free_urlpos (struct urlpos *l)
 888 {
 889   while (l)
 890     {
 891       struct urlpos *next = l->next;
 892       if (l->url)
 893         url_free (l->url);
 894       xfree_null (l->local_name);
 895       xfree (l);
 896       l = next;
 897     }
 898 }
 899
 900 /* Rotate FNAME opt.backups times */
 901 void
 902 rotate_backups(const char *fname)
 903 {
 904   int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1;
 905   char *from = (char *)alloca (maxlen);
 906   char *to = (char *)alloca (maxlen);
 907   struct stat sb;
 908   int i;
 909
 910   if (stat (fname, &sb) == 0)
 911     if (S_ISREG (sb.st_mode) == 0)
 912       return;
 913
 914   for (i = opt.backups; i > 1; i--)
 915     {
 916       sprintf (from, "%s.%d", fname, i - 1);
 917       sprintf (to, "%s.%d", fname, i);
 918       rename (from, to);
 919     }
 920
 921   sprintf (to, "%s.%d", fname, 1);
 922   rename(fname, to);
 923 }
 924
 925 static int no_proxy_match PARAMS ((const char *, const char **));
 926
 927 /* Return the URL of the proxy appropriate for url U.  */
 928
 929 static char *
 930 getproxy (struct url *u)
 931 {
 932   char *proxy = NULL;
 933   char *rewritten_url;
 934   static char rewritten_storage[1024];
 935
 936   if (!opt.use_proxy)
 937     return NULL;
 938   if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
 939     return NULL;
 940
 941   switch (u->scheme)
 942     {
 943     case SCHEME_HTTP:
 944       proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
 945       break;
 946 #ifdef HAVE_SSL
 947     case SCHEME_HTTPS:
 948       proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
 949       break;
 950 #endif
 951     case SCHEME_FTP:
 952       proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
 953       break;
 954     case SCHEME_INVALID:
 955       break;
 956     }
 957   if (!proxy || !*proxy)
 958     return NULL;
 959
 960   /* Handle shorthands.  `rewritten_storage' is a kludge to allow
 961      getproxy() to return static storage. */
 962   rewritten_url = rewrite_shorthand_url (proxy);
 963   if (rewritten_url)
 964     {
 965       strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage));
 966       rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
 967       proxy = rewritten_storage;
 968     }
 969
 970   return proxy;
 971 }
 972
 973 /* Should a host be accessed through proxy, concerning no_proxy?  */
 974 int
 975 no_proxy_match (const char *host, const char **no_proxy)
 976 {
 977   if (!no_proxy)
 978     return 1;
 979   else
 980     return !sufmatch (no_proxy, host);
 981 }