sjero.net Git - wget/blob - src/retr.c

   1 /* File retrieval.
   2    Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Wget.
   5
   6 GNU Wget is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2 of the License, or (at
   9 your option) any later version.
  10
  11 GNU Wget is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Wget; if not, write to the Free Software
  18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20 In addition, as a special exception, the Free Software Foundation
  21 gives permission to link the code of its release of Wget with the
  22 OpenSSL project's "OpenSSL" library (or with modified versions of it
  23 that use the same license as the "OpenSSL" library), and distribute
  24 the linked executables.  You must obey the GNU General Public License
  25 in all respects for all of the code used other than "OpenSSL".  If you
  26 modify this file, you may extend this exception to your version of the
  27 file, but you are not obligated to do so.  If you do not wish to do
  28 so, delete this exception statement from your version.  */
  29
  30 #include <config.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <sys/types.h>
  35 #ifdef HAVE_UNISTD_H
  36 # include <unistd.h>
  37 #endif /* HAVE_UNISTD_H */
  38 #include <errno.h>
  39 #ifdef HAVE_STRING_H
  40 # include <string.h>
  41 #else
  42 # include <strings.h>
  43 #endif /* HAVE_STRING_H */
  44 #include <assert.h>
  45
  46 #include "wget.h"
  47 #include "utils.h"
  48 #include "retr.h"
  49 #include "progress.h"
  50 #include "url.h"
  51 #include "recur.h"
  52 #include "ftp.h"
  53 #include "host.h"
  54 #include "connect.h"
  55 #include "hash.h"
  56 #include "convert.h"
  57
  58 #ifdef HAVE_SSL
  59 # include "gen_sslfunc.h"       /* for ssl_iread */
  60 #endif
  61
  62 #ifndef errno
  63 extern int errno;
  64 #endif
  65
/* Total size of downloaded files.  Used to enforce quota: it is
   compared against opt.quota before each retrieval in
   retrieve_from_file.  */
LARGE_INT total_downloaded_bytes;

/* If non-NULL, the stream to which output should be written.  This
   stream is initialized when `-O' is used.  */
FILE *output_stream;

/* Whether output_document is a regular file we can manipulate,
   i.e. not `-' or a device file. */
int output_stream_regular;
  76 \f
  77 static struct {
  78   long chunk_bytes;
  79   double chunk_start;
  80   double sleep_adjust;
  81 } limit_data;
  82
  83 static void
  84 limit_bandwidth_reset (void)
  85 {
  86   limit_data.chunk_bytes = 0;
  87   limit_data.chunk_start = 0;
  88 }
  89
  90 /* Limit the bandwidth by pausing the download for an amount of time.
  91    BYTES is the number of bytes received from the network, and TIMER
  92    is the timer that started at the beginning of download.  */
  93
  94 static void
  95 limit_bandwidth (long bytes, struct wget_timer *timer)
  96 {
  97   double delta_t = wtimer_read (timer) - limit_data.chunk_start;
  98   double expected;
  99
 100   limit_data.chunk_bytes += bytes;
 101
 102   /* Calculate the amount of time we expect downloading the chunk
 103      should take.  If in reality it took less time, sleep to
 104      compensate for the difference.  */
 105   expected = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;
 106
 107   if (expected > delta_t)
 108     {
 109       double slp = expected - delta_t + limit_data.sleep_adjust;
 110       double t0, t1;
 111       if (slp < 200)
 112         {
 113           DEBUGP (("deferring a %.2f ms sleep (%ld/%.2f).\n",
 114                    slp, limit_data.chunk_bytes, delta_t));
 115           return;
 116         }
 117       DEBUGP (("\nsleeping %.2f ms for %ld bytes, adjust %.2f ms\n",
 118                slp, limit_data.chunk_bytes, limit_data.sleep_adjust));
 119
 120       t0 = wtimer_read (timer);
 121       xsleep (slp / 1000);
 122       wtimer_update (timer);
 123       t1 = wtimer_read (timer);
 124
 125       /* Due to scheduling, we probably slept slightly longer (or
 126          shorter) than desired.  Calculate the difference between the
 127          desired and the actual sleep, and adjust the next sleep by
 128          that amount.  */
 129       limit_data.sleep_adjust = slp - (t1 - t0);
 130     }
 131
 132   limit_data.chunk_bytes = 0;
 133   limit_data.chunk_start = wtimer_read (timer);
 134 }
 135
 136 #ifndef MIN
 137 # define MIN(i, j) ((i) <= (j) ? (i) : (j))
 138 #endif
 139
 140 /* Write data in BUF to OUT.  However, if *SKIP is non-zero, skip that
 141    amount of data and decrease SKIP.  Increment *TOTAL by the amount
 142    of data written.  */
 143
 144 static int
 145 write_data (FILE *out, const char *buf, int bufsize, long *skip,
 146             long *written)
 147 {
 148   if (!out)
 149     return 1;
 150   if (*skip > bufsize)
 151     {
 152       *skip -= bufsize;
 153       return 1;
 154     }
 155   if (*skip)
 156     {
 157       buf += *skip;
 158       bufsize -= *skip;
 159       *skip = 0;
 160       if (bufsize == 0)
 161         return 1;
 162     }
 163
 164   fwrite (buf, 1, bufsize, out);
 165   *written += bufsize;
 166
 167   /* Immediately flush the downloaded data.  This should not hinder
 168      performance: fast downloads will arrive in large 16K chunks
 169      (which stdio would write out immediately anyway), and slow
 170      downloads wouldn't be limited by disk speed.  */
 171   fflush (out);
 172   return !ferror (out);
 173 }
 174
 175 /* Read the contents of file descriptor FD until it the connection
 176    terminates or a read error occurs.  The data is read in portions of
 177    up to 16K and written to OUT as it arrives.  If opt.verbose is set,
 178    the progress is shown.
 179
 180    TOREAD is the amount of data expected to arrive, normally only used
 181    by the progress gauge.
 182
 183    STARTPOS is the position from which the download starts, used by
 184    the progress gauge.  If QTYREAD is non-NULL, the value it points to
 185    is incremented by the amount of data read from the network.  If
 186    QTYWRITTEN is non-NULL, the value it points to is incremented by
 187    the amount of data written to disk.  The time it took to download
 188    the data (in milliseconds) is stored to ELAPSED.
 189
 190    The function exits and returns the amount of data read.  In case of
 191    error while reading data, -1 is returned.  In case of error while
 192    writing data, -2 is returned.  */
 193
 194 int
 195 fd_read_body (int fd, FILE *out, long toread, long startpos,
 196               long *qtyread, long *qtywritten, double *elapsed, int flags)
 197 {
 198   int ret = 0;
 199
 200   static char dlbuf[16384];
 201   int dlbufsize = sizeof (dlbuf);
 202
 203   struct wget_timer *timer = NULL;
 204   double last_successful_read_tm = 0;
 205
 206   /* The progress gauge, set according to the user preferences. */
 207   void *progress = NULL;
 208
 209   /* Non-zero if the progress gauge is interactive, i.e. if it can
 210      continually update the display.  When true, smaller timeout
 211      values are used so that the gauge can update the display when
 212      data arrives slowly. */
 213   int progress_interactive = 0;
 214
 215   int exact = flags & rb_read_exactly;
 216   long skip = 0;
 217
 218   /* How much data we've read/written.  */
 219   long sum_read = 0;
 220   long sum_written = 0;
 221
 222   if (flags & rb_skip_startpos)
 223     skip = startpos;
 224
 225   if (opt.verbose)
 226     {
 227       /* If we're skipping STARTPOS bytes, pass 0 as the INITIAL
 228          argument to progress_create because the indicator doesn't
 229          (yet) know about "skipping" data.  */
 230       progress = progress_create (skip ? 0 : startpos, startpos + toread);
 231       progress_interactive = progress_interactive_p (progress);
 232     }
 233
 234   if (opt.limit_rate)
 235     limit_bandwidth_reset ();
 236
 237   /* A timer is needed for tracking progress, for throttling, and for
 238      tracking elapsed time.  If either of these are requested, start
 239      the timer.  */
 240   if (progress || opt.limit_rate || elapsed)
 241     {
 242       timer = wtimer_new ();
 243       last_successful_read_tm = 0;
 244     }
 245
 246   /* Use a smaller buffer for low requested bandwidths.  For example,
 247      with --limit-rate=2k, it doesn't make sense to slurp in 16K of
 248      data and then sleep for 8s.  With buffer size equal to the limit,
 249      we never have to sleep for more than one second.  */
 250   if (opt.limit_rate && opt.limit_rate < dlbufsize)
 251     dlbufsize = opt.limit_rate;
 252
 253   /* Read from FD while there is data to read.  Normally toread==0
 254      means that it is unknown how much data is to arrive.  However, if
 255      EXACT is set, then toread==0 means what it says: that no data
 256      should be read.  */
 257   while (!exact || (sum_read < toread))
 258     {
 259       int rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize;
 260       double tmout = opt.read_timeout;
 261       if (progress_interactive)
 262         {
 263           /* For interactive progress gauges, always specify a ~1s
 264              timeout, so that the gauge can be updated regularly even
 265              when the data arrives very slowly or stalls.  */
 266           tmout = 0.95;
 267           if (opt.read_timeout)
 268             {
 269               double waittm;
 270               waittm = (wtimer_read (timer) - last_successful_read_tm) / 1000;
 271               if (waittm + tmout > opt.read_timeout)
 272                 {
 273                   /* Don't let total idle time exceed read timeout. */
 274                   tmout = opt.read_timeout - waittm;
 275                   if (tmout < 0)
 276                     {
 277                       /* We've already exceeded the timeout. */
 278                       ret = -1, errno = ETIMEDOUT;
 279                       break;
 280                     }
 281                 }
 282             }
 283         }
 284       ret = fd_read (fd, dlbuf, rdsize, tmout);
 285
 286       /* when retrieving from http-proxy wget sometimes does not trust the
 287        * file length reported by server.
 288        * this check is to tell wget not to stubbornly try to read again and
 289        * again until another errno code was received. */
 290       if ( ret == -1 && errno == ETIMEDOUT && sum_read == toread && toread > 0 )
 291         break;
 292
 293       if (ret == 0 || (ret < 0 && errno != ETIMEDOUT))
 294         break;                  /* read error */
 295       else if (ret < 0)
 296         ret = 0;                /* read timeout */
 297
 298       if (progress || opt.limit_rate)
 299         {
 300           wtimer_update (timer);
 301           if (ret > 0)
 302             last_successful_read_tm = wtimer_read (timer);
 303         }
 304
 305       if (ret > 0)
 306         {
 307           sum_read += ret;
 308           if (!write_data (out, dlbuf, ret, &skip, &sum_written))
 309             {
 310               ret = -2;
 311               goto out;
 312             }
 313         }
 314
 315       if (opt.limit_rate)
 316         limit_bandwidth (ret, timer);
 317
 318       if (progress)
 319         progress_update (progress, ret, wtimer_read (timer));
 320 #ifdef WINDOWS
 321       if (toread > 0 && !opt.quiet)
 322         ws_percenttitle (100.0 *
 323                          (startpos + sum_read) / (startpos + toread));
 324 #endif
 325     }
 326   if (ret < -1)
 327     ret = -1;
 328
 329  out:
 330   if (progress)
 331     progress_finish (progress, wtimer_read (timer));
 332
 333   if (elapsed)
 334     *elapsed = wtimer_read (timer);
 335   if (timer)
 336     wtimer_delete (timer);
 337
 338   if (qtyread)
 339     *qtyread += sum_read;
 340   if (qtywritten)
 341     *qtywritten += sum_written;
 342
 343   return ret;
 344 }
 345 \f
 346 /* Read a hunk of data from FD, up until a terminator.  The terminator
 347    is whatever the TERMINATOR function determines it to be; for
 348    example, it can be a line of data, or the head of an HTTP response.
 349    The function returns the data read allocated with malloc.
 350
 351    In case of error, NULL is returned.  In case of EOF and no data
 352    read, NULL is returned and errno set to 0.  In case of EOF with
 353    data having been read, the data is returned, but it will
 354    (obviously) not contain the terminator.
 355
 356    The idea is to be able to read a line of input, or otherwise a hunk
 357    of text, such as the head of an HTTP request, without crossing the
 358    boundary, so that the next call to fd_read etc. reads the data
 359    after the hunk.  To achieve that, this function does the following:
 360
 361    1. Peek at available data.
 362
 363    2. Determine whether the peeked data, along with the previously
 364       read data, includes the terminator.
 365
 366       2a. If yes, read the data until the end of the terminator, and
 367           exit.
 368
 369       2b. If no, read the peeked data and goto 1.
 370
 371    The function is careful to assume as little as possible about the
 372    implementation of peeking.  For example, every peek is followed by
 373    a read.  If the read returns a different amount of data, the
 374    process is retried until all data arrives safely.
 375
 376    BUFSIZE is the size of the initial buffer expected to read all the
 377    data in the typical case.
 378
 379    This function should be used as a building block for other
 380    functions -- see fd_read_line as a simple example.  */
 381
 382 char *
 383 fd_read_hunk (int fd, hunk_terminator_t hunk_terminator, int bufsize)
 384 {
 385   char *hunk = xmalloc (bufsize);
 386   int tail = 0;                 /* tail position in HUNK */
 387
 388   while (1)
 389     {
 390       const char *end;
 391       int pklen, rdlen, remain;
 392
 393       /* First, peek at the available data. */
 394
 395       pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1);
 396       if (pklen < 0)
 397         {
 398           xfree (hunk);
 399           return NULL;
 400         }
 401       end = hunk_terminator (hunk, tail, pklen);
 402       if (end)
 403         {
 404           /* The data contains the terminator: we'll drain the data up
 405              to the end of the terminator.  */
 406           remain = end - (hunk + tail);
 407           if (remain == 0)
 408             {
 409               /* No more data needs to be read. */
 410               hunk[tail] = '\0';
 411               return hunk;
 412             }
 413           if (bufsize - 1 < tail + remain)
 414             {
 415               bufsize = tail + remain + 1;
 416               hunk = xrealloc (hunk, bufsize);
 417             }
 418         }
 419       else
 420         /* No terminator: simply read the data we know is (or should
 421            be) available.  */
 422         remain = pklen;
 423
 424       /* Now, read the data.  Note that we make no assumptions about
 425          how much data we'll get.  (Some TCP stacks are notorious for
 426          read returning less data than the previous MSG_PEEK.)  */
 427
 428       rdlen = fd_read (fd, hunk + tail, remain, 0);
 429       if (rdlen < 0)
 430         {
 431           xfree_null (hunk);
 432           return NULL;
 433         }
 434       tail += rdlen;
 435       hunk[tail] = '\0';
 436
 437       if (rdlen == 0)
 438         {
 439           if (tail == 0)
 440             {
 441               /* EOF without anything having been read */
 442               xfree (hunk);
 443               errno = 0;
 444               return NULL;
 445             }
 446           else
 447             /* EOF seen: return the data we've read. */
 448             return hunk;
 449         }
 450       if (end && rdlen == remain)
 451         /* The terminator was seen and the remaining data drained --
 452            we got what we came for.  */
 453         return hunk;
 454
 455       /* Keep looping until all the data arrives. */
 456
 457       if (tail == bufsize - 1)
 458         {
 459           bufsize <<= 1;
 460           hunk = xrealloc (hunk, bufsize);
 461         }
 462     }
 463 }
 464
 465 static const char *
 466 line_terminator (const char *hunk, int oldlen, int peeklen)
 467 {
 468   const char *p = memchr (hunk + oldlen, '\n', peeklen);
 469   if (p)
 470     /* p+1 because we want the line to include '\n' */
 471     return p + 1;
 472   return NULL;
 473 }
 474
/* Read one line from FD and return it.  The line is allocated using
   malloc.  This is fd_read_hunk with line_terminator as the
   terminator and an initial buffer of 128 bytes.

   If an error occurs, or if no data can be read, NULL is returned.
   In the former case errno indicates the error condition, and in the
   latter case, errno is 0.  */

char *
fd_read_line (int fd)
{
  return fd_read_hunk (fd, line_terminator, 128);
}
 487 \f
 488 /* Return a printed representation of the download rate, as
 489    appropriate for the speed.  If PAD is non-zero, strings will be
 490    padded to the width of 7 characters (xxxx.xx).  */
 491 char *
 492 retr_rate (long bytes, double msecs, int pad)
 493 {
 494   static char res[20];
 495   static const char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
 496   int units = 0;
 497
 498   double dlrate = calc_rate (bytes, msecs, &units);
 499   sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);
 500
 501   return res;
 502 }
 503
 504 /* Calculate the download rate and trim it as appropriate for the
 505    speed.  Appropriate means that if rate is greater than 1K/s,
 506    kilobytes are used, and if rate is greater than 1MB/s, megabytes
 507    are used.
 508
 509    UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
 510    GB/s.  */
 511 double
 512 calc_rate (long bytes, double msecs, int *units)
 513 {
 514   double dlrate;
 515
 516   assert (msecs >= 0);
 517   assert (bytes >= 0);
 518
 519   if (msecs == 0)
 520     /* If elapsed time is exactly zero, it means we're under the
 521        granularity of the timer.  This often happens on systems that
 522        use time() for the timer.  */
 523     msecs = wtimer_granularity ();
 524
 525   dlrate = (double)1000 * bytes / msecs;
 526   if (dlrate < 1024.0)
 527     *units = 0;
 528   else if (dlrate < 1024.0 * 1024.0)
 529     *units = 1, dlrate /= 1024.0;
 530   else if (dlrate < 1024.0 * 1024.0 * 1024.0)
 531     *units = 2, dlrate /= (1024.0 * 1024.0);
 532   else
 533     /* Maybe someone will need this, one day. */
 534     *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
 535
 536   return dlrate;
 537 }
 538 \f
/* Maximum number of allowed redirections.  20 was chosen as a
   "reasonable" value, which is low enough to not cause havoc, yet
   high enough to guarantee that normal retrievals will not be hurt by
   the check.  */

#define MAX_REDIRECTIONS 20

/* Turn POST off for the time being: stash opt.post_data and
   opt.post_file_name in the locals saved_post_data and
   saved_post_file_name, clear both options, and set
   post_data_suspended.  The expanding scope must declare those three
   locals (retrieve_url does).  */
#define SUSPEND_POST_DATA do {                  \
  post_data_suspended = 1;                      \
  saved_post_data = opt.post_data;              \
  saved_post_file_name = opt.post_file_name;    \
  opt.post_data = NULL;                         \
  opt.post_file_name = NULL;                    \
} while (0)

/* Undo SUSPEND_POST_DATA: if POST data is currently suspended, put
   the saved values back into opt and clear post_data_suspended.  Does
   nothing otherwise, so it is safe to use on every exit path.  */
#define RESTORE_POST_DATA do {                          \
  if (post_data_suspended)                              \
    {                                                   \
      opt.post_data = saved_post_data;                  \
      opt.post_file_name = saved_post_file_name;        \
      post_data_suspended = 0;                          \
    }                                                   \
} while (0)

/* Defined near the end of this file; declared here because
   retrieve_url calls it.  */
static char *getproxy PARAMS ((struct url *));
 564
 565 /* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
 566    FTP, proxy, etc.  */
 567
 568 /* #### This function should be rewritten so it doesn't return from
 569    multiple points. */
 570
 571 uerr_t
 572 retrieve_url (const char *origurl, char **file, char **newloc,
 573               const char *refurl, int *dt)
 574 {
 575   uerr_t result;
 576   char *url;
 577   int location_changed, dummy;
 578   char *mynewloc, *proxy;
 579   struct url *u, *proxy_url;
 580   int up_error_code;            /* url parse error code */
 581   char *local_file;
 582   int redirection_count = 0;
 583
 584   int post_data_suspended = 0;
 585   char *saved_post_data = NULL;
 586   char *saved_post_file_name = NULL;
 587
 588   /* If dt is NULL, use local storage.  */
 589   if (!dt)
 590     {
 591       dt = &dummy;
 592       dummy = 0;
 593     }
 594   url = xstrdup (origurl);
 595   if (newloc)
 596     *newloc = NULL;
 597   if (file)
 598     *file = NULL;
 599
 600   u = url_parse (url, &up_error_code);
 601   if (!u)
 602     {
 603       logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
 604       xfree (url);
 605       return URLERROR;
 606     }
 607
 608   if (!refurl)
 609     refurl = opt.referer;
 610
 611  redirected:
 612
 613   result = NOCONERROR;
 614   mynewloc = NULL;
 615   local_file = NULL;
 616   proxy_url = NULL;
 617
 618   proxy = getproxy (u);
 619   if (proxy)
 620     {
 621       /* Parse the proxy URL.  */
 622       proxy_url = url_parse (proxy, &up_error_code);
 623       if (!proxy_url)
 624         {
 625           logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
 626                      proxy, url_error (up_error_code));
 627           xfree (url);
 628           RESTORE_POST_DATA;
 629           return PROXERR;
 630         }
 631       if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
 632         {
 633           logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
 634           url_free (proxy_url);
 635           xfree (url);
 636           RESTORE_POST_DATA;
 637           return PROXERR;
 638         }
 639     }
 640
 641   if (u->scheme == SCHEME_HTTP
 642 #ifdef HAVE_SSL
 643       || u->scheme == SCHEME_HTTPS
 644 #endif
 645       || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
 646     {
 647       result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
 648     }
 649   else if (u->scheme == SCHEME_FTP)
 650     {
 651       /* If this is a redirection, we must not allow recursive FTP
 652          retrieval, so we save recursion to oldrec, and restore it
 653          later.  */
 654       int oldrec = opt.recursive;
 655       if (redirection_count)
 656         opt.recursive = 0;
 657       result = ftp_loop (u, dt, proxy_url);
 658       opt.recursive = oldrec;
 659
 660       /* There is a possibility of having HTTP being redirected to
 661          FTP.  In these cases we must decide whether the text is HTML
 662          according to the suffix.  The HTML suffixes are `.html',
 663          `.htm' and a few others, case-insensitive.  */
 664       if (redirection_count && local_file && u->scheme == SCHEME_FTP)
 665         {
 666           if (has_html_suffix_p (local_file))
 667             *dt |= TEXTHTML;
 668         }
 669     }
 670
 671   if (proxy_url)
 672     {
 673       url_free (proxy_url);
 674       proxy_url = NULL;
 675     }
 676
 677   location_changed = (result == NEWLOCATION);
 678   if (location_changed)
 679     {
 680       char *construced_newloc;
 681       struct url *newloc_parsed;
 682
 683       assert (mynewloc != NULL);
 684
 685       if (local_file)
 686         xfree (local_file);
 687
 688       /* The HTTP specs only allow absolute URLs to appear in
 689          redirects, but a ton of boneheaded webservers and CGIs out
 690          there break the rules and use relative URLs, and popular
 691          browsers are lenient about this, so wget should be too. */
 692       construced_newloc = uri_merge (url, mynewloc);
 693       xfree (mynewloc);
 694       mynewloc = construced_newloc;
 695
 696       /* Now, see if this new location makes sense. */
 697       newloc_parsed = url_parse (mynewloc, &up_error_code);
 698       if (!newloc_parsed)
 699         {
 700           logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
 701                      url_error (up_error_code));
 702           url_free (u);
 703           xfree (url);
 704           xfree (mynewloc);
 705           RESTORE_POST_DATA;
 706           return result;
 707         }
 708
 709       /* Now mynewloc will become newloc_parsed->url, because if the
 710          Location contained relative paths like .././something, we
 711          don't want that propagating as url.  */
 712       xfree (mynewloc);
 713       mynewloc = xstrdup (newloc_parsed->url);
 714
 715       /* Check for max. number of redirections.  */
 716       if (++redirection_count > MAX_REDIRECTIONS)
 717         {
 718           logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
 719                      MAX_REDIRECTIONS);
 720           url_free (newloc_parsed);
 721           url_free (u);
 722           xfree (url);
 723           xfree (mynewloc);
 724           RESTORE_POST_DATA;
 725           return WRONGCODE;
 726         }
 727
 728       xfree (url);
 729       url = mynewloc;
 730       url_free (u);
 731       u = newloc_parsed;
 732
 733       /* If we're being redirected from POST, we don't want to POST
 734          again.  Many requests answer POST with a redirection to an
 735          index page; that redirection is clearly a GET.  We "suspend"
 736          POST data for the duration of the redirections, and restore
 737          it when we're done. */
 738       if (!post_data_suspended)
 739         SUSPEND_POST_DATA;
 740
 741       goto redirected;
 742     }
 743
 744   if (local_file)
 745     {
 746       if (*dt & RETROKF)
 747         {
 748           register_download (u->url, local_file);
 749           if (redirection_count && 0 != strcmp (origurl, u->url))
 750             register_redirection (origurl, u->url);
 751           if (*dt & TEXTHTML)
 752             register_html (u->url, local_file);
 753         }
 754     }
 755
 756   if (file)
 757     *file = local_file ? local_file : NULL;
 758   else
 759     xfree_null (local_file);
 760
 761   url_free (u);
 762
 763   if (redirection_count)
 764     {
 765       if (newloc)
 766         *newloc = url;
 767       else
 768         xfree (url);
 769     }
 770   else
 771     {
 772       if (newloc)
 773         *newloc = NULL;
 774       xfree (url);
 775     }
 776
 777   RESTORE_POST_DATA;
 778
 779   return result;
 780 }
 781
 782 /* Find the URLs in the file and call retrieve_url() for each of
 783    them.  If HTML is non-zero, treat the file as HTML, and construct
 784    the URLs accordingly.
 785
 786    If opt.recursive is set, call retrieve_tree() for each file.  */
 787
 788 uerr_t
 789 retrieve_from_file (const char *file, int html, int *count)
 790 {
 791   uerr_t status;
 792   struct urlpos *url_list, *cur_url;
 793
 794   url_list = (html ? get_urls_html (file, NULL, NULL)
 795               : get_urls_file (file));
 796   status = RETROK;             /* Suppose everything is OK.  */
 797   *count = 0;                  /* Reset the URL count.  */
 798
 799   for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
 800     {
 801       char *filename = NULL, *new_file = NULL;
 802       int dt;
 803
 804       if (cur_url->ignore_when_downloading)
 805         continue;
 806
 807       if (opt.quota && total_downloaded_bytes > opt.quota)
 808         {
 809           status = QUOTEXC;
 810           break;
 811         }
 812       if ((opt.recursive || opt.page_requisites)
 813           && cur_url->url->scheme != SCHEME_FTP)
 814         status = retrieve_tree (cur_url->url->url);
 815       else
 816         status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
 817
 818       if (filename && opt.delete_after && file_exists_p (filename))
 819         {
 820           DEBUGP (("Removing file due to --delete-after in"
 821                    " retrieve_from_file():\n"));
 822           logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
 823           if (unlink (filename))
 824             logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
 825           dt &= ~RETROKF;
 826         }
 827
 828       xfree_null (new_file);
 829       xfree_null (filename);
 830     }
 831
 832   /* Free the linked list of URL-s.  */
 833   free_urlpos (url_list);
 834
 835   return status;
 836 }
 837
 838 /* Print `giving up', or `retrying', depending on the impending
 839    action.  N1 and N2 are the attempt number and the attempt limit.  */
 840 void
 841 printwhat (int n1, int n2)
 842 {
 843   logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
 844 }
 845
 846 /* If opt.wait or opt.waitretry are specified, and if certain
 847    conditions are met, sleep the appropriate number of seconds.  See
 848    the documentation of --wait and --waitretry for more information.
 849
 850    COUNT is the count of current retrieval, beginning with 1. */
 851
 852 void
 853 sleep_between_retrievals (int count)
 854 {
 855   static int first_retrieval = 1;
 856
 857   if (first_retrieval)
 858     {
 859       /* Don't sleep before the very first retrieval. */
 860       first_retrieval = 0;
 861       return;
 862     }
 863
 864   if (opt.waitretry && count > 1)
 865     {
 866       /* If opt.waitretry is specified and this is a retry, wait for
 867          COUNT-1 number of seconds, or for opt.waitretry seconds.  */
 868       if (count <= opt.waitretry)
 869         xsleep (count - 1);
 870       else
 871         xsleep (opt.waitretry);
 872     }
 873   else if (opt.wait)
 874     {
 875       if (!opt.random_wait || count > 1)
 876         /* If random-wait is not specified, or if we are sleeping
 877            between retries of the same download, sleep the fixed
 878            interval.  */
 879         xsleep (opt.wait);
 880       else
 881         {
 882           /* Sleep a random amount of time averaging in opt.wait
 883              seconds.  The sleeping amount ranges from 0 to
 884              opt.wait*2, inclusive.  */
 885           double waitsecs = 2 * opt.wait * random_float ();
 886           DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
 887                    opt.wait, waitsecs));
 888           xsleep (waitsecs);
 889         }
 890     }
 891 }
 892
 893 /* Free the linked list of urlpos.  */
 894 void
 895 free_urlpos (struct urlpos *l)
 896 {
 897   while (l)
 898     {
 899       struct urlpos *next = l->next;
 900       if (l->url)
 901         url_free (l->url);
 902       xfree_null (l->local_name);
 903       xfree (l);
 904       l = next;
 905     }
 906 }
 907
 908 /* Rotate FNAME opt.backups times */
 909 void
 910 rotate_backups(const char *fname)
 911 {
 912   int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1;
 913   char *from = (char *)alloca (maxlen);
 914   char *to = (char *)alloca (maxlen);
 915   struct stat sb;
 916   int i;
 917
 918   if (stat (fname, &sb) == 0)
 919     if (S_ISREG (sb.st_mode) == 0)
 920       return;
 921
 922   for (i = opt.backups; i > 1; i--)
 923     {
 924       sprintf (from, "%s.%d", fname, i - 1);
 925       sprintf (to, "%s.%d", fname, i);
 926       rename (from, to);
 927     }
 928
 929   sprintf (to, "%s.%d", fname, 1);
 930   rename(fname, to);
 931 }
 932
 933 static int no_proxy_match PARAMS ((const char *, const char **));
 934
 935 /* Return the URL of the proxy appropriate for url U.  */
 936
 937 static char *
 938 getproxy (struct url *u)
 939 {
 940   char *proxy = NULL;
 941   char *rewritten_url;
 942   static char rewritten_storage[1024];
 943
 944   if (!opt.use_proxy)
 945     return NULL;
 946   if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
 947     return NULL;
 948
 949   switch (u->scheme)
 950     {
 951     case SCHEME_HTTP:
 952       proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
 953       break;
 954 #ifdef HAVE_SSL
 955     case SCHEME_HTTPS:
 956       proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
 957       break;
 958 #endif
 959     case SCHEME_FTP:
 960       proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
 961       break;
 962     case SCHEME_INVALID:
 963       break;
 964     }
 965   if (!proxy || !*proxy)
 966     return NULL;
 967
 968   /* Handle shorthands.  `rewritten_storage' is a kludge to allow
 969      getproxy() to return static storage. */
 970   rewritten_url = rewrite_shorthand_url (proxy);
 971   if (rewritten_url)
 972     {
 973       strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage));
 974       rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
 975       proxy = rewritten_storage;
 976     }
 977
 978   return proxy;
 979 }
 980
 981 /* Should a host be accessed through proxy, concerning no_proxy?  */
 982 int
 983 no_proxy_match (const char *host, const char **no_proxy)
 984 {
 985   if (!no_proxy)
 986     return 1;
 987   else
 988     return !sufmatch (no_proxy, host);
 989 }