2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
34 #include <sys/types.h>
37 #endif /* HAVE_UNISTD_H */
43 #endif /* HAVE_STRING_H */
59 # include "gen_sslfunc.h" /* for ssl_iread */
66 /* See the comment in gethttp() why this is needed. */
67 int global_download_count;
69 /* Total size of downloaded files. Used to enforce quota. */
70 LARGE_INT total_downloaded_bytes;
80 limit_bandwidth_reset (void)
82 limit_data.chunk_bytes = 0;
83 limit_data.chunk_start = 0;
86 /* Limit the bandwidth by pausing the download for an amount of time.
87 BYTES is the number of bytes received from the network, and DELTA
88 is the number of milliseconds it took to receive them. */
91 limit_bandwidth (long bytes, double *dltime, struct wget_timer *timer)
93 double delta_t = *dltime - limit_data.chunk_start;
96 limit_data.chunk_bytes += bytes;
98 /* Calculate the amount of time we expect downloading the chunk
99 should take. If in reality it took less time, sleep to
100 compensate for the difference. */
101 expected = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;
103 if (expected > delta_t)
105 double slp = expected - delta_t + limit_data.sleep_adjust;
109 DEBUGP (("deferring a %.2f ms sleep (%ld/%.2f).\n",
110 slp, limit_data.chunk_bytes, delta_t));
113 DEBUGP (("\nsleeping %.2f ms for %ld bytes, adjust %.2f ms\n",
114 slp, limit_data.chunk_bytes, limit_data.sleep_adjust));
117 usleep ((unsigned long) (1000 * slp));
118 t1 = wtimer_elapsed (timer);
120 /* Due to scheduling, we probably slept slightly longer (or
121 shorter) than desired. Calculate the difference between the
122 desired and the actual sleep, and adjust the next sleep by
124 limit_data.sleep_adjust = slp - (t1 - t0);
126 /* Since we've called wtimer_elapsed, we might as well update
127 the caller's dltime. */
131 limit_data.chunk_bytes = 0;
132 limit_data.chunk_start = *dltime;
/* Smaller of two values.  Beware: evaluates each argument twice.  */
#define MIN(i, j) ((i) <= (j) ? (i) : (j))
/* NOTE(review): this function was garbled in extraction -- every line
   carries a stray leading line number, and the return type, local
   declarations (res, sz, dltime), braces, error-exit paths and the
   final return were dropped.  Code bytes are left untouched below;
   only comments were added.  Restore from upstream before building.  */
137 /* Reads the contents of file descriptor FD, until it is closed, or a
138 read error occurs. The data is read in 8K chunks, and stored to
139 stream fp, which should have been open for writing. If BUF is
140 non-NULL and its file descriptor is equal to FD, flush RBUF first.
141 This function will *not* use the rbuf_* functions!
143 The EXPECTED argument is passed to show_progress() unchanged, but
146 If opt.verbose is set, the progress is also shown. RESTVAL
147 represents a value from which to start downloading (which will be
148 shown accordingly). If RESTVAL is non-zero, the stream should have
149 been open for appending.
151 The function exits and returns codes of 0, -1 and -2 if the
152 connection was closed, there was a read error, or if it could not
153 write to the output stream, respectively.
155 IMPORTANT: The function flushes the contents of the buffer in
156 rbuf_flush() before actually reading from fd. If you wish to read
157 from fd immediately, flush or discard the buffer. */
159 get_contents (int fd, FILE *fp, long *len, long restval, long expected,
160 struct rbuf *rbuf, int use_expected, double *elapsed)
/* 16K static download buffer shared across calls; dlbufsize may be
   shrunk below when --limit-rate is low.  */
164 static char dlbuf[16384];
165 int dlbufsize = sizeof (dlbuf);
167 void *progress = NULL;
168 struct wget_timer *timer = wtimer_allocate ();
/* Progress gauge -- presumably created only under opt.verbose; the
   guarding `if' was dropped by the extraction.  */
174 progress = progress_create (restval, expected);
/* First drain any data already buffered in RBUF for this fd.  */
176 if (rbuf && RBUF_FD (rbuf) == fd)
179 while ((res = rbuf_flush (rbuf, dlbuf, sizeof (dlbuf))) != 0)
181 fwrite (dlbuf, 1, res, fp);
193 progress_update (progress, sz, 0);
/* Reset the --limit-rate accounting and restart the timer before the
   main read loop.  NOTE(review): upstream guards the reset with
   `if (opt.limit_rate)' -- that line was dropped; verify.  */
197 limit_bandwidth_reset ();
198 wtimer_reset (timer);
200 /* Use a smaller buffer for low requested bandwidths. For example,
201 with --limit-rate=2k, it doesn't make sense to slurp in 16K of
202 data and then sleep for 8s. With buffer size equal to the limit,
203 we never have to sleep for more than one second. */
204 if (opt.limit_rate && opt.limit_rate < dlbufsize)
205 dlbufsize = opt.limit_rate;
207 /* Read from fd while there is available data.
209 Normally, if expected is 0, it means that it is not known how
210 much data is expected. However, if use_expected is specified,
211 then expected being zero means exactly that. */
212 while (!use_expected || (*len < expected))
214 int amount_to_read = (use_expected
215 ? MIN (expected - *len, dlbufsize) : dlbufsize);
/* SSL connections read through ssl_iread; the #ifdef HAVE_SSL /
   #else lines around these two reads were dropped.  */
218 res = ssl_iread (rbuf->ssl, dlbuf, amount_to_read);
220 #endif /* HAVE_SSL */
221 res = iread (fd, dlbuf, amount_to_read);
226 fwrite (dlbuf, 1, res, fp);
227 /* Always flush the contents of the network packet. This should
228 not hinder performance: fast downloads will be received in
229 16K chunks (which stdio would write out anyway), and slow
230 downloads won't be limited with disk performance. */
/* Per-iteration bookkeeping: elapsed time, bandwidth throttle,
   progress display, and (on Windows) title-bar percentage.  */
238 dltime = wtimer_elapsed (timer);
240 limit_bandwidth (res, &dltime, timer);
244 progress_update (progress, res, dltime);
246 if (use_expected && expected > 0)
247 ws_percenttitle (100.0 * (double)(*len) / (double)expected);
/* Teardown: finish the gauge and free the timer.  NOTE(review): the
   code storing the elapsed time into *elapsed was dropped here.  */
255 progress_finish (progress, dltime);
258 wtimer_delete (timer);
/* Return a printed representation of the download rate, as
   appropriate for the speed.  If PAD is non-zero, strings will be
   padded to the width of 7 characters (xxxx.xx).  */
char *
retr_rate (long bytes, double msecs, int pad)
{
  /* Static buffer: the returned string is only valid until the next
     call.  */
  static char res[20];
  static char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
  int units = 0;

  double dlrate = calc_rate (bytes, msecs, &units);
  sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);

  return res;
}
/* Calculate the download rate and trim it as appropriate for the
   speed.  Appropriate means that if rate is greater than 1K/s,
   kilobytes are used, and if rate is greater than 1MB/s, megabytes
   are used.

   UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
   GB/s.  */
double
calc_rate (long bytes, double msecs, int *units)
{
  double dlrate;

  if (msecs == 0)
    /* If elapsed time is exactly zero, it means we're under the
       granularity of the timer.  This often happens on systems that
       use time() for the timer.  */
    msecs = wtimer_granularity ();

  dlrate = (double)1000 * bytes / msecs;
  if (dlrate < 1024.0)
    *units = 0;
  else if (dlrate < 1024.0 * 1024.0)
    *units = 1, dlrate /= 1024.0;
  else if (dlrate < 1024.0 * 1024.0 * 1024.0)
    *units = 2, dlrate /= (1024.0 * 1024.0);
  else
    /* Maybe someone will need this, one day. */
    *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);

  return dlrate;
}
/* Maximum number of allowed redirections.  20 was chosen as a
   "reasonable" value, which is low enough to not cause havoc, yet
   high enough to guarantee that normal retrievals will not be hurt by
   the check.  */

#define MAX_REDIRECTIONS 20
321 #define SUSPEND_POST_DATA do { \
322 post_data_suspended = 1; \
323 saved_post_data = opt.post_data; \
324 saved_post_file_name = opt.post_file_name; \
325 opt.post_data = NULL; \
326 opt.post_file_name = NULL; \
329 #define RESTORE_POST_DATA do { \
330 if (post_data_suspended) \
332 opt.post_data = saved_post_data; \
333 opt.post_file_name = saved_post_file_name; \
334 post_data_suspended = 0; \
338 static char *getproxy PARAMS ((struct url *));
/* NOTE(review): this function was garbled in extraction -- every line
   carries a stray leading line number, and large parts of the body
   (braces, the redirect goto-loop, error-exit paths, and the final
   return) are missing.  Code bytes are left untouched below; only
   comments were added.  Restore from upstream before building.  */
340 /* Retrieve the given URL. Decides which loop to call -- HTTP, FTP,
343 /* #### This function should be rewritten so it doesn't return from
347 retrieve_url (const char *origurl, char **file, char **newloc,
348 const char *refurl, int *dt)
352 int location_changed, dummy;
353 char *mynewloc, *proxy;
354 struct url *u, *proxy_url;
355 int up_error_code; /* url parse error code */
357 int redirection_count = 0;
/* POST state stashed while following redirections; see the
   SUSPEND_POST_DATA / RESTORE_POST_DATA macros earlier in the file.  */
359 int post_data_suspended = 0;
360 char *saved_post_data = NULL;
361 char *saved_post_file_name = NULL;
363 /* If dt is NULL, use local storage. */
369 url = xstrdup (origurl);
/* Parse the (possibly redirected-to) URL; on failure, log and bail
   out (the error path was dropped in extraction).  */
375 u = url_parse (url, &up_error_code);
378 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
384 refurl = opt.referer;
/* Pick a proxy (if any) appropriate for U's scheme.  */
393 proxy = getproxy (u);
396 /* Parse the proxy URL. */
397 proxy_url = url_parse (proxy, &up_error_code);
400 logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
401 proxy, url_error (up_error_code));
406 if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
408 logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
409 url_free (proxy_url);
/* Dispatch on scheme: HTTP/HTTPS (or anything routed through an HTTP
   proxy) is handled by http_loop, FTP by ftp_loop.  */
416 if (u->scheme == SCHEME_HTTP
418 || u->scheme == SCHEME_HTTPS
420 || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
422 result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
424 else if (u->scheme == SCHEME_FTP)
426 /* If this is a redirection, we must not allow recursive FTP
427 retrieval, so we save recursion to oldrec, and restore it
429 int oldrec = opt.recursive;
430 if (redirection_count)
432 result = ftp_loop (u, dt, proxy_url);
433 opt.recursive = oldrec;
435 /* There is a possibility of having HTTP being redirected to
436 FTP. In these cases we must decide whether the text is HTML
437 according to the suffix. The HTML suffixes are `.html',
438 `.htm' and a few others, case-insensitive. */
439 if (redirection_count && local_file && u->scheme == SCHEME_FTP)
441 if (has_html_suffix_p (local_file))
448 url_free (proxy_url);
/* A NEWLOCATION result means the server redirected us; in the full
   source this branch loops back and retrieves the new URL.  */
452 location_changed = (result == NEWLOCATION);
453 if (location_changed)
455 char *construced_newloc;
456 struct url *newloc_parsed;
458 assert (mynewloc != NULL);
463 /* The HTTP specs only allow absolute URLs to appear in
464 redirects, but a ton of boneheaded webservers and CGIs out
465 there break the rules and use relative URLs, and popular
466 browsers are lenient about this, so wget should be too. */
467 construced_newloc = uri_merge (url, mynewloc);
469 mynewloc = construced_newloc;
471 /* Now, see if this new location makes sense. */
472 newloc_parsed = url_parse (mynewloc, &up_error_code);
475 logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
476 url_error (up_error_code));
484 /* Now mynewloc will become newloc_parsed->url, because if the
485 Location contained relative paths like .././something, we
486 don't want that propagating as url. */
488 mynewloc = xstrdup (newloc_parsed->url);
490 /* Check for max. number of redirections. */
491 if (++redirection_count > MAX_REDIRECTIONS)
493 logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
495 url_free (newloc_parsed);
508 /* If we're being redirected from POST, we don't want to POST
509 again. Many requests answer POST with a redirection to an
510 index page; that redirection is clearly a GET. We "suspend"
511 POST data for the duration of the redirections, and restore
512 it when we're done. */
513 if (!post_data_suspended)
/* Bookkeeping after a successful retrieval -- NOTE(review): purpose
   inferred from the register_* names; verify against their
   definitions elsewhere in the tree.  */
523 register_download (u->url, local_file);
524 if (redirection_count && 0 != strcmp (origurl, u->url))
525 register_redirection (origurl, u->url);
527 register_html (u->url, local_file);
/* Hand results back to the caller via the FILE/NEWLOC out-params.  */
532 *file = local_file ? local_file : NULL;
534 FREE_MAYBE (local_file);
538 if (redirection_count)
552 ++global_download_count;
558 /* Find the URLs in the file and call retrieve_url() for each of
559 them. If HTML is non-zero, treat the file as HTML, and construct
560 the URLs accordingly.
562 If opt.recursive is set, call retrieve_tree() for each file. */
565 retrieve_from_file (const char *file, int html, int *count)
568 struct urlpos *url_list, *cur_url;
570 url_list = (html ? get_urls_html (file, NULL, NULL)
571 : get_urls_file (file));
572 status = RETROK; /* Suppose everything is OK. */
573 *count = 0; /* Reset the URL count. */
575 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
577 char *filename = NULL, *new_file = NULL;
580 if (cur_url->ignore_when_downloading)
583 if (opt.quota && total_downloaded_bytes > opt.quota)
588 if ((opt.recursive || opt.page_requisites)
589 && cur_url->url->scheme != SCHEME_FTP)
590 status = retrieve_tree (cur_url->url->url);
592 status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
594 if (filename && opt.delete_after && file_exists_p (filename))
596 DEBUGP (("Removing file due to --delete-after in"
597 " retrieve_from_file():\n"));
598 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
599 if (unlink (filename))
600 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
604 FREE_MAYBE (new_file);
605 FREE_MAYBE (filename);
608 /* Free the linked list of URL-s. */
609 free_urlpos (url_list);
614 /* Print `giving up', or `retrying', depending on the impending
615 action. N1 and N2 are the attempt number and the attempt limit. */
617 printwhat (int n1, int n2)
619 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
622 /* If opt.wait or opt.waitretry are specified, and if certain
623 conditions are met, sleep the appropriate number of seconds. See
624 the documentation of --wait and --waitretry for more information.
626 COUNT is the count of current retrieval, beginning with 1. */
629 sleep_between_retrievals (int count)
631 static int first_retrieval = 1;
635 /* Don't sleep before the very first retrieval. */
640 if (opt.waitretry && count > 1)
642 /* If opt.waitretry is specified and this is a retry, wait for
643 COUNT-1 number of seconds, or for opt.waitretry seconds. */
644 if (count <= opt.waitretry)
647 usleep (1000000L * opt.waitretry);
651 if (!opt.random_wait || count > 1)
652 /* If random-wait is not specified, or if we are sleeping
653 between retries of the same download, sleep the fixed
655 usleep (1000000L * opt.wait);
658 /* Sleep a random amount of time averaging in opt.wait
659 seconds. The sleeping amount ranges from 0 to
660 opt.wait*2, inclusive. */
661 double waitsecs = 2 * opt.wait * random_float ();
662 DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
663 opt.wait, waitsecs));
664 usleep (1000000L * waitsecs);
669 /* Free the linked list of urlpos. */
671 free_urlpos (struct urlpos *l)
675 struct urlpos *next = l->next;
678 FREE_MAYBE (l->local_name);
684 /* Rotate FNAME opt.backups times */
686 rotate_backups(const char *fname)
688 int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1;
689 char *from = (char *)alloca (maxlen);
690 char *to = (char *)alloca (maxlen);
694 if (stat (fname, &sb) == 0)
695 if (S_ISREG (sb.st_mode) == 0)
698 for (i = opt.backups; i > 1; i--)
700 sprintf (from, "%s.%d", fname, i - 1);
701 sprintf (to, "%s.%d", fname, i);
705 sprintf (to, "%s.%d", fname, 1);
709 static int no_proxy_match PARAMS ((const char *, const char **));
711 /* Return the URL of the proxy appropriate for url U. */
714 getproxy (struct url *u)
718 static char rewritten_storage[1024];
722 if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
728 proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
732 proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
736 proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
741 if (!proxy || !*proxy)
744 /* Handle shorthands. `rewritten_storage' is a kludge to allow
745 getproxy() to return static storage. */
746 rewritten_url = rewrite_shorthand_url (proxy);
749 strncpy (rewritten_storage, rewritten_url, sizeof(rewritten_storage));
750 rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
751 proxy = rewritten_storage;
/* Should a host be accessed through proxy, concerning no_proxy?  */
static int
no_proxy_match (const char *host, const char **no_proxy)
{
  /* With no exclusion list, every host goes through the proxy.  */
  if (!no_proxy)
    return 1;
  else
    return !sufmatch (no_proxy, host);
}