sjero.net Git - wget/blob - src/url.c

   1 /* URL handling.
   2    Copyright (C) 1995, 1996, 1997, 2000, 2001, 2003, 2003
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or (at
  10 your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget; if not, write to the Free Software
  19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20
  21 In addition, as a special exception, the Free Software Foundation
  22 gives permission to link the code of its release of Wget with the
  23 OpenSSL project's "OpenSSL" library (or with modified versions of it
  24 that use the same license as the "OpenSSL" library), and distribute
  25 the linked executables.  You must obey the GNU General Public License
  26 in all respects for all of the code used other than "OpenSSL".  If you
  27 modify this file, you may extend this exception to your version of the
  28 file, but you are not obligated to do so.  If you do not wish to do
  29 so, delete this exception statement from your version.  */
  30
  31 #include <config.h>
  32
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #ifdef HAVE_STRING_H
  36 # include <string.h>
  37 #else
  38 # include <strings.h>
  39 #endif
  40 #include <sys/types.h>
  41 #ifdef HAVE_UNISTD_H
  42 # include <unistd.h>
  43 #endif
  44 #include <errno.h>
  45 #include <assert.h>
  46
  47 #include "wget.h"
  48 #include "utils.h"
  49 #include "url.h"
  50
  51 #ifndef errno
  52 extern int errno;
  53 #endif
  54
  55 struct scheme_data
  56 {
  57   char *leading_string;
  58   int default_port;
  59   int enabled;
  60 };
  61
  62 /* Supported schemes: */
  63 static struct scheme_data supported_schemes[] =
  64 {
  65   { "http://",  DEFAULT_HTTP_PORT,  1 },
  66 #ifdef HAVE_SSL
  67   { "https://", DEFAULT_HTTPS_PORT, 1 },
  68 #endif
  69   { "ftp://",   DEFAULT_FTP_PORT,   1 },
  70
  71   /* SCHEME_INVALID */
  72   { NULL,       -1,                 0 }
  73 };
  74
  75 /* Forward declarations: */
  76
  77 static int path_simplify PARAMS ((char *));
  78 \f
  79 /* Support for escaping and unescaping of URL strings.  */
  80
  81 /* Table of "reserved" and "unsafe" characters.  Those terms are
  82    rfc1738-speak, as such largely obsoleted by rfc2396 and later
  83    specs, but the general idea remains.
  84
  85    A reserved character is the one that you can't decode without
  86    changing the meaning of the URL.  For example, you can't decode
  87    "/foo/%2f/bar" into "/foo///bar" because the number and contents of
  88    path components is different.  Non-reserved characters can be
  89    changed, so "/foo/%78/bar" is safe to change to "/foo/x/bar".  Wget
  90    uses the rfc1738 set of reserved characters, plus "$" and ",", as
  91    recommended by rfc2396.
  92
   93    An unsafe character is one that should be encoded when URLs
  94    are placed in foreign environments.  E.g. space and newline are
  95    unsafe in HTTP contexts because HTTP uses them as separator and
  96    terminator, so they must be encoded to %20 and %0A respectively.
  97    "*" is unsafe in shell context, etc.
  98
  99    We determine whether a character is unsafe through static table
 100    lookup.  This code assumes ASCII character set and 8-bit chars.  */
 101
 102 enum {
 103   /* rfc1738 reserved chars + "$" and ",".  */
 104   urlchr_reserved = 1,
 105
 106   /* rfc1738 unsafe chars, plus non-printables.  */
 107   urlchr_unsafe   = 2
 108 };
 109
 110 #define urlchr_test(c, mask) (urlchr_table[(unsigned char)(c)] & (mask))
 111 #define URL_RESERVED_CHAR(c) urlchr_test(c, urlchr_reserved)
 112 #define URL_UNSAFE_CHAR(c) urlchr_test(c, urlchr_unsafe)
 113
 114 /* Shorthands for the table: */
 115 #define R  urlchr_reserved
 116 #define U  urlchr_unsafe
 117 #define RU R|U
 118
 119 const static unsigned char urlchr_table[256] =
 120 {
 121   U,  U,  U,  U,   U,  U,  U,  U,   /* NUL SOH STX ETX  EOT ENQ ACK BEL */
 122   U,  U,  U,  U,   U,  U,  U,  U,   /* BS  HT  LF  VT   FF  CR  SO  SI  */
 123   U,  U,  U,  U,   U,  U,  U,  U,   /* DLE DC1 DC2 DC3  DC4 NAK SYN ETB */
 124   U,  U,  U,  U,   U,  U,  U,  U,   /* CAN EM  SUB ESC  FS  GS  RS  US  */
 125   U,  0,  U, RU,   R,  U,  R,  0,   /* SP  !   "   #    $   %   &   '   */
 126   0,  0,  0,  R,   R,  0,  0,  R,   /* (   )   *   +    ,   -   .   /   */
 127   0,  0,  0,  0,   0,  0,  0,  0,   /* 0   1   2   3    4   5   6   7   */
 128   0,  0, RU,  R,   U,  R,  U,  R,   /* 8   9   :   ;    <   =   >   ?   */
 129  RU,  0,  0,  0,   0,  0,  0,  0,   /* @   A   B   C    D   E   F   G   */
 130   0,  0,  0,  0,   0,  0,  0,  0,   /* H   I   J   K    L   M   N   O   */
 131   0,  0,  0,  0,   0,  0,  0,  0,   /* P   Q   R   S    T   U   V   W   */
 132   0,  0,  0, RU,   U, RU,  U,  0,   /* X   Y   Z   [    \   ]   ^   _   */
 133   U,  0,  0,  0,   0,  0,  0,  0,   /* `   a   b   c    d   e   f   g   */
 134   0,  0,  0,  0,   0,  0,  0,  0,   /* h   i   j   k    l   m   n   o   */
 135   0,  0,  0,  0,   0,  0,  0,  0,   /* p   q   r   s    t   u   v   w   */
 136   0,  0,  0,  U,   U,  U,  U,  U,   /* x   y   z   {    |   }   ~   DEL */
 137
 138   U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,
 139   U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,
 140   U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,
 141   U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,
 142
 143   U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,
 144   U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,
 145   U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,
 146   U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,
 147 };
 148 #undef R
 149 #undef U
 150 #undef RU
 151
 152 /* URL-unescape the string S.
 153
 154    This is done by transforming the sequences "%HH" to the character
 155    represented by the hexadecimal digits HH.  If % is not followed by
 156    two hexadecimal digits, it is inserted literally.
 157
 158    The transformation is done in place.  If you need the original
 159    string intact, make a copy before calling this function.  */
 160
 161 static void
 162 url_unescape (char *s)
 163 {
 164   char *t = s;                  /* t - tortoise */
 165   char *h = s;                  /* h - hare     */
 166
 167   for (; *h; h++, t++)
 168     {
 169       if (*h != '%')
 170         {
 171         copychar:
 172           *t = *h;
 173         }
 174       else
 175         {
 176           /* Do nothing if '%' is not followed by two hex digits. */
 177           if (!h[1] || !h[2] || !(ISXDIGIT (h[1]) && ISXDIGIT (h[2])))
 178             goto copychar;
 179           *t = X2DIGITS_TO_NUM (h[1], h[2]);
 180           h += 2;
 181         }
 182     }
 183   *t = '\0';
 184 }
 185
 186 /* The core of url_escape_* functions.  Escapes the characters that
 187    match the provided mask in urlchr_table.
 188
 189    If ALLOW_PASSTHROUGH is non-zero, a string with no unsafe chars
 190    will be returned unchanged.  If ALLOW_PASSTHROUGH is zero, a
 191    freshly allocated string will be returned in all cases.  */
 192
 193 static char *
 194 url_escape_1 (const char *s, unsigned char mask, int allow_passthrough)
 195 {
 196   const char *p1;
 197   char *p2, *newstr;
 198   int newlen;
 199   int addition = 0;
 200
 201   for (p1 = s; *p1; p1++)
 202     if (urlchr_test (*p1, mask))
 203       addition += 2;            /* Two more characters (hex digits) */
 204
 205   if (!addition)
 206     return allow_passthrough ? (char *)s : xstrdup (s);
 207
 208   newlen = (p1 - s) + addition;
 209   newstr = (char *)xmalloc (newlen + 1);
 210
 211   p1 = s;
 212   p2 = newstr;
 213   while (*p1)
 214     {
 215       /* Quote the characters that match the test mask. */
 216       if (urlchr_test (*p1, mask))
 217         {
 218           unsigned char c = *p1++;
 219           *p2++ = '%';
 220           *p2++ = XNUM_TO_DIGIT (c >> 4);
 221           *p2++ = XNUM_TO_DIGIT (c & 0xf);
 222         }
 223       else
 224         *p2++ = *p1++;
 225     }
 226   assert (p2 - newstr == newlen);
 227   *p2 = '\0';
 228
 229   return newstr;
 230 }
 231
 232 /* URL-escape the unsafe characters (see urlchr_table) in a given
 233    string, returning a freshly allocated string.  */
 234
 235 char *
 236 url_escape (const char *s)
 237 {
 238   return url_escape_1 (s, urlchr_unsafe, 0);
 239 }
 240
 241 /* URL-escape the unsafe characters (see urlchr_table) in a given
 242    string.  If no characters are unsafe, S is returned.  */
 243
 244 static char *
 245 url_escape_allow_passthrough (const char *s)
 246 {
 247   return url_escape_1 (s, urlchr_unsafe, 1);
 248 }
 249 \f
 250 enum copy_method { CM_DECODE, CM_ENCODE, CM_PASSTHROUGH };
 251
 252 /* Decide whether to encode, decode, or pass through the char at P.
 253    This used to be a macro, but it got a little too convoluted.  */
 254 static inline enum copy_method
 255 decide_copy_method (const char *p)
 256 {
 257   if (*p == '%')
 258     {
 259       if (ISXDIGIT (*(p + 1)) && ISXDIGIT (*(p + 2)))
 260         {
 261           /* %xx sequence: decode it, unless it would decode to an
 262              unsafe or a reserved char; in that case, leave it as
 263              is. */
 264           char preempt = X2DIGITS_TO_NUM (*(p + 1), *(p + 2));
 265           if (URL_UNSAFE_CHAR (preempt) || URL_RESERVED_CHAR (preempt))
 266             return CM_PASSTHROUGH;
 267           else
 268             return CM_DECODE;
 269         }
 270       else
 271         /* Garbled %.. sequence: encode `%'. */
 272         return CM_ENCODE;
 273     }
 274   else if (URL_UNSAFE_CHAR (*p) && !URL_RESERVED_CHAR (*p))
 275     return CM_ENCODE;
 276   else
 277     return CM_PASSTHROUGH;
 278 }
 279
 280 /* Translate a %-escaped (but possibly non-conformant) input string S
 281    into a %-escaped (and conformant) output string.  If no characters
 282    are encoded or decoded, return the same string S; otherwise, return
 283    a freshly allocated string with the new contents.
 284
 285    After a URL has been run through this function, the protocols that
 286    use `%' as the quote character can use the resulting string as-is,
 287    while those that don't call url_unescape() to get to the intended
 288    data.  This function is also stable: after an input string is
 289    transformed the first time, all further transformations of the
 290    result yield the same result string.
 291
 292    Let's discuss why this function is needed.
 293
 294    Imagine Wget is to retrieve `http://abc.xyz/abc def'.  Since a raw
 295    space character would mess up the HTTP request, it needs to be
 296    quoted, like this:
 297
 298        GET /abc%20def HTTP/1.0
 299
 300    It appears that the unsafe chars need to be quoted, for example
 301    with url_escape.  But what if we're requested to download
 302    `abc%20def'?  url_escape transforms "%" to "%25", which would leave
 303    us with `abc%2520def'.  This is incorrect -- since %-escapes are
 304    part of URL syntax, "%20" is the correct way to denote a literal
  305    space on the Wget command line.  This leads us to the conclusion
 306    that in that case Wget should not call url_escape, but leave the
 307    `%20' as is.
 308
 309    And what if the requested URI is `abc%20 def'?  If we call
 310    url_escape, we end up with `/abc%2520%20def', which is almost
 311    certainly not intended.  If we don't call url_escape, we are left
 312    with the embedded space and cannot complete the request.  What the
 313    user meant was for Wget to request `/abc%20%20def', and this is
 314    where reencode_escapes kicks in.
 315
 316    Wget used to solve this by first decoding %-quotes, and then
 317    encoding all the "unsafe" characters found in the resulting string.
 318    This was wrong because it didn't preserve certain URL special
 319    (reserved) characters.  For instance, URI containing "a%2B+b" (0x2b
 320    == '+') would get translated to "a%2B%2Bb" or "a++b" depending on
 321    whether we considered `+' reserved (it is).  One of these results
 322    is inevitable because by the second step we would lose information
 323    on whether the `+' was originally encoded or not.  Both results
 324    were wrong because in CGI parameters + means space, while %2B means
 325    literal plus.  reencode_escapes correctly translates the above to
 326    "a%2B+b", i.e. returns the original string.
 327
 328    This function uses an algorithm proposed by Anon Sricharoenchai:
 329
 330    1. Encode all URL_UNSAFE and the "%" that are not followed by 2
 331       hexdigits.
 332
 333    2. Decode all "%XX" except URL_UNSAFE, URL_RESERVED (";/?:@=&") and
 334       "+".
 335
 336    ...except that this code conflates the two steps, and decides
 337    whether to encode, decode, or pass through each character in turn.
 338    The function still uses two passes, but their logic is the same --
 339    the first pass exists merely for the sake of allocation.  Another
 340    small difference is that we include `+' to URL_RESERVED.
 341
 342    Anon's test case:
 343
 344    "http://abc.xyz/%20%3F%%36%31%25aa% a?a=%61+a%2Ba&b=b%26c%3Dc"
 345    ->
 346    "http://abc.xyz/%20%3F%2561%25aa%25%20a?a=a+a%2Ba&b=b%26c%3Dc"
 347
 348    Simpler test cases:
 349
 350    "foo bar"         -> "foo%20bar"
 351    "foo%20bar"       -> "foo%20bar"
 352    "foo %20bar"      -> "foo%20%20bar"
 353    "foo%%20bar"      -> "foo%25%20bar"       (0x25 == '%')
 354    "foo%25%20bar"    -> "foo%25%20bar"
 355    "foo%2%20bar"     -> "foo%252%20bar"
 356    "foo+bar"         -> "foo+bar"            (plus is reserved!)
 357    "foo%2b+bar"      -> "foo%2b+bar"  */
 358
 359 static char *
 360 reencode_escapes (const char *s)
 361 {
 362   const char *p1;
 363   char *newstr, *p2;
 364   int oldlen, newlen;
 365
 366   int encode_count = 0;
 367   int decode_count = 0;
 368
 369   /* First, pass through the string to see if there's anything to do,
 370      and to calculate the new length.  */
 371   for (p1 = s; *p1; p1++)
 372     {
 373       switch (decide_copy_method (p1))
 374         {
 375         case CM_ENCODE:
 376           ++encode_count;
 377           break;
 378         case CM_DECODE:
 379           ++decode_count;
 380           break;
 381         case CM_PASSTHROUGH:
 382           break;
 383         }
 384     }
 385
 386   if (!encode_count && !decode_count)
 387     /* The string is good as it is. */
 388     return (char *)s;           /* C const model sucks. */
 389
 390   oldlen = p1 - s;
 391   /* Each encoding adds two characters (hex digits), while each
 392      decoding removes two characters.  */
 393   newlen = oldlen + 2 * (encode_count - decode_count);
 394   newstr = xmalloc (newlen + 1);
 395
 396   p1 = s;
 397   p2 = newstr;
 398
 399   while (*p1)
 400     {
 401       switch (decide_copy_method (p1))
 402         {
 403         case CM_ENCODE:
 404           {
 405             unsigned char c = *p1++;
 406             *p2++ = '%';
 407             *p2++ = XNUM_TO_DIGIT (c >> 4);
 408             *p2++ = XNUM_TO_DIGIT (c & 0xf);
 409           }
 410           break;
 411         case CM_DECODE:
 412           *p2++ = X2DIGITS_TO_NUM (p1[1], p1[2]);
 413           p1 += 3;              /* skip %xx */
 414           break;
 415         case CM_PASSTHROUGH:
 416           *p2++ = *p1++;
 417         }
 418     }
 419   *p2 = '\0';
 420   assert (p2 - newstr == newlen);
 421   return newstr;
 422 }
 423 \f
 424 /* Returns the scheme type if the scheme is supported, or
 425    SCHEME_INVALID if not.  */
 426
 427 enum url_scheme
 428 url_scheme (const char *url)
 429 {
 430   int i;
 431
 432   for (i = 0; supported_schemes[i].leading_string; i++)
 433     if (0 == strncasecmp (url, supported_schemes[i].leading_string,
 434                           strlen (supported_schemes[i].leading_string)))
 435       {
 436         if (supported_schemes[i].enabled)
 437           return (enum url_scheme) i;
 438         else
 439           return SCHEME_INVALID;
 440       }
 441
 442   return SCHEME_INVALID;
 443 }
 444
 445 #define SCHEME_CHAR(ch) (ISALNUM (ch) || (ch) == '-' || (ch) == '+')
 446
 447 /* Return 1 if the URL begins with any "scheme", 0 otherwise.  As
 448    currently implemented, it returns true if URL begins with
 449    [-+a-zA-Z0-9]+: .  */
 450
 451 int
 452 url_has_scheme (const char *url)
 453 {
 454   const char *p = url;
 455
 456   /* The first char must be a scheme char. */
 457   if (!*p || !SCHEME_CHAR (*p))
 458     return 0;
 459   ++p;
 460   /* Followed by 0 or more scheme chars. */
 461   while (*p && SCHEME_CHAR (*p))
 462     ++p;
 463   /* Terminated by ':'. */
 464   return *p == ':';
 465 }
 466
 467 int
 468 scheme_default_port (enum url_scheme scheme)
 469 {
 470   return supported_schemes[scheme].default_port;
 471 }
 472
 473 void
 474 scheme_disable (enum url_scheme scheme)
 475 {
 476   supported_schemes[scheme].enabled = 0;
 477 }
 478
 479 /* Skip the username and password, if present here.  The function
 480    should *not* be called with the complete URL, but with the part
 481    right after the scheme.
 482
 483    If no username and password are found, return 0.  */
 484
 485 static int
 486 url_skip_credentials (const char *url)
 487 {
 488   /* Look for '@' that comes before terminators, such as '/', '?',
 489      '#', or ';'.  */
 490   const char *p = (const char *)strpbrk (url, "@/?#;");
 491   if (!p || *p != '@')
 492     return 0;
 493   return p + 1 - url;
 494 }
 495
 496 /* Parse credentials contained in [BEG, END).  The region is expected
 497    to have come from a URL and is unescaped.  */
 498
 499 static int
 500 parse_credentials (const char *beg, const char *end, char **user, char **passwd)
 501 {
 502   char *colon;
 503   const char *userend;
 504
 505   if (beg == end)
 506     return 0;                   /* empty user name */
 507
 508   colon = memchr (beg, ':', end - beg);
 509   if (colon == beg)
 510     return 0;                   /* again empty user name */
 511
 512   if (colon)
 513     {
 514       *passwd = strdupdelim (colon + 1, end);
 515       userend = colon;
 516       url_unescape (*passwd);
 517     }
 518   else
 519     {
 520       *passwd = NULL;
 521       userend = end;
 522     }
 523   *user = strdupdelim (beg, userend);
 524   url_unescape (*user);
 525   return 1;
 526 }
 527
 528 /* Used by main.c: detect URLs written using the "shorthand" URL forms
 529    popularized by Netscape and NcFTP.  HTTP shorthands look like this:
 530
 531    www.foo.com[:port]/dir/file   -> http://www.foo.com[:port]/dir/file
 532    www.foo.com[:port]            -> http://www.foo.com[:port]
 533
 534    FTP shorthands look like this:
 535
 536    foo.bar.com:dir/file          -> ftp://foo.bar.com/dir/file
 537    foo.bar.com:/absdir/file      -> ftp://foo.bar.com//absdir/file
 538
 539    If the URL needs not or cannot be rewritten, return NULL.  */
 540
 541 char *
 542 rewrite_shorthand_url (const char *url)
 543 {
 544   const char *p;
 545
 546   if (url_has_scheme (url))
 547     return NULL;
 548
 549   /* Look for a ':' or '/'.  The former signifies NcFTP syntax, the
 550      latter Netscape.  */
 551   for (p = url; *p && *p != ':' && *p != '/'; p++)
 552     ;
 553
 554   if (p == url)
 555     return NULL;
 556
 557   if (*p == ':')
 558     {
 559       const char *pp;
 560       char *res;
 561       /* If the characters after the colon and before the next slash
 562          or end of string are all digits, it's HTTP.  */
 563       int digits = 0;
 564       for (pp = p + 1; ISDIGIT (*pp); pp++)
 565         ++digits;
 566       if (digits > 0 && (*pp == '/' || *pp == '\0'))
 567         goto http;
 568
 569       /* Prepend "ftp://" to the entire URL... */
 570       res = xmalloc (6 + strlen (url) + 1);
 571       sprintf (res, "ftp://%s", url);
 572       /* ...and replace ':' with '/'. */
 573       res[6 + (p - url)] = '/';
 574       return res;
 575     }
 576   else
 577     {
 578       char *res;
 579     http:
 580       /* Just prepend "http://" to what we have. */
 581       res = xmalloc (7 + strlen (url) + 1);
 582       sprintf (res, "http://%s", url);
 583       return res;
 584     }
 585 }
 586 \f
 587 static void split_path PARAMS ((const char *, char **, char **));
 588
 589 /* Like strpbrk, with the exception that it returns the pointer to the
 590    terminating zero (end-of-string aka "eos") if no matching character
 591    is found.
 592
 593    Although I normally balk at Gcc-specific optimizations, it probably
 594    makes sense here: glibc has optimizations that detect strpbrk being
 595    called with literal string as ACCEPT and inline the search.  That
 596    optimization is defeated if strpbrk is hidden within the call to
 597    another function.  (And no, making strpbrk_or_eos inline doesn't
 598    help because the check for literal accept is in the
 599    preprocessor.)  */
 600
 601 #ifdef __GNUC__
 602
 603 #define strpbrk_or_eos(s, accept) ({            \
 604   char *SOE_p = strpbrk (s, accept);            \
 605   if (!SOE_p)                                   \
 606     SOE_p = (char *)s + strlen (s);             \
 607   SOE_p;                                        \
 608 })
 609
 610 #else  /* not __GNUC__ */
 611
 612 static char *
 613 strpbrk_or_eos (const char *s, const char *accept)
 614 {
 615   char *p = strpbrk (s, accept);
 616   if (!p)
 617     p = (char *)s + strlen (s);
 618   return p;
 619 }
 620 #endif
 621
 622 /* Turn STR into lowercase; return non-zero if a character was
 623    actually changed. */
 624
 625 static int
 626 lowercase_str (char *str)
 627 {
 628   int change = 0;
 629   for (; *str; str++)
 630     if (ISUPPER (*str))
 631       {
 632         change = 1;
 633         *str = TOLOWER (*str);
 634       }
 635   return change;
 636 }
 637
 638 static char *parse_errors[] = {
 639 #define PE_NO_ERROR                     0
 640   N_("No error"),
 641 #define PE_UNSUPPORTED_SCHEME           1
 642   N_("Unsupported scheme"),
 643 #define PE_EMPTY_HOST                   2
 644   N_("Empty host"),
 645 #define PE_BAD_PORT_NUMBER              3
 646   N_("Bad port number"),
 647 #define PE_INVALID_USER_NAME            4
 648   N_("Invalid user name"),
 649 #define PE_UNTERMINATED_IPV6_ADDRESS    5
 650   N_("Unterminated IPv6 numeric address"),
 651 #define PE_IPV6_NOT_SUPPORTED           6
 652   N_("IPv6 addresses not supported"),
 653 #define PE_INVALID_IPV6_ADDRESS         7
 654   N_("Invalid IPv6 numeric address")
 655 };
 656
 657 #ifdef ENABLE_IPV6
 658 /* The following two functions were adapted from glibc. */
 659
 660 static int
 661 is_valid_ipv4_address (const char *str, const char *end)
 662 {
 663   int saw_digit = 0;
 664   int octets = 0;
 665   int val = 0;
 666
 667   while (str < end)
 668     {
 669       int ch = *str++;
 670
 671       if (ch >= '0' && ch <= '9')
 672         {
 673           val = val * 10 + (ch - '0');
 674
 675           if (val > 255)
 676             return 0;
 677           if (saw_digit == 0)
 678             {
 679               if (++octets > 4)
 680                 return 0;
 681               saw_digit = 1;
 682             }
 683         }
 684       else if (ch == '.' && saw_digit == 1)
 685         {
 686           if (octets == 4)
 687             return 0;
 688           val = 0;
 689           saw_digit = 0;
 690         }
 691       else
 692         return 0;
 693     }
 694   if (octets < 4)
 695     return 0;
 696
 697   return 1;
 698 }
 699
 700 static int
 701 is_valid_ipv6_address (const char *str, const char *end)
 702 {
 703   enum {
 704     NS_INADDRSZ  = 4,
 705     NS_IN6ADDRSZ = 16,
 706     NS_INT16SZ   = 2
 707   };
 708
 709   const char *curtok;
 710   int tp;
 711   const char *colonp;
 712   int saw_xdigit;
 713   unsigned int val;
 714
 715   tp = 0;
 716   colonp = NULL;
 717
 718   if (str == end)
 719     return 0;
 720
 721   /* Leading :: requires some special handling. */
 722   if (*str == ':')
 723     {
 724       ++str;
 725       if (str == end || *str != ':')
 726         return 0;
 727     }
 728
 729   curtok = str;
 730   saw_xdigit = 0;
 731   val = 0;
 732
 733   while (str < end)
 734     {
 735       int ch = *str++;
 736
 737       /* if ch is a number, add it to val. */
 738       if (ISXDIGIT (ch))
 739         {
 740           val <<= 4;
 741           val |= XDIGIT_TO_NUM (ch);
 742           if (val > 0xffff)
 743             return 0;
 744           saw_xdigit = 1;
 745           continue;
 746         }
 747
 748       /* if ch is a colon ... */
 749       if (ch == ':')
 750         {
 751           curtok = str;
 752           if (saw_xdigit == 0)
 753             {
 754               if (colonp != NULL)
 755                 return 0;
 756               colonp = str + tp;
 757               continue;
 758             }
 759           else if (str == end)
 760             return 0;
 761           if (tp > NS_IN6ADDRSZ - NS_INT16SZ)
 762             return 0;
 763           tp += NS_INT16SZ;
 764           saw_xdigit = 0;
 765           val = 0;
 766           continue;
 767         }
 768
 769       /* if ch is a dot ... */
 770       if (ch == '.' && (tp <= NS_IN6ADDRSZ - NS_INADDRSZ)
 771           && is_valid_ipv4_address (curtok, end) == 1)
 772         {
 773           tp += NS_INADDRSZ;
 774           saw_xdigit = 0;
 775           break;
 776         }
 777
 778       return 0;
 779     }
 780
 781   if (saw_xdigit == 1)
 782     {
 783       if (tp > NS_IN6ADDRSZ - NS_INT16SZ)
 784         return 0;
 785       tp += NS_INT16SZ;
 786     }
 787
 788   if (colonp != NULL)
 789     {
 790       if (tp == NS_IN6ADDRSZ)
 791         return 0;
 792       tp = NS_IN6ADDRSZ;
 793     }
 794
 795   if (tp != NS_IN6ADDRSZ)
 796     return 0;
 797
 798   return 1;
 799 }
 800 #endif
 801
 802 /* Parse a URL.
 803
 804    Return a new struct url if successful, NULL on error.  In case of
 805    error, and if ERROR is not NULL, also set *ERROR to the appropriate
 806    error code. */
 807 struct url *
 808 url_parse (const char *url, int *error)
 809 {
 810   struct url *u;
 811   const char *p;
 812   int path_modified, host_modified;
 813
 814   enum url_scheme scheme;
 815
 816   const char *uname_b,     *uname_e;
 817   const char *host_b,      *host_e;
 818   const char *path_b,      *path_e;
 819   const char *params_b,    *params_e;
 820   const char *query_b,     *query_e;
 821   const char *fragment_b,  *fragment_e;
 822
 823   int port;
 824   char *user = NULL, *passwd = NULL;
 825
 826   char *url_encoded = NULL;
 827
 828   int error_code;
 829
 830   scheme = url_scheme (url);
 831   if (scheme == SCHEME_INVALID)
 832     {
 833       error_code = PE_UNSUPPORTED_SCHEME;
 834       goto error;
 835     }
 836
 837   url_encoded = reencode_escapes (url);
 838   p = url_encoded;
 839
 840   p += strlen (supported_schemes[scheme].leading_string);
 841   uname_b = p;
 842   p += url_skip_credentials (p);
 843   uname_e = p;
 844
 845   /* scheme://user:pass@host[:port]... */
 846   /*                    ^              */
 847
 848   /* We attempt to break down the URL into the components path,
 849      params, query, and fragment.  They are ordered like this:
 850
 851        scheme://host[:port][/path][;params][?query][#fragment]  */
 852
 853   params_b   = params_e   = NULL;
 854   query_b    = query_e    = NULL;
 855   fragment_b = fragment_e = NULL;
 856
 857   host_b = p;
 858
 859   if (*p == '[')
 860     {
 861       /* Handle IPv6 address inside square brackets.  Ideally we'd
 862          just look for the terminating ']', but rfc2732 mandates
 863          rejecting invalid IPv6 addresses.  */
 864
 865       /* The address begins after '['. */
 866       host_b = p + 1;
 867       host_e = strchr (host_b, ']');
 868
 869       if (!host_e)
 870         {
 871           error_code = PE_UNTERMINATED_IPV6_ADDRESS;
 872           goto error;
 873         }
 874
 875 #ifdef ENABLE_IPV6
 876       /* Check if the IPv6 address is valid. */
 877       if (!is_valid_ipv6_address(host_b, host_e))
 878         {
 879           error_code = PE_INVALID_IPV6_ADDRESS;
 880           goto error;
 881         }
 882
 883       /* Continue parsing after the closing ']'. */
 884       p = host_e + 1;
 885 #else
 886       error_code = PE_IPV6_NOT_SUPPORTED;
 887       goto error;
 888 #endif
 889     }
 890   else
 891     {
 892       p = strpbrk_or_eos (p, ":/;?#");
 893       host_e = p;
 894     }
 895
 896   if (host_b == host_e)
 897     {
 898       error_code = PE_EMPTY_HOST;
 899       goto error;
 900     }
 901
 902   port = scheme_default_port (scheme);
 903   if (*p == ':')
 904     {
 905       const char *port_b, *port_e, *pp;
 906
 907       /* scheme://host:port/tralala */
 908       /*              ^             */
 909       ++p;
 910       port_b = p;
 911       p = strpbrk_or_eos (p, "/;?#");
 912       port_e = p;
 913
 914       /* Allow empty port, as per rfc2396. */
 915       if (port_b != port_e)
 916         {
 917           for (port = 0, pp = port_b; pp < port_e; pp++)
 918             {
 919               if (!ISDIGIT (*pp))
 920                 {
 921                   /* http://host:12randomgarbage/blah */
 922                   /*               ^                  */
 923                   error_code = PE_BAD_PORT_NUMBER;
 924                   goto error;
 925                 }
 926               port = 10 * port + (*pp - '0');
 927             }
 928         }
 929     }
 930
 931   if (*p == '/')
 932     {
 933       ++p;
 934       path_b = p;
 935       p = strpbrk_or_eos (p, ";?#");
 936       path_e = p;
 937     }
 938   else
 939     {
 940       /* Path is not allowed not to exist. */
 941       path_b = path_e = p;
 942     }
 943
 944   if (*p == ';')
 945     {
 946       ++p;
 947       params_b = p;
 948       p = strpbrk_or_eos (p, "?#");
 949       params_e = p;
 950     }
 951   if (*p == '?')
 952     {
 953       ++p;
 954       query_b = p;
 955       p = strpbrk_or_eos (p, "#");
 956       query_e = p;
 957
 958       /* Hack that allows users to use '?' (a wildcard character) in
 959          FTP URLs without it being interpreted as a query string
 960          delimiter.  */
 961       if (scheme == SCHEME_FTP)
 962         {
 963           query_b = query_e = NULL;
 964           path_e = p;
 965         }
 966     }
 967   if (*p == '#')
 968     {
 969       ++p;
 970       fragment_b = p;
 971       p += strlen (p);
 972       fragment_e = p;
 973     }
 974   assert (*p == 0);
 975
 976   if (uname_b != uname_e)
 977     {
 978       /* http://user:pass@host */
 979       /*        ^         ^    */
 980       /*     uname_b   uname_e */
 981       if (!parse_credentials (uname_b, uname_e - 1, &user, &passwd))
 982         {
 983           error_code = PE_INVALID_USER_NAME;
 984           goto error;
 985         }
 986     }
 987
 988   u = xnew0 (struct url);
 989   u->scheme = scheme;
 990   u->host   = strdupdelim (host_b, host_e);
 991   u->port   = port;
 992   u->user   = user;
 993   u->passwd = passwd;
 994
 995   u->path = strdupdelim (path_b, path_e);
 996   path_modified = path_simplify (u->path);
 997   split_path (u->path, &u->dir, &u->file);
 998
 999   host_modified = lowercase_str (u->host);
1000
1001   if (params_b)
1002     u->params = strdupdelim (params_b, params_e);
1003   if (query_b)
1004     u->query = strdupdelim (query_b, query_e);
1005   if (fragment_b)
1006     u->fragment = strdupdelim (fragment_b, fragment_e);
1007
1008   if (path_modified || u->fragment || host_modified || path_b == path_e)
1009     {
1010       /* If we suspect that a transformation has rendered what
1011          url_string might return different from URL_ENCODED, rebuild
1012          u->url using url_string.  */
1013       u->url = url_string (u, 0);
1014
1015       if (url_encoded != url)
1016         xfree ((char *) url_encoded);
1017     }
1018   else
1019     {
1020       if (url_encoded == url)
1021         u->url = xstrdup (url);
1022       else
1023         u->url = url_encoded;
1024     }
1025   url_encoded = NULL;
1026
1027   return u;
1028
1029  error:
1030   /* Cleanup in case of error: */
1031   if (url_encoded && url_encoded != url)
1032     xfree (url_encoded);
1033
1034   /* Transmit the error code to the caller, if the caller wants to
1035      know.  */
1036   if (error)
1037     *error = error_code;
1038   return NULL;
1039 }
1040
1041 /* Return the error message string from ERROR_CODE, which should have
1042    been retrieved from url_parse.  The error message is translated.  */
1043
1044 const char *
1045 url_error (int error_code)
1046 {
1047   assert (error_code >= 0 && error_code < countof (parse_errors));
1048   return _(parse_errors[error_code]);
1049 }
1050
/* Break PATH, the (URL-escaped) path taken from a URL, at its final
   slash.

   The text before that slash is stored in *DIR and the text after it
   in *FILE.  When PATH contains no slash, *DIR becomes the empty
   string and *FILE a copy of PATH.  Both pieces are URL-unescaped
   only after the split, which is why an escaped slash can end up
   inside FILE:

   PATH                 DIR           FILE
   "foo/bar/baz"        "foo/bar"     "baz"
   "foo/bar/"           "foo/bar"     ""
   "foo"                ""            "foo"
   "foo/bar/baz%2fqux"  "foo/bar"     "baz/qux" (!)

   *DIR and *FILE are separately allocated copies.  */

static void
split_path (const char *path, char **dir, char **file)
{
  const char *slash = strrchr (path, '/');
  const char *file_start = slash ? slash + 1 : path;

  *dir = slash ? strdupdelim (path, slash) : xstrdup ("");
  *file = xstrdup (file_start);

  url_unescape (*dir);
  url_unescape (*file);
}
1083
1084 /* Note: URL's "full path" is the path with the query string and
1085    params appended.  The "fragment" (#foo) is intentionally ignored,
1086    but that might be changed.  For example, if the original URL was
1087    "http://host:port/foo/bar/baz;bullshit?querystring#uselessfragment",
1088    the full path will be "/foo/bar/baz;bullshit?querystring".  */
1089
1090 /* Return the length of the full path, without the terminating
1091    zero.  */
1092
1093 static int
1094 full_path_length (const struct url *url)
1095 {
1096   int len = 0;
1097
1098 #define FROB(el) if (url->el) len += 1 + strlen (url->el)
1099
1100   FROB (path);
1101   FROB (params);
1102   FROB (query);
1103
1104 #undef FROB
1105
1106   return len;
1107 }
1108
1109 /* Write out the full path. */
1110
1111 static void
1112 full_path_write (const struct url *url, char *where)
1113 {
1114 #define FROB(el, chr) do {                      \
1115   char *f_el = url->el;                         \
1116   if (f_el) {                                   \
1117     int l = strlen (f_el);                      \
1118     *where++ = chr;                             \
1119     memcpy (where, f_el, l);                    \
1120     where += l;                                 \
1121   }                                             \
1122 } while (0)
1123
1124   FROB (path, '/');
1125   FROB (params, ';');
1126   FROB (query, '?');
1127
1128 #undef FROB
1129 }
1130
/* Build and return the "full path" of URL as a newly allocated,
   zero-terminated string.  For instance, with u->path "foo/bar" and
   u->query "param=value" the result is "/foo/bar?param=value".  */

char *
url_full_path (const struct url *url)
{
  int len = full_path_length (url);
  char *buf = (char *)xmalloc (len + 1);

  /* full_path_write does not terminate its output.  */
  buf[len] = '\0';
  full_path_write (url, buf);

  return buf;
}
1146
1147 /* Escape unsafe and reserved characters, except for the slash
1148    characters.  */
1149
1150 static char *
1151 url_escape_dir (const char *dir)
1152 {
1153   char *newdir = url_escape_1 (dir, urlchr_unsafe | urlchr_reserved, 1);
1154   char *h, *t;
1155   if (newdir == dir)
1156     return (char *)dir;
1157
1158   /* Unescape slashes in NEWDIR. */
1159
1160   h = newdir;                   /* hare */
1161   t = newdir;                   /* tortoise */
1162
1163   for (; *h; h++, t++)
1164     {
1165       /* url_escape_1 having converted '/' to "%2F" exactly. */
1166       if (*h == '%' && h[1] == '2' && h[2] == 'F')
1167         {
1168           *t = '/';
1169           h += 2;
1170         }
1171       else
1172         *t = *h;
1173     }
1174   *t = '\0';
1175
1176   return newdir;
1177 }
1178
1179 /* Sync u->path and u->url with u->dir and u->file.  Called after
1180    u->file or u->dir have been changed, typically by the FTP code.  */
1181
1182 static void
1183 sync_path (struct url *u)
1184 {
1185   char *newpath, *efile, *edir;
1186
1187   xfree (u->path);
1188
1189   /* u->dir and u->file are not escaped.  URL-escape them before
1190      reassembling them into u->path.  That way, if they contain
1191      separators like '?' or even if u->file contains slashes, the
1192      path will be correctly assembled.  (u->file can contain slashes
1193      if the URL specifies it with %2f, or if an FTP server returns
1194      it.)  */
1195   edir = url_escape_dir (u->dir);
1196   efile = url_escape_1 (u->file, urlchr_unsafe | urlchr_reserved, 1);
1197
1198   if (!*edir)
1199     newpath = xstrdup (efile);
1200   else
1201     {
1202       int dirlen = strlen (edir);
1203       int filelen = strlen (efile);
1204
1205       /* Copy "DIR/FILE" to newpath. */
1206       char *p = newpath = xmalloc (dirlen + 1 + filelen + 1);
1207       memcpy (p, edir, dirlen);
1208       p += dirlen;
1209       *p++ = '/';
1210       memcpy (p, efile, filelen);
1211       p += filelen;
1212       *p++ = '\0';
1213     }
1214
1215   u->path = newpath;
1216
1217   if (edir != u->dir)
1218     xfree (edir);
1219   if (efile != u->file)
1220     xfree (efile);
1221
1222   /* Regenerate u->url as well.  */
1223   xfree (u->url);
1224   u->url = url_string (u, 0);
1225 }
1226
1227 /* Mutators.  Code in ftp.c insists on changing u->dir and u->file.
1228    This way we can sync u->path and u->url when they get changed.  */
1229
1230 void
1231 url_set_dir (struct url *url, const char *newdir)
1232 {
1233   xfree (url->dir);
1234   url->dir = xstrdup (newdir);
1235   sync_path (url);
1236 }
1237
1238 void
1239 url_set_file (struct url *url, const char *newfile)
1240 {
1241   xfree (url->file);
1242   url->file = xstrdup (newfile);
1243   sync_path (url);
1244 }
1245
1246 void
1247 url_free (struct url *url)
1248 {
1249   xfree (url->host);
1250   xfree (url->path);
1251   xfree (url->url);
1252
1253   xfree_null (url->params);
1254   xfree_null (url->query);
1255   xfree_null (url->fragment);
1256   xfree_null (url->user);
1257   xfree_null (url->passwd);
1258
1259   xfree (url->dir);
1260   xfree (url->file);
1261
1262   xfree (url);
1263 }
1264 \f
1265 /* Create all the necessary directories for PATH (a file).  Calls
1266    mkdirhier() internally.  */
1267 int
1268 mkalldirs (const char *path)
1269 {
1270   const char *p;
1271   char *t;
1272   struct stat st;
1273   int res;
1274
1275   p = path + strlen (path);
1276   for (; *p != '/' && p != path; p--)
1277     ;
1278
1279   /* Don't create if it's just a file.  */
1280   if ((p == path) && (*p != '/'))
1281     return 0;
1282   t = strdupdelim (path, p);
1283
1284   /* Check whether the directory exists.  */
1285   if ((stat (t, &st) == 0))
1286     {
1287       if (S_ISDIR (st.st_mode))
1288         {
1289           xfree (t);
1290           return 0;
1291         }
1292       else
1293         {
1294           /* If the dir exists as a file name, remove it first.  This
1295              is *only* for Wget to work with buggy old CERN http
1296              servers.  Here is the scenario: When Wget tries to
1297              retrieve a directory without a slash, e.g.
1298              http://foo/bar (bar being a directory), CERN server will
1299              not redirect it too http://foo/bar/ -- it will generate a
1300              directory listing containing links to bar/file1,
1301              bar/file2, etc.  Wget will lose because it saves this
1302              HTML listing to a file `bar', so it cannot create the
1303              directory.  To work around this, if the file of the same
1304              name exists, we just remove it and create the directory
1305              anyway.  */
1306           DEBUGP (("Removing %s because of directory danger!\n", t));
1307           unlink (t);
1308         }
1309     }
1310   res = make_directory (t);
1311   if (res != 0)
1312     logprintf (LOG_NOTQUIET, "%s: %s", t, strerror (errno));
1313   xfree (t);
1314   return res;
1315 }
1316 \f
1317 /* Functions for constructing the file name out of URL components.  */
1318
/* A growable string structure, used by url_file_name and friends.
   This should perhaps be moved to utils.c.

   The idea is to have a convenient and efficient way to construct a
   string by having various functions append data to it.  Instead of
   passing the obligatory BASEVAR, SIZEVAR and TAILPOS to all the
   functions in question, we pass the pointer to this struct.  */

struct growable {
  char *base;                   /* start of the storage */
  int size;                     /* size handed to DO_REALLOC by GROW */
  int tail;                     /* offset of the next char to write */
};

/* Ensure that the string can accept APPEND_SIZE more characters past
   the current TAIL position.  If necessary, this will grow the string
   and update its allocated size.  If the string is already large
   enough to take TAIL+APPEND_SIZE characters, this does nothing.

   Both arguments are parenthesized in the expansion so that any
   expression (e.g. a conditional) can be passed safely.  */
#define GROW(g, append_size) do {                                       \
  struct growable *G_ = (g);                                            \
  DO_REALLOC (G_->base, G_->size, G_->tail + (append_size), char);      \
} while (0)

/* Return the tail position of the string. */
#define TAIL(r) ((r)->base + (r)->tail)

/* Move the tail position by APPEND_COUNT characters. */
#define TAIL_INCR(r, append_count) ((r)->tail += (append_count))
1347
1348 /* Append the string STR to DEST.  NOTICE: the string in DEST is not
1349    terminated.  */
1350
1351 static void
1352 append_string (const char *str, struct growable *dest)
1353 {
1354   int l = strlen (str);
1355   GROW (dest, l);
1356   memcpy (TAIL (dest), str, l);
1357   TAIL_INCR (dest, l);
1358 }
1359
1360 /* Append CH to DEST.  For example, append_char (0, DEST)
1361    zero-terminates DEST.  */
1362
1363 static void
1364 append_char (char ch, struct growable *dest)
1365 {
1366   GROW (dest, 1);
1367   *TAIL (dest) = ch;
1368   TAIL_INCR (dest, 1);
1369 }
1370
/* Reasons for which a character may have to be quoted in a file
   name.  The values are bit flags and are combined in
   filechr_table.  */
enum {
  filechr_not_unix    = 1,      /* unusable on Unix, / and \0 */
  filechr_not_windows = 2,      /* unusable on Windows, one of \|/<>?:*" */
  filechr_control     = 4       /* a control character, e.g. 0-31 */
};

/* Non-zero if character C carries any of the flags in MASK.  */
#define FILE_CHAR_TEST(c, mask) (filechr_table[(unsigned char)(c)] & (mask))

/* Shorthands for the table: */
#define U filechr_not_unix
#define W filechr_not_windows
#define C filechr_control

#define UW (U|W)
#define UWC (U|W|C)

/* Table of characters unsafe under various conditions (see above).

   Arguably we could also claim `%' to be unsafe, since we use it as
   the escape character.  If we ever want to be able to reliably
   translate file name back to URL, this would become crucial.
   Right now, it's better to be minimal in escaping.

   `|' is flagged as unusable on Windows, as listed for
   filechr_not_windows above, and DEL is flagged as a control
   character like the 0-31 and 128-159 ranges.  */

static const unsigned char filechr_table[256] =
{
UWC,  C,  C,  C,   C,  C,  C,  C,   /* NUL SOH STX ETX  EOT ENQ ACK BEL */
  C,  C,  C,  C,   C,  C,  C,  C,   /* BS  HT  LF  VT   FF  CR  SO  SI  */
  C,  C,  C,  C,   C,  C,  C,  C,   /* DLE DC1 DC2 DC3  DC4 NAK SYN ETB */
  C,  C,  C,  C,   C,  C,  C,  C,   /* CAN EM  SUB ESC  FS  GS  RS  US  */
  0,  0,  W,  0,   0,  0,  0,  0,   /* SP  !   "   #    $   %   &   '   */
  0,  0,  W,  0,   0,  0,  0, UW,   /* (   )   *   +    ,   -   .   /   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* 0   1   2   3    4   5   6   7   */
  0,  0,  W,  0,   W,  0,  W,  W,   /* 8   9   :   ;    <   =   >   ?   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* @   A   B   C    D   E   F   G   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* H   I   J   K    L   M   N   O   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* P   Q   R   S    T   U   V   W   */
  0,  0,  0,  0,   W,  0,  0,  0,   /* X   Y   Z   [    \   ]   ^   _   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* `   a   b   c    d   e   f   g   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* h   i   j   k    l   m   n   o   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* p   q   r   s    t   u   v   w   */
  0,  0,  0,  0,   W,  0,  0,  C,   /* x   y   z   {    |   }   ~   DEL */

  C, C, C, C,  C, C, C, C,  C, C, C, C,  C, C, C, C, /* 128-143 */
  C, C, C, C,  C, C, C, C,  C, C, C, C,  C, C, C, C, /* 144-159 */
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,

  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
};
#undef U
#undef W
#undef C
#undef UW
#undef UWC
1428
/* FN_PORT_SEP separates the host from a non-standard port number in
   file names.  It is ':' (as in "www.xemacs.org:4001/index.html")
   unless file names are restricted for Windows, which cannot handle
   ':' in file names; in that case '+' is used.  */
#define FN_PORT_SEP  (opt.restrict_files_os == restrict_windows ? '+' : ':')

/* FN_QUERY_SEP separates the file name from the URL query.  It is
   '?' unless file names are restricted for Windows, which cannot
   handle '?' in file names; in that case '@' is used.  */
#define FN_QUERY_SEP (opt.restrict_files_os == restrict_windows ? '@' : '?')
1439
1440 /* Quote path element, characters in [b, e), as file name, and append
1441    the quoted string to DEST.  Each character is quoted as per
1442    file_unsafe_char and the corresponding table.
1443
1444    If ESCAPED_P is non-zero, the path element is considered to be
1445    URL-escaped and will be unescaped prior to inspection.  */
1446
1447 static void
1448 append_uri_pathel (const char *b, const char *e, int escaped_p,
1449                    struct growable *dest)
1450 {
1451   const char *p;
1452   int quoted, outlen;
1453
1454   int mask;
1455   if (opt.restrict_files_os == restrict_unix)
1456     mask = filechr_not_unix;
1457   else
1458     mask = filechr_not_windows;
1459   if (opt.restrict_files_ctrl)
1460     mask |= filechr_control;
1461
1462   /* Copy [b, e) to PATHEL and URL-unescape it. */
1463   if (escaped_p)
1464     {
1465       char *unescaped;
1466       BOUNDED_TO_ALLOCA (b, e, unescaped);
1467       url_unescape (unescaped);
1468       b = unescaped;
1469       e = unescaped + strlen (unescaped);
1470     }
1471
1472   /* Defang ".." when found as component of path.  Remember that path
1473      comes from the URL and might contain malicious input.  */
1474   if (e - b == 2 && b[0] == '.' && b[1] == '.')
1475     {
1476       b = "%2E%2E";
1477       e = b + 6;
1478     }
1479
1480   /* Walk the PATHEL string and check how many characters we'll need
1481      to quote.  */
1482   quoted = 0;
1483   for (p = b; p < e; p++)
1484     if (FILE_CHAR_TEST (*p, mask))
1485       ++quoted;
1486
1487   /* Calculate the length of the output string.  e-b is the input
1488      string length.  Each quoted char introduces two additional
1489      characters in the string, hence 2*quoted.  */
1490   outlen = (e - b) + (2 * quoted);
1491   GROW (dest, outlen);
1492
1493   if (!quoted)
1494     {
1495       /* If there's nothing to quote, we can simply append the string
1496          without processing it again.  */
1497       memcpy (TAIL (dest), b, outlen);
1498     }
1499   else
1500     {
1501       char *q = TAIL (dest);
1502       for (p = b; p < e; p++)
1503         {
1504           if (!FILE_CHAR_TEST (*p, mask))
1505             *q++ = *p;
1506           else
1507             {
1508               unsigned char ch = *p;
1509               *q++ = '%';
1510               *q++ = XNUM_TO_DIGIT (ch >> 4);
1511               *q++ = XNUM_TO_DIGIT (ch & 0xf);
1512             }
1513         }
1514       assert (q - TAIL (dest) == outlen);
1515     }
1516   TAIL_INCR (dest, outlen);
1517 }
1518
1519 /* Append to DEST the directory structure that corresponds the
1520    directory part of URL's path.  For example, if the URL is
1521    http://server/dir1/dir2/file, this appends "/dir1/dir2".
1522
1523    Each path element ("dir1" and "dir2" in the above example) is
1524    examined, url-unescaped, and re-escaped as file name element.
1525
1526    Additionally, it cuts as many directories from the path as
1527    specified by opt.cut_dirs.  For example, if opt.cut_dirs is 1, it
1528    will produce "bar" for the above example.  For 2 or more, it will
1529    produce "".
1530
1531    Each component of the path is quoted for use as file name.  */
1532
1533 static void
1534 append_dir_structure (const struct url *u, struct growable *dest)
1535 {
1536   char *pathel, *next;
1537   int cut = opt.cut_dirs;
1538
1539   /* Go through the path components, de-URL-quote them, and quote them
1540      (if necessary) as file names.  */
1541
1542   pathel = u->path;
1543   for (; (next = strchr (pathel, '/')) != NULL; pathel = next + 1)
1544     {
1545       if (cut-- > 0)
1546         continue;
1547       if (pathel == next)
1548         /* Ignore empty pathels.  */
1549         continue;
1550
1551       if (dest->tail)
1552         append_char ('/', dest);
1553       append_uri_pathel (pathel, next, 1, dest);
1554     }
1555 }
1556
1557 /* Return a unique file name that matches the given URL as good as
1558    possible.  Does not create directories on the file system.  */
1559
1560 char *
1561 url_file_name (const struct url *u)
1562 {
1563   struct growable fnres;
1564
1565   char *u_file, *u_query;
1566   char *fname, *unique;
1567
1568   fnres.base = NULL;
1569   fnres.size = 0;
1570   fnres.tail = 0;
1571
1572   /* Start with the directory prefix, if specified. */
1573   if (opt.dir_prefix)
1574     append_string (opt.dir_prefix, &fnres);
1575
1576   /* If "dirstruct" is turned on (typically the case with -r), add
1577      the host and port (unless those have been turned off) and
1578      directory structure.  */
1579   if (opt.dirstruct)
1580     {
1581       if (opt.add_hostdir)
1582         {
1583           if (fnres.tail)
1584             append_char ('/', &fnres);
1585           append_string (u->host, &fnres);
1586           if (u->port != scheme_default_port (u->scheme))
1587             {
1588               char portstr[24];
1589               number_to_string (portstr, u->port);
1590               append_char (FN_PORT_SEP, &fnres);
1591               append_string (portstr, &fnres);
1592             }
1593         }
1594
1595       append_dir_structure (u, &fnres);
1596     }
1597
1598   /* Add the file name. */
1599   if (fnres.tail)
1600     append_char ('/', &fnres);
1601   u_file = *u->file ? u->file : "index.html";
1602   append_uri_pathel (u_file, u_file + strlen (u_file), 0, &fnres);
1603
1604   /* Append "?query" to the file name. */
1605   u_query = u->query && *u->query ? u->query : NULL;
1606   if (u_query)
1607     {
1608       append_char (FN_QUERY_SEP, &fnres);
1609       append_uri_pathel (u_query, u_query + strlen (u_query), 1, &fnres);
1610     }
1611
1612   /* Zero-terminate the file name. */
1613   append_char ('\0', &fnres);
1614
1615   fname = fnres.base;
1616
1617   /* Check the cases in which the unique extensions are not used:
1618      1) Clobbering is turned off (-nc).
1619      2) Retrieval with regetting.
1620      3) Timestamping is used.
1621      4) Hierarchy is built.
1622
1623      The exception is the case when file does exist and is a
1624      directory (see `mkalldirs' for explanation).  */
1625
1626   if ((opt.noclobber || opt.always_rest || opt.timestamping || opt.dirstruct)
1627       && !(file_exists_p (fname) && !file_non_directory_p (fname)))
1628     return fname;
1629
1630   unique = unique_name (fname, 1);
1631   if (unique != fname)
1632     xfree (fname);
1633   return unique;
1634 }
1635 \f
/* Resolve the "." and ".." elements of PATH in place.  Returns
   non-zero if PATH was changed, zero if it was left as it was.

   In spirit this follows the algorithm of rfc1808, done in a single
   pass: an element that is just "." is dropped, and ".." backs up
   over the previous element.  A ".." that has nothing left to back
   up over is kept literally.  Single leading and trailing slashes
   are preserved.

   URL escapes are not interpreted here.  When passing paths that
   come from URLs, "%2e" and "%2E" must already have been unquoted
   to "." for the dots to be recognized.  (Wget's URL parser calls
   reencode_escapes, which see.)

   For example, "a/b/c/./../d/.." yields "a/b/".  If you change
   anything in this function, run test_path_simplify to make sure
   no test case has been broken.  */

static int
path_simplify (char *path)
{
  char *src = path;             /* read position */
  char *dst = path;             /* write position, never ahead of SRC */
  char *limit = path;           /* DST may not retreat below this */
  char *const stop = path + strlen (path);

  while (src < stop)
    {
      /* SRC is at the beginning of a path element; classify it.  */
      int dot = 0, dotdot = 0;

      if (src[0] == '.')
        {
          if (src[1] == '/' || src[1] == '\0')
            dot = 1;
          else if (src[1] == '.' && (src[2] == '/' || src[2] == '\0'))
            dotdot = 1;
        }

      if (dot)
        {
          /* Drop "./" altogether.  */
          src += 2;
          continue;
        }

      if (dotdot && dst > limit)
        {
          /* Back DST up to the beginning of the previous element,
             but never below LIMIT.  */
          do
            --dst;
          while (dst > limit && dst[-1] != '/');
          src += 3;
          continue;
        }

      if (dotdot)
        /* Nothing to back up over: the "../" is about to be kept
           literally, so protect it from removal by a later "..".  */
        limit = dst + 3;

      /* Keep the element, including the slash that ends it.  While
         nothing has been removed yet it is already in place and only
         has to be stepped over; otherwise it is moved down.  */
      if (dst == src)
        {
          while (src < stop && *src != '/')
            ++src;
          if (src < stop)
            ++src;
          dst = src;
        }
      else
        {
          while (src < stop && *src != '/')
            *dst++ = *src++;
          if (src < stop)
            *dst++ = *src++;
        }
    }

  if (dst == src)
    return 0;

  *dst = '\0';
  return 1;
}
1723 \f
1724 /* Return the length of URL's path.  Path is considered to be
1725    terminated by one of '?', ';', '#', or by the end of the
1726    string.  */
1727
1728 static int
1729 path_length (const char *url)
1730 {
1731   const char *q = strpbrk_or_eos (url, "?;#");
1732   return q - url;
1733 }
1734
/* Find the last occurrence of character C in the range [b, e), or
   NULL, if none are present.  The character at E itself is outside
   the range and is never examined; the character at B is inside it
   and is.  We might want to use memrchr (a GNU extension) under GNU
   libc.  */

static const char *
find_last_char (const char *b, const char *e, char c)
{
  /* Step back before looking, so that exactly e[-1] down to b[0] are
     inspected.  */
  while (e > b)
    if (*--e == c)
      return e;
  return NULL;
}
1747
1748 /* Merge BASE with LINK and return the resulting URI.
1749
1750    Either of the URIs may be absolute or relative, complete with the
1751    host name, or path only.  This tries to reasonably handle all
1752    foreseeable cases.  It only employs minimal URL parsing, without
1753    knowledge of the specifics of schemes.
1754
1755    I briefly considered making this function call path_simplify after
1756    the merging process, as rfc1738 seems to suggest.  This is a bad
1757    idea for several reasons: 1) it complexifies the code, and 2)
1758    url_parse has to simplify path anyway, so it's wasteful to boot.  */
1759
1760 char *
1761 uri_merge (const char *base, const char *link)
1762 {
1763   int linklength;
1764   const char *end;
1765   char *merge;
1766
1767   if (url_has_scheme (link))
1768     return xstrdup (link);
1769
1770   /* We may not examine BASE past END. */
1771   end = base + path_length (base);
1772   linklength = strlen (link);
1773
1774   if (!*link)
1775     {
1776       /* Empty LINK points back to BASE, query string and all. */
1777       return xstrdup (base);
1778     }
1779   else if (*link == '?')
1780     {
1781       /* LINK points to the same location, but changes the query
1782          string.  Examples: */
1783       /* uri_merge("path",         "?new") -> "path?new"     */
1784       /* uri_merge("path?foo",     "?new") -> "path?new"     */
1785       /* uri_merge("path?foo#bar", "?new") -> "path?new"     */
1786       /* uri_merge("path#foo",     "?new") -> "path?new"     */
1787       int baselength = end - base;
1788       merge = xmalloc (baselength + linklength + 1);
1789       memcpy (merge, base, baselength);
1790       memcpy (merge + baselength, link, linklength);
1791       merge[baselength + linklength] = '\0';
1792     }
1793   else if (*link == '#')
1794     {
1795       /* uri_merge("path",         "#new") -> "path#new"     */
1796       /* uri_merge("path#foo",     "#new") -> "path#new"     */
1797       /* uri_merge("path?foo",     "#new") -> "path?foo#new" */
1798       /* uri_merge("path?foo#bar", "#new") -> "path?foo#new" */
1799       int baselength;
1800       const char *end1 = strchr (base, '#');
1801       if (!end1)
1802         end1 = base + strlen (base);
1803       baselength = end1 - base;
1804       merge = xmalloc (baselength + linklength + 1);
1805       memcpy (merge, base, baselength);
1806       memcpy (merge + baselength, link, linklength);
1807       merge[baselength + linklength] = '\0';
1808     }
1809   else if (*link == '/' && *(link + 1) == '/')
1810     {
1811       /* LINK begins with "//" and so is a net path: we need to
1812          replace everything after (and including) the double slash
1813          with LINK. */
1814
1815       /* uri_merge("foo", "//new/bar")            -> "//new/bar"      */
1816       /* uri_merge("//old/foo", "//new/bar")      -> "//new/bar"      */
1817       /* uri_merge("http://old/foo", "//new/bar") -> "http://new/bar" */
1818
1819       int span;
1820       const char *slash;
1821       const char *start_insert;
1822
1823       /* Look for first slash. */
1824       slash = memchr (base, '/', end - base);
1825       /* If found slash and it is a double slash, then replace
1826          from this point, else default to replacing from the
1827          beginning.  */
1828       if (slash && *(slash + 1) == '/')
1829         start_insert = slash;
1830       else
1831         start_insert = base;
1832
1833       span = start_insert - base;
1834       merge = (char *)xmalloc (span + linklength + 1);
1835       if (span)
1836         memcpy (merge, base, span);
1837       memcpy (merge + span, link, linklength);
1838       merge[span + linklength] = '\0';
1839     }
1840   else if (*link == '/')
1841     {
1842       /* LINK is an absolute path: we need to replace everything
1843          after (and including) the FIRST slash with LINK.
1844
1845          So, if BASE is "http://host/whatever/foo/bar", and LINK is
1846          "/qux/xyzzy", our result should be
1847          "http://host/qux/xyzzy".  */
1848       int span;
1849       const char *slash;
1850       const char *start_insert = NULL; /* for gcc to shut up. */
1851       const char *pos = base;
1852       int seen_slash_slash = 0;
1853       /* We're looking for the first slash, but want to ignore
1854          double slash. */
1855     again:
1856       slash = memchr (pos, '/', end - pos);
1857       if (slash && !seen_slash_slash)
1858         if (*(slash + 1) == '/')
1859           {
1860             pos = slash + 2;
1861             seen_slash_slash = 1;
1862             goto again;
1863           }
1864
1865       /* At this point, SLASH is the location of the first / after
1866          "//", or the first slash altogether.  START_INSERT is the
1867          pointer to the location where LINK will be inserted.  When
1868          examining the last two examples, keep in mind that LINK
1869          begins with '/'. */
1870
1871       if (!slash && !seen_slash_slash)
1872         /* example: "foo" */
1873         /*           ^    */
1874         start_insert = base;
1875       else if (!slash && seen_slash_slash)
1876         /* example: "http://foo" */
1877         /*                     ^ */
1878         start_insert = end;
1879       else if (slash && !seen_slash_slash)
1880         /* example: "foo/bar" */
1881         /*           ^        */
1882         start_insert = base;
1883       else if (slash && seen_slash_slash)
1884         /* example: "http://something/" */
1885         /*                           ^  */
1886         start_insert = slash;
1887
1888       span = start_insert - base;
1889       merge = (char *)xmalloc (span + linklength + 1);
1890       if (span)
1891         memcpy (merge, base, span);
1892       memcpy (merge + span, link, linklength);
1893       merge[span + linklength] = '\0';
1894     }
1895   else
1896     {
1897       /* LINK is a relative URL: we need to replace everything
1898          after last slash (possibly empty) with LINK.
1899
1900          So, if BASE is "whatever/foo/bar", and LINK is "qux/xyzzy",
1901          our result should be "whatever/foo/qux/xyzzy".  */
1902       int need_explicit_slash = 0;
1903       int span;
1904       const char *start_insert;
1905       const char *last_slash = find_last_char (base, end, '/');
1906       if (!last_slash)
1907         {
1908           /* No slash found at all.  Replace what we have with LINK. */
1909           start_insert = base;
1910         }
1911       else if (last_slash && last_slash >= base + 2
1912                && last_slash[-2] == ':' && last_slash[-1] == '/')
1913         {
1914           /* example: "http://host"  */
1915           /*                      ^ */
1916           start_insert = end + 1;
1917           need_explicit_slash = 1;
1918         }
1919       else
1920         {
1921           /* example: "whatever/foo/bar" */
1922           /*                        ^    */
1923           start_insert = last_slash + 1;
1924         }
1925
1926       span = start_insert - base;
1927       merge = (char *)xmalloc (span + linklength + 1);
1928       if (span)
1929         memcpy (merge, base, span);
1930       if (need_explicit_slash)
1931         merge[span - 1] = '/';
1932       memcpy (merge + span, link, linklength);
1933       merge[span + linklength] = '\0';
1934     }
1935
1936   return merge;
1937 }
1938 \f
/* Copy the NUL-terminated string S to the location P points to and
   advance P past the copied bytes.  No terminating NUL is written and
   no bounds are checked, so the destination must already be large
   enough for everything that gets appended.

   P must be a modifiable lvalue of character-pointer type.  S is
   evaluated exactly once, so arguments with side effects are safe.
   The temporaries use an "append_" prefix so that an argument which
   itself mentions a variable called `len' is not captured by the
   macro's own locals.  */
#define APPEND(p, s) do {                               \
  const char *append_src_ = (s);                        \
  size_t append_len_ = strlen (append_src_);            \
  memcpy ((p), append_src_, append_len_);               \
  (p) += append_len_;                                   \
} while (0)
1944
1945 /* Use this instead of password when the actual password is supposed
1946    to be hidden.  We intentionally use a generic string so as not to
1947    give away the number of characters in the password, which previous
1948    versions did.  */
1949 #define HIDDEN_PASSWORD "*password*"
1950
1951 /* Recreate the URL string from the data in URL.
1952
1953    If HIDE is non-zero (as it is when we're calling this on a URL we
1954    plan to print, but not when calling it to canonicalize a URL for
1955    use within the program), password will be hidden.  Unsafe
1956    characters in the URL will be quoted.  */
1957
1958 char *
1959 url_string (const struct url *url, int hide_password)
1960 {
1961   int size;
1962   char *result, *p;
1963   char *quoted_user = NULL, *quoted_passwd = NULL;
1964
1965   int scheme_port  = supported_schemes[url->scheme].default_port;
1966   char *scheme_str = supported_schemes[url->scheme].leading_string;
1967   int fplen = full_path_length (url);
1968
1969   int brackets_around_host = 0;
1970
1971   assert (scheme_str != NULL);
1972
1973   /* Make sure the user name and password are quoted. */
1974   if (url->user)
1975     {
1976       quoted_user = url_escape_allow_passthrough (url->user);
1977       if (url->passwd)
1978         {
1979           if (hide_password)
1980             quoted_passwd = HIDDEN_PASSWORD;
1981           else
1982             quoted_passwd = url_escape_allow_passthrough (url->passwd);
1983         }
1984     }
1985
1986   if (strchr (url->host, ':'))
1987     brackets_around_host = 1;
1988
1989   size = (strlen (scheme_str)
1990           + strlen (url->host)
1991           + (brackets_around_host ? 2 : 0)
1992           + fplen
1993           + 1);
1994   if (url->port != scheme_port)
1995     size += 1 + numdigit (url->port);
1996   if (quoted_user)
1997     {
1998       size += 1 + strlen (quoted_user);
1999       if (quoted_passwd)
2000         size += 1 + strlen (quoted_passwd);
2001     }
2002
2003   p = result = xmalloc (size);
2004
2005   APPEND (p, scheme_str);
2006   if (quoted_user)
2007     {
2008       APPEND (p, quoted_user);
2009       if (quoted_passwd)
2010         {
2011           *p++ = ':';
2012           APPEND (p, quoted_passwd);
2013         }
2014       *p++ = '@';
2015     }
2016
2017   if (brackets_around_host)
2018     *p++ = '[';
2019   APPEND (p, url->host);
2020   if (brackets_around_host)
2021     *p++ = ']';
2022   if (url->port != scheme_port)
2023     {
2024       *p++ = ':';
2025       p = number_to_string (p, url->port);
2026     }
2027
2028   full_path_write (url, p);
2029   p += fplen;
2030   *p++ = '\0';
2031
2032   assert (p - result == size);
2033
2034   if (quoted_user && quoted_user != url->user)
2035     xfree (quoted_user);
2036   if (quoted_passwd && !hide_password
2037       && quoted_passwd != url->passwd)
2038     xfree (quoted_passwd);
2039
2040   return result;
2041 }
2042 \f
2043 /* Return non-zero if scheme a is similar to scheme b.
2044
2045    Schemes are similar if they are equal.  If SSL is supported, schemes
2046    are also similar if one is http (SCHEME_HTTP) and the other is https
2047    (SCHEME_HTTPS).  */
2048 int
2049 schemes_are_similar_p (enum url_scheme a, enum url_scheme b)
2050 {
2051   if (a == b)
2052     return 1;
2053 #ifdef HAVE_SSL
2054   if ((a == SCHEME_HTTP && b == SCHEME_HTTPS)
2055       || (a == SCHEME_HTTPS && b == SCHEME_HTTP))
2056     return 1;
2057 #endif
2058   return 0;
2059 }
2060 \f
2061 #if 0
2062 /* Debugging and testing support for path_simplify. */
2063
/* Debugger convenience: duplicate PATH, run path_simplify on the
   duplicate, and hand the duplicate back.  PATH itself is left
   untouched.  */
static char *
ps (char *path)
{
  char *simplified = xstrdup (path);

  path_simplify (simplified);

  return simplified;
}
2073
/* Run path_simplify on a private copy of TEST and report, on standard
   output, any disagreement with the expectations: EXPECTED_RESULT is
   the string the copy should hold afterwards, and EXPECTED_CHANGE is
   the value path_simplify should return.  Prints nothing when both
   match.  */
static void
run_test (char *test, char *expected_result, int expected_change)
{
  char *work = xstrdup (test);
  int changed = path_simplify (work);

  if (strcmp (work, expected_result) != 0)
    printf ("Failed path_simplify(\"%s\"): expected \"%s\", got \"%s\".\n",
            test, expected_result, work);

  if (changed != expected_change)
    printf (expected_change == 1
            ? "Expected modification with path_simplify(\"%s\").\n"
            : "Expected no modification with path_simplify(\"%s\").\n",
            test);

  xfree (work);
}
2096
/* Feed a fixed table of paths through run_test.  Each entry gives the
   input path, the string expected after path_simplify, and the value
   path_simplify is expected to return for it.  */
static void
test_path_simplify (void)
{
  static struct {
    char *input, *simplified;
    int modifies;
  } cases[] = {
    { "",                       "",             0 },
    { ".",                      "",             1 },
    { "./",                     "",             1 },
    { "..",                     "..",           0 },
    { "../",                    "../",          0 },
    { "foo",                    "foo",          0 },
    { "foo/bar",                "foo/bar",      0 },
    { "foo///bar",              "foo///bar",    0 },
    { "foo/.",                  "foo/",         1 },
    { "foo/./",                 "foo/",         1 },
    { "foo./",                  "foo./",        0 },
    { "foo/../bar",             "bar",          1 },
    { "foo/../bar/",            "bar/",         1 },
    { "foo/bar/..",             "foo/",         1 },
    { "foo/bar/../x",           "foo/x",        1 },
    { "foo/bar/../x/",          "foo/x/",       1 },
    { "foo/..",                 "",             1 },
    { "foo/../..",              "..",           1 },
    { "foo/../../..",           "../..",        1 },
    { "foo/../../bar/../../baz", "../../baz",   1 },
    { "a/b/../../c",            "c",            1 },
    { "./a/../b",               "b",            1 }
  };
  int idx;

  for (idx = 0; idx < countof (cases); idx++)
    run_test (cases[idx].input, cases[idx].simplified, cases[idx].modifies);
}
2137 #endif