#ifndef WINDOWS
# include <sys/socket.h>
- # include <netdb.h>
+ # ifdef __VMS
+ # include "vms_ip.h"
+ # else /* def __VMS */
+ # include <netdb.h>
+ # endif /* def __VMS [else] */
# include <netinet/in.h>
# ifndef __BEOS__
# include <arpa/inet.h>
#include "connect.h"
#include "hash.h"
+/* Apparently needed for Interix: */
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+
/* Define sockaddr_storage where unavailable (presumably on IPv4-only
hosts). */
{
/* #### We should be able to print the error message here. */
logprintf (LOG_NOTQUIET,
- _("%s: unable to resolve bind address `%s'; disabling bind.\n"),
- exec_name, opt.bind_address);
+ _("%s: unable to resolve bind address %s; disabling bind.\n"),
+ exec_name, quote (opt.bind_address));
should_bind = false;
return false;
}
if (print)
{
const char *txt_addr = print_address (ip);
- if (print && 0 != strcmp (print, txt_addr))
- logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "),
- escnonprint (print), txt_addr, port);
+ if (0 != strcmp (print, txt_addr))
+ {
+ char *str = NULL, *name;
+
+ if (opt.enable_iri && (name = idn_decode ((char *) print)) != NULL)
+ {
+ int len = strlen (print) + strlen (name) + 4;
+ str = xmalloc (len);
+ snprintf (str, len, "%s (%s)", name, print);
+ str[len-1] = '\0';
+ xfree (name);
+ }
+
+ logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "),
+ str ? str : escnonprint_uri (print), txt_addr, port);
+
+ if (str)
+ xfree (str);
+ }
else
logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port);
}
if (!al)
{
logprintf (LOG_NOTQUIET,
- _("%s: unable to resolve host address `%s'\n"),
- exec_name, host);
+ _("%s: unable to resolve host address %s\n"),
+ exec_name, quote (host));
return E_HOST;
}
#include "hash.h"
#include "ptimer.h"
#include "res.h"
+#include "html-url.h"
+#include "css-url.h"
static struct hash_table *dl_file_url_map;
struct hash_table *dl_url_file_map;
-/* Set of HTML files downloaded in this Wget run, used for link
+/* Set of HTML/CSS files downloaded in this Wget run, used for link
conversion after Wget is done. */
struct hash_table *downloaded_html_set;
+struct hash_table *downloaded_css_set;
static void convert_links (const char *, struct urlpos *);
-/* This function is called when the retrieval is done to convert the
- links that have been downloaded. It has to be called at the end of
- the retrieval, because only then does Wget know conclusively which
- URLs have been downloaded, and which not, so it can tell which
- direction to convert to.
-
- The "direction" means that the URLs to the files that have been
- downloaded get converted to the relative URL which will point to
- that file. And the other URLs get converted to the remote URL on
- the server.
-
- All the downloaded HTMLs are kept in downloaded_html_files, and
- downloaded URLs in urls_downloaded. All the information is
- extracted from these two lists. */
void
-convert_all_links (void)
+convert_links_in_hashtable (struct hash_table *downloaded_set,
+ int is_css,
+ int *file_count)
{
int i;
- double secs;
- int file_count = 0;
-
- struct ptimer *timer = ptimer_new ();
int cnt;
char **file_array;
cnt = 0;
- if (downloaded_html_set)
- cnt = hash_table_count (downloaded_html_set);
+ if (downloaded_set)
+ cnt = hash_table_count (downloaded_set);
if (cnt == 0)
- goto cleanup;
+ return;
file_array = alloca_array (char *, cnt);
- string_set_to_array (downloaded_html_set, file_array);
+ string_set_to_array (downloaded_set, file_array);
for (i = 0; i < cnt; i++)
{
char *url;
char *file = file_array[i];
- /* Determine the URL of the HTML file. get_urls_html will need
+ /* Determine the URL of the file. get_urls_{html,css} will need
it. */
url = hash_table_get (dl_file_url_map, file);
if (!url)
DEBUGP (("Scanning %s (from %s)\n", file, url));
- /* Parse the HTML file... */
- urls = get_urls_html (file, url, NULL);
+ /* Parse the file... */
+ urls = is_css ? get_urls_css_file (file, url) :
+ get_urls_html (file, url, NULL, NULL);
/* We don't respect meta_disallow_follow here because, even if
the file is not followed, we might still want to convert the
/* Convert the links in the file. */
convert_links (file, urls);
- ++file_count;
+ ++*file_count;
/* Free the data. */
free_urlpos (urls);
}
+}
+
+/* This function is called when the retrieval is done to convert the
+ links that have been downloaded. It has to be called at the end of
+ the retrieval, because only then does Wget know conclusively which
+ URLs have been downloaded, and which not, so it can tell which
+ direction to convert to.
+
+ The "direction" means that the URLs to the files that have been
+ downloaded get converted to the relative URL which will point to
+ that file. And the other URLs get converted to the remote URL on
+ the server.
+
+ All the downloaded HTMLs are kept in downloaded_html_files, and
+ downloaded URLs in urls_downloaded. All the information is
+ extracted from these two lists. */
+
+void
+convert_all_links (void)
+{
+ double secs;
+ int file_count = 0;
+
+ struct ptimer *timer = ptimer_new ();
+
+ convert_links_in_hashtable (downloaded_html_set, 0, &file_count);
+ convert_links_in_hashtable (downloaded_css_set, 1, &file_count);
secs = ptimer_measure (timer);
logprintf (LOG_VERBOSE, _("Converted %d files in %s seconds.\n"),
file_count, print_decimal (secs));
-cleanup:
+
ptimer_destroy (timer);
}
static void write_backup_file (const char *, downloaded_file_t);
+static const char *replace_plain (const char*, int, FILE*, const char *);
static const char *replace_attr (const char *, int, FILE *, const char *);
static const char *replace_attr_refresh_hack (const char *, int, FILE *,
const char *, int);
static char *local_quote_string (const char *);
static char *construct_relative (const char *, const char *);
-/* Change the links in one HTML file. LINKS is a list of links in the
+/* Change the links in one file. LINKS is a list of links in the
document, along with their positions and the desired direction of
the conversion. */
static void
zeroes from the mmaped region. */
if (unlink (file) < 0 && errno != ENOENT)
{
- logprintf (LOG_NOTQUIET, _("Unable to delete `%s': %s\n"),
- file, strerror (errno));
+ logprintf (LOG_NOTQUIET, _("Unable to delete %s: %s\n"),
+ quote (file), strerror (errno));
read_file_free (fm);
return;
}
char *newname = construct_relative (file, link->local_name);
char *quoted_newname = local_quote_string (newname);
- if (!link->link_refresh_p)
+ if (link->link_css_p)
+ p = replace_plain (p, link->size, fp, quoted_newname);
+ else if (!link->link_refresh_p)
p = replace_attr (p, link->size, fp, quoted_newname);
else
p = replace_attr_refresh_hack (p, link->size, fp, quoted_newname,
char *newlink = link->url->url;
char *quoted_newlink = html_quote_string (newlink);
- if (!link->link_refresh_p)
+ if (link->link_css_p)
+ p = replace_plain (p, link->size, fp, quoted_newlink);
+ else if (!link->link_refresh_p)
p = replace_attr (p, link->size, fp, quoted_newlink);
else
p = replace_attr_refresh_hack (p, link->size, fp, quoted_newlink,
/* Rather than just writing over the original .html file with the
converted version, save the former to *.orig. Note we only do
this for files we've _successfully_ downloaded, so we don't
- clobber .orig files sitting around from previous invocations. */
+ clobber .orig files sitting around from previous invocations.
+ On VMS, use "_orig" instead of ".orig". See "wget.h". */
/* Construct the backup filename as the original name plus ".orig". */
size_t filename_len = strlen (file);
char* filename_plus_orig_suffix;
+ /* TODO: hack this to work with css files */
if (downloaded_file_return == FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED)
{
/* Just write "orig" over "html". We need to do it this way
else /* downloaded_file_return == FILE_DOWNLOADED_NORMALLY */
{
/* Append ".orig" to the name. */
- filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
+ filename_plus_orig_suffix = alloca (filename_len + sizeof ("ORIG_SFX"));
strcpy (filename_plus_orig_suffix, file);
- strcpy (filename_plus_orig_suffix + filename_len, ".orig");
+ strcpy (filename_plus_orig_suffix + filename_len, "ORIG_SFX");
}
if (!converted_files)
static bool find_fragment (const char *, int, const char **, const char **);
+/* Replace a string with NEW_TEXT. Ignore quoting. */
+static const char *
+replace_plain (const char *p, int size, FILE *fp, const char *new_text)
+{
+ fputs (new_text, fp);
+ p += size;
+ return p;
+}
+
/* Replace an attribute's original text with NEW_TEXT. */
static const char *
string_set_add (downloaded_html_set, file);
}
+/* Register that FILE is a CSS file that has been downloaded. */
+
+void
+register_css (const char *url, const char *file)
+{
+ if (!downloaded_css_set)
+ downloaded_css_set = make_string_hash_table (0);
+ string_set_add (downloaded_css_set, file);
+}
+
static void downloaded_files_free (void);
/* Cleanup the data structures associated with this file. */
return FTPRERR;
/* Strip trailing CRLF before printing the line, so that
- escnonprint doesn't include bogus \012 and \015. */
+ quotting doesn't include bogus \012 and \015. */
p = strchr (line, '\0');
if (p > line && p[-1] == '\n')
*--p = '\0';
*--p = '\0';
if (opt.server_response)
- logprintf (LOG_NOTQUIET, "%s\n", escnonprint (line));
+ logprintf (LOG_NOTQUIET, "%s\n",
+ quotearg_style (escape_quoting_style, line));
else
- DEBUGP (("%s\n", escnonprint (line)));
+ DEBUGP (("%s\n", quotearg_style (escape_quoting_style, line)));
/* The last line of output is the one that begins with "ddd ". */
if (c_isdigit (line[0]) && c_isdigit (line[1]) && c_isdigit (line[2])
if (*p == '\r' || *p == '\n')
*p = ' ';
DEBUGP (("\nDetected newlines in %s \"%s\"; changing to %s \"%s\"\n",
- command, escnonprint (value), command, escnonprint (defanged)));
+ command, quotearg_style (escape_quoting_style, value),
+ command, quotearg_style (escape_quoting_style, defanged)));
/* Make VALUE point to the defanged copy of the string. */
value = defanged;
}
"331 s/key ",
"331 opiekey "
};
- int i;
+ size_t i;
const char *seed = NULL;
for (i = 0; i < countof (skey_head); i++)
return FTPOK;
}
+ /* Sends DELE command to the FTP server. */
+ uerr_t
+ ftp_dele (int csock, const char *file)
+ {
+ char *request, *respline;
+ int nwritten;
+ uerr_t err;
+
+ /* Send DELE request. */
+ request = ftp_request ("DELE", file);
+ nwritten = fd_write (csock, request, strlen (request), -1.0);
+ if (nwritten < 0)
+ {
+ xfree (request);
+ return WRITEFAILED;
+ }
+ xfree (request);
+ /* Get appropriate response. */
+ err = ftp_response (csock, &respline);
+ if (err != FTPOK)
+ return err; /* Return with early bad status. */
+
+ /* All OK, so far. */
+ if (*respline == '5')
+ {
+ err = FTPNSFOD; /* Permanent Negative Completion. */
+ }
+ else if (*respline != '2') /* Success might be 226 or 250 (or ???). */
+ {
+ err = FTPRERR; /* Not Positive Completion. */
+ }
+
+ xfree (respline); /* Free "respline" storage. */
+ return err; /* Return response-based status code. */
+ }
+
/* Sends REST command to the FTP server. */
uerr_t
ftp_rest (int csock, wgint offset)
/* Sends the LIST command to the server. If FILE is NULL, send just
`LIST' (no space). */
uerr_t
- ftp_list (int csock, const char *file)
+ ftp_list (int csock, const char *file, enum stype rs)
{
char *request, *respline;
int nwritten;
uerr_t err;
bool ok = false;
- int i = 0;
+ size_t i = 0;
/* Try `LIST -a' first and revert to `LIST' in case of failure. */
const char *list_commands[] = { "LIST -a",
"LIST" };
+ /* 2008-01-29 SMS. For a VMS FTP server, where "LIST -a" may not
+ fail, but will never do what is desired here, skip directly to the
+ simple "LIST" command (assumed to be the last one in the list).
+ */
+ if (rs == ST_VMS)
+ i = countof (list_commands)- 1;
+
do {
/* Send request. */
request = ftp_request (list_commands[i], file);
if (!len) return 0;
if (line[len - 1] == '\n')
line[--len] = '\0';
+ if (!len) return 0;
if (line[len - 1] == '\r')
line[--len] = '\0';
for ( ; *line ; line++ ) if (*line == '\t') *line = ' ';
return dir;
}
- /* Converts VMS symbolic permissions to number-style ones, e.g. string
- RWED,RWE,RE to 755. "D" (delete) is taken to be equal to "W"
- (write). Inspired by a patch of Stoyan Lekov <lekov@eda.bg>. */
- static int
- vmsperms (const char *s)
- {
- int perms = 0;
- do
+
+ /* Convert the VMS-style directory listing stored in "file" to a
+ linked list of fileinfo (system-independent) entries. The contents
+ of FILE are considered to be produced by the standard VMS
+ "DIRECTORY [/SIZE [= ALL]] /DATE [/OWNER] [/PROTECTION]" command,
+ more or less. (Different VMS FTP servers may have different headers,
+ and may not supply the same data, but all should be subsets of this.)
+
+ VMS normally provides local (server) time and date information.
+ Define the logical name or environment variable
+ "WGET_TIMEZONE_DIFFERENTIAL" (seconds) to adjust the receiving local
+ times if different from the remote local times.
+
+ 2005-02-23 SMS.
+ Added code to eliminate "^" escape characters from ODS5 extended file
+ names. The TCPIP FTP server (V5.4) seems to prefer requests which do
+ not use the escaped names which it provides.
+ */
+
+ #define VMS_DEFAULT_PROT_FILE 0644
+ #define VMS_DEFAULT_PROT_DIR 0755
+
+ /* 2005-02-23 SMS.
+ eat_carets().
+
+ Delete ODS5 extended file name escape characters ("^") in the
+ original buffer.
+ Note that the current scheme does not handle all EFN cases, but it
+ could be made more complicated.
+ */
+
+ static void eat_carets( char *str)
+ /* char *str; Source pointer. */
+ {
+ char *strd; /* Destination pointer. */
+ char hdgt;
+ unsigned char uchr;
+ unsigned char prop;
+
+ /* Skip ahead to the first "^", if any. */
+ while ((*str != '\0') && (*str != '^'))
+ str++;
+
+ /* If no caret was found, quit early. */
+ if (*str != '\0')
+ {
+ /* Shift characters leftward as carets are found. */
+ strd = str;
+ while (*str != '\0')
{
- switch (*s) {
- case ',': perms <<= 3; break;
- case 'R': perms |= 4; break;
- case 'W': perms |= 2; break;
- case 'D': perms |= 2; break;
- case 'E': perms |= 1; break;
- default: DEBUGP(("wrong VMS permissons!\n"));
+ uchr = *str;
+ if (uchr == '^')
+ {
+ /* Found a caret. Skip it, and check the next character. */
+ uchr = *(++str);
+ prop = char_prop[ uchr];
+ if (prop& 64)
+ {
+ /* Hex digit. Get char code from this and next hex digit. */
+ if (uchr <= '9')
+ {
+ hdgt = uchr- '0'; /* '0' - '9' -> 0 - 9. */
+ }
+ else
+ {
+ hdgt = ((uchr- 'A')& 7)+ 10; /* [Aa] - [Ff] -> 10 - 15. */
+ }
+ hdgt <<= 4; /* X16. */
+ uchr = *(++str); /* Next char must be hex digit. */
+ if (uchr <= '9')
+ {
+ uchr = hdgt+ uchr- '0';
+ }
+ else
+ {
+ uchr = hdgt+ ((uchr- 'A')& 15)+ 10;
+ }
+ }
+ else if (uchr == '_')
+ {
+ /* Convert escaped "_" to " ". */
+ uchr = ' ';
+ }
+ else if (uchr == '/')
+ {
+ /* Convert escaped "/" (invalid Zip) to "?" (invalid VMS). */
+ /* Note that this is a left-over from Info-ZIP code, and is
+ probably of little value here, except perhaps to avoid
+ directory confusion which an unconverted slash might cause.
+ */
+ uchr = '?';
+ }
+ /* Else, not a hex digit. Must be a simple escaped character
+ (or Unicode, which is not yet handled here).
+ */
}
+ /* Else, not a caret. Use as-is. */
+ *strd = uchr;
+
+ /* Advance destination and source pointers. */
+ strd++;
+ str++;
}
- while (*++s);
- return perms;
+ /* Terminate the destination string. */
+ *strd = '\0';
+ }
}
ftp_parse_vms_ls (const char *file)
{
FILE *fp;
- /* #### A third copy of more-or-less the same array ? */
- static const char *months[] = {
- "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
- "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
- };
- int i;
- int year, month, day; /* for time analysis */
- int hour, min, sec;
+ int dt, i, j, len;
+ int perms;
+ time_t timenow;
struct tm timestruct;
+ char date_str[ 32];
- char *line, *tok; /* tokenizer */
+ char *line, *tok; /* tokenizer */
struct fileinfo *dir, *l, cur; /* list creation */
- fp = fopen (file, "rb");
+ fp = fopen (file, "r");
if (!fp)
{
logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
}
dir = l = NULL;
- /* Skip empty line. */
- line = read_whole_line (fp);
- xfree_null (line);
+ /* Skip blank lines, Directory heading, and more blank lines. */
- /* Skip "Directory PUB$DEVICE[PUB]" */
- line = read_whole_line (fp);
- xfree_null (line);
+ j = 0; /* Expecting initial blank line(s). */
+ while (1)
+ {
+ line = read_whole_line (fp);
+ if (line == NULL)
+ {
+ break;
+ }
+ else
+ {
+ i = clean_line (line);
+ if (i <= 0)
+ {
+ xfree (line); /* Free useless line storage. */
+ continue; /* Blank line. Keep looking. */
+ }
+ else
+ {
+ if ((j == 0) && (line[ i- 1] == ']'))
+ {
+ /* Found Directory heading line. Next non-blank line
+ is significant.
+ */
+ j = 1;
+ }
+ else if (!strncmp (line, "Total of ", 9))
+ {
+ /* Found "Total of ..." footing line. No valid data
+ will follow (empty directory).
+ */
+ xfree (line); /* Free useless line storage. */
+ line = NULL; /* Arrange for early exit. */
+ break;
+ }
+ else
+ {
+ break; /* Must be significant data. */
+ }
+ }
+ xfree (line); /* Free useless line storage. */
+ }
+ }
- /* Skip empty line. */
- line = read_whole_line (fp);
- xfree_null (line);
+ /* Read remainder of file until the next blank line or EOF. */
- /* Line loop to end of file: */
- while ((line = read_whole_line (fp)) != NULL)
+ while (line != NULL)
{
char *p;
- i = clean_line (line);
- if (!i)
- {
- xfree (line);
- break;
- }
- /* First column: Name. A bit of black magic again. The name my be
- either ABCD.EXT or ABCD.EXT;NUM and it might be on a separate
- line. Therefore we will first try to get the complete name
- until the first space character; if it fails, we assume that the name
- occupies the whole line. After that we search for the version
- separator ";", we remove it and check the extension of the file;
- extension .DIR denotes directory. */
+ /* The first token is the file name. After a long name, other
+ data may be on the following line. A valid directory name ends
+ in ".DIR;1" (any case), although some VMS FTP servers may omit
+ the version number (";1").
+ */
tok = strtok(line, " ");
if (tok == NULL) tok = line;
- DEBUGP(("file name: '%s'\n", tok));
- for (p = tok ; *p && *p != ';' ; p++)
- ;
- if (*p == ';') *p = '\0';
- p = tok + strlen(tok) - 4;
- if (!strcmp(p, ".DIR")) *p = '\0';
- cur.name = xstrdup(tok);
- DEBUGP(("Name: '%s'\n", cur.name));
-
- /* If the name ends on .DIR or .DIR;#, it's a directory. We also set
- the file size to zero as the listing does tell us only the size in
- filesystem blocks - for an integrity check (when mirroring, for
- example) we would need the size in bytes. */
-
- if (! *p)
+ DEBUGP(("file name: '%s'\n", tok));
+
+ /* Stripping the version number on a VMS system would be wrong.
+ It may be foolish on a non-VMS system, too, but that's someone
+ else's problem. (Define PRESERVE_VMS_VERSIONS for proper
+ operation on other operating systems.)
+
+ 2005-02-23 SMS.
+ ODS5 extended file names may contain escaped semi-colons, so
+ the version number is identified as right-side decimal digits
+ led by a non-escaped semi-colon. It may be absent.
+ */
+
+ #if (!defined( __VMS) && !defined( PRESERVE_VMS_VERSIONS))
+ for (p = tok+ strlen( tok); (--p > tok) && c_isdigit( *p); );
+ if ((*p == ';') && (*(p- 1) != '^'))
+ {
+ *p = '\0';
+ }
+ #endif /* (!defined( __VMS) && !defined( PRESERVE_VMS_VERSIONS)) */
+
+ /* 2005-02-23 SMS.
+ Eliminate "^" escape characters from ODS5 extended file name.
+ (A caret is invalid in an ODS2 name, so this is always safe.)
+ */
+ eat_carets( tok);
+ DEBUGP(("file name-^: '%s'\n", tok));
+
+ /* Differentiate between a directory and any other file. A VMS
+ listing may not include file protections (permissions). Set a
+ default permissions value (according to the file type), which
+ may be overwritten later. Store directory names without the
+ ".DIR;1" file type and version number, as the plain name is
+ what will work in a CWD command.
+ */
+ len = strlen( tok);
+ if (!strncasecmp( (tok+ (len- 4)), ".DIR", 4))
{
+ *(tok+ (len -= 4)) = '\0'; /* Discard ".DIR". */
cur.type = FT_DIRECTORY;
- cur.size = 0;
- DEBUGP(("Directory\n"));
+ cur.perms = VMS_DEFAULT_PROT_DIR;
+ DEBUGP(("Directory (nv)\n"));
+ }
+ else if (!strncasecmp( (tok+ (len- 6)), ".DIR;1", 6))
+ {
+ *(tok+ (len -= 6)) = '\0'; /* Discard ".DIR;1". */
+ cur.type = FT_DIRECTORY;
+ cur.perms = VMS_DEFAULT_PROT_DIR;
+ DEBUGP(("Directory (v)\n"));
}
else
{
cur.type = FT_PLAINFILE;
+ cur.perms = VMS_DEFAULT_PROT_FILE;
DEBUGP(("File\n"));
}
+ cur.name = xstrdup(tok);
+ DEBUGP(("Name: '%s'\n", cur.name));
+
+ /* Null the date and time string. */
+ *date_str = '\0';
+ /* VMS lacks symbolic links. */
+ cur.linkto = NULL;
+
+ /* VMS reports file sizes in (512-byte) disk blocks, not bytes,
+ hence useless for an integrity check based on byte-count.
+ Set size to unknown.
+ */
cur.size = 0;
- /* Second column, if exists, or the first column of the next line
- contain file size in blocks. We will skip it. */
+ /* Get token 2, if any. A long name may force all other data onto
+ a second line. If needed, read the second line.
+ */
tok = strtok(NULL, " ");
if (tok == NULL)
- {
- DEBUGP(("Getting additional line\n"));
- xfree (line);
- line = read_whole_line (fp);
- if (!line)
- {
- DEBUGP(("empty line read, leaving listing parser\n"));
- break;
- }
- i = clean_line (line);
- if (!i)
{
- DEBUGP(("confusing VMS listing item, leaving listing parser\n"));
+ DEBUGP(("Getting additional line.\n"));
xfree (line);
- break;
- }
- tok = strtok(line, " ");
- }
- DEBUGP(("second token: '%s'\n", tok));
-
- /* Third/Second column: Date DD-MMM-YYYY. */
-
- tok = strtok(NULL, "-");
- if (tok == NULL) continue;
- DEBUGP(("day: '%s'\n",tok));
- day = atoi(tok);
- tok = strtok(NULL, "-");
- if (!tok)
- {
- /* If the server produces garbage like
- 'EA95_0PS.GZ;1 No privilege for attempted operation'
- the first strtok(NULL, "-") will return everything until the end
- of the line and only the next strtok() call will return NULL. */
- DEBUGP(("nonsense in VMS listing, skipping this line\n"));
- xfree (line);
- break;
- }
- for (i=0; i<12; i++) if (!strcmp(tok,months[i])) break;
- /* Uknown months are mapped to January */
- month = i % 12 ;
- tok = strtok (NULL, " ");
- if (tok == NULL) continue;
- year = atoi (tok) - 1900;
- DEBUGP(("date parsed\n"));
-
- /* Fourth/Third column: Time hh:mm[:ss] */
- tok = strtok (NULL, " ");
- if (tok == NULL) continue;
- min = sec = 0;
- p = tok;
- hour = atoi (p);
- for (; *p && *p != ':'; ++p)
- ;
- if (*p)
- min = atoi (++p);
- for (; *p && *p != ':'; ++p)
- ;
- if (*p)
- sec = atoi (++p);
-
- DEBUGP(("YYYY/MM/DD HH:MM:SS - %d/%02d/%02d %02d:%02d:%02d\n",
- year+1900, month, day, hour, min, sec));
-
- /* Build the time-stamp (copy & paste from above) */
- timestruct.tm_sec = sec;
- timestruct.tm_min = min;
- timestruct.tm_hour = hour;
- timestruct.tm_mday = day;
- timestruct.tm_mon = month;
- timestruct.tm_year = year;
- timestruct.tm_wday = 0;
- timestruct.tm_yday = 0;
- timestruct.tm_isdst = -1;
- cur.tstamp = mktime (×truct); /* store the time-stamp */
-
- DEBUGP(("Timestamp: %ld\n", cur.tstamp));
-
- /* Skip the fifth column */
-
- tok = strtok(NULL, " ");
- if (tok == NULL) continue;
+ line = read_whole_line (fp);
+ if (!line)
+ {
+ DEBUGP(("EOF. Leaving listing parser.\n"));
+ break;
+ }
- /* Sixth column: Permissions */
+ /* Second line must begin with " ". Otherwise, it's a first
+ line (and we may be confused).
+ */
+ if (i <= 0)
+ {
+ /* Blank line. End of significant file listing. */
+ DEBUGP(("Blank line. Leaving listing parser.\n"));
+ xfree (line); /* Free useless line storage. */
+ break;
+ }
+ else if (line[ 0] != ' ')
+ {
+ DEBUGP(("Non-blank in column 1. Must be a new file name?\n"));
+ continue;
+ }
+ else
+ {
+ tok = strtok (line, " ");
+ if (tok == NULL)
+ {
+ /* Unexpected non-empty but apparently blank line. */
+ DEBUGP(("Null token. Leaving listing parser.\n"));
+ xfree (line); /* Free useless line storage. */
+ break;
+ }
+ }
+ }
- tok = strtok(NULL, ","); /* Skip the VMS-specific SYSTEM permissons */
- if (tok == NULL) continue;
- tok = strtok(NULL, ")");
- if (tok == NULL)
+ /* Analyze tokens. (Order is not significant, except date must
+ precede time.)
+
+ Size: ddd or ddd/ddd (where "ddd" is a decimal number)
+ Date: DD-MMM-YYYY
+ Time: HH:MM or HH:MM:SS or HH:MM:SS.CC
+ Owner: [user] or [user,group]
+ Protection: (ppp,ppp,ppp,ppp) (where "ppp" is "RWED" or some
+ subset thereof, for System, Owner, Group, World.
+
+ If permission is lacking, info may be replaced by the string:
+ "No privilege for attempted operation".
+ */
+ while (tok != NULL)
+ {
+ DEBUGP (("Token: >%s<: ", tok));
+
+ if ((strlen( tok) < 12) && (strchr( tok, '-') != NULL))
+ {
+ /* Date. */
+ DEBUGP (("Date.\n"));
+ strcpy( date_str, tok);
+ strcat( date_str, " ");
+ }
+ else if ((strlen( tok) < 12) && (strchr( tok, ':') != NULL))
+ {
+ /* Time. */
+ DEBUGP (("Time. "));
+ strncat( date_str,
+ tok,
+ (sizeof( date_str)- strlen( date_str)- 1));
+ DEBUGP (("Date time: >%s<\n", date_str));
+ }
+ else if (strchr( tok, '[') != NULL)
+ {
+ /* Owner. (Ignore.) */
+ DEBUGP (("Owner.\n"));
+ }
+ else if (strchr( tok, '(') != NULL)
+ {
+ /* Protections (permissions). */
+ perms = 0;
+ j = 0;
+ for (i = 0; i < strlen( tok); i++)
+ {
+ switch (tok[ i])
+ {
+ case '(':
+ break;
+ case ')':
+ break;
+ case ',':
+ if (j == 0)
+ {
+ perms = 0;
+ j = 1;
+ }
+ else
+ {
+ perms <<= 3;
+ }
+ break;
+ case 'R':
+ perms |= 4;
+ break;
+ case 'W':
+ perms |= 2;
+ break;
+ case 'E':
+ perms |= 1;
+ break;
+ case 'D':
+ perms |= 2;
+ break;
+ }
+ }
+ cur.perms = perms;
+ DEBUGP (("Prot. perms = %0o.\n", cur.perms));
+ }
+ else
+ {
+ /* Nondescript. Probably size(s), probably in blocks.
+ Could be "No privilege ..." message. (Ignore.)
+ */
+ DEBUGP (("Ignored (size?).\n"));
+ }
+
+ tok = strtok (NULL, " ");
+ }
+
+ /* Tokens exhausted. Interpret the data, and fill in the
+ structure.
+ */
+ /* Fill tm timestruct according to date-time string. Fractional
+ seconds are ignored. Default to current time, if conversion
+ fails.
+ */
+ timenow = time( NULL);
+ localtime_r( &timenow, ×truct);
+ strptime( date_str, "%d-%b-%Y %H:%M:%S", ×truct);
+
+ /* Convert struct tm local time to time_t local time. */
+ timenow = mktime (×truct);
+ /* Offset local time according to environment variable (seconds). */
+ if ((tok = getenv( "WGET_TIMEZONE_DIFFERENTIAL")) != NULL)
{
- DEBUGP(("confusing VMS permissions, skipping line\n"));
- xfree (line);
- continue;
+ dt = atoi( tok);
+ DEBUGP (("Time differential = %d.\n", dt));
+ }
+ else
+ {
+ dt = 0;
}
- /* Permissons have the format "RWED,RWED,RE" */
- cur.perms = vmsperms(tok);
- DEBUGP(("permissions: %s -> 0%o\n", tok, cur.perms));
- cur.linkto = NULL;
+ if (dt >= 0)
+ {
+ timenow += dt;
+ }
+ else
+ {
+ timenow -= (-dt);
+ }
+ cur.tstamp = timenow; /* Store the time-stamp. */
+ DEBUGP(("Timestamp: %ld\n", cur.tstamp));
- /* And put everything into the linked list */
+ /* Add the data for this item to the linked list, */
if (!dir)
{
- l = dir = xnew (struct fileinfo);
+ l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
memcpy (l, &cur, sizeof (cur));
l->prev = l->next = NULL;
}
else
{
cur.prev = l;
- l->next = xnew (struct fileinfo);
+ l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
l = l->next;
memcpy (l, &cur, sizeof (cur));
l->next = NULL;
}
+ /* Free old line storage. Read a new line. */
xfree (line);
+ line = read_whole_line (fp);
+ if (line != NULL)
+ {
+ i = clean_line (line);
+ if (i <= 0)
+ {
+ /* Blank line. End of significant file listing. */
+ xfree (line); /* Free useless line storage. */
+ break;
+ }
+ }
}
fclose (fp);
{
FILE *fp;
char *upwd;
+ char *htcldir; /* HTML-clean dir name */
char *htclfile; /* HTML-clean file name */
+ char *urlclfile; /* URL-clean file name */
if (!output_stream)
{
}
else
upwd = xstrdup ("");
+
+ htcldir = html_quote_string (u->dir);
+
fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
fprintf (fp, "<html>\n<head>\n<title>");
- fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
+ fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
- fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
+ fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
fprintf (fp, "</h1>\n<hr>\n<pre>\n");
+
while (f)
{
fprintf (fp, " ");
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
- struct tm *ptm = localtime ((time_t *)&f->tstamp);
+ time_t tstamp = f->tstamp;
+ struct tm *ptm = localtime (&tstamp);
fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
ptm->tm_mday);
break;
}
htclfile = html_quote_string (f->name);
+ urlclfile = url_escape_unsafe_and_reserved (f->name);
fprintf (fp, "<a href=\"ftp://%s%s:%d", upwd, u->host, u->port);
if (*u->dir != '/')
putc ('/', fp);
- fprintf (fp, "%s", u->dir);
+ /* XXX: Should probably URL-escape dir components here, rather
+ * than just HTML-escape, for consistency with the next bit where
+ * we use urlclfile for the file component. Anyway, this is safer
+ * than what we had... */
+ fprintf (fp, "%s", htcldir);
if (*u->dir)
putc ('/', fp);
- fprintf (fp, "%s", htclfile);
+ fprintf (fp, "%s", urlclfile);
if (f->type == FT_DIRECTORY)
putc ('/', fp);
fprintf (fp, "\">%s", htclfile);
fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
putc ('\n', fp);
xfree (htclfile);
+ xfree (urlclfile);
f = f->next;
}
fprintf (fp, "</pre>\n</body>\n</html>\n");
+ xfree (htcldir);
xfree (upwd);
if (!output_stream)
fclose (fp);
/* File Transfer Protocol support.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
#include "convert.h" /* for downloaded_file */
#include "recur.h" /* for INFINITE_RECURSION */
+ #ifdef __VMS
+ # include "vms.h"
+ #endif /* def __VMS */
+
+
/* File where the "ls -al" listing will be saved. */
#ifdef MSDOS
#define LIST_FILENAME "_listing"
struct url *proxy; /* FTWK-style proxy */
} ccon;
+extern int numurls;
/* Look for regexp "( *[0-9]+ *byte" (literal parenthesis) anywhere in
the string S, and return the number converted to wgint, if found, 0
logprintf (LOG_VERBOSE, " (%s)", human_readable (size));
if (start > 0)
{
- if (start >= 1024)
+ if (size - start >= 1024)
logprintf (LOG_VERBOSE, _(", %s (%s) remaining"),
number_to_static_string (size - start),
human_readable (size - start));
logputs (LOG_VERBOSE, !authoritative ? _(" (unauthoritative)\n") : "\n");
}
+static uerr_t ftp_get_listing (struct url *, ccon *, struct fileinfo **);
+
/* Retrieves a file with denoted parameters through opening an FTP
connection to the server. It always closes the data connection,
and closes the control connection in case of error. */
bool rest_failed = false;
int flags;
wgint rd_size;
+ char type_char;
assert (con != NULL);
assert (con->target != NULL);
csock = con->csock;
else /* cmd & DO_LOGIN */
{
- char type_char;
char *host = con->proxy ? con->proxy->host : u->host;
int port = con->proxy ? con->proxy->port : u->port;
char *logname = user;
con->csock = -1;
/* Second: Login with proper USER/PASS sequence. */
- logprintf (LOG_VERBOSE, _("Logging in as %s ... "), escnonprint (user));
+ logprintf (LOG_VERBOSE, _("Logging in as %s ... "),
+ quotearg_style (escape_quoting_style, user));
if (opt.server_response)
logputs (LOG_ALWAYS, "\n");
err = ftp_login (csock, logname, passwd);
default:
abort ();
}
+
+ #if 0
+ /* 2004-09-17 SMS.
+ Don't help me out. Please.
+ A reasonably recent VMS FTP server will cope just fine with
+ UNIX file specifications. This code just spoils things.
+ Discarding the device name, for example, is not a wise move.
+ This code was disabled but left in as an example of what not
+ to do.
+ */
+
/* VMS will report something like "PUB$DEVICE:[INITIAL.FOLDER]".
Convert it to "/INITIAL/FOLDER" */
if (con->rs == ST_VMS)
DEBUGP ((" new = '%s'\n\n", con->id));
}
}
+ #endif /* 0 */
+
if (!opt.server_response)
logputs (LOG_VERBOSE, _("done.\n"));
logputs (LOG_VERBOSE, _("==> CWD not needed.\n"));
else
{
+ char *targ;
+ int cwd_count;
+ int cwd_end;
+ int cwd_start;
+
char *target = u->dir;
DEBUGP (("changing working directory\n"));
in "bar", not in "foo/bar", as would be customary
elsewhere. */
+ /* 2004-09-20 SMS.
+ Why is this wise even on UNIX? It certainly fouls VMS.
+ See below for a more reliable, more universal method.
+ */
+
+ /* 2008-04-22 MJC.
+ I'm not crazy about it either. I'm informed it's useful
+ for misconfigured servers that have some dirs in the path
+ with +x but -r, but this method is not RFC-conformant. I
+ understand the need to deal with crappy server
+ configurations, but it's far better to use the canonical
+ method first, and fall back to kludges second.
+ */
+
if (target[0] != '/'
&& !(con->rs != ST_UNIX
&& c_isalpha (target[0])
&& target[1] == ':')
- && con->rs != ST_OS400)
+ && (con->rs != ST_OS400)
+ && (con->rs != ST_VMS))
{
int idlen = strlen (con->id);
char *ntarget, *p;
target = ntarget;
}
+ #if 0
+ /* 2004-09-17 SMS.
+ Don't help me out. Please.
+ A reasonably recent VMS FTP server will cope just fine with
+ UNIX file specifications. This code just spoils things.
+ Discarding the device name, for example, is not a wise
+ move.
+ This code was disabled but left in as an example of what
+ not to do.
+ */
+
/* If the FTP host runs VMS, we will have to convert the absolute
directory path in UNIX notation to absolute directory path in
VMS notation as VMS FTP servers do not like UNIX notation of
DEBUGP ((" Unix: '%s'\n VMS: '%s'\n", target, ntarget));
target = ntarget;
}
+ #endif /* 0 */
+
+ /* 2004-09-20 SMS.
+ A relative directory is relative to the initial directory.
+ Thus, what _is_ useful on VMS (and probably elsewhere) is
+ to CWD to the initial directory (ideally, whatever the
+ server reports, _exactly_, NOT badly UNIX-ixed), and then
+ CWD to the (new) relative directory. This should probably
+ be restructured as a function, called once or twice, but
+ I'm lazy enough to take the badly indented loop short-cut
+ for now.
+ */
+
+ /* Decide on one pass (absolute) or two (relative).
+ The VMS restriction may be relaxed when the squirrely code
+ above is reformed.
+ */
+ if ((con->rs == ST_VMS) && (target[0] != '/'))
+ {
+ cwd_start = 0;
+ DEBUGP (("Using two-step CWD for relative path.\n"));
+ }
+ else
+ {
+ /* Go straight to the target. */
+ cwd_start = 1;
+ }
+
+ /* At least one VMS FTP server (TCPware V5.6-2) can switch to
+ a UNIX emulation mode when given a UNIX-like directory
+ specification (like "a/b/c"). If allowed to continue this
+ way, LIST interpretation will be confused, because the
+ system type (SYST response) will not be re-checked, and
+ future UNIX-format directory listings (for multiple URLs or
+ "-r") will be horribly misinterpreted.
+
+ The cheap and nasty work-around is to do a "CWD []" after a
+ UNIX-like directory specification is used. (A single-level
+ directory is harmless.) This puts the TCPware server back
+ into VMS mode, and does no harm on other servers.
+
+ Unlike the rest of this block, this particular behavior
+ _is_ VMS-specific, so it gets its own VMS test.
+ */
+ if ((con->rs == ST_VMS) && (strchr( target, '/') != NULL))
+ {
+ cwd_end = 3;
+ DEBUGP (("Using extra \"CWD []\" step for VMS server.\n"));
+ }
+ else
+ {
+ cwd_end = 2;
+ }
+
+ /* 2004-09-20 SMS. */
+ /* Sorry about the deviant indenting. Laziness. */
+
+ for (cwd_count = cwd_start; cwd_count < cwd_end; cwd_count++)
+ {
+ switch (cwd_count)
+ {
+ case 0:
+ /* Step one (optional): Go to the initial directory,
+ exactly as reported by the server.
+ */
+ targ = con->id;
+ break;
+
+ case 1:
+ /* Step two: Go to the target directory. (Absolute or
+ relative will work now.)
+ */
+ targ = target;
+ break;
+
+ case 2:
+ /* Step three (optional): "CWD []" to restore server
+ VMS-ness.
+ */
+ targ = "[]";
+ break;
+
+ default:
+ /* Can't happen. */
+ assert (1);
+ }
if (!opt.server_response)
- logprintf (LOG_VERBOSE, "==> CWD %s ... ",
- logprintf (LOG_VERBOSE, "==> CWD (%d) %s ... ",
- cwd_count, escnonprint (target));
- err = ftp_cwd (csock, targ);
++ logprintf (LOG_VERBOSE, "==> CWD (%d) %s ... ", cwd_count,
+ quotearg_style (escape_quoting_style, target));
+ err = ftp_cwd (csock, target);
/* FTPRERR, WRITEFAILED, FTPNSFOD */
switch (err)
{
return err;
case FTPNSFOD:
logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_NOTQUIET, _("No such directory `%s'.\n\n"),
- escnonprint (u->dir));
+ logprintf (LOG_NOTQUIET, _("No such directory %s.\n\n"),
+ quote (u->dir));
fd_close (csock);
con->csock = -1;
return err;
}
if (!opt.server_response)
logputs (LOG_VERBOSE, _("done.\n"));
- }
+
+ } /* for */
+
+ /* 2004-09-20 SMS. */
+ /* End of deviant indenting. */
+
+ } /* else */
}
else /* do not CWD */
logputs (LOG_VERBOSE, _("==> CWD not required.\n"));
if (opt.verbose)
{
if (!opt.server_response)
- logprintf (LOG_VERBOSE, "==> SIZE %s ... ", escnonprint (u->file));
+ logprintf (LOG_VERBOSE, "==> SIZE %s ... ",
+ quotearg_style (escape_quoting_style, u->file));
}
err = ftp_size (csock, u->file, len);
if (cmd & DO_RETR)
{
- /* If we're in spider mode, don't really retrieve anything. The
- fact that we got to this point should be proof enough that
- the file exists, vaguely akin to HTTP's concept of a "HEAD"
- request. */
+ /* If we're in spider mode, don't really retrieve anything except
+ the directory listing and verify whether the given "file" exists. */
if (opt.spider)
{
+ bool exists = false;
+ uerr_t res;
+ struct fileinfo *f;
+ res = ftp_get_listing (u, con, &f);
+ /* Set the DO_RETR command flag again, because it gets unset when
+ calling ftp_get_listing() and would otherwise cause an assertion
+ failure earlier on when this function gets repeatedly called
+ (e.g., when recursing). */
+ con->cmd |= DO_RETR;
+ if (res == RETROK)
+ {
+ while (f)
+ {
+ if (!strcmp (f->name, u->file))
+ {
+ exists = true;
+ break;
+ }
+ f = f->next;
+ }
+ if (exists)
+ {
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, _("File %s exists.\n"),
+ quote (u->file));
+ }
+ else
+ {
+ logputs (LOG_VERBOSE, "\n");
+ logprintf (LOG_NOTQUIET, _("No such file %s.\n"),
+ quote (u->file));
+ }
+ }
fd_close (csock);
con->csock = -1;
fd_close (dtsock);
{
if (restval)
logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_VERBOSE, "==> RETR %s ... ", escnonprint (u->file));
+ logprintf (LOG_VERBOSE, "==> RETR %s ... ",
+ quotearg_style (escape_quoting_style, u->file));
}
}
return err;
case FTPNSFOD:
logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_NOTQUIET, _("No such file `%s'.\n\n"),
- escnonprint (u->file));
+ logprintf (LOG_NOTQUIET, _("No such file %s.\n\n"),
+ quote (u->file));
fd_close (dtsock);
fd_close (local_sock);
return err;
case FTPOK:
+ if (getenv( "FTP_DELETE") != NULL)
+ {
+ err = ftp_dele (csock, u->file);
+ }
break;
default:
abort ();
/* As Maciej W. Rozycki (macro@ds2.pg.gda.pl) says, `LIST'
without arguments is better than `LIST .'; confirmed by
RFC959. */
- err = ftp_list (csock, NULL);
+ err = ftp_list (csock, NULL, con->rs);
/* FTPRERR, WRITEFAILED */
switch (err)
{
return err;
case FTPNSFOD:
logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_NOTQUIET, _("No such file or directory `%s'.\n\n"),
- ".");
+ logprintf (LOG_NOTQUIET, _("No such file or directory %s.\n\n"),
+ quote ("."));
fd_close (dtsock);
fd_close (local_sock);
return err;
if (dtsock < 0)
{
logprintf (LOG_NOTQUIET, "accept: %s\n", strerror (errno));
- return err;
+ return CONERROR;
}
}
/* Open the file -- if output_stream is set, use it instead. */
+
+ /* 2005-04-17 SMS.
+ Note that having the output_stream ("-O") file opened in main()
+ (main.c) rather limits the ability in VMS to open the file
+ differently for ASCII versus binary FTP here. (Of course, doing it
+ there allows a open failure to be detected immediately, without first
+ connecting to the server.)
+ */
if (!output_stream || con->cmd & DO_LIST)
{
+ /* On VMS, alter the name as required. */
+ #ifdef __VMS
+ char *targ;
+
+ targ = ods_conform( con->target);
+ if (targ != con->target)
+ {
+ xfree( con->target);
+ con->target = targ;
+ }
+ #endif /* def __VMS */
+
mkalldirs (con->target);
if (opt.backups)
rotate_backups (con->target);
+ /* 2005-04-15 SMS.
+ For VMS, define common fopen() optional arguments, and a handy macro
+ for use as a variable "binary" flag.
+ Elsewhere, define a constant "binary" flag.
+ Isn't it nice to have distinct text and binary file types?
+ */
+ # define BIN_TYPE_TRANSFER (type_char != 'A')
+ #ifdef __VMS
+ # define FOPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
+ # define FOPEN_OPT_ARGS_BIN "ctx=bin,stm", "rfm=fix", "mrs=512" FOPEN_OPT_ARGS
+ # define BIN_TYPE_FILE (BIN_TYPE_TRANSFER && (opt.ftp_stmlf == 0))
+ #else /* def __VMS */
+ # define BIN_TYPE_FILE 1
+ #endif /* def __VMS [else] */
+
if (restval && !(con->cmd & DO_LIST))
- fp = fopen (con->target, "ab");
+ {
+ #ifdef __VMS
+ int open_id;
+
+ if (BIN_TYPE_FILE)
+ {
+ open_id = 3;
+ fp = fopen (con->target, "ab", FOPEN_OPT_ARGS_BIN);
+ }
+ else
+ {
+ open_id = 4;
+ fp = fopen (con->target, "a", FOPEN_OPT_ARGS);
+ }
+ #else /* def __VMS */
+ fp = fopen (con->target, "ab");
+ #endif /* def __VMS [else] */
+ }
else if (opt.noclobber || opt.always_rest || opt.timestamping || opt.dirstruct
|| opt.output_document)
- fp = fopen (con->target, "wb");
+ {
+ #ifdef __VMS
+ int open_id;
+
+ if (BIN_TYPE_FILE)
+ {
+ open_id = 5;
+ fp = fopen (con->target, "wb", FOPEN_OPT_ARGS_BIN);
+ }
+ else
+ {
+ open_id = 6;
+ fp = fopen (con->target, "w", FOPEN_OPT_ARGS);
+ }
+ #else /* def __VMS */
+ fp = fopen (con->target, "wb");
+ #endif /* def __VMS [else] */
+ }
else
{
fp = fopen_excl (con->target, true);
print it out. */
if (opt.server_response && (con->cmd & DO_LIST))
{
+ /* 2005-02-25 SMS.
+ Much of this work may already have been done, but repeating it should
+ do no damage beyond wasting time.
+ */
+ /* On VMS, alter the name as required. */
+ #ifdef __VMS
+ char *targ;
+
+ targ = ods_conform( con->target);
+ if (targ != con->target)
+ {
+ xfree( con->target);
+ con->target = targ;
+ }
+ #endif /* def __VMS */
+
mkalldirs (con->target);
fp = fopen (con->target, "r");
if (!fp)
char *p = strchr (line, '\0');
while (p > line && (p[-1] == '\n' || p[-1] == '\r'))
*--p = '\0';
- logprintf (LOG_ALWAYS, "%s\n", escnonprint (line));
+ logprintf (LOG_ALWAYS, "%s\n",
+ quotearg_style (escape_quoting_style, line));
xfree (line);
}
fclose (fp);
uerr_t err;
struct_stat st;
- if (!con->target)
- con->target = url_file_name (u);
+ /* Get the target, and set the name for the message accordingly. */
+ if ((f == NULL) && (con->target))
+ {
+ /* Explicit file (like ".listing"). */
+ locf = con->target;
+ }
+ else
+ {
+ /* URL-derived file. Consider "-O file" name. */
+ con->target = url_file_name (u);
+ if (!opt.output_document)
+ locf = con->target;
+ else
+ locf = opt.output_document;
+ }
- if (opt.noclobber && file_exists_p (con->target))
+ /* If the output_document was given, then this check was already done and
+ the file didn't exist. Hence the !opt.output_document */
+ if (opt.noclobber && !opt.output_document && file_exists_p (con->target))
{
logprintf (LOG_VERBOSE,
- _("File `%s' already there; not retrieving.\n"), con->target);
+ _("File %s already there; not retrieving.\n"), quote (con->target));
/* If the file is there, we suppose it's retrieved OK. */
return RETROK;
}
/* Remove it if it's a link. */
remove_link (con->target);
- if (!opt.output_document)
- locf = con->target;
- else
- locf = opt.output_document;
count = 0;
strcpy (tmp, " ");
if (count > 1)
sprintf (tmp, _("(try:%2d)"), count);
- logprintf (LOG_VERBOSE, "--%s-- %s\n %s => `%s'\n",
- tms, hurl, tmp, locf);
+ logprintf (LOG_VERBOSE, "--%s-- %s\n %s => %s\n",
+ tms, hurl, tmp, quote (locf));
#ifdef WINDOWS
ws_changetitle (hurl);
#endif
con->csock = -1;
}
if (!opt.spider)
- logprintf (LOG_VERBOSE, _("%s (%s) - `%s' saved [%s]\n\n"),
- tms, tmrate, locf, number_to_static_string (len));
+ {
+ bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
+
+ logprintf (LOG_VERBOSE,
+ write_to_stdout
+ ? _("%s (%s) - written to stdout %s[%s]\n\n")
+ : _("%s (%s) - %s saved [%s]\n\n"),
+ tms, tmrate,
+ write_to_stdout ? "" : quote (locf),
+ number_to_static_string (len));
+ }
if (!opt.verbose && !opt.quiet)
{
/* Need to hide the password from the URL. The `if' is here
number of bytes and files downloaded. */
{
total_downloaded_bytes += len;
- opt.numurls++;
+ numurls++;
}
/* Deletion of listing files is not controlled by --delete-after, but
for instance, may want to know how many bytes and files they've
downloaded through it. */
total_downloaded_bytes += len;
- opt.numurls++;
+ numurls++;
if (opt.delete_after)
{
uf = url_file_name (u);
lf = file_merge (uf, LIST_FILENAME);
xfree (uf);
- DEBUGP ((_("Using `%s' as listing tmp file.\n"), lf));
+ DEBUGP ((_("Using %s as listing tmp file.\n"), quote (lf)));
- con->target = lf;
+ con->target = xstrdup (lf);
+ xfree (lf);
err = ftp_loop_internal (u, NULL, con);
+ lf = xstrdup (con->target);
+ xfree (con->target);
con->target = old_target;
if (err == RETROK)
- *f = ftp_parse_ls (lf, con->rs);
- else
- *f = NULL;
- if (opt.remove_listing)
{
- if (unlink (lf))
- logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
- else
- logprintf (LOG_VERBOSE, _("Removed `%s'.\n"), lf);
+ *f = ftp_parse_ls (lf, con->rs);
+ if (opt.remove_listing)
+ {
+ if (unlink (lf))
+ logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
+ else
+ logprintf (LOG_VERBOSE, _("Removed %s.\n"), quote (lf));
+ }
}
+ else
+ *f = NULL;
xfree (lf);
con->cmd &= ~DO_LIST;
return err;
struct fileinfo *orig;
wgint local_size;
time_t tml;
- bool dlthis;
+ bool dlthis; /* Download this (file). */
+ const char *actual_target = NULL;
/* Increase the depth. */
++depth;
/* Remote file is older, file sizes can be compared and
are both equal. */
logprintf (LOG_VERBOSE, _("\
-Remote file no newer than local file `%s' -- not retrieving.\n"), con->target);
+Remote file no newer than local file %s -- not retrieving.\n"), quote (con->target));
dlthis = false;
}
else if (eq_size)
{
/* Remote file is newer or sizes cannot be matched */
logprintf (LOG_VERBOSE, _("\
-Remote file is newer than local file `%s' -- retrieving.\n\n"),
- con->target);
+Remote file is newer than local file %s -- retrieving.\n\n"),
+ quote (con->target));
}
else
{
{
logprintf (LOG_VERBOSE, _("\
Already have correct symlink %s -> %s\n\n"),
- con->target, escnonprint (f->linkto));
+ quote (con->target),
+ quote (f->linkto));
dlthis = false;
break;
}
}
}
logprintf (LOG_VERBOSE, _("Creating symlink %s -> %s\n"),
- con->target, escnonprint (f->linkto));
+ quote (con->target), quote (f->linkto));
/* Unlink before creating symlink! */
unlink (con->target);
if (symlink (f->linkto, con->target) == -1)
} /* have f->linkto */
#else /* not HAVE_SYMLINK */
logprintf (LOG_NOTQUIET,
- _("Symlinks not supported, skipping symlink `%s'.\n"),
- con->target);
+ _("Symlinks not supported, skipping symlink %s.\n"),
+ quote (con->target));
#endif /* not HAVE_SYMLINK */
}
else /* opt.retr_symlinks */
break;
case FT_DIRECTORY:
if (!opt.recursive)
- logprintf (LOG_NOTQUIET, _("Skipping directory `%s'.\n"),
- escnonprint (f->name));
+ logprintf (LOG_NOTQUIET, _("Skipping directory %s.\n"),
+ quote (f->name));
break;
case FT_PLAINFILE:
/* Call the retrieve loop. */
break;
case FT_UNKNOWN:
logprintf (LOG_NOTQUIET, _("%s: unknown/unsupported file type.\n"),
- escnonprint (f->name));
+ quote (f->name));
break;
} /* switch */
- /* Set the time-stamp information to the local file. Symlinks
- are not to be stamped because it sets the stamp on the
- original. :( */
- if (!(f->type == FT_SYMLINK && !opt.retr_symlinks)
- && f->tstamp != -1
- && dlthis
- && file_exists_p (con->target))
+
+ /* 2004-12-15 SMS.
+ * Set permissions _before_ setting the times, as setting the
+ * permissions changes the modified-time, at least on VMS.
+ * Also, use the opt.output_document name here, too, as
+ * appropriate. (Do the test once, and save the result.)
+ */
+
- /* #### This code repeats in http.c and ftp.c. Move it to a
- function! */
- actual_target = NULL;
- if (opt.output_document)
- {
- if (output_stream_regular)
- actual_target = opt.output_document;
- }
- else
- actual_target = con->target;
++ set_local_file (&actual_target, con->target);
+
+ /* If downloading a plain file, set valid (non-zero) permissions. */
+ if (dlthis && (actual_target != NULL) && (f->type == FT_PLAINFILE))
{
- const char *fl = NULL;
- set_local_file (&fl, con->target);
- if (fl)
- touch (fl, f->tstamp);
+ if (f->perms)
+ chmod (actual_target, f->perms);
+ else
+ DEBUGP (("Unrecognized permissions for %s.\n", actual_target));
}
- else if (f->tstamp == -1)
- logprintf (LOG_NOTQUIET, _("%s: corrupt time-stamp.\n"), con->target);
- if (f->perms && f->type == FT_PLAINFILE && dlthis)
+ /* Set the time-stamp information to the local file. Symlinks
+ are not to be stamped because it sets the stamp on the
+ original. :( */
-
+ if (actual_target != NULL)
{
- if (opt.preserve_perm)
- chmod (con->target, f->perms);
+ if (!(f->type == FT_SYMLINK && !opt.retr_symlinks)
+ && f->tstamp != -1
+ && dlthis
+ && file_exists_p (con->target))
+ {
+ touch (actual_target, f->tstamp);
+ }
+ else if (f->tstamp == -1)
+ logprintf (LOG_NOTQUIET, _("%s: corrupt time-stamp.\n"),
- actual_target);
++ actual_target);
}
- else
- DEBUGP (("Unrecognized permissions for %s.\n", con->target));
xfree (con->target);
con->target = old_target;
if (!accdir (newdir))
{
logprintf (LOG_VERBOSE, _("\
-Not descending to `%s' as it is excluded/not-included.\n"),
- escnonprint (newdir));
+Not descending to %s as it is excluded/not-included.\n"),
+ quote (newdir));
continue;
}
{
if (f->type != FT_DIRECTORY && !acceptable (f->name))
{
- logprintf (LOG_VERBOSE, _("Rejecting `%s'.\n"),
- escnonprint (f->name));
+ logprintf (LOG_VERBOSE, _("Rejecting %s.\n"),
+ quote (f->name));
f = delelement (f, &start);
}
else
{
if (has_insecure_name_p (f->name))
{
- logprintf (LOG_VERBOSE, _("Rejecting `%s'.\n"),
- escnonprint (f->name));
+ logprintf (LOG_VERBOSE, _("Rejecting %s.\n"),
+ quote (f->name));
f = delelement (f, &start);
}
else
if (matchres == -1)
{
logprintf (LOG_NOTQUIET, _("Error matching %s against %s: %s\n"),
- u->file, escnonprint (f->name), strerror (errno));
+ u->file, quotearg_style (escape_quoting_style, f->name),
+ strerror (errno));
break;
}
if (matchres == FNM_NOMATCH)
/* No luck. */
/* #### This message SUCKS. We should see what was the
reason that nothing was retrieved. */
- logprintf (LOG_VERBOSE, _("No matches on pattern `%s'.\n"),
- escnonprint (u->file));
+ logprintf (LOG_VERBOSE, _("No matches on pattern %s.\n"),
+ quote (u->file));
}
- else /* GLOB_GETONE or GLOB_GETALL */
+ else if (action == GLOB_GETONE) /* GLOB_GETONE or GLOB_GETALL */
{
/* Let's try retrieving it anyway. */
con->st |= ON_YOUR_OWN;
res = ftp_loop_internal (u, NULL, con);
return res;
}
+
+ /* If action == GLOB_GETALL, and the file list is empty, there's
+ no point in trying to download anything or in complaining about
+ it. (An empty directory should not cause complaints.)
+ */
}
freefileinfo (start);
if (opt.quota && total_downloaded_bytes > opt.quota)
else
sz = -1;
logprintf (LOG_NOTQUIET,
- _("Wrote HTML-ized index to `%s' [%s].\n"),
- filename, number_to_static_string (sz));
+ _("Wrote HTML-ized index to %s [%s].\n"),
+ quote (filename), number_to_static_string (sz));
}
else
logprintf (LOG_NOTQUIET,
- _("Wrote HTML-ized index to `%s'.\n"),
- filename);
+ _("Wrote HTML-ized index to %s.\n"),
+ quote (filename));
}
xfree (filename);
}
#include <assert.h>
#ifndef WINDOWS
+# include <sys/types.h>
# include <sys/socket.h>
# include <netinet/in.h>
# ifndef __BEOS__
# include <arpa/inet.h>
# endif
- # include <netdb.h>
+ # ifdef __VMS
+ # include "vms_ip.h"
+ # else /* def __VMS */
+ # include <netdb.h>
+ # endif /* def __VMS [else] */
# define SET_H_ERRNO(err) ((void)(h_errno = (err)))
#else /* WINDOWS */
# define SET_H_ERRNO(err) WSASetLastError (err)
# define NO_ADDRESS NO_DATA
#endif
+#if !HAVE_DECL_H_ERRNO
+extern int h_errno;
+#endif
+
+
/* Lists of IP addresses that result from running DNS queries. See
lookup_host for details. */
/* No luck with the cache; resolve HOST. */
if (!silent && !numeric_address)
- logprintf (LOG_VERBOSE, _("Resolving %s... "), escnonprint (host));
+ {
+ char *str = NULL, *name;
+
+ if (opt.enable_iri && (name = idn_decode ((char *) host)) != NULL)
+ {
+ int len = strlen (host) + strlen (name) + 4;
+ str = xmalloc (len);
+ snprintf (str, len, "%s (%s)", name, host);
+ str[len-1] = '\0';
+ xfree (name);
+ }
+
+ logprintf (LOG_VERBOSE, _("Resolving %s... "),
+ quotearg_style (escape_quoting_style, str ? str : host));
+
+ if (str)
+ xfree (str);
+ }
#ifdef ENABLE_IPV6
{
/* HTTP support.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
#include "test.h"
#endif
- extern char *version_string;
+ #include "version.h"
+
+ #ifdef __VMS
+ # include "vms.h"
+ #endif /* def __VMS */
/* Forward decls. */
+struct http_stat;
static char *create_authorization_line (const char *, const char *,
const char *, const char *,
const char *, bool *);
static char *basic_authentication_encode (const char *, const char *);
static bool known_authentication_scheme_p (const char *, const char *);
+static void ensure_extension (struct http_stat *, const char *, int *);
static void load_cookies (void);
#ifndef MIN
#define TEXTHTML_S "text/html"
#define TEXTXHTML_S "application/xhtml+xml"
+#define TEXTCSS_S "text/css"
/* Some status code validation macros: */
#define H_20X(x) (((x) >= 200) && ((x) < 300))
int hcount, hcapacity;
};
+extern int numurls;
+
/* Create a new, empty request. At least request_set_method must be
called before the request can be used. */
else if (basic_authed_hosts
&& hash_table_contains(basic_authed_hosts, hostname))
{
- DEBUGP(("Found `%s' in basic_authed_hosts.\n", hostname));
+ DEBUGP(("Found %s in basic_authed_hosts.\n", quote (hostname)));
do_challenge = true;
}
else
{
- DEBUGP(("Host `%s' has not issued a general basic challenge.\n",
- hostname));
+ DEBUGP(("Host %s has not issued a general basic challenge.\n",
+ quote (hostname)));
}
if (do_challenge)
{
if (!hash_table_contains(basic_authed_hosts, hostname))
{
hash_table_put (basic_authed_hosts, xstrdup(hostname), NULL);
- DEBUGP(("Inserted `%s' into basic_authed_hosts\n", hostname));
+ DEBUGP(("Inserted %s into basic_authed_hosts\n", quote (hostname)));
}
}
{
char *copy;
BOUNDED_TO_ALLOCA(b, e, copy);
- logprintf (LOG_VERBOSE, "%s%s\n", prefix, escnonprint(copy));
+ logprintf (LOG_ALWAYS, "%s%s\n", prefix,
+ quotearg_style (escape_quoting_style, copy));
}
/* Print the server response, line by line, omitting the trailing CRLF
char *remote_time; /* remote time-stamp string */
char *error; /* textual HTTP error */
int statcode; /* status code */
+ char *message; /* status message */
wgint rd_size; /* amount of data read from socket */
double dltime; /* time it took to download the data */
const char *referer; /* value of the referer header. */
xfree_null (hs->rderrmsg);
xfree_null (hs->local_file);
xfree_null (hs->orig_file_name);
+ xfree_null (hs->message);
/* Guard against being called twice. */
hs->newloc = NULL;
&& (c_isspace (line[sizeof (string_constant) - 1]) \
|| !line[sizeof (string_constant) - 1]))
+ #ifdef __VMS
+ #define SET_USER_AGENT(req) do { \
+ if (!opt.useragent) \
+ request_set_header (req, "User-Agent", \
+ aprintf ("Wget/%s (VMS %s %s)", \
+ VERSION_STRING, vms_arch(), vms_vers()), \
+ rel_value); \
+ else if (*opt.useragent) \
+ request_set_header (req, "User-Agent", opt.useragent, rel_none); \
+ } while (0)
+ #else /* def __VMS */
#define SET_USER_AGENT(req) do { \
if (!opt.useragent) \
request_set_header (req, "User-Agent", \
- aprintf ("Wget/%s", version_string), rel_value); \
+ aprintf ("Wget/%s (%s)", \
+ VERSION_STRING, OS_TYPE), \
+ rel_value); \
else if (*opt.useragent) \
request_set_header (req, "User-Agent", opt.useragent, rel_none); \
} while (0)
+ #endif /* def __VMS [else] */
/* The flags that allow clobbering the file (opening with "wb").
Defined here to avoid repetition later. #### This will require
If PROXY is non-NULL, the connection will be made to the proxy
server, and u->url will be requested. */
static uerr_t
-gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
+gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
+ struct iri *iri)
{
struct request *req;
hs->newloc = NULL;
hs->remote_time = NULL;
hs->error = NULL;
+ hs->message = NULL;
conn = u;
user = user ? user : (opt.http_user ? opt.http_user : opt.user);
passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd);
- if (user && passwd
- && !u->user) /* We only do "site-wide" authentication with "global"
- user/password values; URL user/password info overrides. */
+ /* We only do "site-wide" authentication with "global" user/password
+ * values unless --auth-no-challange has been requested; URL user/password
+ * info overrides. */
+ if (user && passwd && (!u->user || opt.auth_without_challenge))
{
/* If this is a host for which we've already received a Basic
* challenge, we'll go ahead and send Basic authentication creds. */
basic_auth_finished = maybe_send_basic_creds(u->host, user, passwd, req);
}
- proxyauth = NULL;
- if (proxy)
- {
- char *proxy_user, *proxy_passwd;
- /* For normal username and password, URL components override
- command-line/wgetrc parameters. With proxy
- authentication, it's the reverse, because proxy URLs are
- normally the "permanent" ones, so command-line args
- should take precedence. */
- if (opt.proxy_user && opt.proxy_passwd)
- {
- proxy_user = opt.proxy_user;
- proxy_passwd = opt.proxy_passwd;
- }
- else
- {
- proxy_user = proxy->user;
- proxy_passwd = proxy->passwd;
- }
- /* #### This does not appear right. Can't the proxy request,
- say, `Digest' authentication? */
- if (proxy_user && proxy_passwd)
- proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
-
- /* If we're using a proxy, we will be connecting to the proxy
- server. */
- conn = proxy;
-
- /* Proxy authorization over SSL is handled below. */
-#ifdef HAVE_SSL
- if (u->scheme != SCHEME_HTTPS)
-#endif
- request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
- }
-
/* Generate the Host header, HOST:PORT. Take into account that:
- Broken server-side software often doesn't recognize the PORT
post_data_size = file_size (opt.post_file_name);
if (post_data_size == -1)
{
- logprintf (LOG_NOTQUIET, _("POST data file `%s' missing: %s\n"),
- opt.post_file_name, strerror (errno));
+ logprintf (LOG_NOTQUIET, _("POST data file %s missing: %s\n"),
+ quote (opt.post_file_name), strerror (errno));
post_data_size = 0;
}
}
without authorization header fails. (Expected to happen at least
for the Digest authorization scheme.) */
+ proxyauth = NULL;
+ if (proxy)
+ {
+ char *proxy_user, *proxy_passwd;
+ /* For normal username and password, URL components override
+ command-line/wgetrc parameters. With proxy
+ authentication, it's the reverse, because proxy URLs are
+ normally the "permanent" ones, so command-line args
+ should take precedence. */
+ if (opt.proxy_user && opt.proxy_passwd)
+ {
+ proxy_user = opt.proxy_user;
+ proxy_passwd = opt.proxy_passwd;
+ }
+ else
+ {
+ proxy_user = proxy->user;
+ proxy_passwd = proxy->passwd;
+ }
+ /* #### This does not appear right. Can't the proxy request,
+ say, `Digest' authentication? */
+ if (proxy_user && proxy_passwd)
+ proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
+
+ /* If we're using a proxy, we will be connecting to the proxy
+ server. */
+ conn = proxy;
+
+ /* Proxy authorization over SSL is handled below. */
+#ifdef HAVE_SSL
+ if (u->scheme != SCHEME_HTTPS)
+#endif
+ request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
+ }
+
keep_alive = false;
/* Establish the connection. */
sock = pconn.socket;
using_ssl = pconn.ssl;
logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
- escnonprint (pconn.host), pconn.port);
+ quotearg_style (escape_quoting_style, pconn.host),
+ pconn.port);
DEBUGP (("Reusing fd %d.\n", sock));
if (pconn.authorized)
/* If the connection is already authorized, the "Basic"
{
request_free (req);
logprintf(LOG_NOTQUIET,
- _("%s: unable to resolve host address `%s'\n"),
- exec_name, relevant->host);
+ _("%s: unable to resolve host address %s\n"),
+ exec_name, quote (relevant->host));
return HOSTERR;
}
}
resp = resp_new (head);
statcode = resp_status (resp, &message);
+ hs->message = xstrdup (message);
resp_free (resp);
xfree (head);
if (statcode != 200)
{
failed_tunnel:
logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
- message ? escnonprint (message) : "?");
+ message ? quotearg_style (escape_quoting_style, message) : "?");
xfree_null (message);
return CONSSLERR;
}
if (conn->scheme == SCHEME_HTTPS)
{
- if (!ssl_connect (sock) || !ssl_check_certificate (sock, u->host))
+ if (!ssl_connect_wget (sock) || !ssl_check_certificate (sock, u->host))
{
fd_close (sock);
return CONSSLERR;
/* Check for status line. */
message = NULL;
statcode = resp_status (resp, &message);
+ hs->message = xstrdup (message);
if (!opt.server_response)
logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
- message ? escnonprint (message) : "");
+ message ? quotearg_style (escape_quoting_style, message) : "");
else
{
logprintf (LOG_VERBOSE, "\n");
print_server_response (resp, " ");
}
+ /* Check for keep-alive related responses. */
+ if (!inhibit_keep_alive && contlen != -1)
+ {
+ if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
+ keep_alive = true;
+ else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
+ {
+ if (0 == strcasecmp (hdrval, "Keep-Alive"))
+ keep_alive = true;
+ }
+ }
+
+ if (keep_alive)
+ /* The server has promised that it will not close the connection
+ when we're done. This means that we can register it. */
+ register_persistent (conn->host, conn->port, sock, using_ssl);
+
+ if (statcode == HTTP_STATUS_UNAUTHORIZED)
+ {
+ /* Authorization is required. */
+ if (keep_alive && !head_only && skip_short_body (sock, contlen))
+ CLOSE_FINISH (sock);
+ else
+ CLOSE_INVALIDATE (sock);
+ pconn.authorized = false;
+ if (!auth_finished && (user && passwd))
+ {
+ /* IIS sends multiple copies of WWW-Authenticate, one with
+ the value "negotiate", and other(s) with data. Loop over
+ all the occurrences and pick the one we recognize. */
+ int wapos;
+ const char *wabeg, *waend;
+ char *www_authenticate = NULL;
+ for (wapos = 0;
+ (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
+ &wabeg, &waend)) != -1;
+ ++wapos)
+ if (known_authentication_scheme_p (wabeg, waend))
+ {
+ BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
+ break;
+ }
+
+ if (!www_authenticate)
+ {
+ /* If the authentication header is missing or
+ unrecognized, there's no sense in retrying. */
+ logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
+ }
+ else if (!basic_auth_finished
+ || !BEGINS_WITH (www_authenticate, "Basic"))
+ {
+ char *pth;
+ pth = url_full_path (u);
+ request_set_header (req, "Authorization",
+ create_authorization_line (www_authenticate,
+ user, passwd,
+ request_method (req),
+ pth,
+ &auth_finished),
+ rel_value);
+ if (BEGINS_WITH (www_authenticate, "NTLM"))
+ ntlm_seen = true;
+ else if (!u->user && BEGINS_WITH (www_authenticate, "Basic"))
+ {
+ /* Need to register this host as using basic auth,
+ * so we automatically send creds next time. */
+ register_basic_auth_host (u->host);
+ }
+ xfree (pth);
+ xfree_null (message);
+ resp_free (resp);
+ xfree (head);
+ goto retry_with_auth;
+ }
+ else
+ {
+ /* We already did Basic auth, and it failed. Gotta
+ * give up. */
+ }
+ }
+ logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
+ request_free (req);
+ xfree_null (message);
+ resp_free (resp);
+ xfree (head);
+ return AUTHFAILED;
+ }
+ else /* statcode != HTTP_STATUS_UNAUTHORIZED */
+ {
+ /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
+ if (ntlm_seen)
+ pconn.authorized = true;
+ }
+
/* Determine the local filename if needed. Notice that if -O is used
* hstat.local_file is set by http_loop to the argument of -O. */
if (!hs->local_file)
hs->local_file = url_file_name (u);
}
}
-
+
/* TODO: perform this check only once. */
if (!hs->existence_checked && file_exists_p (hs->local_file))
{
- if (opt.noclobber)
+ if (opt.noclobber && !opt.output_document)
{
/* If opt.noclobber is turned on and file already exists, do not
- retrieve the file */
+ retrieve the file. But if the output_document was given, then this
+ test was already done and the file didn't exist. Hence the !opt.output_document */
logprintf (LOG_VERBOSE, _("\
-File `%s' already there; not retrieving.\n\n"), hs->local_file);
+File %s already there; not retrieving.\n\n"), quote (hs->local_file));
/* If the file is there, we suppose it's retrieved OK. */
*dt |= RETROKF;
if (has_html_suffix_p (hs->local_file))
*dt |= TEXTHTML;
+ xfree (head);
+ xfree_null (message);
return RETRUNNEEDED;
}
else if (!ALLOW_CLOBBER)
if (opt.timestamping && !hs->timestamp_checked)
{
size_t filename_len = strlen (hs->local_file);
- char *filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
+ char *filename_plus_orig_suffix = alloca (filename_len + sizeof (ORIG_SFX));
bool local_dot_orig_file_exists = false;
char *local_filename = NULL;
struct_stat st;
--hniksic */
memcpy (filename_plus_orig_suffix, hs->local_file, filename_len);
memcpy (filename_plus_orig_suffix + filename_len,
- ".orig", sizeof (".orig"));
+ ORIG_SFX, sizeof (ORIG_SFX));
/* Try to stat() the .orig file. */
if (stat (filename_plus_orig_suffix, &st) == 0)
local_dot_orig_file_exists = true;
local_filename = filename_plus_orig_suffix;
}
- }
+ }
if (!local_dot_orig_file_exists)
/* Couldn't stat() <file>.orig, so try to stat() <file>. */
contlen = parsed;
}
- /* Check for keep-alive related responses. */
- if (!inhibit_keep_alive && contlen != -1)
- {
- if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
- keep_alive = true;
- else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
- {
- if (0 == strcasecmp (hdrval, "Keep-Alive"))
- keep_alive = true;
- }
- }
- if (keep_alive)
- /* The server has promised that it will not close the connection
- when we're done. This means that we can register it. */
- register_persistent (conn->host, conn->port, sock, using_ssl);
-
- if (statcode == HTTP_STATUS_UNAUTHORIZED)
- {
- /* Authorization is required. */
- if (keep_alive && !head_only && skip_short_body (sock, contlen))
- CLOSE_FINISH (sock);
- else
- CLOSE_INVALIDATE (sock);
- pconn.authorized = false;
- if (!auth_finished && (user && passwd))
- {
- /* IIS sends multiple copies of WWW-Authenticate, one with
- the value "negotiate", and other(s) with data. Loop over
- all the occurrences and pick the one we recognize. */
- int wapos;
- const char *wabeg, *waend;
- char *www_authenticate = NULL;
- for (wapos = 0;
- (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
- &wabeg, &waend)) != -1;
- ++wapos)
- if (known_authentication_scheme_p (wabeg, waend))
- {
- BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
- break;
- }
-
- if (!www_authenticate)
- {
- /* If the authentication header is missing or
- unrecognized, there's no sense in retrying. */
- logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
- }
- else if (!basic_auth_finished
- || !BEGINS_WITH (www_authenticate, "Basic"))
- {
- char *pth;
- pth = url_full_path (u);
- request_set_header (req, "Authorization",
- create_authorization_line (www_authenticate,
- user, passwd,
- request_method (req),
- pth,
- &auth_finished),
- rel_value);
- if (BEGINS_WITH (www_authenticate, "NTLM"))
- ntlm_seen = true;
- else if (!u->user && BEGINS_WITH (www_authenticate, "Basic"))
- {
- /* Need to register this host as using basic auth,
- * so we automatically send creds next time. */
- register_basic_auth_host (u->host);
- }
- xfree (pth);
- goto retry_with_auth;
- }
- else
- {
- /* We already did Basic auth, and it failed. Gotta
- * give up. */
- }
- }
- logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
- request_free (req);
- return AUTHFAILED;
- }
- else /* statcode != HTTP_STATUS_UNAUTHORIZED */
- {
- /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
- if (ntlm_seen)
- pconn.authorized = true;
- }
request_free (req);
hs->statcode = statcode;
char *tmp = strchr (type, ';');
if (tmp)
{
+ /* sXXXav: only needed if IRI support is enabled */
+ char *tmp2 = tmp + 1;
+
while (tmp > type && c_isspace (tmp[-1]))
--tmp;
*tmp = '\0';
+
+ /* Try to get remote encoding if needed */
+ if (opt.enable_iri && !opt.encoding_remote)
+ {
+ tmp = parse_charset (tmp2);
+ if (tmp)
+ set_content_encoding (iri, tmp);
+ }
}
}
hs->newloc = resp_header_strdup (resp, "Location");
else
CLOSE_INVALIDATE (sock);
xfree_null (type);
+ xfree (head);
return NEWLOCATION;
}
}
else
*dt &= ~TEXTHTML;
- if (opt.html_extension && (*dt & TEXTHTML))
- /* -E / --html-extension / html_extension = on was specified, and this is a
- text/html file. If some case-insensitive variation on ".htm[l]" isn't
- already the file's suffix, tack on ".html". */
- {
- char *last_period_in_local_filename = strrchr (hs->local_file, '.');
+ if (type &&
+ 0 == strncasecmp (type, TEXTCSS_S, strlen (TEXTCSS_S)))
+ *dt |= TEXTCSS;
+ else
+ *dt &= ~TEXTCSS;
- if (last_period_in_local_filename == NULL
- || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
- || 0 == strcasecmp (last_period_in_local_filename, ".html")))
+ if (opt.html_extension)
+ {
+ if (*dt & TEXTHTML)
+ /* -E / --html-extension / html_extension = on was specified,
+ and this is a text/html file. If some case-insensitive
+ variation on ".htm[l]" isn't already the file's suffix,
+ tack on ".html". */
{
- int local_filename_len = strlen (hs->local_file);
- /* Resize the local file, allowing for ".html" preceded by
- optional ".NUMBER". */
- hs->local_file = xrealloc (hs->local_file,
- local_filename_len + 24 + sizeof (".html"));
- strcpy(hs->local_file + local_filename_len, ".html");
- /* If clobbering is not allowed and the file, as named,
- exists, tack on ".NUMBER.html" instead. */
- if (!ALLOW_CLOBBER && file_exists_p (hs->local_file))
- {
- int ext_num = 1;
- do
- sprintf (hs->local_file + local_filename_len,
- ".%d.html", ext_num++);
- while (file_exists_p (hs->local_file));
- }
- *dt |= ADDED_HTML_EXTENSION;
+ ensure_extension (hs, ".html", dt);
+ }
+ else if (*dt & TEXTCSS)
+ {
+ ensure_extension (hs, ".css", dt);
}
}
- if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE)
+ if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE
+ || (hs->restval > 0 && statcode == HTTP_STATUS_OK
+ && contrange == 0 && hs->restval >= contlen)
+ )
{
/* If `-c' is in use and the file has been fully downloaded (or
the remote file has shrunk), Wget effectively requests bytes
- after the end of file and the server response with 416. */
+ after the end of file and the server response with 416
+ (or 200 with a <= Content-Length. */
logputs (LOG_VERBOSE, _("\
\n The file is already fully retrieved; nothing to do.\n\n"));
/* In case the caller inspects. */
xfree_null (type);
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
+ xfree (head);
return RETRUNNEEDED;
}
if ((contrange != 0 && contrange != hs->restval)
Bail out. */
xfree_null (type);
CLOSE_INVALIDATE (sock);
+ xfree (head);
return RANGEERR;
}
if (contlen == -1)
logputs (LOG_VERBOSE,
opt.ignore_length ? _("ignored") : _("unspecified"));
if (type)
- logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type));
+ logprintf (LOG_VERBOSE, " [%s]\n", quotearg_style (escape_quoting_style, type));
else
logputs (LOG_VERBOSE, "\n");
}
CLOSE_FINISH (sock);
else
CLOSE_INVALIDATE (sock);
+ xfree (head);
return RETRFINISHED;
}
+ /* 2005-06-17 SMS.
+ For VMS, define common fopen() optional arguments.
+ */
+ #ifdef __VMS
+ # define FOPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
+ # define FOPEN_BIN_FLAG 3
+ #else /* def __VMS */
+ # define FOPEN_BIN_FLAG 1
+ #endif /* def __VMS [else] */
+
/* Open the local file. */
if (!output_stream)
{
if (opt.backups)
rotate_backups (hs->local_file);
if (hs->restval)
- fp = fopen (hs->local_file, "ab");
+ {
+ #ifdef __VMS
+ int open_id;
+
+ open_id = 21;
+ fp = fopen (hs->local_file, "ab", FOPEN_OPT_ARGS);
+ #else /* def __VMS */
+ fp = fopen (hs->local_file, "ab");
+ #endif /* def __VMS [else] */
+ }
else if (ALLOW_CLOBBER)
- fp = fopen (hs->local_file, "wb");
+ {
+ #ifdef __VMS
+ int open_id;
+
+ open_id = 22;
+ fp = fopen (hs->local_file, "wb", FOPEN_OPT_ARGS);
+ #else /* def __VMS */
+ fp = fopen (hs->local_file, "wb");
+ #endif /* def __VMS [else] */
+ }
else
{
fp = fopen_excl (hs->local_file, true);
_("%s has sprung into existence.\n"),
hs->local_file);
CLOSE_INVALIDATE (sock);
+ xfree (head);
return FOPEN_EXCL_ERR;
}
}
{
logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno));
CLOSE_INVALIDATE (sock);
+ xfree (head);
return FOPENERR;
}
}
/* Print fetch message, if opt.verbose. */
if (opt.verbose)
{
- logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"),
- HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file);
+ logprintf (LOG_NOTQUIET, _("Saving to: %s\n"),
+ HYPHENP (hs->local_file) ? quote ("STDOUT") : quote (hs->local_file));
}
/* This confuses the timestamping code that checks for file size.
retried, and retried, and retried, and... */
uerr_t
http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
- int *dt, struct url *proxy)
+ int *dt, struct url *proxy, struct iri *iri)
{
int count;
bool got_head = false; /* used for time-stamping and filename detection */
uerr_t err, ret = TRYLIMEXC;
time_t tmr = -1; /* remote time-stamp */
struct http_stat hstat; /* HTTP status */
- struct_stat st;
+ struct_stat st;
bool send_head_first = true;
+ char *file_name;
/* Assert that no value for *LOCAL_FILE was passed. */
assert (local_file == NULL || *local_file == NULL);
-
+
/* Set LOCAL_FILE parameter. */
if (local_file && opt.output_document)
*local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
-
+
/* Reset NEWLOC parameter. */
*newloc = NULL;
/* TODO: Ick! This code is now in both gethttp and http_loop, and is
* screaming for some refactoring. */
- if (got_name && file_exists_p (hstat.local_file) && opt.noclobber)
+ if (got_name && file_exists_p (hstat.local_file) && opt.noclobber && !opt.output_document)
{
/* If opt.noclobber is turned on and file already exists, do not
- retrieve the file */
+ retrieve the file. But if the output_document was given, then this
+ test was already done and the file didn't exist. Hence the !opt.output_document */
logprintf (LOG_VERBOSE, _("\
-File `%s' already there; not retrieving.\n\n"),
- hstat.local_file);
+File %s already there; not retrieving.\n\n"),
+ quote (hstat.local_file));
/* If the file is there, we suppose it's retrieved OK. */
*dt |= RETROKF;
if (has_html_suffix_p (hstat.local_file))
*dt |= TEXTHTML;
- return RETRUNNEEDED;
+ ret = RETROK;
+ goto exit;
}
/* Reset the counter. */
count = 0;
-
+
/* Reset the document type. */
*dt = 0;
-
+
/* Skip preliminary HEAD request if we're not in spider mode AND
* if -O was given or HTTP Content-Disposition support is disabled. */
if (!opt.spider
/* Send preliminary HEAD request if -N is given and we have an existing
* destination file. */
- if (opt.timestamping
+ file_name = url_file_name (u);
+ if (opt.timestamping
&& !opt.content_disposition
- && file_exists_p (url_file_name (u)))
+ && file_exists_p (file_name))
send_head_first = true;
+ xfree (file_name);
/* THE loop */
do
/* Increment the pass counter. */
++count;
sleep_between_retrievals (count);
-
+
/* Get the current time string. */
tms = datetime_str (time (NULL));
-
+
if (opt.spider && !got_head)
logprintf (LOG_VERBOSE, _("\
Spider mode enabled. Check if remote file exists.\n"));
if (opt.verbose)
{
char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
-
- if (count > 1)
+
+ if (count > 1)
{
char tmp[256];
sprintf (tmp, _("(try:%2d)"), count);
logprintf (LOG_NOTQUIET, "--%s-- %s %s\n",
tms, tmp, hurl);
}
- else
+ else
{
logprintf (LOG_NOTQUIET, "--%s-- %s\n",
tms, hurl);
}
-
+
#ifdef WINDOWS
ws_changetitle (hurl);
#endif
/* Default document type is empty. However, if spider mode is
on or time-stamping is employed, HEAD_ONLY commands is
encoded within *dt. */
- if (send_head_first && !got_head)
+ if (send_head_first && !got_head)
*dt |= HEAD_ONLY;
else
*dt &= ~HEAD_ONLY;
*dt &= ~SEND_NOCACHE;
/* Try fetching the document, or at least its head. */
- err = gethttp (u, &hstat, dt, proxy);
+ err = gethttp (u, &hstat, dt, proxy, iri);
/* Time? */
tms = datetime_str (time (NULL));
-
+
/* Get the new location (with or without the redirection). */
if (hstat.newloc)
*newloc = xstrdup (hstat.newloc);
case FWRITEERR: case FOPENERR:
/* Another fatal error. */
logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
- hstat.local_file, strerror (errno));
+ logprintf (LOG_NOTQUIET, _("Cannot write to %s (%s).\n"),
+ quote (hstat.local_file), strerror (errno));
case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
case SSLINITFAILED: case CONTNOTSUPPORTED:
/* Fatal errors just return from the function. */
hstat.statcode);
ret = WRONGCODE;
}
- else
+ else
{
ret = NEWLOCATION;
}
/* All possibilities should have been exhausted. */
abort ();
}
-
+
if (!(*dt & RETROKF))
{
char *hurl = NULL;
continue;
}
/* Maybe we should always keep track of broken links, not just in
- * spider mode. */
- else if (opt.spider)
+ * spider mode.
+ * Don't log error if it was UTF-8 encoded because we will try
+ * once unencoded. */
+ else if (opt.spider && !iri->utf8_encode)
{
/* #### Again: ugly ugly ugly! */
- if (!hurl)
+ if (!hurl)
hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
nonexisting_url (hurl);
logprintf (LOG_NOTQUIET, _("\
else
{
logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
- tms, hstat.statcode, escnonprint (hstat.error));
+ tms, hstat.statcode,
+ quotearg_style (escape_quoting_style, hstat.error));
}
logputs (LOG_VERBOSE, "\n");
ret = WRONGCODE;
|| hstat.orig_file_size == hstat.contlen)
{
logprintf (LOG_VERBOSE, _("\
-Server file no newer than local file `%s' -- not retrieving.\n\n"),
- hstat.orig_file_name);
+Server file no newer than local file %s -- not retrieving.\n\n"),
+ quote (hstat.orig_file_name));
ret = RETROK;
goto exit;
}
if (opt.spider)
{
+ bool finished = true;
if (opt.recursive)
{
if (*dt & TEXTHTML)
{
logputs (LOG_VERBOSE, _("\
Remote file exists and could contain links to other resources -- retrieving.\n\n"));
+ finished = false;
}
else
{
logprintf (LOG_VERBOSE, _("\
Remote file exists but does not contain any link -- not retrieving.\n\n"));
ret = RETROK; /* RETRUNNEEDED is not for caller. */
- goto exit;
}
}
else
Remote file exists.\n\n"));
}
ret = RETROK; /* RETRUNNEEDED is not for caller. */
+ }
+
+ if (finished)
+ {
+ logprintf (LOG_NONVERBOSE,
+ _("%s URL:%s %2d %s\n"),
+ tms, u->url, hstat.statcode,
+ hstat.message ? quotearg_style (escape_quoting_style, hstat.message) : "");
goto exit;
}
}
&& ((hstat.len == hstat.contlen) ||
((hstat.res == 0) && (hstat.contlen == -1))))
{
- /* #### This code repeats in http.c and ftp.c. Move it to a
- function! */
const char *fl = NULL;
- if (opt.output_document)
- {
- if (output_stream_regular)
- fl = opt.output_document;
- }
- else
- fl = hstat.local_file;
+ set_local_file (&fl, hstat.local_file);
if (fl)
{
time_t newtmr = -1;
&& hstat.remote_time && hstat.remote_time[0])
{
newtmr = http_atotm (hstat.remote_time);
- if (newtmr != -1)
+ if (newtmr != (time_t)-1)
tmr = newtmr;
}
touch (fl, tmr);
{
if (*dt & RETROKF)
{
+ bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
+
logprintf (LOG_VERBOSE,
- _("%s (%s) - `%s' saved [%s/%s]\n\n"),
- tms, tmrate, hstat.local_file,
+ write_to_stdout
+ ? _("%s (%s) - written to stdout %s[%s/%s]\n\n")
+ : _("%s (%s) - %s saved [%s/%s]\n\n"),
+ tms, tmrate,
+ write_to_stdout ? "" : quote (hstat.local_file),
number_to_static_string (hstat.len),
number_to_static_string (hstat.contlen));
logprintf (LOG_NONVERBOSE,
number_to_static_string (hstat.contlen),
hstat.local_file, count);
}
- ++opt.numurls;
+ ++numurls;
total_downloaded_bytes += hstat.len;
/* Remember that we downloaded the file for later ".orig" code. */
{
if (*dt & RETROKF)
{
+ bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
+
logprintf (LOG_VERBOSE,
- _("%s (%s) - `%s' saved [%s]\n\n"),
- tms, tmrate, hstat.local_file,
+ write_to_stdout
+ ? _("%s (%s) - written to stdout %s[%s]\n\n")
+ : _("%s (%s) - %s saved [%s]\n\n"),
+ tms, tmrate,
+ write_to_stdout ? "" : quote (hstat.local_file),
number_to_static_string (hstat.len));
logprintf (LOG_NONVERBOSE,
"%s URL:%s [%s] -> \"%s\" [%d]\n",
tms, u->url, number_to_static_string (hstat.len),
hstat.local_file, count);
}
- ++opt.numurls;
+ ++numurls;
total_downloaded_bytes += hstat.len;
/* Remember that we downloaded the file for later ".orig" code. */
printwhat (count, opt.ntry);
continue;
}
- else
+ else if (hstat.len != hstat.restval)
/* Getting here would mean reading more data than
requested with content-length, which we never do. */
abort ();
+ else
+ {
+ /* Getting here probably means that the content-length was
+ * _less_ than the original, local size. We should probably
+ * truncate or re-read, or something. FIXME */
+ ret = RETROK;
+ goto exit;
+ }
}
else /* from now on hstat.res can only be -1 */
{
Netscape cookie specification.) */
};
const char *oldlocale;
- int i;
+ char savedlocale[256];
+ size_t i;
time_t ret = (time_t) -1;
/* Solaris strptime fails to recognize English month names in
non-English locales, which we work around by temporarily setting
locale to C before invoking strptime. */
oldlocale = setlocale (LC_TIME, NULL);
+ if (oldlocale)
+ {
+ size_t l = strlen (oldlocale);
+ if (l >= sizeof savedlocale)
+ savedlocale[0] = '\0';
+ else
+ memcpy (savedlocale, oldlocale, l);
+ }
+ else savedlocale[0] = '\0';
+
setlocale (LC_TIME, "C");
for (i = 0; i < countof (time_formats); i++)
}
/* Restore the previous locale. */
- setlocale (LC_TIME, oldlocale);
+ if (savedlocale[0])
+ setlocale (LC_TIME, savedlocale);
return ret;
}
au += 6; /* skip over `Digest' */
while (extract_param (&au, &name, &value, ','))
{
- int i;
+ size_t i;
+ size_t namelen = name.e - name.b;
for (i = 0; i < countof (options); i++)
- if (name.e - name.b == strlen (options[i].name)
- && 0 == strncmp (name.b, options[i].name, name.e - name.b))
+ if (namelen == strlen (options[i].name)
+ && 0 == strncmp (name.b, options[i].name,
+ namelen))
{
*options[i].variable = strdupdelim (value.b, value.e);
break;
first argument and are followed by whitespace or terminating \0.
The comparison is case-insensitive. */
#define STARTS(literal, b, e) \
- ((e) - (b) >= STRSIZE (literal) \
+ ((e > b) \
+ && ((size_t) ((e) - (b))) >= STRSIZE (literal) \
&& 0 == strncasecmp (b, literal, STRSIZE (literal)) \
- && ((e) - (b) == STRSIZE (literal) \
+ && ((size_t) ((e) - (b)) == STRSIZE (literal) \
|| c_isspace (b[STRSIZE (literal)])))
static bool
cookie_jar_delete (wget_cookie_jar);
}
+void
+ensure_extension (struct http_stat *hs, const char *ext, int *dt)
+{
+ char *last_period_in_local_filename = strrchr (hs->local_file, '.');
+ char shortext[8];
+ int len = strlen (ext);
+ if (len == 5)
+ {
+ strncpy (shortext, ext, len - 1);
+ shortext[len - 2] = '\0';
+ }
+
+ if (last_period_in_local_filename == NULL
+ || !(0 == strcasecmp (last_period_in_local_filename, shortext)
+ || 0 == strcasecmp (last_period_in_local_filename, ext)))
+ {
+ int local_filename_len = strlen (hs->local_file);
+ /* Resize the local file, allowing for ".html" preceded by
+ optional ".NUMBER". */
+ hs->local_file = xrealloc (hs->local_file,
+ local_filename_len + 24 + len);
+ strcpy (hs->local_file + local_filename_len, ext);
+ /* If clobbering is not allowed and the file, as named,
+ exists, tack on ".NUMBER.html" instead. */
+ if (!ALLOW_CLOBBER && file_exists_p (hs->local_file))
+ {
+ int ext_num = 1;
+ do
+ sprintf (hs->local_file + local_filename_len,
+ ".%d%s", ext_num++, ext);
+ while (file_exists_p (hs->local_file));
+ }
+ *dt |= ADDED_HTML_EXTENSION;
+ }
+}
+
#ifdef TESTING
/* Reading/parsing the initialization file.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
#include "test.h"
#endif
-/* We want tilde expansion enabled only when reading `.wgetrc' lines;
- otherwise, it will be performed by the shell. This variable will
- be set by the wgetrc-reading function. */
-
-static bool enable_tilde_expansion;
#define CMD_DECLARE(func) static bool func (const char *, const char *, void *)
{ "accept", &opt.accepts, cmd_vector },
{ "addhostdir", &opt.add_hostdir, cmd_boolean },
{ "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
+ { "askpassword", &opt.ask_passwd, cmd_boolean },
{ "authnochallenge", &opt.auth_without_challenge,
cmd_boolean },
{ "background", &opt.background, cmd_boolean },
#ifdef ENABLE_DEBUG
{ "debug", &opt.debug, cmd_boolean },
#endif
+ { "defaultpage", &opt.default_page, cmd_string},
{ "deleteafter", &opt.delete_after, cmd_boolean },
{ "dirprefix", &opt.dir_prefix, cmd_directory },
{ "dirstruct", NULL, cmd_spec_dirstruct },
{ "ftppasswd", &opt.ftp_passwd, cmd_string }, /* deprecated */
{ "ftppassword", &opt.ftp_passwd, cmd_string },
{ "ftpproxy", &opt.ftp_proxy, cmd_string },
+ #ifdef __VMS
+ { "ftpstmlf", &opt.ftp_stmlf, cmd_boolean },
+ #endif /* def __VMS */
{ "ftpuser", &opt.ftp_user, cmd_string },
{ "glob", &opt.ftp_glob, cmd_boolean },
{ "header", NULL, cmd_spec_header },
{ "inet6only", &opt.ipv6_only, cmd_boolean },
#endif
{ "input", &opt.input_filename, cmd_file },
+ { "iri", &opt.enable_iri, cmd_boolean },
{ "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean },
{ "limitrate", &opt.limit_rate, cmd_bytes },
{ "loadcookies", &opt.cookies_input, cmd_file },
+ { "locale", &opt.locale, cmd_string },
{ "logfile", &opt.lfilename, cmd_file },
{ "login", &opt.ftp_user, cmd_string },/* deprecated*/
{ "maxredirect", &opt.max_redirect, cmd_number },
{ "referer", &opt.referer, cmd_string },
{ "reject", &opt.rejects, cmd_vector },
{ "relativeonly", &opt.relative_only, cmd_boolean },
+ { "remoteencoding", &opt.encoding_remote, cmd_string },
{ "removelisting", &opt.remove_listing, cmd_boolean },
{ "restrictfilenames", NULL, cmd_spec_restrict_file_names },
{ "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
tmp = getenv ("no_proxy");
if (tmp)
opt.no_proxy = sepstring (tmp);
+ opt.prefer_family = prefer_none;
opt.allow_cache = true;
opt.read_timeout = 900;
opt.restrict_files_case = restrict_no_case_restriction;
opt.max_redirect = 20;
+
+ opt.waitretry = 10;
+
+#ifdef ENABLE_IRI
+ opt.enable_iri = true;
+#else
+ opt.enable_iri = false;
+#endif
+ opt.locale = NULL;
+ opt.encoding_remote = NULL;
}
\f
/* Return the user's home directory (strdup-ed), or NULL if none is
char *
home_dir (void)
{
- char *home = getenv ("HOME");
+ static char buf[PATH_MAX];
+ static char *home;
if (!home)
{
+ home = getenv ("HOME");
+ if (!home)
+ {
#if defined(MSDOS)
- /* Under MSDOS, if $HOME isn't defined, use the directory where
- `wget.exe' resides. */
- const char *_w32_get_argv0 (void); /* in libwatt.a/pcconfig.c */
- char *p, buf[PATH_MAX];
-
- strcpy (buf, _w32_get_argv0 ());
- p = strrchr (buf, '/'); /* djgpp */
- if (!p)
- p = strrchr (buf, '\\'); /* others */
- assert (p);
- *p = '\0';
- home = buf;
+ /* Under MSDOS, if $HOME isn't defined, use the directory where
+ `wget.exe' resides. */
+ const char *_w32_get_argv0 (void); /* in libwatt.a/pcconfig.c */
+ char *p;
+
+ strcpy (buf, _w32_get_argv0 ());
+ p = strrchr (buf, '/'); /* djgpp */
+ if (!p)
+ p = strrchr (buf, '\\'); /* others */
+ assert (p);
+ *p = '\0';
+ home = buf;
#elif !defined(WINDOWS)
- /* If HOME is not defined, try getting it from the password
- file. */
- struct passwd *pwd = getpwuid (getuid ());
- if (!pwd || !pwd->pw_dir)
- return NULL;
- home = pwd->pw_dir;
+ /* If HOME is not defined, try getting it from the password
+ file. */
+ struct passwd *pwd = getpwuid (getuid ());
+ if (!pwd || !pwd->pw_dir)
+ return NULL;
+ strcpy (buf, pwd->pw_dir);
+ home = buf;
#else /* !WINDOWS */
- /* Under Windows, if $HOME isn't defined, use the directory where
- `wget.exe' resides. */
- home = ws_mypath ();
+ /* Under Windows, if $HOME isn't defined, use the directory where
+ `wget.exe' resides. */
+ home = ws_mypath ();
#endif /* WINDOWS */
+ }
}
return home ? xstrdup (home) : NULL;
}
-/* Return the path to the user's .wgetrc. This is either the value of
- `WGETRC' environment variable, or `$HOME/.wgetrc'.
-
+/* Check the 'WGETRC' environment variable and return the file name
+ if 'WGETRC' is set and is a valid file.
If the `WGETRC' variable exists but the file does not exist, the
function will exit(). */
-static char *
-wgetrc_file_name (void)
+char *
+wgetrc_env_file_name (void)
{
- char *env, *home;
- char *file = NULL;
-
- /* Try the environment. */
- env = getenv ("WGETRC");
+ char *env = getenv ("WGETRC");
if (env && *env)
{
if (!file_exists_p (env))
}
return xstrdup (env);
}
+ return NULL;
+}
+/* Check for the existance of '$HOME/.wgetrc' and return it's path
+ if it exists and is set. */
+char *
+wgetrc_user_file_name (void)
+{
+ char *home = home_dir ();
+ char *file = NULL;
+ /* If that failed, try $HOME/.wgetrc (or equivalent). */
+
+ #ifdef __VMS
+ file = "SYS$LOGIN:.wgetrc";
+ #else /* def __VMS */
+ home = home_dir ();
if (home)
file = aprintf ("%s/.wgetrc", home);
xfree_null (home);
- char *home = NULL;
+ #endif /* def __VMS [else] */
+
+ if (!file)
+ return NULL;
+ if (!file_exists_p (file))
+ {
+ xfree (file);
+ return NULL;
+ }
+ return file;
+}
+
+/* Return the path to the user's .wgetrc. This is either the value of
+ `WGETRC' environment variable, or `$HOME/.wgetrc'.
+
+ Additionally, for windows, look in the directory where wget.exe
+ resides. */
+char *
+wgetrc_file_name (void)
+{
+ char *file = wgetrc_env_file_name ();
+ if (file && *file)
+ return file;
+
+ file = wgetrc_user_file_name ();
+
#ifdef WINDOWS
/* Under Windows, if we still haven't found .wgetrc, look for the file
`wget.ini' in the directory where `wget.exe' resides; we do this for
backward compatibility with previous versions of Wget.
SYSTEM_WGETRC should not be defined under WINDOWS. */
- home = home_dir ();
-- if (!file || !file_exists_p (file))
++ if (!file)
{
++ char *home = home_dir ();
xfree_null (file);
file = NULL;
home = ws_mypath ();
if (home)
-- file = aprintf ("%s/wget.ini", home);
++ {
++ file = aprintf ("%s/wget.ini", home);
++ if (!file_exists_p (file))
++ {
++ xfree (file);
++ file = NULL;
++ }
++ xfree (home);
++ }
}
- xfree_null (home);
#endif /* WINDOWS */
-- if (!file)
-- return NULL;
-- if (!file_exists_p (file))
-- {
-- xfree (file);
-- return NULL;
-- }
return file;
}
static enum parse_line parse_line (const char *, char **, char **, int *);
static bool setval_internal (int, const char *, const char *);
+static bool setval_internal_tilde (int, const char *, const char *);
/* Initialize variables from a wgetrc file. Returns zero (failure) if
there were errors in the file. */
int ln;
int errcnt = 0;
- fp = fopen (file, "rb");
+ fp = fopen (file, "r");
if (!fp)
{
fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
file, strerror (errno));
return true; /* not a fatal error */
}
- enable_tilde_expansion = true;
ln = 1;
while ((line = read_whole_line (fp)) != NULL)
{
{
case line_ok:
/* If everything is OK, set the value. */
- if (!setval_internal (comind, com, val))
+ if (!setval_internal_tilde (comind, com, val))
{
fprintf (stderr, _("%s: Error in %s at line %d.\n"),
exec_name, file, ln);
++errcnt;
break;
case line_unknown_command:
- fprintf (stderr, _("%s: Unknown command `%s' in %s at line %d.\n"),
- exec_name, com, file, ln);
+ fprintf (stderr, _("%s: Unknown command %s in %s at line %d.\n"),
+ exec_name, quote (com), file, ln);
++errcnt;
break;
case line_empty:
xfree (line);
++ln;
}
- enable_tilde_expansion = false;
fclose (fp);
return errcnt == 0;
void
initialize (void)
{
- char *file;
+ char *file, *env_sysrc;
int ok = true;
/* Load the hard-coded defaults. */
defaults ();
-
- /* If SYSTEM_WGETRC is defined, use it. */
+
+ /* Run a non-standard system rc file when the according environment
+ variable has been set. For internal testing purposes only! */
+ env_sysrc = getenv ("SYSTEM_WGETRC");
+ if (env_sysrc && file_exists_p (env_sysrc))
+ ok &= run_wgetrc (env_sysrc);
+ /* Otherwise, if SYSTEM_WGETRC is defined, use it. */
#ifdef SYSTEM_WGETRC
- if (file_exists_p (SYSTEM_WGETRC))
+ else if (file_exists_p (SYSTEM_WGETRC))
ok &= run_wgetrc (SYSTEM_WGETRC);
#endif
/* Override it with your own, if one exists. */
if (!strcmp (file, SYSTEM_WGETRC))
{
fprintf (stderr, _("\
-%s: Warning: Both system and user wgetrc point to `%s'.\n"),
- exec_name, file);
+%s: Warning: Both system and user wgetrc point to %s.\n"),
+ exec_name, quote (file));
}
else
#endif
xfree (file);
return;
}
-\f
+
/* Remove dashes and underscores from S, modifying S in the
process. */
return line_ok;
}
+#if defined(WINDOWS) || defined(MSDOS)
+# define ISSEP(c) ((c) == '/' || (c) == '\\')
+#else
+# define ISSEP(c) ((c) == '/')
+#endif
+
/* Run commands[comind].action. */
static bool
setval_internal (int comind, const char *com, const char *val)
{
- assert (0 <= comind && comind < countof (commands));
+ assert (0 <= comind && ((size_t) comind) < countof (commands));
DEBUGP (("Setting %s (%s) to %s\n", com, commands[comind].name, val));
return commands[comind].action (com, val, commands[comind].place);
}
+static bool
+setval_internal_tilde (int comind, const char *com, const char *val)
+{
+ bool ret;
+ int homelen;
+ char *home;
+ char **pstring;
+ ret = setval_internal (comind, com, val);
+
+ /* We make tilde expansion for cmd_file and cmd_directory */
+ if (((commands[comind].action == cmd_file) ||
+ (commands[comind].action == cmd_directory))
+ && ret && (*val == '~' && ISSEP (val[1])))
+ {
+ pstring = commands[comind].place;
+ home = home_dir ();
+ if (home)
+ {
+ homelen = strlen (home);
+ while (homelen && ISSEP (home[homelen - 1]))
+ home[--homelen] = '\0';
+
+ /* Skip the leading "~/". */
+ for (++val; ISSEP (*val); val++)
+ ;
+ *pstring = concat_strings (home, "/", val, (char *)0);
+ }
+ }
+ return ret;
+}
+
/* Run command COM with value VAL. If running the command produces an
error, report the error and exit.
xfree (val);
break;
default:
- fprintf (stderr, _("%s: Invalid --execute command `%s'\n"),
- exec_name, opt);
+ fprintf (stderr, _("%s: Invalid --execute command %s\n"),
+ exec_name, quote (opt));
exit (2);
}
}
else
{
fprintf (stderr,
- _("%s: %s: Invalid boolean `%s'; use `on' or `off'.\n"),
- exec_name, com, val);
+ _("%s: %s: Invalid boolean %s; use `on' or `off'.\n"),
+ exec_name, com, quote (val));
return false;
}
if (!simple_atoi (val, val + strlen (val), place)
|| *(int *) place < 0)
{
- fprintf (stderr, _("%s: %s: Invalid number `%s'.\n"),
- exec_name, com, val);
+ fprintf (stderr, _("%s: %s: Invalid number %s.\n"),
+ exec_name, com, quote (val));
return false;
}
return true;
return true;
}
-#if defined(WINDOWS) || defined(MSDOS)
-# define ISSEP(c) ((c) == '/' || (c) == '\\')
-#else
-# define ISSEP(c) ((c) == '/')
-#endif
/* Like the above, but handles tilde-expansion when reading a user's
`.wgetrc'. In that case, and if VAL begins with `~', the tilde
/* #### If VAL is empty, perhaps should set *PLACE to NULL. */
- if (!enable_tilde_expansion || !(*val == '~' && ISSEP (val[1])))
- {
- noexpand:
- *pstring = xstrdup (val);
- }
- else
- {
- int homelen;
- char *home = home_dir ();
- if (!home)
- goto noexpand;
-
- homelen = strlen (home);
- while (homelen && ISSEP (home[homelen - 1]))
- home[--homelen] = '\0';
-
- /* Skip the leading "~/". */
- for (++val; ISSEP (*val); val++)
- ;
-
- *pstring = concat_strings (home, "/", val, (char *) 0);
- }
+ *pstring = xstrdup (val);
#if defined(WINDOWS) || defined(MSDOS)
/* Convert "\" to "/". */
double byte_value;
if (!parse_bytes_helper (val, &byte_value))
{
- fprintf (stderr, _("%s: %s: Invalid byte value `%s'\n"),
- exec_name, com, val);
+ fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
+ exec_name, com, quote (val));
return false;
}
*(wgint *)place = (wgint)byte_value;
double byte_value;
if (!parse_bytes_helper (val, &byte_value))
{
- fprintf (stderr, _("%s: %s: Invalid byte value `%s'\n"),
- exec_name, com, val);
+ fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
+ exec_name, com, quote (val));
return false;
}
*(SUM_SIZE_INT *) place = (SUM_SIZE_INT) byte_value;
if (val == end)
{
err:
- fprintf (stderr, _("%s: %s: Invalid time period `%s'\n"),
- exec_name, com, val);
+ fprintf (stderr, _("%s: %s: Invalid time period %s\n"),
+ exec_name, com, quote (val));
return false;
}
};
int ok = decode_string (val, choices, countof (choices), place);
if (!ok)
- fprintf (stderr, _("%s: %s: Invalid value `%s'.\n"), exec_name, com, val);
+ fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
return ok;
}
#endif
if (!check_user_specified_header (val))
{
- fprintf (stderr, _("%s: %s: Invalid header `%s'.\n"),
- exec_name, com, val);
+ fprintf (stderr, _("%s: %s: Invalid header %s.\n"),
+ exec_name, com, quote (val));
return false;
}
opt.user_headers = vec_append (opt.user_headers, val);
{ "IPv6", prefer_ipv6 },
{ "none", prefer_none },
};
- int prefer_family = prefer_ipv4;
+ int prefer_family = prefer_none;
int ok = decode_string (val, choices, countof (choices), &prefer_family);
if (!ok)
- fprintf (stderr, _("%s: %s: Invalid value `%s'.\n"), exec_name, com, val);
+ fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
opt.prefer_family = prefer_family;
return ok;
}
{
if (!valid_progress_implementation_p (val))
{
- fprintf (stderr, _("%s: %s: Invalid progress type `%s'.\n"),
- exec_name, com, val);
+ fprintf (stderr, _("%s: %s: Invalid progress type %s.\n"),
+ exec_name, com, quote (val));
return false;
}
xfree_null (opt.progress_type);
else
{
fprintf (stderr,
- _("%s: %s: Invalid restriction `%s', use [unix|windows],[lowercase|uppercase],[nocontrol].\n"),
- exec_name, com, val);
+ _("%s: %s: Invalid restriction %s, use [unix|windows],[lowercase|uppercase],[nocontrol].\n"),
+ exec_name, com, quote (val));
return false;
}
};
int ok = decode_string (val, choices, countof (choices), place);
if (!ok)
- fprintf (stderr, _("%s: %s: Invalid value `%s'.\n"), exec_name, com, val);
+ fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
return ok;
}
#endif
/* Disallow embedded newlines. */
if (strchr (val, '\n'))
{
- fprintf (stderr, _("%s: %s: Invalid value `%s'.\n"),
- exec_name, com, val);
+ fprintf (stderr, _("%s: %s: Invalid value %s.\n"),
+ exec_name, com, quote (val));
return false;
}
xfree_null (opt.useragent);
xfree_null (opt.cookies_output);
xfree_null (opt.user);
xfree_null (opt.passwd);
+ xfree_null (opt.base_href);
+
#endif /* DEBUG_MALLOC */
}
\f
#ifdef TESTING
+const char *
+test_commands_sorted()
+{
+ int prev_idx = 0, next_idx = 1;
+ int command_count = countof (commands) - 1;
+ int cmp = 0;
+ while (next_idx <= command_count)
+ {
+ cmp = strcasecmp (commands[prev_idx].name, commands[next_idx].name);
+ if (cmp > 0)
+ {
+ mu_assert ("FAILED", false);
+ break;
+ }
+ else
+ {
+ prev_idx ++;
+ next_idx ++;
+ }
+ }
+ return NULL;
+}
+
const char *
test_cmd_spec_restrict_file_names()
{
#include "utils.h"
#include "log.h"
- /* This file implement support for "logging". Logging means printing
+ /* 2005-10-25 SMS.
+ VMS log files are often VFC record format, not stream, so fputs() can
+ produce multiple records, even when there's no newline terminator in
+ the buffer. The result is unsightly output with spurious newlines.
+ Using fprintf() instead of fputs(), along with inhibiting some
+ fflush() activity below, seems to solve the problem.
+ */
+ #ifdef __VMS
+ # define FPUTS( s, f) fprintf( (f), "%s", (s))
+ #else /* def __VMS */
+ # define FPUTS( s, f) fputs( (s), (f))
+ #endif /* def __VMS [else] */
+
-/* This file impplement support for "logging". Logging means printing
++/* This file implements support for "logging". Logging means printing
output, plus several additional features:
- Cataloguing output by importance. You can specify that a log
return;
CHECK_VERBOSE (o);
- fputs (s, fp);
+ FPUTS (s, fp);
if (save_context_p)
saved_append (s);
if (flush_log_p)
/* Writing succeeded. */
saved_append (write_ptr);
- fputs (write_ptr, fp);
+ FPUTS (write_ptr, fp);
if (state->bigmsg)
xfree (state->bigmsg);
{
FILE *fp = get_log_fp ();
if (fp)
- fflush (fp);
+ {
+ /* 2005-10-25 SMS.
+ On VMS, flush only for a terminal. See note at FPUTS macro, above.
+ */
+ #ifdef __VMS
+ if (isatty( fileno( fp)))
+ {
+ fflush (fp);
+ }
+ #else /* def __VMS */
+ fflush (fp);
+ #endif /* def __VMS [else] */
+ }
needs_flushing = false;
}
{
struct log_ln *ln = log_lines + num;
if (ln->content)
- fputs (ln->content, fp);
+ FPUTS (ln->content, fp);
ROT_ADVANCE (num);
}
while (num != log_line_current);
if (trailing_line)
if (log_lines[log_line_current].content)
- fputs (log_lines[log_line_current].content, fp);
+ FPUTS (log_lines[log_line_current].content, fp);
fflush (fp);
}
\f
void
log_cleanup (void)
{
- int i;
+ size_t i;
for (i = 0; i < countof (ring); i++)
xfree_null (ring[i].buffer);
}
logfp = unique_create (DEFAULT_LOGFILE, false, &logfile);
if (logfp)
{
- fprintf (stderr, _("\n%s received, redirecting output to `%s'.\n"),
- redirect_request_signal_name, logfile);
+ fprintf (stderr, _("\n%s received, redirecting output to %s.\n"),
+ redirect_request_signal_name, quote (logfile));
xfree (logfile);
/* Dump the context output to the newly opened log. */
log_dump_context ();
/* Command line parsing.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
#include "http.h" /* for save_cookies */
#include <getopt.h>
+#include <getpass.h>
+#include <quote.h>
+ #ifdef __VMS
+ #include "vms.h"
+ #endif /* __VMS */
+
+ #include "version.h"
+
#ifndef PATH_SEPARATOR
# define PATH_SEPARATOR '/'
#endif
struct options opt;
+/* defined in version.c */
extern char *version_string;
+extern char *compilation_string;
+extern char *system_getrc;
+extern char *link_string;
+/* defined in build_info.c */
+extern char *compiled_features[];
+/* Used for --version output in print_version */
+#define MAX_CHARS_PER_LINE 72
+#define TABULATION 4
#if defined(SIGHUP) || defined(SIGUSR1)
static void redirect_output_signal (int);
#endif
const char *exec_name;
+
+/* Number of successfully downloaded URLs */
+int numurls = 0;
\f
#ifndef TESTING
/* Initialize I18N/L10N. That amounts to invoking setlocale, and
{
{ "accept", 'A', OPT_VALUE, "accept", -1 },
{ "append-output", 'a', OPT__APPEND_OUTPUT, NULL, required_argument },
+ { "ask-password", 0, OPT_BOOLEAN, "askpassword", -1 },
{ "auth-no-challenge", 0, OPT_BOOLEAN, "authnochallenge", -1 },
{ "background", 'b', OPT_BOOLEAN, "background", -1 },
{ "backup-converted", 'K', OPT_BOOLEAN, "backupconverted", -1 },
{ "cookies", 0, OPT_BOOLEAN, "cookies", -1 },
{ "cut-dirs", 0, OPT_VALUE, "cutdirs", -1 },
{ WHEN_DEBUG ("debug"), 'd', OPT_BOOLEAN, "debug", -1 },
+ { "default-page", 0, OPT_VALUE, "defaultpage", -1 },
{ "delete-after", 0, OPT_BOOLEAN, "deleteafter", -1 },
{ "directories", 0, OPT_BOOLEAN, "dirstruct", -1 },
{ "directory-prefix", 'P', OPT_VALUE, "dirprefix", -1 },
{ "force-directories", 'x', OPT_BOOLEAN, "dirstruct", -1 },
{ "force-html", 'F', OPT_BOOLEAN, "forcehtml", -1 },
{ "ftp-password", 0, OPT_VALUE, "ftppassword", -1 },
+ #ifdef __VMS
+ { "ftp-stmlf", 0, OPT_BOOLEAN, "ftpstmlf", -1 },
+ #endif /* def __VMS */
{ "ftp-user", 0, OPT_VALUE, "ftpuser", -1 },
{ "glob", 0, OPT_BOOLEAN, "glob", -1 },
{ "header", 0, OPT_VALUE, "header", -1 },
{ "inet6-only", '6', OPT_BOOLEAN, "inet6only", -1 },
#endif
{ "input-file", 'i', OPT_VALUE, "input", -1 },
+ { "iri", 0, OPT_BOOLEAN, "iri", -1 },
{ "keep-session-cookies", 0, OPT_BOOLEAN, "keepsessioncookies", -1 },
{ "level", 'l', OPT_VALUE, "reclevel", -1 },
{ "limit-rate", 0, OPT_VALUE, "limitrate", -1 },
{ "load-cookies", 0, OPT_VALUE, "loadcookies", -1 },
+ { "locale", 0, OPT_VALUE, "locale", -1 },
{ "max-redirect", 0, OPT_VALUE, "maxredirect", -1 },
{ "mirror", 'm', OPT_BOOLEAN, "mirror", -1 },
{ "no", 'n', OPT__NO, NULL, required_argument },
{ "referer", 0, OPT_VALUE, "referer", -1 },
{ "reject", 'R', OPT_VALUE, "reject", -1 },
{ "relative", 'L', OPT_BOOLEAN, "relativeonly", -1 },
+ { "remote-encoding", 0, OPT_VALUE, "remoteencoding", -1},
{ "remove-listing", 0, OPT_BOOLEAN, "removelisting", -1 },
{ "restrict-file-names", 0, OPT_BOOLEAN, "restrictfilenames", -1 },
{ "retr-symlinks", 0, OPT_BOOLEAN, "retrsymlinks", -1 },
init_switches (void)
{
char *p = short_options;
- int i, o = 0;
+ size_t i, o = 0;
for (i = 0; i < countof (option_data); i++)
{
struct cmdline_option *opt = &option_data[i];
N_("\
-nv, --no-verbose turn off verboseness, without being quiet.\n"),
N_("\
- -i, --input-file=FILE download URLs found in FILE.\n"),
+ -i, --input-file=FILE download URLs found in local or external FILE.\n"),
N_("\
-F, --force-html treat input file as HTML.\n"),
N_("\
--user=USER set both ftp and http user to USER.\n"),
N_("\
--password=PASS set both ftp and http password to PASS.\n"),
+ N_("\
+ --ask-password prompt for passwords.\n"),
"\n",
N_("\
--http-password=PASS set http password to PASS.\n"),
N_("\
--no-cache disallow server-cached data.\n"),
+ N_ ("\
+ --default-page=NAME Change the default page name (normally\n\
+ this is `index.html'.).\n"),
N_("\
-E, --html-extension save HTML documents with `.html' extension.\n"),
N_("\
N_("\
FTP options:\n"),
+ #ifdef __VMS
+ N_("\
+ --ftp-stmlf Use Stream_LF format for all binary FTP files.\n"),
+ #endif /* def __VMS */
N_("\
--ftp-user=USER set ftp user to USER.\n"),
N_("\
N_("\
--delete-after delete files locally after downloading them.\n"),
N_("\
- -k, --convert-links make links in downloaded HTML point to local files.\n"),
+ -k, --convert-links make links in downloaded HTML or CSS point to\n\
+ local files.\n"),
+ #ifdef __VMS
+ N_("\
+ -K, --backup-converted before converting file X, back up as X_orig.\n"),
+ #else /* def __VMS */
N_("\
-K, --backup-converted before converting file X, back up as X.orig.\n"),
+ #endif /* def __VMS [else] */
N_("\
-m, --mirror shortcut for -N -r -l inf --no-remove-listing.\n"),
N_("\
N_("Mail bug reports and suggestions to <bug-wget@gnu.org>.\n")
};
- int i;
+ size_t i;
printf (_("GNU Wget %s, a non-interactive network retriever.\n"),
- version_string);
+ VERSION_STRING);
print_usage ();
for (i = 0; i < countof (help); i++)
return buf;
}
+static char *
+prompt_for_password (void)
+{
+ if (opt.user)
+ printf (_("Password for user %s: "), quote (opt.user));
+ else
+ printf (_("Password: "));
+ return getpass("");
+}
+
+/* Function that prints the line argument while limiting it
+ to at most line_length. prefix is printed on the first line
+ and an appropriate number of spaces are added on subsequent
+ lines.*/
+static void
+format_and_print_line (const char *prefix, const char *line,
+ int line_length)
+{
+ int remaining_chars;
+ char *line_dup, *token;
+
+ assert (prefix != NULL);
+ assert (line != NULL);
+
+ line_dup = xstrdup (line);
+
+ if (line_length <= 0)
+ line_length = MAX_CHARS_PER_LINE - TABULATION;
+
+ printf ("%s", prefix);
+ remaining_chars = line_length;
+ /* We break on spaces. */
+ token = strtok (line_dup, " ");
+ while (token != NULL)
+ {
+ /* If however a token is much larger than the maximum
+ line length, all bets are off and we simply print the
+ token on the next line. */
+ if (remaining_chars <= strlen (token))
+ {
+ printf ("\n%*c", TABULATION, ' ');
+ remaining_chars = line_length - TABULATION;
+ }
+ printf ("%s ", token);
+ remaining_chars -= strlen (token) + 1; /* account for " " */
+ token = strtok (NULL, " ");
+ }
+
+ printf ("\n");
+
+ xfree (line_dup);
+}
+
static void
print_version (void)
{
- printf ("GNU Wget %s built on %s.\n\n", VERSION_STRING, OS_TYPE);
+ const char *wgetrc_title = _("Wgetrc: ");
+ const char *locale_title = _("Locale: ");
+ const char *compile_title = _("Compile: ");
+ const char *link_title = _("Link: ");
+ char *line;
+ char *env_wgetrc, *user_wgetrc;
+ int i;
+
+ printf (_("GNU Wget %s\n\n"), version_string);
+ #ifdef __VMS
+ printf ("GNU Wget %s built on VMS %s %s.\n\n",
+ VERSION_STRING, vms_arch(), vms_vers());
+ #else /* def __VMS */
++ printf ("GNU Wget %s built on %s.\n\n", version_string, OS_TYPE);
+ #endif /* def __VMS */
+ /* compiled_features is a char*[]. We limit the characters per
+ line to MAX_CHARS_PER_LINE and prefix each line with a constant
+ number of spaces for proper alignment. */
+ for (i = 0; compiled_features[i] != NULL; )
+ {
+ int line_length = MAX_CHARS_PER_LINE;
+ while ((line_length > 0) && (compiled_features[i] != NULL))
+ {
+ printf ("%s ", compiled_features[i]);
+ line_length -= strlen (compiled_features[i]) + 2;
+ i++;
+ }
+ printf ("\n");
+ }
+ printf ("\n");
+ /* Handle the case when $WGETRC is unset and $HOME/.wgetrc is
+ absent. */
+ printf ("%s\n", wgetrc_title);
+ env_wgetrc = wgetrc_env_file_name ();
+ if (env_wgetrc && *env_wgetrc)
+ {
+ printf (" %s (env)\n", env_wgetrc);
+ xfree (env_wgetrc);
+ }
+ user_wgetrc = wgetrc_user_file_name ();
+ if (user_wgetrc)
+ {
+ printf (" %s (user)\n", user_wgetrc);
+ xfree (user_wgetrc);
+ }
+#ifdef SYSTEM_WGETRC
+ printf (" %s (system)\n", SYSTEM_WGETRC);
+#endif
+
+ format_and_print_line (locale_title,
+ LOCALEDIR,
+ MAX_CHARS_PER_LINE);
+
+ format_and_print_line (compile_title,
+ compilation_string,
+ MAX_CHARS_PER_LINE);
+
+ format_and_print_line (link_title,
+ link_string,
+ MAX_CHARS_PER_LINE);
+
+ printf ("\n");
/* TRANSLATORS: When available, an actual copyright character
(cirle-c) should be used in preference to "(C)". */
fputs (_("\
stdout);
fputs (_("Currently maintained by Micah Cowan <micah@cowan.name>.\n"),
stdout);
+ fputs (_("Please send bug reports and questions to <bug-wget@gnu.org>.\n"),
+ stdout);
exit (0);
}
-\f
+
+char *program_name; /* Needed by lib/error.c. */
int
main (int argc, char **argv)
int nurl, status;
bool append_to_log = false;
+ program_name = argv[0];
+
i18n_initialize ();
/* Construct the name of the executable, without the directory part. */
exit (1);
}
#endif
- if (opt.output_document
- && (opt.page_requisites
- || opt.recursive
- || opt.timestamping))
+ if (opt.output_document)
{
- printf (_("Cannot specify -r, -p or -N if -O is given.\n"));
+ if (opt.convert_links
+ && (nurl > 1 || opt.page_requisites || opt.recursive))
+ {
+ fputs (_("\
+Cannot specify both -k and -O if multiple URLs are given, or in combination\n\
+with -p or -r. See the manual for details.\n\n"), stdout);
print_usage ();
exit (1);
+ }
+ if (opt.page_requisites
+ || opt.recursive)
+ {
+ logprintf (LOG_NOTQUIET, "%s", _("\
+WARNING: combining -O with -r or -p will mean that all downloaded content\n\
+will be placed in the single file you specified.\n\n"));
+ }
+ if (opt.timestamping)
+ {
+ logprintf (LOG_NOTQUIET, "%s", _("\
+WARNING: timestamping does nothing in combination with -O. See the manual\n\
+for details.\n\n"));
+ opt.timestamping = false;
+ }
+ if (opt.noclobber && file_exists_p(opt.output_document))
+ {
+ /* Check if output file exists; if it does, exit. */
+ logprintf (LOG_VERBOSE, _("File `%s' already there; not retrieving.\n"), opt.output_document);
+ exit(1);
+ }
}
- if (opt.output_document
- && opt.convert_links
- && nurl > 1)
+
+ if (opt.ask_passwd && opt.passwd)
{
- printf (_("Cannot specify both -k and -O if multiple URLs are given.\n"));
- print_usage ();
- exit (1);
+ printf (_("Cannot specify both --ask-password and --password.\n"));
+ print_usage ();
+ exit (1);
}
if (!nurl && !opt.input_filename)
exit (1);
}
+#ifdef ENABLE_IRI
+ if (opt.enable_iri)
+ {
+ if (opt.locale && !check_encoding_name (opt.locale))
+ opt.locale = NULL;
+
+ if (!opt.locale)
+ opt.locale = find_locale ();
+
+ if (opt.encoding_remote && !check_encoding_name (opt.encoding_remote))
+ opt.encoding_remote = NULL;
+ }
+#else
+ if (opt.enable_iri || opt.locale || opt.encoding_remote)
+ {
+ /* sXXXav : be more specific... */
+ printf(_("This version does not have support for IRIs\n"));
+ exit(1);
+ }
+#endif
+
+ if (opt.ask_passwd)
+ {
+ opt.passwd = prompt_for_password ();
+
+ if (opt.passwd == NULL || opt.passwd[0] == '\0')
+ exit (1);
+ }
+
#ifdef MSDOS
if (opt.wdebug)
dbug_init();
/* Initialize logging. */
log_init (opt.lfilename, append_to_log);
- DEBUGP (("DEBUG output created by Wget %s on %s.\n\n", version_string,
- OS_TYPE));
+ DEBUGP (("DEBUG output created by Wget %s on %s.\n\n",
+ VERSION_STRING, OS_TYPE));
/* Open the output filename if necessary. */
+
+ /* 2005-04-17 SMS.
+ Note that having the output_stream ("-O") file opened here for an FTP
+ URL rather than in getftp() (ftp.c) (and the http equivalent) rather
+ limits the ability in VMS to open the file differently for ASCII
+ versus binary FTP there. (Of course, doing it here allows a open
+ failure to be detected immediately, without first connecting to the
+ server.)
+ */
if (opt.output_document)
{
if (HYPHENP (opt.output_document))
- output_stream = stdout;
+ {
+#ifdef WINDOWS
+ FILE *result;
+ result = freopen ("CONOUT$", "wb", stdout);
+ if (result == NULL)
+ {
+ logputs (LOG_NOTQUIET, _("\
+WARNING: Can't reopen standard output in binary mode;\n\
+ downloaded file may contain inappropriate line endings.\n"));
+ }
+#endif
+ output_stream = stdout;
+ }
else
{
struct_fstat st;
+
+ #ifdef __VMS
+ /* Common fopen() optional arguments:
+ sequential access only, access callback function.
+ */
+ # define FOPEN_OPT_ARGS , "fop=sqo", "acc", acc_cb, &open_id
+ int open_id = 7;
+ #else /* def __VMS */
+ # define FOPEN_OPT_ARGS
+ #endif /* def __VMS [else] */
+
output_stream = fopen (opt.output_document,
- opt.always_rest ? "ab" : "wb");
+ opt.always_rest ? "ab" : "wb"
+ FOPEN_OPT_ARGS);
if (output_stream == NULL)
{
perror (opt.output_document);
}
}
+ #ifdef __VMS
+ /* Set global ODS5 flag according to the specified destination (if
+ any), otherwise according to the current default device.
+ */
+ if (output_stream == NULL)
+ {
+ set_ods5_dest( "SYS$DISK");
+ }
+ else if (output_stream != stdout)
+ {
+ set_ods5_dest( opt.output_document);
+ }
+ #endif /* def __VMS */
+
#ifdef WINDOWS
ws_startup ();
#endif
for (t = url; *t; t++)
{
char *filename = NULL, *redirected_URL = NULL;
- int dt;
+ int dt, url_err;
+ /* Need to do a new struct iri every time, because
+ * retrieve_url may modify it in some circumstances,
+ * currently. */
+ struct iri *iri = iri_new ();
+ struct url *url_parsed;
- if ((opt.recursive || opt.page_requisites)
- && (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (*t)))
+ set_uri_encoding (iri, opt.locale, true);
+ url_parsed = url_parse (*t, &url_err, iri, true);
+
+ if (!url_parsed)
{
- int old_follow_ftp = opt.follow_ftp;
+ char *error = url_error (*t, url_err);
+ logprintf (LOG_NOTQUIET, "%s: %s.\n",*t, error);
+ xfree (error);
+ status = URLERROR;
+ }
+ else
+ {
+ if ((opt.recursive || opt.page_requisites)
+ && (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (url_parsed)))
+ {
+ int old_follow_ftp = opt.follow_ftp;
- /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
- if (url_scheme (*t) == SCHEME_FTP)
- opt.follow_ftp = 1;
+ /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
+ if (url_scheme (*t) == SCHEME_FTP)
+ opt.follow_ftp = 1;
- status = retrieve_tree (*t);
+ status = retrieve_tree (url_parsed, NULL);
- opt.follow_ftp = old_follow_ftp;
- }
- else
- status = retrieve_url (*t, &filename, &redirected_URL, NULL, &dt, opt.recursive);
+ opt.follow_ftp = old_follow_ftp;
+ }
+ else
+ {
+ status = retrieve_url (url_parsed, *t, &filename, &redirected_URL,
+ NULL, &dt, opt.recursive, iri);
+ }
- if (opt.delete_after && file_exists_p(filename))
- {
- DEBUGP (("Removing file due to --delete-after in main():\n"));
- logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
- if (unlink (filename))
- logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
+ if (opt.delete_after && file_exists_p(filename))
+ {
+ DEBUGP (("Removing file due to --delete-after in main():\n"));
+ logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
+ if (unlink (filename))
+ logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
+ }
+ xfree_null (redirected_URL);
+ xfree_null (filename);
+ url_free (url_parsed);
}
-
- xfree_null (redirected_URL);
- xfree_null (filename);
+ iri_free (iri);
}
/* And then from the input file, if any. */
logprintf (LOG_NOTQUIET,
_("FINISHED --%s--\nDownloaded: %d files, %s in %s (%s)\n"),
datetime_str (time (NULL)),
- opt.numurls,
+ numurls,
human_readable (total_downloaded_bytes),
secs_to_human_time (total_download_time),
retr_rate (total_downloaded_bytes, total_download_time));
xfree (url[i]);
cleanup ();
-#ifdef DEBUG_MALLOC
- print_malloc_debug_stats ();
-#endif
if (status == RETROK)
return 0;
else
than examining the error stack after a failed SSL_connect. */
SSL_CTX_set_verify (ssl_ctx, SSL_VERIFY_NONE, NULL);
+ /* Use the private key from the cert file unless otherwise specified. */
+ if (opt.cert_file && !opt.private_key)
+ {
+ opt.private_key = opt.cert_file;
+ opt.private_key_type = opt.cert_type;
+ }
+
if (opt.cert_file)
if (SSL_CTX_use_certificate_file (ssl_ctx, opt.cert_file,
key_type_to_ssl_type (opt.cert_type))
Returns true on success, false on failure. */
bool
- ssl_connect (int fd)
+ ssl_connect_wget (int fd)
{
SSL *conn;
struct openssl_transport_context *ctx;
its certificate, corresponds to HOST. (HOST typically comes from
the URL and is what the user thinks he's connecting to.)
- This assumes that ssl_connect has successfully finished, i.e. that
+ This assumes that ssl_connect_wget has successfully finished, i.e. that
the SSL handshake has been performed and that FD is connected to an
SSL handle.
if (!cert)
{
logprintf (LOG_NOTQUIET, _("%s: No certificate presented by %s.\n"),
- severity, escnonprint (host));
+ severity, quotearg_style (escape_quoting_style, host));
success = false;
goto no_cert; /* must bail out since CERT is NULL */
}
char *subject = X509_NAME_oneline (X509_get_subject_name (cert), 0, 0);
char *issuer = X509_NAME_oneline (X509_get_issuer_name (cert), 0, 0);
DEBUGP (("certificate:\n subject: %s\n issuer: %s\n",
- escnonprint (subject), escnonprint (issuer)));
+ quotearg_style (escape_quoting_style, subject),
+ quotearg_style (escape_quoting_style, issuer)));
OPENSSL_free (subject);
OPENSSL_free (issuer);
}
{
char *issuer = X509_NAME_oneline (X509_get_issuer_name (cert), 0, 0);
logprintf (LOG_NOTQUIET,
- _("%s: cannot verify %s's certificate, issued by `%s':\n"),
- severity, escnonprint (host), escnonprint (issuer));
+ _("%s: cannot verify %s's certificate, issued by %s:\n"),
+ severity, quotearg_style (escape_quoting_style, host),
+ quote (issuer));
/* Try to print more user-friendly (and translated) messages for
the frequent verification errors. */
switch (vresult)
if (!pattern_match (common_name, host))
{
logprintf (LOG_NOTQUIET, _("\
-%s: certificate common name `%s' doesn't match requested host name `%s'.\n"),
- severity, escnonprint (common_name), escnonprint (host));
+%s: certificate common name %s doesn't match requested host name %s.\n"),
+ severity, quote (common_name), quote (host));
success = false;
}
if (success)
DEBUGP (("X509 certificate successfully verified and matches host %s\n",
- escnonprint (host)));
+ quotearg_style (escape_quoting_style, host)));
X509_free (cert);
no_cert:
if (opt.check_cert && !success)
logprintf (LOG_NOTQUIET, _("\
To connect to %s insecurely, use `--no-check-certificate'.\n"),
- escnonprint (host));
+ quotearg_style (escape_quoting_style, host));
/* Allow --no-check-cert to disable certificate checking. */
return opt.check_cert ? success : true;
char *input_filename; /* Input filename */
bool force_html; /* Is the input file an HTML file? */
+ char *default_page; /* Alternative default page (index file) */
+
bool spider; /* Is Wget in spider mode? */
char **accepts; /* List of patterns to accept. */
char *user; /* Generic username */
char *passwd; /* Generic password */
+ bool ask_passwd; /* Ask for password? */
bool always_rest; /* Always use REST. */
char *ftp_user; /* FTP username */
SUM_SIZE_INT quota; /* Maximum file size to download and
store. */
- int numurls; /* Number of successfully downloaded
- URLs #### should be removed because
- it's not a setting, but a global var */
-
bool server_response; /* Do we print server response? */
bool save_headers; /* Do we save headers together with
file? */
bool content_disposition; /* Honor HTTP Content-Disposition header. */
bool auth_without_challenge; /* Issue Basic authentication creds without
- waiting for a challenge. */
+ waiting for a challenge. */
+
+ bool enable_iri;
+ char *encoding_remote;
+ char *locale;
+
+ #ifdef __VMS
+ int ftp_stmlf; /* Force Stream_LF format for binary FTP. */
+ #endif /* def __VMS */
+
};
extern struct options opt;
#include "hash.h"
#include "convert.h"
#include "ptimer.h"
+#include "html-url.h"
+#include "iri.h"
/* Total size of downloaded files. Used to enforce quota. */
SUM_SIZE_INT total_downloaded_bytes;
static int
write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
- wgint *written)
+ wgint *written, int flags)
{
+ static int cr_pending = 0; /* Found CR in ASCII FTP data. */
+
if (!out)
return 1;
if (*skip > bufsize)
return 1;
}
- fwrite (buf, 1, bufsize, out);
- *written += bufsize;
+ /* Note: This code assumes that "\n" is the universal line ending
+ character, as on UNIX and VMS. If this is not true, then here's
+ where to change it.
+ */
+
+ #if 1
+ # define EOL_STRING "\n"
+ #else /* 1 */
+ # define EOL_STRING "\r\n"
+ #endif /* 1 [else] */
+ #define EOL_STRING_LEN (sizeof( EOL_STRING)- 1)
+
+ if (flags & rb_ftp_ascii)
+ {
+ const char *bufend;
+
+ /* ASCII transfer. Put out lines delimited by CRLF. */
+ bufend = buf+ bufsize;
+ while (buf < bufend)
+ {
+ /* If CR, put out any pending CR, then set CR-pending flag. */
+ if (*buf == '\r')
+ {
+ if (cr_pending)
+ {
+ fwrite ("\r", 1, 1, out);
+ *written += 1;
+ }
+ cr_pending = 1;
+ buf++;
+ continue;
+ }
+
+ if (cr_pending)
+ {
+ if (*buf == '\n')
+ {
+ /* Found FTP EOL (CRLF). Put out local EOL. */
+ fwrite (EOL_STRING, 1, EOL_STRING_LEN, out);
+ *written += EOL_STRING_LEN;
+ }
+ else
+ {
+ /* Normal character. Put out pending CR and it. */
+ fwrite ("\r", 1, 1, out);
+ fwrite (buf, 1, 1, out);
+ *written += 2;
+ }
+ buf++;
+ cr_pending = 0;
+ }
+ else
+ {
+ /* Normal character. Put it out. */
+ fwrite (buf, 1, 1, out);
+ *written += 1;
+ buf++;
+ }
+ }
+ }
+ else
+ {
+ /* Image transfer. Put out buffer. */
+ fwrite (buf, 1, bufsize, out);
+ *written += bufsize;
+ }
/* Immediately flush the downloaded data. This should not hinder
performance: fast downloads will arrive in large 16K chunks
(which stdio would write out immediately anyway), and slow
downloads wouldn't be limited by disk speed. */
+
+ /* 2005-04-20 SMS.
+ Perhaps it shouldn't hinder performance, but it sure does, at least
+ on VMS (more than 2X). Rather than speculate on what it should or
+ shouldn't do, it might make more sense to test it. Even better, it
+ might be nice to explain what possible benefit it could offer, as
+ it appears to be a clear invitation to poor performance with no
+ actual justification. (Also, why 16K? Anyone test other values?)
+ */
+ #ifndef __VMS
fflush (out);
+ #endif /* ndef __VMS */
return !ferror (out);
}
/* If we're skipping STARTPOS bytes, pass 0 as the INITIAL
argument to progress_create because the indicator doesn't
(yet) know about "skipping" data. */
- progress = progress_create (skip ? 0 : startpos, startpos + toread);
+ wgint start = skip ? 0 : startpos;
+ progress = progress_create (start, start + toread);
progress_interactive = progress_interactive_p (progress);
}
if (ret > 0)
{
sum_read += ret;
- if (!write_data (out, dlbuf, ret, &skip, &sum_written))
+ if (!write_data (out, dlbuf, ret, &skip, &sum_written, flags))
{
ret = -2;
goto out;
char *hunk = xmalloc (bufsize);
int tail = 0; /* tail position in HUNK */
- assert (maxsize >= bufsize);
+ assert (!maxsize || maxsize >= bufsize);
while (1)
{
multiple points. */
uerr_t
-retrieve_url (const char *origurl, char **file, char **newloc,
- const char *refurl, int *dt, bool recursive)
+retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
+ char **newloc, const char *refurl, int *dt, bool recursive,
+ struct iri *iri)
{
uerr_t result;
char *url;
bool location_changed;
+ bool iri_fallbacked = 0;
int dummy;
char *mynewloc, *proxy;
- struct url *u, *proxy_url;
+ struct url *u = orig_parsed, *proxy_url;
int up_error_code; /* url parse error code */
char *local_file;
int redirection_count = 0;
if (file)
*file = NULL;
- u = url_parse (url, &up_error_code);
- if (!u)
- {
- logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
- xfree (url);
- return URLERROR;
- }
-
if (!refurl)
refurl = opt.referer;
redirected:
+ /* (also for IRI fallbacking) */
result = NOCONERROR;
mynewloc = NULL;
proxy = getproxy (u);
if (proxy)
{
+ struct iri *pi = iri_new ();
+ set_uri_encoding (pi, opt.locale, true);
+ pi->utf8_encode = false;
+
/* Parse the proxy URL. */
- proxy_url = url_parse (proxy, &up_error_code);
+ proxy_url = url_parse (proxy, &up_error_code, NULL, true);
if (!proxy_url)
{
+ char *error = url_error (proxy, up_error_code);
logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
- proxy, url_error (up_error_code));
+ proxy, error);
xfree (url);
+ xfree (error);
RESTORE_POST_DATA;
return PROXERR;
}
#endif
|| (proxy_url && proxy_url->scheme == SCHEME_HTTP))
{
- result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
+ result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url, iri);
}
else if (u->scheme == SCHEME_FTP)
{
xfree (mynewloc);
mynewloc = construced_newloc;
+ /* Reset UTF-8 encoding state, keep the URI encoding and reset
+ the content encoding. */
+ iri->utf8_encode = opt.enable_iri;
+ set_content_encoding (iri, NULL);
+ xfree_null (iri->orig_url);
+
/* Now, see if this new location makes sense. */
- newloc_parsed = url_parse (mynewloc, &up_error_code);
+ newloc_parsed = url_parse (mynewloc, &up_error_code, iri, true);
if (!newloc_parsed)
{
+ char *error = url_error (mynewloc, up_error_code);
logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
- url_error (up_error_code));
- url_free (u);
+ error);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
xfree (url);
xfree (mynewloc);
+ xfree (error);
RESTORE_POST_DATA;
return result;
}
logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
opt.max_redirect);
url_free (newloc_parsed);
- url_free (u);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
xfree (url);
xfree (mynewloc);
RESTORE_POST_DATA;
xfree (url);
url = mynewloc;
- url_free (u);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
u = newloc_parsed;
/* If we're being redirected from POST, we don't want to POST
goto redirected;
}
- if (local_file)
+ /* Try to not encode in UTF-8 if fetching failed */
+ if (!(*dt & RETROKF) && iri->utf8_encode)
+ {
+ iri->utf8_encode = false;
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
+ u = url_parse (origurl, NULL, iri, true);
+ if (u)
+ {
+ DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
+ url = xstrdup (u->url);
+ iri_fallbacked = 1;
+ goto redirected;
+ }
+ else
+ DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url)));
+ }
+
+ if (local_file && *dt & RETROKF)
{
+ register_download (u->url, local_file);
+ if (redirection_count && 0 != strcmp (origurl, u->url))
+ register_redirection (origurl, u->url);
+ if (*dt & TEXTHTML)
+ register_html (u->url, local_file);
if (*dt & RETROKF)
{
register_download (u->url, local_file);
register_redirection (origurl, u->url);
if (*dt & TEXTHTML)
register_html (u->url, local_file);
+ if (*dt & TEXTCSS)
+ register_css (u->url, local_file);
}
}
else
xfree_null (local_file);
- url_free (u);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
- if (redirection_count)
+ if (redirection_count || iri_fallbacked)
{
if (newloc)
*newloc = url;
{
uerr_t status;
struct urlpos *url_list, *cur_url;
+ struct iri *iri = iri_new();
+
+ char *input_file = NULL;
+ const char *url = file;
- url_list = (html ? get_urls_html (file, NULL, NULL)
- : get_urls_file (file));
status = RETROK; /* Suppose everything is OK. */
*count = 0; /* Reset the URL count. */
+ /* sXXXav : Assume filename and links in the file are in the locale */
+ set_uri_encoding (iri, opt.locale, true);
+ set_content_encoding (iri, opt.locale);
+
+ if (url_has_scheme (url))
+ {
+ int dt,url_err;
+ uerr_t status;
+ struct url * url_parsed = url_parse(url, &url_err, iri, true);
+
+ if (!url_parsed)
+ {
+ char *error = url_error (url, url_err);
+ logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error);
+ xfree (error);
+ return URLERROR;
+ }
+
+ if (!opt.base_href)
+ opt.base_href = xstrdup (url);
+
+ status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt,
+ false, iri);
+ if (status != RETROK)
+ return status;
+
+ if (dt & TEXTHTML)
+ html = true;
+
+ /* If we have a found a content encoding, use it.
+ * ( == is okay, because we're checking for identical object) */
+ if (iri->content_encoding != opt.locale)
+ set_uri_encoding (iri, iri->content_encoding, false);
+
+ /* Reset UTF-8 encode status */
+ iri->utf8_encode = opt.enable_iri;
+ xfree_null (iri->orig_url);
+ iri->orig_url = NULL;
+ }
+ else
+ input_file = (char *) file;
+
+ url_list = (html ? get_urls_html (input_file, NULL, NULL, iri)
+ : get_urls_file (input_file));
+
for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
{
char *filename = NULL, *new_file = NULL;
int dt;
+ struct iri *tmpiri = iri_dup (iri);
+ struct url *parsed_url = NULL;
if (cur_url->ignore_when_downloading)
continue;
status = QUOTEXC;
break;
}
+
+ /* Need to reparse the url, since it didn't have iri information. */
+ if (opt.enable_iri)
+ parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true);
+
if ((opt.recursive || opt.page_requisites)
&& (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
{
int old_follow_ftp = opt.follow_ftp;
/* Turn opt.follow_ftp on in case of recursive FTP retrieval */
- if (cur_url->url->scheme == SCHEME_FTP)
+ if (cur_url->url->scheme == SCHEME_FTP)
opt.follow_ftp = 1;
-
- status = retrieve_tree (cur_url->url->url);
+
+ status = retrieve_tree (parsed_url ? parsed_url : cur_url->url,
+ tmpiri);
opt.follow_ftp = old_follow_ftp;
}
else
- status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt, opt.recursive);
+ status = retrieve_url (parsed_url ? parsed_url : cur_url->url,
+ cur_url->url->url, &filename,
+ &new_file, NULL, &dt, opt.recursive, tmpiri);
+
+ if (parsed_url)
+ url_free (parsed_url);
if (filename && opt.delete_after && file_exists_p (filename))
{
xfree_null (new_file);
xfree_null (filename);
+ iri_free (tmpiri);
}
/* Free the linked list of URL-s. */
free_urlpos (url_list);
+ iri_free (iri);
+
return status;
}
/* Returns true if URL would be downloaded through a proxy. */
bool
-url_uses_proxy (const char *url)
+url_uses_proxy (struct url * u)
{
bool ret;
- struct url *u = url_parse (url, NULL);
if (!u)
return false;
ret = getproxy (u) != NULL;
- url_free (u);
return ret;
}
else
return sufmatch (no_proxy, host);
}
+
+/* Set the file parameter to point to the local file string. */
+void
+set_local_file (const char **file, const char *default_file)
+{
+ if (opt.output_document)
+ {
+ if (output_stream_regular)
+ *file = opt.output_document;
+ }
+ else
+ *file = default_file;
+}
#ifndef RETR_H
#define RETR_H
+#include "url.h"
+
/* These global vars should be made static to retr.c and exported via
functions! */
extern SUM_SIZE_INT total_downloaded_bytes;
/* Flags for fd_read_body. */
enum {
rb_read_exactly = 1,
- rb_skip_startpos = 2
+ rb_skip_startpos = 2,
+ rb_ftp_ascii = 4
};
int fd_read_body (int, FILE *, wgint, wgint, wgint *, wgint *, double *, int);
char *fd_read_hunk (int, hunk_terminator_t, long, long);
char *fd_read_line (int);
-uerr_t retrieve_url (const char *, char **, char **, const char *, int *, bool);
+uerr_t retrieve_url (struct url *, const char *, char **, char **,
+ const char *, int *, bool, struct iri *);
uerr_t retrieve_from_file (const char *, bool, int *);
const char *retr_rate (wgint, double);
void rotate_backups (const char *);
-bool url_uses_proxy (const char *);
+bool url_uses_proxy (struct url *);
#endif /* RETR_H */
#include "url.h"
#include "host.h" /* for is_valid_ipv6_address */
+ #ifdef __VMS
+ #include "vms.h"
+ #endif /* def __VMS */
+
#ifdef TESTING
#include "test.h"
#endif
/* Forward declarations: */
-static bool path_simplify (char *);
+static bool path_simplify (enum url_scheme, char *);
\f
/* Support for escaping and unescaping of URL strings. */
return url_escape_1 (s, urlchr_unsafe, false);
}
+/* URL-escape the unsafe and reserved characters (see urlchr_table) in
+ a given string, returning a freshly allocated string. */
+
+char *
+url_escape_unsafe_and_reserved (const char *s)
+{
+ return url_escape_1 (s, urlchr_unsafe|urlchr_reserved, false);
+}
+
/* URL-escape the unsafe characters (see urlchr_table) in a given
string. If no characters are unsafe, S is returned. */
#define PE_NO_ERROR 0
N_("No error"),
#define PE_UNSUPPORTED_SCHEME 1
- N_("Unsupported scheme"),
-#define PE_INVALID_HOST_NAME 2
+ N_("Unsupported scheme %s"), /* support for format token only here */
+#define PE_MISSING_SCHEME 2
+ N_("Scheme missing"),
+#define PE_INVALID_HOST_NAME 3
N_("Invalid host name"),
-#define PE_BAD_PORT_NUMBER 3
+#define PE_BAD_PORT_NUMBER 4
N_("Bad port number"),
-#define PE_INVALID_USER_NAME 4
+#define PE_INVALID_USER_NAME 5
N_("Invalid user name"),
-#define PE_UNTERMINATED_IPV6_ADDRESS 5
+#define PE_UNTERMINATED_IPV6_ADDRESS 6
N_("Unterminated IPv6 numeric address"),
-#define PE_IPV6_NOT_SUPPORTED 6
+#define PE_IPV6_NOT_SUPPORTED 7
N_("IPv6 addresses not supported"),
-#define PE_INVALID_IPV6_ADDRESS 7
+#define PE_INVALID_IPV6_ADDRESS 8
N_("Invalid IPv6 numeric address")
};
error, and if ERROR is not NULL, also set *ERROR to the appropriate
error code. */
struct url *
-url_parse (const char *url, int *error)
+url_parse (const char *url, int *error, struct iri *iri, bool percent_encode)
{
struct url *u;
const char *p;
int port;
char *user = NULL, *passwd = NULL;
- char *url_encoded = NULL;
+ const char *url_encoded = NULL;
+ char *new_url = NULL;
int error_code;
scheme = url_scheme (url);
if (scheme == SCHEME_INVALID)
{
- error_code = PE_UNSUPPORTED_SCHEME;
+ if (url_has_scheme (url))
+ error_code = PE_UNSUPPORTED_SCHEME;
+ else
+ error_code = PE_MISSING_SCHEME;
goto error;
}
- url_encoded = reencode_escapes (url);
+ if (iri && iri->utf8_encode)
+ {
+ iri->utf8_encode = remote_to_utf8 (iri, iri->orig_url ? iri->orig_url : url, (const char **) &new_url);
+ if (!iri->utf8_encode)
+ new_url = NULL;
+ else
+ iri->orig_url = xstrdup (url);
+ }
+
+ /* XXX XXX Could that change introduce (security) bugs ??? XXX XXX*/
+ if (percent_encode)
+ url_encoded = reencode_escapes (new_url ? new_url : url);
+ else
+ url_encoded = new_url ? new_url : url;
+
p = url_encoded;
+ if (new_url && url_encoded != new_url)
+ xfree (new_url);
+
p += strlen (supported_schemes[scheme].leading_string);
uname_b = p;
p = url_skip_credentials (p);
u->passwd = passwd;
u->path = strdupdelim (path_b, path_e);
- path_modified = path_simplify (u->path);
+ path_modified = path_simplify (scheme, u->path);
split_path (u->path, &u->dir, &u->file);
host_modified = lowercase_str (u->host);
{
url_unescape (u->host);
host_modified = true;
+
+ /* Apply IDNA regardless of iri->utf8_encode status */
+ if (opt.enable_iri && iri)
+ {
+ char *new = idn_encode (iri, u->host);
+ if (new)
+ {
+ xfree (u->host);
+ u->host = new;
+ host_modified = true;
+ }
+ }
}
if (params_b)
if (fragment_b)
u->fragment = strdupdelim (fragment_b, fragment_e);
- if (path_modified || u->fragment || host_modified || path_b == path_e)
+ if (opt.enable_iri || path_modified || u->fragment || host_modified || path_b == path_e)
{
/* If we suspect that a transformation has rendered what
url_string might return different from URL_ENCODED, rebuild
if (url_encoded == url)
u->url = xstrdup (url);
else
- u->url = url_encoded;
+ u->url = (char *) url_encoded;
}
return u;
error:
/* Cleanup in case of error: */
if (url_encoded && url_encoded != url)
- xfree (url_encoded);
+ xfree ((char *) url_encoded);
/* Transmit the error code to the caller, if the caller wants to
know. */
/* Return the error message string from ERROR_CODE, which should have
been retrieved from url_parse. The error message is translated. */
-const char *
-url_error (int error_code)
+char *
+url_error (const char *url, int error_code)
{
- assert (error_code >= 0 && error_code < countof (parse_errors));
- return _(parse_errors[error_code]);
+ assert (error_code >= 0 && ((size_t) error_code) < countof (parse_errors));
+
+ if (error_code == PE_UNSUPPORTED_SCHEME)
+ {
+ char *error, *p;
+ char *scheme = xstrdup (url);
+ assert (url_has_scheme (url));
+
+ if ((p = strchr (scheme, ':')))
+ *p = '\0';
+ if (!strcasecmp (scheme, "https"))
+ error = aprintf (_("HTTPS support not compiled in"));
+ else
+ error = aprintf (_(parse_errors[error_code]), quote (scheme));
+ xfree (scheme);
+
+ return error;
+ }
+ else
+ return xstrdup (_(parse_errors[error_code]));
}
/* Split PATH into DIR and FILE. PATH comes from the URL and is
const char *u_file, *u_query;
char *fname, *unique;
+ char *index_filename = "index.html"; /* The default index file is index.html */
fnres.base = NULL;
fnres.size = 0;
fnres.tail = 0;
+ /* If an alternative index file was defined, change index_filename */
+ if (opt.default_page)
+ index_filename = opt.default_page;
+
+
/* Start with the directory prefix, if specified. */
if (opt.dir_prefix)
append_string (opt.dir_prefix, &fnres);
/* Add the file name. */
if (fnres.tail)
append_char ('/', &fnres);
- u_file = *u->file ? u->file : "index.html";
+ u_file = *u->file ? u->file : index_filename;
append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres);
/* Append "?query" to the file name. */
if ((opt.noclobber || opt.always_rest || opt.timestamping || opt.dirstruct)
&& !(file_exists_p (fname) && !file_non_directory_p (fname)))
- return fname;
+ {
+ unique = fname;
+ }
+ else
+ {
+ unique = unique_name (fname, true);
+ if (unique != fname)
+ xfree (fname);
+ }
+
+ /* On VMS, alter the name as required. */
+ #ifdef __VMS
+ {
+ char *unique2;
+
+ unique2 = ods_conform( unique);
+ if (unique2 != unique)
+ {
+ xfree (unique);
+ unique = unique2;
+ }
+ }
+ #endif /* def __VMS */
- unique = unique_name (fname, true);
- if (unique != fname)
- xfree (fname);
return unique;
}
\f
test case. */
static bool
-path_simplify (char *path)
+path_simplify (enum url_scheme scheme, char *path)
{
char *h = path; /* hare */
char *t = path; /* tortoise */
+ char *beg = path;
char *end = strchr (path, '\0');
while (h < end)
{
/* Handle "../" by retreating the tortoise by one path
element -- but not past beggining. */
- if (t > path)
+ if (t > beg)
{
/* Move backwards until T hits the beginning of the
previous path element or the beginning of path. */
- for (--t; t > path && t[-1] != '/'; t--)
+ for (--t; t > beg && t[-1] != '/'; t--)
;
}
+ else if (scheme == SCHEME_FTP)
+ {
+ /* If we're at the beginning, copy the "../" literally
+ and move the beginning so a later ".." doesn't remove
+ it. This violates RFC 3986; but we do it for FTP
+ anyway because there is otherwise no way to get at a
+ parent directory, when the FTP server drops us in a
+ non-root directory (which is not uncommon). */
+ beg = t + 3;
+ goto regular;
+ }
h += 3;
}
else
{
+ regular:
/* A regular path element. If H hasn't advanced past T,
simply skip to the next path element. Otherwise, copy
the path element until the next slash. */
\f
static int
getchar_from_escaped_string (const char *str, char *c)
-{
+{
const char *p = str;
assert (str && *str);
assert (c);
-
+
if (p[0] == '%')
{
if (!c_isxdigit(p[1]) || !c_isxdigit(p[2]))
p += pp;
q += qq;
}
-
+
return (*p == 0 && *q == 0 ? true : false);
}
\f
-#if 0
+#ifdef TESTING
/* Debugging and testing support for path_simplify. */
+#if 0
/* Debug: run path_simplify on PATH and return the result in a new
string. Useful for calling from the debugger. */
static char *
path_simplify (copy);
return copy;
}
+#endif
-static void
-run_test (char *test, char *expected_result, bool expected_change)
+static const char *
+run_test (char *test, char *expected_result, enum url_scheme scheme,
+ bool expected_change)
{
char *test_copy = xstrdup (test);
- bool modified = path_simplify (test_copy);
+ bool modified = path_simplify (scheme, test_copy);
if (0 != strcmp (test_copy, expected_result))
{
printf ("Failed path_simplify(\"%s\"): expected \"%s\", got \"%s\".\n",
test, expected_result, test_copy);
+ mu_assert ("", 0);
}
if (modified != expected_change)
{
test);
}
xfree (test_copy);
+ mu_assert ("", modified == expected_change);
+ return NULL;
}
-static void
+const char *
test_path_simplify (void)
{
static struct {
char *test, *result;
+ enum url_scheme scheme;
bool should_modify;
} tests[] = {
- { "", "", false },
- { ".", "", true },
- { "./", "", true },
- { "..", "", true },
- { "../", "", true },
- { "foo", "foo", false },
- { "foo/bar", "foo/bar", false },
- { "foo///bar", "foo///bar", false },
- { "foo/.", "foo/", true },
- { "foo/./", "foo/", true },
- { "foo./", "foo./", false },
- { "foo/../bar", "bar", true },
- { "foo/../bar/", "bar/", true },
- { "foo/bar/..", "foo/", true },
- { "foo/bar/../x", "foo/x", true },
- { "foo/bar/../x/", "foo/x/", true },
- { "foo/..", "", true },
- { "foo/../..", "", true },
- { "foo/../../..", "", true },
- { "foo/../../bar/../../baz", "baz", true },
- { "a/b/../../c", "c", true },
- { "./a/../b", "b", true }
+ { "", "", SCHEME_HTTP, false },
+ { ".", "", SCHEME_HTTP, true },
+ { "./", "", SCHEME_HTTP, true },
+ { "..", "", SCHEME_HTTP, true },
+ { "../", "", SCHEME_HTTP, true },
+ { "..", "..", SCHEME_FTP, false },
+ { "../", "../", SCHEME_FTP, false },
+ { "foo", "foo", SCHEME_HTTP, false },
+ { "foo/bar", "foo/bar", SCHEME_HTTP, false },
+ { "foo///bar", "foo///bar", SCHEME_HTTP, false },
+ { "foo/.", "foo/", SCHEME_HTTP, true },
+ { "foo/./", "foo/", SCHEME_HTTP, true },
+ { "foo./", "foo./", SCHEME_HTTP, false },
+ { "foo/../bar", "bar", SCHEME_HTTP, true },
+ { "foo/../bar/", "bar/", SCHEME_HTTP, true },
+ { "foo/bar/..", "foo/", SCHEME_HTTP, true },
+ { "foo/bar/../x", "foo/x", SCHEME_HTTP, true },
+ { "foo/bar/../x/", "foo/x/", SCHEME_HTTP, true },
+ { "foo/..", "", SCHEME_HTTP, true },
+ { "foo/../..", "", SCHEME_HTTP, true },
+ { "foo/../../..", "", SCHEME_HTTP, true },
+ { "foo/../../bar/../../baz", "baz", SCHEME_HTTP, true },
+ { "foo/../..", "..", SCHEME_FTP, true },
+ { "foo/../../..", "../..", SCHEME_FTP, true },
+ { "foo/../../bar/../../baz", "../../baz", SCHEME_FTP, true },
+ { "a/b/../../c", "c", SCHEME_HTTP, true },
+ { "./a/../b", "b", SCHEME_HTTP, true }
};
int i;
for (i = 0; i < countof (tests); i++)
{
+ const char *message;
char *test = tests[i].test;
char *expected_result = tests[i].result;
+ enum url_scheme scheme = tests[i].scheme;
bool expected_change = tests[i].should_modify;
- run_test (test, expected_result, expected_change);
+ message = run_test (test, expected_result, scheme, expected_change);
+ if (message) return message;
}
+ return NULL;
}
-#endif
-\f
-#ifdef TESTING
const char *
test_append_uri_pathel()
} test_array[] = {
{ "http://www.yoyodyne.com/path/", "somepage.html", false, "http://www.yoyodyne.com/path/somepage.html" },
};
-
+
for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
{
struct growable dest;
#include "utils.h"
#include "hash.h"
+ #ifdef __VMS
+ #include "vms.h"
+ #endif /* def __VMS */
+
#ifdef TESTING
#include "test.h"
#endif
+static void
+memfatal (const char *context, long attempted_size)
+{
+ /* Make sure we don't try to store part of the log line, and thus
+ call malloc. */
+ log_set_save_context (false);
+
+ /* We have different log outputs in different situations:
+ 1) output without bytes information
+ 2) output with bytes information */
+ if (attempted_size == UNKNOWN_ATTEMPTED_SIZE)
+ {
+ logprintf (LOG_ALWAYS,
+ _("%s: %s: Failed to allocate enough memory; memory exhausted.\n"),
+ exec_name, context);
+ }
+ else
+ {
+ logprintf (LOG_ALWAYS,
+ _("%s: %s: Failed to allocate %ld bytes; memory exhausted.\n"),
+ exec_name, context, attempted_size);
+ }
+
+ exit (1);
+}
+
+ /* Character property table for (re-)escaping VMS ODS5 extended file
+ names. Note that this table ignores Unicode.
+
+ ODS2 valid characters: 0-9 A-Z a-z $ - _ ~
+
+ ODS5 Invalid characters:
+ C0 control codes (0x00 to 0x1F inclusive)
+ Asterisk (*)
+ Question mark (?)
+
+ ODS5 Invalid characters only in VMS V7.2 (which no one runs, right?):
+ Double quotation marks (")
+ Backslash (\)
+ Colon (:)
+ Left angle bracket (<)
+ Right angle bracket (>)
+ Slash (/)
+ Vertical bar (|)
+
+ Characters escaped by "^":
+ SP ! # % & ' ( ) + , . ; = @ [ ] ^ ` { } ~
+
+ Either "^_" or "^ " is accepted as a space. Period (.) is a special
+ case. Note that un-escaped < and > can also confuse a directory
+ spec.
+
+ Characters put out as ^xx:
+ 7F (DEL)
+ 80-9F (C1 control characters)
+ A0 (nonbreaking space)
+ FF (Latin small letter y diaeresis)
+
+ Other cases:
+ Unicode: "^Uxxxx", where "xxxx" is four hex digits.
+
+ Property table values:
+ Normal escape: 1
+ Space: 2
+ Dot: 4
+ Hex-hex escape: 8
+ ODS2 normal: 16
+ ODS2 lower case: 32
+ Hex digit: 64
+ */
+
+ unsigned char char_prop[ 256] = {
+
+ /* NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ /* SP ! " # $ % & ' ( ) * + , - . / */
+ 2, 1, 0, 1, 16, 1, 1, 1, 1, 1, 0, 1, 1, 16, 4, 0,
+
+ /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
+ 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 0, 1, 1, 1, 1, 1,
+
+ /* @ A B C D E F G H I J K L M N O */
+ 1, 80, 80, 80, 80, 80, 80, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+
+ /* P Q R S T U V W X Y Z [ \ ] ^ _ */
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1, 0, 1, 1, 16,
+
+ /* ` a b c d e f g h i j k l m n o */
+ 1, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+
+ /* p q r s t u v w x y z { | } ~ DEL */
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 1, 0, 1, 17, 8,
+
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8
+ };
+
/* Utility function: like xstrdup(), but also lowercases S. */
char *
const char *next_str;
int total_length = 0;
- int argcount;
+ size_t argcount;
/* Calculate the length of and allocate the resulting string. */
/* The Windows versions of the following two functions are defined in
mswindows.c. On MSDOS this function should never be called. */
+ #ifdef __VMS
+
+ void
+ fork_to_background (void)
+ {
+ return;
+ }
+
+ #else /* def __VMS */
+
#if !defined(WINDOWS) && !defined(MSDOS)
void
fork_to_background (void)
/* parent, no error */
printf (_("Continuing in background, pid %d.\n"), (int) pid);
if (logfile_changed)
- printf (_("Output will be written to `%s'.\n"), opt.lfilename);
+ printf (_("Output will be written to %s.\n"), quote (opt.lfilename));
exit (0); /* #### should we use _exit()? */
}
freopen ("/dev/null", "w", stderr);
}
#endif /* !WINDOWS && !MSDOS */
+
+ #endif /* def __VMS [else] */
+
\f
/* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
specified with TM. The atime ("access time") is set to the current
DEBUGP (("Unlinking %s (symlink).\n", file));
err = unlink (file);
if (err != 0)
- logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
- file, strerror (errno));
+ logprintf (LOG_VERBOSE, _("Failed to unlink symlink %s: %s\n"),
+ quote (file), strerror (errno));
}
return err;
}
#endif
}
+ /* 2005-02-19 SMS.
+ If no UNIQ_SEP is defined (as on VMS), have unique_name() return the
+ original name. With the VMS file systems' versioning, everything
+ should be fine, and appending ".NN" just causes trouble.
+ */
+
+ #ifdef UNIQ_SEP
+
/* stat file names named PREFIX.1, PREFIX.2, etc., until one that
doesn't exist is found. Return a freshly allocated copy of the
unused file name. */
char *template_tail = template + plen;
memcpy (template, prefix, plen);
- *template_tail++ = '.';
+ *template_tail++ = UNIQ_SEP;
do
number_to_string (template_tail, count++);
If not, FILE.1 is tried, then FILE.2, etc. The first FILE.<number>
file name that doesn't exist is returned.
+ 2005-02-19 SMS. "." is now UNIQ_SEP, and may be different.
+
The resulting file is not created, only verified that it didn't
exist at the point in time when the function was called.
Therefore, where security matters, don't rely that the file created
return unique_name_1 (file);
}
+ #else /* def UNIQ_SEP */
+
+ /* Dummy unique_name() for VMS. Return the original name as easily as
+ possible.
+ */
+ char *
+ unique_name (const char *file, bool allow_passthrough)
+ {
+ /* Return the FILE itself, without modification, irregardful. */
+ return allow_passthrough ? (char *)file : xstrdup (file);
+ }
+
+ #endif /* def UNIQ_SEP [else] */
+
/* Create a file based on NAME, except without overwriting an existing
file with that name. Providing O_EXCL is correctly implemented,
this function does not have the race condition associated with
{
int fd;
#ifdef O_EXCL
+
+ /* 2005-04-14 SMS.
+ VMS lacks O_BINARY, but makes up for it in weird and wonderful ways.
+ It also has file versions which obviate all the O_EXCL effort.
+ O_TRUNC (something of a misnomer) requests a new version.
+ */
+ # ifdef __VMS
+ /* Common open() optional arguments:
+ sequential access only, access callback function.
+ */
+ # define OPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
+
+ int open_id;
+ int flags = O_WRONLY | O_CREAT | O_TRUNC;
+
+ if (binary > 1)
+ {
+ open_id = 11;
+ fd = open( fname, /* File name. */
+ flags, /* Flags. */
+ 0777, /* Mode for default protection. */
+ "ctx=bin,stm", /* Binary, stream access. */
+ "rfm=stmlf", /* Stream_LF. */
+ OPEN_OPT_ARGS); /* Access callback. */
+ }
+ else if (binary)
+ {
+ open_id = 12;
+ fd = open( fname, /* File name. */
+ flags, /* Flags. */
+ 0777, /* Mode for default protection. */
+ "ctx=bin,stm", /* Binary, stream access. */
+ "rfm=fix", /* Fixed-length, */
+ "mrs=512", /* 512-byte records. */
+ OPEN_OPT_ARGS); /* Access callback. */
+ }
+ else
+ {
+ open_id = 13;
+ fd = open( fname, /* File name. */
+ flags, /* Flags. */
+ 0777, /* Mode for default protection.
+ */
+ "rfm=stmlf", /* Stream_LF. */
+ OPEN_OPT_ARGS); /* Access callback. */
+ }
+ # else /* def __VMS */
int flags = O_WRONLY | O_CREAT | O_EXCL;
# ifdef O_BINARY
if (binary)
flags |= O_BINARY;
# endif
fd = open (fname, flags, 0666);
+ # endif /* def __VMS [else] */
+
if (fd < 0)
return NULL;
return fdopen (fd, binary ? "wb" : "w");
'E', /* exabyte, 2^60 bytes */
};
static char buf[8];
- int i;
+ size_t i;
/* If the quantity is smaller than 1K, just print it. */
if (n < 1024)
{ { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
{ { "/dir with spaces", NULL, NULL }, "dir with spaces", true },
{ { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true },
+ { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false },
+ { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false },
};
for (i = 0; i < countof(test_array); ++i)
#ifndef UTILS_H
#define UTILS_H
+/* Constant is using when we don`t know attempted size exactly */
+#define UNKNOWN_ATTEMPTED_SIZE -3
+
+/* Macros that interface to malloc, but know about type sizes, and
+ cast the result to the appropriate type. The casts are not
+ necessary in standard C, but Wget performs them anyway for the sake
+ of pre-standard environments and possibly C++. */
+
+#define xnew(type) (xmalloc (sizeof (type)))
+#define xnew0(type) (xcalloc (1, sizeof (type)))
+#define xnew_array(type, len) (xmalloc ((len) * sizeof (type)))
+#define xnew0_array(type, len) (xcalloc ((len), sizeof (type)))
+
+#define alloca_array(type, size) ((type *) alloca ((size) * sizeof (type)))
+
+#define xfree free
+/* Free P if it is non-NULL. C requires free() to behaves this way by
+ default, but Wget's code is historically careful not to pass NULL
+ to free. This allows us to assert p!=NULL in xfree to check
+ additional errors. (But we currently don't do that!) */
+#define xfree_null(p) if (!(p)) ; else xfree (p)
+
struct hash_table;
struct file_memory {
char *time_str (time_t);
char *datetime_str (time_t);
-#ifdef DEBUG_MALLOC
-void print_malloc_debug_stats ();
-#endif
-
char *xstrdup_lower (const char *);
char *strdupdelim (const char *, const char *);
const char *print_decimal (double);
+ extern unsigned char char_prop[];
+
#endif /* UTILS_H */
#else
/* Fall back to using long, which is always available and in most
cases large enough. */
- typedef long off_t;
+ typedef long wgint;
# define SIZEOF_WGINT SIZEOF_LONG
#endif
#include "options.h"
/* Everything uses this, so include them here directly. */
-#include "xmalloc.h"
+#include <alloca.h>
+#include "xalloc.h"
/* Likewise for logging functions. */
#include "log.h"
-\f
+
+/* Likewise for quoting functions. */
+#include "quote.h"
+#include "quotearg.h"
+
+/* Likewise for struct iri definition */
+#include "iri.h"
+
/* Useful macros used across the code: */
/* The number of elements in an array. For example:
HEAD_ONLY = 0x0004, /* only send the HEAD request */
SEND_NOCACHE = 0x0008, /* send Pragma: no-cache directive */
ACCEPTRANGES = 0x0010, /* Accept-ranges header was found */
- ADDED_HTML_EXTENSION = 0x0020 /* added ".html" extension due to -E */
+ ADDED_HTML_EXTENSION = 0x0020, /* added ".html" extension due to -E */
+ TEXTCSS = 0x0040 /* document is of type text/css */
};
/* Universal error type -- used almost everywhere. Error reporting of
AUTHFAILED, QUOTEXC, WRITEFAILED, SSLINITFAILED
} uerr_t;
+ /* 2005-02-19 SMS.
+ Select an appropriate "orig" suffix and a separator character for
+ adding a unique suffix to a file name.
+
+ A VMS ODS2 file system can not tolerate multiple dots. An ODS5 file
+ system can, but even there not all dots are equal, and heroic effort
+ would be needed to get ".html^.orig" rather than (the less desirable)
+ "^.html.orig". It's more satisfactory always to use "_orig" on VMS
+ (rather than including "vms.h", testing "ods5_dest", and acting
+ accordingly).
+
+ Note that code in various places assumes that this string is five
+ characters long.
+ */
+ # ifdef __VMS
+ # define ORIG_SFX "_orig"
+ # else /* def __VMS */
+ # define ORIG_SFX ".orig"
+ # endif /* def __VMS [else] */
+
+ /* ".NNN" unique-ifying suffix separator character for unique_name() in
+ url.c (and anywhere else). Note that on VMS, the file system's
+ version numbers solve the problem that unique_name() is designed to
+ handle, obviating this whole exercise. Other systems may specify a
+ character different from "." here, if desired.
+ */
+ # ifndef __VMS
+ # define UNIQ_SEP '.'
+ # endif /* ndef __VMS */
+
#endif /* WGET_H */