now quoted to escape non-ASCII characters.
+2005-03-03 Hrvoje Niksic <hniksic@xemacs.org>
+
+ * retr.c (retrieve_url): Escape location header.
+
+ * http.c (print_server_response_1): Escape server response when
+ printing it.
+ (gethttp): Escape host name, status message, location header, and
+ content type.
+ (http_loop): Escape error message from server.
+
+ * host.c (lookup_host): Escape host name when printing it.
+
+ * ftp.c (getftp): Escape user name when printing it.
+ (getftp): Escape remote file and directory for printing.
+ (getftp): Escape server listing when printing it.
+ (ftp_retrieve_list): Escape link name and file name.
+ (ftp_retrieve_glob): Escape file name.
+
+ * ftp-basic.c (ftp_response): Escape server response when printing
+ it.
+
+ * cookies.c (parse_set_cookies): Escape the cookie field when
+ printing it.
+ (parse_set_cookies): Escape contents of remote header.
+ (cookie_handle_set_cookie): Escape host name and cookie domain.
+
+ * connect.c (connect_to_ip): Escape the host name.
+
+ * log.c (escnonprint): New function, used for printing strings
+ coming from the server that possibly contain non-ASCII characters.
+ (escnonprint_uri): Ditto.
+
2005-02-24 Hrvoje Niksic <hniksic@xemacs.org>
* ftp.c (getftp): Ditto.
{
const char *txt_addr = pretty_print_address (ip);
if (print && 0 != strcmp (print, txt_addr))
- logprintf (LOG_VERBOSE,
- _("Connecting to %s|%s|:%d... "), print, txt_addr, port);
+ logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "),
+ escnonprint (print), txt_addr, port);
else
logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port);
}
char *name;
BOUNDED_TO_ALLOCA (name_b, name_e, name);
logprintf (LOG_NOTQUIET,
- _("Error in Set-Cookie, field `%s'"), name);
+ _("Error in Set-Cookie, field `%s'"),
+ escnonprint (name));
}
state = S_ERROR;
break;
if (!silent)
logprintf (LOG_NOTQUIET,
_("Syntax error in Set-Cookie: %s at position %d.\n"),
- sc, p - sc);
+ escnonprint (sc), p - sc);
return NULL;
}
\f
{
logprintf (LOG_NOTQUIET,
"Cookie coming from %s attempted to set domain to %s\n",
- host, cookie->domain);
+ escnonprint (host), escnonprint (cookie->domain));
xfree (cookie->domain);
goto copy_domain;
}
if (!line)
return FTPRERR;
if (opt.server_response)
- logputs (LOG_NOTQUIET, line);
+ logputs (LOG_NOTQUIET, escnonprint (line));
else
- DEBUGP (("%s", line));
+ DEBUGP (("%s", escnonprint (line)));
if (ISDIGIT (line[0]) && ISDIGIT (line[1]) && ISDIGIT (line[2])
&& line[3] == ' ')
{
con->csock = -1;
/* Second: Login with proper USER/PASS sequence. */
- logprintf (LOG_VERBOSE, _("Logging in as %s ... "), user);
+ logprintf (LOG_VERBOSE, _("Logging in as %s ... "), escnonprint (user));
if (opt.server_response)
logputs (LOG_ALWAYS, "\n");
err = ftp_login (csock, logname, passwd);
}
if (!opt.server_response)
- logprintf (LOG_VERBOSE, "==> CWD %s ... ", target);
+ logprintf (LOG_VERBOSE, "==> CWD %s ... ", escnonprint (target));
err = ftp_cwd (csock, target);
/* FTPRERR, WRITEFAILED, FTPNSFOD */
switch (err)
case FTPNSFOD:
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("No such directory `%s'.\n\n"),
- u->dir);
+ escnonprint (u->dir));
fd_close (csock);
con->csock = -1;
return err;
if (opt.verbose)
{
if (!opt.server_response)
- logprintf (LOG_VERBOSE, "==> SIZE %s ... ", u->file);
+ logprintf (LOG_VERBOSE, "==> SIZE %s ... ", escnonprint (u->file));
}
err = ftp_size (csock, u->file, len);
if (restval && (cmd & DO_RETR))
{
if (!opt.server_response)
- logprintf (LOG_VERBOSE, "==> REST %s ... ", number_to_static_string (restval));
+ logprintf (LOG_VERBOSE, "==> REST %s ... ",
+ number_to_static_string (restval));
err = ftp_rest (csock, restval);
/* FTPRERR, WRITEFAILED, FTPRESTFAIL */
{
if (restval)
logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_VERBOSE, "==> RETR %s ... ", u->file);
+ logprintf (LOG_VERBOSE, "==> RETR %s ... ", escnonprint (u->file));
}
}
break;
case FTPNSFOD:
logputs (LOG_VERBOSE, "\n");
- logprintf (LOG_NOTQUIET, _("No such file `%s'.\n\n"), u->file);
+ logprintf (LOG_NOTQUIET, _("No such file `%s'.\n\n"),
+ escnonprint (u->file));
fd_close (dtsock);
fd_close (local_sock);
return err;
no-buffering on opt.lfile. */
while ((line = read_whole_line (fp)))
{
- logprintf (LOG_ALWAYS, "%s\n", line);
+ logprintf (LOG_ALWAYS, "%s\n", escnonprint (line));
xfree (line);
}
fclose (fp);
{
logprintf (LOG_VERBOSE, _("\
Already have correct symlink %s -> %s\n\n"),
- con->target, f->linkto);
+ con->target, escnonprint (f->linkto));
dlthis = 0;
break;
}
}
}
logprintf (LOG_VERBOSE, _("Creating symlink %s -> %s\n"),
- con->target, f->linkto);
+ con->target, escnonprint (f->linkto));
/* Unlink before creating symlink! */
unlink (con->target);
if (symlink (f->linkto, con->target) == -1)
- logprintf (LOG_NOTQUIET, "symlink: %s\n",
- strerror (errno));
+ logprintf (LOG_NOTQUIET, "symlink: %s\n", strerror (errno));
logputs (LOG_VERBOSE, "\n");
} /* have f->linkto */
#else /* not HAVE_SYMLINK */
case FT_DIRECTORY:
if (!opt.recursive)
logprintf (LOG_NOTQUIET, _("Skipping directory `%s'.\n"),
- f->name);
+ escnonprint (f->name));
break;
case FT_PLAINFILE:
/* Call the retrieve loop. */
break;
case FT_UNKNOWN:
logprintf (LOG_NOTQUIET, _("%s: unknown/unsupported file type.\n"),
- f->name);
+ escnonprint (f->name));
break;
} /* switch */
if (!accdir (newdir, ALLABS))
{
logprintf (LOG_VERBOSE, _("\
-Not descending to `%s' as it is excluded/not-included.\n"), newdir);
+Not descending to `%s' as it is excluded/not-included.\n"),
+ escnonprint (newdir));
continue;
}
{
if (f->type != FT_DIRECTORY && !acceptable (f->name))
{
- logprintf (LOG_VERBOSE, _("Rejecting `%s'.\n"), f->name);
+ logprintf (LOG_VERBOSE, _("Rejecting `%s'.\n"),
+ escnonprint (f->name));
f = delelement (f, &start);
}
else
{
if (has_insecure_name_p (f->name))
{
- logprintf (LOG_VERBOSE, _("Rejecting `%s'.\n"), f->name);
+ logprintf (LOG_VERBOSE, _("Rejecting `%s'.\n"),
+ escnonprint (f->name));
f = delelement (f, &start);
}
else
/* No luck. */
/* #### This message SUCKS. We should see what was the
reason that nothing was retrieved. */
- logprintf (LOG_VERBOSE, _("No matches on pattern `%s'.\n"), u->file);
+ logprintf (LOG_VERBOSE, _("No matches on pattern `%s'.\n"),
+ escnonprint (u->file));
}
else /* GETONE or GETALL */
{
/* No luck with the cache; resolve HOST. */
if (!silent && !numeric_address)
- logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
+ logprintf (LOG_VERBOSE, _("Resolving %s... "), escnonprint (host));
#ifdef ENABLE_IPV6
{
if (b < e && e[-1] == '\r')
--e;
BOUNDED_TO_ALLOCA (b, e, ln);
- logprintf (LOG_VERBOSE, "%s%s\n", prefix, ln);
+ logprintf (LOG_VERBOSE, "%s%s\n", prefix, escnonprint (ln));
}
/* Print the server response, line by line, omitting the trailing CR
sock = pconn.socket;
using_ssl = pconn.ssl;
logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
- pconn.host, pconn.port);
+ escnonprint (pconn.host), pconn.port);
DEBUGP (("Reusing fd %d.\n", sock));
}
}
{
failed_tunnel:
logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
- message ? message : "?");
+ message ? escnonprint (message) : "?");
xfree_null (message);
return CONSSLERR;
}
- xfree (message);
+ xfree_null (message);
/* SOCK is now *really* connected to u->host, so update CONN
to reflect this. That way register_persistent will
message = NULL;
statcode = response_status (resp, &message);
if (!opt.server_response)
- logprintf (LOG_VERBOSE, "%2d %s\n", statcode, message ? message : "");
+ logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
+ message ? escnonprint (message) : "");
else
{
logprintf (LOG_VERBOSE, "\n");
{
logprintf (LOG_VERBOSE,
_("Location: %s%s\n"),
- hs->newloc ? hs->newloc : _("unspecified"),
+ hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
hs->newloc ? _(" [following]") : "");
if (keep_alive)
skip_short_body (sock, contlen);
logputs (LOG_VERBOSE,
opt.ignore_length ? _("ignored") : _("unspecified"));
if (type)
- logprintf (LOG_VERBOSE, " [%s]\n", type);
+ logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type));
else
logputs (LOG_VERBOSE, "\n");
}
xfree (hurl);
}
logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
- tms, hstat.statcode, hstat.error);
+ tms, hstat.statcode, escnonprint (hstat.error));
logputs (LOG_VERBOSE, "\n");
free_hstat (&hstat);
xfree_null (dummy);
if (opt.spider)
{
- logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
+ logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode,
+ escnonprint (hstat.error));
xfree_null (dummy);
return RETROK;
}
fflush (fp);
}
\f
+/* String escape functions. */
+
+/* Count the characters of SOURCE for which ISPRINT is false.
+
+   "Printable" follows safe-ctype.h, that is, the printable set of the
+   "C" locale.  The count is used when shielding the user from binary
+   bytes embedded in server messages that are normally plain ASCII.  */
+
+static int
+count_nonprint (const char *source)
+{
+  int total = 0;
+  const char *cursor = source;
+
+  while (*cursor)
+    {
+      if (!ISPRINT (*cursor))
+        total++;
+      cursor++;
+    }
+  return total;
+}
+
+/* Write an escaped copy of SOURCE into DEST and NUL-terminate it.
+
+   Printable characters are copied verbatim.  Every other character
+   becomes a three-digit octal escape (\ooo) when FOR_URI is 0, or a
+   two-digit hex escape (%xx) when FOR_URI is non-zero.
+
+   The caller must make sure DEST is large enough for the escaped
+   text plus the terminating \0; no bounds checking is done here.  */
+
+static void
+copy_and_escape (const char *source, char *dest, int for_uri)
+{
+  const char *in = source;
+  char *out = dest;
+
+  while (*in)
+    {
+      if (ISPRINT (*in))
+        *out++ = *in;
+      else
+        {
+          /* Work on the unsigned value so the shifts below cannot see
+             a negative char.  */
+          const unsigned char byte = *in;
+          if (for_uri)
+            {
+              *out++ = '%';
+              *out++ = XNUM_TO_DIGIT (byte >> 4);
+              *out++ = XNUM_TO_DIGIT (byte & 0xf);
+            }
+          else
+            {
+              *out++ = '\\';
+              *out++ = '0' + (byte >> 6);
+              *out++ = '0' + ((byte >> 3) & 7);
+              *out++ = '0' + (byte & 7);
+            }
+        }
+      in++;
+    }
+  *out = '\0';
+}
+
+#define RING_SIZE 3
+struct ringel {
+ char *buffer;
+ int size;
+};
+
+/* Common worker for escnonprint and escnonprint_uri.
+
+   Returns STR itself when it contains no non-printable characters.
+   Otherwise returns a pointer to one of RING_SIZE static buffers
+   holding the escaped copy (\ooo escapes when FOR_URI is 0, %xx
+   escapes otherwise).  The buffers are reused round-robin, so a
+   returned pointer is only valid until RING_SIZE further escaping
+   calls have been made; the caller must not free it.  */
+
+static const char *
+escnonprint_internal (const char *str, int for_uri)
+{
+  static struct ringel ring[RING_SIZE];	/* ring data */
+  static int ringpos;			/* current ring position */
+
+  int nprcnt = count_nonprint (str);
+  if (nprcnt == 0)
+    /* If there are no non-printable chars in STR, don't bother
+       copying anything, just return STR.  */
+    return str;
+
+  {
+    /* Set up a pointer to the current ring position, so we can write
+       simply r->X instead of ring[ringpos].X.  */
+    struct ringel *r = ring + ringpos;
+
+    /* Every non-printable character is replaced with "\ooo",
+       i.e. with three *additional* chars (two in URI-mode).  Size
+       must also include the length of the original string and an
+       additional char for the terminating \0.
+
+       The conditional must be parenthesized: ?: binds more loosely
+       than +, so without the parentheses the whole sum would become
+       the condition and the string length would be dropped from the
+       result, under-allocating the buffer.  */
+    int needed_size = strlen (str) + 1
+      + (for_uri ? (2 * nprcnt) : (3 * nprcnt));
+
+    /* If the current buffer is uninitialized or too small,
+       (re)allocate it, and remember the new capacity so that later
+       calls can reuse the buffer instead of reallocating each time.  */
+    if (r->buffer == NULL || r->size < needed_size)
+      {
+        r->buffer = xrealloc (r->buffer, needed_size);
+        r->size = needed_size;
+      }
+
+    copy_and_escape (str, r->buffer, for_uri);
+    ringpos = (ringpos + 1) % RING_SIZE;
+    return r->buffer;
+  }
+}
+
+/* Return a pointer to a static copy of STR with the non-printable
+   characters escaped as \ooo.  If there are no non-printable
+   characters in STR, STR itself is returned and nothing is copied.
+
+   NOTE: since this function can return a pointer to static data, be
+   careful to copy its result before calling it again, and never free
+   or modify the result.  However, to be more useful with printf, it
+   maintains an internal ring of static buffers to return.  Currently
+   the ring size is 3, which means you can print up to three escaped
+   values in the same printf; if more is needed, bump RING_SIZE.
+
+   The ring lives in escnonprint_internal and is therefore shared
+   with escnonprint_uri: escaped results from either function use up
+   slots in the same ring.  */
+
+const char *
+escnonprint (const char *str)
+{
+ return escnonprint_internal (str, 0);
+}
+
+/* Return a pointer to a static copy of STR with the non-printable
+   characters escaped as %XX.  If there are no non-printable
+   characters in STR, STR itself is returned and nothing is copied.
+
+   This function returns a pointer to static data which will be
+   overwritten by subsequent calls -- see escnonprint for details.
+   The static buffers come from the same ring that escnonprint uses
+   (both go through escnonprint_internal), so calls to either
+   function count toward the RING_SIZE limit.  The result must not be
+   freed or modified by the caller.  */
+
+const char *
+escnonprint_uri (const char *str)
+{
+ return escnonprint_internal (str, 1);
+}
+\f
/* When SIGHUP or SIGUSR1 are received, the output is redirected
elsewhere. Such redirection is only allowed once. */
enum { RR_NONE, RR_REQUESTED, RR_DONE } redirect_request = RR_NONE;
void log_close PARAMS ((void));
void log_request_redirect_output PARAMS ((const char *));
+const char *escnonprint PARAMS ((const char *));
+const char *escnonprint_uri PARAMS ((const char *));
+
#endif /* LOG_H */
newloc_parsed = url_parse (mynewloc, &up_error_code);
if (!newloc_parsed)
{
- logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
+ logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
url_error (up_error_code));
url_free (u);
xfree (url);
#include "config.h"
+#define _GNU_SOURCE /* to get iswblank */
+
#include <assert.h>
#include <stdlib.h>
+#include <string.h>
#include <wchar.h>
+#include <wctype.h>
#include "wget.h"
memset (str, 0, sizeof (*str));
}
+#if 0 /* unused */
static void
string_append_delim (struct string_t *dst)
{
assert_valid_string (dst);
string_cat (dst, line_delim, line_delim_len);
}
+#endif
static int
is_line_delim (const wchar_t *wsz)