+/* String escape functions. */
+
+/* Return the number of non-printable characters in SOURCE.
+ Non-printable characters are determined as per safe-ctype.c. */
+
+static int
+count_nonprint (const char *source)
+{
+ const char *p;
+ int cnt;
+ for (p = source, cnt = 0; *p; p++)
+ if (!ISPRINT (*p))
+ ++cnt;
+ return cnt;
+}
+
+/* Copy SOURCE to DEST, escaping non-printable characters.
+
+ Non-printable refers to anything outside the non-control ASCII
+ range (32-126) which means that, for example, CR, LF, and TAB are
+ considered non-printable along with ESC, BS, and other control
+ chars. This is by design: it makes sure that messages from remote
+ servers cannot be easily used to deceive the users by mimicking
+ Wget's output. Disallowing non-ASCII characters is another
+ necessary security measure, which makes sure that remote servers
+ cannot garble the screen or guess the local charset and perform
+ homographic attacks.
+
+ Of course, the above mandates that escnonprint only be used in
+ contexts expected to be ASCII, such as when printing host names,
+ URL components, HTTP headers, FTP server messages, and the like.
+
+ ESCAPE is the leading character of the escape sequence. BASE
+ should be the base of the escape sequence, and must be either 8 for
+ octal or 16 for hex.
+
+ DEST must point to a location with sufficient room to store an
+ encoded version of SOURCE. */
+
+static void
+copy_and_escape (const char *source, char *dest, char escape, int base)
+{
+ const char *from = source;
+ char *to = dest;
+ unsigned char c;
+
+ /* Copy chars from SOURCE to DEST, escaping non-printable ones. */
+ switch (base)
+ {
+ case 8:
+ while ((c = *from++) != '\0')
+ if (ISPRINT (c))
+ *to++ = c;
+ else
+ {
+ *to++ = escape;
+ *to++ = '0' + (c >> 6);
+ *to++ = '0' + ((c >> 3) & 7);
+ *to++ = '0' + (c & 7);
+ }
+ break;
+ case 16:
+ while ((c = *from++) != '\0')
+ if (ISPRINT (c))
+ *to++ = c;
+ else
+ {
+ *to++ = escape;
+ *to++ = XNUM_TO_DIGIT (c >> 4);
+ *to++ = XNUM_TO_DIGIT (c & 0xf);
+ }
+ break;
+ default:
+ abort ();
+ }
+ *to = '\0';
+}
+
+#define RING_SIZE 3
+struct ringel {
+ char *buffer;
+ int size;
+};
+static struct ringel ring[RING_SIZE]; /* ring data */
+
+static const char *
+escnonprint_internal (const char *str, char escape, int base)
+{
+ static int ringpos; /* current ring position */
+ int nprcnt;
+
+ assert (base == 8 || base == 16);
+
+ nprcnt = count_nonprint (str);
+ if (nprcnt == 0)
+ /* If there are no non-printable chars in STR, don't bother
+ copying anything, just return STR. */
+ return str;
+
+ {
+ /* Set up a pointer to the current ring position, so we can write
+ simply r->X instead of ring[ringpos].X. */
+ struct ringel *r = ring + ringpos;
+
+ /* Every non-printable character is replaced with the escape char
+ and three (or two, depending on BASE) *additional* chars. Size
+ must also include the length of the original string and one
+ additional char for the terminating \0. */
+ int needed_size = strlen (str) + 1 + (base == 8 ? 3 * nprcnt : 2 * nprcnt);
+
+ /* If the current buffer is uninitialized or too small,
+ (re)allocate it. */
+ if (r->buffer == NULL || r->size < needed_size)
+ {
+ r->buffer = xrealloc (r->buffer, needed_size);
+ r->size = needed_size;
+ }
+
+ copy_and_escape (str, r->buffer, escape, base);
+ ringpos = (ringpos + 1) % RING_SIZE;
+ return r->buffer;
+ }
+}
+
+/* Return a pointer to a static copy of STR with the non-printable
+ characters escaped as \ooo. If there are no non-printable
+ characters in STR, STR is returned. See copy_and_escape for more
+ information on which characters are considered non-printable.
+
+ DON'T call this function on translated strings because escaping
+ will break them. Don't call it on literal strings from the source,
+ which are by definition trusted. If newlines are allowed in the
+ string, escape and print it line by line because escaping the whole
+ string will convert newlines to \012. (This is so that expectedly
+ single-line messages cannot use embedded newlines to mimic Wget's
+ output and deceive the user.)
+
+ escnonprint doesn't quote its escape character because it is notf
+ meant as a general and reversible quoting mechanism, but as a quick
+ way to defang binary junk sent by malicious or buggy servers.
+
+ NOTE: since this function can return a pointer to static data, be
+ careful to copy its result before calling it again. However, to be
+ more useful with printf, it maintains an internal ring of static
+ buffers to return. Currently the ring size is 3, which means you
+ can print up to three values in the same printf; if more is needed,
+ bump RING_SIZE. */
+
+const char *
+escnonprint (const char *str)
+{
+ return escnonprint_internal (str, '\\', 8);
+}
+
+/* Return a pointer to a static copy of STR with the non-printable
+ characters escaped as %XX. If there are no non-printable
+ characters in STR, STR is returned.
+
+ See escnonprint for usage details. */
+
+const char *
+escnonprint_uri (const char *str)
+{
+ return escnonprint_internal (str, '%', 16);
+}
+
+void
+log_cleanup (void)
+{
+ int i;
+ for (i = 0; i < countof (ring); i++)
+ xfree_null (ring[i].buffer);
+}
+\f