@cindex file names, restrict
@cindex Windows file names
-@itemx --restrict-file-names=none|unix|windows
-Restrict characters that may occur in local file names created by Wget
-from remote URLs. Characters that are considered @dfn{unsafe} under a
-set of restrictions are escaped, i.e. replaced with @samp{%XX}, where
-@samp{XX} is the hexadecimal code of the character.
-
-The default for this option depends on the operating system: on Unix and
-Unix-like OS'es, it defaults to ``unix''. Under Windows and Cygwin, it
-defaults to ``windows''. Changing the default is useful when you are
-using a non-native partition, e.g. when downloading files to a Windows
-partition mounted from Linux, or when using NFS-mounted or SMB-mounted
-Windows drives.
-
-When set to ``none'', the only characters that are quoted are those that
-are impossible to get into a file name---the NUL character and @samp{/}.
-The control characters, newline, etc. are all placed into file names.
-
-When set to ``unix'', additional unsafe characters are those in the
-0--31 range and in the 128--159 range. This is because those characters
-are typically not printable.
-
-When set to ``windows'', all of the above are quoted, along with
-@samp{\}, @samp{|}, @samp{:}, @samp{?}, @samp{"}, @samp{*}, @samp{<},
-and @samp{>}. Additionally, Wget in Windows mode uses @samp{+} instead
-of @samp{:} to separate host and port in local file names, and uses
+@itemx --restrict-file-names=@var{mode}
+Change which characters found in remote URLs may show up in local file
+names generated from those URLs. Characters that are @dfn{restricted}
+by this option are escaped, i.e. replaced with @samp{%HH}, where
+@samp{HH} is the hexadecimal number that corresponds to the restricted
+character.
+
+By default, Wget escapes the characters that are not valid as part of
+file names on your operating system, as well as control characters that
+are typically unprintable. This option is useful for changing these
+defaults, either because you are downloading to a non-native partition,
+or because you want to disable escaping of the control characters.
+
+When mode is set to ``unix'', Wget escapes the character @samp{/} and
+the control characters in the ranges 0--31 and 128--159. This is the
+default on Unix-like OS'es.
+
+When mode is set to ``windows'', Wget escapes the characters @samp{\},
+@samp{|}, @samp{/}, @samp{:}, @samp{?}, @samp{"}, @samp{*}, @samp{<},
+@samp{>}, and the control characters in the ranges 0--31 and 128--159.
+In addition to this, Wget in Windows mode uses @samp{+} instead of
+@samp{:} to separate host and port in local file names, and uses
@samp{@@} instead of @samp{?} to separate the query portion of the file
name from the rest. Therefore, a URL that would be saved as
@samp{www.xemacs.org:4300/search.pl?input=blah} in Unix mode would be
saved as @samp{www.xemacs.org+4300/search.pl@@input=blah} in Windows
-mode.
+mode. This mode is the default on Windows.
+
+If you append @samp{,nocontrol} to the mode, as in
+@samp{unix,nocontrol}, escaping of the control characters is also
+switched off. You can use @samp{--restrict-file-names=nocontrol} to
+turn off escaping of control characters without affecting the choice of
+the OS to use as file name restriction mode.
@end table
@node Directory Options, HTTP Options, Download Options, Invoking
If set to on, remove @sc{ftp} listings downloaded by Wget. Setting it
to off is the same as @samp{-nr}.
-@item restrict_file_names = off/unix/windows
+@item restrict_file_names = unix/windows
Restrict the file names generated by Wget from URLs. See
@samp{--restrict-file-names} for a more detailed description.
{ "reject", &opt.rejects, cmd_vector },
{ "relativeonly", &opt.relative_only, cmd_boolean },
{ "removelisting", &opt.remove_listing, cmd_boolean },
- { "restrictfilenames", &opt.restrict_file_names, cmd_spec_restrict_file_names },
+ { "restrictfilenames", NULL, cmd_spec_restrict_file_names },
{ "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
{ "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
{ "robots", &opt.use_robots, cmd_boolean },
/* The default for file name restriction defaults to the OS type. */
#if !defined(WINDOWS) && !defined(__CYGWIN__)
- opt.restrict_file_names = restrict_shell;
+ opt.restrict_files_os = restrict_unix;
#else
- opt.restrict_file_names = restrict_windows;
+ opt.restrict_files_os = restrict_windows;
#endif
+ opt.restrict_files_ctrl = 1;
}
\f
/* Return the user's home directory (strdup-ed), or NULL if none is
static int
cmd_spec_restrict_file_names (const char *com, const char *val, void *closure)
{
- /* The currently accepted values are `none', `unix', and
- `windows'. */
- if (0 == strcasecmp (val, "none"))
- opt.restrict_file_names = restrict_none;
- else if (0 == strcasecmp (val, "unix"))
- opt.restrict_file_names = restrict_shell;
- else if (0 == strcasecmp (val, "windows"))
- opt.restrict_file_names = restrict_windows;
+ int restrict_os = opt.restrict_files_os; /* work on copies; opt is written only after VAL fully validates */
+ int restrict_ctrl = opt.restrict_files_ctrl;
+
+ const char *end = strchr (val, ','); /* END delimits the first comma-separated word of VAL */
+ if (!end)
+ end = val + strlen (val); /* no comma: the first word is the whole of VAL */
+
+#define VAL_IS(string_literal) BOUNDED_EQUAL (val, end, string_literal)
+
+ if (VAL_IS ("unix"))
+ restrict_os = restrict_unix;
+ else if (VAL_IS ("windows"))
+ restrict_os = restrict_windows;
+ else if (VAL_IS ("nocontrol")) /* a bare `nocontrol' leaves the OS mode untouched */
+ restrict_ctrl = 0;
else
{
+ err: /* also reached by goto when the text after the comma is not accepted */
fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
exec_name, com, val);
return 0;
}
+
+#undef VAL_IS
+
+ if (*end) /* a comma was found: the remainder must be exactly `nocontrol' */
+ {
+ if (!strcmp (end + 1, "nocontrol"))
+ restrict_ctrl = 0;
+ else
+ goto err;
+ }
+
+ opt.restrict_files_os = restrict_os; /* success: commit both settings */
+ opt.restrict_files_ctrl = restrict_ctrl;
return 1;
}
}
enum {
- filechr_unsafe_always = 1, /* always unsafe, e.g. / or \0 */
- filechr_unsafe_shell = 2, /* unsafe for shell use, e.g. control chars */
- filechr_unsafe_windows = 2, /* disallowed on Windows file system */
+ filechr_not_unix = 1, /* unusable on Unix, / and \0 */
+ filechr_not_windows = 2, /* unusable on Windows, one of \|/<>?:*" */
+ filechr_control = 4, /* a control character, 0-31 or 128-159 */
};
#define FILE_CHAR_TEST(c, mask) (filechr_table[(unsigned char)(c)] & (mask))
/* Shorthands for the table: */
-#define A filechr_unsafe_always
-#define S filechr_unsafe_shell
-#define W filechr_unsafe_windows
+#define U filechr_not_unix
+#define W filechr_not_windows
+#define C filechr_control
-/* Forbidden chars:
+#define UW U|W
+#define UWC U|W|C
- always: \0, /
- Unix shell: 0-31, 128-159
- Windows: \, |, /, <, >, ?, :
+/* Table of characters unsafe under various conditions (see above).
Arguably we could also claim `%' to be unsafe, since we use it as
the escape character. If we ever want to be able to reliably
const static unsigned char filechr_table[256] =
{
- A, S, S, S, S, S, S, S, /* NUL SOH STX ETX EOT ENQ ACK BEL */
- S, S, S, S, S, S, S, S, /* BS HT LF VT FF CR SO SI */
- S, S, S, S, S, S, S, S, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
- S, S, S, S, S, S, S, S, /* CAN EM SUB ESC FS GS RS US */
+UWC, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */
+ C, C, C, C, C, C, C, C, /* BS HT LF VT FF CR SO SI */
+ C, C, C, C, C, C, C, C, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
+ C, C, C, C, C, C, C, C, /* CAN EM SUB ESC FS GS RS US */
0, 0, W, 0, 0, 0, 0, 0, /* SP ! " # $ % & ' */
- 0, 0, W, 0, 0, 0, 0, A, /* ( ) * + , - . / */
+ 0, 0, W, 0, 0, 0, 0, UW, /* ( ) * + , - . / */
0, 0, 0, 0, 0, 0, 0, 0, /* 0 1 2 3 4 5 6 7 */
0, 0, W, 0, W, 0, W, W, /* 8 9 : ; < = > ? */
0, 0, 0, 0, 0, 0, 0, 0, /* @ A B C D E F G */
0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */
0, 0, 0, 0, 0, 0, 0, 0, /* x y z { | } ~ DEL */
- S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 128-143 */
- S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 144-159 */
+ C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, /* 128-143 */
+ C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, /* 144-159 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
-/* Return non-zero if character CH is unsafe for use in file or
- directory name. Called by append_uri_pathel. */
-
-static inline int
-file_unsafe_char (char ch, int restrict)
-{
- int mask = filechr_unsafe_always;
- if (restrict == restrict_shell)
- mask |= filechr_unsafe_shell;
- else if (restrict == restrict_windows)
- mask |= (filechr_unsafe_shell | filechr_unsafe_windows);
- return FILE_CHAR_TEST (ch, mask);
-}
-
/* FN_PORT_SEP is the separator between host and port in file names
for non-standard port numbers. On Unix this is normally ':', as in
"www.xemacs.org:4001/index.html". Under Windows, we set it to +
because Windows can't handle ':' in file names. */
-#define FN_PORT_SEP (opt.restrict_file_names != restrict_windows ? ':' : '+')
+#define FN_PORT_SEP (opt.restrict_files_os != restrict_windows ? ':' : '+')
/* FN_QUERY_SEP is the separator between the file name and the URL
query, normally '?'. Since Windows cannot handle '?' as part of
file name, we use '@' instead there. */
-#define FN_QUERY_SEP (opt.restrict_file_names != restrict_windows ? '?' : '@')
+#define FN_QUERY_SEP (opt.restrict_files_os != restrict_windows ? '?' : '@')
/* Quote path element, characters in [b, e), as file name, and append
the quoted string to DEST. Each character is quoted as per
const char *p;
int quoted, outlen;
- /* Currently restrict_for_windows is determined at compile time
- only. But some users download files to Windows partitions; they
- should be able to say --windows-file-names so Wget escapes
- characters invalid on Windows. Similar run-time restrictions for
- other file systems can be implemented. */
- const int restrict = opt.restrict_file_names;
+ int mask;
+ if (opt.restrict_files_os == restrict_unix)
+ mask = filechr_not_unix;
+ else
+ mask = filechr_not_windows;
+ if (opt.restrict_files_ctrl)
+ mask |= filechr_control;
/* Copy [b, e) to PATHEL and URL-unescape it. */
BOUNDED_TO_ALLOCA (b, e, pathel);
add for file quoting. */
quoted = 0;
for (p = pathel; *p; p++)
- if (file_unsafe_char (*p, restrict))
+ if (FILE_CHAR_TEST (*p, mask))
++quoted;
/* p - pathel is the string length. Each quoted char means two
char *q = TAIL (dest);
for (p = pathel; *p; p++)
{
- if (!file_unsafe_char (*p, restrict))
+ if (!FILE_CHAR_TEST (*p, mask))
*q++ = *p;
else
{