From 67e6027ea130d06aeff365adfbc83f34d019b968 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Sat, 29 Sep 2012 13:47:53 +0200 Subject: [PATCH] Add support for file names longer than MAX_FILE. --- NEWS | 5 ++++ src/ChangeLog | 8 +++++++ src/url.c | 64 +++++++++++++++++++++++++++++++++++++++------------ src/url.h | 10 ++++++++ src/utils.c | 50 ++++++++++++++++++++++++++++++++++++++++ src/utils.h | 2 ++ 6 files changed, 124 insertions(+), 15 deletions(-) diff --git a/NEWS b/NEWS index 6ee857c1..be845d1b 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,11 @@ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, See the end for copying conditions. Please send GNU Wget bug reports to . + +* Changes in Wget X.Y.Z + +** Add support for file names longer than MAX_FILE. + * Changes in Wget 1.14 diff --git a/src/ChangeLog b/src/ChangeLog index 862ce36d..07ff19c5 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,11 @@ +2012-09-29 Tim Ruehsen + + * url.h (CHOMP_BUFFER): Add definition. + * url.c (url_file_name): New local variables `fname_len_check' and + `max_length'. Check that the length of the file name is acceptable. + * utils.h (get_max_length): Declare function. + * utils.c (get_max_length): New function. + 2012-09-28 Steven Schubiger * src/recur.c (retrieve_tree): Combine duplicated code. diff --git a/src/url.c b/src/url.c index e44dfcd2..4a1c9f10 100644 --- a/src/url.c +++ b/src/url.c @@ -1361,6 +1361,7 @@ UWC, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */ query, normally '?'. Since Windows cannot handle '?' as part of file name, we use '@' instead there. */ #define FN_QUERY_SEP (opt.restrict_files_os != restrict_windows ? '?' : '@') +#define FN_QUERY_SEP_STR (opt.restrict_files_os != restrict_windows ? "?" : "@") /* Quote path element, characters in [b, e), as file name, and append the quoted string to DEST. Each character is quoted as per @@ -1494,22 +1495,28 @@ append_dir_structure (const struct url *u, struct growable *dest) } } -/* Return a unique file name that matches the given URL as good as +/* Return a unique file name that matches the given URL as well as possible. Does not create directories on the file system. */ char * url_file_name (const struct url *u, char *replaced_filename) { struct growable fnres; /* stands for "file name result" */ + struct growable temp_fnres; const char *u_file; - char *fname, *unique; + char *fname, *unique, *fname_len_check; const char *index_filename = "index.html"; /* The default index file is index.html */ + size_t max_length; fnres.base = NULL; fnres.size = 0; fnres.tail = 0; + temp_fnres.base = NULL; + temp_fnres.size = 0; + temp_fnres.tail = 0; + /* If an alternative index file was defined, change index_filename */ if (opt.default_page) index_filename = opt.default_page; @@ -1555,33 +1562,60 @@ url_file_name (const struct url *u, char *replaced_filename) if (!replaced_filename) { - /* Add the file name. */ - if (fnres.tail) - append_char ('/', &fnres); + /* Create the filename. */ u_file = *u->file ? u->file : index_filename; - append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres); - /* Append "?query" to the file name, even if empty */ + /* Append "?query" to the file name, even if empty, + * and create fname_len_check. */ if (u->query) - { - append_char (FN_QUERY_SEP, &fnres); - append_uri_pathel (u->query, u->query + strlen (u->query), - true, &fnres); - } + fname_len_check = concat_strings (u_file, FN_QUERY_SEP_STR, u->query, NULL); + else + fname_len_check = strdupdelim (u_file, u_file + strlen (u_file)); } else { - if (fnres.tail) - append_char ('/', &fnres); u_file = replaced_filename; - append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres); + fname_len_check = strdupdelim (u_file, u_file + strlen (u_file)); } + append_uri_pathel (fname_len_check, + fname_len_check + strlen (fname_len_check), false, &temp_fnres); + + /* Zero-terminate the temporary file name. */ + append_char ('\0', &temp_fnres); + + /* Check that the length of the file name is acceptable. */ + max_length = get_max_length (fnres.base, fnres.tail, _PC_NAME_MAX) - CHOMP_BUFFER; + if (max_length > 0 && strlen (temp_fnres.base) > max_length) + { + logprintf (LOG_NOTQUIET, "The name is too long, %lu chars total.\n", + (unsigned long) strlen (temp_fnres.base)); + logprintf (LOG_NOTQUIET, "Trying to shorten...\n"); + + /* Shorten the file name. */ + temp_fnres.base[max_length] = '\0'; + + logprintf (LOG_NOTQUIET, "New name is %s.\n", temp_fnres.base); + } + + free (fname_len_check); + + /* The filename has already been 'cleaned' by append_uri_pathel() above. So, + * just append it. */ + if (fnres.tail) + append_char ('/', &fnres); + append_string (temp_fnres.base, &fnres); + /* Zero-terminate the file name. */ append_char ('\0', &fnres); fname = fnres.base; + /* Make a final check that the path length is acceptable? */ + /* TODO: check fnres.base for path length problem */ + + free (temp_fnres.base); + /* Check the cases in which the unique extensions are not used: 1) Clobbering is turned off (-nc). 2) Retrieval with regetting. diff --git a/src/url.h b/src/url.h index edb6b06b..b7f4366d 100644 --- a/src/url.h +++ b/src/url.h @@ -37,6 +37,16 @@ as that of the covered work. */ #define DEFAULT_FTP_PORT 21 #define DEFAULT_HTTPS_PORT 443 +/* This represents how many characters less than the OS max name length a file + * should be. More precisely, a file name should be at most + * (NAME_MAX - CHOMP_BUFFER) characters in length. This number was arrived at + * by adding the lengths of all possible strings that could be appended to a + * file name later in the code (e.g. ".orig", ".html", etc.). This is + * hopefully plenty of extra characters, but I am not guaranteeing that a file + * name will be of the proper length by the time the code wants to open a + * file descriptor. */ +#define CHOMP_BUFFER 19 + /* Specifies how, or whether, user auth information should be included * in URLs regenerated from URL parse structures. */ enum url_auth_mode { diff --git a/src/utils.c b/src/utils.c index 567dc359..729ec50d 100644 --- a/src/utils.c +++ b/src/utils.c @@ -2494,6 +2494,56 @@ print_decimal (double number) return buf; } +/* Get the maximum name length for the given path. */ +/* Return 0 if length is unknown. */ +size_t +get_max_length (const char *path, int length, int name) +{ + long ret; + char *p, *d; + + /* Make a copy of the path that we can modify. */ + p = path ? strdupdelim (path, path + length) : strdup (""); + + for (;;) + { + errno = 0; + /* For an empty path query the current directory. */ + ret = pathconf (*p ? p : ".", name); + if (!(ret < 0 && errno == ENOENT)) + break; + + /* The path does not exist yet, but may be created. */ + /* Already at current or root directory, give up. */ + if (!*p || strcmp (p, "/") == 0) + break; + + /* Remove one directory level and try again. */ + d = strrchr (p, '/'); + if (d == p) + p[1] = '\0'; /* check root directory */ + else if (d) + *d = '\0'; /* remove last directory part */ + else + *p = '\0'; /* check current directory */ + } + + xfree (p); + + if (ret < 0) + { + /* pathconf() has a message for us. */ + if (errno != 0) + perror ("pathconf"); + + /* If (errno == 0) then there is no max length. + Even on error return 0 so the caller can continue. */ + return 0; + } + + return ret; +} + #ifdef TESTING const char * diff --git a/src/utils.h b/src/utils.h index 409cdc5a..67d8d25d 100644 --- a/src/utils.h +++ b/src/utils.h @@ -155,6 +155,8 @@ void stable_sort (void *, size_t, size_t, int (*) (const void *, const void *)); const char *print_decimal (double); +size_t get_max_length (const char *path, int length, int name); + extern unsigned char char_prop[]; #endif /* UTILS_H */ -- 2.39.2