/* URL handling.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
- 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+ Inc.
This file is part of GNU Wget.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#ifdef HAVE_UNISTD_H
-# include <unistd.h>
-#endif
+#include <unistd.h>
#include <errno.h>
#include <assert.h>
*p++ = '?';
if (flags & scm_has_fragment)
*p++ = '#';
- *p++ = '\0';
+ *p = '\0';
return seps;
}
The idea is to have a convenient and efficient way to construct a
string by having various functions append data to it. Instead of
passing the obligatory BASEVAR, SIZEVAR and TAILPOS to all the
- functions in questions, we pass the pointer to this struct. */
+ functions in question, we pass the pointer to this struct.
+
+ Functions that write to the members in this struct must make sure
+ that base remains null terminated by calling append_null().
+ */
struct growable {
char *base;
- int size;
- int tail;
+ int size; /* memory allocated for base */
+ int tail; /* string length; the tail position moved by TAIL_INCR */
};
/* Ensure that the string can accept APPEND_COUNT more characters past
/* Move the tail position by APPEND_COUNT characters. */
#define TAIL_INCR(r, append_count) ((r)->tail += append_count)
-/* Append the string STR to DEST. NOTICE: the string in DEST is not
- terminated. */
+/* Store a terminating NUL character at the tail of DEST, after making
+   sure DEST has room for one more character.  The tail position is
+   not advanced, so the terminator is not counted in the string
+   length.  */
static void
-append_string (const char *str, struct growable *dest)
+append_null (struct growable *dest)
{
- int l = strlen (str);
- GROW (dest, l);
- memcpy (TAIL (dest), str, l);
- TAIL_INCR (dest, l);
+ GROW (dest, 1);
+ *TAIL (dest) = 0;
}
-/* Append CH to DEST. For example, append_char (0, DEST)
- zero-terminates DEST. */
+/* Truncate the string in DEST to at most LENGTH characters and
+   re-terminate it.  A LENGTH at or beyond the current tail leaves the
+   tail where it is; the terminator is rewritten in either case.  */
+static void
+shorten_length (size_t length, struct growable *dest)
+{
+  const size_t current = (size_t) dest->tail;
+
+  if (current > length)
+    dest->tail = (int) length;
+
+  append_null (dest);
+}
+/* Append CH to DEST and keep DEST NUL-terminated.  A CH of '\0' is
+   not stored as a character: the tail is left where it is and only
+   the terminator is rewritten by append_null().  */
static void
append_char (char ch, struct growable *dest)
{
- GROW (dest, 1);
- *TAIL (dest) = ch;
- TAIL_INCR (dest, 1);
+ if (ch)
+ {
+ GROW (dest, 1);
+ *TAIL (dest) = ch;
+ TAIL_INCR (dest, 1);
+ }
+
+ append_null (dest);
}
+/* Copy the characters of STR onto the tail of DEST and leave DEST
+   NUL-terminated.  An empty STR only rewrites the terminator.  */
+static void
+append_string (const char *str, struct growable *dest)
+{
+  int len = strlen (str);
+
+  if (len != 0)
+    {
+      GROW (dest, len);
+      memcpy (TAIL (dest), str, len);
+      TAIL_INCR (dest, len);
+    }
+
+  append_null (dest);
+}
+
+
enum {
filechr_not_unix = 1, /* unusable on Unix, / and \0 */
filechr_not_windows = 2, /* unusable on Windows, one of \|/<>?:*" */
query, normally '?'. Since Windows cannot handle '?' as part of
file name, we use '@' instead there. */
#define FN_QUERY_SEP (opt.restrict_files_os != restrict_windows ? '?' : '@')
+#define FN_QUERY_SEP_STR (opt.restrict_files_os != restrict_windows ? "?" : "@") /* FN_QUERY_SEP as a string literal */
/* Quote path element, characters in [b, e), as file name, and append
the quoted string to DEST. Each character is quoted as per
}
TAIL_INCR (dest, outlen);
+ append_null (dest);
}
/* Append to DEST the directory structure that corresponds the
}
}
-/* Return a unique file name that matches the given URL as good as
+/* Return a unique file name that matches the given URL as well as
possible. Does not create directories on the file system. */
char *
-url_file_name (const struct url *u)
+url_file_name (const struct url *u, char *replaced_filename)
{
struct growable fnres; /* stands for "file name result" */
+ struct growable temp_fnres;
- const char *u_file, *u_query;
- char *fname, *unique;
- char *index_filename = "index.html"; /* The default index file is index.html */
+ const char *u_file;
+ char *fname, *unique, *fname_len_check;
+ const char *index_filename = "index.html"; /* The default index file is index.html */
+ size_t max_length;
fnres.base = NULL;
fnres.size = 0;
fnres.tail = 0;
+ temp_fnres.base = NULL;
+ temp_fnres.size = 0;
+ temp_fnres.tail = 0;
+
/* If an alternative index file was defined, change index_filename */
if (opt.default_page)
index_filename = opt.default_page;
append_dir_structure (u, &fnres);
}
- /* Add the file name. */
- if (fnres.tail)
- append_char ('/', &fnres);
- u_file = *u->file ? u->file : index_filename;
- append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres);
+ if (!replaced_filename)
+ {
+ /* Create the filename. */
+ u_file = *u->file ? u->file : index_filename;
+
+ /* Append "?query" to the file name, even if empty,
+ * and create fname_len_check. */
+ if (u->query)
+ fname_len_check = concat_strings (u_file, FN_QUERY_SEP_STR, u->query, NULL);
+ else
+ fname_len_check = strdupdelim (u_file, u_file + strlen (u_file));
+ }
+ else
+ {
+ u_file = replaced_filename;
+ fname_len_check = strdupdelim (u_file, u_file + strlen (u_file));
+ }
+
+ append_uri_pathel (fname_len_check,
+ fname_len_check + strlen (fname_len_check), false, &temp_fnres);
- /* Append "?query" to the file name. */
- u_query = u->query && *u->query ? u->query : NULL;
- if (u_query)
+ /* Zero-terminate the temporary file name. */
+ append_char ('\0', &temp_fnres);
+
+ /* Check that the length of the file name is acceptable. */
+ max_length = get_max_length (fnres.base, fnres.tail, _PC_NAME_MAX) - CHOMP_BUFFER;
+ if (max_length > 0 && strlen (temp_fnres.base) > max_length)
{
- append_char (FN_QUERY_SEP, &fnres);
- append_uri_pathel (u_query, u_query + strlen (u_query), true, &fnres);
+ logprintf (LOG_NOTQUIET, "The name is too long, %lu chars total.\n",
+ (unsigned long) strlen (temp_fnres.base));
+ logprintf (LOG_NOTQUIET, "Trying to shorten...\n");
+
+ /* Shorten the file name. */
+ temp_fnres.base[max_length] = '\0';
+
+ logprintf (LOG_NOTQUIET, "New name is %s.\n", temp_fnres.base);
}
- /* Zero-terminate the file name. */
- append_char ('\0', &fnres);
+ free (fname_len_check);
+
+ /* The filename has already been 'cleaned' by append_uri_pathel() above. So,
+ * just append it. */
+ if (fnres.tail)
+ append_char ('/', &fnres);
+ append_string (temp_fnres.base, &fnres);
fname = fnres.base;
+ /* Make a final check that the path length is acceptable? */
+ /* TODO: check fnres.base for path length problem */
+
+ free (temp_fnres.base);
+
/* Check the cases in which the unique extensions are not used:
1) Clobbering is turned off (-nc).
2) Retrieval with regetting.
append_string (test_array[i].original_url, &dest);
append_uri_pathel (p, p + strlen(p), test_array[i].escaped, &dest);
- append_char ('\0', &dest);
mu_assert ("test_append_uri_pathel: wrong result",
strcmp (dest.base, test_array[i].expected_result) == 0);