+2002-04-12 Ian Abbott <abbotti@mev.co.uk>
+
+ * utils.c (has_html_suffix_p): New function to test filename for
+ common html extensions.
+
+ * utils.h: Declare it.
+
+ * http.c (http_loop): Use it instead of previous test.
+
+ * retr.c (retrieve_url): Ditto.
+
+ * recur.c (download_child_p): Ditto.
+
2002-04-12 Hrvoje Niksic <hniksic@arsdigita.com>
* config.h.in: Define _VA_LIST on Solaris to prevent stdio.h from
int use_ts, got_head = 0; /* time-stamping info */
char *filename_plus_orig_suffix;
char *local_filename = NULL;
- char *tms, *suf, *locf, *tmrate;
+ char *tms, *locf, *tmrate;
uerr_t err;
time_t tml = -1, tmr = -1; /* local and remote time-stamps */
long local_size = 0; /* the size of the local file */
*dt |= RETROKF;
/* #### Bogusness alert. */
- /* If its suffix is "html" or "htm", assume text/html. */
- if (((suf = suffix (*hstat.local_file)) != NULL)
- && (!strcmp (suf, "html") || !strcmp (suf, "htm")))
+ /* If its suffix is "html" or "htm" or similar, assume text/html. */
+ if (has_html_suffix_p (*hstat.local_file))
*dt |= TEXTHTML;
FREE_MAYBE (dummy);
/* 6. */
{
- char *suf;
/* Check for acceptance/rejection rules. We ignore these rules
for HTML documents because they might lead to other files which
need to be downloaded. Of course, we don't know which
* u->file is not "" (i.e. it is not a directory)
and either:
+ there is no file suffix,
- + or there is a suffix, but is not "html" or "htm",
+ + or there is a suffix, but it is not "html" or "htm" or similar,
+ both:
- recursion is not infinite,
- and we are at its very end. */
if (u->file[0] != '\0'
- && ((suf = suffix (url)) == NULL
- || (0 != strcmp (suf, "html") && 0 != strcmp (suf, "htm"))
+ && (!has_html_suffix_p (url)
|| (opt.reclevel != INFINITE_RECURSION && depth >= opt.reclevel)))
{
if (!acceptable (u->file))
/* There is a possibility of having HTTP being redirected to
FTP. In these cases we must decide whether the text is HTML
- according to the suffix. The HTML suffixes are `.html' and
- `.htm', case-insensitive. */
+ according to the suffix. The HTML suffixes are `.html',
+ `.htm' and a few others, case-insensitive. */
if (redirection_count && local_file && u->scheme == SCHEME_FTP)
{
- char *suf = suffix (local_file);
- if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
+ if (has_html_suffix_p (local_file))
*dt |= TEXTHTML;
}
}
return NULL;
}
+/* Return non-zero if FNAME ends with a typical HTML suffix, and zero
+   otherwise.  The suffix examined is whatever suffix () returns for
+   FNAME (suffix () is defined elsewhere; presumably it yields the
+   text following the last `.', or NULL when there is none -- confirm
+   against its definition).  A NULL suffix yields zero.
+
+   The following (case-insensitive, compared with strcasecmp)
+   suffixes are presumed to be HTML files:
+
+     html
+     htm
+     ?html   (`?' matches exactly one character, e.g. "shtml" or
+              "phtml"; the suffix is first checked to be non-empty
+              so that looking at SUF + 1 never steps past the
+              terminating NUL)
+
+   The return value is always exactly 0 or 1.
+
+   #### CAVEAT.  This is not necessarily a good indication that FNAME
+   refers to a file that contains HTML!  */
+int
+has_html_suffix_p (const char *fname)
+{
+ char *suf;
+
+ if ((suf = suffix (fname)) == NULL)
+ return 0;
+ if (!strcasecmp (suf, "html"))
+ return 1;
+ if (!strcasecmp (suf, "htm"))
+ return 1;
+ if (suf[0] && !strcasecmp (suf + 1, "html"))
+ return 1;
+ return 0;
+}
+
/* Read a line from FP and return the pointer to freshly allocated
storage. The storage space is obtained through malloc() and
should be freed with free() when it is no longer needed.
char *suffix PARAMS ((const char *s));
int match_tail PARAMS ((const char *, const char *));
+int has_html_suffix_p PARAMS ((const char *));
+
char *read_whole_line PARAMS ((FILE *));
struct file_memory *read_file PARAMS ((const char *));
void read_file_free PARAMS ((struct file_memory *));