together, we compare local file X.orig (if extant) against server file X.
Previously -k and -N were worthless in combination because the local converted
files always differed from the server versions.
+2000-03-01 Dan Harkless <dan-wget@dilvish.speed.net>
+
+ * NEWS (-K): Now possible to use -N with -k thanks to this option.
+
+ * TODO: Removed the -K / -N interaction item.
+
2000-02-29 Dan Harkless <dan-wget@dilvish.speed.net>
* NEWS (-K / --backup-converted): Mentioned this new option.
* Changes in Wget 1.5.3+dev
** New -K / --backup-converted / backup_converted = on option causes files
-modified due to -k to be saved with a .orig prefix before being changed.
+modified due to -k to be saved with a .orig suffix before being changed. When
+using -N as well, it is these .orig files that are compared against the server.
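+For example (hypothetical invocation), after
+
+    wget -r -k -K -N http://example.com/
+
+each converted page X is saved alongside a pristine X.orig, and a later run
+with the same options compares the server's size and timestamp against
+X.orig, so unchanged remote files are not re-fetched.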
\f
* Wget 1.5.3 is a bugfix release with no user-visible changes.
\f
* Implement more HTTP/1.1 bells and whistles (ETag, Content-MD5 etc.)
* Support SSL encryption through SSLeay or OpenSSL.
-
-* When -K is used with -N, check local file X.orig (if extant) against server
- file X.
+2000-03-01 Dan Harkless <dan-wget@dilvish.speed.net>
+
+ * ftp.c (ftp_loop_internal): Call new downloaded_file() function,
+ even though we don't do conversion on HTML files retrieved via FTP,
+ and hence the call is unneeded with _current_ usage of downloaded_file().
+ (ftp_retrieve_list): Added a comment saying where we need to
+ stat() a .orig file if FTP'd HTML file conversion is ever implemented.
+ (ftp_retrieve_list): "Local file '%s' is more recent," is sometimes
+ a lie -- reworded as "Server file no newer than local file '%s' --".
+
+ * http.c (http_loop): Fixed a typo and clarified a comment.
+ (http_loop): When -K and -N are specified together, compare size
+ and timestamp of server file X against local file X.orig (if
+ extant) rather than converted local file X.
+ (http_loop): "Local file '%s' is more recent," is sometimes a lie
+ -- reworded as "Server file no newer than local file '%s' --".
+ (http_loop): Call new downloaded_file() function to prevent
+ wrongful overwriting of .orig file when -N is specified.
+
+ * url.c (convert_links): When -K is specified, only rename X to
+ X.orig if downloaded_file() returns TRUE. Otherwise, when we skip
+ file X due to -N, we'd clobber an X.orig from a previous invocation.
+ (convert_links): Call the failsafe xstrdup(), not the real strdup().
+ (convert_links): Added a note asking anyone who understands how
+ multiple URLs can correspond to a single file to document it in a
+ comment.
+ (downloaded_file): Added this new function.
+
+ * url.h (downloaded_file): Added prototype for this new function
+ as well as its downloaded_file_t enum type.
+
+ * wget.h (boolean): Added this new typedef and TRUE and FALSE #defines.
+
2000-02-29 Dan Harkless <dan-wget@dilvish.speed.net>
* version.c: Upped version to developer-only "1.5.3+dev".
/* Not as great. */
abort ();
}
+
+ /* If we get out of the switch above without continue'ing, we've
+ successfully downloaded a file. Remember this fact. */
+ downloaded_file(ADD_FILE, locf);
+
if (con->st & ON_YOUR_OWN)
{
CLOSE (RBUF_FD (&con->rbuf));
if (opt.timestamping && f->type == FT_PLAINFILE)
{
struct stat st;
+ /* If conversion of HTML files retrieved via FTP is ever implemented,
+ we'll need to stat() <file>.orig here when -K has been specified.
+ I'm not implementing it now since files on an FTP server are much
+ more likely than files on an HTTP server to legitimately have a
+ .orig suffix. */
if (!stat (u->local, &st))
{
/* Else, get it from the file. */
if (local_size == f->size && tml >= f->tstamp)
{
logprintf (LOG_VERBOSE, _("\
-Local file `%s' is more recent, not retrieving.\n\n"), u->local);
+Server file no newer than local file `%s' -- not retrieving.\n\n"), u->local);
dlthis = 0;
}
else if (local_size != f->size)
{
logprintf (LOG_VERBOSE, _("\
-The sizes do not match (local %ld), retrieving.\n"), local_size);
+The sizes do not match (local %ld) -- retrieving.\n"), local_size);
}
}
} /* opt.timestamping && f->type == FT_PLAINFILE */
static int first_retrieval = 1;
int count;
+ boolean local_dot_orig_file_exists = FALSE;
int use_ts, got_head = 0; /* time-stamping info */
char *tms, *suf, *locf, *tmrate;
uerr_t err;
*newloc = NULL;
/* Warn on (likely bogus) wildcard usage in HTTP. Don't use
- has_wildcards_p because it would also warn on `?', and we that
+ has_wildcards_p because it would also warn on `?', and we know that
shows up in CGI paths a *lot*. */
if (strchr (u->url, '*'))
logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
use_ts = 0;
if (opt.timestamping)
{
- if (stat (u->local, &st) == 0)
+ boolean local_file_exists = FALSE;
+
+ if (opt.backup_converted)
+ /* If -K is specified, we'll act on the assumption that it was specified
+ last time these files were downloaded as well, and instead of just
+ comparing local file X against server file X, we'll compare local
+ file X.orig (if extant, else X) against server file X. If -K
+ _wasn't_ specified last time, or the server contains files called
+ *.orig, -N will be back to not operating correctly with -k. */
+ {
+ size_t filename_len = strlen(u->local);
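+ /* sizeof(".orig") counts the terminating NUL, so the buffer below has
+ room for the filename, the 5-character suffix, and the '\0'. */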
+ char* filename_plus_orig_suffix = xmalloc(filename_len +
+ sizeof(".orig"));
+
+ /* Would a single s[n]printf() call be faster? */
+ strcpy(filename_plus_orig_suffix, u->local);
+ strcpy(filename_plus_orig_suffix + filename_len, ".orig");
+
+ /* Try to stat() the .orig file. */
+ if (stat(filename_plus_orig_suffix, &st) == 0)
+ {
+ local_file_exists = TRUE;
+ local_dot_orig_file_exists = TRUE;
+ }
+
+ free(filename_plus_orig_suffix);
+ }
+
+ if (!local_dot_orig_file_exists)
+ /* Couldn't stat() <file>.orig, so try to stat() <file>. */
+ if (stat (u->local, &st) == 0)
+ local_file_exists = TRUE;
+
+ if (local_file_exists)
+ /* There was a local file, so we'll check later to see if the version
+ the server has is the same version we already have, allowing us to
+ skip a download. */
{
use_ts = 1;
tml = st.st_mtime;
if (tml >= tmr &&
(hstat.contlen == -1 || local_size == hstat.contlen))
{
- logprintf (LOG_VERBOSE, _("\
-Local file `%s' is more recent, not retrieving.\n\n"), u->local);
+ if (local_dot_orig_file_exists)
+ /* We can't collapse this down into just one logprintf()
+ call with a variable set to u->local or the .orig
+ filename, because we'd have to malloc() space for the
+ latter. And since there are multiple returns above (a
+ coding-style no-no by many measures, for reasons such as
+ this), we'd have to remember to free() the string at each
+ one to avoid a memory leak. */
+ logprintf (LOG_VERBOSE, _("\
+Server file no newer than local file `%s.orig' -- not retrieving.\n\n"),
+ u->local);
+ else
+ logprintf (LOG_VERBOSE, _("\
+Server file no newer than local file `%s' -- not retrieving.\n\n"), u->local);
FREEHSTAT (hstat);
return RETROK;
}
else if (tml >= tmr)
logprintf (LOG_VERBOSE, _("\
-The sizes do not match (local %ld), retrieving.\n"), local_size);
+The sizes do not match (local %ld) -- retrieving.\n"), local_size);
else
logputs (LOG_VERBOSE,
_("Remote file is newer, retrieving.\n"));
}
++opt.numurls;
opt.downloaded += hstat.len;
+ downloaded_file(ADD_FILE, locf);
return RETROK;
}
else if (hstat.res == 0) /* No read error */
{
- if (hstat.contlen == -1) /* We don't know how much we were
- supposed to get, so... */
+ if (hstat.contlen == -1) /* We don't know how much we were supposed
+ to get, so assume we succeeded. */
{
if (*dt & RETROKF)
{
}
++opt.numurls;
opt.downloaded += hstat.len;
+ downloaded_file(ADD_FILE, locf);
return RETROK;
}
else if (hstat.len < hstat.contlen) /* meaning we lost the
tms, u->url, hstat.len, hstat.contlen, locf, count);
++opt.numurls;
opt.downloaded += hstat.len;
+ downloaded_file(ADD_FILE, locf);
return RETROK;
}
else /* the same, but not accepted */
if (result != URLOK || u->proto != URLHTTP)
{
if (u->proto == URLHTTP)
- logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg (result));
+ logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
else
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
freeurl (u, 1);
}
status = retrieve_url (cur_url->url, &filename, &new_file, NULL, &dt);
if (opt.recursive && status == RETROK && (dt & TEXTHTML))
- status = recursive_retrieve (filename, new_file ? new_file : cur_url->url);
+ status = recursive_retrieve (filename, new_file ? new_file
+ : cur_url->url);
if (filename && opt.delete_after && file_exists_p (filename))
{
void
convert_links (const char *file, urlpos *l)
{
- FILE *fp;
- char *buf, *p, *p2;
- long size;
- static slist* converted_files = NULL;
+ FILE *fp;
+ char *buf, *p, *p2;
+ long size;
logprintf (LOG_VERBOSE, _("Converting %s... "), file);
/* Read from the file.... */
/* ...to a buffer. */
load_file (fp, &buf, &size);
fclose (fp);
- if (opt.backup_converted)
+ if (opt.backup_converted && downloaded_file(CHECK_FOR_FILE, file))
/* Rather than just writing over the original .html file with the converted
- version, save the former to *.orig. */
+ version, save the former to *.orig. Note we only do this for files we've
+ _successfully_ downloaded, so we don't clobber .orig files sitting around
+ from previous invocations. */
{
/* Construct the backup filename as the original name plus ".orig". */
- size_t filename_len = strlen(file);
- char* filename_plus_orig_suffix = malloc(filename_len + sizeof(".orig"));
- int already_wrote_backup_file = 0;
- slist* converted_file_ptr;
-
+ size_t filename_len = strlen(file);
+ char* filename_plus_orig_suffix = xmalloc(filename_len +
+ sizeof(".orig"));
+ boolean already_wrote_backup_file = FALSE;
+ slist* converted_file_ptr;
+ static slist* converted_files = NULL;
+
+ /* Would a single s[n]printf() call be faster? */
strcpy(filename_plus_orig_suffix, file);
strcpy(filename_plus_orig_suffix + filename_len, ".orig");
/* We can get called twice on the same URL thanks to the
convert_all_links() call in main(). If we write the .orig file each
time in such a case, it'll end up containing the first-pass conversion,
- not the original file. */
+ not the original file. So, see if we've already been called on this
+ file. */
converted_file_ptr = converted_files;
while (converted_file_ptr != NULL)
if (strcmp(converted_file_ptr->string, file) == 0)
{
- already_wrote_backup_file = 1;
+ already_wrote_backup_file = TRUE;
break;
}
else
Note that we never free this memory since we need it till the
convert_all_links() call, which is one of the last things the
program does before terminating. BTW, I'm not sure if it would be
- safe to just set converted_file_ptr->string to file below, rather
- than making a copy of the string... */
+ safe to just set 'converted_file_ptr->string' to 'file' below,
+ rather than making a copy of the string... Another note is that I
+ thought I could just add a field to the urlpos structure saying
+ that we'd written a .orig file for this URL, but that didn't work,
+ so I had to make this separate list. */
converted_file_ptr = xmalloc(sizeof(slist));
- converted_file_ptr->string = strdup(file);
+ converted_file_ptr->string = xstrdup(file); /* die on out-of-mem. */
converted_file_ptr->next = converted_files;
converted_files = converted_file_ptr;
}
free (buf);
return;
}
+ /* [If someone understands why multiple URLs can correspond to one local file,
+ can they please add a comment here...?] */
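+ /* (Perhaps because a directory URL such as http://host/dir/ and the
+ explicit http://host/dir/index.html both save to the same local file,
+ dir/index.html? Unverified -- just a guess.) */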
for (p = buf; l; l = l->next)
{
if (l->pos >= size)
t->next = l;
return t;
}
+
+
+/* Remembers which files have been downloaded. Should be called with
+ add_or_check == ADD_FILE for each file we actually download successfully
+ (i.e. not for ones we have failures on or that we skip due to -N). If you
+ just want to check if a file has been previously added without adding it,
+ call with add_or_check == CHECK_FOR_FILE. Please be sure to call this
+ function with local filenames, not remote URLs -- by some means that isn't
+ commented well enough for me to understand, multiple remote URLs can apparently
+ correspond to a single local file. */
+boolean
+downloaded_file (downloaded_file_t add_or_check, const char* file)
+{
+ boolean found_file = FALSE;
+ static slist* downloaded_files = NULL;
+ slist* rover = downloaded_files;
+
+ while (rover != NULL)
+ if (strcmp(rover->string, file) == 0)
+ {
+ found_file = TRUE;
+ break;
+ }
+ else
+ rover = rover->next;
+
+ if (found_file)
+ return TRUE; /* file had already been downloaded */
+ else
+ {
+ if (add_or_check == ADD_FILE)
+ {
+ rover = xmalloc(sizeof(slist));
+ rover->string = xstrdup(file); /* die on out-of-mem. */
+ rover->next = downloaded_files;
+ downloaded_files = rover;
+ }
+
+ return FALSE; /* file had not already been downloaded */
+ }
+}
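+
+/* A minimal usage sketch (hypothetical caller -- the real call sites are
+   ftp_loop_internal(), http_loop(), and convert_links()):
+
+       downloaded_file (ADD_FILE, locf);             after a successful fetch
+
+       if (downloaded_file (CHECK_FOR_FILE, file))   before writing file.orig
+         ...safe to back up 'file'; we downloaded it on this run...  */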
struct _urlpos *next; /* Next struct in list */
} urlpos;
+/* Controls how downloaded_file() behaves. */
+typedef enum
+{
+ ADD_FILE,
+ CHECK_FOR_FILE
+} downloaded_file_t;
/* Function declarations */
void convert_links PARAMS ((const char *, urlpos *));
urlpos *add_url PARAMS ((urlpos *, const char *, const char *));
+boolean downloaded_file PARAMS ((downloaded_file_t, const char *));
+
#endif /* URL_H */
ROBOTSOK, NOROBOTS, PROXERR, AUTHFAILED, QUOTEXC, WRITEFAILED
} uerr_t;
+typedef unsigned char boolean;
+#ifndef FALSE
+#define FALSE 0
+#endif
+#ifndef TRUE
+#define TRUE 1
+#endif
+
#endif /* WGET_H */