char **result = (char **)arg;
char *p;
- *result = xstrdup (hdr);
p = strrchr (hdr, ';');
if (p)
{
char *authenticate_h;
char *proxyauth;
char *all_headers;
+ char *host_port;
+ int host_port_len;
int sock, hcount, num_written, all_length, remport, statcode;
long contlen, contrange;
struct urlinfo *ou;
int auth_tried_already;
struct rbuf rbuf;
- /* Let the others worry about local filename... */
if (!(*dt & HEAD_ONLY))
+ /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
+ know the local filename so we can save to it. */
assert (u->local != NULL);
authenticate_h = 0;
/* We need to come back here when the initial attempt to retrieve
without authorization header fails. */
- /* Initialize certain elements of struct hstat. */
+ /* Initialize certain elements of struct http_stat. */
hs->len = 0L;
hs->contlen = -1;
hs->res = -1;
path = u->proxy->url;
else
path = u->path;
+
command = (*dt & HEAD_ONLY) ? "HEAD" : "GET";
referer = NULL;
if (ou->referer)
}
remhost = ou->host;
remport = ou->port;
+
+ if (remport == 80)
+ {
+ host_port = NULL;
+ host_port_len = 0;
+ }
+ else
+ {
+ host_port = (char *)alloca (numdigit (remport) + 2);
+ host_port_len = sprintf (host_port, ":%d", remport);
+ }
+
/* Allocate the memory for the request. */
request = (char *)alloca (strlen (command) + strlen (path)
+ strlen (useragent)
- + strlen (remhost) + numdigit (remport)
+ + strlen (remhost) + host_port_len
+ strlen (HTTP_ACCEPT)
+ (referer ? strlen (referer) : 0)
+ (wwwauth ? strlen (wwwauth) : 0)
sprintf (request, "\
%s %s HTTP/1.0\r\n\
User-Agent: %s\r\n\
-Host: %s:%d\r\n\
+Host: %s%s\r\n\
Accept: %s\r\n\
%s%s%s%s%s%s\r\n",
- command, path, useragent, remhost, remport, HTTP_ACCEPT,
- referer ? referer : "",
- wwwauth ? wwwauth : "",
- proxyauth ? proxyauth : "",
- range ? range : "",
- pragma_h,
- opt.user_header ? opt.user_header : "");
+ command, path, useragent, remhost,
+ host_port ? host_port : "",
+ HTTP_ACCEPT, referer ? referer : "",
+ wwwauth ? wwwauth : "",
+ proxyauth ? proxyauth : "",
+ range ? range : "",
+ pragma_h,
+ opt.user_header ? opt.user_header : "");
DEBUGP (("---request begin---\n%s---request end---\n", request));
/* Free the temporary memory. */
FREE_MAYBE (wwwauth);
if (num_written < 0)
{
logputs (LOG_VERBOSE, _("Failed writing HTTP request.\n"));
- free (request);
CLOSE (sock);
return WRITEFAILED;
}
/* We don't assume text/html by default. */
*dt &= ~TEXTHTML;
+ if (opt.html_extension && (*dt & TEXTHTML))
+ /* -E / --html-extension / html_extension = on was specified, and this is a
+ text/html file. If some case-insensitive variation on ".htm[l]" isn't
+ already the file's suffix, tack on ".html". */
+ {
+ char* last_period_in_local_filename = strrchr(u->local, '.');
+
+ if (last_period_in_local_filename == NULL ||
+ !(strcasecmp(last_period_in_local_filename, ".htm") == EQ ||
+ strcasecmp(last_period_in_local_filename, ".html") == EQ))
+ {
+ size_t local_filename_len = strlen(u->local);
+
+ u->local = xrealloc(u->local, local_filename_len + sizeof(".html"));
+ strcpy(u->local + local_filename_len, ".html");
+
+ *dt |= ADDED_HTML_EXTENSION;
+ }
+ }
+
if (contrange == -1)
hs->restval = 0;
else if (contrange != hs->restval ||
return FOPENERR;
}
}
- else /* opt.dfp */
- fp = opt.dfp;
+ else /* opt.dfp */
+ {
+ fp = opt.dfp;
+ if (!hs->restval)
+ {
+ /* This will silently fail for streams that don't correspond
+ to regular files, but that's OK. */
+ rewind (fp);
+ clearerr (fp);
+ }
+ }
/* #### This confuses the code that checks for file size. There
should be some overhead information. */
(contlen != -1 ? contlen : 0),
&rbuf);
hs->dltime = elapsed_time ();
- if (!opt.dfp)
- fclose (fp);
- else
- fflush (fp);
+ {
+ /* Close or flush the file. We have to be careful to check for
+ error here. Checking the result of fwrite() is not enough --
+ errors could go unnoticed! */
+ int flush_res;
+ if (!opt.dfp)
+ flush_res = fclose (fp);
+ else
+ flush_res = fflush (fp);
+ if (flush_res == EOF)
+ hs->res = -2;
+ }
FREE_MAYBE (all_headers);
CLOSE (sock);
if (hs->res == -2)
int count;
int use_ts, got_head = 0; /* time-stamping info */
+ char *filename_plus_orig_suffix;
+ char *local_filename = NULL;
char *tms, *suf, *locf, *tmrate;
uerr_t err;
time_t tml = -1, tmr = -1; /* local and remote time-stamps */
long local_size = 0; /* the size of the local file */
+ size_t filename_len;
struct http_stat hstat; /* HTTP status */
struct stat st;
*newloc = NULL;
/* Warn on (likely bogus) wildcard usage in HTTP. Don't use
- has_wildcards_p because it would also warn on `?', and we that
+ has_wildcards_p because it would also warn on `?', and we know that
shows up in CGI paths a *lot*. */
if (strchr (u->url, '*'))
logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
else
locf = opt.output_document;
+  /* NOTE: this function returns from many points below, and the buffer
+     xmalloc()ed here must be free()d before EVERY one of those returns.
+     Setting a status flag and returning once at the end would be less
+     error-prone; until then, each return site carries an explicit free(). */
+ filename_len = strlen(u->local);
+ filename_plus_orig_suffix = xmalloc(filename_len + sizeof(".orig"));
+
if (opt.noclobber && file_exists_p (u->local))
{
/* If opt.noclobber is turned on and file already exists, do not
&& (!strcmp (suf, "html") || !strcmp (suf, "htm")))
*dt |= TEXTHTML;
free (suf);
+ free(filename_plus_orig_suffix); /* must precede every return! */
/* Another harmless lie: */
return RETROK;
}
use_ts = 0;
if (opt.timestamping)
{
- if (stat (u->local, &st) == 0)
+ boolean local_dot_orig_file_exists = FALSE;
+
+ if (opt.backup_converted)
+ /* If -K is specified, we'll act on the assumption that it was specified
+ last time these files were downloaded as well, and instead of just
+ comparing local file X against server file X, we'll compare local
+ file X.orig (if extant, else X) against server file X. If -K
+ _wasn't_ specified last time, or the server contains files called
+ *.orig, -N will be back to not operating correctly with -k. */
+ {
+	  /* Build "<local>.orig" with two strcpy()s, reusing the cached
+	     filename_len instead of a single sprintf() call.  */
+ strcpy(filename_plus_orig_suffix, u->local);
+ strcpy(filename_plus_orig_suffix + filename_len, ".orig");
+
+ /* Try to stat() the .orig file. */
+ if (stat(filename_plus_orig_suffix, &st) == 0)
+ {
+ local_dot_orig_file_exists = TRUE;
+ local_filename = filename_plus_orig_suffix;
+ }
+ }
+
+ if (!local_dot_orig_file_exists)
+ /* Couldn't stat() <file>.orig, so try to stat() <file>. */
+ if (stat (u->local, &st) == 0)
+ local_filename = u->local;
+
+ if (local_filename != NULL)
+ /* There was a local file, so we'll check later to see if the version
+ the server has is the same version we already have, allowing us to
+ skip a download. */
{
use_ts = 1;
tml = st.st_mtime;
/* Increment the pass counter. */
++count;
/* Wait before the retrieval (unless this is the very first
- retrieval). */
- if (!first_retrieval && opt.wait)
- sleep (opt.wait);
+ retrieval).
+     On a retry (count > 0), back off linearly: sleep count seconds,
+     capped at opt.waitretry.  Otherwise honor the plain opt.wait.  */
+ if (!first_retrieval && (opt.wait || (count && opt.waitretry)))
+ {
+ if (count)
+ {
+ if (count<opt.waitretry)
+ sleep(count);
+ else
+ sleep(opt.waitretry);
+ }
+ else
+ sleep (opt.wait);
+ }
if (first_retrieval)
first_retrieval = 0;
/* Get the current time string. */
/* Try fetching the document, or at least its head. :-) */
err = gethttp (u, &hstat, dt);
+
+ /* It's unfortunate that wget determines the local filename before finding
+ out the Content-Type of the file. Barring a major restructuring of the
+ code, we need to re-set locf here, since gethttp() may have xrealloc()d
+ u->local to tack on ".html". */
+ if (!opt.output_document)
+ locf = u->local;
+ else
+ locf = opt.output_document;
+
/* Time? */
tms = time_str (NULL);
/* Get the new location (with or without the redirection). */
case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED:
/* Fatal errors just return from the function. */
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return err;
break;
case FWRITEERR: case FOPENERR:
logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
u->local, strerror (errno));
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return err;
break;
case NEWLOCATION:
logprintf (LOG_NOTQUIET,
_("ERROR: Redirection (%d) without location.\n"),
hstat.statcode);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return WRONGCODE;
}
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return NEWLOCATION;
break;
case RETRFINISHED:
tms, hstat.statcode, hstat.error);
logputs (LOG_VERBOSE, "\n");
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return WRONGCODE;
}
(hstat.contlen == -1 || local_size == hstat.contlen))
{
logprintf (LOG_VERBOSE, _("\
-Local file `%s' is more recent, not retrieving.\n\n"), u->local);
+Server file no newer than local file `%s' -- not retrieving.\n\n"),
+ local_filename);
FREEHSTAT (hstat);
+ free(filename_plus_orig_suffix);/*must precede every return!*/
return RETROK;
}
else if (tml >= tmr)
logprintf (LOG_VERBOSE, _("\
-The sizes do not match (local %ld), retrieving.\n"), local_size);
+The sizes do not match (local %ld) -- retrieving.\n"), local_size);
else
logputs (LOG_VERBOSE,
_("Remote file is newer, retrieving.\n"));
if (opt.spider)
{
logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
+ free(filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
}
++opt.numurls;
opt.downloaded += hstat.len;
+
+ /* Remember that we downloaded the file for later ".orig" code. */
+ if (*dt & ADDED_HTML_EXTENSION)
+ downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
+ else
+ downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
+
+ free(filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
else if (hstat.res == 0) /* No read error */
{
- if (hstat.contlen == -1) /* We don't know how much we were
- supposed to get, so... */
+ if (hstat.contlen == -1) /* We don't know how much we were supposed
+ to get, so assume we succeeded. */
{
if (*dt & RETROKF)
{
}
++opt.numurls;
opt.downloaded += hstat.len;
+
+ /* Remember that we downloaded the file for later ".orig" code. */
+ if (*dt & ADDED_HTML_EXTENSION)
+ downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
+ else
+ downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
+
+ free(filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
else if (hstat.len < hstat.contlen) /* meaning we lost the
tms, u->url, hstat.len, hstat.contlen, locf, count);
++opt.numurls;
opt.downloaded += hstat.len;
+
+ /* Remember that we downloaded the file for later ".orig" code. */
+ if (*dt & ADDED_HTML_EXTENSION)
+ downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
+ else
+ downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
+
+ free(filename_plus_orig_suffix); /* must precede every return! */
return RETROK;
}
else /* the same, but not accepted */
break;
}
while (!opt.ntry || (count < opt.ntry));
+ free(filename_plus_orig_suffix); /* must precede every return! */
return TRYLIMEXC;
}
\f