X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fhttp.c;h=77f9797232cdb648dde8b641c4eb2d023ca0e7bb;hb=8566a727674ab3c2b0df03c31c6085a0d5d5bf81;hp=4752ce3d7eec4ef7910681f63929aa513a22a27b;hpb=46c94e5f262351556f9559148cfad57cccbeec3f;p=wget

diff --git a/src/http.c b/src/http.c
index 4752ce3d..77f97972 100644
--- a/src/http.c
+++ b/src/http.c
@@ -1,5 +1,5 @@
 /* HTTP support.
-   Copyright (C) 1996-2005 Free Software Foundation, Inc.
+   Copyright (C) 1996-2006 Free Software Foundation, Inc.
 
 This file is part of GNU Wget.
 
@@ -757,7 +757,7 @@ print_server_response (const struct response *resp, const char *prefix)
         --e;
       /* This is safe even on printfs with broken handling of "%.<n>s"
          because resp->headers ends with \0.  */
-      logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b);
+      logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, (int) (e - b), b);
     }
 }
 
@@ -932,6 +932,23 @@ extract_param (const char **source, param_token *name, param_token *value,
 #undef MAX
 #define MAX(p, q) ((p) > (q) ? (p) : (q))
 
+/* Parse the contents of the `Content-Disposition' header, extracting
+   the information useful to Wget.  Content-Disposition is a header
+   borrowed from MIME; when used in HTTP, it typically serves for
+   specifying the desired file name of the resource.  For example:
+
+       Content-Disposition: attachment; filename="flora.jpg"
+
+   Wget will skip the tokens it doesn't care about, such as
+   "attachment" in the previous example; it will also skip other
+   unrecognized params.  If the header is syntactically correct and
+   contains a file name, a copy of the file name is stored in
+   *filename and true is returned.  Otherwise, the function returns
+   false.
+
+   The file name is stripped of directory components and must not be
+   empty.  */
+
 static bool
 parse_content_disposition (const char *hdr, char **filename)
 {
@@ -1709,33 +1726,49 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
 
   /* Determine the local filename if needed. Notice that if -O is used 
    * hstat.local_file is set by http_loop to the argument of -O. */
-  if (!hs->local_file)     
+  if (!hs->local_file)
     {
       /* Honor Content-Disposition whether possible. */
-      if (!resp_header_copy (resp, "Content-Disposition", hdrval, sizeof (hdrval))
+      if (!opt.content_disposition
+          || !resp_header_copy (resp, "Content-Disposition", 
+                                hdrval, sizeof (hdrval))
           || !parse_content_disposition (hdrval, &hs->local_file))
         {
-          /* Choose filename according to URL name. */
+          /* Content-Disposition is disabled, or the header is missing
+           * or broken.  Choose unique file name according to given URL. */
           hs->local_file = url_file_name (u);
         }
     }
   
+  DEBUGP (("hs->local_file is: %s %s\n", hs->local_file,
+          file_exists_p (hs->local_file) ? "(existing)" : "(not existing)"));
+  
   /* TODO: perform this check only once. */
-  if (opt.noclobber && file_exists_p (hs->local_file))
+  if (file_exists_p (hs->local_file))
     {
-      /* If opt.noclobber is turned on and file already exists, do not
-         retrieve the file */
-      logprintf (LOG_VERBOSE, _("\
+      if (opt.noclobber)
+        {
+          /* If opt.noclobber is turned on and file already exists, do not
+             retrieve the file */
+          logprintf (LOG_VERBOSE, _("\
 File `%s' already there; not retrieving.\n\n"), hs->local_file);
-      /* If the file is there, we suppose it's retrieved OK.  */
-      *dt |= RETROKF;
+          /* If the file is there, we suppose it's retrieved OK.  */
+          *dt |= RETROKF;
 
-      /* #### Bogusness alert.  */
-      /* If its suffix is "html" or "htm" or similar, assume text/html.  */
-      if (has_html_suffix_p (hs->local_file))
-        *dt |= TEXTHTML;
+          /* #### Bogusness alert.  */
+          /* If its suffix is "html" or "htm" or similar, assume text/html.  */
+          if (has_html_suffix_p (hs->local_file))
+            *dt |= TEXTHTML;
 
-      return RETROK;
+          return RETROK;
+        }
+      else
+        {
+          char *unique = unique_name (hs->local_file, true);
+          if (unique != hs->local_file)
+            xfree (hs->local_file);
+          hs->local_file = unique;
+        }
     }
 
   /* Support timestamping */
@@ -1981,11 +2014,13 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
      content-type.  */
   if (!type ||
         0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
-        0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
+        0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))    
     *dt |= TEXTHTML;
   else
     *dt &= ~TEXTHTML;
 
+  DEBUGP (("TEXTHTML is %s.\n", (*dt & TEXTHTML) ? "on" : "off"));
+
   if (opt.html_extension && (*dt & TEXTHTML))
     /* -E / --html-extension / html_extension = on was specified, and this is a
        text/html file.  If some case-insensitive variation on ".htm[l]" isn't
@@ -2104,13 +2139,6 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
       return RETRFINISHED;
     }
 
-  /* Print fetch message, if opt.verbose.  */
-  if (opt.verbose)
-    {
-      logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"), 
-                 HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file);
-    }
-    
   /* Open the local file.  */
   if (!output_stream)
     {
@@ -2147,6 +2175,13 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
   else
     fp = output_stream;
 
+  /* Print fetch message, if opt.verbose.  */
+  if (opt.verbose)
+    {
+      logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"), 
+                 HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file);
+    }
+    
   /* This confuses the timestamping code that checks for file size.
      #### The timestamping code should be smarter about file size.  */
   if (opt.save_headers && hs->restval == 0)
@@ -2245,7 +2280,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
       sleep_between_retrievals (count);
       
       /* Get the current time string.  */
-      tms = time_str (NULL);
+      tms = time_str (time (NULL));
       
       /* Print fetch message, if opt.verbose.  */
       if (opt.verbose)
@@ -2274,7 +2309,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
       /* Default document type is empty.  However, if spider mode is
          on or time-stamping is employed, HEAD_ONLY commands is
          encoded within *dt.  */
-      if (opt.spider || (opt.timestamping && !got_head))
+      if ((opt.spider && !opt.recursive) || (opt.timestamping && !got_head))
         *dt |= HEAD_ONLY;
       else
         *dt &= ~HEAD_ONLY;
@@ -2309,7 +2344,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
       err = gethttp (u, &hstat, dt, proxy);
 
       /* Time?  */
-      tms = time_str (NULL);
+      tms = time_str (time (NULL));
       
       /* Get the new location (with or without the redirection).  */
       if (hstat.newloc)
@@ -2365,20 +2400,26 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
           /* All possibilities should have been exhausted.  */
           abort ();
         }
-      
+     
       if (!(*dt & RETROKF))
         {
+          char *hurl = NULL;
           if (!opt.verbose)
             {
               /* #### Ugly ugly ugly! */
-              char *hurl = url_string (u, true);
+              hurl = url_string (u, true);
               logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
-              xfree (hurl);
+            }
+          if (opt.spider && opt.recursive)
+            {
+              if (!hurl) hurl = url_string (u, true);
+              nonexisting_url (hurl, referer);
             }
           logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
                      tms, hstat.statcode, escnonprint (hstat.error));
           logputs (LOG_VERBOSE, "\n");
           ret = WRONGCODE;
+          xfree_null (hurl);
           goto exit;
         }
 
@@ -2444,7 +2485,7 @@ The sizes do not match (local %s) -- retrieving.\n"),
         }
       
       if ((tmr != (time_t) (-1))
-          && !opt.spider
+          && (!opt.spider || opt.recursive)
           && ((hstat.len == hstat.contlen) ||
               ((hstat.res == 0) && (hstat.contlen == -1))))
         {
@@ -2463,7 +2504,7 @@ The sizes do not match (local %s) -- retrieving.\n"),
         }
       /* End of time-stamping section. */
 
-      if (opt.spider)
+      if (opt.spider && !opt.recursive)
         {
           logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode,
                      escnonprint (hstat.error));
@@ -2929,7 +2970,7 @@ http_cleanup (void)
 
 #ifdef TESTING
 
-char *
+const char *
 test_parse_content_disposition()
 {
   int i;
@@ -2953,8 +2994,6 @@ test_parse_content_disposition()
                  res == test_array[i].result
                  && (res == false 
                      || 0 == strcmp (test_array[i].filename, filename)));
-
-      /* printf ("test %d: %s\n", i, res == false ? "false" : filename); */
     }
 
   return NULL;