X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fwarc.c;h=38ef3a1b0172f733d17da6edbf79c9dde8185c93;hp=911cebd7ee1fbbc2cee11b8b256676717a098901;hb=38a7829dcb4eb5dba28dbf0f05c6a80fea9217f8;hpb=bd4f1e60423c07475db39c979bb4c0c7b7acd22d

diff --git a/src/warc.c b/src/warc.c
index 911cebd7..38ef3a1b 100644
--- a/src/warc.c
+++ b/src/warc.c
@@ -51,10 +51,16 @@ as that of the covered work.  */
 
 #ifndef WINDOWS
 #include <libgen.h>
+#else
+#include <fcntl.h>
 #endif
 
 #include "warc.h"
 
+#ifndef O_TEMPORARY
+#define O_TEMPORARY 0
+#endif
+
 extern char *version_string;
 
 /* Set by main in main.c */
@@ -75,7 +81,7 @@ static FILE *warc_current_file;
 #ifdef HAVE_LIBZ
 /* The gzip stream for the current WARC file
    (or NULL, if WARC or gzip is disabled). */
-static gzFile *warc_current_gzfile;
+static gzFile warc_current_gzfile;
 
 /* The offset of the current gzip record in the WARC file. */
 static off_t warc_current_gzfile_offset;
@@ -102,7 +108,7 @@ static char *warc_current_filename;
 static int warc_current_file_number;
 
 /* The table of CDX records, if deduplication is enabled. */
-struct hash_table * warc_cdx_dedup_table;
+static struct hash_table * warc_cdx_dedup_table;
 
 static bool warc_start_new_file (bool meta);
 
@@ -165,7 +171,7 @@ warc_write_string (const char *str)
 }
 
 
-#define EXTRA_GZIP_HEADER_SIZE 12
+#define EXTRA_GZIP_HEADER_SIZE 14
 #define GZIP_STATIC_HEADER_SIZE  10
 #define FLG_FEXTRA          0x04
 #define OFF_FLG             3
@@ -180,7 +186,7 @@ warc_write_string (const char *str)
    Returns false and set warc_write_ok to false if there
    is an error.  */
 static bool
-warc_write_start_record ()
+warc_write_start_record (void)
 {
   if (!warc_write_ok)
     return false;
@@ -200,7 +206,7 @@ warc_write_start_record ()
          In warc_write_end_record we will fill this space
          with information about the uncompressed and
          compressed size of the record. */
-      fprintf (warc_current_file, "XXXXXXXXXXXX");
+      fseek (warc_current_file, EXTRA_GZIP_HEADER_SIZE, SEEK_CUR);
       fflush (warc_current_file);
 
       /* Start a new GZIP stream. */
@@ -209,7 +215,8 @@ warc_write_start_record ()
 
       if (warc_current_gzfile == NULL)
         {
-          logprintf (LOG_NOTQUIET, _("Error opening GZIP stream to WARC file.\n"));
+          logprintf (LOG_NOTQUIET,
+_("Error opening GZIP stream to WARC file.\n"));
           warc_write_ok = false;
           return false;
         }
@@ -244,15 +251,10 @@ static bool
 warc_write_block_from_file (FILE *data_in)
 {
   /* Add the Content-Length header. */
-  char *content_length;
+  char content_length[MAX_INT_TO_STRING_LEN(off_t)];
   fseeko (data_in, 0L, SEEK_END);
-  if (! asprintf (&content_length, "%ld", ftello (data_in)))
-    {
-      warc_write_ok = false;
-      return false;
-    }
+  number_to_string (content_length, ftello (data_in));
   warc_write_header ("Content-Length", content_length);
-  free (content_length);
 
   /* End of the WARC header section. */
   warc_write_string ("\r\n");
@@ -279,7 +281,7 @@ warc_write_block_from_file (FILE *data_in)
    with the uncompressed and compressed length of the
    record. */
 static bool
-warc_write_end_record ()
+warc_write_end_record (void)
 {
   warc_write_buffer ("\r\n\r\n", 4);
 
@@ -298,12 +300,12 @@ warc_write_end_record ()
 
       /* The WARC standard suggests that we add 'skip length' data in the
          extra header field of the GZIP stream.
-         
+
          In warc_write_start_record we reserved space for this extra header.
          This extra space starts at warc_current_gzfile_offset and fills
          EXTRA_GZIP_HEADER_SIZE bytes.  The static GZIP header starts at
          warc_current_gzfile_offset + EXTRA_GZIP_HEADER_SIZE.
-         
+
          We need to do three things:
          1. Move the static GZIP header to warc_current_gzfile_offset;
          2. Set the FEXTRA flag in the GZIP header;
@@ -317,11 +319,13 @@ warc_write_end_record ()
       off_t compressed_size = warc_current_gzfile_uncompressed_size;
 
       /* Go back to the static GZIP header. */
-      fseeko (warc_current_file, warc_current_gzfile_offset + EXTRA_GZIP_HEADER_SIZE, SEEK_SET);
+      fseeko (warc_current_file, warc_current_gzfile_offset
+              + EXTRA_GZIP_HEADER_SIZE, SEEK_SET);
 
       /* Read the header. */
       char static_header[GZIP_STATIC_HEADER_SIZE];
-      size_t result = fread (static_header, 1, GZIP_STATIC_HEADER_SIZE, warc_current_file);
+      size_t result = fread (static_header, 1, GZIP_STATIC_HEADER_SIZE,
+                             warc_current_file);
       if (result != GZIP_STATIC_HEADER_SIZE)
         {
           warc_write_ok = false;
@@ -331,7 +335,8 @@ warc_write_end_record ()
       /* Set the FEXTRA flag in the flags byte of the header. */
       static_header[OFF_FLG] = static_header[OFF_FLG] | FLG_FEXTRA;
 
-      /* Write the header back to the file, but starting at warc_current_gzfile_offset. */
+      /* Write the header back to the file, but starting at
+         warc_current_gzfile_offset. */
       fseeko (warc_current_file, warc_current_gzfile_offset, SEEK_SET);
       fwrite (static_header, 1, GZIP_STATIC_HEADER_SIZE, warc_current_file);
 
@@ -343,19 +348,23 @@ warc_write_end_record ()
       /* The extra header field identifier for the WARC skip length. */
       extra_header[2]  = 's';
       extra_header[3]  = 'l';
+      /* The size of the field value (8 bytes).  */
+      extra_header[4]  = (8 & 255);
+      extra_header[5]  = ((8 >> 8) & 255);
       /* The size of the uncompressed record.  */
-      extra_header[4]  = (uncompressed_size & 255);
-      extra_header[5]  = (uncompressed_size >> 8) & 255;
-      extra_header[6]  = (uncompressed_size >> 16) & 255;
-      extra_header[7]  = (uncompressed_size >> 24) & 255;
+      extra_header[6]  = (uncompressed_size & 255);
+      extra_header[7]  = (uncompressed_size >> 8) & 255;
+      extra_header[8]  = (uncompressed_size >> 16) & 255;
+      extra_header[9]  = (uncompressed_size >> 24) & 255;
       /* The size of the compressed record.  */
-      extra_header[8]  = (compressed_size & 255);
-      extra_header[9]  = (compressed_size >> 8) & 255;
-      extra_header[10] = (compressed_size >> 16) & 255;
-      extra_header[11] = (compressed_size >> 24) & 255;
+      extra_header[10] = (compressed_size & 255);
+      extra_header[11] = (compressed_size >> 8) & 255;
+      extra_header[12] = (compressed_size >> 16) & 255;
+      extra_header[13] = (compressed_size >> 24) & 255;
 
       /* Write the extra header after the static header. */
-      fseeko (warc_current_file, warc_current_gzfile_offset + GZIP_STATIC_HEADER_SIZE, SEEK_SET);
+      fseeko (warc_current_file, warc_current_gzfile_offset
+              + GZIP_STATIC_HEADER_SIZE, SEEK_SET);
       fwrite (extra_header, 1, EXTRA_GZIP_HEADER_SIZE, warc_current_file);
 
       /* Done, move back to the end of the file. */
@@ -372,7 +381,7 @@ warc_write_end_record ()
    the current WARC record.
    If timestamp is NULL, the current time will be used.  */
 static bool
-warc_write_date_header (char *timestamp)
+warc_write_date_header (const char *timestamp)
 {
   if (timestamp == NULL)
     {
@@ -402,13 +411,14 @@ warc_write_ip_header (ip_address *ip)
    Compute SHA1 message digests for bytes read from STREAM.  The
    digest of the complete file will be written into the 16 bytes
    beginning at RES_BLOCK.
-   
+
    If payload_offset >= 0, a second digest will be calculated of the
    portion of the file starting at payload_offset and continuing to
    the end of the file.  The digest number will be written into the
    16 bytes beginning ad RES_PAYLOAD.  */
 static int
-warc_sha1_stream_with_payload (FILE *stream, void *res_block, void *res_payload, off_t payload_offset)
+warc_sha1_stream_with_payload (FILE *stream, void *res_block, void *res_payload,
+                               off_t payload_offset)
 {
 #define BLOCKSIZE 32768
 
@@ -486,7 +496,8 @@ warc_sha1_stream_with_payload (FILE *stream, void *res_block, void *res_payload,
              have to start with a full block, there may still be some
              bytes left from the previous buffer.  Therefore, we need
              to continue with  sha1_process_bytes.  */
-          sha1_process_bytes (buffer + start_of_payload, BLOCKSIZE - start_of_payload, &ctx_payload);
+          sha1_process_bytes (buffer + start_of_payload,
+                              BLOCKSIZE - start_of_payload, &ctx_payload);
         }
     }
 
@@ -505,7 +516,8 @@ warc_sha1_stream_with_payload (FILE *stream, void *res_block, void *res_payload,
             start_of_payload = 0;
 
           /* Process the payload part of the buffer. */
-          sha1_process_bytes (buffer + start_of_payload, sum - start_of_payload, &ctx_payload);
+          sha1_process_bytes (buffer + start_of_payload,
+                              sum - start_of_payload, &ctx_payload);
         }
     }
 
@@ -524,9 +536,10 @@ warc_sha1_stream_with_payload (FILE *stream, void *res_block, void *res_payload,
 static char *
 warc_base32_sha1_digest (char *sha1_digest)
 {
-  // length: "sha1:" + digest + "\0"
+  /* length: "sha1:" + digest + "\0" */
   char *sha1_base32 = malloc (BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1 + 5 );
-  base32_encode (sha1_digest, SHA1_DIGEST_SIZE, sha1_base32 + 5, BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1);
+  base32_encode (sha1_digest, SHA1_DIGEST_SIZE, sha1_base32 + 5,
+                 BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1);
   memcpy (sha1_base32, "sha1:", 5);
   sha1_base32[BASE32_LENGTH(SHA1_DIGEST_SIZE) + 5] = '\0';
   return sha1_base32;
@@ -547,7 +560,8 @@ warc_write_digest_headers (FILE *file, long payload_offset)
       char sha1_res_payload[SHA1_DIGEST_SIZE];
 
       rewind (file);
-      if (warc_sha1_stream_with_payload (file, sha1_res_block, sha1_res_payload, payload_offset) == 0)
+      if (warc_sha1_stream_with_payload (file, sha1_res_block,
+          sha1_res_payload, payload_offset) == 0)
         {
           char *digest;
 
@@ -633,11 +647,12 @@ warc_uuid_str (char *urn_str)
 
 /* Write a warcinfo record to the current file.
    Updates warc_current_warcinfo_uuid_str. */
-bool
+static bool
 warc_write_warcinfo_record (char *filename)
 {
   /* Write warc-info record as the first record of the file. */
-  /* We add the record id of this info record to the other records in the file. */
+  /* We add the record id of this info record to the other records in the
+     file. */
   warc_current_warcinfo_uuid_str = (char *) malloc (48);
   warc_uuid_str (warc_current_warcinfo_uuid_str);
 
@@ -666,7 +681,8 @@ warc_write_warcinfo_record (char *filename)
 
   fprintf (warc_tmp, "software: Wget/%s (%s)\r\n", version_string, OS_TYPE);
   fprintf (warc_tmp, "format: WARC File Format 1.0\r\n");
-  fprintf (warc_tmp, "conformsTo: http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf\r\n");
+  fprintf (warc_tmp,
+"conformsTo: http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf\r\n");
   fprintf (warc_tmp, "robots: %s\r\n", (opt.use_robots ? "classic" : "off"));
   fprintf (warc_tmp, "wget-arguments: %s\r\n", program_argstring);
   /* Add the user headers, if any. */
@@ -683,9 +699,7 @@ warc_write_warcinfo_record (char *filename)
   warc_write_end_record ();
 
   if (! warc_write_ok)
-    {
-      logprintf (LOG_NOTQUIET, _("Error writing warcinfo record to WARC file.\n"));
-    }
+    logprintf (LOG_NOTQUIET, _("Error writing warcinfo record to WARC file.\n"));
 
   free (filename_copy);
   free (filename_basename);
@@ -695,7 +709,7 @@ warc_write_warcinfo_record (char *filename)
 
 /* Opens a new WARC file.
    If META is true, generates a filename ending with 'meta.warc.gz'.
-   
+
    This method will:
    1. close the current WARC file (if there is one);
    2. increment warc_current_file_number;
@@ -712,10 +726,9 @@ warc_start_new_file (bool meta)
 
   if (warc_current_file != NULL)
     fclose (warc_current_file);
-  if (warc_current_warcinfo_uuid_str)
-    free (warc_current_warcinfo_uuid_str);
-  if (warc_current_filename)
-    free (warc_current_filename);
+  
+  free (warc_current_warcinfo_uuid_str);
+  free (warc_current_filename);
 
   warc_current_file_number++;
 
@@ -724,17 +737,26 @@ warc_start_new_file (bool meta)
   char *new_filename = malloc (base_filename_length + 1 + 5 + 8 + 1);
   warc_current_filename = new_filename;
 
+#ifdef __VMS
+# define WARC_GZ "warc-gz"
+#else /* def __VMS */
+# define WARC_GZ "warc.gz"
+#endif /* def __VMS [else] */
+
 #ifdef HAVE_LIBZ
-  char *extension = (opt.warc_compression_enabled ? "warc.gz" : "warc");
+  const char *extension = (opt.warc_compression_enabled ? WARC_GZ : "warc");
 #else
-  char *extension = "warc";
+  const char *extension = "warc";
 #endif
 
   /* If max size is enabled, we add a serial number to the file names. */
   if (meta)
     sprintf (new_filename, "%s-meta.%s", opt.warc_filename, extension);
   else if (opt.warc_maxsize > 0)
-    sprintf (new_filename, "%s-%05d.%s", opt.warc_filename, warc_current_file_number, extension);
+    {
+      sprintf (new_filename, "%s-%05d.%s", opt.warc_filename,
+               warc_current_file_number, extension);
+    }
   else
     sprintf (new_filename, "%s.%s", opt.warc_filename, extension);
 
@@ -744,7 +766,8 @@ warc_start_new_file (bool meta)
   warc_current_file = fopen (new_filename, "wb+");
   if (warc_current_file == NULL)
     {
-      logprintf (LOG_NOTQUIET, _("Error opening WARC file %s.\n"), quote (new_filename));
+      logprintf (LOG_NOTQUIET, _("Error opening WARC file %s.\n"),
+                 quote (new_filename));
       return false;
     }
 
@@ -760,7 +783,7 @@ warc_start_new_file (bool meta)
 
 /* Opens the CDX file for output. */
 static bool
-warc_start_cdx_file ()
+warc_start_cdx_file (void)
 {
   int filename_length = strlen (opt.warc_filename);
   char *cdx_filename = alloca (filename_length + 4 + 1);
@@ -794,7 +817,8 @@ warc_start_cdx_file ()
 /* Parse the CDX header and find the field numbers of the original url,
    checksum and record ID fields. */
 static bool
-warc_parse_cdx_header (char *lineptr, int *field_num_original_url, int *field_num_checksum, int *field_num_record_id)
+warc_parse_cdx_header (char *lineptr, int *field_num_original_url,
+                       int *field_num_checksum, int *field_num_record_id)
 {
   *field_num_original_url = -1;
   *field_num_checksum = -1;
@@ -803,7 +827,7 @@ warc_parse_cdx_header (char *lineptr, int *field_num_original_url, int *field_nu
   char *token;
   char *save_ptr;
   token = strtok_r (lineptr, CDX_FIELDSEP, &save_ptr);
-  
+
   if (token != NULL && strcmp (token, "CDX") == 0)
     {
       int field_num = 0;
@@ -836,7 +860,8 @@ warc_parse_cdx_header (char *lineptr, int *field_num_original_url, int *field_nu
 
 /* Parse the CDX record and add it to the warc_cdx_dedup_table hash table. */
 static void
-warc_process_cdx_line (char *lineptr, int field_num_original_url, int field_num_checksum, int field_num_record_id)
+warc_process_cdx_line (char *lineptr, int field_num_original_url,
+                       int field_num_checksum, int field_num_record_id)
 {
   char *original_url = NULL;
   char *checksum = NULL;
@@ -874,13 +899,15 @@ warc_process_cdx_line (char *lineptr, int field_num_original_url, int field_num_
          bytes.  */
       size_t checksum_l;
       char * checksum_v;
-      base32_decode_alloc (checksum, strlen (checksum), &checksum_v, &checksum_l);
+      base32_decode_alloc (checksum, strlen (checksum), &checksum_v,
+                           &checksum_l);
       free (checksum);
 
       if (checksum_v != NULL && checksum_l == SHA1_DIGEST_SIZE)
         {
           /* This is a valid line with a valid checksum. */
-          struct warc_cdx_record * rec = malloc (sizeof (struct warc_cdx_record));
+          struct warc_cdx_record *rec;
+          rec = malloc (sizeof (struct warc_cdx_record));
           rec->url = original_url;
           rec->uuid = record_id;
           memcpy (rec->digest, checksum_v, SHA1_DIGEST_SIZE);
@@ -890,17 +917,22 @@ warc_process_cdx_line (char *lineptr, int field_num_original_url, int field_num_
       else
         {
           free (original_url);
-          if (checksum_v != NULL)
-            free (checksum_v);
+          free (checksum_v);
           free (record_id);
         }
     }
+  else
+    {
+      xfree_null(checksum);
+      xfree_null(original_url);
+      xfree_null(record_id);
+    }
 }
 
 /* Loads the CDX file from opt.warc_cdx_dedup_filename and fills
    the warc_cdx_dedup_table. */
-bool
-warc_load_cdx_dedup_file ()
+static bool
+warc_load_cdx_dedup_file (void)
 {
   FILE *f = fopen (opt.warc_cdx_dedup_filename, "r");
   if (f == NULL)
@@ -912,7 +944,7 @@ warc_load_cdx_dedup_file ()
 
   char *lineptr = NULL;
   size_t n = 0;
-  size_t line_length;
+  ssize_t line_length;
 
   /* The first line should contain the CDX header.
      Format:  " CDX x x x x x"
@@ -921,7 +953,8 @@ warc_load_cdx_dedup_file ()
      'u' (the WARC record id). */
   line_length = getline (&lineptr, &n, f);
   if (line_length != -1)
-    warc_parse_cdx_header (lineptr, &field_num_original_url, &field_num_checksum, &field_num_record_id);
+    warc_parse_cdx_header (lineptr, &field_num_original_url,
+                           &field_num_checksum, &field_num_record_id);
 
   /* If the file contains all three fields, read the complete file. */
   if (field_num_original_url == -1
@@ -929,22 +962,29 @@ warc_load_cdx_dedup_file ()
       || field_num_record_id == -1)
     {
       if (field_num_original_url == -1)
-        logprintf (LOG_NOTQUIET, _("CDX file does not list original urls. (Missing column 'a'.)\n"));
+        logprintf (LOG_NOTQUIET,
+_("CDX file does not list original urls. (Missing column 'a'.)\n"));
       if (field_num_checksum == -1)
-        logprintf (LOG_NOTQUIET, _("CDX file does not list checksums. (Missing column 'k'.)\n"));
+        logprintf (LOG_NOTQUIET,
+_("CDX file does not list checksums. (Missing column 'k'.)\n"));
       if (field_num_record_id == -1)
-        logprintf (LOG_NOTQUIET, _("CDX file does not list record ids. (Missing column 'u'.)\n"));
+        logprintf (LOG_NOTQUIET,
+_("CDX file does not list record ids. (Missing column 'u'.)\n"));
     }
   else
     {
       /* Initialize the table. */
-      warc_cdx_dedup_table = hash_table_new (1000, warc_hash_sha1_digest, warc_cmp_sha1_digest);
+      warc_cdx_dedup_table = hash_table_new (1000, warc_hash_sha1_digest,
+                                             warc_cmp_sha1_digest);
 
       do
         {
           line_length = getline (&lineptr, &n, f);
           if (line_length != -1)
-            warc_process_cdx_line (lineptr, field_num_original_url, field_num_checksum, field_num_record_id);
+            {
+              warc_process_cdx_line (lineptr, field_num_original_url,
+                            field_num_checksum, field_num_record_id);
+            }
 
         }
       while (line_length != -1);
@@ -952,7 +992,8 @@ warc_load_cdx_dedup_file ()
       /* Print results. */
       int nrecords = hash_table_count (warc_cdx_dedup_table);
       logprintf (LOG_VERBOSE, ngettext ("Loaded %d record from CDX.\n\n",
-                                        "Loaded %d records from CDX.\n\n", nrecords),
+                                        "Loaded %d records from CDX.\n\n",
+                                         nrecords),
                               nrecords);
     }
 
@@ -972,11 +1013,10 @@ warc_find_duplicate_cdx_record (char *url, char *sha1_digest_payload)
   if (warc_cdx_dedup_table == NULL)
     return NULL;
 
-  char *key;
-  struct warc_cdx_record *rec_existing;
-  hash_table_get_pair (warc_cdx_dedup_table, sha1_digest_payload, &key, &rec_existing);
+  struct warc_cdx_record *rec_existing
+    = hash_table_get (warc_cdx_dedup_table, sha1_digest_payload);
 
-  if (rec_existing != NULL && strcmp (rec_existing->url, url) == 0)
+  if (rec_existing && strcmp (rec_existing->url, url) == 0)
     return rec_existing;
   else
     return NULL;
@@ -985,7 +1025,7 @@ warc_find_duplicate_cdx_record (char *url, char *sha1_digest_payload)
 /* Initializes the WARC writer (if opt.warc_filename is set).
    This should be called before any WARC record is written. */
 void
-warc_init ()
+warc_init (void)
 {
   warc_write_ok = true;
 
@@ -1005,7 +1045,8 @@ warc_init ()
       warc_manifest_fp = warc_tempfile ();
       if (warc_manifest_fp == NULL)
         {
-          logprintf (LOG_NOTQUIET, _("Could not open temporary WARC manifest file.\n"));
+          logprintf (LOG_NOTQUIET,
+                     _("Could not open temporary WARC manifest file.\n"));
           exit(1);
         }
 
@@ -1014,7 +1055,8 @@ warc_init ()
           warc_log_fp = warc_tempfile ();
           if (warc_log_fp == NULL)
             {
-              logprintf (LOG_NOTQUIET, _("Could not open temporary WARC log file.\n"));
+              logprintf (LOG_NOTQUIET,
+                         _("Could not open temporary WARC log file.\n"));
               exit(1);
             }
           log_set_warc_log_fp (warc_log_fp);
@@ -1031,7 +1073,8 @@ warc_init ()
         {
           if (! warc_start_cdx_file ())
             {
-              logprintf (LOG_NOTQUIET, _("Could not open CDX file for output.\n"));
+              logprintf (LOG_NOTQUIET,
+                         _("Could not open CDX file for output.\n"));
               exit(1);
             }
         }
@@ -1039,8 +1082,8 @@ warc_init ()
 }
 
 /* Writes metadata (manifest, configuration, log file) to the WARC file. */
-void
-warc_write_metadata ()
+static void
+warc_write_metadata (void)
 {
   /* If there are multiple WARC files, the metadata should be written to a separate file. */
   if (opt.warc_maxsize > 0)
@@ -1050,7 +1093,7 @@ warc_write_metadata ()
   warc_uuid_str (manifest_uuid);
 
   fflush (warc_manifest_fp);
-  warc_write_resource_record (manifest_uuid,
+  warc_write_metadata_record (manifest_uuid,
                               "metadata://gnu.org/software/wget/warc/MANIFEST.txt",
                               NULL, NULL, NULL, "text/plain",
                               warc_manifest_fp, -1);
@@ -1065,16 +1108,16 @@ warc_write_metadata ()
   fflush (warc_tmp_fp);
   fprintf (warc_tmp_fp, "%s\n", program_argstring);
 
-  warc_write_resource_record (manifest_uuid,
-                              "metadata://gnu.org/software/wget/warc/wget_arguments.txt",
-                              NULL, NULL, NULL, "text/plain",
+  warc_write_resource_record (NULL,
+                   "metadata://gnu.org/software/wget/warc/wget_arguments.txt",
+                              NULL, manifest_uuid, NULL, "text/plain",
                               warc_tmp_fp, -1);
   /* warc_write_resource_record has closed warc_tmp_fp. */
 
   if (warc_log_fp != NULL)
     {
       warc_write_resource_record (NULL,
-                                  "metadata://gnu.org/software/wget/warc/wget.log",
+                              "metadata://gnu.org/software/wget/warc/wget.log",
                                   NULL, manifest_uuid, NULL, "text/plain",
                                   warc_log_fp, -1);
       /* warc_write_resource_record has closed warc_log_fp. */
@@ -1087,7 +1130,7 @@ warc_write_metadata ()
 /* Finishes the WARC writing.
    This should be called at the end of the program. */
 void
-warc_close ()
+warc_close (void)
 {
   if (warc_current_file != NULL)
     {
@@ -1108,20 +1151,38 @@ warc_close ()
    The temporary file will be created in opt.warc_tempdir.
    Returns the pointer to the temporary file, or NULL. */
 FILE *
-warc_tempfile ()
+warc_tempfile (void)
 {
   char filename[100];
   if (path_search (filename, 100, opt.warc_tempdir, "wget", true) == -1)
     return NULL;
 
-  int fd = mkstemp (filename);
+#ifdef __VMS
+  /* 2013-07-12 SMS.
+   * mkostemp()+unlink()+fdopen() scheme causes trouble on VMS, so use
+   * mktemp() to uniquify the (VMS-style) name, and then use a normal
+   * fopen() with a "create temp file marked for delete" option.
+   */
+  {
+    char *tfn;
+
+    tfn = mktemp (filename);            /* Get unique name from template. */
+    if (tfn == NULL)
+      return NULL;
+    return fopen (tfn, "w+", "fop=tmd");    /* Create auto-delete temp file. */
+  }
+#else /* def __VMS */
+  int fd = mkostemp (filename, O_TEMPORARY);
   if (fd < 0)
     return NULL;
 
+#if !O_TEMPORARY
   if (unlink (filename) < 0)
     return NULL;
+#endif
 
   return fdopen (fd, "wb+");
+#endif /* def __VMS [else] */
 }
 
 
@@ -1134,7 +1195,8 @@ warc_tempfile ()
    Calling this function will close body.
    Returns true on success, false on error. */
 bool
-warc_write_request_record (char *url, char *timestamp_str, char *record_uuid, ip_address *ip, FILE *body, off_t payload_offset)
+warc_write_request_record (char *url, char *timestamp_str, char *record_uuid,
+                           ip_address *ip, FILE *body, off_t payload_offset)
 {
   warc_write_start_record ();
   warc_write_header ("WARC-Type", "request");
@@ -1147,7 +1209,7 @@ warc_write_request_record (char *url, char *timestamp_str, char *record_uuid, ip
   warc_write_digest_headers (body, payload_offset);
   warc_write_block_from_file (body);
   warc_write_end_record ();
-  
+
   fclose (body);
 
   return warc_write_ok;
@@ -1166,7 +1228,11 @@ warc_write_request_record (char *url, char *timestamp_str, char *record_uuid, ip
    response_uuid  is the uuid of the response.
    Returns true on success, false on error. */
 static bool
-warc_write_cdx_record (char *url, char *timestamp_str, char *mime_type, int response_code, char *payload_digest, char *redirect_location, off_t offset, char *warc_filename, char *response_uuid)
+warc_write_cdx_record (const char *url, const char *timestamp_str,
+                       const char *mime_type, int response_code,
+                       const char *payload_digest, const char *redirect_location,
+                       off_t offset, const char *warc_filename _GL_UNUSED,
+                       const char *response_uuid)
 {
   /* Transform the timestamp. */
   char timestamp_str_cdx [15];
@@ -1177,9 +1243,9 @@ warc_write_cdx_record (char *url, char *timestamp_str, char *mime_type, int resp
   memcpy (timestamp_str_cdx + 10, timestamp_str + 14, 2); /* "MM"   ":" */
   memcpy (timestamp_str_cdx + 12, timestamp_str + 17, 2); /* "SS"   "Z" */
   timestamp_str_cdx[14] = '\0';
-  
+
   /* Rewrite the checksum. */
-  char *checksum;
+  const char *checksum;
   if (payload_digest != NULL)
     checksum = payload_digest + 5; /* Skip the "sha1:" */
   else
@@ -1190,8 +1256,14 @@ warc_write_cdx_record (char *url, char *timestamp_str, char *mime_type, int resp
   if (redirect_location == NULL || strlen(redirect_location) == 0)
     redirect_location = "-";
 
+  char offset_string[MAX_INT_TO_STRING_LEN(off_t)];
+  number_to_string (offset_string, offset);
+
   /* Print the CDX line. */
-  fprintf (warc_current_cdx_file, "%s %s %s %s %d %s %s - %ld %s %s\n", url, timestamp_str_cdx, url, mime_type, response_code, checksum, redirect_location, offset, warc_current_filename, response_uuid);
+  fprintf (warc_current_cdx_file, "%s %s %s %s %d %s %s - %s %s %s\n", url,
+           timestamp_str_cdx, url, mime_type, response_code, checksum,
+           redirect_location, offset_string, warc_current_filename,
+           response_uuid);
   fflush (warc_current_cdx_file);
 
   return true;
@@ -1211,7 +1283,9 @@ warc_write_cdx_record (char *url, char *timestamp_str, char *mime_type, int resp
    Calling this function will close body.
    Returns true on success, false on error. */
 static bool
-warc_write_revisit_record (char *url, char *timestamp_str, char *concurrent_to_uuid, char *payload_digest, char *refers_to, ip_address *ip, FILE *body)
+warc_write_revisit_record (char *url, char *timestamp_str,
+                           char *concurrent_to_uuid, char *payload_digest,
+                           char *refers_to, ip_address *ip, FILE *body)
 {
   char revisit_uuid [48];
   warc_uuid_str (revisit_uuid);
@@ -1237,7 +1311,7 @@ warc_write_revisit_record (char *url, char *timestamp_str, char *concurrent_to_u
   warc_write_header ("WARC-Payload-Digest", payload_digest);
   warc_write_block_from_file (body);
   warc_write_end_record ();
-  
+
   fclose (body);
   free (block_digest);
 
@@ -1258,7 +1332,10 @@ warc_write_revisit_record (char *url, char *timestamp_str, char *concurrent_to_u
    Calling this function will close body.
    Returns true on success, false on error. */
 bool
-warc_write_response_record (char *url, char *timestamp_str, char *concurrent_to_uuid, ip_address *ip, FILE *body, off_t payload_offset, char *mime_type, int response_code, char *redirect_location)
+warc_write_response_record (char *url, char *timestamp_str,
+                            char *concurrent_to_uuid, ip_address *ip,
+                            FILE *body, off_t payload_offset, char *mime_type,
+                            int response_code, char *redirect_location)
 {
   char *block_digest = NULL;
   char *payload_digest = NULL;
@@ -1269,15 +1346,20 @@ warc_write_response_record (char *url, char *timestamp_str, char *concurrent_to_
     {
       /* Calculate the block and payload digests. */
       rewind (body);
-      if (warc_sha1_stream_with_payload (body, sha1_res_block, sha1_res_payload, payload_offset) == 0)
+      if (warc_sha1_stream_with_payload (body, sha1_res_block, sha1_res_payload,
+          payload_offset) == 0)
         {
           /* Decide (based on url + payload digest) if we have seen this
              data before. */
-          struct warc_cdx_record *rec_existing = warc_find_duplicate_cdx_record (url, sha1_res_payload);
+          struct warc_cdx_record *rec_existing;
+          rec_existing = warc_find_duplicate_cdx_record (url, sha1_res_payload);
           if (rec_existing != NULL)
             {
+              bool result;
+
               /* Found an existing record. */
-              logprintf (LOG_VERBOSE, _("Found exact match in CDX file. Saving revisit record to WARC.\n"));
+              logprintf (LOG_VERBOSE,
+          _("Found exact match in CDX file. Saving revisit record to WARC.\n"));
 
               /* Remove the payload from the file. */
               if (payload_offset > 0)
@@ -1288,7 +1370,9 @@ warc_write_response_record (char *url, char *timestamp_str, char *concurrent_to_
 
               /* Send the original payload digest. */
               payload_digest = warc_base32_sha1_digest (sha1_res_payload);
-              bool result = warc_write_revisit_record (url, timestamp_str, concurrent_to_uuid, payload_digest, rec_existing->uuid, ip, body);
+              result = warc_write_revisit_record (url, timestamp_str,
+                         concurrent_to_uuid, payload_digest, rec_existing->uuid,
+                         ip, body);
               free (payload_digest);
 
               return result;
@@ -1326,30 +1410,35 @@ warc_write_response_record (char *url, char *timestamp_str, char *concurrent_to_
   if (warc_write_ok && opt.warc_cdx_enabled)
     {
       /* Add this record to the CDX. */
-      warc_write_cdx_record (url, timestamp_str, mime_type, response_code, payload_digest, redirect_location, offset, warc_current_filename, response_uuid);
+      warc_write_cdx_record (url, timestamp_str, mime_type, response_code,
+      payload_digest, redirect_location, offset, warc_current_filename,
+      response_uuid);
     }
 
-  if (block_digest)
-    free (block_digest);
-  if (payload_digest)
-    free (payload_digest);
+  free (block_digest);
+  free (payload_digest);
 
   return warc_write_ok;
 }
 
-/* Writes a resource record to the WARC file.
+/* Writes a resource or metadata record to the WARC file.
+   record_type  is either "resource" or "metadata",
    resource_uuid  is the uuid of the resource (or NULL),
    url  is the target uri of the resource,
    timestamp_str  is the timestamp (generated with warc_timestamp),
-   concurrent_to_uuid  is the uuid of the request for that generated this resource
-                 (generated with warc_uuid_str) or NULL,
+   concurrent_to_uuid  is the uuid of the record that generated this
+   resource (generated with warc_uuid_str) or NULL,
    ip  is the ip address of the server (or NULL),
    content_type  is the mime type of the body (or NULL),
    body  is a pointer to a file containing the resource data.
    Calling this function will close body.
    Returns true on success, false on error. */
-bool
-warc_write_resource_record (char *resource_uuid, char *url, char *timestamp_str, char *concurrent_to_uuid, ip_address *ip, char *content_type, FILE *body, off_t payload_offset)
+static bool
+warc_write_record (const char *record_type, char *resource_uuid,
+                 const char *url, const char *timestamp_str,
+                 const char *concurrent_to_uuid,
+                 ip_address *ip, const char *content_type, FILE *body,
+                 off_t payload_offset)
 {
   if (resource_uuid == NULL)
     {
@@ -1361,7 +1450,7 @@ warc_write_resource_record (char *resource_uuid, char *url, char *timestamp_str,
     content_type = "application/octet-stream";
 
   warc_write_start_record ();
-  warc_write_header ("WARC-Type", "resource");
+  warc_write_header ("WARC-Type", record_type);
   warc_write_header ("WARC-Record-ID", resource_uuid);
   warc_write_header ("WARC-Warcinfo-ID", warc_current_warcinfo_uuid_str);
   warc_write_header ("WARC-Concurrent-To", concurrent_to_uuid);
@@ -1372,9 +1461,52 @@ warc_write_resource_record (char *resource_uuid, char *url, char *timestamp_str,
   warc_write_header ("Content-Type", content_type);
   warc_write_block_from_file (body);
   warc_write_end_record ();
-  
+
   fclose (body);
 
   return warc_write_ok;
 }
 
+/* Writes a resource record to the WARC file.
+   resource_uuid  is the uuid of the resource (or NULL),
+   url  is the target uri of the resource,
+   timestamp_str  is the timestamp (generated with warc_timestamp),
+   concurrent_to_uuid  is the uuid of the record that generated this
+   resource (generated with warc_uuid_str) or NULL,
+   ip  is the ip address of the server (or NULL),
+   content_type  is the mime type of the body (or NULL),
+   body  is a pointer to a file containing the resource data.
+   Calling this function will close body.
+   Returns true on success, false on error. */
+bool
+warc_write_resource_record (char *resource_uuid, const char *url,
+                 const char *timestamp_str, const char *concurrent_to_uuid,
+                 ip_address *ip, const char *content_type, FILE *body,
+                 off_t payload_offset)
+{
+  return warc_write_record ("resource",
+      resource_uuid, url, timestamp_str, concurrent_to_uuid,
+      ip, content_type, body, payload_offset);
+}
+
+/* Writes a metadata record to the WARC file.
+   record_uuid  is the uuid of the record (or NULL),
+   url  is the target uri of the record,
+   timestamp_str  is the timestamp (generated with warc_timestamp),
+   concurrent_to_uuid  is the uuid of the record that generated this
+   record (generated with warc_uuid_str) or NULL,
+   ip  is the ip address of the server (or NULL),
+   content_type  is the mime type of the body (or NULL),
+   body  is a pointer to a file containing the record data.
+   Calling this function will close body.
+   Returns true on success, false on error. */
+bool
+warc_write_metadata_record (char *record_uuid, const char *url,
+                 const char *timestamp_str, const char *concurrent_to_uuid,
+                 ip_address *ip, const char *content_type, FILE *body,
+                 off_t payload_offset)
+{
+  return warc_write_record ("metadata",
+      record_uuid, url, timestamp_str, concurrent_to_uuid,
+      ip, content_type, body, payload_offset);
+}