X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fwarc.c;h=25a8517f2b31e848b56bf334b56da24b948f5203;hb=359dd167602071cfa62d6c586ca846ede5ed7c29;hp=57fdcad0766024fde7d0399f630cbe23fbd7c005;hpb=8ac9c05fc03c9f12ae201fda56622f42d78ff697;p=wget diff --git a/src/warc.c b/src/warc.c index 57fdcad0..25a8517f 100644 --- a/src/warc.c +++ b/src/warc.c @@ -75,7 +75,7 @@ static FILE *warc_current_file; #ifdef HAVE_LIBZ /* The gzip stream for the current WARC file (or NULL, if WARC or gzip is disabled). */ -static gzFile *warc_current_gzfile; +static gzFile warc_current_gzfile; /* The offset of the current gzip record in the WARC file. */ static off_t warc_current_gzfile_offset; @@ -245,15 +245,10 @@ static bool warc_write_block_from_file (FILE *data_in) { /* Add the Content-Length header. */ - char *content_length; + char content_length[22]; fseeko (data_in, 0L, SEEK_END); - if (! asprintf (&content_length, "%ld", ftello (data_in))) - { - warc_write_ok = false; - return false; - } + number_to_string (content_length, ftello (data_in)); warc_write_header ("Content-Length", content_length); - free (content_length); /* End of the WARC header section. */ warc_write_string ("\r\n"); @@ -913,6 +908,12 @@ warc_process_cdx_line (char *lineptr, int field_num_original_url, free (record_id); } } + else + { + xfree_null(checksum); + xfree_null(original_url); + xfree_null(record_id); + } } /* Loads the CDX file from opt.warc_cdx_dedup_filename and fills @@ -930,7 +931,7 @@ warc_load_cdx_dedup_file (void) char *lineptr = NULL; size_t n = 0; - size_t line_length; + ssize_t line_length; /* The first line should contain the CDX header. Format: " CDX x x x x x" @@ -999,12 +1000,10 @@ warc_find_duplicate_cdx_record (char *url, char *sha1_digest_payload) if (warc_cdx_dedup_table == NULL) return NULL; - char *key; - struct warc_cdx_record *rec_existing; - hash_table_get_pair (warc_cdx_dedup_table, sha1_digest_payload, &key, - &rec_existing); + struct warc_cdx_record *rec_existing + = hash_table_get (warc_cdx_dedup_table, sha1_digest_payload); - if (rec_existing != NULL && strcmp (rec_existing->url, url) == 0) + if (rec_existing && strcmp (rec_existing->url, url) == 0) return rec_existing; else return NULL; @@ -1226,10 +1225,14 @@ warc_write_cdx_record (const char *url, const char *timestamp_str, if (redirect_location == NULL || strlen(redirect_location) == 0) redirect_location = "-"; + char offset_string[22]; + number_to_string (offset_string, offset); + /* Print the CDX line. */ - fprintf (warc_current_cdx_file, "%s %s %s %s %d %s %s - %ld %s %s\n", url, + fprintf (warc_current_cdx_file, "%s %s %s %s %d %s %s - %s %s %s\n", url, timestamp_str_cdx, url, mime_type, response_code, checksum, - redirect_location, offset, warc_current_filename, response_uuid); + redirect_location, offset_string, warc_current_filename, + response_uuid); fflush (warc_current_cdx_file); return true;