X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fwarc.c;h=fb506a7f99aee4ad9405dd0aa0207df7130dd042;hb=3ebbd84518281f348659bf8cf2d7bdc25ec4e140;hp=6935aaf1b136144626e92a2080047d5129929e93;hpb=9cc514d21ced5b18c9db1bec418c04a99d3d6c5c;p=wget diff --git a/src/warc.c b/src/warc.c index 6935aaf1..fb506a7f 100644 --- a/src/warc.c +++ b/src/warc.c @@ -75,7 +75,7 @@ static FILE *warc_current_file; #ifdef HAVE_LIBZ /* The gzip stream for the current WARC file (or NULL, if WARC or gzip is disabled). */ -static gzFile *warc_current_gzfile; +static gzFile warc_current_gzfile; /* The offset of the current gzip record in the WARC file. */ static off_t warc_current_gzfile_offset; @@ -245,15 +245,10 @@ static bool warc_write_block_from_file (FILE *data_in) { /* Add the Content-Length header. */ - char *content_length; + char content_length[MAX_INT_TO_STRING_LEN(off_t)]; fseeko (data_in, 0L, SEEK_END); - if (! asprintf (&content_length, "%ld", ftello (data_in))) - { - warc_write_ok = false; - return false; - } + number_to_string (content_length, ftello (data_in)); warc_write_header ("Content-Length", content_length); - free (content_length); /* End of the WARC header section. */ warc_write_string ("\r\n"); @@ -532,7 +527,7 @@ warc_sha1_stream_with_payload (FILE *stream, void *res_block, void *res_payload, static char * warc_base32_sha1_digest (char *sha1_digest) { - // length: "sha1:" + digest + "\0" + /* length: "sha1:" + digest + "\0" */ char *sha1_base32 = malloc (BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1 + 5 ); base32_encode (sha1_digest, SHA1_DIGEST_SIZE, sha1_base32 + 5, BASE32_LENGTH(SHA1_DIGEST_SIZE) + 1); @@ -913,6 +908,12 @@ warc_process_cdx_line (char *lineptr, int field_num_original_url, free (record_id); } } + else + { + xfree_null(checksum); + xfree_null(original_url); + xfree_null(record_id); + } } /* Loads the CDX file from opt.warc_cdx_dedup_filename and fills @@ -999,12 +1000,10 @@ warc_find_duplicate_cdx_record (char *url, char *sha1_digest_payload) if (warc_cdx_dedup_table == NULL) return NULL; - char *key; - struct warc_cdx_record *rec_existing; - hash_table_get_pair (warc_cdx_dedup_table, sha1_digest_payload, &key, - &rec_existing); + struct warc_cdx_record *rec_existing + = hash_table_get (warc_cdx_dedup_table, sha1_digest_payload); - if (rec_existing != NULL && strcmp (rec_existing->url, url) == 0) + if (rec_existing && strcmp (rec_existing->url, url) == 0) return rec_existing; else return NULL; @@ -1226,10 +1225,14 @@ warc_write_cdx_record (const char *url, const char *timestamp_str, if (redirect_location == NULL || strlen(redirect_location) == 0) redirect_location = "-"; + char offset_string[MAX_INT_TO_STRING_LEN(off_t)]; + number_to_string (offset_string, offset); + /* Print the CDX line. */ - fprintf (warc_current_cdx_file, "%s %s %s %s %d %s %s - %ld %s %s\n", url, + fprintf (warc_current_cdx_file, "%s %s %s %s %d %s %s - %s %s %s\n", url, timestamp_str_cdx, url, mime_type, response_code, checksum, - redirect_location, offset, warc_current_filename, response_uuid); + redirect_location, offset_string, warc_current_filename, + response_uuid); fflush (warc_current_cdx_file); return true;