]> sjero.net Git - wget/blobdiff - src/ChangeLog
[svn] Commit my url.c fix (space as unsafe character) and Jan's
[wget] / src / ChangeLog
index 7d74f276afcaeec8aab47ad853ecee3bb2905bb7..d5acdc160e808b3247c843f2cd555c6f40f74585 100644 (file)
@@ -1,3 +1,541 @@
+2000-12-05  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * url.c (init_unsafe_char_table): Reinstate space as an unsafe
+       char.
+
+2000-11-29  John Summerfield  <summer@OS2.ami.com.au>
+
+       * netrc.c (parse_netrc): Get rid of line ending.
+
+2000-11-25  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * ftp.c (ftp_retrieve_list): Undo typo "fix" until resolution by
+       Dan.
+
+2000-11-24  Karl Eichwalder  <ke@suse.de>
+
+       * main.c (print_help): Untabify.
+
+2000-11-23  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * utils.c (xrealloc_debug): Do the unregister/register thing only
+       if the pointer has actually changed.
+       (xmalloc_real): Declare `static' in DEBUG_MALLOC builds.
+       (xfree_real): Ditto.
+       (xrealloc_real): Ditto.
+       (xstrdup_real): Ditto.
+
+2000-11-22  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * ftp.c (getftp): ftp_getaddress() returns a malloc'ed copy of the
+       string; no need to strdup() it.
+       (getftp): Make pwd_len a local variable.
+       (ftp_loop): Free PWD before returning.
+
+       * init.c (cleanup): Free opt.ftp_pass only if it's non-NULL.
+
+2000-11-22  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * all: Use xfree() instead of free.
+
+       * utils.c (xfree): New function.
+
+2000-11-21  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * url.c (convert_links): HTML-quote the converted string.
+
+       * utils.c (html_quote_string): Move here from ftp-ls.c
+       (html_quote_string): Make non-static; declare in utils.h.
+       (html_quote_string): Convert SP to &#32;.
+
+2000-11-21  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * ftp.c (getftp): Reformat Jan's code according to GNU coding
+       standards; remove (debugging?) printf's; use '\0' for the ASCII
+       zero character.  Use alloca() instead of malloc() for
+       inter-function temporary allocations.
+
+2000-11-18  Jan Prikryl  <prikryl@cg.tuwien.ac.at>
+
+       * ftpparse.c, ftpparse.h: New files.
+
+       * ftp-ls.c (ftp_parse_ls): Use ftp_parse_unix_ls for UNIX servers
+       only. Use ftp_parse_nonunix_ls otherwise.
+       (ftp_parse_nonunix_ls): Stub to the ftpparse library handling all
+       exotic FTP servers.
+
+       * ftp.h (stype): New enum, distinguishes UNIX, VMS, and "other"
+       FTP servers.
+
+       * ftp.c: New static variables host_type, pwd, and pwd_len.
+       (getftp): Support for VMS. Support for FTP servers that do not
+       place you in the root directory after login.
+       (ftp_retrieve_list): VMS is silent about the real file size, issue
+       a more appropriate message.
+       (ftp_get_listing): Pass host_type to ftp_parse_ls.
+
+       * ftp-basic.c (ftp_pwd, ftp_syst): New functions.
+
+2000-11-21  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * hash.c (hash_table_put): Don't overwrite deleted mappings.
+
+2000-11-21  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * hash.c (find_mapping): New function.
+       (hash_table_get): Use it.
+       (hash_table_get_pair): Ditto.
+       (hash_table_exists): Ditto.
+       (hash_table_remove): Ditto.
+       (hash_table_remove): Really delete the entry if the mapping
+       following LOCATION is empty.
+
+       * utils.c (string_set_add): Check whether the element has existed
+       before.
+
+       * hash.c (hash_table_get_pair): New function.
+
+2000-11-20  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * http.c (http_process_type): Ignore trailing whitespace; use
+       strdupdelim().
+
+       * recur.c (recursive_retrieve): Use the new `convert' field.
+       (convert_all_links): Ditto.
+       (convert_all_links): Don't respect meta_disallow_follow.
+
+       * html-url.c (handle_link): Fill out link_relative_p and
+       link_complete_p.
+
+       * url.h (struct _urlpos): Make elements more readable.
+
+       * recur.c (recursive_retrieve): Call slist_prepend instead of
+       slist_append.
+       (convert_all_links): Call slist_nreverse before iterating through
+       urls_html.
+
+       * utils.c (slist_prepend): New function.
+       (slist_nreverse): Ditto.
+
+2000-11-20  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * http.c (check_end): Constify.
+
+2000-11-20  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * http.c (http_loop): If username and password are known, try the
+       `Basic' authentication scheme by default.
+
+       * connect.h: Declare test_socket_open.
+
+2000-11-20  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * version.c: Bump version to 1.7-dev.
+
+2000-11-20  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * http.c (gethttp): Don't use the return value of sprintf().
+       (gethttp): Inhibit keep-alive if opt.http_keep_alive is 0.
+
+2000-11-20  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * recur.c (recursive_retrieve): Print the "so we don't load"
+       debugging message only if we really don't load.
+
+       * http.c (gethttp): Inhibit keep-alive if proxy is being used.
+       (gethttp): Don't request keep-alive if keep-alive is inhibited.
+
+2000-11-19  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * http.c (gethttp): Make the HTTP persistent connections more
+       robust.
+
+2000-11-19  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * retr.c (get_contents): If use_expected, make sure that the
+       appropriate amount of data is being read.
+
+       * http.c (gethttp): Check for both `Keep-Alive: ...' and
+       `Connection: Keep-Alive'.
+
+       * wget.h (DEBUGP): Call debug_logprintf only if opt.debug is
+       turned on.
+
+2000-11-19  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * http.c (connection_available_p): Use it.
+
+       * connect.c (test_socket_open): New function.
+
+       * http.c (gethttp): Support persistent connections.  Based on the
+       ideas, and partly on code, by Sam Horrocks <sam@daemoninc.com>.
+       (register_persistent): New function.
+       (connection_available_p): Ditto.
+       (invalidate_connection): Ditto.
+
+2000-11-19  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * url.c (convert_links): Handle UREL2ABS case.
+
+       * recur.c (recursive_retrieve): Instead of the list
+       urls_downloaded, use hash tables dl_file_url_map and
+       dl_url_file_map.
+       (convert_all_links): Use them to retrieve data.
+
+       * host.c (clean_hosts): Free the hash tables.
+
+       * main.c (private_initialize): Call host_init().
+
+       * host.c (store_hostaddress): Use a saner, hash table-based data
+       model.
+       (realhost): Ditto.
+       (host_init): Initialize the hash tables.
+
+2000-11-18  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * utils.c (slist_append): Eviscerate NOSORT.  Hash tables are now
+       used for what the sorted slists used to be used for.
+       (slist_contains): Don't rely on the list being sorted.
+       (slist_append): Simplify the code.
+
+       * recur.c (recursive_cleanup): Use free_string_set.
+
+       * utils.c (string_set_add, string_set_exists, string_set_free):
+       New functions for easier freeing of hash tables whose keys are
+       strdup'ed strings.
+
+       * recur.c (recursive_retrieve): Use the hash table functions for
+       storing undesirable URLs.
+
+       * hash.c: New file.
+
+2000-11-17  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * main.c (private_initialize): Call url_init.
+       (main): Call private_initialize.
+
+       * url.c (unsafe_char_table): New table.
+       (UNSAFE_CHAR): Use it.
+       (init_unsafe_char_table): New function.
+       (url_init): New function; call init_unsafe_char_table.
+
+2000-11-15  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * html-url.c (handle_link): Handle HTML fragment identifiers.
+
+       * recur.c (recursive_retrieve): If norobot info is respected and
+       the file is specified not to be followed by robots, respect that.
+
+       * html-url.c (collect_tags_mapper): Handle <meta name=robots
+       content=X>.  For us the important cases are where X is NONE or
+       where X contains NOFOLLOW.
+       (get_urls_html): Propagate that information to the caller.
+
+2000-11-13  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * url.c (convert_links): Unlink the file we might be reading from
+       before writing to it.
+       (convert_links): Use alloca instead of malloc for
+       filename_plus_orig_suffix.
+
+2000-11-10  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * url.c (get_urls_file): Ditto.
+       (convert_links): Ditto.
+
+       * html-url.c (get_urls_html): Use read_file() instead of
+       load_file().
+
+       * utils.c (read_file): New function, instead of the old
+       load_file().
+       (read_file_free): Ditto.
+
+       * url.c (findurl): Search only for the supported protocols.
+       (convert_links): Use fwrite() when writing out a region of
+       characters.
+
+2000-11-10  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * ftp-ls.c: Move html_quote_string and ftp_index here.
+
+       * url.c: Remove get_urls_html, since that's now in html-url.c.
+
+       * html-url.c: New file.
+
+       * html-parse.c: New file.
+
+2000-11-16  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * mswindows.h: Define snprintf and vsnprintf to _snprintf and
+       _vsnprintf respectively.
+
+2000-11-15  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * config.h.in: Do the _XOPEN_SOURCE and _SVID_SOURCE things only
+       on Linux.
+
+2000-11-12  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * host.c (realhost): Add HOST to the list with quality==0 only if
+       it wasn't already there.
+       Based on analysis by Lu Guohan <feng@public.bjnet.edu.cn>.
+
+2000-11-10  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * init.c (run_wgetrc): Don't bother killing off '\r' since
+       parse_line() skips whitespace at end of line anyway.
+       (parse_line): Oops, it didn't.  Now it does.
+
+       * recur.c (parse_robots): Ditto here.
+
+       * ftp-ls.c (ftp_parse_unix_ls): Kill off the newline character
+       manually because read_whole_line no longer does.
+
+       * utils.c (read_whole_line): Rewrite to: a) use less memory
+       (reallocates to needed size after work), b) work faster -->
+       fgets() instead of getc, c) be more correct --> doesn't kill the
+       newline character at the end of line.
+
+2000-11-10  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * init.c (comind): Initialize MAX to array size - 1.
+
+2000-11-08  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * url.c (construct): Changed last_slash[-1] to *(last_slash - 1).
+       Suggested by Edward J. Sabol.
+
+2000-11-08  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * url.c (construct): Handle the case where host name is not
+       followed by a slash.
+
+2000-11-06  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * init.c: commands[] need to be sorted!  ("base" wasn't.)
+
+2000-11-05  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * wget.h (DO_REALLOC_FROM_ALLOCA): Use braces to disambiguate
+       `if'.
+
+2000-11-05  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * url.c (construct): Insert unneeded initialization for the
+       compiler to shut up.
+
+       * config.h.in: Define _XOPEN_SOURCE to 500 to get the prototype
+       for strptime() (*duh*).  Define _SVID_SOURCE to get S_IFLNK which
+       otherwise gets lost when you define _XOPEN_SOURCE.
+
+       * utils.c (touch): Include the file name in the error message.
+       From Debian.
+
+2000-11-05  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * log.c (logvprintf): Use vsnprintf() in all cases.  If necessary,
+       resize the buffer to fit the formatted message.  That way, messages
+       of arbitrary size may be printed.
+       (logvprintf): Use saved_append() to optionally log the last
+       several lines of output.
+       (logputs): Ditto.
+       (log_close): Adapt to new data structures.
+       (log_dump): Ditto.
+       (redirect_output): Print messages to stderr, not to stdout.
+
+       * log.c (saved_append_1): New function.  Replaces the old logging
+       system ("log all output until 10M characters") with a new, much
+       more reasonable one ("log last screenful of text").
+       (saved_append): New function; call saved_append_1.
+       (free_log_line): New function.
+
+2000-11-05  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * url.c (construct): Fix comment.
+       (find_last_char): Document.
+
+2000-11-04  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * snprintf.c: New file.
+
+2000-11-03  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * wget.h: If HAVE_STDARG_H is not defined, don't declare argument
+       types to logprintf() and debug_logprintf().
+
+2000-11-02  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * ftp.c (ftp_loop_internal): Hide the password from the URL when
+       printing non-verbose.  Problem spotted by Dariusz Mlynarczyk
+       <darekm@bydg.lomac.com.pl>.
+
+2000-11-02  Junio Hamano  <junio@twinsun.com>
+
+       * ftp-basic.c (ftp_login): Make comparison case-insensitive.
+
+2000-11-02  Tyler Riddle  <triddle@liquidmarket.com>
+
+       * http.c (known_authentication_scheme_p): Recognize NTLM
+       authentication.
+       (create_authorization_line): Treat NTLM the same as `Basic'.
+
+2000-11-02  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * retr.c (retrieve_url): Free url before returning.
+       (retrieve_url): Free mynewloc before returning.
+       Spotted by Mark A. Mankins <Mankins_Mark@prc.com>.
+
+2000-11-02  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * url.c (parseurl): Remove possible reading past the end of
+       sup_protos[].  Spotted by Mark A. Mankins <Mankins_Mark@prc.com>.
+
+2000-11-01  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * main.c (main): In case of opt.downloaded overflowing, print
+       <overflow> instead of a totally bogus random value.
+
+       * retr.c (retrieve_from_file): Ditto.
+
+       * recur.c (recursive_retrieve): Ditto.
+
+       * main.c (main): Ditto.
+
+       * http.c (http_loop): Ditto.
+
+       * ftp.c (ftp_loop_internal): Use downloaded_increase() instead of
+       `+=', and downloaded_exceeds_quota() instead of the simple-minded
+       check.
+       (ftp_retrieve_list): Ditto.
+       (ftp_retrieve_dirs): Ditto.
+       (ftp_retrieve_glob): Ditto.
+
+       * retr.c (downloaded_increase): New function.  Notice overflows of
+       opt.downloaded.
+       (downloaded_exceeds_quota): Make sure that opt.downloaded is not
+       used if it overflowed.
+
+       * options.h (struct options): New member downloaded_overflow.
+
+2000-11-01  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * wget.h (enum): Remove extra space after last enumeration.
+
+2000-11-01  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * main.c (main): Use legible_very_long() for printing
+       opt.downloaded.
+
+       * utils.c (legible_1): New function that operates on strings and
+       does the brunt of legible()'s work.
+       (legible): Use legible_1().
+       (legible_very_long): New function; dump the argument with
+       sprintf(), and call legible_1().
+
+       * options.h (struct options): Use VERY_LONG_TYPE for
+       opt.downloaded.
+
+       * sysdep.h (VERY_LONG_TYPE): Define it to have a 64-bit or greater
+       type.
+
+       * config.h.in: Make sure that SIZEOF_LONG and SIZEOF_LONG_LONG get
+       defined.  Define HAVE_LONG_LONG if long long is available.
+
+2000-11-01  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * utils.c (long_to_string): Update with a later, better version.
+
+2000-11-01  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * url.c (path_simplify_with_kludge): New function.
+       (path_simplify_with_kludge): Disable it.  Instead...
+       (parse_dir): ...make sure that at this point the right thing is
+       done, i.e. that "query" part of the URL (?...) is always assigned
+       to the file, never to the directory portion of the path.
+
+2000-11-01  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * retr.c (retrieve_url): Detect redirection cycles.
+
+2000-11-01  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * url.c (get_urls_html): Decode HTML entities using
+       html_decode_entities.
+
+       * html.c (htmlfindurl): Don't count the `#' in numeric entities
+       (&#NNN;) as an HTML fragment.
+       (html_decode_entities): New function.
+
+2000-11-01  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * html.c (htmlfindurl): Fix recognition of # HTML fragments.
+
+2000-11-01  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * url.c (construct): Rewritten for clarity.  Avoids the
+       unnecessary copying and stack-allocation the old version
+       performed.
+
+2000-10-31  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * ftp.c (getftp): Ditto.
+
+       * http.c (gethttp): Rewind the stream when retrying from scratch.
+
+2000-10-31  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * retr.c (retrieve_url): Use url_concat() to handle relative
+       redirections instead of /ad hoc/ code.
+
+       * url.c (url_concat): New function encapsulating weird
+       construct().
+       (urllen_http_hack): New function.
+       (construct): When constructing new URLs, recognize that `?' does
+       not form part of the file name in HTTP.
+
+2000-10-13  Adrian Aichner  <adrian@xemacs.org>
+
+       * retr.c: Add msec timing support for WINDOWS.
+       * retr.c (reset_timer): GetSystemTime() on WINDOWS.
+       * retr.c (elapsed_time): Calculate delta time to msec on WINDOWS.
+
+2000-10-27  Dan Harkless  <dan-wget@dilvish.speed.net>
+
+       * retr.c (retrieve_url): Manually applied T. Bharath
+       <TBharath@responsenetworks.com>'s patch to get wget to grok
+       illegal relative URL redirects.  Reformatted and re-commented it.
+
+2000-10-23  Dan Harkless  <dan-wget@dilvish.speed.net>
+
+       * connect.c (make_connection and bindport): Manually applied Rob
+       Mayoff <mayoff@dqd.com>'s 1.5.3 patch to add --bind-address,
+       changing coding style to GNU's.
+
+       * ftp.c (ftp_loop_internal): --delete-after wasn't implemented for
+       files downloaded via FTP.  Per a comment, .listing files were not
+       counted towards number of bytes and files downloaded because they're 
+       deleted anyway.  Well, they aren't under -nr, so count them then.
+
+       * init.c: Manually applied Rob Mayoff's 1.5.3 patch to add
+       --bind-address, alphabetizing, changing coding style to GNU's,
+       commenting, and renaming cmd_ip_address() to cmd_address() to
+       imply hostnames also okay.
+               
+       * main.c (main): --delete-after didn't delete the root of the
+       tree.  Ignore --convert-links if --delete-after was specified.
+       Manually applied Rob Mayoff's 1.5.3 patch to add --bind-address,
+       fixing duplicate use of added-since-1.5.3 case value.
+       (print_help): Clarified that --delete-after deletes local files.
+       Rob forgot to add a line for his new --bind-address option.
+               
+       * options.h (struct options): Manually applied Rob Mayoff's patch
+       to add --bind-address (bind_address structure member).
+               
+       * recur.c (recursive_retrieve): Improved comment; added DEBUGP().
+       Ignore --convert-links if --delete-after was specified.
+               
+       * retr.c (retrieve_from_file): Just added a DEBUGP().
+               
 2000-10-19  Dan Harkless  <dan-wget@dilvish.speed.net>
 
        * ftp.c (ftp_loop_internal): downloaded_file() enumerators changed.