From b014f8fae9291e7504c0cca2dd8b9a0035466c03 Mon Sep 17 00:00:00 2001 From: Micah Cowan Date: Thu, 27 Aug 2009 23:08:58 -0700 Subject: [PATCH] Improved exit status handling. --- ChangeLog | 4 + NEWS | 3 + src/ChangeLog | 25 ++++ src/Makefile.am | 3 +- src/exits.c | 111 ++++++++++++++++++ src/exits.h | 30 +++++ src/http.c | 9 +- src/main.c | 8 +- src/recur.c | 2 +- src/res.c | 2 +- src/retr.c | 22 ++-- src/retr.h | 2 +- src/wget.h | 16 +-- tests/ChangeLog | 11 ++ tests/Test--spider-fail.px | 2 +- ...pider-r--no-content-disposition-trivial.px | 2 +- .../Test--spider-r--no-content-disposition.px | 2 +- ...Test--spider-r-HTTP-Content-Disposition.px | 2 +- tests/Test--spider-r.px | 2 +- tests/Test-O-nonexisting.px | 2 +- tests/Test-cookies-401.px | 2 +- tests/Test-nonexisting-quiet.px | 2 +- tests/WgetTest.pm.in | 1 + 23 files changed, 232 insertions(+), 33 deletions(-) create mode 100644 src/exits.c create mode 100644 src/exits.h diff --git a/ChangeLog b/ChangeLog index dc0f6cd4..0af82930 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2009-08-27 Micah Cowan + + * NEWS: Mention the changes to exit codes. + 2009-08-27 Micah Cowan * NEWS: Add mention of the NUL characters SSL security fix. diff --git a/NEWS b/NEWS index 87bd21c0..4467817c 100644 --- a/NEWS +++ b/NEWS @@ -32,6 +32,9 @@ are translated from their source encoding to UTF-8 before percent-encoding. IRI support was added by Saint Xavier , as his project for the Google Summer of Code. +** Wget now provides more sensible exit status codes when downloads +don't proceed as expected (see the manual). + ** --default-page option (and associated wgetrc command) added to support alternative default names for index.html. diff --git a/src/ChangeLog b/src/ChangeLog index 03795ef7..7a096ef3 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,28 @@ +2009-08-27 Micah Cowan + + * wget.h (uerr_t): added new VERIFCERTERR code for SSL certificate + problems. 
Marked exit codes that are defined but never used (at + least, the ones I could find). + + * retr.c, retr.h (retrieve_url): Added a new boolean argument to + determine whether an exit status should be recorded. + (retrieve_from_file): Adjust to new retrieve_url signature. + + * res.c (res_retrieve_file): Don't have retrieve_url record an + exit status for robots.txt. + + * recur.c (retrieve_tree): Adjust to new retrieve_url signature. + + * main.c (main): Use the exit status stored by retrieve_url. + + * http.c (gethttp): Distinguish certificate verification problems + from SSL connection issues. + (http_loop): Handle newly-created VERIFCERTERR error code. + + * exits.c, exits.h: Newly added. + + * Makefile.am (wget_SOURCES): Add exits.c and exits.h. + 2009-08-27 Micah Cowan * http.c (gethttp): Make sure Wget heeds cookies when they diff --git a/src/Makefile.am b/src/Makefile.am index f0da4eee..026ff1c8 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -49,7 +49,8 @@ wget_SOURCES = cmpt.c connect.c convert.c cookies.c ftp.c \ ftp.h gen-md5.h hash.h host.h html-parse.h html-url.h \ http.h http-ntlm.h init.h log.h mswindows.h netrc.h \ options.h progress.h ptimer.h recur.h res.h retr.h \ - spider.h ssl.h sysdep.h url.h utils.h wget.h iri.h + spider.h ssl.h sysdep.h url.h utils.h wget.h iri.h \ + exits.c exits.h nodist_wget_SOURCES = build_info.c version.c EXTRA_wget_SOURCES = mswindows.c iri.c LDADD = $(LIBOBJS) ../lib/libgnu.a @MD5_LDADD@ diff --git a/src/exits.c b/src/exits.c new file mode 100644 index 00000000..254034f7 --- /dev/null +++ b/src/exits.c @@ -0,0 +1,111 @@ +/* Exit status handling. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. + + This file is part of GNU Wget. 
+ + GNU Wget is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + GNU Wget is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Wget. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "wget.h" +#include "exits.h" + +/* Final exit code possibilities. Exit codes 1 and 2 are reserved + * for situations that lead to direct exits from Wget, not using the + * value of final_exit_status. */ +enum + { + WGET_EXIT_SUCCESS = 0, + + WGET_EXIT_MINIMUM = 3, + WGET_EXIT_IO_FAIL = WGET_EXIT_MINIMUM, + WGET_EXIT_NETWORK_FAIL = 4, + WGET_EXIT_SSL_AUTH_FAIL = 5, + WGET_EXIT_SERVER_AUTH_FAIL = 6, + WGET_EXIT_PROTOCOL_ERROR = 7, + WGET_EXIT_SERVER_ERROR = 8, + + WGET_EXIT_UNKNOWN + }; + +static int final_exit_status = WGET_EXIT_SUCCESS; + +/* XXX: I don't like that newly-added uerr_t codes will doubtless fall + through the cracks, or the fact that we seem to have way more + codes than we know what to do with. Need to go through and sort + through the truly essential codes, and merge the rest with + those. Quite a few are never even used! + + Quite a few of the codes below would have no business being + returned to retrieve_url's caller, but since it's very difficult to + determine which do and which don't, I grab virtually all of them to + be safe. 
*/ +static int +get_status_for_err (uerr_t err) +{ + switch (err) + { + case RETROK: + return WGET_EXIT_SUCCESS; + case FOPENERR: case FOPEN_EXCL_ERR: case FWRITEERR: case WRITEFAILED: + return WGET_EXIT_IO_FAIL; + case NOCONERROR: case HOSTERR: case CONSOCKERR: case CONERROR: + case CONSSLERR: case CONIMPOSSIBLE: case FTPRERR: case FTPINVPASV: + case READERR: case TRYLIMEXC: + return WGET_EXIT_NETWORK_FAIL; + case VERIFCERTERR: + return WGET_EXIT_SSL_AUTH_FAIL; + case FTPLOGINC: case FTPLOGREFUSED: case AUTHFAILED: + return WGET_EXIT_SERVER_AUTH_FAIL; + case HEOF: case HERR: + return WGET_EXIT_PROTOCOL_ERROR; + case WRONGCODE: case FTPPORTERR: case FTPSYSERR: + case FTPNSFOD: case FTPUNKNOWNTYPE: case FTPSRVERR: + case FTPRETRINT: case FTPRESTFAIL: case FTPNOPASV: + case CONTNOTSUPPORTED: case RANGEERR: case RETRBADPATTERN: + case PROXERR: + return WGET_EXIT_SERVER_ERROR; + case URLERROR: case QUOTEXC: case SSLINITFAILED: + default: + return WGET_EXIT_UNKNOWN; + } +} + +/* inform_exit_status + * + * Ensure that Wget's exit status will reflect the problem indicated + * by ERR, unless the exit status has already been set to reflect a more + * important problem. */ +void +inform_exit_status (uerr_t err) +{ + int new_status = get_status_for_err (err); + + if (new_status != WGET_EXIT_SUCCESS + && (final_exit_status == WGET_EXIT_SUCCESS + || new_status < final_exit_status)) + { + final_exit_status = new_status; + } +} + +int +get_exit_status (void) +{ + return + (final_exit_status == WGET_EXIT_UNKNOWN) + ? 1 + : final_exit_status; +} + diff --git a/src/exits.h b/src/exits.h new file mode 100644 index 00000000..94fb76b2 --- /dev/null +++ b/src/exits.h @@ -0,0 +1,30 @@ +/* Exit status related declarations. + Copyright (C) 2008 Free Software Foundation, Inc. + +This file is part of GNU Wget. 
+ +GNU Wget is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +(at your option) any later version. + +GNU Wget is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Wget. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef WGET_EXITS_H +#define WGET_EXITS_H + +#include "wget.h" + + +void inform_exit_status (uerr_t err); + +int get_exit_status (void); + + +#endif /* WGET_EXITS_H */ diff --git a/src/http.c b/src/http.c index a8705aa4..1b579bf1 100644 --- a/src/http.c +++ b/src/http.c @@ -1762,11 +1762,16 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, if (conn->scheme == SCHEME_HTTPS) { - if (!ssl_connect_wget (sock) || !ssl_check_certificate (sock, u->host)) + if (!ssl_connect_wget (sock)) { fd_close (sock); return CONSSLERR; } + else if (!ssl_check_certificate (sock, u->host)) + { + fd_close (sock); + return VERIFCERTERR; + } using_ssl = true; } #endif /* HAVE_SSL */ @@ -2598,7 +2603,7 @@ Spider mode enabled. Check if remote file exists.\n")); logprintf (LOG_NOTQUIET, _("Cannot write to %s (%s).\n"), quote (hstat.local_file), strerror (errno)); case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED: - case SSLINITFAILED: case CONTNOTSUPPORTED: + case SSLINITFAILED: case CONTNOTSUPPORTED: case VERIFCERTERR: /* Fatal errors just return from the function. */ ret = err; goto exit; diff --git a/src/main.c b/src/main.c index 5c0b3315..0f059f76 100644 --- a/src/main.c +++ b/src/main.c @@ -44,6 +44,7 @@ as that of the covered work. 
*/ #include #include +#include "exits.h" #include "utils.h" #include "init.h" #include "retr.h" @@ -1289,7 +1290,7 @@ WARNING: Can't reopen standard output in binary mode;\n\ else { status = retrieve_url (url_parsed, *t, &filename, &redirected_URL, - NULL, &dt, opt.recursive, iri); + NULL, &dt, opt.recursive, iri, true); } if (opt.delete_after && file_exists_p(filename)) @@ -1354,10 +1355,7 @@ WARNING: Can't reopen standard output in binary mode;\n\ xfree (url[i]); cleanup (); - if (status == RETROK) - return 0; - else - return 1; + return get_exit_status (); } #endif /* TESTING */ diff --git a/src/recur.c b/src/recur.c index 4e95e869..66ef2e0f 100644 --- a/src/recur.c +++ b/src/recur.c @@ -283,7 +283,7 @@ retrieve_tree (struct url *start_url_parsed, struct iri *pi) struct url *url_parsed = url_parse (url, &url_err, i, true); status = retrieve_url (url_parsed, url, &file, &redirected, referer, - &dt, false, i); + &dt, false, i, true); if (html_allowed && file && status == RETROK && (dt & RETROKF) && (dt & TEXTHTML)) diff --git a/src/res.c b/src/res.c index 4b0ff82b..eb4caf11 100644 --- a/src/res.c +++ b/src/res.c @@ -562,7 +562,7 @@ res_retrieve_file (const char *url, char **file, struct iri *iri) else { err = retrieve_url (url_parsed, robots_url, file, NULL, NULL, NULL, - false, i); + false, i, false); url_free(url_parsed); } diff --git a/src/retr.c b/src/retr.c index b667ca2f..f1b8f955 100644 --- a/src/retr.c +++ b/src/retr.c @@ -39,6 +39,7 @@ as that of the covered work. 
*/ #include #include +#include "exits.h" #include "utils.h" #include "retr.h" #include "progress.h" @@ -611,7 +612,7 @@ static char *getproxy (struct url *); uerr_t retrieve_url (struct url * orig_parsed, const char *origurl, char **file, char **newloc, const char *refurl, int *dt, bool recursive, - struct iri *iri) + struct iri *iri, bool register_status) { uerr_t result; char *url; @@ -668,7 +669,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, xfree (url); xfree (error); RESTORE_POST_DATA; - return PROXERR; + result = PROXERR; + goto bail; } if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme) { @@ -676,7 +678,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, url_free (proxy_url); xfree (url); RESTORE_POST_DATA; - return PROXERR; + result = PROXERR; + goto bail; } } @@ -757,7 +760,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, xfree (mynewloc); xfree (error); RESTORE_POST_DATA; - return result; + goto bail; } /* Now mynewloc will become newloc_parsed->url, because if the @@ -779,7 +782,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, xfree (url); xfree (mynewloc); RESTORE_POST_DATA; - return WRONGCODE; + result = WRONGCODE; + goto bail; } xfree (url); @@ -866,6 +870,9 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, RESTORE_POST_DATA; +bail: + if (register_status) + inform_exit_status (result); return result; } @@ -910,7 +917,7 @@ retrieve_from_file (const char *file, bool html, int *count) opt.base_href = xstrdup (url); status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt, - false, iri); + false, iri, true); if (status != RETROK) return status; @@ -970,7 +977,8 @@ retrieve_from_file (const char *file, bool html, int *count) else status = retrieve_url (parsed_url ? 
parsed_url : cur_url->url, cur_url->url->url, &filename, - &new_file, NULL, &dt, opt.recursive, tmpiri); + &new_file, NULL, &dt, opt.recursive, tmpiri, + true); if (parsed_url) url_free (parsed_url); diff --git a/src/retr.h b/src/retr.h index 8854b684..07c63593 100644 --- a/src/retr.h +++ b/src/retr.h @@ -54,7 +54,7 @@ char *fd_read_hunk (int, hunk_terminator_t, long, long); char *fd_read_line (int); uerr_t retrieve_url (struct url *, const char *, char **, char **, - const char *, int *, bool, struct iri *); + const char *, int *, bool, struct iri *, bool); uerr_t retrieve_from_file (const char *, bool, int *); const char *retr_rate (wgint, double); diff --git a/src/wget.h b/src/wget.h index 2c313bc8..42694c84 100644 --- a/src/wget.h +++ b/src/wget.h @@ -331,21 +331,23 @@ typedef enum { /* 0 */ NOCONERROR, HOSTERR, CONSOCKERR, CONERROR, CONSSLERR, - CONIMPOSSIBLE, NEWLOCATION, NOTENOUGHMEM, CONPORTERR, CONCLOSED, + CONIMPOSSIBLE, NEWLOCATION, NOTENOUGHMEM /* ! */, + CONPORTERR /* ! */, CONCLOSED /* ! */, /* 10 */ FTPOK, FTPLOGINC, FTPLOGREFUSED, FTPPORTERR, FTPSYSERR, - FTPNSFOD, FTPRETROK, FTPUNKNOWNTYPE, FTPRERR, FTPREXC, + FTPNSFOD, FTPRETROK /* ! */, FTPUNKNOWNTYPE, FTPRERR, FTPREXC /* ! */, /* 20 */ FTPSRVERR, FTPRETRINT, FTPRESTFAIL, URLERROR, FOPENERR, - FOPEN_EXCL_ERR, FWRITEERR, HOK, HLEXC, HEOF, + FOPEN_EXCL_ERR, FWRITEERR, HOK /* ! */, HLEXC /* ! */, HEOF, /* 30 */ - HERR, RETROK, RECLEVELEXC, FTPACCDENIED, WRONGCODE, + HERR, RETROK, RECLEVELEXC, FTPACCDENIED /* ! */, WRONGCODE, FTPINVPASV, FTPNOPASV, CONTNOTSUPPORTED, RETRUNNEEDED, RETRFINISHED, /* 40 */ - READERR, TRYLIMEXC, URLBADPATTERN, FILEBADFILE, RANGEERR, - RETRBADPATTERN, RETNOTSUP, ROBOTSOK, NOROBOTS, PROXERR, + READERR, TRYLIMEXC, URLBADPATTERN /* ! */, FILEBADFILE /* ! */, RANGEERR, + RETRBADPATTERN, RETNOTSUP /* ! */, ROBOTSOK /* ! */, NOROBOTS /* ! 
*/, + PROXERR, /* 50 */ - AUTHFAILED, QUOTEXC, WRITEFAILED, SSLINITFAILED + AUTHFAILED, QUOTEXC, WRITEFAILED, SSLINITFAILED, VERIFCERTERR } uerr_t; /* 2005-02-19 SMS. diff --git a/tests/ChangeLog b/tests/ChangeLog index 9d367e84..f5e4f348 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,14 @@ +2009-08-27 Micah Cowan + + * WgetTest.pm.in (run): Shift the errcode right by 8 binary places. + + * Test--spider-fail.px, Test--spider-r--no-content-disposition.px, + Test--spider-r--no-content-disposition-trivial.px, + Test--spider-r-HTTP-Content-Disposition.px, Test--spider-r.px, + Test-O-nonexisting.px, Test-cookies-401.px, + Test-nonexisting-quiet.px: Adjusted "expected error code"; Wget's + exit codes have changed. + 2009-08-27 Micah Cowan * run-px: Added Test-cookies.px, Test-cookies-401.px diff --git a/tests/Test--spider-fail.px b/tests/Test--spider-fail.px index 6e5c976d..ac8f5e6d 100755 --- a/tests/Test--spider-fail.px +++ b/tests/Test--spider-fail.px @@ -35,7 +35,7 @@ my %urls = ( my $cmdline = $WgetTest::WGETPATH . " --spider http://localhost:{{port}}/nonexistent"; -my $expected_error_code = 256; +my $expected_error_code = 8; my %expected_downloaded_files = ( ); diff --git a/tests/Test--spider-r--no-content-disposition-trivial.px b/tests/Test--spider-r--no-content-disposition-trivial.px index 0bd7d29e..d18be997 100755 --- a/tests/Test--spider-r--no-content-disposition-trivial.px +++ b/tests/Test--spider-r--no-content-disposition-trivial.px @@ -92,7 +92,7 @@ my %urls = ( my $cmdline = $WgetTest::WGETPATH . 
" --spider -r --no-content-disposition http://localhost:{{port}}/"; -my $expected_error_code = 0; +my $expected_error_code = 8; my %expected_downloaded_files = ( ); diff --git a/tests/Test--spider-r--no-content-disposition.px b/tests/Test--spider-r--no-content-disposition.px index 78beb18d..b4c80ea0 100755 --- a/tests/Test--spider-r--no-content-disposition.px +++ b/tests/Test--spider-r--no-content-disposition.px @@ -93,7 +93,7 @@ my %urls = ( my $cmdline = $WgetTest::WGETPATH . " --spider -r --no-content-disposition http://localhost:{{port}}/"; -my $expected_error_code = 0; +my $expected_error_code = 8; my %expected_downloaded_files = ( ); diff --git a/tests/Test--spider-r-HTTP-Content-Disposition.px b/tests/Test--spider-r-HTTP-Content-Disposition.px index e79152f7..a2cc5741 100755 --- a/tests/Test--spider-r-HTTP-Content-Disposition.px +++ b/tests/Test--spider-r-HTTP-Content-Disposition.px @@ -93,7 +93,7 @@ my %urls = ( my $cmdline = $WgetTest::WGETPATH . " --spider -r http://localhost:{{port}}/"; -my $expected_error_code = 0; +my $expected_error_code = 8; my %expected_downloaded_files = ( ); diff --git a/tests/Test--spider-r.px b/tests/Test--spider-r.px index b32f792d..b7ad76c9 100755 --- a/tests/Test--spider-r.px +++ b/tests/Test--spider-r.px @@ -92,7 +92,7 @@ my %urls = ( my $cmdline = $WgetTest::WGETPATH . " --spider -r http://localhost:{{port}}/"; -my $expected_error_code = 0; +my $expected_error_code = 8; my %expected_downloaded_files = ( ); diff --git a/tests/Test-O-nonexisting.px b/tests/Test-O-nonexisting.px index 60ef7c70..c8df6d72 100755 --- a/tests/Test-O-nonexisting.px +++ b/tests/Test-O-nonexisting.px @@ -26,7 +26,7 @@ my %urls = ( my $cmdline = $WgetTest::WGETPATH . 
" --quiet -O out http://localhost:{{port}}/nonexistent"; -my $expected_error_code = 256; +my $expected_error_code = 8; my %expected_downloaded_files = ( 'out' => { diff --git a/tests/Test-cookies-401.px b/tests/Test-cookies-401.px index bb0d60e9..e1030c3b 100755 --- a/tests/Test-cookies-401.px +++ b/tests/Test-cookies-401.px @@ -32,7 +32,7 @@ my %urls = ( my $cmdline = $WgetTest::WGETPATH . " -d http://localhost:{{port}}/one.txt" . " http://localhost:{{port}}/two.txt"; -my $expected_error_code = 0; +my $expected_error_code = 6; my %expected_downloaded_files = ( 'two.txt' => { diff --git a/tests/Test-nonexisting-quiet.px b/tests/Test-nonexisting-quiet.px index 04e11587..fa6fa0f2 100755 --- a/tests/Test-nonexisting-quiet.px +++ b/tests/Test-nonexisting-quiet.px @@ -26,7 +26,7 @@ my %urls = ( my $cmdline = $WgetTest::WGETPATH . " --quiet http://localhost:{{port}}/nonexistent"; -my $expected_error_code = 256; +my $expected_error_code = 8; my %expected_downloaded_files = ( ); diff --git a/tests/WgetTest.pm.in b/tests/WgetTest.pm.in index 5cd6769b..c4c0d4d9 100644 --- a/tests/WgetTest.pm.in +++ b/tests/WgetTest.pm.in @@ -88,6 +88,7 @@ sub run { ($cmdline =~ m{^/.*}) ? system ($cmdline) : system ("$self->{_workdir}/../src/$cmdline"); + $errcode >>= 8; # XXX: should handle abnormal error codes. # Shutdown server # if we didn't explicitely kill the server, we would have to call -- 2.39.2