X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fftp.c;h=25f05a4c7e3944ea538082637cab693e7e5c349c;hp=827e597e75890e6fa31622aa6aa70e146200889b;hb=38a7829dcb4eb5dba28dbf0f05c6a80fea9217f8;hpb=d5e283b1a75c5f8249300b465b4e7b55130bec49 diff --git a/src/ftp.c b/src/ftp.c index 827e597e..25f05a4c 100644 --- a/src/ftp.c +++ b/src/ftp.c @@ -1,6 +1,7 @@ /* File Transfer Protocol support. - Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, + 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, + Inc. This file is part of GNU Wget. @@ -33,9 +34,8 @@ as that of the covered work. */ #include #include #include -#ifdef HAVE_UNISTD_H -# include -#endif +#include +#include #include #include #include @@ -49,6 +49,7 @@ as that of the covered work. */ #include "netrc.h" #include "convert.h" /* for downloaded_file */ #include "recur.h" /* for INFINITE_RECURSION */ +#include "warc.h" #ifdef __VMS # include "vms.h" @@ -68,7 +69,8 @@ typedef struct int cmd; /* command code */ int csock; /* control connection socket */ double dltime; /* time of the download in msecs */ - enum stype rs; /* remote system reported by ftp server */ + enum stype rs; /* remote system reported by ftp server */ + enum ustype rsu; /* when rs is ST_UNIX, here there are more details */ char *id; /* initial directory */ char *target; /* target file name */ struct url *proxy; /* FTWK-style proxy */ @@ -109,7 +111,7 @@ ftp_expected_bytes (const char *s) } #ifdef ENABLE_IPV6 -/* +/* * This function sets up a passive data connection with the FTP server. * It is merely a wrapper around ftp_epsv, ftp_lpsv and ftp_pasv. */ @@ -124,8 +126,8 @@ ftp_do_pasv (int csock, ip_address *addr, int *port) if (!socket_ip_address (csock, addr, ENDPOINT_PEER)) abort (); - /* If our control connection is over IPv6, then we first try EPSV and then - * LPSV if the former is not supported. If the control connection is over + /* If our control connection is over IPv6, then we first try EPSV and then + * LPSV if the former is not supported. If the control connection is over * IPv4, we simply issue the good old PASV request. */ switch (addr->family) { @@ -154,7 +156,7 @@ ftp_do_pasv (int csock, ip_address *addr, int *port) return err; } -/* +/* * This function sets up an active data connection with the FTP server. * It is merely a wrapper around ftp_eprt, ftp_lprt and ftp_port. */ @@ -167,8 +169,8 @@ ftp_do_port (int csock, int *local_sock) if (!socket_ip_address (csock, &cip, ENDPOINT_PEER)) abort (); - /* If our control connection is over IPv6, then we first try EPRT and then - * LPRT if the former is not supported. If the control connection is over + /* If our control connection is over IPv6, then we first try EPRT and then + * LPRT if the former is not supported. If the control connection is over * IPv4, we simply issue the good old PORT request. */ switch (cip.family) { @@ -237,23 +239,27 @@ static uerr_t ftp_get_listing (struct url *, ccon *, struct fileinfo **); /* Retrieves a file with denoted parameters through opening an FTP connection to the server. It always closes the data connection, - and closes the control connection in case of error. */ + and closes the control connection in case of error. If warc_tmp + is non-NULL, the downloaded data will be written there as well. */ static uerr_t -getftp (struct url *u, wgint *len, wgint restval, ccon *con) +getftp (struct url *u, wgint passed_expected_bytes, wgint *qtyread, + wgint restval, ccon *con, int count, FILE *warc_tmp) { int csock, dtsock, local_sock, res; uerr_t err = RETROK; /* appease the compiler */ FILE *fp; - char *user, *passwd, *respline; - char *tms; - const char *tmrate; + char *respline, *tms; + const char *user, *passwd, *tmrate; int cmd = con->cmd; bool pasv_mode_open = false; wgint expected_bytes = 0; + bool got_expected_bytes = false; bool rest_failed = false; int flags; - wgint rd_size; + wgint rd_size, previous_rd_size = 0; char type_char; + bool try_again; + bool list_a_used = false; assert (con != NULL); assert (con->target != NULL); @@ -265,6 +271,8 @@ getftp (struct url *u, wgint *len, wgint restval, ccon *con) /* Make sure that at least *something* is requested. */ assert ((cmd & (DO_LIST | DO_CWD | DO_RETR | DO_LOGIN)) != 0); + *qtyread = restval; + user = u->user; passwd = u->passwd; search_netrc (u->host, (const char **)&user, (const char **)&passwd, 1); @@ -283,13 +291,6 @@ getftp (struct url *u, wgint *len, wgint restval, ccon *con) { char *host = con->proxy ? con->proxy->host : u->host; int port = con->proxy ? con->proxy->port : u->port; - char *logname = user; - - if (con->proxy) - { - /* If proxy is in use, log in as username@target-site. */ - logname = concat_strings (user, "@", u->host, (char *) 0); - } /* Login to the server: */ @@ -297,10 +298,10 @@ getftp (struct url *u, wgint *len, wgint restval, ccon *con) csock = connect_to_host (host, port); if (csock == E_HOST) - return HOSTERR; + return HOSTERR; else if (csock < 0) - return (retryable_socket_connect_error (errno) - ? CONERROR : CONIMPOSSIBLE); + return (retryable_socket_connect_error (errno) + ? CONERROR : CONIMPOSSIBLE); if (cmd & LEAVE_PENDING) con->csock = csock; @@ -308,14 +309,19 @@ getftp (struct url *u, wgint *len, wgint restval, ccon *con) con->csock = -1; /* Second: Login with proper USER/PASS sequence. */ - logprintf (LOG_VERBOSE, _("Logging in as %s ... "), + logprintf (LOG_VERBOSE, _("Logging in as %s ... "), quotearg_style (escape_quoting_style, user)); if (opt.server_response) logputs (LOG_ALWAYS, "\n"); - err = ftp_login (csock, logname, passwd); - if (con->proxy) - xfree (logname); + { + /* If proxy is in use, log in as username@target-site. */ + char *logname = concat_strings (user, "@", u->host, (char *) 0); + err = ftp_login (csock, logname, passwd); + xfree (logname); + } + else + err = ftp_login (csock, user, passwd); /* FTPRERR, FTPSRVERR, WRITEFAILED, FTPLOGREFUSED, FTPLOGINC */ switch (err) @@ -362,7 +368,7 @@ Error in server response, closing control connection.\n")); /* Third: Get the system type */ if (!opt.server_response) logprintf (LOG_VERBOSE, "==> SYST ... "); - err = ftp_syst (csock, &con->rs); + err = ftp_syst (csock, &con->rs, &con->rsu); /* FTPRERR */ switch (err) { @@ -387,6 +393,44 @@ Error in server response, closing control connection.\n")); if (!opt.server_response && err != FTPSRVERR) logputs (LOG_VERBOSE, _("done. ")); + /* 2013-10-17 Andrea Urbani (matfanjol) + According to the system type I choose which + list command will be used. + If I don't know that system, I will try, the + first time of each session, "LIST -a" and + "LIST". (see __LIST_A_EXPLANATION__ below) */ + switch (con->rs) + { + case ST_VMS: + /* About ST_VMS there is an old note: + 2008-01-29 SMS. For a VMS FTP server, where "LIST -a" may not + fail, but will never do what is desired here, + skip directly to the simple "LIST" command + (assumed to be the last one in the list). */ + DEBUGP (("\nVMS: I know it and I will use \"LIST\" as standard list command\n")); + con->st |= LIST_AFTER_LIST_A_CHECK_DONE; + con->st |= AVOID_LIST_A; + break; + case ST_UNIX: + if (con->rsu == UST_MULTINET) + { + DEBUGP (("\nUNIX MultiNet: I know it and I will use \"LIST\" " + "as standard list command\n")); + con->st |= LIST_AFTER_LIST_A_CHECK_DONE; + con->st |= AVOID_LIST_A; + } + else if (con->rsu == UST_TYPE_L8) + { + DEBUGP (("\nUNIX TYPE L8: I know it and I will use \"LIST -a\" " + "as standard list command\n")); + con->st |= LIST_AFTER_LIST_A_CHECK_DONE; + con->st |= AVOID_LIST; + } + break; + default: + break; + } + /* Fourth: Find the initial ftp directory */ if (!opt.server_response) @@ -425,7 +469,7 @@ Error in server response, closing control connection.\n")); */ /* VMS will report something like "PUB$DEVICE:[INITIAL.FOLDER]". - Convert it to "/INITIAL/FOLDER" */ + Convert it to "/INITIAL/FOLDER" */ if (con->rs == ST_VMS) { char *path = strchr (con->id, '['); @@ -498,10 +542,10 @@ Error in server response, closing control connection.\n")); logputs (LOG_VERBOSE, _("==> CWD not needed.\n")); else { - char *targ; - int cwd_count; - int cwd_end; - int cwd_start; + const char *targ = NULL; + int cwd_count; + int cwd_end; + int cwd_start; char *target = u->dir; @@ -525,7 +569,7 @@ Error in server response, closing control connection.\n")); Why is this wise even on UNIX? It certainly fouls VMS. See below for a more reliable, more universal method. */ - + /* 2008-04-22 MJC. I'm not crazy about it either. I'm informed it's useful for misconfigured servers that have some dirs in the path @@ -599,7 +643,7 @@ Error in server response, closing control connection.\n")); #endif /* 0 */ /* 2004-09-20 SMS. - A relative directory is relative to the initial directory. + A relative directory is relative to the initial directory. Thus, what _is_ useful on VMS (and probably elsewhere) is to CWD to the initial directory (ideally, whatever the server reports, _exactly_, NOT badly UNIX-ixed), and then @@ -613,16 +657,16 @@ Error in server response, closing control connection.\n")); The VMS restriction may be relaxed when the squirrely code above is reformed. */ - if ((con->rs == ST_VMS) && (target[0] != '/')) - { - cwd_start = 0; - DEBUGP (("Using two-step CWD for relative path.\n")); - } - else - { + if ((con->rs == ST_VMS) && (target[0] != '/')) + { + cwd_start = 0; + DEBUGP (("Using two-step CWD for relative path.\n")); + } + else + { /* Go straight to the target. */ - cwd_start = 1; - } + cwd_start = 1; + } /* At least one VMS FTP server (TCPware V5.6-2) can switch to a UNIX emulation mode when given a UNIX-like directory @@ -640,10 +684,10 @@ Error in server response, closing control connection.\n")); Unlike the rest of this block, this particular behavior _is_ VMS-specific, so it gets its own VMS test. */ - if ((con->rs == ST_VMS) && (strchr( target, '/') != NULL)) + if ((con->rs == ST_VMS) && (strchr( target, '/') != NULL)) { cwd_end = 3; - DEBUGP (("Using extra \"CWD []\" step for VMS server.\n")); + DEBUGP (("Using extra \"CWD []\" step for VMS server.\n")); } else { @@ -653,22 +697,22 @@ Error in server response, closing control connection.\n")); /* 2004-09-20 SMS. */ /* Sorry about the deviant indenting. Laziness. */ - for (cwd_count = cwd_start; cwd_count < cwd_end; cwd_count++) - { + for (cwd_count = cwd_start; cwd_count < cwd_end; cwd_count++) + { switch (cwd_count) { case 0: - /* Step one (optional): Go to the initial directory, - exactly as reported by the server. - */ - targ = con->id; + /* Step one (optional): Go to the initial directory, + exactly as reported by the server. + */ + targ = con->id; break; case 1: - /* Step two: Go to the target directory. (Absolute or - relative will work now.) - */ - targ = target; + /* Step two: Go to the target directory. (Absolute or + relative will work now.) + */ + targ = target; break; case 2: @@ -681,12 +725,12 @@ Error in server response, closing control connection.\n")); default: /* Can't happen. */ assert (1); - } + } if (!opt.server_response) logprintf (LOG_VERBOSE, "==> CWD (%d) %s ... ", cwd_count, quotearg_style (escape_quoting_style, target)); - err = ftp_cwd (csock, target); + err = ftp_cwd (csock, targ); /* FTPRERR, WRITEFAILED, FTPNSFOD */ switch (err) { @@ -729,16 +773,16 @@ Error in server response, closing control connection.\n")); else /* do not CWD */ logputs (LOG_VERBOSE, _("==> CWD not required.\n")); - if ((cmd & DO_RETR) && *len == 0) + if ((cmd & DO_RETR) && passed_expected_bytes == 0) { if (opt.verbose) { if (!opt.server_response) - logprintf (LOG_VERBOSE, "==> SIZE %s ... ", + logprintf (LOG_VERBOSE, "==> SIZE %s ... ", quotearg_style (escape_quoting_style, u->file)); } - err = ftp_size (csock, u->file, len); + err = ftp_size (csock, u->file, &expected_bytes); /* FTPRERR */ switch (err) { @@ -751,16 +795,34 @@ Error in server response, closing control connection.\n")); con->csock = -1; return err; case FTPOK: + got_expected_bytes = true; /* Everything is OK. */ break; default: abort (); } if (!opt.server_response) - logprintf (LOG_VERBOSE, *len ? "%s\n" : _("done.\n"), - number_to_static_string (*len)); + { + logprintf (LOG_VERBOSE, "%s\n", + expected_bytes ? + number_to_static_string (expected_bytes) : + _("done.\n")); + } } + if (cmd & DO_RETR && restval > 0 && restval == expected_bytes) + { + /* Server confirms that file has length restval. We should stop now. + Some servers (f.e. NcFTPd) return error when receive REST 0 */ + logputs (LOG_VERBOSE, _("File has already been retrieved.\n")); + fd_close (csock); + con->csock = -1; + return RETRFINISHED; + } + + do + { + try_again = false; /* If anything is to be retrieved, PORT (or PASV) must be sent. */ if (cmd & (DO_LIST | DO_RETR)) { @@ -801,7 +863,7 @@ Error in server response, closing control connection.\n")); } /* switch (err) */ if (err==FTPOK) { - DEBUGP (("trying to connect to %s port %d\n", + DEBUGP (("trying to connect to %s port %d\n", print_address (&passive_addr), passive_port)); dtsock = connect_to_ip (&passive_addr, passive_port, NULL); if (dtsock < 0) @@ -924,42 +986,41 @@ Error in server response, closing control connection.\n")); if (cmd & DO_RETR) { /* If we're in spider mode, don't really retrieve anything except - the directory listing and verify whether the given "file" exists. */ + the directory listing and verify whether the given "file" exists. */ if (opt.spider) { - bool exists = false; - uerr_t res; - struct fileinfo *f; - res = ftp_get_listing (u, con, &f); - /* Set the DO_RETR command flag again, because it gets unset when - calling ftp_get_listing() and would otherwise cause an assertion - failure earlier on when this function gets repeatedly called - (e.g., when recursing). */ - con->cmd |= DO_RETR; - if (res == RETROK) - { - while (f) - { - if (!strcmp (f->name, u->file)) - { - exists = true; - break; - } - f = f->next; - } + bool exists = false; + struct fileinfo *f; + uerr_t _res = ftp_get_listing (u, con, &f); + /* Set the DO_RETR command flag again, because it gets unset when + calling ftp_get_listing() and would otherwise cause an assertion + failure earlier on when this function gets repeatedly called + (e.g., when recursing). */ + con->cmd |= DO_RETR; + if (_res == RETROK) + { + while (f) + { + if (!strcmp (f->name, u->file)) + { + exists = true; + break; + } + f = f->next; + } if (exists) { logputs (LOG_VERBOSE, "\n"); logprintf (LOG_NOTQUIET, _("File %s exists.\n"), quote (u->file)); } - else + else { - logputs (LOG_VERBOSE, "\n"); - logprintf (LOG_NOTQUIET, _("No such file %s.\n"), - quote (u->file)); - } - } + logputs (LOG_VERBOSE, "\n"); + logprintf (LOG_NOTQUIET, _("No such file %s.\n"), + quote (u->file)); + } + } fd_close (csock); con->csock = -1; fd_close (dtsock); @@ -973,7 +1034,7 @@ Error in server response, closing control connection.\n")); { if (restval) logputs (LOG_VERBOSE, "\n"); - logprintf (LOG_VERBOSE, "==> RETR %s ... ", + logprintf (LOG_VERBOSE, "==> RETR %s ... ", quotearg_style (escape_quoting_style, u->file)); } } @@ -1008,10 +1069,6 @@ Error in server response, closing control connection.\n")); fd_close (local_sock); return err; case FTPOK: - if (getenv( "FTP_DELETE") != NULL) - { - err = ftp_dele (csock, u->file); - } break; default: abort (); @@ -1019,7 +1076,9 @@ Error in server response, closing control connection.\n")); if (!opt.server_response) logputs (LOG_VERBOSE, _("done.\n")); - expected_bytes = ftp_expected_bytes (ftp_last_respline); + + if (! got_expected_bytes) + expected_bytes = ftp_expected_bytes (ftp_last_respline); } /* do retrieve */ if (cmd & DO_LIST) @@ -1029,7 +1088,8 @@ Error in server response, closing control connection.\n")); /* As Maciej W. Rozycki (macro@ds2.pg.gda.pl) says, `LIST' without arguments is better than `LIST .'; confirmed by RFC959. */ - err = ftp_list (csock, NULL, con->rs); + err = ftp_list (csock, NULL, con->st&AVOID_LIST_A, con->st&AVOID_LIST, &list_a_used); + /* FTPRERR, WRITEFAILED */ switch (err) { @@ -1065,7 +1125,9 @@ Error in server response, closing control connection.\n")); } if (!opt.server_response) logputs (LOG_VERBOSE, _("done.\n")); - expected_bytes = ftp_expected_bytes (ftp_last_respline); + + if (! got_expected_bytes) + expected_bytes = ftp_expected_bytes (ftp_last_respline); } /* cmd & DO_LIST */ if (!(cmd & (DO_LIST | DO_RETR)) || (opt.spider && !(cmd & DO_LIST))) @@ -1073,11 +1135,11 @@ Error in server response, closing control connection.\n")); /* Some FTP servers return the total length of file after REST command, others just return the remaining size. */ - if (*len && restval && expected_bytes - && (expected_bytes == *len - restval)) + if (passed_expected_bytes && restval && expected_bytes + && (expected_bytes == passed_expected_bytes - restval)) { DEBUGP (("Lying FTP server found, adjusting.\n")); - expected_bytes = *len; + expected_bytes = passed_expected_bytes; } /* If no transmission was required, then everything is OK. */ @@ -1095,7 +1157,7 @@ Error in server response, closing control connection.\n")); } /* Open the file -- if output_stream is set, use it instead. */ - + /* 2005-04-17 SMS. Note that having the output_stream ("-O") file opened in main() (main.c) rather limits the ability in VMS to open the file @@ -1109,14 +1171,14 @@ Error in server response, closing control connection.\n")); #ifdef __VMS char *targ; - targ = ods_conform( con->target); + targ = ods_conform (con->target); if (targ != con->target) { - xfree( con->target); + xfree (con->target); con->target = targ; } #endif /* def __VMS */ - + mkalldirs (con->target); if (opt.backups) rotate_backups (con->target); @@ -1127,15 +1189,27 @@ Error in server response, closing control connection.\n")); Elsewhere, define a constant "binary" flag. Isn't it nice to have distinct text and binary file types? */ -# define BIN_TYPE_TRANSFER (type_char != 'A') +/* 2011-09-30 SMS. + Added listing files to the set of non-"binary" (text, Stream_LF) + files. (Wget works either way, but other programs, like, say, text + editors, work better on listing files which have text attributes.) + Now we use "binary" attributes for a binary ("IMAGE") transfer, + unless "--ftp-stmlf" was specified, and we always use non-"binary" + (text, Stream_LF) attributes for a listing file, or for an ASCII + transfer. + Tidied the VMS-specific BIN_TYPE_xxx macros, and changed the call to + fopen_excl() (restored?) to use BIN_TYPE_FILE instead of "true". +*/ #ifdef __VMS +# define BIN_TYPE_TRANSFER (type_char != 'A') +# define BIN_TYPE_FILE \ + ((!(cmd & DO_LIST)) && BIN_TYPE_TRANSFER && (opt.ftp_stmlf == 0)) # define FOPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id # define FOPEN_OPT_ARGS_BIN "ctx=bin,stm", "rfm=fix", "mrs=512" FOPEN_OPT_ARGS -# define BIN_TYPE_FILE (BIN_TYPE_TRANSFER && (opt.ftp_stmlf == 0)) #else /* def __VMS */ -# define BIN_TYPE_FILE 1 +# define BIN_TYPE_FILE true #endif /* def __VMS [else] */ - + if (restval && !(con->cmd & DO_LIST)) { #ifdef __VMS @@ -1156,8 +1230,22 @@ Error in server response, closing control connection.\n")); #endif /* def __VMS [else] */ } else if (opt.noclobber || opt.always_rest || opt.timestamping || opt.dirstruct - || opt.output_document) + || opt.output_document || count > 0) { + if (opt.unlink && file_exists_p (con->target)) + { + if (unlink (con->target) < 0) + { + logprintf (LOG_NOTQUIET, "%s: %s\n", con->target, + strerror (errno)); + fd_close (csock); + con->csock = -1; + fd_close (dtsock); + fd_close (local_sock); + return UNLINKERR; + } + } + #ifdef __VMS int open_id; @@ -1177,7 +1265,7 @@ Error in server response, closing control connection.\n")); } else { - fp = fopen_excl (con->target, true); + fp = fopen_excl (con->target, BIN_TYPE_FILE); if (!fp && errno == EEXIST) { /* We cannot just invent a new name and use it (which is @@ -1206,10 +1294,11 @@ Error in server response, closing control connection.\n")); else fp = output_stream; - if (*len) + if (passed_expected_bytes) { - print_length (*len, restval, true); - expected_bytes = *len; /* for fd_read_body's progress bar */ + print_length (passed_expected_bytes, restval, true); + expected_bytes = passed_expected_bytes; + /* for fd_read_body's progress bar */ } else if (expected_bytes) print_length (expected_bytes, restval, false); @@ -1218,11 +1307,10 @@ Error in server response, closing control connection.\n")); flags = 0; if (restval && rest_failed) flags |= rb_skip_startpos; - *len = restval; rd_size = 0; - res = fd_read_body (dtsock, fp, + res = fd_read_body (con->target, dtsock, fp, expected_bytes ? expected_bytes - restval : 0, - restval, &rd_size, len, &con->dltime, flags); + restval, &rd_size, qtyread, &con->dltime, flags, warc_tmp); tms = datetime_str (time (NULL)); tmrate = retr_rate (rd_size, con->dltime); @@ -1233,15 +1321,18 @@ Error in server response, closing control connection.\n")); if (!output_stream || con->cmd & DO_LIST) fclose (fp); - /* If fd_read_body couldn't write to fp, bail out. */ - if (res == -2) + /* If fd_read_body couldn't write to fp or warc_tmp, bail out. */ + if (res == -2 || (warc_tmp != NULL && res == -3)) { logprintf (LOG_NOTQUIET, _("%s: %s, closing control connection.\n"), con->target, strerror (errno)); fd_close (csock); con->csock = -1; fd_close (dtsock); - return FWRITEERR; + if (res == -2) + return FWRITEERR; + else if (res == -3) + return WARC_TMP_FWRITEERR; } else if (res == -1) { @@ -1299,8 +1390,10 @@ Error in server response, closing control connection.\n")); } /* If it was a listing, and opt.server_response is true, print it out. */ - if (opt.server_response && (con->cmd & DO_LIST)) + if (con->cmd & DO_LIST) { + if (opt.server_response) + { /* 2005-02-25 SMS. Much of this work may already have been done, but repeating it should do no damage beyond wasting time. @@ -1323,22 +1416,115 @@ Error in server response, closing control connection.\n")); logprintf (LOG_ALWAYS, "%s: %s\n", con->target, strerror (errno)); else { - char *line; - /* The lines are being read with read_whole_line because of + char *line = NULL; + size_t bufsize = 0; + ssize_t len; + + /* The lines are being read with getline because of no-buffering on opt.lfile. */ - while ((line = read_whole_line (fp)) != NULL) + while ((len = getline (&line, &bufsize, fp)) > 0) { - char *p = strchr (line, '\0'); - while (p > line && (p[-1] == '\n' || p[-1] == '\r')) - *--p = '\0'; - logprintf (LOG_ALWAYS, "%s\n", + while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) + line[--len] = '\0'; + logprintf (LOG_ALWAYS, "%s\n", quotearg_style (escape_quoting_style, line)); - xfree (line); } + xfree (line); fclose (fp); } - } /* con->cmd & DO_LIST && server_response */ + } /* server_response */ + + /* 2013-10-17 Andrea Urbani (matfanjol) + < __LIST_A_EXPLANATION__ > + After the SYST command, looks if it knows that system. + If yes, wget will force the use of "LIST" or "LIST -a". + If no, wget will try, only the first time of each session, before the + "LIST -a" command and after the "LIST". + If "LIST -a" works and returns more or equal data of the "LIST", + "LIST -a" will be the standard list command for all the session. + If "LIST -a" fails or returns less data than "LIST" (think on the case + of an existing file called "-a"), "LIST" will be the standard list + command for all the session. + ("LIST -a" is used to get also the hidden files) + */ + if (!(con->st & LIST_AFTER_LIST_A_CHECK_DONE)) + { + /* We still have to check "LIST" after the first "LIST -a" to see + if with "LIST" we get more data than "LIST -a", that means + "LIST -a" returned files/folders with "-a" name. */ + if (con->st & AVOID_LIST_A) + { + /* LIST was used in this cycle. + Let's see the result. */ + if (rd_size > previous_rd_size) + { + /* LIST returns more data than "LIST -a". + "LIST" is the official command to use. */ + con->st |= LIST_AFTER_LIST_A_CHECK_DONE; + DEBUGP (("LIST returned more data than \"LIST -a\": " + "I will use \"LIST\" as standard list command\n")); + } + else if (previous_rd_size > rd_size) + { + /* "LIST -a" returned more data then LIST. + "LIST -a" is the official command to use. */ + con->st |= LIST_AFTER_LIST_A_CHECK_DONE; + con->st |= AVOID_LIST; + con->st &= ~AVOID_LIST_A; + /* Sorry, please, download again the "LIST -a"... */ + try_again = true; + DEBUGP (("LIST returned less data than \"LIST -a\": I will " + "use \"LIST -a\" as standard list command\n")); + } + else + { + /* LIST and "LIST -a" return the same data. */ + if (rd_size == 0) + { + /* Same empty data. We will check both again because + we cannot check if "LIST -a" has returned an empty + folder instead of a folder content. */ + con->st &= ~AVOID_LIST_A; + } + else + { + /* Same data, so, better to take "LIST -a" that + shows also hidden files/folders (when present) */ + con->st |= LIST_AFTER_LIST_A_CHECK_DONE; + con->st |= AVOID_LIST; + con->st &= ~AVOID_LIST_A; + DEBUGP (("LIST returned the same amount of data of " + "\"LIST -a\": I will use \"LIST -a\" as standard " + "list command\n")); + } + } + } + else + { + /* In this cycle "LIST -a" should being used. Is it true? */ + if (list_a_used) + { + /* Yes, it is. + OK, let's save the amount of data and try again + with LIST */ + previous_rd_size = rd_size; + try_again = true; + con->st |= AVOID_LIST_A; + } + else + { + /* No: something happens and LIST was used. + This means "LIST -a" raises an error. */ + con->st |= LIST_AFTER_LIST_A_CHECK_DONE; + con->st |= AVOID_LIST_A; + DEBUGP (("\"LIST -a\" failed: I will use \"LIST\" " + "as standard list command\n")); + } + } + } + } + } while (try_again); return RETRFINISHED; } @@ -1348,15 +1534,20 @@ Error in server response, closing control connection.\n")); This loop either gets commands from con, or (if ON_YOUR_OWN is set), makes them up to retrieve the file given by the URL. */ static uerr_t -ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con) +ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_file) { int count, orig_lp; - wgint restval, len = 0; + wgint restval, len = 0, qtyread = 0; char *tms, *locf; const char *tmrate = NULL; uerr_t err; struct_stat st; + /* Declare WARC variables. */ + bool warc_enabled = (opt.warc_filename != NULL); + FILE *warc_tmp = NULL; + ip_address *warc_ip = NULL; + /* Get the target, and set the name for the message accordingly. */ if ((f == NULL) && (con->target)) { @@ -1366,7 +1557,7 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con) else { /* URL-derived file. Consider "-O file" name. */ - con->target = url_file_name (u); + con->target = url_file_name (u, NULL); if (!opt.output_document) locf = con->target; else @@ -1375,7 +1566,12 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con) /* If the output_document was given, then this check was already done and the file didn't exist. Hence the !opt.output_document */ - if (opt.noclobber && !opt.output_document && file_exists_p (con->target)) + + /* If we receive .listing file it is necessary to determine system type of the ftp + server even if opn.noclobber is given. Thus we must ignore opt.noclobber in + order to establish connection with the server and get system type. */ + if (opt.noclobber && !opt.output_document && file_exists_p (con->target) + && !((con->cmd & DO_LIST) && !(con->cmd & DO_RETR))) { logprintf (LOG_VERBOSE, _("File %s already there; not retrieving.\n"), quote (con->target)); @@ -1420,9 +1616,26 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con) con->cmd |= DO_CWD; } + /* For file RETR requests, we can write a WARC record. + We record the file contents to a temporary file. */ + if (warc_enabled && (con->cmd & DO_RETR) && warc_tmp == NULL) + { + warc_tmp = warc_tempfile (); + if (warc_tmp == NULL) + return WARC_TMP_FOPENERR; + + if (!con->proxy && con->csock != -1) + { + warc_ip = (ip_address *) alloca (sizeof (ip_address)); + socket_ip_address (con->csock, warc_ip, ENDPOINT_PEER); + } + } + /* Decide whether or not to restart. */ if (con->cmd & DO_LIST) restval = 0; + else if (opt.start_pos >= 0) + restval = opt.start_pos; else if (opt.always_rest && stat (locf, &st) == 0 && S_ISREG (st.st_mode)) @@ -1431,7 +1644,7 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con) first attempt to clobber existing data.) */ restval = st.st_size; else if (count > 1) - restval = len; /* start where the previous run left off */ + restval = qtyread; /* start where the previous run left off */ else restval = 0; @@ -1453,11 +1666,14 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con) xfree (hurl); } /* Send getftp the proper length, if fileinfo was provided. */ - if (f) + if (f && f->type != FT_SYMLINK) len = f->size; else len = 0; - err = getftp (u, &len, restval, con); + + /* If we are working on a WARC record, getftp should also write + to the warc_tmp file. */ + err = getftp (u, len, &qtyread, restval, con, count, warc_tmp); if (con->csock == -1) con->st &= ~DONE_CWD; @@ -1468,7 +1684,10 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con) { case HOSTERR: case CONIMPOSSIBLE: case FWRITEERR: case FOPENERR: case FTPNSFOD: case FTPLOGINC: case FTPNOPASV: case CONTNOTSUPPORTED: + case UNLINKERR: case WARC_TMP_FWRITEERR: /* Fatal errors, give up. */ + if (warc_tmp != NULL) + fclose (warc_tmp); return err; case CONSOCKERR: case CONERROR: case FTPSRVERR: case FTPRERR: case WRITEFAILED: case FTPUNKNOWNTYPE: case FTPSYSERR: @@ -1480,14 +1699,14 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con) { /* Re-determine the file name. */ xfree_null (con->target); - con->target = url_file_name (u); + con->target = url_file_name (u, NULL); locf = con->target; } continue; case FTPRETRINT: /* If the control connection was closed, the retrieval will be considered OK if f->size == len. */ - if (!f || len != f->size) + if (!f || qtyread != f->size) { printwhat (count, opt.ntry); continue; @@ -1502,7 +1721,7 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con) } tms = datetime_str (time (NULL)); if (!opt.spider) - tmrate = retr_rate (len - restval, con->dltime); + tmrate = retr_rate (qtyread - restval, con->dltime); /* If we get out of the switch above without continue'ing, we've successfully downloaded a file. Remember this fact. */ @@ -1523,7 +1742,7 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con) : _("%s (%s) - %s saved [%s]\n\n"), tms, tmrate, write_to_stdout ? "" : quote (locf), - number_to_static_string (len)); + number_to_static_string (qtyread)); } if (!opt.verbose && !opt.quiet) { @@ -1532,18 +1751,31 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con) time. */ char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD); logprintf (LOG_NONVERBOSE, "%s URL: %s [%s] -> \"%s\" [%d]\n", - tms, hurl, number_to_static_string (len), locf, count); + tms, hurl, number_to_static_string (qtyread), locf, count); xfree (hurl); } - if ((con->cmd & DO_LIST)) + if (warc_enabled && (con->cmd & DO_RETR)) + { + /* Create and store a WARC resource record for the retrieved file. */ + bool warc_res; + + warc_res = warc_write_resource_record (NULL, u->url, NULL, NULL, + warc_ip, NULL, warc_tmp, -1); + if (! warc_res) + return WARC_ERR; + + /* warc_write_resource_record has also closed warc_tmp. */ + } + + if (con->cmd & DO_LIST) /* This is a directory listing file. */ { if (!opt.remove_listing) /* --dont-remove-listing was specified, so do count this towards the number of bytes and files downloaded. */ { - total_downloaded_bytes += len; + total_downloaded_bytes += qtyread; numurls++; } @@ -1558,10 +1790,10 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con) downloaded if they're going to be deleted. People seeding proxies, for instance, may want to know how many bytes and files they've downloaded through it. */ - total_downloaded_bytes += len; + total_downloaded_bytes += qtyread; numurls++; - if (opt.delete_after) + if (opt.delete_after && !input_file_url (opt.input_filename)) { DEBUGP (("\ Removing file due to --delete-after in ftp_loop_internal():\n")); @@ -1576,6 +1808,10 @@ Removing file due to --delete-after in ftp_loop_internal():\n")); con->cmd |= LEAVE_PENDING; else con->cmd &= ~LEAVE_PENDING; + + if (local_file) + *local_file = xstrdup (locf); + return RETROK; } while (!opt.ntry || (count < opt.ntry)); @@ -1604,14 +1840,14 @@ ftp_get_listing (struct url *u, ccon *con, struct fileinfo **f) /* Find the listing file name. We do it by taking the file name of the URL and replacing the last component with the listing file name. */ - uf = url_file_name (u); + uf = url_file_name (u, NULL); lf = file_merge (uf, LIST_FILENAME); xfree (uf); DEBUGP ((_("Using %s as listing tmp file.\n"), quote (lf))); con->target = xstrdup (lf); xfree (lf); - err = ftp_loop_internal (u, NULL, con); + err = ftp_loop_internal (u, NULL, con, NULL); lf = xstrdup (con->target); xfree (con->target); con->target = old_target; @@ -1698,7 +1934,7 @@ ftp_retrieve_list (struct url *u, struct fileinfo *f, ccon *con) ofile = xstrdup (u->file); url_set_file (u, f->name); - con->target = url_file_name (u); + con->target = url_file_name (u, NULL); err = RETROK; dlthis = true; @@ -1806,7 +2042,7 @@ Already have correct symlink %s -> %s\n\n"), else /* opt.retr_symlinks */ { if (dlthis) - err = ftp_loop_internal (u, f, con); + err = ftp_loop_internal (u, f, con, NULL); } /* opt.retr_symlinks */ break; case FT_DIRECTORY: @@ -1817,7 +2053,7 @@ Already have correct symlink %s -> %s\n\n"), case FT_PLAINFILE: /* Call the retrieve loop. */ if (dlthis) - err = ftp_loop_internal (u, f, con); + err = ftp_loop_internal (u, f, con, NULL); break; case FT_UNKNOWN: logprintf (LOG_NOTQUIET, _("%s: unknown/unsupported file type.\n"), @@ -1835,8 +2071,10 @@ Already have correct symlink %s -> %s\n\n"), set_local_file (&actual_target, con->target); - /* If downloading a plain file, set valid (non-zero) permissions. */ - if (dlthis && (actual_target != NULL) && (f->type == FT_PLAINFILE)) + /* If downloading a plain file, and the user requested it, then + set valid (non-zero) permissions. */ + if (dlthis && (actual_target != NULL) && + (f->type == FT_PLAINFILE) && opt.preserve_perm) { if (f->perms) chmod (actual_target, f->perms); @@ -1849,7 +2087,8 @@ Already have correct symlink %s -> %s\n\n"), original. :( */ if (actual_target != NULL) { - if (!(f->type == FT_SYMLINK && !opt.retr_symlinks) + if (opt.useservertimestamps + && !(f->type == FT_SYMLINK && !opt.retr_symlinks) && f->tstamp != -1 && dlthis && file_exists_p (con->target)) @@ -1868,7 +2107,9 @@ Already have correct symlink %s -> %s\n\n"), xfree (ofile); /* Break on fatals. */ - if (err == QUOTEXC || err == HOSTERR || err == FWRITEERR) + if (err == QUOTEXC || err == HOSTERR || err == FWRITEERR + || err == WARC_ERR || err == WARC_TMP_FOPENERR + || err == WARC_TMP_FWRITEERR) break; con->cmd &= ~ (DO_CWD | DO_LOGIN); f = f->next; @@ -2033,7 +2274,7 @@ ftp_retrieve_glob (struct url *u, ccon *con, int action) if (matchres == -1) { logprintf (LOG_NOTQUIET, _("Error matching %s against %s: %s\n"), - u->file, quotearg_style (escape_quoting_style, f->name), + u->file, quotearg_style (escape_quoting_style, f->name), strerror (errno)); break; } @@ -2050,8 +2291,22 @@ ftp_retrieve_glob (struct url *u, ccon *con, int action) } else if (action == GLOB_GETONE) { +#ifdef __VMS + /* 2009-09-09 SMS. + * Odd-ball compiler ("HP C V7.3-009 on OpenVMS Alpha V7.3-2") + * bug causes spurious %CC-E-BADCONDIT complaint with this + * "?:" statement. (Different linkage attributes for strcmp() + * and strcasecmp().) Converting to "if" changes the + * complaint to %CC-W-PTRMISMATCH on "cmp = strcmp;". Adding + * the senseless type cast clears the complaint, and looks + * harmless. + */ + int (*cmp) (const char *, const char *) + = opt.ignore_case ? strcasecmp : (int (*)())strcmp; +#else /* def __VMS */ int (*cmp) (const char *, const char *) = opt.ignore_case ? strcasecmp : strcmp; +#endif /* def __VMS [else] */ f = start; while (f) { @@ -2065,7 +2320,7 @@ ftp_retrieve_glob (struct url *u, ccon *con, int action) if (start) { /* Just get everything. */ - ftp_retrieve_list (u, start, con); + res = ftp_retrieve_list (u, start, con); } else { @@ -2081,7 +2336,7 @@ ftp_retrieve_glob (struct url *u, ccon *con, int action) { /* Let's try retrieving it anyway. */ con->st |= ON_YOUR_OWN; - res = ftp_loop_internal (u, NULL, con); + res = ftp_loop_internal (u, NULL, con, NULL); return res; } @@ -2094,15 +2349,15 @@ ftp_retrieve_glob (struct url *u, ccon *con, int action) if (opt.quota && total_downloaded_bytes > opt.quota) return QUOTEXC; else - /* #### Should we return `res' here? */ - return RETROK; + return res; } /* The wrapper that calls an appropriate routine according to contents of URL. Inherently, its capabilities are limited on what can be encoded into a URL. */ uerr_t -ftp_loop (struct url *u, int *dt, struct url *proxy, bool recursive, bool glob) +ftp_loop (struct url *u, char **local_file, int *dt, struct url *proxy, + bool recursive, bool glob) { ccon con; /* FTP connection */ uerr_t res; @@ -2132,7 +2387,7 @@ ftp_loop (struct url *u, int *dt, struct url *proxy, bool recursive, bool glob) char *filename = (opt.output_document ? xstrdup (opt.output_document) : (con.target ? xstrdup (con.target) - : url_file_name (u))); + : url_file_name (u, NULL))); res = ftp_index (filename, u, f); if (res == FTPOK && opt.verbose) { @@ -2172,16 +2427,16 @@ ftp_loop (struct url *u, int *dt, struct url *proxy, bool recursive, bool glob) file_part = u->path; ispattern = has_wildcards_p (file_part); } - if (ispattern || recursive || opt.timestamping) + if (ispattern || recursive || opt.timestamping || opt.preserve_perm) { /* ftp_retrieve_glob is a catch-all function that gets called - if we need globbing, time-stamping or recursion. Its - third argument is just what we really need. */ + if we need globbing, time-stamping, recursion or preserve + permissions. Its third argument is just what we really need. */ res = ftp_retrieve_glob (u, &con, ispattern ? GLOB_GLOBALL : GLOB_GETONE); } else - res = ftp_loop_internal (u, NULL, &con); + res = ftp_loop_internal (u, NULL, &con, local_file); } if (res == FTPOK) res = RETROK;