From c3835a425a2b441dd741c7edc1684310141fb385 Mon Sep 17 00:00:00 2001 From: Andrea Urbani Date: Wed, 23 Oct 2013 07:30:14 +0200 Subject: [PATCH] "LIST" or "LIST -a" ftp command according to the remote system --- src/ChangeLog | 19 +++ src/ftp-basic.c | 48 +++++-- src/ftp.c | 150 +++++++++++++++++++- src/ftp.h | 29 +++- tests/ChangeLog | 27 ++++ tests/FTPServer.pm | 96 ++++++++++--- tests/Test-ftp-list-Multinet.px | 67 +++++++++ tests/Test-ftp-list-UNIX-hidden.px | 65 +++++++++ tests/Test-ftp-list-Unknown-a.px | 77 ++++++++++ tests/Test-ftp-list-Unknown-hidden.px | 69 +++++++++ tests/Test-ftp-list-Unknown-list-a-fails.px | 62 ++++++++ tests/Test-ftp-list-Unknown.px | 65 +++++++++ tests/run-px | 6 + 13 files changed, 745 insertions(+), 35 deletions(-) create mode 100644 tests/Test-ftp-list-Multinet.px create mode 100644 tests/Test-ftp-list-UNIX-hidden.px create mode 100644 tests/Test-ftp-list-Unknown-a.px create mode 100644 tests/Test-ftp-list-Unknown-hidden.px create mode 100644 tests/Test-ftp-list-Unknown-list-a-fails.px create mode 100644 tests/Test-ftp-list-Unknown.px diff --git a/src/ChangeLog b/src/ChangeLog index cdeb55e1..566db745 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,22 @@ +2013-10-17 Andrea Urbani + + * ftp.c (getftp): force "LIST" or "LIST -a" + according to the remote system type. If the remote + system is not known, it tries, only the first time, + "LIST -a", after "LIST" and decides which one to use. + For more information look for "__LIST_A_EXPLANATION__" + * ftp.h (enum ustype): New ustype enum. + * ftp.h (ftp_syst): New enum ustype *unix_type parameter. + * ftp.h (ftp_list): Removed enum stype rs parameter, added. + bool avoid_list_a, bool avoid_list, bool *list_a_used + parameters. + * ftp.h (wget_ftp_fstatus): New AVOID_LIST_A, AVOID_LIST, + LIST_AFTER_LIST_A_CHECK_DONE values. + * ftp-basic.c (ftp_list): it handles the new avoid_list_a, + avoid_list and list_a_used parameters. + * ftp.h (ftp_syst): it stores information about the "215 UNIX" + systems into the new unix_type parameter. + 2013-10-26 Bykov Aleksey * utils.c (match_tail): Fix cookies reject diff --git a/src/ftp-basic.c b/src/ftp-basic.c index 045d125e..7a512c6e 100644 --- a/src/ftp-basic.c +++ b/src/ftp-basic.c @@ -960,23 +960,32 @@ ftp_retr (int csock, const char *file) /* Sends the LIST command to the server. If FILE is NULL, send just `LIST' (no space). */ uerr_t -ftp_list (int csock, const char *file, enum stype rs) +ftp_list (int csock, const char *file, bool avoid_list_a, bool avoid_list, + bool *list_a_used) { char *request, *respline; int nwritten; uerr_t err; bool ok = false; size_t i = 0; - /* Try `LIST -a' first and revert to `LIST' in case of failure. */ + + *list_a_used = false; + + /* 2013-10-12 Andrea Urbani (matfanjol) + For more information about LIST and "LIST -a" please look at ftp.c, + function getftp, text "__LIST_A_EXPLANATION__". + + If somebody changes the following commands, please, checks also the + later "i" variable. */ const char *list_commands[] = { "LIST -a", "LIST" }; - /* 2008-01-29 SMS. For a VMS FTP server, where "LIST -a" may not - fail, but will never do what is desired here, skip directly to the - simple "LIST" command (assumed to be the last one in the list). - */ - if (rs == ST_VMS) - i = countof (list_commands)- 1; + if (avoid_list_a) + { + i = countof (list_commands)- 1; + DEBUGP (("(skipping \"LIST -a\")")); + } + do { /* Send request. */ @@ -1000,6 +1009,8 @@ ftp_list (int csock, const char *file, enum stype rs) { err = FTPOK; ok = true; + /* Which list command was used? */ + *list_a_used = (i == 0); } else { @@ -1008,6 +1019,12 @@ ftp_list (int csock, const char *file, enum stype rs) xfree (respline); } ++i; + if ((avoid_list) && (i == 1)) + { + /* I skip LIST */ + ++i; + DEBUGP (("(skipping \"LIST\")")); + } } while (i < countof (list_commands) && !ok); return err; @@ -1015,7 +1032,7 @@ ftp_list (int csock, const char *file, enum stype rs) /* Sends the SYST command to the server. */ uerr_t -ftp_syst (int csock, enum stype *server_type) +ftp_syst (int csock, enum stype *server_type, enum ustype *unix_type) { char *request, *respline; int nwritten; @@ -1048,12 +1065,23 @@ ftp_syst (int csock, enum stype *server_type) first word of the server response)? */ request = strtok (NULL, " "); + *unix_type = UST_OTHER; + if (request == NULL) *server_type = ST_OTHER; else if (!strcasecmp (request, "VMS")) *server_type = ST_VMS; else if (!strcasecmp (request, "UNIX")) - *server_type = ST_UNIX; + { + *server_type = ST_UNIX; + /* 2013-10-17 Andrea Urbani (matfanjol) + I check more in depth the system type */ + if (!strncasecmp (ftp_last_respline, "215 UNIX Type: L8", 17)) + *unix_type = UST_TYPE_L8; + else if (!strncasecmp (ftp_last_respline, + "215 UNIX MultiNet Unix Emulation V5.3(93)", 41)) + *unix_type = UST_MULTINET; + } else if (!strcasecmp (request, "WINDOWS_NT") || !strcasecmp (request, "WINDOWS2000")) *server_type = ST_WINNT; diff --git a/src/ftp.c b/src/ftp.c index 1fe2bac7..c2522ca1 100644 --- a/src/ftp.c +++ b/src/ftp.c @@ -70,6 +70,7 @@ typedef struct int csock; /* control connection socket */ double dltime; /* time of the download in msecs */ enum stype rs; /* remote system reported by ftp server */ + enum ustype rsu; /* when rs is ST_UNIX, here there are more details */ char *id; /* initial directory */ char *target; /* target file name */ struct url *proxy; /* FTWK-style proxy */ @@ -255,8 +256,10 @@ getftp (struct url *u, wgint passed_expected_bytes, wgint *qtyread, bool got_expected_bytes = false; bool rest_failed = false; int flags; - wgint rd_size; + wgint rd_size, previous_rd_size = 0; char type_char; + bool try_again; + bool list_a_used = false; assert (con != NULL); assert (con->target != NULL); @@ -365,7 +368,7 @@ Error in server response, closing control connection.\n")); /* Third: Get the system type */ if (!opt.server_response) logprintf (LOG_VERBOSE, "==> SYST ... "); - err = ftp_syst (csock, &con->rs); + err = ftp_syst (csock, &con->rs, &con->rsu); /* FTPRERR */ switch (err) { @@ -390,6 +393,44 @@ Error in server response, closing control connection.\n")); if (!opt.server_response && err != FTPSRVERR) logputs (LOG_VERBOSE, _("done. ")); + /* 2013-10-17 Andrea Urbani (matfanjol) + According to the system type I choose which + list command will be used. + If I don't know that system, I will try, the + first time of each session, "LIST -a" and + "LIST". (see __LIST_A_EXPLANATION__ below) */ + switch (con->rs) + { + case ST_VMS: + /* About ST_VMS there is an old note: + 2008-01-29 SMS. For a VMS FTP server, where "LIST -a" may not + fail, but will never do what is desired here, + skip directly to the simple "LIST" command + (assumed to be the last one in the list). */ + DEBUGP (("\nVMS: I know it and I will use \"LIST\" as standard list command\n")); + con->st |= LIST_AFTER_LIST_A_CHECK_DONE; + con->st |= AVOID_LIST_A; + break; + case ST_UNIX: + if (con->rsu == UST_MULTINET) + { + DEBUGP (("\nUNIX MultiNet: I know it and I will use \"LIST\" " + "as standard list command\n")); + con->st |= LIST_AFTER_LIST_A_CHECK_DONE; + con->st |= AVOID_LIST_A; + } + else if (con->rsu == UST_TYPE_L8) + { + DEBUGP (("\nUNIX TYPE L8: I know it and I will use \"LIST -a\" " + "as standard list command\n")); + con->st |= LIST_AFTER_LIST_A_CHECK_DONE; + con->st |= AVOID_LIST; + } + break; + default: + break; + } + /* Fourth: Find the initial ftp directory */ if (!opt.server_response) @@ -775,6 +816,9 @@ Error in server response, closing control connection.\n")); return RETRFINISHED; } + do + { + try_again = false; /* If anything is to be retrieved, PORT (or PASV) must be sent. */ if (cmd & (DO_LIST | DO_RETR)) { @@ -1041,7 +1085,8 @@ Error in server response, closing control connection.\n")); /* As Maciej W. Rozycki (macro@ds2.pg.gda.pl) says, `LIST' without arguments is better than `LIST .'; confirmed by RFC959. */ - err = ftp_list (csock, NULL, con->rs); + err = ftp_list (csock, NULL, con->st&AVOID_LIST_A, con->st&AVOID_LIST, &list_a_used); + /* FTPRERR, WRITEFAILED */ switch (err) { @@ -1343,8 +1388,10 @@ Error in server response, closing control connection.\n")); } /* If it was a listing, and opt.server_response is true, print it out. */ - if (opt.server_response && (con->cmd & DO_LIST)) + if (con->cmd & DO_LIST) { + if (opt.server_response) + { /* 2005-02-25 SMS. Much of this work may already have been done, but repeating it should do no damage beyond wasting time. @@ -1383,8 +1430,99 @@ Error in server response, closing control connection.\n")); xfree (line); fclose (fp); } - } /* con->cmd & DO_LIST && server_response */ + } /* server_response */ + + /* 2013-10-17 Andrea Urbani (matfanjol) + < __LIST_A_EXPLANATION__ > + After the SYST command, looks if it knows that system. + If yes, wget will force the use of "LIST" or "LIST -a". + If no, wget will try, only the first time of each session, before the + "LIST -a" command and after the "LIST". + If "LIST -a" works and returns more or equal data of the "LIST", + "LIST -a" will be the standard list command for all the session. + If "LIST -a" fails or returns less data than "LIST" (think on the case + of an existing file called "-a"), "LIST" will be the standard list + command for all the session. + ("LIST -a" is used to get also the hidden files) + */ + if (!(con->st & LIST_AFTER_LIST_A_CHECK_DONE)) + { + /* We still have to check "LIST" after the first "LIST -a" to see + if with "LIST" we get more data than "LIST -a", that means + "LIST -a" returned files/folders with "-a" name. */ + if (con->st & AVOID_LIST_A) + { + /* LIST was used in this cycle. + Let's see the result. */ + if (rd_size > previous_rd_size) + { + /* LIST returns more data than "LIST -a". + "LIST" is the official command to use. */ + con->st |= LIST_AFTER_LIST_A_CHECK_DONE; + DEBUGP (("LIST returned more data than \"LIST -a\": " + "I will use \"LIST\" as standard list command\n")); + } + else if (previous_rd_size > rd_size) + { + /* "LIST -a" returned more data then LIST. + "LIST -a" is the official command to use. */ + con->st |= LIST_AFTER_LIST_A_CHECK_DONE; + con->st |= AVOID_LIST; + con->st &= ~AVOID_LIST_A; + /* Sorry, please, download again the "LIST -a"... */ + try_again = true; + DEBUGP (("LIST returned less data than \"LIST -a\": I will " + "use \"LIST -a\" as standard list command\n")); + } + else + { + /* LIST and "LIST -a" return the same data. */ + if (rd_size == 0) + { + /* Same empty data. We will check both again because + we cannot check if "LIST -a" has returned an empty + folder instead of a folder content. */ + con->st &= ~AVOID_LIST_A; + } + else + { + /* Same data, so, better to take "LIST -a" that + shows also hidden files/folders (when present) */ + con->st |= LIST_AFTER_LIST_A_CHECK_DONE; + con->st |= AVOID_LIST; + con->st &= ~AVOID_LIST_A; + DEBUGP (("LIST returned the same amount of data of " + "\"LIST -a\": I will use \"LIST -a\" as standard " + "list command\n")); + } + } + } + else + { + /* In this cycle "LIST -a" should being used. Is it true? */ + if (list_a_used) + { + /* Yes, it is. + OK, let's save the amount of data and try again + with LIST */ + previous_rd_size = rd_size; + try_again = true; + con->st |= AVOID_LIST_A; + } + else + { + /* No: something happens and LIST was used. + This means "LIST -a" raises an error. */ + con->st |= LIST_AFTER_LIST_A_CHECK_DONE; + con->st |= AVOID_LIST_A; + DEBUGP (("\"LIST -a\" failed: I will use \"LIST\" " + "as standard list command\n")); + } + } + } + } + } while (try_again); return RETRFINISHED; } @@ -1626,7 +1764,7 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi /* warc_write_resource_record has also closed warc_tmp. */ } - if ((con->cmd & DO_LIST)) + if (con->cmd & DO_LIST) /* This is a directory listing file. */ { if (!opt.remove_listing) diff --git a/src/ftp.h b/src/ftp.h index be00d880..3b0fba59 100644 --- a/src/ftp.h +++ b/src/ftp.h @@ -45,6 +45,14 @@ enum stype ST_OTHER }; +/* Extensions of the ST_UNIX */ +enum ustype +{ + UST_TYPE_L8, + UST_MULTINET, + UST_OTHER +}; + extern char ftp_last_respline[]; uerr_t ftp_response (int, char **); @@ -61,8 +69,8 @@ uerr_t ftp_type (int, int); uerr_t ftp_cwd (int, const char *); uerr_t ftp_retr (int, const char *); uerr_t ftp_rest (int, wgint); -uerr_t ftp_list (int, const char *, enum stype); -uerr_t ftp_syst (int, enum stype *); +uerr_t ftp_list (int, const char *, bool, bool, bool *); +uerr_t ftp_syst (int, enum stype *, enum ustype *); uerr_t ftp_pwd (int, char **); uerr_t ftp_size (int, const char *, wgint *); @@ -124,8 +132,23 @@ enum wget_ftp_fstatus NOTHING = 0x0000, /* Nothing done yet. */ ON_YOUR_OWN = 0x0001, /* The ftp_loop_internal sets the defaults. */ - DONE_CWD = 0x0002 /* The current working directory is + DONE_CWD = 0x0002, /* The current working directory is correct. */ + + /* 2013-10-17 Andrea Urbani (matfanjol) + For more information about the following entries, please, + look at ftp.c, function getftp, text "__LIST_A_EXPLANATION__". */ + AVOID_LIST_A = 0x0004, /* It tells us if during this + session we have to avoid the use + of "LIST -a".*/ + AVOID_LIST = 0x0008, /* It tells us if during this + session we have to avoid to use + "LIST". */ + LIST_AFTER_LIST_A_CHECK_DONE = 0x0010 + /* It tells us if we have already + checked "LIST" after the first + "LIST -a" to handle the case of + file/folders named "-a". */ }; struct fileinfo *ftp_parse_ls (const char *, const enum stype); diff --git a/tests/ChangeLog b/tests/ChangeLog index fab7460e..e1ef3346 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,30 @@ +2013-10-17 Andrea Urbani + + * FTPServer.pm (GetBehavior): new routine. + * FTPServer.pm (get_list): new parameter to skip hidden files + * Test-ftp-list-Multinet.px: Test LIST on a "UNIX MultiNet + Unix Emulation" system that returns an empty content when + "LIST -a" is requested (probably because no "-a" files + exist) + * Test-ftp-list-Unknown.px: Test LIST on a "Unknown ftp + service" system that returns an empty content when + "LIST -a" is requested (probably because no "-a" files + exist) + * Test-ftp-list-Unknown-a.px: Test LIST on a "Unknown ftp + service" system that recognises "LIST -a" as "give me the + -a file" and there is a "-a" file + other two files. + "LIST -a" will return only "-a", "LIST" all the three files. + * Test-ftp-list-Unknown-hidden.px: Test LIST on a "Unknown ftp + service" system that recognises "LIST -a" as an "UNIX Type: + L8" system (show me also the hidden files) and there is an + hidden file. + * Test-ftp-list-Unknown-list-a-fails.px: Test LIST on a + "Unknown ftp service" system that raises an error on + "LIST -a" command. + * Test-ftp-list-UNIX-hidden.px: Test LIST on a "UNIX Type: + L8" system that recognises "LIST -a" as "show me also the + hidden files" and there is an hidden file. + 2013-10-10 Giuseppe Scrivano * Test-idn-robots-utf8.px: Remove -H. diff --git a/tests/FTPServer.pm b/tests/FTPServer.pm index 87e79839..2ac72e34 100644 --- a/tests/FTPServer.pm +++ b/tests/FTPServer.pm @@ -81,10 +81,25 @@ sub _LIST_command my ($conn, $cmd, $path) = @_; my $paths = $conn->{'paths'}; - # This is something of a hack. Some clients expect a Unix server - # to respond to flags on the 'ls command line'. Remove these flags - # and ignore them. This is particularly an issue with ncftp 2.4.3. - $path =~ s/^-[a-zA-Z0-9]+\s?//; + my $ReturnEmptyList = ( $paths->GetBehavior('list_empty_if_list_a') && + $path eq '-a'); + my $SkipHiddenFiles = ( $paths->GetBehavior('list_no_hidden_if_list') && + ( ! $path ) ); + + if ($paths->GetBehavior('list_fails_if_list_a') && $path eq '-a') + { + print {$conn->{socket}} "500 Unknown command\r\n"; + return; + } + + + if (!$paths->GetBehavior('list_dont_clean_path')) + { + # This is something of a hack. Some clients expect a Unix server + # to respond to flags on the 'ls command line'. Remove these flags + # and ignore them. This is particularly an issue with ncftp 2.4.3. + $path =~ s/^-[a-zA-Z0-9]+\s?//; + } my $dir = $conn->{'dir'}; @@ -94,12 +109,16 @@ sub _LIST_command # working directory. local $_; - $dir = FTPPaths::path_merge($dir, $path); - my $listing = $paths->get_list($dir); - unless ($listing) { - print {$conn->{socket}} "550 File or directory not found.\r\n"; - return; - } + my $listing; + if (!$ReturnEmptyList) + { + $dir = FTPPaths::path_merge($dir, $path); + $listing = $paths->get_list($dir,$SkipHiddenFiles); + unless ($listing) { + print {$conn->{socket}} "550 File or directory not found.\r\n"; + return; + } + } print STDERR "_LIST_command - dir is: $dir\n" if $log; @@ -112,9 +131,12 @@ sub _LIST_command return; } - for my $item (@$listing) { - print $sock "$item\r\n"; - } + if (!$ReturnEmptyList) + { + for my $item (@$listing) { + print $sock "$item\r\n"; + } + } unless ($sock->close) { print {$conn->{socket}} "550 Error closing data connection: $!\r\n"; @@ -372,7 +394,14 @@ sub _SYST_command { my ($conn, $cmd, $dummy) = @_; - print {$conn->{socket}} "215 UNIX Type: L8\r\n"; + if ($conn->{'paths'}->GetBehavior('syst_response')) + { + print {$conn->{socket}} $conn->{'paths'}->GetBehavior('syst_response') . "\r\n"; + } + else + { + print {$conn->{socket}} "215 UNIX Type: L8\r\n"; + } } sub _TYPE_command @@ -780,7 +809,7 @@ sub _format_for_list { } sub get_list { - my ($self, $path) = @_; + my ($self, $path, $no_hidden) = @_; my $info = $self->get_info($path); return undef unless defined $info; my $list = []; @@ -788,7 +817,19 @@ sub get_list { if ($info->{'_type'} eq 'd') { for my $item (keys %$info) { next if $item =~ /^_/; - push @$list, $self->_format_for_list($item, $info->{$item}); + # 2013-10-17 Andrea Urbani (matfanjol) + # I skip the hidden files if requested + if (($no_hidden) && + (defined($info->{$item}->{'attr'})) && + (index($info->{$item}->{'attr'}, "H")>=0)) + { + # This is an hidden file and I don't want to see it! + print STDERR "get_list: Skipped hidden file [$item]\n"; + } + else + { + push @$list, $self->_format_for_list($item, $info->{$item}); + } } } else { push @$list, $self->_format_for_list(final_component($path), $info); @@ -797,6 +838,29 @@ sub get_list { return $list; } +# 2013-10-17 Andrea Urbani (matfanjol) +# It returns the behavior of the given name. +# In this file I handle also the following behaviors: +# list_dont_clean_path : if defined, the command +# $path =~ s/^-[a-zA-Z0-9]+\s?//; +# is not runt and the given path +# remains the original one +# list_empty_if_list_a : if defined, "LIST -a" returns an +# empty content +# list_fails_if_list_a : if defined, "LIST -a" returns an +# error +# list_no_hidden_if_list: if defined, "LIST" doesn't return +# hidden files. +# To define an hidden file add +# attr => "H" +# to the url files +# syst_response : if defined, its content is printed +# out as SYST response +sub GetBehavior { + my ($self, $name) = @_; + return $self->{'_behavior'}{$name}; +} + 1; # vim: et ts=4 sw=4 diff --git a/tests/Test-ftp-list-Multinet.px b/tests/Test-ftp-list-Multinet.px new file mode 100644 index 00000000..40a1bdb9 --- /dev/null +++ b/tests/Test-ftp-list-Multinet.px @@ -0,0 +1,67 @@ +#!/usr/bin/env perl + + +# 2013-10-17 Andrea Urbani (matfanjol) +# In this ftp test: +# - the response of SYST command is +# 215 UNIX MultiNet Unix Emulation V5.3(93) +# - the response of "LIST -a" command is an empty +# directory. +# wget should use directly the "LIST" command to get +# the right content, but it will be ok also "LIST -a" +# if followed by "LIST" (in the case of future changes). + + +use strict; +use warnings; + +use FTPTest; + + +############################################################################### + +my $afile = < { + content => $afile, + }, + '/bfile.txt' => { + content => $bfile, + }, +); + +my $cmdline = $WgetTest::WGETPATH . " --no-directories --recursive --level=1 --accept \"?file.txt\" ftp://localhost:{{port}}/"; + +my $expected_error_code = 0; + +my %expected_downloaded_files = ( + 'afile.txt' => { + content => $afile, + }, + 'bfile.txt' => { + content => $bfile, + }, +); + +############################################################################### + +my $the_test = FTPTest->new (name => "Test-ftp-list-Multinet", + input => \%urls, + cmdline => $cmdline, + errcode => $expected_error_code, + output => \%expected_downloaded_files, + server_behavior => {list_empty_if_list_a => 1, + syst_response => "215 UNIX MultiNet Unix Emulation V5.3(93)"}); +exit $the_test->run(); + diff --git a/tests/Test-ftp-list-UNIX-hidden.px b/tests/Test-ftp-list-UNIX-hidden.px new file mode 100644 index 00000000..a99e9d2e --- /dev/null +++ b/tests/Test-ftp-list-UNIX-hidden.px @@ -0,0 +1,65 @@ +#!/usr/bin/env perl + +# 2013-10-17 Andrea Urbani (matfanjol) +# In this ftp test: +# - the response of "LIST -a" command contains +# all the files +# - the response of "LIST" command contains +# the normal files (hidden files are not present) +# wget should use only "LIST -a" because it recognise +# the system as "UNIX Type: L8" and so it should see +# and download the hidden file too. + +use strict; +use warnings; + +use FTPTest; + + +############################################################################### + +my $normalfile = <