# libwhisker v1.6 # libwhisker is a collection of routines used by whisker # # libwhisker copyright 2000,2001,2002 rfp.labs # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # # More information can be found at http://www.wiretrip.net/rfp/ # Libwhisker mailing list and resources are also available at # http://sourceforge.net/projects/whisker/ # package LW; use 5.004; $LW::VERSION="1.6"; ####### external module tests ################################### BEGIN { ## LW module manager stuff ## %LW::available = (); $LW::LW_HAS_SOCKET = 0; $LW::LW_HAS_SSL = 0; $LW::LW_SSL_LIB = 0; $LW::LW_NONBLOCK_CONNECT= 0; ## binary helper - may contain functions substituted further down ## eval "use LW::bin"; # do we have libwhisker binary helpers? if($@){ $LW::available{'LW::bin'}=$LW::bin::VERSION; } ## encode subpackage ## eval "require MIME::Base64"; if($@){ *encode_base64 = \&encode_base64_perl; *decode_base64 = \&decode_base64_perl; } else{ # MIME::Base64 typically has faster C versions $LW::available{'mime::base64'}=$MIME::Base64::VERSION; *encode_base64 = \&MIME::Base64::encode_base64; *decode_base64 = \&MIME::Base64::decode_base64;} ## md5 subpackage ## eval "require MD5"; if(!$@){ $LW::available{'md5'}=$MD5::VERSION;} ## http subpackage ## eval "use Socket"; # do we have socket support? if($@){ $LW::LW_HAS_SOCKET=0; } else { $LW::LW_HAS_SOCKET=1; $LW::available{'socket'}=$Socket::VERSION;} if($LW_HAS_SOCKET){ eval "use Net::SSLeay"; # do we have SSL support? if($@){ $LW::LW_HAS_SSL=0; } else { $LW::LW_HAS_SSL=1; $LW::LW_SSL_LIB=1; $LW::available{'net::ssleay'}=$Net::SSLeay::VERSION; Net::SSLeay::load_error_strings(); Net::SSLeay::SSLeay_add_ssl_algorithms(); Net::SSLeay::randomize();} if(!$LW::LW_HAS_SSL){ eval "use Net::SSL"; # different SSL lib if($@){ $LW::LW_HAS_SSL=0; } else { $LW::LW_HAS_SSL=1; $LW::LW_SSL_LIB=2; $LW::available{'net::ssl'}=$Net::SSL::VERSION;} } ## non-blocking IO ## if($^O!~/Win32/){ eval "use POSIX qw(:errno_h :fcntl_h)"; # better if(!$@){ $LW::LW_NONBLOCK_CONNECT=1; } } } # if($LW_HAS_SOCKET) } # BEGIN ####### package variables ####################################### ## crawl subpackage ## %LW::crawl_config=( 'save_cookies' => 0, 'reuse_cookies' => 1, 'save_offsites' => 0, 'follow_moves' => 1, 'url_limit' => 1000, 'use_params' => 0, 'params_double_record' => 0, 'skip_ext' => '.gif .jpg .gz .mp3 .swf .zip ', 'save_skipped' => 0, 'save_referrers'=> 0, 'do_head' => 0, 'callback' => 0, 'slashdot_bug' => 1, 'normalize_uri' => 1, 'source_callback' => 0 ); @LW::crawl_urls=();; %LW::crawl_server_tags=(); %LW::crawl_referrers=(); %LW::crawl_offsites=(); %LW::crawl_cookies=(); %LW::crawl_forms=(); %LW::crawl_temp=(); # this idea/structure was taken from HTML::LinkExtor.pm, # copyright 2000 Gisle Aas and Michael A. Chase %LW::crawl_linktags = ( 'a' => 'href', 'applet' => [qw(codebase archive code)], 'area' => 'href', 'base' => 'href', 'bgsound' => 'src', 'blockquote' => 'cite', 'body' => 'background', 'del' => 'cite', 'embed' => [qw(src pluginspage)], 'form' => 'action', 'frame' => [qw(src longdesc)], 'iframe' => [qw(src longdesc)], 'ilayer' => 'background', 'img' => [qw(src lowsrc longdesc usemap)], 'input' => [qw(src usemap)], 'ins' => 'cite', 'isindex' => 'action', 'head' => 'profile', 'layer' => [qw(background src)], 'link' => 'href', 'object' => [qw(codebase data archive usemap)], 'q' => 'cite', 'script' => 'src', 'table' => 'background', 'td' => 'background', 'th' => 'background', 'xmp' => 'href', ); ## forms subpackage ## @LW::forms_found=(); %LW::forms_current=(); ## http subpackage ## my $SOCKSTATE=0; my $TIMEOUT=10; # default my ($STATS_REQS,$STATS_SYNS)=(0,0); my ($LAST_HOST,$LAST_INET_ATON,$LAST_SSL)=('','',0); my ($OUTGOING_QUEUE,$INCOMING_QUEUE)=('',''); my ($SSL_CTX, $SSL_THINGY); my %http_host_cache=(); # order is following: # [0] - SOCKET # [1] - $SOCKSTATE # [2] - INET_ATON # [3] - $SSL_CTX # [4] - $SSL_THINGY # [5] - $OUTGOING_QUEUE # [6] - $INCOMING_QUEUE # [7] - $STATS_SYNS # [8] - $STATS_REQS my $Z; # array ref to current host specs sub anti_ids { my ($rhin,$modes)=(shift,shift); my (@T,$x,$c,$s,$y); my $ENCODED=0; my $W = $$rhin{'whisker'}; return if(!(defined $rhin && ref($rhin))); # in case they didn't do it already $$rhin{'whisker'}->{'uri_orig'}=$$rhin{'whisker'}->{'uri'}; # note: order is important! # mode 9 - session splicing if($modes=~/9/){ $$rhin{'whisker'}->{'ids_session_splice'}=1; } # mode 4 - prepend long random string if($modes=~/4/){$s=''; if($$W{'uri'}=~m#^/#){ $y=&utils_randstr; $s.=$y while(length($s)<512); $$W{'uri'}="/$s/..".$$W{'uri'}; } } # mode 7 - (windows) random case sensitivity if($modes=~/7/){ @T=split(//,$$W{'uri'}); for($x=0;$x<(scalar @T);$x++){ if((rand()*2)%2 == 1){ $T[$x]=uc($T[$x]);}} $$W{'uri'}=join('',@T); } # mode 2 - directory self-reference (/./) if($modes=~/2/){ $$W{'uri'}=~s#/#/./#g; } # mode 8 - windows directory separator (\) if($modes=~/8/){ $$W{'uri'}=~s#/#\\#g; $$W{'uri'}=~s#^\\#/#; $$W{'uri'}=~s#^(http|file|ftp|nntp|news|telnet):\\#$1://#; $$W{'uri'}=~s#\\$#/#; } # mode 1 - random URI (non-UTF8) encoding if($modes=~/1/){ if($ENCODED==0){ $$W{'uri'}=encode_str2ruri($$W{'uri'}); $ENCODED=1;} } # mode 5 - fake parameter if($modes=~/5/){ ($s,$y)=(&utils_randstr,&utils_randstr); $$W{'uri'}="/$s.html%3f$y=/../$$W{'uri'}"; } # mode 3 - premature URL ending if($modes=~/3/){ $s=&utils_randstr; $$W{'uri'}="/%20HTTP/1.1%0D%0A%Accept%3A%20$s/../..$$W{'uri'}"; } # mode 6 - TAB as request spacer if($modes=~/6/){ $$W{'req_spacer'}="\t"; } } # end anti_ids sub auth_brute_force { my ($auth_method, $hrin, $user, $pwordref, $dom)=@_; my ($P,%hout); return undef if(!defined $auth_method || length($auth_method)==0); return undef if(!defined $user || length($user) ==0); return undef if(!(defined $hrin && ref($hrin) )); return undef if(!(defined $pwordref && ref($pwordref))); map { ($P=$_)=~tr/\r\n//d; auth_set_header($auth_method,$hrin,$user,$P,$dom); return undef if(http_do_request($hrin,\%hout)); return $P if($hout{'whisker'}->{'http_resp'} ne 401); } @$pwordref; return undef;} sub auth_set_header { my ($method, $href, $user, $pass, $domain)=(lc(shift),@_); return if(!(defined $href && ref($href))); return if(!defined $user || !defined $pass); if($method eq 'basic'){ $$href{'Authorization'}='Basic '.encode_base64($user.':'.$pass,''); } if($method eq 'proxy-basic'){ $$href{'Proxy-Authorization'}='Basic '.encode_base64($user.':'.$pass,''); } if($method eq 'ntlm'){ my $o=ntlm_new($user,$pass,$domain); $$href{'whisker'}->{'ntlm_obj'}=$o; $$href{'whisker'}->{'ntlm_step'}=0; $$href{'Authorization'}='NTLM '.ntlm_client($o); } } sub do_auth { goto &auth_set_header; } sub bruteurl { my ($hin, $upre, $upost, $arin, $arout)=@_; my ($U,%hout); return if(!(defined $hin && ref($hin) )); return if(!(defined $arin && ref($arin) )); return if(!(defined $arout && ref($arout))); return if(!defined $upre || length($upre) ==0); return if(!defined $upost || length($upost)==0); http_fixup_request($hin); map { ($U=$_)=~tr/\r\n//d; next if($U eq ''); if(!http_do_request($hin,\%hout,{'uri'=>$upre.$U.$upost})){ if( $hout{'whisker'}->{'http_resp'}==200 || $hout{'whisker'}->{'http_resp'}==403){ push(@{$arout},$U); } } } @$arin; } sub cookie_read { my ($count,$jarref,$href)=(0,@_); return 0 if(!(defined $jarref && ref($jarref))); return 0 if(!(defined $href && ref($href) )); my $lc = $$href{'whisker'}->{'lowercase_incoming_headers'}||0; my $target = $lc ? 'set-cookie' : 'Set-Cookie'; if(!defined $$href{$target}){ return 0;} if(ref($$href{$target})){ # multiple headers foreach ($$href{$target}){ cookie_parse($jarref,$_); $count++; } } else { # single header cookie_parse($jarref,$$href{$target}); $count=1; } return $count; } sub cookie_parse { my ($jarref, $header)=@_; my ($del,$part,@parts,@construct,$cookie_name)=(0); return if(!(defined $jarref && ref($jarref))); return if(!(defined $header && length($header)>0)); @parts=split(/;/,$header); foreach $part (@parts){ if($part=~/^[ \t]*(.+?)=(.*)$/){ my ($name,$val)=($1,$2); if($name=~/^domain$/i){ $val=~s#^http://##; $val=~s#/.*$##; $construct[1]=$val; } elsif($name=~/^path$/i){ $val=~s#/$## if($val ne '/'); $construct[2]=$val; } elsif($name=~/^expires$/i){ $construct[3]=$val; } else { $cookie_name=$name; if($val eq ''){ $del=1; } else { $construct[0]=$val;} } } else { if($part=~/secure/){ $construct[4]=1;} } } if($del){ delete $$jarref{$cookie_name} if defined $$jarref{$cookie_name}; } else { $$jarref{$cookie_name}=\@construct; } } sub cookie_write { my ($jarref, $hin, $override)=@_; my ($name,$out)=('',''); return if(!(defined $jarref && ref($jarref))); return if(!(defined $hin && ref($hin) )); $override=$override||0; $$hin{'whisker'}->{'ssl'}=$$hin{'whisker'}->{'ssl'}||0; foreach $name (keys %$jarref){ next if($name eq ''); next if($$hin{'whisker'}->{'ssl'}==0 && $$jarref{$name}->[4]>0); if($override || ($$hin{'whisker'}->{'host'}=~/$$jarref{$name}->[1]$/i && $$hin{'whisker'}->{'uri'}=~/$$jarref{$name}->[2]/i)){ $out.="$name=$$jarref{$name}->[0];"; } } if($out ne ''){ $$hin{'Cookie'}=$out; } } sub cookie_get { my ($jarref,$name)=@_; return undef if(!(defined $jarref && ref($jarref))); if(defined $$jarref{$name}){ return @{$$jarref{$name}};} return undef; } sub cookie_set { my ($jarref,$name,$value,$domain,$path,$expire,$secure)=@_; my @construct; return if(!(defined $jarref && ref($jarref))); return if($name eq ''); if($value eq ''){ delete $$jarref{$name}; return;} $path=$path||'/'; $secure=$secure||0; @construct=($value,$domain,$path,$expire,$secure); $$jarref{$name}=\@construct; } sub crawl { my ($START, $MAX_DEPTH, $hrtrack, $hrin)=@_; my (%hout, %jar); my ($T, @ST, @links, @tlinks, @vals, @ERRORS)=(''); return if(!(defined $hrtrack && ref($hrtrack))); return if(!(defined $hrin && ref($hrin) )); return if(!defined $START || length($START)==0); $MAX_DEPTH||=2; # $ST[0]=HOST $ST[1]=URL $ST[2]=CWD $ST[3]=HTTPS $ST[4]=SERVER # $ST[5]=PORT $ST[6]=DEPTH @vals=utils_split_uri($START); $ST[1]=$vals[0]; # uri $ST[0]=$vals[2]; # host $ST[5]=$vals[3]; # port $ST[4]=undef; # server tag return if($ST[0] eq ''); # some various informationz... $LW::crawl_config{'host'}=$ST[0]; $LW::crawl_config{'port'}=$ST[5]; $LW::crawl_config{'start'}=$ST[1]; $$hrin{'whisker'}->{'host'}=$ST[0]; $$hrin{'whisker'}->{'port'}=$ST[5]; $$hrin{'whisker'}->{'lowercase_incoming_headers'}=1; # makes life easier http_fixup_request($hrin); # this is so callbacks can access internals via references $LW::crawl_config{'ref_links'}=\@links; $LW::crawl_config{'ref_jar'}=\%jar; $LW::crawl_config{'ref_hin'}=$hrin; $LW::crawl_config{'ref_hout'}=\%hout; %LW::crawl_referrers=(); # empty out existing referrers %LW::crawl_server_tags=(); %LW::crawl_offsites=(); %LW::crawl_cookies=(); %LW::crawl_forms=(); push @links, \@{[$ST[1],1,($vals[1] eq 'https')?1:0]}; while(@links){ my $C=shift @links; $ST[1]=$C->[0]; # url $ST[6]=$C->[1]; # depth $ST[3]=$C->[2]; # https next if(defined $$hrtrack{$ST[1]} && $$hrtrack{$ST[1]} ne '?'); if($ST[6] > $MAX_DEPTH){ $$hrtrack{$ST[1]}='?' if($LW::crawl_config{'save_skipped'}>0); next; } $ST[2]=utils_get_dir($ST[1]); $$hrin{'whisker'}->{'uri'}=$ST[1]; $$hrin{'whisker'}->{'ssl'}=$ST[3]; my $result = crawl_do_request($hrin,\%hout); if($result==1 || $result==2){ push @ERRORS, "Error on making request for '$ST[1]': $hout{'whisker'}->{'error'}"; next; } if($result==0 || $result==4){ $$hrtrack{$ST[1]}=$hout{'whisker'}->{'http_resp'}; } if($result==3 || $result==5){ $$hrtrack{$ST[1]}='?' if($LW::crawl_config{'save_skipped'}>0); } if(defined $hout{'server'}){ if(!defined $ST[4]){ # server tag $ST[4]=$hout{'server'}; } $LW::crawl_server_tags{$hout{'server'}}++; } if(defined $hout{'set-cookie'}){ if($LW::crawl_config{'save_cookies'}>0){ if(ref($hout{'set-cookie'})){ foreach (@{$hout{'set-cookie'}}){ $LW::crawl_cookies{$_}++; } } else { $LW::crawl_cookies{$hout{'set-cookie'}}++; } } if($LW::crawl_config{'reuse_cookies'}>0){ cookie_read(\%jar,\%hout); } } next if($result==4 || $result==5); next if(scalar @links > $LW::crawl_config{'url_limit'}); if($result==0){ # page should be parsed if($LW::crawl_config{'source_callback'} != 0 && ref($LW::crawl_config{'source_callback'})){ &{$LW::crawl_config{'source_callback'}}($hrin,\%hout); } LW::html_find_tags(\$hout{'whisker'}->{'data'}, \&crawl_extract_links_test); $LW::crawl_config{'stats_html'}++; # count how many pages we've parsed } if($result==3){ # follow the move via location header push @LW::crawl_urls, $hout{'location'}; } foreach $T (@LW::crawl_urls){ $T=~tr/\0\r\n//d; # the NULL character is a bug that's somewhere next if (length($T)==0); next if ($T=~/^javascript:/i); # stupid javascript next if ($T=~/^#/i); # fragment if($LW::crawl_config{'callback'} != 0){ next if &{$LW::crawl_config{'callback'}}($T,@ST); } push(@{$LW::crawl_referrers{$T}}, $ST[1]) if( $LW::crawl_config{'save_referrers'}>0 ); $T=utils_absolute_uri($T,$ST[1],1) if($LW::crawl_config{'normalize_uri'}>0); @vals=utils_split_uri($T); # slashdot bug: workaround for the following fsck'd html code: #
if($LW::crawl_config{'slashdot_bug'} > 0 && substr($vals[0],0,2) eq '//'){ if($ST[3]==1){ $T='https:'.$T; } else { $T='http:' .$T; } @vals=utils_split_uri($T); } # make sure URL is on same host, port, and protocol if( (defined $vals[2] && $vals[2] ne $ST[0]) || (defined $vals[3] && $vals[3] != $ST[5]) || (defined $vals[1] && ($vals[1] ne 'http' && $vals[1] ne 'https'))){ if($LW::crawl_config{'save_offsites'}>0){ $LW::crawl_offsites{utils_join_uri(@vals)}++; } next; } if(substr($vals[0],0,1) ne '/'){ $vals[0]=$ST[2].$vals[0]; } my $where=rindex($vals[0],'.'); my $EXT=''; if($where >= 0){ $EXT = substr($vals[0], $where+1, length($vals[0])-$where); } $EXT=~tr/0-9a-zA-Z//cd; # yucky chars will puke regex below if($EXT ne '' && $LW::crawl_config{'skip_ext'}=~/\.$EXT /i){ if($LW::crawl_config{'save_skipped'}>0){ $$hrtrack{$vals[0]}='?'; } next; } if(defined $vals[4] && $LW::crawl_config{'use_params'}>0){ if($LW::crawl_config{'params_double_record'}>0 && !defined $$hrtrack{$vals[0]}){ $$hrtrack{$vals[0]}='?'; } $vals[0]=$vals[0].'?'.$vals[4]; } next if(defined $$hrtrack{$vals[0]}); push @links, \@{[$vals[0],$ST[6]+1, ($vals[1] eq 'https')?1:0]}; } # foreach @LW::crawl_urls=(); # reset for next round } # while my $key; foreach $key (keys %LW::crawl_config){ delete $LW::crawl_config{$key} if (substr($key,0,4) eq 'ref_');} $LW::crawl_config{'stats_reqs'}=$hout{'whisker'}->{'stats_reqs'}; $LW::crawl_config{'stats_syns'}=$hout{'whisker'}->{'stats_syns'}; } # end sub crawl sub crawl_get_config { my $key=shift; return $LW::crawl_config{$key}; } sub crawl_set_config { return if(!defined $_[0]); my %opts=@_; while( my($k,$v)=each %opts){ $LW::crawl_config{lc($k)}=$v; } } sub crawl_extract_links_test { my ($TAG, $hr, $dr, $start, $len)=(lc(shift),@_); my $t; # this should be most of the time... return undef if(!defined ($t=$LW::crawl_linktags{$TAG})); return undef if(!scalar %$hr); # fastpath quickie while( my ($key,$val)= each %$hr){ # normalize element values $$hr{lc($key)} = $val; } if(ref($t)){ foreach (@$t){ push(@LW::crawl_urls,$$hr{$_}) if(defined $$hr{$_}); } } else { push(@LW::crawl_urls,$$hr{$t}) if(defined $$hr{$t}); } if($TAG eq 'form' && defined $$hr{action}){ my $u=$LW::crawl_config{'ref_hout'}->{'whisker'}->{'uri'}; $LW::crawl_forms{utils_absolute_uri($$hr{action},$u,1)}++; } return undef; } sub crawl_do_request { my ($hrin,$hrout) = @_; my $ret; if($LW::crawl_config{'do_head'}){ my $save=$$hrin{'whisker'}->{'method'}; $$hrin{'whisker'}->{'method'}='HEAD'; $ret=http_do_request($hrin,$hrout); $$hrin{'whisker'}->{'method'}=$save; return 2 if($ret==2); # if there was connection error, do not continue if($ret==0){ # successful request if($$hrout{'whisker'}->{'http_resp'}==501){ # HEAD not allowed $LW::crawl_config{'do_head'}=0; # no more HEAD requests } if($$hrout{'whisker'}->{'http_resp'} <308 && $$hrout{'whisker'}->{'http_resp'} >300){ if($LW::crawl_config{'follow_moves'} >0){ return 3 if(defined $$hrout{'location'}); } return 5; # not avail } if($$hrout{'whisker'}->{'http_resp'}==200){ # no content-type is treated as text/htm if(defined $$hrout{'content-type'} && $$hrout{'content-type'}!~/^text\/htm/i){ return 4; } # fall through to GET request below } } # request errors are essentially redone via GET, below } return http_do_request($hrin,$hrout); } sub dumper { my %what=@_; my ($final,$k,$v)=(''); while( ($k,$v)=each %what){ return 'ERROR' if(ref($k) || !ref($v)); $final.="\$$k = "._dump(1,$v); $final=~s#,\n$##; $final.=";\n"; } return $final; } sub dumper_writefile { my $file=shift; my $output=dumper(@_); return 1 if(!open(OUT,">$file") || $output eq 'ERROR'); print OUT $output; close(OUT); } sub _dump { # dereference and dump an element my ($t, $ref)=@_; my ($out,$k,$v)=(''); if(ref($ref) eq 'HASH'){ $out.="{\n"; while( ($k,$v)=each %$ref){ $out.= "\t"x$t; $out.=_dumpd($k).' => '; if(ref($v)){ $out.=_dump($t+1,$v); } else { $out.=_dumpd($v).",\n"; }} $out=~s#,\n$#\n#; $out.="\t"x$t; $out.="},\n"; } elsif(ref($ref) eq 'ARRAY'){ $out.="[\n"; foreach $v (@$ref) { $out.= "\t"x$t; if(ref($v)){ $out.=_dump($t+1,$v); } else { $out.=_dumpd($v).",\n"; }} $out=~s#,\n$#\n#; $out.="\t"x$t; $out.="],\n"; } elsif(ref($ref) eq 'SCALAR'){ $out.=_dumpd($$ref); } elsif(ref($ref) eq 'REF'){ $out.=_dump($t,$$ref); } elsif(ref($ref)){ $out.='"" # unsupported reference type: '; $out.=ref($ref); $out.="\n"; } else { # normal scalar $out.=_dumpd($ref); } return $out; } sub _dumpd { # escape a scalar string my $v=shift; return "'$v'" if($v!~tr/!-~//c); $v=~s#\\#\\\\#g; $v=~s#"#\\"#g; $v=~s#\r#\\r#g; $v=~s#\n#\\n#g; $v=~s#\0#\\0#g; $v=~s#\t#\\t#g; $v=~s#([^!-~ ])#sprintf('\\x%02x',ord($1))#eg; return "\"$v\""; } sub get_page { my ($URL,$hr)=(shift,shift); return (undef,"No URL supplied") if(length($URL)==0); my (%req,%resp); my $rptr; if(defined $hr && ref($hr)){ $rptr=$hr; } else { $rptr=\%req; LW::http_init_request(\%req); } LW::utils_split_uri($URL,$rptr); # this is newer >=1.1 syntax LW::http_fixup_request($rptr); if(http_do_request($rptr,\%resp)){ return (undef,$resp{'whisker'}->{'error'}); } return ($resp{'whisker'}->{'code'}, $resp{'whisker'}->{'data'}); } sub get_page_hash { my ($URL,$hr)=(shift,shift); return undef if(length($URL)==0); my (%req,%resp); my $rptr; if(defined $hr && ref($hr)){ $rptr=$hr; } else { $rptr=\%req; LW::http_init_request(\%req); } LW::utils_split_uri($URL,$rptr); # this is newer >=1.1 syntax LW::http_fixup_request($rptr); my $r=http_do_request($rptr,\%resp); $resp{whisker}->{get_page_hash}=$r; return \%resp; } sub get_page_to_file { my ($URL, $filepath, $hr)=@_; return undef if(length($URL)==0); return undef if(length($filepath)==0); my (%req,%resp); my $rptr; if(defined $hr && ref($hr)){ $rptr=$hr; } else { $rptr=\%req; LW::http_init_request(\%req); } LW::utils_split_uri($URL,$rptr); # this is newer >=1.1 syntax LW::http_fixup_request($rptr); if(http_do_request($rptr,\%resp)){ return undef; } open(OUT,">$filepath") || return undef; binmode(OUT); # stupid Windows print OUT $resp{'whisker'}->{'data'}; close(OUT); return $resp{'whisker'}->{'code'}; } sub upload_file { my ($URL, $filepath, $paramname, $hr)=@_; return undef if(length($URL) ==0); return undef if(length($filepath) ==0); return undef if(length($paramname)==0); return undef if(!(-e $filepath && -f $filepath)); my (%req,%resp,%multi); my $rptr; if(defined $hr && ref($hr)){ $rptr=$hr; } else { $rptr=\%req; LW::http_init_request(\%req); } LW::utils_split_uri($URL,$rptr); # this is newer >=1.1 syntax $rptr{'whisker'}->{'method'}='POST'; LW::http_fixup_request($rptr); LW::multipart_setfile(\%multi,$filepath,$paramname); LW::multipart_write(\%multi,$rptr); if(http_do_request($rptr,\%resp)){ return undef; } return $resp{'whisker'}->{'code'}; } sub download_file { goto &LW::get_page_to_file; } #sub encode_base64; #sub decode_base64; sub encode_base64_perl { # ripped from MIME::Base64 my $res = ""; my $eol = $_[1]; $eol = "\n" unless defined $eol; pos($_[0]) = 0; while ($_[0] =~ /(.{1,45})/gs) { $res .= substr(pack('u', $1), 1); chop($res);} $res =~ tr|` -_|AA-Za-z0-9+/|; my $padding = (3 - length($_[0]) % 3) % 3; $res =~ s/.{$padding}$/'=' x $padding/e if $padding; if (length $eol) { $res =~ s/(.{1,76})/$1$eol/g; } $res; } sub decode_base64_perl { # ripped from MIME::Base64 my $str = shift; my $res = ""; $str =~ tr|A-Za-z0-9+=/||cd; $str =~ s/=+$//; # remove padding $str =~ tr|A-Za-z0-9+/| -_|; # convert to uuencoded format while ($str =~ /(.{1,60})/gs) { my $len = chr(32 + length($1)*3/4); # compute length byte $res .= unpack("u", $len . $1 ); # uudecode }$res;} sub encode_str2uri { # normal hex encoding my $str=shift; $str=~s/([^\/])/sprintf("%%%02x",ord($1))/ge; return $str;} sub encode_str2ruri { # random normal hex encoding my @T=split(//,shift); my $s; foreach (@T) { if(m#;=:&@\?#){ $s.=$_; next; } if((rand()*2)%2 == 1){ $s.=sprintf("%%%02x",ord($_)) ; }else{ $s.=$_; } } return $s; } sub encode_unicode { my $r=''; foreach $c (split(//,shift)){ $r.=pack("v",ord($c)); } return $r; } sub forms_read { my $dr=shift; return undef if(!ref($dr) || length($$dr)==0); @LW::forms_found=(); LW::html_find_tags($dr,\&forms_parse_callback); if(scalar %LW::forms_current){ my %DUP=%LW::forms_current; push(@LW::forms_found,\%DUP); } return @LW::forms_found; } sub forms_write { my $hr=shift; return undef if(!ref($hr) || !(scalar %$hr)); return undef if(!defined $$hr{"\0"}); my $t='[0].'" method="'; $t.=$$hr{"\0"}->[1].'" action="'.$$hr{"\0"}->[2].'"'; if(defined $$hr{"\0"}->[3]){ $t.=' '.join(' ',@{$$hr{"\0"}->[3]}); } $t.=">\n"; while( my($name,$ar)=each(%$hr) ){ next if($name eq "\0"); foreach $a (@$ar){ my $P=''; $P=' '.join(' ', @{$$a[2]}) if(defined $$a[2]); $t.="\t"; if($$a[0] eq 'textarea'){ $t.="\n"; } elsif($$a[0]=~m/^input-(.+)$/){ $t.="\n"; } elsif($$a[0] eq 'option'){ $t.="\t