# libwhisker v1.6 # libwhisker is a collection of routines used by whisker # # libwhisker copyright 2000,2001,2002 rfp.labs # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # # More information can be found at http://www.wiretrip.net/rfp/ # Libwhisker mailing list and resources are also available at # http://sourceforge.net/projects/whisker/ # package LW; use 5.004; $LW::VERSION="1.6"; ####### external module tests ################################### BEGIN { ## LW module manager stuff ## %LW::available = (); $LW::LW_HAS_SOCKET = 0; $LW::LW_HAS_SSL = 0; $LW::LW_SSL_LIB = 0; $LW::LW_NONBLOCK_CONNECT= 0; ## binary helper - may contain functions substituted further down ## eval "use LW::bin"; # do we have libwhisker binary helpers? if($@){ $LW::available{'LW::bin'}=$LW::bin::VERSION; } ## encode subpackage ## eval "require MIME::Base64"; if($@){ *encode_base64 = \&encode_base64_perl; *decode_base64 = \&decode_base64_perl; } else{ # MIME::Base64 typically has faster C versions $LW::available{'mime::base64'}=$MIME::Base64::VERSION; *encode_base64 = \&MIME::Base64::encode_base64; *decode_base64 = \&MIME::Base64::decode_base64;} ## md5 subpackage ## eval "require MD5"; if(!$@){ $LW::available{'md5'}=$MD5::VERSION;} ## http subpackage ## eval "use Socket"; # do we have socket support? if($@){ $LW::LW_HAS_SOCKET=0; } else { $LW::LW_HAS_SOCKET=1; $LW::available{'socket'}=$Socket::VERSION;} if($LW_HAS_SOCKET){ eval "use Net::SSLeay"; # do we have SSL support? if($@){ $LW::LW_HAS_SSL=0; } else { $LW::LW_HAS_SSL=1; $LW::LW_SSL_LIB=1; $LW::available{'net::ssleay'}=$Net::SSLeay::VERSION; Net::SSLeay::load_error_strings(); Net::SSLeay::SSLeay_add_ssl_algorithms(); Net::SSLeay::randomize();} if(!$LW::LW_HAS_SSL){ eval "use Net::SSL"; # different SSL lib if($@){ $LW::LW_HAS_SSL=0; } else { $LW::LW_HAS_SSL=1; $LW::LW_SSL_LIB=2; $LW::available{'net::ssl'}=$Net::SSL::VERSION;} } ## non-blocking IO ## if($^O!~/Win32/){ eval "use POSIX qw(:errno_h :fcntl_h)"; # better if(!$@){ $LW::LW_NONBLOCK_CONNECT=1; } } } # if($LW_HAS_SOCKET) } # BEGIN ####### package variables ####################################### ## crawl subpackage ## %LW::crawl_config=( 'save_cookies' => 0, 'reuse_cookies' => 1, 'save_offsites' => 0, 'follow_moves' => 1, 'url_limit' => 1000, 'use_params' => 0, 'params_double_record' => 0, 'skip_ext' => '.gif .jpg .gz .mp3 .swf .zip ', 'save_skipped' => 0, 'save_referrers'=> 0, 'do_head' => 0, 'callback' => 0, 'slashdot_bug' => 1, 'normalize_uri' => 1, 'source_callback' => 0 ); @LW::crawl_urls=();; %LW::crawl_server_tags=(); %LW::crawl_referrers=(); %LW::crawl_offsites=(); %LW::crawl_cookies=(); %LW::crawl_forms=(); %LW::crawl_temp=(); # this idea/structure was taken from HTML::LinkExtor.pm, # copyright 2000 Gisle Aas and Michael A. Chase %LW::crawl_linktags = ( 'a' => 'href', 'applet' => [qw(codebase archive code)], 'area' => 'href', 'base' => 'href', 'bgsound' => 'src', 'blockquote' => 'cite', 'body' => 'background', 'del' => 'cite', 'embed' => [qw(src pluginspage)], 'form' => 'action', 'frame' => [qw(src longdesc)], 'iframe' => [qw(src longdesc)], 'ilayer' => 'background', 'img' => [qw(src lowsrc longdesc usemap)], 'input' => [qw(src usemap)], 'ins' => 'cite', 'isindex' => 'action', 'head' => 'profile', 'layer' => [qw(background src)], 'link' => 'href', 'object' => [qw(codebase data archive usemap)], 'q' => 'cite', 'script' => 'src', 'table' => 'background', 'td' => 'background', 'th' => 'background', 'xmp' => 'href', ); ## forms subpackage ## @LW::forms_found=(); %LW::forms_current=(); ## http subpackage ## my $SOCKSTATE=0; my $TIMEOUT=10; # default my ($STATS_REQS,$STATS_SYNS)=(0,0); my ($LAST_HOST,$LAST_INET_ATON,$LAST_SSL)=('','',0); my ($OUTGOING_QUEUE,$INCOMING_QUEUE)=('',''); my ($SSL_CTX, $SSL_THINGY); my %http_host_cache=(); # order is following: # [0] - SOCKET # [1] - $SOCKSTATE # [2] - INET_ATON # [3] - $SSL_CTX # [4] - $SSL_THINGY # [5] - $OUTGOING_QUEUE # [6] - $INCOMING_QUEUE # [7] - $STATS_SYNS # [8] - $STATS_REQS my $Z; # array ref to current host specs sub anti_ids { my ($rhin,$modes)=(shift,shift); my (@T,$x,$c,$s,$y); my $ENCODED=0; my $W = $$rhin{'whisker'}; return if(!(defined $rhin && ref($rhin))); # in case they didn't do it already $$rhin{'whisker'}->{'uri_orig'}=$$rhin{'whisker'}->{'uri'}; # note: order is important! # mode 9 - session splicing if($modes=~/9/){ $$rhin{'whisker'}->{'ids_session_splice'}=1; } # mode 4 - prepend long random string if($modes=~/4/){$s=''; if($$W{'uri'}=~m#^/#){ $y=&utils_randstr; $s.=$y while(length($s)<512); $$W{'uri'}="/$s/..".$$W{'uri'}; } } # mode 7 - (windows) random case sensitivity if($modes=~/7/){ @T=split(//,$$W{'uri'}); for($x=0;$x<(scalar @T);$x++){ if((rand()*2)%2 == 1){ $T[$x]=uc($T[$x]);}} $$W{'uri'}=join('',@T); } # mode 2 - directory self-reference (/./) if($modes=~/2/){ $$W{'uri'}=~s#/#/./#g; } # mode 8 - windows directory separator (\) if($modes=~/8/){ $$W{'uri'}=~s#/#\\#g; $$W{'uri'}=~s#^\\#/#; $$W{'uri'}=~s#^(http|file|ftp|nntp|news|telnet):\\#$1://#; $$W{'uri'}=~s#\\$#/#; } # mode 1 - random URI (non-UTF8) encoding if($modes=~/1/){ if($ENCODED==0){ $$W{'uri'}=encode_str2ruri($$W{'uri'}); $ENCODED=1;} } # mode 5 - fake parameter if($modes=~/5/){ ($s,$y)=(&utils_randstr,&utils_randstr); $$W{'uri'}="/$s.html%3f$y=/../$$W{'uri'}"; } # mode 3 - premature URL ending if($modes=~/3/){ $s=&utils_randstr; $$W{'uri'}="/%20HTTP/1.1%0D%0A%Accept%3A%20$s/../..$$W{'uri'}"; } # mode 6 - TAB as request spacer if($modes=~/6/){ $$W{'req_spacer'}="\t"; } } # end anti_ids sub auth_brute_force { my ($auth_method, $hrin, $user, $pwordref, $dom)=@_; my ($P,%hout); return undef if(!defined $auth_method || length($auth_method)==0); return undef if(!defined $user || length($user) ==0); return undef if(!(defined $hrin && ref($hrin) )); return undef if(!(defined $pwordref && ref($pwordref))); map { ($P=$_)=~tr/\r\n//d; auth_set_header($auth_method,$hrin,$user,$P,$dom); return undef if(http_do_request($hrin,\%hout)); return $P if($hout{'whisker'}->{'http_resp'} ne 401); } @$pwordref; return undef;} sub auth_set_header { my ($method, $href, $user, $pass, $domain)=(lc(shift),@_); return if(!(defined $href && ref($href))); return if(!defined $user || !defined $pass); if($method eq 'basic'){ $$href{'Authorization'}='Basic '.encode_base64($user.':'.$pass,''); } if($method eq 'proxy-basic'){ $$href{'Proxy-Authorization'}='Basic '.encode_base64($user.':'.$pass,''); } if($method eq 'ntlm'){ my $o=ntlm_new($user,$pass,$domain); $$href{'whisker'}->{'ntlm_obj'}=$o; $$href{'whisker'}->{'ntlm_step'}=0; $$href{'Authorization'}='NTLM '.ntlm_client($o); } } sub do_auth { goto &auth_set_header; } sub bruteurl { my ($hin, $upre, $upost, $arin, $arout)=@_; my ($U,%hout); return if(!(defined $hin && ref($hin) )); return if(!(defined $arin && ref($arin) )); return if(!(defined $arout && ref($arout))); return if(!defined $upre || length($upre) ==0); return if(!defined $upost || length($upost)==0); http_fixup_request($hin); map { ($U=$_)=~tr/\r\n//d; next if($U eq ''); if(!http_do_request($hin,\%hout,{'uri'=>$upre.$U.$upost})){ if( $hout{'whisker'}->{'http_resp'}==200 || $hout{'whisker'}->{'http_resp'}==403){ push(@{$arout},$U); } } } @$arin; } sub cookie_read { my ($count,$jarref,$href)=(0,@_); return 0 if(!(defined $jarref && ref($jarref))); return 0 if(!(defined $href && ref($href) )); my $lc = $$href{'whisker'}->{'lowercase_incoming_headers'}||0; my $target = $lc ? 'set-cookie' : 'Set-Cookie'; if(!defined $$href{$target}){ return 0;} if(ref($$href{$target})){ # multiple headers foreach ($$href{$target}){ cookie_parse($jarref,$_); $count++; } } else { # single header cookie_parse($jarref,$$href{$target}); $count=1; } return $count; } sub cookie_parse { my ($jarref, $header)=@_; my ($del,$part,@parts,@construct,$cookie_name)=(0); return if(!(defined $jarref && ref($jarref))); return if(!(defined $header && length($header)>0)); @parts=split(/;/,$header); foreach $part (@parts){ if($part=~/^[ \t]*(.+?)=(.*)$/){ my ($name,$val)=($1,$2); if($name=~/^domain$/i){ $val=~s#^http://##; $val=~s#/.*$##; $construct[1]=$val; } elsif($name=~/^path$/i){ $val=~s#/$## if($val ne '/'); $construct[2]=$val; } elsif($name=~/^expires$/i){ $construct[3]=$val; } else { $cookie_name=$name; if($val eq ''){ $del=1; } else { $construct[0]=$val;} } } else { if($part=~/secure/){ $construct[4]=1;} } } if($del){ delete $$jarref{$cookie_name} if defined $$jarref{$cookie_name}; } else { $$jarref{$cookie_name}=\@construct; } } sub cookie_write { my ($jarref, $hin, $override)=@_; my ($name,$out)=('',''); return if(!(defined $jarref && ref($jarref))); return if(!(defined $hin && ref($hin) )); $override=$override||0; $$hin{'whisker'}->{'ssl'}=$$hin{'whisker'}->{'ssl'}||0; foreach $name (keys %$jarref){ next if($name eq ''); next if($$hin{'whisker'}->{'ssl'}==0 && $$jarref{$name}->[4]>0); if($override || ($$hin{'whisker'}->{'host'}=~/$$jarref{$name}->[1]$/i && $$hin{'whisker'}->{'uri'}=~/$$jarref{$name}->[2]/i)){ $out.="$name=$$jarref{$name}->[0];"; } } if($out ne ''){ $$hin{'Cookie'}=$out; } } sub cookie_get { my ($jarref,$name)=@_; return undef if(!(defined $jarref && ref($jarref))); if(defined $$jarref{$name}){ return @{$$jarref{$name}};} return undef; } sub cookie_set { my ($jarref,$name,$value,$domain,$path,$expire,$secure)=@_; my @construct; return if(!(defined $jarref && ref($jarref))); return if($name eq ''); if($value eq ''){ delete $$jarref{$name}; return;} $path=$path||'/'; $secure=$secure||0; @construct=($value,$domain,$path,$expire,$secure); $$jarref{$name}=\@construct; } sub crawl { my ($START, $MAX_DEPTH, $hrtrack, $hrin)=@_; my (%hout, %jar); my ($T, @ST, @links, @tlinks, @vals, @ERRORS)=(''); return if(!(defined $hrtrack && ref($hrtrack))); return if(!(defined $hrin && ref($hrin) )); return if(!defined $START || length($START)==0); $MAX_DEPTH||=2; # $ST[0]=HOST $ST[1]=URL $ST[2]=CWD $ST[3]=HTTPS $ST[4]=SERVER # $ST[5]=PORT $ST[6]=DEPTH @vals=utils_split_uri($START); $ST[1]=$vals[0]; # uri $ST[0]=$vals[2]; # host $ST[5]=$vals[3]; # port $ST[4]=undef; # server tag return if($ST[0] eq ''); # some various informationz... $LW::crawl_config{'host'}=$ST[0]; $LW::crawl_config{'port'}=$ST[5]; $LW::crawl_config{'start'}=$ST[1]; $$hrin{'whisker'}->{'host'}=$ST[0]; $$hrin{'whisker'}->{'port'}=$ST[5]; $$hrin{'whisker'}->{'lowercase_incoming_headers'}=1; # makes life easier http_fixup_request($hrin); # this is so callbacks can access internals via references $LW::crawl_config{'ref_links'}=\@links; $LW::crawl_config{'ref_jar'}=\%jar; $LW::crawl_config{'ref_hin'}=$hrin; $LW::crawl_config{'ref_hout'}=\%hout; %LW::crawl_referrers=(); # empty out existing referrers %LW::crawl_server_tags=(); %LW::crawl_offsites=(); %LW::crawl_cookies=(); %LW::crawl_forms=(); push @links, \@{[$ST[1],1,($vals[1] eq 'https')?1:0]}; while(@links){ my $C=shift @links; $ST[1]=$C->[0]; # url $ST[6]=$C->[1]; # depth $ST[3]=$C->[2]; # https next if(defined $$hrtrack{$ST[1]} && $$hrtrack{$ST[1]} ne '?'); if($ST[6] > $MAX_DEPTH){ $$hrtrack{$ST[1]}='?' if($LW::crawl_config{'save_skipped'}>0); next; } $ST[2]=utils_get_dir($ST[1]); $$hrin{'whisker'}->{'uri'}=$ST[1]; $$hrin{'whisker'}->{'ssl'}=$ST[3]; my $result = crawl_do_request($hrin,\%hout); if($result==1 || $result==2){ push @ERRORS, "Error on making request for '$ST[1]': $hout{'whisker'}->{'error'}"; next; } if($result==0 || $result==4){ $$hrtrack{$ST[1]}=$hout{'whisker'}->{'http_resp'}; } if($result==3 || $result==5){ $$hrtrack{$ST[1]}='?' if($LW::crawl_config{'save_skipped'}>0); } if(defined $hout{'server'}){ if(!defined $ST[4]){ # server tag $ST[4]=$hout{'server'}; } $LW::crawl_server_tags{$hout{'server'}}++; } if(defined $hout{'set-cookie'}){ if($LW::crawl_config{'save_cookies'}>0){ if(ref($hout{'set-cookie'})){ foreach (@{$hout{'set-cookie'}}){ $LW::crawl_cookies{$_}++; } } else { $LW::crawl_cookies{$hout{'set-cookie'}}++; } } if($LW::crawl_config{'reuse_cookies'}>0){ cookie_read(\%jar,\%hout); } } next if($result==4 || $result==5); next if(scalar @links > $LW::crawl_config{'url_limit'}); if($result==0){ # page should be parsed if($LW::crawl_config{'source_callback'} != 0 && ref($LW::crawl_config{'source_callback'})){ &{$LW::crawl_config{'source_callback'}}($hrin,\%hout); } LW::html_find_tags(\$hout{'whisker'}->{'data'}, \&crawl_extract_links_test); $LW::crawl_config{'stats_html'}++; # count how many pages we've parsed } if($result==3){ # follow the move via location header push @LW::crawl_urls, $hout{'location'}; } foreach $T (@LW::crawl_urls){ $T=~tr/\0\r\n//d; # the NULL character is a bug that's somewhere next if (length($T)==0); next if ($T=~/^javascript:/i); # stupid javascript next if ($T=~/^#/i); # fragment if($LW::crawl_config{'callback'} != 0){ next if &{$LW::crawl_config{'callback'}}($T,@ST); } push(@{$LW::crawl_referrers{$T}}, $ST[1]) if( $LW::crawl_config{'save_referrers'}>0 ); $T=utils_absolute_uri($T,$ST[1],1) if($LW::crawl_config{'normalize_uri'}>0); @vals=utils_split_uri($T); # slashdot bug: workaround for the following fsck'd html code: #
\n"; return $t; } { # these are private static variables for &forms_parse_html %FORMS_ELEMENTS=( 'form'=>1, 'input'=>1, 'textarea'=>1, 'button'=>1, 'select'=>1, 'option'=>1, '/select'=>1 ); $CURRENT_SELECT=undef; $UNKNOWNS=0; sub forms_parse_callback { my ($TAG, $hr, $dr, $start, $len)=(lc(shift),@_); my ($saveparam, $parr, $key)=(0,undef,''); # fastpath shortcut return undef if(!defined $FORMS_ELEMENTS{$TAG}); LW::utils_lowercase_hashkeys($hr) if(scalar %$hr); if($TAG eq 'form'){ if(scalar %LW::forms_current){ # save last form my %DUP=%LW::forms_current; push (@LW::forms_found, \%DUP); %LW::forms_current=(); } $LW::forms_current{"\0"}=[$$hr{name},$$hr{method}, $$hr{action},undef]; delete $$hr{'name'}; delete $$hr{'method'}; delete $$hr{'action'}; $key="\0"; $parr=\@{$LW::forms_current{"\0"}}; $UNKNOWNS=0; } elsif($TAG eq 'input'){ $$hr{type}='text' if(!defined $$hr{type}); $$hr{name}='unknown'.$UNKNOWNS++ if(!defined $$hr{name}); $key=$$hr{name}; push( @{$LW::forms_current{$key}}, (['input-'.$$hr{type},$$hr{value},undef]) ); delete $$hr{'name'}; delete $$hr{'type'}; delete $$hr{'value'}; $parr=\@{$LW::forms_current{$key}->[-1]}; } elsif($TAG eq 'select'){ $$hr{name}='unknown'.$UNKNOWNS++ if(!defined $$hr{name}); $key=$$hr{name}; push( @{$LW::forms_current{$key}}, (['select',undef,undef]) ); $parr=\@{$LW::forms_current{$key}->[-1]}; $CURRENT_SELECT=$key; delete $$hr{name}; } elsif($TAG eq '/select'){ push( @{$LW::forms_current{$CURRENT_SELECT}}, (['/select',undef,undef]) ); $CURRENT_SELECT=undef; return undef; } elsif($TAG eq 'option'){ return undef if(!defined $CURRENT_SELECT); if(!defined $$hr{value}){ my $stop=index($$dr,'<',$start+$len); return undef if($stop==-1); # MAJOR PUKE $$hr{value}=substr($$dr,$start+$len, ($stop-$start-$len)); $$hr{value}=~tr/\r\n//d; } push( @{$LW::forms_current{$CURRENT_SELECT}}, (['option',$$hr{value},undef]) ); delete $$hr{value}; $parr=\@{$LW::forms_current{$CURRENT_SELECT}->[-1]}; } elsif($TAG eq 'textarea'){ my $stop=$start+$len; # find closing tag do { $stop=index($$dr,'',$stop+2); return undef if($stop==-1); # MAJOR PUKE } while( lc(substr($$dr,$stop+2,8)) ne 'textarea'); $$hr{value}=substr($$dr,$start+$len,($stop-$start-$len)); $$hr{name}='unknown'.$UNKNOWNS++ if(!defined $$hr{name}); $key=$$hr{name}; push( @{$LW::forms_current{$key}}, (['textarea',$$hr{value},undef]) ); $parr=\@{$LW::forms_current{$key}->[-1]}; delete $$hr{'name'}; delete $$hr{'value'}; } else { # button $$hr{name}='unknown'.$UNKNOWNS++ if(!defined $$hr{name}); $key=$$hr{name}; push( @{$LW::forms_current{$key}}, (['button',$$hr{value},undef]) ); } if(scalar %$hr){ my @params=(); foreach $k (keys %$hr){ if(defined $$hr{$k}){ push @params, "$k=\"$$hr{$k}\""; } else { push @params, $k; } } $$parr[2]=\@params; } return undef; }} sub html_find_tags { # use faster binary helper goto &LW::bin::html_find_tags if(defined $LW::available{'lw::bin'}); my ($dataref, $callbackfunc)=@_; return if(!(defined $dataref && ref($dataref) )); return if(!(defined $callbackfunc && ref($callbackfunc))); my ($CURTAG, $ELEMENT, $VALUE, $c, $cc); my ($INCOMMENT,$INTAG,$INSCRIPT)=(0,0,0); my (%TAG, $ret, $start, $tagstart, $commstart, $scriptstart, $x); # YES, this looks like C. In fact, it's my C version ported to # perl. But it's faster and more dependable than any regex mess # someone could come up with. my $LEN = length($$dataref); for ($c=0; $c<$LEN; $c++){ $cc=substr($$dataref,$c,1); if(!$INCOMMENT && !$INTAG && !$INSCRIPT && $cc ne '>' && $cc ne '<'){ next; } if($cc eq '<'){ if($INSCRIPT){ if(lc(substr($$dataref,$c+1,7)) eq '/script'){ $INSCRIPT=0; $TAG{'='}=substr($$dataref, $scriptstart, $c - $scriptstart - 1); } else { next; } } if(substr($$dataref,$c+1,3) eq '!--'){ $INCOMMENT=1; $commstart=$c; $c+=3; } else { $INTAG=1; $c++; $c++ while(substr($$dataref,$c,1)=~/[< \t\r\n]/); $tagstart=$c-1; $CURTAG=''; while(($x=substr($$dataref,$c,1))!~/[ \t\r\n>=]/ && $c < $LEN){ $CURTAG.=$x; $c++;} $c++ if($x ne '>'); $INSCRIPT=1 if($CURTAG eq 'script'); } $cc=substr($$dataref,$c,1); # refresh current char (cc) } if($cc eq '>'){ if($INSCRIPT){ if($CURTAG eq 'script'){ $scriptstart = $c + 1; } else { next; } } if(!$INCOMMENT && $INTAG){ $INTAG=0; $ret=&$callbackfunc($CURTAG,\%TAG, $dataref, $tagstart, $c-$tagstart+1); if(defined $ret && $ret != 0){ $c+=$ret;} $CURTAG=''; %TAG=(); } if($INCOMMENT && substr($$dataref,$c-2,2) eq '--'){ $INCOMMENT=0; $TAG{'='}=substr($$dataref,$commstart+4, $c-$commstart-3); $ret=&$callbackfunc('!--',\%TAG, $dataref, $commstart, $c-$commstart+1); if(defined $ret && $ret != 0){ $c+=$ret;} delete $TAG{'='}; next; } } next if($INCOMMENT); if($INTAG){ $ELEMENT=''; $VALUE=''; # eat whitespace while(substr($$dataref,$c,1)=~/[ \t\r\n]/i){ $c++; } $start=$c; while(substr($$dataref,$c,1)!~/[ \t\r\n=\>]/i && $c < $LEN) { $c++; } $ELEMENT=substr($$dataref,$start,$c-$start); $VALUE=''; if(substr($$dataref,$c,1) ne '>'){ # eat whitespace while(substr($$dataref,$c,1)=~/[ \t\r\n]/i) { $c++; } if(substr($$dataref,$c,1) eq '='){ $c++; $start=$c; my $p = substr($$dataref,$c,1); if($p eq '"' || $p eq '\''){ $c++; $start++; $c++ while(substr($$dataref,$c,1) ne $p && $c < $LEN); $VALUE=substr($$dataref,$start,$c-$start); $c++; } else { $c++ while(substr($$dataref,$c,1)!~/[ \t\r\n\>]/ && $c < $LEN); $VALUE=substr($$dataref,$start,$c-$start); } # eat whitespace while(substr($$dataref,$c,1)=~/[ \t\r\n]/) { $c++; } } } # if $c ne '>' $c--; $TAG{$ELEMENT}=$VALUE; # save element in the hash } }} sub http_init_request { # doesn't return anything my ($hin)=shift; return if(!(defined $hin && ref($hin))); %$hin=(); # clear control hash # control values $$hin{'whisker'}={ req_spacer => ' ', req_spacer2 => ' ', http_ver => '1.1', method => 'GET', method_postfix => '', port => 80, uri => '/', uri_prefix => '', uri_postfix => '', uri_param_sep => '?', host => 'localhost', http_req_trailer => '', timeout => 10, include_host_in_uri => 0, ignore_duplicate_headers=> 1, normalize_incoming_headers => 1, lowercase_incoming_headers => 0, ssl => 0, http_eol => "\x0d\x0a", force_close => 0, force_open => 0, retry => 1, trailing_slurp => 0, force_bodysnatch => 0, INITIAL_MAGIC => 31337 }; # default header values $$hin{'Connection'}='Keep-Alive'; # notice it is now default! $$hin{'User-Agent'}="libwhisker/$LW::VERSION"; # heh } sub http_do_request { my @params = @_; my $retry_count = ${$params[0]}{'whisker'}->{'retry'} || 0; my ($ret, @retry_errors, $auth); return 1 if(!(defined $params[0] && ref($params[0]))); return 1 if(!(defined $params[1] && ref($params[1]))); if(defined $params[2]){ foreach (keys %{$params[2]}){ ${$params[0]}{'whisker'}->{$_}=${$params[2]}{$_};}} $auth=$params[0]->{'Authorization'} if(defined $params[0]->{'Authorization'}); do { if(defined $auth && $auth=~/^NTLM/){ $ret=0; if($params[0]->{'whisker'}->{'ntlm_step'}==0){ $ret=LW::http_do_request_ex($params[0],$params[1]); return 2 if($ret==2); if($ret==0){ return 0 if($params[1]->{'whisker'}->{'code'} == 200); return 1 if($params[1]->{'whisker'}->{'code'} != 401); $params[0]->{'whisker'}->{'ntlm_step'}=1; my $thead=utils_find_lowercase_key($params[1],'www-authenticate'); return 1 if(!defined $thead); return 1 if($thead!~m/^NTLM (.+)$/); $params[0]->{'Authorization'}='NTLM '.ntlm_client( $params[0]->{'whisker'}->{'ntlm_obj'},$1); } } if($ret==0){ delete $params[0]->{'Authorization'} if($params[0]->{'whisker'}->{'ntlm_step'}>1); $ret=LW::http_do_request_ex($params[0],$params[1]); $params[0]->{'Authorization'}=$auth; if($ret>0){ $params[0]->{'whisker'}->{'ntlm_step'}=0; } else { $params[0]->{'whisker'}->{'ntlm_step'}=2; } return $ret if($ret==2||$ret==0); } } else { $ret=LW::http_do_request_ex($params[0],$params[1]); push @{${$params[1]}{'whisker'}->{'retry_errors'}}, @retry_errors if scalar(@retry_errors); return $ret if($ret==0 || $ret==2); } push @retry_errors, ${$params[1]}{'whisker'}->{'error'}; $retry_count--; } while( $retry_count >= 0); # if we get here, we still had errors, but no more retries return 1; } sub http_do_request_ex { my ($hin, $hout, $hashref)=@_; my ($temp,$vin,$resp,$S,$a,$b,$vout,@c,$c,$res)=(1,''); my $W; # shorthand alias for the {'whisker'} hash return 1 if(!(defined $hin && ref($hin) )); return 1 if(!(defined $hout && ref($hout))); %$hout=(); # clear output hash $$hout{whisker}->{uri}=$$hin{whisker}->{uri}; # for tracking purposes $$hout{whisker}->{'INITIAL_MAGIC'}=31338; # we can tell requests from responses if($LW::LW_HAS_SOCKET==0){ $$hout{'whisker'}->{'error'}='Socket support not available'; return 2;} if(!defined $$hin{'whisker'} || !defined $$hin{'whisker'}->{'INITIAL_MAGIC'} || $$hin{'whisker'}->{'INITIAL_MAGIC'}!=31337 ){ $$hout{'whisker'}->{'error'}='Input hash not initialized'; return 2; } if(defined $hashref){ foreach (keys %$hashref){ $$hin{'whisker'}->{$_}=$$hashref{$_};}} # if we want anti-IDS, make a copy and setup new values if(defined $$hin{'whisker'}->{'anti_ids'}){ my %copy=%{$hin}; anti_ids(\%copy,$$hin{'whisker'}->{'anti_ids'}); $W = $copy{'whisker'}; } else { $W = $$hin{'whisker'}; } if($$W{'ssl'}>0 && $LW::LW_HAS_SSL!=1){ $$hout{'whisker'}->{'error'}='SSL not available'; return 2;} $TIMEOUT=$$W{'timeout'}||10; my $cache_key = defined $$W{'proxy_host'} ? join(':',$$W{'proxy_host'},$$W{'proxy_port'}) : join(':',$$W{'host'},$$W{'port'}); if(!defined $http_host_cache{$cache_key}){ # make new entry push(@{$http_host_cache{$cache_key}}, undef, # SOCKET $$Z[0] 0, # $SOCKSTATE $$Z[1] undef, # INET_ATON $$Z[2] undef, # $SSL_CTX $$Z[3] undef, # $SSL_THINGY $$Z[4] '', # $OUTGOING_QUEUE $$Z[5] '', # $INCOMING_QUEUE $$Z[6] 0, # $STATS_SYNS $$Z[7] 0, # $STATS_REQS $$Z[8] undef ) # SSL session ID $$Z[9] } # NOTE: the 'Z' reference will be going away in future versions... $Z = $http_host_cache{$cache_key}; # use $chost/$cport for actual server we are connecting to my ($chost,$cport,$cwhat,$PROXY)=('',80,'',0); if(defined $$W{'proxy_host'}){ $chost=$$W{'proxy_host'}; $cport=$$W{'proxy_port'}||80; $cwhat='proxy'; $PROXY=1; if($$W{'ssl'}>0 && $LW::LW_SSL_LIB==2){ $ENV{HTTPS_PROXY} ="$$W{'proxy_host'}:"; $ENV{HTTPS_PROXY}.=$$W{'proxy_port'}||80; } } else { $chost=$$W{'host'}; $cport=$$W{'port'}; $cwhat='host'; } if($$Z[1]>0){ # check to see if socket is still alive if(! sock_valid($Z,$hin,$hout) ){ $$Z[1]=0; sock_close($$Z[0],$$Z[4]); } } # technically we have a race condition: socket can go # bad before we send request, below. But that's ok, # we handle the errors down there. if($$Z[1]==0){ if(defined $$W{'UDP'} && $$W{'UDP'}>0){ if(!socket(SOCK,PF_INET,SOCK_DGRAM,getprotobyname('udp')||0)){ $$hout{'whisker'}->{'error'}='Socket() problems (UDP)'; return 2;} } else { if(!socket(SOCK,PF_INET,SOCK_STREAM,getprotobyname('tcp')||0)){ $$hout{'whisker'}->{'error'}='Socket() problems'; return 2;} } $$Z[0]=SOCK; # lame hack to get perl to take variable for socket $$Z[5]=$$Z[6]=''; # flush in/out queues if($$W{'ssl'}>0){ # ssl setup stuff if($LW::LW_SSL_LIB==1){ if(!defined($$Z[3])){ if(! ($$Z[3] = Net::SSLeay::CTX_new()) ){ $$hout{'whisker'}->{'error'}="SSL_CTX error: $!"; return 2;} if(defined $$W{'ssl_rsacertfile'}){ if(! (Net::SSLeay::CTX_use_RSAPrivateKey_file($$Z[3], $$W{'ssl_rsacertfile'}, &Net::SSLeay::FILETYPE_PEM))){ $$hout{'whisker'}->{'error'}="SSL_CTX_use_rsacert error: $!"; return 2;} } if(defined $$W{'ssl_certfile'}){ if(! (Net::SSLeay::CTX_use_certificate_file($$Z[3], $$W{'ssl_certfile'}, &Net::SSLeay::FILETYPE_PEM))){ $$hout{'whisker'}->{'error'}="SSL_CTX_use_cert error: $!"; return 2;} } } if(! ($$Z[4] = Net::SSLeay::new($$Z[3])) ){ $$hout{'whisker'}->{'error'}="SSL_new error: $!"; return 2;} if(defined $$W{'ssl_ciphers'}){ if(!(Net::SSLeay::set_cipher_list($$Z[4], $$W{'ssl_ciphers'}))){ $$hout{'whisker'}->{'error'}="SSL_set_ciphers error: $!"; return 2;} } } } $$Z[2]=inet_aton($chost) if(!defined $$Z[2]); if(!defined $$Z[2]){ # can't find hostname $$hout{'whisker'}->{'error'}="Can't resolve hostname"; return 2; } if($$W{'ssl'}>0 && $LW::LW_SSL_LIB==2){ # proxy set in ENV; we always connect to host $$Z[4]= Net::SSL->new( PeerAddr => $$hin{'whisker'}->{'host'}, PeerPort => $$hin{'whisker'}->{'port'}, Timeout => $TIMEOUT ); if($@){ $$hout{'whisker'}->{'error'}="Can't connect via SSL: $@[0]"; return 2;} $$Z[4]->autoflush(1); } else { if($LW::LW_NONBLOCK_CONNECT){ my $flags=fcntl($$Z[0],F_GETFL,0); $flags |= O_NONBLOCK; # set nonblock flag if(!(fcntl($$Z[0],F_SETFL,$flags))){ # error setting flag $LW::LW_NONBLOCK_CONNECT=0; # revert to normal } else { my $R=connect($$Z[0],sockaddr_in($cport,$$Z[2])); if(!$R){ # we didn't connect... if($! != EINPROGRESS){ close($$Z[0]); $$Z[0]=undef; # this is a bad socket $$hout{'whisker'}->{'error'}="Can't connect to $cwhat"; return 2;} vec($vin,fileno($$Z[0]),1)=1; if(!select(undef,$vin,undef,$TIMEOUT) || !getpeername($$Z[0])){ close($$Z[0]); $$Z[0]=undef; # this is a bad socket $$hout{'whisker'}->{'error'}="Can't connect to $cwhat"; return 2; } } $flags &= ~O_NONBLOCK; # clear nonblock flag if(!(fcntl($$Z[0],F_SETFL,$flags))){ # not good! close($$Z[0]); $LW::LW_NONBLOCK_CONNECT=0; $$Z[0]=undef; $$hout{'whisker'}->{'error'}="Error setting socket to block"; return 2; } } } if(!defined $$Z[0]){ # this is a safety catch $$hout{'whisker'}->{'error'}="Error creating valid socket connection"; return 2; } if($LW::LW_NONBLOCK_CONNECT==0){ # attempt to do a timeout alarm... eval { local $SIG{ALRM} = sub { die "timeout\n" }; eval {alarm($TIMEOUT)}; if(!connect($$Z[0],sockaddr_in($cport,$$Z[2]))){ alarm(0); die("no_connect\n"); } eval {alarm(0)}; }; if($@ || !(defined $$Z[0])){ $$hout{'whisker'}->{'error'}="Can't connect to $cwhat"; return 2; } } binmode($$Z[0]); # stupid Windows # same as IO::Handle->autoflush(1), without importing 1000+ lines my $S=select($$Z[0]); $|++; select($S); } $$Z[1]=1; $$Z[7]++; if($$W{'ssl'}>0){ if($LW::LW_SSL_LIB==1){ if($PROXY){ # handle the proxy CONNECT stuff... my $SSL_CONNECT = "CONNECT $$W{'host'}". ":$$W{'port'}/ HTTP/1.0\n\n"; syswrite($$Z[0],$SSL_CONNECT, length($SSL_CONNECT)); } Net::SSLeay::set_fd($$Z[4], fileno($$Z[0])); Net::SSLeay::set_session($$Z[4],$$Z[9]) if(defined $$Z[9]); if(! (Net::SSLeay::connect($$Z[4])) ){ $$hout{'whisker'}->{'error'}="SSL_connect error: $!"; sock_close($$Z[0],$$Z[4]); return 2;} if(defined $$W{'save_ssl_info'} && $$W{'save_ssl_info'}>0){ ssl_save_info($hout,$$Z[4]); } my $x=Net::SSLeay::ctrl($$Z[4],6,0,''); $$Z[9]=Net::SSLeay::get_session($$Z[4]) unless(defined $$W{'ssl_resume'} && $$W{'ssl_resume'}==0); } } else { $$Z[4]=undef; } } if(defined $$W{'ids_session_splice'} && $$W{'ids_session_splice'}>0 && $$W{'ssl'}==0){ # no session_spice over ssl setsockopt($$Z[0],SOL_SOCKET,SO_SNDLOWAT,1); @c=split(//, &http_req2line($hin)); # notice we bypass queueing here, in order to trickle the packets my $ss; foreach $c (@c){ $ss=syswrite($$Z[0],$c,1); # char size assumed to be 1 if(!defined $ss || $ss==0){ $$hout{'whisker'}->{'error'}="Error sending session splice request to server"; sock_close($$Z[0],$$Z[4]); return 1; } select(undef,undef,undef,.1); } } else { http_queue(http_req2line($hin)); } $$Z[8]++; if($$W{'http_ver'} ne '0.9'){ my %SENT; if(defined $$W{'header_order'} && ref($$W{'header_order'})){ foreach (@{$$W{'header_order'}}){ next if($_ eq '' || $_ eq 'whisker'); if(ref($$hin{$_})){ $SENT{$_}||=0; my $v=$$hin{$_}->[$SENT{$_}]; http_queue("$_: $v$$W{'http_eol'}"); } else { http_queue("$_: $$hin{$_}$$W{'http_eol'}"); } $SENT{$_}++; } } foreach (keys %$hin){ next if($_ eq '' || $_ eq 'whisker'); next if(defined $SENT{$_}); if(ref($$hin{$_})){ # header with multiple values my $key=$_; foreach (@{$$hin{$key}}){ http_queue("$key: $_$$W{'http_eol'}");} } else { # normal header http_queue("$_: $$hin{$_}$$W{'http_eol'}"); } } if(defined $$W{'raw_header_data'}){ http_queue($$W{'raw_header_data'});} http_queue($$W{'http_eol'}); if(defined $$W{'data'}){ http_queue($$W{'data'});} } # http 0.9 support # take a MD5 of queue, if wanted if(defined $$W{'queue_md5'}){ $$hout{'whisker'}->{'queue_md5'}= LW::md5($$Z[5]); } # all data is wrangled...actually send it now if($res=http_queue_send($$Z[0],$$Z[4])){ $$hout{'whisker'}->{'error'}="Error sending request to server: $res"; sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;} if(defined $$Z[4]){ if($LW::LW_SSL_LIB==1){ # Net::SSLeay shutdown $$Z[0], 1; } else { # Net::SSL shutdown $$Z[4], 1; } } vec($vin,fileno($$Z[0]),1)=1; # wait only so long to read... if(!select($vin,undef,undef,$TIMEOUT)){ $$hout{'whisker'}->{'error'}="Server read timed out"; sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;} my ($LC,$CL,$TE,$CO)=('',-1,'',''); # extra header stuff $$hout{'whisker'}->{'lowercase_incoming_headers'} = $$W{'lowercase_incoming_headers'}; if($$W{'http_ver'} ne '0.9'){ do { # catch '100 Continue' responses $resp=sock_getline($$Z[0],$$Z[4]); #$resp=~tr/\r\n//d if(defined $resp); if(!defined $resp){ $$hout{'whisker'}->{'error'}='Error reading HTTP response'; if($!){ # this should be left over from sysread via sock_getline $$hout{'whisker'}->{'error'}.=": $!"; } $$hout{'whisker'}->{'data'}=$$Z[6]; sock_close($$Z[0],$$Z[4]); $$Z[1]=0; # otherwise bad crap lingers return 1;} if(defined $$W{'save_raw_headers'}){ $$hout{'whisker'}->{'raw_header_data'}.=$resp;} if($resp!~/^HTTP\/([0-9.]{3})[ \t]+(\d+)[ \t]{0,1}(.*?)[\r\n]+/){ $$hout{'whisker'}->{'error'}="Invalid HTTP response: $resp"; # let's save the incoming data...we might want it $$hout{'whisker'}->{'data'}=$resp; while(defined ($_=sock_getline($$Z[0],$$Z[4]))){ $$hout{'whisker'}->{'data'}.=$_;} # normally we'd check the results to see if socket is closed, but # we close it anyway, so it doesn't matter sock_close($$Z[0],$$Z[4]); $$Z[1]=0; # otherwise bad crap lingers return 1;} $$hout{'whisker'}->{'http_ver'} = $1; $$hout{'whisker'}->{'http_resp'} = $2; $$hout{'whisker'}->{'http_resp_message'}= $3; $$hout{'whisker'}->{'code'} = $2; $$hout{'whisker'}->{'100_continue'}++ if($2 == 100); while(defined ($_=sock_getline($$Z[0],$$Z[4]))){ # check pertinent headers if(defined $$W{'save_raw_headers'}){ $$hout{'whisker'}->{'raw_header_data'}.=$_;} $_=~s/[\r]{0,1}\n$//; # anchored regex, so it's fast last if ($_ eq ''); # acceptable assumption case? my $l2=index($_,':'); # this is faster than regex $a=substr($_,0,$l2); $b=substr($_,$l2+1); $b=~s/^([ \t]*)//; # anchored regex, so it's fast $hout{'whisker'}->{'abnormal_header_spacing'}++ if($1 ne ' '); $LC = lc($a); next if($LC eq 'whisker'); $TE = lc($b) if($LC eq 'transfer-encoding'); $CL = $b if($LC eq 'content-length'); $CO = lc($b) if($LC eq 'connection'); if($$W{'lowercase_incoming_headers'}>0){ $a=$LC; } elsif($$W{'normalize_incoming_headers'}>0){ $a=~s/(-[a-z])/uc($1)/eg; } # save the received header order, in case we're curious push(@{$$hout{'whisker'}->{'recv_header_order'}},$a); if(defined $$hout{$a} && $$W{'ignore_duplicate_headers'}!=1){ if(!ref($$hout{$a})){ my $temp=$$hout{$a}; delete $$hout{$a}; push(@{$$hout{$a}},$temp); } push(@{$$hout{$a}},$b); } else { $$hout{$a}=$b; } } # did we have a socket error? if($!){ $hout{'whisker'}->{'error'}='Error in reading response/headers'; sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1; } if( $CO eq '' ){ # do whatever the client wanted $CO = (defined $$hin{'Connection'}) ? lc($$hin{'Connection'}) : 'close'; } } while($$hout{'whisker'}->{'http_resp'}==100); } else { # http ver 0.9, we need to fake it # Keep in mind lame broken servers, like IIS, still send headers for # 0.9 requests; the headers are treated as data. Also keep in mind # that if the server doesn't support HTTP 0.9 requests, it will spit # back an HTTP 1.0 response header. User is responsible for figuring # this out himself. $$hout{'whisker'}->{'http_ver'}='0.9'; $$hout{'whisker'}->{'http_resp'}='200'; $$hout{'whisker'}->{'http_resp_message'}=''; } if($$W{'force_bodysnatch'} || ( $$W{'method'} ne 'HEAD' && $$hout{'whisker'}->{'http_resp'}!=206 && $$hout{'whisker'}->{'http_resp'}!=102)){ if ($TE eq 'chunked') { if(!defined ($a=sock_getline($$Z[0],$$Z[4]))){ $$hout{'whisker'}->{'error'}='Error reading chunked data length'; sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;} $a=~tr/a-fA-F0-9//cd; $CL=hex($a); $$hout{'whisker'}->{'data'}=''; while($CL!=0) { # chunked sucks if(!defined ($temp=sock_get($$Z[0],$$Z[4],$CL))){ $$hout{'whisker'}->{'error'}="Error reading chunked data: $!"; sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;} $$hout{'whisker'}->{'data'}=$$hout{'whisker'}->{'data'} . $temp; $temp=sock_getline($$Z[0], $$Z[4]); ($temp=sock_getline($$Z[0], $$Z[4])) if(defined $temp && $temp=~/^[\r\n]*$/); if(!defined $temp){ # this will catch errors in either sock_getline $$hout{'whisker'}->{'error'}="Error reading chunked data: $!"; sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;} $temp=~tr/a-fA-F0-9//cd; $CL=hex($temp);} # read in trailer headers while(defined ($_=sock_getline($$Z[0],$$Z[4]))){ tr/\r\n//d; last if($_ eq ''); } # Hmmmm...error, but we should have full body. Don't return error if($!){ $$Z[1]=0; sock_close($$Z[0],$$Z[4]); } } else { if ($CL != -1) { if(!defined ($temp=sock_get($$Z[0],$$Z[4],$CL))){ $$hout{'whisker'}->{'error'}="Error reading data: $!"; sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;} } else { # Yuck...read until server stops sending.... $temp=sock_getall($$Z[0],$$Z[4]); # we go until we puke, so close socket and don't return error sock_close($$Z[0],$$Z[4]); $$Z[1]=0; } $$hout{'whisker'}->{'data'}=$temp; } } # /method ne HEAD && http_resp ne 206 or 102/ if(($CO ne 'keep-alive' || ( defined $$hin{'Connection'} && lc($$hin{'Connection'}) eq 'close')) && $$W{'force_open'}!=1){ $$Z[1]=0; sock_close($$Z[0],$$Z[4]); } # this way we know what the state *would* have been... $$hout{'whisker'}->{'sockstate'}=$$Z[1]; if($$W{'force_close'}>0) { $$Z[1]=0; sock_close($$Z[0],$$Z[4]); } if($$W{'ssl'}>0){ # we don't reuse SSL sockets $$Z[1]=0; sock_close($$Z[0],$$Z[4]); } $$hout{'whisker'}->{'stats_reqs'}=$$Z[8]; $$hout{'whisker'}->{'stats_syns'}=$$Z[7]; $$hout{'whisker'}->{'error'}=''; # no errors return 0; } sub http_req2line { my ($S,$hin,$UO)=('',@_); $UO||=0; # shut up -w warning # notice: full_request_override can play havoc with proxy settings if(defined $$hin{'whisker'}->{'full_request_override'}){ return $$hin{'whisker'}->{'full_request_override'}; } else { # notice the components of a request--this is for flexibility if($UO!=1){$S.= $$hin{'whisker'}->{'method'}. $$hin{'whisker'}->{'method_postfix'}. $$hin{'whisker'}->{'req_spacer'}; if($$hin{'whisker'}->{'include_host_in_uri'}>0){ $S.= 'http://'; if(defined $$hin{'whisker'}->{'uri_user'}){ $S.= $$hin{'whisker'}->{'uri_user'}; if(defined $$hin{'whisker'}->{'uri_password'}){ $S.= ':'.$$hin{'whisker'}->{'uri_user'}; } $S.= '@'; } $S.= $$hin{'whisker'}->{'host'}. ':'.$$hin{'whisker'}->{'port'};}} $S.= $$hin{'whisker'}->{'uri_prefix'}. $$hin{'whisker'}->{'uri'}. $$hin{'whisker'}->{'uri_postfix'}; if(defined $$hin{'whisker'}->{'uri_param'}){ $S.= $$hin{'whisker'}->{'uri_param_sep'}. $$hin{'whisker'}->{'uri_param'};} if($UO!=1){ if($$hin{'whisker'}->{'http_ver'} ne '0.9'){ $S.= $$hin{'whisker'}->{'req_spacer2'}.'HTTP/'. $$hin{'whisker'}->{'http_ver'}. $$hin{'whisker'}->{'http_req_trailer'};} $S.= $$hin{'whisker'}->{'http_eol'};}} return $S;} sub sock_close { my ($fd,$ssl)=@_; eval { close($fd); }; if(defined $ssl){ if($LW::LW_SSL_LIB==1){ # Net::SSLeay eval "&Net::SSLeay::free($ssl)"; # eval "&Net::SSLeay::CTX_free($$Z[3])"; } else { # Net::SSL eval { close($ssl) }; # is that right for Net::SSL? } } $$Z[4]=undef; } sub sock_valid { my ($z,$Hin,$Hout)=@_; my $slurp=$$Hin{'whisker'}->{'trailing_slurp'}; my ($o,$vin)=(undef,''); return 0 if(defined $$z[3]); # we don't do SSL yet # closed socket sets read flag (and so does waiting data) vec($vin,fileno($$z[0]),1)=1; if(select(($o=$vin),undef,undef,.01)){ # we have data to read my ($hold, $res); do { $res = sysread($$z[0], $hold, 4096); $$z[6].=$hold if($slurp==0); # save to queue $$Hout{'whisker'}->{'slurped'}.="$hold\0" if($slurp==1); # save to hout hash # fall through value of 2 doesn't do anything } while ($res && select(($o=$vin),undef,undef,.01)); if(!defined $res || $res==0){ # error or EOF return 0; } } return 1; } sub sock_getline { # read from socket w/ timeouts my ($fd,$ssl) = @_; my ($str,$t)=('',''); $t = index($$Z[6],"\n",0); while($t < 0){ return undef if &http_queue_read($fd,$ssl); $t=index($$Z[6],"\n",0); } # MEMLEAK: use following lines; comment out SPEEDUP and LEGACY lines # my $r; # ($r,$$Z[6])=unpack('A'.($t+1).'A*',$$Z[6]); # return $r; # SPEEDUP: use following line; comment out MEMLEAK and LEGACY lines # return substr($$Z[6],0,$t+1,''); # LEGACY: use following lines; comment out MEMLEAK and SPEEDUP lines my $r = substr($$Z[6],0,$t+1); substr($$Z[6],0,$t+1)=''; return $r; } sub sock_get { # read from socket w/ timeouts my ($fd,$ssl,$amount) = @_; my ($str,$t)=('',''); while($amount > length($$Z[6])){ return undef if &http_queue_read($fd,$ssl); } # MEMLEAK: use following lines; comment out SPEEDUP and LEGACY lines # my $r; # ($r,$$Z[6])=unpack('A'.$amount.'A*',$$Z[6]); # return $r; # SPEEDUP: use following line; comment out MEMLEAK and LEGACY lines # return substr($$Z[6],0,$amount,''); # LEGACY: use following lines; comment out MEMLEAK and SPEEDUP lines my $r = substr($$Z[6],0,$amount); substr($$Z[6],0,$amount)=''; return $r; } sub sock_getall { my ($fd,$ssl) = @_; 1 while( !(&http_queue_read($fd,$ssl)) ); return $$Z[6]; } sub http_queue_read { my ($fd,$ssl)=@_; my ($vin, $t)=('',''); if(defined $ssl){ if($LW::LW_SSL_LIB==1){ # Net::SSLeay local $SIG{ALRM} = sub { die "timeout\n" }; local $SIG{PIPE} = sub { die "pipe_error\n" }; eval { eval { alarm($TIMEOUT); }; $t=Net::SSLeay::read($ssl); eval { alarm(0); }; }; if($@ || !defined $t || $t eq ''){ return 1;} $$Z[6].=$t; } else { # Net::SSL if(!$ssl->read($t,1024)){ return 1; } else { $$Z[6].=$t;} } } else { vec($vin,fileno($fd),1)=1; # wait only so long to read... if(!select($vin,undef,undef,$TIMEOUT)){ return 1;} if(!sysread($fd,$t,4096)){ return 1; # EOF or error } else { $$Z[6].=$t;} } return 0; } sub http_queue_send { # write to socket my ($fd,$ssl)=@_; my ($v,$wrote,$err)=(''); my $len = length($$Z[5]); if(defined $ssl){ if($LW::LW_SSL_LIB==1){ # Net::SSLeay ($wrote,$err)=Net::SSLeay::ssl_write_all($ssl,$$Z[5]); return 'Could not send entire data queue' if ($wrote!=$len); return "SSL_write error: $err" unless $wrote; } else { # Net::SSL $ssl->print($$Z[5]); } } else { vec($v,fileno($fd),1)=1; if(!select(undef,$v,undef,.01)){ return 'Socket write test failed'; } $wrote=syswrite($fd,$$Z[5],length($$Z[5])); return "Error sending data queue: $!" if(!defined $wrote); return 'Could not send entire data queue' if ($wrote != $len); } $$Z[5]=''; return undef; } sub http_queue { $$Z[5].= shift; } sub http_fixup_request { my $hin=shift; return if(!(defined $hin && ref($hin))); if($$hin{'whisker'}->{'http_ver'} eq '1.1'){ $$hin{'Host'}=$$hin{'whisker'}->{'host'} if(!defined $$hin{'Host'}); $$hin{'Connection'}='Keep-Alive' if(!defined $$hin{'Connection'}); } if(defined $$hin{'whisker'}->{'data'}){ if(!defined $$hin{'Content-Length'}){ $$hin{'Content-Length'}=length($$hin{'whisker'}->{'data'});} # if(!defined $$hin{'Content-Encoding'}){ # $$hin{'Content-Encoding'}='application/x-www-form-urlencoded';} } if(defined $$hin{'whisker'}->{'proxy_host'}){ $$hin{'whisker'}->{'include_host_in_uri'}=1;} } sub http_reset { my $key; foreach $key (keys %http_host_cache){ # *Z=$http_host_cache{$key}; sock_close($http_host_cache{$key}->[0], $http_host_cache{$key}->[4]); my $x=$http_host_cache{$key}->[3]; if(defined $x && $LW::LW_SSL_LIB==1){ eval "Net::SSLeay::CTX_free($x)"; } delete $http_host_cache{$key}; } } sub ssl_save_info { my ($hr,$SSL)=@_; my $cert; return if($LW::LW_SSL_LIB!=1); # only Net::SSLeay used $$hr{'whisker'}->{'ssl_cipher'}=Net::SSLeay::get_cipher($SSL); if( $cert = Net::SSLeay::get_peer_certificate($SSL)){ $$hr{'whisker'}->{'ssl_cert_subject'} = Net::SSLeay::X509_NAME_oneline( Net::SSLeay::X509_get_subject_name($cert) ); $$hr{'whisker'}->{'ssl_cert_issuer'} = Net::SSLeay::X509_NAME_oneline( Net::SSLeay::X509_get_issuer_name($cert) ); } } { # start md5 packaged varbs my (@S,@T,@M); my $code=''; sub md5 { return undef if(!defined $_[0]); # oops, forgot the data return MD5->hexhash($_[0]) if(defined $LW::available{'md5'}); return md5_perl($_[0]); } sub md5_perl { my $DATA=shift; $DATA=md5_pad($DATA); &md5_init() if(!defined $M[0]); return md5_perl_generated(\$DATA); } sub md5_init { return if(defined $S[0]); for(my $i=1; $i<=64; $i++){ $T[$i-1]=int((2**32)*abs(sin($i))); } my @t=(7,12,17,22,5,9,14,20,4,11,16,23,6,10,15,21); for($i=0; $i<64; $i++){ $S[$i]=$t[(int($i/16)*4)+($i%4)]; } @M=( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, 1,6,11,0,5,10,15,4,9,14,3,8,13,2,7,12, 5,8,11,14,1,4,7,10,13,0,3,6,9,12,15,2, 0,7,14,5,12,3,10,1,8,15,6,13,4,11,2,9 ); &md5_generate(); # check to see if it works correctly my $TEST=md5_pad('foobar'); if( md5_perl_generated(\$TEST) ne '3858f62230ac3c915f300c664312c63f'){ die('Error: MD5 self-test not successful.'); } } sub md5_pad { my $l = length(my $msg=shift() . chr(128)); $ msg .= "\0" x (($l%64<=56?56:120)-$l%64); $l=($l-1)*8; $msg .= pack 'VV',$l & 0xffffffff, ($l >> 16 >> 16); return $msg; } sub md5_generate { my $N='abcddabccdabbcda'; my $M=''; $M='&0xffffffff' if((1 << 16) << 16); # mask for 64bit systems $code=<