#!/bin/perl # -*- perl -*- # # Copyright (c) 1997, 2000 DJ Delorie, All Rights Reserved. NO WARRANTEE. # # Note: headers.cgi is used by a Firefox extension. #if ($ENV{'HTTP_REFERER'} # && $ENV{'HTTP_REFERER'} !~ m@^http://www.delorie.com/web/@) { # print "Location: http://www.delorie.com/web/\n\n"; # exit 0; #} if ($ENV{'HTTP_USER_AGENT'} =~ /robot|spider|wget|crawl|slurp|googlebot|openfind/i) { exit 0; } sub rebase_url { local($tag, $uri, $rest) = @_; if ($uri =~ /^http:/) { $u = $uri; } elsif ($uri =~ /^[a-z]+:/) { if ($tag) { return "$tag=\"$uri\"$rest"; } else { return $uri; } } elsif ($uri =~ m@^/@) { $u = $urlhead . $uri; } else { $u = $urlpath . $uri; 0 while $u =~ s@/\./@/@g; 0 while $u =~ s@/[^/]+/\.\./@/@g; 0 while $u =~ s@/[^/]+/\.\.$@/@g; $u = $urlhead . $u; } if ($u !~ /\.(pdf|zip|gz|exe|gif|jpg|png)/) { $u =~ s/([^a-zA-Z0-9\._\#-])/sprintf("%%%02x", ord($1));/ge; $u = $me . $u; } if ($tag) { return "$tag=\"$u\"$rest"; } else { return $u; } } sub canonical_url { local($base, $uri) = @_; local($urlhead, $urlpath) = $base =~ m@([^/]+/+[^/]+)(.*)@; $urlpath =~ s@/[^/]+$@/@; $urlpath = "/" unless $urlpath; if ($uri =~ /^[a-z]+:/) { return $uri; } elsif ($uri =~ m@^/@) { $u = $urlhead . $uri; } else { $u = $urlpath . $uri; 0 while $u =~ s@/\./@/@g; 0 while $u =~ s@/[^/]+/\.\./@/@g; 0 while $u =~ s@/[^/]+/\.\.$@/@g; $u = $urlhead . $u; } return $u; } #----------------------------------------------------------------------------- sub try_one { my ($url, $ref) = @_; $attempts .= "

Attempted URL: $url \n"; $try_access_ok = &webget($url, $ref); $try_access_ok .= ""; if ($webstatus eq "404") { $attempts .= " (not found)"; } elsif ($webstatus eq "403") { $attempts .= " (access denied)"; } else { $access_ok .= $try_access_ok; } return $webstatus =~ /^[123]/; } sub check_delorie_htm { my ($url) = @_; my ($ref) = "http://$ENV{'SERVER_NAME'}/web/howto-allow.html (on behalf of $ENV{'REMOTE_ADDR'})"; my ($ourl) = $url; $attempts = ''; #return if $ENV{'REMOTE_ADDR'} eq "207.22.48.171"; $url =~ s@\?.*@@; $url =~ s@/[^/]*$@/delorie.htm@; return if &try_one($url, $ref); if ($url =~ m@/\~@) { $url =~ s@(/\~[^/]+)/.*$@$1/delorie.htm@; return if &try_one($url, $ref); } if ($url !~ m@//[^/]+/delorie.htm$@) { $url =~ s@(//[^/]+)/.*$@$1/delorie.htm@; return if &try_one($url, $ref); } $url = $ourl; $url =~ s@\?.*@@; $url =~ s@/[^/]*$@/delorie.gif@; return if &try_one($url, $ref); if ($url =~ m@/\~@) { $url =~ s@(/\~[^/]+)/.*$@$1/delorie.gif@; return if &try_one($url, $ref); } if ($url !~ m@//[^/]+/delorie.htm$@) { $url =~ s@(//[^/]+)/.*$@$1/delorie.gif@; return if &try_one($url, $ref); } print "\n"; print "


Sorry, but due to abuse, this service cannot access\n"; print "sites that do not explicitly\n"; print "allow it.

\n"; print "

Not Permitted

\n"; print "

You need to create a file called delorie.htm or\n"; print "delorie.gif (here's one) on your\n"; print "web server to prove you're the webmaster. When I see this file (it can be empty)\n"; print "I'll allow my tools to access your site.

\n"; $attempts =~ s/\r//g; $attempts =~ s@\n\n+@\n@g; print $attempts, "

\n"; if ($access_ok) { print "


\n"; print "

Here is a copy of the error messages I got from the server\n"; print "you were attempting to look at, in case that helps you.\n"; print "Note that these are not errors with my tools, so pleaes\n"; print "do not tell me my tools are broken because of these.

\n"; print "
\n"; print $access_ok; print "
\n"; } else { open(H, "howto-allow.html"); while () { next if /\#perl/; print; } close H; } exit 0; } sub webget { my ($url, $referer) = @_; if ($url =~ m@^[a-z]+://[^/]+$@) { $url .= "/"; } if ($times_webgot++ > 5) { return "

Error

too many levels of HTTP redirection\n"; } if ($url !~ m@/delorie.(htm|gif)@ && ! $no_access_check) { &check_delorie_htm($url); } exit 0 if $host =~ /delorie.com/i && $ENV{'SERVER_NAME'} =~ /delorie.com/; my ($host,$port,$uri) = $url =~ m@http://([^/:]+)(:\d+)?(.*)@; if ($ENV{'SERVER_NAME'} =~ /delorie.com/) { $chost = "delorie.com"; $uri = $url; $port = 9000; } else { $chost = $host; $port =~ s/://; $port = 80 unless $port; $uri = "/" unless $uri; } my ($name,$aliases,$type,$len,$addr) = gethostbyname($chost); if (! $addr) { return "

Error

Host $chost unknown"; } $them = pack($sockaddr, &AF_INET, $port, $addr); unless (socket(S, &AF_INET, &SOCK_STREAM, $proto)) { return "

Error

Unable to create socket"; } unless (connect(S, $them)) { return "

Error

Unable to connect to $chost:$port: $!"; } select(S); $| = 1; select(STDOUT); print "webgot: $url\n" unless $ENV{'SERVER_NAME'}; $webgot_url = $url; print S "GET $uri HTTP/1.0\r\n"; print S "Host: $host\r\n"; print S "User-Agent: $agent\r\n" if $agent; if ($ENV{"HTTP_PRAGMA"} =~ /no-cache/) { print S "Pragma: no-cache\r\n"; } if ($referer) { print S "Referer: $referer\r\n"; } print S "\r\n"; $line = scalar(); ($webstatus) = $line =~ m@ (\d+)@; if ($webstatus ne "200") { $resp = "\n

Error

Server returned error code $webstatus
";
	$resp .= "Request: GET $uri HTTP/1.0\n";
	$resp .= $line;
	while () {
	    $resp .= $_;
	}
	close(S);
	if ($webstatus eq '302' || $webstatus eq '301') {
	    ($loc) = $resp =~ m@Location:\s*(.*\S)@;
	    $loc = &canonical_url($url, $loc);
	    return &webget($loc);
	}
	return $resp;
    }

    # Read the response header
    while () {
	$web_header .= $_ unless /Content-length/i || /^\s/;
	last unless /\S/;
    }

    $resp = "";
    # Read the data
    while () {
	$resp .= $_;
    }

    close(S);
    return $resp;
}

1;