با این کد میتونید تمام لینک ها و مسیر های یک سایت رو در یک فایل ذخیره کنید (مثلا اسکنر اکانتیس که یه بخش Site crawler داره )
#!/usr/bin/perl
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use HTML::LinkExtor;
$DEBUG=0;
open(OUTPUT,">login.html")|| die "cannot open file\n";
$browser = LWP::UserAgent->new();
$browser->timeout(10);
print"Enter the URL for eg: http://www.abc.com/\n";
$URL=<STDIN>;
print"Enter the Depth of search : ";
$depth=<STDIN>;
$dep=$depth;
my $request = HTTP::Request->new(GET => $URL);
my $response = $browser->request($request);
if ($response->is_error()) {printf "%s\n", $response->status_line;}
$contents = $response->content();
print OUTPUT $contents;
%URLqueue = ();
$been = 0;
$URLqueue{$URL} = 0;
$thisURL = &find_new(%URLqueue);
$x=1;
# While there's a URL in our queue which we haven't looked at ...
while(($thisURL ne "")&& $dep>-1){
# Progress report.
$count = 0;
while(($key,$value) = each(%URLqueue)){
$count ++;
}
print "-----------------------------------------\n"; printf("Been: %d To Go: %d\n", $been, $count-$been);
if($been==$x)
{
$x=$x+($count-$been);
$dep--;
}
print "Current URL: $$thisURL[2]\n" if($DEBUG>=1);
&dump_stack();
my $request = HTTP::Request->new(GET =>$thisURL);
my $response = $browser->request($request);
if ($response->is_error()) {printf "%s\n", $response->status_line;}
$contents = $response->content();
print OUTPUT $contents;
my ($page_parser) = HTML::LinkExtor->new(undef,$thisURL);
$page_parser->parse($contents)->eof;
@links = $page_parser->links;
foreach $link (@links) {print "$$link[2]\n";}
foreach $link (@links){
$newURL=$$link[2];
if($URLqueue{$newURL} > 0){
# Increment the count for URLs we've already checked out
$URLqueue{$newURL}++;
}
else{
# Add a zero record for URLs we haven't encountered
$URLqueue{$newURL}=0;
}
}
# Record the fact that we've been here, and get a new URL to process.
$URLqueue{$thisURL} ++;
$been ++;
$thisURL = &find_new(%URLqueue);
}
sub find_new
{
local(%URLqueue) = @_;
local($key, $value);
while(($key, $value) = each(%URLqueue)){
return $key if($value == 0);
}
return "";
}
sub dump_stack
{
local($key, $x);
local($done, $togo) = ("", "");
foreach $key (keys(%URLqueue)){
if($URLqueue{$key} == 0){
$togo .= " " . $key . "\n";
}else{
$done .= " " . $key . " (hitcount = "
. $URLqueue{$key} . ")\n";
}
}
print "Been There:\n" . $done;
open(OUTPUT1,">login1.txt")|| die "cannot open file\n";
print OUTPUT1 "Number of URLs covered = ",$been;
print OUTPUT1 "\nBeen There:\n" .$done;
close OUTPUT1;
print "To Go:\n" . $togo;
print"----------------continuing-----------------------\n";
}