分析家 发表于 2017-5-19 09:32:07

Perl-网页爬虫程序web crawler

#!C:\perl\bin\perl -w
#
# Minimal CGI headline scraper.
#
# Fetches the Slashdot XML feed, stores the raw document in a local cache
# file, then scans that cache line by line and prints every story as an
# HTML link (the most recent <title> paired with each <url>).
#
# If the fetch fails, the previously written cache file (if any) is used
# instead; if no cache file can be opened the script dies.
use strict;
use warnings;
use LWP::Simple;
use CGI qw(:standard);

my $url   = "http://slashdot.org/slashdot.xml";
my $cache = "cache";

# Emit the HTTP header first so that everything printed below is valid
# CGI output.
print header;

# Save the results in the cache file.  The cache is only overwritten when
# the download succeeded, so a failed fetch leaves the old copy intact.
my $doc = get($url);
if (defined $doc) {
    open(my $cache_out, '>', $cache) or die "writing to cache: $!";
    print {$cache_out} $doc;
    # Buffered write errors only surface at close, so check it.
    close($cache_out) or die "closing cache: $!";
}
else {
    warn "could not fetch $url; falling back to existing cache\n";
}

# Parse the XML file and output the TITLE and URL.
open(my $cache_in, '<', $cache) or die "cannot open the cache: $!";

# "as of" refers to the age of the cached data, so report the cache file's
# modification time rather than the current time.
my $as_of = localtime((stat $cache_in)[9]);
print "<h2>slashdot.org's headlines as of $as_of</h2>";

# Start with an empty title so that a <url> seen before any <title> does
# not interpolate an undefined value.
my $title = '';
while (my $line = <$cache_in>) {
    # Non-greedy captures: stop at the first closing tag on the line.
    if ($line =~ m{<title>(.*?)</title>}) {
        $title = $1;
    }
    if ($line =~ m{<url>(.*?)</url>}) {
        my $link = $1;
        # XML text may legally contain a raw double quote, which would
        # terminate the href attribute early; neutralise it.  Other markup
        # characters are already entity-escaped in the XML source.
        $link =~ s/"/&quot;/g;
        print qq(<a href="$link">$title</a><br />\n);
    }
}
close($cache_in);



页: [1]
查看完整版本: Perl-网页爬虫程序web crawler