#! /usr/bin/perl # link_tester.pl # Meryll Larkin 12/31/09 # This script has been written for Linux/Unix # This script evaluates all the external links on a page, # that is, the ones with anchor tags that begin: # {$key}; } } print Dumper(@links_ref) . "\n" if ($DEBUG); for my $hashref (@links_ref) { for my $key (keys %$hashref) { my $link = %$hashref->{$key}; push (@links_in_file, $link); } } my @stripped_links; for my $grep_line (@links_in_file) { chomp ($grep_line); # using grep means there could be more than one link per item in list my @links = split (/<\/a>/i, $grep_line); for my $link (@links) { next if ($link !~ /http/i); print "raw link = $link\n" if ($DEBUG == 2); # ignore MAILTO links because they are email addresses, not urls next if ($link =~ /MAILTO/i); # get just the link # strip out everything before http $link =~ s/^.*href="?//; # strip out everything after the end of the link $link =~ s/"?>?\s.*$//; $link =~ s/".*$//; $link =~ s/>.*$//; print "Stripped link = $link\n" if ($DEBUG); push (@stripped_links, $link); } } my @deadlinks; my @redirected_links; my @urls = sort (@stripped_links); for my $url (@urls) { # test to see if it is a good link my $head = head($url); push (@deadlinks,$url) if (! $head); print "$url head = " . Dumper($head) . "\n\n" if ($DEBUG); for my $item (keys %$head) { if ($item =~ /previous/) { push (@redirected_links,$url); } } } print "\nThe following links on $file are dead:\n"; for my $link (@deadlinks) { print "$link\n"; } print "\nThe following links on $file have been redirected:\n"; for my $link (@redirected_links) { print "$link\n"; } print "\nReport Complete\n\n"; exit 0; # ----------------------------------- sub get_file_path { print "To test the links on a single text or HTML page,\n"; print "please provide a relative or absolute path and file name: "; return ; } # --- END ---