#!/usr/bin/perl
#
# Print the links found on a web page, one absolute URL per line.
#
# Usage: <program> URL [REGEX ...]
#
# The page at URL is fetched, every link on it is made absolute, and the
# links are narrowed down to those matching *all* of the given regular
# expressions. With no regular expressions, every link is printed.
#
# Exit status: 1 when the command line is unusable (no URL, or a pattern
# that does not compile). A failed fetch makes WWW::Mechanize die.

use strict;
use warnings;

use File::Basename;
use URI;
use WWW::Mechanize;

# Exit status used when the command line is unusable.
my $badArgs_err = 1;

if ( @ARGV < 1 ) {
    usage($0);
    exit $badArgs_err;
}

my $url = shift;

# Compile every pattern once, before any network traffic, so that a
# malformed pattern is reported as an argument error instead of killing
# the script half-way through the filtering loop.
my @regexes;
foreach my $pattern (@ARGV) {
    my $compiled = eval { qr{$pattern} };
    if ( !defined $compiled ) {

        # $@ already ends with a newline and names the offending construct.
        print STDERR "Invalid regular expression '$pattern': $@";
        usage($0);
        exit $badArgs_err;
    }
    push @regexes, $compiled;
}

# autocheck is requested explicitly so a failed fetch dies on every
# WWW::Mechanize release rather than only on those where it is the default.
my $mech = WWW::Mechanize->new( autocheck => 1 );
$mech->get($url);

# Let each WWW::Mechanize::Link resolve itself: url_abs() uses the base the
# page was actually parsed with, which differs from $url after a redirect
# or when the document declares its own <base href>.
my @urls = map { $_->url_abs() } $mech->links();

# Keep only the URLs that match each of the regexes in turn (logical AND).
# The URI objects stringify to the URL text when matched.
foreach my $regex (@regexes) {
    @urls = grep { m{$regex} } @urls;
}

# One URL per line, each newline-terminated so line-oriented tools see the
# last entry as a complete line.
print "$_\n" for @urls;

# usage($program_path)
#
# Writes the usage message to STDERR, showing only the basename of
# $program_path. Always returns 1.
#
# NOTE(review): the message says the matching links are downloaded, but this
# script only prints them; the text is left as-is pending confirmation of
# the intended wording.
sub usage {
    my ($program_path) = @_;
    my $progName = basename($program_path);

    print STDERR "Usage:\t$progName [URL] [list of regular expressions],\n",
        "\twhere [URL] is the URL we want to download. We'll then hunt through\n",
        "\tthe URL for links matching the list of regular expressions, then download\n",
        "\tall the matching links. If the list of regular expressions isn't specified,\n",
        "\treturns *all* the links in the URL.\n";

    return 1;
}