This Perl script reads the existing links from links.dat into the array @bigarray. It then loops through the array, requesting the home page of each link's host and appending the links it finds in the response to links.dat. If the script were run repeatedly, it would eventually add every web address it could reach to links.dat.
#!/usr/bin/perl
#
# Link harvester.
#
# Reads the URLs already stored in links.dat (one per line), requests the
# root page ("/") of each URL's host over plain HTTP on port 80, and appends
# every href="..." value found in the response to links.dat, one per line.
#
# Only the snapshot of links.dat taken at start-up is visited; links appended
# during this run are picked up the next time the script is executed.
# Harvested href values are stored exactly as they appear in the page, so the
# file may contain relative links and duplicates.
use strict;
use warnings;

use IO::Socket;
use URI;

my $links_file = 'links.dat';

# Load the seed list up front and close the file before anything is appended
# to it, so reading and writing never overlap.
open(my $in, '<', $links_file)
    or die "Couldn't open $links_file for reading: $!";
my @bigarray;
while (my $entry = <$in>) {
    chomp $entry;
    push @bigarray, $entry;
}
close($in);

foreach my $uri (@bigarray) {
    # Entries that carry no host part (blank lines, relative links, schemes
    # such as mailto: whose URI class has no authority method) cannot be
    # fetched, so they are skipped rather than aborting the whole run.
    my $parsed = URI->new($uri);
    next unless $parsed->can('authority');
    my $domain = $parsed->authority;
    next unless defined $domain && length $domain;

    # The connection is made to the host name with any leading "www."
    # removed.
    $domain =~ s/^www\.//i;

    my $socket = IO::Socket::INET->new(
        PeerAddr => $domain,
        PeerPort => 80,
        Proto    => 'tcp',
        Type     => SOCK_STREAM,
    ) or die "Couldn't connect to $domain: $@";

    # HTTP lines are terminated by CRLF. Only the root document is requested,
    # whatever path the stored URL had, and no Host header is sent.
    print {$socket} "GET / HTTP/1.0\r\n\r\n";

    open(my $out, '>>', $links_file)
        or die "Couldn't open $links_file for appending: $!";
    while (defined(my $line = <$socket>)) {
        # The inner loop walks every href on the line, and $1 is only used
        # after a successful match, so no stale capture is ever written.
        while ($line =~ m{href="(.*?)"}ig) {
            # One link per line keeps links.dat readable by the loader above.
            print {$out} "$1\n";
        }
    }
    # Buffered write errors only surface when the handle is closed.
    close($out) or die "Couldn't finish writing $links_file: $!";
    close($socket);
}