#!/usr/bin/perl
#
# Squid redirector that sends requests to archive.org.
#
# If archive.org doesn't have a snapshot for the date specified, it returns a
# 302 with the nearest date. This script takes the Location header from that
# 302 response and returns it to Squid.
#
# Protocol as implemented here: one request per input line, whose first
# whitespace-separated field is the URL; exactly one line is written to
# STDOUT for every input line.

use strict;
use warnings;

use IO::Handle;
use LWP::UserAgent;

# The caller waits for each reply line, so STDOUT must not be buffered.
$| = 1;

# Set to 1 to append every input and output line to $DEBUG_LOG.
my $debug      = 0;
my $DEBUG_LOG  = '/usr/local/squid/var/logs/time_machine_debug.log';

# How far back in time the requested snapshot should be.
my $YEARS_BACK = 4;

# Only open (and autoflush) the debug log when debugging is enabled. A log
# that cannot be opened disables debugging instead of killing the helper.
my $debug_fh;
if ($debug) {
    if (open $debug_fh, '>>', $DEBUG_LOG) {
        $debug_fh->autoflush(1);
    }
    else {
        warn "cannot open $DEBUG_LOG: $!\n";
        undef $debug_fh;
    }
}

# One user agent is reused for every request instead of building a new one
# per input line.
my $ua = LWP::UserAgent->new(timeout => 10);

# Append one line to the debug log; a no-op when debugging is off.
sub _debug_log {
    my ($message) = @_;
    return if !$debug_fh;
    print {$debug_fh} "$message\n";
    return;
}

# Return the archive.org timestamp (YYYYMMDD000000) for today's date
# $YEARS_BACK years ago. Computed per request so a long-running helper does
# not keep using the date it was started on.
sub _archive_timestamp {
    my (undef, undef, undef, $mday, $mon, $year) = localtime time;
    return sprintf '%04d%02d%02d000000',
        $year + 1900 - $YEARS_BACK, $mon + 1, $mday;
}

# Map one input line to the reply line.
#
# Returns the Location header of the redirect archive.org issued, the
# requested archive URL itself when archive.org answered without redirecting,
# or the unmodified input line when the line carries no usable URL or the
# lookup failed.
sub _resolve_archive_url {
    my ($line) = @_;

    # The URL is the first whitespace-separated field of the line.
    my ($url) = split ' ', $line;
    return $line if !defined $url;

    # Drop the scheme. The limit of 2 keeps any later "//" in the path intact.
    my (undef, $target) = split m{//}, $url, 2;
    return $line if !defined $target || $target eq '';

    my $archive_url = 'http://web.archive.org/web/'
        . _archive_timestamp() . "/$target";

    my $response = $ua->get($archive_url);
    return $line if !$response->is_success;

    # previous() is undef when the request was answered without a redirect,
    # i.e. a snapshot exists for exactly the requested date.
    my $redirect = $response->previous;
    my $location = $redirect ? $redirect->header('Location') : undef;
    return defined $location ? $location : $archive_url;
}

while (my $line = <>) {
    chomp $line;
    _debug_log("INPUT- $line");

    # Requests that already point at archive.org are passed through untouched
    # so the lookup performed above is not itself rewritten.
    my $reply = $line =~ m/archive\.org/
        ? $line
        : _resolve_archive_url($line);

    print "$reply\n";
    _debug_log("OUTPUT- $reply");
}

close $debug_fh if $debug_fh;