#!/usr/bin/perl
# Author: Alex Chudnovsky , http://www.chudnosvky.org
# Description: This script retrieves fuckedcompany.com archives, parses them
#              for the data we are after and saves it into a file.
# I don't care how you use this code; I don't really think there is any use
# beyond the original intention.
#
# History: 13/05/01 Change parsing patterns to accommodate change in HTML
#                   format on fuckedcompany.com.
#                   Convert all dates to HUMAN (European) format.
#          24/02/01 Initial implementation
#
use strict;
use warnings;

# Set to 1 to echo every parsed record to STDOUT and wait for a key press
# after each one.
my $dbg = 0;

require LWP::UserAgent;
require HTTP::Request;

my %OPT = (
    "MaxArchiveItems" => 1877,    # upper bound for the startrow parameter
);

# The archive is walked in steps of this many rows, and a page that yields a
# different number of records is reported on the console.
# NOTE(review): presumably the site lists 10 entries per page -- confirm.
my $PAGE_SIZE = 10;

my $OUT_FILE = "dotcom.txt";

# Three-argument open with a lexical handle; failures go to STDERR with the
# OS error and a non-zero exit status.
open(my $OUT, '>', $OUT_FILE)
    or die "Cant create output file! ($OUT_FILE: $!)\n";

$| = 1;    # unbuffered STDOUT so the progress dots appear as pages arrive

print "Retrieving and parsing archives";

my $done = 0;    # running total of records written so far

# "<=" so that a page starting exactly at MaxArchiveItems is still requested.
for (my $i = 1; $i <= $OPT{"MaxArchiveItems"}; $i += $PAGE_SIZE) {
    my $URL = "http://forum.fuckedcompany.com/fc/phparchives/index.php?startrow=" . $i;
    print ".";

    my $data = GetURL($URL);
    if (!defined $data || $data eq '') {
        die "\nCant retrieve URL: " . $URL . "\n";
    }

    my $found = ParseData(\$data);
    $done += $found;

    # Report pages that did not yield a full set of records.
    if ($found != $PAGE_SIZE) {
        print "(" . $found . " - " . $done . ")" . "\n" . $URL . "\n";
    }
}

# Buffered write errors only surface at close time, so check it.
close($OUT) or die "Cant close output file! ($OUT_FILE: $!)\n";

print "\n\ndone ($done).";

# ParseData(\$page [, $fh])
#
# Scans the text referenced by the first argument line by line and extracts
# records made of four fields that must appear in this order, each on its own
# line: "When:", "Company:", "Points:" and a line containing
# "...commen... in the Happy Fun Slander Corner".  Every complete record is
# written as one tab-separated line to $fh (defaults to the script's output
# file handle).
#
# Returns the number of records written.
sub ParseData {
    my ($data, $fh) = @_;
    $fh = $OUT unless defined $fh;

    my ($when, $company, $points);
    my $items = 0;

    foreach my $line (split("\n", $$data)) {
        # NOTE(review): the original field patterns ended in a literal line
        # break (most likely an HTML tag lost when the file was mangled), which
        # can never match because split() has already removed the newlines.
        # The rest of the line is captured instead and markup is stripped;
        # verify against the real page format.
        if (!_HasValue($when)) {
            # First field of a record: the date.
            if ($line =~ m/When: (.*)/i) {
                $when = _CleanField($1);

                # Reorder "Month Day Year" into "Day Month Year"; leave the
                # text untouched when it does not split into three parts.
                my ($month, $day, $year) = split(" ", $when);
                $when = $day . " " . $month . " " . $year if defined $year;
            }
        }
        elsif (!_HasValue($company)) {
            if ($line =~ m/Company: (.*)/i) {
                $company = _CleanField($1);
            }
        }
        elsif (!_HasValue($points)) {
            if ($line =~ m/Points: (.*)/i) {
                $points = _CleanField($1);
            }
        }
        elsif ($line =~ m#(.*)commen(.*) in the Happy Fun Slander Corner#i) {
            # Everything on the line before "commen" is kept verbatim.
            my $comments = $1;

            print {$fh} "$when\t$company\t$points\t$comments\n";
            print "$when\t$company\t$points\t$comments\n" if ($dbg);
            getc if ($dbg);

            # Clean vars up for the next record.
            ($when, $company, $points) = ();
            $items++;
        }
    }

    return $items;
}

# True when the argument is defined and not the empty string.  Unlike a plain
# truth test this accepts the value "0" (e.g. a record worth 0 points).
sub _HasValue {
    my ($value) = @_;
    return defined $value && length $value;
}

# Returns a copy of the text with anything that looks like an HTML tag removed
# and leading/trailing whitespace (including a stray "\r") trimmed.
sub _CleanField {
    my ($text) = @_;
    $text =~ s/<[^>]*>//g;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    return $text;
}

# GetURL($url)
#
# Issues an HTTP GET for $url and returns the response body.  Returns undef
# when the request does not complete with a successful status; request()
# always hands back a response object, so the status has to be checked
# explicitly.
sub GetURL {
    my $URL = shift;

    my $ua       = LWP::UserAgent->new;
    my $request  = HTTP::Request->new("GET", $URL);
    my $response = $ua->request($request);

    return unless $response && $response->is_success;
    return $response->content();
}