#!/usr/bin/perl
# gaim2pidgin.pl
# author:  towo <towo@ydal.de>
# version: 3
# license: CC-BY

use strict;

# Lookup table from three-letter English month abbreviation to its
# zero-padded, two-digit month number ('Jan' => '01' ... 'Dec' => '12').
# Built from the calendar-ordered list so the numbers cannot drift.
my %shortmonths = do {
	my $number = 0;
	map { ( $_ => sprintf('%02d', ++$number) ) }
		qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
};

# Convert every gaim log named on the command line.
#
# For each input file FOO.log a directory FOO is used (created if missing)
# and every chat session found in the log is written, with all HTML tags
# removed, to FOO/YYYY-MM-DD.HHMMSS.txt, named after the timestamp in the
# session's "---- New Conversation @ ..." line.  Any problem is reported
# with warn() and the remainder of that input file is skipped.
FILE: foreach my $file (@ARGV) {
	# sanity checks
	unless (-f $file) {
		warn "$file is not a file.\n";
		next FILE;
	}

	# Three-argument open with a lexical handle: the file name is never
	# parsed for mode characters or pipes, and the handle is closed
	# automatically on every "next FILE" path.
	my $log;
	unless (open($log, '<', $file)) {
		warn "Unable to open $file for reading: $!\n";
		next FILE;
	}

	# get file header; an empty file has none, which then fails the
	# header check below instead of operating on undef
	my $header = <$log>;
	$header = '' unless defined $header;
	chomp($header);
	$header =~ s#<.*?>##g;

	# the target directory is the file name without its .log suffix
	my $target = $file;
	$target =~ s/\.log$//;

	# check header for correctness
	unless ($header =~ m{^(<HTML><HEAD><TITLE>)?IM Sessions with .*?(</TITLE></HEAD><BODY BGCOLOR=".*?">)?$}i) {
		warn "$file does not seem to be a gaim conversation.\n";
		next FILE;
	}

	# read the rest of the log into memory
	my @contents = <$log>;
	close($log);

	# parse log file (one loop ^= one chat session)
	while (@contents) {
		# get session identifier and strip the known HTML tags from it
		chomp(my $session = shift @contents);
		$session =~ s#</?(FONT|B|I|ALIGN|HTML|HEAD|TITLE|HR|BR|BODY|H3).*?>##ig;

		# sanity check for the session identifier; the timestamp fields
		# are copied into named variables right away so that no later
		# match can clobber them
		my ($month, $day, $hour, $minute, $second, $year) =
			$session =~ m/^ ?---- New Conversation @ \w{3} (\w{3}) ([0-9 ]{2}) (\d{2}):(\d{2}):(\d{2}) (\d{4}) ----$/;
		unless (defined $year) {
			warn "Could not recognize session identifier: «$session»\n";
			next FILE;
		}

		# an unknown month would otherwise yield a file name with an
		# empty month field
		unless (exists $shortmonths{$month}) {
			warn "Unknown month abbreviation '$month' in session identifier: «$session»\n";
			next FILE;
		}

		# create target identifier from the date; %02d turns a
		# space-padded day such as " 5" into "05"
		my $date = sprintf('%s-%s-%02d.%s%s%s',
			$year, $shortmonths{$month}, $day, $hour, $minute, $second);
		my $identifier = "$target/$date.txt";

		# sanity check for target directory
		unless (-d $target or mkdir($target)) {
			warn "Could not create directory $target: $!\n";
			next FILE;
		}

		# open output file
		my $output;
		unless (open($output, '>', $identifier)) {
			warn "Could not write to $identifier: $!\n";
			next FILE;
		}

		# extract log to log file: copy lines, stripped of HTML tags, up
		# to the next session identifier or the end of the input.  The
		# emptiness test comes first so $contents[0] is never undef, and
		# printing goes to the handle explicitly so the default output
		# handle is left alone.
		while (@contents
			and $contents[0] !~ m/^(<HR><BR><H3 Align=Center>)? ?---- New Conversation/) {
			my $line = shift @contents;
			$line =~ s#<.*?>##g;
			print {$output} $line;
		}

		# buffered write errors (e.g. disk full) only surface at close
		unless (close($output)) {
			warn "Could not finish writing $identifier: $!\n";
			next FILE;
		}
	}
}

