use strict;

use Tk;
use Tk::Table;
use Tk::Font;
use Time::Local;
use URI;

my $context = {};
@main::kurls = ();

print "Starte Logfile-Analyse.";

&config($context);
&init($context);

&url_known($context,'dummy');

my $data = [];
&rawdata_read($context, $data);

my $mw = MainWindow->new();
$context->{mw} = $mw;
$mw->Label(-text => 'lf2', -height => 2, -font => $ mw->fontCreate(-family => "Georgia", -size => 14, -weight => 'bold'))->pack();
$mw->Button(-text => 'PageImpressions und Visits', -padx => 10, -pady => 10, -command => sub {&dlg_pv($context, $data)})->pack(-fill => 'x');
$mw->Button(-text => 'Referrer-Analyse', -padx => 10, -pady => 10, -command => sub {&dlg_refer($context, $data)})->pack(-fill => 'x');
$mw->Button(-text => 'Unterseiten', -padx => 10, -pady => 10, -command => sub {&dlg_unterseiten($context, $data)})->pack(-fill => 'x');
MainLoop();

&save_known_urls($context);

sub dlg_unterseiten
{
	my $context = shift;
	my $data = shift;
	
	my %unterseiten;
		
	foreach my $r (@$data)
	{
		unless (($r->{command} =~ /GET/) and ($r->{command} =~ /(\.shtml )|(\.html )|(\.htm )|(\.pl )|(\.cgi )/))
		{
			next;
		}
			
		my $subpage = $r->{command};
		unless ($subpage =~ /^GET (.+?) HTTP(.+?)$/)
		{
			die "Kann die GET-Anweisung $subpage nicht parsen, obwohl das möglich sein sollte";		
		}
		$subpage = $1;
				
		unless (exists $unterseiten{$subpage})
		{
			$unterseiten{$subpage} = 0;
		}
		$unterseiten{$subpage}++;
	}
	
	my @sites = sort {$unterseiten{$b} <=> $unterseiten{$a}} (keys %unterseiten);
	my $r_days; # = {pageviews => 0, visits => 0, ips => {}};

	my $dlg = $context->{mw}->Toplevel(-width => 600, -height => 600);
	$dlg->title('Unterseiten');
	
	$dlg->Label(-text => 'Unterseiten', -height => 2, -font => $dlg->fontCreate(-family => "Verdana", -size => 14, -weight => 'bold'))->pack();

	my $table = $dlg->Table(-rows => $#sites,
                          -columns => 4,            
                          -height => 600,
                          -width => 600,              
                          );
	my $labelize = sub {my $parent = shift; my $text = shift; my $bold = shift; my $data = shift;
	 my $label = $parent->Label(-text => $text, -justify => 'left'); my $url = ($text =~ /^\/awd\/content/) ? 'http://www.aboutwebdesign.de' : 'http://www.webdesign-referenz.de'; $url .= $text;  $label->bind('<Button-1>', sub {&startproc($url)}); if ($bold) {$label->configure(-font => $parent->fontCreate(-weight => 'bold', -size => 10))} return $label;};
	
	$table->put(0,0,&$labelize($table, 'Site', 1));
	$table->put(0,1,&$labelize($table, 'PI', 1));
		
 	for (my $j = 0; $j <= (($#sites < 200) ? $#sites : 200); $j++)
 	{
 		$table->put($j+1,0,&$labelize($table, $sites[$j], 0)); 	
 		#$table->put($j+1,0,$sites[$j]); 	
		#$table->put($j+1,1,"stumpel"); 	
 		 		
 		$table->put($j+1,1,&$labelize($table, $unterseiten{$sites[$j]}, 0 ));		
 	}
 	
 	$table->pack();
 		
}

sub dlg_refer
{
	my $context = shift;
	my $data = shift;
	
	my %referer;
		
	foreach my $r (@$data)
	{
		unless (($r->{command} =~ /GET/) and ($r->{command} =~ /(\.shtml )|(\.html )|(\.htm )|(\.pl )|(\.cgi )/))
		{
			next;
		}
			
		my $ref = $r->{referrer};
		my $found = 0;
		for (@$main::selfurls)
		{
			$found = 1 if (index($ref, $_) > -1);
			$found = 1 if (($ref eq '-') or ($ref eq ''));
		}

		next if $found;
		
		my $baseurl;
		eval {
			my $URL = new URI($ref);
			$baseurl = $URL->host();
		};
		
		next if ($@);

				
		unless (exists $referer{$baseurl})
		{
			$referer{$baseurl} = {};
		}
		$referer{$baseurl}->{c}++;
		$referer{$baseurl}->{urls}->{$ref}++;
	}
	
	my @sites = sort {return -1 if ($b =~ /google/); return 1 if ($a =~ /google/); ($referer{$b}->{c} <=> $referer{$a}->{c})} (keys %referer);
	my $r_days; # = {pageviews => 0, visits => 0, ips => {}};

	my $dlg = $context->{mw}->Toplevel(-width => 600, -height => 600);
	$dlg->title('Referrer');
	
	$dlg->Label(-text => 'Referrer', -height => 2, -font => $dlg->fontCreate(-family => "Verdana", -size => 14, -weight => 'bold'))->pack();

	my $table = $dlg->Table(-rows => $#sites,
                          -columns => 4,            
                          -height => 600,
                          -width => 600,              
                          );

	print $#sites;
	
	my $labelize = sub {my $parent = shift; my $text = shift; my $bold = shift; my $data = shift;
	 my $label = $parent->Label(-text => $text, -justify => 'left');  if (defined $data) {$label->bind('<Button-1>', sub {&dlg_refer_detail($context, $data)});} if ($bold) {$label->configure(-font => $parent->fontCreate(-weight => 'bold', -size => 10))} return $label;};
	
	$table->put(0,0,&$labelize($table, 'Site', 1));
	$table->put(0,1,&$labelize($table, 'PI', 1));
		
 	for (my $j = 0; $j <= $#sites; $j++)
 	{
 		$table->put($j+1,0,&$labelize($table, $sites[$j], (not &known_urls_recursive($context,$referer{$sites[$j]}->{urls} )) , $referer{$sites[$j]}->{urls} )); 	
 		#$table->put($j+1,0,$sites[$j]); 	
		#$table->put($j+1,1,"stumpel"); 	
 		 		
 		$table->put($j+1,1,&$labelize($table, $referer{$sites[$j]}->{c}, 0 ));		
 	}
 	
 	$table->pack();
 		
}

sub dlg_refer_detail
{
	my $context = shift;
	my $data = shift;
	my $dlg = $context->{mw}->Toplevel(-width => 600, -height => 600);
	$dlg->title('Referrer');
	
	$dlg->Label(-text => 'Referrer', -height => 2, -font => $dlg->fontCreate(-family => "Verdana", -size => 14, -weight => 'bold'))->pack();

	my @urls = sort {$data->{$b} <=> $data->{$a}} (keys %$data);
	
	
	my $table = $dlg->Table(-rows => $#urls+2,
                          -columns => 4,            
                          -height => 600,
                          -width => 600,              
                          );


	my $labelize = sub {my $parent = shift; my $text = shift; my $bold = shift; my $lt = $text;
	 my $label = $parent->Label(-text => $lt, -justify => 'left');  $label->bind('<Button-1>', sub {&make_known($context, $text); &startproc("$text")}); if ($bold) {$label->configure(-font => $parent->fontCreate(-weight => 'bold', -size => 10))} return $label;};
	
	$table->put(0,0,&$labelize($table, 'URL', 1));
	$table->put(0,1,&$labelize($table, 'PI', 1));
	
	print "Anzahl Suburls: ", $#urls, "\n\n";	
 	for (my $j = 0; $j <= $#urls; $j++)
 	{
 		print $urls[$j], "\n";
 		print $data->{$urls[$j]}, "\n\n";
 		$table->put($j+1,0,&$labelize($table, $urls[$j], not &url_known($context, $urls[$j]) )); 	
 		#$table->put($j+1,0,$sites[$j]); 	
		#$table->put($j+1,1,"stumpel"); 	
 		 		
 		$table->put($j+1,1,&$labelize($table, $data->{$urls[$j]}, 0));		
 	}
 	
 	$table->pack();
 }


sub dlg_pv
{
	my $context = shift;
	my $data = shift;
	
	my $r_days; # = {pageviews => 0, visits => 0, ips => {}};

	foreach my $r (@$data)
	{
		my $d = $r->{date};
		unless ($d =~ /(\d\d)\/(\w+?)\/(\d\d\d\d)\:/)
		{
			&ende($context, "Kann Datum $d nicht zerlegen");
		}	
		my $date = $1.".".$2.".".$3;
		my %months = qw/Jan 0 Feb 1 Mar 2 Apr 3 May 4 Jun 5 Jul 6 Aug 7 Sep 8 Oct 9 Nov 10 Dec 11/;
		unless (exists $r_days->{$date})
		{
			$r_days->{$date}->{unixtime} = timelocal(1,1,1,$1,$months{$2},$3 - 1900);
		}
		
		if (($r->{command} =~ /GET/) and ($r->{command} =~ /(\.shtml )|(\.html )|(\.htm )|(\.pl )|(\.cgi )/))
		{
			$r_days->{$date}->{pageviews}++;
			
			unless (exists $r_days->{$date}->{ips}->{$r->{client}})
			{
				$r_days->{$date}->{visits}++;
				$r_days->{$date}->{ips}->{$r->{client}} = 1;
			}
		}				
	}
	
	my $dlg = $context->{mw}->Toplevel(-width => 600, -height => 600);
	$dlg->title('Pageimpressions und Visits');
	
	$dlg->Label(-text => 'Pageimpressions und Visits', -height => 2, -font => $dlg->fontCreate(-family => "Verdana", -size => 14, -weight => 'bold'))->pack();
	
	my @days = sort {$r_days->{$a}->{unixtime} <=> $r_days->{$b}->{unixtime}} (keys %$r_days);
	
	my $table = $dlg->Table(-rows => $#days*2+2,
                          -columns => 4,            
                          -height => 600,
                          -width => 600,              
                          );

	my $labelize = sub {my $parent = shift; my $text = shift; my $bold = shift;
	 my $label = $parent->Label(-text => $text);  if ($bold) {$label->configure(-font => $parent->fontCreate(-weight => 'bold', -size => 10))} return $label;};
	
	$table->put(0,1,&$labelize($table, 'Pageimpressions', 1));
	$table->put(0,2,&$labelize($table, 'Visits', 1));
	$table->put(0,3,&$labelize($table, 'Ratio', 1));	
		
 	for (my $j = 0; $j <= $#days; $j++)
 	{
 		$table->put($j*2+1,0,&$labelize($table, $days[$j], 1 ));
 		#print $days[$j], "\n";
 		$table->put($j*2+1,1,&$labelize($table, $r_days->{$days[$j]}->{pageviews},0));
 		$table->put($j*2+1,2,&$labelize($table, $r_days->{$days[$j]}->{visits},0)); 		
 		$table->put($j*2+1,3,&$labelize($table, sprintf('%.1f',($r_days->{$days[$j]}->{pageviews}/$r_days->{$days[$j]}->{visits})),0)); 		
 	}
 	
 	$table->pack();
 		
}

sub rawdata_read
{
	my $context = shift;
	my $aref = shift;
	
	chdir($main::logdir) or &ende($context, "Kann nicht nicht $main::logdir wechseln");
	
	my @files = <*.txt>;
	foreach my $file (@files)
	{
		open(DATEI, "<$file") or &ende($context, "Kann Logfile-Datei $file nicht oeffnen");
		while(<DATEI>)
		{
			my $line = $_;
			my $linehash = {};
			$linehash = &linetohash($context, $line);
			push(@$aref, $linehash);
		}
		close(DATEI) or &ende($context, "Kann Logfile-Datei $file nicht schliessen");
	}
	
	chdir('..') or &ende($context, "Kann nicht ins Hauptverzeichnis zurückwechseln");
}

sub linetohash
{
	my $context = shift;
	my $line = shift;
	
	# Folgender Code ist explizit zur Strato-Logfile-Analyse vorgesehen
	# Wer ein anderes Format verwendet, mag ihn gern umschreiben
	
	my $r = {};
	
	my @e = ($line =~ /^(\S+) \- \- \[(.+?)\] "(.+?)" (\d+?) ([\-\d]+?) "(.*?)" "(.*?)"/);
	if ($#e == -1)
	{
		print "PROBLEM: \t $line\n\n";
		return;
	}
	
	my @elems = qw(client date command code byte referrer browser);
	my $i = -1;
	for (@elems)
	{
		$i++;
		$r->{$_} = $e[$i];
	}

	return $r;
}

sub config
{
	my $context = shift;
	
	$main::verzeichnis = "C:\\EigeneDateien\\Perl\\lf2";
	$main::logdir = "input";
	$main::selfurls = ['http://www.aboutwebdesign.de', 'http://www.webdesign-referenz.de'];
	$main::urlstxt = $main::verzeichnis."\\urls.txt";
}

sub startproc
{
	my $cmd = shift;
	
    use Win32::Process;
    use Win32;

    sub ErrorReport{
        print Win32::FormatMessage( Win32::GetLastError() );
    }
    
    my $ProcessObj;
    Win32::Process::Create($ProcessObj,
                                "C:\\Programme\\Mozilla Firefox\\firefox.exe",
                                "firefox $cmd",
                                0,
                                NORMAL_PRIORITY_CLASS,
                                ".")|| die ErrorReport();
}

sub url_known
{
	my $context = shift;
	my $url = shift;
	
	if ($#main::kurls == -1)
	{
		open(DATEI, "<$main::urlstxt") or &ende($context, "Kann Logfile-Datei $main::urlstxt nicht oeffnen");
		while(<DATEI>)
		{
			chomp;
			push(@main::kurls, $_);			
		}
		close(DATEI) or &ende($context, "Kann Logfile-Datei $main::urlstxt nicht schliessen");
		
		push(@main::kurls, 'dummy');
		
	}
	
	for (@main::kurls)
	{
		return 1 if ($_ eq $url);
	}
	return 0;
}

sub make_known
{
	my $context = shift;
	my $url = shift;
	unless (&url_known($context, $url))
	{
		push(@main::kurls, $url);
	}
}

sub save_known_urls
{
	my $context = shift;

		open(DATEI, ">$main::urlstxt") or &ende($context, "Kann Logfile-Datei $main::urlstxt nicht oeffnen");
		for(@main::kurls)
		{
			print DATEI $_, "\n"
		}
		close(DATEI) or &ende($context, "Kann Logfile-Datei $main::urlstxt nicht schliessen");
}

sub known_urls_recursive
{
	my $context = shift;
	my $data = shift;
	
	my @keys = keys %$data;
	
	my $known = 1;
	
	for (@keys)
	{
		$known = ($known and &url_known($context, $_));
	}
	return $known;
}	

sub init
{
	my $context = shift;
	

	chdir ($main::verzeichnis) or &ende($context, "Kann nicht nach $main::Verzeichnis wechseln");
}

sub ende
{
	my $context = shift;
	my $msg = shift;
	die $msg;
}