use strict; use Tk; use Tk::Table; use Tk::Font; use Time::Local; use URI; my $context = {}; @main::kurls = (); print "Starte Logfile-Analyse."; &config($context); &init($context); &url_known($context,'dummy'); my $data = []; &rawdata_read($context, $data); my $mw = MainWindow->new(); $context->{mw} = $mw; $mw->Label(-text => 'lf2', -height => 2, -font => $ mw->fontCreate(-family => "Georgia", -size => 14, -weight => 'bold'))->pack(); $mw->Button(-text => 'PageImpressions und Visits', -padx => 10, -pady => 10, -command => sub {&dlg_pv($context, $data)})->pack(-fill => 'x'); $mw->Button(-text => 'Referrer-Analyse', -padx => 10, -pady => 10, -command => sub {&dlg_refer($context, $data)})->pack(-fill => 'x'); $mw->Button(-text => 'Unterseiten', -padx => 10, -pady => 10, -command => sub {&dlg_unterseiten($context, $data)})->pack(-fill => 'x'); MainLoop(); &save_known_urls($context); sub dlg_unterseiten { my $context = shift; my $data = shift; my %unterseiten; foreach my $r (@$data) { unless (($r->{command} =~ /GET/) and ($r->{command} =~ /(\.shtml )|(\.html )|(\.htm )|(\.pl )|(\.cgi )/)) { next; } my $subpage = $r->{command}; unless ($subpage =~ /^GET (.+?) HTTP(.+?)$/) { die "Kann die GET-Anweisung $subpage nicht parsen, obwohl das möglich sein sollte"; } $subpage = $1; unless (exists $unterseiten{$subpage}) { $unterseiten{$subpage} = 0; } $unterseiten{$subpage}++; } my @sites = sort {$unterseiten{$b} <=> $unterseiten{$a}} (keys %unterseiten); my $r_days; # = {pageviews => 0, visits => 0, ips => {}}; my $dlg = $context->{mw}->Toplevel(-width => 600, -height => 600); $dlg->title('Unterseiten'); $dlg->Label(-text => 'Unterseiten', -height => 2, -font => $dlg->fontCreate(-family => "Verdana", -size => 14, -weight => 'bold'))->pack(); my $table = $dlg->Table(-rows => $#sites, -columns => 4, -height => 600, -width => 600, ); my $labelize = sub {my $parent = shift; my $text = shift; my $bold = shift; my $data = shift; my $label = $parent->Label(-text => $text, -justify => 'left'); my $url = ($text =~ /^\/awd\/content/) ? 'http://www.aboutwebdesign.de' : 'http://www.webdesign-referenz.de'; $url .= $text; $label->bind('', sub {&startproc($url)}); if ($bold) {$label->configure(-font => $parent->fontCreate(-weight => 'bold', -size => 10))} return $label;}; $table->put(0,0,&$labelize($table, 'Site', 1)); $table->put(0,1,&$labelize($table, 'PI', 1)); for (my $j = 0; $j <= (($#sites < 200) ? $#sites : 200); $j++) { $table->put($j+1,0,&$labelize($table, $sites[$j], 0)); #$table->put($j+1,0,$sites[$j]); #$table->put($j+1,1,"stumpel"); $table->put($j+1,1,&$labelize($table, $unterseiten{$sites[$j]}, 0 )); } $table->pack(); } sub dlg_refer { my $context = shift; my $data = shift; my %referer; foreach my $r (@$data) { unless (($r->{command} =~ /GET/) and ($r->{command} =~ /(\.shtml )|(\.html )|(\.htm )|(\.pl )|(\.cgi )/)) { next; } my $ref = $r->{referrer}; my $found = 0; for (@$main::selfurls) { $found = 1 if (index($ref, $_) > -1); $found = 1 if (($ref eq '-') or ($ref eq '')); } next if $found; my $baseurl; eval { my $URL = new URI($ref); $baseurl = $URL->host(); }; next if ($@); unless (exists $referer{$baseurl}) { $referer{$baseurl} = {}; } $referer{$baseurl}->{c}++; $referer{$baseurl}->{urls}->{$ref}++; } my @sites = sort {return -1 if ($b =~ /google/); return 1 if ($a =~ /google/); ($referer{$b}->{c} <=> $referer{$a}->{c})} (keys %referer); my $r_days; # = {pageviews => 0, visits => 0, ips => {}}; my $dlg = $context->{mw}->Toplevel(-width => 600, -height => 600); $dlg->title('Referrer'); $dlg->Label(-text => 'Referrer', -height => 2, -font => $dlg->fontCreate(-family => "Verdana", -size => 14, -weight => 'bold'))->pack(); my $table = $dlg->Table(-rows => $#sites, -columns => 4, -height => 600, -width => 600, ); print $#sites; my $labelize = sub {my $parent = shift; my $text = shift; my $bold = shift; my $data = shift; my $label = $parent->Label(-text => $text, -justify => 'left'); if (defined $data) {$label->bind('', sub {&dlg_refer_detail($context, $data)});} if ($bold) {$label->configure(-font => $parent->fontCreate(-weight => 'bold', -size => 10))} return $label;}; $table->put(0,0,&$labelize($table, 'Site', 1)); $table->put(0,1,&$labelize($table, 'PI', 1)); for (my $j = 0; $j <= $#sites; $j++) { $table->put($j+1,0,&$labelize($table, $sites[$j], (not &known_urls_recursive($context,$referer{$sites[$j]}->{urls} )) , $referer{$sites[$j]}->{urls} )); #$table->put($j+1,0,$sites[$j]); #$table->put($j+1,1,"stumpel"); $table->put($j+1,1,&$labelize($table, $referer{$sites[$j]}->{c}, 0 )); } $table->pack(); } sub dlg_refer_detail { my $context = shift; my $data = shift; my $dlg = $context->{mw}->Toplevel(-width => 600, -height => 600); $dlg->title('Referrer'); $dlg->Label(-text => 'Referrer', -height => 2, -font => $dlg->fontCreate(-family => "Verdana", -size => 14, -weight => 'bold'))->pack(); my @urls = sort {$data->{$b} <=> $data->{$a}} (keys %$data); my $table = $dlg->Table(-rows => $#urls+2, -columns => 4, -height => 600, -width => 600, ); my $labelize = sub {my $parent = shift; my $text = shift; my $bold = shift; my $lt = $text; my $label = $parent->Label(-text => $lt, -justify => 'left'); $label->bind('', sub {&make_known($context, $text); &startproc("$text")}); if ($bold) {$label->configure(-font => $parent->fontCreate(-weight => 'bold', -size => 10))} return $label;}; $table->put(0,0,&$labelize($table, 'URL', 1)); $table->put(0,1,&$labelize($table, 'PI', 1)); print "Anzahl Suburls: ", $#urls, "\n\n"; for (my $j = 0; $j <= $#urls; $j++) { print $urls[$j], "\n"; print $data->{$urls[$j]}, "\n\n"; $table->put($j+1,0,&$labelize($table, $urls[$j], not &url_known($context, $urls[$j]) )); #$table->put($j+1,0,$sites[$j]); #$table->put($j+1,1,"stumpel"); $table->put($j+1,1,&$labelize($table, $data->{$urls[$j]}, 0)); } $table->pack(); } sub dlg_pv { my $context = shift; my $data = shift; my $r_days; # = {pageviews => 0, visits => 0, ips => {}}; foreach my $r (@$data) { my $d = $r->{date}; unless ($d =~ /(\d\d)\/(\w+?)\/(\d\d\d\d)\:/) { &ende($context, "Kann Datum $d nicht zerlegen"); } my $date = $1.".".$2.".".$3; my %months = qw/Jan 0 Feb 1 Mar 2 Apr 3 May 4 Jun 5 Jul 6 Aug 7 Sep 8 Oct 9 Nov 10 Dec 11/; unless (exists $r_days->{$date}) { $r_days->{$date}->{unixtime} = timelocal(1,1,1,$1,$months{$2},$3 - 1900); } if (($r->{command} =~ /GET/) and ($r->{command} =~ /(\.shtml )|(\.html )|(\.htm )|(\.pl )|(\.cgi )/)) { $r_days->{$date}->{pageviews}++; unless (exists $r_days->{$date}->{ips}->{$r->{client}}) { $r_days->{$date}->{visits}++; $r_days->{$date}->{ips}->{$r->{client}} = 1; } } } my $dlg = $context->{mw}->Toplevel(-width => 600, -height => 600); $dlg->title('Pageimpressions und Visits'); $dlg->Label(-text => 'Pageimpressions und Visits', -height => 2, -font => $dlg->fontCreate(-family => "Verdana", -size => 14, -weight => 'bold'))->pack(); my @days = sort {$r_days->{$a}->{unixtime} <=> $r_days->{$b}->{unixtime}} (keys %$r_days); my $table = $dlg->Table(-rows => $#days*2+2, -columns => 4, -height => 600, -width => 600, ); my $labelize = sub {my $parent = shift; my $text = shift; my $bold = shift; my $label = $parent->Label(-text => $text); if ($bold) {$label->configure(-font => $parent->fontCreate(-weight => 'bold', -size => 10))} return $label;}; $table->put(0,1,&$labelize($table, 'Pageimpressions', 1)); $table->put(0,2,&$labelize($table, 'Visits', 1)); $table->put(0,3,&$labelize($table, 'Ratio', 1)); for (my $j = 0; $j <= $#days; $j++) { $table->put($j*2+1,0,&$labelize($table, $days[$j], 1 )); #print $days[$j], "\n"; $table->put($j*2+1,1,&$labelize($table, $r_days->{$days[$j]}->{pageviews},0)); $table->put($j*2+1,2,&$labelize($table, $r_days->{$days[$j]}->{visits},0)); $table->put($j*2+1,3,&$labelize($table, sprintf('%.1f',($r_days->{$days[$j]}->{pageviews}/$r_days->{$days[$j]}->{visits})),0)); } $table->pack(); } sub rawdata_read { my $context = shift; my $aref = shift; chdir($main::logdir) or &ende($context, "Kann nicht nicht $main::logdir wechseln"); my @files = <*.txt>; foreach my $file (@files) { open(DATEI, "<$file") or &ende($context, "Kann Logfile-Datei $file nicht oeffnen"); while() { my $line = $_; my $linehash = {}; $linehash = &linetohash($context, $line); push(@$aref, $linehash); } close(DATEI) or &ende($context, "Kann Logfile-Datei $file nicht schliessen"); } chdir('..') or &ende($context, "Kann nicht ins Hauptverzeichnis zurückwechseln"); } sub linetohash { my $context = shift; my $line = shift; # Folgender Code ist explizit zur Strato-Logfile-Analyse vorgesehen # Wer ein anderes Format verwendet, mag ihn gern umschreiben my $r = {}; my @e = ($line =~ /^(\S+) \- \- \[(.+?)\] "(.+?)" (\d+?) ([\-\d]+?) "(.*?)" "(.*?)"/); if ($#e == -1) { print "PROBLEM: \t $line\n\n"; return; } my @elems = qw(client date command code byte referrer browser); my $i = -1; for (@elems) { $i++; $r->{$_} = $e[$i]; } return $r; } sub config { my $context = shift; $main::verzeichnis = "C:\\EigeneDateien\\Perl\\lf2"; $main::logdir = "input"; $main::selfurls = ['http://www.aboutwebdesign.de', 'http://www.webdesign-referenz.de']; $main::urlstxt = $main::verzeichnis."\\urls.txt"; } sub startproc { my $cmd = shift; use Win32::Process; use Win32; sub ErrorReport{ print Win32::FormatMessage( Win32::GetLastError() ); } my $ProcessObj; Win32::Process::Create($ProcessObj, "C:\\Programme\\Mozilla Firefox\\firefox.exe", "firefox $cmd", 0, NORMAL_PRIORITY_CLASS, ".")|| die ErrorReport(); } sub url_known { my $context = shift; my $url = shift; if ($#main::kurls == -1) { open(DATEI, "<$main::urlstxt") or &ende($context, "Kann Logfile-Datei $main::urlstxt nicht oeffnen"); while() { chomp; push(@main::kurls, $_); } close(DATEI) or &ende($context, "Kann Logfile-Datei $main::urlstxt nicht schliessen"); push(@main::kurls, 'dummy'); } for (@main::kurls) { return 1 if ($_ eq $url); } return 0; } sub make_known { my $context = shift; my $url = shift; unless (&url_known($context, $url)) { push(@main::kurls, $url); } } sub save_known_urls { my $context = shift; open(DATEI, ">$main::urlstxt") or &ende($context, "Kann Logfile-Datei $main::urlstxt nicht oeffnen"); for(@main::kurls) { print DATEI $_, "\n" } close(DATEI) or &ende($context, "Kann Logfile-Datei $main::urlstxt nicht schliessen"); } sub known_urls_recursive { my $context = shift; my $data = shift; my @keys = keys %$data; my $known = 1; for (@keys) { $known = ($known and &url_known($context, $_)); } return $known; } sub init { my $context = shift; chdir ($main::verzeichnis) or &ende($context, "Kann nicht nach $main::Verzeichnis wechseln"); } sub ende { my $context = shift; my $msg = shift; die $msg; }