#! @PERL@ -w # vim:syntax=perl use strict; use lib '@LR_PERL5LIBDIR@'; use Lire::DlfSchema; use Lire::Program qw( :msg :dlf ); use Time::Local; # # set to non-zero only in case of testing. running this script in debug # mode on production-size logs will fill your disk pretty fast. # my $debug = 0; sub debug { $debug and lr_debug($_[0]); } my $lines = 0; my $dlflines = 0; my $errorlines = 0; my $schema = eval { Lire::DlfSchema::load_schema( "database" ); }; lr_err( "error loading database schema: $@" ) if $@; my $dlf_maker = $schema->make_hashref2asciidlf_func( qw/time user remote_host action database query success result connection_id run_id/ ); my $prevtime = ""; my $sqlquery = ""; my $runid; my %sessions; my %hosts; my %databases; my $query = {}; my @queries = (); sub print_dlf { my ($c, $print_only_x ) = @_; my $subroutine = 'print_dlf'; my %dlf = map { $_ => $c->{$_} } qw/time user remote_host action database query success result connection_id/; foreach my $f ( qw/time user remote_host action database query success result connection_id/ ) { $dlf{$f} = $c->{$f}; } my $dlf = $dlf_maker->( \%dlf ); print join( " ", @$dlf ), "\n"; $dlflines++; } sub new_command { debug("new command"); # store previous query (if != null) if ($query->{"time"}) { debug("Storing line: $query"); push(@queries, $query); } # empty temp $query = {}; } sub parse_mysql_line { my ( $line ) = @_; debug("Parsing: $line"); if ($line =~ /mysqld, Version:/i) { # skip startup info line $runid++; } elsif ($line =~ /^Tcp port/i) { # skip startup info line ; } elsif ($line =~ /^Time\s*Id\s*Command\s*Argument/) { # skip header line ; } else { my $query_part = 1; my $time = $prevtime; if ($line =~ s/^(\d\d)(\d\d)(\d\d)\s([\s|\d]\d):(\d\d):(\d\d)\t//) { # new time stamp my ( $year, $month, $day, $hour, $min, $sec ) = ($1, $2, $3, $4, $5, $6); $time = timelocal( $sec, $min, $hour, $day, $month -1, $year ); $prevtime = $time; # no continuation of query new_command(); $query_part = 0; $sqlquery = ""; } elsif ($line =~ /^\t\t[\d ]{8}/) { # no continuation of query new_command(); $query_part = 0; $sqlquery = ""; $line =~ s/^\t\t//; # this is a dirty hack to keep lines in order $time = ++$prevtime; } if ($query_part) { debug("adding query part"); $sqlquery .= $line; $query->{query} = $sqlquery; } else { debug($line); # parse line # # this will raise an error if $line is shorter than 8 chars # we're looking for char 9 .. 18 unless ($line =~ /^[\d ]{8}[a-zA-Z ]{11}/) { # when command is Quit, we have only 11 command-and-space # chars, not 12 # skip these lines die("expected command and session id, i.g. '54 Query'. " . "Found '$line'\n"); } my $command = substr($line, 8, 10); $command =~ s/\s//g; $command = lc($command); my $session_id = substr($line, 0, 7); $session_id =~ s/\s//g; $query->{time} ||= $time; $query->{run_id} ||= $runid; $query->{action} ||= $command; $query->{connection_id} ||= $session_id; debug("time : " . $time); debug("action : " . $command); debug("session_id: " . $session_id); if ($command eq "connect") { # try to deduce the remote_host and user if ($line =~ /(\w*)\x40([\w|\.]*)/) { # user and hostname found my $user = $1; my $remote_host = $2; debug("user : " . $user); debug("remotehost: " . $remote_host); $query->{user} ||= $user; $query->{remote_host} ||= $remote_host; # store user/host for other lines in this session $sessions{$runid . "-" . $session_id} = $user; $hosts{$runid . "-" . $session_id} = $remote_host; } # try to deduce success if ($line =~ /access denied/i) { $query->{success} ||= "no"; } else { $query->{success} ||= "yes"; } } else { $query->{user} ||= $sessions{$runid . "-" . $session_id}; $query->{remote_host} ||= $hosts{$runid . "-" . $session_id}; $query->{database} ||= $databases{$runid . "-" . $session_id}; } if ($command eq "initdb") { # try to deduce the database name my $dbname = substr($line, 20); debug("database : " . $dbname); $query->{database} ||= $dbname; $databases{$runid . "-" . $session_id} = $dbname; } elsif ($command eq "query") { $sqlquery = substr($line, 20); $query->{query} = $sqlquery; } } } } init_dlf_converter( "database" ); $runid = 0; while ( <> ) { chomp; $lines++; eval { parse_mysql_line( $_ ); }; if ($@) { lr_warn( $@ ); lr_warn( "failed to parse '$_'. Skipping." ); $errorlines++; } } # now print all delayed messages not yet sent foreach my $query (@queries) { print_dlf( $query ); } end_dlf_converter( $lines, $dlflines, $errorlines ); __END__ =pod =head1 NAME mysql2dlf - convert mysql logfiles to dlf format =head1 SYNOPSIS B =head1 DESCRIPTION B converts a MySQL query log file to DLF format. Information on MySQL can be found on http://www.mysql.org/. The query log file is defined by the --log[=file] option of B, or the log=file setting in MySQL configuration file. Such a log file looks like e.g. /usr/sbin/mysqld, Version: 3.23.43-debug-log, started with: Tcp port: 3306 Unix socket: /var/run/mysqld/mysqld.sock Time Id Command Argument 011218 15:02:58 1 Connect root@localhost on 1 Query SHOW VARIABLES LIKE 'pid_file' 1 Shutdown 011226 21:35:59 6 Connect joe@localhost on 6 Init DB nmrshiftdb 6 Query SHOW VARIABLES or 42401 Query SELECT bannerID, banner, format, width, height, alt, url FROM banners WHERE act = 'true' AND (keyw = 'fullsize' OR keyw = 'global') The generic database dlf format is described in database.xml. =head1 EXAMPLES To process a log as produced by MySQL: $ mysql2dlf < mysql-log mysql2dlf will be rarely used on its own, but is more likely called by lr_log2report: $ lr_log2report mysql < /var/log/mysql-log =head1 NOTES The parser has been tested for query log files for MySQL 3.23.46 only. =head1 BUGS This converter stores all log files information in one big nested array in memory, before writing. Therefore, memory consumption is linear with the size of the log. =head1 SEE ALSO mysql(1), mysqld(1) =head1 VERSION $Id: mysql2dlf.in,v 1.11 2006/07/23 13:16:33 vanbaal Exp $ =head1 COPYRIGHT Copyright (C) 2001, 2002 Stichting LogReport Foundation LogReport@LogReport.org This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program (see COPYING); if not, check with http://www.gnu.org/copyleft/gpl.html. =head1 AUTHOR Egon Willighagen =cut # Local Variables: # mode: cperl # End: