💾 Archived View for gmn.clttr.info › sources › photo-stats.git › tree › phosta.txt captured on 2021-12-03 at 14:04:38.

View Raw

More Information

➡️ Next capture (2023-01-29)

-=-=-=-=-=-=-

#!/usr/bin/perl
# Copyright(c) René Wagner 2019-2020
# https://src.clttr.info/rwa/photo-stats
# published under BSD 3-Clause License

use Modern::Perl '2019';
no warnings 'experimental';
use POSIX 'lround';
use Text::SimpleTable::AutoWidth;
use Term::ANSIColor;
use Scalar::Util 'looks_like_number';
use DBI;
use Cwd;
use Getopt::Std;
use File::Spec::Functions;
use File::HomeDir;
use Data::Dumper;

$Getopt::Std::STANDARD_HELP_VERSION = 'true';

my $VERSION = '1.1';
my $PROGRAM = 'Photo Stats';

my $configfile = catfile(File::HomeDir->my_home, '.phosta.conf');
my $aliasfile = catfile(File::HomeDir->my_home, '.phosta.alias');

# read commandline switches
our $opt_E='jpg,jpeg';
our $opt_D=getcwd . "/photo_stats.db";
our $opt_p='';
our $opt_t=undef;
our $opt_c=0;
our $opt_s=undef;
our $opt_g=undef;
our $opt_v=0;
our $opt_n=undef;
our $opt_o='count';
our $opt_r=0;
our $opt_f=undef;
our $opt_e=0;
our $opt_W=80;
our $opt_T='auto';

getconfig();

getopts('vcrp:n:g:s:t:D:E:o:f:eW:T:') or die "Invalid parameters provided! See 'phosta --help' for more details.";
validate() or die "Invalid parameters provided! See 'phosta --help' for more details.";
my $dsn = "DBI:SQLite:dbname=$opt_D";

if ($opt_c) { unlink $opt_D; }
if ( !-e $opt_D ) { create_db($opt_D) or die 'database could not be created'; }

writeconfig();

if ($opt_p ne '') { 
    populate();
    exit 0;
}

query();

exit 0;

sub validate
{
    my @group_params = ('year', 'month', 'week', 'hour');
    !defined($opt_g) || grep /$opt_g/, @group_params or return 0;

    my @select_params = qw(file maker model lensmake lens aperture exposuretime iso focallength focallength35mm);
    if (defined($opt_s))
    {
        foreach (split (/,/, $opt_s))
        {
            grep /$_/, @select_params or return 0;
        }
    }

    my @order_params = (@group_params, @select_params, 'count');
    if (defined($opt_o))
    {
        foreach (split (/,/, $opt_o))
        {
            grep /$_/, @order_params or return 0;
        }
    }
    (!defined($opt_n) || (looks_like_number($opt_n) && $opt_n > 0)) or return 0;
    (looks_like_number($opt_W) && $opt_W >= 50) or return 0;

    !defined($opt_t) || $opt_t =~ /^([0-9]{8}){0,1}\-([0-9]{8}){0,1}$/ or return 0;

    $opt_E =~ /^([a-z]{2,4}){1,}(\,[a-z]{2,4}){0,}$/ or return 0;

    !defined($opt_f) || $opt_f =~ /^(,{0,1}[\w\-]{2,}(!){0,1}=[\w\-\s]{1,}){1,}$/ or return 0;

    my @table_params = qw(on off auto);
    $opt_T ~~ @table_params or return 0;
    return 1;
}

sub getconfig
{
	if ( -f $configfile ) { do $configfile; }
}

sub writeconfig
{
    open(my $filehandle, '>', $configfile) or die "Could not open file '$configfile': $!";
    say $filehandle '$opt_D="'. $opt_D .'";';
    say $filehandle '$opt_E="'. $opt_E .'";';
    say $filehandle '$opt_W="'. $opt_W .'";';
	say $filehandle '$opt_T="'. $opt_T .'";';
	close $filehandle;
}

sub getaliases
{
    my @aliases;
    if ( -f $aliasfile )
    {
        open(FH, '<', $aliasfile) or die "Could not read file '$aliasfile': $!";

        while(<FH>)
        {
            chomp $_;
            my @line = split /\t{1,}/, $_;
            push @aliases, \@line;
        }

        close(FH);
    }

    if ( $opt_v )
    {
        say '### Aliases found';
        foreach (@aliases) { say join(' -> ', @$_) }
    }

    return @aliases;
}

sub trim { my $s = shift; $s =~ s/^\s+|\s+$//g; return $s };

sub create_db
{
    my ($dbfile) = @_;
    my $dbh = DBI->connect($dsn, '', '', { RaiseError => 1 }) or die $DBI::errstr;

    my $stmt = 'CREATE TABLE photos (file TEXT PRIMARY KEY, maker TEXT, model TEXT, lensmake TEXT, lens TEXT, focallength INTEGER, focallength35mm INTEGER, aperture DECIMAL, exposuretime TEXT, iso INTEGER, flash TEXT, datetimeoriginal DATETIME);';
    !$opt_v or say "### DB Statement: $stmt";
    my $rv = $dbh->do($stmt);
    $dbh->disconnect();
}

sub populate
{
    say "Scanning $opt_p images...this may take a while...";
   
    my $extensions = ' -ext ' . join(' -ext ', split(/\,/, $opt_E)) .' ';
    my @aliases = getaliases();
    
    my $cmd = "exiftool -fast2 -r -m -f -p '\$filepath##\$make##\$model##\$lensmake##\$lens##\$lensmodel##\$focallength##\$focallengthin35mmformat##\$aperture##\$exposuretime##\$shutterspeed##\$iso##\$flash##\$datetimeoriginal' -d \"%Y-%m-%d %H:%M:%S\" " . $extensions . "\"$opt_p\"";
    !$opt_v or say "### Exiftool command: $cmd";
    my @lines = `$cmd`;

    my $dbh = DBI->connect($dsn, '', '', { RaiseError => 1, AutoCommit => 0 }) or die $DBI::errstr;
    my $errorcount = 0;
    foreach (@lines)
    {
        chomp $_;
        my @line = split(/#/, $_); 

        map { s/^\s+|\s+$//g; } @line;
        my ($file, $maker, $model, $lensmake, $lens, $lensmodel, $fl, $fl35, $apert, $exposuretime, $ss, $iso, $flash, $datetimeoriginal) = @line;
        my @forbidden_content = ('Unknown', 'N/A', '', '-');
		if (!defined($maker) || $maker ~~ @forbidden_content) { $maker = '-'};
        if (!defined($model) || $model ~~ @forbidden_content) { $model = '-'};
		if (!defined($lens) || $lens ~~ @forbidden_content) { $lens = $lensmodel };
        if (!defined($lens) || $lens ~~ @forbidden_content) { $lens = '-' };
		$apert = ($apert ne '-') ? sprintf("%.1f", $apert) : $apert;
        $fl = ($fl ne '-') ? sprintf("%.1f", $fl) : $fl;
        $fl35 = ($fl35 ne '-') ? POSIX::lround($fl35) : $fl35;
        $exposuretime = (looks_like_number($exposuretime) && $exposuretime < 1 && $exposuretime != 0) ? "1/". POSIX::lround(1/$exposuretime) : $exposuretime;
        $datetimeoriginal =~ /\d{4}\-\d{2}\-\d{2}\s\d{2}:\d{2}:\d{2}$/ or $datetimeoriginal = '-';

        foreach (@aliases)
        {
            $lens =~ s/@$_[0]/@$_[1]/;
            $maker =~ s/@$_[0]/@$_[1]/;
            $model =~ s/@$_[0]/@$_[1]/;
        }

        my $stmt = "INSERT OR REPLACE INTO photos (file, maker, model, lensmake, lens, focallength, focallength35mm, aperture, exposuretime, iso, flash, datetimeoriginal)
               VALUES ('$file', '$maker', '$model', '$lensmake', '$lens', '$fl', '$fl35', '$apert', '$exposuretime', '$iso', '$flash', '$datetimeoriginal')"; 
        my $rv = $dbh->do($stmt) or $errorcount++; 
    }
    $dbh->commit();
    $dbh->disconnect();
    say sprintf('%5d', $errorcount). ' image files skipped due to errors';
    say "Updated database $opt_D.";
}

sub get_timerange
{
    my ($from, $to) = split(/\-/, $opt_t);
     
    $from = $from ne '' ? substr($from, 0, 4) .'-'. substr($from, 4, 2) .'-'. substr($from, 6, 2) : undef;
    $to = $to ne '' ? substr($to, 0, 4) .'-'. substr($to, 4, 2) .'-'. substr($to, 6, 2) : undef;

    return ($from, $to);
}

sub get_filters
{
    if (defined($opt_f))
    {
        my @returnarray;
        my (@filters) = split /\,/, $opt_f;
        foreach (@filters)
        {
            my @line = split /(!{0,1}=)/, $_;
            push @returnarray, \@line;
        }
        return @returnarray;
    }
    return;
}

sub get_sql
{
    my $fieldlist = '';
    my @grouparray;
    my $orderlist = '';
    my @wherearray; 
    given ($opt_g) 
    {
        when ('month')
        {
            $fieldlist = "IFNULL(strftime('%Y/%m', datetimeoriginal), '-') as month,";
            push @grouparray, "strftime('%Y/%m', datetimeoriginal)";
        }
        when ('week')
        {
            $fieldlist = "IFNULL(strftime('%Y/%W', datetimeoriginal), '-') as week,";
            push @grouparray, "strftime('%Y/%W', datetimeoriginal)";
        }
        when ('year')
        {
            $fieldlist = "IFNULL(strftime('%Y', datetimeoriginal), '-') as year,";
            push @grouparray, "strftime('%Y', datetimeoriginal)";
        }
        when ('hour')
        {
            $fieldlist = "IFNULL(strftime('%H', datetimeoriginal), '-') as hour,";
            push @grouparray, "strftime('%H', datetimeoriginal)";
        }
    }

    if (defined($opt_s))
    {
        $fieldlist = $fieldlist . $opt_s. ', ';
        push @grouparray, $opt_s; 
    }

    if (defined($opt_o))
    {
        my $sorter = $opt_r ? ' ASC' : ' DESC';
        my @order = split(/\,/, $opt_o);
        $orderlist = ' ORDER BY '. join("$sorter, ", @order) .$sorter;
    }

    if (defined($opt_t))
    {
        my ($from, $to) = get_timerange();
        !defined($from) or push @wherearray, "datetimeoriginal >= '$from'";
        !defined($to) or push @wherearray, "datetimeoriginal <= '$to'"; 
    }


    foreach (get_filters())
    {
        push @wherearray, @{$_}[0] . (@{$_}[1] =~ /!/ ? ' NOT' : '')  . " LIKE '%@{$_}[2]%'";
    }

    my $wherelist = '';
    my $grouplist = '';
    if (scalar @grouparray) { $grouplist = 'GROUP BY '. join(', ', @grouparray); }
    if (scalar @wherearray) { $wherelist = 'WHERE ' . join(' AND ',  @wherearray); }

    return "SELECT $fieldlist count(file) as count FROM photos $wherelist $grouplist $orderlist";
}

sub query
{
    my $dbh = DBI->connect($dsn, '', '', { RaiseError => 1 }) or die $DBI::errstr;

    my $total_count = $dbh->selectrow_array("SELECT count(file) from photos");

    say "Querying database $opt_D with $total_count entries...";
    say '';
    $total_count > 0 or return;
    my $sql = get_sql();
    !$opt_v or say '### SQL Statement: '. $sql;
    my $stmt = $dbh->prepare($sql);
    $stmt->execute();
    
    my $rows = $stmt->fetchall_arrayref;
    my $headers = $stmt->{NAME};
    $dbh->disconnect();

    push @$headers, 'percent';
    my $sum = 0;
    foreach (@$rows)
    {
        $sum += @$_[$#$_];
    }
    foreach (@$rows)
    {
        my $perc = $sum > 0 ? sprintf("%.1f", (@$_[$#$_] / $sum)* 100). '%' : '';
        push @$_, $perc;
    }

    my ($limited_rows, $skippedlines) = limit_results($rows);

    print_filterinfo(scalar @$limited_rows, $skippedlines, $sum);
    if ($opt_T eq 'off' || (@$headers == 3 and $opt_T eq 'auto') )
    {
        result_to_graph($headers, $limited_rows, $sum);
    }
    else
    {
        result_to_table($headers, $limited_rows, $sum); 
    }
}

sub limit_results
{
    my ( $rows ) = @_;
    my $skippedlines = 0;
    my $currentlines = 0;
    my @limited_rows;

    foreach (@$rows)
    {
        if (!defined($opt_n) || ($currentlines < $opt_n))
        {
            if ($opt_e)
            {
                my $skip = 1;
                for (my $i=0; $i < (scalar @$_ - 2); $i++)
                {
                    @$_[$i] eq '-' or $skip = 0;
                }
                if ($skip)
                {
                    $skippedlines++;
                    next;
                }
            }
            push @limited_rows, $_;
            $currentlines++;
        }
        else
        {
            $skippedlines++;
        }
    }
    return (\@limited_rows, $skippedlines);
}

sub print_filterinfo
{
    my ($currentlines, $skippedlines, $sum) = @_;
    if (defined($opt_f) || defined($opt_t))
    {
        say 'Applied filters:';
        if ($opt_t)
        {
            my ($from, $to) = get_timerange();
            if (defined($from) && defined($to)) { say '   - images taken between '. color('bold'). $from .color('reset') .' and '. color('bold'). $to . color('reset'); }
            if (defined($from) && !defined($to)) { say '   - images taken since '. color('bold'). $from . color('reset'); }
            if (defined($to) && !defined($from)) { say '   - images taken till '. color('bold'). $to . color('reset'); }
        }
        foreach (get_filters())
        {
            say '   - ' .color('italic') . @{$_}[0] .color('reset') . (@{$_}[1] =~ /!/ ? ' NOT' : '')  . ' LIKE ' .color('bold') .@{$_}[2]. color('reset');;
        }
        say '';
    }

    if (defined($opt_n) || $opt_e) 
    {
        say 'Showing '. color('bold'). "$currentlines". color('reset'). ' results, skipping '. color('italic') . "$skippedlines lines".color('reset') .'.';
    }
    say 'A total of '. color('bold'). "$sum images". color('reset') .' matched your criteria.';
    say '';
}

sub result_to_graph
{
    my ( $headers, $rows, $sum ) = @_;

    my $barwidth = POSIX::lround(($opt_W - 17) * 0.7);
    my $titlewidth = POSIX::lround(($opt_W - 17) * 0.3);
    my $chartformat = '%'.$titlewidth.'.'.$titlewidth.'s |%-'.$barwidth.'s| %5s (%s)';

    my $countcolumn = @$headers - 2;
    my $percentcolumn = @$headers - 1;

    foreach (@$rows)
    {
        # join columns for display before the chart
        if ( @$headers >= 4)
        {
            # TODO: give remaining columns more room if first columns are shorter
            # TODO: remove trailing whitespace
            my $titlecolumns = @$headers - 2;
            my $columnwidth = POSIX::lround($titlewidth / $titlecolumns);
            my $title = '';
            for (my $i = 0; $i < $titlecolumns; $i++)
            {
                $title = $title . ((length(@$_[$i]) > $columnwidth) ? substr(@$_[$i], 0, $columnwidth-1).'~ ' : @$_[$i] . ' '); 
            }
            @$_[0] = $title;
        }
        say sprintf($chartformat, @$_[0], "*"x(50/$sum*@$_[$countcolumn]), @$_[$percentcolumn], @$_[$countcolumn]);
    }
}

sub result_to_table
{
    my ( $headers, $rows, $sum, $skippedlines ) = @_;

    my $tb = Text::SimpleTable::AutoWidth->new('max_width' => $opt_W);
   
    $tb->captions($headers);
    foreach (@$rows)
    {
        $tb->row(@$_);
    }
   
    if (scalar @$rows) { say $tb->draw; } 
}

sub main::VERSION_MESSAGE()
{
	say $PROGRAM . ' - version ' . $VERSION;
	say 'published under BSD 3 - Clause License';
	say 'Copyright(c) 2019-2020 René Wagner';
	say 'https://git.sr.ht/~rwa/photo-stats';
}

sub main::HELP_MESSAGE
{
	say '';
	say 'EXIF data extraction for media tools and stats querying for your terminal.';
	say '';
	say 'usage: phosta [options]';
	say '';
	say 'Options with upper-case letters are automatically saved in the user config and can be omitted if no change is required.';
	say '';
	say 'generic options:';
    say '   -D <file>   : path and name of the db file to use, defaults to <workingdir>/photo_stats.db';
    say '   -c          : clear the database';
    say '   -v          : be verbose - print some debug output';
    say '   --help      : show this help';
    say '';
    say 'data gathering:';
    say '   -p <folder> : populate database with the EXIF data of the media files in the specified folder';
    say '   -E <ext>    : list of comma separated extensions used for scanning image files, defaults to jpg,jpeg';
    say '                 only media files which match (case-insensitive!) the given extensions are added to the database';
    say '';
    say 'stats querying:';
    say '   -g <period> : group by a time period, defaults to total (which means no grouping by period)';
    say '                 allowed values: year, month, week, hour';
    say '   -s <fields> : specify the information you want to select, defaults to none (just show number of images)';
    say '                 allowed values: file, maker, model, lensmake, lens, aperture, exposuretime, iso, focallength, focallength35mm';
    say '                 multiple fields should be listed comma-separated without whitespaces';
    say '   -t <range>  : only take images into account which have been taken in the given timerange';
    say '                 <range> must be specified as \'YYYYMMDD-YYYYMMDD\', you can omit the first or the later value';
    say '   -f <expr>   : filter images based on the given expression, multiple expression can be given comma-separated';
    say '                 this works as a case-insensitive search, multiple expressions need to match all (AND logic)'; 
    say '                 use "field=value" for CONTAINS and "field!=value" for NOT CONTAINS';
    say '                 allowed fields: file, maker, model, lensmake, lens, aperture, exposuretime, iso, focallength, focallength35mm';
    say '   -n <number> : limit the resultset to <number> of lines';
    say '   -o <fields> : sort your output by the given fields (sequence matters!) in descending order';
    say '                 allowed values: any comma separated combination of the values of -t and -s param and \'count\'';
    say '   -r          : sort in reverse (ascending) order, default is descending order';
    say '   -e          : skip lines with empty selected or grouping fields';
    say '   -W <number> : define max char width of output, default: 80, a minimum of 50 is required for meaningful output';
    say '   -T <value>  : show results as always as a table (on) or not (off) or auto (chart when single field is selected), default is auto';
    say '                 allowed values: auto, on, off';
    say '';
    say 'examples:';
    say '   phosta -E jpg,jpeg,tiff -D ~/Documents/stats.db -p ~/Pictures';
    say '   load EXIF data of files with the extensions jpg, jpeg and tiff in folder ~/Pictures to the database located in ~/Documents/stats.db';
    say '';
    say '   phosta -s model -g month -t 20190101-20121231 -o month,count';
    say '   show number of pictures taken with a specific camera body in 2019 grouped by month, sorted by newest month first';
    say '';
    say '   phosta -n 10 -s lens -o count'; 
    say '   show top 10 lenses used the most';
    say '';
    say '   phosta -s model -f maker!=ricoh';
    say '   show camera models where the maker field does not contain the term "ricoh"';
}