#!/usr/bin/env perl
use strict;
use warnings;
use v5.20;

use open ":std", ":encoding(UTF-8)";
use Cwd 'abs_path';
use File::Basename;

use AUR::Json qw(write_json);
my $argv0 = 'repo-parse';

# Attributes which (where applicable) match AurJson, such that `aur-repo-parse --json`
# can be piped to `aur-format`.
my %repo_add_attributes = (
    'ARCH'      => 'Arch',       'BASE'         => 'PackageBase',   'BUILDDATE'   => 'BuildDate',
    'CONFLICTS' => 'Conflicts',  'CHECKDEPENDS' => 'CheckDepends',  'CSIZE'       => 'CSize',
    'DEPENDS'   => 'Depends',    'DESC'         => 'Description',   'FILENAME'    => 'FileName',
    'ISIZE'     => 'ISize',      'LICENSE'      => 'License',       'MAKEDEPENDS' => 'MakeDepends',
    'MD5SUM'    => 'Md5Sum',     'NAME'         => 'Name',          'OPTDEPENDS'  => 'OptDepends',
    'PACKAGER'  => 'Packager',   'PROVIDES'     => 'Provides',      'REPLACES'    => 'Replaces',
    'SHA256SUM' => 'Sha256Sum',  'URL'          => 'URL',           'VERSION'     => 'Version',
    'PGPSIG'    => 'PgpSig',     'GROUPS'       => 'Groups',        'FILES'       => 'Files'
);

my %repo_add_array = (
    'GROUPS'  => 1, 'LICENSE'    => 1, 'REPLACES'    => 1, 'CONFLICTS'    => 1, 'PROVIDES' => 1,
    'DEPENDS' => 1, 'OPTDEPENDS' => 1, 'MAKEDEPENDS' => 1, 'CHECKDEPENDS' => 1, 'FILES'    => 1
);

# md5sum and sha256sum are numeric values, but too large to be represented in int32/64.
my %repo_add_numeric = (
    'BUILDDATE' => 1, 'CSIZE' => 1, 'ISIZE' => 1
);

sub handler_search {
    my ($handler, $expr, $data, @args) = @_;

    if (length($expr) and defined($data)) {
        if (ref($data) eq 'ARRAY') {
            # Search entry-by-entry on arrays
            $handler->(@args) if grep(/$expr/, @{$data});
        } elsif ($data =~ /$expr/) {
            $handler->(@args);
        } else {
            return 0;
        }
    } elsif (defined($data)) {
        $handler->(@args);
    }
    return 1;
}

sub parse_db {
    my ($fh, $db_path, $db_name, $first, $handler, $search_expr, $search_label, @varargs) = @_;
    my $count = 0;
    my ($entry, $filename, $attr, $attr_label);

    while (my $row = <$fh>) {
        chomp($row);

        if ($row =~ /^%\Q$first\E%$/) {
            $filename = readline($fh);
            chomp($filename);

            # Evaluate condition on previous entry and run handler
            my @handler_args = ($entry, $count, 0, @varargs);

            if ($count > 0) {
                $count++ if handler_search($handler, $search_expr, $entry->{$search_label}, @handler_args);
            } else {
                $count++;
            }
            # New entry in the database (hashref)
            %{$entry} = ();
            $entry->{'DBPath'} = $db_path;
            $entry->{'Repository'} = $db_name;
            $entry->{$repo_add_attributes{$first}} = $filename;
        }
        elsif ($row =~ /^%.+%$/) {
            if (not length($filename)) {
                die "$argv0: attribute '$first' not set";
            }
            $attr = substr($row, 1, -1);
            $attr_label = $repo_add_attributes{$attr};

            if (not defined $attr_label) {
                warn "$argv0: unknown attribute '$attr'";
                $attr_label = ucfirst(lc($attr));
            }
        }
        elsif ($row eq "") {
            next;
        }
        else {
            die unless length($attr) and length($attr_label);

            if (defined($repo_add_numeric{$attr})) {
                $entry->{$attr_label} = $row + 0; # integer type
            } elsif (defined($repo_add_array{$attr})) {
                push(@{$entry->{$attr_label}}, $row); # array type
            } else {
                $entry->{$attr_label} = $row; # string type
            }
        }
    }
    # Process last entry
    my @handler_args = ($entry, $count, 1, @varargs);

    if ($count > 0) {
        handler_search($handler, $search_expr, $entry->{$search_label}, @handler_args);
    }
    return $count;
}

sub repo_json {
    my ($pkg, $count, $last) = @_;
    my $json_text = write_json($pkg);

    print "[" if ($count == 1);
    print "," if ($count > 1);

    print $json_text;
    print "]\n" if $last;
}

sub repo_jsonl {
    my ($pkg, $count, $last) = @_;
    my $json_text = write_json($pkg);

    print $json_text . "\n";
}

sub repo_list {
    my ($pkg, $count, $last, $delim, $quiet) = @_;

    my $name = $pkg->{'Name'};
    my $pver = $pkg->{'Version'};

    if ($quiet) {
        say $name;
    } else {
        say join($delim, $name, $pver);
    }
}

sub repo_table {
    my ($pkg, $count, $last, $delim) = @_;

    my $name = $pkg->{'Name'};
    my $base = $pkg->{'PackageBase'};
    my $pver = $pkg->{'Version'};

    say join($delim, $name, $name, $base, $pver, 'Self');

    for my $key ('Depends', 'MakeDepends', 'CheckDepends') {
        if (ref($pkg->{$key}) eq 'ARRAY') {
            map { say join($delim, $name, $_, $base, $pver, $key) } @{$pkg->{$key}};
        }
    }
}

sub repo_attr {
    my ($pkg, $count, $last, $attr) = @_;
    my $value = $pkg->{$repo_add_attributes{$attr}};

    if (defined($value) and ref($value) eq 'ARRAY') {
        say join("\n", @{$value});
    } elsif (defined($value)) {
        say $value;
    }
}

unless (caller) {
    use Getopt::Long;
    my $opt_json = 0;
    my $opt_jsonl = 0;
    my $opt_list = 0;
    my $opt_list_attr = 0;
    my $opt_table = 0;
    my $opt_quiet = 0;
    my $opt_delim;
    my $opt_attr = "";
    my @opt_db_path;
    my $opt_stdin = 0;
    my $opt_search = "";
    my $opt_search_by = 'Name';

    # optional arguments
    GetOptions('J|json'      => \$opt_json,
               'jsonl'       => \$opt_jsonl,
               'F|attr=s'    => \$opt_attr,
               'l|list'      => \$opt_list,
               'q|quiet'     => \$opt_quiet,
               't|table'     => \$opt_table,
               'd|delim'     => \$opt_delim,
               'list-attr'   => \$opt_list_attr,
               'p|path=s'    => \@opt_db_path,
               's|search=s'  => \$opt_search,
               'search-by=s' => \$opt_search_by)
        or exit(1);

    if (scalar(@ARGV) > 0 and ($ARGV[0] eq "-" or $ARGV[0] eq "/dev/stdin")) {
        $opt_stdin = 1;
    }
    elsif ($opt_list_attr) {
        say(join("\n", sort(keys(%repo_add_attributes))));
        exit(0);
    }
    elsif (scalar(@opt_db_path) < 1 and not $opt_stdin) {
        say STDERR $argv0 . ': repository path must be specified';
        exit(1);
    }

    # Callback function run on each entry in the database
    my ($handler, @varargs);

    if ($opt_json) {
        $handler = \&repo_json;
    }
    elsif ($opt_jsonl) {
        $handler = \&repo_jsonl;
    }
    elsif (length($opt_attr) and defined($repo_add_attributes{$opt_attr})) {
        $handler = \&repo_attr;
        @varargs = $opt_attr;
    }
    elsif (length($opt_attr)) {
        say STDERR "$argv0: unknown attribute '$opt_attr'";
        exit(2);
    }
    elsif ($opt_list) {
        $handler = \&repo_list;
        @varargs = (length($opt_delim) ? $opt_delim : "\t", $opt_quiet);
    }
    elsif ($opt_table) {
        $handler = \&repo_table;
        @varargs = length($opt_delim) ? $opt_delim : "\t";
    }
    else {
        say STDERR $argv0 . ': no mode specified';
        exit(1);
    }

    # Verify search field
    my $opt_search_attr = $repo_add_attributes{uc($opt_search_by)};

    if (not defined $opt_search_attr) {
        say STDERR "$argv0: unknown attribute '$opt_search_by'";
        exit(2);
    }
    my @parse_db_args = ($handler, $opt_search, $opt_search_attr, @varargs);

    # Take input from stdin instead of a pacman database
    if ($opt_stdin) {
        parse_db(*STDIN, "/dev/stdin", 'local', 'FILENAME', @parse_db_args);
        exit(0);
    }

    # bsdtar(1) does not support extracting multiple files in a single invocation,
    # so fork a new process for each specified path.
    for my $db_path (@opt_db_path) {
        my $db_abs_path = abs_path($db_path);
        my $db_name = basename($db_path);

        if (not length($db_abs_path)) {
            say STDERR $argv0 . ": file path '$db_path' not found";
            exit(2);
        }

        # repo-add(8) only accepts *.db or *.db.tar* extensions
        if ($db_name =~ /\.(db|files)(\.tar(\.\w+)?)?$/g) {
            $db_name = substr $db_name, 0, $-[0];
        } else {
            say STDERR "$argv0: $db_name does not have a valid database archive extension";
            exit(1);
        }

        # When parsing the database, do not require a full extraction to either memory or disk
        # by reading `tar` output line-by-line. It is not strictly necessary to depend on
        # attribute order (i.e. %FILENAME% occuring in first place) while doing so; however,
        # the `--verbose` flag printing file names has different behavior for different `tar`
        # versions. Specifically, `bsdtar -xv` does not add a newline after the file path,
        # while `tar -xv` does.
        my @extract = ('bsdtar', '-Oxf', $db_abs_path);
        my $child_pid = open(my $fh, "-|", @extract) or die $!;

        if ($child_pid) { # parent process
            my $count = parse_db($fh, $db_abs_path, $db_name, 'FILENAME', @parse_db_args);

            waitpid($child_pid, 0);
        }
        exit(2) if $?;
    }
}

# vim: set et sw=4 sts=4 ft=perl:
