#!/usr/bin/env perl

#   Copyright (c) MediaTek USA Inc., 2023-2024
#
#   This program is free software;  you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or (at
#   your option) any later version.
#
#   This program is distributed in the hope that it will be useful, but
#   WITHOUT ANY WARRANTY;  without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#   General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program;  if not, see
#   <http://www.gnu.org/licenses/>.
#
#
# perl2lcov [--output mydata.info] [--testname name] [options] cover_db+
#
#   This script traverses perl coverage information in one or more coverage
#   data directories (generated by the perl Devel::Cover module) and
#   translates it into LCOV .info format.
#
#   In addition to common options supported by other tools in the LCOV
#   suite (e.g., --comment, --version-script, --ignore-error, --substitute,
#   --exclude, etc.), the tool options are:
#
#      --output filename:
#          The lcov data will be written to the specified file - or to
#          the file called 'perlcov.info' in the current run directory
#          if this option is not used.
#
#      --testname name:
#          Coverage info will be associated with the testcase name provided.
#          It is not necessary to provide a name.
#
# See the Devel::Cover documentation for directions on how to generate
# perl coverage data.

use Devel::Cover::DB;
use Devel::Cover::Truth_Table;
use strict;
use warnings;
use Getopt::Long;

use lib "/usr/lib/lcov";
use lcovutil qw($tool_name);

sub print_usage
{
    local *HANDLE = $_[0];

    print(HANDLE <<END_OF_USAGE);
Usage: $tool_name [OPTIONS] TRACEFILE_PATTERN(S)

Translate Perl coverage directory generated by Devel::Cover to LCOV .info
file format.

In addition to common options supported by other tools in the LCOV
suite (e.g., --comment, --version-script, --ignore-error, --substitute,
--exclude, etc.), the tool options are:

  --output filename:
      The lcov data will be written to the specified file - or to
      the file called 'perlcov.info' in the current run directory
      if this option is not used.

  --testname name:
      Coverage info will be associated with the testcase name provided.
      It is not necessary to provide a name.

See the Devel::Cover documentation for directions on how to generate
perl coverage data.

For example:

    # write Perl line, branch, condition, and subroutine coverage data to
    #  'myPerlDB' in the current directory
  \$ perl -MDevel::Cover=-db,.\/myPerlDB,-coverage,statement,branch,condition,subroutine,-silent,1 myScript.pl
    # OR: write all the coverage types that Perl knows about to 'myPerlDB2' -
    #   note that perl2lcov will ignore types it does not understand/does
    #   not use (pod, time, and path)
  \$ perl -MDevel::Cover=-db,.\/myPerlDB2,-silent,1 myScript.pl
    # run 'cover' from the Devel::Cover installation - to extract runtime
    #   data into a usable form.  This will also generate an HTML report
    #   in 'myCoverDB'
  \$ cover myCoverDB -silent 1
    # run perl2lcov translator to produce LCOV format data:
  \$ perl2lcov -o perldata.info [--testname myTestName] myCoverDB
    # and generate a genhtml-format coverage report:
  \$ genhtml -o html_report perldata.info ...

Note that the data generateed by Devel::Cover is not always internally
consistent.  For example:

  - some which are never called, do not appear in the coverage data.

  - sometimes, a line will appear to be executed (non-zero hit count) but
    none of its contained branch expressions have been evaluated.
    (If the line was executed, then at least one branch condition must have
    been evaluated.

This can cause the various tools in the lcov package to generate errors of
type 'inconsistent'.
In that case, you can:

  - skip consistency checks entirely:  see the 'skip_consistency_checks' section
    in man lovrc(5)

  - ignore the error:  see the '--ignore-error' section in man genhtml(1)

  - exclude the offending code: see the '--exclude', '--filter', and
    '--omit-lines' sections in man genhtml(1).

END_OF_USAGE
}

sub findPackage
{
    my ($extents, $line) = @_;
    return undef unless @$extents;

    my $min = 0;
    my $max = $#$extents;
    my $best;
    while ($min <= $max) {
        my $mid = int(($min + $max) / 2);
        my $v   = $extents->[$mid];
        if ($line < $v->[0]) {
            $max = $mid - 1;
        } elsif ($line > $v->[0]) {
            $best = $v;
            $min  = $mid + 1;
        } else {
            # line number matched...which ought not to happen because
            # Deval::Cover reports subroutine start as first executable
            # line in the function.
            # That won't be the line containing "package ..." - unless the
            # user wrote the whole thing on one line.  Not clever.  Deserves
            # to lose, if something in here breaks.
            return $v;
        }
    }
    return $best;
}

$lcovutil::br_coverage                        = 1;
$lcovutil::func_coverage                      = 1;
$lcovutil::derive_function_end_line           = 1;
$lcovutil::derive_function_end_line_all_files = 1;
lcovutil::save_cmd_line(\@ARGV, "/usr/bin");
lcovutil::set_extensions('perl', '.*');

my $testname    = '';
my $output_file = 'perlcov.info';
our %options = ('testname=s' => \$testname,
                'output|o=s' => \$output_file,);
if (!lcovutil::parseOptions({}, \%options)) {
    print(STDERR "Use $lcovutil::tool_name --help to get usage information.\n");
    exit(1);
}

my $info = TraceFile->new();

foreach my $db (@ARGV) {
    # parse the other files first - to grab the data we want -
    #   Not quite sure how to map 'cond' to LCOV branch coverage.

    # save a readable message before remapping the $db
    my $msg =
        "$db appears to be empty; perhaps you need to run 'cover $db' before executing $0.";
    my $db    = Devel::Cover::DB->new(db => $db);
    my $cover = $db->cover;
    my @items = $cover->items;
    if (!@items) {
        lcovutil::ignorable_error($lcovutil::ERROR_EMPTY, $msg);
        next;
    }
    foreach my $file ($cover->items) {
        my $filename = lcovutil::subst_file_name($file);
        lcovutil::info("process $filename" .
                ($filename ne $file ? " (substituted from $file)" : '') . "\n");
        if (TraceFile::skipCurrentFile($filename)) {
            lcovutil::info("   (excluded)\n");
            next;
        }
        my $f = $cover->file($file);
        my $fileData =
            $info->data($file);    # really, want to use stored file name
        my $functionMap = $fileData->testfnc($testname);
        my $lineMap     = $fileData->test($testname);
        my $branchMap   = $fileData->testbr($testname);

        # use statement coverage to mark un-evaluated branches
        my ($stmts, $branches, $conditions, $subroutines);
        my @packageExtents;
        # Devel::Cover doesn't instrument all the functions in every file -
        # so need a workaround to find better extents for some of them
        my @functionExtents;

        foreach my $criteria ($f->items) {
            # some types we don't use
            next if (grep(/^$criteria$/, ('pod', 'time', 'path')));
            my $c = $f->criterion($criteria);
            if ($criteria eq 'branch') {
                $branches = $c;
            } elsif ($criteria eq 'condition') {
                $conditions = $c;
            } elsif ($criteria eq 'subroutine') {
                $subroutines = $c;
                if (-f $file) {
                    open(GREP, '-|', 'grep', '--line-number', '-E',
                         '^\s*(package|sub) ', $file) or
                        die("unable to grep $file: $!");
                    while (<GREP>) {
                        if (/^(\d+):\s*package\s+(\S+)\s*;/) {
                            push(@packageExtents, [$1, $2 . '::']);
                        } elsif (/^(\d+):\s*sub\s+([^\s(]+)/) {
                            push(@functionExtents, [$1, $2]);
                        } else {
                            die("unexpected grep output '$_'");
                        }
                    }
                    close(GREP);
                }
            } elsif ($criteria eq 'statement') {
                $stmts = $c;
            } else {
                lcovutil::ignorable_error($lcovutil::ERROR_UNKNOWN_CATEGORY,
                                          "unexpected data type '$criteria'");
            }
        }
        if (!defined($stmts)) {
            # this seems to happen sometimes if we re-run 'cover' multiple
            # times on the same DB - e.g., during testing.
            lcovutil::ignorable_error($lcovutil::ERROR_UNSUPPORTED,
                              "unable to process $file without statement data");
            next;
        }
        if ($lcovutil::verify_checksum &&
            !-f $file) {
            lcovutil::ignorable_error($lcovutil::ERROR_SOURCE,
                              "cannot read '$f': unable to compute --checksum");
        }
        my $version = lcovutil::extractFileVersion($file) if -f $file;
        $fileData->version($version) if defined($version) && $version ne '';

        # run through data to verify that there are no branch, function, or
        #  conditional coverpoints where there is no line data
        foreach my $c (['branch', $branches],
                       ['condition', $conditions],
                       ['subroutine', $subroutines]
        ) {
            next unless defined($c->[1]);
            foreach my $line ($c->[1]->items) {
                lcovutil::ignorable_error($lcovutil::ERROR_INCONSISTENT_DATA,
                                   'found ' . $c->[0] .
                                       " coverpoint on $line but no lineCov there"
                ) unless defined($stmts->location($line));
            }
        }

        foreach my $line ($stmts->items) {
            my $l         = $stmts->location($line);
            my $lineCount = $l->[0]->[0];
            $lineMap->append($line, $lineCount);

            if ($subroutines) {
                my $s = $subroutines->location($line);
                if (defined($s)) {
                    my ($count, $name) = @{$s->[0]};
                    if ($name !~ /(BEGIN|__ANON__)/) {
                        my $p = findPackage(\@packageExtents, $line);
                        if (defined($p)) {
                            $name = $p->[1] . $name;
                        }
                        $functionMap->define_function($name, $line);
                        $functionMap->add_count($name, $count);
                    }
                }
            }
            if (defined($conditions)) {
                my $cond = $conditions->location($line);
                if (defined($cond)) {
                    my @br      = $conditions->truth_table($line);
                    my $blockID = 0;
                    my @subst;
                    # the intent of this transform is for the branchExpr
                    #   to show which parts of the condition have evaluated
                    #   to true or false.
                    # However, this doesn't quite work because the truth
                    #   table computed by Devel::Cover is sometimes ordered
                    #   with the dependent clause after the independent
                    #   one - and sometimes the opposite.
                    # For the moment:  punt when we don't grok
                    foreach my $block (@br) {
                        my $counts     = $block->[0];
                        my $expr       = $block->[1];
                        my $simplified = $expr;
                        for (my $i = 0; $i <= $#subst; ++$i) {
                            my ($from, $to) = @{$subst[$i]};
                            $simplified =~ s/\Q$from\E/$to/;
                        }
                        my @expr;
                        while ($simplified =~
                               /(.+?)\s+(and|or|xor|&&|\|\|)\s+(.+)/) {
                            $simplified = $3;
                            $1 =~ s/^\s+|\s+$//g;
                            push(@expr, $1);
                        }
                        push(@expr, $simplified);
                        #@expr = split(/\s+(and|or|xor|&&|\|\|)\s+/, $simplified);
                        my $branchID = 0;
                        foreach my $entry (@$counts) {
                            my $taken =
                                $lineCount == 0 ? '-' : $entry->{covered};
                            my $inputs     = $entry->{inputs};
                            my $branchExpr = '';
                            if (scalar(@$inputs) == scalar(@expr)) {
                                # this is the case we expect..
                                my $sep = '';
                                for (my $i = 0; $i <= $#$inputs; ++$i) {
                                    my $v = $inputs->[$i];
                                    next if ($v eq 'X');
                                    $branchExpr .= $sep;
                                    $branchExpr .= " ! " if $v eq '0';
                                    $branchExpr .= $expr[$i];
                                    $sep = ', ';
                                }
                                for (my $i = 0; $i <= $#subst; ++$i) {
                                    my ($to, $from) = @{$subst[$i]};
                                    $branchExpr =~ s/$from/($to)/;
                                }
                                $branchExpr =~ s/^\s+|\s+$//g;
                            } else {
                                # punt.  Just report the original Devel::Cover
                                # expressions.  Hope the user can sort it out
                                $branchExpr = $expr;
                            }
                            my $br =
                                BranchBlock->new($branchID++, $taken,
                                                 $branchExpr, 0);
                            $branchMap->append($line, $blockID, $br, $file);
                        }
                        push(@subst, [$expr, '__' . scalar(@subst) . '__']);
                        ++$blockID;
                    }
                    # condition data is more compreshensive than branch
                    # if both exist on the line.
                    next;
                }
            }
            if (defined($branches)) {
                my $br = $branches->location($line);
                if (defined($br)) {
                    my ($true, $false) = @{$br->[0]->[0]};
                    my $expr = $br->[0]->[1]->{'text'};
                    my $id   = 0;
                    for my $c ([$true, $expr], [$false, '! ' . $expr]) {
                        # this is not an exception...
                        my $b =
                            BranchBlock->new($id++,
                                             $lineCount == 0 ? '-' : $c->[0],
                                             $c->[1], 0);
                        # blockID is always zero
                        $branchMap->append($line, 0, $b, $file);
                    }
                }
            }
        }
        $fileData->sum()->union($lineMap);
        $fileData->sumbr()->union($branchMap);
        $fileData->func()->union($functionMap);

        # have to do this manually due to some Perl quirks -
        # in particular, there may be code outside of the subroutine we are
        # walking...and we want to correct the end line
        TraceFile::_deriveFunctionEndLines($fileData);
        my $lineData = $fileData->sum();
        my $funcData = $fileData->testfnc();

        foreach my $func ($fileData->func()->valuelist()) {
            # where is the nearest 'package' after my start line?
            my $first = $func->line();
            my $end   = $func->end_line();
            next unless defined($end);
            # find package or function enclosing my end line..
            my $last = $end;
            foreach my $ext (\@packageExtents, \@functionExtents) {
                while (1) {
                    my $p = findPackage($ext, $last);
                    if (defined($p) && $p->[0] > $first) {
                        $last = $p->[0] - 1;
                        lcovutil::info(1,
                                       $func->name() .
                                           ": found update end line $last in " .
                                           $p->[1] . "\n");
                        # iterate in case there is another package above the first one
                    } else {
                        last;
                    }
                }
            }
            next unless $last < $end;

            # what is the last executable line before the 'package' or 'sub' decl?
            while ($last > $first) {
                if (defined($lineData->value($last))) {
                    last;
                }
                --$last;
            }
            lcovutil::info(1,
                           "resetting " . $func->name() .
                               " end line to $last (from $end)\n");
            $func->set_end_line($last);

            foreach my $tn ($funcData->keylist()) {
                my $d = $funcData->value($tn);
                my $f = $d->findKey($first);
                $f->set_end_line($last);
            }

        }    #foreach function

    }    # foreach file
}    #foreach cover db

$info->applyFilters();
$info->add_comments(@lcovutil::comments);
$info->write_info_file($output_file, $lcovutil::verify_checksum);

$info->checkCoverageCriteria();
CoverageCriteria::summarize();
my $exit_code = 0 != $CoverageCriteria::coverageCriteriaStatus;

lcovutil::warn_file_patterns();
lcovutil::summarize_cov_filters();
lcovutil::summarize_messages(1);    # silent if no messages

exit $exit_code;
