#!/bin/sh
# PCP QA Test No. 778
# Install/Remove postgresql PMDA and check some basic metrics
#
# Copyright (c) 2015 Ken McDonell.  All Rights Reserved.
# Copyright (c) 2018 Red Hat.  All Rights Reserved.
#

# test name is the script's own file name; it tags the output and the
# saved copy of the PMDA configuration file
seq=`basename $0`
echo "QA output created by $seq"

# get standard environment, filters and checks
. ./common.product
. ./common.filter
. ./common.check

# skip (not fail) when the PMDA is not installed
[ -d $PCP_PMDAS_DIR/postgresql ] || _notrun "postgresql PMDA directory is not installed"
[ -f $PCP_PMDAS_DIR/postgresql/pmdapostgresql.python ] || _notrun "postgresql PMDA is not installed"

# skip when psql cannot be run as the postgres user ...
echo '\q' | $sudo -u postgres psql >/dev/null 2>&1
[ $? -eq 0 ] || _notrun "Cannot run psql as the postgres user, postgresql not installed or running?"
# ... or when the server reports itself as version 8.x
$sudo -u postgres psql -c "select VERSION();" | grep -s "PostgreSQL 8" > /dev/null
[ $? -eq 0 ] && _notrun "not testing postgres v8.x, too old"

CONF=$PCP_PMDAS_DIR/postgresql/pmdapostgresql.conf

status=1	# failure is the default!
# The trap body is in double quotes, so $here, $CONF, $seq, $sudo and $tmp
# are expanded now; only \$status is evaluated when the trap fires.
# The saved configuration is restored only if it exists, so an exit that
# happens before pmdapostgresql_install() made the backup does not produce
# a spurious mv error.
trap "cd $here; [ -f $CONF.$seq ] && $sudo mv $CONF.$seq $CONF; _cleanup_pmda postgresql; rm -rf $tmp.*; exit \$status" 0 1 2 3 15

# Run the PMDA's ./Remove script from the current directory.
# The banner goes to both stdout and $seq_full, the unfiltered Remove
# output (stderr first, then stdout) goes to $seq_full, and the filtered
# stdout is what ends up in the test output.
pmdapostgresql_remove()
{
    {
	echo
	echo "=== remove postgresql agent ==="
    } | tee -a $seq_full

    # keep stdout aside for filtering; stderr is only of interest in the log
    $sudo ./Remove 2>>$seq_full >$tmp.out
    cat <$tmp.out >>$seq_full
    _filter_pmda_remove <$tmp.out
}

# Install the postgresql PMDA with a known configuration file.
#
# Changes directory to the PMDA directory (and stays there), saves any
# existing $CONF as $CONF.$seq, writes a test configuration, removes any
# previous installation and then runs ./Install.  The raw Install output
# is appended to $seq_full; the filtered output goes to stdout.
#
pmdapostgresql_install()
{
    # start from known starting points
    # ./Remove and ./Install below are relative paths, so carrying on after
    # a failed cd would run whatever happens to be in the current directory
    if ! cd $PCP_PMDAS_DIR/postgresql
    then
	echo "Error: cannot cd to $PCP_PMDAS_DIR/postgresql"
	exit 1
    fi
    # only save a configuration file that is really there
    [ -f $CONF ] && $sudo mv $CONF $CONF.$seq
    cat <<EOF >$tmp.conf
[authentication]
host=local
port=5432
dbname=postgres
user=postgres
password=password
osuser=postgres
EOF
    $sudo mv $tmp.conf $CONF
    $sudo ./Remove >/dev/null 2>&1

    echo | tee -a $seq_full
    echo "=== postgresql agent installation ===" | tee -a $seq_full
    $sudo ./Install </dev/null >$tmp.out 2>&1
    cat $tmp.out >>$seq_full
    # Drop everything up to and including the "Updating the PMCD control
    # file" line, then replace counts on the "Check postgresql metrics have
    # appeared" line with the placeholders X, Y and Z.
    # NOTE(review): field 7 is masked when it is below 20, while fields 9
    # and 12 are masked when at or above their thresholds; presumably field
    # 7 is a warning count - confirm against the expected output file.
    _filter_pmda_install < $tmp.out | sed -e '1,/Updating the PMCD control file/d' \
    | $PCP_AWK_PROG '
/Check postgresql metrics have appeared/   { if ($7 < 20) $7 = "X"
                                          if ($9 >= 200) $9 = "Y"
                                          if ($12 >= 7000) $12 = "Z"
                                        }
                                        { print }'
}

# _prepare_pmda and _stop_auto_restart are not defined in this file; they
# are expected to come from the common.* files sourced above.
# NOTE(review): presumably _prepare_pmda saves the state that
# _cleanup_pmda (called from the exit trap) puts back - confirm.
_prepare_pmda postgresql
# note: _restore_auto_restart pmcd done in _cleanup_pmda()

_stop_auto_restart pmcd

# Run one SQL statement with psql (as the postgres user) and flatten the
# result table into one line per cell on stdout.
#
# Usage: _do_sql sql-text ...
#
# All arguments are joined into a single statement.  The raw psql output
# is appended to $seq_full.  Each output line has the form
#	row|column-name|value
# where row is the line number in the psql output, so the first data row
# is row 3 (line 1 is the column headings, line 2 the separator).
#
# If psql writes to stderr, that text and a warning are copied to stdout.
# If psql fails the script exits; because callers redirect stdout to a
# temporary file that is removed on exit, the failure is also reported on
# stderr and in $seq_full so it is not lost.
#
_do_sql()
{
    $sudo -u postgres psql -c "$*" >$tmp.out 2>$tmp.err
    _sts=$?
    if [ -s $tmp.err ]
    then
	cat $tmp.err
	echo "Warning: stderr from psql"
    fi
    if [ $_sts -ne 0 ]
    then
	echo "Error: psql exit status: $_sts"
	{
	    cat $tmp.err
	    echo "Error: psql exit status: $_sts"
	} | tee -a $seq_full >&2
	exit 1
    fi
    cat $tmp.out >>$seq_full
    # strip the padding around column separators and at both ends of each
    # line, and drop the trailing "(N rows)" line
    sed <$tmp.out \
	-e 's/ *| */|/g' \
	-e 's/^  *//' \
	-e 's/  *$//' \
	-e '/^([0-9][0-9]* row/d' \
    | $PCP_AWK_PROG -F\| '
NR == 1	{ for (i = 1; i <= NF; i++) name[i] = $i }
NR >= 3	{ for (i = 1; i <= NF; i++) print NR "|" name[i] "|" $i }'
}

# aim is to match values ... the sql values are lines like
# this in $tmp.db
# 3|client_port|-1
# 4|client_port|1
# and the pminfo values are in lines like this in $tmp.pcp
# postgresql.stat.activity.client_port 3 -1 1 4
#
# Usage: _match dbpattern [[pcppattern|-] [width [fuzz]]]
#
# if pcppattern is missing or "-", use dbpattern
# if width specified use only the first n characters of each value
# if width is missing or "-", use 0 (match full field width)
# the optional fuzz specifies a +/- % tolerance that is allowed; it is
# relative to the DB value and is only applied when both the DB value
# and the PCP value are plain decimal numbers
#
# "match" is weakly defined as some value that is not "" and not 0
# and that occurs in both sets of values (non determinism in the
# queries being run on the DB engine dictate this as close as we can
# get ... major PMDA botches fail even this weak test!)
#
# Reports "<dbpattern>: match" or "<dbpattern>: no match" on stdout; for
# no match the failed comparisons are appended to $seq_full.
#
_match()
{
    rm -f $tmp.match $tmp.nomatch
    pat="$1"
    grep -E "\\|$pat\\|" $tmp.db >$tmp.val.db
    if [ ! -s $tmp.val.db ]
    then
	echo "_match: failed to pick any values from DB using pattern \"|$pat|\""
	return
    fi
    if [ -n "$2" ] && [ "$2" != "-" ]
    then
	pat="$2"
    fi
    width=0	# use full field width
    if [ -n "$3" ] && [ "$3" != "-" ]
    then
	width="$3"
    fi
    fuzz=0
    [ -n "$4" ] && fuzz="$4"
    grep "\\.$pat " $tmp.pcp >$tmp.val.pcp
    if [ ! -s $tmp.val.pcp ]
    then
	echo "_match: failed to pick any values from PCP using pattern \"\\.$pat \""
	return
    fi
    nlines=`wc -l <$tmp.val.pcp | sed -e 's/ //g'`
    if [ "$nlines" != 1 ]
    then
	echo "_match: picked $nlines lines of values from PCP, not 1 as expected"
	return
    fi
    # pmprobe output is a bit tricky ... string values have enclosing "
    # either way the values end up |-separated on one line in $tmp.tmp
    if grep '"' $tmp.val.pcp >/dev/null
    then
	sed <$tmp.val.pcp \
	    -e 's/[^"]*"//' \
	    -e 's/" "/|/g' \
	    -e 's/"$//'
    else
	$PCP_AWK_PROG <$tmp.val.pcp '
    { for (i = 3; i <= NF; i++) {
	if (i > 3) printf "|"
	printf "%s",$i
      }
      print ""
    }'
    fi >$tmp.tmp

    # awk input: line 1 is the options, line 2 the PCP values and the
    # remaining lines the DB values (value is the 3rd field)
    ( echo "$width|$fuzz" ; cat $tmp.tmp $tmp.val.db ) \
    | $PCP_AWK_PROG -F\| '
NR == 1	{ # options: field_width fuzz_pct
	  width = $1
	  fuzz = $2
	  next
	}
NR == 2	{ # PCP values
	  j = 0
	  for (i = 1; i <= NF; i++) {
	      if ($i == 0 || $i == "")
		continue
	      if (width == 0)
		  pcp[j] = $i
	      else
		  pcp[j] = substr($i, 1, width)
#debug# print "pcp[" j "]=\"" pcp[j] "\""
	      j++
	  }
	  npcp = j
	  next
	}
	{ if ($3 == 0 || $3 == "") next
	  if (width != 0)
	      $3 = substr($3, 1, width)
	  for (j = 0; j < npcp; j++) {
#debug# print "pcp[" j "]=\"" pcp[j] "\" : db=\"" $3 "\""
	      ok = ($3 == pcp[j])
	      if (!ok && fuzz > 0 && $3 ~ /^[-+]?[0-9]+(\.[0-9]*)?$/ && pcp[j] ~ /^[-+]?[0-9]+(\.[0-9]*)?$/) {
		  # numeric values within fuzz % of the DB value also match
		  delta = $3 - pcp[j]
		  if (delta < 0) delta = -delta
		  base = $3 + 0
		  if (base < 0) base = -base
		  ok = (delta * 100 <= fuzz * base)
	      }
	      if (ok) {
		  print $3 >"'$tmp.match'"
		  exit
	      }
	      print "pcp[" j "]=\"" pcp[j] "\" : db=\"" $3 "\"" >"'$tmp.nomatch'"
	  }
	}'

    # the awk script creates $tmp.match only when some value matched
    if [ -f $tmp.match ]
    then
	echo "$1: match"
    else
	echo "$1: no match" | tee -a $seq_full
	[ -f $tmp.nomatch ] && cat $tmp.nomatch >>$seq_full
    fi

}

# real QA test starts here
pmdapostgresql_install

echo
echo "=== check values with pmie ==="
# The here-document delimiter is unquoted, so $tmp in the rules below is
# expanded by the shell before pmie sees the text.  Each ruleset prints
# "<metric>: OK" when the sum over all instances is > 0, otherwise prints
# "<metric>: BAD" and appends pminfo -f output for the metric to $tmp.bad.
cat <<End-of-File | pmie -t 2sec -T 5sec 2>$tmp.err >$tmp.out
// metrics chosen almost at random .. sort of 1 per cluster and
// metrics where at least one instance is expected to have a
// value > 0
//

ruleset
    sum_inst instant(postgresql.statio.sys_tables.idx_blks_read) > 0
    -> print "postgresql.statio.sys_tables.idx_blks_read: OK"
otherwise
    -> print "postgresql.statio.sys_tables.idx_blks_read: BAD" & shell "pminfo -f postgresql.statio.sys_tables.idx_blks_read >>$tmp.bad"
;

ruleset
    sum_inst instant(postgresql.statio.sys_indexes.idx_blks_hit) > 0
    -> print "postgresql.statio.sys_indexes.idx_blks_hit: OK"
otherwise
    -> print "postgresql.statio.sys_indexes.idx_blks_hit: BAD" & shell "pminfo -f postgresql.statio.sys_indexes.idx_blks_hit >>$tmp.bad"
;

ruleset
    sum_inst instant(postgresql.stat.sys_indexes.idx_scan) > 0
    -> print "postgresql.stat.sys_indexes.idx_scan: OK"
otherwise
    -> print "postgresql.stat.sys_indexes.idx_scan: BAD" & shell "pminfo -f postgresql.stat.sys_indexes.idx_scan >>$tmp.bad"
;

ruleset
    sum_inst instant(postgresql.stat.database.tup_returned) > 0
    -> print "postgresql.stat.database.tup_returned: OK"
otherwise
    -> print "postgresql.stat.database.tup_returned: BAD" & shell "pminfo -f postgresql.stat.database.tup_returned >>$tmp.bad"
;

ruleset
    sum_inst instant(postgresql.stat.sys_tables.idx_tup_fetch) > 0
    -> print "postgresql.stat.sys_tables.idx_tup_fetch: OK"
otherwise
    -> print "postgresql.stat.sys_tables.idx_tup_fetch: BAD" & shell "pminfo -f postgresql.stat.sys_tables.idx_tup_fetch >>$tmp.bad"
;

ruleset
    sum_inst instant(postgresql.stat.xact.all_tables.seq_scan) > 0
    -> print "postgresql.stat.xact.all_tables.seq_scan: OK"
otherwise
    -> print "postgresql.stat.xact.all_tables.seq_scan: BAD" & shell "pminfo -f postgresql.stat.xact.all_tables.seq_scan >>$tmp.bad"
;

ruleset
    sum_inst instant(postgresql.stat.xact.sys_tables.idx_scan) > 0
    -> print "postgresql.stat.xact.sys_tables.idx_scan: OK"
otherwise
    -> print "postgresql.stat.xact.sys_tables.idx_scan: BAD" & shell "pminfo -f postgresql.stat.xact.sys_tables.idx_scan >>$tmp.bad"
;

ruleset
    sum_inst instant(postgresql.stat.all_tables.idx_tup_fetch) > 0
    -> print "postgresql.stat.all_tables.idx_tup_fetch: OK"
otherwise
    -> print "postgresql.stat.all_tables.idx_tup_fetch: BAD" & shell "pminfo -f postgresql.stat.all_tables.idx_tup_fetch >>$tmp.bad"
;

ruleset
    sum_inst instant(postgresql.stat.all_indexes.idx_tup_read) > 0
    -> print "postgresql.stat.all_indexes.idx_tup_read: OK"
otherwise
    -> print "postgresql.stat.all_indexes.idx_tup_read: BAD" & shell "pminfo -f postgresql.stat.all_indexes.idx_tup_read >>$tmp.bad"
;

End-of-File

# keep the raw pmie output and errors in the full log
cat $tmp.out >>$seq_full
cat $tmp.err >>$seq_full
# sort + uniq collapses the repeated evaluations into one line per
# distinct message, in a locale-independent order
_filter_pmie_log <$tmp.out \
| LC_COLLATE=POSIX sort \
| uniq
# $tmp.bad is only written by the "otherwise" actions in the rules above
[ -f $tmp.bad ] && cat $tmp.bad

# Collect one statistics view from both sides, ready for _match.
#
# Usage: _snapshot view
#
# where view is the part of the name after "pg_stat_" (DB side) and after
# "postgresql.stat." (PCP side).  The DB values are left in $tmp.db, the
# pmprobe values in $tmp.pcp, and both are appended to $seq_full.
#
_snapshot()
{
    echo | tee -a $seq_full
    echo "=== pg_stat_$1 ===" | tee -a $seq_full
    _do_sql "select * from pg_stat_$1" >$tmp.db
    pmprobe -v postgresql.stat.$1 >$tmp.pcp
    {
	echo "--- DB values ---"
	cat $tmp.db
	echo "--- PCP values ---"
	cat $tmp.pcp
    } >>$seq_full
}

echo "validate values ..."

_snapshot activity
_match client_port
# current query could be a bit of a crap-shoot ... don't try to match
#
#_match "(current_query|query)" current_query

_snapshot bgwriter
for _metric in checkpoints_timed buffers_alloc
do
    _match $_metric
done

_snapshot database
_match stats_reset - 10
_match tup_fetched - - 25

# the table views share one list of columns to check ...
for _view in all_tables sys_tables
do
    _snapshot $_view
    for _metric in seq_tup_read seq_scan idx_tup_fetch idx_scan
    do
	_match $_metric
    done
done

# ... and so do the index views
for _view in all_indexes sys_indexes
do
    _snapshot $_view
    for _metric in idx_scan idx_tup_read idx_tup_fetch relname
    do
	_match $_metric
    done
done

echo "PMDA log file ..." >>$seq_full
$sudo cat $PCP_LOG_DIR/pmcd/postgresql.log >>$seq_full

pmdapostgresql_remove

# all done; the exit trap reports $status
status=0
exit
