#!/bin/bash
#-----------------------------------------------------------------------
# File    : djc_local
# Contents: probabilistic network induction with local structure
#           on artificial Danish Jersey Cattle data
# Author  : Christian Borgelt
# History : 17.12.1999 file created
#           17.07.2002 all induction loops moved into functions
# Note    : needs bash, not a plain POSIX sh: the script relies on the
#           "function" keyword, (( )) arithmetic and [[ ]] tests.
#           A function named "local" is defined further down and
#           shadows the builtin of the same name, so function-scoped
#           variables are created with "declare" instead.
#-----------------------------------------------------------------------
# generate: create ten training and ten test databases from djc.net
#   $1  multiplier for the -s value of the training databases
#   $2  multiplier for the -s value of the test databases
# Database k (k = 0..9) is written to train<k>.tab / test<k>.tab with
# -s<multiplier*(k+1)>, so every database gets a different -s value
# (presumably the random seed of gendb -- confirm against its usage).
# Diagnostics of gendb are discarded.
function generate () {          # --- generate random databases
  declare i                     # keep the loop counter out of the globals
  for (( i = 0; i < 10; i++ )); do
    gendb "-s$(( $1 * (i + 1) ))" djc.net "train$i.tab" 2> /dev/null
    gendb "-s$(( $2 * (i + 1) ))" djc.net "test$i.tab"  2> /dev/null
  done
}  # generate()

#-----------------------------------------------------------------------
# collect: condense an evaluation report read from stdin into the first
#          six columns of a result record
# Output (stdout): network name, number of conditions, additional
# conditions, missing conditions, number of parameters and the value of
# the last line matching "average". No newline is written on purpose:
# the caller appends the test-set column and terminates the record.
function collect () {           # --- collect evaluation results
  gawk '
    /evaluation of/         { network = $3  }
    /number of conditions/  { concnt  = $NF }
    /number of parameters/  { parcnt  = $NF }
    /average/               { average = $NF }
    /additional conditions/ { addcnt  = $NF }
    /missing conditions/    { miscnt  = $NF }
    END {
      printf("%-12s %3d %3d %3d %5d %10g",
             network, concnt, addcnt, miscnt, parcnt, average);
    }'
}  # collect()

#-----------------------------------------------------------------------
# average: print the column means of consecutive result records
#   $1  file with the result records (optional, default: local.tmp)
# Records are expected as "name cond add miss params train test".
# Every run of adjacent records with the same name yields one output
# row; blank lines are ignored.
function average () {           # --- average evaluation results
  gawk '
    function output() {         # print the means of the current run
      printf("%-10s", network);
      printf(" %6.1f %6.1f %6.1f", concnt/n, addcnt/n, miscnt/n);
      printf(" %7.1f %10.1f %10.1f\n", parcnt/n, train/n, test/n);
    }
    BEGIN { network = ""; }     # string value: forces string comparison
    (NF == 0) { next }          # a blank line is not a record
    ($1 != network) {           # a new run starts: flush the old one
      if (n > 0) output();
      network = $1; n = 0;
      concnt = addcnt = miscnt = parcnt = train = test = 0;
    }
    { concnt += $2; addcnt += $3; miscnt += $4;
      parcnt += $5; train  += $6; test   += $7; n++; }
    END { if (n > 0) output(); }
  ' "${1:-local.tmp}"
}  # average()

#-----------------------------------------------------------------------
# evaluate: score one network on a training and a test database and
#           append exactly one result record (one line) to local.tmp
#   $1  network file to evaluate; it is removed afterwards
#   $2  index of the database pair (train<$2>.tab / test<$2>.tab)
# The first six columns come from collect (training data), the seventh
# is the last field of the "average" line reported for the test data.
# The record is terminated even if no such line shows up, so that a
# failed run cannot glue two records together; a warning is printed to
# stderr in that case. Diagnostics of neval itself are discarded.
function evaluate () {          # --- evaluate a given network
  neval -L1 -c djc.net "$1" "train$2.tab" 2> /dev/null \
    | collect >> local.tmp
  neval -L1 "$1" "test$2.tab" 2> /dev/null \
    | gawk -v net="$1" -v db="test$2.tab" '
        /average/ { printf(" %10.1f", $NF); found = 1; }
        END {
          if (!found)
            print "evaluate: no result for " net " on " db > "/dev/stderr";
          printf("\n");         # always close the record
        }' >> local.tmp
  rm -f -- "$1"
}  # evaluate()

#-----------------------------------------------------------------------
# induce: learn a network from each of the ten training databases,
#         evaluate it and append the averaged results to local.res
#   $1    value for the -s option of ines
#   $2    value for the -e option of ines
#   $3    network file handed to ines, then evaluated and removed
#   $4..  further options, passed to ines unchanged after $3
# Diagnostics of ines are discarded.
function induce () {            # --- induce and evaluate networks
  declare i                     # "local" is shadowed by a function below
  rm -f local.tmp
  for (( i = 0; i < 10; i++ )); do
    ines "-s$1" "-e$2" djc.dom "train$i.tab" "$3" "${@:4}" 2> /dev/null
    evaluate "$3" "$i"
  done
  average | tee -a local.res
}  # induce()

#-----------------------------------------------------------------------
# fixed: evaluate a network that is built without a search
#   $1  network file name; "indep" makes ines start from djc.dom,
#       any other name makes it start from djc.net
function fixed () {             # --- evaluate empty/original network
  declare i src
  if [[ $1 == indep ]]; then src="djc.dom"; else src="djc.net"; fi
  rm -f local.tmp
  for (( i = 0; i < 10; i++ )); do
    ines "$src" "train$i.tab" "$1" 2> /dev/null
    evaluate "$1" "$i"
  done
  average | tee -a local.res
}  # fixed()

#-----------------------------------------------------------------------
# _induce_measures: run induce once per evaluation measure
#   $@  options appended to every ines call
# The measure name doubles as the network file name. "bdeu" is not
# passed as a measure of its own: it runs as "bdm" with the extra
# option -p-20.
function _induce_measures () {  # --- loop over evaluation measures
  declare m measure
  declare -a opts
  for m in infgain infgr infsgr1 gini chi2 bdm bdeu rdlrel; do
    measure=$m; opts=()
    if [[ $m == bdeu ]]; then measure="bdm"; opts=(-p-20); fi
    induce topord "$measure" "$m" "${opts[@]}" "$@"
  done
}  # _induce_measures()

#-----------------------------------------------------------------------
function topord () {            # --- greedy condition selection
  echo "---topord----------------------------------------------------" \
    | tee -a local.res
  _induce_measures
}  # topord()

#-----------------------------------------------------------------------
# NOTE: this function shadows the "local" builtin for the rest of the
# script, which is why all functions use "declare" for their variables.
function local () {             # --- local structure learning
  echo "---local-----------------------------------------------------" \
    | tee -a local.res
  _induce_measures -g-1e-12
}  # local()

#-----------------------------------------------------------------------
function cleanup () {           # --- clean up temporary files
  rm -f local.tmp train[0-9].tab test[0-9].tab
}  # cleanup()

#-----------------------------------------------------------------------
# Start a fresh local.res with the column header. The field widths
# mirror the row format printed by average, so the titles sit above
# their columns.
printf '%-10s %6s %6s %6s %7s %10s %10s\n' \
       network cond add miss params train test \
  | tee local.res
# Rule below the column header, shown on screen and appended to local.res.
printf '%s\n' \
  "-------------------------------------------------------------" \
  | tee -a local.res

# Experiment driver: each step appends its result rows to local.res.
generate 13 17                  # generate random databases
fixed indep                     # evaluate empty network
fixed orig                      # evaluate original network
#topord                         # condition selection on topological order
local                           # local structure learning
cleanup                         # clean up temporary files