#!/bin/bash
#-----------------------------------------------------------------------
# File    : djc_prob
# Contents: probabilistic network induction
#           on artificial Danish Jersey Cattle data
# Author  : Christian Borgelt
# History : 17.12.1999 file created
#           16.01.2000 simulated annealing added from file djc_sian
#           05.03.2002 shell changed from csh to sh
#           10.04.2002 all induction loops moved into functions
# Note    : the script uses bash syntax ("function", "(( ))", "[[ ]]",
#           C-style "for"), so it must be run by bash; a strictly
#           POSIX /bin/sh (e.g. dash) rejects these constructs.
# Needs   : gendb, ines, neval and gawk on the PATH; the files djc.net
#           and djc.dom in the current directory.
# Output  : result table on stdout and in prob.res (overwritten);
#           prob.tmp, train?.tab and test?.tab are scratch files.
#-----------------------------------------------------------------------

#-----------------------------------------------------------------------
# generate SEED1 SEED2
# Creates ten training tables (train0.tab .. train9.tab) and ten test
# tables (test0.tab .. test9.tab) from djc.net with gendb.
# Table number i gets the -s value SEEDx*(i+1) (presumably the random
# seed of gendb -- confirm against the gendb documentation).
#-----------------------------------------------------------------------
function generate () {          # --- generate random databases
  local i                       # loop variable (kept out of global scope)
  for (( i = 0; i < 10; i++ )); do
    gendb -s$(( $1*(i+1) )) djc.net "train$i.tab" 2> /dev/null
    gendb -s$(( $2*(i+1) )) djc.net "test$i.tab"  2> /dev/null
  done
}  # generate()

#-----------------------------------------------------------------------
# collect
# Filter: reads an evaluation report on stdin and prints the start of
# one result record: network name, numbers of conditions, additional
# and missing conditions, number of parameters and the "average" value.
# No newline is printed, because evaluate() appends the test set value
# to the same record. Some report fields are parsed but not printed.
#-----------------------------------------------------------------------
function collect () {           # --- collect evaluation results
  gawk '
  /evaluation of/         { network = $3;  }
  /number of attributes/  { attcnt  = $NF }
  /number of conditions/  { concnt  = $NF }
  /number of parameters/  { parcnt  = $NF }
  /number of tuples/      { tplcnt  = $NF }
  /impossible tuples/     { imptpl  = $4  }
  /minimum/               { minimum = $NF }
  /average/               { average = $NF }
  /maximum/               { maximum = $NF }
  /additional conditions/ { addcnt  = $NF }
  /missing conditions/    { miscnt  = $NF }
  END {
    printf("%-12s", network);
    printf(" %3d %3d %3d", concnt, addcnt, miscnt);
    printf(" %5d %10g", parcnt, average);
  }'
}  # collect()

#-----------------------------------------------------------------------
# average
# Reads the records in prob.tmp (name followed by six numbers) and
# prints, for each run of consecutive records with the same name, one
# line with the name and the mean of each numeric column.
#-----------------------------------------------------------------------
function average () {           # --- average evaluation results
  gawk '
  function output() {
    if (NR > 0) {
      printf("%-10s", network);
      printf(" %6.1f %6.1f %6.1f", concnt/n, addcnt/n, miscnt/n);
      printf(" %7.1f %10.1f %10.1f\n", parcnt/n, train/n, test/n);
    }
  }
  BEGIN { network = ""; }
  ($1 == network) {
    concnt += $2; addcnt += $3; miscnt += $4;
    parcnt += $5; train  += $6; test   += $7; n++;
  }
  ($1 != network) {
    if (n > 0) output();
    network = $1; n = 1;
    concnt  = $2; addcnt = $3; miscnt = $4;
    parcnt  = $5; train  = $6; test   = $7;
  }
  END { if (n > 0) output(); }' prob.tmp
}  # average()

#-----------------------------------------------------------------------
# evaluate NETWORK INDEX
# Evaluates the network file NETWORK with neval on train<INDEX>.tab
# (with "-c djc.net", presumably a comparison against the original
# network, since collect() extracts additional/missing conditions) and
# on test<INDEX>.tab, appends one record to prob.tmp and finally
# deletes NETWORK.
#-----------------------------------------------------------------------
function evaluate () {          # --- evaluate a given network
  neval -L1 -c djc.net "$1" "train$2.tab" 2> /dev/null \
    | collect >> prob.tmp
  # the test set value completes (and terminates) the record line
  neval -L1 "$1" "test$2.tab" 2> /dev/null \
    | gawk '/average/ { printf(" %10.1f\n", $NF); }' >> prob.tmp
  rm -f -- "$1"
}  # evaluate()

#-----------------------------------------------------------------------
# induce SEARCH MEASURE NETWORK [OPTION]
# Runs "ines -x -sSEARCH -eMEASURE" on each of the ten training tables,
# writing the network file NETWORK, evaluates each result and appends
# the averaged record to prob.res (also shown on stdout).
# OPTION is one extra argument for ines; if empty it is left out.
#-----------------------------------------------------------------------
function induce () {            # --- induce and evaluate networks
  local i                       # loop variable
  rm -f prob.tmp
  for (( i = 0; i < 10; i++ )); do
    # ${4:+...} expands to nothing for a missing/empty option, so that
    # ines never receives an empty argument
    ines -x -s"$1" -e"$2" djc.dom "train$i.tab" "$3" ${4:+"$4"} \
      2> /dev/null
    evaluate "$3" "$i"
  done
  average | tee -a prob.res
}  # induce()

#-----------------------------------------------------------------------
# fixed NAME
# Evaluates a network with fixed structure: ines is run without search
# options on djc.dom if NAME is "indep" and on djc.net otherwise; the
# result is written to the file NAME and evaluated like in induce().
#-----------------------------------------------------------------------
function fixed () {             # --- evaluate empty/original network
  local src i                   # input file and loop variable
  if [[ $1 == indep ]]; then src="djc.dom"; else src="djc.net"; fi
  rm -f prob.tmp
  for (( i = 0; i < 10; i++ )); do
    ines -x "$src" "train$i.tab" "$1" 2> /dev/null
    evaluate "$1" "$i"
  done
  average | tee -a prob.res
}  # fixed()

#-----------------------------------------------------------------------
# section TEXT
# Prints TEXT on stdout and appends it to prob.res.
#-----------------------------------------------------------------------
function section () {           # --- print a section line
  printf '%s\n' "$1" | tee -a prob.res
}  # section()

#-----------------------------------------------------------------------
# spanning_tree SEARCH
# Shared loop of owst() and extst(): calls induce() with the search
# method SEARCH for the measures infgain, infsgr1 and chi2.
#-----------------------------------------------------------------------
function spanning_tree () {     # --- spanning tree induction loop
  local m                       # evaluation measure
  for m in infgain infsgr1 chi2; do
    induce "$1" "$m" "$m"
  done
}  # spanning_tree()

#-----------------------------------------------------------------------
# selection SEARCH
# Shared loop of topord() and noloop(): calls induce() with the search
# method SEARCH for eight measures. "bdeu" is not passed to ines
# directly; it is run as measure "bdm" with the extra option "-p-20",
# while the result is still reported under the name "bdeu".
#-----------------------------------------------------------------------
function selection () {         # --- condition selection loop
  local m measure extra         # name, ines measure, extra option
  for m in infgain infgr infsgr1 gini chi2 bdm bdeu rdlrel; do
    if [[ $m == bdeu ]]; then measure="bdm"; extra="-p-20"
    else                      measure=$m;    extra=""; fi
    induce "$1" "$measure" "$m" "$extra"
  done
}  # selection()

#-----------------------------------------------------------------------
function owst () {              # --- optimum weight spanning tree cons.
  section "---owst------------------------------------------------------"
  spanning_tree owst
}  # owst()

#-----------------------------------------------------------------------
function extst () {             # --- optimum weight spanning tree ext.
  section "---extst-----------------------------------------------------"
  spanning_tree extst
}  # extst()

#-----------------------------------------------------------------------
function topord () {            # --- selection on topological order
  section "---topord----------------------------------------------------"
  selection topord
}  # topord()

#-----------------------------------------------------------------------
function noloop () {            # --- selection avoiding directed loops
  section "---noloop----------------------------------------------------"
  selection noloop
}  # noloop()

#-----------------------------------------------------------------------
# sian
# Runs ines with search method "sian" twice over the ten training
# tables: with -w0 (results named sian_no) and with -w1 (results named
# sian_yes). Run i uses the option -S1<i>, i.e. the values 10 .. 19.
#-----------------------------------------------------------------------
function sian () {              # --- hypertree simulated annealing
  local p i out                 # -w value, loop variable, network name
  section "---sian------------------------------------------------------"
  for p in 0 1; do
    if (( p == 0 )); then out="sian_no"; else out="sian_yes"; fi
    rm -f prob.tmp
    for (( i = 0; i < 10; i++ )); do
      ines -x -ssian -w"$p" -S1"$i" djc.dom "train$i.tab" "$out" \
        2> /dev/null
      evaluate "$out" "$i"
    done
    average | tee -a prob.res
  done
}  # sian()

#-----------------------------------------------------------------------
# cleanup
# Removes the record file and the generated training/test tables.
#-----------------------------------------------------------------------
function cleanup () {           # --- clean up temporary files
  rm -f prob.tmp
  rm -f train[0-9].tab
  rm -f test[0-9].tab
}  # cleanup()

#-----------------------------------------------------------------------
# main program: write the table header (this truncates prob.res),
# then run the experiments
#-----------------------------------------------------------------------
echo "network cond add miss params train test" \
  | tee prob.res
echo "-------------------------------------------------------------" \
  | tee -a prob.res

generate 13 17                  # generate random databases
fixed indep                     # evaluate empty network
fixed orig                      # evaluate original network
owst                            # optimum weight spanning tree construction
#extst                          # optimum weight spanning tree extension
topord                          # condition selection on topological order
#noloop                         # condition selection avoiding directed loops
sian                            # hypertree simulated annealing
cleanup                         # clean up temporary files