* do-file for Additional multivariate exercise 14, VHM 802, Winter 2021
version 16 /* works also with versions 14-15 */
set more off
cd "r:\

* only MANOVA for Steneryd data (MANOVA for the expanded prehistoric dogs data was in Exercise 13)
import delimited "r:\steneryd.csv", clear varnames(1)
* note that we need to reduce the dimension of the outcome, because there are only 17 samples
pca spec1-spec25, comp(7) mineigen(0.9)
* pick the 7 first components of a PCA
predict scor*, score
manova scor1-scor7 = c.light c.moisture c.reaction c.nitrogen
mvreg
matrix errorsscp=e(E)
matrix errorcorr=corr(errorsscp)
matrix list errorcorr
* reduce the model by eliminating scores where nothing is explained
manova scor1-scor4 = c.light c.moisture c.reaction c.nitrogen
mvreg
matrix errorsscp=e(E)
matrix errorcorr=corr(errorsscp)
matrix list errorcorr
* still difficult to interpret because the environmental variables are highly correlated

***********************************
* extra code to support solution (not necessarily needed for analysis)

import delimited r:\prehist_dog2.csv, clear
encode species, gen(Species)

foreach var of varlist x1-x9 {
  egen `var's=std(`var')
  }
  
forval i=1(1)5 {
  scalar wss=0
  foreach var of varlist x1s-x9s {
    quietly summarize `var' if Species==`i'
    scalar wss=wss+r(Var)*(r(N)-1)
    }
  di "Within-cluster sum-of-squares: " wss
  }
scalar wss=0
foreach var of varlist x1s-x9s {
  quietly anova `var' i.Species
  scalar wss=wss+e(rss)
  }
di "Total Within-cluster sum-of-squares: " wss
* equals sum of 5 above values, and agrees with SS Residuals in the permanova table
tabstat x1s-x9s, statistics( var ) by(Species)
* same pattern in the within-group variances

scalar wss=0
foreach var of varlist x1s-x9s {
  quietly summarize `var'
  scalar wss=wss+r(Var)*(r(N)-1)
  }
di "Total sum of squared distances: " wss
* agrees with SS Total in the permanova table
