* do-file for Additional multivariate exercise 7, VHM 802
* Interpret the commands below under Stata 18 rules
* (author's note: works also with versions 14-17)
* NOTE(review): the stcolor_alt scheme set below may not exist in older releases - confirm before lowering the version
version 18
* Do not pause output at -more- prompts
set more off
* Graph scheme applied to every plot produced by this do-file
set scheme stcolor_alt
* Work from the r: drive, which is also where the data file is read from
cd "r:\"

* Load the goblet data from CSV, replacing whatever is currently in memory
import delimited using "r:\goblet.csv", clear
* Standardize x1-x6: each standardized copy is stored under the original
* name with an "s" suffix (x1s ... x6s)
foreach v of varlist x1-x6 {
  egen `v's = std(`v')
}
* Hierarchical cluster analysis with average linkage on the standardized
* variables, using the L2 dissimilarity; results are stored as "averagegoblet"
cluster averagelinkage x1s-x6s, measure(L2) name(averagegoblet)
* Dendrogram of that solution, leaves labelled by the goblet variable and
* x-axis labels rotated 90 degrees
cluster dendrogram averagegoblet, label(goblet) ///
  title("Goblets: Average linkage") ///
  ytitle("L2 dissimilarity") ///
  xlabel(, angle(90))

* k-means clustering analysis done in R

* Classical multidimensional scaling on x1-x6 (standardized via the std
* option), observations identified by goblet; the automatic plot is
* suppressed and the configuration is drawn separately with mdsconfig
mds x1-x6, id(goblet) std noplot
mdsconfig, aspectratio(0.5) msize(vsmall) mlabsize(tiny)
* Same analysis reported through the modern method (strain loss)
mds x1-x6, id(goblet) std ///
  method(modern) loss(strain) noplot
* Stress-1 version of modern MDS (default loss of method(modern))
mds x1-x6, id(goblet) std ///
  method(modern) noplot
* Configuration plot for the stress-1 solution; yneg negates the y axis
* (presumably to match the orientation of the classical plot - confirm)
mdsconfig, yneg aspectratio(0.5) msize(vsmall) mlabsize(tiny)

* Build two normalized versions of every measurement:
*   <var>t = share of the total x1+...+x6   ("Normalized by Total")
*   <var>h = ratio to x3                    ("Normalized by Height";
*            presumably x3 is the goblet height, per the plot titles)
foreach var of varlist x1-x6 {
  gen `var't = `var'/(x1+x2+x3+x4+x5+x6)
  gen `var'h = `var'/x3
}
* Need to standardize again: each normalized variable gets an extra "s"
* suffix (x1ts ... x6ts, x1hs ... x6hs).
* The variables are listed explicitly rather than as the range x1t-x6h:
*   - the range depends on the interleaved order in which the t/h
*     variables were created above;
*   - x3h = x3/x3 is constant (1), so it has zero variance and its
*     standardized version would be entirely missing. It is skipped here;
*     the height-normalized analyses do not use it.
foreach var of varlist x1t x2t x3t x4t x5t x6t x1h x2h x4h x5h x6h {
  egen `var's = std(`var')
}
* Repeat the cluster analysis for both normalizations

* (a) normalized by total: average linkage, L2 dissimilarity, on the
*     standardized total-normalized variables
cluster averagelinkage x1ts x2ts x3ts x4ts x5ts x6ts, ///
  measure(L2) name(averagegoblettotal)
cluster dendrogram averagegoblettotal, label(goblet) ///
  title("Goblets: Average linkage, Normalized by Total") ///
  ytitle("L2 dissimilarity") ///
  xlabel(, angle(90))

* (b) normalized by height: same settings; x3hs is left out of the
*     variable list (x3h is x3 divided by itself)
cluster averagelinkage x1hs x2hs x4hs x5hs x6hs, ///
  measure(L2) name(averagegobletheight)
cluster dendrogram averagegobletheight, label(goblet) ///
  title("Goblets: Average linkage, Normalized by Height") ///
  ytitle("L2 dissimilarity") ///
  xlabel(, angle(90))

* MDS on the total-normalized variables (standardized via the std option):
* classical solution first, then the modern method with strain loss,
* then the modern method with its default loss. mdsconfig draws the
* configuration of the most recent mds run; xneg negates the x axis.
mds x1t x2t x3t x4t x5t x6t, id(goblet) std noplot
mdsconfig, xneg aspectratio(0.5) msize(vsmall) mlabsize(tiny) ///
  title("MDS configuration, Normalized by Total")
mds x1t x2t x3t x4t x5t x6t, id(goblet) std ///
  method(modern) loss(strain) noplot
mds x1t x2t x3t x4t x5t x6t, id(goblet) std ///
  method(modern) noplot
mdsconfig, xneg aspectratio(0.5) msize(vsmall) mlabsize(tiny)

* Same sequence for the height-normalized variables; x3h is omitted from
* the variable list (it is x3 divided by itself). The last plot negates
* the y axis instead of the x axis.
mds x1h x2h x4h x5h x6h, id(goblet) std noplot
mdsconfig, aspectratio(0.5) msize(vsmall) mlabsize(tiny) ///
  title("MDS configuration, Normalized by Height")
mds x1h x2h x4h x5h x6h, id(goblet) std ///
  method(modern) loss(strain) noplot
mds x1h x2h x4h x5h x6h, id(goblet) std ///
  method(modern) noplot
mdsconfig, yneg aspectratio(0.5) msize(vsmall) mlabsize(tiny)
