* do-file for Additional multivariate exercise 4, VHM 802, Winter 2021
*
* Purpose: cluster analysis of the prehistoric-dog data (Manly Example 1.4).
*   1. import the data and build a short-label grouping variable
*   2. standardize the measurement variables
*   3. hierarchical clustering (single, average, complete linkage) + dendrograms
*   4. K-means clustering (k = 2 and k = 3), 10 random starts each, reporting
*      the total within-cluster sum of squares for every run

version 16    /* works also with versions 14-15 */
set more off
cd "r:\"

* Manly Example 1.4
import delimited r:\prehist_dog.csv, clear

* create grouping variable with shorter names
* (labels are assigned by row position, so the rows of the CSV must stay in
*  their original order; rows not listed here keep their original group name)
gen group0 = group
replace group0 = "M.dog"  if _n == 1
replace group0 = "Jackal" if _n == 2
replace group0 = "C.wolf" if _n == 3
replace group0 = "I.wolf" if _n == 4
replace group0 = "P.dog"  if _n == 7

* standardize variables
* (each new variable is named <original name>s, e.g. mandbreadth -> mandbreadths)
foreach var of varlist mandbreadth-mol14length {
    egen `var's = std(`var')
}

* hierarchical cluster analysis
cluster single mandbreadths-mol14lengths, name(singledogs) measure(L2)    /* L2 is the default for single linkage */
cluster dendrogram singledogs, label(group0) title("Prehistoric dogs: Single linkage") ytitle("L2 dissimilarity")

cluster average mandbreadths-mol14lengths, name(averagedogs) measure(L2)    /* L2 is the default for average linkage */
cluster dendrogram averagedogs, label(group0) title("Prehistoric dogs: Average linkage") ytitle("L2 dissimilarity")

cluster complete mandbreadths-mol14lengths, name(completedogs) measure(L2)    /* L2 is the default for complete linkage */
cluster dendrogram completedogs, label(group0) title("Prehistoric dogs: Complete linkage") ytitle("L2 dissimilarity")

* K-means clustering
* The seed is set once; all k = 2 runs are done first and then all k = 3 runs,
* so the sequence of random starting centres is reproducible.
set seed 210315
foreach k of numlist 2 3 {
    forval i = 1(1)10 {
        * run i for k clusters; cluster membership is stored in dogs<k>m<i>
        cluster kmeans mandbreadths-mol14lengths, k(`k') start(krandom) name(dogs`k'm`i')
        tab group dogs`k'm`i'

        * total within-cluster SS = sum over the standardized variables of the
        * residual SS from a one-way anova on the cluster membership
        scalar wss = 0
        foreach var of varlist mandbreadths-mol14lengths {
            quietly anova `var' dogs`k'm`i'
            * scalar() forces the scalar to be used even if a variable name
            * (or abbreviation) matching "wss" exists in the data
            scalar wss = scalar(wss) + e(rss)
        }
        di "Within-cluster sum-of-squares: " scalar(wss)
    }
}