. do "C:\Users\DEFAUL~1.SID\AppData\Local\Temp\STD00000000.tmp" . * do-file (slightly revised) for lecture 1a of VHM 802/812, Winter 2014 . version 13 . set more off . cd "f:\vhm\vhm802\data_stata" f:\vhm\vhm802\data_stata . . use daisy2red, clear . twoway (scatter milk120 parity) (lfit milk120 parity) . twoway (scatter milk120 parity, msymbol(circle_hollow)) (lfit milk120 parity, range(0 8)), ytitle("milk120") > xtitle("parity") . histogram milk120 (bin=31, start=1110.2, width=145.80967) . . * regression model with prediction . regress milk120 parity Source | SS df MS Number of obs = 1536 -------------+------------------------------ F( 1, 1534) = 262.27 Model | 109234227 1 109234227 Prob > F = 0.0000 Residual | 638905966 1534 416496.718 R-squared = 0.1460 -------------+------------------------------ Adj R-squared = 0.1455 Total | 748140192 1535 487387.748 Root MSE = 645.37 ------------------------------------------------------------------------------ milk120 | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- parity | 178.347 11.01266 16.19 0.000 156.7455 199.9484 _cons | 2727.08 34.33991 79.41 0.000 2659.722 2794.438 ------------------------------------------------------------------------------ . display invttail(1534,0.025) /* t* from Stata */ 1.9615116 . predict fit (option xb assumed; fitted values) . predict se_mean, stdp /* SE(yhat), for confidence interval */ . predict se_ind, stdf /* prediction error, for prediction interval */ . generate pred_low = fit - invttail(1534,0.025)*se_ind . generate pred_upp = fit + invttail(1534,0.025)*se_ind . list cow parity milk120 fit se_mean se_ind pred_low pred_upp in 1/10 +-------------------------------------------------------------------------------+ | cow parity milk120 fit se_mean se_ind pred_low pred_upp | |-------------------------------------------------------------------------------| 1. | 3 5 4173.0 3618.815 29.87665 646.0568 2351.567 4886.063 | 2. | 4 5 3727.3 3618.815 29.87665 646.0568 2351.567 4886.063 | 3. | 5 5 3090.8 3618.815 29.87665 646.0568 2351.567 4886.063 | 4. | 8 5 4228.4 3618.815 29.87665 646.0568 2351.567 4886.063 | 5. | 9 6 3431.1 3797.162 39.53432 646.5754 2528.896 5065.427 | |-------------------------------------------------------------------------------| 6. | 11 5 3984.7 3618.815 29.87665 646.0568 2351.567 4886.063 | 7. | 12 5 4064.7 3618.815 29.87665 646.0568 2351.567 4886.063 | 8. | 15 4 4241.5 3440.468 21.55974 645.7256 2173.869 4707.066 | 9. | 17 2 3416.6 3083.774 18.35515 645.6265 1817.37 4350.178 | 10. | 17 3 3529.5 3262.121 16.7209 645.5822 1995.804 4528.438 | +-------------------------------------------------------------------------------+ . * code modified from VER2 Fig 14.1, works for simple linear regression only . twoway (scatter milk120 parity, msymbol(circle_hollow)) (lfit milk120 parity, range(0,8)) /// > (lfitci milk120 parity, ciplot(rline) blcolor(black) blpattern(dash) range(0,8)) /// > (lfitci milk120 parity, stdf ciplot(rline) blcolor(black) blwidth(medium) blpattern(shortdash_dot_dot) range > (0,8)), /// > legend(off) ytitle("milk120") xtitle("parity") . . * residuals: individual observations . predict rawres, residuals (38 missing values generated) . predict stdres, rstandard (38 missing values generated) . predict delres, rstudent /* note: rstudent option for deletion residuals */ (38 missing values generated) . summarize rawres stdres delres Variable | Obs Mean Std. Dev. Min Max -------------+-------------------------------------------------------- rawres | 1536 -2.65e-06 645.1553 -1973.574 2137.779 stdres | 1536 -.0000951 1.000306 -3.059309 3.313621 delres | 1536 -.0000529 1.000989 -3.067684 3.324461 . list cow parity milk120 fit rawres stdres delres in 1/10 +-----------------------------------------------------------------------+ | cow parity milk120 fit rawres stdres delres | |-----------------------------------------------------------------------| 1. | 3 5 4173.0 3618.815 554.1854 .8596371 .8595639 | 2. | 4 5 3727.3 3618.815 108.4854 .1682796 .1682263 | 3. | 5 5 3090.8 3618.815 -528.0146 -.8190417 -.8189538 | 4. | 8 5 4228.4 3618.815 609.5853 .9455719 .9455392 | 5. | 9 6 3431.1 3797.162 -366.0615 -.568283 -.5681576 | |-----------------------------------------------------------------------| 6. | 11 5 3984.7 3618.815 365.8853 .5675513 .5674258 | 7. | 12 5 4064.7 3618.815 445.8853 .691645 .6915274 | 8. | 15 4 4241.5 3440.468 801.0323 1.2419 1.24212 | 9. | 17 2 3416.6 3083.774 332.8264 .5159264 .5158029 | 10. | 17 3 3529.5 3262.121 267.3793 .4144459 .414334 | +-----------------------------------------------------------------------+ . list cow parity milk120 fit rawres stdres delres if stdres~=. & abs(stdres)>3 +------------------------------------------------------------------------+ | cow parity milk120 fit rawres stdres delres | |------------------------------------------------------------------------| 669. | 765 2 1110.2 3083.774 -1973.574 -3.059309 -3.067684 | 1319. | 2440 5 5630.3 3618.815 2011.485 3.12016 3.129088 | 1341. | 2460 3 5399.9 3262.121 2137.779 3.313621 3.324461 | 1370. | 2489 3 5278.0 3262.121 2015.879 3.124673 3.133643 | +------------------------------------------------------------------------+ . display 2*1536*ttail(1533,3.324461) /* Bonferroni-corrected P-value */ 1.3928477 . display invttail(1533,.025/1536) /* cut-off for signif at 5% level */ 4.1672429 . . * assessing normality using residuals . qnorm stdres . histogram stdres, normal (bin=31, start=-3.0593085, width=.20557838) . summarize stdres, detail Standardized residuals ------------------------------------------------------------- Percentiles Smallest 1% -2.23909 -3.059309 5% -1.601332 -2.949112 10% -1.267381 -2.927706 Obs 1536 25% -.670562 -2.849384 Sum of Wgt. 1536 50% -.0180296 Mean -.0000951 Largest Std. Dev. 1.000306 75% .6480753 2.97556 90% 1.281694 3.12016 Variance 1.000612 95% 1.698061 3.124673 Skewness .129289 99% 2.410189 3.313621 Kurtosis 3.089667 . swilk stdres Shapiro-Wilk W test for normal data Variable | Obs W V z Prob>z -------------+-------------------------------------------------- stdres | 1536 0.99804 1.826 1.517 0.06468 . . * assessing linearity using residuals . scatter rawres fit . rvfplot /* same plot without generating variables; raw residuals only */ . scatter stdres fit /* most interesting residual plot */ . scatter stdres parity /* in simple regression, essentially the same as above */ . lowess stdres parity, xtitle("parity") . lowess milk120 parity, ytitle ("milk120") xtitle("parity") . . * assessing homoscedasticity . scatter stdres fit /* same as above */ . tabstat stdres, statistics(mean sd count) by(parity) Summary for variables: stdres by categories of: parity (Lactation number) parity | mean sd N ---------+------------------------------ 1 | -.4121462 .7542639 403 2 | .4093676 .9711145 369 3 | .2594253 1.015435 308 4 | .0739393 1.017689 219 5 | -.2971432 1.074628 164 6 | -.4336789 .8422888 69 7 | -.6370535 .1613464 4 ---------+------------------------------ Total | -.0000951 1.000306 1536 ---------------------------------------- . estat hettest Breusch-Pagan / Cook-Weisberg test for heteroskedasticity Ho: Constant variance Variables: fitted values of milk120 chi2(1) = 7.29 Prob > chi2 = 0.0069 . estat imtest Cameron & Trivedi's decomposition of IM-test --------------------------------------------------- Source | chi2 df p ---------------------+----------------------------- Heteroskedasticity | 16.48 2 0.0003 Skewness | 7.22 1 0.0072 Kurtosis | 0.75 1 0.3874 ---------------------+----------------------------- Total | 24.46 4 0.0001 --------------------------------------------------- . . * transformation . boxcox milk120 parity, model(lhs) /* lhs=left-hand-side is the default */ Fitting comparison model Iteration 0: log likelihood = -12237.344 Iteration 1: log likelihood = -12235.143 Iteration 2: log likelihood = -12235.143 Fitting full model Iteration 0: log likelihood = -12116.128 Iteration 1: log likelihood = -12112.932 Iteration 2: log likelihood = -12112.931 Number of obs = 1536 LR chi2(1) = 244.42 Log likelihood = -12112.931 Prob > chi2 = 0.000 ------------------------------------------------------------------------------ milk120 | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- /theta | .7656256 .0921533 8.31 0.000 .5850085 .9462428 ------------------------------------------------------------------------------ Estimates of scale-variant parameters ---------------------------- | Coef. -------------+-------------- Notrans | parity | 27.0975 _cons | 554.488 -------------+-------------- /sigma | 97.53378 ---------------------------- --------------------------------------------------------- Test Restricted LR statistic P-value H0: log likelihood chi2 Prob > chi2 --------------------------------------------------------- theta = -1 -12314.672 403.48 0.000 theta = 0 -12148.834 71.81 0.000 theta = 1 -12116.128 6.39 0.011 --------------------------------------------------------- . * analysis of power-transformed outcome . generate tmilk=milk120^.75 (38 missing values generated) . regress tmilk parity Source | SS df MS Number of obs = 1536 -------------+------------------------------ F( 1, 1534) = 264.86 Model | 1103464.63 1 1103464.63 Prob > F = 0.0000 Residual | 6390947.69 1534 4166.19798 R-squared = 0.1472 -------------+------------------------------ Adj R-squared = 0.1467 Total | 7494412.32 1535 4882.3533 Root MSE = 64.546 ------------------------------------------------------------------------------ tmilk | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- parity | 17.92527 1.101429 16.27 0.000 15.7648 20.08573 _cons | 375.9898 3.434499 109.47 0.000 369.253 382.7267 ------------------------------------------------------------------------------ . predict fit1 (option xb assumed; fitted values) . predict stdres1, rstandard (38 missing values generated) . scatter stdres1 fit1 . qnorm stdres1 . summarize stdres1, detail Standardized residuals ------------------------------------------------------------- Percentiles Smallest 1% -2.313608 -3.402181 5% -1.658406 -3.083472 10% -1.283043 -3.058562 Obs 1536 25% -.6623973 -3.01349 Sum of Wgt. 1536 50% .0068648 Mean -.0000956 Largest Std. Dev. 1.000307 75% .6662298 2.804832 90% 1.270427 2.859347 Variance 1.000614 95% 1.657495 2.936321 Skewness -.0226933 99% 2.27907 3.102081 Kurtosis 3.070858 . swilk stdres1 Shapiro-Wilk W test for normal data Variable | Obs W V z Prob>z -------------+-------------------------------------------------- stdres1 | 1536 0.99904 0.891 -0.291 0.61429 . estat hettest Breusch-Pagan / Cook-Weisberg test for heteroskedasticity Ho: Constant variance Variables: fitted values of tmilk chi2(1) = 3.31 Prob > chi2 = 0.0689 . * analysis of square-root transformed outcome . generate rmilk=milk120^.5 (38 missing values generated) . regress rmilk parity Source | SS df MS Number of obs = 1536 -------------+------------------------------ F( 1, 1534) = 266.23 Model | 8851.08257 1 8851.08257 Prob > F = 0.0000 Residual | 50998.9065 1534 33.2457018 R-squared = 0.1479 -------------+------------------------------ Adj R-squared = 0.1473 Total | 59849.9891 1535 38.9902209 Root MSE = 5.7659 ------------------------------------------------------------------------------ rmilk | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- parity | 1.605405 .0983907 16.32 0.000 1.41241 1.798399 _cons | 51.96426 .3068041 169.37 0.000 51.36246 52.56606 ------------------------------------------------------------------------------ . predict fit2 (option xb assumed; fitted values) . predict stdres2, rstandard (38 missing values generated) . scatter stdres2 fit2 . qnorm stdres2 . summarize stdres2, detail Standardized residuals ------------------------------------------------------------- Percentiles Smallest 1% -2.486428 -3.791986 5% -1.729319 -3.220177 10% -1.306212 -3.191329 Obs 1536 25% -.6381913 -3.184865 Sum of Wgt. 1536 50% .0332434 Mean -.0000959 Largest Std. Dev. 1.000308 75% .6782325 2.611937 90% 1.257776 2.637807 Variance 1.000616 95% 1.624026 2.753201 Skewness -.1790609 99% 2.184529 2.897921 Kurtosis 3.137836 . swilk stdres2 Shapiro-Wilk W test for normal data Variable | Obs W V z Prob>z -------------+-------------------------------------------------- stdres2 | 1536 0.99739 2.436 2.243 0.01246 . estat hettest Breusch-Pagan / Cook-Weisberg test for heteroskedasticity Ho: Constant variance Variables: fitted values of rmilk chi2(1) = 0.83 Prob > chi2 = 0.3616 . * extra commands for backtransformation from 0.75 power scale . generate backfit1=fit1^(1/0.75) . scatter backfit1 parity . tabstat backfit1, statistics(mean sd) by(parity) Summary for variables: backfit1 by categories of: parity (Lactation number) parity | mean sd ---------+-------------------- 1 | 2887.599 0 2 | 3064.116 0 3 | 3243.214 0 4 | 3424.82 0 5 | 3608.866 0 6 | 3795.29 0 7 | 3984.032 0 ---------+-------------------- Total | 3197.403 269.8812 ------------------------------ . end of do-file . exit, clear