REALESTATE STEPWISE and BEST SUBSETS

> ls()
[1] "realestate"
> head(realestate)
  salesprice finishsqft nbedrooms nbaths ac ncargarage pool yrbuilt
1     360000       3032         4      4  1          2    0    1972
2     340000       2058         4      2  1          2    0    1976
3     250000       1780         4      3  1          2    0    1980
4     205500       1638         4      2  1          2    0    1963
5     275500       2196         4      3  1          2    0    1968
6     248000       1966         4      3  1          5    1    1972
  constrquality style lotsizesqft adjhighway
1             2     1       22221          0
2             2     1       22912          0
3             2     1       21345          0
4             2     1       17342          0
5             2     7       21786          0
6             2     1       18902          0
> d<-realestate[,-10]
> head(d)
  salesprice finishsqft nbedrooms nbaths ac ncargarage pool yrbuilt
1     360000       3032         4      4  1          2    0    1972
2     340000       2058         4      2  1          2    0    1976
3     250000       1780         4      3  1          2    0    1980
4     205500       1638         4      2  1          2    0    1963
5     275500       2196         4      3  1          2    0    1968
6     248000       1966         4      3  1          5    1    1972
  constrquality lotsizesqft adjhighway
1             2       22221          0
2             2       22912          0
3             2       21345          0
4             2       17342          0
5             2       21786          0
6             2       18902          0
> par(mfrow=c(2,2))
> d$constrquality<-factor(d$constrquality)
> m1<-lm(salesprice~.,d)
> summary(m1)

Call:
lm(formula = salesprice ~ ., data = d)

Residuals:
    Min      1Q  Median      3Q     Max
-204865  -28010   -4973   21315  298892

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)
(Intercept)    -2.358e+06  3.991e+05  -5.909 6.29e-09 ***
finishsqft      8.700e+01  6.570e+00  13.242  < 2e-16 ***
nbedrooms      -5.125e+03  3.275e+03  -1.565   0.1182
nbaths          8.127e+03  4.288e+03   1.895   0.0586 .
ac              4.851e+03  8.086e+03   0.600   0.5488
ncargarage      1.089e+04  5.060e+03   2.152   0.0319 *
pool            1.014e+04  1.040e+04   0.975   0.3303
yrbuilt         1.269e+03  2.024e+02   6.272 7.60e-10 ***
constrquality2 -1.430e+05  1.021e+04 -14.007  < 2e-16 ***
constrquality3 -1.484e+05  1.404e+04 -10.564  < 2e-16 ***
lotsizesqft     1.556e+00  2.363e-01   6.587 1.12e-10 ***
adjhighway     -2.737e+04  1.810e+04  -1.512   0.1311
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 58770 on 510 degrees of freedom
Multiple R-squared: 0.8223, Adjusted R-squared: 0.8184
F-statistic: 214.5 on 11 and 510 DF, p-value: < 2.2e-16

> plot(m1)
> par(mfrow=c(1,1))
> library(MASS)
> boxcox(m1)
> m2<-lm(log(salesprice)~.,d)
> par(mfrow=c(2,2))
> plot(m2)
> d1<-d[-104,]
> m3<-lm(log(salesprice)~.,d1)
> plot(m3)
> plot(m2)
> plot(m3)
> plot(m2)
> plot(m3)
> summary(m2)

Call:
lm(formula = log(salesprice) ~ ., data = d)

Residuals:
     Min       1Q   Median       3Q      Max
-0.70377 -0.10862 -0.01433  0.10854  0.53043

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)
(Intercept)     4.132e+00  1.203e+00   3.436 0.000638 ***
finishsqft      2.631e-04  1.980e-05  13.290  < 2e-16 ***
nbedrooms       2.530e-03  9.869e-03   0.256 0.797749
nbaths          4.005e-02  1.292e-02   3.099 0.002046 **
ac              5.269e-02  2.437e-02   2.163 0.031036 *
ncargarage      3.726e-02  1.525e-02   2.443 0.014885 *
pool            5.276e-02  3.135e-02   1.683 0.093036 .
yrbuilt         3.874e-03  6.099e-04   6.352 4.72e-10 ***
constrquality2 -2.905e-01  3.076e-02  -9.442  < 2e-16 ***
constrquality3 -3.824e-01  4.232e-02  -9.035  < 2e-16 ***
lotsizesqft     5.374e-06  7.121e-07   7.547 2.07e-13 ***
adjhighway     -6.961e-02  5.454e-02  -1.276 0.202426
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1771 on 510 degrees of freedom
Multiple R-squared: 0.8352, Adjusted R-squared: 0.8317
F-statistic: 235 on 11 and 510 DF, p-value: < 2.2e-16

> m2<-update(m2,.~.-nbedrooms)
> summary(m2)

Call:
lm(formula = log(salesprice) ~ finishsqft + nbaths + ac + ncargarage +
    pool + yrbuilt + constrquality + lotsizesqft + adjhighway,
    data = d)

Residuals:
     Min       1Q   Median       3Q      Max
-0.70338 -0.10844 -0.01273  0.10815  0.53290

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)
(Intercept)     4.141e+00  1.201e+00   3.448 0.000612 ***
finishsqft      2.645e-04  1.905e-05  13.886  < 2e-16 ***
nbaths          4.099e-02  1.239e-02   3.309 0.001002 **
ac              5.319e-02  2.427e-02   2.192 0.028856 *
ncargarage      3.731e-02  1.523e-02   2.449 0.014663 *
pool            5.295e-02  3.131e-02   1.691 0.091446 .
yrbuilt         3.870e-03  6.092e-04   6.353 4.67e-10 ***
constrquality2 -2.891e-01  3.026e-02  -9.554  < 2e-16 ***
constrquality3 -3.807e-01  4.178e-02  -9.113  < 2e-16 ***
lotsizesqft     5.381e-06  7.109e-07   7.570 1.76e-13 ***
adjhighway     -6.940e-02  5.449e-02  -1.274 0.203335
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1769 on 511 degrees of freedom
Multiple R-squared: 0.8352, Adjusted R-squared: 0.832
F-statistic: 259 on 10 and 511 DF, p-value: < 2.2e-16

> plot(m2)
> m2<-update(m2,.~.-adjhighway)
> summary(m2)

Call:
lm(formula = log(salesprice) ~ finishsqft + nbaths + ac + ncargarage +
    pool + yrbuilt + constrquality + lotsizesqft, data = d)

Residuals:
     Min       1Q   Median       3Q      Max
-0.70326 -0.10797 -0.01231  0.10661  0.53314

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)
(Intercept)     4.257e+00  1.198e+00   3.553 0.000416 ***
finishsqft      2.656e-04  1.904e-05  13.952  < 2e-16 ***
nbaths          4.150e-02  1.239e-02   3.350 0.000867 ***
ac              5.431e-02  2.427e-02   2.238 0.025639 *
ncargarage      3.708e-02  1.524e-02   2.433 0.015317 *
pool            5.364e-02  3.133e-02   1.712 0.087479 .
yrbuilt         3.809e-03  6.076e-04   6.268 7.76e-10 ***
constrquality2 -2.890e-01  3.028e-02  -9.545  < 2e-16 ***
constrquality3 -3.805e-01  4.180e-02  -9.103  < 2e-16 ***
lotsizesqft     5.294e-06  7.081e-07   7.477 3.32e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.177 on 512 degrees of freedom
Multiple R-squared: 0.8347, Adjusted R-squared: 0.8318
F-statistic: 287.3 on 9 and 512 DF, p-value: < 2.2e-16

> plot(m2)
------------------------------------------------------
> library(car)
> library(leaps)
> bss1<-regsubsets(log(salesprice)~.,data=d1,nbest=3,nvmax=11)
> subsets(bss1,statistic="adjr2")
> subsets(bss1,statistic="bic")
> subsets(bss1,statistic="bic",min.size=5,max.size=7)
> subsets(bss1,statistic="bic",min.size=6,max.size=9)
> subsets(bss1,statistic="bic",min.size=6,max.size=8)
> bss1<-lm(log(salesprice)~finishsqft+nbedrooms+ac+yrbuilt+constrquality+lotsizesqft,d1)
> summary(bss1)

Call:
lm(formula = log(salesprice) ~ finishsqft + nbedrooms + ac +
    yrbuilt + constrquality + lotsizesqft, data = d1)

Residuals:
      Min        1Q    Median        3Q       Max
-0.657006 -0.108442 -0.005973  0.105142  0.488180

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)
(Intercept)     3.138e+00  1.170e+00   2.683  0.00753 **
finishsqft      3.089e-04  1.777e-05  17.385  < 2e-16 ***
nbedrooms       1.350e-02  9.500e-03   1.421  0.15584
ac              6.466e-02  2.424e-02   2.668  0.00788 **
yrbuilt         4.402e-03  5.900e-04   7.461 3.70e-13 ***
constrquality2 -3.082e-01  3.046e-02 -10.117  < 2e-16 ***
constrquality3 -4.184e-01  4.138e-02 -10.111  < 2e-16 ***
lotsizesqft     5.872e-06  7.044e-07   8.336 7.09e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1779 on 513 degrees of freedom
Multiple R-squared: 0.8317, Adjusted R-squared: 0.8294
F-statistic: 362.1 on 7 and 513 DF, p-value: < 2.2e-16

>