/*
mreg-country-30aug10.sas

Directory: http://www.users.muohio.edu/baileraj/classes/sta402/examples
Author:    John Bailer
Purpose:   multiple regression example where average life expectancy
           of women is modeled as a function of country characteristics

Input data file -------------------------------------
   country.data

Directories:
   http://www.users.muohio.edu/baileraj/classes/sta402/
      (or M:\public.www\classes\sta402\examples)
   \\Casnov5\MST\MSTLab\Baileraj
   C:\Users\baileraj\BAILERAJ\Classes\Web-Classes\sta402\data

Input variables -------------------------------------
   Name     = country name (Character variable)
   Area     = country area
   Popnsize = population size
   Pcturban = % residents in urban setting
   Lang     = primary language
   Liter    = % literate
   Lifemen  = average life expectancy men
   Lifewom  = average life expectancy women
   PcGNP    = per capita gross national product

Created Variables -----------------------------------
   logarea  = log10(area);
   logpopn  = log10(popnsize);
   loggnp   = log10(pcGNP);
   ienglish = (lang="English");

Data Source: Extracted from World Almanac
*/

/* Macro variables for directories.
   DIR1 is referenced only by the commented-out local INFILE below;
   DIR2 is not referenced in this program.
   RTFOUT is the ODS RTF destination file used for the regression
   output and plots; change it here rather than in the ODS statement. */
%let DIR1   = C:\Users\baileraj\BAILERAJ\Classes\Web-Classes\sta402\data;
%let DIR2   = \\Casnov5\MST\MSTLab\Baileraj;
%let RTFOUT = C:\Users\baileraj\Desktop\ch1-display-1.1-fig.rtf;

/* Reading a file from a web page - added 30 aug 2010 */
filename FROMWEB url
   "http://www.users.muohio.edu/baileraj/classes/sta402/Data/country.data";

/* Global title for the output that follows (until it is replaced below) */
title "country data analysis";

/* Read the country data with list input and derive the analysis variables.
   NOTE(review): name and lang are read with a bare $ in list input, so they
   get the default character length and are blank-delimited - confirm that
   no value in country.data is longer than that or contains embedded blanks. */
data country;
   infile FROMWEB;
   /* infile "&DIR1\country.data"; */   /* local-copy alternative */
   input name $ area popnsize pcturban lang $ liter lifemen lifewom pcGNP;

   /* base-10 log transforms of the size / income variables */
   logarea  = log10(area);
   logpopn  = log10(popnsize);
   loggnp   = log10(pcGNP);

   /* indicator: 1 when the primary language is English, 0 otherwise */
   ienglish = (lang="English");

   /* keep only the log-scale versions of these three variables */
   drop area popnsize pcgnp;
run;

/* Listing of the analysis data set */
proc print data=country;
run;

/* Everything from here to ODS RTF CLOSE is also written to the RTF file */
ods rtf bodytitle file="&RTFOUT";

/* Simple linear regression of female life expectancy on log10(per capita GNP).
   Fitted values (yhat) and residuals (resid) are saved in data set NEW. */
proc reg data=country;
   title "LOGGNP as predictor of Life expectancy of women";
   model lifewom = loggnp;
   output out=new p=yhat r=resid;
run;
quit;   /* PROC REG is interactive: end it explicitly instead of relying on
           the next step boundary to close it */

/* plot life expectancy of women vs. log(GNP) with a linear regression fit
   and LOESS fit superimposed */
title;   /* clear all titles before plotting */
proc sgplot data=country;
   reg   y=lifewom x=loggnp;
   loess y=lifewom x=loggnp;
run;

/* Residual diagnostic: residuals from the fit above plotted against literacy,
   with LOESS and linear fits and a horizontal reference line at zero */
proc sgplot data=new;
   title2 "residuals from SLR with X=logGNP plotted vs. literacy (%)";
   loess y=resid x=liter;
   reg   y=resid x=liter;
   refline 0 / axis = y;
run;

ods rtf close;