############################################################
### Do Lasso on diabetes data.
###
### Steps: read diabetes.csv, fit the lasso path with glmnet,
### choose lambda by 10-fold cross-validation, print the non-zero
### coefficients at the CV-minimizing lambda, and plot the
### in-sample fits against the observed response.
############################################################

# dpl: when TRUE each plot is written to a pdf file instead of the
# active graphics device, and the workspace is cleared at the end.
dpl <- FALSE
# dpl <- TRUE

library(glmnet)

################################################
if (TRUE) {
  cat("### read in data\n")
  ddf <- read.csv("diabetes.csv")

  # Fail early with a clear message if the file does not have the layout
  # the rest of the script relies on.
  stopifnot("y" %in% names(ddf), ncol(ddf) >= 2)

  # [[ ]] matches the column name exactly; `$` would silently
  # partial-match a differently named column.
  y <- ddf[["y"]]

  # Every column after the first is used as a predictor.
  # NOTE(review): assumes the response is column 1 -- confirm against the csv.
  # drop = FALSE keeps a matrix (with column names) even for one predictor.
  x <- as.matrix(ddf[, 2:ncol(ddf), drop = FALSE])
}

################################################
if (TRUE) {
  cat("### run lasso, glmnet with alpha=1 \n")
  ## data already standardized and demeaned, simple numeric response => gaussian
  ## alpha = 1 sets the elastic net parameter to lasso (which is the default).
  ## will default to 100 lambda values
  dgn <- glmnet(
    x, y,
    family = "gaussian",
    standardize = FALSE,
    intercept = FALSE,
    alpha = 1
  )

  if (dpl) pdf(file = "plot-diab-lasso.pdf", height = 10, width = 12)
  plot(dgn)
  if (dpl) dev.off()
}

################################################
if (TRUE) {
  cat("### run cvglmnet with alpha=1 \n")
  ## run cv, nfolds=10 is actually the default,
  ## other params are passed on to glmnet
  set.seed(99)  # fixes the random fold assignment so the run is repeatable
  cvdgn <- cv.glmnet(
    x, y,
    nfolds = 10,
    family = "gaussian",
    standardize = FALSE,
    alpha = 1,
    intercept = FALSE
  )

  if (dpl) pdf(file = "plot-diab-cvlasso.pdf", height = 10, width = 12)
  plot(cvdgn)
  if (dpl) dev.off()

  # lambda minimizing the CV error, and the lambda picked by the 1-se rule
  minlam <- cvdgn$lambda.min
  minlam1 <- cvdgn$lambda.1se
  minlamT <- paste(
    "minlam and minlam (1se) are: ",
    round(minlam, 4),
    round(minlam1, 4)
  )
  print(minlamT)
}

################################################
if (TRUE) {
  cat("### look at sparse solution \n")
  # coefficients of the full-data fit evaluated at the CV-chosen lambda
  bhatL <- coef(dgn, s = minlam)[, 1]  # [,1] gets rid of sparse matrix format
  print(bhatL[bhatL != 0])
}

################################################
if (TRUE) {
  cat("### look at in-sample fits \n")
  yhat <- predict(dgn, newx = x, s = minlam)

  if (dpl) pdf(file = "insamp-lasso-fit.pdf", height = 10, width = 12)
  plot(y, yhat, xlab = "y", ylab = "yhat", cex.axis = 1.5, cex.lab = 1.5)
  # 45-degree reference line: points on it are fitted exactly
  abline(0, 1, col = "red", lwd = 2)
  if (dpl) dev.off()
}

###############################################
# In pdf (batch) mode, clear every object the script created.
if (dpl) rm(list = ls())