### load library
library(glmnet)

### load data
ddf = read.csv(file="http://rob-mcculloch.org/data/swe8there.csv")

### pull off x and y
y = ddf$y
nc = ncol(ddf)
x = as.matrix(ddf[,1:(nc-1)])


### fit cv Lasso
cvfit = cv.glmnet(x,y,
                    family = "binomial",
                    alpha = 1,                        # lasso - 1, ridge - 0
                    nfold = 10
                 )


### plot Lasso
par(mfrow=c(2,1))
plot(cvfit)
plot(cvfit$glmnet.fit)

### look at lasso coefs
p=ncol(x)
# lambda vs the number of non-zero coefficients
plot(cvfit$glmnet$lambda,cvfit$glmnet$df)
abline(v=cvfit$lambda.1se,col="red")
abline(v=cvfit$lambda.min,col="blue")

#look at coefficients, which are pos/neg, interpret
coefL = coef(cvfit$glmnet.fit, s=cvfit$lambda.1se)
oo = order( coefL, decreasing = TRUE )

# positive coefficients
ncoef=30
cat("Big positive coefficients:\n")
print(coefL@Dimnames[[1]][oo[1:ncoef]])
print(coefL[oo[1:ncoef]])
# negative coefficients
cat("Big negative coefficients:\n")
print(coefL@Dimnames[[1]][tail(oo,ncoef)])
print(coefL[tail(oo,ncoef)])



