##
############################################################
##  model.matrix allows us to get the matrix X for y=X beta + e
##   corresponding to a formula.

##
############################################################
## construct some x's
n=100;p=3
x = matrix(rnorm(n*p),ncol=p)
colnames(x) = paste0("x",1:p)

#xf will be a factor 
xf = rep(1,n)
sz = floor(n/3)
xf[1:(2*sz)] = rep(c(2,3),sz)
xf = as.factor(xf)

#the data frame
ddf = data.frame(x,xf)

##
############################################################
## dummies for xf, R automatically expands factors into dummies
fm = as.formula(~.)
xm = model.matrix(fm,ddf)
print(head(xm))

##
############################################################
## all variable + interact x1 with xf
fm = as.formula(~.+x1:xf)
xm = model.matrix(fm,ddf)
print(head(xm))

##
############################################################
## interact x1 with xf and x2
fm = as.formula(~.+x1:xf+x1:x2)
xm = model.matrix(fm,ddf)
print(head(xm))

##
############################################################
## interact each column of x with xf, * notation include vars + interactions, 
## : is just interactions
fm = as.formula(~x*xf)
xm = model.matrix(fm,ddf)
print(head(xm))

##
############################################################
##all interactions up to and including third order
fm = as.formula(~(x1+x2+x3)^3)
xm = model.matrix(fm,ddf)
print(head(xm))

##
############################################################
##all cubes, I is the "as is" operator, here we just cube each var 
## Inside I all operations have their normal arithmetic meaning.
fm = as.formula(~I(x^3))
xm = model.matrix(fm,ddf)
print(head(xm))

##
############################################################
## variables, all squares, and interaction of numeric variables in x
fm = as.formula(~I(x^2) + .*.)
xm = model.matrix(fm,ddf[,1:3])
print(head(xm))

##
############################################################
## all variables and two-way interactions
fm = as.formula(~.^2)
xm = model.matrix(fm,ddf)
print(head(xm))

##
############################################################
## all variables and two-way interactions and all squares of the numeric variables
fm = as.formula(~I(x^2) + .^2)
xm = model.matrix(fm,ddf)
print(head(xm))

##
############################################################
## you can convert character strings to formulas which is very handy
## for constructing generic results.
fmc = paste("~(",paste0(colnames(x),collapse="+"),")^2","+ I(x^2)")
fm = as.formula(fmc)
xm = model.matrix(fm,ddf)
print(head(xm))

##
############################################################
## we can use update to add to an existing formula
## let's add the factor into the previous formula
fm = update(fm,"~.+ xf")
xm = model.matrix(fm,ddf)
print(head(xm))

##
############################################################
## you can use poly in formulas
## poly will generate orthogonal polynomials, 
## use raw=TRUE if you just want the powers
fm = as.formula(~poly(x1,degree=2))
xm = model.matrix(fm,ddf)
print(head(xm))

fm = as.formula(~poly(x1,2,raw=TRUE))
xm = model.matrix(fm,ddf)
print(head(xm))


##
############################################################
## you can polym with 2 x's to get squares and the interaction.
fm = as.formula(~polym(x1,x2,degree=2,raw=TRUE))
xm = model.matrix(fm,ddf)
print(head(xm))


##
############################################################
## check you can get the diabetes df just from original vars.
ddf = read.csv("diabetes.csv")
ddfx = ddf[,2:11] #the x vars

vnms = names(ddfx)
vint = paste0("~(",paste0(vnms,collapse="+"),")^2") #main and interactions
vsq = paste0("I(",paste0(vnms[-2],"^2"),")") #squares but not sex
fmc = paste(vint,"+",paste(vsq,collapse="+")) #combine
xm = model.matrix(as.formula(fmc),ddfx)[,-1]
print(head(xm))
xm = scale(xm)

xdf = as.matrix(ddf[,2:ncol(ddf)])
print(summary(xdf-xm))



