
##################################################
### simple single-hidden-layer neural nets with Keras on the Boston Housing data

##################################################
### imports
import numpy as np
import pandas as pd
import math
import scipy as sp

#graphics with matplotlib
import matplotlib.pyplot as plt
plt.style.use('seaborn')
#ipython terminal
%matplotlib
#jupyter notebook
#%matplotlib inline 

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import regularizers

##################################################
### data
# NOTE(review): sklearn.datasets.load_boston was deprecated in scikit-learn
# 1.0 and removed in 1.2, so the import itself fails on recent versions.
# Fall back to loading the original dataset straight from the CMU StatLib
# archive (the replacement recipe given in the sklearn deprecation notice).
try:
    from sklearn.datasets import load_boston
    boston = load_boston()
    features = boston.data    # (506, 13) predictor matrix; lstat is column 12
    y = boston.target         # medv: median home value
except ImportError:
    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
    # each observation spans two physical rows: 11 values then 3 values;
    # this stacking reproduces load_boston's column order (lstat = col 12)
    features = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
    y = raw_df.values[1::2, 2]

##################################################
### train/test split: hold out 25% of the rows for out-of-sample checks
Xtrain, Xtest, ytrain, ytest = train_test_split(
    features, y, random_state=99, test_size=.25)

##################################################
## scale: standardize each predictor using train-set statistics only,
## then apply the same transformation to the held-out test set
scaler = StandardScaler().fit(Xtrain)
Xtr = scaler.transform(Xtrain)
Xte = scaler.transform(Xtest)

# sanity check on the standardization of the training predictors
print("means should be 0, sds should be 1")
print(Xtr.mean(axis=0))
print(Xtr.std(axis=0))

## pull off lstat (column 12) as a single-feature design matrix of shape (n, 1)
Xtr1 = Xtr[:, 12][:, np.newaxis]
Xte1 = Xte[:, 12][:, np.newaxis]

## our familiar plot -- train and test each get their own figure
## (without plt.figure() the second scatter overlays the first one)
plt.figure()
plt.scatter(Xtr1, ytrain, c='blue')
plt.xlabel("lstat"); plt.ylabel("medv")

plt.figure()
plt.scatter(Xte1, ytest, c='blue')
plt.xlabel("lstat"); plt.ylabel("medv")


##################################################
### fit neural net with small number of units and not much L2 regularization

# make model: one hidden sigmoid layer (5 units) with a small L2 penalty on
# its weights, followed by a single linear output unit for regression
lp2pen = .001                  # L2 penalty strength on hidden-layer weights
nx = Xtr1.shape[1]             # number of input features (1: lstat only)
nn1 = models.Sequential()
nn1.add(layers.Dense(units=5, activation='sigmoid',
                     kernel_regularizer=regularizers.l2(lp2pen),
                     input_shape=(nx,)))
nn1.add(layers.Dense(units=1))  # linear activation: raw regression output

# compile model: squared-error loss with the RMSprop optimizer
nn1.compile(loss='mse', optimizer='rmsprop', metrics=['mse'])

# fit; validation_data lets us track the out-of-sample loss every epoch
nhist = nn1.fit(Xtr1, ytrain, epochs=1000, verbose=1, batch_size=20,
                validation_data=(Xte1, ytest))

### plot training by epoch (red = training loss, blue = validation loss)
trL = nhist.history['loss']
teL = nhist.history['val_loss']
epind = range(1, len(trL) + 1)
plt.figure()   # new figure so the curves don't land on an earlier plot
plt.plot(epind, trL, c='red')
plt.plot(epind, teL, c='blue')
plt.xlabel('epoch'); plt.ylabel('loss')

### plot fit - in sample (red = observed, blue = fitted values)
yhtr = nn1.predict(Xtr1)
plt.figure()
plt.scatter(Xtr1, ytrain, c='red')
plt.scatter(Xtr1, yhtr, c='blue')
plt.xlabel('x=lstat'); plt.ylabel('y=medv')

### plot fit - out of sample (red = observed, blue = predicted values)
yhte = nn1.predict(Xte1)
plt.figure()
plt.scatter(Xte1, ytest, c='red')
plt.scatter(Xte1, yhte, c='blue')
plt.xlabel('x=lstat'); plt.ylabel('y=medv')

##################################################
### fit neural net with large number of units and L2 regularization

# make model: same architecture as nn1 but 10x the hidden units (50) and a
# 10x larger L2 penalty -- more flexibility, more shrinkage
lp2pen = .01                   # L2 penalty strength on hidden-layer weights
nx = Xtr1.shape[1]             # number of input features (1: lstat only)
nn2 = models.Sequential()
nn2.add(layers.Dense(units=50, activation='sigmoid',
                     kernel_regularizer=regularizers.l2(lp2pen),
                     input_shape=(nx,)))
nn2.add(layers.Dense(units=1))  # linear activation: raw regression output

# compile model: squared-error loss with the RMSprop optimizer
nn2.compile(loss='mse', optimizer='rmsprop', metrics=['mse'])

# fit; validation_data lets us track the out-of-sample loss every epoch
# (note: nhist is rebound here, discarding nn1's history)
nhist = nn2.fit(Xtr1, ytrain, epochs=1000, verbose=1, batch_size=20,
                validation_data=(Xte1, ytest))

### plot training by epoch (red = training loss, blue = validation loss)
trL = nhist.history['loss']
teL = nhist.history['val_loss']
epind = range(1, len(trL) + 1)
plt.figure()   # new figure so the curves don't land on an earlier plot
plt.plot(epind, trL, c='red')
plt.plot(epind, teL, c='blue')
plt.xlabel('epoch'); plt.ylabel('loss')

### plot fit - in sample (red = observed, blue = fitted values)
yhtr = nn2.predict(Xtr1)
plt.figure()
plt.scatter(Xtr1, ytrain, c='red')
plt.scatter(Xtr1, yhtr, c='blue')
plt.xlabel('x=lstat'); plt.ylabel('y=medv')

### plot fit - out of sample (red = observed, blue = predicted values)
yhte = nn2.predict(Xte1)
plt.figure()
plt.scatter(Xte1, ytest, c='red')
plt.scatter(Xte1, yhte, c='blue')
plt.xlabel('x=lstat'); plt.ylabel('y=medv')
