##################################################
### imports
%matplotlib
import matplotlib.pyplot as plt
plt.style.use('seaborn')

import math

import numpy as np
import pandas as pd

from time import time

from sklearn.linear_model import LogisticRegression
import statsmodels.api as sm

#####################################
### simulate data
## p(y=1|x) = F(beta[1] + beta[2]x), y ~ Bern(py1), F(x) = exp(x)/(1+exp(x))

n = 500          # sample size
beta = (1, .5)   # true (intercept, slope)

## set seed for reproducibility
np.random.seed(34)

## draw x ~ N(0,1), sorted so that curve plots of x vs p(y=1|x) come out clean
x = np.sort(np.random.randn(n))

## compute prob Y=1|x from the logistic cdf F(b0 + b1*x)
py1 = 1/(1+np.exp(-(beta[0] + beta[1] * x)))

## simulate y: one Bernoulli(py1[i]) draw per observation.
## np.random.binomial broadcasts over the probability array, so no
## explicit Python loop is needed.
y = np.random.binomial(1, py1)

## pandas data frame
## NOTE: pandas does support R-style factors via the Categorical dtype
## (pd.Categorical / .astype('category')); plain 0/1 ints are enough here.
ddf = pd.DataFrame({'x': x, 'py1': py1, 'y': y})
print(ddf.head())
ddf.describe()

## write to csv file
ddf.to_csv("psimdata.csv", index=False)


##################################################
## plot

# x vs p(y=1|x)
plt.plot(x,py1)
plt.ylabel('P(Y=1 | x)'); plt.xlabel('x')

## marginal of y
print(ddf['y'].value_counts()/n)

## x|y
ddf.boxplot(column=['x'],by='y')

## y|x with jitter
plt.scatter(x,y+np.random.randn(n)*.05,s=5.0,color='blue')
plt.xlabel('x'); plt.ylabel('jittered y')

## marginal of x
import seaborn as sns
p = sns.histplot(x,bins=20)
p.set_xlabel('x')

##################################################
### fit logit using sklearn and statsmodels

# sklearn
# NOTE: the string penalty='none' was deprecated in scikit-learn 1.2 and
# removed in 1.4; penalty=None is the supported way to ask for an
# unpenalized (plain MLE) fit.
lfit = LogisticRegression(penalty=None)
# sklearn wants a 2-dim (n,1) design array, not a flat vector
X = x.reshape((n, 1)).copy()
lfit.fit(X, y)
print(lfit.intercept_, lfit.coef_)


# statsmodels: prepend a column of ones for the intercept
XX = sm.add_constant(X)
lfit2 = sm.Logit(y, XX).fit()
print(lfit2.summary())

# plot fitted curve (statsmodels) vs the true curve
phat = lfit2.predict(XX) # from statsmodels !!
plt.plot(X, phat, c='red')
plt.xlabel('x'); plt.ylabel('P(Y=1|x)')
plt.plot(X, py1, c='blue')
plt.legend(['estimated', 'true'])
plt.title('x vs. p(Y=1 | x)')

# check fits are the same from sklearn and statsmodels
temp = lfit.predict_proba(X)[:, 1]
pd.Series(temp - phat).describe()

##################################################
### - log lik
def mLL1(x, y, beta):
   '''Minus log likelihood for a simple (intercept + slope) logit.

   Parameters
   ----------
   x : sequence of float -- covariate values
   y : sequence of int   -- 0/1 Bernoulli outcomes
   beta : sequence of two floats -- (intercept, slope)

   Returns
   -------
   float : -log L(beta | x, y)
   '''
   n = len(y)
   mll = 0.0
   for i in range(n):
      z = beta[0] + beta[1]*x[i]
      # per-observation -log lik is a softplus log(1 + e^t):
      #   y=1: -log sigma(z)     = log(1 + e^{-z})
      #   y=0: -log(1-sigma(z))  = log(1 + e^{ z})
      # Forming py1 and calling math.log(1-py1) directly overflows in
      # math.exp / hits log(0) for large |z|; log1p with the sign split
      # below is exact for moderate z and never overflows.
      t = -z if y[i] == 1 else z
      if t > 0:
         mll += t + math.log1p(math.exp(-t))
      else:
         mll += math.log1p(math.exp(t))
   return mll

##################################################
## check: evaluate -log lik at the mle and verify we get the same
## value that statsmodels reports
bmle = (lfit.intercept_[0], lfit.coef_[0][0])
temp = mLL1(x, y, bmle)
print(f'minus log Lik from mLL1 is {temp:0.4f}')

##################################################
### get -LL on beta1 grid
## hold the intercept at its mle and sweep the slope over [0, 1]
nval = 1000
p = 2
bMat = np.zeros((nval, p))
bMat[:, 0] = lfit.intercept_
bMat[:, 1] = np.linspace(0, 1, nval)

## evaluate -LL at every grid row
llv = np.array([mLL1(x, y, brow) for brow in bMat])


## plot -LL against the slope grid, marking the true slope and the grid mle
ii = llv.argmin()
plt.plot(bMat[:, 1], llv)
plt.axvline(beta[1], c='blue', label='true value')
plt.axvline(bMat[ii, 1], c='red', label='mle')
plt.xlabel('slope beta'); plt.ylabel('-LogLik')
plt.legend()

if True:
    figure = plt.gcf()
    figure.set_size_inches(10, 8)
    plt.savefig('python-mLL-on-a-beta1-grid.pdf')

## grid row achieving the minimum:
print(f'bhat from grid: {bMat[ii,:]}')
## compare with sklearn's estimates
print(f'bhat from sklearn: {lfit.intercept_}, {lfit.coef_[0]}')

##################################################
## bivariate grid: time mLL1 over DataFrame rows vs numpy rows

## 2-dim (intercept, slope) grid as a numpy array
nval = 100
b0g = np.linspace(0, 2, nval)  # intercept grid
b1g = np.linspace(0, 1, nval)  # slope grid
bb0, bb1 = np.meshgrid(b0g, b1g)
bbM = np.hstack([bb0.reshape((nval*nval, 1)), bb1.reshape((nval*nval, 1))])
nn = bbM.shape[0]

print("dim of bivariate grid:", bbM.shape)

## the same grid as a pandas data frame
bbD = pd.DataFrame(bbM)

## time the loop with DataFrame row access (.iloc)
t1 = time()
llv1 = np.zeros(nn)
for i in range(nn):
   llv1[i] = mLL1(x, y, bbD.iloc[i, :])
t2 = time()
print("time: ", t2-t1)

## time the loop with numpy row access
llv2 = np.zeros(nn)
t1 = time()
for i, brow in enumerate(bbM):
   llv2[i] = mLL1(x, y, brow)
t2 = time()
print("time: ", t2-t1)


## confirm both row-access styles give identical values
pd.Series(llv1-llv2).describe()
print("abs diff:", np.abs(llv1-llv2).max())

#### vectorized version of mLL1: one numpy expression, no Python loop
def mLL(x, y, beta):
   '''Minus log likelihood for a simple logit, vectorized.

   x, y : 1-d numpy arrays (covariate values, 0/1 outcomes)
   beta : sequence of two floats, (intercept, slope)
   returns -log L(beta | x, y) as a float
   '''
   z = beta[0] + beta[1]*x
   # per-observation -log lik is softplus((1-2y)*z):
   #   y=1 -> log(1+e^{-z}) = -log sigma(z)
   #   y=0 -> log(1+e^{ z}) = -log(1-sigma(z))
   # np.logaddexp(0, t) evaluates log(1+e^t) stably, whereas forming py1
   # and taking np.log(py1) / np.log(1-py1) yields -inf/NaN when |z| is large.
   return np.logaddexp(0.0, (1 - 2*y)*z).sum()
   
## time the vectorized -LL over the same bivariate grid
llv3 = np.zeros(nn)
t1 = time()
for i, brow in enumerate(bbM):
   llv3[i] = mLL(x, y, brow)
t2 = time()
print("time: ", t2-t1)

## confirm it matches the loop-based evaluation
print("abs diff:", np.abs(llv2-llv3).max())

##################################################
## look at llv: locate the grid minimizer and visualize the -LL surface

# grid argmin should sit close to the sklearn estimates printed below
ii = llv3.argmin()
print(bbM[ii,:])
print(lfit.intercept_,lfit.coef_)

# back to the (nval, nval) meshgrid layout so llM aligns with bb0/bb1:
# bbM was flattened row-major from the meshgrid arrays, so rows of llM
# follow the slope grid b1g and columns follow the intercept grid b0g
llM = llv3.reshape((nval,nval))

## filled contour plot with white contour lines on top;
## red dot = mle (bmle), blue dot = true (intercept, slope)
plt.contourf(bb0,bb1,llM,50,cmap='Blues')
plt.colorbar()
plt.contour(bb0,bb1,llM,20,colors='white')
plt.scatter(bmle[0],bmle[1],c='red',s=10)
plt.scatter(beta[0],beta[1],c='blue',s=10)
plt.xlabel('beta0')
plt.ylabel('beta1')

## 3D scatter of the raw grid evaluations
## NOTE(review): each plt.axes(projection='3d') call below draws into the
## current figure -- run these interactively, one figure per plot.
ax = plt.axes(projection='3d')
ax.scatter3D(bbM[:,0],bbM[:,1],llv3)

## 3D contour of the -LL surface
ax = plt.axes(projection='3d')
ax.contour3D(bb0,bb1,llM,50,cmap='Blues')

## smooth surface plot over the (beta0, beta1) grid
ax = plt.axes(projection='3d')
ax.plot_surface(bb0,bb1,llM,rstride=1,cstride=1,cmap='viridis',edgecolor='none')

## triangulated surface built from the flattened grid points
ax = plt.axes(projection='3d')
ax.plot_trisurf(bbM[:,0],bbM[:,1],llv3,cmap='viridis',edgecolor='none')















