
##################################################
### imports
import numpy as np
import pandas as pd

%matplotlib
import matplotlib.pyplot as plt
plt.style.use('seaborn')
import seaborn as sns

from sklearn.linear_model import LinearRegression

##################################################
### simulate data
np.random.seed(34) # Matthews

n=500; p=2
X = np.random.randn(n,p)
beta = np.array(range(p)) + 1
sigma = .5
Y = X @ beta + sigma * np.random.randn(n)

##################################################
### plot simulated data

p = sns.histplot(X[:,0],bins=20)
p.set_xlabel('first x')

fig,ax = plt.subplots(1,2)
ax[0].hist(X[:,0],bins=20)
ax[0].set_xlabel('x1')
ax[1].hist(X[:,1],bins=20)
ax[1].set_xlabel('x2')

fig,ax = plt.subplots(1,2)
ax[0].scatter(X[:,0],Y,c='blue',marker='o',s=4.0)
ax[0].set_xlabel('x1'); ax[0].set_ylabel('y')
ax[1].scatter(X[:,1],Y,c='blue',marker='o',s=4.0)
ax[1].set_xlabel('x2'); ax[1].set_ylabel('y')

##################################################
### fit with LinearRegression

lmmod = LinearRegression(fit_intercept=False)
lmmod.fit(X,Y) # (X,y) is the training data
print('**** bhat from sklearn.LinearRegression')
print("Model Slopes:    ",lmmod.coef_)

##################################################
### compute by hand

xtx = X.T @ X
xty = X.T @ Y
bhat = np.linalg.inv(xtx) @ xty
print('**** bhat by hand',bhat)



