
%matplotlib
import matplotlib.pyplot as plt


import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm


##################################################

## load the used-cars CSV from its remote URL into cd
cd = pd.read_csv("https://bitbucket.org/remcc/rob-data-sets/downloads/susedcars.csv")
print("*** the type of cd is:")
print(type(cd))
# cd.shape is a (rows, columns) tuple
print("***number of rows and columns is: ",cd.shape)
print("***the column names are:")
print(cd.columns.values)

## quick look at cd: first few rows, then per-column summary statistics
## (bare expressions: their values are only displayed at an interactive prompt)
cd.head()
cd.describe()

## extract a single column by name
temp = cd['mileage']  # the mileage variable
temp[:5]  # indexing starts at 0; a slice [a,b) leaves out b

## basic scatter plot: mileage on the x axis, price on the y axis
plt.scatter(temp, cd['price'])
plt.xlabel('mileage')
plt.ylabel('price')

## cd is an object: an instance of the pandas DataFrame class
## a class bundles attributes (data members) with methods (functions that can use those data members)
dir(cd)
type(cd.values)
type(cd.median)

# calling a method: it can work with the object's own data and with any arguments passed in
temp = cd.loc[:, 'price'].median()
print(temp)

# pick columns by integer position (3 first, then 0)
# NOTE(review): the original comment labelled these "year and price" -- confirm against cd.columns
junk = cd.iloc[:, [3, 0]]
junk.head()
junk.median(axis='index')

## at prompt try:
##  cd?
##  cd.<TAB>
##  cd.*corr*?

##################################################
### basic python data structures

## the list: an ordered container whose elements may have different types
xx = [1, 5, 'rob']
## valid indexes run from 0 to n-1
print(f'first xx is {xx[0]}')
print(xx[:2])  # a slice returns the elements in [a,b)
## list elements can be reassigned in place
xx[1] = 'temp'
xx

## you need to know a tuple: indexed like a list, but immutable
yy = (1,'rob')
yy[0]
## assigning to an element raises TypeError; it is caught and reported here so that
## the demonstration does not abort the rest of the file when run top to bottom
try:
    yy[1] = 'dave'  # error, tuple is immutable
except TypeError as err:
    print(f'tuple assignment failed: {err}')

## the dictionary: a mapping made of key: value pairs
zz = dict(a=1, b=2)
zz['b']      # look up the value stored under key 'b'
zz.keys()    # view of the keys
zz.values()  # view of the values

## numpy: build a 2 x 2 array from nested lists
xx = np.array([[1, 2],
               [3, 4]])
xx.shape
## the mean is available both as a numpy function and as an array method
np.mean(xx)
xx.mean()

## pandas Series and DataFrame

# Series: wrap 1000 draws from np.random.randn, then draw a histogram
xx = np.random.randn(1000)
xS = pd.Series(data=xx)
xS.hist()

# DataFrame: wrap a 1000 x 2 array from np.random.rand, summarize it, then plot it
xx = np.random.rand(1000, 2)
xxDF = pd.DataFrame(data=xx)
xxDF.describe()
xxDF.plot()

## great books
##   Introducing Python: Modern Computing in Simple Packages 2nd Edition
##   by Bill Lubanovic  (Author)
##   
##   Python Distilled (Developer's Library) 1st Edition
##   by David Beazley (Author)
##   
##   Python Data Science Handbook: Essential Tools for Working with Data 1st Edition
##   by Jake VanderPlas  (Author)

