This project demonstrates linear regression fitted with batch gradient descent.
First, we import the necessary libraries.
import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg as LA
import plotly
import plotly.graph_objects as go
import random, time
from collections import Counter
plotly.offline.init_notebook_mode() #make plots available in html
def J(X, y, v):
    # Least-squares cost: J(v) = 1/(2m) * ||Xv - y||^2.
    # The 1/(2m) scaling makes DJ below its exact gradient.
    m = np.size(X, 0)
    residual = X @ v - y
    return ((residual.T @ residual) / (2 * m)).item()
def DJ(X, y, v):
    # Gradient of J: DJ(v) = 1/m * X^T (Xv - y).
    m = np.size(X, 0)
    return (1 / m) * X.T @ (X @ v - y)
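As an optional sanity check (an addition, not part of the original cells), DJ can be compared against a central finite-difference approximation of J on a small random problem; the problem sizes and the step h below are arbitrary illustrative choices.
# Finite-difference check that DJ is the gradient of J (illustrative sketch).
X_check = np.random.randn(20, 3)
y_check = np.random.randn(20, 1)
v_check = np.random.randn(3, 1)
h = 1e-6
num_grad = np.zeros_like(v_check)
for i in range(v_check.size):
    e = np.zeros_like(v_check)
    e[i] = h
    # Central difference approximation of the i-th partial derivative of J.
    num_grad[i] = (J(X_check, y_check, v_check + e) - J(X_check, y_check, v_check - e)) / (2 * h)
print(np.max(np.abs(num_grad - DJ(X_check, y_check, v_check))))  # expected to be very small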
def GD_linreg(X, y, alpha, epsilon, max_iters=10000):
    # Gradient descent for linear regression.
    # A column of ones is prepended to X so that v[0] acts as the intercept.
    m, n = np.shape(X)
    onesvect = np.ones((m, 1))
    X_hat = np.concatenate((onesvect, X), axis=1)
    condition = True
    iters = 0
    costs = [10]  # dummy initial entry so costs[iters] is the cost after `iters` updates
    v = np.zeros((n + 1, 1))  # one entry per feature plus the intercept
    while condition and iters < max_iters:
        v = v - alpha * DJ(X_hat, y, v)
        costs.append(J(X_hat, y, v))
        iters += 1
        if (iters - 1) % 100 == 0:
            print(f'After {iters} steps the cost is {costs[iters]}')
        # Stop once the cost changes by less than epsilon between iterations.
        condition = abs(costs[iters] - costs[iters - 1]) > epsilon
    print(f'After {iters} steps the cost is {costs[iters]}')
    return (v, costs)
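Purely for illustration (this call is not part of the original project), GD_linreg can be smoke-tested on a made-up one-feature problem; the data and parameters below are arbitrary. Note that GD_linreg prepends the bias column itself, so X is passed without a column of ones.
# Toy smoke test: targets follow roughly 2*x + 1 with a little noise (values chosen arbitrarily).
X_toy = np.random.uniform(-1, 1, size=(200, 1))
y_toy = 2 * X_toy + 1 + np.random.normal(0, 0.1, size=(200, 1))
v_toy, costs_toy = GD_linreg(X_toy, y_toy, alpha=0.1, epsilon=1e-8, max_iters=5000)
print(v_toy)  # roughly [[1.], [2.]]: intercept first, then the slope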
The following cell generates and visualizes a data set of points in 3 dimensions. This is the data our linear regression model will be fitted to.
# Each target is roughly 3*x1 + 4*x2 + 5, with Gaussian noise on every coefficient.
X = np.random.normal(0, 500, size=(4000, 2))
y = np.array([[np.random.normal(3, 2)*point[0] + np.random.normal(4, 2)*point[1] + np.random.normal(5, 5)] for point in X])
Xhat = np.concatenate((np.ones((len(X), 1)), X), axis=1)  # bias-augmented copy, used for plotting and later checks
# Plot the raw points in 3D.
plot_figure = go.Figure(data=[go.Scatter3d(x=Xhat[:, 1], y=Xhat[:, 2], z=[r[0] for r in y], mode='markers', marker=dict(size=2))])
plotly.offline.iplot(plot_figure)
The following cell runs gradient descent with suitable parameters and prints the cost every 100 iterations.
alpha = 3e-7  # small step size, since the features are on the order of hundreds
epsilon = 0.01
max_iters = 10000
v,costs = GD_linreg(X,y,alpha,epsilon,max_iters)
By plotting the costs in the next cell we can visualize convergence.
plt.plot(costs)
plt.xlabel('number of iterations')
plt.ylabel('Cost J(v)')
plt.show()
The resulting hyperplane has thus been fitted to the data; its coefficients are printed below.
print("The hyperplane coefficients are:")
print(v)
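As a sanity check (an addition to the original cells), the gradient-descent coefficients can be compared against the closed-form least-squares solution on the same bias-augmented data:
# Closed-form least-squares fit on Xhat, for comparison with gradient descent.
v_exact, *_ = np.linalg.lstsq(Xhat, y, rcond=None)
print("Gradient descent: ", v.ravel())
print("Normal equations: ", v_exact.ravel())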
In the following cell we can visualize how well this hyperplane fits the data.
yy = [r[0] for r in y]  # flatten y for plotting
trace = go.Scatter3d(x=Xhat[:, 1], y=Xhat[:, 2], z=yy, mode='markers', marker=dict(size=2))
# Build a 30x30 grid spanning the data and evaluate the fitted plane on it.
xs, ys = Xhat[:, 1], Xhat[:, 2]
new_x = np.outer(np.linspace(min(xs), max(xs), 30), np.ones(30))
new_y = np.outer(np.linspace(min(ys), max(ys), 30), np.ones(30)).T
new_z = v[0] + v[1]*new_x + v[2]*new_y
# Configure the layout.
layout = go.Layout(margin={'l': 0, 'r': 0, 'b': 0, 't': 0})
data = [trace,go.Surface(x=new_x, y=new_y, z=new_z, showscale=False, opacity=0.5)]
# Render the plot.
plot_figure = go.Figure(data=data, layout=layout)
plotly.offline.iplot(plot_figure)
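To put a number on the quality of the fit, one could also report the coefficient of determination R² (again an optional addition, not part of the original cells):
# R^2 of the fitted plane: 1 - (residual sum of squares) / (total sum of squares).
predictions = Xhat @ v
ss_res = np.sum((y - predictions) ** 2)
ss_tot = np.sum((y - np.mean(y)) ** 2)
print(f"R^2 = {1 - ss_res / ss_tot:.4f}")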