Logistic regression

In this project, I demonstrate the implementation of the logistic regression algorithm from scratch.

First, the necessary libraries

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg as LA
import plotly
import plotly.graph_objects as go
import random, time
from collections import Counter
plotly.offline.init_notebook_mode() #make plots available in html

image.png

In [3]:
def G(v): 
    return 1/(1+np.exp(-v))

def Log(v): 
    return np.log(v)

def J(X,y,v):
    ######################### your code goes here ########################
    m = len(y)
    onesvect = np.ones((m,1))
    return -1/m * (((onesvect-y).T @ Log(onesvect-G(X@v))) - (y.T @ Log(G(X@v))))
    
def DJ(X,y,v):
    ######################### your code goes here ########################
    m = len(y)
    return 1/m * (X.T @ (G(X@v) - y))

def GD_logreg(X,y,epsilon,max_iters): 
    ######################### your code goes here ########################
    m, n = np.shape(X)
    onesvect = np.ones((m,1)) 
    X_hat = np.concatenate((onesvect,X), axis = 1)
    condition = True 
    iters = 0
    costs = [100]
    v = np.zeros((n+1,1)) #note: v should be length of number of features + 1
    while condition and iters < max_iters:
        #CALCULATE ALPHA
        cost_grad2 = DJ(X_hat,y,v) #1x4 SHOULD BE 4X1 - CHANGE IN DJ(X,y,v)
        #-------------HESSIAN OF LOGISTIC REGRESSION for ALPHA---------------------
        z = X_hat @ v # mx1
        #diagG = np.diag([G(z[i])*(1-G(z[i])) for i in range(m)]) #should be mxm
        Glist = [G(z[i])*(1-G(z[i])) for i in range(m)]
        #diagG = np.diag(Glist) DOESN'T WORK
        diagG = np.zeros((m,m))
        for i in range(m):
            diagG[i][i] = Glist[i]
        # Complete Hessian with diagG
        Hess = 1/m * (X_hat.T @ diagG @ X_hat) #nxn (4x4)   
        alpha = (cost_grad2.T @ cost_grad2) / (cost_grad2.T @ Hess @ cost_grad2)
        #alpha = 3*10e-8
        #------------GRADIENT DESCENT---------------------------------------------
        v = v - (alpha * cost_grad2)
        costs.append(J(X_hat,y,v))
        iters +=1
        if ((iters-1)/100).is_integer():
            print(f'After {iters} steps the cost is {costs[iters]}')  
        condition = abs(costs[iters] - costs[iters-1]) > epsilon
    endcost = len(costs)
    print(f'After {endcost-1} steps the cost is {costs[endcost-1]}')
    return (v,costs)

In the cell below we generate a dataset of points to test our logistic regression model. The visualization provides a clear distinction between red and blue dots.

y provides the dataset labels where a "0" represents the blue points and a "1" represents the red points.

In [10]:
points = np.random.normal(0, 50, size=(3000,3))
y,c = [],[]
for point in points:
    if 3*point[0]-7*point[1]+12*point[2]<4:
        rnd = random.randint(-10,10)
        if rnd>-8:
            y.append([1])
            c.append('red')
        else:
            y.append([0])
            c.append('blue')
    else:
        rnd = random.randint(-10,10)
        if rnd>-8:
            y.append([0])
            c.append('blue')
        else:
            y.append([1])
            c.append('red')
X,y = np.array(points), np.array(y)
Xhat = np.concatenate((np.ones((len(X),1)),X), axis = 1)
# visualize the dataset 
plot_figure = go.Figure(data=[go.Scatter3d(x=Xhat[:,1], y=Xhat[:,2], z=Xhat[:,3], mode='markers',marker=dict(size=2,color=c))])
plotly.offline.iplot(plot_figure)

The following cell runs our model with appropriate parameters and outputs a few instances of the cost value.

In [5]:
epsilon = .001
max_iters = 10000
v,costs=GD_logreg(X,y,epsilon,max_iters)
After 1 steps the cost is [[0.00744616]]
After 4 steps the cost is [[0.01407213]]

By plotting the costs in the next cell we can visualize convergence.

In [6]:
plt.plot(costs)
plt.xlabel('number of iterations')
plt.ylabel('Cost J(v)')
plt.show()

Our hyperplane serves to separate red dots from blue dots.

In [7]:
print("The hyperplane coefficients are:")
print(v)
The hyperplane coefficients are:
[[-1.32398191e-05]
 [-7.44360069e-03]
 [ 1.59323999e-02]
 [-2.75649543e-02]]

In the following cell we can visualize how well the plane separates the clusters.

In [9]:
yy = [r[0] for r in y]
trace = go.Scatter3d(x=Xhat[:,1], y=Xhat[:,2], z=Xhat[:,3], mode='markers',marker=dict(size=3,color=c))
xs = Xhat[:,1]
ys = Xhat[:,2]
xxx = np.outer(np.linspace(min(xs), max(xs), 30), np.ones(30))
yyy = np.outer(np.linspace(min(ys), max(ys), 30), np.ones(30)).T
zzz = (v[0]+v[1]*xxx + v[2]*yyy)/(-v[3])
# Configure the layout.
layout = go.Layout(margin={'l': 0, 'r': 0, 'b': 0, 't': 0})
data = [trace,go.Surface(x=xxx, y=yyy, z=zzz, showscale=False, opacity=0.5)]
# Render the plot.
plot_figure = go.Figure(data=data, layout=layout)
plotly.offline.iplot(plot_figure)