Logistic regression

In this project, I demonstrate the implementation of the logistic regression algorithm from scratch.

First, the necessary libraries

import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg as LA
import plotly
import plotly.graph_objects as go
import random, time
from collections import Counter
plotly.offline.init_notebook_mode() #make plots available in html

def G(v): 
    return 1/(1+np.exp(-v))

def Log(v): 
    return np.log(v)

def J(X,y,v):
    ######################### your code goes here ########################
    m = len(y)
    onesvect = np.ones((m,1))
    return -1/m * (((onesvect-y).T @ Log(onesvect-G(X@v))) - (y.T @ Log(G(X@v))))
    
def DJ(X,y,v):
    ######################### your code goes here ########################
    m = len(y)
    return 1/m * (X.T @ (G(X@v) - y))

def GD_logreg(X,y,epsilon,max_iters): 
    ######################### your code goes here ########################
    m, n = np.shape(X)
    onesvect = np.ones((m,1)) 
    X_hat = np.concatenate((onesvect,X), axis = 1)
    condition = True 
    iters = 0
    costs = [100]
    v = np.zeros((n+1,1)) #note: v should be length of number of features + 1
    while condition and iters < max_iters:
        #CALCULATE ALPHA
        cost_grad2 = DJ(X_hat,y,v) #1x4 SHOULD BE 4X1 - CHANGE IN DJ(X,y,v)
        #-------------HESSIAN OF LOGISTIC REGRESSION for ALPHA---------------------
        z = X_hat @ v # mx1
        #diagG = np.diag([G(z[i])*(1-G(z[i])) for i in range(m)]) #should be mxm
        Glist = [G(z[i])*(1-G(z[i])) for i in range(m)]
        #diagG = np.diag(Glist) DOESN'T WORK
        diagG = np.zeros((m,m))
        for i in range(m):
            diagG[i][i] = Glist[i]
        # Complete Hessian with diagG
        Hess = 1/m * (X_hat.T @ diagG @ X_hat) #nxn (4x4)   
        alpha = (cost_grad2.T @ cost_grad2) / (cost_grad2.T @ Hess @ cost_grad2)
        #alpha = 3*10e-8
        #------------GRADIENT DESCENT---------------------------------------------
        v = v - (alpha * cost_grad2)
        costs.append(J(X_hat,y,v))
        iters +=1
        if ((iters-1)/100).is_integer():
            print(f'After {iters} steps the cost is {costs[iters]}')  
        condition = abs(costs[iters] - costs[iters-1]) > epsilon
    endcost = len(costs)
    print(f'After {endcost-1} steps the cost is {costs[endcost-1]}')
    return (v,costs)

In the cell below we generate a dataset of points to test our logistic regression model. The visualization provides a clear distinction between red and blue dots.

y provides the dataset labels where a "0" represents the blue points and a "1" represents the red points.

points = np.random.normal(0, 50, size=(3000,3))
y,c = [],[]
for point in points:
    if 3*point[0]-7*point[1]+12*point[2]<4:
        rnd = random.randint(-10,10)
        if rnd>-8:
            y.append([1])
            c.append('red')
        else:
            y.append([0])
            c.append('blue')
    else:
        rnd = random.randint(-10,10)
        if rnd>-8:
            y.append([0])
            c.append('blue')
        else:
            y.append([1])
            c.append('red')
X,y = np.array(points), np.array(y)
Xhat = np.concatenate((np.ones((len(X),1)),X), axis = 1)
# visualize the dataset 
plot_figure = go.Figure(data=[go.Scatter3d(x=Xhat[:,1], y=Xhat[:,2], z=Xhat[:,3], mode='markers',marker=dict(size=2,color=c))])
plotly.offline.iplot(plot_figure)

The following cell runs our model with appropriate parameters and outputs a few instances of the cost value.

epsilon = .001
max_iters = 10000
v,costs=GD_logreg(X,y,epsilon,max_iters)

After 1 steps the cost is [[0.00744616]]
After 4 steps the cost is [[0.01407213]]

By plotting the costs in the next cell we can visualize convergence.

plt.plot(costs)
plt.xlabel('number of iterations')
plt.ylabel('Cost J(v)')
plt.show()

Our hyperplane serves to separate red dots from blue dots.

print("The hyperplane coefficients are:")
print(v)

The hyperplane coefficients are:
[[-1.32398191e-05]
 [-7.44360069e-03]
 [ 1.59323999e-02]
 [-2.75649543e-02]]

In the following cell we can visualize how well the plane separates the clusters.

yy = [r[0] for r in y]
trace = go.Scatter3d(x=Xhat[:,1], y=Xhat[:,2], z=Xhat[:,3], mode='markers',marker=dict(size=3,color=c))
xs = Xhat[:,1]
ys = Xhat[:,2]
xxx = np.outer(np.linspace(min(xs), max(xs), 30), np.ones(30))
yyy = np.outer(np.linspace(min(ys), max(ys), 30), np.ones(30)).T
zzz = (v[0]+v[1]*xxx + v[2]*yyy)/(-v[3])
# Configure the layout.
layout = go.Layout(margin={'l': 0, 'r': 0, 'b': 0, 't': 0})
data = [trace,go.Surface(x=xxx, y=yyy, z=zzz, showscale=False, opacity=0.5)]
# Render the plot.
plot_figure = go.Figure(data=data, layout=layout)
plotly.offline.iplot(plot_figure)