This is a part of the solution for the problem I described here.
The LogisticRegression class chooses its parameters by maximizing the log-likelihood of the training labels under the hypothesis. The hypothesis is a sigmoid (logistic) function parameterized by theta.
import numpy as np
import math


class LogisticRegression:
    """Logistic-regression model whose parameters are fit by an injected optimizer.

    The hypothesis is the sigmoid of the dot product between the parameters
    and a feature vector.  Fitting hands one partial derivative of the
    log-likelihood per parameter to ``optimizer.maximize``.
    """

    # Class-level default; replaced per instance in __init__.
    _optimizer = None

    def __init__(self, optimizer):
        """Store the optimizer used by ``calculate_parameters``.

        ``optimizer`` must provide ``maximize(gradient, initial)``.  It may be
        ``None`` when only ``hypothesis`` is going to be called.
        """
        self._optimizer = optimizer

    def calculate_parameters(self, x, y):
        """Fit the parameters to a training set.

        Args:
            x: 2-D array-like of features, one row per training example.
            y: Sequence of labels, one per row of ``x``.

        Returns:
            Whatever ``optimizer.maximize`` returns for the gradient callables
            and the initial guess.

        Raises:
            ValueError: If ``x`` is not 2-D, or ``x`` and ``y`` have
                different lengths.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        # Explicit exceptions instead of assert: asserts vanish under -O.
        if x.ndim != 2:
            raise ValueError("x must be 2-dimensional (examples x features)")
        if len(x) != len(y):
            raise ValueError(
                "x and y must have the same length, got %d and %d"
                % (len(x), len(y)))

        num_params = x.shape[1]
        # Every entry is the same callable; the optimizer selects the
        # coordinate by passing the index j as the second argument.
        partial = self._log_likelihood_partial_lambda(x, y)
        gradient = [partial] * num_params
        return self._optimizer.maximize(gradient,
                                        self._initial_guess(num_params))

    def hypothesis(self, x, params):
        """Return the sigmoid of ``params . x`` as a float in [0, 1].

        Args:
            x: Feature vector of a single example.
            params: Parameter vector with the same length as ``x``.
        """
        z = np.dot(np.transpose(params), x)
        # Only ever exponentiate a non-positive number so math.exp cannot
        # overflow, whatever the magnitude of z.
        if z >= 0:
            return 1. / (1. + math.exp(-z))
        exp_z = math.exp(z)
        return exp_z / (1. + exp_z)

    def _log_likelihood_partial_lambda(self, x, y):
        """Bind the training set and return a callable ``(theta, j) -> partial``."""
        return lambda theta, j: self._log_likelihood_partial(x, y, theta, j)

    def _log_likelihood_partial(self, x, y, theta, j):
        """Return the partial derivative of the log-likelihood w.r.t. ``theta[j]``.

        Sums ``(y_i - hypothesis(x_i, theta)) * x_i[j]`` over the examples.
        """
        result = 0
        for features, label in zip(x, y):
            result += (label - self.hypothesis(features, theta)) * features[j]
        return result

    def _initial_guess(self, numParams):
        """Return the starting parameter vector: ``numParams`` entries of 1e-50."""
        return np.zeros(numParams) + 1e-50


# For the optimizer, we use Batch Gradient Descent.
from numpy import linalg as la
import numpy
import sys


class GradientDescent:
    """Batch gradient descent/ascent over a list of partial-derivative callables.

    ``gradient`` is a sequence with one callable per parameter; entry ``j``
    is called as ``gradient[j](estimate, j)`` and returns the partial
    derivative with respect to parameter ``j`` at ``estimate``.
    """

    # Class-level defaults; replaced per instance in __init__.
    _learningRate = None
    _tolerance = None
    # Upper bound on the number of update steps; may be overridden on an
    # instance or subclass.
    MAX_ITERATIONS = 1000

    def __init__(self, learningRate, tolerance):
        """Store the step size and the convergence tolerance.

        Args:
            learningRate: Factor applied to every gradient step.
            tolerance: Iteration stops once the Euclidean norm of the last
                update is no longer greater than this value.
        """
        self._learningRate = learningRate
        self._tolerance = tolerance

    def minimize(self, gradient, initial):
        """Step against the gradient from ``initial`` and return the estimate."""
        subtractFunc = lambda x, y: x - y
        return self._runDescent(gradient, initial, subtractFunc)

    def maximize(self, gradient, initial):
        """Step along the gradient from ``initial`` and return the estimate."""
        addFunc = lambda x, y: x + y
        return self._runDescent(gradient, initial, addFunc)

    def _runDescent(self, gradient, initial, addOrSubtract):
        """Iterate until the update is within tolerance or the cap is hit.

        ``initial`` is copied as a float array, so the caller's array is
        never modified and integer inputs do not truncate the steps.
        """
        estimate = numpy.array(initial, dtype=float)
        # Infinite "previous" value guarantees the first comparison passes.
        prevEstimate = float('inf')
        iteration = 0
        while (self._max_change(estimate, prevEstimate) > self._tolerance and
               iteration < self.MAX_ITERATIONS):
            # _increment returns a new array, so no copy is needed here.
            prevEstimate = estimate
            estimate = self._increment(gradient, estimate, addOrSubtract)
            iteration += 1
        return estimate

    def _increment(self, gradient, x, addOrSubtract):
        """Return a new estimate after one simultaneous update of all parameters.

        Every partial derivative is evaluated at the same ``x`` before any
        coordinate changes; ``x`` itself is left untouched.
        """
        steps = numpy.array([gradient[j](x, j) for j in range(len(x))],
                            dtype=float)
        return addOrSubtract(x, self._learningRate * steps)

    def _max_change(self, x1, x2):
        """Return the Euclidean norm of ``x1 - x2``."""
        return la.norm(x1 - x2)


# Here’s an example of how to train the classifier for features x and labels y:
def calculate_parameters(x, y, learning_rate=0.01, tolerance=0.001):
    """Train a logistic-regression classifier with gradient descent.

    Args:
        x: Training features, passed unchanged to
            ``LogisticRegression.calculate_parameters``.
            NOTE(review): no bias column is added here; presumably the
            caller includes x_0 = 1 in ``x`` already -- confirm.
        y: Training labels, one per example in ``x``.
        learning_rate: Step size handed to ``GradientDescent``.
        tolerance: Convergence tolerance handed to ``GradientDescent``.

    Returns:
        The fitted parameters returned by
        ``LogisticRegression.calculate_parameters``.
    """
    optimizer = GradientDescent(learning_rate, tolerance)
    logistic = LogisticRegression(optimizer)
    return logistic.calculate_parameters(x, y)


# And here’s how to classify new examples:
def classify(x, params):
    """Evaluate the logistic hypothesis for one new example.

    Args:
        x: 1-D feature vector without the constant term.
        params: Parameter vector; it needs one more entry than ``x``
            because the constant feature x_0 = 1 is prepended here.

    Returns:
        The value of ``LogisticRegression.hypothesis`` for the augmented
        feature vector.
    """
    logistic = LogisticRegression(None)
    # Include x_0=1 by convention, so that
    # theta_0 can shift the whole sigmoid function
    features = np.concatenate([np.ones(1), x])
    # Evaluate on the augmented vector; the raw x would drop the bias term.
    return logistic.hypothesis(features, params)


# References:
- Ng, Andrew. CS229 Lecture Notes. http://see.stanford.edu/materials/aimlcs229/cs229-notes1.pdf
- Christopher Bishop, Pattern Recognition and Machine Learning. Springer, 2006.
Pingback: Finding Pauses: The Solution (1)