Finding Pauses: Logistic Regression Code

This is a part of the solution for the problem I described here.

The LogisticRegression class attempts to maximize the log-likelihood of the training data under a hypothesis. The hypothesis is a sigmoid (logistic) function parameterized by theta.

import numpy as np
import math
 
class LogisticRegression:
    """Fit logistic-regression parameters by delegating to an optimizer.

    The optimizer given to the constructor must expose
    ``maximize(gradient, initial)``.  It receives one partial-derivative
    callable per column of ``x`` plus an initial parameter vector.
    """

    _optimizer = None

    def __init__(self, optimizer):
        """Remember the optimizer that ``calculate_parameters`` will call."""
        self._optimizer = optimizer

    def calculate_parameters(self, x, y):
        """Return the result of the optimizer's ``maximize`` for (x, y).

        ``x`` must expose ``shape`` (one row per example, one column per
        parameter) and ``y`` must have the same length as ``x``.
        """
        assert len(x) == len(y)
        feature_count = x.shape[1]

        # One callable per parameter; each takes (theta, j).
        partials = []
        for _ in range(feature_count):
            partials.append(self._log_likelihood_partial_lambda(x, y))

        return self._optimizer.maximize(partials,
                                        self._initial_guess(feature_count))

    def hypothesis(self, x, params):
        """Evaluate the sigmoid of ``params . x`` for one example."""
        z = -1 * np.dot(np.transpose(params), x)

        # exp(z) would overflow for large z, and the sigmoid tends to
        # zero there anyway, so short-circuit to exactly zero.
        if z > 707:
            return 0.0

        return 1. / (1. + math.exp(z))

    def _log_likelihood_partial_lambda(self, x, y):
        """Bind (x, y) and return a callable taking ``(theta, j)``."""
        def partial(theta, j):
            return self._log_likelihood_partial(x, y, theta, j)
        return partial

    def _log_likelihood_partial(self, x, y, theta, j):
        """Sum ``(y[i] - hypothesis(x[i], theta)) * x[i][j]`` over all rows."""
        total = 0
        row_count = x.shape[0]
        for row in range(row_count):
            residual = y[row] - self.hypothesis(x[row], theta)
            total += residual * x[row][j]
        return total

    def _initial_guess(self, numParams):
        """Return a vector of ``numParams`` entries, each equal to 1e-50."""
        guess = np.zeros(numParams)
        guess += 1e-50
        return guess
import numpy as np
import math

class LogisticRegression:
    """Fit logistic-regression parameters by delegating to an optimizer.

    The optimizer given to the constructor must expose
    ``maximize(gradient, initial)``.  It receives one partial-derivative
    callable per column of ``x`` plus an initial parameter vector.
    """

    _optimizer = None

    def __init__(self, optimizer):
        """Remember the optimizer that ``calculate_parameters`` will call."""
        self._optimizer = optimizer

    def calculate_parameters(self, x, y):
        """Return the result of the optimizer's ``maximize`` for (x, y).

        ``x`` must expose ``shape`` (one row per example, one column per
        parameter) and ``y`` must have the same length as ``x``.
        """
        assert len(x) == len(y)
        feature_count = x.shape[1]

        # One callable per parameter; each takes (theta, j).
        partials = []
        for _ in range(feature_count):
            partials.append(self._log_likelihood_partial_lambda(x, y))

        return self._optimizer.maximize(partials,
                                        self._initial_guess(feature_count))

    def hypothesis(self, x, params):
        """Evaluate the sigmoid of ``params . x`` for one example."""
        z = -1 * np.dot(np.transpose(params), x)

        # exp(z) would overflow for large z, and the sigmoid tends to
        # zero there anyway, so short-circuit to exactly zero.
        if z > 707:
            return 0.0

        return 1. / (1. + math.exp(z))

    def _log_likelihood_partial_lambda(self, x, y):
        """Bind (x, y) and return a callable taking ``(theta, j)``."""
        def partial(theta, j):
            return self._log_likelihood_partial(x, y, theta, j)
        return partial

    def _log_likelihood_partial(self, x, y, theta, j):
        """Sum ``(y[i] - hypothesis(x[i], theta)) * x[i][j]`` over all rows."""
        total = 0
        row_count = x.shape[0]
        for row in range(row_count):
            residual = y[row] - self.hypothesis(x[row], theta)
            total += residual * x[row][j]
        return total

    def _initial_guess(self, numParams):
        """Return a vector of ``numParams`` entries, each equal to 1e-50."""
        guess = np.zeros(numParams)
        guess += 1e-50
        return guess

For the optimizer, we use Batch Gradient Descent.

from numpy import linalg as la
import numpy
import sys
 
class GradientDescent:
    """Batch gradient descent / ascent over a list of partial derivatives.

    ``gradient`` arguments are sequences of callables, one per parameter;
    callable ``j`` is invoked as ``gradient[j](estimate, j)`` and must
    return the partial derivative with respect to parameter ``j``.
    """

    _learningRate = None
    _tolerance = None

    MAX_ITERATIONS = 1000

    def __init__(self, learningRate, tolerance):
        """Store the step size and the convergence threshold."""
        self._learningRate = learningRate
        self._tolerance = tolerance

    def minimize(self, gradient, initial):
        """Step against the gradient starting from ``initial``."""
        subtractFunc = lambda x, y: x - y
        return self._runDescent(gradient, initial, subtractFunc)

    def maximize(self, gradient, initial):
        """Step along the gradient starting from ``initial``."""
        addFunc = lambda x, y: x + y
        return self._runDescent(gradient, initial, addFunc)

    def _runDescent(self, gradient, initial, addOrSubtract):
        """Iterate until the step norm is within tolerance or the cap is hit.

        Returns a new float array; ``initial`` itself is never modified.
        """
        # Float copy: keeps the caller's array intact and prevents an
        # integer-typed starting point from truncating every update.
        estimate = numpy.array(initial, dtype=float)
        change = float('inf')
        iteration = 0
        while (change > self._tolerance and
                iteration < GradientDescent.MAX_ITERATIONS):
            updated = self._increment(gradient, estimate, addOrSubtract)
            change = self._max_change(updated, estimate)
            estimate = updated
            iteration += 1
        return estimate

    def _increment(self, gradient, x, addOrSubtract):
        """Return a new vector with every parameter stepped once.

        All partials are evaluated at the same point ``x`` (a simultaneous
        update); ``x`` is left untouched so that later partials do not see
        coordinates that were already moved in this sweep.
        """
        updated = x.copy()
        for j in range(len(x)):
            step = gradient[j](x, j)
            updated[j] = addOrSubtract(x[j], self._learningRate * step)
        return updated

    def _max_change(self, x1, x2):
        """Return the norm (``numpy.linalg.norm``) of ``x1 - x2``."""
        return la.norm(x1 - x2)
from numpy import linalg as la
import numpy
import sys

class GradientDescent:
    """Batch gradient descent / ascent over a list of partial derivatives.

    ``gradient`` arguments are sequences of callables, one per parameter;
    callable ``j`` is invoked as ``gradient[j](estimate, j)`` and must
    return the partial derivative with respect to parameter ``j``.
    """

    _learningRate = None
    _tolerance = None

    MAX_ITERATIONS = 1000

    def __init__(self, learningRate, tolerance):
        """Store the step size and the convergence threshold."""
        self._learningRate = learningRate
        self._tolerance = tolerance

    def minimize(self, gradient, initial):
        """Step against the gradient starting from ``initial``."""
        subtractFunc = lambda x, y: x - y
        return self._runDescent(gradient, initial, subtractFunc)

    def maximize(self, gradient, initial):
        """Step along the gradient starting from ``initial``."""
        addFunc = lambda x, y: x + y
        return self._runDescent(gradient, initial, addFunc)

    def _runDescent(self, gradient, initial, addOrSubtract):
        """Iterate until the step norm is within tolerance or the cap is hit.

        Returns a new float array; ``initial`` itself is never modified.
        """
        # Float copy: keeps the caller's array intact and prevents an
        # integer-typed starting point from truncating every update.
        estimate = numpy.array(initial, dtype=float)
        change = float('inf')
        iteration = 0
        while (change > self._tolerance and
                iteration < GradientDescent.MAX_ITERATIONS):
            updated = self._increment(gradient, estimate, addOrSubtract)
            change = self._max_change(updated, estimate)
            estimate = updated
            iteration += 1
        return estimate

    def _increment(self, gradient, x, addOrSubtract):
        """Return a new vector with every parameter stepped once.

        All partials are evaluated at the same point ``x`` (a simultaneous
        update); ``x`` is left untouched so that later partials do not see
        coordinates that were already moved in this sweep.
        """
        updated = x.copy()
        for j in range(len(x)):
            step = gradient[j](x, j)
            updated[j] = addOrSubtract(x[j], self._learningRate * step)
        return updated

    def _max_change(self, x1, x2):
        """Return the norm (``numpy.linalg.norm``) of ``x1 - x2``."""
        return la.norm(x1 - x2)

Here’s an example of how to train the classifier for features x and labels y:

def calculate_parameters(x, y):
    """Train on features x and labels y and return the fitted parameters.

    Uses GradientDescent with learning rate 0.01 and tolerance 0.001 as
    the optimizer for LogisticRegression.
    """
    return LogisticRegression(GradientDescent(0.01, 0.001)).calculate_parameters(x, y)
def calculate_parameters(x, y):
    """Train on features x and labels y and return the fitted parameters.

    Uses GradientDescent with learning rate 0.01 and tolerance 0.001 as
    the optimizer for LogisticRegression.
    """
    return LogisticRegression(GradientDescent(0.01, 0.001)).calculate_parameters(x, y)

And here’s how to classify new examples:

def classify(x, params):
    """Return the sigmoid hypothesis value for one example.

    x is the raw feature vector without the constant term; a leading 1 is
    prepended here, so params must have one more entry than x.
    """
    logistic = LogisticRegression(None)

    # Include x_0=1 by convention, so that
    # theta_0 can shift the whole sigmoid function
    features = np.concatenate([np.ones(1), x])

    # Evaluate on the augmented vector so that params[0] acts as the
    # intercept term.
    return logistic.hypothesis(features, params)
def classify(x, params):
    """Return the sigmoid hypothesis value for one example.

    x is the raw feature vector without the constant term; a leading 1 is
    prepended here, so params must have one more entry than x.
    """
    logistic = LogisticRegression(None)

    # Include x_0=1 by convention, so that
    # theta_0 can shift the whole sigmoid function
    features = np.concatenate([np.ones(1), x])

    # Evaluate on the augmented vector so that params[0] acts as the
    # intercept term.
    return logistic.hypothesis(features, params)

References:

One thought on “Finding Pauses: Logistic Regression Code”

  1. Pingback: Finding Pauses: The Solution (1)

Leave a Reply

Your email address will not be published. Required fields are marked *

*

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code lang=""> <del datetime=""> <em> <i> <q cite=""> <strike> <strong> <pre lang="" extra="">