# name: logisticRegression_functions.R
# author: Wiebke Petersen
# date: 09 May, 2017
# based on: ex2.m, original author: Andrew Ng
# context: Machine Learning course on coursera.org
#
# Notation used throughout:
#   m     : number of training examples
#   n     : number of features (for exercise 1: n = 1)
#   y     : m-dimensional vector of labels
#   X     : m x (n+1) matrix (note: by convention the first column consists only of 1's)
#   theta : (n+1)-dimensional parameter vector
#   x(i)  : (n+1)-dimensional vector of the i-th training example (i-th row of X)
#   xj    : m-dimensional vector (j-th column of X)
# sigmoid function
sigmoid <- function(x){
  return(1/(1+exp(-x)))
}

# hypothesis for logistic regression: h_theta(x) = sigmoid(X %*% theta)
hlog <- function(theta,X){
  return(sigmoid(X %*% theta))
}
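
# Example (a hedged sketch, not part of the original exercise code): applying
# hlog to a tiny design matrix with the intercept column of 1's described in the
# header. Wrapped in `if (FALSE)` so that sourcing this file does not run it.
if (FALSE) {
  X_demo <- cbind(1, c(-2, 0, 2))   # m = 3 examples, one feature plus intercept column
  theta_demo <- c(0.5, 1)           # (n+1)-dimensional parameter vector
  hlog(theta_demo, X_demo)          # predicted probabilities sigmoid(X %*% theta)
}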
# gradient descent for logistic regression
# data: m x n matrix, features in columns 1..(n-1), labels in column n
gradientDescentLog <- function(data, theta, alpha, iterations){
  m <- dim(data)[1]
  n <- dim(data)[2]
  X <- ones(data[, 1:(n-1)])   # prepend intercept column of 1's
  y <- data[, n]
  cost_vec <- Cost(sigmoid(X %*% theta), y)
  theta_vec <- t(theta)
  for (i in 1:iterations){
    theta <- theta - alpha * t(t(sigmoid(X %*% theta) - y) %*% X) / m
    cost <- Cost(sigmoid(X %*% theta), y)
    cost_vec <- c(cost_vec, cost)
    theta_vec <- rbind(theta_vec, t(theta))
  }
  return(list(theta=theta, theta_vec=theta_vec, cost_vec=cost_vec))
}
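
# Example usage (a hedged sketch with simulated data, not from the original
# exercise). All names below (x1, y_demo, demo_data, fit) are hypothetical.
# Wrapped in `if (FALSE)` so that sourcing this file does not run it.
if (FALSE) {
  set.seed(42)
  x1 <- rnorm(100)
  y_demo <- rbinom(100, 1, sigmoid(1 + 2 * x1))   # labels drawn from a known model
  demo_data <- cbind(x1, y_demo)                  # features first, labels in the last column
  fit <- gradientDescentLog(demo_data, theta = c(0, 0), alpha = 0.1, iterations = 1000)
  fit$theta                                       # estimated parameters
  last(fit$cost_vec, 1)                           # final cost
}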
# logistic regression cost for predictions h and labels y
Cost <- function(h,y){
  m <- length(y)
  return(((-y) %*% log(h) - (1-y) %*% log(1-h)) / m)
}
# cost function as a closure over X and y: returns a function of theta
costFunction <- function(X,y){
  m <- dim(X)[1]
  function(theta){
    h <- sigmoid(X %*% theta)
    return(((-y) %*% log(h) - (1-y) %*% log(1-h)) / m)
  }
}
# gradient of the cost as a closure over X and y: returns a function of theta
gradient <- function(X,y){
  m <- dim(X)[1]
  function(theta){
    h <- sigmoid(X %*% theta)
    return(t(t(h-y) %*% X) / m)
  }
}
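
# Example usage (a hedged sketch, not from the original exercise): since
# costFunction and gradient return functions of theta, they can be handed to a
# general-purpose optimizer such as optim(). All names below are hypothetical.
# Wrapped in `if (FALSE)` so that sourcing this file does not run it.
if (FALSE) {
  set.seed(42)
  x1 <- rnorm(100)
  y_demo <- rbinom(100, 1, sigmoid(1 + 2 * x1))
  X_demo <- ones(x1)                        # add the intercept column of 1's
  opt <- optim(par = c(0, 0),
               fn = costFunction(X_demo, y_demo),
               gr = gradient(X_demo, y_demo),
               method = "BFGS")
  opt$par                                   # fitted theta
  opt$value                                 # cost at the optimum
}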
#################################################
# Some old linear regression functions
# hypothesis (note: here, theta is a 2-dimensional vector and X is an m x 2 matrix)
h <- function(theta,X){
  prediction <- X %*% theta
  return(prediction)
}
# cost function
computeCost <- function(data,theta){
  m <- dim(data)[1]
  n <- dim(data)[2]
  X <- as.matrix(cbind(rep(1,times=m), data[, 1:(n-1)]))   # prepend intercept column
  y <- data[, n]
  error <- h(theta, X) - y
  cost <- t(error) %*% error / (2*m)
  return(cost)
}
# gradient descent algorithm
gradientDescent <- function(data, theta, alpha, iterations){
  m <- dim(data)[1]
  n <- dim(data)[2]
  X <- ones(data[, 1:(n-1)])   # prepend intercept column of 1's
  y <- data[, n]
  cost_vec <- computeCost(data, theta)
  theta_vec <- t(theta)
  for (i in 1:iterations){
    theta <- theta - alpha/m * t(t(X %*% theta - y) %*% X)
    cost <- computeCost(data, theta)
    cost_vec <- c(cost_vec, cost)
    theta_vec <- rbind(theta_vec, t(theta))
  }
  return(list(theta=theta, theta_vec=theta_vec, cost_vec=cost_vec))
}
# plot cost development
plotCostDev <- function(grad_desc){
  cost_vec <- grad_desc$cost_vec
  plot(cost_vec, type="l", col="blue", cex=.2,
       xlab="iteration step",
       ylab="cost",
       main="cost development in gradient descent")
}
# Plot the linear fit
plotLinearFit <- function(data,theta){
  min <- min(data[,1])
  max <- max(data[,1])
  cost <- computeCost(data, theta)
  plot(data, xlim=c(min,max),
       col="red",
       xlab="x", ylab="y",
       main="linear fit",
       sub=paste("theta0=", round(theta[1],digits=3),
                 ", theta1=", round(theta[2],digits=3),
                 ", cost=", round(cost,digits=3)))
  abline(theta[1], theta[2], col="blue", lwd=2)   # fitted regression line
  abline(h=0, col="gray", lwd=1)
  abline(v=0, col="gray", lwd=1)
}
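
# Example usage of the linear regression functions (a hedged sketch with
# simulated data, not from the original exercise). Names below are hypothetical.
# Wrapped in `if (FALSE)` so that sourcing this file does not run it.
if (FALSE) {
  set.seed(7)
  x1 <- runif(50, 0, 10)
  y_lin <- 3 + 2 * x1 + rnorm(50, sd = 1)
  lin_data <- cbind(x1, y_lin)
  gd <- gradientDescent(lin_data, theta = c(0, 0), alpha = 0.01, iterations = 1500)
  plotCostDev(gd)                     # cost should decrease over the iterations
  plotLinearFit(lin_data, gd$theta)   # fitted line over the data points
}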
# Feature Normalization
# center each column by its mean and scale by its standard deviation
# (columns with zero standard deviation are only centered)
featureNormalization <- function(value_matrix){
  mu <- apply(value_matrix, 2, mean)
  sigma <- apply(value_matrix, 2, sd)
  for (i in 1:dim(value_matrix)[2]){
    if (sigma[i] != 0){ value_matrix[,i] <- (value_matrix[,i] - mu[i]) / sigma[i] }
    if (sigma[i] == 0){ value_matrix[,i] <- value_matrix[,i] - mu[i] }
  }
  return(list(nv=value_matrix, mu=mu, sigma=sigma))
}
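
# Example (a hedged sketch, not from the original exercise): normalizing a small
# value matrix and checking that each column has mean ~0 and sd ~1.
# Wrapped in `if (FALSE)` so that sourcing this file does not run it.
if (FALSE) {
  vals <- cbind(rnorm(50, mean = 10, sd = 3), rnorm(50, mean = -2, sd = 0.5))
  nrm <- featureNormalization(vals)
  colMeans(nrm$nv)          # approximately 0 per column
  apply(nrm$nv, 2, sd)      # approximately 1 per column
  nrm$mu; nrm$sigma         # stored to apply the same scaling to new data
}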
######################################################
# auxiliary functions
# last n elements of a vector
last <- function(vec,n){
  return(vec[(length(vec)-n+1):length(vec)])
}

# prepend a column of 1's (intercept term) to a matrix
ones <- function(X){
  X <- as.matrix(X)
  return(as.matrix(cbind(rep(1,times=dim(X)[1]), X)))
}