# name: logisticRegression_functions.R
# author: Wiebke Petersen
# date: 09 May, 2017
# based on: ex2.m, original author: Andrew Ng
# context: Machine Learning course on coursera.org
#
# Notation used throughout:
#   m     : number of training examples
#   n     : number of features (for exercise 1: n = 1)
#   y     : m-dimensional vector of labels
#   X     : m x (n+1) matrix (note: by convention the first column consists only of 1's)
#   theta : (n+1)-dimensional parameter vector
#   x(i)  : (n+1)-dimensional vector of the i-th training example (i-th row of X)
#   xj    : m-dimensional vector (j-th column of X)
# sigmoid function
sigmoid <- function(x){
  return(1/(1+exp(-x)))
}

# hypothesis for logistic regression: h_theta(x) = sigmoid(X %*% theta)
hlog <- function(theta,X){
  return(sigmoid(X %*% theta))
}
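
# Example (a hedged sketch, not part of the original exercise code): applying
# hlog to a tiny design matrix with the intercept column of 1's described in the
# header. Wrapped in `if (FALSE)` so that sourcing this file does not run it.
if (FALSE) {
  X_demo <- cbind(1, c(-2, 0, 2))   # m = 3 examples, one feature plus intercept column
  theta_demo <- c(0.5, 1)           # (n+1)-dimensional parameter vector
  hlog(theta_demo, X_demo)          # predicted probabilities sigmoid(X %*% theta)
}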
# gradient descent for logistic regression
# data: m x n matrix, features in columns 1..(n-1), labels in column n
gradientDescentLog <- function(data, theta, alpha, iterations){
  m <- dim(data)[1]
  n <- dim(data)[2]
  X <- ones(data[, 1:(n-1)])   # prepend intercept column of 1's
  y <- data[, n]
  cost_vec <- Cost(sigmoid(X %*% theta), y)
  theta_vec <- t(theta)
  for (i in 1:iterations){
    theta <- theta - alpha * t(t(sigmoid(X %*% theta) - y) %*% X) / m
    cost <- Cost(sigmoid(X %*% theta), y)
    cost_vec <- c(cost_vec, cost)
    theta_vec <- rbind(theta_vec, t(theta))
  }
  return(list(theta=theta, theta_vec=theta_vec, cost_vec=cost_vec))
}
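
# Example usage (a hedged sketch with simulated data, not from the original
# exercise). All names below (x1, y_demo, demo_data, fit) are hypothetical.
# Wrapped in `if (FALSE)` so that sourcing this file does not run it.
if (FALSE) {
  set.seed(42)
  x1 <- rnorm(100)
  y_demo <- rbinom(100, 1, sigmoid(1 + 2 * x1))   # labels drawn from a known model
  demo_data <- cbind(x1, y_demo)                  # features first, labels in the last column
  fit <- gradientDescentLog(demo_data, theta = c(0, 0), alpha = 0.1, iterations = 1000)
  fit$theta                                       # estimated parameters
  last(fit$cost_vec, 1)                           # final cost
}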
# logistic regression cost for predictions h and labels y
Cost <- function(h,y){
  m <- length(y)
  return(((-y) %*% log(h) - (1-y) %*% log(1-h)) / m)
}
# cost function as a closure over X and y: returns a function of theta
costFunction <- function(X,y){
  m <- dim(X)[1]
  function(theta){
    h <- sigmoid(X %*% theta)
    return(((-y) %*% log(h) - (1-y) %*% log(1-h)) / m)
  }
}
# gradient of the cost as a closure over X and y: returns a function of theta
gradient <- function(X,y){
  m <- dim(X)[1]
  function(theta){
    h <- sigmoid(X %*% theta)
    return(t(t(h-y) %*% X) / m)
  }
}
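
# Example usage (a hedged sketch, not from the original exercise): since
# costFunction and gradient return functions of theta, they can be handed to a
# general-purpose optimizer such as optim(). All names below are hypothetical.
# Wrapped in `if (FALSE)` so that sourcing this file does not run it.
if (FALSE) {
  set.seed(42)
  x1 <- rnorm(100)
  y_demo <- rbinom(100, 1, sigmoid(1 + 2 * x1))
  X_demo <- ones(x1)                        # add the intercept column of 1's
  opt <- optim(par = c(0, 0),
               fn = costFunction(X_demo, y_demo),
               gr = gradient(X_demo, y_demo),
               method = "BFGS")
  opt$par                                   # fitted theta
  opt$value                                 # cost at the optimum
}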
#################################################
# Some old linear regression functions
# hypothesis (note: here, theta is a 2-dimensional vector and X is an m x 2 matrix)
h <- function(theta,X){
  prediction <- X %*% theta
  return(prediction)
}
# cost function
computeCost <- function(data,theta){
  m <- dim(data)[1]
  n <- dim(data)[2]
  X <- as.matrix(cbind(rep(1,times=m), data[, 1:(n-1)]))   # prepend intercept column
  y <- data[, n]
  error <- h(theta, X) - y
  cost <- t(error) %*% error / (2*m)
  return(cost)
}
# gradient descent algorithm
gradientDescent <- function(data, theta, alpha, iterations){
  m <- dim(data)[1]
  n <- dim(data)[2]
  X <- ones(data[, 1:(n-1)])   # prepend intercept column of 1's
  y <- data[, n]
  cost_vec <- computeCost(data, theta)
  theta_vec <- t(theta)
  for (i in 1:iterations){
    theta <- theta - alpha/m * t(t(X %*% theta - y) %*% X)
    cost <- computeCost(data, theta)
    cost_vec <- c(cost_vec, cost)
    theta_vec <- rbind(theta_vec, t(theta))
  }
  return(list(theta=theta, theta_vec=theta_vec, cost_vec=cost_vec))
}
# plot cost development
plotCostDev <- function(grad_desc){
  cost_vec <- grad_desc$cost_vec
  plot(cost_vec, type="l", col="blue", cex=.2,
       xlab="iteration step",
       ylab="cost",
       main="cost development in gradient descent")
}
# Plot the linear fit
plotLinearFit <- function(data,theta){
  min <- min(data[,1])
  max <- max(data[,1])
  cost <- computeCost(data, theta)
  plot(data, xlim=c(min,max),
       col="red",
       xlab="x", ylab="y",
       main="linear fit",
       sub=paste("theta0=", round(theta[1],digits=3),
                 ", theta1=", round(theta[2],digits=3),
                 ", cost=", round(cost,digits=3)))
  abline(theta[1], theta[2], col="blue", lwd=2)   # fitted regression line
  abline(h=0, col="gray", lwd=1)
  abline(v=0, col="gray", lwd=1)
}
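
# Example usage of the linear regression functions (a hedged sketch with
# simulated data, not from the original exercise). Names below are hypothetical.
# Wrapped in `if (FALSE)` so that sourcing this file does not run it.
if (FALSE) {
  set.seed(7)
  x1 <- runif(50, 0, 10)
  y_lin <- 3 + 2 * x1 + rnorm(50, sd = 1)
  lin_data <- cbind(x1, y_lin)
  gd <- gradientDescent(lin_data, theta = c(0, 0), alpha = 0.01, iterations = 1500)
  plotCostDev(gd)                     # cost should decrease over the iterations
  plotLinearFit(lin_data, gd$theta)   # fitted line over the data points
}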
# Feature Normalization
# center each column by its mean and scale by its standard deviation
# (columns with zero standard deviation are only centered)
featureNormalization <- function(value_matrix){
  mu <- apply(value_matrix, 2, mean)
  sigma <- apply(value_matrix, 2, sd)
  for (i in 1:dim(value_matrix)[2]){
    if (sigma[i] != 0){ value_matrix[,i] <- (value_matrix[,i] - mu[i]) / sigma[i] }
    if (sigma[i] == 0){ value_matrix[,i] <- value_matrix[,i] - mu[i] }
  }
  return(list(nv=value_matrix, mu=mu, sigma=sigma))
}
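
# Example (a hedged sketch, not from the original exercise): normalizing a small
# value matrix and checking that each column has mean ~0 and sd ~1.
# Wrapped in `if (FALSE)` so that sourcing this file does not run it.
if (FALSE) {
  vals <- cbind(rnorm(50, mean = 10, sd = 3), rnorm(50, mean = -2, sd = 0.5))
  nrm <- featureNormalization(vals)
  colMeans(nrm$nv)          # approximately 0 per column
  apply(nrm$nv, 2, sd)      # approximately 1 per column
  nrm$mu; nrm$sigma         # stored to apply the same scaling to new data
}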
######################################################
# auxiliary functions
# last n elements of a vector
last <- function(vec,n){
  return(vec[(length(vec)-n+1):length(vec)])
}

# prepend a column of 1's (intercept term) to a matrix
ones <- function(X){
  X <- as.matrix(X)
  return(as.matrix(cbind(rep(1,times=dim(X)[1]), X)))
}