# Graphical parameters: thicker lines (lwd), cross plotting symbol (pch = 4)
par(lwd = 2, pch = 4)

# Read the training data: column 1 = city population (in 10,000s),
# column 2 = profit (in $10,000s); comma-separated, no header row
data <- read.table("ex1data1.txt",
  sep = ",",
  encoding = "UTF-8",
  header = FALSE
)

# Scatter plot of the raw data
plot(data,
  col = "red",
  xlab = "Population of City in 10,000s",
  ylab = "Profit in $10,000s"
)

# Gradient descent settings
theta_init <- c(0, 0) # initial fitting parameters (intercept, slope)
iterations <- 1500    # number of gradient descent steps
alpha <- 0.01         # learning rate

# Cost of the initial (all-zero) parameters, for reference
cost_init <- computeCost(data, theta_init)
Basic parameters for gradient descent:
* α: learning rate, 0.01
* Θ_init: vector of initial parameters, (0, 0)
* iterations: 1500
* initial cost: J(Θ_init) = J((0, 0)) = 32.0727339
# Run gradient descent and unpack the results
grad_desc <- gradientDescent(data, theta_init, alpha, iterations)
theta <- grad_desc$theta         # final fitted parameter vector
theta_vec <- grad_desc$theta_vec # parameter history across iterations
cost_final <- computeCost(data, theta)

# Plot how the cost developed over the iterations
plotCostDev(grad_desc)

# Plot the fitted regression line on top of the data
plotLinearFit(data, grad_desc$theta)

# Predict profits for population sizes of 35,000 and 70,000.
# Populations are in units of 10,000s; the leading 1 is the intercept term.
predict1 <- h(theta, c(1, 3.5))
predict2 <- h(theta, c(1, 7))
For a population of 35,000, we predict a profit of about $4,520
For a population of 70,000, we predict a profit of about $45,342
# 3D/contour visualization of the cost surface over (theta0, theta1)
plotCostSurface(data, grad_desc)

#######################################################
# Cost as a function of theta0 only, with theta1 fixed at its optimum
t1 <- theta[2]
t0_vals <- seq(from = -10, to = 10, length.out = 100)
# vapply over the grid instead of an index loop (safe, pre-typed result)
J0_vals <- vapply(
  t0_vals,
  function(t0) computeCost(data, c(t0, t1)),
  numeric(1)
)
plot(t0_vals, J0_vals,
  type = "l", col = "red",
  xlab = "t0", ylab = "cost",
  main = paste("t1 fixed to optimum: ", round(theta[2], digits = 3))
)

# Cost as a function of theta1 only, with theta0 fixed at its optimum
t0 <- theta[1]
t1_vals <- seq(from = -1, to = 4, length.out = 100)
J1_vals <- vapply(
  t1_vals,
  function(t1) computeCost(data, c(t0, t1)),
  numeric(1)
)
# ylab added for consistency with the theta0 plot above
plot(t1_vals, J1_vals,
  type = "l", col = "red",
  xlab = "t1", ylab = "cost",
  main = paste("t0 fixed to optimum: ", round(theta[1], digits = 3))
)