
Linear Regression

[Reference] https://nbviewer.org/github/mml-book/mml-book.github.io/blob/master/tutorials/tutorial_linear_regression.ipynb

import numpy as np
import scipy.linalg
import matplotlib.pyplot as plt
%matplotlib inline
# Define training set

# 5x1 vector, N = 5, D = 1 
X = np.array([-3, -1, 0, 1, 3]).reshape(-1,1)
# 5x1 vector
y = np.array([-1.2, -0.7, 0.14, 0.67, 1.67]).reshape(-1,1)

# show the training set
plt.figure(figsize = (12, 6))
plt.plot(X,y, '+', markersize = 10)
plt.xlabel("$x$")
plt.ylabel("$x$")

plt.tight_layout()
plt.show()


Maximum Likelihood

  • $p(\mathcal{Y} \mid \mathcal{X}, \boldsymbol\theta) = \prod_{n=1}^{N} p(y_n \mid \boldsymbol x_n, \boldsymbol\theta)$
  • the maximum likelihood estimator is given by $\boldsymbol\theta_{\text{ML}} = (\boldsymbol X^\top \boldsymbol X)^{-1} \boldsymbol X^\top \boldsymbol y \in \mathbb{R}^D$
# make a maximum likelihood function
def max_lik_estimate(X, y):
    
    # X: N x D matrix of training inputs
    # y: N x 1 vector of training targets/observations
    # returns: maximum likelihood parameters (D x 1)
    
    N, D = X.shape
    # theta_ml = (X^T X)^{-1} X^T y, computed via a linear solve
    theta_ml = np.linalg.solve(X.T @ X, X.T @ y)
    return theta_ml
# use maximum likelihood estimate
theta_ml = max_lik_estimate(X, y)
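As a quick sanity check (a sketch, not part of the original notebook), the closed-form estimate should agree with NumPy's least-squares solver:

# cross-check against np.linalg.lstsq; both solve the same least-squares problem
theta_check, *_ = np.linalg.lstsq(X, y, rcond=None)
print(np.allclose(theta_ml, theta_check))  # expected: True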
# make a prediction function

def predict_with_estimate(Xtest, theta):
    
    # Xtest: K x D matrix of test inputs
    # theta: D x 1 vector of parameters
    # returns: prediction of f(Xtest); K x 1 vector
    
    # for the linear model, f(Xtest) = Xtest @ theta
    prediction = Xtest @ theta
    
    return prediction
# define a test set

# 100 x 1 vector of test inputs
Xtest = np.linspace(-5,5,100).reshape(-1,1)

# predict the function values at the test points using the maximum likelihood estimator
ml_prediction = predict_with_estimate(Xtest, theta_ml)

# plot
plt.figure(figsize = (12, 6))
plt.plot(X, y, '+', markersize=10)
plt.plot(Xtest, ml_prediction)
plt.xlabel("x")
plt.ylabel("y")
plt.tight_layout()
plt.show()


  • now look at a different training set, where we add 2.0 to every y-value
ynew = y + 2.0

plt.figure(figsize = (12,6))
plt.plot(X, ynew, '+', markersize=10)
plt.xlabel("x")
plt.ylabel("y")

# get maximum likelihood estimate
theta_ml = max_lik_estimate(X, ynew)
print(theta_ml)

# define a test set
Xtest = np.linspace(-5,5,100).reshape(-1,1)

# predict the function values at the test points using the maximum likelihood estimator
ml_prediction = predict_with_estimate(Xtest, theta_ml)

# plot
plt.figure()
plt.plot(X, ynew, '+', markersize=10)
plt.plot(Xtest, ml_prediction)
plt.xlabel("x")
plt.ylabel("y")

plt.tight_layout()
plt.show()


  • now define a linear regression model that is slightly more flexible: $y = \theta_0 + \boldsymbol x^\top \boldsymbol\theta_1 + \epsilon,\ \epsilon \sim \mathcal{N}(0, \sigma^2)$

  • we added an offset (bias) parameter $\theta_0$ to our original model

  • defining the inputs to be the augmented vector $\boldsymbol x_{\text{aug}} = [1, \boldsymbol x^\top]^\top$, we can write the new linear regression model as

  • $y = \boldsymbol x_{\text{aug}}^\top \boldsymbol\theta_{\text{aug}} + \epsilon,\quad \boldsymbol\theta_{\text{aug}} = [\theta_0, \boldsymbol\theta_1^\top]^\top$

N, D = X.shape

# augmented training inputs of size N x (D+1)
X_aug = np.hstack([np.ones((N,1)), X])

# new theta vector of size (D+1) x 1
theta_aug = np.zeros((D+1, 1))
X_aug
array([[ 1., -3.],
       [ 1., -1.],
       [ 1.,  0.],
       [ 1.,  1.],
       [ 1.,  3.]])
## maximum likelihood function for the augmented model
def max_lik_estimate_aug(X_aug, y):
    
    # X_aug: N x (D+1) matrix of augmented training inputs
    # y: N x 1 vector of training targets
    # returns: maximum likelihood parameters ((D+1) x 1)
    
    # same closed form as before, applied to the augmented inputs
    theta_aug_ml = np.linalg.solve(X_aug.T @ X_aug, X_aug.T @ y)
    
    return theta_aug_ml
theta_aug_ml = max_lik_estimate_aug(X_aug, y)
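Since the first column of X_aug is all ones, the augmented model's prediction decomposes into offset plus slope. A quick sketch (not in the original notebook) to confirm:

# sanity check: X_aug @ theta equals theta_0 + X * theta_1
pred_aug = X_aug @ theta_aug_ml
pred_split = theta_aug_ml[0] + X @ theta_aug_ml[1:]
print(np.allclose(pred_aug, pred_split))  # expected: True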
# define a test set; the test inputs also need to be augmented with ones
Xtest_aug = np.hstack([np.ones((Xtest.shape[0],1)), Xtest])

# predict the function values at the test points using the maximum likelihood estimator
ml_prediction = predict_with_estimate(Xtest_aug, theta_aug_ml)

# plot
plt.figure(figsize = (12, 6))
plt.plot(X, y, '+', markersize=10)
plt.plot(Xtest, ml_prediction)
plt.xlabel("x")
plt.ylabel("y")
plt.tight_layout()
plt.show()

Nonlinear Features

  • $f(\boldsymbol x, \boldsymbol\theta) = \sum_{k=1}^{K} \theta_k \phi_k(\boldsymbol x)$
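The basis functions $\phi_k$ need not be polynomials. As a minimal sketch (not in the original notebook), Gaussian RBF features with hypothetical centers mu_k and length-scale ell fit this framework just as well:

# Gaussian RBF features: phi_k(x) = exp(-(x - mu_k)^2 / (2 * ell^2))
def rbf_features(X, centers, ell=1.0):
    # X: N x 1 inputs; centers: K RBF centers; returns: N x K feature matrix
    X = X.flatten().reshape(-1, 1)           # N x 1
    mu = np.asarray(centers).reshape(1, -1)  # 1 x K
    return np.exp(-(X - mu)**2 / (2 * ell**2))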
y = np.array([10.05, 1.5, -1.234, 0.02, 8.03]).reshape(-1,1)
plt.figure(figsize = (12, 6))
plt.plot(X, y, '+')
plt.xlabel("x")
plt.ylabel("y")
plt.tight_layout()
plt.show()

Polynomial Regression

# polynomial feature function
def poly_features(X, K):
    
    # X: inputs of size N x 1
    # K: degree of the polynomial
    # computes the feature matrix Phi (N x (K+1))
    
    X = X.flatten()
    N = X.shape[0]
    
    # initialize Phi
    Phi = np.zeros((N, K+1))
    
    # column k of Phi holds the k-th power of the inputs
    for k in range(K+1):
        Phi[:, k] = X**k
    return Phi
  • for reasons of numerical stability, we often add a small diagonal "jitter" $\kappa > 0$ to $\boldsymbol\Phi^\top\boldsymbol\Phi$ so that the matrix can be inverted without numerical problems; the maximum likelihood estimate then becomes

  • $\boldsymbol\theta_{\text{ML}} = (\boldsymbol\Phi^\top\boldsymbol\Phi + \kappa\boldsymbol I)^{-1}\boldsymbol\Phi^\top\boldsymbol y$

# maximum likelihood with nonlinear features
def nonlinear_features_maximum_likelihood(Phi, y):
    # Phi: feature matrix for training inputs. Size of N x D
    # y: training targets. Size of N by 1
    # returns: maximum likelihood estimator theta_ml. Size of D x 1
    
    # 'jitter' term, good for numerical stability
    kappa = 1e-08
    
    D = Phi.shape[1]
    
    # maximum likelihood estimate: (Phi^T Phi + kappa*I)^{-1} Phi^T y
    theta_ml = scipy.linalg.solve(Phi.T @ Phi + kappa * np.eye(D), Phi.T @ y)
    
    return theta_ml
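The jitter is not just cosmetic here: with N = 5 training points and K = 5 (used below), $\boldsymbol\Phi$ is $5 \times 6$, so the $6 \times 6$ matrix $\boldsymbol\Phi^\top\boldsymbol\Phi$ has rank at most 5 and is singular without the $\kappa\boldsymbol I$ term. A quick sketch to confirm:

# Phi^T Phi is rank-deficient when K+1 > N; the jitter restores full rank
Phi5 = poly_features(X, 5)                            # 5 x 6 feature matrix
A = Phi5.T @ Phi5                                     # 6 x 6, rank <= 5
print(np.linalg.matrix_rank(A))                       # at most 5 (singular)
print(np.linalg.matrix_rank(A + 1e-08 * np.eye(6)))   # 6 (invertible after jitter)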
  • to make predictions at test inputs $\boldsymbol X_{\text{test}} \in \mathbb{R}$, we need to compute the features (nonlinear transformations) $\boldsymbol\Phi_{\text{test}} = \phi(\boldsymbol X_{\text{test}})$ of $\boldsymbol X_{\text{test}}$ to give us the predicted mean

  • $\mathbb{E}[\boldsymbol y_{\text{test}}] = \boldsymbol\Phi_{\text{test}}\, \boldsymbol\theta_{\text{ML}}$

# Define the degree of the polynomial we wish to fit
K = 5 

# N x (K+1) feature matrix
Phi = poly_features(X, K) 

# maximum likelihood estimator
theta_ml = nonlinear_features_maximum_likelihood(Phi, y)

# test inputs
Xtest = np.linspace(-4,4,100).reshape(-1,1)

# feature matrix for test inputs
Phi_test = poly_features(Xtest, K)

# predicted y-values
y_pred = Phi_test @ theta_ml 

plt.figure()
plt.plot(X, y, '+')
plt.plot(Xtest, y_pred)
plt.xlabel("x")
plt.ylabel("y")
plt.tight_layout()
plt.show()
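One natural follow-up (a sketch, not shown in the post above) is to compare the training-set RMSE for several polynomial degrees to gauge how the fit improves with K:

# training-set RMSE for several polynomial degrees
for K in [1, 2, 3, 5, 8]:
    Phi_K = poly_features(X, K)
    theta_K = nonlinear_features_maximum_likelihood(Phi_K, y)
    rmse = np.sqrt(np.mean((y - Phi_K @ theta_K)**2))
    print(K, rmse)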
