import numpy as np
class LinearRegression:
    """Multivariate linear regression, trained either by the normal
    equation (closed form) or by batch gradient descent."""

    def __init__(self):
        """Initialize the Linear Regression model with untrained state."""
        self.coef_ = None           # feature coefficients theta[1:]
        self.interception_ = None   # intercept term theta[0]
        self._theta = None          # full parameter vector [b, w1, ..., wn]

    def fit_normal(self, X_train, y_train):
        """Train via the normal equation: theta = (X^T X)^{-1} X^T y.

        X_train: 2-D array (n_samples, n_features); y_train: 1-D array.
        Returns self for chaining.
        """
        assert X_train.shape[0] == y_train.shape[0], \
            'the size of X_train must equal to y_train'
        # Prepend a column of ones so theta[0] acts as the intercept.
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        self._theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)
        self.interception_ = self._theta[0]
        self.coef_ = self._theta[1:]  # fixed: was self.theta (AttributeError)
        return self

    def predict(self, x_predict):
        """Return predictions for x_predict (2-D array, same n_features as fit)."""
        assert self.coef_ is not None and self.interception_ is not None, \
            'must fit_normal before predict'
        assert x_predict.shape[1] == len(self.coef_), \
            'the feature of x_predict must be equal to self.coef_'
        X_b = np.hstack([np.ones((len(x_predict), 1)), x_predict])
        return X_b.dot(self._theta)

    def fit_gd(self, X_train, y_train, eta=0.01, n_iters=1e4):
        """Train via batch gradient descent.

        eta: learning rate; n_iters: max iteration count.
        Returns self for chaining.
        """
        assert X_train.shape[0] == y_train.shape[0], \
            'the size of X_train must be equal to y_train'

        def J(theta, X_b, y):
            """Mean-squared-error cost; inf on numeric overflow."""
            try:
                return np.sum((y - X_b.dot(theta)) ** 2) / len(y)
            except Exception:
                return float('inf')

        def dJ(theta, X_b, y):
            """Gradient of J with respect to theta."""
            return X_b.T.dot(X_b.dot(theta) - y) * 2. / len(y)

        def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
            """Iterate theta <- theta - eta * dJ until the cost J stops
            improving by more than epsilon, or n_iters is reached."""
            theta = initial_theta
            cur_iter = 0
            while cur_iter < n_iters:
                gradient = dJ(theta, X_b, y)  # compute once per step
                last_theta = theta            # fixed: was las_theta/last_theta mismatch
                theta = theta - eta * gradient
                # Converge on the scalar cost, not the gradient vector.
                if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
                    break
                cur_iter += 1
            return theta

        # Fixed: original never invoked gradient_descent nor set the results.
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        initial_theta = np.zeros(X_b.shape[1])
        self._theta = gradient_descent(X_b, y_train, initial_theta, eta, n_iters)
        self.interception_ = self._theta[0]
        self.coef_ = self._theta[1:]
        return self

    def __repr__(self):
        """Debug representation (fixed: was misspelled __repre__)."""
        return 'LinearRegression() Module'