# Third-party dependencies used throughout this walkthrough.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
数据
# Load the Boston housing dataset that ships with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the installed version still provides it.
boston = datasets.load_boston()
只使用房间数量这个特征
# Take column 5 of the feature matrix as the single input feature and the
# dataset's target as the value to predict.
x = boston.data[:,5]# use only the number-of-rooms feature
y = boston.target
# Scatter-plot the raw data to inspect it before filtering.
plt.scatter(x,y)# note the points sitting at the upper limit
去掉上限点
# Drop the samples whose target is at or above the cap of 50.
# Boolean-mask indexing returns new arrays. x must be filtered first, because
# both masks have to be computed from the still-unfiltered y.
x = x[y < 50]
y = y[y < 50]
plt.scatter(x, y)
train_test_split
# Split the data with the project's own train_test_split helper.
# seed=666 is passed through, presumably to make the split reproducible.
from Simple_linear_Regression.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, seed=666)

# Fit the project's simple linear regression model on the training split and
# print whatever fit() returns.
from Simple_linear_Regression.SimpleLinearRegression import Simple_linear_Regression2

reg = Simple_linear_Regression2()
print(reg.fit(x_train, y_train))
Simple_linear_Regression2
# Show the fitted parameters stored on the model (presumably slope a_ and
# intercept b_ — confirm against Simple_linear_Regression2).
print(reg.a_)
print(reg.b_)
7.8608543562689555
-27.459342806705543
# Plot the training samples and overlay the model's predictions for them in red.
plt.scatter(x_train, y_train)
plt.plot(x_train, reg.predict(x_train), color='r')

# Predict on the held-out test split; the metrics below compare this to y_test.
y_predict = reg.predict(x_test)
使用自己封装的mse,rmse,mae
# Evaluate the test-set predictions with the project's own metric helpers.
from Simple_linear_Regression.metrics import mean_squared_error
from Simple_linear_Regression.metrics import root_mean_squared_error
from Simple_linear_Regression.metrics import mean_absolute_error

print(mean_squared_error(y_test, y_predict))
print(root_mean_squared_error(y_test, y_predict))
print(mean_absolute_error(y_test, y_predict))
24.156602134387438
4.914936635846635
3.5430974409463873
封装的代码
def mean_squared_error(y_true, y_predict):
    """Return the mean squared error (MSE) between y_true and y_predict.

    Both arguments may be NumPy arrays or plain sequences of numbers; they
    are converted with ``np.asarray`` so element-wise arithmetic also works
    for lists and tuples.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict), \
        'the size of y_true must be equal to the size of y_predict'
    errors = np.asarray(y_true) - np.asarray(y_predict)
    return np.sum(errors ** 2) / len(y_true)


def root_mean_squared_error(y_true, y_predict):
    """Return the root mean squared error (RMSE): the square root of the MSE.

    Taking the square root brings the error back to the same units as y.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    return np.sqrt(mean_squared_error(y_true, y_predict))


def mean_absolute_error(y_true, y_predict):
    """Return the mean absolute error (MAE) between y_true and y_predict.

    Both arguments may be NumPy arrays or plain sequences of numbers; they
    are converted with ``np.asarray`` so element-wise arithmetic also works
    for lists and tuples.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict), \
        'the size of y_true must be equal to the size of y_predict'
    errors = np.asarray(y_true) - np.asarray(y_predict)
    return np.sum(np.absolute(errors)) / len(y_predict)
分类问题
回归问题如何评价呢
均方误差MSE(mean squared error)
改变量纲
RMSE(Root Mean Squared error)
平均绝对误差MAE(虽然不可导,但是评价一个算法是可以的)
尽量让RMSE小。