(一)导入并查看数据
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2;
# this import requires an older scikit-learn release.
from sklearn.datasets import load_boston  # Boston housing dataset loader
from sklearn.linear_model import LinearRegression  # linear regression estimator
from sklearn.model_selection import train_test_split
# Load the Boston housing dataset and separate features from targets.
df = load_boston()
x = df.data  # feature matrix
y = df.target  # regression targets (labels)

# Inspect the (n_samples, n_features) shape of the feature matrix.
# This is a bare expression: its value is only shown in an interactive session.
x.shape

# Hold out 20% of the samples as a test set. The fixed random_state makes
# the shuffle, and therefore the split, reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=888
)
(random_state 为随机数种子:固定该值后,每次运行得到的训练集/测试集划分都相同,结果可复现)
(二)训练
# Fit an ordinary least-squares linear regression on the training split.
train = LinearRegression()
train.fit(x_train, y_train)

# For a regressor, score() returns the coefficient of determination (R^2)
# on the given data, not a classification accuracy.
train.score(x_test, y_test)

# Fitted slope for each feature column, i.e. the coefficient of each x
# (the original note states there are 13 of them).
train.coef_
(三)预测
# Predict targets for the held-out test features. The result is not stored
# here; it is recomputed below when the deviation is calculated.
train.predict(x_test)
(四)评估
# Deviation (residual) per test sample: predicted value minus true value.
dev = train.predict(x_test) - y_test
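均方根误差(RMSE):$\mathrm{RMSE} = \sqrt{\frac{1}{n}\sum_{i=1}^{n}\left(\hat{y}_i - y_i\right)^2}$,其中 $\hat{y}_i - y_i$ 即上面的偏差 dev,$n$ 为测试集样本数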
# Root mean squared error: square the deviations, average them, then take
# the square root. The original took the square root of each squared
# deviation before summing, which yields the mean absolute error instead,
# and divided by a hard-coded sample count of 102. np.mean uses the actual
# number of test samples, so this stays correct if the split size changes.
RMSE = np.sqrt(np.mean(dev ** 2))
RMSE