"""Compare linear regression on the Boston housing data with and without PCA.

The script standardizes the features, reduces them with ``TruncatedSVD``,
plots the explained variance, and prints the test-set mean squared error of a
linear regression fitted on the reduced features next to one fitted on all
standardized features.
"""

# --- Imports ---------------------------------------------------------------
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import TruncatedSVD

# `%matplotlib inline` is an IPython/Jupyter magic, not Python syntax, so it is
# kept as a comment; re-enable it when running this inside a notebook.
# %matplotlib inline

# --- Load the data ---------------------------------------------------------
# NOTE: `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this call requires scikit-learn < 1.2.
boston = datasets.load_boston()
boston.feature_names  # only echoed when evaluated as a notebook cell

# --- Standardize the features, then split into train / test sets -----------
x = boston.data
y = boston.target
scale = StandardScaler()
# NOTE(review): the scaler is fitted on the full data set before the split,
# so test-set statistics influence the scaling; consider fitting on the
# training portion only.
normal_x = scale.fit_transform(x)
x_train, x_test, y_train, y_test = train_test_split(
    normal_x, y, test_size=0.1, random_state=42
)

# --- Show the data: histograms of two standardized feature columns ---------
plt.figure()  # own figure, so later plots do not draw over the histograms
plt.hist(x_train[:, 0], bins=20)
plt.hist(x_train[:, 2], bins=20, alpha=0.3)  # translucent overlay

# --- PCA (via truncated SVD) -----------------------------------------------
n_components = 10
pca = TruncatedSVD(n_components=n_components)
x_train_pca = pca.fit_transform(x_train)
x_test_pca = pca.transform(x_test)  # reuse the projection learned on train

# --- Visualize the explained variance per component ------------------------
components = pca.components_
plt.figure()
plt.plot(pca.explained_variance_)
plt.xlabel('component')
plt.ylabel('explained variance')
# The original format string had a single placeholder, so the percentage
# argument was silently dropped; it is now actually printed.
print("前{}個主成分解釋了資料中{:.2f}%的變化".format(
    n_components, sum(pca.explained_variance_ratio_) * 100))

# --- Compare the regression error with and without PCA ---------------------
model = LinearRegression()
model.fit(x_train_pca, y_train)
print("前{}個pca主成分進行線性回歸的mse是{}".format(
    n_components,
    metrics.mean_squared_error(y_test, model.predict(x_test_pca))))

model = LinearRegression()
model.fit(x_train, y_train)
print("不進行pca分析線性回歸的mse是{}".format(
    metrics.mean_squared_error(y_test, model.predict(x_test))))

# Needed to display the figures when run as a plain script.
plt.show()
波士頓房價線性回歸
from matplotlib import pyplot as plt from sklearn import linear_model from sklearn.model_selection import train_test_split import numpy as np import p...
波士頓房價資料集視覺化
將所有屬性與房價之間的關係視覺化 import matplotlib.pyplot as plt import numpy as np import tensorflow as tf plt.rcParams['font.sans-serif'] = ['Microsoft YaHei'] # 指定預設字型 plt.rc...
使用sklearn載入波士頓房價資料集
使用sklearn載入波士頓房價資料集 # 從sklearn匯入資料集 from sklearn.datasets import load_boston # 載入資料 boston = load_boston() # x為輸入,y為輸出 x = boston.data y = boston.target # 檢視資料有哪些字段 載入的...