異方差 Python實現

借鑑一位博主的文章，結合兩篇，用於實現python**，本人親測，可以跑通。

這位博主的**字母有連一起的，以下是我親測的**：

import pandas as pd
import numpy as np
from patsy import dmatrices
from statsmodels.stats.outliers_influence import variance_inflation_factor
import statsmodels.api as sm
import scipy.stats as stats
from sklearn.metrics import mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import scipy.io
plt.rcparams[
'font.sans-serif']=
['simhei'
]plt.rcparams[
'axes.unicode_minus']=
false
# 資料讀取
# #ccpp = pd.read_excel( 'ccpp.xlsx')ccpp.describe()
# data = scipy.io.loadmat('encdata-2hp.mat') # 讀取mat檔案
# # path = scio.loadmat('fft-2hp.mat')['fft-2hp']
# print(data)
# train1=data['train3hp']
# test1=data['test3hp']
# train_y=data['train_y3hp']
# test_y=data['test_y3hp']
# data1=train1[:,1]
# print("train1",train1.shape)
# print("data",data1.shape)
# # sns.pairplot(data)
# # plt.show()
# #y, x = dmatrices( data1, data = train1, return_type= 'dataframe')
# fit2 = sm.formula.ols( data1,data = train1).fit()
# print("fit2",fit2)
# fit2.summary()
# pred2 = fit2.predict()
# print("pred2",pred2)
## np.sqrt(mean_squared_error(train1.pe, pred2))
# resid = fit2.resid
# plt.scatter(fit2.predict(), (fit2.resid-fit2.resid.mean())/fit2.resid.std())
# plt.xlabel( '**值')
# plt.ylabel( '標準化殘差')
## # 新增水平參考線
## plt.axhline(y = 0, color = 'r', linewidth = 2)
# plt.show()
ccpp = pd.read_excel(
'ccpp.xlsx'
)ccpp.describe(
)sns.pairplot(ccpp)
plt.show(
)# 發電量與自變數之間的相關係數
ccpp.corrwith(ccpp.pe)
y, x = dmatrices(
'pe~at+v+ap'
, data = ccpp, return_type=
'dataframe'
)# 構造空的資料框
vif = pd.dataframe(
)vif[
"vif factor"]=
[variance_inflation_factor(x.values, i)
for i in
range
(x.shape[1]
)]# vif[ "features"] = x.columnsvif
# print(vif[ "features"])
# 構造pe與at、v和ap之間的線性模型
fit = sm.formula.ols(
'pe~at+v+ap'
,data = ccpp)
.fit(
)fit.summary(
)print
("fit"
,fit)
# 計算模型的rmse值
pred = fit.predict(
)np.sqrt(mean_squared_error(ccpp.pe, pred)
)# 離群點檢驗
outliers = fit.get_influence(
)# 高槓桿值點（帽子矩陣）
leverage = outliers.hat_matrix_diag
# dffits值
dffits = outliers.dffits[0]
# 學生化殘差
resid_stu = outliers.resid_studentized_external
# cook距離
cook = outliers.cooks_distance[0]
# covratio值
covratio = outliers.cov_ratio
# 將上面的幾種異常值檢驗統計量與原始資料集合並
contat1 = pd.concat(
[pd.series(leverage, name =
'leverage'),
pd.series(dffits, name =
'dffits'),
pd.series(resid_stu,name =
'resid_stu'),
pd.series(cook, name =
'cook'),
pd.series(covratio, name =
'covratio'),
],axis =1)
ccpp_outliers = pd.concat(
[ccpp,contat1]
, axis =1)
ccpp_outliers.head(
)print
("contat1"
,contat1)
# 重新建模
fit2 = sm.formula.ols(
'pe~at+v+ap'
,data = ccpp_outliers)
.fit(
)fit2.summary(
)# 計算模型的rmse值
pred2 = fit2.predict(
)np.sqrt(mean_squared_error(ccpp_outliers.pe, pred2)
)resid = fit2.resid
# 標準化殘差與**值之間的散點圖
plt.scatter(fit2.predict(),
(fit2.resid-fit2.resid.mean())
/fit2.resid.std())
plt.xlabel(
'**值'
,fontdict=
)plt.ylabel(
'標準化殘差'
,fontdict=
)# 新增水平參考線
plt.axhline(y =
0, color =
'r', linewidth =2)
plt.show(
)

基於python的異方差檢驗講講異方差的檢驗

我們前面講了異方差，也講了怎麼用圖示法來判斷是否有異方差，這一篇來講講怎麼用統計的方法來判斷有沒有異方差。關於檢驗異方差的統計方法有很多，我們這一節只講比較普遍且比較常用的white test 懷特檢驗假設現在我們做了如下的回歸方程如果要用懷特檢驗檢驗上述方程有沒有異方差，主要分以下幾個步驟 1...

理解異方差

這個單詞特別長，它的中文名叫做異方差，它是怎麼來的呢？細細道來。我們使用最小二乘法的時候有乙個基本假定，其他變數對於模型的影響是常數 u 比如在模型 y 0 1x u 裡面我們假定除了 x 以外，其他影響 y的東西都是常數，合起來就是 u 在這個假設下最小二乘法能夠順利進行，也能保障無偏性和一致性。...

異方差性質

殘差序列的異方差性異方差的影響使用arima模型擬合非平穩序列時，對殘差序序列有乙個重要假定殘差序列為零均值白雜訊序列。換言之，殘差序列要滿足如下三個假定條件。1 零均值 2 純隨機 3 方差齊性如果方差齊性假定不成立，即隨機誤差的方差不在是常數，它會隨時間的變化而變化，可以表示為時間的某個...

異方差 Python實現

基於python的異方差檢驗 講講異方差的檢驗

理解異方差

異方差性質

相關推薦

基於python的異方差檢驗講講異方差的檢驗