heamy之stacking和blending例項

2021-08-14 13:30:47 字數 4117 閱讀 2452

1、stacking例項

# Stacking example with heamy: two first-level regressors are combined by
# training a second-level linear model on their out-of-fold predictions.
from heamy.dataset import Dataset
from heamy.estimator import Regressor, Classifier
from heamy.pipeline import ModelsPipeline
# NOTE: load_boston was removed in scikit-learn 1.2; running this example
# requires an older scikit-learn release or a different regression dataset.
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

# Load the data and hold out 10% of it as the test split.
data = load_boston()
x, y = data['data'], data['target']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=111
)

# Wrap the splits in a heamy Dataset (the test targets are not passed in).
dataset = Dataset(x_train, y_train, x_test)

# First-level models: random forest and linear regression.
# NOTE: the hyperparameter dicts were lost in the source text
# (`parameters=,`); the values below are illustrative only.
model_rf = Regressor(
    dataset=dataset,
    estimator=RandomForestRegressor,
    parameters={'n_estimators': 50},
    name='rf',
)
model_lr = Regressor(
    dataset=dataset,
    estimator=LinearRegression,
    parameters={},
    name='lr',
)

# Stack the two models.
# Returns a new dataset with out-of-fold predictions.
pipeline = ModelsPipeline(model_rf, model_lr)
stack_ds = pipeline.stack(k=10, seed=111)

# Second level: fit a linear regression on the stacked dataset.
stacker = Regressor(dataset=stack_ds, estimator=LinearRegression)
results = stacker.predict()

# Validate the second-level model with 10-fold cross-validation.
results10 = stacker.validate(k=10, scorer=mean_absolute_error)

2、blending例項

# Blending example with heamy: two first-level regressors are combined by
# training a second-level linear model on the dataset returned by `blend`.
from heamy.dataset import Dataset
from heamy.estimator import Regressor, Classifier
from heamy.pipeline import ModelsPipeline
# NOTE: load_boston was removed in scikit-learn 1.2; running this example
# requires an older scikit-learn release or a different regression dataset.
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

# Load the data and hold out 10% of it as the test split.
data = load_boston()
x, y = data['data'], data['target']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=111
)

# Wrap the splits in a heamy Dataset (the test targets are not passed in).
dataset = Dataset(x_train, y_train, x_test)

# First-level models: random forest and linear regression.
# NOTE: the hyperparameter dicts were lost in the source text
# (`parameters=,`); the values below are illustrative only.
model_rf = Regressor(
    dataset=dataset,
    estimator=RandomForestRegressor,
    parameters={'n_estimators': 50},
    name='rf',
)
model_lr = Regressor(
    dataset=dataset,
    estimator=LinearRegression,
    parameters={},
    name='lr',
)

# Blend the two models.
# Returns a new dataset of first-level predictions; `proportion=0.2` is
# presumably the share of training data held out for them -- confirm
# against the heamy documentation.
pipeline = ModelsPipeline(model_rf, model_lr)
stack_ds = pipeline.blend(proportion=0.2, seed=111)

# Second level: fit a linear regression on the blended dataset.
stacker = Regressor(dataset=stack_ds, estimator=LinearRegression)
results = stacker.predict()

# Validate the second-level model with 10-fold cross-validation.
results10 = stacker.validate(k=10, scorer=mean_absolute_error)

3、權重加權平均

# Weighted-average example with heamy: find per-model weights that optimise
# the scorer, then combine the three models' predictions with those weights.
from heamy.dataset import Dataset
from heamy.estimator import Regressor, Classifier
from heamy.pipeline import ModelsPipeline
# NOTE: load_boston was removed in scikit-learn 1.2; running this example
# requires an older scikit-learn release or a different regression dataset.
# (The original snippet used load_boston without importing it.)
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor

# Load the data and hold out 10% of it as the test split.
data = load_boston()
x, y = data['data'], data['target']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=111
)

# Wrap the splits in a heamy Dataset (the test targets are not passed in).
dataset = Dataset(x_train, y_train, x_test)

# First-level models.
# NOTE: the hyperparameter dicts were lost in the source text
# (`parameters=,`); the values below are illustrative only.
model_rf = Regressor(
    dataset=dataset,
    estimator=RandomForestRegressor,
    parameters={'n_estimators': 50},
    name='rf',
)
model_lr = Regressor(
    dataset=dataset,
    estimator=LinearRegression,
    parameters={},
    name='lr',
)
model_knn = Regressor(
    dataset=dataset,
    estimator=KNeighborsRegressor,
    parameters={'n_neighbors': 15},
    name='knn',
)

pipeline = ModelsPipeline(model_rf, model_lr, model_knn)

# Search for the model weights using mean absolute error as the scorer,
# then apply them to combine the models' predictions.
weights = pipeline.find_weights(mean_absolute_error)
result = pipeline.weight(weights)

4、簡單取平均或自定義

# Simple mean of the models' predictions.
# Relies on `pipeline` (and `mean_absolute_error`) from the previous snippet.

# get predictions for test

result = pipeline.mean().execute()

# or validate (second positional argument 10 is presumably the fold count -- confirm)

_ = pipeline.mean().validate(mean_absolute_error,10)

# Custom aggregation: the example is missing from this copy of the article.

參考文獻:

1、2、

heamy之stacking和blending例項

1 stacking例項 from heamy.dataset import Dataset from heamy.estimator import Regressor, Classifier from heamy.pipeline import ModelsPipeline from sklearn...

模型融合之stacking方法

之前一直對stacking一知半解,找到的資料也介紹得很模糊。所以又多看了幾篇文章,然後來此寫篇部落格,加深一下印象,順便給各位朋友分享一下。stacking的過程有一張圖非常經典,如下。雖然它很直觀,但是沒有語言描述確實很難搞懂。上半部分是用一個基礎模型進行5折交叉驗證,如用xgboost作為基...

BasicBolt和IRichBolt之間的區別

IRichBolt繼承自IBolt,IBolt會使用OutputCollector來傳送元組。public interface IBolt extends Serializable。OutputCollector有兩個用於傳送元組的函式,後續component會向acker傳送ack響應。List<Integer> emit...