1、stacking例項
# Stacking example with heamy: two base regressors (random forest and linear
# regression) are stacked, and a linear regression is fitted on top of them.
from heamy.dataset import Dataset
from heamy.estimator import Regressor, Classifier
from heamy.pipeline import ModelsPipeline
# sklearn.cross_validation was removed in scikit-learn 0.20;
# model_selection provides the same train_test_split.
from sklearn import model_selection
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# Load the data set.
# NOTE: load_boston was removed in scikit-learn 1.2, so this example needs an
# older scikit-learn release (or a different regression data set).
data = load_boston()
x, y = data['data'], data['target']
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x, y, test_size=0.1, random_state=111)

# Wrap the train/test split in a heamy Dataset.
dataset = Dataset(x_train, y_train, x_test)

# Build the first-layer models.
# NOTE(review): the hyper-parameter dicts were missing in the original text
# (`parameters=,`); the values below follow the heamy README example and can be
# tuned freely. `normalize` is only accepted by scikit-learn < 1.2.
model_rf = Regressor(dataset=dataset, estimator=RandomForestRegressor,
                     parameters={'n_estimators': 50}, name='rf')
model_lr = Regressor(dataset=dataset, estimator=LinearRegression,
                     parameters={'normalize': True}, name='lr')

# Stack the two models.
# Returns new dataset with out-of-fold predictions.
pipeline = ModelsPipeline(model_rf, model_lr)
stack_ds = pipeline.stack(k=10, seed=111)

# Second layer: fit a linear regression on the stacked dataset.
stacker = Regressor(dataset=stack_ds, estimator=LinearRegression)
results = stacker.predict()

# Score the second-layer model with 10-fold cross-validation.
results10 = stacker.validate(k=10, scorer=mean_absolute_error)
2、blending例項
# Blending example with heamy: two base regressors (random forest and linear
# regression) are blended, and a linear regression is fitted on top of them.
from heamy.dataset import Dataset
from heamy.estimator import Regressor, Classifier
from heamy.pipeline import ModelsPipeline
# sklearn.cross_validation was removed in scikit-learn 0.20;
# model_selection provides the same train_test_split.
from sklearn import model_selection
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# Load the data set.
# NOTE: load_boston was removed in scikit-learn 1.2, so this example needs an
# older scikit-learn release (or a different regression data set).
data = load_boston()
x, y = data['data'], data['target']
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x, y, test_size=0.1, random_state=111)

# Wrap the train/test split in a heamy Dataset.
dataset = Dataset(x_train, y_train, x_test)

# Build the first-layer models.
# NOTE(review): the hyper-parameter dicts were missing in the original text
# (`parameters=,`); the values below follow the heamy README example and can be
# tuned freely. `normalize` is only accepted by scikit-learn < 1.2.
model_rf = Regressor(dataset=dataset, estimator=RandomForestRegressor,
                     parameters={'n_estimators': 50}, name='rf')
model_lr = Regressor(dataset=dataset, estimator=LinearRegression,
                     parameters={'normalize': True}, name='lr')

# Blend the two models.
# Returns a new dataset built from the base models' predictions
# (presumably made on a holdout split sized by `proportion` -- confirm
# against the heamy documentation).
pipeline = ModelsPipeline(model_rf, model_lr)
stack_ds = pipeline.blend(proportion=0.2, seed=111)

# Second layer: fit a linear regression on the blended dataset.
stacker = Regressor(dataset=stack_ds, estimator=LinearRegression)
results = stacker.predict()

# Score the second-layer model with 10-fold cross-validation.
results10 = stacker.validate(k=10, scorer=mean_absolute_error)
3、權重加權平均
# Weighted-average example with heamy: three base regressors are combined with
# weights chosen to minimise the mean absolute error.
from heamy.dataset import Dataset
from heamy.estimator import Regressor, Classifier
from heamy.pipeline import ModelsPipeline
# sklearn.cross_validation was removed in scikit-learn 0.20;
# model_selection provides the same train_test_split.
from sklearn import model_selection
# load_boston is used below but was not imported in this section originally.
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.neighbors import KNeighborsRegressor

# Load the data set.
# NOTE: load_boston was removed in scikit-learn 1.2, so this example needs an
# older scikit-learn release (or a different regression data set).
data = load_boston()
x, y = data['data'], data['target']
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x, y, test_size=0.1, random_state=111)

# Wrap the train/test split in a heamy Dataset.
dataset = Dataset(x_train, y_train, x_test)

# Build the base models.
# NOTE(review): the hyper-parameter dicts were missing in the original text
# (`parameters=,`); the values below follow the heamy README example and can be
# tuned freely. `normalize` is only accepted by scikit-learn < 1.2.
model_rf = Regressor(dataset=dataset, estimator=RandomForestRegressor,
                     parameters={'n_estimators': 50}, name='rf')
model_lr = Regressor(dataset=dataset, estimator=LinearRegression,
                     parameters={'normalize': True}, name='lr')
model_knn = Regressor(dataset=dataset, estimator=KNeighborsRegressor,
                      parameters={'n_neighbors': 15}, name='knn')

# Search for per-model weights using mean absolute error as the scorer, then
# apply those weights to combine the models.
pipeline = ModelsPipeline(model_rf, model_lr, model_knn)
weights = pipeline.find_weights(mean_absolute_error)
result = pipeline.weight(weights)
4、簡單取平均或自定義
# Simple average of the pipeline's models: get predictions for test.
# `pipeline` is the ModelsPipeline built in the weighted-average section above.
result = pipeline.mean().execute()
# or validate
# (the second argument, 10, is presumably the number of folds -- confirm
# against heamy's validate signature)
_ = pipeline.mean().validate(mean_absolute_error,10)
# Custom aggregation (no example code follows this comment in the file)
heamy之stacking和blending例項
1 stacking例項 from heamy.dataset import dataset from heamy.estimator import regressor,classifier from heamy.pipeline import modelspipeline from sklearn...
模型融合之stacking方法
之前一直對stacking一知半解,找到的資料也介紹得很模糊。所以又多看了幾篇文章,然後來此寫篇部落格,加深一下印象,順便給各位朋友分享一下。stacking的過程有一張圖非常經典,如下 雖然它很直觀,但是沒有語言描述確實很難搞懂。上半部分是用一個基礎模型進行5折交叉驗證,如 用xgboost作為基...
BasicBolt和IRichBolt之間的區別
IRichBolt繼承自IBolt,IBolt會使用OutputCollector來傳送元組。public interface IBolt extends Serializable OutputCollector有兩個用於傳送元組的函式 後續component會向acker傳送ack響應。List emit...