from numpy import genfromtxt
import numpy as np
from sklearn import datasets, linear_model
datapath = r"c:\users\meachine learn\delivery.csv"
# skip_header=0 and skip_footer=0 mean no rows are skipped (these are the defaults); here skip_header=2 skips the first two lines of the file.
deliverydata = genfromtxt(datapath, delimiter=',',skip_header=2, usecols=(1,2,3))
print(deliverydata)
[[100. 4. 9.3]
[ 50. 3. 4.8]
[100. 4. 8.9]
[100. 2. 6.5]
[ 50. 2. 4.2]
[ 80. 2. 6.2]
[ 75. 3. 7.4]
[ 65. 4. 6. ]
[ 90. 3. 7.6]
[ 90. 2. 6.1]]
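For readers who prefer pandas, the same file can be loaded in an equivalent way. This is a minimal sketch, assuming pandas is installed and that delivery.csv has the layout implied by the genfromtxt call above (two lines to skip, relevant columns at indices 1, 2 and 3); it is not part of the original code.
import pandas as pd
# Equivalent load: skip the first two lines, no header row, keep columns 1-3
df = pd.read_csv(datapath, skiprows=2, header=None, usecols=[1, 2, 3])
deliverydata = df.to_numpy()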
x = deliverydata[:,:-1]
y = deliverydata[:,-1]
regr = linear_model.LinearRegression()
regr.fit(x,y)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
print("coefficinet: ", regr.coef_)
coefficinet: [0.0611346 0.92342537]
print("intercept: ", regr.intercept_)
intercept: -0.868701466781709
Make a prediction with the fitted model:
x_pred = [102, 6]
x_pred = np.array(x_pred).reshape(-1,2) # convert the list into a 2-D array (one row, two features)
y_pred = regr.predict(x_pred)
print(y_pred)
[10.90757981]
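As a sanity check (not part of the original post), the same value can be reproduced by hand from the fitted coefficients and intercept:
# Manual check: y = intercept + coef[0]*x1 + coef[1]*x2
manual = regr.intercept_ + regr.coef_[0] * 102 + regr.coef_[1] * 6
print(manual)  # ≈ 10.9076, matching regr.predict above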
When the features include a categorical feature, that feature must first be one-hot encoded.
path = r"c:\users\meachine learn\deliverydummydone.csv"
data = genfromtxt(path, delimiter=',')
print(data)
[[100. 4. 1. 9.3]
[ 50. 3. 0. 4.8]
[100. 4. 1. 8.9]
[100. 2. 2. 6.5]
[ 50. 2. 2. 4.2]
[ 80. 2. 1. 6.2]
[ 75. 3. 1. 7.4]
[ 65. 4. 0. 6. ]
[ 90. 3. 0. 7.6]]
# One-hot encode the categorical feature in the third column
from sklearn import preprocessing
enc = preprocessing.OneHotEncoder()
data_coder = data[:,2]
data_coder = np.array(data_coder).reshape(-1,1)
enc.fit(data_coder)
OneHotEncoder(categorical_features='all', dtype=<class 'numpy.float64'>,
       handle_unknown='error', n_values='auto', sparse=True)
data_2 = enc.transform(data_coder).toarray()
data_1 = np.array(data[:,0:2]).reshape(-1,2)
x = np.hstack((data_1, data_2)) # horizontally stack the feature matrices
print(x)
[[100. 4. 0. 1. 0.]
[ 50. 3. 1. 0. 0.]
[100. 4. 0. 1. 0.]
[100. 2. 0. 0. 1.]
[ 50. 2. 0. 0. 1.]
[ 80. 2. 0. 1. 0.]
[ 75. 3. 0. 1. 0.]
[ 65. 4. 1. 0. 0.]
[ 90. 3. 1. 0. 0.]]
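Note that the OneHotEncoder arguments echoed above (categorical_features, n_values) belong to older scikit-learn releases and have since been removed. A rough modern equivalent of the same encoding step, assuming scikit-learn 1.2 or newer (where sparse_output exists), is sketched below; it is not the original code.
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse_output=False)  # dense array, so no .toarray() needed
data_2 = enc.fit_transform(data_coder)    # same 9 x 3 one-hot matrix as above
x = np.hstack((data[:, 0:2], data_2))     # rebuild the feature matrix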
y = data[:,-1]
regr = linear_model.LinearRegression()
regr.fit(x,y)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
print("coefficinet: ", regr.coef_)
coefficinet: [ 0.05553544 0.69257631 -0.17013278 0.57040007 -0.40026729]
print("intercept: ", regr.intercept_)
intercept: 0.19995688911881349
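To see the dummy-encoded model in use, a prediction for a new sample can be made the same way as before. This example is illustrative, not from the original post; the feature vector follows the same five-column layout as x above (two numeric features followed by the three one-hot columns).
x_new = np.array([[90, 3, 0, 1, 0]])  # two numeric features followed by the one-hot category
print(regr.predict(x_new))            # ≈ 7.85 given the coefficients and intercept above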