Learning objectives:
Use non-linear units such as ReLU to improve model performance, build a deep neural network, and implement an easy-to-use neural network class.
Functions to define:
import numpy as np

def initialize_parameters_deep(layer_dims):  # layer_dims is a list with the number of units in each layer
    np.random.seed(1)
    parameters = {}
    L = len(layer_dims)
    for l in range(1, L):
        parameters['w' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
    return parameters
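A quick shape check can help confirm the initializer. The layer sizes below are hypothetical, chosen only for illustration:

    # hypothetical example: 5 input features, hidden layers of 4 and 3 units, 1 output unit
    parameters = initialize_parameters_deep([5, 4, 3, 1])
    for name, value in sorted(parameters.items()):
        print(name, value.shape)
    # expected shapes: b1 (4, 1), b2 (3, 1), b3 (1, 1), w1 (4, 5), w2 (3, 4), w3 (1, 3)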
def linear_forward(a, w, b):
    z = np.dot(w, a) + b
    cache = (a, w, b)  # cached for the backward pass
    return z, cache
def linear_activation_forward(a_prev, w, b, activation):
    if activation == 'sigmoid':
        z, linear_cache = linear_forward(a_prev, w, b)
        a, activation_cache = sigmoid(z)
    elif activation == 'relu':
        z, linear_cache = linear_forward(a_prev, w, b)
        a, activation_cache = relu(z)
    cache = (linear_cache, activation_cache)
    return a, cache
def l_model_forward(x, parameters):
    caches = []
    a = x
    L = len(parameters) // 2  # number of layers
    for l in range(1, L):  # hidden layers use relu
        a_prev = a
        a, cache = linear_activation_forward(a_prev, parameters['w' + str(l)], parameters['b' + str(l)], activation='relu')
        caches.append(cache)
    # output layer uses sigmoid
    al, cache = linear_activation_forward(a, parameters['w' + str(L)], parameters['b' + str(L)], activation='sigmoid')
    caches.append(cache)
    return al, caches
def compute_cost(al, y):
    m = y.shape[1]
    cost = -np.sum(np.multiply(y, np.log(al)) + np.multiply(1 - y, np.log(1 - al))) / m
    cost = np.squeeze(cost)  # e.g. turns [[17.]] into 17.
    return cost
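In formula form, this is the cross-entropy cost over the m training examples:

$$J = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log a^{[L](i)} + (1-y^{(i)})\log\big(1-a^{[L](i)}\big)\Big]$$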
def linear_backward(dz, cache):
    a_prev, w, b = cache
    m = a_prev.shape[1]
    dw = np.dot(dz, a_prev.T) / m
    db = np.sum(dz, axis=1, keepdims=True) / m
    da_prev = np.dot(w.T, dz)
    return da_prev, dw, db
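These three lines are the standard backprop formulas for the linear part of layer l:

$$dW^{[l]} = \frac{1}{m}\,dZ^{[l]}A^{[l-1]T},\qquad db^{[l]} = \frac{1}{m}\sum_{i=1}^{m}dZ^{[l](i)},\qquad dA^{[l-1]} = W^{[l]T}dZ^{[l]}$$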
def linear_activation_backward(da, cache, activation):
    linear_cache, activation_cache = cache
    if activation == 'relu':
        dz = relu_backward(da, activation_cache)
        da_prev, dw, db = linear_backward(dz, linear_cache)
    elif activation == 'sigmoid':
        dz = sigmoid_backward(da, activation_cache)
        da_prev, dw, db = linear_backward(dz, linear_cache)
    return da_prev, dw, db
def l_model_backward(al, y, caches):
    grads = {}
    L = len(caches)  # number of layers
    m = al.shape[1]
    y = y.reshape(al.shape)
    # derivative of the cost with respect to al
    dal = -(np.divide(y, al) - np.divide(1 - y, 1 - al))
    # output (sigmoid) layer
    current_cache = caches[L-1]
    grads['da' + str(L)], grads['dw' + str(L)], grads['db' + str(L)] = linear_activation_backward(dal, current_cache, activation='sigmoid')
    # hidden (relu) layers
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        da_prev_temp, dw_temp, db_temp = linear_activation_backward(grads['da' + str(l+2)], current_cache, activation='relu')
        grads['da' + str(l+1)] = da_prev_temp
        grads['dw' + str(l+1)] = dw_temp
        grads['db' + str(l+1)] = db_temp
    return grads
def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2
    for l in range(1, L + 1):  # update every layer, including the output layer
        parameters['w' + str(l)] = parameters['w' + str(l)] - learning_rate * grads['dw' + str(l)]
        parameters['b' + str(l)] = parameters['b' + str(l)] - learning_rate * grads['db' + str(l)]
    return parameters
Model:
def l_layer_model(x, y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):
    np.random.seed(1)
    costs = []
    parameters = initialize_parameters_deep(layers_dims)
    for i in range(0, num_iterations):
        al, caches = l_model_forward(x, parameters)
        cost = compute_cost(al, y)
        grads = l_model_backward(al, y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)
        if print_cost and i % 100 == 0:
            print('cost after iteration %i: %f' % (i, cost))
            costs.append(cost)
    return parameters
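A minimal usage sketch follows. The data here is random and purely hypothetical, chosen only to show the expected array shapes (columns are examples) and the call order:

    # hypothetical run on random data (illustration only)
    np.random.seed(2)
    x = np.random.randn(12, 200)                       # 12 features, 200 examples
    y = (np.random.rand(1, 200) > 0.5).astype(float)   # binary labels
    layers_dims = [12, 7, 5, 1]                        # input, two relu hidden layers, sigmoid output
    parameters = l_layer_model(x, y, layers_dims, num_iterations=2500, print_cost=True)

    # prediction sketch: al is a probability, threshold it at 0.5
    al, _ = l_model_forward(x, parameters)
    predictions = (al > 0.5).astype(int)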