1、計算資料集的均值和標準差
import os

import cv2
import numpy as np
from PIL import Image
from torch.utils.data import Dataset
def compute_mean_and_std(dataset):
    """Compute per-channel mean and standard deviation of an image dataset.

    Takes a PyTorch-style dataset and returns statistics scaled to the
    [0, 1] range (raw values are divided by 255).

    Every pixel of every image contributes equally to both statistics, so
    the result stays consistent when the images differ in size.  The
    dataset is traversed only once.

    Args:
        dataset: Iterable of ``(img, label)`` pairs.  ``img`` must be
            convertible with ``np.asarray`` to an ``H x W x C`` array with
            at least three channels; only the first three are used.  The
            label is ignored.

    Returns:
        ``(mean, std)``: two 3-tuples of Python floats, ordered like the
        image channels (channel 0 first).

    Raises:
        ValueError: If an image is not ``H x W x C`` with ``C >= 3``, or if
            the dataset contains no pixels at all.
    """
    channel_sum = np.zeros(3, dtype=np.float64)
    channel_sq_sum = np.zeros(3, dtype=np.float64)
    num_pixels = 0

    for img, _ in dataset:
        # Convert (e.g. a PIL image) to float64 so the sums below cannot
        # overflow or wrap around as they would in uint8 arithmetic.
        pixels = np.asarray(img, dtype=np.float64)
        if pixels.ndim != 3 or pixels.shape[2] < 3:
            raise ValueError(
                "expected an H x W x C image with at least 3 channels, "
                "got shape {}".format(pixels.shape))
        # Flatten the spatial dimensions: one row per pixel, one column
        # per channel.
        pixels = pixels[:, :, :3].reshape(-1, 3)
        channel_sum += pixels.sum(axis=0)
        channel_sq_sum += np.square(pixels).sum(axis=0)
        num_pixels += pixels.shape[0]

    if num_pixels == 0:
        raise ValueError("cannot compute statistics of an empty dataset")

    mean = channel_sum / num_pixels
    # Var[x] = E[x^2] - E[x]^2; clamp at zero to absorb rounding error.
    variance = np.maximum(channel_sq_sum / num_pixels - np.square(mean), 0.0)
    std = np.sqrt(variance)

    return tuple((mean / 255.0).tolist()), tuple((std / 255.0).tolist())
# Read basic metadata of a video file with OpenCV.
# `mp4_path` is expected to be defined by the surrounding code.
import cv2

video = cv2.VideoCapture(mp4_path)
try:
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    # NOTE: int() truncates fractional frame rates (e.g. 29.97 -> 29).
    fps = int(video.get(cv2.CAP_PROP_FPS))
finally:
    # Always free the capture handle, even if a property query fails.
    video.release()
k =self._num_segmentsifis_train:
if num_frames >k:
#random index for each segment.
frame_indices =torch.randint(
high=num_frames // k, size=(k,), dtype=torch.long)
frame_indices += num_frames // k *torch.arange(k)
else
: frame_indices =torch.randint(
high=num_frames, size=(k - num_frames,), dtype=torch.long)
frame_indices =torch.sort(torch.cat((
torch.arange(num_frames), frame_indices)))[0]
else
:
if num_frames >k:
#middle index for each segment.
frame_indices = num_frames / k // 2frame_indices += num_frames // k *torch.arange(k)
else
: frame_indices =torch.sort(torch.cat((
torch.arange(num_frames), torch.arange(k -num_frames))))[0]
assert frame_indices.size() ==(k,)
return [frame_indices[i] for i in range(k)]
4、常用訓練和驗證預處理
其中 ToTensor 操作會將 PIL.Image 或形狀為 H×W×D,數值範圍為 [0, 255] 的 np.ndarray 轉換為形狀為 D×H×W,數值範圍為 [0.0, 1.0] 的 torch.Tensor。
# Training preprocessing: random crop resized to 224x224 and a random
# horizontal flip, then conversion to a tensor and per-channel
# normalization.
train_transform = torchvision.transforms.Compose([
    torchvision.transforms.RandomResizedCrop(size=224, scale=(0.08, 1.0)),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                     std=(0.229, 0.224, 0.225)),
])

# Validation preprocessing: deterministic resize to 256 and a 224x224
# center crop, followed by the same tensor conversion and normalization
# as in training.
val_transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                     std=(0.229, 0.224, 0.225)),
])
FLEX quick start XML資料處理
第一次翻譯,很多內容看的懂卻表達不好。英文水平有待提高。介紹xml as3 包含了一組基於 ecmascript for xml e4x 標準 specification 的類。這些類功能強大,而且能夠很容易用在處理 xml資料上。利用 e4x,你可以比以往的語言更快的開發處理 xml資料的程式。還...
ML Data Processing資料預處理
資料歸一化 引數 arrays:list、np.array、matrices、pandas DataFrames,需被分割的樣本集;options:test_size 在 0.0 和 1.0 之間,表示要從樣本集拆分到測試集的比例,預設為 0.25;train_size 在 0.0 和 1.0 之間,表示要從樣本集拆分...
邏輯回歸 breast cancer 資料集處理
coding utf 8 import pandas as pd import numpy as np from matplotlib import pyplot as plt from sklearn.model selection import train test split definit ...