Colab PyTorch 資料處理

2022-06-11 07:39:15 字數 3329 閱讀 1480

1、計算資料集的均值和標準差

import os

import cv2
import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset

def compute_mean_and_std(dataset):
    """Compute the per-channel mean and standard deviation of an image dataset.

    Statistics are accumulated over every pixel in a single pass, so the mean
    and the standard deviation are consistent with each other even when the
    images have different sizes, and the dataset is only read once.

    Args:
        dataset: Iterable yielding ``(img, label)`` pairs, e.g. a PyTorch
            dataset. Each ``img`` must be convertible with ``np.asarray`` to
            an H x W x C array with at least three channels; only the first
            three channels are used. Pixel values are assumed to be 8-bit
            (0-255), because the results are divided by 255.

    Returns:
        A ``(mean, std)`` pair. Each is a 3-tuple of Python floats scaled by
        1/255, listed in the image's own channel order (channel 0, 1, 2 --
        i.e. R, G, B for a PIL RGB image).

    Raises:
        ValueError: If the dataset yields no pixels.
    """
    channel_sum = np.zeros(3, dtype=np.float64)
    channel_sq_sum = np.zeros(3, dtype=np.float64)
    num_pixels = 0

    for img, _ in dataset:
        # Flatten to (num_pixels, 3) in float64 so uint8 values cannot
        # overflow while being summed or squared.
        pixels = np.asarray(img)[:, :, :3].reshape(-1, 3).astype(np.float64)
        channel_sum += pixels.sum(axis=0)
        channel_sq_sum += np.square(pixels).sum(axis=0)
        num_pixels += pixels.shape[0]

    if num_pixels == 0:
        raise ValueError("cannot compute statistics of an empty dataset")

    mean = channel_sum / num_pixels
    # Var[x] = E[x^2] - E[x]^2; clamp tiny negative values caused by rounding.
    variance = np.maximum(channel_sq_sum / num_pixels - np.square(mean), 0.0)
    std = np.sqrt(variance)

    return tuple((mean / 255.0).tolist()), tuple((std / 255.0).tolist())

import cv2

# Read the basic properties of the video file at `mp4_path` with OpenCV.
video = cv2.VideoCapture(mp4_path)
try:
    # VideoCapture does not raise when the file cannot be opened, so check
    # explicitly instead of silently reading meaningless property values.
    if not video.isOpened():
        raise OSError("cannot open video: {!r}".format(mp4_path))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    # NOTE: int() truncates fractional frame rates (e.g. 29.97 -> 29).
    fps = int(video.get(cv2.CAP_PROP_FPS))
finally:
    # Always free the underlying capture handle, even on error.
    video.release()

def sample_frame_indices(self, num_frames, is_train):
    """Pick one frame index per segment of a video.

    The number of segments ``k`` is read from ``self._num_segments``. When the
    video has more than ``k`` frames it is split into ``k`` segments of
    ``num_frames // k`` frames each (any remainder frames at the end are never
    sampled) and one index is taken from every segment. Otherwise every frame
    is used once and the list is padded with repeated frames up to ``k``.

    Args:
        num_frames: Total number of frames in the video.
        is_train: If true, indices are drawn at random (a random offset inside
            each segment, or random repeated frames for short videos). If
            false, the result is deterministic (the middle of each segment, or
            frames repeated cyclically from the start for short videos).

    Returns:
        A list of ``k`` zero-dimensional ``torch.long`` tensors in ascending
        order, each a valid index in ``[0, num_frames)``.

    Raises:
        ValueError: If ``num_frames`` or ``self._num_segments`` is not
            positive.
    """
    k = self._num_segments
    if k <= 0 or num_frames <= 0:
        raise ValueError(
            "num_frames and the number of segments must be positive, "
            "got num_frames={} and k={}".format(num_frames, k))

    if num_frames > k:
        segment_len = num_frames // k
        if is_train:
            # Random offset inside each segment.
            offsets = torch.randint(
                high=segment_len, size=(k,), dtype=torch.long)
        else:
            # Middle offset of each segment; integer division keeps the
            # indices integral (true division would produce floats).
            offsets = torch.full((k,), segment_len // 2, dtype=torch.long)
        frame_indices = offsets + segment_len * torch.arange(k)
    else:
        num_missing = k - num_frames
        if is_train:
            # Pad with randomly chosen existing frames.
            padding = torch.randint(
                high=num_frames, size=(num_missing,), dtype=torch.long)
        else:
            # Pad by cycling through the existing frames; the modulo keeps
            # every padded index in range even when k > 2 * num_frames.
            padding = torch.arange(num_missing) % num_frames
        frame_indices = torch.sort(
            torch.cat((torch.arange(num_frames), padding)))[0]

    assert frame_indices.size() == (k,)
    return [frame_indices[i] for i in range(k)]

4、常用訓練和驗證預處理

其中 ToTensor 操作會將 PIL.Image 或形狀為 H×W×D、數值範圍為 [0, 255] 的 np.ndarray 轉換為形狀為 D×H×W、數值範圍為 [0.0, 1.0] 的 torch.Tensor。

import torchvision

# Per-channel normalization statistics shared by both pipelines. These values
# match the ImageNet mean/std documented for torchvision's pretrained models.
NORMALIZE_MEAN = (0.485, 0.456, 0.406)
NORMALIZE_STD = (0.229, 0.224, 0.225)

# Training pipeline: random crop + random flip for augmentation, then convert
# to a tensor and normalize.
train_transform = torchvision.transforms.Compose([
    torchvision.transforms.RandomResizedCrop(size=224,
                                             scale=(0.08, 1.0)),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=NORMALIZE_MEAN,
                                     std=NORMALIZE_STD),
])

# Validation pipeline: deterministic resize + center crop, then the same
# tensor conversion and normalization as training.
val_transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=NORMALIZE_MEAN,
                                     std=NORMALIZE_STD),
])

FLEX quick start XML資料處理

第一次翻譯,很多內容看得懂卻表達不好,英文水平有待提高。介紹 XML:AS3 包含了一組基於 ECMAScript for XML(E4X)規範的類。這些類功能強大,而且能夠很容易地用於處理 XML 資料。利用 E4X,你可以比以往的語言更快地開發處理 XML 資料的程式。還...

ML Data Processing資料預處理

資料歸一化 引數 arrays:list、np.array、matrices、pandas DataFrames,需被分割的樣本集 options test_size:在 0.0 和 1.0 之間,表示要從樣本集拆分到測試集的比例,預設為 0.25 train_size:在 0.0 和 1.0 之間,表示要從樣本集拆分...

邏輯回歸 breast cancer 資料集處理

coding utf 8 import pandas as pd import numpy as np from matplotlib import pyplot as plt from sklearn.model selection import train test split definit ...