
Install & CUDA (GPU) check

# euni- installing the GPU build differs between tensorflow and torch!
#  tensorflow: install the GPU build separately as tensorflow-gpu
#  torch: pytorch and torchvision are installed the same way either way (to use the GPU, install the matching cudatoolkit alongside)
#      !!Caution!! check the right version at 'https://pytorch.org/get-started/locally/' before installing
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))
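A quick end-to-end check that the selected device actually works (a minimal sketch with a throwaway linear layer, not part of the original snippet):

model = torch.nn.Linear(4, 2).to(device)  # move the layer's parameters to the selected device
x = torch.randn(1, 4, device=device)      # allocate the input on the same device
print(model(x).shape)                     # torch.Size([1, 2])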

Datasets and DataLoaders (images)

https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

import os  # needed for os.path.join below

import torch
from torchvision import datasets, transforms

# euni- similar to tf.image, !!torchvision must be installed separately!!
# euni- tf.keras.preprocessing.image.ImageDataGenerator: very useful if you are used to generators
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}
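# note: the Normalize mean/std above are the standard ImageNet statistics that torchvision's pretrained models expect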

# euni- similar to tf.data.Dataset
# euni- loads images from a directory (the structure below must be matched)
#  .
#  ├─ data
#  |   ├─ train
#  |   └─ val
data_dir = 'data'
# euni- this is where the augmentation is wired in
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
# euni- classes are the sub-directory names (ImageFolder sorts them alphabetically)
class_names = image_datasets['train'].classes
# print(dataset_sizes)
# print(class_names)
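# print(image_datasets['train'].class_to_idx)  # the class-name -> index mapping built from the folder names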

# euni- nice that batch size, shuffle, and workers can all be configured in one place
train_dataloader = torch.utils.data.DataLoader(image_datasets['train'], batch_size=4,
                                             shuffle=True, num_workers=4)
val_dataloader = torch.utils.data.DataLoader(image_datasets['val'], batch_size=4,
                                             shuffle=True, num_workers=4)

#### !!!Caution!!! ####
# torch and tensorflow use different tensor shapes!
# e.g., image shape (output shapes differ the same way)
#  tensorflow >> (B, H, W, 3)
#  torch >> (B, 3, H, W)
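# minimal sketch: converting between the two layouts with permute
# (x_nhwc is a made-up tensorflow-style batch, not from the original)
x_nhwc = torch.randn(8, 224, 224, 3)   # (B, H, W, C)
x_nchw = x_nhwc.permute(0, 3, 1, 2)    # (B, C, H, W) as torch expects
print(x_nchw.shape)                    # torch.Size([8, 3, 224, 224])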
## case 2) Building a custom Dataset
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import numpy as np
from PIL import Image

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

def image_loader(image_name, device, training=False):
    image = Image.open(image_name).convert("RGB")
    image = transforms.Resize((256, 256))(image)
    if training:
        image = data_transforms['train'](image)
    else:
        image = data_transforms['val'](image)

    # fake batch dimension required to fit the network's input dimensions
    return image.unsqueeze(0).to(device, torch.float)

class LoadDataSet(Dataset):

    def __init__(self, image2label, device, training=False):  # , acc_label):
        self.images = list(image2label.keys())
        self.labels = list(image2label.values())
        self.device = device
        self.training = training

    def __getitem__(self, idx):
        try:
            image_name = self.images[idx]
            img = image_loader(image_name, self.device, training=self.training)[0]

            label = self.labels[idx]
            label = torch.from_numpy(np.array(label, dtype=np.float32)).to(self.device)
        except Exception as ex:
            # print(ex)
            return None
        return img, label

    def __len__(self):
        return len(self.images)

# torch exception handling: drop samples whose __getitem__ returned None
# (caveat: if every sample in a batch failed, default_collate receives an empty list and raises)
def collate_fn(batch):
    batch = list(filter(lambda x: x is not None, batch))
    return torch.utils.data.dataloader.default_collate(batch)

# euni- build {image path: label} dicts from the directory layout
# (note: the label is the folder name string; the float32 cast in __getitem__ assumes numeric folder names)
train_dataset = {os.path.join(data_dir, 'train', label, f): label for label in os.listdir(os.path.join(data_dir, 'train')) for f in os.listdir(os.path.join(data_dir, 'train', label))}
val_dataset = {os.path.join(data_dir, 'val', label, f): label for label in os.listdir(os.path.join(data_dir, 'val')) for f in os.listdir(os.path.join(data_dir, 'val', label))}


train_dataset = LoadDataSet(train_dataset, device, training=True)  # applies the 'train' (augmentation) transforms
val_dataset = LoadDataSet(val_dataset, device, training=False)  # applies the 'val' (normalization-only) transforms

batch_size = 4  # not defined earlier in this snippet; kept small to match the val loader
train_dataloader = DataLoader(train_dataset, collate_fn=collate_fn, batch_size=batch_size, shuffle=True, drop_last=True)
val_dataloader = DataLoader(val_dataset, collate_fn=collate_fn, batch_size=4, shuffle=True, drop_last=True)

 

Build Model

Basic model structure

https://pytorch.org/tutorials/beginner/nn_tutorial.html

from torch import nn
import torch.optim as optim

# euni- just as you subclass tf.keras.Model in tensorflow, you use nn.Module here
#  note) nn.Sequential also exists

# class Mnist_Logistic(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))
#         self.bias = nn.Parameter(torch.zeros(10))
# 
#     def forward(self, xb):
#         return xb @ self.weights + self.bias

class Mnist_Logistic(nn.Module):
    def __init__(self):
        super(Mnist_Logistic, self).__init__() # spell out super for clarity
        self.fc = nn.Linear(784, 10)

    def forward(self, xb):
        return self.fc(xb)

logistic_model = Mnist_Logistic()

# !! Note !!
# euni- compared with tf.keras, pytorch generally makes you pass the shapes in yourself
# e.g., Conv2d
#  tf.keras.layers.Conv2D(filters, kernel_size,
#                  strides=(1, 1), padding='valid', ...)
#  torch.nn.Conv2d(in_channels, out_channels, kernel_size,
#                  stride=1, padding=0, ...)
## Naming Sequential layers
import collections
model = torch.nn.Sequential(
    collections.OrderedDict(
        [
            ('fc', torch.nn.Linear(in_features=784,
                                   out_features=10, bias=True))
        ]
    )
)
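# with an OrderedDict, layers can be accessed by name as well as by index
print(model.fc)              # Linear(in_features=784, out_features=10, bias=True)
print(model[0] is model.fc)  # True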

 

Training

https://pytorch.org/tutorials/beginner/basics/optimization_tutorial.html

# Hyperparameters
learning_rate = 1e-3
batch_size = 64
epochs = 5
# Loss Function
# euni- pytorch's cross_entropy combines LogSoftmax and NLLLoss
#  : so the output layer gets no extra softmax (in tensorflow you typically add a softmax to the output layer)
loss_fn = nn.CrossEntropyLoss()
# print(loss_fn(logistic_model(xb), yb))
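# minimal sketch with dummy data: CrossEntropyLoss expects raw logits and integer class indices
logits = torch.randn(4, 10)            # (batch, classes), unnormalized scores
targets = torch.randint(0, 10, (4,))   # class indices, not one-hot
print(loss_fn(logits, targets))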

# Optimizer
optimizer = torch.optim.SGD(logistic_model.parameters(), lr=learning_rate) # a weight_decay parameter is also available

# and a learning rate scheduler
# # euni- multiplies the learning rate by gamma=0.1 every 3 epochs
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
#                                                step_size=3,
#                                                gamma=0.1)
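# if the scheduler is enabled, step it once per epoch after the training pass:
# for t in range(epochs):
#     train_loop(train_dataloader, logistic_model, loss_fn, optimizer)
#     lr_scheduler.step()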
# euni- you define the training loop yourself (core pytorch has no model.fit-style trainer like tensorflow)
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, logistic_model, loss_fn, optimizer)
    test_loop(val_dataloader, logistic_model, loss_fn)
print("Done!")

Save & Load model

https://pytorch.org/tutorials/beginner/basics/saveloadrun_tutorial.html

# # weight save & load
# torch.save(logistic_model.state_dict(), 'model_weights.pth')
# # euni- you must instantiate the model architecture first
# model = Mnist_Logistic()
# model.load_state_dict(torch.load('model_weights.pth'))
# model.eval() # be sure to call model.eval() method before inferencing to set the dropout and batch normalization layers to evaluation mode. Failing to do this will yield inconsistent inference results.

# euni- save & load the full object, class structure included
torch.save(logistic_model, 'model.pth')

# euni- the Mnist_Logistic class must already be defined for this load to work
model = torch.load('model.pth')

Transfer Learning

Finetuning tutorial

https://tutorials.pytorch.kr/beginner/transfer_learning_tutorial.html

https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html

import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# load a model pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# euni- what the example shows: replace the head layer to customize the classes (outputs)
# replace the classifier with a new one, that has
# num_classes which is user-defined
num_classes = 2  # 1 class (person) + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
# euni- freeze the network
# see) https://pytorch.org/docs/master/notes/autograd.html
# (note: running this after replacing the head freezes the new head as well;
#  to train only the head, freeze first, then replace box_predictor)
for param in model.parameters():
    param.requires_grad = False
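# minimal sketch: with freeze-first-then-replace ordering, only the new head's
# parameters remain trainable, so the optimizer is built over those alone
# (the lr/momentum values are placeholders, not taken from the tutorial)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9)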

# # euni- tensorflow version
# for layer in model.layers:
#     layer.trainable = False

 

ETC

(Updated 2022.07.15)

Using multiple GPUs

NumPy is a great framework, but it cannot use GPUs to accelerate its numerical computations. PyTorch tensors can; conceptually, a PyTorch Tensor is identical to a NumPy array.

https://pytorch.org/docs/stable/distributed.html

https://tutorials.pytorch.kr/intermediate/dist_tuto.html

logistic_model = torch.nn.DataParallel(logistic_model).to(device)
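DataParallel replicates the module on each GPU and splits every input batch across them; the devices can also be listed explicitly (a sketch assuming two visible GPUs):

logistic_model = torch.nn.DataParallel(logistic_model, device_ids=[0, 1]).to('cuda')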

Saving and Loading Models Across Devices in PyTorch

https://pytorch.org/tutorials/recipes/recipes/save_load_across_devices.html

# Save a model trained with torch.nn.DataParallel()
torch.save(logistic_model.module.state_dict(), PATH)
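Loading on a different device is done by passing map_location to torch.load (a minimal sketch, reusing the PATH placeholder from above):

model = Mnist_Logistic()
model.load_state_dict(torch.load(PATH, map_location=torch.device('cpu')))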

 

cuda error: device-side assert triggered

It looks like a CUDA error, but it can actually be any number of different errors!

## makes the concrete underlying error visible
## (set it before CUDA is initialized, e.g. at the very top of the script)
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

 

Torch Hub model save

import torch
device = 'cpu'
efficientnet = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub',
                              'nvidia_efficientnet_b0', pretrained=True).to(device)

efficientnet.eval()  # skipping this can change the results!!

efficientnet = torch.jit.trace(efficientnet, torch.randn(1, 3, 224, 224))

efficientnet.save('efficient_net.pth')
import torch
device = 'cpu'
efficientnet = torch.jit.load('efficient_net.pth').eval().to(device)

pred = efficientnet(torch.randn(1, 3, 224, 224))

 
