
[PyTorch] Dropout

Library Import

import torch
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import random
import numpy as np

Seed

# set the seed value
random_seed = 1

# device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# fix the PyTorch, CUDA, and cuDNN seeds
torch.manual_seed(random_seed)
if device == 'cuda':
    torch.cuda.manual_seed_all(random_seed)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

np.random.seed(random_seed) # fix the NumPy seed
random.seed(random_seed) # fix the Python seed
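Setting cudnn.deterministic = True and cudnn.benchmark = False forces cuDNN to choose deterministic kernels (at some speed cost), so repeated runs on the GPU reproduce the same results.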
# parameters
learning_rate = 0.001
epochs = 15
batch_size = 100
drop_prob = 0.3 # probability p of zeroing each activation in nn.Dropout

Data Load

mnist_train = datasets.MNIST(root='./', train=True,
                             transform=transforms.ToTensor(),
                             download=True)

mnist_test = datasets.MNIST(root='./', train=False,
                            transform=transforms.ToTensor(),
                            download=True)
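Note that transforms.ToTensor() converts each PIL image into a FloatTensor with values scaled to [0, 1]; the raw tensors used later in the Test section must be divided by 255 to match this scale.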
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST\raw\train-images-idx3-ubyte.gz
100%|██████████| 9912422/9912422 [00:03<00:00, 3057685.33it/s]
Extracting ./MNIST\raw\train-images-idx3-ubyte.gz to ./MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST\raw\train-labels-idx1-ubyte.gz
100%|██████████| 28881/28881 [00:00<00:00, 14458784.18it/s]
Extracting ./MNIST\raw\train-labels-idx1-ubyte.gz to ./MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST\raw\t10k-images-idx3-ubyte.gz
100%|██████████| 1648877/1648877 [00:00<00:00, 2592975.68it/s]
Extracting ./MNIST\raw\t10k-images-idx3-ubyte.gz to ./MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST\raw\t10k-labels-idx1-ubyte.gz
100%|██████████| 4542/4542 [00:00<00:00, 2277137.07it/s]
Extracting ./MNIST\raw\t10k-labels-idx1-ubyte.gz to ./MNIST\raw
# data loader
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True, # reshuffle every epoch
                                          drop_last=True) # drop a final incomplete batch
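With 60,000 training images and batch_size = 100, this yields 600 batches per epoch; drop_last=True would discard a final incomplete batch, but here 60,000 divides evenly.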

Model

nn Layers, Activation Function, Dropout, Weight Initialization

# nn Layers
linear1 = torch.nn.Linear(784, 512, bias=True) # (28 x 28) = 784
linear2 = torch.nn.Linear(512, 512, bias=True)
linear3 = torch.nn.Linear(512, 512, bias=True)
linear4 = torch.nn.Linear(512, 512, bias=True)
linear5 = torch.nn.Linear(512, 10, bias=True) # 10 outputs, one per digit 0-9

# Activation Function
relu = torch.nn.ReLU()

# Dropout
dropout = torch.nn.Dropout(p=drop_prob)
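As a quick sanity check (a sketch, not part of the original notebook), this is what the dropout module does in training mode: each element is zeroed with probability p = 0.3, and the survivors are scaled by 1/(1 - p) so the expected activation is unchanged (inverted dropout).

x = torch.ones(10)
print(dropout(x)) # train mode (the default): roughly 30% zeros, survivors become 1/(1 - 0.3) ≈ 1.4286
dropout.eval()
print(dropout(x)) # eval mode: identity, all ones
dropout.train() # switch back so training below behaves as intended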
# Xavier Initialization
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)
torch.nn.init.xavier_uniform_(linear4.weight)
torch.nn.init.xavier_uniform_(linear5.weight)
Parameter containing:
tensor([[ 0.0147, -0.0003, -0.0210,  ...,  0.0707, -0.0314, -0.0136],
        [ 0.0718, -0.0103,  0.0366,  ..., -0.0319,  0.0462,  0.0303],
        [ 0.0575, -0.0890, -0.0492,  ...,  0.0100,  0.0807, -0.0359],
        ...,
        [-0.0403,  0.0531, -0.0981,  ...,  0.0617, -0.0011,  0.0624],
        [-0.0896,  0.0671,  0.0815,  ...,  0.0659,  0.1023, -0.0633],
        [ 0.1020,  0.0481, -0.0295,  ..., -0.1030, -0.0380, -0.0858]],
       requires_grad=True)
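The tensor above is the return value of the last call, i.e. linear5.weight. As a quick check (a sketch, not in the original notebook): xavier_uniform_ draws from U(-a, a) with a = gain * sqrt(6 / (fan_in + fan_out)), which for linear5 (512 -> 10) gives a ≈ 0.1072, matching the magnitudes printed above.

import math
a = math.sqrt(6.0 / (512 + 10)) # xavier_uniform_ bound with the default gain=1
print(a) # ≈ 0.1072
print(linear5.weight.abs().max()) # never exceeds a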
# Model
model = torch.nn.Sequential(linear1, relu, dropout,
                            linear2, relu, dropout,
                            linear3, relu, dropout,
                            linear4, relu, dropout,
                            linear5).to(device)
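Because ReLU and Dropout have no learnable parameters, reusing the same relu and dropout instances between layers is safe; every forward pass through dropout samples a fresh random mask.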
# Cost/Loss & Optimizer
criterion = torch.nn.CrossEntropyLoss().to(device) # multi-class classification (softmax is applied internally)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

model.train() and model.eval()

We declare model.train() and model.eval() because layers such as Dropout and BatchNorm behave differently during training and evaluation. For Dropout, which this post uses, train mode randomly zeroes activations with probability p, while eval mode disables dropout so that every unit contributes to the prediction.

For BatchNorm layers, each batch has its own mean and standard deviation during training; running averages of these statistics are stored and then applied at test time, so the test data is normalized the same way as the training data.
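A minimal sketch (not in the original post) of what the mode switch changes: in train mode, repeated forward passes on the same input differ because a new dropout mask is sampled each time; in eval mode the output is deterministic.

sample = torch.rand(1, 784).to(device)

model.train() # dropout active
print(model(sample)[0, :3]) # differs from run to run
print(model(sample)[0, :3])

model.eval() # dropout disabled
print(model(sample)[0, :3]) # identical outputs
print(model(sample)[0, :3])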

total_batch = len(data_loader)
model.train() # enable dropout for training

for epoch in range(epochs):
    avg_cost = 0

    for X, y in data_loader:
        X = X.view(-1, 28 * 28).to(device) # flatten (1, 28, 28) -> 784
        y = y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, y) # Loss
        cost.backward()
        optimizer.step()

        avg_cost += cost.item() / total_batch # .item() avoids holding on to the graph

    print('Epoch: {} Cost: {:.6f}'.format(epoch + 1, avg_cost))
Epoch: 1 Cost: 0.146523
Epoch: 2 Cost: 0.112582
Epoch: 3 Cost: 0.092465
Epoch: 4 Cost: 0.082154
Epoch: 5 Cost: 0.075115
Epoch: 6 Cost: 0.069497
Epoch: 7 Cost: 0.063039
Epoch: 8 Cost: 0.061050
Epoch: 9 Cost: 0.055589
Epoch: 10 Cost: 0.051682
Epoch: 11 Cost: 0.049075
Epoch: 12 Cost: 0.046335
Epoch: 13 Cost: 0.044887
Epoch: 14 Cost: 0.043464
Epoch: 15 Cost: 0.044467

Test

with torch.no_grad():
    model.eval() # disable dropout for evaluation

    # .data/.targets replace the deprecated test_data/test_labels;
    # divide by 255 so the input scale matches ToTensor() used in training
    X_test = (mnist_test.data.view(-1, 28 * 28).float() / 255.).to(device)
    y_test = mnist_test.targets.to(device)

    y_pred = model(X_test)
    correct = torch.argmax(y_pred, 1) == y_test
    accuracy = correct.float().mean()
    print('Accuracy: {}'.format(accuracy))

    # a single random prediction
    r = random.randint(0, len(mnist_test) - 1)
    X_single_data = (mnist_test.data[r:r + 1].view(-1, 28 * 28).float() / 255.).to(device)
    y_single_data = mnist_test.targets[r:r + 1].to(device)

    print('Label: {}'.format(y_single_data.item()))
    single_prediction = model(X_single_data)
    print('Prediction: {}'.format(torch.argmax(single_prediction, 1).item()))
Accuracy: 0.9790999889373779
Label: 8
Prediction: 8
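Note that this accuracy is measured with dropout disabled via model.eval(); evaluating in train mode would give lower, noisier numbers because 30% of the activations would still be dropped on every forward pass.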