This post walks through the overall DNN training process, with Batch Normalization added on top. How it is used can be seen in the Model section.
Library Import
import torch
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import random
import numpy as np
Seed
# set the seed value
random_seed = 1

# device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# fix the PyTorch, CUDA, and cuDNN seeds
torch.manual_seed(random_seed)
if device == 'cuda':
    torch.cuda.manual_seed_all(random_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

np.random.seed(random_seed)  # fix the NumPy seed
random.seed(random_seed)     # fix the Python seed
# parameters
learning_rate = 0.01
epochs = 10
batch_size = 32
drop_prob = 0.3
Data Load
mnist_train = datasets.MNIST(root='./', train=True,
                             transform=transforms.ToTensor(),
                             download=True)

mnist_test = datasets.MNIST(root='./', train=False,
                            transform=transforms.ToTensor(),
                            download=True)
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST\raw\train-images-idx3-ubyte.gz
100%|██████████| 9912422/9912422 [00:01<00:00, 5159870.24it/s]
Extracting ./MNIST\raw\train-images-idx3-ubyte.gz to ./MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST\raw\train-labels-idx1-ubyte.gz
100%|██████████| 28881/28881 [00:00<00:00, 4816528.58it/s]
Extracting ./MNIST\raw\train-labels-idx1-ubyte.gz to ./MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST\raw\t10k-images-idx3-ubyte.gz
100%|██████████| 1648877/1648877 [00:00<00:00, 4500333.10it/s]
Extracting ./MNIST\raw\t10k-images-idx3-ubyte.gz to ./MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST\raw\t10k-labels-idx1-ubyte.gz
100%|██████████| 4542/4542 [00:00<00:00, 2272519.24it/s]
Extracting ./MNIST\raw\t10k-labels-idx1-ubyte.gz to ./MNIST\raw
# data loader
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)
Model
- nn Layers
- Batch Normalization
- Activation Function
- Dropout
- Weight Initialization
# nn Layers
linear1 = torch.nn.Linear(784, 32, bias=True)  # 28 x 28 = 784 input features
linear2 = torch.nn.Linear(32, 32, bias=True)
linear3 = torch.nn.Linear(32, 10, bias=True)   # 10 outputs, one per digit 0-9

# Batch Normalization
bn1 = torch.nn.BatchNorm1d(32)
bn2 = torch.nn.BatchNorm1d(32)

# Activation Function
relu = torch.nn.ReLU()

# Dropout (defined here, but not included in the Sequential model below)
dropout = torch.nn.Dropout(p=drop_prob)
# Xavier Initialization
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)
Parameter containing:
tensor([[ 6.5885e-02, 8.6867e-02, -3.4090e-01, 1.7627e-01, 1.0799e-01,
-3.1302e-01, 2.0123e-01, 9.4009e-02, 1.6458e-01, -2.7733e-01,
1.1939e-02, -2.2541e-02, 8.4626e-02, 2.0827e-01, 2.5802e-02,
1.5831e-01, 3.0665e-01, -3.6419e-01, -3.3775e-01, 1.7042e-01,
-3.1985e-01, 9.2599e-03, 1.3387e-01, 2.4543e-01, -9.1732e-02,
-3.3526e-01, 2.4545e-01, -3.2385e-01, -1.2825e-01, 1.0531e-01,
1.9131e-01, -3.0362e-01],
[-3.1044e-01, 3.3539e-01, 3.2260e-01, 2.9921e-01, -2.0240e-01,
8.3491e-04, 1.5645e-01, -2.8142e-01, 1.9885e-01, -3.4972e-01,
-3.7560e-02, 3.3423e-01, 2.9793e-01, -9.8763e-02, -7.0204e-02,
-1.1631e-01, 2.6820e-01, -3.2737e-01, -1.3591e-01, -3.6103e-01,
1.5209e-01, 1.2621e-01, -3.8020e-03, 1.5168e-01, -3.3398e-01,
-2.6921e-01, 2.5876e-01, -2.3407e-01, 1.4498e-01, -9.1018e-02,
-1.4272e-01, -1.5065e-01],
[ 2.4812e-02, -8.6725e-02, 4.2410e-02, -1.2403e-01, 1.8063e-01,
-3.0059e-01, 6.0061e-02, -2.1790e-01, 2.8908e-01, 1.3223e-01,
3.2287e-01, -2.5739e-02, 1.4782e-01, -5.4985e-02, 1.4912e-02,
7.1432e-02, 1.0982e-01, 2.1810e-01, 1.9099e-01, -2.8484e-01,
3.1630e-01, 3.6800e-01, 2.5363e-01, 1.7658e-01, -3.1709e-01,
2.4382e-01, 2.3137e-01, -1.9403e-01, -1.3542e-01, 3.2858e-01,
-1.2109e-01, -1.5276e-01],
[ 3.0504e-01, 3.0843e-02, -7.7709e-02, 1.8686e-01, -9.4330e-02,
4.3348e-02, 9.5372e-02, 2.4525e-03, 2.3839e-01, -2.0816e-01,
3.1187e-01, -3.4166e-01, -1.9875e-01, -2.6966e-01, -4.3193e-02,
2.8213e-02, 6.0213e-02, -1.7342e-01, 3.3984e-01, -1.9494e-01,
-4.4339e-02, -3.6919e-02, -1.4448e-01, -1.1655e-01, 3.6624e-02,
2.3341e-01, -3.0984e-01, 1.6266e-01, -2.6989e-01, -2.3291e-01,
-2.4284e-01, -2.6878e-01],
[ 3.6857e-01, 1.1517e-01, -1.4269e-01, 7.2776e-02, 1.4875e-01,
2.6073e-01, 2.3604e-01, -1.9875e-01, -1.7362e-01, 1.4512e-01,
-1.5211e-01, 1.9218e-01, 7.1176e-02, -3.3338e-02, 1.3720e-01,
3.1157e-01, -2.8815e-01, 1.6212e-01, 4.2325e-02, -2.1941e-02,
2.4766e-01, -1.6681e-01, -2.2203e-01, 1.7540e-01, 4.2069e-02,
2.7422e-01, 3.6793e-01, 1.1660e-01, -1.0619e-01, 1.5367e-01,
4.9685e-02, -3.0120e-01],
[-1.4915e-01, -1.1548e-01, 9.2934e-02, 2.3706e-01, -3.4652e-01,
1.5702e-01, -1.4088e-01, 3.1599e-01, 5.9961e-02, 1.0103e-01,
-2.0021e-01, -9.4271e-02, 3.3392e-01, 1.4329e-01, -1.9100e-01,
-3.1845e-01, 9.0212e-02, 1.1443e-01, 2.2587e-01, 3.7202e-01,
-3.1070e-01, 1.1340e-01, -3.8842e-02, -2.8988e-01, -8.8203e-02,
-2.0217e-01, -1.0005e-01, -1.9183e-01, 3.6772e-01, -7.3482e-03,
-3.7540e-01, 5.8406e-02],
[ 1.5918e-01, 2.4550e-01, -2.4114e-02, 1.5470e-01, 3.1708e-01,
-3.3983e-01, -2.8420e-01, -1.6119e-01, 1.2829e-01, -2.7860e-01,
-1.2798e-01, 3.3444e-01, 2.3869e-01, -3.2793e-01, -1.8519e-01,
-2.0382e-01, -1.2151e-01, 1.5302e-01, 7.5613e-02, 2.7970e-01,
-1.8785e-01, 3.1672e-01, 4.8255e-03, -3.0042e-01, 2.4510e-02,
-3.7598e-01, -3.5691e-01, 9.0523e-02, 1.4022e-01, -1.8877e-02,
2.4061e-01, -8.5857e-02],
[-7.9248e-02, 7.9641e-02, -1.8309e-01, 1.3526e-01, 1.2776e-01,
-1.2456e-01, -1.2399e-02, 2.8039e-01, -2.2774e-01, -5.6683e-02,
2.9218e-03, 2.4844e-01, -8.6395e-02, -2.7825e-02, 2.8312e-01,
-3.0502e-01, -1.7172e-01, -3.5173e-01, -1.3017e-01, 3.4146e-01,
5.6664e-02, 2.9193e-02, -2.4934e-01, 1.7849e-01, 2.1391e-01,
-2.6876e-01, -1.0160e-02, 1.7447e-01, 5.3708e-02, 3.1691e-01,
-1.6121e-02, 3.2059e-01],
[-3.5543e-01, -2.4808e-02, 2.0317e-01, -1.2727e-01, -2.2227e-01,
-2.6659e-01, 3.4295e-03, 3.0672e-01, 6.3697e-02, 7.4377e-02,
1.6892e-04, 8.5027e-02, 3.1677e-01, -3.5082e-03, 1.7779e-01,
-4.2994e-02, -2.7238e-01, -1.0097e-01, -3.7375e-01, -2.2437e-02,
1.0622e-01, 1.6875e-01, -2.1930e-01, -1.0930e-01, -2.3377e-01,
2.4759e-01, 2.5192e-01, -2.3998e-01, -2.9316e-02, -3.2307e-01,
-3.1558e-01, 9.5892e-03],
[ 3.1670e-01, 1.3797e-01, 2.4464e-01, -4.0956e-02, -1.4132e-01,
1.6923e-01, 2.6715e-01, 5.7575e-02, -3.0542e-01, 3.1088e-01,
-3.4597e-01, -5.1263e-02, -1.2049e-01, -1.7397e-02, -2.6721e-01,
-3.2290e-02, -3.5068e-01, 2.1913e-02, 8.9591e-02, -2.8676e-01,
-3.2559e-01, 3.7415e-01, 1.2694e-01, 1.8467e-01, -2.0232e-01,
2.7797e-04, -3.1124e-01, 1.2474e-01, 1.9693e-01, 3.6651e-01,
-2.1581e-01, -1.5297e-01]], requires_grad=True)
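The printed tensor is linear3.weight after initialization. Xavier uniform samples from U(-a, a) with a = gain * sqrt(6 / (fan_in + fan_out)); for this 32 -> 10 layer that gives a ≈ 0.378, which matches the range of the values above. A minimal sanity check (a sketch, reusing linear3 from above):

import math

a = math.sqrt(6 / (32 + 10))            # fan_in=32, fan_out=10
print(a)                                # ≈ 0.3780
print(linear3.weight.abs().max() <= a)  # tensor(True)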
- Batch Normalization should be applied before the activation function.
# Model
model = torch.nn.Sequential(linear1, bn1, relu,
                            linear2, bn2, relu,
                            linear3).to(device)
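Note that dropout is defined above (with drop_prob among the parameters) but is not part of this Sequential model, so the results below come from the BN-only model. A variant that does use it would place Dropout after each activation; a sketch:

# Sketch: an alternative model with Dropout after each activation
# (not the model used for the results below)
model_do = torch.nn.Sequential(linear1, bn1, relu, dropout,
                               linear2, bn2, relu, dropout,
                               linear3).to(device)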
# Cost/Loss & Optimizer
criterion = torch.nn.CrossEntropyLoss().to(device)  # multi-class classification
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
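CrossEntropyLoss applies LogSoftmax internally, which is why the model above ends with raw logits and no softmax layer. A small sketch of the equivalence with NLLLoss:

# Sketch: cross_entropy(logits, y) == nll_loss(log_softmax(logits), y)
import torch.nn.functional as F

logits = torch.randn(4, 10)           # 4 samples, 10 classes
targets = torch.randint(0, 10, (4,))
print(torch.allclose(F.cross_entropy(logits, targets),
                     F.nll_loss(F.log_softmax(logits, dim=1), targets)))  # True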
model.train() and model.eval()
We declare model.train() and model.eval() so that Batch Normalization normalizes the test data the same way as the training data. Each batch has its own mean and standard deviation; during training, BatchNorm keeps a running average of these statistics, and in eval mode it normalizes with those stored running statistics instead of the current batch's. (Dropout, if present, is likewise disabled in eval mode.)
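A minimal sketch of this behavior with a standalone BatchNorm1d:

# Sketch: BatchNorm1d uses batch statistics in train mode and
# stored running statistics in eval mode.
bn = torch.nn.BatchNorm1d(4)
x = torch.randn(8, 4) * 3 + 5   # a batch with mean ≈ 5, std ≈ 3

bn.train()
_ = bn(x)                       # normalizes with this batch's stats
print(bn.running_mean)          # running_mean has moved toward 5

bn.eval()
_ = bn(x)                       # normalizes with the stored running stats
print(bn.running_mean)          # unchanged in eval mode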
total_batch = len(data_loader)
model.train()
for epoch in range(epochs):
    avg_cost = 0
    for X, y in data_loader:
        X = X.view(-1, 28 * 28).to(device)
        y = y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, y)  # Loss
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    print('Epoch: {} Cost: {:.6f}'.format(epoch + 1, avg_cost))
Epoch: 1 Cost: 0.183778
Epoch: 2 Cost: 0.147789
Epoch: 3 Cost: 0.130806
Epoch: 4 Cost: 0.122594
Epoch: 5 Cost: 0.114198
Epoch: 6 Cost: 0.106254
Epoch: 7 Cost: 0.100409
Epoch: 8 Cost: 0.098111
Epoch: 9 Cost: 0.093448
Epoch: 10 Cost: 0.091531
with torch.no_grad():
    model.eval()

    X_test = mnist_test.data.view(-1, 28 * 28).float().to(device)
    y_test = mnist_test.targets.to(device)

    y_pred = model(X_test)
    correct = torch.argmax(y_pred, 1) == y_test
    accuracy = correct.float().mean()
    print('Accuracy: {}'.format(accuracy))

    # a single prediction
    r = random.randint(0, len(mnist_test) - 1)
    X_single_data = mnist_test.data[r:r + 1].view(-1, 28 * 28).float().to(device)
    y_single_data = mnist_test.targets[r:r + 1].to(device)

    print('Label: {}'.format(y_single_data.item()))
    single_prediction = model(X_single_data)
    print('Prediction: {}'.format(torch.argmax(single_prediction, 1).item()))
Accuracy: 0.8303999900817871
Label: 5
Prediction: 5
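The accuracy here is noticeably lower than the training cost suggests. A likely reason: mnist_test.data holds raw pixel values in [0, 255], while the training inputs were scaled to [0, 1] by ToTensor(). A sketch of an evaluation consistent with training (dividing by 255):

# Sketch: scale the raw test images to [0, 1] to match ToTensor()
with torch.no_grad():
    model.eval()
    X_test = (mnist_test.data.float() / 255.).view(-1, 28 * 28).to(device)
    y_test = mnist_test.targets.to(device)
    accuracy = (torch.argmax(model(X_test), 1) == y_test).float().mean()
    print('Accuracy: {}'.format(accuracy.item()))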