Fashion Recommendation System
[Data Information]
Data Source: https://www.aihub.or.kr/aihubdata/data/view.do?currMenu=115&topMenu=100&aihubDataSe=realm&dataSetSn=78
Raw Data Type : 720x1280x24b
[Image]
Item-Image : 16585장
Model-Image : 18040장
[Version]
Augmented Convolutional AE, 128x128x3
0. Setting
Goole Drive Connecting
1
pwd
1
'/Users/haesik/AISCHOOL/Final_Project/img'
1
2
from google.colab import drive
drive.mount('/content/drive')
1
Mounted at /content/drive
1
%cd '/content/drive/MyDrive/Code Lion/Final'
1
/content/drive/MyDrive/Code Lion/Final
1
!ls
1
data 'Fashion Recommendation System0810.ipynb' figure 'low version'
Library Call
1
pip install tensorflow_addons
1
pip install opencv-python
1
pip install pydot
1
pip install graphviz
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# 상용 라이브러리
from glob import glob
import os
import cv2
import pandas as pd
import numpy as np
import datetime as dt
import time
# 시각화 라이브러리
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
import plotly.express as px
import plotly.graph_objects as go
# 한글 폰트 패치
matplotlib.rcParams['font.family']='Malgun Gothic'
matplotlib.rcParams['axes.unicode_minus'] = False
# 시각화 포맷 설정
plt.style.use("ggplot")
sns.set(font_scale=2)
sns.set_style("whitegrid")
sns.set_context("talk")
# 경고문 처리
import warnings
warnings.filterwarnings('ignore')
# sckit-learn
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances, pairwise_distances
# Tensorflow 라이브러리
import tensorflow as tf
from tensorflow import keras
from tqdm import tqdm
import tensorflow_addons as tfa
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Input, MaxPooling2D, UpSampling2D, Dropout, BatchNormalization
from tensorflow.keras import layers, models
# from tensorflow.keras.utils import np_utils
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
User Function Definition
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# -------------Image Load & Preprocessing ------------- #
# Global Constant Definition
imgR = 128
imgC = 128
channel = 3
crop_y = (250,1000)
crop_x = (40,680)
root_dir = 'D:/Fasion_Images/Train/train_itemimages/Item-Image/'
model_dir = 'D:/Fasion_Images/Train/train_modelimages/Model-Image_deid/'
# Single Image Load
def img_read(file):
img = cv2.imread(file)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
return img
# Gamma Correction
def adjust_gamma(img, gamma=1.0): # 감마 보정 함수
invGamma = 1.0 / gamma
out = img.copy().astype(np.float)
out = ((out / 255) ** invGamma) * 255
return out.astype(np.uint8)
# Image Crop & Resize
def img_crop(img):
img = img[crop_y[0]:crop_y[1], crop_x[0]:crop_x[1]]
img = cv2.resize(img, (imgR,imgC), cv2.INTER_LINEAR)
return img
# Load All img from folder
def load_img_folder():
# 의상의 전방부 사진만 가져오기
wfiles = sorted(glob(f'{root_dir}/*_F.jpg'))
img_list = []
label_list = []
for file in wfiles:
img = img_read(file)
img = img_crop(img)
img = adjust_gamma(img, 0.8)
img_list.append(img)
label_list.append(file.split('/')[-1])
return np.array(img_list), label_list
# Top10 유사 이미지 시각화
def top10_visualize(img_set,top10_idx):
fig = plt.figure()
fig, ax = plt.subplots(2, 5, figsize=(5*3,2*3))
plt.suptitle('Top10 Similar Images',size=20)
k=0
for i in range(2):
for j in range(5):
axis = ax[i,j]
axis.get_xaxis().set_visible(False)
axis.get_yaxis().set_visible(False)
axis.imshow(img_set[top10_idx[i+j]])
plt.axis('off')
k += 5
plt.show()
# -------------Image EDA & Visualization ------------- #
# plot_images
def plot_images(nRow, nCol, img_set):
fig = plt.figure()
fig, ax = plt.subplots(nRow, nCol, figsize=(nCol*4,nRow*4))
k=0
for i in range(nRow):
for j in range(nCol):
if nRow <= 1 : axis = ax[j]
else: axis = ax[i,j]
axis.get_xaxis().set_visible(False)
axis.get_yaxis().set_visible(False)
axis.imshow(img_set[k+j])
plt.axis('off')
k += nCol
plt.show()
# ------------- Model Function ------------- #
# Reconstruction Error Function Definition
def Reconstruction_Error(X_test,X_pred):
error_list = []
for i in range(len(X_test)):
ele = np.mean(np.power(X_test[i] - X_pred[i], 2),axis=1).mean()
error_list.append(ele)
return error_list
# Average Pooling Fuction Definition
def AVGpooling(raw_feature):
result = []
for i in range(raw_feature.shape[0]):
row= []
for j in range(raw_feature.shape[-1]):
row.append(raw_feature[i,:,:,j].mean())
result.append(row)
return np.array(result)
# ------------- Recommendation System ------------- #
def Fashion_coordination(top10_result, fashion_df):
result_df = []
for item in top10_result:
ele_df = pd.DataFrame(columns=fashion_df.columns)
for col in fashion_df.columns:
ele = fashion_df[fashion_df[col] == item]
ele_df = pd.concat([ele_df,ele])
result_df.append(ele_df)
return result_df
1. Data Load
1
2
3
4
# Image DataSet Load
img_set, label_set = load_img_folder()
print('img_set.shape :',img_set.shape)
print('label_set.shape :',len(label_set))
1
2
img_set.shape : (16585, 128, 128, 3)
label_set.shape : 16585
1
2
# Image Information
type(img_set), round(img_set.mean(),4)
1
(numpy.ndarray, 215.1488)
1
2
# Label Information
type(label_set), label_set[:4]
1
2
3
4
5
(list,
['Item-Image\\0928015_F.jpg',
'Item-Image\\0929029_F.jpg',
'Item-Image\\1008001_F.jpg',
'Item-Image\\1008004_F.jpg'])
1
2
# Image Sample
plt.imshow(img_set[200])
1
2
# Multi Image Samples
_ = plot_images(2,5,img_set)
2. Data Preprocessing
1
2
3
4
# Data Normalization
img_scaled = img_set / 255.0
print('Raw Image Format :',img_set.shape, img_set.mean())
print('Scaled Image Format :',img_scaled.shape, img_scaled.mean())
1
2
Raw Image Format : (16585, 128, 128, 3) 215.1488384048635
Scaled Image Format : (16585, 128, 128, 3) 0.8437209349210284
1
2
3
4
5
# Train, Test Data Split
np.random.seed(42)
X_train, X_test, y_train, y_test = train_test_split(img_scaled, label_set, random_state=42, test_size=0.2, shuffle=True)
print(X_train.shape, len(y_train))
print(X_test.shape, len(y_test))
1
2
(13268, 128, 128, 3) 13268
(3317, 128, 128, 3) 3317
3. Convolutional Autoencoder Modeling
Encoder
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Encoder Part Modeling
tf.keras.backend.clear_session()
encoder_input = Input(shape=(imgR,imgC,channel))
# Fisrt ConvPooling Layer : 128
L1 = Conv2D(128, (3, 3), activation='relu', padding='same')(encoder_input)
L2 = MaxPooling2D((2, 2))(L1)
# Second ConvPooling Layer : 64
L3 = Conv2D(64, (3, 3), activation='relu', padding='same')(L2)
L4 = Conv2D(64, (3, 3), activation='relu', padding='same')(L3)
L5 = BatchNormalization()(L4)
L6 = MaxPooling2D((2, 2))(L5)
# Third ConvPooling Layer : 32
L7 = Conv2D(32, (3, 3), activation='relu', padding='same')(L6)
L8 = Conv2D(32, (3, 3), activation='relu', padding='same')(L7)
L9 = BatchNormalization()(L8)
L10 = MaxPooling2D((2, 2))(L9)
# Fourth ConvPooling Layer : 16
L11 = Conv2D(16, (3, 3), activation='relu', padding='same')(L10)
L12 = Conv2D(16, (3, 3), activation='relu', padding='same')(L11)
L13 = BatchNormalization()(L12)
L14 = MaxPooling2D((2, 2))(L13)
# Fifth ConvPooling Layer : 8
L15 = Conv2D(8, (3, 3), activation='relu', padding='same')(L14)
L16 = MaxPooling2D((2, 2))(L15)
encoder_output = L16
1
2
3
# Encoder Summary()
encoder = tf.keras.Model(encoder_input, encoder_output)
encoder.summary()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 128, 128, 3)] 0
_________________________________________________________________
conv2d (Conv2D) (None, 128, 128, 128) 3584
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 64, 64, 128) 0
_________________________________________________________________
conv2d_1 (Conv2D) (None, 64, 64, 64) 73792
_________________________________________________________________
conv2d_2 (Conv2D) (None, 64, 64, 64) 36928
_________________________________________________________________
batch_normalization (BatchNo (None, 64, 64, 64) 256
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 64) 0
_________________________________________________________________
conv2d_3 (Conv2D) (None, 32, 32, 32) 18464
_________________________________________________________________
conv2d_4 (Conv2D) (None, 32, 32, 32) 9248
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 32, 32) 128
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 16, 16, 32) 0
_________________________________________________________________
conv2d_5 (Conv2D) (None, 16, 16, 16) 4624
_________________________________________________________________
conv2d_6 (Conv2D) (None, 16, 16, 16) 2320
_________________________________________________________________
batch_normalization_2 (Batch (None, 16, 16, 16) 64
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 8, 8, 16) 0
_________________________________________________________________
conv2d_7 (Conv2D) (None, 8, 8, 8) 1160
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 4, 4, 8) 0
=================================================================
Total params: 150,568
Trainable params: 150,344
Non-trainable params: 224
_________________________________________________________________
1
2
# Plot Encoder Diagram
plot_model(encoder, to_file='figure/Eecoder0818.png', show_shapes=True)
1
('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')
Decoder
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Decoder Part Modeling
decoder_input = Input(shape=(4,4,8)) # Decoder의 Input Shape는 Hard Coding이 필요함. (개선점)
# First ConvPooling Layer : 8
L17 = Conv2D(8, (3, 3), activation='relu', padding='same')(decoder_input)
L18 = UpSampling2D((2, 2))(L17)
# Second ConvPooling Layer : 16
L19 = Conv2D(16, (3, 3), activation='relu', padding='same')(L18)
L20 = Conv2D(16, (3, 3), activation='relu', padding='same')(L19)
L21 = BatchNormalization()(L20)
L22 = UpSampling2D((2, 2))(L21)
# Third ConvPooling Layer : 32
L23 = Conv2D(32, (3, 3), activation='relu', padding='same')(L22)
L24 = Conv2D(32, (3, 3), activation='relu', padding='same')(L23)
L25 = BatchNormalization()(L24)
L26 = UpSampling2D((2, 2))(L25)
# Fourth ConvPooling Layer : 64
L27 = Conv2D(64, (3, 3), activation='sigmoid', padding='same')(L26)
L28 = Conv2D(64, (3, 3), activation='relu', padding='same')(L27)
L29 = BatchNormalization()(L28)
L30 = UpSampling2D((2, 2))(L29)
# Fifth ConvPooling Layer : 128
L31 = Conv2D(128, (3, 3), activation='sigmoid', padding='same')(L30)
L32 = UpSampling2D((2, 2))(L31)
# Sixth ConvPooling Layer : 3
L33 = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(L32)
decoder_output = L33
1
2
3
# Decoder Summary()
decoder = tf.keras.Model(decoder_input, decoder_output)
decoder.summary()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 4, 4, 8)] 0
_________________________________________________________________
conv2d_8 (Conv2D) (None, 4, 4, 8) 584
_________________________________________________________________
up_sampling2d (UpSampling2D) (None, 8, 8, 8) 0
_________________________________________________________________
conv2d_9 (Conv2D) (None, 8, 8, 16) 1168
_________________________________________________________________
conv2d_10 (Conv2D) (None, 8, 8, 16) 2320
_________________________________________________________________
batch_normalization_3 (Batch (None, 8, 8, 16) 64
_________________________________________________________________
up_sampling2d_1 (UpSampling2 (None, 16, 16, 16) 0
_________________________________________________________________
conv2d_11 (Conv2D) (None, 16, 16, 32) 4640
_________________________________________________________________
conv2d_12 (Conv2D) (None, 16, 16, 32) 9248
_________________________________________________________________
batch_normalization_4 (Batch (None, 16, 16, 32) 128
_________________________________________________________________
up_sampling2d_2 (UpSampling2 (None, 32, 32, 32) 0
_________________________________________________________________
conv2d_13 (Conv2D) (None, 32, 32, 64) 18496
_________________________________________________________________
conv2d_14 (Conv2D) (None, 32, 32, 64) 36928
_________________________________________________________________
batch_normalization_5 (Batch (None, 32, 32, 64) 256
_________________________________________________________________
up_sampling2d_3 (UpSampling2 (None, 64, 64, 64) 0
_________________________________________________________________
conv2d_15 (Conv2D) (None, 64, 64, 128) 73856
_________________________________________________________________
up_sampling2d_4 (UpSampling2 (None, 128, 128, 128) 0
_________________________________________________________________
conv2d_16 (Conv2D) (None, 128, 128, 3) 3459
=================================================================
Total params: 151,147
Trainable params: 150,923
Non-trainable params: 224
_________________________________________________________________
1
2
# Plot Decoder Diagram
plot_model(decoder, to_file='figure/Decoder0818.png', show_shapes=True)
1
('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')
AutoEncoder (Encoder + Decoder)
1
2
3
4
5
6
7
8
# Convolutional Autoencoder Modeling
# Connecting Encoder & Decoder Part
init_input = Input(shape=(imgR,imgC,channel))
connect_input = encoder(init_input)
connect_output = decoder(connect_input)
model = tf.keras.Model(init_input, connect_output)
1
2
# Model Compile
model.compile(optimizer='Adam',loss='binary_crossentropy')
1
2
# Convolutional Autoencoder Summary
model.summary()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
Model: "model_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_3 (InputLayer) [(None, 128, 128, 3)] 0
_________________________________________________________________
model (Functional) (None, 4, 4, 8) 150568
_________________________________________________________________
model_1 (Functional) (None, 128, 128, 3) 151147
=================================================================
Total params: 301,715
Trainable params: 301,267
Non-trainable params: 448
_________________________________________________________________
1
2
# Plot ConvAE Diagram
plot_model(model, to_file='figure/ConvAE0818.png', show_shapes=True)
1
('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')
1
2
3
4
5
6
7
8
9
10
# Checkpoint Callback Function Definition
checkpoint_dir = 'Training-checkpoint/'
checkpoint_path = checkpoint_dir + 'cp-{epoch:04d}-{val_loss:.2f}.ckpt'
# 10번 에포크씩 val_loss 변화 확인- 변화 없을 시 학습 중단
patience_epoch = 20
early_stopping = EarlyStopping(monitor='val_loss', patience=patience_epoch)
cp = ModelCheckpoint(filepath=checkpoint_path, verbose=1,
save_weights_only=True,
save_best_only=True)
1
2
3
4
5
6
7
8
9
# TQDM Tracking Conv-AE Model Training
nb_epochs = 100
batch_size = 64
start = time.time()
tqdm_callback = tfa.callbacks.TQDMProgressBar()
history = model.fit(X_train, X_train, epochs=nb_epochs, batch_size=batch_size, shuffle=True,
callbacks=[early_stopping, cp, tqdm_callback], validation_split=0.05).history
end = time.time()
1
2
3
4
# Print Training Time
train_time = end-start
result = dt.timedelta(seconds=train_time)
print('Training Time :',str(result).split('.')[0])
1
Training Time : 3:25:02
1
2
3
# Training History DataFrame
df_hist = pd.DataFrame(history)
df_hist.tail()
loss | val_loss | |
---|---|---|
92 | 0.208783 | 0.220412 |
93 | 0.208756 | 0.214018 |
94 | 0.208805 | 0.223127 |
95 | 0.208726 | 0.212937 |
96 | 0.208711 | 0.219324 |
1
2
# Best Estimator
df_hist.sort_values(by='val_loss').head(1)
loss | val_loss | |
---|---|---|
76 | 0.209124 | 0.212793 |
1
2
3
4
5
6
7
8
9
# Save Model
# Encoder Part Save
encoder.save('model_save/Encoder_0818/')
# Decoder Part Save
decoder.save('model_save/Decoder_0818/')
# Convolutional Autoencoder Model Save
model.save('model_save/ConvAE_0818/')
1
2
3
4
5
WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.
INFO:tensorflow:Assets written to: model_save/Encoder_0818/assets
WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.
INFO:tensorflow:Assets written to: model_save/Decoder_0818/assets
INFO:tensorflow:Assets written to: model_save/ConvAE_0818/assets
1
2
3
4
# Load Model
encoder = keras.models.load_model('model_save_2/model_save/Encoder_0818/')
decoder = keras.models.load_model('model_save_2/model_save/Decoder_0818/')
model = keras.models.load_model('model_save_2/model_save/ConvAE_0818/')
1
2
WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.
WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.
4. Performance Evaluation
1
2
3
4
5
6
7
8
9
10
# Loss Learning Curve
plt.figure(figsize=(8,6))
plt.title('Loss Learning Curve')
plt.plot(df_hist.loss, label='loss', color='black', linewidth=2.0)
plt.plot(df_hist.val_loss, label='val_loss', color='green', linewidth=2.0)
plt.axvline(x=df_hist.shape[0]-patience_epoch, color='r', linestyle='--',label='best epoch')
plt.xlabel('Epochs', fontsize=10)
plt.ylabel('Loss', fontsize=10)
plt.legend(['Training Loss', 'Validation Loss', 'Saturation epoch'], fontsize=14)
plt.show()
1
2
3
# Test Data Evaluation
test_loss = model.evaluate(X_test,X_test)
print('test loss :',np.round(test_loss,4))
1
2
104/104 [==============================] - 321s 3s/step - loss: 0.2163
test loss : 0.2163
1
2
3
# Test Data Prediction(Reconstruction)
X_pred = model.predict(X_test)
X_pred.shape,round(X_pred.mean(),4)
1
2
3
4
5
6
7
104/104 [==============================] - 289s 3s/step
((3317, 128, 128, 3), 0.8249)
1
2
# Test Image Samples
_ = plot_images(2,5,X_test)
1
2
# Prediction Image Samples
_ = plot_images(2,5,X_pred)
1
2
3
# Reconstruction Error
X_error = Reconstruction_Error(X_test,X_pred)
len(X_error), type(X_error)
1
(3317, list)
1
2
3
4
5
6
7
8
9
10
11
# Reconstruction Error Visaulization
X_loop = np.arange(len(X_error))
boundary = 0.05
plt.figure(figsize=(10,6))
sns.scatterplot(X_loop, X_error, color = 'red', alpha=0.5, marker='*', label='Error Points')
sns.lineplot(X_loop, boundary, color='blue', linestyle='--', label='95% CI Boundary', alpha=0.8)
plt.title('Reconstruction Error Variances', size=18)
plt.legend(loc="upper right")
plt.ylim(0.00,0.10)
plt.show()
5. Latent Space Projection
1
2
3
# Raw Latent Feature
raw_feature = encoder.predict(img_scaled)
raw_feature.shape, type(raw_feature)
1
2
3
# Deep Compact Latent 8 Features
latent_feature = AVGpooling(raw_feature)
latent_feature.shape, type(latent_feature)
1
((16585, 8), numpy.ndarray)
1
2
3
4
5
6
7
# Latent Feature DataFrame
comp_list = ['comp1','comp2','comp3','comp4','comp5','comp6','comp7','comp8','label']
df = pd.DataFrame(latent_feature)
df = pd.concat([df,pd.Series(label_set)],axis=1)
df.columns = comp_list
print('df.shape :',df.shape)
df.head()
1
2
# Latent Feature DataFrame export to csv
df.to_csv('latent_feature_0818.csv',index=False)
6. Similarity Calculation & Top10 Item Return
1
2
3
4
# Latent Feature DataFrame load D:\Fasion_Images
df = pd.read_csv('D:/Fasion_Images/latent_feature_0818.csv')
print(df.shape)
df.head()
1
(16585, 9)
comp1 | comp2 | comp3 | comp4 | comp5 | comp6 | comp7 | comp8 | label | |
---|---|---|---|---|---|---|---|---|---|
0 | 1.816822 | 1.886551 | 1.807105 | 0.858414 | 1.428525 | 2.295357 | 0.791543 | 1.505969 | 0928015_F.jpg |
1 | 1.904211 | 1.930044 | 1.972450 | 0.783947 | 1.302266 | 2.407675 | 0.821974 | 1.499656 | 0929029_F.jpg |
2 | 6.471252 | 6.036780 | 5.894671 | 2.869880 | 2.186035 | 10.727018 | 2.532120 | 5.862957 | 1008001_F.jpg |
3 | 5.119862 | 4.681663 | 5.019187 | 2.043906 | 1.902202 | 7.571554 | 1.737040 | 4.642845 | 1008004_F.jpg |
4 | 4.114837 | 3.997970 | 3.995601 | 1.616894 | 1.459364 | 5.931483 | 1.358331 | 3.653468 | 1008006_F.jpg |
1
2
3
4
# Latent Feature Data & Label Split
data = df.drop('label',axis=1)
label = df['label']
print(data.shape, label.shape)
1
(16585, 8) (16585,)
Cosine Similarity
1
2
# data downcasting
data = data.astype('float32')
1
2
3
4
5
6
# Cosine Similarity Computation
from sklearn.metrics.pairwise import cosine_similarity
cosine_matrix = cosine_similarity(data, data)
print('cosine_matrix.shape :',cosine_matrix.shape)
cosine_matrix[:4,:4]
1
2
3
4
5
6
7
8
9
10
cosine_matrix.shape : (16585, 16585)
array([[1. , 0.9987278 , 0.97110724, 0.97986597],
[0.9987278 , 0.99999994, 0.97669864, 0.98550713],
[0.97110724, 0.97669864, 0.99999994, 0.9974694 ],
[0.97986597, 0.98550713, 0.9974694 , 1. ]], dtype=float32)
1
2
3
4
5
# Cosine Similarity Visualization
plt.figure(figsize=(10,8))
plt.title('Latent Feature Cosine Similarity')
sns.heatmap(cosine_matrix[:100, :100], cmap='RdBu')
plt.show()
1
2
3
4
# 기존 이미지 중 단일 샘플 추출
sample_idx = np.random.choice(np.arange(df.shape[0]),1)[0]
sample = data.loc[sample_idx,:].values.reshape(1,-1)
print('sample_idx :',sample_idx)
1
sample_idx : 5390
1
2
3
4
# 기존 이미지 중 단일 샘플에 대한 코사인 유사도 계산식
sample_cosine_sim = cosine_similarity(sample, data)
print('sample_cosine_sim.shape :',sample_cosine_sim.shape)
1
sample_cosine_sim.shape : (1, 16585)
1
2
3
4
5
6
7
8
9
10
# # 새로운 이미지 중 단일 샘플에 대한 코사인 유사도 계산식
# smp = New_image_preprocessing('sample.jpg')
# smp_emb = encoder.predict(np.reshape(smp,(1,64,64,3)))
# smp_comp = AVGpooling(smp_emb)
# print('smp.shape :',smp.shape)
# plt.imshow(smp)
# sample_cosine_sim = cosine_similarity(smp_comp, data)
# print('sample_cosine_sim.shape :',sample_cosine_sim.shape)
# print()
1
2
3
4
# Define Sample Similarity DataFrame
df_cosine = pd.DataFrame(sample_cosine_sim.T, index=df.index, columns=['sample'])
print('df_cosine.shape :',df_cosine.shape)
df_cosine.head(4)
1
df_cosine.shape : (16585, 1)
sample | |
---|---|
0 | 0.963485 |
1 | 0.972127 |
2 | 0.996787 |
3 | 0.997025 |
1
2
3
4
5
# Return Top10 Similar Items
top10_idx_cosine = df_cosine['sample'].nlargest(10).index
top10_label_cosine = label[top10_idx_cosine].values
print('top10_idx_cosine :',top10_idx_cosine)
print(top10_label_cosine)
1
2
3
4
top10_idx_cosine : Int64Index([5390, 6126, 5178, 4123, 5665, 7182, 7177, 6353, 7232, 7678], dtype='int64')
['1103045_F.jpg' '1105054_F.jpg' '1102111_F.jpg' '1028064_F.jpg'
'1103367_F.jpg' '1109245_F.jpg' '1109239_F.jpg' '1105319_F.jpg'
'1109301_F.jpg' '1110275_F.jpg']
1
2
3
# 기존 이미지 중 단일 샘플 이미지
print(df.loc[sample_idx,'label'])
_ = plt.imshow(img_set[sample_idx])
1
2
# Top10 유사 이미지 시각화
top10_visualize(img_set,top10_idx_cosine)
Euclidean Distance Calculation
1
2
3
4
5
6
# Euclidean Distance Calculation
from sklearn.metrics.pairwise import euclidean_distances
ec_matrix = 1 / euclidean_distances(data, data)
print('ec_matrix.shape :',ec_matrix.shape)
ec_matrix[:4,:4]
1
2
3
4
5
6
7
8
9
10
ec_matrix.shape : (16585, 16585)
array([[ inf, 3.7284596 , 0.08076027, 0.12017515],
[3.7284596 , inf, 0.08180973, 0.12260531],
[0.08076027, 0.08180973, inf, 0.24063474],
[0.12017515, 0.12260531, 0.24063474, inf]], dtype=float32)
1
2
3
4
5
# Euclidean Distance Visualization
plt.figure(figsize=(10,8))
plt.title('Latent Feature Euclidean Distance')
sns.heatmap(ec_matrix[:100, :100], cmap='RdBu')
plt.show()
1
2
3
4
# 기존 이미지 중 단일 샘플에 대한 유클리디안 거리 계산식
sample_ec_distance = 1 / euclidean_distances(sample, data)
print('sample_ec_distance.shape :',sample_ec_distance.shape)
1
sample_ec_distance.shape : (1, 16585)
1
2
3
4
# Define Sample Similarity DataFrame
df_ec = pd.DataFrame(sample_ec_distance.T, index=df.index, columns=['sample'])
print('df_ec.shape :',df_ec.shape)
df_ec.head(4)
1
df_ec.shape : (16585, 1)
sample | |
---|---|
0 | 0.155935 |
1 | 0.159182 |
2 | 0.158337 |
3 | 0.394013 |
1
2
3
4
# Return Top10 Similar Items
top10_idx_ec = df_ec['sample'].nlargest(10).index
print('top10_idx_ec :',top10_idx_ec)
print(label[top10_idx_ec].values)
1
2
3
4
top10_idx_ec : Int64Index([860, 15170, 931, 355, 783, 2415, 1082, 1384, 15172, 9433], dtype='int64')
['1014162_F.jpg' '1208281_F.jpg' '1014240_F.jpg' '1012304_F.jpg'
'1014080_F.jpg' '1020250_F.jpg' '1015009_F.jpg' '1015412_F.jpg'
'1208283_F.jpg' '1116157_F.jpg']
1
2
3
# 기존 이미지 중 단일 샘플 이미지
print(df.loc[sample_idx,'label'])
_ = plt.imshow(img_set[sample_idx])
1
2
# Top10 유사 이미지 시각화
top10_visualize(img_set,top10_idx_ec)
Pearson Similarity
1
2
3
4
# Pearson Similarity Computation
pearson_sim = np.corrcoef(data.to_numpy())
print('pearson_sim.shape :',pearson_sim.shape)
pearson_sim[:4,:4]
1
2
3
4
5
6
7
8
9
10
pearson_sim.shape : (16585, 16585)
array([[1. , 0.99104718, 0.87296293, 0.91160757],
[0.99104718, 1. , 0.89450709, 0.93525527],
[0.87296293, 0.89450709, 1. , 0.9877586 ],
[0.91160757, 0.93525527, 0.9877586 , 1. ]])
1
2
3
4
5
# Pearson Similarity Visualization
plt.figure(figsize=(10,8))
plt.title('Latent Feature Pearson Similarity')
sns.heatmap(pearson_sim[:100, :100], cmap='RdBu')
plt.show()
1
2
3
4
# 기존 이미지 중 단일 샘플에 대한 코사인 유사도 계산식
sample_pearson_sim = np.corrcoef(x=data.to_numpy(),y=sample)
print('sample_pearson_sim.shape :',sample_pearson_sim.shape)
1
sample_pearson_sim.shape : (16586, 16586)
1
2
3
4
# Define Sample Similarity DataFrame
df_pearson = pd.DataFrame(sample_pearson_sim[-1,:-1], index=df.index, columns=['sample'])
print('df_pearson.shape :',df_pearson.shape)
df_pearson.head(4)
1
df_pearson.shape : (16585, 1)
sample | |
---|---|
0 | 0.854181 |
1 | 0.867406 |
2 | 0.953587 |
3 | 0.958042 |
1
2
3
4
# Return Top10 Similar Items
top10_idx_pearson = df_pearson['sample'].nlargest(10).index
print('top10_idx_pearson :',top10_idx_pearson)
print(label[top10_idx_pearson].values)
1
2
3
4
top10_idx_pearson : Int64Index([860, 1441, 732, 1727, 1363, 1724, 1487, 14339, 773, 1141], dtype='int64')
['1014162_F.jpg' '1015529_F.jpg' '1014025_F.jpg' '1016276_F.jpg'
'1015359_F.jpg' '1016273_F.jpg' '1016015_F.jpg' '1203183_F.jpg'
'1014069_F.jpg' '1015080_F.jpg']
1
2
3
# 기존 이미지 중 단일 샘플 이미지
print(df.loc[sample_idx,'label'])
_ = plt.imshow(img_set[sample_idx])
1
2
# Top10 유사 이미지 시각화
top10_visualize(img_set,top10_idx_pearson)
7. Fashion Coordination Recommendation
Fashion Coordination Dataframe Design
1
2
3
4
# fashion coordination dataframe
fashion_df = pd.read_csv('D:/Fasion_Images/uni_wearing.csv')
print('fashion_df.shape :',fashion_df.shape)
fashion_df.head()
1
fashion_df.shape : (18040, 6)
wearing | hat | main_top | inner_top | bottom | shoes | |
---|---|---|---|---|---|---|
0 | 1008_1008_720_A_A001_A001_000.jpg | 1008013.0 | 1008011 | NaN | 1008012.0 | NaN |
1 | 1030_1030_720_A_A002_232_223_222_A002_000.jpg | 1029449.0 | 1029157 | NaN | 1029107.0 | NaN |
2 | 1030_1030_720_A_A003_232_220_222_A003_000.jpg | 1029442.0 | 1029411 | NaN | 1029109.0 | NaN |
3 | 1030_1030_720_B_B002_232_221_223_B002_000.jpg | 1029434.0 | 1029073 | NaN | 1029141.0 | NaN |
4 | 1030_1030_720_B_B003_232_227_223_B003_000.jpg | 1029431.0 | 1029255 | NaN | 1029142.0 | NaN |
1
2
# fashion dataframe Information
fashion_df.info()
1
2
3
4
5
6
7
8
9
10
11
12
13
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18040 entries, 0 to 18039
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 wearing 18040 non-null object
1 hat 32 non-null float64
2 main_top 18040 non-null int64
3 inner_top 2838 non-null float64
4 bottom 16224 non-null float64
5 shoes 125 non-null float64
dtypes: float64(4), int64(1), object(1)
memory usage: 845.8+ KB
1
2
# fashion dataframe
print(fashion_df.isna().sum())
1
2
3
4
5
6
7
wearing 0
hat 18008
main_top 0
inner_top 15202
bottom 1816
shoes 17915
dtype: int64
1
2
3
# fashion dataframe Missing Value Imputation
fashion_df.fillna(0,inplace=True)
print('Remain Missing Value :',fashion_df.isna().sum().sum())
1
Remain Missing Value : 0
1
2
3
# fashion data preprocessing
fashion_df.iloc[:,1:] = fashion_df.iloc[:,1:].astype(int)
fashion_df.head()
wearing | hat | main_top | inner_top | bottom | shoes | |
---|---|---|---|---|---|---|
0 | 1008_1008_720_A_A001_A001_000.jpg | 1008013 | 1008011 | 0 | 1008012 | 0 |
1 | 1030_1030_720_A_A002_232_223_222_A002_000.jpg | 1029449 | 1029157 | 0 | 1029107 | 0 |
2 | 1030_1030_720_A_A003_232_220_222_A003_000.jpg | 1029442 | 1029411 | 0 | 1029109 | 0 |
3 | 1030_1030_720_B_B002_232_221_223_B002_000.jpg | 1029434 | 1029073 | 0 | 1029141 | 0 |
4 | 1030_1030_720_B_B003_232_227_223_B003_000.jpg | 1029431 | 1029255 | 0 | 1029142 | 0 |
Match the coordination
1
2
# top10 label by cosine similarity
top10_label_cosine
1
2
3
array(['1103045_F.jpg', '1105054_F.jpg', '1102111_F.jpg', '1028064_F.jpg',
'1103367_F.jpg', '1109245_F.jpg', '1109239_F.jpg', '1105319_F.jpg',
'1109301_F.jpg', '1110275_F.jpg'], dtype=object)
1
2
3
# top10 label preprocessing
top10_result_cosine = list(map(lambda x : int(x[:7]), top10_label_cosine))
top10_result_cosine
1
2
3
4
5
6
7
8
9
10
[1103045,
1105054,
1102111,
1028064,
1103367,
1109245,
1109239,
1105319,
1109301,
1110275]
1
2
3
4
5
# top10 Recommendation Result
recomm_df = Fashion_coordination(top10_result_cosine, fashion_df)
print('recomm_df :',len(recomm_df),'DataFrame')
for i in range(len(recomm_df)):
print(f"{i+1} item's coordination cases :",recomm_df[i].shape[0])
1
2
3
4
5
6
7
8
9
10
11
recomm_df : 10 DataFrame
1 item's coordination cases : 1
2 item's coordination cases : 1
3 item's coordination cases : 3
4 item's coordination cases : 2
5 item's coordination cases : 1
6 item's coordination cases : 1
7 item's coordination cases : 1
8 item's coordination cases : 1
9 item's coordination cases : 3
10 item's coordination cases : 1
1
2
3
4
5
# Total Similar Item Coordination DataFrame
recomm_total = pd.concat(recomm_df,axis=0)
recomm_total.reset_index(drop=True,)
print('recomm_total.shape :',recomm_total.shape)
recomm_total.head()
1
recomm_total.shape : (15, 6)
wearing | hat | main_top | inner_top | bottom | shoes | |
---|---|---|---|---|---|---|
6720 | 1104_1104_720_A_A078_172_274_017_A078_000.jpg | 0 | 1103045 | 1103303 | 1103338 | 0 |
7973 | 1106_1106_720_C_C115_298_290_288_C115_000.jpg | 0 | 1105335 | 1105054 | 1105031 | 0 |
6297 | 1103_1103_720_B_B020_263_260_B020_000.jpg | 0 | 1102111 | 0 | 1102055 | 0 |
6360 | 1103_1103_720_B_B112_263_078_B112_000.jpg | 0 | 1102111 | 0 | 1102023 | 0 |
6456 | 1103_1103_720_C_C097_263_263_C097_000.jpg | 0 | 1102111 | 0 | 1102116 | 0 |
Recommendation Service Output Result
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Recommendation Service Output Result : 2
count = 0
fig = plt.figure()
fig, ax = plt.subplots(2,5,figsize=(5*3,2*3))
plt.suptitle('Fashion Best Fit Recommendatation!!')
for i in range(2):
for j in range(5):
axis = ax[i,j]
axis.get_xaxis().set_visible(False)
axis.get_yaxis().set_visible(False)
try:
fashion_img_name = recomm_total.iloc[count,0]
img = img_read(model_dir + '/' + fashion_img_name)
axis.imshow(img)
plt.axis('off')
except:
pass
count+=1
plt.show()