Home Fashion Recommendation System
Post
Cancel

Fashion Recommendation System

Fashion Recommendation System

image image image

[Data Information]
Data Source: https://www.aihub.or.kr/aihubdata/data/view.do?currMenu=115&topMenu=100&aihubDataSe=realm&dataSetSn=78
Raw Data Type : 720x1280x24b

[Image]
Item-Image : 16585장
Model-Image : 18040장

[Version]
Augmented Convolutional AE, 128x128x3

0. Setting

Google Drive Connecting

1
pwd
1
'/Users/haesik/AISCHOOL/Final_Project/img'
1
2
from google.colab import drive
drive.mount('/content/drive')
1
Mounted at /content/drive
1
%cd '/content/drive/MyDrive/Code Lion/Final'
1
/content/drive/MyDrive/Code Lion/Final
1
!ls
1
 data  'Fashion Recommendation System0810.ipynb'   figure  'low version'

Library Call

1
pip install tensorflow_addons
1
pip install opencv-python
1
pip install pydot
1
pip install graphviz
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# 상용 라이브러리
from glob import glob
import os
import cv2
import pandas as pd
import numpy as np
import datetime as dt
import time

# 시각화 라이브러리
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
import plotly.express as px
import plotly.graph_objects as go

# 한글 폰트 패치
matplotlib.rcParams['font.family']='Malgun Gothic'
matplotlib.rcParams['axes.unicode_minus'] = False   

# 시각화 포맷 설정
plt.style.use("ggplot")
sns.set(font_scale=2)
sns.set_style("whitegrid")
sns.set_context("talk")

# 경고문 처리
import warnings
warnings.filterwarnings('ignore')

# sckit-learn
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances, pairwise_distances

# Tensorflow 라이브러리
import tensorflow as tf
from tensorflow import keras
from tqdm import tqdm
import tensorflow_addons as tfa
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Input, MaxPooling2D, UpSampling2D, Dropout, BatchNormalization
from tensorflow.keras import layers, models
# from tensorflow.keras.utils import np_utils
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

User Function Definition

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# -------------Image Load & Preprocessing ------------- #
# Global constants shared by the image loading / preprocessing helpers below.
imgR = 128                 # resize target passed to cv2.resize in img_crop; also used in the model Input shape
imgC = 128                 # resize target passed to cv2.resize in img_crop; also used in the model Input shape
channel = 3                # channel count used in the model Input shape
crop_y = (250,1000)        # (start, stop) slice applied to the first image axis in img_crop
crop_x = (40,680)          # (start, stop) slice applied to the second image axis in img_crop
# Folder that load_img_folder globs for '*_F.jpg' item images.
root_dir = 'D:/Fasion_Images/Train/train_itemimages/Item-Image/'
# Folder prepended to the 'wearing' file names when the worn-outfit images are read.
model_dir = 'D:/Fasion_Images/Train/train_modelimages/Model-Image_deid/'

# Single Image Load
def img_read(file):
    """Read one image file and return it as an RGB array.

    Args:
        file: Path of the image to load.

    Returns:
        The image returned by ``cv2.imread`` converted from BGR to RGB.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``file``. ``cv2.imread``
            signals this by returning ``None`` instead of raising, which
            previously surfaced as an unrelated error inside ``cvtColor``.
    """
    img = cv2.imread(file)
    if img is None:
        raise FileNotFoundError(f'Could not read image file: {file}')
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Gamma Correction
def adjust_gamma(img, gamma=1.0):
    """Apply gamma correction to an 8-bit image.

    Each value is mapped to ``((v / 255) ** (1 / gamma)) * 255`` and then
    truncated back to ``uint8``. The input array is not modified.

    Args:
        img: Image array with values on the 0-255 scale.
        gamma: Gamma value; the exponent applied is ``1 / gamma``.

    Returns:
        A new ``uint8`` array with the same shape as ``img``.
    """
    invGamma = 1.0 / gamma
    # np.float was removed from NumPy (1.24); np.float64 is the same dtype.
    # astype already returns a fresh array, so no separate copy is needed.
    out = img.astype(np.float64)
    out = ((out / 255) ** invGamma) * 255
    return out.astype(np.uint8)

# Image Crop & Resize
def img_crop(img):
    """Crop an image to the configured window and resize it to the model input size.

    The crop window comes from the module-level ``crop_y`` / ``crop_x``
    tuples and the target size from ``imgR`` / ``imgC``.

    Args:
        img: Image array indexed as ``[row, column, ...]``.

    Returns:
        The cropped image resized with bilinear interpolation.
    """
    img = img[crop_y[0]:crop_y[1], crop_x[0]:crop_x[1]]
    # cv2.resize expects dsize as (width, height), and its third positional
    # parameter is dst, so the interpolation flag has to be given by keyword.
    # NOTE(review): imgR / imgC are presumably rows / columns; both are 128
    # here, so the ordering has no visible effect with the current constants.
    return cv2.resize(img, (imgC, imgR), interpolation=cv2.INTER_LINEAR)

# Load All img from folder
def load_img_folder():
    """Load, crop and gamma-correct every front-view item image in ``root_dir``.

    Only files matching ``*_F.jpg`` are read. Each image goes through
    ``img_read``, ``img_crop`` and ``adjust_gamma(img, 0.8)``.

    Returns:
        A tuple ``(images, labels)``: ``images`` is ``np.array`` of the
        processed images and ``labels`` is the list of bare file names in
        the same (sorted) order.
    """
    # os.path.join avoids the doubled separator produced by root_dir's
    # trailing slash.
    wfiles = sorted(glob(os.path.join(root_dir, '*_F.jpg')))
    img_list = []
    label_list = []
    for file in wfiles:
        img = img_read(file)
        img = img_crop(img)
        img = adjust_gamma(img, 0.8)
        img_list.append(img)
        # glob returns OS-native separators, so splitting on '/' left a
        # 'Item-Image\\' prefix on Windows; basename handles both separators.
        label_list.append(os.path.basename(file))
    return np.array(img_list), label_list

# Top10 유사 이미지 시각화
def top10_visualize(img_set,top10_idx):
    """Show the images selected by ``top10_idx`` in a 2 x 5 grid.

    Args:
        img_set: Indexable collection of images accepted by ``imshow``.
        top10_idx: Iterable of indices into ``img_set``; at most the first
            ten are drawn, in row-major order.
    """
    # plt.subplots creates its own figure; a separate plt.figure() call only
    # left an empty extra figure behind.
    fig, ax = plt.subplots(2, 5, figsize=(5*3,2*3))
    plt.suptitle('Top10 Similar Images',size=20)
    for axis in ax.flat:
        # Turn off every axes explicitly; plt.axis('off') only affects the
        # current axes.
        axis.axis('off')
    # Pair axes and indices one-to-one. The previous i+j lookup drew items
    # 1-4 twice and never reached items 6-9.
    for axis, img_idx in zip(ax.flat, top10_idx):
        axis.imshow(img_set[img_idx])
    plt.show()

# -------------Image EDA & Visualization ------------- #
# plot_images
def plot_images(nRow, nCol, img_set):
    """Show the first ``nRow * nCol`` images of ``img_set`` in a grid.

    Args:
        nRow: Number of grid rows.
        nCol: Number of grid columns.
        img_set: Indexable collection of images accepted by ``imshow``.
            If it holds fewer than ``nRow * nCol`` images the remaining
            cells are left blank.
    """
    # squeeze=False always yields a 2-D axes array, so single-row,
    # single-column and 1x1 grids all work the same way.
    fig, ax = plt.subplots(nRow, nCol, figsize=(nCol*4,nRow*4), squeeze=False)
    for k, axis in enumerate(ax.flat):
        # Hide ticks and frame on every cell, not just the current axes.
        axis.axis('off')
        if k < len(img_set):
            axis.imshow(img_set[k])
    plt.show()

# ------------- Model Function ------------- #
# Reconstruction Error Function Definition
def Reconstruction_Error(X_test,X_pred):
    """Return the mean squared reconstruction error of every sample.

    For each position ``i`` the squared difference between ``X_test[i]`` and
    ``X_pred[i]`` is averaged along axis 1 and then averaged again.

    Args:
        X_test: Sequence of original samples.
        X_pred: Sequence of reconstructed samples, indexed like ``X_test``.

    Returns:
        A list with one error value per sample of ``X_test``.
    """
    return [
        np.mean(np.power(actual - X_pred[pos], 2), axis=1).mean()
        for pos, actual in enumerate(X_test)
    ]

# Average Pooling Fuction Definition
def AVGpooling(raw_feature):
    """Average every ``raw_feature[i, :, :, j]`` slice to a single value.

    Args:
        raw_feature: 4-D array indexed as ``[sample, :, :, channel]``.

    Returns:
        ``np.array`` built from one row per sample, each row holding the
        mean of every last-axis channel.
    """
    n_samples = raw_feature.shape[0]
    n_channels = raw_feature.shape[-1]
    pooled = [
        [raw_feature[s, :, :, c].mean() for c in range(n_channels)]
        for s in range(n_samples)
    ]
    return np.array(pooled)

# ------------- Recommendation System ------------- #
def Fashion_coordination(top10_result, fashion_df):
    """Collect, per item, the rows of ``fashion_df`` that contain that item.

    Every column of ``fashion_df`` is compared against the item with ``==``;
    the matching rows are stacked column by column, so a row that matches in
    several columns appears once per matching column.

    Args:
        top10_result: Iterable of item values to look up.
        fashion_df: DataFrame to search.

    Returns:
        A list with one DataFrame per item, in the order of ``top10_result``.
    """
    all_columns = fashion_df.columns

    def _rows_containing(target):
        # Start from an empty frame and append the hits of each column in turn.
        matched = pd.DataFrame(columns=all_columns)
        for name in all_columns:
            hits = fashion_df[fashion_df[name] == target]
            matched = pd.concat([matched, hits])
        return matched

    coordinations = []
    for target in top10_result:
        coordinations.append(_rows_containing(target))
    return coordinations

1. Data Load

1
2
3
4
# Image DataSet Load
img_set, label_set = load_img_folder()
print('img_set.shape :',img_set.shape)
print('label_set.shape :',len(label_set))
1
2
img_set.shape : (16585, 128, 128, 3)
label_set.shape : 16585
1
2
# Image Information
type(img_set), round(img_set.mean(),4)
1
(numpy.ndarray, 215.1488)
1
2
# Label Information
type(label_set), label_set[:4]
1
2
3
4
5
(list,
 ['Item-Image\\0928015_F.jpg',
  'Item-Image\\0929029_F.jpg',
  'Item-Image\\1008001_F.jpg',
  'Item-Image\\1008004_F.jpg'])
1
2
# Image Sample
plt.imshow(img_set[200])
1
2
# Multi Image Samples
_ = plot_images(2,5,img_set)

2. Data Preprocessing

1
2
3
4
# Data Normalization
# Scale pixel values to [0, 1] as float32. Dividing the raw array by 255.0
# directly would allocate a float64 copy, twice the size of a float32 one.
img_scaled = img_set.astype(np.float32)
img_scaled /= 255.0  # in place, so no second full-size temporary is created
print('Raw Image Format :',img_set.shape, img_set.mean())
print('Scaled Image Format :',img_scaled.shape, img_scaled.mean())
1
2
Raw Image Format : (16585, 128, 128, 3) 215.1488384048635
Scaled Image Format : (16585, 128, 128, 3) 0.8437209349210284
1
2
3
4
5
# Train, Test Data Split
np.random.seed(42)
X_train, X_test, y_train, y_test = train_test_split(img_scaled, label_set, random_state=42, test_size=0.2, shuffle=True)
print(X_train.shape, len(y_train))
print(X_test.shape, len(y_test))
1
2
(13268, 128, 128, 3) 13268
(3317, 128, 128, 3) 3317

3. Convolutional Autoencoder Modeling

Encoder

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Encoder Part Modeling
# Clear Keras' global state before the layers are created.
tf.keras.backend.clear_session()
encoder_input = Input(shape=(imgR,imgC,channel))

# First conv + pooling stage: 128 filters
L1 = Conv2D(128, (3, 3), activation='relu', padding='same')(encoder_input)
L2 = MaxPooling2D((2, 2))(L1)

# Second conv + pooling stage: 64 filters (two convs, batch norm, pooling)
L3 = Conv2D(64, (3, 3), activation='relu', padding='same')(L2)
L4 = Conv2D(64, (3, 3), activation='relu', padding='same')(L3)
L5 = BatchNormalization()(L4)
L6 = MaxPooling2D((2, 2))(L5)

# Third conv + pooling stage: 32 filters
L7 = Conv2D(32, (3, 3), activation='relu', padding='same')(L6)
L8 = Conv2D(32, (3, 3), activation='relu', padding='same')(L7)
L9 = BatchNormalization()(L8)
L10 = MaxPooling2D((2, 2))(L9)

# Fourth conv + pooling stage: 16 filters
L11 = Conv2D(16, (3, 3), activation='relu', padding='same')(L10)
L12 = Conv2D(16, (3, 3), activation='relu', padding='same')(L11)
L13 = BatchNormalization()(L12)
L14 = MaxPooling2D((2, 2))(L13)

# Fifth conv + pooling stage: 8 filters
L15 = Conv2D(8, (3, 3), activation='relu', padding='same')(L14)
L16 = MaxPooling2D((2, 2))(L15)

# Output of the fifth 2x2 pooling; this is the latent tensor fed to the decoder.
encoder_output = L16
1
2
3
# Encoder Summary()
encoder = tf.keras.Model(encoder_input, encoder_output)
encoder.summary()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, 128, 128, 3)]     0         
_________________________________________________________________
conv2d (Conv2D)              (None, 128, 128, 128)     3584      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 64, 64, 128)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 64, 64, 64)        73792     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 64, 64)        36928     
_________________________________________________________________
batch_normalization (BatchNo (None, 64, 64, 64)        256       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 32, 32, 32)        18464     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 32, 32, 32)        9248      
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 32, 32)        128       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 16, 16, 16)        4624      
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 16, 16, 16)        2320      
_________________________________________________________________
batch_normalization_2 (Batch (None, 16, 16, 16)        64        
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 8, 8, 16)          0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 8, 8, 8)           1160      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 4, 4, 8)           0         
=================================================================
Total params: 150,568
Trainable params: 150,344
Non-trainable params: 224
_________________________________________________________________
1
2
# Plot Encoder Diagram
plot_model(encoder, to_file='figure/Eecoder0818.png', show_shapes=True)
1
('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')

Decoder

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Decoder Part Modeling
# Derive the decoder input shape from the encoder output (batch axis dropped)
# instead of hard-coding (4,4,8), so the two halves stay in sync if the
# encoder or the image size changes.
decoder_input = Input(shape=tuple(encoder_output.shape[1:]))

# First conv + upsampling stage: 8 filters
L17 = Conv2D(8, (3, 3), activation='relu', padding='same')(decoder_input)
L18 = UpSampling2D((2, 2))(L17)

# Second conv + upsampling stage: 16 filters
L19 = Conv2D(16, (3, 3), activation='relu', padding='same')(L18)
L20 = Conv2D(16, (3, 3), activation='relu', padding='same')(L19)
L21 = BatchNormalization()(L20)
L22 = UpSampling2D((2, 2))(L21)

# Third conv + upsampling stage: 32 filters
L23 = Conv2D(32, (3, 3), activation='relu', padding='same')(L22)
L24 = Conv2D(32, (3, 3), activation='relu', padding='same')(L23)
L25 = BatchNormalization()(L24)
L26 = UpSampling2D((2, 2))(L25)

# Fourth conv + upsampling stage: 64 filters
# NOTE(review): this hidden layer uses 'sigmoid' while the other hidden
# layers use 'relu' - confirm this is intentional. Left unchanged because
# the saved weights were trained with it.
L27 = Conv2D(64, (3, 3), activation='sigmoid', padding='same')(L26)
L28 = Conv2D(64, (3, 3), activation='relu', padding='same')(L27)
L29 = BatchNormalization()(L28)
L30 = UpSampling2D((2, 2))(L29)

# Fifth conv + upsampling stage: 128 filters
# NOTE(review): hidden 'sigmoid' again - see the note on the fourth stage.
L31 = Conv2D(128, (3, 3), activation='sigmoid', padding='same')(L30)
L32 = UpSampling2D((2, 2))(L31)

# Output layer: 3 channels with sigmoid activation
L33 = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(L32)

decoder_output = L33
1
2
3
# Decoder Summary()
decoder = tf.keras.Model(decoder_input, decoder_output)
decoder.summary()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_2 (InputLayer)         [(None, 4, 4, 8)]         0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 4, 4, 8)           584       
_________________________________________________________________
up_sampling2d (UpSampling2D) (None, 8, 8, 8)           0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 8, 8, 16)          1168      
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 8, 8, 16)          2320      
_________________________________________________________________
batch_normalization_3 (Batch (None, 8, 8, 16)          64        
_________________________________________________________________
up_sampling2d_1 (UpSampling2 (None, 16, 16, 16)        0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 16, 16, 32)        4640      
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 16, 16, 32)        9248      
_________________________________________________________________
batch_normalization_4 (Batch (None, 16, 16, 32)        128       
_________________________________________________________________
up_sampling2d_2 (UpSampling2 (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 32, 32, 64)        18496     
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 32, 32, 64)        36928     
_________________________________________________________________
batch_normalization_5 (Batch (None, 32, 32, 64)        256       
_________________________________________________________________
up_sampling2d_3 (UpSampling2 (None, 64, 64, 64)        0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 64, 64, 128)       73856     
_________________________________________________________________
up_sampling2d_4 (UpSampling2 (None, 128, 128, 128)     0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 128, 128, 3)       3459      
=================================================================
Total params: 151,147
Trainable params: 150,923
Non-trainable params: 224
_________________________________________________________________
1
2
# Plot Decoder Diagram
plot_model(decoder, to_file='figure/Decoder0818.png', show_shapes=True)
1
('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')

AutoEncoder (Encoder + Decoder)

1
2
3
4
5
6
7
8
# Convolutional Autoencoder Modeling
# Connecting Encoder & Decoder Part

init_input = Input(shape=(imgR,imgC,channel))
connect_input = encoder(init_input)
connect_output = decoder(connect_input)

model = tf.keras.Model(init_input, connect_output)
1
2
# Model Compile
model.compile(optimizer='Adam',loss='binary_crossentropy')
1
2
# Convolutional Autoencoder Summary
model.summary()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_3 (InputLayer)         [(None, 128, 128, 3)]     0         
_________________________________________________________________
model (Functional)           (None, 4, 4, 8)           150568    
_________________________________________________________________
model_1 (Functional)         (None, 128, 128, 3)       151147    
=================================================================
Total params: 301,715
Trainable params: 301,267
Non-trainable params: 448
_________________________________________________________________
1
2
# Plot ConvAE Diagram
plot_model(model, to_file='figure/ConvAE0818.png', show_shapes=True)
1
('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')
1
2
3
4
5
6
7
8
9
10
# Checkpoint Callback Function Definition
checkpoint_dir = 'Training-checkpoint/'
checkpoint_path = checkpoint_dir + 'cp-{epoch:04d}-{val_loss:.2f}.ckpt'

# Stop training when val_loss has not improved for `patience_epoch` epochs.
patience_epoch = 20
# restore_best_weights=True puts the best-val_loss weights back on the model
# when training stops early. Without it the model keeps the weights of the
# final epoch, and those are what the later model.save() calls write out.
early_stopping = EarlyStopping(monitor='val_loss', patience=patience_epoch,
                               restore_best_weights=True)
# Write a weights-only checkpoint each time val_loss improves.
cp = ModelCheckpoint(filepath=checkpoint_path, verbose=1,
                     monitor='val_loss',
                     save_weights_only=True,
                     save_best_only=True)
1
2
3
4
5
6
7
8
9
# TQDM Tracking Conv-AE Model Training
nb_epochs = 100   # maximum number of epochs; early_stopping is also passed as a callback
batch_size = 64

start = time.time()   # wall-clock timestamp before fitting
tqdm_callback = tfa.callbacks.TQDMProgressBar()
# Autoencoder training: X_train is passed as both the input and the target.
# validation_split=0.05 supplies the val_loss that the callbacks monitor.
# Only the .history dict of the returned History object is kept.
history = model.fit(X_train, X_train, epochs=nb_epochs, batch_size=batch_size, shuffle=True,
                    callbacks=[early_stopping, cp, tqdm_callback], validation_split=0.05).history
end = time.time()     # wall-clock timestamp after fitting
1
2
3
4
# Print Training Time
train_time = end-start
result = dt.timedelta(seconds=train_time)
print('Training Time :',str(result).split('.')[0])
1
Training Time : 3:25:02
1
2
3
# Training History DataFrame
df_hist = pd.DataFrame(history)
df_hist.tail()
    loss      val_loss
92  0.208783  0.220412
93  0.208756  0.214018
94  0.208805  0.223127
95  0.208726  0.212937
96  0.208711  0.219324
1
2
# Best Estimator
df_hist.sort_values(by='val_loss').head(1)
    loss      val_loss
76  0.209124  0.212793
1
2
3
4
5
6
7
8
9
# Save Model
# Encoder Part Save
encoder.save('model_save/Encoder_0818/')

# Decoder Part Save
decoder.save('model_save/Decoder_0818/')

# Convolutional Autoencoder Model Save
model.save('model_save/ConvAE_0818/')
1
2
3
4
5
WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.
INFO:tensorflow:Assets written to: model_save/Encoder_0818/assets
WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.
INFO:tensorflow:Assets written to: model_save/Decoder_0818/assets
INFO:tensorflow:Assets written to: model_save/ConvAE_0818/assets
1
2
3
4
# Load Model
encoder = keras.models.load_model('model_save_2/model_save/Encoder_0818/')
decoder = keras.models.load_model('model_save_2/model_save/Decoder_0818/')
model = keras.models.load_model('model_save_2/model_save/ConvAE_0818/')
1
2
WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.
WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.

4. Performance Evaluation

1
2
3
4
5
6
7
8
9
10
# Loss Learning Curve
# Read the best epoch from the history itself. The previous
# `df_hist.shape[0]-patience_epoch` was one epoch late and only meaningful
# when early stopping actually triggered.
best_epoch = df_hist['val_loss'].idxmin()

plt.figure(figsize=(8,6))
plt.title('Loss Learning Curve')
plt.plot(df_hist.loss, label='loss', color='black', linewidth=2.0)
plt.plot(df_hist.val_loss, label='val_loss', color='green', linewidth=2.0)
plt.axvline(x=best_epoch, color='r', linestyle='--',label='best epoch')
plt.xlabel('Epochs', fontsize=10)
plt.ylabel('Loss', fontsize=10)
# The explicit list below overrides the per-artist labels given above.
plt.legend(['Training Loss', 'Validation Loss', 'Saturation epoch'], fontsize=14)
plt.show()
1
2
3
# Test Data Evaluation
test_loss = model.evaluate(X_test,X_test)
print('test loss :',np.round(test_loss,4))
1
2
104/104 [==============================] - 321s 3s/step - loss: 0.2163
test loss : 0.2163
1
2
3
# Test Data Prediction(Reconstruction)
X_pred = model.predict(X_test)
X_pred.shape,round(X_pred.mean(),4)
1
2
3
4
5
6
7
104/104 [==============================] - 289s 3s/step





((3317, 128, 128, 3), 0.8249)
1
2
# Test Image Samples
_ = plot_images(2,5,X_test)
1
2
# Prediction Image Samples
_ = plot_images(2,5,X_pred)
1
2
3
# Reconstruction Error 
X_error = Reconstruction_Error(X_test,X_pred)
len(X_error), type(X_error)
1
(3317, list)
1
2
3
4
5
6
7
8
9
10
11
# Reconstruction Error Visualization
X_loop = np.arange(len(X_error))
# NOTE(review): fixed threshold constant; it is not computed from the errors,
# so the '95% CI Boundary' legend text below should be confirmed.
boundary = 0.05

plt.figure(figsize=(10,6))
# x / y must be passed by keyword; current seaborn rejects positional data.
sns.scatterplot(x=X_loop, y=X_error, color = 'red', alpha=0.5, marker='*', label='Error Points')
# A constant threshold is a horizontal line; sns.lineplot was being handed a
# scalar as its y data.
plt.axhline(y=boundary, color='blue', linestyle='--', label='95% CI Boundary', alpha=0.8)
plt.title('Reconstruction Error Variances', size=18)
plt.legend(loc="upper right")
plt.ylim(0.00,0.10)
plt.show()

5. Latent Space Projection

1
2
3
# Raw Latent Feature
raw_feature = encoder.predict(img_scaled)
raw_feature.shape, type(raw_feature)
1
2
3
# Deep Compact Latent 8 Features 
latent_feature = AVGpooling(raw_feature)
latent_feature.shape, type(latent_feature)
1
((16585, 8), numpy.ndarray)
1
2
3
4
5
6
7
# Latent Feature DataFrame
comp_list = ['comp1','comp2','comp3','comp4','comp5','comp6','comp7','comp8','label']
df = pd.DataFrame(latent_feature)
df = pd.concat([df,pd.Series(label_set)],axis=1)
df.columns = comp_list
print('df.shape :',df.shape)
df.head()
1
2
# Latent Feature DataFrame export to csv
df.to_csv('latent_feature_0818.csv',index=False)

6. Similarity Calculation & Top10 Item Return

1
2
3
4
# Latent Feature DataFrame load D:\Fasion_Images
df = pd.read_csv('D:/Fasion_Images/latent_feature_0818.csv')
print(df.shape)
df.head()
1
(16585, 9)
comp1comp2comp3comp4comp5comp6comp7comp8label
01.8168221.8865511.8071050.8584141.4285252.2953570.7915431.5059690928015_F.jpg
11.9042111.9300441.9724500.7839471.3022662.4076750.8219741.4996560929029_F.jpg
26.4712526.0367805.8946712.8698802.18603510.7270182.5321205.8629571008001_F.jpg
35.1198624.6816635.0191872.0439061.9022027.5715541.7370404.6428451008004_F.jpg
44.1148373.9979703.9956011.6168941.4593645.9314831.3583313.6534681008006_F.jpg
1
2
3
4
# Latent Feature Data & Label Split
data = df.drop('label',axis=1)
label = df['label']
print(data.shape, label.shape)
1
(16585, 8) (16585,)

Cosine Similarity

1
2
# data downcasting
data = data.astype('float32')
1
2
3
4
5
6
# Cosine Similarity Computation
from sklearn.metrics.pairwise import cosine_similarity

cosine_matrix = cosine_similarity(data, data)
print('cosine_matrix.shape :',cosine_matrix.shape)
cosine_matrix[:4,:4]
1
2
3
4
5
6
7
8
9
10
cosine_matrix.shape : (16585, 16585)





array([[1.        , 0.9987278 , 0.97110724, 0.97986597],
       [0.9987278 , 0.99999994, 0.97669864, 0.98550713],
       [0.97110724, 0.97669864, 0.99999994, 0.9974694 ],
       [0.97986597, 0.98550713, 0.9974694 , 1.        ]], dtype=float32)
1
2
3
4
5
# Cosine Similarity Visualization
plt.figure(figsize=(10,8))
plt.title('Latent Feature Cosine Similarity')
sns.heatmap(cosine_matrix[:100, :100], cmap='RdBu')
plt.show()
1
2
3
4
# 기존 이미지 중 단일 샘플 추출
sample_idx = np.random.choice(np.arange(df.shape[0]),1)[0]
sample = data.loc[sample_idx,:].values.reshape(1,-1)
print('sample_idx :',sample_idx)
1
sample_idx : 5390
1
2
3
4
# 기존 이미지 중 단일 샘플에 대한 코사인 유사도 계산식
sample_cosine_sim = cosine_similarity(sample, data)

print('sample_cosine_sim.shape :',sample_cosine_sim.shape)
1
sample_cosine_sim.shape : (1, 16585)
1
2
3
4
5
6
7
8
9
10
# # 새로운 이미지 중 단일 샘플에 대한 코사인 유사도 계산식
# smp = New_image_preprocessing('sample.jpg')
# smp_emb = encoder.predict(np.reshape(smp,(1,64,64,3)))
# smp_comp = AVGpooling(smp_emb)
# print('smp.shape :',smp.shape)
# plt.imshow(smp)

# sample_cosine_sim = cosine_similarity(smp_comp, data)
# print('sample_cosine_sim.shape :',sample_cosine_sim.shape)
# print()
1
2
3
4
# Define Sample Similarity DataFrame
df_cosine = pd.DataFrame(sample_cosine_sim.T, index=df.index, columns=['sample'])
print('df_cosine.shape :',df_cosine.shape)
df_cosine.head(4)
1
df_cosine.shape : (16585, 1)
sample
00.963485
10.972127
20.996787
30.997025
1
2
3
4
5
# Return Top10 Similar Items
top10_idx_cosine = df_cosine['sample'].nlargest(10).index
top10_label_cosine = label[top10_idx_cosine].values
print('top10_idx_cosine :',top10_idx_cosine)
print(top10_label_cosine)
1
2
3
4
top10_idx_cosine : Int64Index([5390, 6126, 5178, 4123, 5665, 7182, 7177, 6353, 7232, 7678], dtype='int64')
['1103045_F.jpg' '1105054_F.jpg' '1102111_F.jpg' '1028064_F.jpg'
 '1103367_F.jpg' '1109245_F.jpg' '1109239_F.jpg' '1105319_F.jpg'
 '1109301_F.jpg' '1110275_F.jpg']
1
2
3
# 기존 이미지 중 단일 샘플 이미지
print(df.loc[sample_idx,'label'])
_ = plt.imshow(img_set[sample_idx])
1
2
# Top10 유사 이미지 시각화
top10_visualize(img_set,top10_idx_cosine)

Euclidean Distance Calculation

1
2
3
4
5
6
# Euclidean Distance Calculation
from sklearn.metrics.pairwise import euclidean_distances

# Turn distances into similarities with 1 / (1 + d). A plain 1 / d divides by
# zero on the diagonal (every item is at distance 0 from itself) and fills it
# with inf. The transform is still strictly decreasing in d, so the
# similarity ordering is unchanged, and values stay in (0, 1].
ec_matrix = 1 / (1 + euclidean_distances(data, data))
print('ec_matrix.shape :',ec_matrix.shape)
ec_matrix[:4,:4]
1
2
3
4
5
6
7
8
9
10
ec_matrix.shape : (16585, 16585)





array([[       inf, 3.7284596 , 0.08076027, 0.12017515],
       [3.7284596 ,        inf, 0.08180973, 0.12260531],
       [0.08076027, 0.08180973,        inf, 0.24063474],
       [0.12017515, 0.12260531, 0.24063474,        inf]], dtype=float32)
1
2
3
4
5
# Euclidean Distance Visualization
plt.figure(figsize=(10,8))
plt.title('Latent Feature Euclidean Distance')
sns.heatmap(ec_matrix[:100, :100], cmap='RdBu')
plt.show()
1
2
3
4
# Euclidean-distance-based similarity of one existing sample to every item.
# 1 / (1 + d) is used instead of 1 / d: the sample is itself a row of `data`,
# so its distance to itself is 0 and 1 / d would produce inf there. The
# ranking of the other items is unchanged.
sample_ec_distance = 1 / (1 + euclidean_distances(sample, data))

print('sample_ec_distance.shape :',sample_ec_distance.shape)
1
sample_ec_distance.shape : (1, 16585)
1
2
3
4
# Define Sample Similarity DataFrame
df_ec = pd.DataFrame(sample_ec_distance.T, index=df.index, columns=['sample'])
print('df_ec.shape :',df_ec.shape)
df_ec.head(4)
1
df_ec.shape : (16585, 1)
sample
00.155935
10.159182
20.158337
30.394013
1
2
3
4
# Return Top10 Similar Items
top10_idx_ec = df_ec['sample'].nlargest(10).index
print('top10_idx_ec :',top10_idx_ec)
print(label[top10_idx_ec].values)
1
2
3
4
top10_idx_ec : Int64Index([860, 15170, 931, 355, 783, 2415, 1082, 1384, 15172, 9433], dtype='int64')
['1014162_F.jpg' '1208281_F.jpg' '1014240_F.jpg' '1012304_F.jpg'
 '1014080_F.jpg' '1020250_F.jpg' '1015009_F.jpg' '1015412_F.jpg'
 '1208283_F.jpg' '1116157_F.jpg']
1
2
3
# 기존 이미지 중 단일 샘플 이미지
print(df.loc[sample_idx,'label'])
_ = plt.imshow(img_set[sample_idx])
1
2
# Top10 유사 이미지 시각화
top10_visualize(img_set,top10_idx_ec)

Pearson Similarity

1
2
3
4
# Pearson Similarity Computation
pearson_sim = np.corrcoef(data.to_numpy())
print('pearson_sim.shape :',pearson_sim.shape)
pearson_sim[:4,:4]
1
2
3
4
5
6
7
8
9
10
pearson_sim.shape : (16585, 16585)





array([[1.        , 0.99104718, 0.87296293, 0.91160757],
       [0.99104718, 1.        , 0.89450709, 0.93525527],
       [0.87296293, 0.89450709, 1.        , 0.9877586 ],
       [0.91160757, 0.93525527, 0.9877586 , 1.        ]])
1
2
3
4
5
# Pearson Similarity Visualization
plt.figure(figsize=(10,8))
plt.title('Latent Feature Pearson Similarity')
sns.heatmap(pearson_sim[:100, :100], cmap='RdBu')
plt.show()
1
2
3
4
# Pearson correlation of one existing sample against the latent features.
# np.corrcoef is given `data` as x and `sample` as y and returns a square
# 2-D matrix (its shape is printed below).
sample_pearson_sim = np.corrcoef(x=data.to_numpy(),y=sample)

print('sample_pearson_sim.shape :',sample_pearson_sim.shape)
1
sample_pearson_sim.shape : (16586, 16586)
1
2
3
4
# Define Sample Similarity DataFrame
df_pearson = pd.DataFrame(sample_pearson_sim[-1,:-1], index=df.index, columns=['sample'])
print('df_pearson.shape :',df_pearson.shape)
df_pearson.head(4)
1
df_pearson.shape : (16585, 1)
sample
00.854181
10.867406
20.953587
30.958042
1
2
3
4
# Return Top10 Similar Items
top10_idx_pearson = df_pearson['sample'].nlargest(10).index
print('top10_idx_pearson :',top10_idx_pearson)
print(label[top10_idx_pearson].values)
1
2
3
4
top10_idx_pearson : Int64Index([860, 1441, 732, 1727, 1363, 1724, 1487, 14339, 773, 1141], dtype='int64')
['1014162_F.jpg' '1015529_F.jpg' '1014025_F.jpg' '1016276_F.jpg'
 '1015359_F.jpg' '1016273_F.jpg' '1016015_F.jpg' '1203183_F.jpg'
 '1014069_F.jpg' '1015080_F.jpg']
1
2
3
# 기존 이미지 중 단일 샘플 이미지
print(df.loc[sample_idx,'label'])
_ = plt.imshow(img_set[sample_idx])
1
2
# Top10 유사 이미지 시각화
top10_visualize(img_set,top10_idx_pearson)

7. Fashion Coordination Recommendation

Fashion Coordination Dataframe Design

1
2
3
4
# fashion coordination dataframe
fashion_df = pd.read_csv('D:/Fasion_Images/uni_wearing.csv')
print('fashion_df.shape :',fashion_df.shape)
fashion_df.head()
1
fashion_df.shape : (18040, 6)
wearinghatmain_topinner_topbottomshoes
01008_1008_720_A_A001_A001_000.jpg1008013.01008011NaN1008012.0NaN
11030_1030_720_A_A002_232_223_222_A002_000.jpg1029449.01029157NaN1029107.0NaN
21030_1030_720_A_A003_232_220_222_A003_000.jpg1029442.01029411NaN1029109.0NaN
31030_1030_720_B_B002_232_221_223_B002_000.jpg1029434.01029073NaN1029141.0NaN
41030_1030_720_B_B003_232_227_223_B003_000.jpg1029431.01029255NaN1029142.0NaN
1
2
# fashion dataframe Information
fashion_df.info()
1
2
3
4
5
6
7
8
9
10
11
12
13
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18040 entries, 0 to 18039
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   wearing    18040 non-null  object 
 1   hat        32 non-null     float64
 2   main_top   18040 non-null  int64  
 3   inner_top  2838 non-null   float64
 4   bottom     16224 non-null  float64
 5   shoes      125 non-null    float64
dtypes: float64(4), int64(1), object(1)
memory usage: 845.8+ KB
1
2
# fashion dataframe 
print(fashion_df.isna().sum())
1
2
3
4
5
6
7
wearing          0
hat          18008
main_top         0
inner_top    15202
bottom        1816
shoes        17915
dtype: int64
1
2
3
# fashion dataframe Missing Value Imputation
fashion_df.fillna(0,inplace=True)
print('Remain Missing Value :',fashion_df.isna().sum().sum())
1
Remain Missing Value : 0
1
2
3
# fashion data preprocessing
# Cast every column after the first ('wearing') to int. The columns are
# assigned by name: `df.iloc[:, 1:] = ...` writes into the existing columns
# in place on recent pandas, which keeps their float64 dtype and silently
# drops the cast.
id_cols = fashion_df.columns[1:]
fashion_df[id_cols] = fashion_df[id_cols].astype(int)
fashion_df.head()
wearinghatmain_topinner_topbottomshoes
01008_1008_720_A_A001_A001_000.jpg10080131008011010080120
11030_1030_720_A_A002_232_223_222_A002_000.jpg10294491029157010291070
21030_1030_720_A_A003_232_220_222_A003_000.jpg10294421029411010291090
31030_1030_720_B_B002_232_221_223_B002_000.jpg10294341029073010291410
41030_1030_720_B_B003_232_227_223_B003_000.jpg10294311029255010291420

Match the coordination

1
2
# top10 label by cosine similarity
top10_label_cosine
1
2
3
array(['1103045_F.jpg', '1105054_F.jpg', '1102111_F.jpg', '1028064_F.jpg',
       '1103367_F.jpg', '1109245_F.jpg', '1109239_F.jpg', '1105319_F.jpg',
       '1109301_F.jpg', '1110275_F.jpg'], dtype=object)
1
2
3
# top10 label preprocessing
top10_result_cosine = list(map(lambda x : int(x[:7]), top10_label_cosine))
top10_result_cosine
1
2
3
4
5
6
7
8
9
10
[1103045,
 1105054,
 1102111,
 1028064,
 1103367,
 1109245,
 1109239,
 1105319,
 1109301,
 1110275]
1
2
3
4
5
# top10 Recommendation Result
# One DataFrame of matching outfits per similar item; report how many rows each has.
recomm_df = Fashion_coordination(top10_result_cosine, fashion_df)
print('recomm_df :',len(recomm_df),'DataFrame')
for rank, item_cases in enumerate(recomm_df, start=1):
  print(f"{rank} item's coordination cases :",item_cases.shape[0])
1
2
3
4
5
6
7
8
9
10
11
recomm_df : 10 DataFrame
1 item's coordination cases : 1
2 item's coordination cases : 1
3 item's coordination cases : 3
4 item's coordination cases : 2
5 item's coordination cases : 1
6 item's coordination cases : 1
7 item's coordination cases : 1
8 item's coordination cases : 1
9 item's coordination cases : 3
10 item's coordination cases : 1
1
2
3
4
5
# Total Similar Item Coordination DataFrame
recomm_total = pd.concat(recomm_df,axis=0)
# reset_index returns a new DataFrame; the result has to be assigned back,
# otherwise the original fashion_df row labels are kept.
recomm_total = recomm_total.reset_index(drop=True)
print('recomm_total.shape :',recomm_total.shape)
recomm_total.head()
1
recomm_total.shape : (15, 6)
wearinghatmain_topinner_topbottomshoes
67201104_1104_720_A_A078_172_274_017_A078_000.jpg01103045110330311033380
79731106_1106_720_C_C115_298_290_288_C115_000.jpg01105335110505411050310
62971103_1103_720_B_B020_263_260_B020_000.jpg01102111011020550
63601103_1103_720_B_B112_263_078_B112_000.jpg01102111011020230
64561103_1103_720_C_C097_263_263_C097_000.jpg01102111011021160

Recommendation Service Output Result

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Recommendation Service Output Result : 2
# Show up to ten recommended outfit images (first column of recomm_total
# holds the image file name) in a 2 x 5 grid.
# plt.subplots creates the figure itself; a separate plt.figure() call only
# left an empty extra figure behind.
fig, ax = plt.subplots(2,5,figsize=(5*3,2*3))
plt.suptitle('Fashion Best Fit Recommendatation!!')
for count, axis in enumerate(ax.flat):
  # Hide ticks and frame on every cell, including the ones left empty.
  axis.axis('off')
  if count >= recomm_total.shape[0]:
    # Fewer recommendations than grid cells: leave the cell blank.
    continue
  fashion_img_name = recomm_total.iloc[count,0]
  try:
    img = img_read(os.path.join(model_dir, fashion_img_name))
  except (FileNotFoundError, cv2.error) as err:
    # Catch only the image-loading failure and report it, instead of
    # swallowing every exception silently.
    print(f'Skipping {fashion_img_name}: {err}')
    continue
  axis.imshow(img)
plt.show()
This post is licensed under CC BY 4.0 by the author.