I'm trying to train an autoencoder with unsupervised images. I have about 300 train images and 100 validation images. But when I inputted an unseen image to the trained autoencoder, it is giving complete blank output.
train_images = os.listdir('./Data/train')
val_images = os.listdir('./Data/val')
X_train = []
X_val = []
for i in range(len(train_images)):
img = cv2.imread('./Data/train/'+train_images[i])
img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
resized = cv2.resize(img, (224,224), interpolation = cv2.INTER_AREA)
X_train.append(resized)
X_train = np.asarray(X_train)
X_train = X_train.astype('float32')/255.
X_train = np.reshape(X_train, (len(X_train), 224, 224, 1))
for i in range(len(val_images)):
img = cv2.imread('./Data/val/'+val_images[i])
img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
resized = cv2.resize(img, (224,224), interpolation = cv2.INTER_AREA)
X_val.append(resized)
X_val = np.asarray(X_val)
X_val = X_val.astype('float32')/255.
X_val = np.reshape(X_val, (len(X_val), 224, 224, 1))
print(len(X_train))
print(len(X_val))
Here X_train.shape
and X_val.shape
respectively are (300,224,224,1)
and (100, 224, 224, 1)
here is my upconv_concat
function
def upconv_concat(bottom_a, bottom_b, n_filter, pool_size, stride, padding='VALID'):
up_conv = Conv2DTranspose(filters=n_filter, kernel_size=[pool_size, pool_size],
strides=stride, padding=padding)(bottom_a)
return Concatenate(axis=-1)([up_conv, bottom_b])
here are some parameters
input_img = Input(shape=(224, 224, 1))
droprate=0.25
num_classes = 1
and here is my model
conv_1_1 = Conv2D(filters = 64, kernel_size = 3, activation='relu', padding='same')(input_img)
conv_1_1_bn = BatchNormalization()(conv_1_1)
conv_1_1_do = Dropout(droprate)(conv_1_1_bn)
pool_1 = MaxPooling2D(pool_size= 2, strides = 2)(conv_1_1_do)
conv_4_1 = SeparableConv2D(filters = 512, kernel_size = 3, activation='relu', padding='same')(pool_1)
conv_4_1_bn = BatchNormalization()(conv_4_1)
conv_4_1_do = Dropout(droprate)(conv_4_1_bn)
pool_4 = MaxPooling2D(pool_size= 2, strides = 2)(conv_4_1_do)
conv_5_1 = SeparableConv2D(filters = 1024, kernel_size = 3, activation='relu', padding='same')(pool_4)
conv_5_1_bn = BatchNormalization()(conv_5_1)
conv_5_1_do = Dropout(droprate)(conv_5_1_bn)
upconv_1 = upconv_concat(conv_5_1_do, conv_4_1_do, n_filter=512, pool_size=2, stride=2)
conv_6_1 = SeparableConv2D(filters = 512, kernel_size = 3, activation='relu', padding='same')(upconv_1)
conv_6_1_bn = BatchNormalization()(conv_6_1)
conv_6_1_do = Dropout(droprate)(conv_6_1_bn)
upconv_2 = upconv_concat(conv_6_1_do, conv_1_1_do, n_filter=64, pool_size=2, stride=2)
conv_9_1 = SeparableConv2D(filters = 64, kernel_size = 3, activation='relu', padding='same')(upconv_2)
conv_9_1_bn = BatchNormalization()(conv_9_1)
conv_9_1_do = Dropout(droprate)(conv_9_1_bn)
ae_output = Conv2D(num_classes, kernel_size=1, strides = (1,1), activation="softmax")(conv_9_1_do)
here is the training part
ae_model = Model(input_img, ae_output)
ae_model.compile(optimizer='adadelta', loss='binary_crossentropy')
ae_model.fit(X_train, X_train,
epochs=5,
batch_size=16,
shuffle=True,
validation_data=(X_val, X_val))
if anyone needs the model summary
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) (None, 224, 224, 1) 0
__________________________________________________________________________________________________
conv2d_1 (Conv2D) (None, 224, 224, 64) 640 input_1[0][0]
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 224, 224, 64) 256 conv2d_1[0][0]
__________________________________________________________________________________________________
dropout_1 (Dropout) (None, 224, 224, 64) 0 batch_normalization_1[0][0]
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D) (None, 112, 112, 64) 0 dropout_1[0][0]
__________________________________________________________________________________________________
separable_conv2d_1 (SeparableCo (None, 112, 112, 512 33856 max_pooling2d_1[0][0]
__________________________________________________________________________________________________
batch_normalization_2 (BatchNor (None, 112, 112, 512 2048 separable_conv2d_1[0][0]
__________________________________________________________________________________________________
dropout_2 (Dropout) (None, 112, 112, 512 0 batch_normalization_2[0][0]
__________________________________________________________________________________________________
max_pooling2d_2 (MaxPooling2D) (None, 56, 56, 512) 0 dropout_2[0][0]
__________________________________________________________________________________________________
separable_conv2d_2 (SeparableCo (None, 56, 56, 1024) 529920 max_pooling2d_2[0][0]
__________________________________________________________________________________________________
batch_normalization_3 (BatchNor (None, 56, 56, 1024) 4096 separable_conv2d_2[0][0]
__________________________________________________________________________________________________
dropout_3 (Dropout) (None, 56, 56, 1024) 0 batch_normalization_3[0][0]
__________________________________________________________________________________________________
conv2d_transpose_1 (Conv2DTrans (None, 112, 112, 512 2097664 dropout_3[0][0]
__________________________________________________________________________________________________
concatenate_1 (Concatenate) (None, 112, 112, 102 0 conv2d_transpose_1[0][0]
dropout_2[0][0]
__________________________________________________________________________________________________
separable_conv2d_3 (SeparableCo (None, 112, 112, 512 534016 concatenate_1[0][0]
__________________________________________________________________________________________________
batch_normalization_4 (BatchNor (None, 112, 112, 512 2048 separable_conv2d_3[0][0]
__________________________________________________________________________________________________
dropout_4 (Dropout) (None, 112, 112, 512 0 batch_normalization_4[0][0]
__________________________________________________________________________________________________
conv2d_transpose_2 (Conv2DTrans (None, 224, 224, 64) 131136 dropout_4[0][0]
__________________________________________________________________________________________________
concatenate_2 (Concatenate) (None, 224, 224, 128 0 conv2d_transpose_2[0][0]
dropout_1[0][0]
__________________________________________________________________________________________________
separable_conv2d_4 (SeparableCo (None, 224, 224, 64) 9408 concatenate_2[0][0]
__________________________________________________________________________________________________
batch_normalization_5 (BatchNor (None, 224, 224, 64) 256 separable_conv2d_4[0][0]
__________________________________________________________________________________________________
dropout_5 (Dropout) (None, 224, 224, 64) 0 batch_normalization_5[0][0]
__________________________________________________________________________________________________
conv2d_2 (Conv2D) (None, 224, 224, 1) 65 dropout_5[0][0]
==================================================================================================
Total params: 3,345,409
Trainable params: 3,341,057
Non-trainable params: 4,352
__________________________________________________________________________________________________
I have absolutely checked the X_train images to see if I am sending blank images by mistake. But no. I'm sending proper data only.
The problem is
when I tried to test the model, it is giving a blank image.
img = cv2.imread('./test/a184.jpg')
img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
resized = cv2.resize(img, (224,224), interpolation = cv2.INTER_AREA)
resized = resized/255
resized = resized[:, :, np.newaxis]
resized = resized[np.newaxis, :, :]
now resized
is of shape (1,224,224,1)
image = ae_model.predict(resized)
image = image.reshape((224,224))
plt.imshow(image, cmap= 'gray')
gives me this image:
but the values in
image
variable are all 1s.
I'm using tf.Keras.
Please help me with this. Unable to locate where the problem is and how to debug to find it.