I'm using an autoencoder (AE) to compress 58-dimensional data into 8 dimensions. I have used the same AE architecture with different numbers of data points. All data points are independent of each other and selected randomly for training and validation, but the test dataset is fixed at 1000 data points.
# Train the same AE architecture on progressively larger training subsets and
# record test-set metrics for each run.
# Assumes the following are defined earlier in the file: `data` (2-D array,
# samples x features), `test_data` (fixed 1000-sample test set), the custom
# metrics `frechet_distance` and `ks_distance`, and the result lists
# `test_mae`, `test_root_mean_squared_error`, `test_mse`, `test_fre`, `test_kl`.
input_dim = data.shape[1]   # 58 features per sample
encoding_dim = 8            # bottleneck size

for i in [2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, data.shape[0]]:
    # Take the first i samples; split 80/20 into train/validation.
    # (The original `data[:i][:]` had a redundant trailing copy slice.)
    train_data, val_data = train_test_split(data[:i], test_size=0.20, random_state=42)

    # ---- Encoder: 58 -> 64 -> 32 -> 16 -> 8 -> 8 (bottleneck) ----
    input_layer = Input(shape=(input_dim,))
    encoder_hidden1 = Dense(64, activation='relu')(input_layer)
    encoder_hidden1 = Dropout(0.2)(encoder_hidden1)
    encoder_hidden2 = Dense(32, activation='relu')(encoder_hidden1)
    encoder_hidden2 = BatchNormalization()(encoder_hidden2)
    encoder_hidden3 = Dense(16, activation='relu')(encoder_hidden2)
    encoder_hidden3 = BatchNormalization()(encoder_hidden3)
    encoder_hidden4 = Dense(8, activation='relu')(encoder_hidden3)
    # Sigmoid bottleneck with a light L1 activity penalty for sparsity.
    encoded = Dense(encoding_dim, activation='sigmoid',
                    activity_regularizer=regularizers.l1(1e-6))(encoder_hidden4)

    # ---- Decoder: 8 -> 8 -> 16 -> 32 -> 64 -> 58 ----
    decoder_hidden1 = Dense(8, activation='relu')(encoded)
    decoder_hidden1 = BatchNormalization()(decoder_hidden1)
    decoder_hidden2 = Dense(16, activation='relu')(decoder_hidden1)
    decoder_hidden2 = BatchNormalization()(decoder_hidden2)
    decoder_hidden3 = Dense(32, activation='relu')(decoder_hidden2)
    decoder_hidden3 = BatchNormalization()(decoder_hidden3)
    decoder_hidden4 = Dense(64, activation='relu')(decoder_hidden3)
    # Sigmoid output assumes inputs are scaled to [0, 1] — TODO confirm.
    decoded = Dense(input_dim, activation='sigmoid')(decoder_hidden4)

    autoencoder = Model(input_layer, decoded)
    # FIX: was `autoencoderpile(...)` — a corrupted `autoencoder.compile(...)`.
    autoencoder.compile(optimizer=Nadam(learning_rate=0.001), loss='mse',
                        metrics=[keras.metrics.RootMeanSquaredError(), 'mae',
                                 frechet_distance, ks_distance])

    lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=20)
    early_stopping = EarlyStopping(monitor='val_loss', patience=100,
                                   restore_best_weights=True)

    history = autoencoder.fit(train_data, train_data,
                              epochs=5000, batch_size=64, shuffle=True,
                              validation_data=(val_data, val_data),
                              callbacks=[early_stopping, lr_scheduler])

    # Standalone encoder: reuses the trained graph up to the bottleneck.
    encoder = Model(input_layer, encoded)
    X_encoded = encoder.predict(test_data)

    # Standalone decoder: rebuild by chaining the trained decoder layers
    # (the last 8 layers of the autoencoder) onto a fresh 8-d input.
    decoder_input = Input(shape=(encoding_dim,))
    decoder_layer_1 = autoencoder.layers[-8]  # Dense(8)
    decoder_layer_2 = autoencoder.layers[-7]  # BatchNormalization()
    decoder_layer_3 = autoencoder.layers[-6]  # Dense(16)
    decoder_layer_4 = autoencoder.layers[-5]  # BatchNormalization()
    decoder_layer_5 = autoencoder.layers[-4]  # Dense(32)
    decoder_layer_6 = autoencoder.layers[-3]  # BatchNormalization()
    decoder_layer_7 = autoencoder.layers[-2]  # Dense(64)
    decoder_layer_8 = autoencoder.layers[-1]  # Dense(input_dim)
    decoder = Model(decoder_input,
                    decoder_layer_8(decoder_layer_7(decoder_layer_6(
                        decoder_layer_5(
                            decoder_layer_4(decoder_layer_3(decoder_layer_2(
                                decoder_layer_1(decoder_input)))))))))
    X_reconstructed = decoder.predict(X_encoded)

    # Evaluate on the fixed 1000-sample test set and record each metric.
    # NOTE(review): the 5th metric is ks_distance (Kolmogorov-Smirnov), but it
    # is stored in `test_kl` — the list name suggests KL divergence; verify.
    test_mse_, test_root_mean_squared_error_, test_mae_, test_fre_, test_kl_ = \
        autoencoder.evaluate(test_data, test_data)
    test_mae.append(test_mae_)
    test_root_mean_squared_error.append(test_root_mean_squared_error_)
    test_mse.append(test_mse_)
    test_fre.append(test_fre_)
    test_kl.append(test_kl_)
I am attaching the error diagram for reference.