I'm trying to train a CNN model with Keras.
For my data, I have images stored in my Google Drive. When I use Google Colab, I access them via the Drive mount; when I use my local Jupyter, I access them through the Google Drive for Desktop application.
To load the images, I'm using 'tf.keras.utils.image_dataset_from_directory' to run a CNN model in my Jupyter (Anaconda) environment. Here is the code:
# NOTE(review): os.path.exists() checks the path through the Windows filesystem
# API, while image_dataset_from_directory lists files through tf.io.gfile,
# which can fail on Google Drive for Desktop virtual/streamed paths even when
# the folder exists locally — presumably the cause of the NotFoundError below.
# Marking the folder "Available offline" (mirrored) or copying it to a plain
# local directory usually resolves this — TODO confirm in Drive settings.
base_dir = 'G:/Meu Drive/...'

batch_size_treino_val = 32
batch_size_teste = 1

# Fraction of each patient's images held out for validation.
VALIDATION_SPLIT = 0.2
# Fixed seed so the training/validation subsets are disjoint and reproducible.
SPLIT_SEED = 123


def create_dataset(directory, batch_size, validation_split=None, subset=None,
                   seed=SPLIT_SEED):
    """Build a batched, prefetched binary-classification image dataset.

    Args:
        directory: Folder containing one sub-folder per class name.
        batch_size: Batch size of the returned dataset.
        validation_split: Optional fraction in (0, 1) reserved for validation.
        subset: 'training' or 'validation' when validation_split is given.
        seed: Shuffle/split seed; must be identical across the two subset
            calls so the subsets do not overlap.

    Returns:
        A tf.data.Dataset yielding (image_batch, label_batch) pairs.
    """
    dataset = tf.keras.utils.image_dataset_from_directory(
        directory,
        labels='inferred',
        label_mode='binary',
        batch_size=batch_size,
        image_size=(150, 150),
        interpolation='bilinear',
        shuffle=True,
        seed=seed,
        validation_split=validation_split,
        subset=subset,
        class_names=['apneia', 'no_apneia']  # Especificando as classes desejadas
    )
    return dataset.prefetch(tf.data.AUTOTUNE)


train_ds = []
val_ds = []
test_ds = []
train_samples_count = 0
val_samples_count = 0
test_samples_count = 0

for patient in train_patients:
    path = os.path.join(base_dir, patient,
                        'segmentacao_8_segundos_sobreposicao_5_segundos', 'gaf')
    if not os.path.exists(path):
        print(f"Diretório não encontrado: {path}")
        continue
    print(f"Processando: {path}")
    # BUG FIX: the original code split ONE shuffled dataset with take()/skip().
    # image_dataset_from_directory reshuffles every epoch, so the same image
    # could appear in both the training and the validation split on later
    # epochs (train/val leakage). Using the built-in validation_split/subset
    # mechanism with a shared seed keeps the two subsets disjoint and stable.
    patient_train = create_dataset(path, batch_size_treino_val,
                                   validation_split=VALIDATION_SPLIT,
                                   subset='training')
    patient_val = create_dataset(path, batch_size_treino_val,
                                 validation_split=VALIDATION_SPLIT,
                                 subset='validation')
    # Exact per-subset counts (last batch may be partial, so count unbatched).
    num_train = sum(1 for _ in patient_train.unbatch())
    num_val = sum(1 for _ in patient_val.unbatch())
    print(f"Imagens encontradas: {num_train + num_val}")
    train_samples_count += num_train
    val_samples_count += num_val
    train_ds.append(patient_train)
    val_ds.append(patient_val)
But I'm getting this error:
---------------------------------------------------------------------------
NotFoundError Traceback (most recent call last)
Cell In[4], line 32
30 if os.path.exists(path):
31 print(f"Processando: {path}")
---> 32 ds = create_dataset(path, batch_size_treino_val)
33 num_images = sum(1 for _ in ds.unbatch())
34 print(f"Imagens encontradas: {num_images}")
Cell In[4], line 8, in create_dataset(directory, batch_size)
6 def create_dataset(directory, batch_size):
----> 8 dataset = tf.keras.utils.image_dataset_from_directory(
9 directory,
10 labels='inferred',
11 label_mode='binary',
12 batch_size=batch_size,
13 image_size=(150, 150),
14 interpolation='bilinear',
15 shuffle=True,
16 class_names=['apneia', 'no_apneia'] # Especificando as classes desejadas
17 )
18 return dataset.prefetch(tf.data.AUTOTUNE)
File ~\anaconda3\lib\site-packages\keras\utils\image_dataset.py:210, in image_dataset_from_directory(directory, labels, label_mode, class_names, color_mode, batch_size, image_size, shuffle, seed, validation_split, subset, interpolation, follow_links, crop_to_aspect_ratio, **kwargs)
208 if seed is None:
209 seed = np.random.randint(1e6)
--> 210 image_paths, labels, class_names = dataset_utils.index_directory(
211 directory,
212 labels,
213 formats=ALLOWLIST_FORMATS,
214 class_names=class_names,
215 shuffle=shuffle,
216 seed=seed,
217 follow_links=follow_links,
218 )
220 if label_mode == "binary" and len(class_names) != 2:
221 raise ValueError(
222 'When passing `label_mode="binary"`, there must be exactly 2 '
223 f"class_names. Received: class_names={class_names}"
224 )
File ~\anaconda3\lib\site-packages\keras\utils\dataset_utils.py:542, in index_directory(directory, labels, formats, class_names, shuffle, seed, follow_links)
540 else:
541 subdirs = []
--> 542 for subdir in sorted(tf.io.gfile.listdir(directory)):
543 if tf.io.gfile.isdir(tf.io.gfile.join(directory, subdir)):
544 if subdir.endswith("/"):
File ~\anaconda3\lib\site-packages\tensorflow\python\lib\io\file_io.py:768, in list_directory_v2(path)
753 """Returns a list of entries contained within a directory.
754
755 The list is in arbitrary order. It does not contain the special entries "."
(...)
765 errors.NotFoundError if directory doesn't exist
766 """
767 if not is_directory(path):
--> 768 raise errors.NotFoundError(
769 node_def=None,
770 op=None,
771 message="Could not find directory {}".format(path))
773 # Convert each element to string, since the return values of the
774 # vector of string should be interpreted as strings, not bytes.
775 return [
776 compat.as_str_any(filename)
777 for filename in _pywrap_file_io.GetChildren(compat.path_to_bytes(path))
778 ]
NotFoundError: Could not find directory G:/Meu Drive/...
The fun fact is: when I run this exact same code in Google Colab (replacing base_dir with '/content/drive/MyDrive/...'), it works, and tf.keras.utils.image_dataset_from_directory can find my images for training.
What am I doing wrong here?