I am trying to train a CNN model with an image dataset for medical image segmentation:
history = model.fit(x=train_dataset,
                    validation_data=val_dataset,
                    epochs=epochs,
                    steps_per_epoch=steps_per_epoch,
                    callbacks=[iou_monitor, es, mc])
Here epochs = 1 and batch_size = 4. The train_dataset consists of elements x, (y1, y2), where x is the input CT image and y1 and y2 are the corresponding segmentation masks (it is a multi-task model).
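For reference, the element structure can be verified directly (a quick check; the expected shapes assume img_size = 256 and the channels-first layout set in the pipeline below):

print(train_dataset.element_spec)
# Expected along the lines of:
# (TensorSpec(shape=(None, 3, 256, 256), dtype=tf.float32, name=None),
#  (TensorSpec(shape=(None, 1, 256, 256), dtype=tf.float32, name=None),
#   TensorSpec(shape=(None, 1, 256, 256), dtype=tf.float32, name=None)))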
Now, when I check the train_dataset files on the file system manually, I find that the total number of samples is 948. Also, when I run the following code:
# Initialize sample counter
total_samples = 0

# Iterate over the dataset to count samples
for batch in train_dataset:
    x_batch, (y1_batch, y2_batch) = batch        # Unpack batch
    batch_size = tf.shape(x_batch)[0].numpy()    # Get batch size from x_batch
    total_samples += batch_size                  # Accumulate count

print(f"Total samples in dataset: {total_samples}")
I get:
Total samples in dataset: 948
This confirms my manual count. It means that steps_per_epoch = num_of_samples_in_train_dataset / batch_size = 948 / 4 = 237.
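Rather than hard-coding the value, the step count can also be derived (a small sketch; num_samples is the count obtained above):

import math

num_samples = 948   # from the counting loop above
batch_size = 4
steps_per_epoch = math.ceil(num_samples / batch_size)  # 237; ceil also covers a trailing partial batch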
Now, when I train the model I get:
236/237 ━━━━━━━━━━━━━━━━━━━━ 0s 388ms/step - loss: 0.6321 - mask_1_output_loss: 0.1476 - mask_2_output_loss: 0.4845
/usr/lib/python3.10/contextlib.py:153: UserWarning: Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches. You may need to use the `.repeat()` function when building your dataset.
self.gen.throw(typ, value, traceback)
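A quick way to confirm whether the pipeline really yields the expected 237 batches is to count them directly (a diagnostic sketch; it assumes train_dataset is finite, i.e. built with the bounded repeat shown below):

num_batches = sum(1 for _ in train_dataset)
print(f"Batches yielded: {num_batches}")  # fit needs at least steps_per_epoch * epochs = 237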
My data pipeline is defined as follows:
import numpy as np
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt

img_size, original_size = 256, 512  # target and source resolutions

# Function to read the input image
def read_image(image_path):
    try:
        image_path = image_path.decode()
        x = plt.imread(image_path)
        x = x.astype(np.float32)
        # Normalize x to [0, 1]
        x = x / 255.0
        if x.shape != (512, 512, 3):
            raise ValueError(f"Invalid image dimensions for x: {x.shape}, expected (512, 512, 3)")
        # Resize the image to (256, 256, 3)
        x = cv2.resize(x, None, fx=img_size / original_size, fy=img_size / original_size,
                       interpolation=cv2.INTER_AREA).astype('float32')
        x = tf.convert_to_tensor(x, dtype=tf.float32)
        return [x]
    except Exception as e:
        print(f"Error reading image: {image_path}, Error: {e}")
        return None
# Function to read the mask image
def read_mask(mask_path):
    try:
        # The mask is in .npy format
        mask_path = mask_path.decode()
        y = np.load(mask_path)
        y = y.astype(np.float32)
        if y.shape != (512, 512, 2):
            raise ValueError(f"Invalid original mask dimensions: {y.shape}, expected (512, 512, 2)")
        # Resize the mask to (256, 256, 2)
        y = cv2.resize(y, None, fx=img_size / original_size, fy=img_size / original_size,
                       interpolation=cv2.INTER_NEAREST).astype('float32')
        # Split the two channels into the two task masks
        y = np.transpose(y, (2, 0, 1))
        y1, y2 = y[0, :, :], y[1, :, :]
        y1 = np.expand_dims(y1, axis=-1)  # (img_size, img_size) -> (img_size, img_size, 1)
        y2 = np.expand_dims(y2, axis=-1)
        y1 = tf.convert_to_tensor(y1, dtype=tf.float32)
        y2 = tf.convert_to_tensor(y2, dtype=tf.float32)
        return [y1, y2]
    except Exception as e:
        print(f"Error reading mask: {mask_path}, Error: {e}")
        return None
# Data augmentation
def augment_image(image, mask1, mask2):
    # Flip the image and masks, each 50% of the time
    if tf.random.uniform(()) > 0.5:
        image = tf.image.flip_left_right(image)
        mask1 = tf.image.flip_left_right(mask1)
        mask2 = tf.image.flip_left_right(mask2)
    if tf.random.uniform(()) > 0.5:
        image = tf.image.flip_up_down(image)
        mask1 = tf.image.flip_up_down(mask1)
        mask2 = tf.image.flip_up_down(mask2)
    if tf.random.uniform(()) > 0.5:
        # Randomly select how many 90-degree rotations to apply (0, 1, 2, or 3)
        rotation_count = tf.random.uniform((), minval=0, maxval=4, dtype=tf.int32)
        # Apply the same rotation to the image and both masks
        image = tf.image.rot90(image, k=rotation_count)
        mask1 = tf.image.rot90(mask1, k=rotation_count)
        mask2 = tf.image.rot90(mask2, k=rotation_count)
    return image, mask1, mask2
# Preprocessing function
def preprocess(x, y, data_augmentation=False):
    output = tf.numpy_function(read_image, [x], [tf.float32])
    x = output[0]
    output = tf.numpy_function(read_mask, [y], [tf.float32, tf.float32])
    y1, y2 = output
    # Apply data augmentation
    if data_augmentation:
        x, y1, y2 = augment_image(x, y1, y2)
    # Convert to channels_first format
    x = tf.transpose(x, perm=[2, 0, 1])    # [H, W, C] -> [C, H, W]
    y1 = tf.transpose(y1, perm=[2, 0, 1])  # [H, W, C] -> [C, H, W]
    y2 = tf.transpose(y2, perm=[2, 0, 1])  # [H, W, C] -> [C, H, W]
    x.set_shape((3, img_size, img_size))
    y1.set_shape((1, img_size, img_size))
    y2.set_shape((1, img_size, img_size))
    return x, (y1, y2)
# Create the TensorFlow dataset
def tf_dataset(x, y, batch_size=32, epochs=30, data_augmentation=False):
    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    dataset = dataset.shuffle(buffer_size=100)
    dataset = dataset.map(lambda a, b: preprocess(a, b, data_augmentation),
                          num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.repeat(epochs).batch(batch_size)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
    return dataset
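For completeness, train_dataset is then created with a call along these lines (a hypothetical invocation: train_x and train_y stand for the image and mask path lists; the argument values match the batch_size = 4 and epochs = 1 stated above):

train_dataset = tf_dataset(train_x, train_y,  # hypothetical path lists
                           batch_size=4, epochs=1, data_augmentation=True)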
Also, I tried turning off the data augmentation, but I still get the same warning. I don't know what to do now.
1 Answer
I think your dataset is running out because the repeat in your pipeline is bounded: dataset.repeat(epochs) with epochs = 1 yields exactly one pass over the data, so any extra batch that Keras tries to fetch exhausts the iterator. Try replacing dataset = dataset.repeat(epochs).batch(batch_size) with an unbounded repeat, i.e. dataset.repeat().batch(batch_size), and let steps_per_epoch bound each epoch; also keep repeat() before batch(), not the other way around, so epoch boundaries don't produce partial batches. Try this. Also, do you have a Git repo for this?
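A minimal sketch of that change, reusing the tf_dataset helper from the question (the unbounded repeat() is the only substantive edit):

def tf_dataset(x, y, batch_size=32, data_augmentation=False):
    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    dataset = dataset.shuffle(buffer_size=100)
    dataset = dataset.map(lambda a, b: preprocess(a, b, data_augmentation),
                          num_parallel_calls=tf.data.experimental.AUTOTUNE)
    # Repeat indefinitely before batching: model.fit's steps_per_epoch and
    # epochs now decide how much data is drawn, so the iterator cannot run dry.
    dataset = dataset.repeat().batch(batch_size)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
    return dataset

Note that with an infinite dataset you must keep passing steps_per_epoch (and validation_steps if the validation set is also repeated); otherwise fit cannot tell where an epoch ends.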