python - Dst tensor not initialising, Memory allocatiion

I am having issue with a Dst tensor not initializing, there are similar posts but the solutions there are too outdated and my issue is pretty specific

I am running: python - 3.11, Tensorflow - 2.15, Keras - 2.15, tensorflow_metal - 2.15

I am using version 2.15 to ensure compatibility for my M2 Mac's GPU but I am interested to know if there is any other way to use the most modern version do mention it as this could be an issue

This is the error code I am getting

    Traceback (most recent call last):
      File "training.py", line 149, in <module>
        main()
      File "training.py", line 136, in main
        model, history = train_existing_model(MODEL_PATH, X, y)
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "training.py", line 69, in train_existing_model
        X = tf.convert_to_tensor(X, dtype=tf.float32)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler
        raise e.with_traceback(filtered_tb) from None
      File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/tensorflow/python/framework/constant_op.py", line 103, in convert_to_eager_tensor
        return ops.EagerTensor(value, ctx.device_name, dtype)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    tensorflow.python.framework.errors_impl.InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.

with the full code producing the error below

    import os
    import tensorflow as tf
    import numpy as np
    import random as rdm
    import time
    from chess import pgn, Board
    from tqdm import tqdm
    from tensorflow.keras.models import load_model
    from tensorflow.keras.utils import to_categorical
    
    os.environ['TF_METAL'] = '1'
    
    
    def load_pgn(file_path):
        with open(file_path, 'r') as pgn_file:
            while True:
                game = pgn.read_game(pgn_file)
                if game is None:
                    break
                yield game
    
    
    def board_to_matrix(board: Board):
        matrix = np.zeros((8, 8, 12))
        piece_map = board.piece_map()
        for square, piece in piece_map.items():
            row, col = divmod(square, 8)
            piece_type = piece.piece_type - 1
            piece_color = 0 if piece.color else 6
            matrix[row, col, piece_type + piece_color] = 1
        return matrix
    
    
    def create_input_for_nn(games):
        X, y = [], []
        chunk_size = 1000
    
        for i in range(0, len(games), chunk_size):
            chunk = games[i:i + chunk_size]
            for game in chunk:
                board = game.board()
                for move in game.mainline_moves():
                    X.append(board_to_matrix(board))
                    y.append(move.uci())
                    board.push(move)
    
        return np.array(X, dtype=np.float32), np.array(y)
    
    
    def encode_moves(moves):
        move_to_int = {move: idx for idx, move in enumerate(set(moves))}
        return [move_to_int[move] for move in moves], move_to_int
    
    
    def data_generator(X, y, batch_size):
        dataset_size = len(X)
        while True:
            for i in range(0, dataset_size, batch_size):
                end = min(i + batch_size, dataset_size)
                yield X[i:end], y[i:end]
    
    
    def train_existing_model(model_path, X, y, batch_size=32, epochs=25):
        print(f"\nLoading existing model from {model_path}")
        model = load_model(model_path)
    
        # Convert to TensorFlow tensors
        X = np.array(X, dtype=np.float32)
        X = tf.convert_to_tensor(X, dtype=tf.float32)
    
        y = np.array(y, dtype=np.int32)
        y = tf.convert_to_tensor(y, dtype=tf.int32)
    
        # Create data generator
        train_gen = data_generator(X, y, batch_size)
    
        # Use legacy optimizer for M1/M2
        modelpile(
            optimizer=tf.keras.optimizers.legacy.Adam(),
            loss=model.loss,
            metrics=model.metrics
        )
    
        print("\nContinuing training...")
        history = model.fit(
            train_gen,
            validation_split=0.1,
            epochs=epochs,
            verbose=1
        )
    
        print("\nSaving updated model...")
        model.save(model_path)
        return model, history
    
    
    def main():
        start_time = time.time()
    
        # Configuration
        MODEL_PATH = '../assets/chessModel.keras'
        FILE_PATH = '../assets/ChessData'
        LIMIT_OF_FILES = 3
        GAMES_LIMIT = 50000
    
        physical_devices = tf.config.list_physical_devices('GPU')
        if physical_devices:
            for device in physical_devices:
                tf.config.experimental.set_memory_growth(device, False)
            tf.config.set_visible_devices(physical_devices[0], 'GPU')
        # Load games
        print("\nLoading games...")
        files = [file for file in os.listdir(FILE_PATH) if file.endswith('.pgn')]
        games = []
    
        for i, file in enumerate(tqdm(files)):
            games.extend(load_pgn(f"{FILE_PATH}/{file}"))
            if i >= LIMIT_OF_FILES - 1:
                break
    
        print(f"\nTotal games loaded: {len(games)}")
        LIMITED_GAMES = games[:GAMES_LIMIT]
    
        # Prepare training data
        print("\nPreparing training data...")
        X, y = create_input_for_nn(LIMITED_GAMES)
        y, move_to_int = encode_moves(y)
        y = to_categorical(y, num_classes=len(move_to_int))
        X = np.array(X)
    
        print(f"\nTraining data shapes:")
        print(f"X shape: {X.shape}")
        print(f"y shape: {y.shape}")
    
        # Train model
        model, history = train_existing_model(MODEL_PATH, X, y)
    
        # Create move mapping for predictions
        int_to_move = dict(zip(move_to_int.values(), move_to_int.keys()))
    
        # Print training time
        end_time = time.time()
        print(f"\nTotal training time: {(end_time - start_time) / 60:.2f} minutes")
    
        return model, history, int_to_move
    
    
    if __name__ == "__main__":
        main()

I do not know what else to try. I have set up data pipelines, int32 and float32 arrays as seen above but nothing is working.

科技改变生活-雨落星辰 - 所有的伟大,都源于一个勇敢的开始

python - Dst tensor not initialising, Memory allocatiion - Stack Overflow

与本文相关的文章

评论列表(0)