
python - I can't train a model from a HuggingFace tutorial for "Translation" - Stack Overflow


I'm new to this kind of project and I want to try the Hugging Face tutorial on translation (here is the link: text). I'm using my own custom dataset, which contains two columns: one for Spanish (español) and the other for Mapudungun. I think I adjusted the dataset to fit the tutorial, but when I try to train the model this error appears:

Epoch 1/3
62/62 [==============================] - ETA: 0s - loss: 4.2643

---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-36-aa371efdfe52> in <cell line: 0>()
----> 1 model.fit(x=tf_train_set, validation_data=tf_test_set, epochs=3, callbacks=callbacks)

11 frames

/usr/local/lib/python3.11/dist-packages/transformers/models/t5/modeling_tf_t5.py in else_body()
     54                             nonlocal input_shape
     55                             err_msg_prefix = ag__.if_exp(ag__.ld(self).is_decoder, lambda: 'decoder_', lambda: '', 'self.is_decoder')
---> 56                             raise ag__.converted_call(ag__.ld(ValueError), (f'You have to specify either {ag__.ld(err_msg_prefix)}input_ids or {ag__.ld(err_msg_prefix)}inputs_embeds',), None, fscope)
     57                         err_msg_prefix = ag__.Undefined('err_msg_prefix')
     58                         input_shape = ag__.Undefined('input_shape')

ValueError: in user code:

    File "/usr/local/lib/python3.11/dist-packages/tf_keras/src/engine/training.py", line 2436, in predict_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.11/dist-packages/tf_keras/src/engine/training.py", line 2421, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.11/dist-packages/tf_keras/src/engine/training.py", line 2409, in run_step  **
        outputs = model.predict_step(data)
    File "/usr/local/lib/python3.11/dist-packages/tf_keras/src/engine/training.py", line 2377, in predict_step
        return self(x, training=False)
    File "/usr/local/lib/python3.11/dist-packages/tf_keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/tmp/__autograph_generated_file0_hgym1q.py", line 40, in tf__run_call_with_unpacked_inputs
        raise
    File "/tmp/__autograph_generated_filedk4yrcz4.py", line 91, in tf__call
        decoder_outputs = ag__.converted_call(ag__.ld(self).decoder, (ag__.ld(decoder_input_ids),), dict(attention_mask=ag__.ld(decoder_attention_mask), encoder_hidden_states=ag__.ld(hidden_states), encoder_attention_mask=ag__.ld(attention_mask), inputs_embeds=ag__.ld(decoder_inputs_embeds), head_mask=ag__.ld(decoder_head_mask), past_key_values=ag__.ld(past_key_values), use_cache=ag__.ld(use_cache), output_attentions=ag__.ld(output_attentions), output_hidden_states=ag__.ld(output_hidden_states), return_dict=ag__.ld(return_dict), training=ag__.ld(training)), fscope)
    File "/tmp/__autograph_generated_file0_hgym1q.py", line 40, in tf__run_call_with_unpacked_inputs
        raise
    File "/tmp/__autograph_generated_filezh8cqxq8.py", line 65, in tf__call
        ag__.if_stmt(ag__.and_(lambda: ag__.ld(input_ids) is not None, lambda: ag__.ld(inputs_embeds) is not None), if_body_2, else_body_2, get_state_2, set_state_2, ('input_ids', 'input_shape'), 2)
    File "/tmp/__autograph_generated_filezh8cqxq8.py", line 62, in else_body_2
        ag__.if_stmt(ag__.ld(input_ids) is not None, if_body_1, else_body_1, get_state_1, set_state_1, ('input_ids', 'input_shape'), 2)
    File "/tmp/__autograph_generated_filezh8cqxq8.py", line 59, in else_body_1
        ag__.if_stmt(ag__.ld(inputs_embeds) is not None, if_body, else_body, get_state, set_state, ('input_shape',), 1)
    File "/tmp/__autograph_generated_filezh8cqxq8.py", line 56, in else_body
        raise ag__.converted_call(ag__.ld(ValueError), (f'You have to specify either {ag__.ld(err_msg_prefix)}input_ids or {ag__.ld(err_msg_prefix)}inputs_embeds',), None, fscope)

    ValueError: Exception encountered when calling layer 'tft5_for_conditional_generation' (type TFT5ForConditionalGeneration).
    
    in user code:
    
        File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_tf_utils.py", line 1395, in run_call_with_unpacked_inputs  *
            return func(self, **unpacked_inputs)
        File "/usr/local/lib/python3.11/dist-packages/transformers/models/t5/modeling_tf_t5.py", line 1455, in call  *
            decoder_outputs = self.decoder(
        File "/usr/local/lib/python3.11/dist-packages/tf_keras/src/utils/traceback_utils.py", line 70, in error_handler  **
            raise e.with_traceback(filtered_tb) from None
        File "/tmp/__autograph_generated_file0_hgym1q.py", line 40, in tf__run_call_with_unpacked_inputs
            raise
        File "/tmp/__autograph_generated_filezh8cqxq8.py", line 65, in tf__call
            ag__.if_stmt(ag__.and_(lambda: ag__.ld(input_ids) is not None, lambda: ag__.ld(inputs_embeds) is not None), if_body_2, else_body_2, get_state_2, set_state_2, ('input_ids', 'input_shape'), 2)
        File "/tmp/__autograph_generated_filezh8cqxq8.py", line 62, in else_body_2
            ag__.if_stmt(ag__.ld(input_ids) is not None, if_body_1, else_body_1, get_state_1, set_state_1, ('input_ids', 'input_shape'), 2)
        File "/tmp/__autograph_generated_filezh8cqxq8.py", line 59, in else_body_1
            ag__.if_stmt(ag__.ld(inputs_embeds) is not None, if_body, else_body, get_state, set_state, ('input_shape',), 1)
        File "/tmp/__autograph_generated_filezh8cqxq8.py", line 56, in else_body
            raise ag__.converted_call(ag__.ld(ValueError), (f'You have to specify either {ag__.ld(err_msg_prefix)}input_ids or {ag__.ld(err_msg_prefix)}inputs_embeds',), None, fscope)
    
        ValueError: Exception encountered when calling layer 'decoder' (type TFT5MainLayer).
        
        in user code:
        
            File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_tf_utils.py", line 1395, in run_call_with_unpacked_inputs  *
                return func(self, **unpacked_inputs)
            File "/usr/local/lib/python3.11/dist-packages/transformers/models/t5/modeling_tf_t5.py", line 754, in call  *
                raise ValueError(f"You have to specify either {err_msg_prefix}input_ids or {err_msg_prefix}inputs_embeds")
        
            ValueError: You have to specify either decoder_input_ids or decoder_inputs_embeds
        
        
        Call arguments received by layer 'decoder' (type TFT5MainLayer):
          • input_ids=None
          • attention_mask=None
          • encoder_hidden_states=tf.Tensor(shape=(16, 36, 512), dtype=float32)
          • encoder_attention_mask=tf.Tensor(shape=(16, 36), dtype=int32)
          • inputs_embeds=None
          • head_mask=None
          • encoder_head_mask=None
          • past_key_values=None
          • use_cache=True
          • output_attentions=False
          • output_hidden_states=False
          • return_dict=True
          • training=False
    
    
    Call arguments received by layer 'tft5_for_conditional_generation' (type TFT5ForConditionalGeneration):
      • input_ids={'input_ids': 'tf.Tensor(shape=(16, 36), dtype=int64)', 'attention_mask': 'tf.Tensor(shape=(16, 36), dtype=int64)'}
      • attention_mask=None
      • decoder_input_ids=None
      • decoder_attention_mask=None
      • head_mask=None
      • decoder_head_mask=None
      • encoder_outputs=None
      • past_key_values=None
      • inputs_embeds=None
      • decoder_inputs_embeds=None
      • labels=None
      • use_cache=None
      • output_attentions=None
      • output_hidden_states=None
      • return_dict=None
      • training=False
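
Reading the traceback, the first training epoch itself finishes (the running loss is printed); the exception is raised from predict_function, i.e. from a model.predict() pass over the evaluation data, where the model receives only the encoder inputs and both decoder_input_ids and decoder_inputs_embeds are None. A minimal sanity check of what one evaluation batch actually contains (assuming prepare_tf_dataset, used below, yields (features, labels) pairs):

for features, labels in tf_test_set.take(1):
    print(features.keys())  # is "decoder_input_ids" among the keys?
    print(labels.shape)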

My full code is this:

from huggingface_hub import notebook_login

notebook_login()


import pandas as pd
from datasets import Dataset

# Load the CSV
csv_path = "es_mapu_limpio.csv"  # path to your CSV file
df = pd.read_csv(csv_path)

# Make sure the columns are named "español" and "mapudungun";
# if they have different names, adjust here
df = df.rename(columns={"español": "es", "mapudungun": "map"})

# Transform the DataFrame into the desired structure
structured_data = [
    {
        "id": str(index),  # generate a unique ID for each entry
        "translation": {
            "es": row["es"],  # Spanish translation
            "map": row["map"],  # Mapudungun translation
        },
    }
    for index, row in df.iterrows()
]

# Create the Hugging Face dataset
books = Dataset.from_list(structured_data)

# Split into train and test sets
books = books.train_test_split(test_size=0.2)

# Result
print(books["train"][0])
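# Expected shape of one example (values are illustrative):
# {'id': '42', 'translation': {'es': '<Spanish sentence>', 'map': '<Mapudungun sentence>'}}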


from transformers import AutoTokenizer

checkpoint = "google-t5/t5-small"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)


source_lang = "es"  # Spanish column name
target_lang = "map" # Mapudungun column name
prefix = "traducir español a mapudungun: " 

def preprocess_function(examples):
    inputs = [prefix + example[source_lang] for example in examples["translation"]]
    targets = [example[target_lang] for example in examples["translation"]]
    model_inputs = tokenizer(inputs, text_target=targets, max_length=128, truncation=True)
    return model_inputs
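
# Note: passing text_target makes the tokenizer also encode the targets and
# return them under a "labels" key, next to input_ids and attention_mask.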


tokenized_books = books.map(preprocess_function, batched=True)


from transformers import DataCollatorForSeq2Seq

data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint, return_tensors="tf")
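
# Note: model=checkpoint passes the checkpoint *string*, not a model instance.
# As I understand the docs, DataCollatorForSeq2Seq only creates decoder_input_ids
# from the labels when given a model object that exposes
# prepare_decoder_input_ids_from_labels, so no decoder_input_ids are added here.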


import evaluate

metric = evaluate.load("sacrebleu")



import numpy as np


def postprocess_text(preds, labels):
    preds = [pred.strip() for pred in preds]
    labels = [[label.strip()] for label in labels]

    return preds, labels


def compute_metrics(eval_preds):
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)

    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)

    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    result = {"bleu": result["score"]}

    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]
    result["gen_len"] = np.mean(prediction_lens)
    result = {k: round(v, 4) for k, v in result.items()}
    return result



from transformers import AdamWeightDecay

optimizer = AdamWeightDecay(learning_rate=2e-5, weight_decay_rate=0.01)



from transformers import TFAutoModelForSeq2SeqLM

model = TFAutoModelForSeq2SeqLM.from_pretrained(checkpoint)


tf_train_set = model.prepare_tf_dataset(
    tokenized_books["train"],
    shuffle=True,
    batch_size=16,
    collate_fn=data_collator,
)

tf_test_set = model.prepare_tf_dataset(
    tokenized_books["test"],
    shuffle=False,
    batch_size=16,
    collate_fn=data_collator,
)
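
# Note (my understanding of prepare_tf_dataset): the "labels" column is split off
# as the y of the tf.data pipeline, so at predict() time the model's x only
# contains input_ids and attention_mask.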


import tensorflow as tf

model.compile(optimizer=optimizer)  # No loss argument!



from transformers.keras_callbacks import KerasMetricCallback

metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_test_set)
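
# KerasMetricCallback runs prediction over eval_dataset at the end of each epoch;
# its docs also list a predict_with_generate flag, which I have not set here.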


from transformers.keras_callbacks import PushToHubCallback

push_to_hub_callback = PushToHubCallback(
    output_dir="es_mapu_model",
    tokenizer=tokenizer,
)



callbacks = [metric_callback, push_to_hub_callback]


model.fit(x=tf_train_set, validation_data=tf_test_set, epochs=3, callbacks=callbacks)
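
This fit call is what produces the traceback above. One variant I have not verified, based on the predict_with_generate argument documented for KerasMetricCallback (so that evaluation goes through model.generate() instead of a plain forward pass, which has no decoder inputs to work with):

metric_callback = KerasMetricCallback(
    metric_fn=compute_metrics,
    eval_dataset=tf_test_set,
    predict_with_generate=True,  # assumption: predictions become generated token ids
)

What am I missing that makes training run but the evaluation step fail?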