
How to merge PEFT-tuned models into the base one in Transformers (Hugging Face)? - Stack Overflow


I tried to merge a PEFT-tuned model back into the original base model, because the Hugging Face API only outputs the fine-tuning's "extra weights" (the adapter) as a .safetensors file. I tried to merge them but failed.

How can I merge them? Or is the .safetensors file simply not supported? And how can I get a .bin file instead of .safetensors?
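
Regarding the .bin question: transformers saves weights as model.safetensors by default, but save_pretrained accepts a safe_serialization flag, and setting it to False writes a pytorch_model.bin instead. A minimal sketch, reusing the output directory from the merge script below:

model.save_pretrained("./output/merged_model", safe_serialization=False)  # writes pytorch_model.bin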

You can see my struggle here: post_on_forum. Below are my main scripts and the warnings I got.

merge script:

# merge the base model + the PEFT adapter, then save the merged model

from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import torch

device_map = {"": 0}  # place the whole model on GPU 0
lora_dir = "/root/autodl-tmp/tuned_model"
base_model_name = "LLM4Binary/llm4decompile-1.3b-v1.5"
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
model = AutoPeftModelForCausalLM.from_pretrained(lora_dir, device_map=device_map, torch_dtype=torch.bfloat16)
print(model)

model = model.merge_and_unload()  # this is the line that raises the AttributeError below

output_dir = "./output/merged_model"
model.save_pretrained(output_dir)

error:

Traceback (most recent call last):
  File "/root/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/peft/peft_model.py", line 824, in __getattr__
    return super().__getattr__(name)  # defer to nn.Module's logic
  File "/root/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1695, in __getattr__
    raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
AttributeError: 'PeftModelForCausalLM' object has no attribute 'merge_and_unload'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/root/autodl-tmp/merge.py", line 15, in <module>
    model = model.merge_and_unload()
  File "/root/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/peft/peft_model.py", line 828, in __getattr__
    return getattr(self.base_model, name)
  File "/root/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1695, in __getattr__
    raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
AttributeError: 'LlamaForCausalLM' object has no attribute 'merge_and_unload'
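
A note on why this happens: merge_and_unload is defined only for weight-merging adapters such as LoRA (peft's LoraModel). The training script below uses PromptTuningConfig, and prompt tuning only learns a few virtual token embeddings that get prepended to the input; it never changes the base weights, so there is nothing to merge, the PeftModel has no merge_and_unload, and the attribute lookup falls through to the wrapped LlamaForCausalLM, producing exactly this AttributeError. A prompt-tuned adapter can still be used for inference without merging. A minimal sketch, assuming the adapter directory from the merge script above:

from peft import AutoPeftModelForCausalLM
import torch

# load base model + prompt-tuning adapter together; the virtual tokens
# are applied automatically during generate(), so no merging is needed
model = AutoPeftModelForCausalLM.from_pretrained(
    "/root/autodl-tmp/tuned_model",
    device_map={"": 0},
    torch_dtype=torch.bfloat16,
)
model.eval()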

train script:

from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from peft import PeftModel, PromptTuningConfig, PromptTuningInit, TaskType, get_peft_model
from datasets import load_dataset

device = "cuda"
tokenizer_name_or_path = "LLM4Binary/llm4decompile-1.3b-v1.5"
model_name_or_path = "LLM4Binary/llm4decompile-1.3b-v1.5"
dataset_name = "asm2c"
text_column = "asm text"
label_column = "text_label"
max_length = 64
lr = 3e-2
num_epochs = 50
batch_size = 8

from datasets import load_dataset

dataset = load_dataset("json", data_files="./traindata.jsonl")
dataset = dataset["train"].train_test_split(0.2)


tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path)

def preprocess_function(examples):
    inputs = examples["input"]
    outputs = examples["output"]

    # concatenate the input and output columns into one training text
    merged_texts = [f"{input_text} {output_text}" for input_text, output_text in zip(inputs, outputs)]

    model_inputs = tokenizer(merged_texts, truncation=True, padding="max_length", max_length=512)
    model_inputs["labels"] = model_inputs["input_ids"].copy()  # causal LM: labels are the input ids
    return model_inputs

processed_datasets = dataset.map(
    preprocess_function,
    batched=True,
    num_proc=1,
    remove_columns=dataset["train"].column_names,
    load_from_cache_file=False,
    desc="Running tokenizer on dataset",
)

train_dataset = processed_datasets["train"]
eval_dataset = processed_datasets["test"]

# NOTE: this is prompt tuning, not LoRA -- it learns only 8 virtual token
# embeddings and leaves the base weights untouched, so there are no weight
# deltas that could later be merged into the base model
peft_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.TEXT,
    num_virtual_tokens=8,
    prompt_tuning_init_text="What's the source code of this asm?",
    tokenizer_name_or_path=model_name_or_path,
)
checkpoint_name = f"{dataset_name}_{model_name_or_path}_{peft_config.peft_type}_{peft_config.task_type}_v1.pt".replace(
    "/", "_"
)

# creating model
model = AutoModelForCausalLM.from_pretrained("LLM4Binary/llm4decompile-1.3b-v1.5") #, load_in_8bit=True, torch_dtype=torch.float16, device_map="auto")
#model = prepare_model_for_kbit_training(model)

peft_model = get_peft_model(model, peft_config)


training_args = TrainingArguments(
    output_dir="./results4",             # directory where checkpoints are saved
    evaluation_strategy="epoch",         # evaluate after every epoch (deprecated name; triggers the FutureWarning below)
    save_strategy="epoch",               # save a checkpoint at the end of every epoch
    learning_rate=2e-5,
    per_device_train_batch_size=4,       # batch size for training
    per_device_eval_batch_size=8,        # batch size for evaluation
    logging_steps=10,                    # how often to log
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=False
)


trainer = Trainer(
    model=peft_model,  # train the PEFT-wrapped model, not the frozen base model
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    #data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True)
)

trainer.train()
'''
trainer.evaluate(eval_dataset)

# manually save the model after training
trainer.save_model(output_dir="./tuned_model")  # save the final model to the given directory
tokenizer.save_pretrained(save_directory="./tuned_tokenizer")  # save the tokenizer
'''
lora_adapter = "./lora_adapter"
peft_model.save_pretrained(lora_adapter, save_adapter=True, save_config=True)

model_to_merge = PeftModel.from_pretrained(AutoModelForCausalLM.from_pretrained(model_name_or_path).to("cuda"), lora_adapter)

merged_model = model_to_merge.merge_and_unload()
merged_model.save_pretrained("./merged_model")
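
If the goal really is a standalone merged checkpoint, one way around this is to train a LoRA adapter instead of a prompt-tuning one, since LoRA's low-rank weight deltas can be folded back into the base weights. A minimal sketch under that assumption (r, lora_alpha, and target_modules are illustrative values, not tuned ones):

from peft import LoraConfig, TaskType, get_peft_model

peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,                                  # illustrative rank
    lora_alpha=16,                        # illustrative scaling factor
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],  # assumption: Llama-style attention projections
)
peft_model = get_peft_model(model, peft_config)
# ...train exactly as above; merging then works:
# merged_model = peft_model.merge_and_unload()
# merged_model.save_pretrained("./merged_model")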

warnings:

/root/miniconda3/envs/llm4decompile/lib/python3.10/site-packages/transformers/training_args.py:1575: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 
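
This FutureWarning is unrelated to the merge failure: recent transformers releases renamed the evaluation_strategy argument to eval_strategy (the warning says the old name is removed in 4.46). On a newer version the same settings would be spelled:

# on recent transformers versions, use eval_strategy instead of evaluation_strategy
training_args = TrainingArguments(
    output_dir="./results4",
    eval_strategy="epoch",
    save_strategy="epoch",
)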