I am using Ubuntu 24.04.1 on an AWS EC2 instance g5.8xlarge.
I am receiving the following error message:
OutOfMemoryError: Allocation on device
Code:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync"
import torch
torch.cuda.empty_cache()
import transformers
if torch.cuda.is_available():
torch.set_default_device("cuda")
device = torch.device("cuda")
model = transformers.AutoModelForCausalLM.from_pretrained("microsoft/Orca-2-13b", device_map=device)
Full error:
/home/ubuntu/anaconda3/envs/ai/lib/python3.12/site-packages/torch/cuda/__init__.py:734: UserWarning: Can't initialize NVML
warnings.warn("Can't initialize NVML")
Loading checkpoint shards: 33%
2/6 [00:04<00:06, 1.72s/it]
/home/ubuntu/anaconda3/envs/ai/lib/python3.12/site-packages/torch/cuda/__init__.py:734: UserWarning: Can't initialize NVML
warnings.warn("Can't initialize NVML")
---------------------------------------------------------------------------
OutOfMemoryError Traceback (most recent call last)
Cell In[5], line 6
2 torch.set_default_device("cuda")
4 device = torch.device("cuda")
----> 6 model = transformers.AutoModelForCausalLM.from_pretrained("microsoft/Orca-2-13b", device_map=device)
8 # https://github.com/huggingface/transformers/issues/27132
9 # please use the slow tokenizer since fast and slow tokenizer produces different tokens
10 tokenizer = transformers.AutoTokenizer.from_pretrained(
11 "microsoft/Orca-2-13b",
12 use_fast=True,
13 )
File ~/anaconda3/envs/ai/lib/python3.12/site-packages/transformers/models/auto/auto_factory.py:564, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
562 elif type(config) in cls._model_mapping.keys():
563 model_class = _get_model_class(config, cls._model_mapping)
--> 564 return model_class.from_pretrained(
565 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
566 )
567 raise ValueError(
568 f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
569 f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
570 )
File ~/anaconda3/envs/ai/lib/python3.12/site-packages/transformers/modeling_utils.py:262, in restore_default_torch_dtype.<locals>._wrapper(*args, **kwargs)
260 old_dtype = torch.get_default_dtype()
261 try:
--> 262 return func(*args, **kwargs)
263 finally:
264 torch.set_default_dtype(old_dtype)
File ~/anaconda3/envs/ai/lib/python3.12/site-packages/transformers/modeling_utils.py:4319, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)
4309 if dtype_orig is not None:
4310 torch.set_default_dtype(dtype_orig)
4312 (
4313 model,
4314 missing_keys,
4315 unexpected_keys,
4316 mismatched_keys,
4317 offload_index,
4318 error_msgs,
-> 4319 ) = cls._load_pretrained_model(
4320 model,
4321 state_dict,
4322 loaded_state_dict_keys, # XXX: rename?
4323 resolved_archive_file,
4324 pretrained_model_name_or_path,
4325 ignore_mismatched_sizes=ignore_mismatched_sizes,
4326 sharded_metadata=sharded_metadata,
4327 _fast_init=_fast_init,
4328 low_cpu_mem_usage=low_cpu_mem_usage,
4329 device_map=device_map,
4330 offload_folder=offload_folder,
4331 offload_state_dict=offload_state_dict,
4332 dtype=torch_dtype,
4333 hf_quantizer=hf_quantizer,
4334 keep_in_fp32_modules=keep_in_fp32_modules,
4335 gguf_path=gguf_path,
4336 weights_only=weights_only,
4337 )
4339 # make sure token embedding weights are still tied if needed
4340 model.tie_weights()
File ~/anaconda3/envs/ai/lib/python3.12/site-packages/transformers/modeling_utils.py:4897, in PreTrainedModel._load_pretrained_model(cls, model, state_dict, loaded_keys, resolved_archive_file, pretrained_model_name_or_path, ignore_mismatched_sizes, sharded_metadata, _fast_init, low_cpu_mem_usage, device_map, offload_folder, offload_state_dict, dtype, hf_quantizer, keep_in_fp32_modules, gguf_path, weights_only)
4895 else:
4896 fixed_state_dict = cls._fix_state_dict_keys_on_load(state_dict)
-> 4897 new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
4898 model_to_load,
4899 fixed_state_dict,
4900 start_prefix,
4901 expected_keys,
4902 device_map=device_map,
4903 offload_folder=offload_folder,
4904 offload_index=offload_index,
4905 state_dict_folder=state_dict_folder,
4906 state_dict_index=state_dict_index,
4907 dtype=dtype,
4908 hf_quantizer=hf_quantizer,
4909 is_safetensors=is_safetensors,
4910 keep_in_fp32_modules=keep_in_fp32_modules,
4911 unexpected_keys=unexpected_keys,
4912 )
4913 error_msgs += new_error_msgs
4914 else:
4915 # Sharded checkpoint or whole but low_cpu_mem_usage==True
File ~/anaconda3/envs/ai/lib/python3.12/site-packages/transformers/modeling_utils.py:896, in _load_state_dict_into_meta_model(model, state_dict, start_prefix, expected_keys, device_map, offload_folder, offload_index, state_dict_folder, state_dict_index, dtype, hf_quantizer, is_safetensors, keep_in_fp32_modules, unexpected_keys, pretrained_model_name_or_path)
893 param_device = "cpu" if is_local_dist_rank_0() else "meta"
895 # For backward compatibility with older versions of `accelerate` and for non-quantized params
--> 896 set_module_tensor_to_device(model, param_name, param_device, **set_module_kwargs)
897 else:
898 hf_quantizer.create_quantized_param(model, param, param_name, param_device, state_dict, unexpected_keys)
File ~/anaconda3/envs/ai/lib/python3.12/site-packages/accelerate/utils/modeling.py:330, in set_module_tensor_to_device(module, tensor_name, device, value, dtype, fp16_statistics, tied_params_map)
328 module._parameters[tensor_name] = param_cls(new_value, requires_grad=old_value.requires_grad)
329 elif isinstance(value, torch.Tensor):
--> 330 new_value = value.to(device)
331 else:
332 new_value = torch.tensor(value, device=device)
File ~/anaconda3/envs/ai/lib/python3.12/site-packages/torch/utils/_device.py:104, in DeviceContext.__torch_function__(self, func, types, args, kwargs)
102 if func in _device_constructors() and kwargs.get('device') is None:
103 kwargs['device'] = self.device
--> 104 return func(*args, **kwargs)
OutOfMemoryError: Allocation on device
I am using Ubuntu 24.04.1 on an AWS EC2 instance g5.8xlarge.
I am receiving the following error message:
OutOfMemoryError: Allocation on device
Code:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync"
import torch
torch.cuda.empty_cache()
import transformers
if torch.cuda.is_available():
torch.set_default_device("cuda")
device = torch.device("cuda")
model = transformers.AutoModelForCausalLM.from_pretrained("microsoft/Orca-2-13b", device_map=device)
Full error:
/home/ubuntu/anaconda3/envs/ai/lib/python3.12/site-packages/torch/cuda/__init__.py:734: UserWarning: Can't initialize NVML
warnings.warn("Can't initialize NVML")
Loading checkpoint shards: 33%
2/6 [00:04<00:06, 1.72s/it]
/home/ubuntu/anaconda3/envs/ai/lib/python3.12/site-packages/torch/cuda/__init__.py:734: UserWarning: Can't initialize NVML
warnings.warn("Can't initialize NVML")
---------------------------------------------------------------------------
OutOfMemoryError Traceback (most recent call last)
Cell In[5], line 6
2 torch.set_default_device("cuda")
4 device = torch.device("cuda")
----> 6 model = transformers.AutoModelForCausalLM.from_pretrained("microsoft/Orca-2-13b", device_map=device)
8 # https://github.com/huggingface/transformers/issues/27132
9 # please use the slow tokenizer since fast and slow tokenizer produces different tokens
10 tokenizer = transformers.AutoTokenizer.from_pretrained(
11 "microsoft/Orca-2-13b",
12 use_fast=True,
13 )
File ~/anaconda3/envs/ai/lib/python3.12/site-packages/transformers/models/auto/auto_factory.py:564, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
562 elif type(config) in cls._model_mapping.keys():
563 model_class = _get_model_class(config, cls._model_mapping)
--> 564 return model_class.from_pretrained(
565 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
566 )
567 raise ValueError(
568 f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
569 f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
570 )
File ~/anaconda3/envs/ai/lib/python3.12/site-packages/transformers/modeling_utils.py:262, in restore_default_torch_dtype.<locals>._wrapper(*args, **kwargs)
260 old_dtype = torch.get_default_dtype()
261 try:
--> 262 return func(*args, **kwargs)
263 finally:
264 torch.set_default_dtype(old_dtype)
File ~/anaconda3/envs/ai/lib/python3.12/site-packages/transformers/modeling_utils.py:4319, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)
4309 if dtype_orig is not None:
4310 torch.set_default_dtype(dtype_orig)
4312 (
4313 model,
4314 missing_keys,
4315 unexpected_keys,
4316 mismatched_keys,
4317 offload_index,
4318 error_msgs,
-> 4319 ) = cls._load_pretrained_model(
4320 model,
4321 state_dict,
4322 loaded_state_dict_keys, # XXX: rename?
4323 resolved_archive_file,
4324 pretrained_model_name_or_path,
4325 ignore_mismatched_sizes=ignore_mismatched_sizes,
4326 sharded_metadata=sharded_metadata,
4327 _fast_init=_fast_init,
4328 low_cpu_mem_usage=low_cpu_mem_usage,
4329 device_map=device_map,
4330 offload_folder=offload_folder,
4331 offload_state_dict=offload_state_dict,
4332 dtype=torch_dtype,
4333 hf_quantizer=hf_quantizer,
4334 keep_in_fp32_modules=keep_in_fp32_modules,
4335 gguf_path=gguf_path,
4336 weights_only=weights_only,
4337 )
4339 # make sure token embedding weights are still tied if needed
4340 model.tie_weights()
File ~/anaconda3/envs/ai/lib/python3.12/site-packages/transformers/modeling_utils.py:4897, in PreTrainedModel._load_pretrained_model(cls, model, state_dict, loaded_keys, resolved_archive_file, pretrained_model_name_or_path, ignore_mismatched_sizes, sharded_metadata, _fast_init, low_cpu_mem_usage, device_map, offload_folder, offload_state_dict, dtype, hf_quantizer, keep_in_fp32_modules, gguf_path, weights_only)
4895 else:
4896 fixed_state_dict = cls._fix_state_dict_keys_on_load(state_dict)
-> 4897 new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
4898 model_to_load,
4899 fixed_state_dict,
4900 start_prefix,
4901 expected_keys,
4902 device_map=device_map,
4903 offload_folder=offload_folder,
4904 offload_index=offload_index,
4905 state_dict_folder=state_dict_folder,
4906 state_dict_index=state_dict_index,
4907 dtype=dtype,
4908 hf_quantizer=hf_quantizer,
4909 is_safetensors=is_safetensors,
4910 keep_in_fp32_modules=keep_in_fp32_modules,
4911 unexpected_keys=unexpected_keys,
4912 )
4913 error_msgs += new_error_msgs
4914 else:
4915 # Sharded checkpoint or whole but low_cpu_mem_usage==True
File ~/anaconda3/envs/ai/lib/python3.12/site-packages/transformers/modeling_utils.py:896, in _load_state_dict_into_meta_model(model, state_dict, start_prefix, expected_keys, device_map, offload_folder, offload_index, state_dict_folder, state_dict_index, dtype, hf_quantizer, is_safetensors, keep_in_fp32_modules, unexpected_keys, pretrained_model_name_or_path)
893 param_device = "cpu" if is_local_dist_rank_0() else "meta"
895 # For backward compatibility with older versions of `accelerate` and for non-quantized params
--> 896 set_module_tensor_to_device(model, param_name, param_device, **set_module_kwargs)
897 else:
898 hf_quantizer.create_quantized_param(model, param, param_name, param_device, state_dict, unexpected_keys)
File ~/anaconda3/envs/ai/lib/python3.12/site-packages/accelerate/utils/modeling.py:330, in set_module_tensor_to_device(module, tensor_name, device, value, dtype, fp16_statistics, tied_params_map)
328 module._parameters[tensor_name] = param_cls(new_value, requires_grad=old_value.requires_grad)
329 elif isinstance(value, torch.Tensor):
--> 330 new_value = value.to(device)
331 else:
332 new_value = torch.tensor(value, device=device)
File ~/anaconda3/envs/ai/lib/python3.12/site-packages/torch/utils/_device.py:104, in DeviceContext.__torch_function__(self, func, types, args, kwargs)
102 if func in _device_constructors() and kwargs.get('device') is None:
103 kwargs['device'] = self.device
--> 104 return func(*args, **kwargs)
OutOfMemoryError: Allocation on device
Share
Improve this question
edited Mar 12 at 11:32
desertnaut
60.5k32 gold badges155 silver badges182 bronze badges
asked Mar 12 at 6:05
WolfyWolfy
4602 gold badges9 silver badges34 bronze badges
7
|
Show 2 more comments
1 Answer
Reset to default
2
You can check out information on the specific model here, where you can see that it requires 52.1 GB of VRAM (GPU memory).
Based on this table, we see that you have 24 GB of GPU memory, so the model won't be able to fit. If you aren't able to get more GPU memory, you can look into quantized models.
You can check out the models on Hugging Face that have quantized versions, along with the GPU memory each one requires and its best use case.
Using device_map="auto" instead of just device made all the difference – Wolfy Commented Mar 13 at 6:49