[1][1: Dataset shape] [2: Error]
I am working on a video anomaly detection model on Google Colab using the code from BN-WVAD. The authors reported high accuracy on two datasets (XD-Violence and UCF-Crime), but since the UCF-Crime-specific code wasn't released, I used code shared by others in the issue tab of the repository.
However, I keep encountering a CUDA Out of Memory (OOM) error during training, both on my local GPU and on Google Colab.
Regarding the shapes of the two datasets: UCF-Crime features are smaller, at (88, 1024), while XD-Violence features are larger, at (360, 1024). I trained successfully on XD-Violence, yet training on UCF-Crime fails because GPU memory runs out. Does this make sense?
My main script, main.py, is shown below:
import pdb
import numpy as np
import torch.utils.data as data
import utils
import time
import wandb
import torch
from options import *
from train import train
from losses import LossComputer
from test import test
from models import WSAD
from dataset_loader import *
from tqdm import tqdm
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
import torch_xla
import torch_xla.core.xla_model as xm
# Timestamp captured once at start-up; the date part names the per-day
# log and checkpoint directories below.
localtime = time.localtime()
time_ymd = time.strftime("%Y-%m-%d", localtime)
time_hms = time.strftime("%H:%M:%S", localtime)


def _seed_worker(worker_id):
    """Seed NumPy and ``random`` inside one DataLoader worker process.

    DataLoader calls ``worker_init_fn(worker_id)`` in every worker.  Inside a
    worker ``torch.initial_seed()`` is already distinct per worker, so it is
    folded into the 32-bit range NumPy accepts and reused for both generators.

    Args:
        worker_id: Index of the worker, supplied by DataLoader (unused; the
            per-worker difference comes from ``torch.initial_seed()``).
    """
    import random

    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)


if __name__ == "__main__":
    # Read the run parameters from the command line.
    args = parse_args()
    if args.debug:
        pdb.set_trace()

    # Outputs are grouped as <root>/<date>/ucf/<version>.
    args.log_path = os.path.join(args.log_path, time_ymd, 'ucf', args.version)
    args.model_path = os.path.join(args.model_path, time_ymd, 'ucf', args.version)
    os.makedirs(args.log_path, exist_ok=True)
    os.makedirs(args.model_path, exist_ok=True)

    wandb.init(
        project="BN-WVAD",
        name=args.version,
        config={
            'optimization:lr': args.lr[0],
            'optimization:iters': args.num_iters,
            'dataset:dataset': 'ucf-crime',
            'model:kernel_sizes': args.kernel_sizes,
            'model:channel_ratios': args.ratios,
            'triplet_loss:abn_ratio_sample': args.ratio_sample,
            'triplet_loss:abn_ratio_batch': args.ratio_batch,
        },
        settings=wandb.Settings(code_dir=os.path.dirname(os.path.abspath(__file__))),
        save_code=True,
    )

    worker_init_fn = None
    if args.seed >= 0:
        utils.set_seed(args.seed)
        # The original assigned the *result* of np.random.seed(...) (None) to
        # worker_init_fn, so no worker was ever seeded.  Keep the main-process
        # seeding it performed and hand DataLoader a real callable instead.
        # NOTE(review): per-worker reproducibility assumes utils.set_seed also
        # seeds torch -- confirm.
        np.random.seed(args.seed)
        worker_init_fn = _seed_worker
    # plot_freq=5 temporarily commented out for experiments with different seeds
    #device = xm.xla_device()
    #print(device)

    net = WSAD(args.len_feature, flag="Train", args=args)
    net = net.cuda()
    #net = net.to(device)

    # Normal and abnormal training videos are drawn from separate loaders.
    normal_train_loader = data.DataLoader(
        UCFVideo(root_dir=args.root_dir, mode='Train', num_segments=args.num_segments,
                 len_feature=args.len_feature, is_normal=True),
        batch_size=args.batch_size,
        shuffle=True, num_workers=args.num_workers,
        worker_init_fn=worker_init_fn, drop_last=True)
    abnormal_train_loader = data.DataLoader(
        UCFVideo(root_dir=args.root_dir, mode='Train', num_segments=args.num_segments,
                 len_feature=args.len_feature, is_normal=False),
        batch_size=args.batch_size,
        shuffle=True, num_workers=args.num_workers,
        worker_init_fn=worker_init_fn, drop_last=True)
    test_loader = data.DataLoader(
        UCFVideo(root_dir=args.root_dir, mode='Test', num_segments=args.num_segments,
                 len_feature=args.len_feature),
        batch_size=10,
        shuffle=False, num_workers=args.num_workers,
        worker_init_fn=worker_init_fn)

    test_info = {'step': [], 'AUC': [], 'AP': []}

    criterion = LossComputer()
    optimizer = torch.optim.Adam(net.parameters(), lr=args.lr[0],
                                 betas=(0.9, 0.999), weight_decay=args.weight_decay)

    best_scores = {
        'best_AUC': -1,
        'best_AP': -1,
    }

    # Baseline evaluation before any training step.  Evaluation needs no
    # gradients; no_grad stops autograd from retaining activations (harmless
    # if test() already disables gradients itself).
    with torch.no_grad():
        metric = test(net, test_loader, test_info, 0)

    for step in tqdm(
        range(1, args.num_iters + 1),
        total=args.num_iters,
        dynamic_ncols=True
    ):
        # Per-step learning rate: args.lr holds one value per iteration, so
        # the optimizer is only touched when the scheduled value changes.
        if step > 1 and args.lr[step - 1] != args.lr[step - 2]:
            for param_group in optimizer.param_groups:
                param_group["lr"] = args.lr[step - 1]

        # Restart a loader's iterator each time it has been fully consumed.
        if (step - 1) % len(normal_train_loader) == 0:
            normal_loader_iter = iter(normal_train_loader)
        if (step - 1) % len(abnormal_train_loader) == 0:
            abnormal_loader_iter = iter(abnormal_train_loader)

        # One optimisation step; train() returns the losses to log.
        losses = train(net, normal_loader_iter, abnormal_loader_iter, optimizer, criterion)
        wandb.log(losses, step=step)

        # Fixed checkpoints.  (The loop starts at 1, so the original
        # `step == 0` test could never fire and is dropped.)
        if step in (300, 700, 997):
            torch.save(net.state_dict(), os.path.join(args.model_path, f"wsad_epoch_{step}.pt"))
            print(f"Model saved at epoch {step}")

        # Periodic evaluation on the test split.
        if step % args.plot_freq == 0:
            with torch.no_grad():
                metric = test(net, test_loader, test_info, step)
            print('AUC: ', test_info['AUC'][-1])

            # Persist the record and weights whenever AUC improves.
            if test_info["AUC"][-1] > best_scores['best_AUC']:
                utils.save_best_record(test_info, os.path.join(args.log_path, "ucf_best_record_{}.txt".format(args.seed)))
                torch.save(net.state_dict(), os.path.join(args.model_path, "ucf_best_{}.pkl".format(args.seed)))

            # Track the running best of every reported metric.
            for n, v in metric.items():
                best_name = 'best_' + n
                best_scores[best_name] = v if v > best_scores[best_name] else best_scores[best_name]

            wandb.log(metric, step=step)
            wandb.log(best_scores, step=step)
Below is the error log:
WARNING:root:Found CUDA without GPU_NUM_DEVICES. Defaulting to PJRT_DEVICE=CUDA with GPU_NUM_DEVICES=1
wandb: Using wandb-core as the SDK backend. Please refer to for more information.
wandb: Currently logged in as: jiaqifoo (jiaqifoo-universiti-tunku-abdul-rahman). Use `wandb login --relogin` to force relogin
wandb: Tracking run with wandb version 0.19.1
wandb: Run `wandb offline` to turn off syncing.
wandb: Syncing run train
Traceback (most recent call last):
File "/content/drive/MyDrive/fyp/main.py", line 106, in <module>
metric = test(net, test_loader, test_info, 0)
File "/content/drive/MyDrive/fyp/test.py", line 61, in test
frame_predicts = get_predicts(test_loader, net)
File "/content/drive/MyDrive/fyp/test.py", line 22, in get_predicts
res = net(_data)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/content/drive/MyDrive/fyp/models/model.py", line 122, in forward
x = self.selfatt(x) # feat enhfance(attn+ff) (128, 200, 512)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/content/drive/MyDrive/fyp/models/translayer.py", line 84, in forward
x = attn(x) + x # (128, 200, 512)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/content/drive/MyDrive/fyp/models/translayer.py", line 20, in forward
return self.fn(self.norm(x), **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/content/drive/MyDrive/fyp/models/translayer.py", line 57, in forward
dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale # root(d_k) (128, 4, 200, 200)
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 6.79 GiB. GPU 0 has a total capacity of 14.75 GiB of which 6.61 GiB is free. Process 28173 has 100.00 MiB memory in use. Process 272388 has 8.04 GiB memory in use. Of the allocated memory 7.85 GiB is allocated by PyTorch, and 70.97 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (.html#environment-variables)
wandb: Find logs at: wandb/run-20241228_094147-fx4f8y5f/logs