2s
一, 大致框架
二, 零散的代码学习
一, 大致框架
data-get(N, C, T, V, M)(已经包含时间和空间信息)(样本数,channel,时间帧数,num_node,人数)。
joint_data;- graph_spatial(A)- (agcn.py)model(A,B,C;AGCN) -(main.py)train - test- softmax score a
bone_data;- graph_spatial(A)- (agcn.py)model(A,B,C;AGCN) -(main.py)train - test- softmax score b。
(ensemble.py)a+b -> fused score , action label。
① 针对graph文件夹,就是为了return A,即得到邻接矩阵。论文中使用的N*N表示Ak,即代码中的V(num_node)
class Graph:def __init__(self, labeling_mode='spatial'):self.A = self.get_adjacency_matrix(labeling_mode)...def get_adjacency_matrix(self, labeling_mode=None):if labeling_mode is None:return self.Aif labeling_mode == 'spatial':A = tools.get_spatial_graph(num_node, self_link, inward, outward)else:raise ValueError()return Atools.py
def get_spatial_graph(num_node, self_link, inward, outward):
    """Stack the three adjacency matrices of the spatial partition.

    Args:
        num_node: Number of graph nodes ``V``.
        self_link: Iterable of ``(a, b)`` index pairs for the self-connections.
        inward: Iterable of ``(a, b)`` index pairs.
        outward: Iterable of ``(a, b)`` index pairs.

    Returns:
        Array of shape ``(3, V, V)``: the un-normalised self-link matrix
        followed by the column-normalised inward and outward matrices.
    """
    identity = edge2mat(self_link, num_node)
    inward_norm = normalize_digraph(edge2mat(inward, num_node))
    outward_norm = normalize_digraph(edge2mat(outward, num_node))
    return np.stack((identity, inward_norm, outward_norm))  # 3 x V x V


def edge2mat(link, num_node):
    """Turn a list of ``(i, j)`` pairs into a dense ``V x V`` 0/1 matrix.

    Each pair ``(i, j)`` sets entry ``[j, i]`` to 1 (note the swapped order).
    The ``N x N`` matrix ``A_k`` of the paper corresponds to ``V x V`` here.
    """
    A = np.zeros((num_node, num_node))
    for i, j in link:
        A[j, i] = 1
    return A


def normalize_digraph(A):
    """Divide every column of ``A`` by its column sum.

    Columns whose sum is not positive come out as all zeros, exactly as when
    right-multiplying by a diagonal matrix that has a zero in that position.

    Args:
        A: 2-D array-like of shape ``(h, w)``.

    Returns:
        Floating-point array of shape ``(h, w)``.
    """
    A = np.asarray(A)
    _, w = A.shape  # also rejects anything that is not 2-D
    col_sum = np.sum(A, 0)  # one sum per column
    positive = col_sum > 0
    scale = np.zeros(w)
    # ``1.0 /`` keeps the reciprocal in floating point, so integer-valued
    # adjacency matrices are accepted as well.
    scale[positive] = 1.0 / col_sum[positive]
    # Broadcasting over the last axis scales column j by scale[j]; this is
    # A @ diag(scale) without materialising the w x w diagonal matrix.
    return A * scale
A_k determines whether there is a connection between two vertices; it represents the physical structure of the human body.
ntu_rgb_d.py(kinetics.py,num_node = 18,inward本身从0开始)
# Skeleton graph description: node count plus the joint-pair lists that the
# adjacency matrices are built from.
num_node = 25

# One (k, k) pair per joint: every joint is linked to itself.
self_link = [(joint, joint) for joint in range(num_node)]

# Joint pairs that may be connected, written with 1-based joint numbers.
inward_ori_index = [
    (1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5), (7, 6),
    (8, 7), (9, 21), (10, 9), (11, 10), (12, 11), (13, 1),
    (14, 13), (15, 14), (16, 15), (17, 1), (18, 17), (19, 18),
    (20, 19), (22, 23), (23, 8), (24, 25), (25, 12),
]

# Shift both ends of every pair by one so that indices start at 0.
inward = [(src - 1, dst - 1) for src, dst in inward_ori_index]

# The same pairs reversed, so that both directions of each link are present.
outward = [(dst, src) for src, dst in inward]

neighbor = inward + outward
② 针对model文件夹,分别对应论文中 adaptive graph convolutional network > 4.1layer(unit_gcn,unit_tcn)4.2block(TCN_GCN_unit);4.3network(Model)
#For the temporal dimension,it is straightforward to perform the graph convolution similar to the classical convolution operation.
class unit_tcn(nn.Module): #temporal GCN(Kt × 1 convolution on the C ×T ×N feature maps) + bndef __init__(self, in_channels, out_channels, kernel_size=9, stride=1):super(unit_tcn, self).__init__()pad = int((kernel_size - 1) / 2) #输入输出维度不变self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=(kernel_size, 1), padding=(pad, 0),stride=(stride, 1))self.bn = nn.BatchNorm2d(out_channels)...def forward(self, x):x = self.bn(self.conv(x))return xclass unit_gcn(nn.Module): #spatial GCN + bn + reludef __init__(self, in_channels, out_channels, A, coff_embedding=4, num_subset=3, adaptive=True, attention=True):super(unit_gcn, self).__init__()inter_channels = out_channels // coff_embeddingnum_jpts = A.shape[-1]self.conv_d = nn.ModuleList() #容器 appendfor i in range(self.num_subset):self.conv_d.append(nn.Conv2d(in_channels, out_channels, 1))if adaptive:self.PA = nn.Parameter(torch.from_numpy(A.astype(np.float32)))self.conv_a = nn.ModuleList()self.conv_b = nn.ModuleList()for i in range(self.num_subset):self.conv_a.append(nn.Conv2d(in_channels, inter_channels, 1))self.conv_b.append(nn.Conv2d(in_channels, inter_channels, 1))else:self.A = Variable(torch.from_numpy(A.astype(np.float32)), requires_grad=False)self.adaptive = adaptiveif attention:self.conv_ta = nn.Conv1d(out_channels, 1, 9, padding=4)nn.init.constant_(self.conv_ta.weight, 0)nn.init.constant_(self.conv_ta.bias, 0)# s attentionker_jpt = num_jpts - 1 if not num_jpts % 2 else num_jpts #0 代表 假 , 1 代表真pad = (ker_jpt - 1) // 2self.conv_sa = nn.Conv1d(out_channels, 1, ker_jpt, padding=pad)nn.init.xavier_normal_(self.conv_sa.weight)nn.init.constant_(self.conv_sa.bias, 0)# channel attentionrr = 2self.fc1c = nn.Linear(out_channels, out_channels // rr)self.fc2c = nn.Linear(out_channels // rr, out_channels)nn.init.kaiming_normal_(self.fc1c.weight)nn.init.constant_(self.fc1c.bias, 0)nn.init.constant_(self.fc2c.weight, 0)nn.init.constant_(self.fc2c.bias, 0)self.attention = attentionif in_channels != out_channels:self.down = 
nn.Sequential(nn.Conv2d(in_channels, out_channels, 1),nn.BatchNorm2d(out_channels))else:self.down = lambda x: xself.bn = nn.BatchNorm2d(out_channels)self.soft = nn.Softmax(-2)self.tan = nn.Tanh()self.sigmoid = nn.Sigmoid()self.relu = nn.ReLU(inplace=True)for m in self.modules():if isinstance(m, nn.Conv2d):conv_init(m)elif isinstance(m, nn.BatchNorm2d):bn_init(m, 1)bn_init(self.bn, 1e-6)for i in range(self.num_subset):conv_branch_init(self.conv_d[i], self.num_subset)def forward(self, x):N, C, T, V = x.size()y = Noneif self.adaptive: #自适应A = A + self.PA #A+B #nn.Parameter(torch.from_numpy(A.astype(np.float32))) #3*V*Vfor i in range(self.num_subset): #f in Cin*T*N two embedding functions(one 1 × 1 convolutional layer )A1 = self.conv_a[i](x).permute(0, 3, 1, 2).contiguous().view(N, V, self.inter_c * T) #N*V*CT(论文中的N*CT)A2 = self.conv_b[i](x).view(N, self.inter_c * T, V) #N*CT*V(论文中的CT*N)A1 = self.soft(torch.matmul(A1, A2) / A1.size(-1)) # N V V(论文中的N*N)高维矩阵 乘 CkA1 = A[i] + A1 #(论文中N*N) -> A+B+CA2 = x.view(N, C * T, V) #论文中的CT*Nz = self.conv_d[i](torch.matmul(A2, A1).view(N, C, T, V)) #N*CT*V (x.size,论文中CT*N,每一个num_subset的输出)y = z + y if y is not None else zelse:A = self.A.cuda(x.get_device()) * self.maskfor i in range(self.num_subset):A1 = A[i]A2 = x.view(N, C * T, V)z = self.conv_d[i](torch.matmul(A2, A1).view(N, C, T, V))y = z + y if y is not None else zy = self.bn(y)y += self.down(x) #residualy = self.relu(y)if self.attention:# spatial attention# temporal attention# channel attention ...return yclass TCN_GCN_unit(nn.Module): #Adaptive graph convolutional blockdef __init__(self, in_channels, out_channels, A, stride=1, residual=True, adaptive=True, attention=True):super(TCN_GCN_unit, self).__init__()self.gcn1 = unit_gcn(in_channels, out_channels, A, adaptive=adaptive, attention=attention)self.tcn1 = unit_tcn(out_channels, out_channels, stride=stride) #conv,bn,reluself.relu = nn.ReLU(inplace=True)self.attention = attentionif not residual:self.residual = lambda x: 0elif 
(in_channels == out_channels) and (stride == 1):self.residual = lambda x: xelse:self.residual = unit_tcn(in_channels, out_channels, kernel_size=1, stride=stride) #一层conv代表残差def forward(self, x):y = self.relu(self.tcn1(self.gcn1(x)) + self.residual(x))return yclass Model(nn.Module): #Adaptive graph convolutional networkdef __init__(self, num_class=60, num_point=25, num_person=2, graph=None, graph_args=dict(), in_channels=3,drop_out=0, adaptive=True, attention=True):super(Model, self).__init__()Graph = import_class(graph)self.graph = Graph(**graph_args)A = self.graph.Aself.data_bn = nn.BatchNorm1d(num_person * in_channels * num_point)self.l1 = TCN_GCN_unit(3, 64, A, residual=False, adaptive=adaptive, attention=attention)self.l2 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention)self.l3 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention)self.l4 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention)self.l5 = TCN_GCN_unit(64, 128, A, stride=2, adaptive=adaptive, attention=attention)self.l6 = TCN_GCN_unit(128, 128, A, adaptive=adaptive, attention=attention)self.l7 = TCN_GCN_unit(128, 128, A, adaptive=adaptive, attention=attention)self.l8 = TCN_GCN_unit(128, 256, A, stride=2, adaptive=adaptive, attention=attention)self.l9 = TCN_GCN_unit(256, 256, A, adaptive=adaptive, attention=attention)self.l10 = TCN_GCN_unit(256, 256, A, adaptive=adaptive, attention=attention)self.fc = nn.Linear(256, num_class)def forward(self, x):N, C, T, V, M = x.size()x = x.permute(0, 4, 3, 1, 2).contiguous().view(N, M * V * C, T)x = self.data_bn(x)x = x.view(N, M, V, C, T).permute(0, 1, 3, 4, 2).contiguous().view(N * M, C, T, V)x = self.l1(x)...x = self.l10(x)# N*M,C,T,Vc_new = x.size(1)x = x.view(N, M, c_new, -1)x = x.mean(3).mean(1)x = self.drop_out(x)return self.fc(x) #全连接分类
③ data_gen文件夹下,部分可对应4.4. Two-stream networks,
ntu_gendata.py[kinetics_gendata.py] (get data of joints)fp = np.zeros((len(sample_label), 3, max_frame, num_joint, max_body_true), dtype=np.float32) # N, C, T, V, Mfor i, s in enumerate(tqdm(sample_name)):data = read_xyz(os.path.join(data_path, s), max_body=max_body_kinect, num_joint=num_joint) #4,25;data-> 3,seq_info['numFrame'],num_joint,max_body[data, label = feeder[i]]fp[i, :, 0:data.shape[1], :, :] = data #0:data.shape[1]--seq_info['numFrame']fp = pre_normalization(fp) #N, C, T, V, M preprocess.pynp.save('{}/{}_data_joint.npy'.format(out_path, part), fp) #保存关节数据[np.save(data_out_path, fp)]gen_bone_data.py (calculate the data of bones based on the data of joints)
for dataset in datasets: #人体关键节点的定义及其连接方式 'ntu/xview', 'ntu/xsub',for set in sets: # 'train', 'val'print(dataset, set)data = np.load('../data/{}/{}_data_joint.npy'.format(dataset, set)) #下载关节数据N, C, T, V, M = data.shapefp_sp = open_memmap('../data/{}/{}_data_bone.npy'.format(dataset, set),dtype='float32',mode='w+',shape=(N, 3, T, V, M)) #骨骼信息 #创建或加载内存映射.npy文件fp_sp[:, :C, :, :, :] = datafor v1, v2 in tqdm(paris[dataset]): #paris是不同数据集的 人关节点的 (a,b)连接索引(论文中有人体关键点的图)if dataset != 'kinetics':v1 -= 1 #1~25 -> 0~24v2 -= 1 #1->0fp_sp[:, :, :, v1, :] = data[:, :, :, v1, :] - data[:, :, :, v2, :] #length information and direction information of the bonemerge_joint_bone_data.py
for dataset in datasets:for set in sets:print(dataset, set)data_jpt = np.load('../data/{}/{}_data_joint.npy'.format(dataset, set)) #关节data_bone = np.load('../data/{}/{}_data_bone.npy'.format(dataset, set)) #骨骼N, C, T, V, M = data_jpt.shapedata_jpt_bone = np.concatenate((data_jpt, data_bone), axis=1) #对应行进行拼接np.save('../data/{}/{}_data_joint_bone.npy'.format(dataset, set), data_jpt_bone) #joint+bonegen_motion_data.py # gen_motion_data.py处理得到的temporal edges没用到,temporal edges只是为了后边时间上的卷积即可
for dataset in datasets:for set in sets:for part in parts:print(dataset, set, part)data = np.load('../data/{}/{}_data_{}.npy'.format(dataset, set, part)) #下载信息 数据集 训练/验证 关节/骨骼N, C, T, V, M = data.shapefp_sp = open_memmap('../data/{}/{}_data_{}_motion.npy'.format(dataset, set, part),dtype='float32',mode='w+',shape=(N, 3, T, V, M)) #写motion信息for t in tqdm(range(T - 1)):fp_sp[:, :, t, :, :] = data[:, :, t + 1, :, :] - data[:, :, t, :, :] #temporal 相同点 连接fp_sp[:, :, T - 1, :, :] = 0 #举个例子,只有三个时间点,那么表示相邻时间节点的连接的数据, 就只有两个(即三个点,只连接相邻点,有两条线)
④ main.py
class GradualWarmupScheduler(_LRScheduler):
def init_seed(_):
def get_parser():
class Processor():def __init__(self, arg):def load_data(self):def load_model(self):Model = import_class(self.arg.model)def load_optimizer(self):def save_arg(self):def adjust_learning_rate(self, epoch):def print_time(self):def print_log(self, str, print_time=True):def record_time(self):def split_time(self):def train(self, epoch, save_model=False):def eval(self, epoch, save_score=False, loader_name=['test'], wrong_file=None, result_file=None):def start(self):
def str2bool(v):
def import_class(name):
    """Resolve a dotted path to the object it names.

    Typical call: ``import_class(self.arg.feeder)``, where the argument
    defaults to ``'feeder.feeder'`` (help text: "data loader will be used").
    For example ``'feeder.Feeder'`` loads ``feeder.py`` and returns its
    ``Feeder`` class, so the class to use can be chosen from configuration.

    Args:
        name: Dotted path; the first component is a top-level module and the
            remaining components are looked up one after another.

    Returns:
        The module, class or other attribute found at the end of the path.

    Raises:
        ImportError: If the first component cannot be imported.
        AttributeError: If a later component is neither an attribute nor an
            importable submodule of what precedes it.
    """
    import importlib  # local import keeps this helper self-contained

    # Split on every '.' and collect the pieces in a list (note corrected on
    # 2021-04-12).
    components = name.split('.')
    # Dynamically load the module named by the first component.
    obj = importlib.import_module(components[0])
    for depth, comp in enumerate(components[1:], start=2):
        try:
            # Same as ``obj.<comp>``: step one level further down the path.
            obj = getattr(obj, comp)
        except AttributeError as missing:
            # A submodule that its parent package has not imported yet is not
            # visible as an attribute, so import it explicitly.
            try:
                obj = importlib.import_module('.'.join(components[:depth]))
            except ImportError:
                # Keep the exception type that callers saw before.
                raise missing from None
    return obj
⑤ README.md (4.4. Two-stream networks)
-
Preprocess the data with #先对数据进行处理,得到关节数据
python data_gen/ntu_gendata.py
python data_gen/kinetics_gendata.py
-
Generate the bone data with: #关节数据转换为骨骼数据
python data_gen/gen_bone_data.py
Change the config file depending on what you want. #分别将关节和骨骼的时空数据送入J-stream 和 B-stream,训练
`python main.py --config ./config/nturgbd-cross-view/train_joint.yaml`
`python main.py --config ./config/nturgbd-cross-view/train_bone.yaml`
To ensemble the results of joints and bones, run test firstly to generate the scores of the softmax layer. #测试,产生各自softmax分数
`python main.py --config ./config/nturgbd-cross-view/test_joint.yaml`
`python main.py --config ./config/nturgbd-cross-view/test_bone.yaml`
Then combine the generated scores with: #两个softmax分数相加 to obtain the fused score and predict the action label
`python ensemble.py --datasets ntu/xview`
二, 零散的代码学习
rotation.py
import numpy as np
import mathdef rotation_matrix(axis, theta): #axis给定轴,theta给定θ弧度。return np.array() #逆时旋转,返回旋转矩阵
def unit_vector(vector):
    """Return ``vector`` divided by its norm.

    The norm is ``np.linalg.norm`` with default arguments, which is the
    2-norm for a 1-D vector, so the result is the unit vector.
    """
    length = np.linalg.norm(vector)
    return vector / length
def angle_between(v1, v2): #弧度角return np.arccos()
def x_rotation(vector, theta): #绕x轴旋转三维矢量return np.dot(R, vector)
def y_rotation(vector, theta): #绕y轴旋转三维矢量return np.dot(R, vector)
def z_rotation(vector, theta): #绕z轴旋转三维矢量return np.dot(R, vector)
1. vector / np.linalg.norm(vector) 向量vector/(默认,根号下每个元素的平方)二范数 -> 单位向量
2. np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0)) np.clip:v1_u, v2_u对应项相乘再相加,范围应该在(-1.0, 1.0),超出则等于边界值。 np.arccos返回的是弧度值
preprocess.py
import syssys.path.extend(['../'])
from data_gen.rotation import *
from tqdm import tqdm #进度条def pre_normalization(data, zaxis=[0, 1], xaxis=[8, 4]):
#用前面的帧填充空帧#近中心关节#1(ntu中的脊柱关节和动力学中的颈部关节)#将第一人的髋关节(jpt 0)和脊椎(jpt 1)之间的骨骼与z轴平行#np.cross求叉积(向量积);求出的新的向量是垂直于 z轴#使右肩(jpt 8)和左肩(jpt 4)之间的骨头平行return data
3.左侧 project 工具栏窗口顶部那个齿轮有个 show member 选项,默认是不开的,勾选后 py 文件会显示内部定义的 class 和 def。每个文件可以自由选择折叠还是展开。
4. 关于tqdm
from tqdm import tqdm 进度条
a=(-1,1,0)
for i,j in enumerate(tqdm(a)):print(i,j)>>>0%| | 0/3 [00:00<?, ?it/s]
0 -1
1 1
2 0
100%|███████████████████████████████████████████| 3/3 [00:00<00:00, 7281.78it/s]
5. axis = np.cross(joint_top - joint_bottom, [0, 0, 1]) np.cross(a,b)求叉积(向量积);求出的新的向量垂直于 a,b形成的平面
6. np.sum()是求总和;np.sum(-1)是先求一个[]里的和
>>> np.eye(3)
array([[1., 0., 0.],[0., 1., 0.],[0., 0., 1.]])
>>> np.eye(3).sum(-1)
array([1., 1., 1.])
>>> np.eye(3).sum(-1).sum(-1)
3.0
>>> np.eye(3).sum()
3.0
>>> np.array([[1,1],[2,2]]).sum(-1)
array([2, 4])
>>> np.array([[1,1],[2,2]]).sum()
6
>>> np.array([[1,1],[2,2]]).sum(0)
array([3, 3])
>>> np.array([[1,1],[2,2]]).sum(1)
array([2, 4])
ntu_gendata.py
import argparse
import pickle
from tqdm import tqdm
import syssys.path.extend(['../'])
from data_gen.preprocess import pre_normalization
import numpy as np
import osdef read_skeleton_filter(file): #每一个帧,每个人,每一个关节return skeleton_sequence
def get_nonzero_std(s): # tvcreturn s
def read_xyz(file, max_body=4, num_joint=25): # 取了前两个bodyreturn data #3,seq_info['numFrame'],num_joint,max_body
def gendata(data_path, out_path, ignored_sample_path=None, benchmark='xview', part='eval'):fp = pre_normalization(fp) #N, C, T, V, M
7. index = energy.argsort()[::-1][0:max_body_true]
argsort返回的是数组值从小到大的索引值;[::-1]取从后向前的元素;max_body_true=2(定义好的);;
argsort(axis=1)表示按行排列
8. os.listdir(data_path): 返回指定路径下的文件和文件夹列表
kinetics_gendata.py
class Feeder_kinetics(Dataset):def __init__(self,data_path,label_path,ignore_empty_sample=True,window_size=-1,num_person_in=5,num_person_out=2):def load_data(self):def __len__(self):return len(self.sample_name)def __iter__(self):return selfdef __getitem__(self, index):return data_numpy, labeldef gendata(data_path, label_path,data_out_path, label_out_path,num_person_in=num_person_in, # observe the first 5 personsnum_person_out=num_person_out, # then choose 2 persons with the highest scoremax_frame=max_frame):
9. data_numpy[1, frame_index, :, m] = pose[1::2] [a::b]表示从下标为a的元素开始,以步长b取元素(即每b个元素取一个);
若b=-1,表示倒序:从下标a(下标按0,1,2...计)的元素开始,向前逐个取到第一个元素。
main.py
10. super().__init__(optimizer) optimizer是 _LRScheduler继承类 的输入
11. answer = input('delete it? y/n:') 接受一个标准输入数据,返回为 string 类型
12. Python __import__() 函数用于动态加载类和函数
getattr(object, name[, default]) 函数用于返回一个对象属性值。等效于object.name
*self._args 表示接受元组类参数;
**kwargs 表示接受字典类参数;
13. vars([object]) 函数返回对象object的属性和属性值的字典对象
14. localtime = time.asctime(time.localtime(time.time()))
localtime格式化时间戳为本地的时间;asctime() 函数接受时间元组并返回一个可读的形式为"Tue Dec 11 18:07:14 2008"(2008年12月11日 周二18时07分14秒)的24个字符的字符串。
15. open('{}/log.txt'.format(self.arg.work_dir), 'a') as f: a代表追加,也就是说,打开这个文件之后直接定位到文件的末尾。
16. round(v * 100 / sum(timer.values())) round() 方法返回浮点数v * 100 / sum(timer.values())的四舍五入值。
17. value, predict_label = torch.max(output.data, 1) value是每行的最大值,predict_label是对应的索引
>>> import torch
>>> import numpy as np
>>> c=np.array([[1,2],[4,3]])
>>> print(c)
[[1 2][4 3]]
>>> b=torch.from_numpy(c)
>>> print(b)
tensor([[1, 2],[4, 3]])
>>> torch.max(b,1) #b是softmax函数输出的一个tensor,1是每行的最大值(axis)
torch.return_types.max(
values=tensor([2, 4]),
indices=tensor([1, 0]))
feeders/tools.py
18. begin = np.random.randint(step) 取[0, step)的随机整数
19. begin = valid_frame.argmax() 返回的是 元素最大值所对应的索引值
20. move_time = random.choice(move_time_candidate) choice() 方法返回一个列表,元组或字符串的随机项。
21. np.arange函数
node = np.arange(0, T, T * 1.0 / move_time).round().astype(int) #round() 方法返回浮点数 的四舍五入值。 np.arange :左闭右开区间 [0, T),步长为T * 1.0 / move_time
node = np.append(node, T) #为node添加T
A = np.random.choice(angle_candidate, num_node) #angle_candidate中选num_node个(注意是np.)
22. self.sample_name, self.label = pickle.load(f, encoding='latin1')
用python2.X pickle写了一个文件,用python3的pickle读取时, 加上encoding='latin1',代码就可以正确识别编码输出内容了。
23. data.mean(axis=2, keepdims=True) 求均值,axis表示维度,keepdims=True表示保持原来维度
[-top_k:]表示倒数top_k个
24. data = data.reshape((1,) + data.shape) #np.array([1,2,3,4]).reshape((2,)+(2,)) -> array([[1, 2], [3, 4]])
agcn.py
25. self.PA = torch.nn.Parameter(torch.from_numpy(A.astype(np.float32)))
将一个不可训练的类型Tensor转换成可以训练的类型parameter,成为了模型中根据训练可以改动的参数
26. self.bn = nn.BatchNorm2d(out_channels) ; self.relu = nn.ReLU(inplace=True)
inplace=True从上层网络bn中传递下来的tensor直接进行修改,这样能够节省运算内存,不用多存储其他变量。