MediaPipe + ST-GCN (Spatial-Temporal Graph Convolutional Network) for Human Pose Classification (Single Target) (Code Snippets)

Huterox     2023-03-09

Preface

Just popping in here: the boasts I made back when I was young and naive eventually have to be made good on.
So what I'm building here is human pose classification: is a person sitting, standing, or has he fallen down, and if he has fallen, what action should we take, that sort of thing.

The unfortunate part is that MediaPipe's Pose solution only detects a single pose, i.e., single-target detection, so it's frustratingly far from the effect I actually wanted. Still, the demo is pretty fun.

The result looks like this:

MediaPipe Keypoint Detection

One of the cores of this demo is detecting the keypoints of the human body.
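
Before the full program, here is a minimal, self-contained sketch of the access pattern it relies on. MediaPipe Pose returns 33 landmarks per frame, each with normalized (0-1) coordinates plus a visibility score; the demo keeps 13 of them and rescales to pixel coordinates. (The sketch assumes a webcam on index 0; the API calls mirror the full code below.)

import cv2
import mediapipe as mp

mp_pose = mp.solutions.pose

# Grab a single frame from the default webcam
cap = cv2.VideoCapture(0)
ok, frame = cap.read()
cap.release()

with mp_pose.Pose(min_detection_confidence=0.7) as pose:
    # MediaPipe expects RGB input; OpenCV delivers BGR
    results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    if results.pose_landmarks:
        h, w = frame.shape[:2]
        nose = results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE]
        # Normalized coordinates -> pixel coordinates
        print(nose.x * w, nose.y * h, nose.visibility)

The full demo below does this on every frame, then adds skeleton drawing and the ST-GCN action head on top.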

import time
from collections import deque

import cv2
import numpy as np
import mediapipe as mp

from stgcn.stgcn import STGCN
from PIL import Image, ImageDraw, ImageFont


# Human keypoint detection module
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose

# Face detection module (initialized here but not used in the loop below)
mpFace = mp.solutions.face_detection
faceDetection = mpFace.FaceDetection(min_detection_confidence=0.5)

KEY_JOINTS = [
    mp_pose.PoseLandmark.NOSE,
    mp_pose.PoseLandmark.LEFT_SHOULDER,
    mp_pose.PoseLandmark.RIGHT_SHOULDER,
    mp_pose.PoseLandmark.LEFT_ELBOW,
    mp_pose.PoseLandmark.RIGHT_ELBOW,
    mp_pose.PoseLandmark.LEFT_WRIST,
    mp_pose.PoseLandmark.RIGHT_WRIST,
    mp_pose.PoseLandmark.LEFT_HIP,
    mp_pose.PoseLandmark.RIGHT_HIP,
    mp_pose.PoseLandmark.LEFT_KNEE,
    mp_pose.PoseLandmark.RIGHT_KNEE,
    mp_pose.PoseLandmark.LEFT_ANKLE,
    mp_pose.PoseLandmark.RIGHT_ANKLE
]

# Skeleton edges over KEY_JOINTS indices; index 13 is the neck point
# appended in draw_skeleton() as the midpoint of the two shoulders
POSE_CONNECTIONS = [(6, 4), (4, 2), (2, 13), (13, 1), (5, 3), (3, 1), (12, 10),
                    (10, 8), (8, 2), (11, 9), (9, 7), (7, 1), (13, 0)]

# 18 per-keypoint colors; only the first 14 are used here
# (the 13 KEY_JOINTS plus the appended neck point)
POINT_COLORS = [(0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0),
                (77, 255, 255), (77, 255, 204), (77, 204, 255), (191, 255, 77), (77, 191, 255), (191, 255, 77),
                (204, 77, 255), (77, 255, 204), (191, 77, 255), (77, 255, 191), (127, 77, 255), (77, 255, 127), (0, 255, 255)]

LINE_COLORS = [(0, 215, 255), (0, 255, 204), (0, 134, 255), (0, 255, 50), (77, 255, 222),
               (77, 196, 255), (77, 135, 255), (191, 255, 77), (77, 255, 77), (77, 222, 255),
               (255, 156, 127), (0, 127, 255), (255, 127, 77), (0, 77, 255), (255, 77, 36)]


# Action classes predicted by the ST-GCN model
# (standing, walking, sitting, lying down, standing up, sitting down, fallen)
POSE_MAPPING = ["站着", "走着", "坐着", "躺下", "站起来", "坐下", "摔倒"]

POSE_MAPPING_COLOR = [
    (255, 255, 240), (245, 222, 179), (244, 164, 96), (210, 180, 140),
    (255, 127, 80), (255, 165, 79), (255, 48, 48)
]

# For action-detection accuracy, predict over a window of 30 frames
ACTION_MODEL_MAX_FRAMES = 30

class FallDetection:
    def __init__(self):
        self.action_model = STGCN(weight_file='./weights/tsstg-model.pth', device='cpu')
        self.joints_list = deque(maxlen=ACTION_MODEL_MAX_FRAMES)

    def draw_skeleton(self, frame, pts):
        l_pair = POSE_CONNECTIONS
        p_color = POINT_COLORS
        line_color = LINE_COLORS

        part_line = {}
        # Append a neck keypoint as the midpoint of the two shoulders
        pts = np.concatenate((pts, np.expand_dims((pts[1, :] + pts[2, :]) / 2, 0)), axis=0)
        for n in range(pts.shape[0]):
            # Skip keypoints with very low visibility
            if pts[n, 2] <= 0.05:
                continue
            cor_x, cor_y = int(pts[n, 0]), int(pts[n, 1])
            part_line[n] = (cor_x, cor_y)
            cv2.circle(frame, (cor_x, cor_y), 3, p_color[n], -1)
            # cv2.putText(frame, str(n), (cor_x+10, cor_y+10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), 1)

        for i, (start_p, end_p) in enumerate(l_pair):
            if start_p in part_line and end_p in part_line:
                start_xy = part_line[start_p]
                end_xy = part_line[end_p]
                cv2.line(frame, start_xy, end_xy, line_color[i], int(1*(pts[start_p, 2] + pts[end_p, 2]) + 3))
        return frame

    def cv2_add_chinese_text(self, img, text, position, textColor=(0, 255, 0), textSize=30):
        if isinstance(img, np.ndarray):  # convert an OpenCV BGR image to PIL
            img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        # Create a drawing object for the image
        draw = ImageDraw.Draw(img)
        # OpenCV's putText cannot render Chinese, so draw text via PIL with a TTF font
        fontStyle = ImageFont.truetype(
            "./fonts/MSYH.ttc", textSize, encoding="utf-8")

        draw.text(position, text, textColor, font=fontStyle)

        return cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)

    def detect(self):
        cap = cv2.VideoCapture(0)
        # cap.set(3, 540)
        # cap.set(4, 960)
        # cap.set(5,30)
        image_h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        image_w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        frame_num = 0
        print(image_h, image_w)

        with mp_pose.Pose(
                min_detection_confidence=0.7,
                min_tracking_confidence=0.5) as pose:
            while cap.isOpened():
                fps_time = time.time()
                frame_num += 1
                success, image = cap.read()
                if not success:
                    print("Ignoring empty camera frame.")
                    continue

                # Mark the frame read-only for a small performance win; this is where pose inference runs
                image.flags.writeable = False
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                results = pose.process(image)

                if results.pose_landmarks:
                    # Keypoints detected: make the frame writeable again and draw on it
                    image.flags.writeable = True
                    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)


                    landmarks = results.pose_landmarks.landmark
                    joints = np.array([[landmarks[joint].x * image_w,
                                        landmarks[joint].y * image_h,
                                        landmarks[joint].visibility]
                                       for joint in KEY_JOINTS])
                    # Bounding box around the person, with some margin
                    box_l, box_r = int(joints[:, 0].min())-50, int(joints[:, 0].max())+50
                    box_t, box_b = int(joints[:, 1].min())-100, int(joints[:, 1].max())+100

                    self.joints_list.append(joints)

                    # Recognize the action
                    action = ''
                    clr = (0, 255, 0)
                    # Predict the action type once 30 frames of keypoints are buffered
                    if len(self.joints_list) == ACTION_MODEL_MAX_FRAMES:
                        pts = np.array(self.joints_list, dtype=np.float32)
                        out = self.action_model.predict(pts, (image_w, image_h))
                        index = out[0].argmax()
                        action_name = POSE_MAPPING[index]
                        clr = POSE_MAPPING_COLOR[index]  # label color for the predicted class
                        action = '{}: {:.2f}%'.format(action_name, out[0].max() * 100)
                        print(action)
                    # Draw the skeleton and the action label
                    image = self.draw_skeleton(image, self.joints_list[-1])
                    image = cv2.rectangle(image, (box_l, box_t), (box_r, box_b), (255, 0, 0), 1)
                    # "当前状态" means "current status"
                    image = self.cv2_add_chinese_text(image, f'当前状态:{action}', (box_l + 10, box_t + 10), clr, 40)

                else:
                    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                image = cv2.putText(image, f'FPS: {int(1.0 / (time.time() - fps_time))}',
                                    (50, 50), cv2.FONT_HERSHEY_PLAIN, 3, (0, 255, 0), 2)

                cv2.imshow('Pose', image)

                if cv2.waitKey(1) & 0xFF == ord("q"):
                    break


        cap.release()
        cv2.destroyAllWindows()

if __name__ == '__main__':
    FallDetection().detect()

ST-GCN Pose Classification

First off, I have never studied this spatial-temporal graph network myself. Roughly, what it does is take a sequence of poses, run them through the network, and output the action and its class; in other words, it's a classification graph network. This part is outside my expertise, a blind spot for me, so I'm treating it as a black box here. Likewise, this code was copied straight from GitHub and then integrated into the project.
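
Treated as a black box, all that matters is the contract at the boundary: a window of 30 frames x 13 keypoints x (x, y, visibility) goes in, and one score per action class comes out. Here is a minimal sketch of that interface, reusing the STGCN wrapper exactly as the demo above calls it (the random input is only there to show the shapes, not to get a meaningful prediction):

import numpy as np
from stgcn.stgcn import STGCN

# Same labels as in the demo above
POSE_MAPPING = ["站着", "走着", "坐着", "躺下", "站起来", "坐下", "摔倒"]

model = STGCN(weight_file='./weights/tsstg-model.pth', device='cpu')

# 30 frames x 13 keypoints x (x, y, visibility), in pixel coordinates
pts = np.random.rand(30, 13, 3).astype(np.float32)
pts[:, :, 0] *= 640  # x in pixels
pts[:, :, 1] *= 480  # y in pixels

out = model.predict(pts, (640, 480))  # (width, height) of the source frame
index = out[0].argmax()
print(POSE_MAPPING[index], '{:.2f}%'.format(out[0].max() * 100))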

That's right: applied algorithm development is not that different from ordinary development. Retraining for a new task is mostly a matter of tuning hyperparameters, adjusting the network model a bit, and changing the training data; a disruptive change amounts to redesigning the algorithm.

There isn't much code in this part, so I'll just paste it directly:

In order, from top to bottom:


import torch
import torch.nn as nn
import torch.nn.functional as F
from stgcn.Utils import Graph


class GraphConvolution(nn.Module):
    """The basic module for applying a graph convolution.
    Args:
        - in_channels: (int) Number of channels in the input sequence data.
        - out_channels: (int) Number of channels produced by the convolution.
        - kernel_size: (int) Size of the graph convolving kernel.
        - t_kernel_size: (int) Size of the temporal convolving kernel.
        - t_stride: (int, optional) Stride of the temporal convolution. Default: 1
        - t_padding: (int, optional) Temporal zero-padding added to both sides of
            the input. Default: 0
        - t_dilation: (int, optional) Spacing between temporal kernel elements. Default: 1
        - bias: (bool, optional) If `True`, adds a learnable bias to the output.
            Default: `True`
    Shape:
        - Inputs x: Graph sequence in :math:`(N, in_channels, T_in, V)`,
                 A: Graph adjacency matrix in :math:`(K, V, V)`,
        - Output: Graph sequence out in :math:`(N, out_channels, T_out, V)`

            where
                :math:`N` is a batch size,
                :math:`K` is the spatial kernel size, as :math:`K == kernel_size`,
                :math:`T_in/T_out` is a length of input/output sequence,
                :math:`V` is the number of graph nodes.

    """
    def __init__(self, in_channels, out_channels, kernel_size,
                 t_kernel_size=1,
                 t_stride=1,
                 t_padding=0,
                 t_dilation=1,
                 bias=True):
        super().__init__()

        self.kernel_size = kernel_size
        self.conv = nn.Conv2d(in_channels,
                              out_channels * kernel_size,
                              kernel_size=(t_kernel_size, 1),
                              padding=(t_padding, 0),
                              stride=(t_stride, 1),
                              dilation=(t_dilation, 1),
                              bias=bias)

    def forward(self, x, A):
        x = self.conv(x)
        n, kc, t, v = x.size()
        # Split channels into K spatial-kernel groups: (N, K, C, T, V)
        x = x.view(n, self.kernel_size, kc // self.kernel_size, t, v)
        # Weighted aggregation over neighbors:
        # out[n, c, t, w] = sum_k sum_v x[n, k, c, t, v] * A[k, v, w]
        x = torch.einsum('nkctv,kvw->nctw', (x, A))

        return x.contiguous()


class st_gcn(nn.Module):
    """Applies a spatial temporal graph convolution over an input graph sequence.
    Args:
        - in_channels: (int) Number of channels in the input sequence data.
        - out_channels: (int) Number of channels produced by the convolution.
        - kernel_size: (tuple) Size of the temporal convolving kernel and
            graph convolving kernel.
        - stride: (int, optional) Stride of the temporal convolution. Default: 1
        - dropout: (int, optional) Dropout rate of the final output. Default: 0
        - residual: (bool, optional) If `True`, applies a residual mechanism.
            Default: `True`
    Shape:
        - Inputs x: Graph sequence in :math: `(N, in_channels, T_in, V)`,
                 A: Graph adjacency matrix in :math: `(K, V, V)`,
        - Output: Graph sequence out in :math: `(N, out_channels, T_out, V)`
            where
                :math:`N` is a batch size,
                :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
                :math:`T_in/T_out` is a length of input/output sequence,
                :math:`V` is the number of graph nodes.
    """
    def __init__(self, in_channels, out_channels, kernel_size,
                 stride=1,
                 dropout=0,
                 residual=True):
        super().__init__()
        assert len(kernel_size) == 2
        assert kernel_size[0] % 2 == 1

        padding = ((kernel_size[0] - 1) // 2, 0)

        self.gcn = GraphConvolution(in_channels, out_channels, kernel_size[1])
        self.tcn = nn.Sequential(nn.BatchNorm2d(out_channels),
                                 nn.ReLU(inplace=True),
                                 nn.Conv2d(out_channels,
                                           out_channels,
                                           (kernel_size[0], 1),
                                           (stride, 1),
                                           padding),
                                 nn.BatchNorm2d(out_channels),
                                 nn.Dropout(dropout, inplace=True))

        # NOTE: the original post is cut off at this point. The remainder below
        # follows the standard ST-GCN reference implementation: a residual
        # branch plus the final activation.
        if not residual:
            self.residual = lambda x: 0
        elif (in_channels == out_channels) and (stride == 1):
            self.residual = lambda x: x
        else:
            self.residual = nn.Sequential(nn.Conv2d(in_channels,
                                                    out_channels,
                                                    kernel_size=1,
                                                    stride=(stride, 1)),
                                          nn.BatchNorm2d(out_channels))
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x, A):
        res = self.residual(x)
        x = self.gcn(x, A)
        x = self.tcn(x) + res
        return self.relu(x), A
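
If you want to sanity-check the block above without the rest of the repository, a quick shape test is enough. The sizes here (30 frames, 14 graph nodes, a 3-way spatial kernel) match the demo's skeleton but are otherwise arbitrary, and the random adjacency stands in for the normalized graph that stgcn.Utils.Graph would provide:

import torch

# N=batch, C=input channels, T=frames, V=graph nodes, K=spatial kernel size
N, C, T, V, K = 1, 3, 30, 14, 3
x = torch.randn(N, C, T, V)   # a graph sequence
A = torch.randn(K, V, V)      # K stacked adjacency matrices
layer = st_gcn(in_channels=C, out_channels=64, kernel_size=(9, K))
out, _ = layer(x, A)
print(out.shape)              # torch.Size([1, 64, 30, 14])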
