% Conference paper in the PRCV 2022 proceedings (Springer LNCS series).
% Cleaned from an automated repository export:
%   - names are stored in "Last, First" form;
%   - the DOI is stored bare (no LaTeX escaping);
%   - the publisher is double-braced so its literal " and " is not read as a list separator;
%   - copyright and event metadata formerly packed into the note field now live in
%     dedicated fields (copyright, eventtitle, eventdate), which classic BibTeX styles
%     ignore and biblatex understands.
% NOTE(review): eventdate assumes the exported "04-11-2022 Through 07-11-2022" was
% DD-MM-YYYY (4-7 November 2022) -- confirm against the conference site.
% NOTE(review): the LNCS volume number is not present in the export; add a volume
% field once verified.
@inproceedings{ec2a9d0cd69a49abb5cf7cc678c97494,
  author     = {Wang, Yizhe and Cao, Congqi and Zhang, Yanning},
  title      = {Beyond Vision: A Semantic Reasoning Enhanced Model for Gesture Recognition with Improved Spatiotemporal Capacity},
  editor     = {Yu, Shiqi and Zhang, Jianguo and Zhang, Zhaoxiang and Tan, Tieniu and Yuen, Pong C. and Guo, Yike and Han, Junwei and Lai, Jianhuang},
  booktitle  = {Pattern Recognition and Computer Vision -- 5th Chinese Conference, {PRCV} 2022, Proceedings},
  series     = {Lecture Notes in Computer Science},
  publisher  = {{Springer Science and Business Media Deutschland GmbH}},
  year       = {2022},
  pages      = {420--434},
  doi        = {10.1007/978-3-031-18913-5_33},
  isbn       = {9783031189128},
  language   = {english},
  eventtitle = {5th Chinese Conference on Pattern Recognition and Computer Vision, {PRCV} 2022},
  eventdate  = {2022-11-04/2022-11-07},
  copyright  = {{\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2022},
  keywords   = {Gesture recognition, Semantic relation, Spatial attention, Temporal modeling},
  abstract   = {Gesture recognition is an imperative and practical problem owing to its great application potential. Although recent works have made great progress in this field, there also exist three non-negligible problems: 1) existing works lack efficient temporal modeling ability; 2) existing works lack effective spatial attention capacity; 3) most works only focus on the visual information, without considering the semantic relationship between different classes. To tackle the first problem, we propose a Long and Short-term Temporal Shift Module (LS-TSM). It extends the original TSM and expands the step size of shift operation to model long-term and short-term temporal information simultaneously. For the second problem, we expect to focus on the spatial area where the change of hand mainly occurs. Therefore, we propose a Spatial Attention Module (SAM) which utilizes the RGB difference between frames to get a spatial attention mask to assign different weights to different spatial positions. As for the last, we propose a Label Relation Module (LRM) which can take full advantage of the relationship among classes based on their labels' semantic information. With the proposed modules, our work achieves the state-of-the-art performance on two commonly used gesture datasets, i.e., EgoGesture and NVGesture datasets. Extensive experiments demonstrate the effectiveness of our proposed modules.},
}