% Workshop paper. The full proceedings title is stored in `booktitle`; it is
% deliberately not duplicated in `series`, which is meant for a series name.
@inproceedings{406ef29da7a64351820945ba960a0f50,
  author    = {Yuan Gao and Sicong Liu and Xiangrui Xu and Zhiyang Ding and Bin Guo and Zhiwen Yu},
  title     = {Spatio-Temporal Synergy with {ViT}: Enhancing Collaborative Perception and Object Detection for Heterogeneous Agents},
  booktitle = {RMELS 2024 - Proceedings of the 1st ACM International Workshop on Resource-efficient Mobile and Embedded LLM System in AIoT, Part of: ACM Sensys 2024},
  publisher = {Association for Computing Machinery, Inc},
  year      = {2024},
  month     = nov,
  day       = {4},
  pages     = {3--5},
  doi       = {10.1145/3698383.3699621},
  language  = {english},
  keywords  = {Cooperative perception, Deep learning, Heterogeneous agents, Vision transformer},
  abstract  = {To address the limitations of traditional heterogeneous agent cooperative sensing methods in terms of feedback latency and spatiotemporal dependencies, this paper proposes a heterogeneous agents enhancing cooperative perception and object detection system. The system is based on the Vision Transformer (ViT) model, leveraging its superior global context awareness and multimodal data fusion capabilities. Additionally, it incorporates the proposed adaptive delay position sensing module and spatiotemporal dependency dynamic modeling module, effectively resolving issues related to data transmission latency and complex spatiotemporal dependencies between agents. This significantly enhances the accuracy and timeliness of heterogeneous multi-agent collaborative sensing systems.},
  note      = {Publisher Copyright: {\textcopyright} 2024 ACM.; 1st ACM International Workshop on Resource-efficient Mobile and Embedded LLM System in AIoT, RMELS 2024 ; Conference date: 04-11-2024},
}