@inproceedings{ac7700da78f64f1ab93df22a6da036f5,
  % Acronym MMGLOTS braced so sentence-casing styles preserve its capitals.
  title     = {{MMGLOTS}: Multi-Modal Global-Local Transformer Segmentor for Remote Sensing Image Segmentation},
  abstract  = {Multi-modal semantic segmentation of remote sensing (RS) images is a challenging task due to the complex relationship between different modalities and the large intra-class variance of objects in RS images. Existing semantic segmentation methods can only utilize the information of a single modality, which is not sufficient to obtain accurate segmentation results. To address this problem, in this paper, a novel multimodal global-local transformer segmentor (MMGLOTS) is proposed to cope with the multi-modal semantic segmentation task. Specifically, the semantic features of each modality are extracted by the multi-modal semantic feature extractor (MMSFE) with an adaptive fusion strategy. Then, the features are aggregated, and deep representations of both local and global dependencies are obtained by the global-local transformer (GLT). The final prediction is obtained by progressively restoring the deep representations with a prediction restorer (PR). Extensive experiments on two multi-modal semantic segmentation datasets show that our method achieves superior performance and the proposed method achieves the first place on the newly held Cross-City Multi-modal Semantic Segmentation Challenge 2023.},
  keywords  = {Global-local, Multi-modal, Semantic segmentation, Transformer},
  % Names in unambiguous "Last, First" form.
  author    = {Liu, Yuheng and Wang, Ye and Zhang, Yifan and Mei, Shaohui},
  note      = {Publisher Copyright: {\textcopyright} 2023 IEEE.; 13th Workshop on Hyperspectral Imaging and Signal Processing: Evolution in Remote Sensing, WHISPERS 2023 ; Conference date: 31-10-2023 Through 02-11-2023},
  year      = {2023},
  doi       = {10.1109/WHISPERS61460.2023.10431036},
  % Was "英语" (Chinese word for "English") — locale artifact from the exporting tool.
  language  = {English},
  series    = {Workshop on Hyperspectral Image and Signal Processing, Evolution in Remote Sensing},
  publisher = {IEEE Computer Society},
  booktitle = {2023 13th Workshop on Hyperspectral Imaging and Signal Processing},
}