@comment{MICCAI 2024 conference paper in the LNCS series. Names are stored as Last, First; the language field is given in English.}
@inproceedings{75afe60cd4ef41b88c663c4434c6dc15,
title = "Spot the Difference: Difference Visual Question Answering with Residual Alignment",
abstract = "Difference Visual Question Answering (DiffVQA) introduces a new task aimed at understanding and responding to questions regarding the disparities observed between two images. Unlike traditional medical VQA tasks, DiffVQA closely mirrors the diagnostic procedures of radiologists, who frequently conduct longitudinal comparisons of images taken at different time points for a given patient. This task accentuates the discrepancies between images captured at distinct temporal intervals. To better address the variations, this paper proposes a novel Residual Alignment model (ReAl) tailored for DiffVQA. ReAl is designed to produce flexible and accurate answers by analyzing the discrepancies in chest X-ray images of the same patient across different time points. Compared to the previous method, ReAl additionally adds a residual input branch, where the residual of two images is fed into this branch. Additionally, a Residual Feature Alignment (RFA) module is introduced to ensure that ReAl effectively captures and learns the disparities between corresponding images. Experimental evaluations conducted on the MIMIC-Diff-VQA dataset demonstrate the superiority of ReAl over previous state-of-the-art methods, consistently achieving better performance. Ablation experiments further validate the effectiveness of the RFA module in enhancing the model{\textquoteright}s attention to differences. The code implementation of the proposed approach will be made available.",
keywords = "Difference VQA, Generative model, Residual feature alignment",
author = "Lu, Zilin and Xie, Yutong and Zeng, Qingjie and Lu, Mengkang and Wu, Qi and Xia, Yong",
note = "Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2024.; 27th International Conference on Medical Image Computing and Computer-Assisted Intervention, MICCAI 2024 ; Conference date: 06-10-2024 Through 10-10-2024",
year = "2024",
doi = "10.1007/978-3-031-72086-4_61",
language = "English",
isbn = "978-3-031-72085-7",
series = "Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)",
publisher = "Springer Science and Business Media Deutschland GmbH",
pages = "649--658",
editor = "Linguraru, Marius George and Dou, Qi and Feragen, Aasa and Giannarou, Stamatia and Glocker, Ben and Lekadir, Karim and Schnabel, Julia A.",
booktitle = "Medical Image Computing and Computer Assisted Intervention -- {MICCAI} 2024 - 27th International Conference, Proceedings",
}