% Conference paper in the ICONIP 2020 proceedings (series: Communications in
% Computer and Information Science).
% Conventions used in this entry:
%   - names are stored in "Last, First" form so multi-word given names need no braces;
%   - the DOI is stored bare (no resolver prefix, no LaTeX escaping);
%   - all values are ASCII with LaTeX escapes, so the entry works with classic BibTeX.
@inproceedings{c8acf095eea0400596bf25f2a562223a,
  author    = {Zhao, Yongqiang and Rao, Yuan and Wu, Lianwei and Feng, Cong},
  title     = {Image Captioning Algorithm Based on Sufficient Visual Information and Text Information},
  booktitle = {Neural Information Processing - 27th International Conference, ICONIP 2020, Proceedings},
  editor    = {Yang, Haiqin and Pasupa, Kitsuchart and Leung, Andrew Chi-Sing and Kwok, James T. and Chan, Jonathan H. and King, Irwin},
  series    = {Communications in Computer and Information Science},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  year      = {2020},
  pages     = {607--615},
  isbn      = {9783030638221},
  doi       = {10.1007/978-3-030-63823-8_69},
  language  = {English},
  keywords  = {Image captioning, Sufficient text information, Sufficient visual information},
  abstract  = {Most existing attention-based methods on image captioning focus on the current visual information and text information at each step to generate the next word, without considering the coherence between the visual information and the text information itself. We propose sufficient visual information (SVI) module to supplement the existing visual information contained in the network, and propose sufficient text information (STI) module to predict more text Words to supplement the text information contained in the network. Sufficient visual information module embed the attention value from the past two steps into the current attention to adapt to human visual coherence. Sufficient text information module can predict the next three words in one step, and jointly use their probabilities for inference. Finally, this paper combines these two modules to form an image captioning algorithm based on sufficient visual information and text information model (SVITI) to further integrate existing visual information and future text information in the network, thereby improving the image captioning performance of the model. These three methods are used in the classic image captioning algorithm, and have achieved achieve significant performance improvement compared to the latest method on the MS COCO dataset.},
  note      = {Publisher Copyright: {\textcopyright} 2020, Springer Nature Switzerland AG.; 27th International Conference on Neural Information Processing, ICONIP 2020 ; Conference date: 18-11-2020 Through 22-11-2020},
}