@inproceedings{bf90e19d1c604cda9a1a8e2b10edb8bf,
title = "A Transformer Architecture with Adaptive Attention for Fine-Grained Visual Classification",
abstract = "The fine-grained visual classification (FGVC) problem is to classify different subclasses in same superclass. Due to the similarity between subclasses, the problem requires capturing fine-grained discriminative features. Although current approaches are able to extract more fine-grained features by designing complex feature extraction modules, the excessive focus on discriminative features results in ignoring massive global feature information and reducing the ability of resisting background noise. This paper propose a transformer architecture based on vision transformer (ViT) with adaptive attention (TransAA). To optimize the attention of ViT, we design two modules. An attention-weakening module is designed to enforce the model to capture more feature information, and an attention-enhancement module is designed to enhance the extraction ability of the critical features. Otherwise, we introduce a sample weighting loss function in the training process to adaptively adjust both weakening and enhancement processes. The performance of the TransAA is demonstrated on three benchmark fine-grained datasets.",
keywords = "adaptive attention, fine-grained visual classification, vision transformer",
author = "Changli Cai and Tiankui Zhang and Zhewei Weng and Chunyan Feng and Yapeng Wang",
note = "Publisher Copyright: {\textcopyright} 2021 IEEE.; 7th International Conference on Computer and Communications, ICCC 2021 ; Conference date: 10-12-2021 Through 13-12-2021",
year = "2021",
doi = "10.1109/ICCC54389.2021.9674560",
language = "English",
series = "2021 7th International Conference on Computer and Communications, ICCC 2021",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "863--867",
booktitle = "2021 7th International Conference on Computer and Communications, ICCC 2021",
address = "United States",
}