% Conference paper (ICRERA 2023, pp. 414--420). The citation key is an opaque
% hash; keep it unchanged so existing citations continue to resolve.
% The conference name is stored only in booktitle; the former series field was
% a verbatim duplicate of it and has been dropped.
@inproceedings{384cd897411a40569937c5b8b1b97aae,
  author    = {Li, Pinyan and Hoi, {Lap Man} and Wang, Yapeng and Im, {Sio Kei}},
  title     = {Data Augmentation with {ECAPA-TDNN} Architecture for Automatic Speaker Recognition},
  booktitle = {12th {IEEE} International Conference on Renewable Energy Research and Applications, {ICRERA} 2023},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2023},
  pages     = {414--420},
  doi       = {10.1109/ICRERA59003.2023.10269366},
  language  = {English},
  keywords  = {ECAPA-TDNN, automatic speaker recognition, data augmentation},
  abstract  = {This paper focuses on seven data augmentation methods based on the Emphasized Channel Attention Propagation and Aggregation-Time Delay Neural Network (ECAPA-TDNN) model for increasing the diversity of training data to improve model accuracy and true positive rate (TPR/recall). We propose a method to improve classification performance by replacing and reducing the datasets. We also verified the effect of the number of layers on the classification performance by modifying the number of layers of the SE-Res2Block in the ECAPA-TDNN model. The proposed method is validated with the ZhVoice and VoxCeleb datasets, and the results show that the best model accuracy and classification performance can be obtained by using ZhVoice with seven data augmentations on a 3-layer SE-Res2Block. The accuracy reached 0.9477, the TPR reached 0.8945, and the EER was 0.1278. We also used the diagonal cosine algorithm to determine the similarity between two speakers, validating the classification performance of the model.},
  note      = {Publisher Copyright: {\textcopyright} 2023 IEEE. Conference date: 29-08-2023 through 01-09-2023},
}