% HM-Conformer paper, ICASSP 2024 proceedings (conference dates and copyright are in `note`).
% Title is stored in Title Case; braces protect the model names from style recasing.
% Authors are in unambiguous "Last, First" form; multi-word given names need no braces.
% NOTE(review): `address` holds a country rather than the publisher's city, and `series`
% repeats `booktitle` -- both kept as exported; confirm against the publisher record.
@inproceedings{0015c2b3011e425bb4a009ceac21f4a8,
  author    = {Shin, Hyun Seo and Heo, Jungwoo and Kim, Ju Ho and Lim, Chan Yeong and Kim, Wonbin and Yu, Ha Jin},
  title     = {{HM-Conformer}: A {Conformer}-Based Audio Deepfake Detection System with Hierarchical Pooling and Multi-Level Classification Token Aggregation Methods},
  booktitle = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  series    = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  pages     = {10581--10585},
  year      = {2024},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  doi       = {10.1109/ICASSP48485.2024.10448453},
  language  = {English},
  keywords  = {Anti-spoofing, Audio deepfake detection, Conformer, Hierarchical pooling, Multi-level classification token aggregation},
  abstract  = {Audio deepfake detection (ADD) is the task of detecting spoofing attacks generated by text-to-speech or voice conversion systems. Spoofing evidence, which helps to distinguish between spoofed and bona-fide utterances, might exist either locally or globally in the input features. To capture these, the Conformer, which consists of Transformers and CNN, possesses a suitable structure. However, since the Conformer was designed for sequence-to-sequence tasks, its direct application to ADD tasks may be sub-optimal. To tackle this limitation, we propose HM-Conformer by adopting two components: (1) Hierarchical pooling method progressively reducing the sequence length to eliminate duplicated information (2) Multi-level classification token aggregation method utilizing classification tokens to gather information from different blocks. Owing to these components, HM-Conformer can efficiently detect spoofing evidence by processing various sequence lengths and aggregating them. In experimental results on the ASVspoof 2021 Deepfake dataset, HM-Conformer achieved a 15.71\% EER, showing competitive performance compared to recent systems.},
  note      = {Publisher Copyright: {\textcopyright} 2024 IEEE.; 2024 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2024 ; Conference date: 14-04-2024 Through 19-04-2024},
}