% ICASSP 2021 conference paper (Jung, Shim, Kim, Yu), pages 621--625.
% Maintenance notes:
%  - Given names are written plainly; the exporter's backslash-escaped braces
%    would be typeset as literal brace characters and break initials/sorting.
%  - The model name in the title is brace-protected so that sentence-casing
%    styles do not change its capitalisation.
%  - No series field: the exported value merely repeated booktitle, which made
%    several styles print the proceedings name twice.
%  - NOTE(review): address holds a country as exported; the publisher city is
%    not known from this record -- confirm before relying on it.
@inproceedings{7565cabf0c114ed0b078fe8d456c9cf2,
  author    = {Jung, Jee Weon and Shim, Hye Jin and Kim, Ju Ho and Yu, Ha Jin},
  title     = {{DCasenet}: An integrated pretrained deep neural network for detecting and classifying acoustic scenes and events},
  booktitle = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  pages     = {621--625},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2021},
  doi       = {10.1109/ICASSP39728.2021.9414406},
  language  = {English},
  keywords  = {Acoustic scene classification, Audio tagging, Deep neural networks, Sound event detection},
  note      = {Publisher Copyright: {\textcopyright}2021 IEEE; 2021 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2021 ; Conference date: 06-06-2021 Through 11-06-2021},
  abstract  = {Although acoustic scenes and events include many related tasks, their combined detection and classification have been scarcely investigated. We propose three architectures of deep neural networks that are integrated to simultaneously perform acoustic scene classification, audio tagging, and sound event detection. The first two architectures are inspired by human cognitive processes. The first architecture resembles the short-term perception for scene classification of adults, who can detect various sound events that are then used to identify the acoustic scene. The second architecture resembles the long-term learning of babies, being also the concept underlying self-supervised learning. Babies first observe the effects of abstract notions such as gravity and then learn specific tasks using such perceptions. The third architecture adds a few layers to the second one that solely perform a single task before its corresponding output layer. The aim is to build an integrated system that can serve as a pretrained model to perform the three abovementioned tasks. Experiments on three datasets demonstrate that the proposed architecture, called DcaseNet, can be either directly used for any of the tasks while providing suitable results or fine-tuned to improve the performance of one task. The code and pretrained DcaseNet weights are available at https://github.com/Jungjee/DcaseNet.},
}