% Jeong, Jeong and Kim (2022): "D-VAE", a variational-autoencoder-based
% augmentation technique for numerical tabular data; conference paper in the
% proceedings of IEEE Big Data 2022 (pp. 5944--5951).
@inproceedings{321b86288dad446a8103c33f8282976a,
title = "An {AutoEncoder}-based Numerical Training Data Augmentation Technique",
abstract = "This paper aims to automatically augment numerical tabular data by using the variational autoencoder model. For this, we try to solve the problem of class imbalance in numerical data and to improve the performance of the classification model by augmenting the training data. In this paper, we propose a new augmentation technique called 'D-VAE' which performs data augmentation through variational autoencoder with discretization for numerical columns; D-VAE artificially increases the number of records and the number of columns for a given tabular data. The main features of the proposed technique are to perform discretization and feature selection in the preprocessing process. For the discretization process, we use k-means algorithm, through which records within a given table are grouped, and then converted into one-hot vectors according to the clustering results. In addition, for memory efficiency, we reduced the number of parameters of the VAE model by using a relatively small number of features through feature selection called REFCV. To evaluate the performance of the proposed technique, we conducted various experiments by numerical data augmentation ratio using four open datasets.",
keywords = "Autoencoder, Data Augmentation, Deep learning, Tabular data, VAE",
author = "Jeong, Jueun and Jeong, Hanseok and Kim, {Han Joon}",
note = "Publisher Copyright: {\textcopyright} 2022 IEEE.; 2022 IEEE International Conference on Big Data, Big Data 2022 ; Conference date: 17-12-2022 Through 20-12-2022",
year = "2022",
doi = "10.1109/BigData55660.2022.10020487",
language = "English",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "5944--5951",
editor = "Tsumoto, Shusaku and Ohsawa, Yukio and Chen, Lei and {Van den Poel}, Dirk and Hu, Xiaohua and Motomura, Yoichi and Takagi, Takuya and Wu, Lingfei and Xie, Ying and Abe, Akihiro and Raghavan, Vijay",
booktitle = "Proceedings - 2022 IEEE International Conference on Big Data, Big Data 2022",
address = "United States",
}