% Conference paper from the proceedings of the 2023 IEEE International
% Conference on Big Data (BigData 2023).
% - Names use the unambiguous "Last, First" form throughout.
% - Conference name and dates live in eventtitle/eventdate (used by biblatex,
%   ignored by classic BibTeX styles) instead of a printed free-text note.
% - The publisher copyright line is kept in the non-printed `copyright` field.
% NOTE(review): `address` holds a country, not the publisher's city -- confirm
% the intended value before relying on it in a style that prints it.
@inproceedings{245cee0849ca4298b4715f2e729f979b,
  author     = {Chang, Minsuk and Kim, Han Joon},
  title      = {Tensor Space Model-based Textual Data Augmentation for Text Classification},
  booktitle  = {Proceedings - 2023 {IEEE} International Conference on Big Data, {BigData} 2023},
  editor     = {He, Jingrui and Palpanas, Themis and Hu, Xiaohua and Cuzzocrea, Alfredo and Dou, Dejing and Slezak, Dominik and Wang, Wei and Gruca, Aleksandra and Lin, Jerry Chun-Wei and Agrawal, Rakesh},
  publisher  = {Institute of Electrical and Electronics Engineers Inc.},
  address    = {United States},
  year       = {2023},
  month      = dec,
  pages      = {4276--4283},
  doi        = {10.1109/BigData59044.2023.10386629},
  eventtitle = {2023 {IEEE} International Conference on Big Data, {BigData} 2023},
  eventdate  = {2023-12-15/2023-12-18},
  language   = {English},
  keywords   = {Autoencoder, Data Augmentation, Deep Learning, Generative Adversarial Networks, Tensor Space Model, Text Classification, Text Representation Model},
  abstract   = {In this paper, we first introduce a new text representation method to convert a textual document into a tensor space model named TextCuboid, which can preserve various meanings of polysemy. Based upon the new model, we propose two novel data augmentation techniques (called Boolean augmentation and CuboidGAN) that can be directly applied to the TextCuboid model for text classification tasks. Boolean augmentation includes three simple keyword modifications: synonym replacement, synonym insertion, and random deletion. CuboidGAN is composed of two key components, style encoding, and residual regression, and it is trained in two phases to generate unambiguous and plausible concept vectors. Through intensive experiments using five commonly used datasets, we prove that our proposed methods perform better data augmentation than other conventional methods. We also show that each augmentation method component significantly contributes to text classification through ablation studies.},
  copyright  = {{\textcopyright} 2023 IEEE},
}