% KU-DMIS system paper for the EHRSQL 2024 shared task (ClinicalNLP 2024 workshop, held at NAACL 2024).
% Cleaned from a research-portal export: acronyms in the title are brace-protected,
% names use the "Last, First" form, and the series field that merely repeated
% booktitle was dropped.
@inproceedings{a4cee65bac7b43649a7aebd42b7c94b3,
  author    = {Kim, Hajung and Kim, Chanhwi and Lee, Hoonick and Jang, Kyochul and Lee, Jiwoo and Lee, Kyungjae and Kim, Gangwoo and Kang, Jaewoo},
  title     = {{KU-DMIS} at {EHRSQL} 2024: Generating {SQL} query via question templatization in {EHR}},
  booktitle = {{ClinicalNLP} 2024 - 6th Workshop on Clinical Natural Language Processing, Proceedings of the Workshop},
  editor    = {Naumann, Tristan and Ben Abacha, Asma and Bethard, Steven and Roberts, Kirk and Bitterman, Danielle},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {United States},
  pages     = {672--686},
  year      = {2024},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2024 Association for Computational Linguistics.; 6th Workshop on Clinical Natural Language Processing, ClinicalNLP 2024, held at NAACL 2024 ; Conference date: 21-06-2024},
  abstract  = {Transforming natural language questions into SQL queries is crucial for precise data retrieval from electronic health record (EHR) databases. A significant challenge in this process is detecting and rejecting unanswerable questions that request information beyond the database{\textquoteright}s scope or exceed the system{\textquoteright}s capabilities. In this paper, we introduce a novel text-to-SQL framework that robustly handles out-of-domain questions and verifies the generated queries with query execution. Our framework begins by standardizing the structure of questions into a templated format. We use a powerful large language model (LLM), fine-tuned GPT-3.5 with detailed prompts involving the table schemas of the EHR database system. Our experimental results demonstrate the effectiveness of our framework on the EHRSQL-2024 benchmark, a shared task in the ClinicalNLP workshop. Although a straightforward fine-tuning of GPT shows promising results on the development set, it struggled with the out-of-domain questions in the test set. With our framework, we improve our system{\textquoteright}s adaptability and achieve competitive performances in the official leaderboard of the EHRSQL-2024 challenge.},
}