% Journal review article. The exporter's free-text dump (acknowledgements,
% funding statement, copyright line) is kept in the non-standard fields
% `funding-information` and `copyright`, which bibliography styles ignore,
% rather than in `note`, which standard styles print verbatim.
@article{26477d15fe024a21a59a94e1a256e5d4,
  author   = {Chlap, Phillip and Min, Hang and Vandenberg, Nym and Dowling, Jason and Holloway, Lois and Haworth, Annette},
  title    = {A review of medical image data augmentation techniques for deep learning applications},
  journal  = {Journal of Medical Imaging and Radiation Oncology},
  year     = {2021},
  month    = aug,
  volume   = {65},
  number   = {5},
  pages    = {545--563},
  doi      = {10.1111/1754-9485.13261},
  issn     = {1754-9477},
  publisher = {Wiley-Blackwell},
  language = {English},
  keywords = {CT, data augmentation, deep learning, medical imaging, MRI},
  abstract = {Research in artificial intelligence for radiology and radiotherapy has recently become increasingly reliant on the use of deep learning-based algorithms. While the performance of the models which these algorithms produce can significantly outperform more traditional machine learning methods, they do rely on larger datasets being available for training. To address this issue, data augmentation has become a popular method for increasing the size of a training dataset, particularly in fields where large datasets aren{\textquoteright}t typically available, which is often the case when working with medical images. Data augmentation aims to generate additional data which is used to train the model and has been shown to improve performance when validated on a separate unseen dataset. This approach has become commonplace so to help understand the types of data augmentation techniques used in state-of-the-art deep learning models, we conducted a systematic review of the literature where data augmentation was utilised on medical images (limited to CT and MRI) to train a deep learning model. Articles were categorised into basic, deformable, deep learning or other data augmentation techniques. As artificial intelligence models trained using augmented data make their way into the clinic, this review aims to give an insight to these techniques and confidence in the validity of the models produced.},
  funding-information = {Data augmentation of medical images is becoming commonplace for deep learning applications. A suite of basic techniques (e.g. scaling, rotation and flipping of images) are widely used in model design, and as more advanced techniques, such as deformable augmentation (e.g. randomised displacement fields) and deep learning-based approaches (e.g. GAN) are refined, they are becoming more widely utilised in model design. While realism can be a goal for augmented data, it is not always a necessity as unrealistic augmentations may still produce a more generalisable model. Data augmentation is particularly advantageous in scenarios where there is insufficient training data available, to help correct overfitting of a model. The key factor in assessing the performance of a deep learning model is its results on a test set of real unseen data. If this test set represents a population sufficiently, then confidence can be had in the assessment of the model performance and the training process of the model, including any data augmentation that may have been performed. We would like to thank Dr. Jesmin Shafiq and the University of New South Wales library staff for their advice and guidance in conducting the systematic review. We would also like to thank Mr. Siyu Liu from the University of Queensland for providing the images in Figure~6 of using GAN to generate synthetic images from semantic labels. Phillip Chlap is supported by an Australian Government Research Training Program (RTP) Scholarship.},
  copyright = {{\textcopyright} 2021 The Royal Australian and New Zealand College of Radiologists},
}