@comment{Entry 17337: conference paper in the IEEE CBMS 2023 proceedings. The numeric citation key is kept as-is so existing citations continue to resolve. The doi field stores the bare DOI; styles add the resolver prefix.}
@inproceedings{17337,
  author    = {Pishva, Alexander and Thambawita, Vajira and Torresen, Jim and Hicks, Steven},
  title     = {{RePolyp}: A Framework for Generating Realistic Colon Polyps with Corresponding Segmentation Masks using Diffusion Models},
  booktitle = {2023 IEEE 36th International Symposium on Computer-Based Medical Systems (CBMS)},
  year      = {2023},
  pages     = {47--52},
  publisher = {IEEE},
  doi       = {10.1109/CBMS58004.2023.00190},
  abstract  = {The field of synthetic medical data has become increasingly important due to the urgent need for large and diverse datasets in the medical sector. Using diffusion models in data generation has created more authentic and varied medical data. In this study, a framework is presented that utilizes diffusion models trained on openly accessible data to generate realistic-looking colon polyps, along with their corresponding ground truth masks. The usefulness of the synthetic polyps is evaluated by using them to train segmentation models designed to segment colon polyps in real-world images. The results demonstrate that the generated synthetic data is highly accurate and suggest that including synthetic polyps in the training dataset improves the predictive performance and generalization of the segmentation models. When the training dataset consists of pre-generated synthetic data from our model, we achieve a mean intersection over union (mIoU) improvement of 4.64\% on the validation data and a 4.14\% mIoU improvement when testing across different datasets. These results indicate that generating synthetic medical data using diffusion models is valuable for addressing the need for diverse and extensive medical datasets.},
}