@article{17905,
  author    = {Andrea Stor{\r a}s and Steffen M{\ae}land and Jonas Isaksen and Steven Hicks and Vajira Thambawita and Claus Graff and Hugo Hammer and P{\r a}l Halvorsen and Michael Riegler and J{\o}rgen Kanters},
  title     = {Evaluating gradient-based explanation methods for neural network {ECG} analysis using heatmaps},
  abstract  = {Objective: To evaluate popular explanation methods that use heatmap visualizations to explain the predictions of deep neural networks for electrocardiogram (ECG) analysis, and to provide recommendations for selecting explanation methods. Materials and Methods: A residual deep neural network was trained on ECGs to predict intervals and amplitudes. Nine commonly used explanation methods (Saliency, Deconvolution, Guided backpropagation, Gradient SHAP, SmoothGrad, Input {\texttimes} gradient, DeepLIFT, Integrated gradients, GradCAM) were evaluated qualitatively by medical experts and objectively with a perturbation-based method. Results: No single explanation method consistently outperformed the others, but some methods were clearly inferior. We found considerable disagreement between the human expert evaluation and the objective evaluation by perturbation. Discussion: The best explanation method depended on the ECG measure. To ensure that future explanations of deep neural networks for medical data analysis are useful to medical experts, data scientists developing new explanation methods should collaborate closely with domain experts. Because no explanation method performs best in all use cases, several methods should be applied. Conclusion: Several explanation methods should be used to determine the most suitable approach.},
  year      = {2024},
  journal   = {Journal of the American Medical Informatics Association (JAMIA)},
  publisher = {Oxford University Press},
}