@article{18198,
  author        = {Torpmann-Hagen, Birk and Riegler, Michael and Halvorsen, P{\aa}l and Johansen, Dag},
  title         = {Runtime Verification for Visual Deep Learning Systems With Loss Prediction},
  journal       = {{IEEE} Access},
  year          = {2025},
  publisher     = {IEEE},
  doi           = {10.1109/ACCESS.2025.3552157},
  url           = {https://ieeexplore.ieee.org/document/10930478},
  abstract      = {Deep neural networks often demonstrate remarkable performance on benchmarks, but have been shown to readily fail once they leave lab-conditions and are deployed in real-world conditions. This can be attributed to their sensitivity to minor changes in the nature of the data, typically referred to as distributional shifts. To address this, a growing body of work has emerged implementing distributional shift detectors, a form of runtime verification intended to detect when a neural network encounters data for which it is unlikely to predict correctly. These methods generally involve extracting features which characterize the degree to which a given point is out-of-distribution and setting a suitable threshold for this feature which sufficiently separates in-distribution and out-of-distribution data. In this paper, we advocate for a significant methodological shift from framing the detection of distributional-shift as a classification problem to framing it as a regression problem. We first justify this view through an empirical analysis of several popular methods of distributional shift detection, where we show that optimal thresholds for one type and severity of shift does not necessarily generalize to other shifts and that classification generally lacks the granularity and expressivity for use as a form of runtime verification in deployment scenarios. We then show that the relationships between the features these classifiers are based upon and network loss are often fairly strong. Motivated by these results, we perform a feasibility study wherein we train a generalized linear model on synthetically-augmented data, which we assess on instances of organic distributional shift in several domain-adaptation datasets. We attain promising results, with for instance, an average percentage error of 5\% when predicting network loss for a polyp segmentation benchmark, equivalent to predicting intersection-over-union scores with an average error of only 0.03. We discuss the implic...},
  internal-note = {Abstract is truncated at source ("implic..."); restore full text from the publisher page.},
}