@comment{Entry 13417: journal article on verifying test databases with the DEPICT tool; case studies at the Norwegian Customs and Excise Directorate and the Cancer Registry of Norway.}
@article{13417,
  author    = {Sen, Sagar and Marijan, Dusica and Ieva, Carlo and Grime, Astrid and Sander, Atle},
  title     = {Modelling and Verifying Combinatorial Interactions to Test Data Intensive Systems: Experience with Optimal Archiving at the {Norwegian Customs and Excise Directorate}},
  abstract  = {Testing data-intensive systems is paramount to increase our reliance on information processed in e-governance, scientific/medical research, and social networks. Data accrued in these systems often go through several manual and computational steps involving human inputs in interactive media and complex batch applications that aim to ensure high quality of data in terms of validity, correctness, and adherence to business rules. A common industrial practice in testing data-intensive systems is to extract test databases from live production streams and verify the data in them through a checklist of requirements either by tedious manual observation or by executing complex SQL queries composed and understood by very few domain experts. We elevate the specification of such requirements on data by modelling data interactions between fields cross-cutting the test database's schema. These interactions are modelled as test cases in a classification tree model. The model documents intuitive expert knowledge about what to expect in the test database and is given executable semantics using our human-in-the-loop tool DEPICT. DEPICT verifies if interactions occurred or not in systematically extracted test databases. Non-occurrence of expected interactions or occurrence of unexpected interactions indicate faults in the data. We present experiences on how our model-driven approach has been successfully applied to verify test databases in the Norwegian Public Sector. In particular, we present case studies at (1) the Norwegian Customs and Excise Directorate for verifying the adherence to customs regulations and (2) the Cancer Registry of Norway to verify its data quality management process involving both human coders and complex legacy batches.},
  journal   = {{IEEE} Transactions on Reliability},
  year      = {2016},
  pages     = {1--14},
  publisher = {IEEE},
}