% Entry 9118: "Test Selection Based on Data Interactions in Data-Intensive
% Systems" (Sen, Ieva, Marijan, Sarkar, Gotlieb; 2013).
% - month uses the standard three-letter macro; the exported value was
%   "10/2013", which styles would print verbatim next to the year.
% - Authors are stored in "Last, First" form so name parsing is unambiguous.
% - NOTE(review): the keywords field marks this as a conference item, but no
%   booktitle is recorded here, so the entry type is left as misc. Standard
%   styles ignore number/publisher for misc entries; consider switching to
%   inproceedings (or techreport) once the venue is confirmed.
@misc{9118,
  keywords  = {Conference},
  author    = {Sen, Sagar and Ieva, Carlo and Marijan, Dusica and Sarkar, Arnab and Gotlieb, Arnaud},
  title     = {Test Selection Based on Data Interactions in Data-Intensive Systems},
  abstract  = {Testing data-intensive systems is paramount to increase our reliance on information in e-governance, scientific/ medical research, and social networks. Common practice to test these systems is by using a live production database. This testing approach is space and time inefficient and lacks clarity about what test cases or scenarios are covered. In this paper, we leverage classification tree modelling to specify desired test cases as data interactions between a set of fields across multiple tables of an existing database. Our methodology and tool, DEPICT, uses test case specifications in classification tree models to (a) automatically derive a spanning tree representing a relationship between any set of fields for any given database schema (b) generates queries to create an efficient inner join between related tables in the spanning tree (c) extract records from various tables that satisfy data interactions in the classification tree model (d) discovers holes or unsatisfied test cases in the test databases. We perform experiments to show that our approach is fast and scalable to extract test databases. Our experiments are based on selecting test databases from 8000 declarations for 60,000 items from the Norwegian Customs and Excise information system TVINN.},
  year      = {2013},
  month     = oct,
  number    = {2013-03},
  publisher = {Simula Research Laboratory},
}