@comment{Entry 15118: conference paper published at APSEC 2018. The numeric citation key is kept as-is so existing citations keep resolving.}
@inproceedings{15118,
  author    = {Rosenberg, Carl and Moonen, Leon},
  title     = {On the Use of Automated Log Clustering to Support Effort Reduction in Continuous Engineering},
  booktitle = {25th Asia-Pacific Software Engineering Conference ({APSEC} 2018)},
  pages     = {179--188},
  publisher = {{IEEE}},
  year      = {2018},
  month     = dec,
  keywords  = {continuous deployment, problem identification, diagnosis, event log mining, event log analysis},
  abstract  = {Continuous engineering (CE) practices, such as continuous integration and continuous deployment, have become key to modern software development. They are characterized by short automated build and test cycles that give developers early feedback on potential issues. CE practices help to release software more frequently, and reduces risk by increasing incrementality. However, effective use of CE practices in industrial projects requires making sense of the vast amounts of data that results from the repeated build and test cycles. The goal of this paper is to investigate to what extent these data can be treated more effectively by automatically grouping logs of runs that failed for the same underlying reasons, and what effort reduction can be achieved. To this end, we replicate and extend earlier work on system log clustering to evaluate its efficacy in the CE context, and to investigate the impact of five alternative log vectorization techniques. We built a prototype tool that is used to conduct an empirical case study on continuous deployment logs provided by our industrial collaborator. Questions to be answered include: (1) Can we reduce the effort needed to discover all latent issues in a set of failing runs? (2) How to best leverage the contrast between passing and failing runs to increase accuracy? (3) What trade-offs are there between effort reduction and accuracy? We present a quantitative and qualitative analysis of the results of our study. We conclude by evaluating the trade-offs, and give recommendations for applying this approach in practice.},
}