@comment{
  Entry 17846: PhD thesis record.
  - school holds the degree-granting institution (required for thesis entries).
  - type carries the thesis label; the acronym is braced so styles that
    sentence-case this field keep its capitalisation.
  - month uses the standard three-letter macro, unquoted; the year lives in year.
  NOTE(review): four names are listed as authors; a thesis normally has a single
  author. Confirm whether the additional names are supervisors and, if so, move
  them out of the author field.
  NOTE(review): pages presumably holds the total page count, not a range. Confirm.
}
@phdthesis{17846,
  author    = {Evang, Jan and Bryhni, Haakon and Michelinakis, Foivos and Lysne, Olav},
  title     = {Resilient Networks for Critical Services},
  school    = {Oslo Metropolitan University},
  type      = {{PhD} thesis},
  year      = {2024},
  month     = sep,
  pages     = {112},
  publisher = {Oslomet - storbyuniversitetet},
  address   = {Oslo, Norway},
  isbn      = {978-82-8364-581-1},
  abstract  = {Network services play a pivotal role in today{\textquoteright}s society, serving the needs of businesses, governments and for individuals in their daily life. While we often take the seamless functionality of the Internet for granted, its growing use by Critical Services underscores the escalating importance of comprehending both resilience and security challenges. The interconnected networks that make up the Internet are operated by various actors such as enterprises, governmental agencies, and content delivery networks (CDNs), in addition to global and local Internet Service Providers (ISPs). Maintaining these network services is a complex task. Numerous components could potentially disrupt the service, and it is essential for network service operators to understand the risks associated with each component. Network security encompasses three fundamental objectives: Confidentiality, Integrity, and Availability [1]. Confidentiality and integrity are often addressed together, due to shared common attack vectors and mitigation solutions. Ensuring availability, however, presents a distinctive challenge. The primary focus of availability is to guarantee that the network service remains operational and usable. Although breaches in confidentiality and integrity can have indirect effects on availability, the nature of risk mitigation strategies differs significantly. In this setting, resilience and redundancy are central concepts. Together, the papers in this thesis analyse the complete risk landscape applicable to delivering a resilient network for critical services. A majority of the research is performed on the Media Network Services (MNS) global video conferencing network, chosen for its relevance to risk management, and the applicability of results to other network operators.
Papers I and VI use 18 months of measurement data to analyse the root causes of network outages, revealing that the most important outages stem from leased Internet links, physical faults, and human errors. In contrast, relatively few are attributed to local network faults or malicious attacks. This insight into the root causes serves as a foundational understanding for subsequent analyses. Paper II presents 5 years of risk registry data highlighting the role of management standards like ISO27001 in risk reduction, showcasing their efficacy in fostering a robust risk management framework across various organizational levels. Paper III delves into the intricate domain of Internet risks, demonstrating effective mitigation strategies to enhance network resilience against outages, packet loss and high latency originating from the Internet. Paper IV establishes a co-variation between organisations{\textquoteright} security implementations and adherence to two security standards, Mutually Agreed Norms for Routing Security (MANRS) and ISO27001. By verifying Resource Public Key Infrastructure (RPKI) participation, IP spoofer protection, and Internet risk scores for organizations adhering to MANRS and/or ISO27001, we demonstrate that a security-aware company culture is connected to better security practices. Recent paradigm-shifting incidents like COVID-19 and the Russian incursion into Ukraine demonstrate the importance of considering governance risks. Paper VII extends the scope to encompass national governance risks, specifically the high dependency of national web services on foreign micro services and cloud services, highlighting the imperative of considering broader contextual factors. Drawing from the collective insights of these papers, combining the theoretical analyses with experiments on an operational network and real-life experiences, Paper V emerges as a synthesis, proposing an innovative cohesive 10-layer model that pragmatically organizes identified risks.
This model stands as a testament to the integration of empirical findings into a practical framework, and the results can be generalized to a range of different networks. By utilizing the 10-layer model, network operators will reduce their availability risk and deliver a higher quality service to their customers.},
}