Journal article (IEEE TPDS, 2015) proposing EcoCC, a hybrid congestion-control technique.
NOTE(review): the issue number was exported as the early-access placeholder "pp";
it is set to 1 here -- confirm against the publisher record for the DOI below.
@article{11346,
  author    = {Escudero-Sahuquillo, Jes{\'u}s and Gran, Ernst and Garcia, Pedro and Flich, Jos{\'e} and Skeie, Tor and Lysne, Olav and Quiles, Francisco and Duato, Jose},
  title     = {Efficient and Cost-Effective Hybrid Congestion Control for {HPC} Interconnection Networks},
  journal   = {IEEE Transactions on Parallel and Distributed Systems},
  volume    = {26},
  number    = {1},
  pages     = {107--119},
  year      = {2015},
  publisher = {IEEE},
  doi       = {10.1109/TPDS.2014.2307851},
  abstract  = {Interconnection networks are key components in High-Performance Computing (HPC) systems, their performance having a strong influence on the overall system one. However, at high load, congestion and its negative effects (e.g. Head-of-line blocking) threaten the performance of the network, and so the one of the entire system. Congestion control (CC) is crucial to ensure an efficient utilization of the interconnection network during congestion situations. As one major trend is to reduce the effective wiring in interconnection networks to reduce cost and power consumption, the network will operate very close to its capacity. Thus, congestion control becomes essential. Existing CC techniques can be divided into two general approaches. One is to throttle traffic injection at the sources that contribute to congestion, and the other is to isolate the congested traffic in specially designated resources. However, both approaches have different, but non-overlapping weaknesses: injection throttling techniques have a slow reaction against congestion, while isolating traffic in special resources may lead the system to run out of those resources. In this paper we propose EcoCC, a new Efficient and Cost-Effective CC technique, that combines injection throttling and congested-flow isolation to minimize their respective drawbacks and maximize overall system performance. This new strategy is suitable for current commercial switch architectures, where it could be implemented without requiring significant complexity. Experimental results, using simulations under synthetic and real trace-based traffic patterns, show that this technique improves by up to 55\% over some of the most successful congestion control techniques.},
}