% Journal article: Fractional Calculus and Applied Analysis, volume 19 (2016).
% The issue number below is taken from the "issue-1" segment of the url field.
@article{13896,
  author    = {Zhang, Wei and Cai, Xing},
  title     = {Solving {3D} Time-Fractional Diffusion Equations by High-Performance Parallel Computing},
  journal   = {Fractional Calculus and Applied Analysis},
  year      = {2016},
  volume    = {19},
  number    = {1},
  pages     = {140--160},
  publisher = {De Gruyter},
  doi       = {10.1515/fca-2016-0008},
  url       = {http://www.degruyter.com/view/j/fca.2016.19.issue-1/fca-2016-0008/fca-2016-0008.xml},
  keywords  = {fractional differential equations, loop unrolling, parallel computing, vectorization},
  abstract  = {Numerically solving time-fractional diffusion equations, especially in three space dimensions, is a daunting computational task. This is due to the huge requirements of both computation time and memory storage. Compared with solving integer-ordered diffusion equations, the costs for time and storage both increase by a factor that equals the number of time steps involved. Aiming to overcome these two obstacles, we study in this paper three programming techniques: loop unrolling, vectorization and parallelization. For a representative numerical scheme that adopts finite differencing and explicit time integration, the performance-enhancing techniques are indeed shown to dramatically reduce the computation time, while allowing the use of many CPU cores and thereby a large amount of memory storage. Moreover, we have developed simple-to-use performance models that support our empirical findings, which are based on using up to 8192 CPU cores and 12.2 terabytes.},
}