% Conference paper by Zha and Shen in the PDCAT 2019 proceedings (IEEE), pp. 52--57.
% The citation key is an exporter-generated identifier; keep it stable for existing citations.
@inproceedings{3565e602a5124043b40d97fdeb497c51,
  author    = {Zha, Benbo and Shen, Hong},
  title     = {{I/O} scheduling for limited-size burst-buffers deployed high performance computing},
  booktitle = {Proceedings - 2019 20th International Conference on Parallel and Distributed Computing, Applications and Technologies, PDCAT 2019},
  editor    = {Tian, Hui and Shen, Hong and Tan, {Wee Lum}},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {52--57},
  year      = {2019},
  month     = dec,
  doi       = {10.1109/PDCAT46702.2019.00021},
  language  = {English},
  keywords  = {Burst-buffers, High performance computing, I/O congestion, I/O scheduling},
  note      = {Publisher Copyright: {\textcopyright} 2019 IEEE.; 20th International Conference on Parallel and Distributed Computing, Applications and Technologies, PDCAT 2019 ; Conference date: 05-12-2019 Through 07-12-2019},
  abstract  = {Burst-Buffers is a high throughput, small size intermediate storage system integrated between computing nodes and permanent storage system to mitigate the I/O bottleneck problem in modern High Performance Computing (HPC) platforms. This system, however, is unable to effectively handle variable-intensity I/O bursts resulted by unpredictable concurrent accesses to the shared Parallel File System (PFS). In this paper, we introduce a probabilistic I/O scheduling method that takes into account of the burst-buffer load state and instantaneous I/O load distribution of the system based on the probabilistic model of applications to relieve the I/O congestion when I/O load exceeds the PFS bandwidth caused by dynamic application interference. The proposed scheduling method for limited-size Burst-Buffers deployed HPC platforms makes online decision of probabilistic selection of concurrent I/O requests for going through (to PFS), buffering (to Burst-Buffers) or declination in accordance to both the available I/O bandwidth and the current buffer state in order to maximize system efficiency or minimize application dilation. Extensive experiment results on actual characteristic synthetic data show that our method handles the I/O congestion effectively.},
}