% Conference paper in the PDCAT 2019 proceedings (record exported from a research portal).
% The former series field repeated booktitle verbatim and was dropped so that
% styles do not print the proceedings title twice.
% NOTE(review): address holds a country rather than the publisher's city -- confirm.
@inproceedings{39d18e8c0a0b4599a349ab7e0383beff,
  author    = {Cao, Lu and Shen, Hong},
  title     = {Imbalanced data classification using improved clustering algorithm and under-sampling method},
  booktitle = {Proceedings - 2019 20th International Conference on Parallel and Distributed Computing, Applications and Technologies, PDCAT 2019},
  editor    = {Tian, Hui and Shen, Hong and Tan, {Wee Lum}},
  pages     = {358--363},
  year      = {2019},
  month     = dec,
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  doi       = {10.1109/PDCAT46702.2019.00071},
  language  = {English},
  keywords  = {Classification, Clustering by fast search and find of density peaks, Imbalanced dataset, Support vector machine, Under-sampling},
  abstract  = {Imbalanced classification problem is a hot issue in data mining and machine learning. Traditional classification algorithms are proposed based on some form of symmetry hypothesis of class distribution, whose main purpose is to improve the overall classification performance. It is difficult to obtain ideal classification result when handling imbalanced datasets. In order to improve the classification performance of imbalanced datasets, this paper proposes a cluster-based under-sampling algorithm (CUS) according to the important characteristic of support vector machines (SVM) classification relying on support vector. Firstly, majority class is divided into different clusters using improved clustering by fast search and find of density peaks (CFSFDP) algorithm. The improved clustering algorithm can realize automatic selection of clustering centers, which overcomes the limitation of the original algorithm. Then the minority class and each cluster of the majority class are used to construct training set to get the support vector of each cluster by support vector machine. Retaining support vectors for each cluster and deleting non-support vectors are to construct a new majority class sample points to obtain relatively balanced datasets. Finally, the new datasets are classified by support vector machines and the performance is evaluated by cross validation sets. The experimental results show that CUS algorithm is effective.},
  note      = {Publisher Copyright: {\textcopyright} 2019 IEEE.; 20th International Conference on Parallel and Distributed Computing, Applications and Technologies, PDCAT 2019 ; Conference date: 05-12-2019 Through 07-12-2019},
}