% Chawla_04: journal article on learning classifier ensembles from small
% subsets ("bites") of massive data sets.
% NOTE(review): the url field below carries query parameters (key1, key2,
% CFID, CFTOKEN) that appear to be session-specific; it may not resolve for
% other readers -- consider replacing it with a permanent link or a doi.
@article{Chawla_04,
  author   = {Chawla, Nitesh V. and Hall, Lawrence O. and Bowyer, Kevin W. and Kegelmeyer, W. Philip},
  title    = {Learning Ensembles from Bites: A Scalable and Accurate Approach},
  journal  = {Journal of Machine Learning Research},
  year     = {2004},
  volume   = {5},
  pages    = {421--451},
  month    = apr,
  url      = {http://delivery.acm.org/10.1145/1010000/1005347/p421-chawla.pdf?key1=1005347&key2=3527580901&coll=GUIDE&dl=GUIDE&CFID=24745947&CFTOKEN=29878325},
  abstract = {Bagging and boosting are two popular ensemble methods that typically achieve better accuracy than a single classifier. These techniques have limitations on massive data sets, because the size of the data set can be a bottleneck. Voting many classifiers built on small subsets of data (pasting small votes) is a promising approach for learning from massive data sets, one that can utilize the power of boosting and bagging. We propose a framework for building hundreds or thousands of such classifiers on small subsets of data in a distributed environment. Experiments show this approach is fast, accurate, and scalable.},
}