% Journal article. The citation key is kept unchanged so existing \cite commands still resolve.
% NOTE(review): only a portal.acm.org URL is recorded; add a DOI once it has been verified.
@ARTICLE{Cho_02,
  AUTHOR   = {Cho, Vincent and W{\"u}thrich, Beat},
  TITLE    = {Distributed Mining of Classification Rules},
  JOURNAL  = {Knowledge and Information Systems},
  YEAR     = {2002},
  VOLUME   = {4},
  NUMBER   = {1},
  PAGES    = {1--30},
  MONTH    = jan,
  URL      = {http://portal.acm.org/citation.cfm?id=639626},
  ABSTRACT = {Many successful data-mining techniques and systems have been developed. These techniques usually apply to centralized databases with less restricted requirements on learning and response time. Not so much effort has yet been put into mining distributed databases and real-time issues. In this paper, we investigate issues of fast-distributed data mining. We assume that merging the distributed databases into a single one would either be too costly (distributed case) or the individual fragments would be non-uniform so that mining only one fragment would bias the result (fragmented case). The goal is to classify the objects O of the database into one of several mutually exclusive classes Ci. Our approach to make mining fast and feasible is as follows. From each data site or fragment dbk, only a single rule rik is generated for each class Ci. A small subset {ri1,..., rih} of these individual rules is selected to form a rule set Ri for each class Ci . These rule subsets represent adequately the hidden knowledge of the entire database. Various selection criteria to form Ri are discussed, both theoretically and experimentally.},
}