% Hung_02: journal article by E. Hung and D. Cheung on parallel mining of
% outliers (ENL / PENL algorithms), Distributed and Parallel Databases 12, 2002.
% NOTE(review): the issue NUMBER was empty in the original record and has been
% dropped rather than guessed -- add it once confirmed against the publisher.
% NOTE(review): only author initials are recorded; expand to full given names
% if they can be verified.
@ARTICLE{Hung_02,
  AUTHOR   = {Hung, E. and Cheung, D.},
  TITLE    = {Parallel Mining of Outliers in Large Database},
  JOURNAL  = {Distributed and Parallel Databases},
  YEAR     = {2002},
  VOLUME   = {12},
  PAGES    = {5--26},
  MONTH    = jul,
  URL      = {http://www.cs.umd.edu/~ehung/paper/pnl2kluwer.pdf},
  ABSTRACT = {Data mining is a new, important and fast growing database application. Outlier (exception) detection is one kind of data mining, which can be applied in a variety of areas like monitoring of credit card fraud and criminal activities in electronic commerce. With the ever-increasing size and attributes (dimensions) of database, previously proposed detection methods for two dimensions are no longer applicable. The time complexity of the Nested-Loop (NL) algorithm (Knorr and Ng, in Proc. 24th VLDB, 1998) is linear to the dimensionality but quadratic to the dataset size, inducing an unacceptable cost for large dataset. A more efficient version (ENL) and its parallel version (PENL) are introduced. In theory, the improvement of performance in PENL is linear to the number of processors, as shown in a performance comparison between ENL and PENL using Bulk Synchronization Parallel (BSP) model. The great improvement is further verified by experiments on a parallel computer system IBM 9076 SP2. The results show that it is a very good choice to mine outliers in a cluster of workstations with a low-cost interconnected by a commodity communication network.},
}