% Conference paper from the 2004 ACM SIGMOD proceedings (see BOOKTITLE).
% ADDRESS is kept from the original entry; presumably it is the conference
% venue rather than the publisher's city -- confirm before adding PUBLISHER.
% DOI replaces a session-bound delivery.acm.org link (CFID/CFTOKEN query
% parameters). Its article id 1007621 comes from that link; the proceedings
% prefix should be verified against the ACM Digital Library.
@INPROCEEDINGS{Nassar_04,
    AUTHOR      = {Nassar, Samer and Sander, J{\"o}rg and Cheng, Corrine},
    TITLE       = {Incremental and Effective Data Summarization for Dynamic Hierarchical Clustering},
    BOOKTITLE   = {Proceedings of the 2004 {ACM} {SIGMOD} International Conference on Management of Data},
    YEAR        = {2004},
    PAGES       = {467--478},
    ADDRESS     = {Paris, France},
    MONTH       = jun,
    DOI         = {10.1145/1007568.1007621},
    ABSTRACT    = {Mining informative patterns from very large, dynamically
                   changing databases poses numerous interesting challenges. Data
                   summarizations (e.g., data bubbles) have been proposed to
                   compress very large static databases into representative points
                   suitable for subsequent effective hierarchical cluster analysis. In
                   many real world applications, however, the databases dynamically
                   change due to frequent insertions and deletions, possibly changing
                   the data distribution and clustering structure over time.
                   Completely reapplying both the data summarization and the
                   clustering algorithm to detect the changes in the clustering
                   structure and update the uncovered data patterns following such
                   deletions and insertions is prohibitively expensive for large fast
                   changing databases. In this paper, we propose a new scheme to
                   maintain data bubbles incrementally. By using incremental data
                   bubbles, a high-quality hierarchical clustering is quickly available
                   at any point in time. In our scheme, a quality measure for
                   incremental data bubbles is used to identify data bubbles that do
                   not compress well their underlying data points after certain
                   insertions and deletions. Only these data bubbles are re-built
                   using efficient split and merge operations. An extensive
                   experimental evaluation shows that the incremental data bubbles
                   provide significantly faster data summarization than completely
                   re-building the data bubbles after a certain number of insertions
                   and deletions, and are effective in preserving (and in some cases
                   even improving) the quality of the data summarization.},
}