% Conference paper: Eisenhardt, Muller, Henrich -- distributed P2P document
% clustering, Informatik 2003 proceedings (GI Lecture Notes in Informatics).
% Empty placeholder fields (editor, pages, publisher, volume, number, note,
% keywords, isbn) were removed; re-add them only once real values are known.
% NOTE(review): the surname "Muller" may be an ASCII-folded umlaut spelling;
% confirm against the paper before changing it.
@inproceedings{Eisenhardt_03,
  author    = {Eisenhardt, M. and Muller, W. and Henrich, A.},
  title     = {Classifying Documents by Distributed {P2P} Clustering},
  booktitle = {Proceedings of {Informatik} 2003},
  series    = {GI Lecture Notes in Informatics},
  address   = {Frankfurt, Germany},
  month     = sep,
  year      = {2003},
  url       = {http://ai1.inf.uni-bayreuth.de/forschung/publikationsliste/2003/EMH_informatik2003.pdf},
  abstract  = {Clustering documents into classes is an important task in many Information Retrieval (IR) systems. This achieved grouping enables a description of the contents of the document collection in terms of the classes the documents fall into. The compactness of such a description is even more desirable in cases where the document collection is spread across different computers and locations; document classes can then be used to describe each partial document collection in a conveniently short form that can easily be exchanged with other nodes on the network. Unfortunately, most clustering schemes cannot easily be distributed. Additionally, the costs of transferring all data to a central clustering service are prohibitive in large-scale systems. In this paper, we introduce an approach which is capable of classifying documents that are distributed across a Peer-to-Peer (P2P) network. We present measurements taken on a P2P network using synthetic and real-world data sets.},
}