%------- % "singleLink" demonstrate clustering algorithm %------- n = input('dimensionality of data space n (n >= 2):'); assert( n >=2 ); m = input('number of data points (vectors in R^n) to generate:'); assert( m >=2 ); % generate random points in the 0:1 hypercube % represent the points in an m*n array called d, so that each point % is a row of the d matrix d = rand(m, n); % if dimension is 2 or 3, we can do a plot % clear the old plots, if any if n == 2 gplot clear gset noparametric elseif n == 3 gsplot clear gset parametric else more off; page_output_immediately = 1; endif % plot the points in the matrix if n <= 3 xlabel("x axis"); ylabel("y axis"); zlabel("z axis"); gset size ratio 1 gset nokey gset pointsize 2 if n == 2 gplot [0:1] [0:1] d with points elseif n == 3 gsplot [0:1] [0:1] [0:1] d with points endif % keep the same plot going through each pass hold on; endif; % now build a distance matrix, using Euclidean distance % the matrix will be upper triangular if n>3 printf("building similarity matrix\n"); endif s=zeros(m); for i = 1:m for j = i:m % note MATLAB vector op s(i,j) = sqrt(sumsq((d(i,:) - d(j,:)))) ; endfor endfor % loop through the data % at each step, join the most similar pair of objects (points or clusters) % that are not yet in the same cluster % make a vector which says which cluster a given point is in % initialze with each point in its own cluster cluster = zeros(1,m); for i=1:m cluster(i) = i; endfor if n>3 printf("initial clustering\n"); endif for pass = 1:m-1 % loop through the points, pairwise, and find closesti and closestj, the points % in different clusters with the smallest distance closestDist = n; % bigger than is possible in n dimensional hypercube for i=1:m; for j=i+1:m; if (cluster(i) != cluster(j)) & (s(i,j) < closestDist) closesti = i; closestj = j; closestDist = s(i,j); endif endfor endfor assert(cluster(closesti) != cluster(closestj)); %printf ("the two closest points are %d in cluster %d and %d in cluster %d\n", % closestj,cluster(closestj), closesti, cluster(closesti)); newLink = zeros(2,n); newLink(1,:) = d(closesti,:); newLink(2,:) = d(closestj,:); oldc = cluster(closestj); newc = cluster(closesti); if n == 2 gplot newLink with lines elseif n == 3 gsplot newLink with lines else printf("merge cluster %d into cluster %d\n", oldc, newc); endif % connect the clusters by making each point in closesti's cluster a member of % closestj's cluster for k=1:m if cluster(k) == oldc cluster(k) = newc; endif endfor endfor