%-------
% "singleLink" demonstrate clustering algorithm
%-------
n = input('dimensionality of data space n (n >= 2):');
assert( n >=2 );
m = input('number of data points (vectors in R^n) to generate:');
assert( m >=2 );
% generate random points in the 0:1 hypercube
% represent the points in an m*n array called d, so that each point
% is a row of the d matrix
d = rand(m, n);
% if dimension is 2 or 3, we can do a plot
% clear the old plots, if any
if n == 2
gplot clear
gset noparametric
elseif n == 3
gsplot clear
gset parametric
else
more off;
page_output_immediately = 1;
endif
% plot the points in the matrix
if n <= 3
xlabel("x axis");
ylabel("y axis");
zlabel("z axis");
gset size ratio 1
gset nokey
gset pointsize 2
if n == 2
gplot [0:1] [0:1] d with points
elseif n == 3
gsplot [0:1] [0:1] [0:1] d with points
endif
% keep the same plot going through each pass
hold on;
endif;
% now build a distance matrix, using Euclidean distance
% the matrix will be upper triangular
if n>3
printf("building similarity matrix\n");
endif
s=zeros(m);
for i = 1:m
for j = i:m % note MATLAB vector op
s(i,j) = sqrt(sumsq((d(i,:) - d(j,:)))) ;
endfor
endfor
% loop through the data
% at each step, join the most similar pair of objects (points or clusters)
% that are not yet in the same cluster
% make a vector which says which cluster a given point is in
% initialze with each point in its own cluster
cluster = zeros(1,m);
for i=1:m
cluster(i) = i;
endfor
if n>3
printf("initial clustering\n");
endif
for pass = 1:m-1
% loop through the points, pairwise, and find closesti and closestj, the points
% in different clusters with the smallest distance
closestDist = n; % bigger than is possible in n dimensional hypercube
for i=1:m;
for j=i+1:m;
if (cluster(i) != cluster(j)) & (s(i,j) < closestDist)
closesti = i;
closestj = j;
closestDist = s(i,j);
endif
endfor
endfor
assert(cluster(closesti) != cluster(closestj));
%printf ("the two closest points are %d in cluster %d and %d in cluster %d\n",
% closestj,cluster(closestj), closesti, cluster(closesti));
newLink = zeros(2,n);
newLink(1,:) = d(closesti,:);
newLink(2,:) = d(closestj,:);
oldc = cluster(closestj);
newc = cluster(closesti);
if n == 2
gplot newLink with lines
elseif n == 3
gsplot newLink with lines
else
printf("merge cluster %d into cluster %d\n", oldc, newc);
endif
% connect the clusters by making each point in closesti's cluster a member of
% closestj's cluster
for k=1:m
if cluster(k) == oldc
cluster(k) = newc;
endif
endfor
endfor