# parallel_matmul.py find how large matrix multiply can be # checked that time increases order n^3 # doubling N takes 8 times as long # can be days for 10,000 by 10,000 from time import time import threading class MulThread(threading.Thread): """ This does matrix multiply on its part of the result matrix. Easy for this case, just parallelize the outer loop. """ def __init__(self, i1, i2, a, b, c): threading.Thread.__init__(self) self.i1 = i1 self.i2 = i2 self.a = a self.b = b self.c = c print "MulThread inited from %d to %d" % (self.i1, self.i2) def run(self): print "MulThread running from %d to %d" % (self.i1, self.i2) for i in range(self.i1, self.i2): for j in range(N): self.c[i][j] = 0.0 for k in range(N): self.c[i][j] = self.c[i][j] + self.a[i][k]*self.b[k][j] # most time spent here! print "MulThread finished from %d to %d" % (self.i1, self.i2) return P = 4 # number of parallel processors to use i1 = [0 for j in range(P)] i2 = [0 for j in range(P)] jnk = 0 for N in [100, 200, 300]: # N<500 is OK, takes minutes print "parallel multiply N by N matrices, N=%d" %(N) print "P=%d processors hopefully available to be used" %(P) a = [[0 for j in range(N)] for i in range(N)] b = [[0 for j in range(N)] for i in range(N)] c = [[0 for j in range(N)] for i in range(N)] # set up for P processors i1[0] = 0; i2[0] = N/P; for i in range(P-1): i1[i+1] = i2[i]+1 i2[i+1] = i2[0]*(i+2) i2[P-1] = N-1 # fix last limit for i in range(N): for j in range(N): a[i][j] = 1.0+i b[i][j] = 1.0+j print "initialized" t1 = time() for p in range(P): MulThread(i1[p], i2[p], a, b, c).start() # wait for threads to complete # for p in range(P): # MulThread(i1[p], i2[p], a, b, c).join() # print "MulThread joined from %d to %d" % (i1[p], i2[p]) while(threading.active_count()>1): jnk = jnk # print "active_count=%d" %(threading.active_count()) t2 = time() dt = t2-t1 print "N=%d, c[5][5]=%g, raw time=%g seconds" %(N, c[5][5], dt) t2 = 1.0e+6*(dt)/(N*N*N) print "order N^3 normalized time=%g \n" %(t2) # end parallel_matmul.py