// bmul4c.e  parallel multiply, 4 bit x 4 bit, unsigned
// Uses the add4c component (add4c.e) and the fadd component.
// The main component is bmul4c.
// Its basic building block is add4csa: four full adders (fadd) placed
// side by side, with carries passed on as a vector rather than chained.

define add4csa(b, a[4], sum_in[4], cin[4], sum_out[4], cout[4])
  // b       is one multiplier bit
  // a       is the multiplicand
  // sum_in  is the partial sum entering this stage
  // sum_out is the partial sum leaving this stage
  // cin and cout are vectors here, not single bits:
  //   cin[i] goes only to adder i, and adder i drives only cout[i],
  //   so no carry moves from one bit position to the next in this stage
  signal zero[4] <= #h0;
  signal aa[4];                          // a gated by b
circuits
  aa <= a when b else zero after 1ns;    // a when b is set, else all zeros
  // presumably fadd ports are (a, b, cin, sum, cout) - fadd is defined
  // outside this file, confirm against its definition
  add0 use fadd(aa[0], sum_in[0], cin[0], sum_out[0], cout[0]);
  add1 use fadd(aa[1], sum_in[1], cin[1], sum_out[1], cout[1]);
  add2 use fadd(aa[2], sum_in[2], cin[2], sum_out[2], cout[2]);
  add3 use fadd(aa[3], sum_in[3], cin[3], sum_out[3], cout[3]);
end circuits;
end add4csa;

// bmul4c  full combinatorial 4 X 4 = 8 bit unsigned multiplier
// b is multiplier input, a is multiplicand input, prod is output product
//
// One add4csa stage per multiplier bit. After each stage the low sum bit
// is taken as the next product bit and the remaining sum bits are shifted
// down one place before entering the next stage. The carry vector is passed
// to the next stage unshifted: a carry out of bit i of one stage has the
// same weight as bit i of the following stage.
define bmul4c(a[4], b[4], prod[8])
  signal zero[4] <= #h0;
  signal s0[4];      // sN  : sum out of stage N
  signal s1[4];
  signal s2[4];
  signal s3[4];
  signal s0s[4];     // sNs : sN shifted down one bit, 0 in the top bit
  signal s1s[4];
  signal s2s[4];
  signal s3s[4];
  signal c0[4];      // cN  : carry vector out of stage N
  signal c1[4];
  signal c2[4];
  signal c3[4];
  signal nc1;        // last output of the final add4c; not used elsewhere here
circuits
  ba0 use add4csa(b[0], a, zero, zero, s0, c0);  // special CSA stage: no sum or carry in yet
  s0s <= #b0.s0[3:1] after 1ns;                  // shift previous sum
  prod[0] <= s0[0] after 1ns;                    // extract product

  ba1 use add4csa(b[1], a, s0s, c0, s1, c1);
  s1s <= #b0.s1[3:1] after 1ns;
  prod[1] <= s1[0] after 1ns;

  ba2 use add4csa(b[2], a, s1s, c1, s2, c2);
  s2s <= #b0.s2[3:1] after 1ns;
  prod[2] <= s2[0] after 1ns;

  ba3 use add4csa(b[3], a, s2s, c2, s3, c3);
  s3s <= #b0.s3[3:1] after 1ns;
  prod[3] <= s3[0] after 1ns;

  // normal adder: combines the last shifted sum with the last carry vector
  // to form the upper four product bits
  // presumably add4c ports are (a, b, cin, sum, cout) - add4c is defined
  // in add4c.e, confirm against its definition
  add use add4c(s3s, c3, #b0, prod[7:4], nc1);
end circuits;
end bmul4c;