Monday, 26 May 2014

3.5 WALLACE MULTIPLIER





A Wallace tree is an efficient hardware implementation of a digital circuit that multiplies two integers, devised by an Australian Computer Scientist Chris Wallace in 1964.
The Wallace tree has three steps:
  1. Multiply (that is - AND) each bit of one of the arguments, by each bit of the other, yielding results. Depending on position of the multiplied bits, the wires carry different weights, for example wire of bit carrying result of is 32.
  2. Reduce the number of partial products to two by layers of full and half adders.
  3. Group the wires in two numbers, and add them with a conventional adder.
The second phase works as follows. As long as there are three or more wires with the same weight add a following layer:
  • Take any three wires with the same weights and input them into a full adder. The result will be an output wire of the same weight and an output wire with a higher weight for each three input wires.
  • If there are two wires of the same weight left, input them into a half adder.
  • If there is just one wire left, connect it to the next layer.
The benefit of the Wallace tree is that there are only reduction layers, and each layer has propagation delay. As making the partial products is and the final addition is , the multiplication is only , not much slower than addition (however, much more expensive in the gate count). Naively adding partial products with regular adders would require time. From a complexity theoretic perspective, the Wallace tree algorithm puts multiplication in the class NC


DIAGRAM:

 
PROGRAM:

module wallace(output reg [15:0] product, input [7:0] x, y, input clock);
reg p [7:0][7:0];                 
wire [55:0] s ,c ;               
integer i,j;
always@(y, x)         
 begin

// creating the partial products.

                                for ( i = 0; i <= 7; i = i + 1)
                                                                for ( j = 0; j <= 7; j = j + 1)
                                                                                                                p[j][i] <= x[j] & y[i];       
                                                                                                               
//all the partial products have been obtained and stored in a array of 8 X 8 matrix.                          
end
HA ha_11 ( .sum(s[0]), .carry(c[0]), .a(p[1][0]), .b( p[0][1])); //P1

FA fa_21 ( .sum(s[1]), .carry(c[1]), .a(p[2][0]), .b( p[1][1]), .cin( (c[0]) ) );
HA ha_21 ( .sum(s[2]), .carry(c[2]), .a(p[0][2]), .b( s[1]));                                              //P2

FA fa_31 ( .sum(s[3]), .carry(c[3]), .a(p[3][0]), .b( p[2][1]), .cin( c[1]) );
FA fa_32 ( .sum(s[4]), .carry(c[4]), .a(p[1][2]), .b( s[3]), .cin( c[2]) );
HA ha_31 ( .sum(s[5]), .carry(c[5]), .a(p[0][3]), .b( s[4]));                                               //P3

FA fa_41 ( .sum(s[6]), .carry(c[6]), .a(p[4][0]), .b( p[3][1]), .cin( c[3]) );
FA fa_42 ( .sum(s[7]), .carry(c[7]), .a(p[2][2]), .b(s[6]), .cin( c[4]) );
FA fa_43 ( .sum(s[8]), .carry(c[8]), .a(p[1][3]), .b(s[7]), .cin( c[5]) );
HA ha_41 ( .sum(s[9]), .carry(c[9]), .a(p[0][4]), .b( s[8]));                                             //P4

FA fa_51 ( .sum(s[10]), .carry(c[10]), .a(p[5][0]), .b( p[4][1]), .cin( c[6]) );
FA fa_52 ( .sum(s[11]), .carry(c[11]), .a(p[3][2]), .b( s[10]), .cin( c[7]) );
FA fa_53 ( .sum(s[12]), .carry(c[12]), .a(p[2][3]), .b( s[11]), .cin( c[8]) );
FA fa_54 ( .sum(s[13]), .carry(c[13]), .a(p[1][4]), .b( s[12]), .cin( c[9]) );
HA ha_55 ( .sum(s[14]), .carry(c[14]),  .a(p[0][4]), .b( s[13]));                                     //P5

FA fa_61 ( .sum(s[15]), .carry(c[15]), .a(p[6][0]), .b( p[5][1]), .cin( c[10]) );
FA fa_62 ( .sum(s[16]), .carry(c[16]), .a(p[4][2]), .b( s[15]), .cin( c[11]) );
FA fa_63 ( .sum(s[17]), .carry(c[17]), .a(p[3][3]), .b( s[16]), .cin( c[12]) );
FA fa_64 ( .sum(s[18]), .carry(c[18]), .a(p[2][4]), .b( s[17]), .cin( c[13]) );
HA ha_61 ( .sum(s[19]), .carry(c[19]), .a(p[1][5]), .b( s[18]));
HA ha_42 ( .sum(s[20]), .carry(c[20]), .a(p[0][6]), .b( s[19]));                                      //P6

FA fa_71 ( .sum(s[21]), .carry(c[21]), .a(p[7][0]), .b( p[6][1]), .cin( c[15]) );
FA fa_72 ( .sum(s[22]), .carry(c[22]), .a(p[5][2]), .b( s[21]), .cin( c[16]) );
FA fa_73 ( .sum(s[23]), .carry(c[23]), .a(p[4][3]), .b( s[22]), .cin( c[17]) );
FA fa_74 ( .sum(s[24]), .carry(c[24]), .a(p[3][4]), .b( s[23]), .cin( c[18]) );
FA fa_75 ( .sum(s[25]), .carry(c[25]), .a(p[2][5]), .b( s[24]), .cin( c[19]) );
FA fa_76 ( .sum(s[26]), .carry(c[26]), .a(p[1][6]), .b( s[25]), .cin( c[20]) );
HA ha_71 ( .sum(s[27]), .carry(c[27]), .a(p[0][7]), .b( s[26]));                                      //P7

FA fa_81 ( .sum(s[28]), .carry(c[28]), .a(p[7][1]), .b( p[6][2]), .cin( c[21]) );
FA fa_82 ( .sum(s[29]), .carry(c[29]), .a(p[5][3]), .b( s[28]), .cin( c[22]) );
FA fa_83 ( .sum(s[30]), .carry(c[30]), .a(p[4][4]), .b( s[29]), .cin( c[23]) );
FA fa_84 ( .sum(s[31]), .carry(c[31]), .a(p[3][5]), .b( s[30]), .cin( c[24]) );
FA fa_85 ( .sum(s[32]), .carry(c[32]), .a(p[2][6]), .b( s[31]), .cin( c[25]) );
FA fa_86 ( .sum(s[33]), .carry(c[33]), .a(p[1][7]), .b( s[32]), .cin( c[26]) );
HA ha_81 ( .sum(s[34]), .carry(c[34]), .a(s[33]), .b( c[27]));                                          //P8

FA fa_91 ( .sum(s[35]), .carry(c[35]), .a(p[7][2]), .b( p[6][3]), .cin( c[28]) );
FA fa_92 ( .sum(s[36]), .carry(c[36]), .a(p[5][4]), .b( s[35]), .cin( c[29]) );
FA fa_93 ( .sum(s[37]), .carry(c[37]), .a(p[4][5]), .b( s[36]), .cin( c[30]) );
FA fa_94 ( .sum(s[38]), .carry(c[38]), .a(p[3][6]), .b( s[37]), .cin( c[31]) );
FA fa_95 ( .sum(s[39]), .carry(c[39]), .a(p[2][7]), .b( s[38]), .cin( c[32]) );
FA fa_96 ( .sum(s[40]), .carry(c[40]), .a(s[39]), .b( c[33]), .cin( c[34]) );                         //P9

FA fa_101 ( .sum(s[41]), .carry(c[41]), .a(p[7][3]), .b( p[6][4]), .cin( c[35]) );
FA fa_102 ( .sum(s[42]), .carry(c[42]), .a(p[5][5]), .b( s[41]), .cin( c[36]) );
FA fa_103 ( .sum(s[43]), .carry(c[43]), .a(p[4][6]), .b( s[42]), .cin( c[37]) );
FA fa_104 ( .sum(s[44]), .carry(c[44]), .a(p[3][7]), .b( s[43]), .cin( c[38]) );
FA fa_105 ( .sum(s[45]), .carry(c[45]), .a(s[44]), .b( c[39]), .cin( c[40]) );                    //P10

FA fa_111 ( .sum(s[46]), .carry(c[46]), .a(p[7][4]), .b( p[6][5]), .cin( c[41]) );
FA fa_112 ( .sum(s[47]), .carry(c[47]), .a(p[5][6]), .b( s[46]), .cin( c[42]) );
FA fa_113 ( .sum(s[48]), .carry(c[48]), .a(p[4][7]), .b( s[47]), .cin( c[43]) );
FA fa_114 ( .sum(s[49]), .carry(c[49]), .a(s[48]), .b( c[44]), .cin( c[45]) );                     //P11

FA fa_121 ( .sum(s[50]), .carry(c[50]), .a(p[7][5]), .b( p[6][6]), .cin( c[46]) );
FA fa_122 ( .sum(s[51]), .carry(c[51]), .a(p[5][7]), .b(s[50]), .cin( c[47]) );
FA fa_123 ( .sum(s[52]), .carry(c[52]), .a(s[51]), .b(c[48]), .cin( c[49]) );                    //P12

FA fa_131 ( .sum(s[53]), .carry(c[53]), .a(p[7][6]), .b( p[6][7]), .cin( c[50]) );
FA fa_132 ( .sum(s[54]), .carry(c[54]), .a(s[53]), .b(c[51]), .cin( c[52]) );                  //P13

FA fa_141 ( .sum(s[55]), .carry(c[55]), .a(p[7][7]), .b(c[53]), .cin( c[54]) );              //P14
 // Multiplier product is obtained
always@(posedge clock)
product<=c[55],s[55],s[54],s[52],s[49],s[45],s[40],s[34],s[27],s[20],s[14],s[9],s[5],s[2],s[0],p[0][0]};  //p[0][0]=P0
endmodule

Half Adder
module HA(a,b,sum,carry);
input a,b;
output sum,carry;
assign sum=a^b;
assign carry=a&b;
endmodule

Full Adder
module FA(a,b,cin,sum,carry);
input a,b,cin;
output sum,carry;
reg T1,T2,T3,carry;
assign sum=a^b^cin;
always @(a or b or cin)
begin
T1=a&b;
T2=a&cin;
T3=a&cin;              
carry=T1|T2|T3;
end
endmodule

Sunday, 25 May 2014

3.4 BAUGH-WOOLEY MULTIPLIER

 




Baugh and Wooley have proposed an algorithm for direct two's complement array multiplication. The principal advantage of their algorithm is that the signs of all summands are positive, thus allowing the array to be constructed entirely with the conventional Type 0 full adders. This uniform structure is very attractive for VLSI.We can use the full adder or any adder circuits which were discussed in previous sessions

DIAGRAM:


PROGRAM
 
module BWSM(x, y, p);
input [7:0] x, y;
output [15:0] p;
supply0 zero;
supply1 one;
wire [7:1]p1;
wire [7:1]p2;
wire [7:1]p3;
wire [7:1]p4;
wire [7:1]p5;
wire [7:1]p6;
wire [8:1]p7;
wire [9:1]p8;
wire [7:1]c1;
wire [7:1]c2;
wire [7:1]c3;
wire [7:1]c4;
wire [7:1]c5;
wire [7:1]c6;
wire [8:1]c7;
wire [9:1]c8;

assign p[0] = x[0]&y[0];
full_adder f1_1( x[1] & y[0], zero, x[0] & y[1], p1[1] , c1[1]);
assign p[1]=p1[1];
full_adder f1_2( x[2] & y[0], zero, x[1] & y[1], p1[2], c1[2]);
full_adder f1_3( x[3] & y[0], zero, x[2] & y[1], p1[3], c1[3]);
full_adder f1_4( x[4] & y[0], zero, x[3] & y[1], p1[4], c1[4]);
full_adder f1_5( x[5] & y[0], zero, x[4] & y[1], p1[5], c1[5]);
full_adder f1_6( x[6] & y[0], zero, x[5] & y[1], p1[6], c1[6]);
full_adder f1_7( x[7] & ~y[0], zero, x[6] & y[1], p1[7], c1[7]);

full_adder f2_1( p1[2], c1[1], x[0] & y[2], p2[1] , c2[1] );
assign p[2]=p2[1];
full_adder f2_2( p1[3], c1[2], x[1] & y[2], p2[2] , c2[2] );
full_adder f2_3( p1[4], c1[3], x[2] & y[2], p2[3] , c2[3] );
full_adder f2_4( p1[5], c1[4], x[3] & y[2], p2[4] , c2[4] );
full_adder f2_5( p1[6], c1[5], x[4] & y[2], p2[5] , c2[5] );
full_adder f2_6( p1[7], c1[6], x[5] & y[2], p2[6] , c2[6] );
full_adder f2_7( x[7] & ~y[1], c1[7], x[6] & y[2], p2[7] , c2[7] );

full_adder f3_1( p2[2], c2[1], x[0] & y[3], p3[1] , c3[1] );
assign p[3]=p3[1];
full_adder f3_2( p2[3], c2[2], x[1] & y[3], p3[2] , c3[2] );
full_adder f3_3( p2[4], c2[3], x[2] & y[3], p3[3] , c3[3] );
full_adder f3_4( p2[5], c2[4], x[3] & y[3], p3[4] , c3[4] );
full_adder f3_5( p2[6], c2[5], x[4] & y[3], p3[5] , c3[5] );
full_adder f3_6( p2[7], c2[6], x[5] & y[3], p3[6] , c3[6] );
full_adder f3_7( x[7] & ~y[2], c2[7], x[6] & y[3], p3[7] , c3[7] );

full_adder f4_1( p3[2], c3[1], x[0] & y[4], p4[1] , c4[1] );
assign p[4]=p4[1];
full_adder f4_2( p3[3], c3[2], x[1] & y[4], p4[2] , c4[2] );
full_adder f4_3( p3[4], c3[3], x[2] & y[4], p4[3] , c4[3] );
full_adder f4_4( p3[5], c3[4], x[3] & y[4], p4[4] , c4[4] );
full_adder f4_5( p3[6], c3[5], x[4] & y[4], p4[5] , c4[5] );
full_adder f4_6( p3[7], c3[6], x[5] & y[4], p4[6] , c4[6] );
full_adder f4_7( x[7] & ~y[3], c3[7], x[6] & y[4], p4[7] , c4[7] );

full_adder f5_1( p4[2], c4[1], x[0] & y[5], p5[1] , c5[1] );
assign p[5]=p5[1];
full_adder f5_2( p4[3], c4[2], x[1] & y[5], p5[2] , c5[2] );
full_adder f5_3( p4[4], c4[3], x[2] & y[5], p5[3] , c5[3] );
full_adder f5_4( p4[5], c4[4], x[3] & y[5], p5[4] , c5[4] );
full_adder f5_5( p4[6], c4[5], x[4] & y[5], p5[5] , c5[5] );
full_adder f5_6( p4[7], c4[6], x[5] & y[5], p5[6] , c5[6] );
full_adder f5_7( x[7] & ~y[4], c4[7], x[6] & y[5], p5[7] , c5[7] );

full_adder f6_1( p5[2], c5[1], x[0] & y[6], p6[1] , c6[1] );
assign p[6]=p6[1];
full_adder f6_2( p5[3], c5[2], x[1] & y[6], p6[2] , c6[2] );
full_adder f6_3( p5[4], c5[3], x[2] & y[6], p6[3] , c6[3] );
full_adder f6_4( p5[5], c5[4], x[3] & y[6], p6[4] , c6[4] );
full_adder f6_5( p5[6], c5[5], x[4] & y[6], p6[5] , c6[5] );
full_adder f6_6( p5[7], c5[6], x[5] & y[6], p6[6] , c6[6] );
full_adder f6_7( x[7] & ~y[5], c5[7], x[6] & y[6], p6[7] , c6[7] );

full_adder f7_1( p6[2], c6[1], ~x[0] & y[7], p7[1] , c7[1] );
full_adder f7_2( p6[3], c6[2], ~x[1] & y[7], p7[2] , c7[2] );
full_adder f7_3( p6[4], c6[3], ~x[2] & y[7], p7[3] , c7[3] );
full_adder f7_4( p6[5], c6[4], ~x[3] & y[7], p7[4] , c7[4] );
full_adder f7_5( p6[6], c6[5], ~x[4] & y[7], p7[5] , c7[5] );
full_adder f7_6( p6[7], c6[6], ~x[5] & y[7], p7[6] , c7[6] );
full_adder f7_7( x[7] & ~y[6], c6[7], ~x[6] & y[7], p7[7] , c7[7] );
full_adder f7_8( ~x[7], ~y[7], x[7] & y[7], p7[8] , c7[8] );

full_adder f8_1( p7[1], x[7] , y[7] , p8[1] , c8[1] );
full_adder f8_2( p7[2], c7[1], c8[1], p8[2] , c8[2] );
full_adder f8_3( p7[3], c7[2], c8[2], p8[3] , c8[3] );
full_adder f8_4( p7[4], c7[3], c8[3], p8[4] , c8[4] );
full_adder f8_5( p7[5], c7[4], c8[4], p8[5] , c8[5] );
full_adder f8_6( p7[6], c7[5], c8[5], p8[6] , c8[6] );
full_adder f8_7( p7[7], c7[6], c8[6], p8[7] , c8[7] );
full_adder f8_8( p7[8], c7[7], c8[7], p8[8] , c8[8] );
full_adder f8_9( one , c7[8], c8[8], p8[9] , c8[9] );

assign p[7]=p8[1];
assign p[8]=p8[2];
assign p[9]=p8[3];
assign p[10]=p8[4];
assign p[11]=p8[5];
assign p[12]=p8[6];
assign p[13]=p8[7];
assign p[14]=p8[8];
assign p[15]=p8[9];
endmodule