------------------------------------------------------------
-- Copyright: 2010 Integrated Systems Laboratory, ETH Zurich
-- http://www.iis.ee.ethz.ch/~sha3
------------------------------------------------------------
library ieee;
use ieee.numeric_std.all;
use ieee.std_logic_1164.all;
use work.simdpkg.all;

-------------------------------------------------------------------------------
-- mexpansion
--
-- Ports (behaviour as visible in architecture rtl):
--   ClkxCI  : clock; registers update on its rising edge.
--   RstxRBI : asynchronous reset, active low.
--   InEnxEI : when '1' at a clock edge, rows 0..7 of the Y register array are
--             loaded from MxDI and rows 8..15 are cleared.
--   FinxSI  : enables one additional +1 (on load) / -1 (on MinxD read)
--             adjustment at position (7,5).
--   IxDI    : step index; values 0..31 select the data routing in p_ntt,
--             value 32 triggers the capture into YxDPP.
--   IPxDI   : index used for the WPERM lookup and the 185/233 multiplier
--             choice; values >= 32 are treated as 0.
--   MxDI    : data loaded into the Y register array (zero-extended to 16 bit).
--   MmemxDI : data feeding MinxD, used while IxDI is in 0..7.
--   WxDO    : registered output word.
--
-- The types marray and simdline and the tables/functions FFT128TW, FFT64TW,
-- WPERM and P come from work.simdpkg, which is not visible from this file.
-------------------------------------------------------------------------------
entity mexpansion is
  port (
    ClkxCI  : in  std_logic;
    RstxRBI : in  std_logic;
    InEnxEI : in  std_logic;
    FinxSI  : in  std_logic;
    IxDI    : in  unsigned(5 downto 0);
    IPxDI   : in  unsigned(5 downto 0);
    MxDI    : in  marray;
    MmemxDI : in  marray;
    WxDO    : out simdline
    );
end mexpansion;

architecture rtl of mexpansion is

  -- Integer views of the two index ports.
  signal ii, ip : integer;

  -- Y register array: next-state (N) and present-state (P) values.
  type signed16x8x16 is array (0 to 15, 0 to 7) of signed(15 downto 0);
  signal YxDN, YxDP : signed16x8x16;

  -- MmemxDI rearranged as an 8x8 array of zero-extended words.
  type signed8x8x16 is array (0 to 7, 0 to 7) of signed(15 downto 0);
  signal MinxD : signed8x8x16;

  -- Flat views: YxDNN mirrors YxDP, YxDPP is the permuted, registered copy.
  type signed128x16 is array (0 to 127) of signed(15 downto 0);
  signal YxDPP, YxDNN : signed128x16;

  -- Eight-lane datapath signals (multiplier operand, butterfly stages).
  type signed8x16 is array (0 to 7) of signed(15 downto 0);
  signal TWMult1xD, ButtYxD, T11xD, T12xD, T2xD, T3xD, T4xD, T40xD, T5xD : signed8x16;

  type signed8x9 is array (0 to 7) of signed(8 downto 0);
  signal TWMult2xD : signed8x9;

  type signed8x25 is array (0 to 7) of signed(24 downto 0);
  signal T1xD : signed8x25;

  signal T20xD, T21xD, T210xD, T22xD, T220xD : signed(15 downto 0);
  signal T30xD, T31xD, T310xD                : signed(15 downto 0);

  signal WxDP, WxDN : simdline;

  type signed4x32 is array (0 to 3) of signed(31 downto 0);
  signal MoutHxD, MoutLxD : signed4x32;

  type signed4x16 is array (0 to 3) of signed(15 downto 0);
  signal Ysel0xD, Ysel1xD : signed4x16;

  -- Returns the zero-extended low byte of v minus the sign-extended high byte
  -- of v.  Since 256 = -1 (mod 257), the result is congruent to v modulo 257.
  function fold_bytes (v : signed(15 downto 0)) return signed is
  begin
    return (x"00" & v(7 downto 0)) - resize(v(15 downto 8), 16);
  end fold_bytes;

  -- Moves bits (14-n downto 0) of v up by n positions, fills the low n bits
  -- with '0' and sets the top bit to v(15) or v(15-n).
  function shl_merge_sign (v : signed(15 downto 0); n : natural) return signed is
    variable r : signed(15 downto 0) := (others => '0');
  begin
    r(15)          := v(15) or v(15-n);
    r(14 downto n) := v(14-n downto 0);
    return r;
  end shl_merge_sign;

begin  -- rtl

  ii <= to_integer(IxDI);
  ip <= to_integer(IPxDI) when to_integer(IPxDI) < 32 else 0;

  WxDO <= WxDP;

  -- Zero-extend every MmemxDI entry into the 8x8 MinxD array.
  fm1 : for i in 0 to 7 generate
    fm2 : for j in 0 to 7 generate
      MinxD(i, j) <= x"00" & signed(MmemxDI(i*8+j));
    end generate fm2;
  end generate fm1;

  -----------------------------------------------------------------------------
  -- Operand routing.  Depending on ii, this process selects the multiplier
  -- operands (TWMult1xD/TWMult2xD), the butterfly inputs (ButtYxD) and the
  -- YxDN entries that receive the multiplier result (T12xD) and the butterfly
  -- result (T5xD).  Everything not selected keeps its default: zero operands
  -- and YxDN = YxDP.  Within each branch the T12xD and T5xD targets lie in
  -- different rows of YxDN, so the per-lane loop order is irrelevant.
  -----------------------------------------------------------------------------
  p_ntt : process (FinxSI, MinxD, T12xD, T5xD, YxDP, ii)
    variable c : integer;               -- ii with the branch offset removed
  begin  -- process p_ntt
    TWMult1xD <= (others => (others => '0'));
    TWMult2xD <= (others => (others => '0'));
    YxDN      <= YxDP;
    ButtYxD   <= (others => (others => '0'));

    if ii < 8 and ii >= 0 then
      c := ii;
      for k in 0 to 7 loop
        TWMult1xD(k)  <= MinxD(k, c);
        TWMult2xD(k)  <= to_signed(FFT128TW(k, c), 9);
        YxDN(k+8, c)  <= T12xD(k);
        ButtYxD(k)    <= YxDP(k, c);
        YxDN(k, c)    <= T5xD(k);
      end loop;  -- k
      -- Lane 7 is decremented in column 7, and in column 5 when FinxSI is set;
      -- this later assignment overrides the one made in the loop.
      if ii = 7 or (ii = 5 and FinxSI = '1') then
        TWMult1xD(7) <= MinxD(7, c) - 1;
      end if;

    elsif ii > 7 and ii < 16 then
      c := ii - 8;
      for k in 0 to 7 loop
        TWMult1xD(k)  <= YxDP(k, c);
        TWMult2xD(k)  <= to_signed(FFT64TW(k, c), 9);
        YxDN(k, c)    <= T12xD(k);
        ButtYxD(k)    <= YxDP(k+8, c);
        YxDN(k+8, c)  <= T5xD(k);
      end loop;  -- k

    elsif ii > 15 and ii < 24 then
      c := ii - 16;
      for k in 0 to 7 loop
        TWMult1xD(k)  <= YxDP(k+8, c);
        TWMult2xD(k)  <= to_signed(FFT64TW(k, c), 9);
        YxDN(k+8, c)  <= T12xD(k);
        -- The butterfly now works along row c instead of down a column.
        ButtYxD(k)    <= YxDP(c, k);
        YxDN(c, k)    <= T5xD(k);
      end loop;  -- k

    elsif ii > 23 and ii < 32 then
      c := ii - 16;                     -- rows 8..15; multiplier stays idle
      for k in 0 to 7 loop
        ButtYxD(k) <= YxDP(c, k);
        YxDN(c, k) <= T5xD(k);
      end loop;  -- k
    end if;
  end process p_ntt;

  -- TW Mult
  -----------------------------------------------------------------------------
  -- Per lane: 16x9 bit product, then low byte minus bits 14..8 (extended with
  -- product bit 24 as sign), then a final correction into the range below 129.
  tw_mult : for i in 0 to 7 generate
    T1xD(i)  <= TWMult1xD(i) * TWMult2xD(i);
    T11xD(i) <= (x"00" & T1xD(i)(7 downto 0))
                - resize(T1xD(i)(24) & T1xD(i)(14 downto 8), 16);
    T12xD(i) <= T11xD(i) when T11xD(i) < 129 else T11xD(i) - 257;
  end generate tw_mult;

  -- Butterfly
  -----------------------------------------------------------------------------
  -- Stage 1: lanes k and k+4.
  bf_s1_sum : for k in 0 to 3 generate
    T2xD(k) <= ButtYxD(k) + ButtYxD(k+4);
  end generate bf_s1_sum;

  T2xD(4) <= ButtYxD(0) - ButtYxD(4);
  T20xD   <= ButtYxD(1) - ButtYxD(5);
  T21xD   <= ButtYxD(2) - ButtYxD(6);
  T22xD   <= ButtYxD(3) - ButtYxD(7);

  -- The three differences are shifted by 2, 4 and 6 bit positions; the two
  -- larger shifts are folded back to a byte difference afterwards.
  T2xD(5) <= shl_merge_sign(T20xD, 2);
  T210xD  <= shl_merge_sign(T21xD, 4);
  T220xD  <= shl_merge_sign(T22xD, 6);
  T2xD(6) <= fold_bytes(T210xD);
  T2xD(7) <= fold_bytes(T220xD);

  -----------------------------------------------------------------------------
  -- Stage 2: lanes k and k+2 within each half.
  T3xD(0) <= T2xD(0) + T2xD(2);
  T3xD(1) <= T2xD(1) + T2xD(3);
  T3xD(2) <= T2xD(0) - T2xD(2);
  T30xD   <= T2xD(1) - T2xD(3);
  T3xD(4) <= T2xD(4) + T2xD(6);
  T3xD(5) <= T2xD(5) + T2xD(7);
  T3xD(6) <= T2xD(4) - T2xD(6);
  T31xD   <= T2xD(5) - T2xD(7);

  T3xD(3) <= shl_merge_sign(T30xD, 4);
  T310xD  <= shl_merge_sign(T31xD, 4);
  T3xD(7) <= fold_bytes(T310xD);

  -----------------------------------------------------------------------------
  -- Stage 3: neighbouring lanes, sum on the even lane, difference on the odd.
  bf_s3 : for k in 0 to 3 generate
    T4xD(2*k)   <= T3xD(2*k) + T3xD(2*k+1);
    T4xD(2*k+1) <= T3xD(2*k) - T3xD(2*k+1);
  end generate bf_s3;

  -- Final fold and correction of every butterfly output.
  bf_fullred : for i in 0 to 7 generate
    T40xD(i) <= fold_bytes(T4xD(i));
    T5xD(i)  <= T40xD(i) when T40xD(i) < 129 else T40xD(i) - 257;
  end generate bf_fullred;

  -- YxDNN Memory
  -----------------------------------------------------------------------------
  -- Row-major flattening of the 16x8 register array.
  fy1 : for i in 0 to 15 generate
    fy2 : for j in 0 to 7 generate
      YxDNN(i*8+j) <= YxDP(i, j);
    end generate fy2;
  end generate fy1;

  -- Second part
  -----------------------------------------------------------------------------
  -- For each of the four output words: pick two YxDPP entries through WPERM,
  -- multiply each by 185 (ip < 16) or 233 (otherwise) and pack the low 16 bits
  -- of the two products into one 32-bit word.  The "ip >= 0" alternative only
  -- matters while ip has not yet been assigned a non-negative value.
  f_mult : for i in 0 to 3 generate
    Ysel0xD(i) <= YxDPP(WPERM(ip, i*2))   when ip >= 0 else YxDPP(WPERM(0, i*2));
    Ysel1xD(i) <= YxDPP(WPERM(ip, i*2+1)) when ip >= 0 else YxDPP(WPERM(0, i*2+1));

    MoutHxD(i) <= Ysel0xD(i) * 185 when ip < 16 else Ysel0xD(i) * 233;
    MoutLxD(i) <= Ysel1xD(i) * 185 when ip < 16 else Ysel1xD(i) * 233;

    -- Bit-wise copy: MoutHxD feeds bits 15..0, MoutLxD feeds bits 31..16.
    f_ass : for b in 0 to 15 generate
      WxDN(i)(b)    <= MoutHxD(i)(b);
      WxDN(i)(b+16) <= MoutLxD(i)(b);
    end generate f_ass;
  end generate f_mult;

  -- Message Memory
  -----------------------------------------------------------------------------
  p_mem : process (ClkxCI, RstxRBI)
  begin  -- process p_mem
    if RstxRBI = '0' then               -- asynchronous reset (active low)
      WxDP  <= (others => (others => '0'));
      YxDP  <= (others => (others => (others => '0')));
      YxDPP <= (others => (others => '0'));
    elsif ClkxCI'event and ClkxCI = '1' then  -- rising clock edge
      WxDP <= WxDN;

      -- Capture the permuted register array once step 32 is reached.
      if ii = 32 then
        for i in 0 to 127 loop
          YxDPP(i) <= YxDNN(P(i));
        end loop;  -- i
      end if;

      if InEnxEI = '1' then
        -- Load: rows 0..7 take the zero-extended input, rows 8..15 are cleared.
        for i in 0 to 7 loop
          for j in 0 to 7 loop
            YxDP(i, j)   <= x"00" & signed(MxDI(i*8+j));
            YxDP(i+8, j) <= (others => '0');
          end loop;  -- j
        end loop;  -- i
        -- Entry (7,7) always gets +1, entry (7,5) only when FinxSI is set;
        -- these later assignments override the plain load above.
        YxDP(7, 7) <= (x"00" & signed(MxDI(7*8+7))) + 1;
        if FinxSI = '1' then
          YxDP(7, 5) <= (x"00" & signed(MxDI(7*8+5))) + 1;
        end if;
      else
        -- Normal operation: take over the next-state values from p_ntt.
        YxDP <= YxDN;
      end if;
    end if;
  end process p_mem;

end rtl;