------------------------------------------------------------
-- Copyright: 2010 Integrated Systems Laboratory, ETH Zurich
-- http://www.iis.ee.ethz.ch/~sha3
------------------------------------------------------------
library ieee;
use ieee.numeric_std.all;
use ieee.std_logic_1164.all;
use work.simdpkg.all;
-- mexpansion: datapath that loads a block of message words into a 16x8
-- working array of 16-bit signed values, transforms that array step by step
-- under control of IxDI (multiplications by table constants and 8-point
-- butterflies, both reduced modulo 257), and finally outputs permuted,
-- scaled words selected by IPxDI.
entity mexpansion is
port (
ClkxCI : in std_logic; -- clock; all registers update on the rising edge
RstxRBI : in std_logic; -- asynchronous reset, active low
InEnxEI : in std_logic; -- '1': load MxDI into rows 0..7 of the working array and clear rows 8..15
FinxSI : in std_logic; -- '1': also apply the +1 (on load) / -1 (at step 5) adjustment to word 61 (row 7, column 5)
IxDI : in unsigned(5 downto 0); -- step index: 0..31 select the operation of p_ntt, 32 latches the permuted result
IPxDI : in unsigned(5 downto 0); -- first index into WPERM for the output stage; values >= 32 are treated as 0
MxDI : in marray; -- message words copied into the working array when InEnxEI = '1'
MmemxDI : in marray; -- message words used as multiplier operand during steps 0..7
WxDO : out simdline -- registered output words (one clock after WxDN is computed)
);
end mexpansion;
architecture rtl of mexpansion is
-- Integer views of the two index ports (see the assignments after "begin").
signal ii, ip : integer;
-- Working array: 16 rows x 8 columns of 16-bit signed words.
-- YxDP is the registered (present) state, YxDN the next state from p_ntt.
type signed16x8x16 is array (0 to 15, 0 to 7) of signed(15 downto 0);
signal YxDN, YxDP : signed16x8x16;
-- MmemxDI zero-extended to 16 bits and arranged as 8 rows x 8 columns.
type signed8x8x16 is array (0 to 7, 0 to 7) of signed(15 downto 0);
signal MinxD : signed8x8x16;
-- YxDNN: YxDP flattened to 128 words (index = row*8 + column).
-- YxDPP: registered copy of YxDNN reordered through P, taken when ii = 32.
type signed128x16 is array (0 to 127) of signed(15 downto 0);
signal YxDPP, YxDNN : signed128x16;
-- Eight-lane 16-bit buses:
--   TWMult1xD     first multiplier operand
--   T11xD, T12xD  partially / fully reduced multiplier result
--   ButtYxD       butterfly input
--   T2xD..T4xD    butterfly stage outputs
--   T40xD, T5xD   partially / fully reduced butterfly result
type signed8x16 is array (0 to 7) of signed(15 downto 0);
signal TWMult1xD, ButtYxD, T11xD, T12xD, T2xD, T3xD, T4xD, T40xD, T5xD : signed8x16;
-- Second multiplier operand: 9-bit signed constants taken from FFT128TW / FFT64TW.
type signed8x9 is array (0 to 7) of signed(8 downto 0);
signal TWMult2xD : signed8x9;
-- Raw 16 x 9 bit products (25 bits).
type signed8x25 is array (0 to 7) of signed(24 downto 0);
signal T1xD : signed8x25;
-- Scalar intermediates of the first butterfly stage.
signal T20xD, T21xD, T210xD, T22xD, T220xD : signed(15 downto 0);
-- Scalar intermediates of the second butterfly stage.
signal T30xD, T31xD, T310xD : signed(15 downto 0);
-- Output register (WxDP) and its next value (WxDN).
signal WxDP, WxDN : simdline;
-- 32-bit products of the output stage.
type signed4x32 is array (0 to 3) of signed(31 downto 0);
signal MoutHxD, MoutLxD : signed4x32;
-- Words of YxDPP selected through WPERM for the output stage.
type signed4x16 is array (0 to 3) of signed(15 downto 0);
signal Ysel0xD, Ysel1xD : signed4x16;
begin -- rtl
-- Step index as an integer (0..63 once IxDI has been evaluated).
ii <= to_integer(IxDI);
-- Output index, forced to 0 whenever IPxDI is 32 or larger.
ip <= to_integer(IPxDI) when to_integer(IPxDI) < 32 else 0;
-- The output port is driven directly from the output register.
WxDO <= WxDP;
-- Zero-extend each word of MmemxDI to 16 bits and view the 64 words as an
-- 8x8 array: MinxD(row, col) holds word number 8*row + col.
fm1 : for row in 0 to 7 generate
  fm2 : for col in 0 to 7 generate
    MinxD(row, col) <= x"00" & signed(MmemxDI(8*row + col));
  end generate fm2;
end generate fm1;
-- p_ntt: combinational next-state logic for the working array.
--
-- Depending on the step index ii it routes eight words into the multiplier
-- (TWMult1xD * TWMult2xD, reduced result returned in T12xD) and eight words
-- into the butterfly (ButtYxD, reduced result returned in T5xD), and writes
-- both results back into the next-state array YxDN.
--
--   ii  0.. 7 : multiply column ii of MinxD by FFT128TW -> rows 8..15, column ii
--               butterfly rows 0..7 of column ii (in place)
--   ii  8..15 : multiply rows 0..7 of column ii-8 by FFT64TW (in place)
--               butterfly rows 8..15 of column ii-8 (in place)
--   ii 16..23 : multiply rows 8..15 of column ii-16 by FFT64TW (in place)
--               butterfly the eight columns of row ii-16 (in place)
--   ii 24..31 : butterfly the eight columns of row ii-16 (in place)
--   otherwise : hold the array
p_ntt : process (FinxSI, MinxD, T12xD, T5xD, YxDP, ii)
begin  -- process p_ntt
  -- Defaults: idle multiplier and butterfly inputs, array keeps its value.
  TWMult1xD <= (others => (others => '0'));
  TWMult2xD <= (others => (others => '0'));
  ButtYxD   <= (others => (others => '0'));
  YxDN      <= YxDP;

  if ii >= 0 and ii <= 7 then
    -- Multiplier operand: column ii of the message. Word 63 (at ii = 7)
    -- always has 1 subtracted, word 61 (at ii = 5) only when FinxSI = '1'.
    for k in 0 to 6 loop
      TWMult1xD(k) <= MinxD(k, ii);
    end loop;  -- k
    if ii = 7 or (ii = 5 and FinxSI = '1') then
      TWMult1xD(7) <= MinxD(7, ii) - 1;
    else
      TWMult1xD(7) <= MinxD(7, ii);
    end if;

    for k in 0 to 7 loop
      TWMult2xD(k)    <= to_signed(FFT128TW(k, ii), 9);
      YxDN(k + 8, ii) <= T12xD(k);      -- product goes to the upper rows
      ButtYxD(k)      <= YxDP(k, ii);   -- lower rows go through the butterfly
      YxDN(k, ii)     <= T5xD(k);
    end loop;  -- k

  elsif ii >= 8 and ii <= 15 then
    for k in 0 to 7 loop
      TWMult1xD(k)        <= YxDP(k, ii - 8);
      TWMult2xD(k)        <= to_signed(FFT64TW(k, ii - 8), 9);
      YxDN(k, ii - 8)     <= T12xD(k);
      ButtYxD(k)          <= YxDP(k + 8, ii - 8);
      YxDN(k + 8, ii - 8) <= T5xD(k);
    end loop;  -- k

  elsif ii >= 16 and ii <= 23 then
    for k in 0 to 7 loop
      TWMult1xD(k)         <= YxDP(k + 8, ii - 16);
      TWMult2xD(k)         <= to_signed(FFT64TW(k, ii - 16), 9);
      YxDN(k + 8, ii - 16) <= T12xD(k);
      -- Row-wise butterfly on row ii-16 (one of rows 0..7).
      ButtYxD(k)           <= YxDP(ii - 16, k);
      YxDN(ii - 16, k)     <= T5xD(k);
    end loop;  -- k

  elsif ii >= 24 and ii <= 31 then
    -- Row-wise butterfly on row ii-16 (one of rows 8..15); multiplier idle.
    for k in 0 to 7 loop
      ButtYxD(k)       <= YxDP(ii - 16, k);
      YxDN(ii - 16, k) <= T5xD(k);
    end loop;  -- k
  end if;
end process p_ntt;
-- TW Mult
-----------------------------------------------------------------------------
-- Eight parallel multipliers followed by a reduction modulo 257.
tw_mult : for i in 0 to 7 generate
-- 16-bit x 9-bit signed product (25 bits).
T1xD(i) <= TWMult1xD(i) * TWMult2xD(i);
-- Partial reduction: zero-extended low byte minus the upper part, where the
-- upper part is bits 14..8 sign-extended with bit 24. Because 256 = -1
-- (mod 257), "low - high" is congruent to the product modulo 257.
-- NOTE(review): bits 23..15 are not used, so this presumably relies on the
-- product fitting in 15 bits plus sign - confirm against the operand ranges.
T11xD(i) <= (x"00" & T1xD(i)(7 downto 0)) - (T1xD(i)(24) & T1xD(i)(24) & T1xD(i)(24) & T1xD(i)(24) &
T1xD(i)(24) & T1xD(i)(24) & T1xD(i)(24) & T1xD(i)(24) &
T1xD(i)(24) & T1xD(i)(14 downto 8));
-- Final step: values of 129 or more have 257 subtracted.
T12xD(i) <= T11xD(i) when T11xD(i) < 129 else T11xD(i)-257;
end generate tw_mult;
-- Butterfly
-----------------------------------------------------------------------------
-- Three-stage butterfly network on the eight words of ButtYxD.
-- Stage 1: sums and differences of inputs k and k+4.
T2xD(0) <= (ButtYxD(0) + ButtYxD(4));
T2xD(1) <= (ButtYxD(1) + ButtYxD(5));
T2xD(2) <= (ButtYxD(2) + ButtYxD(6));
T2xD(3) <= (ButtYxD(3) + ButtYxD(7));
T2xD(4) <= (ButtYxD(0) - ButtYxD(4));
T20xD <= (ButtYxD(1) - ButtYxD(5));
T21xD <= (ButtYxD(2) - ButtYxD(6));
T22xD <= (ButtYxD(3) - ButtYxD(7));
-- The three remaining differences are shifted left by 2, 4 and 6 bits
-- (i.e. scaled by 4, 16 and 64).
-- NOTE(review): the top bit of each shifted value is formed as
-- (bit 15 OR the bit that would be shifted into position 15) instead of by
-- plain truncation - presumably to keep the sign for the expected value
-- range; confirm.
T2xD(5) <= (T20xD(15) or T20xD(13)) & T20xD(12 downto 0) & "00";
T210xD <= (T21xD(15) or T21xD(11)) & T21xD(10 downto 0) & "0000";
T220xD <= (T22xD(15) or T22xD(9)) & T22xD(8 downto 0) & "000000";
-- Partial reduction of the x16 and x64 terms: zero-extended low byte minus
-- sign-extended high byte (256 = -1 mod 257, so the value is preserved
-- modulo 257).
T2xD(6) <= (x"00" & T210xD(7 downto 0)) - (T210xD(15) & T210xD(15) & T210xD(15) & T210xD(15) &
T210xD(15) & T210xD(15) & T210xD(15) & T210xD(15) &
T210xD(15 downto 8));
T2xD(7) <= (x"00" & T220xD(7 downto 0)) - (T220xD(15) & T220xD(15) & T220xD(15) & T220xD(15) &
T220xD(15) & T220xD(15) & T220xD(15) & T220xD(15) &
T220xD(15 downto 8));
-----------------------------------------------------------------------------
-- Stage 2: sums and differences of the pairs (0,2), (1,3), (4,6), (5,7).
T3xD(0) <= (T2xD(0) + T2xD(2));
T3xD(1) <= (T2xD(1) + T2xD(3));
T3xD(2) <= (T2xD(0) - T2xD(2));
T30xD <= (T2xD(1) - T2xD(3));
T3xD(4) <= (T2xD(4) + T2xD(6));
T3xD(5) <= (T2xD(5) + T2xD(7));
T3xD(6) <= (T2xD(4) - T2xD(6));
T31xD <= (T2xD(5) - T2xD(7));
-- Both remaining differences are shifted left by 4 bits (same top-bit
-- handling as in stage 1). T3xD(3) uses the shifted value directly, while
-- T3xD(7) is additionally folded (low byte minus sign-extended high byte).
T3xD(3) <= (T30xD(15) or T30xD(11)) & T30xD(10 downto 0) & "0000";
T310xD <= (T31xD(15) or T31xD(11)) & T31xD(10 downto 0) & "0000";
T3xD(7) <= (x"00" & T310xD(7 downto 0)) - (T310xD(15) & T310xD(15) & T310xD(15) & T310xD(15) &
T310xD(15) & T310xD(15) & T310xD(15) & T310xD(15) &
T310xD(15 downto 8));
-----------------------------------------------------------------------------
-- Stage 3: sums and differences of adjacent pairs.
T4xD(0) <= (T3xD(0) + T3xD(1));
T4xD(1) <= (T3xD(0) - T3xD(1));
T4xD(2) <= (T3xD(2) + T3xD(3));
T4xD(3) <= (T3xD(2) - T3xD(3));
T4xD(4) <= (T3xD(4) + T3xD(5));
T4xD(5) <= (T3xD(4) - T3xD(5));
T4xD(6) <= (T3xD(6) + T3xD(7));
T4xD(7) <= (T3xD(6) - T3xD(7));
-- Reduction of the eight stage-3 butterfly outputs modulo 257.
bf_fullred : for i in 0 to 7 generate
-- Partial reduction: zero-extended low byte minus sign-extended high byte
-- (256 = -1 mod 257, so the result is congruent to T4xD(i) modulo 257).
T40xD(i) <= (x"00" & T4xD(i)(7 downto 0)) - (T4xD(i)(15) & T4xD(i)(15) & T4xD(i)(15) & T4xD(i)(15) &
T4xD(i)(15) & T4xD(i)(15) & T4xD(i)(15) & T4xD(i)(15) &
T4xD(i)(15 downto 8));
-- Final step: values of 129 or more have 257 subtracted.
T5xD(i) <= T40xD(i) when T40xD(i) < 129 else T40xD(i)-257;
end generate bf_fullred;
-- YxDNN Memory
-----------------------------------------------------------------------------
-- Flat, row-major view of the registered working array:
-- YxDNN(8*row + col) mirrors YxDP(row, col).
fy1 : for row in 0 to 15 generate
  fy2 : for col in 0 to 7 generate
    YxDNN(8*row + col) <= YxDP(row, col);
  end generate fy2;
end generate fy1;
-- Second part
-----------------------------------------------------------------------------
-- Output stage, four lanes wide. Each lane picks two words of YxDPP through
-- the WPERM table (first index ip, falling back to 0 while ip is negative),
-- multiplies both by 185 (ip < 16) or 233 (otherwise) and packs the low
-- 16 bits of the two products into one 32-bit entry of WxDN.
f_mult : for lane in 0 to 3 generate
  Ysel0xD(lane) <= YxDPP(WPERM(ip, 2*lane))     when ip >= 0 else YxDPP(WPERM(0, 2*lane));
  Ysel1xD(lane) <= YxDPP(WPERM(ip, 2*lane + 1)) when ip >= 0 else YxDPP(WPERM(0, 2*lane + 1));

  MoutHxD(lane) <= Ysel0xD(lane) * 185 when ip < 16 else Ysel0xD(lane) * 233;
  MoutLxD(lane) <= Ysel1xD(lane) * 185 when ip < 16 else Ysel1xD(lane) * 233;

  -- Bits 15..0 come from the MoutHxD product, bits 31..16 from MoutLxD.
  f_ass : for bitno in 0 to 15 generate
    WxDN(lane)(bitno)      <= MoutHxD(lane)(bitno);
    WxDN(lane)(bitno + 16) <= MoutLxD(lane)(bitno);
  end generate f_ass;
end generate f_mult;
-- Message Memory
-----------------------------------------------------------------------------
-- p_mem: all registers of the block.
--   WxDP  : output register, follows WxDN every clock.
--   YxDPP : copy of the flattened working array reordered through P,
--           captured in the cycle where ii = 32.
--   YxDP  : working array. With InEnxEI = '1' rows 0..7 are loaded with the
--           zero-extended words of MxDI and rows 8..15 are cleared;
--           otherwise the array takes the next state YxDN from p_ntt.
p_mem : process (ClkxCI, RstxRBI)
begin  -- process p_mem
  if RstxRBI = '0' then                     -- asynchronous reset (active low)
    WxDP  <= (others => (others => '0'));
    YxDP  <= (others => (others => (others => '0')));
    YxDPP <= (others => (others => '0'));
  elsif ClkxCI'event and ClkxCI = '1' then  -- rising clock edge
    WxDP <= WxDN;

    if ii = 32 then
      for idx in 0 to 127 loop
        YxDPP(idx) <= YxDNN(P(idx));
      end loop;  -- idx
    end if;

    if InEnxEI = '1' then
      for row in 0 to 7 loop
        for col in 0 to 7 loop
          YxDP(row, col)     <= x"00" & signed(MxDI(8*row + col));
          YxDP(row + 8, col) <= (others => '0');
        end loop;  -- col
      end loop;  -- row
      -- The assignments below override the plain load made in the loop
      -- (the last assignment to a signal within a process run wins):
      -- word 63 always has 1 added, word 61 only when FinxSI = '1'.
      YxDP(7, 7) <= (x"00" & signed(MxDI(63))) + 1;
      if FinxSI = '1' then
        YxDP(7, 5) <= (x"00" & signed(MxDI(61))) + 1;
      end if;
    else
      YxDP <= YxDN;
    end if;
  end if;
end process p_mem;
end rtl;