------------------------------------------------------------
-- Copyright: 2010 Integrated Systems Laboratory, ETH Zurich
--            http://www.iis.ee.ethz.ch/~sha3
------------------------------------------------------------
library ieee;
use ieee.numeric_std.all;
use ieee.std_logic_1164.all;
use work.simdpkg.all;

-- Entity mexpansion
--
-- Interface of the message-expansion datapath implemented by architecture
-- rtl. The port descriptions below state only how rtl uses each port.
entity mexpansion is
  
  port (
    -- Clock; all registers (WxDP, YxDP, YxDPP) update on its rising edge.
    ClkxCI  : in  std_logic;
    -- Asynchronous reset, active low; clears WxDP, YxDP and YxDPP.
    RstxRBI : in  std_logic;
    -- Load enable: when '1' at a clock edge, rows 0..7 of the working array
    -- are loaded from MxDI (zero-extended) and rows 8..15 are cleared.
    -- When '0' the working array takes the next-state value from p_ntt.
    InEnxEI : in  std_logic;
    -- When '1', element (7,5) gets +1 while loading from MxDI, and the
    -- MmemxDI-derived multiplier operand (7,5) gets -1 at step ii = 5.
    -- Element (7,7) gets the same +1 / -1 treatment regardless of FinxSI.
    FinxSI  : in  std_logic;
    -- Step index (0..63). Steps 0..31 select the active branch of p_ntt;
    -- step 32 triggers the capture of the permuted array into YxDPP.
    IxDI    : in  unsigned(5 downto 0);
    -- First index into the WPERM table for the output stage; values >= 32
    -- are replaced by 0 before use.
    IPxDI   : in  unsigned(5 downto 0);
    -- Message block loaded into the working array; elements 0..63 are read
    -- and each is extended with 8 zero bits to 16 bits.
    MxDI    : in  marray;
    -- Message block used as multiplier operand in steps 0..7; elements 0..63
    -- are read and each is extended with 8 zero bits to 16 bits.
    MmemxDI : in  marray;
    -- Registered output (WxDP): words 0..3, each assembled from the low
    -- 16 bits of two products.
    WxDO    : out simdline
    );

end mexpansion;

architecture rtl of mexpansion is

  -- Integer views of the IxDI / IPxDI step inputs. Both are constrained to
  -- the values their drivers can actually produce (IxDI is 6 bits wide; ip is
  -- forced to 0 whenever IPxDI >= 32), instead of being left as full-range
  -- integers.
  signal ii : integer range 0 to 63;
  signal ip : integer range 0 to 31;

  -- Working array: 16 rows x 8 columns of 16-bit signed values.
  -- YxDP is the register, YxDN its combinational next state (from p_ntt).
  type   signed16x8x16 is array (0 to 15, 0 to 7) of signed(15 downto 0);
  signal YxDN, YxDP : signed16x8x16;

  -- MmemxDI rearranged as 8 x 8 and zero-extended to 16 bits.
  type   signed8x8x16 is array (0 to 7, 0 to 7) of signed(15 downto 0);
  signal MinxD : signed8x8x16;

  -- Flat 128-entry views: YxDNN is YxDP flattened row-major (index i*8+j),
  -- YxDPP is the registered copy of YxDNN permuted through table P.
  type   signed128x16 is array (0 to 127) of signed(15 downto 0);
  signal YxDPP, YxDNN : signed128x16;

  -- Eight-lane 16-bit vectors:
  --   TWMult1xD        first multiplier operand
  --   ButtYxD          butterfly input
  --   T11xD, T12xD     multiplier result after folding / final correction
  --   T2xD .. T4xD     butterfly stage outputs
  --   T40xD, T5xD      butterfly result after folding / final correction
  type   signed8x16 is array (0 to 7) of signed(15 downto 0);
  signal TWMult1xD, ButtYxD, T11xD, T12xD, T2xD, T3xD, T4xD, T40xD, T5xD : signed8x16;

  -- Second multiplier operand (9-bit signed table entries).
  type   signed8x9 is array (0 to 7) of signed(8 downto 0);
  signal TWMult2xD : signed8x9;

  -- Full 16 x 9 bit products (25 bits).
  type   signed8x25 is array (0 to 7) of signed(24 downto 0);
  signal T1xD : signed8x25;

  -- Butterfly intermediates that need a shift and/or fold before they are
  -- stored into T2xD / T3xD.
  signal T20xD, T21xD, T210xD, T22xD, T220xD : signed(15 downto 0);
  signal T30xD, T31xD, T310xD                : signed(15 downto 0);

  -- Output register (WxDP, drives WxDO) and its next state.
  signal WxDP, WxDN : simdline;

  -- Output stage: 32-bit products of the selected YxDPP entries.
  type signed4x32 is array (0 to 3) of signed(31 downto 0);
  signal MoutHxD, MoutLxD  : signed4x32;

  -- Output stage: YxDPP entries selected through WPERM.
  type   signed4x16 is array (0 to 3) of signed(15 downto 0);
  signal Ysel0xD, Ysel1xD  : signed4x16;
  

begin  -- rtl

  -- The output port is driven straight from the output register.
  WxDO <= WxDP;

  -- Integer versions of the step inputs. IPxDI values of 32 and above are
  -- replaced by 0 before ip is used as a table index.
  ii <= to_integer(IxDI);
  ip <= 0 when to_integer(IPxDI) >= 32 else to_integer(IPxDI);

  -- Zero-extend every MmemxDI element to 16 bits and arrange the 64 elements
  -- as an 8 x 8 array (flat index k maps to row k/8, column k mod 8).
  min_ext : for k in 0 to 63 generate
    MinxD(k / 8, k mod 8) <= x"00" & signed(MmemxDI(k));
  end generate min_ext;



  -- p_ntt: combinational operand selection and write-back for steps 0..31.
  --
  -- For the current step ii the process picks
  --   * the eight multiplier operand pairs (TWMult1xD, TWMult2xD),
  --   * the eight butterfly inputs (ButtYxD),
  -- and routes the multiplier results (T12xD) and butterfly results (T5xD)
  -- into the next-state array YxDN. Everything not written in a step keeps
  -- its registered value; outside steps 0..31 nothing is modified.
  p_ntt: process (FinxSI, MinxD, T12xD, T5xD, YxDP, ii)
  begin  -- process p_ntt

    -- Defaults: hold the array, feed zeros to multipliers and butterfly.
    YxDN      <= YxDP;
    ButtYxD   <= (others => (others => '0'));
    TWMult1xD <= (others => (others => '0'));
    TWMult2xD <= (others => (others => '0'));

    case ii is

      -- Steps 0..7 operate on column ii: multiply the MmemxDI-derived
      -- operands by FFT128TW into rows 8..15, butterfly rows 0..7 in place.
      when 0 to 7 =>
        for k in 0 to 7 loop
          TWMult1xD(k)    <= MinxD(k, ii);
          TWMult2xD(k)    <= to_signed(FFT128TW(k, ii), 9);
          ButtYxD(k)      <= YxDP(k, ii);
          YxDN(k, ii)     <= T5xD(k);
          YxDN(k + 8, ii) <= T12xD(k);
        end loop;  -- k

        -- Operand 7 is decremented in step 7, and in step 5 when FinxSI is
        -- set; this later assignment overrides the one made in the loop.
        if ii = 7 or (ii = 5 and FinxSI = '1') then
          TWMult1xD(7) <= MinxD(7, ii) - 1;
        end if;

      -- Steps 8..15 operate on column ii-8: multiply rows 0..7 by FFT64TW
      -- in place, butterfly rows 8..15 in place.
      when 8 to 15 =>
        for k in 0 to 7 loop
          TWMult1xD(k)        <= YxDP(k, ii - 8);
          TWMult2xD(k)        <= to_signed(FFT64TW(k, ii - 8), 9);
          ButtYxD(k)          <= YxDP(k + 8, ii - 8);
          YxDN(k, ii - 8)     <= T12xD(k);
          YxDN(k + 8, ii - 8) <= T5xD(k);
        end loop;  -- k

      -- Steps 16..23: multiply rows 8..15 of column ii-16 by FFT64TW in
      -- place, and butterfly the eight entries of row ii-16 in place.
      when 16 to 23 =>
        for k in 0 to 7 loop
          TWMult1xD(k)         <= YxDP(k + 8, ii - 16);
          TWMult2xD(k)         <= to_signed(FFT64TW(k, ii - 16), 9);
          ButtYxD(k)           <= YxDP(ii - 16, k);
          YxDN(k + 8, ii - 16) <= T12xD(k);
          YxDN(ii - 16, k)     <= T5xD(k);
        end loop;  -- k

      -- Steps 24..31: butterfly only, on row ii-16 (rows 8..15).
      when 24 to 31 =>
        for k in 0 to 7 loop
          ButtYxD(k)       <= YxDP(ii - 16, k);
          YxDN(ii - 16, k) <= T5xD(k);
        end loop;  -- k

      -- Any other step: keep the defaults.
      when others =>
        null;

    end case;
  end process p_ntt;

  -- TW Mult
  -----------------------------------------------------------------------------
  -- Eight parallel multipliers, each followed by a two-step reduction of the
  -- 25-bit product to a 16-bit value.
  tw_mult : for lane in 0 to 7 generate
    -- Full-width signed product (16 x 9 bits).
    T1xD(lane) <= TWMult1xD(lane) * TWMult2xD(lane);

    -- Fold: zero-extended low byte minus the sign-extended 8-bit value made
    -- of the product sign (bit 24) followed by bits 14..8.
    -- NOTE(review): low-minus-high folding together with the 257 constant
    -- below looks like a reduction modulo 257 -- confirm against the spec.
    T11xD(lane) <= (x"00" & T1xD(lane)(7 downto 0))
                   - resize(T1xD(lane)(24) & T1xD(lane)(14 downto 8), 16);

    -- Final correction: values of 129 and above are lowered by 257.
    T12xD(lane) <= T11xD(lane) when T11xD(lane) < 129 else T11xD(lane) - 257;
  end generate tw_mult;
  

  -- Butterfly
  -----------------------------------------------------------------------------
  -- Three add/subtract stages on the eight ButtYxD inputs, followed by a
  -- per-lane reduction producing T5xD. Some difference terms are shifted left
  -- (and, where needed, folded back to a small range) before the next stage.
  -- A "fold" below is: zero-extended low byte minus sign-extended high byte.

  -- Stage 1 ------------------------------------------------------------------
  -- Sums of inputs k and k+4 go to lanes 0..3.
  bf_s1_sum : for k in 0 to 3 generate
    T2xD(k) <= ButtYxD(k) + ButtYxD(k + 4);
  end generate bf_s1_sum;

  -- Differences of inputs k and k+4; only the first is used unmodified.
  T2xD(4) <= ButtYxD(0) - ButtYxD(4);
  T20xD   <= ButtYxD(1) - ButtYxD(5);
  T21xD   <= ButtYxD(2) - ButtYxD(6);
  T22xD   <= ButtYxD(3) - ButtYxD(7);

  -- Left shifts by 2, 4 and 6 bits. The new top bit is the OR of the old sign
  -- bit and the bit that is shifted into the sign position.
  T2xD(5) <= (T20xD(15) or T20xD(13)) & T20xD(12 downto 0) & "00";
  T210xD  <= (T21xD(15) or T21xD(11)) & T21xD(10 downto 0) & "0000";
  T220xD  <= (T22xD(15) or T22xD(9)) & T22xD(8 downto 0) & "000000";

  -- Fold the two larger shifted terms.
  T2xD(6) <= (x"00" & T210xD(7 downto 0)) - resize(T210xD(15 downto 8), 16);
  T2xD(7) <= (x"00" & T220xD(7 downto 0)) - resize(T220xD(15 downto 8), 16);

  -- Stage 2 ------------------------------------------------------------------
  -- Sums and differences of lanes two apart, separately for 0..3 and 4..7.
  T3xD(0) <= T2xD(0) + T2xD(2);
  T3xD(1) <= T2xD(1) + T2xD(3);
  T3xD(4) <= T2xD(4) + T2xD(6);
  T3xD(5) <= T2xD(5) + T2xD(7);

  T3xD(2) <= T2xD(0) - T2xD(2);
  T3xD(6) <= T2xD(4) - T2xD(6);
  T30xD   <= T2xD(1) - T2xD(3);
  T31xD   <= T2xD(5) - T2xD(7);

  -- Both remaining differences are shifted left by 4 bits (same top-bit rule
  -- as in stage 1); the second one is folded afterwards.
  T3xD(3) <= (T30xD(15) or T30xD(11)) & T30xD(10 downto 0) & "0000";
  T310xD  <= (T31xD(15) or T31xD(11)) & T31xD(10 downto 0) & "0000";

  T3xD(7) <= (x"00" & T310xD(7 downto 0)) - resize(T310xD(15 downto 8), 16);

  -- Stage 3 ------------------------------------------------------------------
  -- Sum and difference of each adjacent lane pair (2k, 2k+1).
  bf_s3_pair : for k in 0 to 3 generate
    T4xD(2*k)     <= T3xD(2*k) + T3xD(2*k + 1);
    T4xD(2*k + 1) <= T3xD(2*k) - T3xD(2*k + 1);
  end generate bf_s3_pair;

  -- Reduction ----------------------------------------------------------------
  -- Fold every lane, then lower values of 129 and above by 257.
  bf_fullred : for lane in 0 to 7 generate
    T40xD(lane) <= (x"00" & T4xD(lane)(7 downto 0)) - resize(T4xD(lane)(15 downto 8), 16);
    T5xD(lane)  <= T40xD(lane) when T40xD(lane) < 129 else T40xD(lane) - 257;
  end generate bf_fullred;

  
  -- YxDNN Memory
  -----------------------------------------------------------------------------
  -- Flat, row-major view of the 16 x 8 working register array:
  -- entry k is YxDP(row k/8, column k mod 8).
  y_flat : for k in 0 to 127 generate
    YxDNN(k) <= YxDP(k / 8, k mod 8);
  end generate y_flat;
  
  -- Second part
  -----------------------------------------------------------------------------
  -- Output stage: for each of the four output words, pick two YxDPP entries
  -- through the WPERM table, multiply both by a constant and pack the low
  -- 16 bits of the two products into one 32-bit word of WxDN.
  f_mult: for w in 0 to 3 generate

    -- Table lookup; row 0 of WPERM is used while ip is negative (only
    -- possible if ip carries a negative integer, e.g. before its first update).
    Ysel0xD(w) <= YxDPP(WPERM(0, 2*w))     when ip < 0 else YxDPP(WPERM(ip, 2*w));
    Ysel1xD(w) <= YxDPP(WPERM(0, 2*w + 1)) when ip < 0 else YxDPP(WPERM(ip, 2*w + 1));

    -- Multiplier constant is 185 for ip below 16 and 233 otherwise.
    MoutHxD(w) <= Ysel0xD(w) * 233 when ip >= 16 else Ysel0xD(w) * 185;
    MoutLxD(w) <= Ysel1xD(w) * 233 when ip >= 16 else Ysel1xD(w) * 185;

    -- Bits 15..0 of the word come from MoutHxD, bits 31..16 from MoutLxD
    -- (low 16 bits of each product).
    f_ass : for b in 0 to 15 generate
      WxDN(w)(b)      <= MoutHxD(w)(b);
      WxDN(w)(b + 16) <= MoutLxD(w)(b);
    end generate f_ass;

  end generate f_mult;

  -- Message Memory
  -----------------------------------------------------------------------------
  -- p_mem: all registers of the block.
  --   WxDP   output register, follows WxDN every cycle
  --   YxDPP  permuted snapshot of the working array, taken when ii = 32
  --   YxDP   working array: loaded from MxDI while InEnxEI = '1',
  --          otherwise updated from YxDN
  p_mem : process (ClkxCI, RstxRBI)
  begin  -- process p_mem
    if RstxRBI = '0' then               -- asynchronous reset (active low)
      YxDPP <= (others => (others => '0'));
      YxDP  <= (others => (others => (others => '0')));
      WxDP  <= (others => (others => '0'));

    elsif ClkxCI'event and ClkxCI = '1' then  -- rising clock edge
      WxDP <= WxDN;

      -- Capture the flattened array through permutation table P.
      if ii = 32 then
        for k in YxDPP'range loop
          YxDPP(k) <= YxDNN(P(k));
        end loop;  -- k
      end if;

      if InEnxEI = '1' then
        -- Load: rows 0..7 take the zero-extended message, rows 8..15 are
        -- cleared.
        for row in 0 to 7 loop
          for col in 0 to 7 loop
            YxDP(row, col)     <= x"00" & signed(MxDI(row*8 + col));
            YxDP(row + 8, col) <= (others => '0');
          end loop;  -- col
        end loop;  -- row

        -- Element (7,7) is always loaded incremented by one, element (7,5)
        -- only when FinxSI is set. These assignments override the plain
        -- load made in the loop above.
        YxDP(7, 7) <= (x"00" & signed(MxDI(63))) + 1;
        if FinxSI = '1' then
          YxDP(7, 5) <= (x"00" & signed(MxDI(61))) + 1;
        end if;

      else
        -- Normal operation: take the next state computed by p_ntt.
        YxDP <= YxDN;

      end if;

    end if;
  end process p_mem;

end rtl;

-- Generated on Fri Sep 24 10:39:12 CEST 2010
-- Home