------------------------------------------------------------
-- Copyright: 2010 Integrated Systems Laboratory, ETH Zurich
--            http://www.iis.ee.ethz.ch/~sha3
------------------------------------------------------------
-------------------------------------------------------------------------------
-- Title      : ECHO 0.2 Gb/s implementation
-- Project    : 
-------------------------------------------------------------------------------
-- File       : echo_slow.vhd
-- Author     : Frank/Luca account  
-- Company    : Integrated Systems Laboratory, ETH Zurich
-- Created    : 2010-02-23
-- Last update: 2010-04-13
-- Platform   : ModelSim (simulation), Synopsys (synthesis)
-- Standard   : VHDL'87
-------------------------------------------------------------------------------
-- Description: This is a slow ECHO datapath with a single 32bit  AES
-------------------------------------------------------------------------------
-- Copyright (c) 2010 Integrated Systems Laboratory, ETH Zurich
-------------------------------------------------------------------------------
-- Revisions  :
-- Date        Version  Author  Description
-- 2010-02-23  1.0      sha3	Created
-- 2010-04-12  1.1      sha3    There is a small problem with the assignment
--                              ordering in general. This is being addressed.
--                              Added a register to keep the SALT round. This
--                              is the round where we do not use the key.
-------------------------------------------------------------------------------

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

entity echo is
  port (
    -- message block, all 1536 bits presented in parallel
    DataInxDI  : in  std_logic_vector(1535 downto 0);
    -- '1': the block being loaded is the last one
    -- '0': continue chaining
    LastxSI    : in  std_logic;
    -- input enable: the FSM leaves its waiting states only while this is '1'
    InENxSI    : in  std_logic;
    -- 256-bit result
    DataOutxDO : out std_logic_vector(255 downto 0);
    -- output enable, driven to '1' by the FSM in its 'last' state
    OutEnxSO   : out std_logic;
    -- clock, all registers update on the rising edge
    CLKxCI     : in  std_logic;
    -- asynchronous reset, active low
    RSTxRBI    : in  std_logic
  );
end echo;

architecture slow of echo is

-- This file is based on the FAST ECHO implementation. Several things
-- have been modified in order to use as much as possible from the fast
-- echo.

  
 -----------------------------------------------------------------------------
 -- Constants
 -----------------------------------------------------------------------------

 -- Initialization string for V according to section 2.1, on pg 7 of the
 -- ECHO description
 constant VINIT : std_logic_vector(511 downto 0) :=
   (112 => '1', 240 => '1', 368 => '1', 496 => '1', others => '0');

 -- Initial counter value. The message length is currently fixed to 1376
 -- (0x560); supporting other lengths is not difficult but is not done here.
 constant CINIT : std_logic_vector(63 downto 0) := X"000000000000056" & "0000";

 -- Originally documented as "one less than CINIT(4 downto 0)".
 -- NOTE(review): not referenced anywhere else in this architecture.
 constant CSTOP : std_logic_vector(4 downto 0) := "10111";

 -----------------------------------------------------------------------------
 -- Types
 -----------------------------------------------------------------------------

 -- the 2048-bit state seen as sixteen 128-bit words
 type s_type is array (0 to 15) of std_logic_vector(127 downto 0);
 -- one 128-bit AES word seen as sixteen bytes
 type BytesType is array (0 to 15) of std_logic_vector (7 downto 0);
 -- inputs / outputs of the 64 Big MixColumns instances
 type bm_type is array (0 to 63) of std_logic_vector(31 downto 0);
 -- FSM states
 type states_type is (init, chain, aes0, aes1, aes2, aes3, bigmix, last);

 -----------------------------------------------------------------------------
 -- Message input and the V / S registers
 -----------------------------------------------------------------------------

 -- input block after the 128-bit word reordering
 signal DataShufflexD : std_logic_vector(1535 downto 0);

 -- V register (present / next)
 signal VxDP : std_logic_vector(511 downto 0);
 signal VxDN : std_logic_vector(511 downto 0);

 -- S register (present / next)
 signal SxDP : std_logic_vector(2047 downto 0);
 signal SxDN : std_logic_vector(2047 downto 0);
 -- NOTE(review): SxD is declared but not referenced in this architecture
 signal SxD  : std_logic_vector(2047 downto 0);

 -- flattened candidates for the next S value
 signal BigSubxD : std_logic_vector(2047 downto 0);
 signal BigMixD  : std_logic_vector(2047 downto 0);

 -- word-wise views of the state
 -- NOTE(review): SNxD is declared but not referenced in this architecture
 signal SNxD  : s_type;
 signal BSxD  : s_type;
 signal BMxD  : s_type;
 signal SPxD  : s_type;
 signal AESxD : s_type;

 -----------------------------------------------------------------------------
 -- 32-bit AES datapath
 -----------------------------------------------------------------------------

 -- this is the 128 bit state of the aes (input word / output word)
 signal APxD : std_logic_vector(127 downto 0);
 signal ANxD : std_logic_vector(127 downto 0);

 -- the 4 temporary storage bytes that we have for the shiftrows
 signal ATxDP : std_logic_vector(31 downto 0);
 signal ATxDN : std_logic_vector(31 downto 0);

 -- the 32-bit column as it travels through the AES round
 signal AInxD  : std_logic_vector(31 downto 0);
 signal ASubxD : std_logic_vector(31 downto 0);
 signal AMixD  : std_logic_vector(31 downto 0);
 signal AKeyxD : std_logic_vector(31 downto 0);
 signal AOutxD : std_logic_vector(31 downto 0);

 -- byte views of APxD and ANxD
 signal AxD : BytesType;
 signal BxD : BytesType;

 -----------------------------------------------------------------------------
 -- Big MixColumns / Big Final
 -----------------------------------------------------------------------------

 -- Big MixColumns Input and Big MixColumns Output
 signal BMIxD : bm_type;
 signal BMOxD : bm_type;

 -- Big Final signals
 signal BF1xD : std_logic_vector(511 downto 0);
 signal BF2xD : std_logic_vector(511 downto 0);

 -----------------------------------------------------------------------------
 -- Key, counters and control
 -----------------------------------------------------------------------------

 signal KeyxD : std_logic_vector(127 downto 0);
 signal KSxD  : std_logic_vector(63 downto 0);

 signal CntxSP : std_logic_vector(63 downto 0);
 signal CntxSN : std_logic_vector(63 downto 0);

 signal SCntxSP : integer range 0 to 15;
 signal SCntxSN : integer range 0 to 15;

 signal RndCntxSP : integer range 0 to 15;
 signal RndCntxSN : integer range 0 to 15;

 signal LastxSP : std_logic;
 signal LastxSN : std_logic;

 signal StatexDP : states_type;
 signal StatexDN : states_type;

 signal SaltxSP : std_logic;
 signal SaltxSN : std_logic;

 -----------------------------------------------------------------------------
 -- Components
 -----------------------------------------------------------------------------

 -- 8-bit substitution box
 component sbox
   port (
     InpxDI : in  std_logic_vector(7 downto 0);
     OupxDO : out std_logic_vector(7 downto 0));
 end component;

 -- 32-bit mixcolumn
 component mixcolumn
   port (
     InpxDI : in  std_logic_vector(31 downto 0);
     OupxDO : out std_logic_vector(31 downto 0));
 end component;
 
begin  -- slow


-- Reshuffling of the input: the order of the twelve 128-bit words is
-- reversed, i.e. word i of DataShufflexD is word (11-i) of DataInxDI.
gen_shuffle: for i in 0 to 11 generate
  DataShufflexD(128*i + 127 downto 128*i) <=
    DataInxDI(128*(11-i) + 127 downto 128*(11-i));
end generate gen_shuffle;

-- Convenience mappings between the flat 2048-bit vectors and their
-- word-wise (16 x 128 bit) views:
--   SPxD(i)  : word i of the present state SxDP
--   BigMixD  : BMxD flattened
--   BigSubxD : AESxD flattened
gen_smap: for i in 0 to 15 generate
  BigSubxD(128*i + 127 downto 128*i) <= AESxD(i);
  BigMixD (128*i + 127 downto 128*i) <= BMxD(i);
  SPxD(i)                            <= SxDP(128*i + 127 downto 128*i);
end generate gen_smap;
             
-------------------------------------------------------------------------------
-- S Register
-------------------------------------------------------------------------------

            
-- Input multiplexer: chooses the next state of the S register (SxDN)
--   init   : new message block together with the constant VINIT
--   chain  : new message block together with the current V
--   bigmix : result of the Big MixColumns
--   others : state with the currently addressed word replaced by the
--            AES datapath output
   p_inmux: with StatexDP select
     SxDN <= DataShufflexD & VINIT when init,
             DataShufflexD & VxDP  when chain,
             BigMixD               when bigmix,
             BigSubxD              when others;

-------------------------------------------------------------------------------
-- AES block
-------------------------------------------------------------------------------

   
-- Two-level multiplexing of the state for the AES datapath.
-- Level 1 (here): pick the 128-bit word addressed by SCntxSP out of the
-- 2048-bit state. Level 2 (p_AESround) picks 32 bits out of APxD.
   APxD <= SPxD(SCntxSP);

-- Write-back path: all words are passed through unchanged, except the
-- one addressed by SCntxSP, which is replaced by the AES output ANxD.
   p_AESdata: process (SCntxSP, SPxD, ANxD)
   begin  -- process p_AESdata
     for w in 0 to 15 loop
       if w = SCntxSP then
         AESxD(w) <= ANxD;
       else
         AESxD(w) <= SPxD(w);
       end if;
     end loop;
   end process p_AESdata;



   
-- Byte views used for readability. Byte 0 is the most significant byte.
--   AxD(i) : byte i of APxD, the input of the AES block
--   BxD(i) : byte i of ANxD, the output of the AES block
 gen_byte_assign: for i in 0 to 15 generate
   ANxD(127 - 8*i downto 120 - 8*i) <= BxD(i);
   AxD(i)                           <= APxD(127 - 8*i downto 120 - 8*i);
 end generate gen_byte_assign;

                     
  -- One AES round on a 128-bit word, processed as four 32-bit columns in the
  -- states aes0..aes3.
  --
  -- In each of these states AInxD selects the four bytes of one column after
  -- ShiftRows; AOutxD is that column after S-box, MixColumn and key addition.
  -- The result is written back in place (BxD drives ANxD, which replaces the
  -- addressed word of the state). An output byte whose destination slot still
  -- holds an input byte needed by a later state is parked in the temporary
  -- register ATxDP and copied to its slot once that input byte has been
  -- consumed:
  --   ATxD( 7 downto  0) = T1, ATxD(15 downto  8) = T2,
  --   ATxD(23 downto 16) = T3, ATxD(31 downto 24) = T4
  -- In all other states the word passes through unchanged (BxD = AxD) and the
  -- temporary register holds its value.
  p_AESround: process (AxD, StatexDP, ATxDP, AOutxD)
  begin  -- process p_AESround
    -- defaults
    AInxD <= AxD(0) & AxD(5) & AxD(10) & AxD(15); 
    ATxDN <= ATxDP;
    BxD   <= AxD;
    
-- Earlier write-back schedule, superseded by the case statement below and
-- kept for reference only:
--    case StatexDP is
--      when aes0 =>
--          AInxD AxD(0) & AxD(5) & AxD(10) & AxD(15);  -- 0 5 10 15
--          BxD(0)              AOutxD(31 downto 24);  --0
--          ATxDN( 7 downto  0) AOutxD(23 downto 16);  --T1 =5
--          ATxDN(15 downto  8) AOutxD(15 downto  8);  --T2 =10
--          ATxDN(23 downto 16) AOutxD( 7 downto  0);  --T3 =15
--      when aes1 =>
--          AInxD AxD(4) & AxD(9) & AxD(14)& AxD(3);  -- 4 9 14 3
--          BxD(4)              AOutxD(31 downto 24);  --4
--          ATxDN( 7 downto  0) AOutxD(23 downto 16);  --T1=9
--          ATxDN(31 downto 24) AOutxD(15 downto  8);  --T4=14
--          BxD(3)              AOutxD( 7 downto  0);  --3
--          BxD(5)              ATxDP ( 7 downto  0);  --5
--      when aes2 =>
--          AInxD AxD(8)& AxD(13)& AxD(2) & AxD(7);  --8 13 2 7
--          BxD(8)              AOutxD(31 downto 24);  -- 8
--          ATxDN( 7 downto  0) AOutxD(23 downto 16);  -- T1=13
--          BxD(2)              AOutxD(15 downto  8);  -- 2
--          BxD(7)              AOutxD( 7 downto  0);  -- 7
--          BxD(9)              ATxDP(  7 downto  0);  -- 9
--          BxD(10)             ATxDP( 15 downto  8);  -- 10
--      when aes3 =>
--          AInxD AxD(12)& AxD(1) & AxD(6) & AxD(11);  --12 1 6 11
--          BxD(12)             AOutxD(31 downto 24);  -- 12
--          BxD(1)              AOutxD(23 downto 16);  -- 1
--          BxD(6)              AOutxD(15 downto  8);  -- 6
--          BxD(11)             AOutxD( 7 downto  0);  -- 11
--          BxD(13)             ATxDP(  7 downto  0);  -- 13
--          BxD(14)             ATxDP( 31 downto 24);  -- 14
--          BxD(15)             ATxDP( 23 downto 16);  -- 15
--      when others => null;

    -- Active schedule. The trailing comment of each line names the output
    -- byte position (0..15) that the assigned value belongs to.
    case StatexDP is
      when aes0 =>
          -- output bytes 0..3; slots 1, 2, 3 are still needed as inputs
          AInxD <= AxD(0) & AxD(5) & AxD(10) & AxD(15);  -- 0 5 10 15
--          AInxD AxD(0) & AxD(4) & AxD(8) & AxD(12);  -- 0 4 8 12 (unused alternative)
          BxD(0)              <= AOutxD(31 downto 24);  --0
          ATxDN( 7 downto  0) <= AOutxD(23 downto 16);  --T1 =1 
          ATxDN(15 downto  8) <= AOutxD(15 downto  8);  --T2 =2
          ATxDN(23 downto 16) <= AOutxD( 7 downto  0);  --T3 =3
      when aes1 =>
          -- output bytes 4..7; slots 6, 7 are still needed as inputs.
          -- Input byte 3 is consumed here, so parked byte 3 (T3) is released.
          AInxD <= AxD(4) & AxD(9) & AxD(14)& AxD(3);  -- 4 9 14 3
--          AInxD AxD(5) & AxD(9) & AxD(13)& AxD(1);  -- 5 9 13 1 (unused alternative)
          BxD(4)              <= AOutxD(31 downto 24);  --4
          BxD(5)              <= AOutxD(23 downto 16);  --5
          ATxDN(23 downto 16) <= AOutxD(15 downto  8);  --T3=6
          ATxDN(31 downto 24) <= AOutxD( 7 downto  0);  --T4=7
          BxD(3)              <= ATxDP( 23 downto 16);  --3
      when aes2 =>
          -- output bytes 8..11; slot 11 is still needed as input.
          -- Input bytes 2 and 7 are consumed here, so T2 and T4 are released.
          AInxD <= AxD(8)& AxD(13)& AxD(2) & AxD(7);  --8 13 2 7
--          AInxD AxD(10)& AxD(14)& AxD(2) & AxD(6);  --10 14 2 6 (unused alternative)
          BxD(8)              <= AOutxD(31 downto 24);  -- 8
          BxD(9)              <= AOutxD(23 downto 16);  -- 9
          BxD(10)             <= AOutxD(15 downto  8);  -- 10
          ATxDN(15 downto 8)  <= AOutxD( 7 downto  0);  --T2=11
          BxD(2)              <= ATxDP( 15 downto  8);  -- 2
          BxD(7)              <= ATxDP( 31 downto 24);  -- 7
      when aes3 =>
          -- output bytes 12..15 go straight to their slots; the remaining
          -- parked bytes 1 (T1), 6 (T3) and 11 (T2) are released.
          AInxD <= AxD(12)& AxD(1) & AxD(6) & AxD(11);  --12 1 6 11
--          AInxD AxD(15)& AxD(3) & AxD(7) & AxD(11);  --15 3 7 11 (unused alternative)
          BxD(12)             <= AOutxD(31 downto 24);  -- 12
          BxD(13)             <= AOutxD(23 downto 16);  -- 13
          BxD(14)             <= AOutxD(15 downto  8);  -- 14
          BxD(15)             <= AOutxD( 7 downto  0);  -- 15
          BxD(1)              <= ATxDP(  7 downto  0);  -- 1
          BxD(6)              <= ATxDP( 23 downto 16);  -- 6
          BxD(11)             <= ATxDP( 15 downto  8);  -- 11
      when others => null;  -- keep the defaults



    end case;
  end process p_AESround;


-- Four S-boxes in parallel, one for each byte of the selected column
 g_sbox: for i in 0 to 3 generate
   i_sbox: sbox
     port map (
       InpxDI => AInxD (8*i + 7 downto 8*i),
       OupxDO => ASubxD(8*i + 7 downto 8*i));
 end generate g_sbox;

-- The substituted column then goes through a single mixcolumn
 i_mixcolumn: mixcolumn
   port map (
     InpxDI => ASubxD,
     OupxDO => AMixD);


-------------------------------------------------------------------------------
-- Shuffle since ECHO has a strange way of representing data 
-------------------------------------------------------------------------------

gen_keyshuffle: for i in 0 to 7 generate
  -- byte order of the counter is reversed: byte i of KSxD is byte (7-i)
  -- of CntxSP
  KSxD(8*i + 7 downto 8*i) <= CntxSP(63 - 8*i downto 56 - 8*i);
end generate gen_keyshuffle;

-- Round key.
--   SaltxSP = '0' : the byte-swapped counter in the upper 64 bits, zeroes
--                   in the lower 64 bits
--   otherwise     : all zeroes (the odd round, where the salt would go)
          KeyxD <= (others => '0') when SaltxSP /= '0' else KSxD & X"0000000000000000";

-- Slice of the round key that belongs to the column being processed.
-- Outside of aes0..aes2 the lowest 32 bits are selected.
       p_AES_key_part: with StatexDP select
          AKeyxD <= KeyxD(127 downto 96) when aes0,
                    KeyxD( 95 downto 64) when aes1,
                    KeyxD( 63 downto 32) when aes2,
                    KeyxD( 31 downto  0) when others;

-- Key addition on the mixed column. p_AESround copies AOutxD into the
-- appropriate BxD bytes or into the temporary register ATxD.
        AOutxD <= AKeyxD xor AMixD;


-------------------------------------------------------------------------------
-- Big MixColumn
-------------------------------------------------------------------------------


-- Big ShiftRows (BS == BigShift) on the state seen as a 4x4 arrangement of
-- 128-bit words, numbered column by column (word index = 4*column + row).
-- Row r of column c is taken from row r of column (c + r) mod 4, which in
-- word indices is (4*c + 5*r) mod 16.
gen_bigshift_col: for c in 0 to 3 generate
  gen_bigshift_row: for r in 0 to 3 generate
    BSxD(4*c + r) <= SPxD((4*c + 5*r) mod 16);
  end generate gen_bigshift_row;
end generate gen_bigshift_col;
       
-- Big MixColumns, done fully in parallel with 64 mixcolumn instances
-- instead of multiplexing a smaller number of them. This is a fairly large
-- chunk of logic, so the trade-off may need to be revisited.
gen_bigmix: for i in 0 to 3 generate
  gen_bigmiy: for j in 0 to 15 generate
    -- input: byte j of each of the four shifted words of column i
    BMIxD(16*i + j) <= BSxD(4*i    )(8*j + 7 downto 8*j) &
                       BSxD(4*i + 1)(8*j + 7 downto 8*j) &
                       BSxD(4*i + 2)(8*j + 7 downto 8*j) &
                       BSxD(4*i + 3)(8*j + 7 downto 8*j);

    i_mixcolumn: mixcolumn
      port map (
        InpxDI => BMIxD(16*i + j),
        OupxDO => BMOxD(16*i + j));

    -- output: byte k of the result (most significant first) becomes
    -- byte j of word k of column i
    gen_bigmix_out: for k in 0 to 3 generate
      BMxD(4*i + k)(8*j + 7 downto 8*j) <=
        BMOxD(16*i + j)(31 - 8*k downto 24 - 8*k);
    end generate gen_bigmix_out;
  end generate gen_bigmiy;
end generate gen_bigmix;

-- BMxD is flattened into BigMixD by the generate statement gen_smap.
  

-------------------------------------------------------------------------------
-- Big Final
-- This is divided into two steps. Once the new data is here it is immediately
-- XOR'ed and added to V (BF1)
-- At the end of calculation State S is XOR'ed with V again
-- (BF2)
-- Technically the VxDP VxDN xor BF1xD xor BF2xD. However BF1 is available
-- at the very beginning and BF2 is available at the very end.
-------------------------------------------------------------------------------

-- Big Final terms, one 128-bit slice per row r (row 0 is the topmost slice).
--   BF2 : XOR of the four present-state words of row r
--   BF1 : XOR of the three message words of row r; note that the shuffled
--         input data is needed here as well
gen_bigfinal: for r in 0 to 3 generate
  BF2xD(511 - 128*r downto 384 - 128*r) <=
    SPxD(r) xor SPxD(r + 4) xor SPxD(r + 8) xor SPxD(r + 12);

  BF1xD(511 - 128*r downto 384 - 128*r) <=
    DataShufflexD(128*r +  127 downto 128*r       ) xor
    DataShufflexD(128*r +  639 downto 128*r +  512) xor
    DataShufflexD(128*r + 1151 downto 128*r + 1024);
end generate gen_bigfinal;



-------------------------------------------------------------------------------
-- V Register
-------------------------------------------------------------------------------

  -- Next-state logic of the V register.
  --   init  : V is rebuilt from the constant VINIT and the message term BF1
  --   chain : the message term BF1 is folded into V, but only in the cycle
  --           in which the block is accepted (InENxSI = '1')
  --   last  : the state term BF2 is folded into V
  --   other : V holds its value
  p_v: process (VxDP, StatexDP, InENxSI, BF1xD, BF2xD)
  begin  -- process p_v
    VxDN <= VxDP;

    case StatexDP is
      when init =>
        -- Computed from the constant VINIT, so re-evaluating it in every
        -- waiting cycle is harmless: only the value of the accepting cycle
        -- survives.
        VxDN <= VINIT xor BF1xD;                -- beginning
      when chain =>
        -- The FSM stays in 'chain' until InENxSI = '1'. Updating V in the
        -- waiting cycles would XOR whatever is on the data input into V
        -- once per cycle and corrupt the chaining value.
        if InENxSI = '1' then
          VxDN <= VxDP xor BF1xD;               -- we have new data
        end if;
      when last =>
        VxDN <= VxDP xor BF2xD;                 -- last cycle
      when others => null;
    end case;
  end process p_v;

-------------------------------------------------------------------------------
-- Main FSM
-------------------------------------------------------------------------------
 -- Main control FSM.
 --
 --   init / chain : wait for InENxSI = '1', then start processing. LastxSI
 --                  is sampled together with the block in both states.
 --   aes0..aes3   : one AES round on the 128-bit word addressed by SCntxSP,
 --                  one 32-bit column per state. After aes3 either the next
 --                  word is processed or, after word 15, the pass is complete.
 --   bigmix       : entered after every second pass over the 16 words
 --                  (SaltxSP = '1'); the S register takes the Big MixColumns
 --                  result in this state.
 --   last         : OutEnxSO is '1' for this single cycle; continue with
 --                  init (last block) or chain (more blocks follow).
 --
 -- RndCntxSP counts completed passes and wraps from 15 to 0, so the
 -- processing of one block ends after 16 passes.
 p_fsm: process (StatexDP, LastxSI, InENxSI, SCntxSP, RndCntxSP, CntxSP, LastxSP, SaltxSP)
 begin  -- process p_fsm
   -- defaults: every register holds its value, output is not valid
   StatexDN  <= StatexDP;
   OutEnxSO  <= '0';
   RndCntxSN <= RndCntxSP;
   SCntxSN   <= SCntxSP;
   CntxSN    <= CntxSP;
   LastxSN   <= LastxSP;
   SaltxSN   <= SaltxSP;

   case StatexDP is
     when init =>
       RndCntxSN <= 0;
       SCntxSN   <= 0;
       CntxSN    <= CINIT;
       if InENxSI = '1' then
         StatexDN <= aes0;
         if LastxSI = '1' then
           LastxSN <= '1';
         end if;
       end if;

     when chain =>
       RndCntxSN <= 0;
       SCntxSN   <= 0;
       -- Cnt continues
       if InENxSI = '1' then
         StatexDN <= aes0;
         -- Remember that this block is the last one. Without this the
         -- 'last' state would always return to 'chain' once chaining has
         -- started, and the FSM could never get back to 'init'.
         if LastxSI = '1' then
           LastxSN <= '1';
         end if;
       end if;

     when aes0 => StatexDN <= aes1;
     when aes1 => StatexDN <= aes2;
     when aes2 => StatexDN <= aes3;

     when aes3 =>
       -- the counter only advances in the pass that actually uses it as key
       if SaltxSP = '0' then
         CntxSN <= std_logic_vector(unsigned (CntxSP) + "1");
       end if;

       if SCntxSP = 15 then
         -- all 16 words done: this pass is complete
         SCntxSN <= 0;
         if RndCntxSP = 15 then
           RndCntxSN <= 0;
         else
           RndCntxSN <= RndCntxSP + 1;
         end if;

         -- We will use the SaltxSP to determine in which pass we are
         if SaltxSP = '0' then
           StatexDN <= aes0;            -- even
           SaltxSN  <= '1';
         else
           StatexDN <= bigmix;          -- odd
           SaltxSN  <= '0';
         end if;
       else
         SCntxSN  <= SCntxSP + 1;
         StatexDN <= aes0;
       end if;

     when bigmix =>
       -- Be careful here: RndCntxSP has already been wrapped to 0 by the
       -- preceding aes3 state, so we need to check for 0 to determine the end.
       if RndCntxSP = 0 then
         StatexDN <= last;
       else
         StatexDN <= aes0;              -- not finished
       end if;

     when last =>
       OutEnxSO <= '1';                 -- Normally we would have an ack here
       if LastxSP = '1' then            -- Was last block
         LastxSN  <= '0';               -- clear Last
         StatexDN <= init;
       else
         StatexDN <= chain;
       end if;

     when others => null;
   end case;

 end process p_fsm;

-------------------------------------------------------------------------------
-- The Output
-------------------------------------------------------------------------------

-- The output is valid for *ONE* cycle only, in the state 'last'.
-- This is not ideal: we would probably have liked to have an output
-- acknowledge, but at the moment this will suffice.
-- The output is taken from the next-state value VxDN (upper 256 bits).
DataOutxDO <= VxDN(511 downto 256);

        
-------------------------------------------------------------------------------
-- Clocked process for all the registers
-------------------------------------------------------------------------------


p_clk : process (CLKxCI, RSTxRBI)
begin  -- process p_clk
  if RSTxRBI = '0' then
    -- asynchronous reset (active low)
    -- control registers
    StatexDP  <= init;
    SaltxSP   <= '0';
    LastxSP   <= '0';
    RndCntxSP <= 0;
    SCntxSP   <= 0;
    CntxSP    <= CINIT;
    -- data registers
    ATxDP     <= (others => '0');
    VxDP      <= VINIT;
    SxDP      <= (others => '0');
  elsif CLKxCI'event and CLKxCI = '1' then
    -- rising clock edge: every register takes over its next-state value
    -- control registers
    StatexDP  <= StatexDN;
    SaltxSP   <= SaltxSN;
    LastxSP   <= LastxSN;
    RndCntxSP <= RndCntxSN;
    SCntxSP   <= SCntxSN;
    CntxSP    <= CntxSN;
    -- data registers
    ATxDP     <= ATxDN;
    VxDP      <= VxDN;
    SxDP      <= SxDN;
  end if;
end process p_clk;

                

end slow;

-- Generated on Fri Sep 24 10:39:12 CEST 2010
-- Home