-- EMACS settings: -*- tab-width: 2; indent-tabs-mode: t -*-
-- vim: tabstop=2:shiftwidth=2:noexpandtab
-- kate: tab-width 2; replace-tabs off; indent-width 2;
-- =============================================================================
-- Authors: Patrick Lehmann
-- Martin Zabel
--
-- Entity: Tag-unit with fully-parallel compare of tag.
--
-- Description:
--
-- Tag-unit with fully-parallel compare of tag.
--
-- Configuration
-- *************
--
-- +--------------------+----------------------------------------------------+
-- | Parameter | Description |
-- +====================+====================================================+
-- | REPLACEMENT_POLICY | Replacement policy. For supported policies see |
-- | | PoC.cache_replacement_policy. |
-- +--------------------+----------------------------------------------------+
-- | CACHE_LINES | Number of cache lines. |
-- +--------------------+----------------------------------------------------+
-- | ASSOCIATIVITY | Associativity of the cache. |
-- +--------------------+----------------------------------------------------+
-- | ADDRESS_BITS | Number of address bits. Each address identifies |
-- | | exactly one cache line in memory. |
-- +--------------------+----------------------------------------------------+
--
--
-- Command truth table
-- *******************
--
-- +---------+-----------+-------------+---------+----------------------------------+
-- | Request | ReadWrite | Invalidate | Replace | Command |
-- +=========+===========+=============+=========+==================================+
-- | 0 | 0 | 0 | 0 | None |
-- +---------+-----------+-------------+---------+----------------------------------+
-- | 1 | 0 | 0 | 0 | Read cache line |
-- +---------+-----------+-------------+---------+----------------------------------+
-- | 1 | 1 | 0 | 0 | Update cache line |
-- +---------+-----------+-------------+---------+----------------------------------+
-- | 1 | 0 | 1 | 0 | Read cache line and discard it |
-- +---------+-----------+-------------+---------+----------------------------------+
-- | 1 | 1 | 1 | 0 | Write cache line and discard it |
-- +---------+-----------+-------------+---------+----------------------------------+
-- | 0 | | 0 | 1 | Replace cache line. |
-- +---------+-----------+-------------+---------+----------------------------------+
--
--
-- Operation
-- *********
--
-- All inputs are synchronous to the rising edge of the clock ``Clock``.
--
-- All commands use ``Address`` to lookup (request) or replace a cache line.
-- Each command is completed within one clock cycle.
--
-- Upon requests, the outputs ``TagMiss`` and ``TagHit`` indicate (high-active)
-- immediately (combinational) whether the ``Address`` is stored within the cache, or not.
-- However, the cache-line usage is updated at the rising edge of the clock.
-- On a hit, ``LineIndex`` specifies the cache line where the content can be found.
--
-- The output ``ReplaceLineIndex`` indicates which cache line will be replaced
-- next by a replace command. The output ``OldAddress`` specifies the old tag stored at this
-- index. The replace command will store the ``Address`` and update the cache-line
-- usage at the rising edge of the clock.
--
-- For a direct-mapped cache, the number of ``CACHE_LINES`` must be a power of 2.
-- For a set-associative cache, the expression ``CACHE_LINES / ASSOCIATIVITY``
-- must be a power of 2.
--
-- .. NOTE::
-- The port ``NewAddress`` has been removed. Use ``Address`` instead as
-- described above.
--
-- If ``Address`` is fed from a register and an Altera FPGA is used, then
-- Quartus Map converts the tag memory from a memory with asynchronous read to a
-- memory with synchronous read by adding a pass-through logic. Quartus Map
-- reports warning 276020 which is intended.
--
-- .. WARNING::
--
-- If the design is synthesized with Xilinx ISE / XST, then the synthesis
-- option "Keep Hierarchy" must be set to SOFT or TRUE.
--
-- License:
-- =============================================================================
-- Copyright 2007-2016 Technische Universitaet Dresden - Germany
-- Chair of VLSI-Design, Diagnostics and Architecture
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- =============================================================================
library IEEE;
use IEEE.STD_LOGIC_1164.all;
use IEEE.NUMERIC_STD.all;
library PoC;
use PoC.utils.all;
use PoC.vectors.all;
-- Tag unit of a cache with fully-parallel tag comparison.
--
-- Generics:
--   REPLACEMENT_POLICY  Name of the replacement policy; forwarded to
--                       PoC.cache_replacement_policy by the associative variants.
--   CACHE_LINES         Number of cache lines.
--   ASSOCIATIVITY       Associativity (number of ways) of the cache.
--   ADDRESS_BITS        Number of address bits; each address identifies exactly
--                       one cache line in memory.
--
-- Ports:
--   Clock             All state is updated at the rising edge.
--   Reset             Synchronous; clears all valid bits.
--   Replace           Command: store ``Address`` in the line given by
--                     ``ReplaceLineIndex`` and mark that line valid.
--   ReplaceLineIndex  Cache line which a replace command will overwrite.
--   OldAddress        Address currently stored at ``ReplaceLineIndex``.
--   Request           Command: look up ``Address``; qualifies TagHit / TagMiss.
--   ReadWrite         Forwarded to the replacement policy; not evaluated by the
--                     tag unit itself.
--   Invalidate        Clears the valid bit of the cache line which hits.
--   Address           Address to look up or to store.
--   LineIndex         Cache line where ``Address`` was found (meaningful on hit).
--   TagHit            High if ``Request`` is high and ``Address`` is stored in a
--                     valid cache line.
--   TagMiss           High if ``Request`` is high and ``Address`` is not stored
--                     in any valid cache line.
entity cache_tagunit_par is
	generic (
		REPLACEMENT_POLICY : string   := "LRU";
		CACHE_LINES        : positive := 32;
		ASSOCIATIVITY      : positive := 32;
		ADDRESS_BITS       : positive := 8
	);
	port (
		Clock            : in  std_logic;
		Reset            : in  std_logic;
		Replace          : in  std_logic;
		ReplaceLineIndex : out std_logic_vector(log2ceilnz(CACHE_LINES) - 1 downto 0);
		OldAddress       : out std_logic_vector(ADDRESS_BITS - 1 downto 0);
		Request          : in  std_logic;
		ReadWrite        : in  std_logic;
		Invalidate       : in  std_logic;
		Address          : in  std_logic_vector(ADDRESS_BITS - 1 downto 0);
		LineIndex        : out std_logic_vector(log2ceilnz(CACHE_LINES) - 1 downto 0);
		TagHit           : out std_logic;
		TagMiss          : out std_logic
	);
end entity;
-- RTL implementation. Generate statements in the architecture body select the
-- full-associative, direct-mapped or set-associative variant depending on
-- CACHE_LINES and ASSOCIATIVITY.
architecture rtl of cache_tagunit_par is
	-- Attribute declaration only; it is not applied to any object within this
	-- architecture.
	attribute KEEP : boolean;
	-- Number of cache sets (integer division). Because the constant is of
	-- subtype positive, elaboration fails if ASSOCIATIVITY exceeds CACHE_LINES.
	constant SETS : positive := CACHE_LINES / ASSOCIATIVITY;
-- Returns true if the unsigned value contains metalogical values.
-- A similar function is_x(unsigned) is only shipped with VHDL'08, so the
-- value is converted to std_logic_vector and checked with the is_x() from
-- std_logic_1164.
--
-- value  : vector to check.
-- return : result of is_x() in simulation; constant false for synthesis tools
--          which honor the translate_off / translate_on pragmas.
function contains_x(value : unsigned) return boolean is
begin
	-- Use pragma to get rid of meaningless Quartus warning 10325 "ignored
	-- choice containing meta-values ..." in function is_x(std_logic_vector).
	-- Synthesis tools which ignore this pragma should return false for is_x().
	-- synthesis translate_off
	return is_x(std_logic_vector(value));
	-- synthesis translate_on
	return false; -- no meta-values in hardware here
end function;
begin
-- ===========================================================================
-- Full-Associative Cache
-- ===========================================================================
-- Elaborated when CACHE_LINES = ASSOCIATIVITY, i.e. every cache line is a way
-- of one single set. The complete address is stored as tag and compared
-- against all cache lines in parallel.
genFA : if CACHE_LINES = ASSOCIATIVITY generate
	constant TAG_BITS : positive := ADDRESS_BITS;
	constant WAY_BITS : positive := log2ceilnz(ASSOCIATIVITY);

	subtype T_TAG_LINE is std_logic_vector(TAG_BITS - 1 downto 0);
	type T_TAG_LINE_VECTOR is array (natural range <>) of T_TAG_LINE;

	-- Storage: one tag and one valid bit per cache line.
	signal TagMemory   : T_TAG_LINE_VECTOR(CACHE_LINES - 1 downto 0);
	signal ValidMemory : std_logic_vector(CACHE_LINES - 1 downto 0) := (others => '0');

	-- Lookup results.
	signal TagHits   : std_logic_vector(CACHE_LINES - 1 downto 0); -- includes Valid
	signal HitWay    : unsigned(WAY_BITS - 1 downto 0);
	signal TagHit_i  : std_logic; -- includes Valid and Request
	signal TagMiss_i : std_logic; -- includes Valid and Request

	-- Way selected by the replacement policy.
	signal Policy_ReplaceWay : std_logic_vector(WAY_BITS - 1 downto 0);
	signal ReplaceWay_us     : unsigned(WAY_BITS - 1 downto 0);
begin
	-- Parallel tag comparators; the hit vector is also converted to a binary
	-- index (cache line address). A process is used, so that "onehot2bin" does
	-- not report false errors in simulation due to delta-cycle updates.
	Compare : process(Address, TagMemory, ValidMemory)
		variable way_match : std_logic_vector(CACHE_LINES - 1 downto 0); -- includes Valid
	begin
		for idx in way_match'range loop
			way_match(idx) := to_sl((TagMemory(idx) = Address) and (ValidMemory(idx) = '1'));
		end loop;

		HitWay  <= onehot2bin(way_match, 0);
		TagHits <= way_match;
	end process Compare;

	-- Tag and valid-bit registers.
	Update : process(Clock)
	begin
		if rising_edge(Clock) then
			-- The tag write is independent of Reset.
			if Replace = '1' then
				TagMemory(to_integer(ReplaceWay_us)) <= Address;
			end if;

			if Reset = '1' then
				ValidMemory <= (others => '0');
			else
				for idx in ValidMemory'range loop
					if (Replace = '1') and (ReplaceWay_us = idx) then
						-- line is (re-)filled
						ValidMemory(idx) <= '1';
					elsif (Invalidate = '1') and (TagHits(idx) = '1') then
						-- Takes the value of Replace: '0' for a plain invalidate.
						-- NOTE(review): this path uses TagHits (without Request),
						-- whereas genDM uses TagHit_i (with Request) -- confirm intent.
						ValidMemory(idx) <= Replace;
					end if;
				end loop;
			end if;
		end if;
	end process Update;

	-- Replacement policy; it selects the way to replace next.
	Policy : entity PoC.cache_replacement_policy
		generic map (
			CACHE_WAYS         => ASSOCIATIVITY,
			REPLACEMENT_POLICY => REPLACEMENT_POLICY
		)
		port map (
			Clock      => Clock,
			Reset      => Reset,
			TagAccess  => TagHit_i,
			ReadWrite  => ReadWrite,
			Invalidate => Invalidate,
			HitWay     => std_logic_vector(HitWay),
			Replace    => Replace,
			ReplaceWay => Policy_ReplaceWay
		);

	ReplaceWay_us <= unsigned(Policy_ReplaceWay);

	-- Hit / miss are only signalled while a request is active.
	TagMiss_i <= (not slv_or(TagHits)) and Request;
	TagHit_i  <= slv_or(TagHits) and Request;

	-- Lookup outputs.
	TagHit    <= TagHit_i;
	TagMiss   <= TagMiss_i;
	LineIndex <= std_logic_vector(HitWay);

	-- Replace outputs; 'X' is driven in simulation if the way is undefined.
	ReplaceLineIndex <= Policy_ReplaceWay;
	OldAddress       <= (others => 'X') when contains_x(ReplaceWay_us) else
	                    TagMemory(to_integer(ReplaceWay_us));
end generate;
-- ===========================================================================
-- Direct-Mapped Cache
-- ===========================================================================
-- Elaborated when ASSOCIATIVITY = 1. The lower INDEX_BITS of ``Address`` select
-- the only candidate cache line; the remaining upper bits form the tag which is
-- stored in and compared against TagMemory. No replacement policy is
-- instantiated and ``ReadWrite`` is not used here.
--
-- NOTE(review): for CACHE_LINES = ASSOCIATIVITY = 1 the condition of genFA is
-- true as well, so both generate statements would drive the same outputs.
-- Presumably the assertion below rejects this configuration -- TODO confirm
-- against log2ceilnz(1).
genDM : if ASSOCIATIVITY = 1 generate
-- Addresses are split into a tag part and an index part.
constant INDEX_BITS : positive := log2ceilnz(CACHE_LINES);
-- Subtype positive: elaboration fails unless ADDRESS_BITS > INDEX_BITS.
constant TAG_BITS : positive := ADDRESS_BITS - INDEX_BITS;
subtype T_TAG_LINE is std_logic_vector(TAG_BITS-1 downto 0);
type T_TAG_LINE_VECTOR is array(natural range <>) of T_TAG_LINE;
signal Address_Tag : T_TAG_LINE;
signal Address_Index : unsigned(INDEX_BITS - 1 downto 0);
signal DM_TagHit : std_logic; -- includes Valid
signal TagMemory : T_TAG_LINE_VECTOR(CACHE_LINES-1 downto 0);
signal ValidMemory : std_logic_vector(CACHE_LINES-1 downto 0) := (others => '0');
-- If Address is fed from a register, then:
--
-- * the TagMemory must be implemented as distributed RAM on Xilinx
-- FPGAs because only this RAM has the intended mixed-port
-- read-during-write behavior. But even if ``ram_style`` is set,
-- synthesis sometimes generates a wrong netlist if KEEP_HIERARCHY is
-- set to off (default).
--
-- * Mapping the TagMemory to block RAM is however possible on Altera
-- FPGAs because Quartus adds the necessary bypass logic.
--
-- If Address is not fed from a register, then:
--
-- * distributed RAM will be used on Xilinx FPGAs anyway,
-- * LUTs and FFs are used on Altera FPGAs.
attribute ram_style : string; -- XST specific
attribute ram_style of TagMemory : signal is "distributed";
signal TagHit_i : std_logic;
signal TagMiss_i : std_logic;
signal Tag : T_TAG_LINE; -- read tag from memory
signal Valid : std_logic; -- read valid from memory
begin
-- The index must address all cache lines exactly: power of 2 required.
assert CACHE_LINES = 2**INDEX_BITS report "Unsupported number of cache lines." severity failure;
-- Split incoming 'Address'
Address_Tag <= Address(Address'left downto INDEX_BITS);
Address_Index <= unsigned(Address(INDEX_BITS-1 downto 0));
-- Access tag / valid memory and compare tags.
-- The read is combinational. 'X' is driven instead if contains_x() reports
-- meta-values in the index, which avoids calling to_integer() on them.
Tag <= (others => 'X') when contains_x(Address_Index) else
TagMemory (to_integer(Address_Index));
Valid <= 'X' when contains_x(Address_Index) else
ValidMemory(to_integer(Address_Index));
DM_TagHit <= to_sl(Tag = Address_Tag) and Valid;
-- Memory update at the rising clock edge.
process(Clock)
begin
if rising_edge(Clock) then
-- The tag write is independent of Reset.
if (Replace = '1') then
TagMemory(to_integer(Address_Index)) <= Address_Tag;
end if;
-- Reset has priority and clears all valid bits. Otherwise the addressed
-- valid bit takes the value of Replace, either on a replace command or
-- when a hit (TagHit_i includes Request) is invalidated.
if Reset = '1' then
ValidMemory <= (others => '0');
elsif (Replace = '1') or (TagHit_i = '1' and Invalidate = '1') then
ValidMemory(to_integer(Address_Index)) <= Replace; -- clear when Invalidate
end if;
end if;
end process;
-- hit/miss calculation
-- Both are only signalled while Request is high.
TagHit_i <= DM_TagHit and Request;
TagMiss_i <= not DM_TagHit and Request;
-- outputs
-- The line found and the line to replace are both the line selected by the
-- index part of the address.
LineIndex <= std_logic_vector(Address_Index);
TagHit <= TagHit_i;
TagMiss <= TagMiss_i;
ReplaceLineIndex <= std_logic_vector(Address_Index);
-- Old address = stored tag concatenated with the index.
OldAddress <= Tag & std_logic_vector(Address_Index);
end generate;
-- ===========================================================================
-- Set-Associative Cache
-- ===========================================================================
-- Elaborated when ASSOCIATIVITY > 1 and there is more than one set. The lower
-- INDEX_BITS of ``Address`` select the cache set, the remaining upper bits form
-- the tag. Each way has its own tag / valid memory with one entry per set, and
-- each set has its own replacement-policy instance.
genSA : if (ASSOCIATIVITY > 1) and (SETS > 1) generate
-- Addresses are split into a tag part and an index part.
constant CACHE_SETS : positive := CACHE_LINES / ASSOCIATIVITY;
constant INDEX_BITS : positive := log2ceilnz(CACHE_SETS);
-- Subtype positive: elaboration fails unless ADDRESS_BITS > INDEX_BITS.
constant TAG_BITS : positive := ADDRESS_BITS - INDEX_BITS;
constant WAY_BITS : positive := log2ceilnz(ASSOCIATIVITY);
subtype T_TAG_LINE is std_logic_vector(TAG_BITS-1 downto 0);
type T_TAG_LINE_VECTOR is array(natural range <>) of T_TAG_LINE;
type T_WAY_VECTOR is array(natural range<>) of std_logic_vector(WAY_BITS-1 downto 0);
-- Split address
signal Address_Tag : T_TAG_LINE;
signal Address_Index : unsigned(INDEX_BITS - 1 downto 0);
-- Way-specific signals
signal TagHits : std_logic_vector(ASSOCIATIVITY-1 downto 0); -- includes Valid
signal OldTags : T_TAG_LINE_VECTOR(ASSOCIATIVITY-1 downto 0);
-- Cache-set specific signals
signal CS_TagAccess : std_logic_vector(CACHE_SETS-1 downto 0);
signal CS_Invalidate : std_logic_vector(CACHE_SETS-1 downto 0);
signal CS_Replace : std_logic_vector(CACHE_SETS-1 downto 0);
signal Policy_ReplaceWay : T_WAY_VECTOR(CACHE_SETS-1 downto 0);
-- Way where hit occurs and way to replace
signal HitWay : unsigned(WAY_BITS-1 downto 0);
signal ReplaceWay : unsigned(WAY_BITS-1 downto 0);
signal TagHit_i : std_logic;
signal TagMiss_i : std_logic;
begin
-- The index must address all cache sets exactly: power of 2 required.
assert CACHE_SETS = 2**INDEX_BITS report "Unsupported number of cache-sets." severity failure;
----------------------------------------------------------------------------
-- Split incoming 'Address'
-- Enable only one cache-set
----------------------------------------------------------------------------
Address_Tag <= Address(Address'left downto INDEX_BITS);
Address_Index <= unsigned(Address(INDEX_BITS-1 downto 0));
----------------------------------------------------------------------------
-- Generate tag-memory and comparators for each way
----------------------------------------------------------------------------
genWay : for way in 0 to ASSOCIATIVITY-1 generate
-- Per-way storage: one tag and one valid bit for each cache set.
signal TagMemory : T_TAG_LINE_VECTOR(CACHE_SETS-1 downto 0);
signal ValidMemory : std_logic_vector(CACHE_SETS-1 downto 0) := (others => '0');
-- If Address is fed from a register, then:
--
-- * the TagMemory must be implemented as distributed RAM on Xilinx
-- FPGAs because only this RAM has the intended mixed-port
-- read-during-write behavior. But even if ``ram_style`` is set,
-- synthesis sometimes generates a wrong netlist if KEEP_HIERARCHY is
-- set to off (default).
--
-- * Mapping the TagMemory to block RAM is however possible on Altera
-- FPGAs because Quartus adds the necessary bypass logic.
--
-- If Address is not fed from a register, then:
--
-- * distributed RAM will be used on Xilinx FPGAs anyway,
-- * LUTs and FFs are used on Altera FPGAs.
attribute ram_style : string; -- XST specific
attribute ram_style of TagMemory : signal is "distributed";
signal Tag : T_TAG_LINE; -- read tag from memory
signal Valid : std_logic; -- read valid from memory
begin
-- Access tag / valid memory and compare tags.
-- The read is combinational. 'X' is driven instead if contains_x() reports
-- meta-values in the index, which avoids calling to_integer() on them.
Tag <= (others => 'X') when contains_x(Address_Index) else
TagMemory (to_integer(Address_Index));
Valid <= 'X' when contains_x(Address_Index) else
ValidMemory(to_integer(Address_Index));
TagHits(way) <= to_sl(Tag = Address_Tag) and Valid;
-- memory update
-- Only the way selected by ReplaceWay is written on a replace command.
-- For the valid bit, Reset has priority over replace, and replace has
-- priority over invalidate.
-- NOTE(review): the invalidate path uses TagHits(way), which does not
-- include Request, whereas genDM uses TagHit_i (with Request) -- confirm
-- intent.
process (Clock) is
begin -- process
if rising_edge(Clock) then
if Replace = '1' and ReplaceWay = way then
TagMemory(to_integer(Address_Index)) <= Address_Tag;
end if;
if Reset = '1' then
ValidMemory <= (others => '0');
elsif Replace = '1' and ReplaceWay = way then
ValidMemory(to_integer(Address_Index)) <= '1';
elsif Invalidate = '1' and TagHits(way) = '1' then
ValidMemory(to_integer(Address_Index)) <= '0';
end if;
end if;
end process;
-- old address when replacing
OldTags(way) <= Tag;
end generate genWay;
-- Convert the per-way hit vector to the binary number of the hitting way.
HitWay <= onehot2bin(TagHits, 0);
----------------------------------------------------------------------------
-- Global hit / miss calculation and output
----------------------------------------------------------------------------
-- Hit and miss are only signalled while Request is high.
TagHit_i <= slv_or(TagHits) and Request;
TagMiss_i <= not slv_or(TagHits) and Request;
-- Line index = hitting way (upper bits) concatenated with set index (lower bits).
LineIndex <= std_logic_vector(HitWay) & std_logic_vector(Address_Index);
TagHit <= TagHit_i;
TagMiss <= TagMiss_i;
----------------------------------------------------------------------------
-- Generate policy for each cache-set
----------------------------------------------------------------------------
-- The three processes below route TagHit_i, Invalidate and Replace to the
-- element of the addressed cache set only; all other elements are '0'.
-- If the index contains meta-values (simulation), all elements stay '0'.
process(Address_Index, TagHit_i)
begin
CS_TagAccess <= (others => '0');
if contains_x(Address_Index) then -- for simulation only
null;--TODO: CS_TagAccess <= (others => 'X');
else
CS_TagAccess(to_integer(Address_Index)) <= TagHit_i;
end if;
end process;
process(Address_Index, Invalidate)
begin
CS_Invalidate <= (others => '0');
if contains_x(Address_Index) then -- for simulation only
null;--TODO: CS_Invalidate <= (others => 'X');
else
CS_Invalidate(to_integer(Address_Index)) <= Invalidate;
end if;
end process;
process(Address_Index, Replace)
begin
CS_Replace <= (others => '0');
if contains_x(Address_Index) then -- for simulation only
null;--TODO: CS_Replace <= (others => 'X');
else
CS_Replace(to_integer(Address_Index)) <= Replace;
end if;
end process;
-- One replacement-policy instance per cache set. ReadWrite and HitWay are
-- connected to all instances; the CS_* signals select the addressed set.
genSet : for cs in 0 to CACHE_SETS-1 generate
begin
Policy : entity PoC.cache_replacement_policy
generic map (
REPLACEMENT_POLICY => REPLACEMENT_POLICY,
CACHE_WAYS => ASSOCIATIVITY
)
port map (
Clock => Clock,
Reset => Reset,
Replace => CS_Replace(cs),
ReplaceWay => Policy_ReplaceWay(cs), -- way to replace
TagAccess => CS_TagAccess(cs),
ReadWrite => ReadWrite,
Invalidate => CS_Invalidate(cs),
HitWay => std_logic_vector(HitWay) -- accessed way
);
end generate genSet;
-- Select the replace-way reported by the policy of the addressed cache set.
ReplaceWay <= (others => 'X') when contains_x(Address_Index) else
unsigned(Policy_ReplaceWay(to_integer(Address_Index)));
----------------------------------------------------------------------------
-- Replace-specific outputs
----------------------------------------------------------------------------
-- Line to replace = replace-way (upper bits) concatenated with set index.
-- Old address = tag read from the replace-way concatenated with set index.
ReplaceLineIndex <= std_logic_vector(ReplaceWay) & std_logic_vector(Address_Index);
OldAddress <= (others => 'X') when contains_x(ReplaceWay) else
OldTags(to_integer(ReplaceWay)) & std_logic_vector(Address_Index);
end generate;
end architecture;