-- EMACS settings: -*- tab-width: 2; indent-tabs-mode: t -*-
-- vim: tabstop=2:shiftwidth=2:noexpandtab
-- kate: tab-width 2; replace-tabs off; indent-width 2;
-- =============================================================================
-- Authors: Martin Zabel
-- Patrick Lehmann
--
-- Entity: Cache with parallel tag-unit and data memory.
--
-- Description:
--
-- Cache with parallel tag-unit and data memory. For the data memory,
-- :ref:`IP:ocram_sp` is used.
--
-- Configuration
-- *************
--
-- +--------------------+----------------------------------------------------+
-- | Parameter | Description |
-- +====================+====================================================+
-- | REPLACEMENT_POLICY | Replacement policy. For supported policies see |
-- | | PoC.cache_replacement_policy. |
-- +--------------------+----------------------------------------------------+
-- | CACHE_LINES | Number of cache lines. |
-- +--------------------+----------------------------------------------------+
-- | ASSOCIATIVITY | Associativity of the cache. |
-- +--------------------+----------------------------------------------------+
-- | ADDR_BITS | Number of address bits. Each address identifies |
-- | | exactly one cache line in memory. |
-- +--------------------+----------------------------------------------------+
-- | DATA_BITS | Size of a cache line in bits. |
-- | | DATA_BITS must be divisible by 8. |
-- +--------------------+----------------------------------------------------+
--
--
-- Command truth table
-- *******************
--
-- +---------+-----------+-------------+---------+---------------------------------+
-- | Request | ReadWrite | Invalidate | Replace | Command |
-- +=========+===========+=============+=========+=================================+
-- | 0 | 0 | 0 | 0 | None |
-- +---------+-----------+-------------+---------+---------------------------------+
-- | 1 | 0 | 0 | 0 | Read cache line |
-- +---------+-----------+-------------+---------+---------------------------------+
-- | 1 | 1 | 0 | 0 | Update cache line |
-- +---------+-----------+-------------+---------+---------------------------------+
-- | 1 | 0 | 1 | 0 | Read cache line and discard it |
-- +---------+-----------+-------------+---------+---------------------------------+
-- | 1 | 1 | 1 | 0 | Write cache line and discard it |
-- +---------+-----------+-------------+---------+---------------------------------+
-- | 0 | 0 | 0 | 1 | Read cache line before replace. |
-- +---------+-----------+-------------+---------+---------------------------------+
-- | 0 | 1 | 0 | 1 | Replace cache line. |
-- +---------+-----------+-------------+---------+---------------------------------+
--
--
-- Operation
-- *********
--
-- All inputs are synchronous to the rising-edge of the clock `clock`.
--
-- All commands use ``Address`` to lookup (request) or replace a cache line.
-- ``Address`` and ``OldAddress`` do not include the word/byte select part.
-- Each command is completed within one clock cycle, but outputs are delayed as
-- described below.
--
-- Upon requests, the outputs ``CacheMiss`` and ``CacheHit`` indicate (high-active)
-- whether the ``Address`` is stored within the cache, or not. Both outputs have a
-- latency of one clock cycle (pipelined) if ``HIT_MISS_REG`` is true, otherwise the
-- result is outputted immediately (combinational).
--
-- Upon writing a cache line, the new content is given by ``CacheLineIn``.
-- Only the bytes which are not masked, i.e. the corresponding bit in WriteMask
-- is '0', are actually written.
--
-- Upon reading a cache line, the current content is outputed on ``CacheLineOut``
-- with a latency of one clock cycle.
--
-- Replacing a cache line requires two steps, both with ``Replace = '1'``:
--
-- 1. Read old contents of cache line by setting ``ReadWrite`` to '0'. The old
-- content is outputed on ``CacheLineOut`` and the old tag on ``OldAddress``,
-- both with a latency of one clock cycle.
--
-- 2. Write new cache line by setting ``ReadWrite`` to '1'. The new content is
-- given by ``CacheLineIn``. All bytes shall be written, i.e.
-- ``WriteMask = 0``. The new cache line content will be outputed
-- again on ``CacheLineOut`` in the next clock cycle (latency = 1).
--
-- .. WARNING::
--
-- If the design is synthesized with Xilinx ISE / XST, then the synthesis
-- option "Keep Hierarchy" must be set to SOFT or TRUE.
--
-- License:
-- =============================================================================
-- Copyright 2007-2016 Technische Universitaet Dresden - Germany
-- Chair of VLSI-Design, Diagnostics and Architecture
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- =============================================================================
library IEEE;
use IEEE.STD_LOGIC_1164.all;
use IEEE.NUMERIC_STD.all;
library PoC;
use PoC.utils.all;
use PoC.vectors.all;
entity [docs]cache_par2 is
generic (
REPLACEMENT_POLICY : string := "LRU";
CACHE_LINES : positive := 32;
ASSOCIATIVITY : positive := 32;
ADDR_BITS : positive := 8;
DATA_BITS : positive := 8;
HIT_MISS_REG : boolean := true -- must be true for Cocotb.
);
port (
Clock : in std_logic;
Reset : in std_logic;
Request : in std_logic;
ReadWrite : in std_logic;
WriteMask : in std_logic_vector(DATA_BITS/8 - 1 downto 0) := (others => '0');
Invalidate : in std_logic;
Replace : in std_logic;
Address : in std_logic_vector(ADDR_BITS-1 downto 0);
CacheLineIn : in std_logic_vector(DATA_BITS - 1 downto 0);
CacheLineOut : out std_logic_vector(DATA_BITS - 1 downto 0);
CacheHit : out std_logic := '0';
CacheMiss : out std_logic := '0';
OldAddress : out std_logic_vector(ADDR_BITS-1 downto 0)
);
end entity;
architecture [docs]rtl of cache_par2 is
attribute KEEP : boolean;
constant LINE_INDEX_BITS : positive := log2ceilnz(CACHE_LINES);
subtype T_CACHE_LINE is std_logic_vector(DATA_BITS - 1 downto 0);
type T_CACHE_LINE_VECTOR is array (natural range <>) of T_CACHE_LINE;
-- look-up (request)
signal TU_LineIndex : std_logic_vector(LINE_INDEX_BITS - 1 downto 0);
signal TU_TagHit : std_logic;
signal TU_TagMiss : std_logic;
-- replace
signal ReplaceWrite : std_logic;
signal TU_ReplaceLineIndex : std_logic_vector(LINE_INDEX_BITS - 1 downto 0);
signal TU_OldAddress : std_logic_vector(OldAddress'range);
-- data memory
signal MemoryIndex_us : unsigned(LINE_INDEX_BITS - 1 downto 0);
signal MemoryAccess : std_logic;
begin
ReplaceWrite <= Replace and ReadWrite;
-- Cache TagUnit
TU : entity PoC.cache_tagunit_par
generic map (
REPLACEMENT_POLICY => REPLACEMENT_POLICY,
CACHE_LINES => CACHE_LINES,
ASSOCIATIVITY => ASSOCIATIVITY,
ADDRESS_BITS => ADDR_BITS
)
port map (
Clock => Clock,
Reset => Reset,
Replace => ReplaceWrite,
ReplaceLineIndex => TU_ReplaceLineIndex,
OldAddress => TU_OldAddress,
Request => Request,
ReadWrite => ReadWrite,
Invalidate => Invalidate,
Address => Address,
LineIndex => TU_LineIndex,
TagHit => TU_TagHit,
TagMiss => TU_TagMiss
);
-- Address selector
MemoryIndex_us <= unsigned(TU_LineIndex) when Request = '1' else
unsigned(TU_ReplaceLineIndex);
MemoryAccess <= (Request and TU_TagHit) or Replace;
-- Data Memory
gLane: for i in 0 to DATA_BITS/8 - 1 generate
signal we : std_logic;
begin
we <= ReadWrite and not WriteMask(i);
data_mem: entity work.ocram_sp
generic map (
A_BITS => LINE_INDEX_BITS,
D_BITS => 8, -- 8 bit per lane
FILENAME => "")
port map (
clk => Clock,
ce => MemoryAccess,
we => we,
a => MemoryIndex_us,
d => CacheLineIn (i*8+7 downto i*8),
q => CacheLineOut(i*8+7 downto i*8));
end generate gLane;
-- Hit / Miss
gNoHitMissReg: if not HIT_MISS_REG generate
CacheMiss <= TU_TagMiss;
CacheHit <= TU_TagHit;
end generate gNoHitMissReg;
gHitMissReg: if HIT_MISS_REG generate -- Pipelined outputs.
process(Clock)
begin
if rising_edge(Clock) then
if Reset = '1' then
CacheMiss <= '0';
CacheHit <= '0';
else
CacheMiss <= TU_TagMiss;
CacheHit <= TU_TagHit;
end if;
end if;
end process;
end generate gHitMissReg;
-- Same latency as CacheLineOut
OldAddress <= TU_OldAddress when rising_edge(Clock);
end architecture;