-- EMACS settings: -*- tab-width: 2; indent-tabs-mode: t -*-
-- vim: tabstop=2:shiftwidth=2:noexpandtab
-- kate: tab-width 2; replace-tabs off; indent-width 2;
-- =============================================================================
-- Authors: Patrick Lehmann
-- Martin Zabel
--
-- Entity: Cache with parallel tag-unit and data memory.
--
-- Description:
--
-- Implements a cache with parallel tag-unit and data memory.
--
-- .. NOTE::
-- This component infers a single-port memory with read-first behavior, that
-- is, upon writes the old-data is returned on the read output. Such memory
-- (e.g. LUT-RAM) is not available on all devices. Thus, synthesis may
-- infer a lot of flip-flops plus multiplexers instead, which is very inefficient.
-- It is recommended to use :doc:`PoC.cache.par2 <cache_par2>` instead which has a
-- slightly different interface.
--
-- All inputs are synchronous to the rising-edge of the clock ``Clock``.
--
-- **Command truth table:**
--
-- +---------+-----------+-------------+---------+---------------------------------+
-- | Request | ReadWrite | Invalidate | Replace | Command |
-- +=========+===========+=============+=========+=================================+
-- | 0 | 0 | 0 | 0 | None |
-- +---------+-----------+-------------+---------+---------------------------------+
-- | 1 | 0 | 0 | 0 | Read cache line |
-- +---------+-----------+-------------+---------+---------------------------------+
-- | 1 | 1 | 0 | 0 | Update cache line |
-- +---------+-----------+-------------+---------+---------------------------------+
-- | 1 | 0 | 1 | 0 | Read cache line and discard it |
-- +---------+-----------+-------------+---------+---------------------------------+
-- | 1 | 1 | 1 | 0 | Write cache line and discard it |
-- +---------+-----------+-------------+---------+---------------------------------+
-- | 0 | | 0 | 1 | Replace cache line. |
-- +---------+-----------+-------------+---------+---------------------------------+
--
-- All commands use ``Address`` to lookup (request) or replace a cache line.
-- ``Address`` and ``OldAddress`` do not include the word/byte select part.
-- Each command is completed within one clock cycle, but outputs are delayed as
-- described below.
--
-- Upon requests, the outputs ``CacheMiss`` and ``CacheHit`` indicate (high-active)
-- whether the ``Address`` is stored within the cache, or not. Both outputs have a
-- latency of one clock cycle.
--
-- Upon writing a cache line, the new content is given by ``CacheLineIn``.
-- Upon reading a cache line, the current content is output on ``CacheLineOut``
-- with a latency of one clock cycle.
--
-- Upon replacing a cache line, the new content is given by ``CacheLineIn``. The
-- old content is output on ``CacheLineOut`` and the old tag on ``OldAddress``,
-- both with a latency of one clock cycle.
--
-- .. WARNING::
--
-- If the design is synthesized with Xilinx ISE / XST, then the synthesis
-- option "Keep Hierarchy" must be set to SOFT or TRUE.
--
-- License:
-- =============================================================================
-- Copyright 2007-2016 Technische Universitaet Dresden - Germany
-- Chair of VLSI-Design, Diagnostics and Architecture
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- =============================================================================
library IEEE;
use IEEE.STD_LOGIC_1164.all;
use IEEE.NUMERIC_STD.all;
library PoC;
use PoC.utils.all;
use PoC.vectors.all;
-- Interface of the cache with parallel tag-unit and data memory.
--
-- All inputs are sampled on the rising edge of `Clock`; every output is
-- registered and therefore appears one clock cycle after the command.
entity cache_par is
	generic (
		-- Replacement policy name, forwarded unchanged to the tag-unit.
		REPLACEMENT_POLICY : string   := "LRU";
		-- Number of cache lines held in the data memory (also forwarded to the tag-unit).
		CACHE_LINES        : positive := 32;--1024;
		-- Associativity, forwarded unchanged to the tag-unit.
		ASSOCIATIVITY      : positive := 32;--4;
		-- Width of `Address` / `OldAddress` (word/byte select part not included).
		ADDRESS_BITS       : positive := 8; --32-6;
		-- Width of one cache line (`CacheLineIn` / `CacheLineOut`).
		DATA_BITS          : positive := 8  --64*8
	);
	port (
		Clock        : in  std_logic;
		-- Synchronous reset; forwarded to the tag-unit and clears `CacheHit` / `CacheMiss`.
		Reset        : in  std_logic;

		-- Command inputs, see the command truth table in the file header.
		Request      : in  std_logic;
		ReadWrite    : in  std_logic;
		Invalidate   : in  std_logic;
		Replace      : in  std_logic;
		-- Address of the cache line to look up (request) or to install (replace).
		Address      : in  std_logic_vector(ADDRESS_BITS - 1 downto 0);

		-- New cache line content for write requests and replacements.
		CacheLineIn  : in  std_logic_vector(DATA_BITS - 1 downto 0);
		-- Content of the addressed line before any write of the same cycle (read-first).
		CacheLineOut : out std_logic_vector(DATA_BITS - 1 downto 0);
		-- Registered hit / miss indication of the tag-unit (high-active).
		CacheHit     : out std_logic := '0';
		CacheMiss    : out std_logic := '0';
		-- Registered old address reported by the tag-unit (old tag upon replace).
		OldAddress   : out std_logic_vector(ADDRESS_BITS - 1 downto 0)
	);
end entity;
-- RTL implementation: a tag-unit (`PoC.cache_tagunit_par`) selects the cache
-- line, and a single-port, read-first memory inferred from `CacheMemory`
-- stores the line data. All outputs are registered in one clocked process.
architecture rtl of cache_par is
	-- NOTE(review): declared but never applied to any object in this
	-- architecture; presumably left over for synthesis tools -- TODO confirm
	-- whether it is still needed.
	attribute KEEP : boolean;

	-- Number of bits needed to index one of the CACHE_LINES lines.
	constant LINE_INDEX_BITS : positive := log2ceilnz(CACHE_LINES);

	subtype T_CACHE_LINE        is std_logic_vector(DATA_BITS - 1 downto 0);
	type    T_CACHE_LINE_VECTOR is array (natural range <>) of T_CACHE_LINE;

	-- look-up (request)
	signal TU_LineIndex        : std_logic_vector(LINE_INDEX_BITS - 1 downto 0);
	signal TU_TagHit           : std_logic;
	signal TU_TagMiss          : std_logic;

	-- replace
	signal TU_ReplaceLineIndex : std_logic_vector(LINE_INDEX_BITS - 1 downto 0);
	signal TU_OldAddress       : std_logic_vector(ADDRESS_BITS - 1 downto 0);

	-- data memory and its (shared read/write) line index
	signal MemoryIndex_us      : unsigned(LINE_INDEX_BITS - 1 downto 0);
	signal CacheMemory         : T_CACHE_LINE_VECTOR(CACHE_LINES - 1 downto 0);

begin

	-- Cache TagUnit
	TU : entity PoC.cache_tagunit_par
		generic map (
			REPLACEMENT_POLICY => REPLACEMENT_POLICY,
			CACHE_LINES        => CACHE_LINES,
			ASSOCIATIVITY      => ASSOCIATIVITY,
			ADDRESS_BITS       => ADDRESS_BITS
		)
		port map (
			Clock            => Clock,
			Reset            => Reset,

			Replace          => Replace,
			ReplaceLineIndex => TU_ReplaceLineIndex,
			OldAddress       => TU_OldAddress,

			Request          => Request,
			ReadWrite        => ReadWrite,
			Invalidate       => Invalidate,
			Address          => Address,
			LineIndex        => TU_LineIndex,
			TagHit           => TU_TagHit,
			TagMiss          => TU_TagMiss
		);

	-- Address selector: a request addresses the line found by the look-up,
	-- otherwise the line chosen by the tag-unit for replacement is addressed.
	MemoryIndex_us <= unsigned(TU_LineIndex) when Request = '1' else
	                  unsigned(TU_ReplaceLineIndex);

	process(Clock)
	begin
		if rising_edge(Clock) then
			-- Write on a write request that hits, or on any replace.
			if ((Request and TU_TagHit and ReadWrite) or Replace) = '1' then
				CacheMemory(to_integer(MemoryIndex_us)) <= CacheLineIn;
			end if;

			-- Single-port memory with read before write is required here.
			-- Cannot be mapped to `PoC.ocram_sdp`.
			-- The signal assignment above takes effect only after this process
			-- suspends, so the read below still returns the old line content.
			CacheLineOut <= CacheMemory(to_integer(MemoryIndex_us));

			-- Control outputs have same latency as cache line data.
			-- Only the hit / miss flags are cleared by the synchronous reset.
			if Reset = '1' then
				CacheMiss <= '0';
				CacheHit  <= '0';
			else
				CacheMiss <= TU_TagMiss;
				CacheHit  <= TU_TagHit;
			end if;

			-- Old address is registered to match the latency of `CacheLineOut`.
			OldAddress <= TU_OldAddress;
		end if;
	end process;
end architecture;