This adds a cache class that can be used as part of the memory hierarchy.

As an example, an instruction cache is added (with an optional flag) to
mpact_riscv and mpact_cheriot.

PiperOrigin-RevId: 678760853
Change-Id: I5e1290a0e325373577316afbc0448b64bdf45f22
diff --git a/mpact/sim/generic/BUILD b/mpact/sim/generic/BUILD
index fce7e43..11730ac 100644
--- a/mpact/sim/generic/BUILD
+++ b/mpact/sim/generic/BUILD
@@ -192,6 +192,7 @@
         ":internal",
         "//mpact/sim/proto:component_data_cc_proto",
         "@com_google_absl//absl/status",
+        "@com_google_absl//absl/strings:string_view",
     ],
 )
 
@@ -229,6 +230,8 @@
         "@com_google_absl//absl/container:btree",
         "@com_google_absl//absl/log",
         "@com_google_absl//absl/status",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/strings:string_view",
     ],
 )
 
diff --git a/mpact/sim/generic/component.cc b/mpact/sim/generic/component.cc
index 3a542f3..08b63bb 100644
--- a/mpact/sim/generic/component.cc
+++ b/mpact/sim/generic/component.cc
@@ -18,6 +18,11 @@
 #include <utility>
 
 #include "absl/log/log.h"
+#include "absl/status/status.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "mpact/sim/generic/config.h"
+#include "mpact/sim/generic/counters_base.h"
 
 namespace mpact {
 namespace sim {
diff --git a/mpact/sim/generic/component.h b/mpact/sim/generic/component.h
index 60ca8c1..7dcc52b 100644
--- a/mpact/sim/generic/component.h
+++ b/mpact/sim/generic/component.h
@@ -21,8 +21,9 @@
 
 #include "absl/container/btree_map.h"
 #include "absl/status/status.h"
+#include "absl/strings/string_view.h"
 #include "mpact/sim/generic/config.h"
-#include "mpact/sim/generic/counters.h"
+#include "mpact/sim/generic/counters_base.h"
 #include "mpact/sim/proto/component_data.pb.h"
 
 namespace mpact {
diff --git a/mpact/sim/generic/config.h b/mpact/sim/generic/config.h
index e20f854..ce7f8d6 100644
--- a/mpact/sim/generic/config.h
+++ b/mpact/sim/generic/config.h
@@ -18,11 +18,11 @@
 #include <cstdint>
 #include <functional>
 #include <string>
-#include <utility>
 #include <variant>
 #include <vector>
 
 #include "absl/status/status.h"
+#include "absl/strings/string_view.h"
 #include "mpact/sim/generic/variant_helper.h"
 #include "mpact/sim/proto/component_data.pb.h"
 
diff --git a/mpact/sim/generic/counters.h b/mpact/sim/generic/counters.h
index bf5b7ff..1c086cb 100644
--- a/mpact/sim/generic/counters.h
+++ b/mpact/sim/generic/counters.h
@@ -16,6 +16,7 @@
 #define MPACT_SIM_GENERIC_COUNTERS_H_
 
 #include <cstdint>
+#include <functional>
 #include <string>
 #include <type_traits>
 #include <utility>
@@ -23,7 +24,6 @@
 
 #include "absl/status/status.h"
 #include "absl/strings/str_cat.h"
-#include "absl/types/variant.h"
 #include "mpact/sim/generic/counters_base.h"
 #include "mpact/sim/generic/variant_helper.h"
 #include "mpact/sim/proto/component_data.pb.h"
diff --git a/mpact/sim/generic/instruction.h b/mpact/sim/generic/instruction.h
index 737ff63..60ab5d8 100644
--- a/mpact/sim/generic/instruction.h
+++ b/mpact/sim/generic/instruction.h
@@ -17,7 +17,6 @@
 
 #include <cstdint>
 #include <functional>
-#include <iterator>
 #include <string>
 #include <utility>
 #include <vector>
diff --git a/mpact/sim/util/memory/BUILD b/mpact/sim/util/memory/BUILD
index eb2b496..71ee338 100644
--- a/mpact/sim/util/memory/BUILD
+++ b/mpact/sim/util/memory/BUILD
@@ -65,3 +65,27 @@
         "@com_google_absl//absl/strings:str_format",
     ],
 )
+
+cc_library(
+    name = "cache",
+    srcs = [
+        "cache.cc",
+    ],
+    hdrs = [
+        "cache.h",
+    ],
+    copts = ["-O3"],
+    deps = [
+        ":memory",
+        "//mpact/sim/generic:component",
+        "//mpact/sim/generic:core",
+        "//mpact/sim/generic:counters",
+        "//mpact/sim/generic:instruction",
+        "@com_google_absl//absl/functional:bind_front",
+        "@com_google_absl//absl/log",
+        "@com_google_absl//absl/log:check",
+        "@com_google_absl//absl/numeric:bits",
+        "@com_google_absl//absl/status",
+        "@com_google_absl//absl/strings",
+    ],
+)
diff --git a/mpact/sim/util/memory/cache.cc b/mpact/sim/util/memory/cache.cc
new file mode 100644
index 0000000..732841d
--- /dev/null
+++ b/mpact/sim/util/memory/cache.cc
@@ -0,0 +1,372 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mpact/sim/util/memory/cache.h"
+
+#include <cstdint>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "absl/functional/bind_front.h"
+#include "absl/log/check.h"
+#include "absl/log/log.h"
+#include "absl/numeric/bits.h"
+#include "absl/status/status.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
+#include "mpact/sim/generic/component.h"
+#include "mpact/sim/generic/instruction.h"
+#include "mpact/sim/util/memory/memory_interface.h"
+#include "mpact/sim/util/memory/tagged_memory_interface.h"
+
+namespace mpact::sim::util {
+
+using ::mpact::sim::generic::Component;
+
+// Main constructor. Untagged Load/Store requests are forwarded to `memory`.
+Cache::Cache(std::string name, Component *parent, MemoryInterface *memory)
+    : Component(name, parent),
+      read_hit_counter_("read_hit", 0ULL),
+      read_miss_counter_("read_miss", 0ULL),
+      write_hit_counter_("write_hit", 0ULL),
+      write_miss_counter_("write_miss", 0ULL),
+      dirty_line_writeback_counter_("dirty_line_writeback", 0ULL),
+      read_around_counter_("read_around", 0ULL),
+      write_around_counter_("write_around", 0ULL),
+      memory_(memory),
+      tagged_memory_(nullptr) {
+  // Register the counters with this component. Registration must succeed.
+  CHECK_OK(AddCounter(&read_hit_counter_));
+  CHECK_OK(AddCounter(&read_miss_counter_));
+  CHECK_OK(AddCounter(&write_hit_counter_));
+  CHECK_OK(AddCounter(&write_miss_counter_));
+  CHECK_OK(AddCounter(&dirty_line_writeback_counter_));
+  CHECK_OK(AddCounter(&read_around_counter_));
+  CHECK_OK(AddCounter(&write_around_counter_));
+  cache_inst_ = new Instruction(nullptr);  // Released in the destructor.
+  cache_inst_->set_semantic_function(absl::bind_front(&Cache::LoadChild, this));
+}
+
+// Constructor for use with a tagged memory interface.
+//
+// This delegates to the plain memory constructor, passing `tagged_memory` as
+// the MemoryInterface that the untagged Load/Store overloads forward to, so
+// that the counters are registered and cache_inst_ is created and bound to
+// LoadChild in one place. cache_inst_ must be valid for every Cache instance,
+// because the destructor releases it and the Load overloads pass it on to the
+// memory as the instruction to execute on completion.
+//
+// Arguments:
+//   name:          name of the cache component.
+//   parent:        parent component (may be nullptr).
+//   tagged_memory: tagged memory interface that requests are forwarded to
+//                  (may be nullptr, in which case nothing is forwarded).
+Cache::Cache(std::string name, Component *parent,
+             TaggedMemoryInterface *tagged_memory)
+    : Cache(name, parent, static_cast<MemoryInterface *>(tagged_memory)) {
+  // The delegated-to constructor leaves tagged_memory_ as nullptr, so set it
+  // here once the common initialization is done.
+  tagged_memory_ = tagged_memory;
+}
+
+// Shorthand constructors. Each one delegates to a constructor defined above.
+Cache::Cache(std::string name, MemoryInterface *memory)
+    : Cache(name, nullptr, memory) {}
+// As above, but for a tagged memory interface. There is no parent component.
+Cache::Cache(std::string name, TaggedMemoryInterface *tagged_memory)
+    : Cache(name, nullptr, tagged_memory) {}
+// No memory interface: the cast selects the plain memory overload.
+Cache::Cache(std::string name, Component *parent)
+    : Cache(name, parent, static_cast<MemoryInterface *>(nullptr)) {}
+// Neither parent nor memory: the cast selects the (name, parent) overload.
+Cache::Cache(std::string name)
+    : Cache(name, static_cast<Component *>(nullptr)) {}
+
+Cache::~Cache() {
+  delete[] cache_lines_;  // Allocated by Configure(); nullptr if never run.
+  cache_inst_->DecRef();  // Release the instruction bound to LoadChild.
+}
+
+// Parses `config` and (re)allocates the cache lines accordingly. If an error
+// status is returned, the previous configuration (if any) is left unchanged.
+absl::Status Cache::Configure(const std::string &config,
+                              CounterValueOutputBase<uint64_t> *cycle_counter) {
+  if (cycle_counter == nullptr) {
+    return absl::InvalidArgumentError("Cycle counter is null");
+  }
+  std::vector<std::string> config_vec = absl::StrSplit(config, ',');
+  if (config_vec.size() != 4) {
+    return absl::InvalidArgumentError(
+        "Invalid configuration - expected 4 comma separated fields");
+  }
+  // Require 1-19 leading digits; std::stoull throws on junk or overflow.
+  for (int i = 0; i < 3; i++) {
+    auto digits = config_vec[i].find_first_not_of("0123456789");
+    if (digits == std::string::npos) digits = config_vec[i].size();
+    if ((digits >= 1) && (digits <= 19)) continue;
+    return absl::InvalidArgumentError(
+        absl::StrCat("Invalid numeric value: '", config_vec[i], "'"));
+  }
+  // Compute the cache size in bytes, including any suffixes ('k', 'M', 'G').
+  std::string::size_type size;
+  uint64_t cache_size = std::stoull(config_vec[0], &size);
+  auto suffix = config_vec[0].substr(size);
+  if (suffix == "k") {
+    cache_size *= 1024;
+  } else if (suffix == "M") {
+    cache_size *= 1024 * 1024;
+  } else if (suffix == "G") {
+    cache_size *= 1024 * 1024 * 1024;
+  } else if (!suffix.empty()) {
+    return absl::InvalidArgumentError(
+        absl::StrCat("Invalid cache size suffix: '", suffix, "'"));
+  }
+  // Locals are used so that a failed call leaves the cache unchanged.
+  uint64_t line_size = std::stoull(config_vec[1], &size);
+  if (size != config_vec[1].size()) {
+    return absl::InvalidArgumentError("Invalid value for line size");
+  }
+  uint64_t num_sets = std::stoull(config_vec[2], &size);
+  if (size != config_vec[2].size()) {
+    return absl::InvalidArgumentError("Invalid value for number of sets");
+  }
+  if ((config_vec[3] != "true") && (config_vec[3] != "false")) {
+    return absl::InvalidArgumentError("Invalid write allocate value");
+  }
+  // Sanity check the cache parameters. has_single_bit() is false for zero.
+  if (!absl::has_single_bit(cache_size)) {
+    return absl::InvalidArgumentError("Cache size is not a power of 2");
+  }
+  if (!absl::has_single_bit(line_size)) {
+    return absl::InvalidArgumentError("Line size is not a power of 2");
+  }
+  if (num_sets != 0 && !absl::has_single_bit(num_sets)) {
+    return absl::InvalidArgumentError("Number of sets is not a power of 2");
+  }
+  if (line_size < 4) {
+    return absl::InvalidArgumentError("Line size must be at least 4 bytes");
+  }
+  if (cache_size < line_size) {
+    return absl::InvalidArgumentError("Cache size is less than line size");
+  }
+  uint64_t num_lines = cache_size / line_size;
+  if (num_sets == 0) num_sets = num_lines;  // 0 means fully associative.
+  if (num_sets > num_lines) {
+    return absl::InvalidArgumentError(
+        "Cache associativity is greater than the number of lines");
+  }
+  cycle_counter_ = cycle_counter;
+  write_allocate_ = (config_vec[3] == "true");
+  num_sets_ = num_sets;
+  delete[] cache_lines_;
+  cache_lines_ = new CacheLine[num_lines];
+  block_shift_ = absl::bit_width(line_size) - 1;
+  index_mask_ = (1ULL << (absl::bit_width(num_lines / num_sets_) - 1)) - 1;
+  set_shift_ = absl::bit_width(num_sets_) - 1;
+  return absl::OkStatus();
+}
+
+// Each of the following memory (and tagged memory) interface methods will call
+// the CacheLookup method to perform the cache access. If the access is a miss,
+// CacheLookup replaces a block in the cache. The memory request itself is then
+// forwarded to the memory interface provided to the constructor (if not
+// nullptr), with cache_inst_ standing in for the original instruction.
+void Cache::Load(uint64_t address, DataBuffer *db, Instruction *inst,
+                 ReferenceCount *context) {
+  (void)CacheLookup(address, db->size<uint8_t>(), /*is_read=*/true);
+  if (memory_ == nullptr) return;
+  // inst and context may be nullptr, so only reference the ones provided.
+  auto *cache_context = new CacheContext{context, db, inst, db->latency()};
+  if (context != nullptr) context->IncRef();
+  if (inst != nullptr) inst->IncRef();
+  db->set_latency(0);  // LoadChild restores the original latency.
+  memory_->Load(address, db, cache_inst_, cache_context);
+  cache_context->DecRef();
+}
+
+// Vector load: one cache lookup per element whose mask is set, then forward.
+void Cache::Load(DataBuffer *address_db, DataBuffer *mask_db, int el_size,
+                 DataBuffer *db, Instruction *inst, ReferenceCount *context) {
+  auto address_span = address_db->Get<uint64_t>();
+  auto mask_span = mask_db->Get<bool>();
+  for (int i = 0; i < address_db->size<uint64_t>(); i++) {
+    if (!mask_span[i]) continue;
+    (void)CacheLookup(address_span[i], el_size, /*is_read=*/true);
+  }
+  if (memory_ == nullptr) return;
+  // inst and context may be nullptr, so only reference the ones provided.
+  auto *cache_context = new CacheContext(context, db, inst, db->latency());
+  if (context != nullptr) context->IncRef();
+  if (inst != nullptr) inst->IncRef();
+  db->set_latency(0);  // LoadChild restores the original latency.
+  memory_->Load(address_db, mask_db, el_size, db, cache_inst_, cache_context);
+  cache_context->DecRef();
+}
+
+void Cache::Load(uint64_t address, DataBuffer *db, DataBuffer *tags,
+                 Instruction *inst, ReferenceCount *context) {
+  (void)CacheLookup(address, db->size<uint8_t>(), /*is_read=*/true);
+  if (tagged_memory_ == nullptr) return;
+  // Forward `tags` too, so that the tagged overload of the memory is used.
+  auto *cache_context = new CacheContext{context, db, inst, db->latency()};
+  if (context != nullptr) context->IncRef();
+  if (inst != nullptr) inst->IncRef();
+  db->set_latency(0);  // LoadChild restores the original latency.
+  tagged_memory_->Load(address, db, tags, cache_inst_, cache_context);
+  cache_context->DecRef();
+}
+
+void Cache::Store(uint64_t address, DataBuffer *db) {
+  // Model the write in the cache, then pass the store on to the memory.
+  (void)CacheLookup(address, db->size<uint8_t>(), /*is_read=*/false);
+  if (memory_ != nullptr) memory_->Store(address, db);
+}
+
+// Vector store: one cache lookup per element whose mask is set, after which
+// the whole request is forwarded to the memory (if there is one).
+void Cache::Store(DataBuffer *address_db, DataBuffer *mask_db, int el_size,
+                  DataBuffer *db) {
+  const auto addresses = address_db->Get<uint64_t>();
+  const auto enabled = mask_db->Get<bool>();
+  const auto num_elements = address_db->size<uint64_t>();
+  for (int element = 0; element < num_elements; element++) {
+    if (!enabled[element]) continue;
+    (void)CacheLookup(addresses[element], el_size, /*is_read=*/false);
+  }
+  if (memory_ != nullptr) memory_->Store(address_db, mask_db, el_size, db);
+}
+
+// Tagged store: model the write in the cache, then forward the data and the
+// tags to the tagged memory (if there is one).
+void Cache::Store(uint64_t address, DataBuffer *db, DataBuffer *tags) {
+  (void)CacheLookup(address, db->size<uint8_t>(), /*is_read=*/false);
+  if (tagged_memory_ != nullptr) tagged_memory_->Store(address, db, tags);
+}
+
+// This is the semantic function that is bound to the cache_inst_ instruction,
+// which the Load methods pass to the memory in place of the original
+// instruction. It hands the result back to the processor by executing the
+// original instruction, right away or after the original db latency.
+void Cache::LoadChild(const Instruction *inst) {
+  auto *cache_context = static_cast<CacheContext *>(inst->context());
+  auto *context = cache_context->context;
+  auto *db = cache_context->db;
+  auto *og_inst = cache_context->inst;
+  // Reset the db latency to the original value.
+  db->set_latency(cache_context->latency);
+  // Executes the original instruction (if any) and drops the references that
+  // Load took on it and on its context. The CacheContext itself is released
+  // by Load, which created it, so it must not be released here as well.
+  auto complete = [og_inst, context]() {
+    if (og_inst != nullptr) og_inst->Execute(context);
+    if (context != nullptr) context->DecRef();
+    if (og_inst != nullptr) og_inst->DecRef();
+  };
+  if ((og_inst != nullptr) && (db->latency() > 0)) {
+    og_inst->state()->function_delay_line()->Add(db->latency(), complete);
+  } else {
+    complete();
+  }
+}
+
+// Cache lookup function. Performs a lookup for each block that is touched by
+// the access to the `size` bytes starting at `address`, and updates the hit
+// and miss counters. On a miss a line is allocated by calling ReplaceBlock,
+// except for a write miss when write allocate is disabled. Returns the number
+// of misses for which a line was requested.
+int Cache::CacheLookup(uint64_t address, int size, bool is_read) {
+  // There is nothing to look up if the cache hasn't been configured, or if
+  // the access is empty (the last block computation below would wrap around).
+  if ((cache_lines_ == nullptr) || (size <= 0)) return 0;
+  int miss_count = 0;
+  // If the size spans more than one block, perform an access for each block.
+  uint64_t first_block = address >> block_shift_;
+  uint64_t last_block = (address + size - 1) >> block_shift_;
+  for (uint64_t block = first_block; block <= last_block; block++) {
+    // Compute the cache index.
+    uint64_t index = (block & index_mask_) << set_shift_;
+    bool hit = false;
+    // Iterate over the lines of the set that the block maps to.
+    for (int set = 0; set < num_sets_; set++) {
+      CacheLine &line = cache_lines_[index + set];
+      if (line.valid && line.tag == block) {
+        hit = true;
+        line.lru = cycle_counter_->GetValue();
+        // A write hit makes the line dirty.
+        line.dirty |= !is_read;
+        break;
+      }
+    }
+    if (hit) {
+      (is_read ? read_hit_counter_ : write_hit_counter_).Increment(1ULL);
+      continue;
+    }
+    // The block is not in the cache.
+    (is_read ? read_miss_counter_ : write_miss_counter_).Increment(1ULL);
+    // Without write allocate, a write miss does not bring in the block.
+    if (!is_read && !write_allocate_) continue;
+    ReplaceBlock(block, is_read);
+    miss_count++;
+  }
+  return miss_count;
+}
+
+// Allocates a line for `block` in the set that it maps to. An invalid line is
+// used if there is one, otherwise the least recently used line that isn't
+// pinned is replaced. If every line in the set is pinned, nothing is replaced
+// and the access is counted as a read/write around instead. `is_read` is true
+// if the miss was caused by a read, and false if it was caused by a write.
+void Cache::ReplaceBlock(uint64_t block, bool is_read) {
+  // Recompute the cache index.
+  uint64_t index = (block & index_mask_) << set_shift_;
+  int victim = -1;
+  uint64_t victim_lru = std::numeric_limits<uint64_t>::max();
+  for (int set = 0; set < num_sets_; set++) {
+    CacheLine &line = cache_lines_[index + set];
+    // If the line is invalid, use it as the victim.
+    if (!line.valid) {
+      victim = index + set;
+      break;
+    }
+    if (line.pinned) continue;  // Pinned lines are never replaced.
+    // The first line that isn't pinned always becomes a candidate. After
+    // that, a line only becomes the victim if it has a smaller lru timestamp.
+    if ((victim == -1) || (line.lru < victim_lru)) {
+      victim = index + set;
+      victim_lru = line.lru;
+    }
+  }
+  // If there is no victim, that means the lines were pinned, so couldn't
+  // replace. In this case the miss really becomes a read/write around.
+  if (victim == -1) {
+    (is_read ? read_around_counter_ : write_around_counter_).Increment(1ULL);
+    return;
+  }
+  // Perform the replacement on the victim.
+  CacheLine &line = cache_lines_[victim];
+  // If the line is dirty (and valid), count the writeback.
+  if (line.valid && line.dirty) {
+    dirty_line_writeback_counter_.Increment(1ULL);
+  }
+  // Initialize the new line. A line allocated for a write starts out dirty.
+  line.valid = true;
+  line.tag = block;
+  line.pinned = false;
+  line.dirty = !is_read;
+  line.lru = cycle_counter_->GetValue();
+}
+
+}  // namespace mpact::sim::util
diff --git a/mpact/sim/util/memory/cache.h b/mpact/sim/util/memory/cache.h
new file mode 100644
index 0000000..7d82c38
--- /dev/null
+++ b/mpact/sim/util/memory/cache.h
@@ -0,0 +1,185 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MPACT_SIM_UTIL_MEMORY_CACHE_H_
+#define MPACT_SIM_UTIL_MEMORY_CACHE_H_
+
+#include <cstdint>
+#include <limits>
+#include <string>
+
+#include "absl/status/status.h"
+#include "mpact/sim/generic/component.h"
+#include "mpact/sim/generic/counters.h"
+#include "mpact/sim/generic/ref_count.h"
+#include "mpact/sim/util/memory/memory_interface.h"
+#include "mpact/sim/util/memory/tagged_memory_interface.h"
+
+// This file defines a class for modeling a cache. It implements the memory
+// interface, so it can be placed on the memory access path. A cache instance
+// takes a memory interface as a constructor argument, and will forward all
+// memory requests to that interface, after processing the memory request as
+// a cache access. This cache class can be used with both the plain and the
+// tagged memory interfaces. However, it is an error to use the cache with
+// a tagged memory interface if only a plain memory interface was provided
+// to the constructor.
+//
+// The cache is configured with a separate call that passes in a configuration
+// string that is parsed into the cache parameters. The configuration string
+// is expected to be in the format:
+//
+// <cache_size>,<block_size>,<associativity>,<write_allocate>
+//
+// where:
+// <cache_size> is the size of the cache in bytes.
+// <block_size> is the size of a cache block in bytes.
+// <associativity> is the number of ways in the cache.
+// <write_allocate> is a boolean indicating whether write allocate is enabled.
+//
+// The configuration call also takes a counter as an argument. This counter is
+// intended to be the cycle counter for the simulation. The cache uses this
+// counter to tag the cache lines with the time of last access, in order to
+// compute the LRU line upon replacement.
+
+namespace mpact::sim::util {
+
+using ::mpact::sim::generic::Component;
+using ::mpact::sim::generic::CounterValueOutputBase;
+using ::mpact::sim::generic::SimpleCounter;
+using ::mpact::sim::proto::ComponentData;
+
+// This class implements a simple cache.
+class Cache : public Component, public TaggedMemoryInterface {
+ public:
+  // The CacheContext class is used to store information necessary to fulfill
+  // the memory request when it's forwarded on to the memory interface.
+  struct CacheContext : public ReferenceCount {
+    // The context of the original memory reference.
+    ReferenceCount *context;
+    // Original data buffer.
+    DataBuffer *db;
+    // Instruction to be executed upon memory access completion.
+    Instruction *inst;
+    // Latency of the memory access.
+    int latency;
+    // Two constructors. The second one also takes the tags data buffer of a
+    // tagged memory request, but does not store it.
+    CacheContext(ReferenceCount *context_, DataBuffer *db_, Instruction *inst_,
+                 int latency_)
+        : context(context_), db(db_), inst(inst_), latency(latency_) {}
+    CacheContext(ReferenceCount *context_, DataBuffer *db_, DataBuffer *tags_,
+                 Instruction *inst_, int latency_)
+        : context(context_), db(db_), inst(inst_), latency(latency_) {}
+  };
+
+  // Two constructors depending on whether the cache is used with a tagged
+  // memory interface or not. The constructors take three arguments, the name
+  // to use for the cache, a pointer to the parent component (used to register
+  // and provide access to the performance counters), and a memory interface
+  // used to forward memory requests to.
+  Cache(std::string name, Component *parent, MemoryInterface *memory);
+  Cache(std::string name, Component *parent,
+        TaggedMemoryInterface *tagged_memory);
+  // Shorthand constructors that omit some parameters.
+  Cache(std::string name, MemoryInterface *memory);
+  Cache(std::string name, TaggedMemoryInterface *tagged_memory);
+  Cache(std::string name, Component *parent);
+  explicit Cache(std::string name);
+  Cache() = delete;
+  Cache(const Cache &) = delete;
+  Cache &operator=(const Cache &) = delete;
+  ~Cache() override;
+
+  // Configure the cache. The configuration string is expected to be in the
+  // format:
+  //
+  // <cache_size>,<block_size>,<associativity>,<write_allocate>
+  //
+  // where:
+  // <cache_size> is the size of the cache in bytes (power of 2). The number
+  // may be followed by one of the suffixes 'k', 'M', or 'G'.
+  // <block_size> is the size of a cache block in bytes (power of 2, >= 4).
+  // <associativity> is the number of ways in the cache (power of 2), or 0
+  // for a fully associative cache.
+  // <write_allocate> is "true" if write allocate is enabled, else "false".
+  //
+  // cycle_counter is a pointer to a counter that counts cycles in the
+  // simulation.
+  absl::Status Configure(const std::string &config,
+                         CounterValueOutputBase<uint64_t> *cycle_counter);
+
+  // MemoryInterface and TaggedMemoryInterface methods.
+  void Load(uint64_t address, DataBuffer *db, Instruction *inst,
+            ReferenceCount *context) override;
+  void Load(DataBuffer *address_db, DataBuffer *mask_db, int el_size,
+            DataBuffer *db, Instruction *inst,
+            ReferenceCount *context) override;
+  void Load(uint64_t address, DataBuffer *db, DataBuffer *tags,
+            Instruction *inst, ReferenceCount *context) override;
+  void Store(uint64_t address, DataBuffer *db) override;
+  void Store(DataBuffer *address, DataBuffer *mask, int el_size,
+             DataBuffer *db) override;
+  void Store(uint64_t address, DataBuffer *db, DataBuffer *tags) override;
+
+ private:
+  // This struct represents a cache line.
+  struct CacheLine {
+    // True if the line is valid.
+    bool valid = false;
+    // The tag includes both the index and the remaining tag bits of the
+    // address.
+    uint64_t tag = 0;
+    // True if the line is pinned. Pinned lines are never replaced.
+    bool pinned = false;
+    // True if the line is dirty. Dirty lines are written back to memory
+    // upon replacement.
+    bool dirty = false;
+    // LRU timestamp.
+    uint64_t lru = std::numeric_limits<uint64_t>::max();
+  };
+  // This is a semantic function that is bound to a local instruction instance
+  // and is used to perform the writeback to the processor of the data that was
+  // read.
+  void LoadChild(const Instruction *inst);
+  // Cache read/write function. Returns the number of cache misses.
+  int CacheLookup(uint64_t address, int size, bool is_read);
+  void ReplaceBlock(uint64_t block, bool is_read);  // Allocates a line.
+  // The cache lines. This is nullptr until Configure() has succeeded.
+  CacheLine *cache_lines_ = nullptr;
+  // Shift amounts and mask used to compute the index from the address.
+  int block_shift_ = 0;
+  int set_shift_ = 0;
+  uint64_t index_mask_ = 0;
+  // True if allocate cache line on write is enabled.
+  bool write_allocate_ = false;
+  uint64_t num_sets_ = 0;  // Number of lines searched per index (the ways).
+  // Instruction object used to perform the writeback to the processor.
+  Instruction *cache_inst_ = nullptr;
+  CounterValueOutputBase<uint64_t> *cycle_counter_ = nullptr;
+  // Performance counters.
+  SimpleCounter<uint64_t> read_hit_counter_;
+  SimpleCounter<uint64_t> read_miss_counter_;
+  SimpleCounter<uint64_t> write_hit_counter_;
+  SimpleCounter<uint64_t> write_miss_counter_;
+  SimpleCounter<uint64_t> dirty_line_writeback_counter_;
+  SimpleCounter<uint64_t> read_around_counter_;
+  SimpleCounter<uint64_t> write_around_counter_;
+  // Memory interface pointers.
+  MemoryInterface *memory_ = nullptr;
+  TaggedMemoryInterface *tagged_memory_ = nullptr;
+};
+
+}  // namespace mpact::sim::util
+
+#endif  // MPACT_SIM_UTIL_MEMORY_CACHE_H_
diff --git a/mpact/sim/util/memory/memory_interface.h b/mpact/sim/util/memory/memory_interface.h
index 99db44f..6d75548 100644
--- a/mpact/sim/util/memory/memory_interface.h
+++ b/mpact/sim/util/memory/memory_interface.h
@@ -16,8 +16,6 @@
 #define MPACT_SIM_UTIL_MEMORY_MEMORY_INTERFACE_H_
 
 #include <cstdint>
-#include <functional>
-#include <type_traits>
 
 #include "mpact/sim/generic/data_buffer.h"
 #include "mpact/sim/generic/instruction.h"
diff --git a/mpact/sim/util/memory/test/BUILD b/mpact/sim/util/memory/test/BUILD
index c5085e4..5bdbdec 100644
--- a/mpact/sim/util/memory/test/BUILD
+++ b/mpact/sim/util/memory/test/BUILD
@@ -142,3 +142,18 @@
         "@com_google_googletest//:gtest_main",
     ],
 )
+
+cc_test(
+    name = "cache_test",
+    size = "small",
+    srcs = ["cache_test.cc"],
+    deps = [
+        "//mpact/sim/generic:core",
+        "//mpact/sim/generic:counters",
+        "//mpact/sim/util/memory",
+        "//mpact/sim/util/memory:cache",
+        "@com_google_absl//absl/log:check",
+        "@com_google_googletest//:gtest",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
diff --git a/mpact/sim/util/memory/test/cache_test.cc b/mpact/sim/util/memory/test/cache_test.cc
new file mode 100644
index 0000000..3c74907
--- /dev/null
+++ b/mpact/sim/util/memory/test/cache_test.cc
@@ -0,0 +1,137 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mpact/sim/util/memory/cache.h"
+
+#include <cstdint>
+
+#include "absl/log/check.h"
+#include "googlemock/include/gmock/gmock.h"  // IWYU pragma: keep
+#include "googletest/include/gtest/gtest.h"
+#include "mpact/sim/generic/counters.h"
+#include "mpact/sim/generic/data_buffer.h"
+
+namespace {
+
+using ::mpact::sim::generic::DataBuffer;
+using ::mpact::sim::generic::DataBufferFactory;
+using ::mpact::sim::generic::SimpleCounter;
+using ::mpact::sim::util::Cache;
+
+class CacheTest : public testing::Test {
+ protected:
+  CacheTest() : cycle_counter_("cycle_counter", 0ULL) {
+    // Create a cache with no memory. Each test configures it with Configure().
+    cache_ = new Cache("cache");
+    db_ = db_factory_.Allocate<uint32_t>(1);  // One uint32_t element.
+    read_hits_ = reinterpret_cast<SimpleCounter<uint64_t> *>(
+        cache_->GetCounter("read_hit"));
+    read_misses_ = reinterpret_cast<SimpleCounter<uint64_t> *>(
+        cache_->GetCounter("read_miss"));
+    write_hits_ = reinterpret_cast<SimpleCounter<uint64_t> *>(
+        cache_->GetCounter("write_hit"));
+    write_misses_ = reinterpret_cast<SimpleCounter<uint64_t> *>(
+        cache_->GetCounter("write_miss"));
+    dirty_line_writebacks_ = reinterpret_cast<SimpleCounter<uint64_t> *>(
+        cache_->GetCounter("dirty_line_writeback"));
+    read_arounds_ = reinterpret_cast<SimpleCounter<uint64_t> *>(
+        cache_->GetCounter("read_around"));
+    write_arounds_ = reinterpret_cast<SimpleCounter<uint64_t> *>(
+        cache_->GetCounter("write_around"));
+  }
+  // Releases the cache and the data buffer allocated in the constructor.
+  ~CacheTest() override {
+    delete cache_;
+    db_->DecRef();
+  }
+  // The counter pointers below refer to counters owned by the cache.
+  DataBufferFactory db_factory_;
+  DataBuffer *db_;
+  Cache *cache_;
+  SimpleCounter<uint64_t> *read_hits_;
+  SimpleCounter<uint64_t> *read_misses_;
+  SimpleCounter<uint64_t> *write_hits_;
+  SimpleCounter<uint64_t> *write_misses_;
+  SimpleCounter<uint64_t> *dirty_line_writebacks_;
+  SimpleCounter<uint64_t> *read_arounds_;
+  SimpleCounter<uint64_t> *write_arounds_;
+  SimpleCounter<uint64_t> cycle_counter_;  // Passed to Configure().
+};
+
+TEST_F(CacheTest, DirectMappedReadsCold) {
+  // Configure a 1kB cache with 16B blocks, direct mapped.
+  CHECK_OK(cache_->Configure("1k,16,1,true", &cycle_counter_));
+  // Read each word of the first 1kB once. One access in four starts a block.
+  for (uint64_t address = 0; address < 1024; address += 4) {
+    cache_->Load(address, db_, nullptr, nullptr);
+  }
+  uint64_t refs = 1024 / 4;
+  EXPECT_EQ(read_misses_->GetValue(), refs / 4);
+  EXPECT_EQ(read_hits_->GetValue(), (refs / 4) * 3);
+}
+
+TEST_F(CacheTest, DirectMappedReadsWarm) {
+  // Configure a 1kB cache with 16B blocks, direct mapped.
+  CHECK_OK(cache_->Configure("1k,16,1,true", &cycle_counter_));
+
+  // Warm the cache by reading each word of the first 1kB.
+  for (uint64_t address = 0; address < 1024; address += 4) {
+    cache_->Load(address, db_, nullptr, nullptr);
+  }
+  // Clear the counters.
+  read_misses_->SetValue(0);
+  read_hits_->SetValue(0);
+
+  // Access the cache again. Should be all hits.
+  for (uint64_t address = 0; address < 1024; address += 4) {
+    cache_->Load(address, db_, nullptr, nullptr);
+  }
+  uint64_t refs = 1024 / 4;
+  EXPECT_EQ(read_misses_->GetValue(), 0);
+  EXPECT_EQ(read_hits_->GetValue(), refs);
+
+  // Clear the counters.
+  read_misses_->SetValue(0);
+  read_hits_->SetValue(0);
+  // Access the next 1kB, which is not in the cache: like a cold cache.
+  for (uint64_t address = 1024; address < 2048; address += 4) {
+    cache_->Load(address, db_, nullptr, nullptr);
+  }
+  EXPECT_EQ(read_misses_->GetValue(), refs / 4);
+  EXPECT_EQ(read_hits_->GetValue(), (refs / 4) * 3);
+}
+
+TEST_F(CacheTest, TwoWayReads) {
+  // Configure a 1kB cache with 16B blocks, two way set associative.
+  CHECK_OK(cache_->Configure("1k,16,2,true", &cycle_counter_));
+  // Load two blocks that map to each set, filling both ways of every set.
+  for (uint64_t address = 0; address < 512; address += 16) {
+    cache_->Load(address, db_, nullptr, nullptr);
+    cache_->Load(address + 1024, db_, nullptr, nullptr);
+  }
+  EXPECT_EQ(read_misses_->GetValue(), 2 * 512 / 16);
+  EXPECT_EQ(read_hits_->GetValue(), 0);
+  // Clear the counters.
+  read_misses_->SetValue(0);
+  read_hits_->SetValue(0);
+  // All these references should hit.
+  for (uint64_t address = 0; address < 512; address += 16) {
+    cache_->Load(address, db_, nullptr, nullptr);
+    cache_->Load(address + 1024, db_, nullptr, nullptr);
+  }
+  EXPECT_EQ(read_misses_->GetValue(), 0);
+  EXPECT_EQ(read_hits_->GetValue(), 2 * 512 / 16);
+}
+
+}  // namespace