Adds capability to specify cacheable or non-cacheable ranges for the cache. When there are non-cacheable references they are counted separately from other statistics. PiperOrigin-RevId: 681961164 Change-Id: If83725f0408318a39456f526de0fbf3d4bd83793
diff --git a/mpact/sim/util/memory/BUILD b/mpact/sim/util/memory/BUILD index 71ee338..af7aded 100644 --- a/mpact/sim/util/memory/BUILD +++ b/mpact/sim/util/memory/BUILD
@@ -81,6 +81,7 @@ "//mpact/sim/generic:core", "//mpact/sim/generic:counters", "//mpact/sim/generic:instruction", + "@com_google_absl//absl/container:btree", "@com_google_absl//absl/functional:bind_front", "@com_google_absl//absl/log", "@com_google_absl//absl/log:check",
diff --git a/mpact/sim/util/memory/cache.cc b/mpact/sim/util/memory/cache.cc index f8ee594..0a40bb0 100644 --- a/mpact/sim/util/memory/cache.cc +++ b/mpact/sim/util/memory/cache.cc
@@ -14,6 +14,8 @@ #include "mpact/sim/util/memory/cache.h" +#include <unistd.h> + #include <cstdint> #include <limits> #include <string> @@ -46,6 +48,8 @@ dirty_line_writeback_counter_("dirty_line_writeback", 0ULL), read_around_counter_("read_around", 0ULL), write_around_counter_("write_around", 0ULL), + read_non_cacheable_counter_("read_non_cacheable", 0ULL), + write_non_cacheable_counter_("write_non_cacheable", 0ULL), memory_(memory), tagged_memory_(nullptr) { // Register the counters. @@ -56,6 +60,8 @@ CHECK_OK(AddCounter(&dirty_line_writeback_counter_)); CHECK_OK(AddCounter(&read_around_counter_)); CHECK_OK(AddCounter(&write_around_counter_)); + CHECK_OK(AddCounter(&read_non_cacheable_counter_)); + CHECK_OK(AddCounter(&write_non_cacheable_counter_)); cache_inst_ = new Instruction(nullptr); cache_inst_->set_semantic_function(absl::bind_front(&Cache::LoadChild, this)); } @@ -80,6 +86,10 @@ CHECK_OK(AddCounter(&dirty_line_writeback_counter_)); CHECK_OK(AddCounter(&read_around_counter_)); CHECK_OK(AddCounter(&write_around_counter_)); + CHECK_OK(AddCounter(&read_non_cacheable_counter_)); + CHECK_OK(AddCounter(&write_non_cacheable_counter_)); + cache_inst_ = new Instruction(nullptr); + cache_inst_->set_semantic_function(absl::bind_front(&Cache::LoadChild, this)); } // The simple constructors just call the main constructors. @@ -108,7 +118,7 @@ cycle_counter_ = cycle_counter; // Split the configuration string into fields, make sure there are 4 fields. std::vector<std::string> config_vec = absl::StrSplit(config, ','); - if (config_vec.size() != 4) { + if (config_vec.size() < 4) { return absl::InvalidArgumentError("Invalid configuration - too few fields"); } // Compute the cache size in bytes, including any suffixes ('k', 'M', 'G'). @@ -127,11 +137,12 @@ absl::StrCat("Invalid cache size suffix: '", suffix, "'")); } } - // Sanity check the cache parameters. + // Line size in bytes. 
uint64_t line_size = std::stoull(config_vec[1], &size); if (size != config_vec[1].size()) { return absl::InvalidArgumentError("Invalid value for line size"); } + // Number of sets (set associativity) - 0 means fully set associative. num_sets_ = std::stoull(config_vec[2], &size); if (size != config_vec[2].size()) { return absl::InvalidArgumentError("Invalid value for number of sets"); @@ -144,6 +155,50 @@ } else { return absl::InvalidArgumentError("Invalid write allocate value"); } + // If there are more than 4 entries, check for cacheable, or non-cacheable + // memory ranges. Format is: [c|nc]:<start_address>:<end_address> (end address is inclusive). + for (int i = 4; i < config_vec.size(); i++) { + std::vector<std::string> range_vec = absl::StrSplit(config_vec[i], ':'); + if (range_vec.size() != 3) { + return absl::InvalidArgumentError( + "Invalid (non)cacheable range - must have 3 fields"); + } + uint64_t range_start = std::stoull(range_vec[1], &size, 0); + if (size != range_vec[1].size()) { + return absl::InvalidArgumentError( + "Invalid cacheable range - invalid start address"); + } + uint64_t range_end = std::stoull(range_vec[2], &size, 0); + if (size != range_vec[2].size()) { + return absl::InvalidArgumentError( + "Invalid cacheable range - invalid size"); + } + if (range_start > range_end) { + return absl::InvalidArgumentError( + "Invalid cacheable range - start address is greater than end " + "address"); + } + if ((range_vec[0] != "c") && (range_vec[0] != "nc")) { + return absl::InvalidArgumentError( + "Invalid cacheable range - must start with 'c' or 'nc'"); + } + if (range_vec[0] == "c") { + if (!non_cacheable_ranges_.empty()) { + return absl::InvalidArgumentError( + "Cannot mix cacheable and non-cacheable ranges"); + } + cacheable_ranges_.insert(AddressRange(range_start, range_end)); + has_cacheable_ = true; + } else { + if (!cacheable_ranges_.empty()) { + return absl::InvalidArgumentError( + "Cannot mix cacheable and non-cacheable ranges"); + } + 
non_cacheable_ranges_.insert(AddressRange(range_start, range_end)); + has_non_cacheable_ = true; + } + } + // Sanity check more cache parameters. if (!absl::has_single_bit(cache_size)) { return absl::InvalidArgumentError("Cache size is not a power of 2"); } @@ -187,6 +242,7 @@ void Cache::Load(uint64_t address, DataBuffer *db, Instruction *inst, ReferenceCount *context) { (void)CacheLookup(address, db->size<uint8_t>(), /*is_read=*/true); + if (memory_ == nullptr) return; auto *cache_context = new CacheContext(context, db, inst, db->latency()); @@ -291,6 +347,25 @@ uint64_t first_block = address >> block_shift_; uint64_t last_block = (address + size - 1) >> block_shift_; for (uint64_t block = first_block; block <= last_block; block++) { + // Check each access to see if it is cacheable or not. NOTE(review): the range tested spans the whole access (address .. address + size - 1), not just the current block, so the outcome is identical for every block of the access. + if (has_cacheable_ || has_non_cacheable_) { + bool dont_fetch = + (has_cacheable_ && !cacheable_ranges_.contains( + AddressRange(address, address + size - 1))) || + (has_non_cacheable_ && non_cacheable_ranges_.contains(AddressRange( + address, address + size - 1))); + if (dont_fetch) { + // Not cacheable: count it as a non-cacheable read or write. + if (is_read) { + read_non_cacheable_counter_.Increment(1ULL); + } else { + write_non_cacheable_counter_.Increment(1ULL); + } + // Skip the below since it's not cacheable. + continue; + } + } + // Compute the cache index. uint64_t index = (block & index_mask_) << set_shift_; bool hit = false;
diff --git a/mpact/sim/util/memory/cache.h b/mpact/sim/util/memory/cache.h index 580e956..4891748 100644 --- a/mpact/sim/util/memory/cache.h +++ b/mpact/sim/util/memory/cache.h
@@ -19,6 +19,7 @@ #include <limits> #include <string> +#include "absl/container/btree_set.h" #include "absl/status/status.h" #include "mpact/sim/generic/component.h" #include "mpact/sim/generic/counters.h" @@ -142,6 +143,26 @@ } private: + // Inclusive address range [start, end] used as the key in the (non-)cacheable range sets. + struct AddressRange { + uint64_t start; + uint64_t end; + explicit AddressRange(uint64_t address) : start(address), end(address) {} + AddressRange(uint64_t start_address, uint64_t end_address) + : start(start_address), end(end_address) {} + }; + // Comparator used in maps/sets to compare two address ranges so as to be able + // to order the keys. Note, two address ranges are "equal" (as in + // overlapping), if neither is less than the other. In this context a range is + // less than another if its end address is below the start address of the + // other. Thus if neither is less than the other, they overlap in + // some way. + struct AddressRangeLess { + constexpr bool operator()(const AddressRange &lhs, + const AddressRange &rhs) const { + return lhs.end < rhs.start; + } + }; // This struct represents a cache line. struct CacheLine { // True if the line is valid. @@ -173,6 +194,11 @@ // True if allocate cache line on write is enabled. bool write_allocate_ = false; uint64_t num_sets_ = 0; + // Cacheability ranges. + absl::btree_multiset<AddressRange, AddressRangeLess> non_cacheable_ranges_; + absl::btree_multiset<AddressRange, AddressRangeLess> cacheable_ranges_; + bool has_non_cacheable_ = false; + bool has_cacheable_ = false; // Instruction object used to perform the writeback to the processor. 
Instruction *cache_inst_; CounterValueOutputBase<uint64_t> *cycle_counter_; @@ -184,6 +210,8 @@ SimpleCounter<uint64_t> dirty_line_writeback_counter_; SimpleCounter<uint64_t> read_around_counter_; SimpleCounter<uint64_t> write_around_counter_; + SimpleCounter<uint64_t> read_non_cacheable_counter_; + SimpleCounter<uint64_t> write_non_cacheable_counter_; // Memory interface pointers. MemoryInterface *memory_; TaggedMemoryInterface *tagged_memory_;
diff --git a/mpact/sim/util/memory/test/BUILD b/mpact/sim/util/memory/test/BUILD index 5bdbdec..ce12c31 100644 --- a/mpact/sim/util/memory/test/BUILD +++ b/mpact/sim/util/memory/test/BUILD
@@ -153,6 +153,7 @@ "//mpact/sim/util/memory", "//mpact/sim/util/memory:cache", "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", "@com_google_googletest//:gtest", "@com_google_googletest//:gtest_main", ],
diff --git a/mpact/sim/util/memory/test/cache_test.cc b/mpact/sim/util/memory/test/cache_test.cc index 34223ba..bff1111 100644 --- a/mpact/sim/util/memory/test/cache_test.cc +++ b/mpact/sim/util/memory/test/cache_test.cc
@@ -17,6 +17,7 @@ #include <cstdint> #include "absl/log/check.h" +#include "absl/status/status.h" #include "googlemock/include/gmock/gmock.h" // IWYU pragma: keep #include "googletest/include/gtest/gtest.h" #include "mpact/sim/generic/counters.h" @@ -55,6 +56,10 @@ cache_->GetCounter("read_around")); write_arounds_ = reinterpret_cast<SimpleCounter<uint64_t> *>( cache_->GetCounter("write_around")); + read_non_cacheable_ = reinterpret_cast<SimpleCounter<uint64_t> *>( + cache_->GetCounter("read_non_cacheable")); + write_non_cacheable_ = reinterpret_cast<SimpleCounter<uint64_t> *>( + cache_->GetCounter("write_non_cacheable")); } ~CacheTest() override { @@ -72,6 +77,8 @@ SimpleCounter<uint64_t> *dirty_line_writebacks_; SimpleCounter<uint64_t> *read_arounds_; SimpleCounter<uint64_t> *write_arounds_; + SimpleCounter<uint64_t> *read_non_cacheable_; + SimpleCounter<uint64_t> *write_non_cacheable_; SimpleCounter<uint64_t> cycle_counter_; }; @@ -281,4 +288,90 @@ st_tag_db->DecRef(); } +TEST_F(CacheTest, CacheableRanges) { + // Create a 1kB cache, 16B blocks, direct mapped, with two cacheable ranges. + CHECK_OK(cache_->Configure("1k,16,1,true,c:0x1000:0x1fff,c:0x3000:0x3fff", + &cycle_counter_)); + // These accesses should be cacheable. + for (uint64_t address = 0x1000; address < 0x2000; address += 0x100) { + cache_->Load(address, db_, nullptr, nullptr); + } + EXPECT_EQ(read_non_cacheable_->GetValue(), 0); + EXPECT_EQ(read_misses_->GetValue(), 0x1000 / 0x100); + // These accesses should be non-cacheable. + for (uint64_t address = 0x2000; address < 0x3000; address += 0x100) { + cache_->Load(address, db_, nullptr, nullptr); + } + EXPECT_EQ(read_non_cacheable_->GetValue(), 0x1000 / 0x100); + EXPECT_EQ(read_misses_->GetValue(), 0x1000 / 0x100); + read_misses_->SetValue(0); + read_non_cacheable_->SetValue(0); + // These accesses should be cacheable. 
+ for (uint64_t address = 0x3000; address < 0x4000; address += 0x100) { + cache_->Load(address, db_, nullptr, nullptr); + } + EXPECT_EQ(read_non_cacheable_->GetValue(), 0); + EXPECT_EQ(read_misses_->GetValue(), 0x1000 / 0x100); + // These accesses should be non-cacheable. + for (uint64_t address = 0x4000; address < 0x5000; address += 0x100) { + cache_->Load(address, db_, nullptr, nullptr); + } + EXPECT_EQ(read_non_cacheable_->GetValue(), 0x1000 / 0x100); + EXPECT_EQ(read_misses_->GetValue(), 0x1000 / 0x100); +} + +TEST_F(CacheTest, NonCacheableRanges) { + // Create a 1kB cache, 16B blocks, direct mapped, with two non-cacheable ranges. + CHECK_OK(cache_->Configure("1k,16,1,true,nc:0x1000:0x1fff,nc:0x3000:0x3fff", + &cycle_counter_)); + // These accesses should be non-cacheable. + for (uint64_t address = 0x1000; address < 0x2000; address += 0x100) { + cache_->Load(address, db_, nullptr, nullptr); + } + EXPECT_EQ(read_non_cacheable_->GetValue(), 0x1000 / 0x100); + EXPECT_EQ(read_misses_->GetValue(), 0); + // These accesses should be cacheable. + for (uint64_t address = 0x2000; address < 0x3000; address += 0x100) { + cache_->Load(address, db_, nullptr, nullptr); + } + EXPECT_EQ(read_non_cacheable_->GetValue(), 0x1000 / 0x100); + EXPECT_EQ(read_misses_->GetValue(), 0x1000 / 0x100); + read_misses_->SetValue(0); + read_non_cacheable_->SetValue(0); + // These accesses should be non-cacheable. + for (uint64_t address = 0x3000; address < 0x4000; address += 0x100) { + cache_->Load(address, db_, nullptr, nullptr); + } + EXPECT_EQ(read_non_cacheable_->GetValue(), 0x1000 / 0x100); + EXPECT_EQ(read_misses_->GetValue(), 0); + // These accesses should be cacheable. + for (uint64_t address = 0x4000; address < 0x5000; address += 0x100) { + cache_->Load(address, db_, nullptr, nullptr); + } + EXPECT_EQ(read_non_cacheable_->GetValue(), 0x1000 / 0x100); + EXPECT_EQ(read_misses_->GetValue(), 0x1000 / 0x100); +} + +TEST_F(CacheTest, CacheableRangesConfigErrors) { + absl::Status status; + // Not enough fields. 
+ status = cache_->Configure("1k,16,1,true,c:0x1000,c:0x2000", &cycle_counter_); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + // Mix of cacheable and non-cacheable ranges. + status = cache_->Configure("1k,16,1,true,c:0x1000:0x1fff,nc:0x2000:0x2fff", + &cycle_counter_); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + // Syntax error in number. + status = cache_->Configure("1k,16,1,true,c:0x1000x:0x1fff", &cycle_counter_); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + status = cache_->Configure("1k,16,1,true,c:0x1000:0x1fxff", &cycle_counter_); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + // Using neither c nor nc. + status = cache_->Configure("1k,16,1,true,x:0x1000:0x1fff", &cycle_counter_); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + // Lower bound is greater than upper bound. + status = cache_->Configure("1k,16,1,true,c:0x1fff:0x1000", &cycle_counter_); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); +} + } // namespace