diff --git a/src/application/blinktree_benchmark/benchmark.cpp b/src/application/blinktree_benchmark/benchmark.cpp index 24117ba..e10b7e9 100644 --- a/src/application/blinktree_benchmark/benchmark.cpp +++ b/src/application/blinktree_benchmark/benchmark.cpp @@ -23,9 +23,15 @@ Benchmark::Benchmark(benchmark::Cores &&cores, const std::uint16_t iterations, s { this->_chronometer.add(benchmark::Perf::CYCLES); this->_chronometer.add(benchmark::Perf::INSTRUCTIONS); - this->_chronometer.add(benchmark::Perf::STALLS_MEM_ANY); - this->_chronometer.add(benchmark::Perf::SW_PREFETCH_ACCESS_NTA); - this->_chronometer.add(benchmark::Perf::SW_PREFETCH_ACCESS_WRITE); + //this->_chronometer.add(benchmark::Perf::STALLS_MEM_ANY); + //this->_chronometer.add(benchmark::Perf::SW_PREFETCH_ACCESS_NTA); + //this->_chronometer.add(benchmark::Perf::SW_PREFETCH_ACCESS_WRITE); + this->_chronometer.add(benchmark::Perf::LLC_MISSES); + this->_chronometer.add(benchmark::Perf::DTLB_READ_MISSES); + this->_chronometer.add(benchmark::Perf::DTLB_STORE_MISSES); + this->_chronometer.add(benchmark::Perf::ITLB_LOAD_MISSES); + this->_chronometer.add(benchmark::Perf::SW_PAGE_FAULTS_MAJOR); + this->_chronometer.add(benchmark::Perf::SW_PAGE_FAULTS_MINOR); } std::cout << "core configuration: \n" << this->_cores.dump(2) << std::endl; @@ -53,7 +59,10 @@ void Benchmark::start() { this->_request_scheduler.clear(); } - + + auto *start_task = mx::tasking::runtime::new_task(0U, *this); + mx::tasking::runtime::spawn(*start_task, 0U); + // Create one request scheduler per core. for (auto core_index = 0U; core_index < this->_cores.current().size(); core_index++) { @@ -70,8 +79,8 @@ void Benchmark::start() { mx::tasking::runtime::profile(this->profile_file_name()); } - this->_chronometer.start(static_cast(static_cast(this->_workload)), - this->_current_iteration + 1, this->_cores.current()); + //this->_chronometer.start(static_cast(static_cast(this->_workload)), + // this->_current_iteration + 1, this->_cores.current()); } const mx::util::core_set &Benchmark::core_set() @@ -109,6 +118,14 @@ void Benchmark::requests_finished() if (open_requests == 0U) // All request schedulers are done. { + std::uint16_t core_id = mx::system::topology::core_id(); + if (core_id != 0) { + this->_open_requests++; + auto *stop_task = mx::tasking::runtime::new_task(0U, *this); + stop_task->annotate(static_cast(0)); + mx::tasking::runtime::spawn(*stop_task, core_id); + return; + } // Stop and print time (and performance counter). const auto result = this->_chronometer.stop(this->_workload.size()); mx::tasking::runtime::stop(); @@ -193,7 +210,7 @@ void Benchmark::requests_finished() std::string Benchmark::profile_file_name() const { - return "profiling-" + std::to_string(this->_cores.current().size()) + "-cores" + "-phase-" + - std::to_string(static_cast(static_cast(this->_workload))) + "-iteration-" + - std::to_string(this->_current_iteration) + ".json"; -} \ No newline at end of file + return "profiling-" + std::to_string(static_cast(this->_cores.current().size())) + "-cores" + "-phase-" + + std::to_string(static_cast(static_cast(this->_workload))) + "-iteration-" + + std::to_string(static_cast(this->_current_iteration)) + ".json"; +} diff --git a/src/benchmark/chronometer.h b/src/benchmark/chronometer.h index f906b94..97b7c18 100644 --- a/src/benchmark/chronometer.h +++ b/src/benchmark/chronometer.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -177,15 +178,16 @@ public: _core_set = core_set; _perf.start(); - _start = std::chrono::steady_clock::now(); + _start = mx::system::Environment::timestamp(); // std::chrono::steady_clock::now(); } InterimResult

stop(const std::uint64_t count_operations) { - const auto end = std::chrono::steady_clock::now(); + const auto end = mx::system::Environment::timestamp(); // std::chrono::steady_clock::now(); _perf.stop(); - const auto milliseconds = std::chrono::duration_cast(end - _start); + const auto milliseconds = std::chrono::milliseconds( + (end - _start) / 2000000UL); // std::chrono::duration_cast(end - _start); return {count_operations, _current_phase, @@ -209,7 +211,8 @@ private: P _current_phase; mx::util::core_set _core_set; alignas(64) Perf _perf; - alignas(64) std::chrono::steady_clock::time_point _start; + //alignas(64) std::chrono::steady_clock::time_point _start; + alignas(64) uint64_t _start; std::unordered_map statistic_map( const mx::tasking::profiling::Statistic::Counter counter) diff --git a/src/benchmark/perf.cpp b/src/benchmark/perf.cpp index 366e671..4841dbe 100644 --- a/src/benchmark/perf.cpp +++ b/src/benchmark/perf.cpp @@ -24,6 +24,13 @@ using namespace benchmark; */ [[maybe_unused]] PerfCounter Perf::LLC_MISSES = {"llc-miss", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES}; +[[maybe_unused]] PerfCounter Perf::DTLB_READ_MISSES = {"dtlb-read-miss", PERF_TYPE_HW_CACHE, 0x10003}; +[[maybe_unused]] PerfCounter Perf::DTLB_STORE_MISSES = {"dtlb-store-miss", PERF_TYPE_HW_CACHE, 0x10103}; +[[maybe_unused]] PerfCounter Perf::ITLB_LOAD_MISSES = {"itlb-load-miss", PERF_TYPE_HW_CACHE, 0x10004}; +[[maybe_unused]] PerfCounter Perf::SW_PAGE_FAULTS = {"sw-page-faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS}; +[[maybe_unused]] PerfCounter Perf::SW_PAGE_FAULTS_MINOR = {"sw-page-faults-minor", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN}; +[[maybe_unused]] PerfCounter Perf::SW_PAGE_FAULTS_MAJOR = {"sw-page-faults-major", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ}; + /** * Counter "LLC Reference" * Accesses to the LLC, in which the data is present(hit) or not present(miss) diff --git a/src/benchmark/perf.h b/src/benchmark/perf.h index 71dc7bb..2ecf278 100644 --- a/src/benchmark/perf.h +++ b/src/benchmark/perf.h @@ -107,6 +107,12 @@ public: [[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_T0; [[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_T1_T2; [[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_WRITE; + [[maybe_unused]] static PerfCounter DTLB_STORE_MISSES; + [[maybe_unused]] static PerfCounter DTLB_READ_MISSES; + [[maybe_unused]] static PerfCounter ITLB_LOAD_MISSES; + [[maybe_unused]] static PerfCounter SW_PAGE_FAULTS; + [[maybe_unused]] static PerfCounter SW_PAGE_FAULTS_MINOR; + [[maybe_unused]] static PerfCounter SW_PAGE_FAULTS_MAJOR; Perf() noexcept = default; ~Perf() noexcept = default; @@ -154,4 +160,4 @@ public: private: std::vector _counter; }; -} // namespace benchmark \ No newline at end of file +} // namespace benchmark