blinktree: Implemented performance counter interface for Genode.

This commit is contained in:
Michael Mueller
2023-03-27 17:05:05 +02:00
parent b9e1da2ad1
commit dec071829d
2 changed files with 53 additions and 45 deletions

View File

@@ -1,8 +1,6 @@
#pragma once #pragma once
#ifdef PERF_SUPPORT
#include "perf.h" #include "perf.h"
#endif
#include "phase.h" #include "phase.h"
#include <chrono> #include <chrono>
#include <json.hpp> #include <json.hpp>
@@ -51,7 +49,7 @@ template <typename P> class InterimResult
public: public:
InterimResult(const std::uint64_t operation_count, const P &phase, const std::uint16_t iteration, InterimResult(const std::uint64_t operation_count, const P &phase, const std::uint16_t iteration,
const std::uint16_t core_count, const std::chrono::milliseconds time, const std::uint16_t core_count, const std::chrono::milliseconds time,
/*std::vector<PerfCounter> &counter,*/ std::vector<PerfCounter> &counter,
std::unordered_map<std::uint16_t, std::uint64_t> executed_tasks, std::unordered_map<std::uint16_t, std::uint64_t> executed_tasks,
std::unordered_map<std::uint16_t, std::uint64_t> executed_reader_tasks, std::unordered_map<std::uint16_t, std::uint64_t> executed_reader_tasks,
std::unordered_map<std::uint16_t, std::uint64_t> executed_writer_tasks, std::unordered_map<std::uint16_t, std::uint64_t> executed_writer_tasks,
@@ -65,12 +63,10 @@ public:
_scheduled_tasks_on_core(std::move(scheduled_tasks_on_core)), _scheduled_tasks_on_core(std::move(scheduled_tasks_on_core)),
_scheduled_tasks_off_core(std::move(scheduled_tasks_off_core)), _worker_fills(std::move(worker_fills)) _scheduled_tasks_off_core(std::move(scheduled_tasks_off_core)), _worker_fills(std::move(worker_fills))
{ {
#ifdef PERF_SUPPORT
for (auto &c : counter) for (auto &c : counter)
{ {
_performance_counter.emplace_back(std::make_pair(c.name(), c.read())); _performance_counter.emplace_back(std::make_pair(c.name(), c.read()));
} }
#endif
} }
~InterimResult() = default; ~InterimResult() = default;
@@ -181,9 +177,7 @@ public:
_current_phase = phase; _current_phase = phase;
_current_iteration = iteration; _current_iteration = iteration;
_core_set = core_set; _core_set = core_set;
#ifdef PERF_SUPPORT
_perf.start(); _perf.start();
#endif
//_start = std::chrono::steady_clock::now(); //_start = std::chrono::steady_clock::now();
_start = Genode::Trace::timestamp(); _start = Genode::Trace::timestamp();
@@ -193,9 +187,7 @@ public:
{ {
const auto end = Genode::Trace::timestamp(); const auto end = Genode::Trace::timestamp();
//const auto end = std::chrono::steady_clock::now(); //const auto end = std::chrono::steady_clock::now();
#ifdef PERF_SUPPORT
_perf.stop(); _perf.stop();
#endif
//const auto milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end-_start); //const auto milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end-_start);
const auto milliseconds = std::chrono::milliseconds((end-_start)/2000000UL); const auto milliseconds = std::chrono::milliseconds((end-_start)/2000000UL);
@@ -205,7 +197,7 @@ public:
_current_iteration, _current_iteration,
_core_set.size(), _core_set.size(),
milliseconds, milliseconds,
//_perf.counter(), _perf.counter(),
statistic_map(mx::tasking::profiling::Statistic::Executed), statistic_map(mx::tasking::profiling::Statistic::Executed),
statistic_map(mx::tasking::profiling::Statistic::ExecutedReader), statistic_map(mx::tasking::profiling::Statistic::ExecutedReader),
statistic_map(mx::tasking::profiling::Statistic::ExecutedWriter), statistic_map(mx::tasking::profiling::Statistic::ExecutedWriter),
@@ -214,16 +206,12 @@ public:
statistic_map(mx::tasking::profiling::Statistic::ScheduledOffChannel), statistic_map(mx::tasking::profiling::Statistic::ScheduledOffChannel),
statistic_map(mx::tasking::profiling::Statistic::Fill)}; statistic_map(mx::tasking::profiling::Statistic::Fill)};
} }
#ifdef PERF_SUPPORT
/// Registers a performance counter by forwarding it to the embedded Perf instance.
void add(PerfCounter &performance_counter)
{
    _perf.add(performance_counter);
}
#endif
private: private:
std::uint16_t _current_iteration{0U}; std::uint16_t _current_iteration{0U};
P _current_phase; P _current_phase;
mx::util::core_set _core_set; mx::util::core_set _core_set;
#ifdef PERF_SUPPORT
alignas(64) Perf _perf; alignas(64) Perf _perf;
#endif
//alignas(64) std::chrono::steady_clock::time_point _start; //alignas(64) std::chrono::steady_clock::time_point _start;
alignas(64) size_t _start; alignas(64) size_t _start;

View File

@@ -1,12 +1,11 @@
#pragma once #pragma once
#include <algorithm> #include <algorithm>
#include <asm/unistd.h>
#include <cstring> #include <cstring>
#include <linux/perf_event.h> // TODO: Find Genode equivalent #include <iostream>
#include <string> #include <string>
#include <sys/ioctl.h>
#include <unistd.h>
#include <vector> #include <vector>
#include <base/trace/perf.h>
/* /*
 * For more performance counters, take a look at the manual from Intel:
@@ -28,46 +27,65 @@ namespace benchmark {
class PerfCounter class PerfCounter
{ {
public: public:
PerfCounter(std::string &&name, const std::uint64_t type, const std::uint64_t event_id) : _name(std::move(name)) PerfCounter(std::string &&name, const Genode::Trace::Performance_counter::Type type, const std::uint64_t event_id, const std::uint64_t mask) : _name(std::move(name)), _type(type), _event_id(static_cast<Genode::uint64_t>(event_id)), _mask(static_cast<Genode::uint64_t>(mask))
{ {
/*std::memset(&_perf_event_attribute, 0, sizeof(perf_event_attr));
_perf_event_attribute.type = type;
_perf_event_attribute.size = sizeof(perf_event_attr);
_perf_event_attribute.config = event_id;
_perf_event_attribute.disabled = true;
_perf_event_attribute.inherit = 1;
_perf_event_attribute.exclude_kernel = false;
_perf_event_attribute.exclude_hv = false;
_perf_event_attribute.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;*/
} }
~PerfCounter() = default; ~PerfCounter() = default;
bool open() bool open()
{ {
/*_file_descriptor = syscall(__NR_perf_event_open, &_perf_event_attribute, 0, -1, -1, 0);*/ try {
return _file_descriptor >= 0; _counter = Genode::Trace::Performance_counter::acquire(_type);
} catch (Genode::Trace::Pfc_no_avail) {
std::cerr << "Failed to open performance counters." << std::endl;
}
try {
Genode::Trace::Performance_counter::setup(_counter, _event_id, _mask, (_type == Genode::Trace::Performance_counter::Type::CORE ? 0x30000 : 0x550f000000000000));
} catch (Genode::Trace::Pfc_access_error &e) {
std::cerr << "Error while setting up performance counter: " << e.error_code() << std::endl;
}
return _counter >= 0;
} }
bool start() bool start()
{ {
//ioctl(_file_descriptor, PERF_EVENT_IOC_RESET, 0); try {
//ioctl(_file_descriptor, PERF_EVENT_IOC_ENABLE, 0); Genode::Trace::Performance_counter::start(_counter);
return ::read(_file_descriptor, &_prev, sizeof(read_format)) == sizeof(read_format); _prev.value = static_cast<std::uint64_t>(Genode::Trace::Performance_counter::read(_counter));
std::cout << "PMC " << _name << " prev.value=" << _prev.value << std::endl;
}
catch (Genode::Trace::Pfc_access_error &e)
{
std::cerr << "Failed to start counter: " << e.error_code() << std::endl;
}
return _prev.value >= 0;
} }
bool stop() bool stop()
{ {
//const auto is_read = ::read(_file_descriptor, &_data, sizeof(read_format)) == sizeof(read_format); try {
//ioctl(_file_descriptor, PERF_EVENT_IOC_DISABLE, 0); _data.value = Genode::Trace::Performance_counter::read(_counter);
return false; // is_read; Genode::Trace::Performance_counter::stop(_counter);
Genode::Trace::Performance_counter::reset(_counter);
std::cout << "PMC " << _name << " data.value=" << _data.value << std::endl;
}
catch (Genode::Trace::Pfc_access_error &e)
{
std::cerr << "Failed to stop counter: " << e.error_code() << std::endl;
}
// const auto is_read = ::read(_file_descriptor, &_data, sizeof(read_format)) == sizeof(read_format);
// ioctl(_file_descriptor, PERF_EVENT_IOC_DISABLE, 0);
return _data.value >= 0; // is_read;
} }
[[nodiscard]] double read() const [[nodiscard]] double read() const
{ {
const auto multiplexing_correction = static_cast<double>(_data.time_enabled - _prev.time_enabled) / std::cout << "PMC " << _name << " value: " << (_data.value - _prev.value) << std::endl;
static_cast<double>(_data.time_running - _prev.time_running); return static_cast<double>(_data.value - _prev.value);
return static_cast<double>(_data.value - _prev.value) * multiplexing_correction;
} }
/// Returns the label this counter was constructed with.
[[nodiscard]] const std::string &name() const
{
    return _name;
}
@@ -84,8 +102,10 @@ private:
}; };
const std::string _name; const std::string _name;
std::int32_t _file_descriptor = -1; Genode::Trace::Performance_counter::Type _type;
//perf_event_attr _perf_event_attribute{}; Genode::uint64_t _event_id;
Genode::uint64_t _mask;
Genode::Trace::Performance_counter::Counter _counter;
read_format _prev{}; read_format _prev{};
read_format _data{}; read_format _data{};
}; };
@@ -101,11 +121,11 @@ public:
[[maybe_unused]] static PerfCounter L1_MISSES; [[maybe_unused]] static PerfCounter L1_MISSES;
[[maybe_unused]] [[maybe_unused]] static PerfCounter LLC_MISSES; [[maybe_unused]] [[maybe_unused]] static PerfCounter LLC_MISSES;
[[maybe_unused]] static PerfCounter LLC_REFERENCES; [[maybe_unused]] static PerfCounter LLC_REFERENCES;
[[maybe_unused]] static PerfCounter STALLED_CYCLES_BACKEND; //[[maybe_unused]] static PerfCounter STALLED_CYCLES_BACKEND;
[[maybe_unused]] static PerfCounter STALLS_MEM_ANY; //[[maybe_unused]] static PerfCounter STALLS_MEM_ANY;
[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_NTA; [[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_NTA;
[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_T0; //[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_T0;
[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_T1_T2; //[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_T1_T2;
[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_WRITE; [[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_WRITE;
Perf() noexcept = default; Perf() noexcept = default;