mirror of
https://github.com/mmueller41/genode.git
synced 2026-01-21 12:32:56 +01:00
Refactored and fixed micro-benchmark for CPU allocation.
This commit is contained in:
@@ -1,3 +1,14 @@
|
||||
/**
|
||||
* @file main.cc
|
||||
* @author Michael Müller <michael.mueller@uos.de>
|
||||
* @brief Micro-benchmark for evaluating CPU core allocation costs
|
||||
* @details This micro-benchmark measures the time for allocating and activating a certain number of CPU cores.
|
||||
* @version 0.1
|
||||
* @date 2025-03-10
|
||||
*
|
||||
* @copyright Copyright (c) 2025
|
||||
*
|
||||
*/
|
||||
#include <libc/component.h>
|
||||
#include <base/log.h>
|
||||
#include <tukija/syscall-generic.h>
|
||||
@@ -11,38 +22,25 @@
|
||||
#include <atomic>
|
||||
#include <timer_session/connection.h>
|
||||
|
||||
#define CALLS 100
|
||||
#define CORES 14
|
||||
#define HYPERCALL
|
||||
#define CALLS 100 /* Number of times to repeat the allocation of CPU cores */
|
||||
#define CORES 4 /* Initial number of CPUs to allocate */
|
||||
constexpr int STEP{4}; /* How many CPU cores to add after each CALLS iterations. */
|
||||
|
||||
//Genode::Trace::timestamp();
|
||||
//static Genode::Trace::Timestamp rdtsc_cost = 0;
|
||||
/* Global parameters */
|
||||
Genode::Env *genv = nullptr;
|
||||
static Genode::Trace::Timestamp start = 0;
|
||||
static const unsigned long loops = 10000UL;
|
||||
static std::atomic<long> counter(0);
|
||||
static std::atomic<bool> ready{false};
|
||||
static std::atomic<bool> restart{true};
|
||||
static std::atomic<int> yield_ctr{-(31-CORES)};
|
||||
static unsigned long tsc_freq_khz = 0;
|
||||
static Tukija::Cip *cip = Tukija::Cip::cip();
|
||||
static Tukija::Tip const *tip = Tukija::Tip::tip();
|
||||
static Genode::Trace::Timestamp start = 0; /* Start point in time */
|
||||
static const unsigned long loops = 10000UL; /* Number of times an allocation shall be repeated, needed to get
|
||||
a statistically meaningful number of data points */
|
||||
static std::atomic<long> counter(0); /* Atomic counter incremented by activated workers, used as barrier */
|
||||
static bool volatile ready{false}; /* Signal flag that all workers are set up and ready for benchmarking */
|
||||
static bool volatile restart{true}; /* Signals that the benchmark run shall restart */
|
||||
static std::atomic<int> yield_ctr{-(63-CORES)}; /* Counter for the number of workers that must yield before
|
||||
a new benchmark run can be started. */
|
||||
static unsigned long tsc_freq_khz = 0; /* TSC frequency in kHz, used for calculating measured time intervals */
|
||||
static Tukija::Cip *cip = Tukija::Cip::cip(); /* Cell info page, stores info about the current core allocation */
|
||||
static Tukija::Tip const *tip = Tukija::Tip::tip(); /* Used to query topology information */
|
||||
|
||||
int cores, i;
|
||||
|
||||
|
||||
struct Channel {
|
||||
unsigned long yield_flag : 1,
|
||||
op : 2,
|
||||
tnum : 61;
|
||||
unsigned long delta_alloc;
|
||||
unsigned long delta_activate;
|
||||
unsigned long delta_setflag;
|
||||
unsigned long delta_findborrower;
|
||||
unsigned long delta_block;
|
||||
unsigned long delta_enter;
|
||||
unsigned long delta_return;
|
||||
};
|
||||
int cores, i; /* Global iterator variables */
|
||||
|
||||
struct Cell : public Genode::Thread
|
||||
{
|
||||
@@ -50,117 +48,152 @@ struct Cell : public Genode::Thread
|
||||
Libc::Env &env;
|
||||
Timer::Connection &_timer;
|
||||
|
||||
/**
|
||||
* @brief pthread wrapper function for a Genode thread's entry method.
|
||||
*
|
||||
* @param args
|
||||
* @return void*
|
||||
*/
|
||||
static void *pthread_entry(void *args) {
|
||||
Cell *cell = reinterpret_cast<Cell *>(args);
|
||||
cell->entry();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void entry() override
|
||||
{
|
||||
Genode::Trace::Timestamp latency = 0;
|
||||
Tukija::uint64_t count_allocs = 0;
|
||||
|
||||
void worker_loop() {
|
||||
Tukija::Cip::Worker *my_channel = &cip->worker_for_location(Genode::Thread::myself()->affinity());
|
||||
unsigned channel_id = cip->location_to_kernel_cpu(Genode::Thread::myself()->affinity());
|
||||
|
||||
Genode::log("Started worker", _id, " on CPU with affinity ", channel_id, "@", const_cast<Tukija::Tip*>(tip)->dom_of_cpu(channel_id).id, "-", Genode::Thread::myself()->affinity(), " signal channel: ", my_channel->yield_flag, " at ", my_channel);
|
||||
|
||||
while (true) {
|
||||
|
||||
/* If the current thread is not the main worker that allocates CPU cores, i.e., its _id is not 0,
|
||||
it should sleep voluntarily releasing its CPU core. This ensures that
|
||||
we can measure the allocation of CPU cores and the activation of workers
|
||||
in one go. */
|
||||
if (__atomic_load_n(&restart, __ATOMIC_SEQ_CST)) {
|
||||
//Genode::log("Worker ", _id, " on CPU ", channel_id, " yielding. cores=", cip->cores_current, " #:", cip->cores_current.count());
|
||||
Tukija::release(Tukija::Resource_type::CPU_CORE);
|
||||
}
|
||||
|
||||
/* The thread was woken up, so increase the thread counter */
|
||||
|
||||
// Genode::log("Worker ", _id, " woke up on CPU ", channel_id, " counter=", counter.load());
|
||||
if (counter.fetch_add(1) == cores-1) {
|
||||
__atomic_store_n(&ready, true, __ATOMIC_SEQ_CST);
|
||||
//Genode::log("Worker", _id, " Signaled ready, ctr = ", counter.load());
|
||||
}
|
||||
|
||||
|
||||
/* As long as no restart was signalled, poll the yield flag to check whether
|
||||
we received a yield request from the hypervisor. */
|
||||
while (!__atomic_load_n(&restart, __ATOMIC_SEQ_CST)) {
|
||||
if ((my_channel->yield_flag != 0))
|
||||
{
|
||||
//Tukija::return_to_owner(Tukija::Resource_type::CPU_CORE);
|
||||
}
|
||||
}
|
||||
//Genode::log("Worker ", _id, " on CPU ", channel_id, " restarting.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief the actual benchmark
|
||||
*
|
||||
*/
|
||||
void benchmark_loop()
|
||||
{
|
||||
Genode::Trace::Timestamp latency = 0;
|
||||
|
||||
/* First determine our channel id and store the pointer to our
|
||||
CIP worker structure. This is needed in order to get the hypervisor's yield signal. */
|
||||
|
||||
/* Calculate TSC frequency in GHz */
|
||||
unsigned long _tsc_freq_ghz = tsc_freq_khz / 1000000UL;
|
||||
|
||||
Genode::log("Started worker", _id, " on CPU with affinity ", channel_id, "@", const_cast<Tukija::Tip*>(tip)->dom_of_cpu(channel_id).id, "-", Genode::Thread::myself()->affinity(), " signal channel: ", my_channel->yield_flag, " at ", my_channel);
|
||||
int max_cores = cip->habitat_affinity.total();
|
||||
|
||||
for (cores = CORES; cores <= 14; cores+=4) {
|
||||
for (i = 0; i < CALLS; ) {
|
||||
/* Now allocate cores starting with a number of CORES and increase by 4 additional cores
|
||||
after CALLS iterations. */
|
||||
for (cores = CORES; cores <= max_cores;)
|
||||
{
|
||||
//Genode::log("Starting benchmark for ", cores, " CPU cores. Parameters: ", yield_ctr.load(), ":", counter.load());
|
||||
for (i = 0; i < CALLS;)
|
||||
{
|
||||
|
||||
if ((i == 0 && yield_ctr >= cores-1) || (i > 0 && yield_ctr >= cores-1))
|
||||
ready = true;
|
||||
|
||||
if (_id != 0 && restart.load()) {
|
||||
yield_ctr.fetch_add(1);
|
||||
// Genode::log("Worker ", _id, "yielded, yield_ctr = ", yield_ctr.load());
|
||||
Tukija::release(Tukija::Resource_type::CPU_CORE);
|
||||
}
|
||||
|
||||
//Genode::log("Worker ", _id, " on CPU ", channel_id, " woke up");
|
||||
counter.fetch_add(1);
|
||||
if (counter >= cores-1) {
|
||||
ready = true;
|
||||
// Genode::log("{\"allocation:\": ", allocation, ", \"id\":", _id, ",\"clk_total\":", (end-::start), ", \"mean_clk\":", (end-::start)/count_allocs ,", \"count\": ", count_allocs, "\"channel-id\":", channel_id, "},");
|
||||
}
|
||||
|
||||
/*if (my_channel->op == 2) {
|
||||
Genode::Trace::Timestamp now = Genode::Trace::timestamp();
|
||||
//Tukija::core_allocation(allocation);
|
||||
my_channel->delta_return = now - my_channel->delta_return;
|
||||
Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"d_block\": ", my_channel->delta_block / _tsc_freq_ghz, ", \"d_enter\":", my_channel->delta_enter / _tsc_freq_ghz, ", \"d_return\":", my_channel->delta_return / _tsc_freq_ghz, ", \"op\": \"yield\"},");
|
||||
}
|
||||
my_channel->op = 0;*/
|
||||
if (_id == 0) {
|
||||
//Genode::log("Waiting on start signal");
|
||||
while (ready.load() == false)
|
||||
__builtin_ia32_pause();
|
||||
|
||||
//Genode::log("Got start signal");
|
||||
_timer.msleep(2);
|
||||
|
||||
//Genode::log("Woke up for new iteration");
|
||||
ready = false;
|
||||
restart = false;
|
||||
::start = Genode::Trace::timestamp();
|
||||
}
|
||||
|
||||
Genode::Trace::Timestamp end = 0;
|
||||
while (_id==0)
|
||||
/* Here, we check whether the yield counter is equal to the number
|
||||
of cores to allocate-1. This ensures that all workers have gone to sleep
|
||||
and released their CPU core. */
|
||||
//Genode::log("Waiting for ", cores, " workers to yield");
|
||||
while (cip->cores_current.count() != 1 )
|
||||
{
|
||||
|
||||
if (_id == 0) {
|
||||
//Genode::log("Allocating 4 cores");
|
||||
|
||||
//my_channel->tnum = i;
|
||||
//my_channel->op = 1; /* 1 for alloc, 2 for yield */
|
||||
|
||||
//my_channel->delta_enter = Genode::Trace::timestamp();
|
||||
Tukija::uint8_t rc = Tukija::alloc(Tukija::Resource_type::CPU_CORE, cores);
|
||||
if (rc == Tukija::NOVA_OK)
|
||||
{
|
||||
|
||||
while(ready.load() == false)
|
||||
__builtin_ia32_pause();
|
||||
end = Genode::Trace::timestamp();
|
||||
//my_channel->delta_return = end - my_channel->delta_return;
|
||||
latency += (end - ::start) / _tsc_freq_ghz;
|
||||
//Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"delta_enter:\" ", my_channel->delta_enter / _tsc_freq_ghz, ", \"delta_alloc\": ", my_channel->delta_alloc / _tsc_freq_ghz, ", \"delta_activate:\": ", my_channel->delta_activate / _tsc_freq_ghz, ", \"delta_setflag\": ", my_channel->delta_setflag / _tsc_freq_ghz, ", \"delta_return\": ", my_channel->delta_return / _tsc_freq_ghz, "},");
|
||||
restart = true;
|
||||
counter = 0;
|
||||
yield_ctr = 0;
|
||||
//if (i%100==0) {
|
||||
|
||||
Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"allocation\": ", cip->cores_new, ",\"start\": ", ::start, ", \"end\": ", end, " ,\"ns\": ", (latency), "},");
|
||||
//my_channel->delta_setflag = 0;
|
||||
latency = 0;
|
||||
cip->cores_new.clear();
|
||||
//}
|
||||
i++;
|
||||
break;
|
||||
} else {
|
||||
//Genode::log("cores allocated: ", allocated);
|
||||
break;
|
||||
// Genode::log("cores allocated: ", allocated);
|
||||
}
|
||||
count_allocs++;
|
||||
}
|
||||
__builtin_ia32_pause();
|
||||
}
|
||||
//Genode::log("Finished allocation. Waiting for yield signal, id = ", channel_id, "\n");
|
||||
while (restart.load() == false) {
|
||||
Tukija::Cip::Worker volatile *res = __atomic_load_n(&my_channel, __ATOMIC_SEQ_CST);
|
||||
if (res->yield_flag) {
|
||||
//Genode::log("Got yield signal on channel ", channel_id);
|
||||
Tukija::return_to_owner(Tukija::Resource_type::CPU_CORE);
|
||||
|
||||
//Genode::log("Workers ready.");
|
||||
_timer.msleep(2);
|
||||
|
||||
restart = false;
|
||||
ready = false;
|
||||
/* Mark beginning of benchmark */
|
||||
::start = Genode::Trace::timestamp();
|
||||
|
||||
Genode::Trace::Timestamp end = 0;
|
||||
/* Allocate the specified number of CPU cores */
|
||||
Tukija::uint8_t rc = Tukija::alloc(Tukija::Resource_type::CPU_CORE, cores);
|
||||
if (rc == Tukija::NOVA_OK)
|
||||
{
|
||||
//Genode::log("Cores activated ", cip->cores_current);
|
||||
/* If we get NOVA_OK as return code, cores were allocated. However, it is not guaranteed yet
|
||||
that we got the specified number of CPU cores (it could be less). So, we have to wait until
|
||||
all requested workers have actually woken up and see if the number of woken threads matches
|
||||
the requested number of CPU cores. */
|
||||
//Genode::log("Allocation returned successfully.");
|
||||
while (!__atomic_load_n(&ready, __ATOMIC_SEQ_CST)) {
|
||||
__builtin_ia32_pause();
|
||||
}
|
||||
|
||||
/* Now, we need to restart the run. Hence, we set the restart flag to true and
|
||||
reset the counter variables. */
|
||||
counter = 0;
|
||||
yield_ctr.store(0);
|
||||
ready = false;
|
||||
|
||||
/* All requested CPU cores have been allocated and workers were activated. So we can now
|
||||
mark the end of this run, and calculate the time this run took. */
|
||||
end = Genode::Trace::timestamp();
|
||||
latency += (end - ::start) / _tsc_freq_ghz;
|
||||
|
||||
/* Print the results out in JSON */
|
||||
Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"allocation\": ", cip->cores_new, ", \"running\": ", cip->cores_current, ",\"start\": ", ::start, ", \"end\": ", end, " ,\"ns\": ", (latency), "},");
|
||||
latency = 0;
|
||||
|
||||
__atomic_store_n(&restart, true, __ATOMIC_SEQ_CST);
|
||||
|
||||
//Genode::log("Restarting.");
|
||||
/* Also clear the CPUset of new cores, so that we will not see cores allocated by previous runs. */
|
||||
cip->cores_new.clear();
|
||||
if (++i == CALLS)
|
||||
cores += STEP;
|
||||
|
||||
} else {
|
||||
Genode::log("Core allocation failed.");
|
||||
}
|
||||
}
|
||||
}
|
||||
Genode::log("Benchmak finished.");
|
||||
}
|
||||
|
||||
void entry() override
|
||||
{
|
||||
/* We distinguish between the main thread that allocates CPU cores and */
|
||||
if (_id == 0)
|
||||
benchmark_loop();
|
||||
else
|
||||
worker_loop(); /* and worker threads that are just used to measure wake-up latencies */
|
||||
}
|
||||
|
||||
Cell(Libc::Env &env, Timer::Connection &timer, Genode::uint16_t id, Location const &location)
|
||||
: Thread(env, Name("test_", location.xpos(), "x", location.ypos()), 4 * 4096, location, Weight(),
|
||||
env.cpu()), _id(id), env(env), _timer(timer)
|
||||
@@ -201,15 +234,17 @@ void Libc::Component::construct(Libc::Env &env)
|
||||
Genode::Trace::Timestamp thread_start = Genode::Trace::timestamp();
|
||||
|
||||
Genode::Affinity::Location loc = me->affinity();
|
||||
unsigned cpuid = cip->location_to_kernel_cpu(loc);
|
||||
|
||||
unsigned int cpu_id = cip->location_to_kernel_cpu(loc);
|
||||
cip->cores_current.set(cpu_id);
|
||||
|
||||
for (Genode::uint16_t cpu = 1; cpu < space.total(); cpu++)
|
||||
{
|
||||
Genode::String<32> const name{"worker", cpu};
|
||||
/* Do not create a worker for EP's CPU yet, because
|
||||
we want to measure the time it takes to create only the worker threads */
|
||||
if (cpu == (space.total() - cpuid))
|
||||
continue;
|
||||
/*if (cpu == (space.total() - cpuid))
|
||||
continue;*/
|
||||
|
||||
Cell *worker = new Cell(env, _timer, cpu, space.location_of_index(cpu));
|
||||
Libc::pthread_create_from_session(&workers[cpu], Cell::pthread_entry,
|
||||
|
||||
Reference in New Issue
Block a user