Refactored and fixed micro-benchmark for CPU allocation.

Michael Mueller
2025-03-11 13:59:31 +01:00
parent a6e880e267
commit 2d69361890


@@ -1,3 +1,14 @@
/**
* @file main.cc
* @author Michael Müller <michael.mueller@uos.de>
* @brief Micro-benchmark for evaluating CPU core allocation costs
* @details This micro-benchmark measures the time required to allocate and activate a given number of CPU cores.
* @version 0.1
* @date 2025-03-10
*
* @copyright Copyright (c) 2025
*
*/
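/*
 * Output format (illustrative; the field values are made up): each successful iteration
 * emits one JSON record on the Genode log, e.g.
 *
 *   {"iteration": 0, "cores": 4, "allocation": ..., "running": ..., "start": ..., "end": ..., "ns": 2800},
 *
 * where "ns" is the TSC delta (end - start) divided by the TSC frequency in GHz.
 */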
#include <libc/component.h>
#include <base/log.h>
#include <tukija/syscall-generic.h>
@@ -11,38 +22,25 @@
#include <atomic>
#include <timer_session/connection.h>
#define CALLS 100
#define CORES 14
#define HYPERCALL
#define CALLS 100 /* Number of times to repeat the allocation of CPU cores */
#define CORES 4 /* Initial number of CPUs to allocate */
constexpr int STEP{4}; /* How many CPU cores to add after each CALLS iterations. */
//Genode::Trace::timestamp();
//static Genode::Trace::Timestamp rdtsc_cost = 0;
/* Global parameters */
Genode::Env *genv = nullptr;
static Genode::Trace::Timestamp start = 0;
static const unsigned long loops = 10000UL;
static std::atomic<long> counter(0);
static std::atomic<bool> ready{false};
static std::atomic<bool> restart{true};
static std::atomic<int> yield_ctr{-(31-CORES)};
static unsigned long tsc_freq_khz = 0;
static Tukija::Cip *cip = Tukija::Cip::cip();
static Tukija::Tip const *tip = Tukija::Tip::tip();
static Genode::Trace::Timestamp start = 0; /* Start point in time */
static const unsigned long loops = 10000UL; /* Number of times an allocation shall be repeated, needed to get
a statistically meaningful number of data points */
static std::atomic<long> counter(0); /* Atomic counter incremented by activated workers, used as a barrier */
static bool volatile ready{false}; /* Flag signalling that all workers are set up and ready for benchmarking */
static bool volatile restart{true}; /* Flag signalling that the current benchmark run shall be restarted */
static std::atomic<int> yield_ctr{-(63-CORES)}; /* Counter for the number of workers that must yield before
a new benchmark run can be started. */
static unsigned long tsc_freq_khz = 0; /* TSC frequency in kHz, used for calculating measured time intervals */
static Tukija::Cip *cip = Tukija::Cip::cip(); /* Cell info page, stores info about the current core allocation */
static Tukija::Tip const *tip = Tukija::Tip::tip(); /* Used to query topology information */
int cores, i;
struct Channel {
unsigned long yield_flag : 1,
op : 2,
tnum : 61;
unsigned long delta_alloc;
unsigned long delta_activate;
unsigned long delta_setflag;
unsigned long delta_findborrower;
unsigned long delta_block;
unsigned long delta_enter;
unsigned long delta_return;
};
int cores, i; /* Global iterator variables */
struct Cell : public Genode::Thread
{
@@ -50,117 +48,152 @@ struct Cell : public Genode::Thread
Libc::Env &env;
Timer::Connection &_timer;
/**
* @brief pthread wrapper function for a Genode thread's entry method.
*
* @param args Pointer to the Cell instance whose entry() method shall be executed
* @return Always nullptr
*/
static void *pthread_entry(void *args) {
Cell *cell = reinterpret_cast<Cell *>(args);
cell->entry();
return nullptr;
}
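/*
 * Usage note (sketch): this wrapper is handed to Libc::pthread_create_from_session()
 * as the pthread start routine, with a pointer to the Cell instance as its argument
 * (see the worker creation loop in Libc::Component::construct below), so the pthread
 * simply forwards into the Genode thread's entry() method.
 */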
void entry() override
{
Genode::Trace::Timestamp latency = 0;
Tukija::uint64_t count_allocs = 0;
void worker_loop() {
Tukija::Cip::Worker *my_channel = &cip->worker_for_location(Genode::Thread::myself()->affinity());
unsigned channel_id = cip->location_to_kernel_cpu(Genode::Thread::myself()->affinity());
Genode::log("Started worker", _id, " on CPU with affinity ", channel_id, "@", const_cast<Tukija::Tip*>(tip)->dom_of_cpu(channel_id).id, "-", Genode::Thread::myself()->affinity(), " signal channel: ", my_channel->yield_flag, " at ", my_channel);
while (true) {
/* If the current thread is not the main worker (the one with _id 0 that allocates CPU cores),
it should sleep, voluntarily releasing its CPU core. This ensures that
we can measure the allocation of CPU cores and the activation of workers
in a single pass. (A minimal counting-barrier sketch follows this function.) */
if (__atomic_load_n(&restart, __ATOMIC_SEQ_CST)) {
//Genode::log("Worker ", _id, " on CPU ", channel_id, " yielding. cores=", cip->cores_current, " #:", cip->cores_current.count());
Tukija::release(Tukija::Resource_type::CPU_CORE);
}
/* The thread was woken up, so increase the thread counter */
// Genode::log("Worker ", _id, " woke up on CPU ", channel_id, " counter=", counter.load());
if (counter.fetch_add(1) == cores-1) {
__atomic_store_n(&ready, true, __ATOMIC_SEQ_CST);
//Genode::log("Worker", _id, " Signaled ready, ctr = ", counter.load());
}
/* As long as no restart was signalled, poll the yield flag to check whether
we received a yield request from the hypervisor. */
while (!__atomic_load_n(&restart, __ATOMIC_SEQ_CST)) {
if ((my_channel->yield_flag != 0))
{
//Tukija::return_to_owner(Tukija::Resource_type::CPU_CORE);
}
}
//Genode::log("Worker ", _id, " on CPU ", channel_id, " restarting.");
}
}
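/*
 * Illustrative sketch (not used by the benchmark; names are hypothetical): the
 * rendezvous between woken workers and the allocating thread is a plain counting
 * barrier - every woken worker increments the shared atomic counter once, and the
 * last arrival raises the ready flag that the allocating thread spins on.
 */
static void barrier_arrive_sketch(std::atomic<long> &arrivals,
                                  bool volatile &all_ready,
                                  int expected)
{
    /* Register this worker; fetch_add() returns the pre-increment value. */
    if (arrivals.fetch_add(1) == expected - 1)
        /* The last worker to arrive signals the allocating thread. */
        __atomic_store_n(&all_ready, true, __ATOMIC_SEQ_CST);
}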
/**
* @brief The actual benchmark loop: allocates CPU cores and measures the
* latency of allocation and worker activation.
*/
void benchmark_loop()
{
Genode::Trace::Timestamp latency = 0;
/* First determine our channel id and store the pointer to our
CIP worker structure. This is needed in order to get the hypervisor's yield signal. */
/* Calculate TSC frequency in GHz */
unsigned long _tsc_freq_ghz = tsc_freq_khz / 1000000UL;
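/* Worked example (assumed frequency): with tsc_freq_khz = 3000000, i.e. a 3 GHz TSC,
_tsc_freq_ghz = 3 cycles per nanosecond, so a measured delta of 6000 cycles is
reported as 2000 ns. Note that the integer division truncates non-integral GHz values. */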
Genode::log("Started worker", _id, " on CPU with affinity ", channel_id, "@", const_cast<Tukija::Tip*>(tip)->dom_of_cpu(channel_id).id, "-", Genode::Thread::myself()->affinity(), " signal channel: ", my_channel->yield_flag, " at ", my_channel);
int max_cores = cip->habitat_affinity.total();
for (cores = CORES; cores <= 14; cores+=4) {
for (i = 0; i < CALLS; ) {
/* Now allocate cores, starting with CORES and adding STEP additional cores
after every CALLS iterations. */
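/* Example schedule (assumed habitat size): with CORES = 4, STEP = 4 and max_cores = 16,
the outer loop measures CALLS allocations for 4, 8, 12 and 16 cores in turn. */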
for (cores = CORES; cores <= max_cores;)
{
//Genode::log("Starting benchmark for ", cores, " CPU cores. Parameters: ", yield_ctr.load(), ":", counter.load());
for (i = 0; i < CALLS;)
{
if ((i == 0 && yield_ctr >= cores-1) || (i > 0 && yield_ctr >= cores-1))
ready = true;
if (_id != 0 && restart.load()) {
yield_ctr.fetch_add(1);
// Genode::log("Worker ", _id, "yielded, yield_ctr = ", yield_ctr.load());
Tukija::release(Tukija::Resource_type::CPU_CORE);
}
//Genode::log("Worker ", _id, " on CPU ", channel_id, " woke up");
counter.fetch_add(1);
if (counter >= cores-1) {
ready = true;
// Genode::log("{\"allocation:\": ", allocation, ", \"id\":", _id, ",\"clk_total\":", (end-::start), ", \"mean_clk\":", (end-::start)/count_allocs ,", \"count\": ", count_allocs, "\"channel-id\":", channel_id, "},");
}
/*if (my_channel->op == 2) {
Genode::Trace::Timestamp now = Genode::Trace::timestamp();
//Tukija::core_allocation(allocation);
my_channel->delta_return = now - my_channel->delta_return;
Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"d_block\": ", my_channel->delta_block / _tsc_freq_ghz, ", \"d_enter\":", my_channel->delta_enter / _tsc_freq_ghz, ", \"d_return\":", my_channel->delta_return / _tsc_freq_ghz, ", \"op\": \"yield\"},");
}
my_channel->op = 0;*/
if (_id == 0) {
//Genode::log("Waiting on start signal");
while (ready.load() == false)
__builtin_ia32_pause();
//Genode::log("Got start signal");
_timer.msleep(2);
//Genode::log("Woke up for new iteration");
ready = false;
restart = false;
::start = Genode::Trace::timestamp();
}
Genode::Trace::Timestamp end = 0;
while (_id==0)
/* Here, we wait until only the allocating worker's CPU core remains in the
current allocation. This ensures that all workers have gone to sleep
and released their CPU cores. */
//Genode::log("Waiting for ", cores, " workers to yield");
while (cip->cores_current.count() != 1 )
{
if (_id == 0) {
//Genode::log("Allocating 4 cores");
//my_channel->tnum = i;
//my_channel->op = 1; /* 1 for alloc, 2 for yield */
//my_channel->delta_enter = Genode::Trace::timestamp();
Tukija::uint8_t rc = Tukija::alloc(Tukija::Resource_type::CPU_CORE, cores);
if (rc == Tukija::NOVA_OK)
{
while(ready.load() == false)
__builtin_ia32_pause();
end = Genode::Trace::timestamp();
//my_channel->delta_return = end - my_channel->delta_return;
latency += (end - ::start) / _tsc_freq_ghz;
//Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"delta_enter:\" ", my_channel->delta_enter / _tsc_freq_ghz, ", \"delta_alloc\": ", my_channel->delta_alloc / _tsc_freq_ghz, ", \"delta_activate:\": ", my_channel->delta_activate / _tsc_freq_ghz, ", \"delta_setflag\": ", my_channel->delta_setflag / _tsc_freq_ghz, ", \"delta_return\": ", my_channel->delta_return / _tsc_freq_ghz, "},");
restart = true;
counter = 0;
yield_ctr = 0;
//if (i%100==0) {
Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"allocation\": ", cip->cores_new, ",\"start\": ", ::start, ", \"end\": ", end, " ,\"ns\": ", (latency), "},");
//my_channel->delta_setflag = 0;
latency = 0;
cip->cores_new.clear();
//}
i++;
break;
} else {
//Genode::log("cores allocated: ", allocated);
break;
// Genode::log("cores allocated: ", allocated);
}
count_allocs++;
}
__builtin_ia32_pause();
}
//Genode::log("Finished allocation. Waiting for yield signal, id = ", channel_id, "\n");
while (restart.load() == false) {
Tukija::Cip::Worker volatile *res = __atomic_load_n(&my_channel, __ATOMIC_SEQ_CST);
if (res->yield_flag) {
//Genode::log("Got yield signal on channel ", channel_id);
Tukija::return_to_owner(Tukija::Resource_type::CPU_CORE);
//Genode::log("Workers ready.");
_timer.msleep(2);
restart = false;
ready = false;
/* Mark beginning of benchmark */
::start = Genode::Trace::timestamp();
Genode::Trace::Timestamp end = 0;
/* Allocate the specified number of CPU cores */
Tukija::uint8_t rc = Tukija::alloc(Tukija::Resource_type::CPU_CORE, cores);
if (rc == Tukija::NOVA_OK)
{
//Genode::log("Cores activated ", cip->cores_current);
/* If we get NOVA_OK as return code, cores were allocated. However, it is not yet guaranteed
that we got the specified number of CPU cores (it could be fewer). So, we have to wait until
all requested workers have actually woken up and check whether the number of woken threads
matches the requested number of CPU cores. */
//Genode::log("Allocation returned successfully.");
while (!__atomic_load_n(&ready, __ATOMIC_SEQ_CST)) {
__builtin_ia32_pause();
}
/* Now, we need to restart the run. Hence, we set the restart flag to true and
reset the counter variables. */
counter = 0;
yield_ctr.store(0);
ready = false;
/* All requested CPU cores have been allocated and the workers were activated, so we can now
mark the end of this run and calculate the time it took. */
end = Genode::Trace::timestamp();
latency += (end - ::start) / _tsc_freq_ghz;
/* Print the results as JSON */
Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"allocation\": ", cip->cores_new, ", \"running\": ", cip->cores_current, ",\"start\": ", ::start, ", \"end\": ", end, " ,\"ns\": ", (latency), "},");
latency = 0;
__atomic_store_n(&restart, true, __ATOMIC_SEQ_CST);
//Genode::log("Restarting.");
/* Also clear the CPU set of newly allocated cores, so that we will not see cores allocated by previous runs. */
cip->cores_new.clear();
if (++i == CALLS)
cores += STEP;
} else {
Genode::log("Core allocation failed.");
}
}
}
Genode::log("Benchmak finished.");
}
void entry() override
{
/* We distinguish between the main thread that allocates CPU cores and */
if (_id == 0)
benchmark_loop();
else
worker_loop(); /* and worker threads that are just used to measure wake-up latencies */
}
Cell(Libc::Env &env, Timer::Connection &timer, Genode::uint16_t id, Location const &location)
: Thread(env, Name("test_", location.xpos(), "x", location.ypos()), 4 * 4096, location, Weight(),
env.cpu()), _id(id), env(env), _timer(timer)
@@ -201,15 +234,17 @@ void Libc::Component::construct(Libc::Env &env)
Genode::Trace::Timestamp thread_start = Genode::Trace::timestamp();
Genode::Affinity::Location loc = me->affinity();
unsigned cpuid = cip->location_to_kernel_cpu(loc);
unsigned int cpu_id = cip->location_to_kernel_cpu(loc);
cip->cores_current.set(cpu_id);
for (Genode::uint16_t cpu = 1; cpu < space.total(); cpu++)
{
Genode::String<32> const name{"worker", cpu};
/* Do not create a worker for the EP's CPU yet, because
we only want to measure the time it takes to create the worker threads */
if (cpu == (space.total() - cpuid))
continue;
/*if (cpu == (space.total() - cpuid))
continue;*/
Cell *worker = new Cell(env, _timer, cpu, space.location_of_index(cpu));
Libc::pthread_create_from_session(&workers[cpu], Cell::pthread_entry,