diff --git a/repos/ealanos/src/app/allocating_cell/main.cc b/repos/ealanos/src/app/allocating_cell/main.cc index 6d58f1e3c3..be4a9670f1 100644 --- a/repos/ealanos/src/app/allocating_cell/main.cc +++ b/repos/ealanos/src/app/allocating_cell/main.cc @@ -1,3 +1,14 @@ +/** + * @file main.cc + * @author Michael Müller + * @brief Micro-benchmark for evaluating CPU core allocation costs + * @details This micro-benchmark measures the time for allocating and activating a certain number of CPU cores. + * @version 0.1 + * @date 2025-03-10 + * + * @copyright Copyright (c) 2025 + * + */ #include #include #include @@ -11,38 +22,25 @@ #include #include -#define CALLS 100 -#define CORES 14 -#define HYPERCALL +#define CALLS 100 /* Number of times to repeat the allocation of CPU cores */ +#define CORES 4 /* Initial number of CPUs to allocate */ +constexpr int STEP{4}; /* How many CPU cores to add after every CALLS iterations. */ - //Genode::Trace::timestamp(); -//static Genode::Trace::Timestamp rdtsc_cost = 0; +/* Global parameters */ Genode::Env *genv = nullptr; -static Genode::Trace::Timestamp start = 0; -static const unsigned long loops = 10000UL; -static std::atomic counter(0); -static std::atomic ready{false}; -static std::atomic restart{true}; -static std::atomic yield_ctr{-(31-CORES)}; -static unsigned long tsc_freq_khz = 0; -static Tukija::Cip *cip = Tukija::Cip::cip(); -static Tukija::Tip const *tip = Tukija::Tip::tip(); +static Genode::Trace::Timestamp start = 0; /* Start point in time */ +static const unsigned long loops = 10000UL; /* Number of times an allocation shall be repeated, needed to get + a statistically meaningful number of data points */ +static std::atomic counter(0); /* Atomic counter incremented by activated workers, used as barrier */ +static bool volatile ready{false}; /* Signal flag that all workers are set up and ready for benchmarking */ +static bool volatile restart{true}; /* Signals that the benchmark run shall restart */ +static std::atomic
yield_ctr{-(63-CORES)}; /* Counter for the number of workers that must yield before + a new benchmark run can be started. */ +static unsigned long tsc_freq_khz = 0; /* TSC frequency in kHz, used for calculating measured time intervals */ +static Tukija::Cip *cip = Tukija::Cip::cip(); /* Cell info page, stores info about the current core allocation */ +static Tukija::Tip const *tip = Tukija::Tip::tip(); /* Used to query topology information */ -int cores, i; - - -struct Channel { - unsigned long yield_flag : 1, - op : 2, - tnum : 61; - unsigned long delta_alloc; - unsigned long delta_activate; - unsigned long delta_setflag; - unsigned long delta_findborrower; - unsigned long delta_block; - unsigned long delta_enter; - unsigned long delta_return; -}; +int cores, i; /* Global iterator variables */ struct Cell : public Genode::Thread { @@ -50,117 +48,152 @@ struct Cell : public Genode::Thread Libc::Env &env; Timer::Connection &_timer; + /** + * @brief pthread wrapper function for a Genode thread's entry method. + * + * @param args + * @return void* + */ static void *pthread_entry(void *args) { Cell *cell = reinterpret_cast(args); cell->entry(); return nullptr; } - void entry() override - { - Genode::Trace::Timestamp latency = 0; - Tukija::uint64_t count_allocs = 0; - + void worker_loop() { Tukija::Cip::Worker *my_channel = &cip->worker_for_location(Genode::Thread::myself()->affinity()); unsigned channel_id = cip->location_to_kernel_cpu(Genode::Thread::myself()->affinity()); + + Genode::log("Started worker", _id, " on CPU with affinity ", channel_id, "@", const_cast(tip)->dom_of_cpu(channel_id).id, "-", Genode::Thread::myself()->affinity(), " signal channel: ", my_channel->yield_flag, " at ", my_channel); + + while (true) { + + /* If the current thread is not the main worker, i.e its _id is not 0, (that allocates CPU cores), + it should sleep voluntarily releasing its CPU core. 
This ensures that + we can measure the allocation of CPU cores and the activation of workers + in one row. */ + if (__atomic_load_n(&restart, __ATOMIC_SEQ_CST)) { + //Genode::log("Worker ", _id, " on CPU ", channel_id, " yielding. cores=", cip->cores_current, " #:", cip->cores_current.count()); + Tukija::release(Tukija::Resource_type::CPU_CORE); + } + /* The thread was woken up, so increase the thread counter */ + + // Genode::log("Worker ", _id, " woke up on CPU ", channel_id, " counter=", counter.load()); + if (counter.fetch_add(1) == cores-1) { + __atomic_store_n(&ready, true, __ATOMIC_SEQ_CST); + //Genode::log("Worker", _id, " Signaled ready, ctr = ", counter.load()); + } + + + /* As long as no restart was signalled, poll the yield flag to check whether + we received a yield request from the hypervisor. */ + while (!__atomic_load_n(&restart, __ATOMIC_SEQ_CST)) { + if ((my_channel->yield_flag != 0)) + { + //Tukija::return_to_owner(Tukija::Resource_type::CPU_CORE); + } + } + //Genode::log("Worker ", _id, " on CPU ", channel_id, " restarting."); + } + } + + /** + * @brief the actual benchmark + * + */ + void benchmark_loop() + { + Genode::Trace::Timestamp latency = 0; + + /* First determine our channel id and store the pointer to our + CIP worker structure. This is needed in order to get the hypervisor's yield signal. */ + + /* Calculate TSC frequency in GHz */ unsigned long _tsc_freq_ghz = tsc_freq_khz / 1000000UL; - Genode::log("Started worker", _id, " on CPU with affinity ", channel_id, "@", const_cast(tip)->dom_of_cpu(channel_id).id, "-", Genode::Thread::myself()->affinity(), " signal channel: ", my_channel->yield_flag, " at ", my_channel); + int max_cores = cip->habitat_affinity.total(); - for (cores = CORES; cores <= 14; cores+=4) { - for (i = 0; i < CALLS; ) { + /* Now allocate cores starting with a number of CORES and increase by 4 additional cores + after CALLS iterations. 
*/ + for (cores = CORES; cores <= max_cores;) + { + //Genode::log("Starting benchmark for ", cores, " CPU cores. Parameters: ", yield_ctr.load(), ":", counter.load()); + for (i = 0; i < CALLS;) + { - if ((i == 0 && yield_ctr >= cores-1) || (i > 0 && yield_ctr >= cores-1)) - ready = true; - - if (_id != 0 && restart.load()) { - yield_ctr.fetch_add(1); - // Genode::log("Worker ", _id, "yielded, yield_ctr = ", yield_ctr.load()); - Tukija::release(Tukija::Resource_type::CPU_CORE); - } - - //Genode::log("Worker ", _id, " on CPU ", channel_id, " woke up"); - counter.fetch_add(1); - if (counter >= cores-1) { - ready = true; - // Genode::log("{\"allocation:\": ", allocation, ", \"id\":", _id, ",\"clk_total\":", (end-::start), ", \"mean_clk\":", (end-::start)/count_allocs ,", \"count\": ", count_allocs, "\"channel-id\":", channel_id, "},"); - } - - /*if (my_channel->op == 2) { - Genode::Trace::Timestamp now = Genode::Trace::timestamp(); - //Tukija::core_allocation(allocation); - my_channel->delta_return = now - my_channel->delta_return; - Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"d_block\": ", my_channel->delta_block / _tsc_freq_ghz, ", \"d_enter\":", my_channel->delta_enter / _tsc_freq_ghz, ", \"d_return\":", my_channel->delta_return / _tsc_freq_ghz, ", \"op\": \"yield\"},"); - } - my_channel->op = 0;*/ - if (_id == 0) { - //Genode::log("Waiting on start signal"); - while (ready.load() == false) - __builtin_ia32_pause(); - - //Genode::log("Got start signal"); - _timer.msleep(2); - - //Genode::log("Woke up for new iteration"); - ready = false; - restart = false; - ::start = Genode::Trace::timestamp(); - } - - Genode::Trace::Timestamp end = 0; - while (_id==0) + /* Here, we spin until only a single core is left in cip->cores_current. + Presumably, this means that all workers have gone to sleep + and released their CPU core.
*/ + //Genode::log("Waiting for ", cores, " workers to yield"); + while (cip->cores_current.count() != 1 ) { - - if (_id == 0) { - //Genode::log("Allocating 4 cores"); - - //my_channel->tnum = i; - //my_channel->op = 1; /* 1 for alloc, 2 for yield */ - - //my_channel->delta_enter = Genode::Trace::timestamp(); - Tukija::uint8_t rc = Tukija::alloc(Tukija::Resource_type::CPU_CORE, cores); - if (rc == Tukija::NOVA_OK) - { - - while(ready.load() == false) - __builtin_ia32_pause(); - end = Genode::Trace::timestamp(); - //my_channel->delta_return = end - my_channel->delta_return; - latency += (end - ::start) / _tsc_freq_ghz; - //Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"delta_enter:\" ", my_channel->delta_enter / _tsc_freq_ghz, ", \"delta_alloc\": ", my_channel->delta_alloc / _tsc_freq_ghz, ", \"delta_activate:\": ", my_channel->delta_activate / _tsc_freq_ghz, ", \"delta_setflag\": ", my_channel->delta_setflag / _tsc_freq_ghz, ", \"delta_return\": ", my_channel->delta_return / _tsc_freq_ghz, "},"); - restart = true; - counter = 0; - yield_ctr = 0; - //if (i%100==0) { - - Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"allocation\": ", cip->cores_new, ",\"start\": ", ::start, ", \"end\": ", end, " ,\"ns\": ", (latency), "},"); - //my_channel->delta_setflag = 0; - latency = 0; - cip->cores_new.clear(); - //} - i++; - break; - } else { - //Genode::log("cores allocated: ", allocated); - break; - // Genode::log("cores allocated: ", allocated); - } - count_allocs++; - } + __builtin_ia32_pause(); } - //Genode::log("Finished allocation. 
Waiting for yield signal, id = ", channel_id, "\n"); - while (restart.load() == false) { - Tukija::Cip::Worker volatile *res = __atomic_load_n(&my_channel, __ATOMIC_SEQ_CST); - if (res->yield_flag) { - //Genode::log("Got yield signal on channel ", channel_id); - Tukija::return_to_owner(Tukija::Resource_type::CPU_CORE); + + //Genode::log("Workers ready."); + _timer.msleep(2); + + restart = false; + ready = false; + /* Mark beginning of benchmark */ + ::start = Genode::Trace::timestamp(); + + Genode::Trace::Timestamp end = 0; + /* Allocate the specified number of CPU cores */ + Tukija::uint8_t rc = Tukija::alloc(Tukija::Resource_type::CPU_CORE, cores); + if (rc == Tukija::NOVA_OK) + { + //Genode::log("Cores activated ", cip->cores_current); + /* If we get NOVA_OK as return code, cores were allocated. However, it is not guaranteed yet + that we got the specified number of CPU cores (it could be less). So, we have to wait until + all requested workers have actually woken up and see if the number of woken threads matches + the requested number of CPU cores. */ + //Genode::log("Allocation returned successfully."); + while (!__atomic_load_n(&ready, __ATOMIC_SEQ_CST)) { + __builtin_ia32_pause(); } + + /* Now, we need to restart the run. Hence, we set the restart flag to true and + reset the counter variables. */ + counter = 0; + yield_ctr.store(0); + ready = false; + + /* All requested CPU cores have been allocated and workers were activated. So we can now + mark the end of this run, and calculate the time this run took.
*/ + end = Genode::Trace::timestamp(); + latency += (end - ::start) / _tsc_freq_ghz; + + /* Print the results out in JSON */ + Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"allocation\": ", cip->cores_new, ", \"running\": ", cip->cores_current, ",\"start\": ", ::start, ", \"end\": ", end, " ,\"ns\": ", (latency), "},"); + latency = 0; + + __atomic_store_n(&restart, true, __ATOMIC_SEQ_CST); + + //Genode::log("Restarting."); + /* Also clear the CPUset of new cores, so that we will not see cores allocated by previous runs. */ + cip->cores_new.clear(); + if (++i == CALLS) + cores += STEP; + + } else { + Genode::log("Core allocation failed."); } } } Genode::log("Benchmak finished."); } + + void entry() override + { + /* We distinguish between the main thread that allocates CPU cores and */ + if (_id == 0) + benchmark_loop(); + else + worker_loop(); /* and worker threads that are just used to measure wake-up latencies */ + } + Cell(Libc::Env &env, Timer::Connection &timer, Genode::uint16_t id, Location const &location) : Thread(env, Name("test_", location.xpos(), "x", location.ypos()), 4 * 4096, location, Weight(), env.cpu()), _id(id), env(env), _timer(timer) @@ -201,15 +234,17 @@ void Libc::Component::construct(Libc::Env &env) Genode::Trace::Timestamp thread_start = Genode::Trace::timestamp(); Genode::Affinity::Location loc = me->affinity(); - unsigned cpuid = cip->location_to_kernel_cpu(loc); + + unsigned int cpu_id = cip->location_to_kernel_cpu(loc); + cip->cores_current.set(cpu_id); for (Genode::uint16_t cpu = 1; cpu < space.total(); cpu++) { Genode::String<32> const name{"worker", cpu}; /* Do not create a worker for EP's CPU yet, because we want to measure the time it takes to create only the worker threads */ - if (cpu == (space.total() - cpuid)) - continue; + /*if (cpu == (space.total() - cpuid)) + continue;*/ Cell *worker = new Cell(env, _timer, cpu, space.location_of_index(cpu)); Libc::pthread_create_from_session(&workers[cpu],
Cell::pthread_entry,