Refactored and fixed micro-benchmark for CPU allocation.

Michael Mueller
2025-03-11 13:59:31 +01:00
parent a6e880e267
commit 2d69361890


@@ -1,3 +1,14 @@
/**
* @file main.cc
* @author Michael Müller <michael.mueller@uos.de>
* @brief Micro-benchmark for evaluating CPU core allocation costs
* @details This micro-benchmark measures the time required to allocate and activate a given number of CPU cores.
* @version 0.1
* @date 2025-03-10
*
* @copyright Copyright (c) 2025
*
*/
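/*
 * Output format (illustrative; the field values are made up): each successful iteration
 * emits one JSON record on the Genode log, e.g.
 *
 *   {"iteration": 0, "cores": 4, "allocation": ..., "running": ..., "start": ..., "end": ..., "ns": 2800},
 *
 * where "ns" is the TSC delta (end - start) divided by the TSC frequency in GHz.
 */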
#include <libc/component.h>
#include <base/log.h>
#include <tukija/syscall-generic.h>
@@ -11,38 +22,25 @@
#include <atomic>
#include <timer_session/connection.h>
#define CALLS 100
#define CORES 14
#define HYPERCALL
#define CALLS 100 /* Number of times to repeat the allocation of CPU cores */
#define CORES 4 /* Initial number of CPUs to allocate */
constexpr int STEP{4}; /* How many CPU cores to add after each CALLS iterations. */
//Genode::Trace::timestamp();
//static Genode::Trace::Timestamp rdtsc_cost = 0;
/* Global parameters */
Genode::Env *genv = nullptr;
static Genode::Trace::Timestamp start = 0;
static const unsigned long loops = 10000UL;
static std::atomic<long> counter(0);
static std::atomic<bool> ready{false};
static std::atomic<bool> restart{true};
static std::atomic<int> yield_ctr{-(31-CORES)};
static unsigned long tsc_freq_khz = 0;
static Tukija::Cip *cip = Tukija::Cip::cip();
static Tukija::Tip const *tip = Tukija::Tip::tip();
static Genode::Trace::Timestamp start = 0; /* Start point in time */
static const unsigned long loops = 10000UL; /* Number of times an allocation shall be repeated, needed to get
a statistically meaningful number of data points */
static std::atomic<long> counter(0); /* Atomic counter incremented by activated workers, used as a barrier */
static bool volatile ready{false}; /* Flag signalling that all workers are set up and ready for benchmarking */
static bool volatile restart{true}; /* Flag signalling that the current benchmark run shall be restarted */
static std::atomic<int> yield_ctr{-(63-CORES)}; /* Counter for the number of workers that must yield before
a new benchmark run can be started. */
static unsigned long tsc_freq_khz = 0; /* TSC frequency in kHz, used for calculating measured time intervals */
static Tukija::Cip *cip = Tukija::Cip::cip(); /* Cell info page, stores info about the current core allocation */
static Tukija::Tip const *tip = Tukija::Tip::tip(); /* Used to query topology information */
int cores, i;
struct Channel {
unsigned long yield_flag : 1,
op : 2,
tnum : 61;
unsigned long delta_alloc;
unsigned long delta_activate;
unsigned long delta_setflag;
unsigned long delta_findborrower;
unsigned long delta_block;
unsigned long delta_enter;
unsigned long delta_return;
};
int cores, i; /* Global iterator variables */
struct Cell : public Genode::Thread
{
@@ -50,117 +48,152 @@ struct Cell : public Genode::Thread
Libc::Env &env;
Timer::Connection &_timer;
/**
* @brief pthread wrapper function for a Genode thread's entry method.
*
* @param args Pointer to the Cell instance whose entry() method shall be executed
* @return Always nullptr
*/
static void *pthread_entry(void *args) {
Cell *cell = reinterpret_cast<Cell *>(args);
cell->entry();
return nullptr;
}
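/*
 * Usage note (sketch): this wrapper is handed to Libc::pthread_create_from_session()
 * as the pthread start routine, with a pointer to the Cell instance as its argument
 * (see the worker creation loop in Libc::Component::construct below), so the pthread
 * simply forwards into the Genode thread's entry() method.
 */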
void entry() override
{
Genode::Trace::Timestamp latency = 0;
Tukija::uint64_t count_allocs = 0;
void worker_loop() {
Tukija::Cip::Worker *my_channel = &cip->worker_for_location(Genode::Thread::myself()->affinity());
unsigned channel_id = cip->location_to_kernel_cpu(Genode::Thread::myself()->affinity());
Genode::log("Started worker", _id, " on CPU with affinity ", channel_id, "@", const_cast<Tukija::Tip*>(tip)->dom_of_cpu(channel_id).id, "-", Genode::Thread::myself()->affinity(), " signal channel: ", my_channel->yield_flag, " at ", my_channel);
while (true) {
/* If the current thread is not the main worker (the one with _id 0 that allocates CPU cores),
it should sleep, voluntarily releasing its CPU core. This ensures that
we can measure the allocation of CPU cores and the activation of workers
in a single pass. (A minimal counting-barrier sketch follows this function.) */
if (__atomic_load_n(&restart, __ATOMIC_SEQ_CST)) {
//Genode::log("Worker ", _id, " on CPU ", channel_id, " yielding. cores=", cip->cores_current, " #:", cip->cores_current.count());
Tukija::release(Tukija::Resource_type::CPU_CORE);
}
/* The thread was woken up, so increase the thread counter */
// Genode::log("Worker ", _id, " woke up on CPU ", channel_id, " counter=", counter.load());
if (counter.fetch_add(1) == cores-1) {
__atomic_store_n(&ready, true, __ATOMIC_SEQ_CST);
//Genode::log("Worker", _id, " Signaled ready, ctr = ", counter.load());
}
/* As long as no restart was signalled, poll the yield flag to check whether
we received a yield request from the hypervisor. */
while (!__atomic_load_n(&restart, __ATOMIC_SEQ_CST)) {
if ((my_channel->yield_flag != 0))
{
//Tukija::return_to_owner(Tukija::Resource_type::CPU_CORE);
}
}
//Genode::log("Worker ", _id, " on CPU ", channel_id, " restarting.");
}
}
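/*
 * Illustrative sketch (not used by the benchmark; names are hypothetical): the
 * rendezvous between woken workers and the allocating thread is a plain counting
 * barrier - every woken worker increments the shared atomic counter once, and the
 * last arrival raises the ready flag that the allocating thread spins on.
 */
static void barrier_arrive_sketch(std::atomic<long> &arrivals,
                                  bool volatile &all_ready,
                                  int expected)
{
    /* Register this worker; fetch_add() returns the pre-increment value. */
    if (arrivals.fetch_add(1) == expected - 1)
        /* The last worker to arrive signals the allocating thread. */
        __atomic_store_n(&all_ready, true, __ATOMIC_SEQ_CST);
}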
/**
* @brief The actual benchmark loop: allocates CPU cores and measures the
* latency of allocation and worker activation.
*/
void benchmark_loop()
{
Genode::Trace::Timestamp latency = 0;
/* First determine our channel id and store the pointer to our
CIP worker structure. This is needed in order to get the hypervisor's yield signal. */
/* Calculate TSC frequency in GHz */
unsigned long _tsc_freq_ghz = tsc_freq_khz / 1000000UL;
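/* Worked example (assumed frequency): with tsc_freq_khz = 3000000, i.e. a 3 GHz TSC,
_tsc_freq_ghz = 3 cycles per nanosecond, so a measured delta of 6000 cycles is
reported as 2000 ns. Note that the integer division truncates non-integral GHz values. */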
Genode::log("Started worker", _id, " on CPU with affinity ", channel_id, "@", const_cast<Tukija::Tip*>(tip)->dom_of_cpu(channel_id).id, "-", Genode::Thread::myself()->affinity(), " signal channel: ", my_channel->yield_flag, " at ", my_channel);
int max_cores = cip->habitat_affinity.total();
for (cores = CORES; cores <= 14; cores+=4) {
for (i = 0; i < CALLS; ) {
/* Now allocate cores, starting with CORES and adding STEP additional cores
after every CALLS iterations. */
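/* Example schedule (assumed habitat size): with CORES = 4, STEP = 4 and max_cores = 16,
the outer loop measures CALLS allocations for 4, 8, 12 and 16 cores in turn. */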
for (cores = CORES; cores <= max_cores;)
{
//Genode::log("Starting benchmark for ", cores, " CPU cores. Parameters: ", yield_ctr.load(), ":", counter.load());
for (i = 0; i < CALLS;)
{
if ((i == 0 && yield_ctr >= cores-1) || (i > 0 && yield_ctr >= cores-1))
ready = true;
if (_id != 0 && restart.load()) {
yield_ctr.fetch_add(1);
// Genode::log("Worker ", _id, "yielded, yield_ctr = ", yield_ctr.load());
Tukija::release(Tukija::Resource_type::CPU_CORE);
}
//Genode::log("Worker ", _id, " on CPU ", channel_id, " woke up");
counter.fetch_add(1);
if (counter >= cores-1) {
ready = true;
// Genode::log("{\"allocation:\": ", allocation, ", \"id\":", _id, ",\"clk_total\":", (end-::start), ", \"mean_clk\":", (end-::start)/count_allocs ,", \"count\": ", count_allocs, "\"channel-id\":", channel_id, "},");
}
/*if (my_channel->op == 2) {
Genode::Trace::Timestamp now = Genode::Trace::timestamp();
//Tukija::core_allocation(allocation);
my_channel->delta_return = now - my_channel->delta_return;
Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"d_block\": ", my_channel->delta_block / _tsc_freq_ghz, ", \"d_enter\":", my_channel->delta_enter / _tsc_freq_ghz, ", \"d_return\":", my_channel->delta_return / _tsc_freq_ghz, ", \"op\": \"yield\"},");
}
my_channel->op = 0;*/
if (_id == 0) {
//Genode::log("Waiting on start signal");
while (ready.load() == false)
__builtin_ia32_pause();
//Genode::log("Got start signal");
_timer.msleep(2);
//Genode::log("Woke up for new iteration");
ready = false;
restart = false;
::start = Genode::Trace::timestamp();
}
Genode::Trace::Timestamp end = 0;
while (_id==0)
/* Here, we wait until only the allocating worker's CPU core remains in the
current allocation. This ensures that all workers have gone to sleep
and released their CPU cores. */
//Genode::log("Waiting for ", cores, " workers to yield");
while (cip->cores_current.count() != 1 )
{
if (_id == 0) {
//Genode::log("Allocating 4 cores");
//my_channel->tnum = i;
//my_channel->op = 1; /* 1 for alloc, 2 for yield */
//my_channel->delta_enter = Genode::Trace::timestamp();
Tukija::uint8_t rc = Tukija::alloc(Tukija::Resource_type::CPU_CORE, cores);
if (rc == Tukija::NOVA_OK)
{
while(ready.load() == false)
__builtin_ia32_pause();
end = Genode::Trace::timestamp();
//my_channel->delta_return = end - my_channel->delta_return;
latency += (end - ::start) / _tsc_freq_ghz;
//Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"delta_enter:\" ", my_channel->delta_enter / _tsc_freq_ghz, ", \"delta_alloc\": ", my_channel->delta_alloc / _tsc_freq_ghz, ", \"delta_activate:\": ", my_channel->delta_activate / _tsc_freq_ghz, ", \"delta_setflag\": ", my_channel->delta_setflag / _tsc_freq_ghz, ", \"delta_return\": ", my_channel->delta_return / _tsc_freq_ghz, "},");
restart = true;
counter = 0;
yield_ctr = 0;
//if (i%100==0) {
Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"allocation\": ", cip->cores_new, ",\"start\": ", ::start, ", \"end\": ", end, " ,\"ns\": ", (latency), "},");
//my_channel->delta_setflag = 0;
latency = 0;
cip->cores_new.clear();
//}
i++;
break;
} else {
//Genode::log("cores allocated: ", allocated);
break;
// Genode::log("cores allocated: ", allocated);
}
count_allocs++;
}
__builtin_ia32_pause();
}
//Genode::log("Finished allocation. Waiting for yield signal, id = ", channel_id, "\n");
while (restart.load() == false) {
Tukija::Cip::Worker volatile *res = __atomic_load_n(&my_channel, __ATOMIC_SEQ_CST);
if (res->yield_flag) {
//Genode::log("Got yield signal on channel ", channel_id);
Tukija::return_to_owner(Tukija::Resource_type::CPU_CORE);
//Genode::log("Workers ready.");
_timer.msleep(2);
restart = false;
ready = false;
/* Mark beginning of benchmark */
::start = Genode::Trace::timestamp();
Genode::Trace::Timestamp end = 0;
/* Allocate the specified number of CPU cores */
Tukija::uint8_t rc = Tukija::alloc(Tukija::Resource_type::CPU_CORE, cores);
if (rc == Tukija::NOVA_OK)
{
//Genode::log("Cores activated ", cip->cores_current);
/* If we get NOVA_OK as return code, cores were allocated. However, it is not yet guaranteed
that we got the specified number of CPU cores (it could be fewer). So, we have to wait until
all requested workers have actually woken up and check whether the number of woken threads
matches the requested number of CPU cores. */
//Genode::log("Allocation returned successfully.");
while (!__atomic_load_n(&ready, __ATOMIC_SEQ_CST)) {
__builtin_ia32_pause();
}
/* Now, we need to restart the run. Hence, we set the restart flag to true and
reset the counter variables. */
counter = 0;
yield_ctr.store(0);
ready = false;
/* All requested CPU cores have been allocated and the workers were activated, so we can now
mark the end of this run and calculate the time it took. */
end = Genode::Trace::timestamp();
latency += (end - ::start) / _tsc_freq_ghz;
/* Print the results as JSON */
Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"allocation\": ", cip->cores_new, ", \"running\": ", cip->cores_current, ",\"start\": ", ::start, ", \"end\": ", end, " ,\"ns\": ", (latency), "},");
latency = 0;
__atomic_store_n(&restart, true, __ATOMIC_SEQ_CST);
//Genode::log("Restarting.");
/* Also clear the CPU set of newly allocated cores, so that we will not see cores allocated by previous runs. */
cip->cores_new.clear();
if (++i == CALLS)
cores += STEP;
} else {
Genode::log("Core allocation failed.");
}
}
}
Genode::log("Benchmak finished.");
}
void entry() override
{
/* We distinguish between the main thread that allocates CPU cores and */
if (_id == 0)
benchmark_loop();
else
worker_loop(); /* and worker threads that are just used to measure wake-up latencies */
}
Cell(Libc::Env &env, Timer::Connection &timer, Genode::uint16_t id, Location const &location)
: Thread(env, Name("test_", location.xpos(), "x", location.ypos()), 4 * 4096, location, Weight(),
env.cpu()), _id(id), env(env), _timer(timer)
@@ -201,15 +234,17 @@ void Libc::Component::construct(Libc::Env &env)
Genode::Trace::Timestamp thread_start = Genode::Trace::timestamp();
Genode::Affinity::Location loc = me->affinity();
unsigned cpuid = cip->location_to_kernel_cpu(loc);
unsigned int cpu_id = cip->location_to_kernel_cpu(loc);
cip->cores_current.set(cpu_id);
for (Genode::uint16_t cpu = 1; cpu < space.total(); cpu++)
{
Genode::String<32> const name{"worker", cpu};
/* Do not create a worker for the EP's CPU yet, because
we only want to measure the time it takes to create the worker threads */
if (cpu == (space.total() - cpuid))
continue;
/*if (cpu == (space.total() - cpuid))
continue;*/
Cell *worker = new Cell(env, _timer, cpu, space.location_of_index(cpu));
Libc::pthread_create_from_session(&workers[cpu], Cell::pthread_entry,