From 4f364c4548e7523ee310bf4c3d050a2324f9545c Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Thu, 24 Oct 2024 14:28:01 +0200 Subject: [PATCH] Added antagonist to create synthetic interference on caches, CPU and memory controllers. --- repos/mml/run/antagonist.run | 95 ++++++ repos/mml/src/app/antagonist/cpu.h | 26 ++ repos/mml/src/app/antagonist/main.cc | 254 ++++++++++++++ repos/mml/src/app/antagonist/mem.h | 61 ++++ repos/mml/src/app/antagonist/stress_linux.cc | 145 ++++++++ .../src/app/antagonist/synthetic_worker.cc | 315 ++++++++++++++++++ .../mml/src/app/antagonist/synthetic_worker.h | 166 +++++++++ repos/mml/src/app/antagonist/target.mk | 16 + repos/mml/src/app/antagonist/types.h | 50 +++ repos/mml/src/app/antagonist/util.cc | 14 + repos/mml/src/app/antagonist/util.h | 6 + repos/mml/src/app/sythetic_worker.h | 167 ++++++++++ 12 files changed, 1315 insertions(+) create mode 100644 repos/mml/run/antagonist.run create mode 100644 repos/mml/src/app/antagonist/cpu.h create mode 100644 repos/mml/src/app/antagonist/main.cc create mode 100644 repos/mml/src/app/antagonist/mem.h create mode 100644 repos/mml/src/app/antagonist/stress_linux.cc create mode 100644 repos/mml/src/app/antagonist/synthetic_worker.cc create mode 100644 repos/mml/src/app/antagonist/synthetic_worker.h create mode 100644 repos/mml/src/app/antagonist/target.mk create mode 100644 repos/mml/src/app/antagonist/types.h create mode 100644 repos/mml/src/app/antagonist/util.cc create mode 100644 repos/mml/src/app/antagonist/util.h create mode 100644 repos/mml/src/app/sythetic_worker.h diff --git a/repos/mml/run/antagonist.run b/repos/mml/run/antagonist.run new file mode 100644 index 0000000000..ce03327a20 --- /dev/null +++ b/repos/mml/run/antagonist.run @@ -0,0 +1,95 @@ +set build_components { + core init hoitaja timer app/antagonist +} + +source ${genode_dir}/repos/base/run/platform_drv.inc +append_platform_drv_build_components +build $build_components +create_boot_directory + +install_config { + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 2022-07-20 14:30 + + + + + + + + + + + + + +} +set boot_modules { + core init hoitaja timer vfs.lib.so libm.lib.so libc.lib.so stdcxx.lib.so ld.lib.so stress_genode +} +build_boot_image $boot_modules +append qemu_args "-nographic" +run_genode_until forever diff --git a/repos/mml/src/app/antagonist/cpu.h b/repos/mml/src/app/antagonist/cpu.h new file mode 100644 index 0000000000..1acf27c13d --- /dev/null +++ b/repos/mml/src/app/antagonist/cpu.h @@ -0,0 +1,26 @@ +/* + * cpu.h - basic definitions for x86_64 CPUs + */ + +#pragma once + +/* + * Endianness + */ + +#define __LITTLE_ENDIAN 1234 +#define __BIG_ENDIAN 4321 + +#define __BYTE_ORDER __LITTLE_ENDIAN + + +/* + * Word Size + */ + +#define __32BIT_WORDS 32 +#define __64BIT_WORDS 64 + +#define __WORD_SIZE __64BIT_WORDS + +#define CACHE_LINE_SIZE 64 diff --git a/repos/mml/src/app/antagonist/main.cc b/repos/mml/src/app/antagonist/main.cc new file mode 100644 index 0000000000..a5562a9759 --- /dev/null +++ b/repos/mml/src/app/antagonist/main.cc @@ -0,0 +1,254 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CALLS 100 +#define CORES 14 +#define HYPERCALL + + //Genode::Trace::timestamp(); +static Genode::Trace::Timestamp rdtsc_cost = 0; +Genode::Env *genv = nullptr; +static Genode::Trace::Timestamp start = 0; +static const unsigned long loops = 10000UL; +static Nova::mword_t channel = 0; +static std::atomic counter(0); +static std::atomic ready{false}; +static std::atomic restart{true}; +static std::atomic yield_ctr{-(31-CORES)}; +static unsigned long tsc_freq_khz = 0; +int cores, i; + +struct Channel { + unsigned long yield_flag : 1, + op : 2, + tnum : 61; + unsigned long delta_alloc; + unsigned long delta_activate; + unsigned long delta_setflag; + unsigned long delta_findborrower; + unsigned long delta_block; + 
unsigned long delta_enter; + unsigned long delta_return; +}; + +struct Cell : public Genode::Thread +{ + Genode::uint16_t _id; + Libc::Env &env; + Timer::Connection &_timer; + + static void *pthread_entry(void *args) { + Cell *cell = reinterpret_cast(args); + cell->entry(); + return nullptr; + } + + void entry() override + { + Genode::Trace::Timestamp latency = 0; + Nova::mword_t channel_id = 0; + Nova::uint64_t count_allocs = 0; + Nova::cpu_id(channel_id); + struct Channel *channels = reinterpret_cast(channel); + struct Channel volatile *my_channel = &channels[channel_id]; + + unsigned long _tsc_freq_ghz = tsc_freq_khz / 1000000UL; + + //Genode::log("Started worker", _id, " on CPU with affinity ", channel_id, Genode::Thread::myself()->affinity(), " signal channel: ", my_channel->yield_flag, " at ", my_channel); + + for (cores = CORES; cores <= 14; cores+=4) { + for (i = 0; i < CALLS; ) { + + if ((i == 0 && yield_ctr >= cores-1) || (i > 0 && yield_ctr >= cores-1)) + ready = true; + + if (_id != 0 && restart.load()) { + yield_ctr.fetch_add(1); + // Genode::log("Worker ", _id, "yielded, yield_ctr = ", yield_ctr.load()); + Nova::yield(); + } + + //Genode::log("Worker ", _id, " on CPU ", channel_id, " woke up"); + counter.fetch_add(1); + if (counter >= cores-1) { + ready = true; + // Genode::log("{\"allocation:\": ", allocation, ", \"id\":", _id, ",\"clk_total\":", (end-::start), ", \"mean_clk\":", (end-::start)/count_allocs ,", \"count\": ", count_allocs, "\"channel-id\":", channel_id, "},"); + } + + if (my_channel->op == 2) { + Nova::mword_t allocation = 0; + Genode::Trace::Timestamp now = Genode::Trace::timestamp(); + Nova::core_allocation(allocation); + my_channel->delta_return = now - my_channel->delta_return; + Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"d_block\": ", my_channel->delta_block / _tsc_freq_ghz, ", \"d_enter\":", my_channel->delta_enter / _tsc_freq_ghz, ", \"d_return\":", my_channel->delta_return / _tsc_freq_ghz, ", \"op\": 
\"yield\"},"); + } + my_channel->op = 0; + if (_id == 0) { + //Genode::log("Waiting on start signal"); + while (ready.load() == false) + __builtin_ia32_pause(); + + //Genode::log("Got start signal"); + _timer.msleep(2); + + //Genode::log("Woke up for new iteration"); + ready = false; + restart = false; + ::start = Genode::Trace::timestamp(); + } + + Genode::Trace::Timestamp end = 0; + while (_id==0) + { + + if (_id == 0) { + Nova::mword_t allocated = 0; + //Genode::log("Allocating 4 cores"); + + my_channel->tnum = i; + my_channel->op = 1; /* 1 for alloc, 2 for yield */ + + my_channel->delta_enter = Genode::Trace::timestamp(); + Nova::uint8_t rc = Nova::alloc_cores(cores, allocated); + if (rc == Nova::NOVA_OK) + { + + while(ready.load() == false) + __builtin_ia32_pause(); + end = Genode::Trace::timestamp(); + my_channel->delta_return = end - my_channel->delta_return; + latency += (end - ::start) / _tsc_freq_ghz; + Nova::mword_t allocation = 0; + Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"delta_enter:\" ", my_channel->delta_enter / _tsc_freq_ghz, ", \"delta_alloc\": ", my_channel->delta_alloc / _tsc_freq_ghz, ", \"delta_activate:\": ", my_channel->delta_activate / _tsc_freq_ghz, ", \"delta_setflag\": ", my_channel->delta_setflag / _tsc_freq_ghz, ", \"delta_return\": ", my_channel->delta_return / _tsc_freq_ghz, "},"); + Nova::core_allocation(allocation); + restart = true; + counter = 0; + yield_ctr = 0; + //if (i%100==0) { + + Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"allocation\": ", allocation, ",\"start\": ", ::start, ", \"end\": ", end, " ,\"ns\": ", (latency), "},"); + my_channel->delta_setflag = 0; + latency = 0; + //} + i++; + break; + } else { + //Genode::log("cores allocated: ", allocated); + break; + // Genode::log("cores allocated: ", allocated); + } + count_allocs++; + } + } + //Genode::log("Finished allocation. 
Waiting for yield signal, id = ", channel_id, "\n"); + while (restart.load() == false) { + Channel volatile *res = __atomic_load_n(&my_channel, __ATOMIC_SEQ_CST); + if (res->yield_flag) { + Genode::log("Got yield signal on channel ", channel_id); + Nova::yield(true); + } + } + } + } + Genode::log("Benchmak finished."); + } + Cell(Libc::Env &env, Timer::Connection &timer, Genode::uint16_t id, Location const &location) + : Thread(env, Name("test_", location.xpos(), "x", location.ypos()), 4 * 4096, location, Weight(), env.cpu()), _id(id), env(env), _timer(timer) + { } +}; + + +void Libc::Component::construct(Libc::Env &env) +{ + Nova::uint8_t res = 0; + genv = &env; + + Libc::with_libc([&]() + { + Timer::Connection _timer{env}; + + Genode::Ram_dataspace_capability ds = env.ram().alloc(4096); + channel = env.rm().attach(ds); + + Genode::memset(reinterpret_cast(channel), 0, 4096); + + //Genode::Heap _heap{env.ram(), env.rm()}; + + //Genode::log("Registering MxTasking entrypoint"); + if ((res = Nova::mxinit(0, 0, channel))) { + Genode::error("Failed to init MxTasking: ", res); + } + Genode::log("Registered MxTasking, yielding ..."); + + try { + Genode::Attached_rom_dataspace info(env, "platform_info"); + tsc_freq_khz = info.xml().sub_node("hardware").sub_node("tsc") + .attribute_value("freq_khz", 0ULL); + } catch (...) 
{ }; + + start = Genode::Trace::timestamp(); + for (unsigned c = 0; c < 1000; c++) { + //Genode::Trace::Timestamp start = Genode::Trace::timestamp(); + + /*Nova::uint8_t rc = Nova::yield(); + if (rc != Nova::NOVA_OK) + break;*/ + Genode::Trace::timestamp(); + // Genode::Trace::Timestamp end = Genode::Trace::timestamp(); + // delay += (end - start); + } + Genode::Trace::Timestamp end = Genode::Trace::timestamp(); + rdtsc_cost = (end - start) / 1000 / 2; + + Genode::log("My affinity is ", env.cpu().affinity_space(), " of size ", env.cpu().affinity_space().total()); + Genode::log("Will create workers for affinity space: ", env.topo().global_affinity_space()); + start = Genode::Trace::timestamp(); + Genode::Thread *me = Genode::Thread::myself(); + + unsigned long cpuid = 0; + Nova::cpu_id(cpuid); + + Genode::Affinity::Space space = env.topo().global_affinity_space(); + Genode::log("My main thread is on phys. CPU ", cpuid); + + pthread_t workers[space.total()]; + std::cout << "Creating workers" << std::endl; + Genode::Trace::Timestamp thread_start = Genode::Trace::timestamp(); + for (Genode::uint16_t cpu = 1; cpu < space.total(); cpu++) + { + Genode::String<32> const name{"worker", cpu}; + if (cpu == (space.total() - cpuid)) + continue; + Cell *worker = new Cell(env, _timer, cpu, space.location_of_index(cpu)); + Libc::pthread_create_from_session(&workers[cpu], Cell::pthread_entry, worker, 4 * 4096, name.string(), &env.cpu(), space.location_of_index(cpu)); + // Genode::log("Created worker for CPU ", cpu); + // worker->start(); + } + Genode::Trace::Timestamp thread_stop = Genode::Trace::timestamp(); + Genode::log("Took ", (thread_stop - thread_start) / 2000, " μs to start workers"); + + pthread_t main_pt{}; + + Genode::Affinity::Location loc = me->affinity(); + //Genode::log("Starting main worker on CPU ", cpuid); + Cell *main_cell = new Cell(env, _timer, 0, loc); + + //Cell *main = new (_heap) Cell(env, 0, Genode::Affinity::Location(20,0)); + 
/*Libc::pthread_create_from_thread(&main_pt, *main, &main); + main->start();*/ + // Nova::yield(false); + //_timer.msleep(10000); + Libc::pthread_create_from_session(&main_pt, Cell::pthread_entry, main_cell, 8 * 4096, "main_worker", &env.cpu(), loc); + pthread_join(main_pt, 0); }); + Genode::log("Leaving component"); +} \ No newline at end of file diff --git a/repos/mml/src/app/antagonist/mem.h b/repos/mml/src/app/antagonist/mem.h new file mode 100644 index 0000000000..dc0fa8ac46 --- /dev/null +++ b/repos/mml/src/app/antagonist/mem.h @@ -0,0 +1,61 @@ +/* + * mem.h - memory management + */ + +#pragma once + +#include "types.h" + +enum { + PGSHIFT_4KB = 12, + PGSHIFT_2MB = 21, + PGSHIFT_1GB = 30, +}; + +enum { + PGSIZE_4KB = (1 << PGSHIFT_4KB), /* 4096 bytes */ + PGSIZE_2MB = (1 << PGSHIFT_2MB), /* 2097152 bytes */ + PGSIZE_1GB = (1 << PGSHIFT_1GB), /* 1073741824 bytes */ +}; + +#define PGMASK_4KB (PGSIZE_4KB - 1) +#define PGMASK_2MB (PGSIZE_2MB - 1) +#define PGMASK_1GB (PGSIZE_1GB - 1) + +/* page numbers */ +#define PGN_4KB(la) (((uintptr_t)(la)) >> PGSHIFT_4KB) +#define PGN_2MB(la) (((uintptr_t)(la)) >> PGSHIFT_2MB) +#define PGN_1GB(la) (((uintptr_t)(la)) >> PGSHIFT_1GB) + +#define PGOFF_4KB(la) (((uintptr_t)(la)) & PGMASK_4KB) +#define PGOFF_2MB(la) (((uintptr_t)(la)) & PGMASK_2MB) +#define PGOFF_1GB(la) (((uintptr_t)(la)) & PGMASK_1GB) + +#define PGADDR_4KB(la) (((uintptr_t)(la)) & ~((uintptr_t)PGMASK_4KB)) +#define PGADDR_2MB(la) (((uintptr_t)(la)) & ~((uintptr_t)PGMASK_2MB)) +#define PGADDR_1GB(la) (((uintptr_t)(la)) & ~((uintptr_t)PGMASK_1GB)) + +typedef unsigned long physaddr_t; /* physical addresses */ +typedef unsigned long virtaddr_t; /* virtual addresses */ + +#ifndef MAP_FAILED +#define MAP_FAILED ((void *)-1) +#endif + +typedef unsigned int mem_key_t; + +extern void *mem_map_anom(void *base, size_t len, size_t pgsize, int node); +extern void *mem_map_file(void *base, size_t len, int fd, off_t offset); +extern void *mem_map_shm(mem_key_t key, void *base, 
size_t len, + size_t pgsize, bool exclusive); +extern void *mem_map_shm_rdonly(mem_key_t key, void *base, size_t len, + size_t pgsize); +extern int mem_unmap_shm(void *base); +extern int mem_lookup_page_phys_addrs(void *addr, size_t len, size_t pgsize, + physaddr_t *maddrs); + +static inline int +mem_lookup_page_phys_addr(void *addr, size_t pgsize, physaddr_t *paddr) +{ + return mem_lookup_page_phys_addrs(addr, pgsize, pgsize, paddr); +} diff --git a/repos/mml/src/app/antagonist/stress_linux.cc b/repos/mml/src/app/antagonist/stress_linux.cc new file mode 100644 index 0000000000..2ad780b7a5 --- /dev/null +++ b/repos/mml/src/app/antagonist/stress_linux.cc @@ -0,0 +1,145 @@ + +#include "synthetic_worker.h" + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +namespace +{ + + int threads; + uint64_t n; + std::string worker_spec; + + class SyntheticWork : public mx::tasking::TaskInterface + { + private: + SyntheticWorker *_w{nullptr}; + uint64_t *_cnt; + + public: + SyntheticWork(SyntheticWorker *w, uint64_t *cnt) : _w(w), _cnt(cnt) {} + ~SyntheticWork() override = default; + + mx::tasking::TaskResult execute(const std::uint16_t , const std::uint16_t) override + { + _w->Work(n); + (*_cnt)++; + //mx::tasking::runtime::scheduler().allocate_cores(64); + return mx::tasking::TaskResult::make_succeed(this); + } + }; + + void + MainHandler(void *arg) + { + std::vector cnt(threads); + + auto cores = mx::util::core_set::build(threads); + std::cout << "Core set to use: " << cores << std::endl; + mx::tasking::runtime::init(cores, 0, false); + + for (int i = 0; i < threads; ++i) + { + Genode::log("Creating synthetic worker ", i); + auto *w = SyntheticWorkerFactory(worker_spec); + if (w == nullptr) { + std::cerr << "Failed to create worker." 
<< std::endl; + exit(1); + } + auto *work = mx::tasking::runtime::new_task(i, w, &cnt[i]); + work->annotate(static_cast(i)); + mx::tasking::runtime::spawn(*work, mx::system::topology::core_id()); + } + + auto monitor = std::thread([&]() + { + uint64_t last_total = 0; + auto last = std::chrono::steady_clock::now(); + while (1) { + std::chrono::seconds sec(1); + std::this_thread::sleep_for(sec); + auto now = std::chrono::steady_clock::now(); + uint64_t total = 0; + double duration = + std::chrono::duration_cast>(now - last) + .count(); + for (int i = 0; i < threads; i++) total += cnt[i]; + std::cerr << static_cast(total - last_total) / duration + << std::endl; + last_total = total; + last = now; + } }); + mx::tasking::runtime::start_and_wait(); + monitor.join(); + + // never returns + } + +} // anonymous namespace + +void PrintUsage() +{ + std::cerr << "usage: [#threads] [#n] [worker_spec] " + << std::endl; +} + +int main(int argc, char *argv[]) +{ + int ret = 0; // must start at 0: both base_init*() calls below are commented out, so nothing else assigns it + if (argc < 4) + { + PrintUsage(); + return -EINVAL; + } + + threads = std::stoi(argv[1], nullptr, 0); + n = std::stoul(argv[2], nullptr, 0); + worker_spec = std::string(argv[3]); + + // ret = base_init(); + if (ret) + return ret; + + // ret = base_init_thread(); + if (ret) + return ret; + + MainHandler(NULL); + + return 0; +} + +void Libc::Component::construct(Libc::Env &env) { + + mx::system::Environment::set_env(&env); + + auto sys_cores = mx::util::core_set::build(64); + mx::system::Environment::set_cores(&sys_cores); + + mx::memory::GlobalHeap::myself(); + std::uint16_t cores = 64; + //env.cpu().affinity_space().total(); + + char cores_arg[10]; + sprintf(cores_arg, "%d", cores); + + char *args[] = {"stress_genode", cores_arg, "1", "cacheantagonist:4090880"}; + + Libc::with_libc([&]() + { + std::cout << "Starting Cache Antagonist" << std::endl; + main(4, args); + }); +} \ No newline at end of file diff --git a/repos/mml/src/app/antagonist/synthetic_worker.cc 
b/repos/mml/src/app/antagonist/synthetic_worker.cc new file mode 100644 index 0000000000..1d3921c4d6 --- /dev/null +++ b/repos/mml/src/app/antagonist/synthetic_worker.cc @@ -0,0 +1,315 @@ +// synthetic_worker.cc - support for generation of synthetic work + +extern "C" +{ + #include "mem.h" +#include +#include +} + +#include "synthetic_worker.h" +#include "util.h" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +bool synth_barrier_wait() { } + +namespace +{ + + void *memcpy_ermsb(void *dst, const void *src, size_t n) + { + asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(n)::"memory"); + return dst; + } + + inline void clflush(volatile void *p) { asm volatile("clflush (%0)" ::"r"(p)); } + + // Store data (indicated by the param c) to the cache line using the + // non-temporal store. + inline void nt_cacheline_store(char *p, int c) + { + /*__m128i i = _mm_set_epi8(c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c); + _mm_stream_si128((__m128i *)&p[0], i); + _mm_stream_si128((__m128i *)&p[16], i); + _mm_stream_si128((__m128i *)&p[32], i); + _mm_stream_si128((__m128i *)&p[48], i);*/ + } + +} // anonymous namespace + +void SqrtWorker::Work(uint64_t n) +{ + constexpr double kNumber = 2350845.545; + for (uint64_t i = 0; i < n; ++i) + { + volatile double v = sqrt(i * kNumber); + std::ignore = v; // silences compiler warning + } +} + +#define SQRT(src_var, dest_var, src_xmm, dest_xmm) \ + asm volatile("movq %1, %%" src_xmm \ + "\n" \ + "sqrtsd %%" src_xmm ", %%" dest_xmm \ + "\n" \ + "movq %%" dest_xmm ", %0 \n" \ + : "=r"(dest_var) \ + : "g"(src_var) \ + : src_xmm, dest_xmm, "memory") + +void AsmSqrtWorker::Work(uint64_t n) +{ + constexpr double kNumber = 2350845.545; + double src_0, src_1, src_2, src_3; + double dest_0, dest_1, dest_2, dest_3; + for (uint64_t i = 0; i < n; i += 4) + { + src_0 = i * kNumber; + src_1 = (i + 1) * kNumber; + src_2 = (i + 2) * kNumber; + src_3 = (i + 3) * kNumber; + SQRT(src_0, dest_0, 
"xmm0", "xmm1"); + SQRT(src_1, dest_1, "xmm2", "xmm3"); + SQRT(src_2, dest_2, "xmm4", "xmm5"); + SQRT(src_3, dest_3, "xmm6", "xmm7"); + } +} + +StridedMemtouchWorker *StridedMemtouchWorker::Create(std::size_t size, + std::size_t stride) +{ + char *buf = new char[size](); + return new StridedMemtouchWorker(buf, size, stride); +} + +void StridedMemtouchWorker::Work(uint64_t n) +{ + for (uint64_t i = 0; i < n; ++i) + { + volatile char c = buf_[(stride_ * i) % size_]; + std::ignore = c; // silences compiler warning + } +} + +/* TODO: MemStreamWorker is currently broken as clang lacks the intrinsics needed */ +MemStreamWorker *MemStreamWorker::Create(std::size_t size) +{ + void *addr; + int prot, flags; + + prot = PROT_READ | PROT_WRITE; + flags = MAP_PRIVATE | MAP_ANONYMOUS; + // | MAP_POPULATE | MAP_HUGETLB | + // (PGSHIFT_2MB << MAP_HUGE_SHIFT); + + addr = mmap(NULL, size, prot, flags, -1, 0); + if (addr == MAP_FAILED) + return nullptr; + + memset(addr, 0xAB, size); + return new MemStreamWorker(static_cast(addr), size); +} + +MemStreamWorker::~MemStreamWorker() +{ + munmap((void *)buf_, (size_)); +} + +void MemStreamWorker::Work(uint64_t n) +{ + if (n > size_) + n = size_; + for (uint64_t i = 0; i < n; ++i) + { + volatile char c = buf_[i]; + std::ignore = c; // silences compiler warning + } +} + +RandomMemtouchWorker *RandomMemtouchWorker::Create(std::size_t size, + unsigned int seed) +{ + char *buf = new char[size](); + std::vector v(size); + std::iota(std::begin(v), std::end(v), 0); + std::mt19937 g(seed); + std::shuffle(v.begin(), v.end(), g); + return new RandomMemtouchWorker(buf, std::move(v)); +} + +void RandomMemtouchWorker::Work(uint64_t n) +{ + for (uint64_t i = 0; i < n; ++i) + buf_[schedule_[i % schedule_.size()]]++; +} + +CacheAntagonistWorker *CacheAntagonistWorker::Create(std::size_t size) +{ + char *buf = new char[size](); + return new CacheAntagonistWorker(buf, size); +} + +void CacheAntagonistWorker::Work(uint64_t n) +{ + for (uint64_t i = 0; i < n; 
++i) + memcpy_ermsb(&buf_[0], &buf_[size_ / 2], size_ / 2); +} + +MemBWAntagonistWorker *MemBWAntagonistWorker::Create(std::size_t size, + int nop_period, + int nop_num) +{ + // non-temporal store won't bypass cache when accessing the remote memory. + auto numa_id = mx::system::topology::node_id(mx::system::topology::core_id()); + char *buf = reinterpret_cast(mx::memory::GlobalHeap::allocate(numa_id, size)); + // numa_alloc_* will allocate memory in pages, therefore it must be cacheline + // aligned. + if (reinterpret_cast(buf) % CACHELINE_SIZE != 0) + { + // Should never be executed. + Genode::error("The allocated memory should be cacheline size aligned."); + return nullptr; + } + // Flush the cache explicitly. Non-temporal store will still write into cache + // if the corresponding data is already at cache. + for (std::size_t i = 0; i < size; i += CACHELINE_SIZE) + { + clflush(reinterpret_cast(buf + i)); + } + return new MemBWAntagonistWorker(buf, size, nop_period, nop_num); +} + +void MemBWAntagonistWorker::Work(uint64_t n) +{ + int cnt = 0; + for (uint64_t k = 0; k < n; k++) + { + for (std::size_t i = 0; i < size_; i += CACHELINE_SIZE) + { + nt_cacheline_store(buf_ + i, 0); + if (cnt++ == nop_period_) + { + cnt = 0; + for (int j = 0; j < nop_num_; j++) + { + asm(""); + } + } + } + } +} + +DynamicCacheAntagonistWorker *DynamicCacheAntagonistWorker::Create( + std::size_t size, int period, int nop_num) +{ + char *buf = new char[size](); + return new DynamicCacheAntagonistWorker(buf, size, period, nop_num); +} + +void DynamicCacheAntagonistWorker::Work(uint64_t n) +{ + double *ptr = reinterpret_cast(buf_); + size_t offset = size_ / 2 / sizeof(double); + for (uint64_t i = 0; i < n; ++i) + { + for (size_t j = 0; j < offset; j++) + { + ptr[j + offset] = ptr[j]; + if (cnt_++ == period_) + { + //synth_barrier_wait(); + cnt_ = 0; + for (int k = 0; k < nop_num_; k++) + { + asm(""); + } + } + } + } +} + +SyntheticWorker *SyntheticWorkerFactory(std::string s) +{ + 
std::vector tokens = split(s, ':'); + + // the first token is the type of worker, must be specified + if (tokens.size() < 1) + return nullptr; + + if (tokens[0] == "sqrt") + { + if (tokens.size() != 1) + return nullptr; + return new SqrtWorker(); + } + else if (tokens[0] == "asmsqrt") + { + if (tokens.size() != 1) + return nullptr; + return new AsmSqrtWorker(); + } + else if (tokens[0] == "stridedmem") + { + if (tokens.size() != 3) + return nullptr; + unsigned long size = std::stoul(tokens[1], nullptr, 0); + unsigned long stride = std::stoul(tokens[2], nullptr, 0); + return StridedMemtouchWorker::Create(size, stride); + } + else if (tokens[0] == "randmem") + { + if (tokens.size() != 3) + return nullptr; + unsigned long size = std::stoul(tokens[1], nullptr, 0); + unsigned long seed = std::stoul(tokens[2], nullptr, 0); + if (seed > std::numeric_limits::max()) + return nullptr; + return RandomMemtouchWorker::Create(size, seed); + } + else if (tokens[0] == "memstream") + { + if (tokens.size() != 2) + return nullptr; + unsigned long size = std::stoul(tokens[1], nullptr, 0); + return MemStreamWorker::Create(size); + } + else if (tokens[0] == "cacheantagonist") + { + if (tokens.size() != 2) + return nullptr; + unsigned long size = std::stoul(tokens[1], nullptr, 0); + return CacheAntagonistWorker::Create(size); + } + else if (tokens[0] == "membwantagonist") + { + if (tokens.size() != 4) + return nullptr; + unsigned long size = std::stoul(tokens[1], nullptr, 0); + unsigned long nop_period = std::stoul(tokens[2], nullptr, 0); + unsigned long nop_num = std::stoul(tokens[3], nullptr, 0); + return MemBWAntagonistWorker::Create(size, nop_period, nop_num); + } + else if (tokens[0] == "dynamiccacheantagonist") + { + if (tokens.size() != 4) + return nullptr; + unsigned long size = std::stoul(tokens[1], nullptr, 0); + unsigned long period = std::stoul(tokens[2], nullptr, 0); + unsigned long long nop_num = std::stoul(tokens[3], nullptr, 0); + return 
DynamicCacheAntagonistWorker::Create(size, period, nop_num); + } + + // invalid type of worker + return nullptr; +} diff --git a/repos/mml/src/app/antagonist/synthetic_worker.h b/repos/mml/src/app/antagonist/synthetic_worker.h new file mode 100644 index 0000000000..f7a86685c0 --- /dev/null +++ b/repos/mml/src/app/antagonist/synthetic_worker.h @@ -0,0 +1,166 @@ +// synthetic_worker.h - support for generation of synthetic work + +#pragma once + +//#include +#include +#include +#include +#include +#include + +#define CACHELINE_SIZE (64) + +class SyntheticWorker +{ +public: + virtual ~SyntheticWorker() {} + // Perform n iterations of fake work. + virtual void Work(uint64_t n) = 0; +}; + +class SqrtWorker : public SyntheticWorker +{ +public: + SqrtWorker() {} + ~SqrtWorker() {} + + // Performs n iterations of sqrt(). + void Work(uint64_t n); +}; + +class AsmSqrtWorker : public SyntheticWorker +{ +public: + AsmSqrtWorker() {} + ~AsmSqrtWorker() {} + + // Performs n iterations of sqrt(). + void Work(uint64_t n); +}; + +class StridedMemtouchWorker : public SyntheticWorker +{ +public: + ~StridedMemtouchWorker() { delete[] buf_; } // buf_ is allocated with new char[size]() in Create() + + // Creates a strided memory touching worker. + static StridedMemtouchWorker *Create(std::size_t size, size_t stride); + + // Performs n strided memory touches. + void Work(uint64_t n); + +private: + StridedMemtouchWorker(char *buf, std::size_t size, size_t stride) + : buf_(buf), size_(size), stride_(stride) {} + + volatile char *buf_; + std::size_t size_; + std::size_t stride_; +}; + +class MemStreamWorker : public SyntheticWorker +{ +public: + ~MemStreamWorker(); + + // Creates a memory streaming worker. + static MemStreamWorker *Create(std::size_t size); + + // Performs n memory reads. 
+ void Work(uint64_t n); + +private: + MemStreamWorker(char *buf, std::size_t size) : buf_(buf), size_(size) {} + + volatile char *buf_; + std::size_t size_; +}; + +class RandomMemtouchWorker : public SyntheticWorker +{ +public: + ~RandomMemtouchWorker() { delete[] buf_; } // buf_ is allocated with new char[size]() in Create() + + // Creates a random memory touching worker. + static RandomMemtouchWorker *Create(std::size_t size, unsigned int seed); + + // Performs n random memory touches. + void Work(uint64_t n); + +private: + RandomMemtouchWorker(char *buf, std::vector schedule) + : buf_(buf), schedule_(std::move(schedule)) {} + + volatile char *buf_; + std::vector schedule_; +}; + +class CacheAntagonistWorker : public SyntheticWorker +{ +public: + ~CacheAntagonistWorker() { delete[] buf_; } // buf_ is allocated with new char[size]() in Create() + + // Creates a cache antagonist worker. + static CacheAntagonistWorker *Create(std::size_t size); + + // Perform n cache accesses. + void Work(uint64_t n); + +private: + CacheAntagonistWorker(char *buf, std::size_t size) : buf_(buf), size_(size) {} + + char *buf_; + std::size_t size_; +}; + +class MemBWAntagonistWorker : public SyntheticWorker +{ +public: + ~MemBWAntagonistWorker() { free(buf_); } // NOTE(review): Create() obtains buf_ from mx::memory::GlobalHeap::allocate(); confirm free() is the matching release call + + // Creates a memory bandwidth antagonist worker. It allocates an array whose + // size is indicated by the parameter. + static MemBWAntagonistWorker *Create(std::size_t size, int nop_period, + int nop_num); + + // Perform n times array stores. + void Work(uint64_t n); + +private: + MemBWAntagonistWorker(char *buf, std::size_t size, int nop_period, + int nop_num) + : buf_(buf), size_(size), nop_period_(nop_period), nop_num_(nop_num) {} + + char *buf_; + std::size_t size_; + int nop_period_; + int nop_num_; +}; + +class DynamicCacheAntagonistWorker : public SyntheticWorker +{ +public: + ~DynamicCacheAntagonistWorker() { delete[] buf_; } // buf_ is allocated with new char[size]() in Create() + + // Creates a cache antagonist worker. + static DynamicCacheAntagonistWorker *Create(std::size_t size, int period, + int nop_num); + + // Perform n cache accesses. 
+ void Work(uint64_t n); + +private: + DynamicCacheAntagonistWorker(char *buf, std::size_t size, int period, + int nop_num) + : buf_(buf), size_(size), period_(period), nop_num_(nop_num) {} + + char *buf_; + std::size_t size_; + int period_; + int nop_num_; + int cnt_{0}; // copies since the last pause; Work() compares it against period_, so it must start defined +}; + +// Parses a string to generate one of the above fake workers. +SyntheticWorker *SyntheticWorkerFactory(std::string s); diff --git a/repos/mml/src/app/antagonist/target.mk b/repos/mml/src/app/antagonist/target.mk new file mode 100644 index 0000000000..87052693a5 --- /dev/null +++ b/repos/mml/src/app/antagonist/target.mk @@ -0,0 +1,16 @@ +MXINC_DIR=$(REP_DIR)/src/app/antagonist +GENODE_GCC_TOOLCHAIN_DIR ?= /usr/local/genode/tool/21.05 + +TARGET = stress_genode +# source files for the benchmark framework + +SRC_CC = stress_linux.cc synthetic_worker.cc util.cc +LIBS += base libc stdcxx mxtasking +EXT_OBJECTS += /usr/local/genode/tool/lib/clang/14.0.5/lib/linux/libclang_rt.builtins-x86_64.a /usr/local/genode/tool/lib/libatomic.a +CUSTOM_CC = /usr/local/genode/tool/bin/clang +CUSTOM_CXX = /usr/local/genode/tool/bin/clang++ +CC_OPT += --target=x86_64-genode --sysroot=/does/not/exist --gcc-toolchain=$(GENODE_GCC_TOOLCHAIN_DIR) -Wno-error -O2 -g -DNDEBUG -I$(MXINC_DIR) -std=c++20 #-D_GLIBCXX_ATOMIC_BUILTINS_8 -D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 +#CC_OPT += -femulated-tls -DCLANG_CXX11_ATOMICS +CC_CXX_WARN_STRICT = +CUSTOM_CXX_LIB := $(CROSS_DEV_PREFIX)g++ +#CXX_LD += $(CROSS_DEV_PREFIX)g++ diff --git a/repos/mml/src/app/antagonist/types.h b/repos/mml/src/app/antagonist/types.h new file mode 100644 index 0000000000..a28c9a2b5d --- /dev/null +++ b/repos/mml/src/app/antagonist/types.h @@ -0,0 +1,50 @@ +/* + * types.h - primitive type definitions + */ + +#pragma once + +#include +#include "cpu.h" + +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; + +typedef signed char int8_t; +typedef signed short int16_t; +typedef signed int int32_t; + +#ifndef __WORD_SIZE 
+#error __WORD_SIZE is undefined +#endif + +#if __WORD_SIZE == __64BIT_WORDS + +typedef unsigned long uint64_t; +typedef signed long int64_t; + +#else /* __WORDSIZE == __64BIT_WORDS */ + +typedef unsigned long long uint64_t; +typedef signed long long int64_t; + +#endif /* __WORDSIZE == __64BIT_WORDS */ + +typedef unsigned long uintptr_t; +typedef long intptr_t; +typedef long off_t; +typedef unsigned long size_t; +typedef long ssize_t; + +typedef struct { + volatile int locked; +} spinlock_t; + +typedef struct { + volatile int cnt; +} atomic_t; + +typedef struct { + volatile long cnt; +} atomic64_t; diff --git a/repos/mml/src/app/antagonist/util.cc b/repos/mml/src/app/antagonist/util.cc new file mode 100644 index 0000000000..d7cb303509 --- /dev/null +++ b/repos/mml/src/app/antagonist/util.cc @@ -0,0 +1,14 @@ +#include "util.h" + +std::vector split(const std::string &text, char sep) +{ + std::vector tokens; + std::string::size_type start = 0, end = 0; + while ((end = text.find(sep, start)) != std::string::npos) + { + tokens.push_back(text.substr(start, end - start)); + start = end + 1; + } + tokens.push_back(text.substr(start)); + return tokens; +} diff --git a/repos/mml/src/app/antagonist/util.h b/repos/mml/src/app/antagonist/util.h new file mode 100644 index 0000000000..46b7006a15 --- /dev/null +++ b/repos/mml/src/app/antagonist/util.h @@ -0,0 +1,6 @@ +#pragma once +#include +#include +#include + +std::vector split(const std::string &text, char sep); diff --git a/repos/mml/src/app/sythetic_worker.h b/repos/mml/src/app/sythetic_worker.h new file mode 100644 index 0000000000..4f0a0b4c07 --- /dev/null +++ b/repos/mml/src/app/sythetic_worker.h @@ -0,0 +1,167 @@ +// synthetic_worker.h - support for generation of synthetic work + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#define CACHELINE_SIZE (64) + +class SyntheticWorker +{ +public: + virtual ~SyntheticWorker() {} + // Perform n iterations of fake work. 
+    virtual void Work(uint64_t n) = 0;
+};
+
+class SqrtWorker : public SyntheticWorker
+{
+public:
+    SqrtWorker() {}
+    ~SqrtWorker() {}
+
+    // Performs n iterations of sqrt().
+    void Work(uint64_t n) override;
+};
+
+class AsmSqrtWorker : public SyntheticWorker
+{
+public:
+    AsmSqrtWorker() {}
+    ~AsmSqrtWorker() {}
+
+    // Performs n iterations of sqrt().
+    void Work(uint64_t n) override;
+};
+
+class StridedMemtouchWorker : public SyntheticWorker
+{
+public:
+    ~StridedMemtouchWorker() { delete buf_; } // NOTE(review): scalar delete; confirm Create() does not allocate with new[]
+
+    // Creates a strided memory touching worker.
+    static StridedMemtouchWorker *Create(std::size_t size, size_t stride);
+
+    // Performs n strided memory touches.
+    void Work(uint64_t n) override;
+
+private:
+    StridedMemtouchWorker(char *buf, std::size_t size, size_t stride)
+        : buf_(buf), size_(size), stride_(stride) {}
+
+    volatile char *buf_;
+    std::size_t size_;
+    std::size_t stride_;
+};
+
+class MemStreamWorker : public SyntheticWorker
+{
+public:
+    ~MemStreamWorker();
+
+    // Creates a memory streaming worker.
+    static MemStreamWorker *Create(std::size_t size);
+
+    // Performs n memory reads.
+    void Work(uint64_t n) override;
+
+private:
+    MemStreamWorker(char *buf, std::size_t size) : buf_(buf), size_(size) {}
+
+    volatile char *buf_;
+    std::size_t size_;
+};
+
+class RandomMemtouchWorker : public SyntheticWorker
+{
+public:
+    ~RandomMemtouchWorker() { delete buf_; } // NOTE(review): scalar delete; confirm Create() does not allocate with new[]
+
+    // Creates a random memory touching worker.
+    static RandomMemtouchWorker *Create(std::size_t size, unsigned int seed);
+
+    // Performs n random memory touches.
+    void Work(uint64_t n) override;
+
+private:
+    RandomMemtouchWorker(char *buf, std::vector schedule)
+        : buf_(buf), schedule_(std::move(schedule)) {}
+
+    volatile char *buf_;
+    std::vector schedule_; // NOTE(review): the element type of std::vector is missing here and in the constructor; restore it
+};
+
+class CacheAntagonistWorker : public SyntheticWorker
+{
+public:
+    ~CacheAntagonistWorker() { delete buf_; } // NOTE(review): scalar delete; confirm Create() does not allocate with new[]
+
+    // Creates a cache antagonist worker.
+    static CacheAntagonistWorker *Create(std::size_t size);
+
+    // Perform n cache accesses.
+    void Work(uint64_t n) override;
+
+private:
+    CacheAntagonistWorker(char *buf, std::size_t size) : buf_(buf), size_(size) {}
+
+    char *buf_;
+    std::size_t size_;
+};
+
+class MemBWAntagonistWorker : public SyntheticWorker
+{
+public:
+    ~MemBWAntagonistWorker() { numa_free(buf_, size_); } // NOTE(review): presumably libnuma; confirm buf_ comes from a matching numa_alloc_* call
+
+    // Creates a memory bandwidth antagonist worker. It allocates an array whose
+    // size is indicated by the parameter.
+    static MemBWAntagonistWorker *Create(std::size_t size, int nop_period,
+                                         int nop_num);
+
+    // Perform n times array stores.
+    void Work(uint64_t n) override;
+
+private:
+    MemBWAntagonistWorker(char *buf, std::size_t size, int nop_period,
+                          int nop_num)
+        : buf_(buf), size_(size), nop_period_(nop_period), nop_num_(nop_num) {}
+
+    char *buf_;
+    std::size_t size_;
+    int nop_period_;
+    int nop_num_;
+};
+
+class DynamicCacheAntagonistWorker : public SyntheticWorker
+{
+public:
+    ~DynamicCacheAntagonistWorker() { delete buf_; } // NOTE(review): scalar delete; confirm Create() does not allocate with new[]
+
+    // Creates a cache antagonist worker.
+    static DynamicCacheAntagonistWorker *Create(std::size_t size, int period,
+                                                int nop_num);
+
+    // Perform n cache accesses.
+    void Work(uint64_t n) override;
+
+private:
+    DynamicCacheAntagonistWorker(char *buf, std::size_t size, int period,
+                                 int nop_num)
+        : buf_(buf), size_(size), period_(period), nop_num_(nop_num), cnt_(0) {}
+
+    char *buf_;
+    std::size_t size_;
+    int period_;
+    int nop_num_;
+    int cnt_; // starts at 0; previously left uninitialised by the constructor
+};
+
+// Parses a string to generate one of the above fake workers.
+SyntheticWorker *SyntheticWorkerFactory(std::string s);