Added antagonist to create synthetic interference on caches, CPU and memory controllers.

This commit is contained in:
Michael Mueller
2024-10-24 14:28:01 +02:00
parent 15f7092285
commit 4f364c4548
12 changed files with 1315 additions and 0 deletions

View File

@@ -0,0 +1,95 @@
# Build targets of this scenario: core, init, hoitaja, timer and the
# antagonist application (src/app/antagonist).
set build_components {
core init hoitaja timer app/antagonist
}
# Include the platform-driver helper script and call the procedure it provides
# (presumably it appends the platform driver's build targets -- TODO confirm).
source ${genode_dir}/repos/base/run/platform_drv.inc
append_platform_drv_build_components
build $build_components
create_boot_directory
install_config {
<config>
<!-- Services announced as provided by the parent of this init instance. -->
<parent-provides>
<service name="LOG"/>
<service name="PD"/>
<service name="CPU"/>
<service name="ROM"/>
<service name="RAM"/>
<service name="IRQ"/>
<service name="IO_MEM"/>
<service name="IO_PORT"/>
<service name="CAP"/>
<service name="RM"/>
<service name="SIGNAL"/>
<service name="TOPO"/>
<service name="TRACE"/>
</parent-provides>
<default-route>
<any-service><parent/><any-child/></any-service>
</default-route>
<!--<affinity-space width="32" height="1"/>-->
<default caps="2000"/>
<!-- timer: provides the "Timer" service used by hoitaja (see its route below). -->
<start name="timer">
<resource name="RAM" quantum="16M"/>
<provides><service name="Timer"/></provides>
<route>
<any-service><parent/><any-child/></any-service>
</route>
</start>
<!-- hoitaja: provides "TASKING" and carries a nested configuration that starts the antagonist. -->
<start name="hoitaja" caps="62000">
<resource name="RAM" quantum="250G"/>
<provides><service name="TASKING"/></provides>
<config prio_levels="32">
<parent-provides>
<service name="LOG"/>
<service name="PD"/>
<service name="CPU"/>
<service name="ROM"/>
<service name="RAM"/>
<service name="IRQ"/>
<service name="IO_MEM"/>
<service name="IO_PORT"/>
<service name="CAP"/>
<service name="RM"/>
<service name="SIGNAL"/>
<service name="TOPO"/>
<service name="Timer"/>
<service name="TASKING"/>
<service name="TRACE"/>
</parent-provides>
<default-route>
<any-service><parent/><any-child/></any-service>
</default-route>
<default caps="600"/>
<affinity-space width="31" height="1"/>
<!-- The antagonist child is started from the stress_genode binary with 60G of RAM. -->
<start name="antagonist">
<binary name="stress_genode"/>
<resource name="RAM" quantum="60G"/>
<route>
<any-service><parent/><any-child/></any-service>
</route>
<config>
<!-- libc setup: stdout and stderr go to /dev/log, the RTC is a fixed inline value. -->
<vfs> <dir name="dev">
<log/>
<inline name="rtc">2022-07-20 14:30</inline>
</dir>
</vfs>
<libc stdout="/dev/log" stderr="/dev/log" rtc="/dev/rtc"/>
</config>
</start>
</config>
<!-- hoitaja obtains "Timer" from the timer child; all other services via parent or any child. -->
<route>
<service name="Timer"> <child name="timer"/> </service>
<any-service><parent/><any-child/></any-service>
</route>
</start>
</config>
}
# Boot modules: core, init, hoitaja and timer, the shared libraries listed
# below, and the antagonist binary (stress_genode).
set boot_modules {
core init hoitaja timer vfs.lib.so libm.lib.so libc.lib.so stdcxx.lib.so ld.lib.so stress_genode
}
build_boot_image $boot_modules
# Pass "-nographic" to QEMU and keep the scenario running without an end condition.
append qemu_args "-nographic"
run_genode_until forever

View File

@@ -0,0 +1,26 @@
/*
* cpu.h - basic definitions for x86_64 CPUs
*/
#pragma once
/*
* Endianness
*
* __LITTLE_ENDIAN and __BIG_ENDIAN are tag values; __BYTE_ORDER selects the
* little-endian tag.
* NOTE(review): these names are reserved identifiers that C library headers
* commonly define as well -- confirm there is no redefinition conflict.
*/
#define __LITTLE_ENDIAN 1234
#define __BIG_ENDIAN 4321
#define __BYTE_ORDER __LITTLE_ENDIAN
/*
* Word Size
*
* __WORD_SIZE selects the 64-bit tag; types.h keys its 64-bit typedefs on it.
*/
#define __32BIT_WORDS 32
#define __64BIT_WORDS 64
#define __WORD_SIZE __64BIT_WORDS
/* Cache line size (presumably in bytes -- TODO confirm) */
#define CACHE_LINE_SIZE 64

View File

@@ -0,0 +1,254 @@
#include <libc/component.h>
#include <base/log.h>
#include <nova/syscall-generic.h>
#include <nova/syscalls.h>
#include <base/heap.h>
#include <base/attached_rom_dataspace.h>
#include <cstdio>
#include <iostream>
#include <internal/thread_create.h>
#include <thread>
#include <atomic>
#include <timer_session/connection.h>
/* Number of iterations of the inner benchmark loop in Cell::entry(). */
#define CALLS 100
/* Start value of the `cores` loop in Cell::entry(); also part of yield_ctr's initial value. */
#define CORES 14
/* Not referenced in the code visible here. */
#define HYPERCALL
//Genode::Trace::timestamp();
/* Cost estimate for one timestamp read, computed in construct(). */
static Genode::Trace::Timestamp rdtsc_cost = 0;
/* Set to the component environment in construct(). */
Genode::Env *genv = nullptr;
/* Start timestamp; written in construct() and by the Cell with id 0. */
static Genode::Trace::Timestamp start = 0;
/* Not referenced in the code visible here. */
static const unsigned long loops = 10000UL;
/* Local address of the attached channel page; set in construct(), cast to Channel* in Cell::entry(). */
static Nova::mword_t channel = 0;
/* Incremented by every Cell per loop pass; reset to 0 by Cell 0 after an allocation. */
static std::atomic<long> counter(0);
/* Cell 0 spins on this flag; any Cell sets it once enough cells have checked in. */
static std::atomic<bool> ready{false};
/* While true, Cells with a non-zero id call Nova::yield() at the top of a loop pass. */
static std::atomic<bool> restart{true};
/* Incremented by non-zero Cells before yielding; starts at -(31-CORES). */
static std::atomic<int> yield_ctr{-(31-CORES)};
/* TSC frequency in kHz read from the "platform_info" ROM; stays 0 if that fails. */
static unsigned long tsc_freq_khz = 0;
/* Loop variables of Cell::entry(). NOTE(review): plain globals shared by all Cell threads. */
int cores, i;
/*
 * One record of the channel page. Cell::entry() treats the page at `channel`
 * as an array of Channel indexed by the value returned from Nova::cpu_id().
 * NOTE(review): the page is handed to Nova::mxinit(), so the layout is
 * presumably shared with the kernel -- do not reorder fields without confirming.
 */
struct Channel {
/* yield_flag: polled by Cell::entry(), which calls Nova::yield(true) when set. */
unsigned long yield_flag : 1,
/* op: Cell::entry() writes 1 before alloc_cores() and checks for 2 (1 for alloc, 2 for yield). */
op : 2,
/* tnum: set to the current iteration number by Cell 0. */
tnum : 61;
/*
 * Timing fields. Cell::entry() writes delta_enter, delta_return and
 * delta_setflag and only reads the others; the writer of those is not
 * visible here (presumably the kernel -- TODO confirm).
 */
unsigned long delta_alloc;
unsigned long delta_activate;
unsigned long delta_setflag;
unsigned long delta_findborrower;
unsigned long delta_block;
unsigned long delta_enter;
unsigned long delta_return;
};
/**
 * Benchmark thread bound to one CPU location.
 *
 * The Cell with id 0 drives the measurement: per iteration it waits until
 * the other cells have checked in, calls Nova::alloc_cores() and logs the
 * timing fields of its Channel record. All other cells yield their CPU at
 * the top of each pass while `restart` is set and afterwards poll their
 * channel's yield flag.
 */
struct Cell : public Genode::Thread
{
	Genode::uint16_t   _id;     /* 0 = driving cell, otherwise a worker */
	Libc::Env         &env;
	Timer::Connection &_timer;

	/** pthread-compatible trampoline; `args` must point to a Cell. */
	static void *pthread_entry(void *args) {
		Cell *cell = reinterpret_cast<Cell *>(args);
		cell->entry();
		return nullptr;
	}

	void entry() override
	{
		Genode::Trace::Timestamp latency = 0;
		Nova::mword_t channel_id = 0;

		/* The channel page is indexed by the id reported by Nova::cpu_id(). */
		Nova::cpu_id(channel_id);
		struct Channel *channels = reinterpret_cast<Channel *>(channel);
		struct Channel volatile *my_channel = &channels[channel_id];

		/*
		 * Divisor that scales TSC deltas for logging. tsc_freq_khz is 0 when
		 * the platform_info ROM could not be read and the integer division
		 * yields 0 for any frequency below 1 GHz; fall back to 1 (raw cycle
		 * counts) instead of dividing by zero below.
		 */
		unsigned long _tsc_freq_ghz = tsc_freq_khz / 1000000UL;
		if (_tsc_freq_ghz == 0)
			_tsc_freq_ghz = 1;

		for (cores = CORES; cores <= 14; cores+=4) {
			for (i = 0; i < CALLS; ) {
				if (yield_ctr >= cores-1)
					ready = true;

				/* Workers give up their CPU until cell 0 finished the previous round. */
				if (_id != 0 && restart.load()) {
					yield_ctr.fetch_add(1);
					Nova::yield();
				}

				counter.fetch_add(1);
				if (counter >= cores-1)
					ready = true;

				/* op == 2 marks a yield operation on this channel: log its timings. */
				if (my_channel->op == 2) {
					Nova::mword_t allocation = 0;
					Genode::Trace::Timestamp now = Genode::Trace::timestamp();
					Nova::core_allocation(allocation);
					my_channel->delta_return = now - my_channel->delta_return;
					Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"d_block\": ", my_channel->delta_block / _tsc_freq_ghz, ", \"d_enter\":", my_channel->delta_enter / _tsc_freq_ghz, ", \"d_return\":", my_channel->delta_return / _tsc_freq_ghz, ", \"op\": \"yield\"},");
				}
				my_channel->op = 0;

				if (_id == 0) {
					/* Wait for the start signal, then open a new measurement round. */
					while (ready.load() == false)
						__builtin_ia32_pause();
					_timer.msleep(2);
					ready = false;
					restart = false;
					::start = Genode::Trace::timestamp();

					Nova::mword_t allocated = 0;
					my_channel->tnum = i;
					my_channel->op = 1; /* 1 for alloc, 2 for yield */
					my_channel->delta_enter = Genode::Trace::timestamp();
					Nova::uint8_t rc = Nova::alloc_cores(cores, allocated);

					/*
					 * NOTE(review): on failure nothing sets `restart` again, so
					 * the polling loop below only ends via the yield flag.
					 */
					if (rc == Nova::NOVA_OK) {
						while (ready.load() == false)
							__builtin_ia32_pause();
						Genode::Trace::Timestamp end = Genode::Trace::timestamp();
						my_channel->delta_return = end - my_channel->delta_return;
						latency += (end - ::start) / _tsc_freq_ghz;

						Nova::mword_t allocation = 0;
						Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"delta_enter:\" ", my_channel->delta_enter / _tsc_freq_ghz, ", \"delta_alloc\": ", my_channel->delta_alloc / _tsc_freq_ghz, ", \"delta_activate:\": ", my_channel->delta_activate / _tsc_freq_ghz, ", \"delta_setflag\": ", my_channel->delta_setflag / _tsc_freq_ghz, ", \"delta_return\": ", my_channel->delta_return / _tsc_freq_ghz, "},");
						Nova::core_allocation(allocation);

						/* Release the workers for the next round. */
						restart = true;
						counter = 0;
						yield_ctr = 0;
						Genode::log("{\"iteration\": ", i, ", \"cores\":", cores, ", \"allocation\": ", allocation, ",\"start\": ", ::start, ", \"end\": ", end, " ,\"ns\": ", (latency), "},");
						my_channel->delta_setflag = 0;
						latency = 0;
						i++;
					}
				}

				/* Poll the yield flag until cell 0 opens the next round. */
				while (restart.load() == false) {
					Channel volatile *res = __atomic_load_n(&my_channel, __ATOMIC_SEQ_CST);
					if (res->yield_flag) {
						Genode::log("Got yield signal on channel ", channel_id);
						Nova::yield(true);
					}
				}
			}
		}
		Genode::log("Benchmark finished.");
	}

	/**
	 * \param env       component environment (CPU session is taken from it)
	 * \param timer     timer connection used by cell 0 for its short sleep
	 * \param id        cell id; 0 selects the driving role
	 * \param location  affinity location the thread is created for
	 */
	Cell(Libc::Env &env, Timer::Connection &timer, Genode::uint16_t id, Location const &location)
	: Thread(env, Name("test_", location.xpos(), "x", location.ypos()), 4 * 4096, location, Weight(), env.cpu()), _id(id), env(env), _timer(timer)
	{ }
};
/*
 * Component entry point of the benchmark.
 *
 * Sets up the channel page, registers it via Nova::mxinit(), reads the TSC
 * frequency, creates one Cell per affinity location as a pthread and finally
 * runs the driving Cell (id 0) and waits for it to finish.
 */
void Libc::Component::construct(Libc::Env &env)
{
Nova::uint8_t res = 0;
genv = &env;
Libc::with_libc([&]()
{
Timer::Connection _timer{env};
/* Allocate 4096 bytes of RAM, attach them locally and clear them: the channel page. */
Genode::Ram_dataspace_capability ds = env.ram().alloc(4096);
channel = env.rm().attach(ds);
Genode::memset(reinterpret_cast<char *>(channel), 0, 4096);
//Genode::Heap _heap{env.ram(), env.rm()};
//Genode::log("Registering MxTasking entrypoint");
/* NOTE(review): a failing mxinit() is only logged; execution continues regardless. */
if ((res = Nova::mxinit(0, 0, channel))) {
Genode::error("Failed to init MxTasking: ", res);
}
Genode::log("Registered MxTasking, yielding ...");
/* Read the TSC frequency; on any exception tsc_freq_khz keeps its initial value 0. */
try {
Genode::Attached_rom_dataspace info(env, "platform_info");
tsc_freq_khz = info.xml().sub_node("hardware").sub_node("tsc")
.attribute_value("freq_khz", 0ULL);
} catch (...) { };
/* Time 1000 timestamp reads to derive rdtsc_cost below. */
start = Genode::Trace::timestamp();
for (unsigned c = 0; c < 1000; c++) {
//Genode::Trace::Timestamp start = Genode::Trace::timestamp();
/*Nova::uint8_t rc = Nova::yield();
if (rc != Nova::NOVA_OK)
break;*/
Genode::Trace::timestamp();
// Genode::Trace::Timestamp end = Genode::Trace::timestamp();
// delay += (end - start);
}
Genode::Trace::Timestamp end = Genode::Trace::timestamp();
rdtsc_cost = (end - start) / 1000 / 2;
Genode::log("My affinity is ", env.cpu().affinity_space(), " of size ", env.cpu().affinity_space().total());
Genode::log("Will create workers for affinity space: ", env.topo().global_affinity_space());
start = Genode::Trace::timestamp();
Genode::Thread *me = Genode::Thread::myself();
unsigned long cpuid = 0;
Nova::cpu_id(cpuid);
Genode::Affinity::Space space = env.topo().global_affinity_space();
Genode::log("My main thread is on phys. CPU ", cpuid);
/* NOTE(review): variable-length array (compiler extension); slot 0 and the skipped slot stay unused. */
pthread_t workers[space.total()];
std::cout << "Creating workers" << std::endl;
Genode::Trace::Timestamp thread_start = Genode::Trace::timestamp();
/* Create one worker Cell per affinity index starting at 1; these threads are never joined. */
for (Genode::uint16_t cpu = 1; cpu < space.total(); cpu++)
{
Genode::String<32> const name{"worker", cpu};
/* Skip one index derived from the main thread's CPU id (presumably the main thread's own location -- TODO confirm). */
if (cpu == (space.total() - cpuid))
continue;
Cell *worker = new Cell(env, _timer, cpu, space.location_of_index(cpu));
Libc::pthread_create_from_session(&workers[cpu], Cell::pthread_entry, worker, 4 * 4096, name.string(), &env.cpu(), space.location_of_index(cpu));
// Genode::log("Created worker for CPU ", cpu);
// worker->start();
}
Genode::Trace::Timestamp thread_stop = Genode::Trace::timestamp();
/* NOTE(review): the fixed divisor 2000 presumably assumes a 2 GHz TSC -- confirm. */
Genode::log("Took ", (thread_stop - thread_start) / 2000, " μs to start workers");
pthread_t main_pt{};
Genode::Affinity::Location loc = me->affinity();
//Genode::log("Starting main worker on CPU ", cpuid);
/* The driving Cell (id 0) runs at the main thread's affinity location. */
Cell *main_cell = new Cell(env, _timer, 0, loc);
//Cell *main = new (_heap) Cell(env, 0, Genode::Affinity::Location(20,0));
/*Libc::pthread_create_from_thread(&main_pt, *main, &main);
main->start();*/
// Nova::yield(false);
//_timer.msleep(10000);
Libc::pthread_create_from_session(&main_pt, Cell::pthread_entry, main_cell, 8 * 4096, "main_worker", &env.cpu(), loc);
/* Only the driving Cell is joined. */
pthread_join(main_pt, 0); });
Genode::log("Leaving component");
}

View File

@@ -0,0 +1,61 @@
/*
* mem.h - memory management
*/
#pragma once
#include "types.h"
/* Page-size shifts: log2 of the page sizes defined below. */
enum {
PGSHIFT_4KB = 12,
PGSHIFT_2MB = 21,
PGSHIFT_1GB = 30,
};
/* Page sizes in bytes, derived from the shifts above. */
enum {
PGSIZE_4KB = (1 << PGSHIFT_4KB), /* 4096 bytes */
PGSIZE_2MB = (1 << PGSHIFT_2MB), /* 2097152 bytes */
PGSIZE_1GB = (1 << PGSHIFT_1GB), /* 1073741824 bytes */
};
/* Masks selecting the offset-within-page bits of an address. */
#define PGMASK_4KB (PGSIZE_4KB - 1)
#define PGMASK_2MB (PGSIZE_2MB - 1)
#define PGMASK_1GB (PGSIZE_1GB - 1)
/* page numbers */
#define PGN_4KB(la) (((uintptr_t)(la)) >> PGSHIFT_4KB)
#define PGN_2MB(la) (((uintptr_t)(la)) >> PGSHIFT_2MB)
#define PGN_1GB(la) (((uintptr_t)(la)) >> PGSHIFT_1GB)
/* offset of an address within its page */
#define PGOFF_4KB(la) (((uintptr_t)(la)) & PGMASK_4KB)
#define PGOFF_2MB(la) (((uintptr_t)(la)) & PGMASK_2MB)
#define PGOFF_1GB(la) (((uintptr_t)(la)) & PGMASK_1GB)
/* address rounded down to the start of its page */
#define PGADDR_4KB(la) (((uintptr_t)(la)) & ~((uintptr_t)PGMASK_4KB))
#define PGADDR_2MB(la) (((uintptr_t)(la)) & ~((uintptr_t)PGMASK_2MB))
#define PGADDR_1GB(la) (((uintptr_t)(la)) & ~((uintptr_t)PGMASK_1GB))
typedef unsigned long physaddr_t; /* physical addresses */
typedef unsigned long virtaddr_t; /* virtual addresses */
/* Failure sentinel for the mapping functions; only defined if <sys/mman.h> has not done so. */
#ifndef MAP_FAILED
#define MAP_FAILED ((void *)-1)
#endif
typedef unsigned int mem_key_t;
/*
 * Mapping helpers. Their definitions are not part of this header.
 * NOTE(review): "anom" presumably means "anonymous" -- confirm against the
 * implementation.
 */
extern void *mem_map_anom(void *base, size_t len, size_t pgsize, int node);
extern void *mem_map_file(void *base, size_t len, int fd, off_t offset);
extern void *mem_map_shm(mem_key_t key, void *base, size_t len,
size_t pgsize, bool exclusive);
extern void *mem_map_shm_rdonly(mem_key_t key, void *base, size_t len,
size_t pgsize);
extern int mem_unmap_shm(void *base);
/* Looks up physical addresses for the pages of [addr, addr + len); results go to maddrs. */
extern int mem_lookup_page_phys_addrs(void *addr, size_t len, size_t pgsize,
physaddr_t *maddrs);
/*
 * Single-page convenience wrapper around mem_lookup_page_phys_addrs():
 * the looked-up range is exactly one page of size pgsize starting at addr.
 * Returns whatever the multi-page lookup returns; the result is stored in
 * *paddr.
 */
static inline int
mem_lookup_page_phys_addr(void *addr, size_t pgsize, physaddr_t *paddr)
{
	/* a range of one page: length equals the page size */
	const size_t range_len = pgsize;

	return mem_lookup_page_phys_addrs(addr, range_len, pgsize, paddr);
}

View File

@@ -0,0 +1,145 @@
#include "synthetic_worker.h"
#include <chrono>
#include <iostream>
#include <thread>
#include <nova/syscall-generic.h>
#include <nova/syscalls.h>
#include <libc/component.h>
#include <mx/system/environment.h>
#include <mx/tasking/runtime.h>
#include <mx/util/core_set.h>
#include <base/log.h>
namespace
{
// Number of synthetic workers/tasks to create; parsed from argv[1] in main().
int threads;
// Iteration count handed to SyntheticWorker::Work() on every task execution; parsed from argv[2].
uint64_t n;
// Worker specification string passed to SyntheticWorkerFactory(); taken from argv[3].
std::string worker_spec;
class SyntheticWork : public mx::tasking::TaskInterface
{
private:
SyntheticWorker *_w{nullptr};
uint64_t *_cnt;
public:
SyntheticWork(SyntheticWorker *w, uint64_t *cnt) : _w(w), _cnt(cnt) {}
~SyntheticWork() override = default;
mx::tasking::TaskResult execute(const std::uint16_t , const std::uint16_t) override
{
_w->Work(n);
(*_cnt)++;
//mx::tasking::runtime::scheduler().allocate_cores(64);
return mx::tasking::TaskResult::make_succeed(this);
}
};
// Initializes the MxTasking runtime for `threads` cores, spawns one
// SyntheticWork task per worker and starts a monitor thread that prints the
// number of completed task executions per second to stderr.
// The `arg` parameter is unused. The monitor loop has no exit condition, so
// this function does not return.
void
MainHandler(void *arg)
{
    // One execution counter per task; tasks increment, the monitor sums up.
    std::vector<uint64_t> cnt(threads);

    auto core_set = mx::util::core_set::build(threads);
    std::cout << "Core set to use: " << core_set << std::endl;
    mx::tasking::runtime::init(core_set, 0, false);

    for (int idx = 0; idx < threads; ++idx)
    {
        Genode::log("Creating synthetic worker ", idx);

        auto *worker = SyntheticWorkerFactory(worker_spec);
        if (worker == nullptr) {
            std::cerr << "Failed to create worker." << std::endl;
            exit(1);
        }

        auto *task = mx::tasking::runtime::new_task<SyntheticWork>(idx, worker, &cnt[idx]);
        task->annotate(static_cast<mx::tasking::TaskInterface::channel>(idx));
        mx::tasking::runtime::spawn(*task, mx::system::topology::core_id());
    }

    auto monitor = std::thread([&]()
    {
        uint64_t previous_total = 0;
        auto previous_time = std::chrono::steady_clock::now();

        for (;;) {
            std::this_thread::sleep_for(std::chrono::seconds(1));

            const auto current_time = std::chrono::steady_clock::now();
            const double elapsed =
                std::chrono::duration_cast<std::chrono::duration<double>>(current_time - previous_time)
                    .count();

            uint64_t current_total = 0;
            for (int idx = 0; idx < threads; idx++)
                current_total += cnt[idx];

            // executions per second since the previous sample
            std::cerr << static_cast<double>(current_total - previous_total) / elapsed
                      << std::endl;

            previous_total = current_total;
            previous_time = current_time;
        }
    });

    mx::tasking::runtime::start_and_wait();
    monitor.join();
    // never returns
}
} // anonymous namespace
// Writes the one-line command-line synopsis to stderr.
void PrintUsage()
{
    static const char kSynopsis[] =
        "usage: [#threads] [#n] [worker_spec] <use_barrier>";

    std::cerr << kSynopsis << std::endl;
}
// Command-line style entry point (invoked from Libc::Component::construct).
//
// argv[1]: number of worker threads (must be positive)
// argv[2]: iteration count passed to each Work() call
// argv[3]: worker specification for SyntheticWorkerFactory()
//
// Returns -EINVAL on missing or invalid arguments. On success MainHandler()
// takes over and is not expected to return.
int main(int argc, char *argv[])
{
    if (argc < 4)
    {
        PrintUsage();
        return -EINVAL;
    }

    threads = std::stoi(argv[1], nullptr, 0);
    n = std::stoul(argv[2], nullptr, 0);
    worker_spec = std::string(argv[3]);

    // The thread count sizes a vector and a core set; reject non-positive values.
    if (threads < 1)
    {
        PrintUsage();
        return -EINVAL;
    }

    // The base_init()/base_init_thread() calls of the original runtime are
    // disabled in this port, so there is no status code to check here
    // (the former checks read an uninitialized variable).

    MainHandler(nullptr);
    return 0;
}
// Genode component entry point: prepares the MxTasking environment and then
// runs main() with a fixed argument vector
// (64 threads, n = 1, worker spec "cacheantagonist:4090880").
void Libc::Component::construct(Libc::Env &env) {
    mx::system::Environment::set_env(&env);

    // The environment keeps a pointer to this local core set; it stays valid
    // because main() below is not expected to return.
    auto sys_cores = mx::util::core_set::build(64);
    mx::system::Environment::set_cores(&sys_cores);
    mx::memory::GlobalHeap::myself();

    std::uint16_t cores = 64;
    //env.cpu().affinity_space().total();

    // argv entries must be writable char arrays: binding string literals to
    // `char *` is not valid C++.
    char prog_arg[] = "stress_genode";
    char cores_arg[10];
    snprintf(cores_arg, sizeof(cores_arg), "%d", cores);
    char n_arg[] = "1";
    char spec_arg[] = "cacheantagonist:4090880";
    char *args[] = {prog_arg, cores_arg, n_arg, spec_arg, nullptr};

    Libc::with_libc([&]()
    {
        std::cout << "Starting Cache Antagonist" << std::endl;
        main(4, args);
    });
}

View File

@@ -0,0 +1,315 @@
// synthetic_worker.cc - support for generation of synthetic work
extern "C"
{
#include "mem.h"
#include <string.h>
#include <sys/mman.h>
}
#include "synthetic_worker.h"
#include "util.h"
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <limits>
#include <numeric>
#include <random>
#include <stdexcept>
#include <tuple>
#include <base/log.h>
#include <mx/memory/global_heap.h>
// Stub of the barrier hook: no barrier is implemented in this port, so it
// never blocks and always reports false. It previously flowed off the end of
// a non-void function, which is undefined behaviour once called.
// NOTE(review): the meaning of the return value is not visible here --
// confirm `false` is the right "no barrier" answer before enabling the call
// in DynamicCacheAntagonistWorker::Work().
bool synth_barrier_wait() { return false; }
namespace
{
// Copies n bytes from src to dst with a single "rep movsb" instruction.
// The regions must not overlap in a way that a forward byte copy cannot
// handle. Returns the original dst, like memcpy().
void *memcpy_ermsb(void *dst, const void *src, size_t n)
{
    // "rep movsb" advances RDI/RSI and counts RCX down, so all three operands
    // are read-write; remember the start address for the return value.
    void *const start = dst;
    asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(n)::"memory");
    return start;
}
// Executes the clflush instruction on the address p.
inline void clflush(volatile void *p) { asm volatile("clflush (%0)" ::"r"(p)); }
// Intended to store data (indicated by the param c) to the cache line at p
// using non-temporal stores.
// NOTE: the SSE2 intrinsics implementation below is commented out, so this
// function currently does nothing and both parameters are unused.
inline void nt_cacheline_store(char *p, int c)
{
/*__m128i i = _mm_set_epi8(c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c);
_mm_stream_si128((__m128i *)&p[0], i);
_mm_stream_si128((__m128i *)&p[16], i);
_mm_stream_si128((__m128i *)&p[32], i);
_mm_stream_si128((__m128i *)&p[48], i);*/
}
} // anonymous namespace
// Computes n square roots through the math library. Each result is written
// to a volatile local so the computation is not optimized away.
void SqrtWorker::Work(uint64_t n)
{
    constexpr double kScale = 2350845.545;

    uint64_t step = 0;
    while (step < n)
    {
        volatile double sink = sqrt(step * kScale);
        std::ignore = sink; // silences compiler warning
        ++step;
    }
}
// SQRT(src_var, dest_var, src_xmm, dest_xmm)
// Inline-assembly square root: moves src_var into the register named by
// src_xmm, executes sqrtsd from src_xmm into dest_xmm and moves the result
// into dest_var. src_xmm and dest_xmm are register names given as string
// literals; both registers and "memory" are declared as clobbered.
#define SQRT(src_var, dest_var, src_xmm, dest_xmm) \
asm volatile("movq %1, %%" src_xmm \
"\n" \
"sqrtsd %%" src_xmm ", %%" dest_xmm \
"\n" \
"movq %%" dest_xmm ", %0 \n" \
: "=r"(dest_var) \
: "g"(src_var) \
: src_xmm, dest_xmm, "memory")
// Computes square roots with the SQRT inline-assembly macro, four per loop
// pass on disjoint register pairs. The loop advances in steps of four, so n
// is effectively rounded up to a multiple of four. The results are discarded.
void AsmSqrtWorker::Work(uint64_t n)
{
    constexpr double kScale = 2350845.545;

    for (uint64_t base = 0; base < n; base += 4)
    {
        // inputs for the four independent sqrt operations of this pass
        double in_a = base * kScale;
        double in_b = (base + 1) * kScale;
        double in_c = (base + 2) * kScale;
        double in_d = (base + 3) * kScale;

        double out_a, out_b, out_c, out_d;
        SQRT(in_a, out_a, "xmm0", "xmm1");
        SQRT(in_b, out_b, "xmm2", "xmm3");
        SQRT(in_c, out_c, "xmm4", "xmm5");
        SQRT(in_d, out_d, "xmm6", "xmm7");
    }
}
// Allocates a zero-initialized buffer of `size` bytes and wraps it in a new
// worker that touches it with the given stride.
StridedMemtouchWorker *StridedMemtouchWorker::Create(std::size_t size,
                                                     std::size_t stride)
{
    char *const zeroed = new char[size]();
    auto *const worker = new StridedMemtouchWorker(zeroed, size, stride);
    return worker;
}
// Performs n reads from the buffer; read k touches offset
// (stride * k) modulo the buffer size.
void StridedMemtouchWorker::Work(uint64_t n)
{
    uint64_t step = 0;
    while (step < n)
    {
        const auto offset = (stride_ * step) % size_;
        volatile char sink = buf_[offset];
        std::ignore = sink; // silences compiler warning
        ++step;
    }
}
/* TODO: MemStreamWorker is currently broken as clang lacks the intrinsics needed */
// Maps `size` bytes of private anonymous read/write memory, fills it with
// 0xAB and wraps it in a new worker. Returns nullptr if the mapping fails.
MemStreamWorker *MemStreamWorker::Create(std::size_t size)
{
    const int protection = PROT_READ | PROT_WRITE;
    const int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
    // | MAP_POPULATE | MAP_HUGETLB |
    // (PGSHIFT_2MB << MAP_HUGE_SHIFT);

    void *const region = mmap(nullptr, size, protection, map_flags, -1, 0);
    if (region == MAP_FAILED)
        return nullptr;

    memset(region, 0xAB, size);
    return new MemStreamWorker(static_cast<char *>(region), size);
}
// Unmaps the buffer that Create() obtained from mmap().
MemStreamWorker::~MemStreamWorker()
{
    void *const region = (void *)buf_;
    munmap(region, size_);
}
// Reads the buffer sequentially from its start: min(n, buffer size) bytes
// per call.
void MemStreamWorker::Work(uint64_t n)
{
    const uint64_t limit = (n > size_) ? size_ : n;

    uint64_t pos = 0;
    while (pos < limit)
    {
        volatile char sink = buf_[pos];
        std::ignore = sink; // silences compiler warning
        ++pos;
    }
}
// Allocates a zero-initialized buffer of `size` bytes and a visiting order:
// the indices 0..size-1, shuffled by a Mersenne Twister seeded with `seed`.
RandomMemtouchWorker *RandomMemtouchWorker::Create(std::size_t size,
                                                   unsigned int seed)
{
    char *const zeroed = new char[size]();

    std::vector<unsigned int> order(size);
    std::iota(order.begin(), order.end(), 0);

    std::mt19937 rng(seed);
    std::shuffle(std::begin(order), std::end(order), rng);

    return new RandomMemtouchWorker(zeroed, std::move(order));
}
void RandomMemtouchWorker::Work(uint64_t n)
{
for (uint64_t i = 0; i < n; ++i)
buf_[schedule_[i % schedule_.size()]]++;
}
// Allocates a zero-initialized buffer of `size` bytes and wraps it in a new
// cache antagonist worker.
CacheAntagonistWorker *CacheAntagonistWorker::Create(std::size_t size)
{
    char *const zeroed = new char[size]();
    auto *const worker = new CacheAntagonistWorker(zeroed, size);
    return worker;
}
// Copies the upper half of the buffer onto the lower half, n times.
void CacheAntagonistWorker::Work(uint64_t n)
{
    const std::size_t half = size_ / 2;
    char *const lower = &buf_[0];
    const char *const upper = &buf_[half];

    uint64_t round = 0;
    while (round < n)
    {
        memcpy_ermsb(lower, upper, half);
        ++round;
    }
}
// Creates a memory bandwidth antagonist over a buffer of `size` bytes taken
// from the global heap on the NUMA node of the calling core. All cache lines
// of the buffer are flushed before the worker is returned.
//
// nop_period / nop_num are stored in the worker and control how often and
// how long Work() pauses.
// Returns nullptr if the allocation fails or is not cache-line aligned.
MemBWAntagonistWorker *MemBWAntagonistWorker::Create(std::size_t size,
                                                     int nop_period,
                                                     int nop_num)
{
    // non-temporal store won't bypass cache when accessing the remote memory.
    auto numa_id = mx::system::topology::node_id(mx::system::topology::core_id());
    char *buf = reinterpret_cast<char *>(mx::memory::GlobalHeap::allocate(numa_id, size));

    // Never touch a failed allocation: the flush loop below would fault on it.
    if (buf == nullptr)
    {
        Genode::error("Failed to allocate memory for the memory bandwidth antagonist.");
        return nullptr;
    }

    // The stores in Work() operate on whole cache lines, so the buffer must be
    // cache-line aligned.
    if (reinterpret_cast<uint64_t>(buf) % CACHELINE_SIZE != 0)
    {
        // Should never be executed.
        Genode::error("The allocated memory should be cacheline size aligned.");
        return nullptr;
    }

    // Flush the cache explicitly. Non-temporal store will still write into cache
    // if the corresponding data is already at cache.
    for (std::size_t i = 0; i < size; i += CACHELINE_SIZE)
    {
        clflush(reinterpret_cast<volatile void *>(buf + i));
    }
    return new MemBWAntagonistWorker(buf, size, nop_period, nop_num);
}
// Sweeps the buffer n times, one nt_cacheline_store() per cache line.
// Whenever the running store counter equals nop_period_, it is reset and an
// empty loop of nop_num_ iterations is executed as a pause.
void MemBWAntagonistWorker::Work(uint64_t n)
{
    int stores_since_pause = 0;

    for (uint64_t sweep = 0; sweep < n; sweep++)
    {
        for (std::size_t offset = 0; offset < size_; offset += CACHELINE_SIZE)
        {
            nt_cacheline_store(buf_ + offset, 0);

            if (stores_since_pause++ != nop_period_)
                continue;

            stores_since_pause = 0;
            for (int spin = 0; spin < nop_num_; spin++)
            {
                asm("");
            }
        }
    }
}
// Allocates a zero-initialized buffer of `size` bytes and wraps it in a new
// worker configured with the given pause period and pause length.
DynamicCacheAntagonistWorker *DynamicCacheAntagonistWorker::Create(
    std::size_t size, int period, int nop_num)
{
    char *const zeroed = new char[size]();
    auto *const worker = new DynamicCacheAntagonistWorker(zeroed, size, period, nop_num);
    return worker;
}
// Copies the lower half of the buffer onto the upper half, one double at a
// time, n times. Whenever the member counter cnt_ equals period_, it is reset
// and an empty loop of nop_num_ iterations is executed as a pause.
void DynamicCacheAntagonistWorker::Work(uint64_t n)
{
    double *const cells = reinterpret_cast<double *>(buf_);
    const size_t half = size_ / 2 / sizeof(double);

    for (uint64_t round = 0; round < n; ++round)
    {
        for (size_t idx = 0; idx < half; idx++)
        {
            cells[idx + half] = cells[idx];

            if (cnt_++ != period_)
                continue;

            //synth_barrier_wait();
            cnt_ = 0;
            for (int spin = 0; spin < nop_num_; spin++)
            {
                asm("");
            }
        }
    }
}
// Parses a worker specification of the form "type[:arg[:arg...]]" and
// creates the matching worker.
//
//   sqrt
//   asmsqrt
//   stridedmem:<size>:<stride>
//   randmem:<size>:<seed>
//   memstream:<size>
//   cacheantagonist:<size>
//   membwantagonist:<size>:<nop_period>:<nop_num>
//   dynamiccacheantagonist:<size>:<period>:<nop_num>
//
// Numeric arguments are parsed with base auto-detection (std::stoul, base 0).
// Returns nullptr for an unknown type, a wrong number of arguments, or an
// argument that is not a number or does not fit its parameter type.
SyntheticWorker *SyntheticWorkerFactory(std::string s)
{
    const std::vector<std::string> tokens = split(s, ':');
    // the first token is the type of worker, must be specified
    if (tokens.empty())
        return nullptr;

    const std::string &type = tokens[0];
    const std::size_t num_args = tokens.size() - 1;

    // Parses token `idx` as unsigned long; throws on malformed input.
    const auto as_ulong = [&tokens](std::size_t idx) {
        return std::stoul(tokens[idx], nullptr, 0);
    };
    // Same, but additionally rejects values that do not fit into an int
    // instead of silently narrowing them.
    const auto as_int = [&as_ulong](std::size_t idx) {
        const unsigned long value = as_ulong(idx);
        if (value > static_cast<unsigned long>(std::numeric_limits<int>::max()))
            throw std::out_of_range("worker parameter does not fit into int");
        return static_cast<int>(value);
    };

    try
    {
        if (type == "sqrt")
        {
            if (num_args != 0)
                return nullptr;
            return new SqrtWorker();
        }
        if (type == "asmsqrt")
        {
            if (num_args != 0)
                return nullptr;
            return new AsmSqrtWorker();
        }
        if (type == "stridedmem")
        {
            if (num_args != 2)
                return nullptr;
            const unsigned long size = as_ulong(1);
            const unsigned long stride = as_ulong(2);
            return StridedMemtouchWorker::Create(size, stride);
        }
        if (type == "randmem")
        {
            if (num_args != 2)
                return nullptr;
            const unsigned long size = as_ulong(1);
            const unsigned long seed = as_ulong(2);
            if (seed > std::numeric_limits<unsigned int>::max())
                return nullptr;
            return RandomMemtouchWorker::Create(size, static_cast<unsigned int>(seed));
        }
        if (type == "memstream")
        {
            if (num_args != 1)
                return nullptr;
            return MemStreamWorker::Create(as_ulong(1));
        }
        if (type == "cacheantagonist")
        {
            if (num_args != 1)
                return nullptr;
            return CacheAntagonistWorker::Create(as_ulong(1));
        }
        if (type == "membwantagonist")
        {
            if (num_args != 3)
                return nullptr;
            const unsigned long size = as_ulong(1);
            const int nop_period = as_int(2);
            const int nop_num = as_int(3);
            return MemBWAntagonistWorker::Create(size, nop_period, nop_num);
        }
        if (type == "dynamiccacheantagonist")
        {
            if (num_args != 3)
                return nullptr;
            const unsigned long size = as_ulong(1);
            const int period = as_int(2);
            const int nop_num = as_int(3);
            return DynamicCacheAntagonistWorker::Create(size, period, nop_num);
        }
    }
    catch (const std::invalid_argument &)
    {
        // an argument is not a number
        return nullptr;
    }
    catch (const std::out_of_range &)
    {
        // an argument exceeds the range of its parameter type
        return nullptr;
    }

    // invalid type of worker
    return nullptr;
}

View File

@@ -0,0 +1,166 @@
// synthetic_worker.h - support for generation of synthetic work
#pragma once
//#include <emmintrin.h>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#define CACHELINE_SIZE (64)
// Interface implemented by every synthetic worker.
class SyntheticWorker
{
public:
    // Virtual so that concrete workers can be destroyed through this interface.
    virtual ~SyntheticWorker() = default;

    // Perform n iterations of fake work.
    virtual void Work(uint64_t n) = 0;
};
class SqrtWorker : public SyntheticWorker
{
public:
SqrtWorker() {}
~SqrtWorker() {}
// Performs n iterations of sqrt().
void Work(uint64_t n);
};
class AsmSqrtWorker : public SyntheticWorker
{
public:
AsmSqrtWorker() {}
~AsmSqrtWorker() {}
// Performs n iterations of sqrt().
void Work(uint64_t n);
};
class StridedMemtouchWorker : public SyntheticWorker
{
public:
~StridedMemtouchWorker() { delete buf_; }
// Creates a strided memory touching worker.
static StridedMemtouchWorker *Create(std::size_t size, size_t stride);
// Performs n strided memory touches.
void Work(uint64_t n);
private:
StridedMemtouchWorker(char *buf, std::size_t size, size_t stride)
: buf_(buf), size_(size), stride_(stride) {}
volatile char *buf_;
std::size_t size_;
std::size_t stride_;
};
class MemStreamWorker : public SyntheticWorker
{
public:
~MemStreamWorker();
// Creates a memory streaming worker.
static MemStreamWorker *Create(std::size_t size);
// Performs n memory reads.
void Work(uint64_t n);
private:
MemStreamWorker(char *buf, std::size_t size) : buf_(buf), size_(size) {}
volatile char *buf_;
std::size_t size_;
};
class RandomMemtouchWorker : public SyntheticWorker
{
public:
~RandomMemtouchWorker() { delete buf_; }
// Creates a random memory touching worker.
static RandomMemtouchWorker *Create(std::size_t size, unsigned int seed);
// Performs n random memory touches.
void Work(uint64_t n);
private:
RandomMemtouchWorker(char *buf, std::vector<unsigned int> schedule)
: buf_(buf), schedule_(std::move(schedule)) {}
volatile char *buf_;
std::vector<unsigned int> schedule_;
};
class CacheAntagonistWorker : public SyntheticWorker
{
public:
~CacheAntagonistWorker() { delete buf_; }
// Creates a cache antagonist worker.
static CacheAntagonistWorker *Create(std::size_t size);
// Perform n cache accesses.
void Work(uint64_t n);
private:
CacheAntagonistWorker(char *buf, std::size_t size) : buf_(buf), size_(size) {}
char *buf_;
std::size_t size_;
};
// Worker that sweeps a private buffer with cache-line sized stores,
// interleaved with configurable pauses. Instances are created via Create().
class MemBWAntagonistWorker : public SyntheticWorker
{
public:
// NOTE(review): Create() in synthetic_worker.cc obtains the buffer from
// mx::memory::GlobalHeap::allocate(), yet it is released with free() here --
// confirm the matching release function of the global heap and use it.
~MemBWAntagonistWorker() { free(buf_); }
// Creates a memory bandwidth antagonist worker. It allocates an array whose
// size is indicated by the parameter.
static MemBWAntagonistWorker *Create(std::size_t size, int nop_period,
int nop_num);
// Perform n times array stores.
void Work(uint64_t n);
private:
// Stores the buffer and the pause parameters; only reachable through Create().
MemBWAntagonistWorker(char *buf, std::size_t size, int nop_period,
int nop_num)
: buf_(buf), size_(size), nop_period_(nop_period), nop_num_(nop_num) {}
// Buffer of size_ bytes.
char *buf_;
std::size_t size_;
// Store count compared against the running counter in Work() to trigger a pause.
int nop_period_;
// Number of empty loop iterations per pause.
int nop_num_;
};
class DynamicCacheAntagonistWorker : public SyntheticWorker
{
public:
~DynamicCacheAntagonistWorker() { delete buf_; }
// Creates a cache antagonist worker.
static DynamicCacheAntagonistWorker *Create(std::size_t size, int period,
int nop_num);
// Perform n cache accesses.
void Work(uint64_t n);
private:
DynamicCacheAntagonistWorker(char *buf, std::size_t size, int period,
int nop_num)
: buf_(buf), size_(size), period_(period), nop_num_(nop_num) {}
char *buf_;
std::size_t size_;
int period_;
int nop_num_;
int cnt_;
};
// Parses a string to generate one of the above fake workers.
SyntheticWorker *SyntheticWorkerFactory(std::string s);

View File

@@ -0,0 +1,16 @@
# Include directory of the antagonist sources (added to CC_OPT via -I below).
MXINC_DIR=$(REP_DIR)/src/app/antagonist
# GCC toolchain directory handed to clang via --gcc-toolchain; can be overridden.
GENODE_GCC_TOOLCHAIN_DIR ?= /usr/local/genode/tool/21.05
TARGET = stress_genode
# source files for the benchmark framework
SRC_CC = stress_linux.cc synthetic_worker.cc util.cc
LIBS += base libc stdcxx mxtasking
# Extra objects linked in from fixed host paths: clang's builtins runtime and libatomic.
EXT_OBJECTS += /usr/local/genode/tool/lib/clang/14.0.5/lib/linux/libclang_rt.builtins-x86_64.a /usr/local/genode/tool/lib/libatomic.a
# Compile with clang/clang++ instead of the default compiler.
CUSTOM_CC = /usr/local/genode/tool/bin/clang
CUSTOM_CXX = /usr/local/genode/tool/bin/clang++
CC_OPT += --target=x86_64-genode --sysroot=/does/not/exist --gcc-toolchain=$(GENODE_GCC_TOOLCHAIN_DIR) -Wno-error -O2 -g -DNDEBUG -I$(MXINC_DIR) -std=c++20 #-D_GLIBCXX_ATOMIC_BUILTINS_8 -D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
#CC_OPT += -femulated-tls -DCLANG_CXX11_ATOMICS
# Cleared so that no strict C++ warning flags are added.
CC_CXX_WARN_STRICT =
# The cross g++ is set as CUSTOM_CXX_LIB (presumably used for linking -- TODO confirm).
CUSTOM_CXX_LIB := $(CROSS_DEV_PREFIX)g++
#CXX_LD += $(CROSS_DEV_PREFIX)g++

View File

@@ -0,0 +1,50 @@
/*
* types.h - primitive type definitions
*/
#pragma once
#include <stdbool.h>
#include "cpu.h"
/*
 * Fixed-width integer types.
 * NOTE(review): these names are also defined by <stdint.h>/<cstdint>;
 * including both relies on the typedefs being identical -- confirm for every
 * target this header is used on.
 */
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
/* __WORD_SIZE comes from cpu.h and selects the 64-bit integer typedefs. */
#ifndef __WORD_SIZE
#error __WORD_SIZE is undefined
#endif
#if __WORD_SIZE == __64BIT_WORDS
typedef unsigned long uint64_t;
typedef signed long int64_t;
#else /* __WORD_SIZE != __64BIT_WORDS */
typedef unsigned long long uint64_t;
typedef signed long long int64_t;
#endif /* __WORD_SIZE == __64BIT_WORDS */
/* Pointer-sized and size/offset types. */
typedef unsigned long uintptr_t;
typedef long intptr_t;
typedef long off_t;
typedef unsigned long size_t;
typedef long ssize_t;
/* Spinlock state holder; the lock operations are not part of this header. */
typedef struct {
volatile int locked;
} spinlock_t;
/* 32-bit counter wrapper; the atomic operations are not part of this header. */
typedef struct {
volatile int cnt;
} atomic_t;
/* Long-sized counterpart of atomic_t. */
typedef struct {
volatile long cnt;
} atomic64_t;

View File

@@ -0,0 +1,14 @@
#include "util.h"
// Splits `text` at every occurrence of `sep`.
// Empty fields are kept, so the result always holds (number of separators + 1)
// tokens; an empty input yields a single empty token.
std::vector<std::string> split(const std::string &text, char sep)
{
    std::vector<std::string> parts;
    std::string::size_type cursor = 0;

    for (;;)
    {
        const std::string::size_type hit = text.find(sep, cursor);
        if (hit == std::string::npos)
        {
            // no further separator: the remainder is the last token
            parts.push_back(text.substr(cursor));
            break;
        }
        parts.push_back(text.substr(cursor, hit - cursor));
        cursor = hit + 1;
    }

    return parts;
}

View File

@@ -0,0 +1,6 @@
#pragma once
#include <string>
#include <tuple>
#include <vector>
// Splits `text` at every occurrence of `sep` and returns the tokens in order.
// Empty tokens are kept; the result always contains at least one element.
std::vector<std::string> split(const std::string &text, char sep);

View File

@@ -0,0 +1,167 @@
// synthetic_worker.h - support for generation of synthetic work
#pragma once
#include <emmintrin.h>
#include <numa.h>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#define CACHELINE_SIZE (64)
// Interface implemented by every synthetic worker.
class SyntheticWorker
{
public:
    // Virtual so that concrete workers can be destroyed through this interface.
    virtual ~SyntheticWorker() = default;

    // Perform n iterations of fake work.
    virtual void Work(uint64_t n) = 0;
};
class SqrtWorker : public SyntheticWorker
{
public:
SqrtWorker() {}
~SqrtWorker() {}
// Performs n iterations of sqrt().
void Work(uint64_t n);
};
class AsmSqrtWorker : public SyntheticWorker
{
public:
AsmSqrtWorker() {}
~AsmSqrtWorker() {}
// Performs n iterations of sqrt().
void Work(uint64_t n);
};
class StridedMemtouchWorker : public SyntheticWorker
{
public:
~StridedMemtouchWorker() { delete buf_; }
// Creates a strided memory touching worker.
static StridedMemtouchWorker *Create(std::size_t size, size_t stride);
// Performs n strided memory touches.
void Work(uint64_t n);
private:
StridedMemtouchWorker(char *buf, std::size_t size, size_t stride)
: buf_(buf), size_(size), stride_(stride) {}
volatile char *buf_;
std::size_t size_;
std::size_t stride_;
};
class MemStreamWorker : public SyntheticWorker
{
public:
~MemStreamWorker();
// Creates a memory streaming worker.
static MemStreamWorker *Create(std::size_t size);
// Performs n memory reads.
void Work(uint64_t n);
private:
MemStreamWorker(char *buf, std::size_t size) : buf_(buf), size_(size) {}
volatile char *buf_;
std::size_t size_;
};
class RandomMemtouchWorker : public SyntheticWorker
{
public:
~RandomMemtouchWorker() { delete buf_; }
// Creates a random memory touching worker.
static RandomMemtouchWorker *Create(std::size_t size, unsigned int seed);
// Performs n random memory touches.
void Work(uint64_t n);
private:
RandomMemtouchWorker(char *buf, std::vector<unsigned int> schedule)
: buf_(buf), schedule_(std::move(schedule)) {}
volatile char *buf_;
std::vector<unsigned int> schedule_;
};
class CacheAntagonistWorker : public SyntheticWorker
{
public:
~CacheAntagonistWorker() { delete buf_; }
// Creates a cache antagonist worker.
static CacheAntagonistWorker *Create(std::size_t size);
// Perform n cache accesses.
void Work(uint64_t n);
private:
CacheAntagonistWorker(char *buf, std::size_t size) : buf_(buf), size_(size) {}
char *buf_;
std::size_t size_;
};
class MemBWAntagonistWorker : public SyntheticWorker
{
public:
~MemBWAntagonistWorker() { numa_free(buf_, size_); }
// Creates a memory bandwidth antagonist worker. It allocates an array whose
// size is indicated by the parameter.
static MemBWAntagonistWorker *Create(std::size_t size, int nop_period,
int nop_num);
// Perform n times array stores.
void Work(uint64_t n);
private:
MemBWAntagonistWorker(char *buf, std::size_t size, int nop_period,
int nop_num)
: buf_(buf), size_(size), nop_period_(nop_period), nop_num_(nop_num) {}
char *buf_;
std::size_t size_;
int nop_period_;
int nop_num_;
};
class DynamicCacheAntagonistWorker : public SyntheticWorker
{
public:
~DynamicCacheAntagonistWorker() { delete buf_; }
// Creates a cache antagonist worker.
static DynamicCacheAntagonistWorker *Create(std::size_t size, int period,
int nop_num);
// Perform n cache accesses.
void Work(uint64_t n);
private:
DynamicCacheAntagonistWorker(char *buf, std::size_t size, int period,
int nop_num)
: buf_(buf), size_(size), period_(period), nop_num_(nop_num) {}
char *buf_;
std::size_t size_;
int period_;
int nop_num_;
int cnt_;
};
// Parses a string to generate one of the above fake workers.
SyntheticWorker *SyntheticWorkerFactory(std::string s);