Merge branch 'ealan' into tasking-profiler

This commit is contained in:
Marcel Lütke Dreimann
2023-06-01 16:37:09 +02:00
48 changed files with 2358 additions and 95 deletions

24
README.md Normal file
View File

@@ -0,0 +1,24 @@
# EalánOS — An Operating System for Heterogeneous Many-core Systems
EalánOS is a research operating system, based on the [Genode OS Framework](https://genode.org/), that explores new architectural designs and resource management strategies for many-core systems with heterogeneous computing and memory resources. It is a reference implementation of the [MxKernel](https://mxkernel.org/) architecture.
## MxKernel Architecture
The MxKernel is a new operating system architecture inspired by many-core operating systems, such as [FOS](https://dl.acm.org/doi/abs/10.1145/1531793.1531805) and [Tessellation](https://www.usenix.org/event/hotpar09/tech/full_papers/liu/liu_html/), as well as hypervisors, exokernels and unikernels.
Novel approaches of the MxKernel include the use of tasks, short-lived closed units of work, instead of threads as control-flow abstraction, and the concept of elastic cells as process abstraction. The architecture was first described in the paper [MxKernel: Rethinking Operating System Architecture for Many-core Hardware](https://sites.google.com/site/sfma2019eurosys/Program/sfma-mxkernel.pdf?attredirects=0) presented at the [9th Workshop on Systems for Multi-core and Heterogeneous Architectures](https://sites.google.com/site/sfma2019eurosys/).
## Task-based programming
EalánOS promotes task-parallel programming by including the [MxTasking](https://github.com/jmuehlig/mxtasking.git) task-parallel runtime library. MxTasking improves on the common task-parallel programming paradigm by allowing tasks to be annotated with hints about the task's behavior, such as memory accesses. These annotations are used by the runtime environment to implement advanced features, like automatic prefetching of data and automatic synchronization of concurrent memory accesses.
## Documentation
Because EalánOS is based on Genode, the primary documentation, for now, can be found in the book [Genode Foundations](https://genode.org/documentation/genode-foundations-22-05.pdf).
## Features added to Genode
EalánOS extends the Genode OS framework by functionality needed and helpful for many-core systems with non-uniform memory access (NUMA), such as
- A topology service that allows querying NUMA information from within a Genode component.
- A port of [MxTasking](https://github.com/jmuehlig/mxtasking.git), a task-based framework designed to aid in developing parallel applications.
- (WiP) An extension of Genode's RAM service that enables applications to allocate memory from a specific NUMA region, similar to libnuma's `numa_alloc_on_node`, and thus improve NUMA-locality of internal data objects.
- (WiP) An interface for using Hardware Performance Monitoring Counters inside Genode components. Currently, performance counters are only implemented for AMD's Zen1 microarchitecture.
### Acknowledgement
The work on EalánOS and the MxKernel architecture is supported by the German Research Foundation (DFG) as part of the priority program 2037 "[Scalable Data Management on Future Hardware](https://dfg-spp2037.de/)" under Grant numbers SP968/9-1 and SP968/9-2.
The MxTasking framework is developed as part of the same DFG project at the [DBIS group at TU Dortmund University](http://dbis.cs.tu-dortmund.de/cms/de/home/index.html) and funded under Grant numbers TE1117/2-1.

View File

@@ -3,7 +3,8 @@
* \author Norman Feske
* \author Sebastian Sumpf
* \author Alexander Boettcher
* \date 2009-12-27
* \author Michael Müller
* \date 2022-12-13
*/
/*
@@ -133,11 +134,19 @@ namespace Nova {
bool has_feature_svm() const { return feature_flags & (1 << 2); }
struct Cpu_desc {
enum Vendor
{
UNKNOWN,
INTEL,
AMD
};
uint8_t flags;
uint8_t thread;
uint8_t core;
uint8_t package;
uint8_t acpi_id;
uint8_t vendor;
uint8_t family;
uint8_t model;
uint8_t stepping:4;
@@ -255,6 +264,19 @@ namespace Nova {
SC_EC_TIME = 3,
};
/**
* Hpc operations
*
* Operation codes for hardware performance counters (HPC). The wrappers
* 'hpc_ctrl', 'hpc_read', 'hpc_start', 'hpc_stop', and 'hpc_reset' pass
* these values as the second argument of a NOVA_EC_CTRL syscall.
*
* NOTE(review): the numeric values presumably have to match the
* operation numbers implemented by the kernel - confirm against the
* NOVA revision referenced by the port file.
*/
enum Hpc_op
{
HPC_SETUP = 6U,
HPC_START = 7U,
HPC_STOP = 8U,
HPC_RESET = 9U,
HPC_READ = 10U,
};
/**
* Pd operations
*/

View File

@@ -253,6 +253,36 @@ namespace Nova {
return util_time(NOVA_EC_CTRL, ec, Ec_op::EC_TIME, time);
}
/**
 * Issue a hardware-performance-counter control operation
 *
 * \param op    HPC operation code, handed to the syscall together with
 *              NOVA_EC_CTRL
 * \param sel   counter selector
 * \param type  counter type
 * \param p1    first operation-specific argument (taken by reference)
 * \param p2    second operation-specific argument (taken by reference)
 * \param p3    third operation-specific argument (taken by reference)
 *
 * \return      result of 'syscall_6', narrowed to 'uint8_t'
 */
ALWAYS_INLINE
inline uint8_t hpc_ctrl(Hpc_op op, mword_t sel, mword_t type, mword_t &p1, mword_t &p2, mword_t &p3)
{
	return syscall_6(NOVA_EC_CTRL, op, sel, type, p1, p2, p3);
}
/**
 * Read a hardware performance counter
 *
 * Issues a NOVA_EC_CTRL syscall with the HPC_READ operation via 'syscall_5'.
 *
 * \param sel    counter selector
 * \param type   counter type
 * \param value  passed by reference to the syscall; presumably receives the
 *               current counter value on success (the caller in perf.cc
 *               returns it after the call) - confirm against 'syscall_5'
 *
 * \return       result of 'syscall_5'
 */
ALWAYS_INLINE
inline uint8_t hpc_read(mword_t sel, mword_t type, mword_t &value)
{
return syscall_5(NOVA_EC_CTRL, HPC_READ, sel, type, value);
}
/**
 * Start a hardware performance counter
 *
 * Issues a NOVA_EC_CTRL syscall with the HPC_START operation via 'syscall_1'.
 *
 * \param sel   counter selector
 * \param type  counter type
 *
 * \return      result of 'syscall_1'
 */
ALWAYS_INLINE
inline uint8_t hpc_start(mword_t sel, mword_t type)
{
return syscall_1(NOVA_EC_CTRL, HPC_START, sel, type);
}
/**
 * Stop a hardware performance counter
 *
 * Issues a NOVA_EC_CTRL syscall with the HPC_STOP operation via 'syscall_1'.
 *
 * \param sel   counter selector
 * \param type  counter type
 *
 * \return      result of 'syscall_1'
 */
ALWAYS_INLINE
inline uint8_t hpc_stop(mword_t sel, mword_t type)
{
return syscall_1(NOVA_EC_CTRL, HPC_STOP, sel, type);
}
/**
 * Reset a hardware performance counter
 *
 * Issues a NOVA_EC_CTRL syscall with the HPC_RESET operation via 'syscall_2'.
 *
 * \param sel   counter selector
 * \param type  counter type
 * \param val   value handed to the kernel along with the reset request;
 *              presumably the value the counter is set to - confirm against
 *              the kernel implementation
 *
 * \return      result of 'syscall_2'
 */
ALWAYS_INLINE
inline uint8_t hpc_reset(mword_t sel, mword_t type, mword_t val)
{
return syscall_2(NOVA_EC_CTRL, HPC_RESET, sel, type, val);
}
ALWAYS_INLINE
inline uint8_t create_sc(mword_t sc, mword_t pd, mword_t ec, Qpd qpd)

View File

@@ -14,3 +14,4 @@ SRC_CC += stack_area_addr.cc
SRC_CC += cap_map.cc
SRC_CC += capability.cc
SRC_CC += signal_transmitter.cc
SRC_CC += perf.cc

View File

@@ -1 +1 @@
d850a1b6412ce630abedf7b9aa623b5caa994235
52fcb4b19aa032eaba5484a69c3c4c491c2a6915

View File

@@ -4,7 +4,7 @@ DOWNLOADS := nova.git
# feature/numa branch
URL(nova) := https://github.com/mmueller41/NOVA.git
REV(nova) := 6479677bd61db47bcdcb4bd796566f83b9f655ef
REV(nova) := 4707840843206d63f72ba9238756355d16b52be3
DIR(nova) := src/kernel/nova
PATCHES := $(sort $(wildcard $(REP_DIR)/patches/*.patch))

View File

@@ -20,6 +20,7 @@
#include <core_mem_alloc.h>
#include <address_space.h>
#include <base/allocator.h>
#include <nova/syscall-generic.h>
namespace Genode {
@@ -51,9 +52,13 @@ namespace Genode {
/* map of virtual cpu ids in Genode to kernel cpu ids */
uint8_t map_cpu_ids[MAX_SUPPORTED_CPUS];
/* map of virtual cpu ids in Genode to kernel NUMA ids */
uint8_t cpu_numa_map[MAX_SUPPORTED_CPUS];
/* map of kernel NUMA region to Genode memory ranges */
Genode::Range_allocator::Range numa_mem_ranges[MAX_SUPPORTED_CPUS]; // TODO: Add new macro for max of numa regions
addr_t _map_pages(addr_t phys_page, addr_t pages,
bool guard_page = false);
@@ -164,6 +169,17 @@ namespace Genode {
}
}
}
/**
* @brief Return the kernel-internal vendor code of CPU 0
*
* Interprets the address stored in '__initial_sp' as the NOVA 'Hip'
* structure, looks up the CPU descriptor of CPU 0, and converts its
* 'vendor' byte to the 'Vendor' enumeration.
*
* NOTE(review): the result of 'cpu_desc_of_cpu(0)' is dereferenced
* without a null check - confirm that it cannot return a null pointer
* for CPU 0.
*/
Nova::Hip::Cpu_desc::Vendor cpu_vendor() {
extern addr_t __initial_sp;
Nova::Hip const &hip = *(Nova::Hip *)__initial_sp;
return static_cast<Nova::Hip::Cpu_desc::Vendor>(hip.cpu_desc_of_cpu(0)->vendor);
}
};
}

View File

@@ -56,30 +56,35 @@ static inline void * alloc_region(Dataspace_component &ds, const size_t size)
void Ram_dataspace_factory::_clear_ds(Dataspace_component &ds)
{
size_t const page_rounded_size = align_addr(ds.size(), get_page_size_log2());
//size_t memset_count = page_rounded_size / 4;
//addr_t memset_ptr = ds.core_local_addr();
size_t memset_count = page_rounded_size / 32;
addr_t memset_ptr = ds.core_local_addr();
/*
if ((memset_count * 4 == page_rounded_size) && !(memset_ptr & 0x3))
asm volatile ("rep stosl" : "+D" (memset_ptr), "+c" (memset_count)
if ((memset_count * 32 == page_rounded_size) && !(memset_ptr & 0x3))
{
asm volatile ("rep stosq" : "+D" (memset_ptr), "+c" (memset_count)
: "a" (0) : "memory");
else
} else
memset(reinterpret_cast<void *>(memset_ptr), 0, page_rounded_size);
*/
}
void Ram_dataspace_factory::_unmap_ds_from_core(Dataspace_component &ds)
{
size_t const page_rounded_size = align_addr(ds.size(), get_page_size_log2());
/* we don't keep any core-local mapping */
unmap_local(*reinterpret_cast<Nova::Utcb *>(Thread::myself()->utcb()),
ds.core_local_addr(),
page_rounded_size >> get_page_size_log2());
ds.core_local_addr(),
page_rounded_size >> get_page_size_log2());
platform().region_alloc().free((void*)ds.core_local_addr(),
page_rounded_size);
platform().region_alloc().free((void *)ds.core_local_addr(),
page_rounded_size);
ds.assign_core_local_addr(nullptr);
}
void Ram_dataspace_factory::_export_ram_ds(Dataspace_component &ds) {
size_t page_rounded_size = align_addr(ds.size(), get_page_size_log2());

View File

@@ -36,7 +36,7 @@ CC_OPT += -mpreferred-stack-boundary=2 -mregparm=3
else
ifeq ($(filter-out $(SPECS),64bit),)
override CC_MARCH = -m64
CC_WARN += -Wframe-larger-than=256
CC_WARN += -Wframe-larger-than=1024
CC_OPT += -mpreferred-stack-boundary=4 -mcmodel=kernel -mno-red-zone
else
$(error Unsupported environment)

View File

@@ -0,0 +1,86 @@
/*
* \brief Performance Counter infrastructure, NOVA-specific implementation
* \author Michael Müller
* \date 2022-12-15
*/
#include <base/trace/perf.h>
#include <nova/syscall-generic.h>
#include <nova/syscalls.h>
#include <base/log.h>
/*
 * Bitmasks of free performance counters (a set bit marks a free counter).
 *
 * The initial values double as "not yet initialized" sentinels:
 * 'alloc_core()' and 'alloc_cbo()' compare the masks against 0xffff and
 * 0xffff0000 respectively and call '_init_masks()' on a match.
 */
unsigned long Genode::Trace::Performance_counter::private_freemask { 0xffff };
unsigned long Genode::Trace::Performance_counter::shared_freemask { 0xffff0000 };
/**
 * Replace the sentinel free masks by the vendor-specific counter sets
 *
 * Bits 0..15 of 'private_freemask' denote core counters, bits 16..31 of
 * 'shared_freemask' denote cache counters.
 */
void Genode::Trace::Performance_counter::_init_masks()
{
	/*
	 * NOTE(review): the vendor is hard-wired to AMD, so the INTEL case
	 * below is currently never taken.
	 */
	Nova::Hip::Cpu_desc::Vendor const vendor = Nova::Hip::Cpu_desc::AMD;

	if (vendor == Nova::Hip::Cpu_desc::AMD) {
		private_freemask = 0x3f;      /* 6 core performance counters */
		shared_freemask  = 0x1f0000;  /* 5 L3 complex performance counters */
		return;
	}

	if (vendor == Nova::Hip::Cpu_desc::INTEL) {
		private_freemask = 0x7fff;
		shared_freemask  = 0x7fff0000; /* 15 CBO performance counters */
	}
}
/**
 * Program a performance counter
 *
 * \param counter  counter id; 'counter >> 4' is interpreted as the counter
 *                 type, and for non-core counters only the low nibble is
 *                 used as selector
 * \param event    event argument forwarded to the kernel
 * \param mask     mask argument forwarded to the kernel
 * \param flags    flags argument forwarded to the kernel
 *
 * \throw Pfc_access_error  if the syscall does not return NOVA_OK
 */
void Genode::Trace::Performance_counter::setup(unsigned counter, uint64_t event, uint64_t mask, uint64_t flags)
{
	Nova::mword_t const type = counter >> 4;
	Nova::mword_t const sel  = (type != Performance_counter::CORE) ? (counter & 0xf) : counter;

	/* 'hpc_ctrl' takes its operation arguments by reference, hence the copies */
	Nova::mword_t evt = event;
	Nova::mword_t msk = mask;
	Nova::mword_t flg = flags;

	Nova::uint8_t const rc = Nova::hpc_ctrl(Nova::HPC_SETUP, sel, type, evt, msk, flg);
	if (rc == Nova::NOVA_OK)
		return;

	throw Genode::Trace::Pfc_access_error(rc);
}
/**
 * Start a performance counter
 *
 * \param counter  counter id; 'counter >> 4' is interpreted as the counter
 *                 type, and for non-core counters only the low nibble is
 *                 used as selector
 *
 * \throw Pfc_access_error  if the syscall does not return NOVA_OK
 */
void Genode::Trace::Performance_counter::start(unsigned counter)
{
	Nova::mword_t const type = counter >> 4;

	/*
	 * The selector of a non-core counter is the low nibble of the id, as in
	 * 'setup', 'stop', 'reset', and 'read'. Using 'counter >> 4' here would
	 * pass the counter type instead of the counter index.
	 */
	Nova::mword_t const sel = (type == Performance_counter::CORE) ? counter : (counter & 0xf);

	Nova::uint8_t const rc = Nova::hpc_start(sel, type);
	if (rc != Nova::NOVA_OK)
		throw Genode::Trace::Pfc_access_error(rc);
}
/**
 * Stop a performance counter
 *
 * \param counter  counter id; 'counter >> 4' is interpreted as the counter
 *                 type, and for non-core counters only the low nibble is
 *                 used as selector
 *
 * \throw Pfc_access_error  if the syscall does not return NOVA_OK
 */
void Genode::Trace::Performance_counter::stop(unsigned counter)
{
	Nova::mword_t const type = counter >> 4;
	Nova::mword_t const sel  = (type != Performance_counter::CORE) ? (counter & 0xf) : counter;

	Nova::uint8_t const rc = Nova::hpc_stop(sel, type);
	if (rc == Nova::NOVA_OK)
		return;

	throw Genode::Trace::Pfc_access_error(rc);
}
/**
 * Reset a performance counter
 *
 * \param counter  counter id; 'counter >> 4' is interpreted as the counter
 *                 type, and for non-core counters only the low nibble is
 *                 used as selector
 * \param val      value forwarded to the kernel with the reset request
 *
 * \throw Pfc_access_error  if the syscall does not return NOVA_OK
 */
void Genode::Trace::Performance_counter::reset(unsigned counter, unsigned val)
{
	Nova::mword_t const type = counter >> 4;
	Nova::mword_t const sel  = (type != Performance_counter::CORE) ? (counter & 0xf) : counter;

	Nova::uint8_t const rc = Nova::hpc_reset(sel, type, val);
	if (rc == Nova::NOVA_OK)
		return;

	throw Genode::Trace::Pfc_access_error(rc);
}
/**
 * Read a performance counter
 *
 * \param counter  counter id; 'counter >> 4' is interpreted as the counter
 *                 type, and for non-core counters only the low nibble is
 *                 used as selector
 *
 * \return  value stored by 'Nova::hpc_read' into its reference argument
 *
 * \throw Pfc_access_error  if the syscall does not return NOVA_OK
 */
Genode::uint64_t Genode::Trace::Performance_counter::read(unsigned counter)
{
	Nova::mword_t const type = counter >> 4;
	Nova::mword_t const sel  = (type != Performance_counter::CORE) ? (counter & 0xf) : counter;

	Nova::mword_t result = 0;

	Nova::uint8_t const rc = Nova::hpc_read(sel, type, result);
	if (rc != Nova::NOVA_OK)
		throw Genode::Trace::Pfc_access_error(rc);

	return static_cast<Genode::uint64_t>(result);
}

View File

@@ -14,6 +14,7 @@
#ifndef _INCLUDE__BASE__ATTACHED_RAM_DATASPACE_H_
#define _INCLUDE__BASE__ATTACHED_RAM_DATASPACE_H_
#include <util/string.h>
#include <util/touch.h>
#include <base/ram_allocator.h>
#include <base/env.h>
@@ -105,6 +106,7 @@ class Genode::Attached_ram_dataspace
_size(size), _ram(&ram), _rm(&rm), _cache(cache)
{
_alloc_and_attach();
memset(_local_addr, 0, _size);
}
/**

View File

@@ -93,8 +93,15 @@ struct Genode::Local_connection_base : Noncopyable
if (_session_state->phase == Session_state::INSUFFICIENT_RAM_QUOTA
|| _session_state->phase == Session_state::INSUFFICIENT_CAP_QUOTA)
warning("giving up to increase session quota for ", service.name(), " session "
{
warning("[", label, "] giving up to increase session quota for ", service.name(), " session "
"after ", (int)NUM_ATTEMPTS, " attempts");
if (_session_state->phase == Session_state::INSUFFICIENT_RAM_QUOTA)
warning("Insufficient RAM quota: ", resources.ram_quota.value);
if (_session_state->phase == Session_state::INSUFFICIENT_CAP_QUOTA)
warning("Insufficient CAP quota ", resources.cap_quota.value);
}
}
void close()

View File

@@ -0,0 +1,93 @@
/*
* \brief Performance Counter infrastructure
* \author Michael Müller
* \date 2022-12-15
*/
#pragma once
#include <base/stdint.h>
namespace Genode { namespace Trace {
/**
 * Exception thrown by 'Performance_counter::_alloc' when the requested
 * free mask contains no free counter
 */
class Pfc_no_avail {
};
/**
 * Allocation and control interface for hardware performance counters
 *
 * A counter is identified by the index of its bit in one of two free masks:
 * ids 0..15 are core counters tracked in 'private_freemask', ids 16..31 are
 * cache counters tracked in 'shared_freemask'. Consequently, 'id >> 4'
 * yields the counter's 'Type'.
 */
class Performance_counter
{
	private:

		/* set bits mark free counters, see the definitions in perf.cc */
		static unsigned long private_freemask;
		static unsigned long shared_freemask;

		/**
		 * Atomically claim the lowest free counter of 'free_mask'
		 *
		 * \return  bit index of the claimed counter
		 *
		 * \throw Pfc_no_avail  if no bit is set in the mask
		 */
		static unsigned _alloc(unsigned long *free_mask)
		{
			unsigned long current_mask, new_mask;
			unsigned bit;

			do {
				current_mask = __atomic_load_n(free_mask, __ATOMIC_RELAXED);
				bit = static_cast<unsigned>(__builtin_ffsl(static_cast<long>(current_mask)));

				/*
				 * Bail out before touching the mask. With 'bit == 0' the
				 * shift below would use a negative shift count.
				 */
				if (!bit)
					throw Pfc_no_avail();

				/* use an 'unsigned long' one to match the width of the mask */
				new_mask = current_mask & ~(1UL << (bit - 1));
			} while (!__atomic_compare_exchange(free_mask, &current_mask, &new_mask, true, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED));

			return bit - 1; /* number of the allocated counter */
		}

		/**
		 * Set both free masks to the counters available on the platform
		 */
		static void _init_masks();

	public:

		typedef unsigned int Counter;

		enum Type
		{
			CORE = 0,
			CACHE = 1
		};

		/**
		 * Allocate a counter of the given type
		 *
		 * \throw Pfc_no_avail  if no counter of this type is free
		 */
		static unsigned acquire(Type type) {
			return (type == Type::CORE) ? alloc_core() : alloc_cbo();
		}

		/**
		 * Allocate a cache counter
		 *
		 * The masks are initialized lazily while 'shared_freemask' still
		 * holds its initial value 0xffff0000.
		 */
		static unsigned alloc_cbo() {
			if (shared_freemask == 0xffff0000)
				_init_masks();
			return _alloc(&shared_freemask);
		}

		/**
		 * Allocate a core counter
		 *
		 * The masks are initialized lazily while 'private_freemask' still
		 * holds its initial value 0xffff.
		 */
		static unsigned alloc_core() {
			if (private_freemask == 0xffff)
				_init_masks();
			return _alloc(&private_freemask);
		}

		/**
		 * Return a counter to its free mask
		 *
		 * \param counter  id previously returned by 'acquire', 'alloc_core',
		 *                 or 'alloc_cbo'
		 */
		static void release(unsigned counter) {
			/* 'counter >> 4' is the counter type: 0 (CORE) for ids 0..15 */
			unsigned long * const mask = ((counter >> 4) == CORE) ? &private_freemask
			                                                      : &shared_freemask;

			/* atomic update because '_alloc' may modify the mask concurrently */
			__atomic_fetch_or(mask, 1UL << counter, __ATOMIC_RELEASE);
		}

		/*
		 * Counter operations, implemented by the platform-specific perf.cc.
		 * All of them take a counter id as returned by the allocation
		 * functions above.
		 */
		static void setup(unsigned counter, Genode::uint64_t event, Genode::uint64_t mask, Genode::uint64_t flags);
		static void start(unsigned counter);
		static void stop(unsigned counter);
		static void reset(unsigned counter, unsigned val=0);
		static uint64_t read(unsigned counter);
};
/**
 * Exception carrying the status code of a failed performance-counter
 * operation
 */
class Pfc_access_error
{
	private:

		uint8_t _rc;

	public:

		Pfc_access_error(uint8_t rc) : _rc(rc) { }

		/**
		 * Status code passed at construction
		 *
		 * Declared const so that it can be called on an exception object
		 * caught by const reference.
		 */
		uint8_t error_code() const { return _rc; }
};
}
}

View File

@@ -27,7 +27,7 @@ struct Genode::Topo_connection : Connection<Topo_session>, Topo_session_client
{
enum
{
RAM_QUOTA = 262144
RAM_QUOTA = 2097152UL
};
Topo_connection(Env &env, const char *label = "", Affinity const &affinity = Affinity())

View File

@@ -54,6 +54,9 @@ _ZN5Timer10ConnectionC1ERN6Genode3EnvEPKc T
_ZN5Timer10ConnectionC1ERN6Genode3EnvERNS1_10EntrypointEPKc T
_ZN5Timer10ConnectionC2ERN6Genode3EnvEPKc T
_ZN5Timer10ConnectionC2ERN6Genode3EnvERNS1_10EntrypointEPKc T
_ZN6Genode5Trace19Performance_counter15shared_freemaskE D 8
_ZN6Genode5Trace19Performance_counter16private_freemaskE D 8
_ZN6Genode5Trace19Performance_counter11_init_masksEv T
_ZN6Genode10Entrypoint16_dispatch_signalERNS_6SignalE T
_ZN6Genode10Entrypoint16schedule_suspendEPFvvES2_ T
_ZN6Genode10Entrypoint22Signal_proxy_component6signalEv T
@@ -274,6 +277,11 @@ _ZN6Genode5Trace6Logger17_evaluate_controlEv T
_ZN6Genode5Trace6Logger3logEPKcm T
_ZN6Genode5Trace6LoggerC1Ev T
_ZN6Genode5Trace6LoggerC2Ev T
_ZN6Genode5Trace19Performance_counter4readEj T
_ZN6Genode5Trace19Performance_counter4stopEj T
_ZN6Genode5Trace19Performance_counter5resetEjj T
_ZN6Genode5Trace19Performance_counter5setupEjyyy T
_ZN6Genode5Trace19Performance_counter5startEj T
_ZN6Genode5Trace18Partitioned_buffer4initEm T
_ZN6Genode5Trace18Partitioned_buffer6commitEm T
_ZN6Genode5Trace18Partitioned_buffer7reserveEm T

View File

@@ -172,6 +172,7 @@ class Genode::Cpu_thread_component : public Rpc_object<Cpu_thread>,
_address_space_region_map.add_client(_rm_client);
_platform_thread.pager(_rm_client);
_platform_thread.affinity(location);
_trace_sources.insert(&_trace_source);
}

View File

@@ -82,6 +82,11 @@ class Genode::Ram_dataspace_factory : public Ram_allocator,
*/
void _clear_ds(Dataspace_component &ds);
/**
* Remove core-local mappings of dataspace
*/
void _unmap_ds_from_core(Dataspace_component &ds);
public:
Ram_dataspace_factory(Rpc_entrypoint &ep,

View File

@@ -20,6 +20,7 @@
#include <base/affinity.h>
#include <base/heap.h>
#include <topo_session/topo_session.h>
#include <platform.h>
#include <topo_session/node.h>
namespace Genode {
@@ -32,7 +33,7 @@ class Genode::Topo_session_component : public Session_object<Topo_session>
Genode::Affinity &_affinity;
Sliced_heap _md_alloc;
Topology::Numa_region _node_affinities[64][64];
Topology::Numa_region _node_affinities[Genode::Platform::MAX_SUPPORTED_CPUS][Genode::Platform::MAX_SUPPORTED_CPUS];
unsigned _node_count;
Topology::Numa_region _nodes[64];

View File

@@ -286,7 +286,7 @@ int main()
size_t const avail_ram_quota = core_pd.avail_ram().value;
size_t const avail_cap_quota = core_pd.avail_caps().value;
size_t const preserved_ram_quota = 224*1024;
size_t const preserved_ram_quota = 224*1024+(1<<20);
size_t const preserved_cap_quota = 1000;
if (avail_ram_quota < preserved_ram_quota) {

View File

@@ -123,6 +123,7 @@ Ram_dataspace_factory::try_alloc(size_t ds_size, Cache cache)
Dataspace_component &ds = *ds_ptr;
/* create native shared memory representation of dataspace */
#ifdef ZERO_AT_ALLOC
try { _export_ram_ds(ds); }
catch (Core_virtual_memory_exhausted) {
warning("could not export RAM dataspace of size ", ds.size());
@@ -137,8 +138,8 @@ Ram_dataspace_factory::try_alloc(size_t ds_size, Cache cache)
* function must also make sure to flush all cache lines related to the
* address range used by the dataspace.
*/
_clear_ds(ds);
_unmap_ds_from_core(ds);
#endif
Dataspace_capability ds_cap = _ep.manage(&ds);
phys_alloc_guard.keep = true;
@@ -181,8 +182,25 @@ void Ram_dataspace_factory::free(Ram_dataspace_capability ds_cap)
});
/* call dataspace destructor and free memory */
if (ds)
if (ds) {
try { _export_ram_ds(*ds); }
catch (Core_virtual_memory_exhausted) {
warning("could not export RAM dataspace of size ", ds->size());
/* cleanup unneeded resources */
destroy(_ds_slab, ds);
return;
}
/*
* Fill new dataspaces with zeros. For non-cached RAM dataspaces, this
* function must also make sure to flush all cache lines related to the
* address range used by the dataspace.
*/
_clear_ds(*ds);
_unmap_ds_from_core(*ds);
destroy(_ds_slab, ds);
}
}

View File

@@ -0,0 +1 @@
INC_DIR += $(call select_from_ports,libpfm4)/include

View File

@@ -0,0 +1,204 @@
LIBPFM4_DIR := $(call select_from_ports,libpfm4)/src/lib/libpfm4
CC_OPT += -D_REENTRANT -fvisibility=hidden
SRC_CC = $(LIBPFM4_DIR)/lib/pfmlib_common.c
# build libpfm only for x86_64 for now
CONFIG_PFMLIB_ARCH_X86_64=y
CONFIG_PFMLIB_ARCH_X86=y
CONFIG_PFMLIB_SHARED?=n
CONFIG_PFMLIB_DEBUG?=y
CONFIG_PFMLIB_NOPYTHON?=y
#
# list all library support modules
#
ifeq ($(CONFIG_PFMLIB_ARCH_IA64),y)
INCARCH = $(INC_IA64)
#SRCS += pfmlib_gen_ia64.c pfmlib_itanium.c pfmlib_itanium2.c pfmlib_montecito.c
CFLAGS += -DCONFIG_PFMLIB_ARCH_IA64
endif
ifeq ($(CONFIG_PFMLIB_ARCH_X86),y)
ifeq ($(SYS),Linux)
SRCS += pfmlib_intel_x86_perf_event.c pfmlib_amd64_perf_event.c \
pfmlib_intel_netburst_perf_event.c \
pfmlib_intel_snbep_unc_perf_event.c
endif
INCARCH = $(INC_X86)
SRCS += pfmlib_amd64.c pfmlib_intel_core.c pfmlib_intel_x86.c \
pfmlib_intel_x86_arch.c pfmlib_intel_atom.c \
pfmlib_intel_nhm_unc.c pfmlib_intel_nhm.c \
pfmlib_intel_wsm.c \
pfmlib_intel_snb.c pfmlib_intel_snb_unc.c \
pfmlib_intel_ivb.c pfmlib_intel_ivb_unc.c \
pfmlib_intel_hsw.c \
pfmlib_intel_bdw.c \
pfmlib_intel_skl.c \
pfmlib_intel_icl.c \
pfmlib_intel_spr.c \
pfmlib_intel_rapl.c \
pfmlib_intel_snbep_unc.c \
pfmlib_intel_snbep_unc_cbo.c \
pfmlib_intel_snbep_unc_ha.c \
pfmlib_intel_snbep_unc_imc.c \
pfmlib_intel_snbep_unc_pcu.c \
pfmlib_intel_snbep_unc_qpi.c \
pfmlib_intel_snbep_unc_ubo.c \
pfmlib_intel_snbep_unc_r2pcie.c \
pfmlib_intel_snbep_unc_r3qpi.c \
pfmlib_intel_ivbep_unc_cbo.c \
pfmlib_intel_ivbep_unc_ha.c \
pfmlib_intel_ivbep_unc_imc.c \
pfmlib_intel_ivbep_unc_pcu.c \
pfmlib_intel_ivbep_unc_qpi.c \
pfmlib_intel_ivbep_unc_ubo.c \
pfmlib_intel_ivbep_unc_r2pcie.c \
pfmlib_intel_ivbep_unc_r3qpi.c \
pfmlib_intel_ivbep_unc_irp.c \
pfmlib_intel_hswep_unc_cbo.c \
pfmlib_intel_hswep_unc_ha.c \
pfmlib_intel_hswep_unc_imc.c \
pfmlib_intel_hswep_unc_pcu.c \
pfmlib_intel_hswep_unc_qpi.c \
pfmlib_intel_hswep_unc_ubo.c \
pfmlib_intel_hswep_unc_r2pcie.c \
pfmlib_intel_hswep_unc_r3qpi.c \
pfmlib_intel_hswep_unc_irp.c \
pfmlib_intel_hswep_unc_sbo.c \
pfmlib_intel_bdx_unc_cbo.c \
pfmlib_intel_bdx_unc_ubo.c \
pfmlib_intel_bdx_unc_sbo.c \
pfmlib_intel_bdx_unc_ha.c \
pfmlib_intel_bdx_unc_imc.c \
pfmlib_intel_bdx_unc_irp.c \
pfmlib_intel_bdx_unc_pcu.c \
pfmlib_intel_bdx_unc_qpi.c \
pfmlib_intel_bdx_unc_r2pcie.c \
pfmlib_intel_bdx_unc_r3qpi.c \
pfmlib_intel_skx_unc_cha.c \
pfmlib_intel_skx_unc_iio.c \
pfmlib_intel_skx_unc_imc.c \
pfmlib_intel_skx_unc_irp.c \
pfmlib_intel_skx_unc_m2m.c \
pfmlib_intel_skx_unc_m3upi.c \
pfmlib_intel_skx_unc_pcu.c \
pfmlib_intel_skx_unc_ubo.c \
pfmlib_intel_skx_unc_upi.c \
pfmlib_intel_knc.c \
pfmlib_intel_slm.c \
pfmlib_intel_tmt.c \
pfmlib_intel_knl.c \
pfmlib_intel_knl_unc_imc.c \
pfmlib_intel_knl_unc_edc.c \
pfmlib_intel_knl_unc_cha.c \
pfmlib_intel_knl_unc_m2pcie.c \
pfmlib_intel_glm.c \
pfmlib_intel_netburst.c \
pfmlib_amd64_k7.c pfmlib_amd64_k8.c pfmlib_amd64_fam10h.c \
pfmlib_amd64_fam11h.c pfmlib_amd64_fam12h.c \
pfmlib_amd64_fam14h.c pfmlib_amd64_fam15h.c \
pfmlib_amd64_fam17h.c pfmlib_amd64_fam16h.c \
pfmlib_amd64_fam19h.c pfmlib_amd64_rapl.c \
pfmlib_amd64_fam19h_l3.c
CFLAGS += -DCONFIG_PFMLIB_ARCH_X86
ifeq ($(CONFIG_PFMLIB_ARCH_I386),y)
SRCS += pfmlib_intel_coreduo.c pfmlib_intel_p6.c
CFLAGS += -DCONFIG_PFMLIB_ARCH_I386
endif
ifeq ($(CONFIG_PFMLIB_ARCH_X86_64),y)
CFLAGS += -DCONFIG_PFMLIB_ARCH_X86_64
endif
endif
ifeq ($(CONFIG_PFMLIB_ARCH_POWERPC),y)
ifeq ($(SYS),Linux)
SRCS += pfmlib_powerpc_perf_event.c
endif
INCARCH = $(INC_POWERPC)
SRCS += pfmlib_powerpc.c pfmlib_power4.c pfmlib_ppc970.c pfmlib_power5.c \
pfmlib_power6.c pfmlib_power7.c pfmlib_torrent.c pfmlib_power8.c \
pfmlib_power9.c pfmlib_powerpc_nest.c pfmlib_power10.c
CFLAGS += -DCONFIG_PFMLIB_ARCH_POWERPC
endif
ifeq ($(CONFIG_PFMLIB_ARCH_S390X),y)
ifeq ($(SYS),Linux)
SRCS += pfmlib_s390x_perf_event.c
endif
INCARCH = $(INC_S390X)
SRCS += pfmlib_s390x_cpumf.c
CFLAGS += -DCONFIG_PFMLIB_ARCH_S390X
endif
ifeq ($(CONFIG_PFMLIB_ARCH_SPARC),y)
ifeq ($(SYS),Linux)
SRCS += pfmlib_sparc_perf_event.c
endif
INCARCH = $(INC_SPARC)
SRCS += pfmlib_sparc.c pfmlib_sparc_ultra12.c pfmlib_sparc_ultra3.c pfmlib_sparc_ultra4.c pfmlib_sparc_niagara.c
CFLAGS += -DCONFIG_PFMLIB_ARCH_SPARC
endif
ifeq ($(CONFIG_PFMLIB_ARCH_ARM),y)
ifeq ($(SYS),Linux)
SRCS += pfmlib_arm_perf_event.c
endif
INCARCH = $(INC_ARM)
SRCS += pfmlib_arm.c pfmlib_arm_armv7_pmuv1.c pfmlib_arm_armv6.c pfmlib_arm_armv8.c pfmlib_tx2_unc_perf_event.c pfmlib_kunpeng_unc_perf_event.c
CFLAGS += -DCONFIG_PFMLIB_ARCH_ARM
endif
ifeq ($(CONFIG_PFMLIB_ARCH_ARM64),y)
ifeq ($(SYS),Linux)
SRCS += pfmlib_arm_perf_event.c
endif
INCARCH = $(INC_ARM64)
SRCS += pfmlib_arm.c pfmlib_arm_armv8.c pfmlib_tx2_unc_perf_event.c pfmlib_kunpeng_unc_perf_event.c
CFLAGS += -DCONFIG_PFMLIB_ARCH_ARM64
endif
ifeq ($(CONFIG_PFMLIB_ARCH_MIPS),y)
ifeq ($(SYS),Linux)
SRCS += pfmlib_mips_perf_event.c
endif
INCARCH = $(INC_MIPS)
SRCS += pfmlib_mips.c pfmlib_mips_74k.c
CFLAGS += -DCONFIG_PFMLIB_ARCH_MIPS
endif
ifeq ($(CONFIG_PFMLIB_CELL),y)
INCARCH = $(INC_CELL)
#SRCS += pfmlib_cell.c
CFLAGS += -DCONFIG_PFMLIB_CELL
endif
SRC_CC += $(addprefix $(LIBPFM4_DIR)/lib/,$(SRCS))
vpath %.c $(LIBPFM4_DIR)/lib
CC_OPT += $(CFLAGS)
INC_DIR += $(LIBPFM4_DIR)/include $(LIBPFM4_DIR)/lib/events
vpath %.h $(INC_DIR)
LIBS += base libm libc

View File

@@ -0,0 +1 @@
b0ec09148c2be9f4a96203a3d2de4ebed6ce2da0

View File

@@ -0,0 +1,13 @@
LICENSE := PD
DOWNLOADS := libpfm4.git
VERSION := git
URL(libpfm4) := https://github.com/wcohen/libpfm4.git
REV(libpfm4) := 8aaaf1747e96031a47ed6bd9337ff61a21f8cc64
DIR(libpfm4) := src/lib/libpfm4
DIRS += include
DIRS += include/perfmon
DIR_CONTENT(include) += src/lib/libpfm4/include/perfmon
DIR_CONTENT(include/perfmon) += src/lib/libpfm4/include/perfmon/*.h

View File

@@ -1 +1 @@
07a3844690ae8eb15832d93e29567a5a8e6e45af
03dc91ed3385b2a62dee0c4f20daf9b5cb29ba24

View File

@@ -3,7 +3,7 @@ DOWNLOADS := mxtasking.git
VERSION := git
URL(mxtasking) := https://github.com/mmueller41/mxtasking.git
REV(mxtasking) := bfc90d4dcf88b7072c76d70e897cb4072f399248
REV(mxtasking) := fcf0a2810ba69d1017d6d7d9a5d6e60ac962f9f1
DIR(mxtasking) := src/lib/mxtasking
DIRS += include/mx/memory

View File

@@ -0,0 +1 @@
libpfm4

View File

@@ -0,0 +1,17 @@
#
# Content rules of the libpfm4 source archive
#

MIRROR_FROM_REP_DIR := lib/mk/libpfm4.mk lib/import/import-libpfm4.mk

# 'LICENSE' is generated by the rule at the end of this file. Depending on
# 'COPYING' instead would fail because no rule produces a file of that name.
content: src/lib/libpfm4 LICENSE $(MIRROR_FROM_REP_DIR)

PORT_DIR := $(call port_dir,$(REP_DIR)/ports/libpfm4)

# copy the ported sources and add a pseudo target that builds the library
src/lib/libpfm4:
	mkdir -p $@
	cp -r $(PORT_DIR)/src/lib/libpfm4/* $@
	rm -rf $@/.git
	echo "LIBS = libpfm4" > $@/target.mk

$(MIRROR_FROM_REP_DIR):
	$(mirror_from_rep_dir)

LICENSE:
	echo "libpfm license, see src/lib/libpfm4/COPYING" > $@

View File

@@ -0,0 +1,3 @@
base
libm
libc

View File

@@ -79,4 +79,4 @@ void Component::construct(Genode::Env &env)
* Default stack size for libc-using components
*/
Genode::size_t Libc::Component::stack_size() __attribute__((weak));
Genode::size_t Libc::Component::stack_size() { return 32UL*1024*sizeof(long); }
Genode::size_t Libc::Component::stack_size() { return 96UL*1024*sizeof(long); }

View File

@@ -20,7 +20,9 @@ set config {
<service name="IO_MEM"/>
<service name="IO_PORT"/>
<service name="RM"/>
<service name="TOPO"/>
</parent-provides>
<affinity-space width="32" height="1"/>
<default-route>
<any-service><parent/><any-child/></any-service>
</default-route>
@@ -38,7 +40,8 @@ append_platform_drv_config
append config {
<start name="hello_mxtask">
<resource name="RAM" quantum="3G"/>
<resource name="RAM" quantum="16G"/>
<affinity xpos="0" ypos="0" width="16" height="1"/>
<config>
<vfs> <dir name="dev">
<log/>
@@ -60,6 +63,6 @@ set boot_modules {
append_platform_drv_boot_modules
build_boot_image $boot_modules
append qemu_args "-nographic -m 64"
append qemu_args "-nographic"
run_genode_until forever

View File

@@ -0,0 +1,80 @@
set build_components {
core init timer app/hpc_test
}
source ${genode_dir}/repos/base/run/platform_drv.inc
append_platform_drv_build_components
build $build_components
create_boot_directory
set config {
<config>
<parent-provides>
<service name="LOG"/>
<service name="PD"/>
<service name="CPU"/>
<service name="ROM"/>
<service name="RAM"/>
<service name="IRQ"/>
<service name="IO_MEM"/>
<service name="IO_PORT"/>
<service name="CAP"/>
<service name="RM"/>
<service name="SIGNAL"/>
<service name="TOPO"/>
</parent-provides>
<default-route>
<any-service><parent/><any-child/></any-service>
</default-route>
<default caps="200"/>
<start name="timer">
<resource name="RAM" quantum="16M"/>
<provides><service name="Timer"/></provides>
<route>
<any-service><parent/><any-child/></any-service>
</route>
</start>
}
append config {
<start name="hpc_test1">
<binary name="hpc_test"/>
<resource name="RAM" quantum="64M"/>
<config>
<vfs> <dir name="dev"> <log/> <inline name="rtc">2022-07-20 14:30</inline> </dir> </vfs>
<libc stdout="/dev/log" stderr="/dev/log" rtc="/dev/rtc"/>
</config>
<route>
<service name="Timer"><child name="timer"/></service>
<any-service><parent/><any-child/></any-service>
</route>
</start>
<start name="hpc_test2">
<binary name="hpc_test"/>
<resource name="RAM" quantum="64M"/>
<config>
<vfs> <dir name="dev"> <log/> <inline name="rtc">2022-07-20 14:30</inline> </dir> </vfs>
<libc stdout="/dev/log" stderr="/dev/log" rtc="/dev/rtc"/>
</config>
<route>
<service name="Timer"><child name="timer"/></service>
<any-service><parent/><any-child/></any-service>
</route>
</start>
</config>
}
install_config $config
set boot_modules {
core init timer vfs.lib.so ld.lib.so posix.lib.so libc.lib.so libm.lib.so stdcxx.lib.so hpc_test
}
append_platform_drv_boot_modules
build_boot_image $boot_modules
append qemu_args "-nographic "
run_genode_until forever

View File

@@ -0,0 +1,68 @@
set build_components {
core init timer app/libpfm_test
}
source ${genode_dir}/repos/base/run/platform_drv.inc
append_platform_drv_build_components
build $build_components
create_boot_directory
set config {
<config>
<parent-provides>
<service name="LOG"/>
<service name="LOG"/>
<service name="PD"/>
<service name="CPU"/>
<service name="ROM"/>
<service name="RAM"/>
<service name="IRQ"/>
<service name="IO_MEM"/>
<service name="IO_PORT"/>
<service name="CAP"/>
<service name="RM"/>
<service name="SIGNAL"/>
<service name="TOPO"/>
</parent-provides>
<default-route>
<any-service><parent/><any-child/></any-service>
</default-route>
<default caps="200"/>
<start name="timer">
<resource name="RAM" quantum="1M"/>
<provides><service name="Timer"/></provides>
<route>
<any-service><parent/><any-child/></any-service>
</route>
</start>
}
append config {
<start name="libpfm_test">
<resource name="RAM" quantum="10M"/>
<config>
<vfs> <dir name="dev"> <log/> <inline name="rtc">2022-07-20 14:30</inline> </dir> </vfs>
<libc stdout="/dev/log" stderr="/dev/log" rtc="/dev/rtc"/>
</config>
<route>
<service name="Timer"><child name="timer"/></service>
<any-service><parent/><any-child/></any-service>
</route>
</start>
</config>
}
install_config $config
set boot_modules {
core init timer vfs.lib.so ld.lib.so posix.lib.so libc.lib.so libm.lib.so stdcxx.lib.so libpfm_test
}
append_platform_drv_boot_modules
build_boot_image $boot_modules
append qemu_args "-nographic "
run_genode_until forever

116
repos/mml/run/livedemo.run Normal file
View File

@@ -0,0 +1,116 @@
set build_components {
core init timer app/blinktree
}
build $build_components
create_boot_directory
set config {
<config>
<default caps="200"/>
<default-route>
<any-service><parent/><any-child/></any-service>
</default-route>
<parent-provides>
<service name="PD"/>
<service name="CPU"/>
<service name="RAM"/>
<service name="ROM"/>
<service name="RM"/>
<service name="LOG"/>
<service name="TOPO"/>
</parent-provides>
<affinity-space width="32" height="1"/>
<start name="timer">
<provides><service name="Timer"/></provides>
<resource name="RAM" quantum="8M"/>
<route>
<any-service><parent/><any-child/></any-service>
</route>
</start>
<start name="blinktree1">
<binary name="blinktree"/>
<resource name="RAM" quantum="80G"/>
<affinity xpos="1" ypos="0" width="31" height="1"/>
<route>
<service name="Timer"><child name="timer"/></service>
<any-service><parent/><any-child/></any-service>
</route>
<config>
<vfs>
<dir name="dev">
<log/>
<inline name="rtc">2022-07-20 14:30</inline>
</dir>
<dir name="workloads">
<rom name="fill_randint_workloada"/>
<rom name="mixed_randint_workloada"/>
</dir>
</vfs>
<libc stdout="/dev/log" stderr="/dev/log" rtc="/dev/rtc"/>
</config>
</start>
<start name="blinktree2">
<binary name="blinktree"/>
<resource name="RAM" quantum="80G"/>
<affinity xpos="1" ypos="0" width="31" height="1"/>
<route>
<service name="Timer"><child name="timer"/></service>
<any-service><parent/><any-child/></any-service>
</route>
<config>
<vfs>
<dir name="dev">
<log/>
<inline name="rtc">2022-07-20 14:30</inline>
</dir>
<dir name="workloads">
<rom name="fill_randint_workloada"/>
<rom name="mixed_randint_workloada"/>
</dir>
</vfs>
<libc stdout="/dev/log" stderr="/dev/log" rtc="/dev/rtc"/>
</config>
</start>
<start name="blinktree3">
<binary name="blinktree"/>
<resource name="RAM" quantum="80G"/>
<affinity xpos="1" ypos="0" width="31" height="1"/>
<route>
<service name="Timer"><child name="timer"/></service>
<any-service><parent/><any-child/></any-service>
</route>
<config>
<vfs>
<dir name="dev">
<log/>
<inline name="rtc">2022-07-20 14:30</inline>
</dir>
<dir name="workloads">
<rom name="fill_randint_workloada"/>
<rom name="mixed_randint_workloada"/>
</dir>
</vfs>
<libc stdout="/dev/log" stderr="/dev/log" rtc="/dev/rtc"/>
</config>
</start>
</config>
}
install_config $config
set boot_modules {
core init timer vfs.lib.so ld.lib.so libm.lib.so libc.lib.so stdcxx.lib.so posix.lib.so blinktree fill_randint_workloada mixed_randint_workloada
}
build_boot_image $boot_modules
append qemu_args "-nographic"
run_genode_until forever

View File

@@ -1,8 +1,6 @@
#pragma once
#ifdef PERF_SUPPORT
#include "perf.h"
#endif
#include "phase.h"
#include <chrono>
#include <json.hpp>
@@ -51,7 +49,7 @@ template <typename P> class InterimResult
public:
InterimResult(const std::uint64_t operation_count, const P &phase, const std::uint16_t iteration,
const std::uint16_t core_count, const std::chrono::milliseconds time,
/*std::vector<PerfCounter> &counter,*/
std::vector<PerfCounter> &counter,
std::unordered_map<std::uint16_t, std::uint64_t> executed_tasks,
std::unordered_map<std::uint16_t, std::uint64_t> executed_reader_tasks,
std::unordered_map<std::uint16_t, std::uint64_t> executed_writer_tasks,
@@ -65,12 +63,10 @@ public:
_scheduled_tasks_on_core(std::move(scheduled_tasks_on_core)),
_scheduled_tasks_off_core(std::move(scheduled_tasks_off_core)), _worker_fills(std::move(worker_fills))
{
#ifdef PERF_SUPPORT
for (auto &c : counter)
{
_performance_counter.emplace_back(std::make_pair(c.name(), c.read()));
}
#endif
}
~InterimResult() = default;
@@ -181,9 +177,7 @@ public:
_current_phase = phase;
_current_iteration = iteration;
_core_set = core_set;
#ifdef PERF_SUPPORT
_perf.start();
#endif
//_start = std::chrono::steady_clock::now();
_start = Genode::Trace::timestamp();
@@ -193,9 +187,7 @@ public:
{
const auto end = Genode::Trace::timestamp();
//const auto end = std::chrono::steady_clock::now();
#ifdef PERF_SUPPORT
_perf.stop();
#endif
//const auto milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end-_start);
const auto milliseconds = std::chrono::milliseconds((end-_start)/2000000UL);
@@ -205,7 +197,7 @@ public:
_current_iteration,
_core_set.size(),
milliseconds,
//_perf.counter(),
_perf.counter(),
statistic_map(mx::tasking::profiling::Statistic::Executed),
statistic_map(mx::tasking::profiling::Statistic::ExecutedReader),
statistic_map(mx::tasking::profiling::Statistic::ExecutedWriter),
@@ -214,16 +206,12 @@ public:
statistic_map(mx::tasking::profiling::Statistic::ScheduledOffChannel),
statistic_map(mx::tasking::profiling::Statistic::Fill)};
}
#ifdef PERF_SUPPORT
void add(PerfCounter &performance_counter) { _perf.add(performance_counter); }
#endif
private:
std::uint16_t _current_iteration{0U};
P _current_phase;
mx::util::core_set _core_set;
#ifdef PERF_SUPPORT
alignas(64) Perf _perf;
#endif
//alignas(64) std::chrono::steady_clock::time_point _start;
alignas(64) size_t _start;

View File

@@ -6,29 +6,27 @@ using namespace benchmark;
* Counter "Instructions Retired"
* Counts when the last uop of an instruction retires.
*/
[[maybe_unused]] PerfCounter Perf::INSTRUCTIONS = {"instr", PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS};
[[maybe_unused]] PerfCounter Perf::INSTRUCTIONS = {"instr", Genode::Trace::Performance_counter::Type::CORE, 0xc0, 0x0};
/**
*/
[[maybe_unused]] PerfCounter Perf::CYCLES = {"cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES};
[[maybe_unused]] PerfCounter Perf::CYCLES = {"cycles", Genode::Trace::Performance_counter::Type::CORE, 0x76, 0x0};
/**
*/
[[maybe_unused]] PerfCounter Perf::L1_MISSES = {"l1-miss", PERF_TYPE_HW_CACHE,
PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_MISS << 16)};
[[maybe_unused]] PerfCounter Perf::L1_MISSES = {"l1-miss", Genode::Trace::Performance_counter::Type::CORE, 0x43, 0x5b};
/**
* Counter "LLC Misses"
* Accesses to the LLC in which the data is not present(miss).
*/
[[maybe_unused]] PerfCounter Perf::LLC_MISSES = {"llc-miss", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES};
[[maybe_unused]] PerfCounter Perf::LLC_MISSES = {"llc-miss", Genode::Trace::Performance_counter::Type::CACHE, 0x6, 0xff};
/**
* Counter "LLC Reference"
* Accesses to the LLC, in which the data is present(hit) or not present(miss)
*/
[[maybe_unused]] PerfCounter Perf::LLC_REFERENCES = {"llc-ref", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES};
[[maybe_unused]] PerfCounter Perf::LLC_REFERENCES = {"llc-ref", Genode::Trace::Performance_counter::Type::CACHE, 0x4, 0xff};
/**
* Micro architecture "Skylake"
@@ -36,7 +34,7 @@ using namespace benchmark;
* EventSel=A3H,UMask=14H, CMask=20
* Execution stalls while memory subsystem has an outstanding load.
*/
PerfCounter Perf::STALLS_MEM_ANY = {"memory-stall", PERF_TYPE_RAW, 0x145314a3};
//PerfCounter Perf::STALLS_MEM_ANY = {"memory-stall", PERF_TYPE_RAW, 0x145314a3};
/**
* Micro architecture "Skylake"
@@ -44,7 +42,7 @@ PerfCounter Perf::STALLS_MEM_ANY = {"memory-stall", PERF_TYPE_RAW, 0x145314a3};
* EventSel=32H,UMask=01H
* Number of PREFETCHNTA instructions executed.
*/
[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_NTA = {"sw-prefetch-nta", PERF_TYPE_RAW, 0x530132};
[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_NTA = {"sw-prefetch-nta", Genode::Trace::Performance_counter::Type::CORE, 0x4b, 0x4};
/**
* Micro architecture "Skylake"
@@ -52,7 +50,7 @@ PerfCounter Perf::STALLS_MEM_ANY = {"memory-stall", PERF_TYPE_RAW, 0x145314a3};
* EventSel=32H,UMask=02H
* Number of PREFETCHT0 instructions executed.
*/
[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_T0 = {"sw-prefetch-t0", PERF_TYPE_RAW, 0x530232};
//[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_T0 = {"sw-prefetch-t0", Genode::Trace::Performance_counter::Type::CORE, 0x4b, };
/**
* Micro architecture "Skylake"
@@ -60,7 +58,7 @@ PerfCounter Perf::STALLS_MEM_ANY = {"memory-stall", PERF_TYPE_RAW, 0x145314a3};
* EventSel=32H,UMask=04H
* Number of PREFETCHT1 or PREFETCHT2 instructions executed.
*/
[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_T1_T2 = {"sw-prefetch-t1t2", PERF_TYPE_RAW, 0x530432};
//[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_T1_T2 = {"sw-prefetch-t1t2", PERF_TYPE_RAW, 0x530432};
/**
* Micro architecture "Skylake"
@@ -68,4 +66,4 @@ PerfCounter Perf::STALLS_MEM_ANY = {"memory-stall", PERF_TYPE_RAW, 0x145314a3};
* EventSel=32H,UMask=08H
* Number of PREFETCHW instructions executed.
*/
[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_WRITE = {"sw-prefetch-w", PERF_TYPE_RAW, 0x530832};
[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_WRITE = {"sw-prefetch-w", Genode::Trace::Performance_counter::Type::CORE, 0x4b, 0x2};

View File

@@ -1,12 +1,11 @@
#pragma once
#include <algorithm>
#include <asm/unistd.h>
#include <cstring>
#include <linux/perf_event.h> // TODO: Find Genode equivalent
#include <iostream>
#include <string>
#include <sys/ioctl.h>
#include <unistd.h>
#include <vector>
#include <base/trace/perf.h>
/*
* For more Performance Counter take a look into the Manual from Intel:
@@ -28,46 +27,62 @@ namespace benchmark {
class PerfCounter
{
public:
PerfCounter(std::string &&name, const std::uint64_t type, const std::uint64_t event_id) : _name(std::move(name))
PerfCounter(std::string &&name, const Genode::Trace::Performance_counter::Type type, const std::uint64_t event_id, const std::uint64_t mask) : _name(std::move(name)), _type(type), _event_id(static_cast<Genode::uint64_t>(event_id)), _mask(static_cast<Genode::uint64_t>(mask))
{
/*std::memset(&_perf_event_attribute, 0, sizeof(perf_event_attr));
_perf_event_attribute.type = type;
_perf_event_attribute.size = sizeof(perf_event_attr);
_perf_event_attribute.config = event_id;
_perf_event_attribute.disabled = true;
_perf_event_attribute.inherit = 1;
_perf_event_attribute.exclude_kernel = false;
_perf_event_attribute.exclude_hv = false;
_perf_event_attribute.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;*/
}
~PerfCounter() = default;
bool open()
{
/*_file_descriptor = syscall(__NR_perf_event_open, &_perf_event_attribute, 0, -1, -1, 0);*/
return _file_descriptor >= 0;
try {
_counter = Genode::Trace::Performance_counter::acquire(_type);
} catch (Genode::Trace::Pfc_no_avail) {
std::cerr << "Failed to open performance counters." << std::endl;
}
try {
Genode::Trace::Performance_counter::setup(_counter, _event_id, _mask, (_type == Genode::Trace::Performance_counter::Type::CORE ? 0x30000 : 0x550f000000000000));
} catch (Genode::Trace::Pfc_access_error &e) {
std::cerr << "Error while setting up performance counter: " << e.error_code() << std::endl;
}
return _counter >= 0;
}
bool start()
{
//ioctl(_file_descriptor, PERF_EVENT_IOC_RESET, 0);
//ioctl(_file_descriptor, PERF_EVENT_IOC_ENABLE, 0);
return ::read(_file_descriptor, &_prev, sizeof(read_format)) == sizeof(read_format);
try {
Genode::Trace::Performance_counter::start(_counter);
_prev.value = static_cast<std::uint64_t>(Genode::Trace::Performance_counter::read(_counter));
}
catch (Genode::Trace::Pfc_access_error &e)
{
std::cerr << "Failed to start counter: " << e.error_code() << std::endl;
}
return _prev.value >= 0;
}
bool stop()
{
//const auto is_read = ::read(_file_descriptor, &_data, sizeof(read_format)) == sizeof(read_format);
//ioctl(_file_descriptor, PERF_EVENT_IOC_DISABLE, 0);
return false; // is_read;
try {
_data.value = Genode::Trace::Performance_counter::read(_counter);
Genode::Trace::Performance_counter::stop(_counter);
Genode::Trace::Performance_counter::reset(_counter);
}
catch (Genode::Trace::Pfc_access_error &e)
{
std::cerr << "Failed to stop counter: " << e.error_code() << std::endl;
}
// const auto is_read = ::read(_file_descriptor, &_data, sizeof(read_format)) == sizeof(read_format);
// ioctl(_file_descriptor, PERF_EVENT_IOC_DISABLE, 0);
return _data.value >= 0; // is_read;
}
[[nodiscard]] double read() const
{
const auto multiplexing_correction = static_cast<double>(_data.time_enabled - _prev.time_enabled) /
static_cast<double>(_data.time_running - _prev.time_running);
return static_cast<double>(_data.value - _prev.value) * multiplexing_correction;
return static_cast<double>(_data.value - _prev.value);
}
[[nodiscard]] const std::string &name() const { return _name; }
@@ -84,8 +99,10 @@ private:
};
const std::string _name;
std::int32_t _file_descriptor = -1;
//perf_event_attr _perf_event_attribute{};
Genode::Trace::Performance_counter::Type _type;
Genode::uint64_t _event_id;
Genode::uint64_t _mask;
Genode::Trace::Performance_counter::Counter _counter;
read_format _prev{};
read_format _data{};
};
@@ -101,11 +118,11 @@ public:
[[maybe_unused]] static PerfCounter L1_MISSES;
[[maybe_unused]] [[maybe_unused]] static PerfCounter LLC_MISSES;
[[maybe_unused]] static PerfCounter LLC_REFERENCES;
[[maybe_unused]] static PerfCounter STALLED_CYCLES_BACKEND;
[[maybe_unused]] static PerfCounter STALLS_MEM_ANY;
//[[maybe_unused]] static PerfCounter STALLED_CYCLES_BACKEND;
//[[maybe_unused]] static PerfCounter STALLS_MEM_ANY;
[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_NTA;
[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_T0;
[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_T1_T2;
//[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_T0;
//[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_T1_T2;
[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_WRITE;
Perf() noexcept = default;

View File

@@ -22,16 +22,14 @@ Benchmark::Benchmark(Libc::Env &env, benchmark::Cores &&cores, const std::uint16
_result_file_name(std::move(result_file_name)), _statistic_file_name(std::move(statistic_file_name)),
_tree_file_name(std::move(tree_file_name)), _profile(profile), _workload(env)
{
#ifdef PERF_SUPPORT
if (use_performance_counter)
{
this->_chronometer.add(benchmark::Perf::CYCLES);
this->_chronometer.add(benchmark::Perf::INSTRUCTIONS);
this->_chronometer.add(benchmark::Perf::STALLS_MEM_ANY);
//this->_chronometer.add(benchmark::Perf::STALLS_MEM_ANY);
this->_chronometer.add(benchmark::Perf::SW_PREFETCH_ACCESS_NTA);
this->_chronometer.add(benchmark::Perf::SW_PREFETCH_ACCESS_WRITE);
}
#endif
std::cout << "core configuration: \n" << this->_cores.dump(2) << std::endl;
this->_workload.build(fill_workload_file, mixed_workload_file);
@@ -117,7 +115,18 @@ void Benchmark::requests_finished()
if (open_requests == 0U) // All request schedulers are done.
{
std::uint16_t core_id = mx::system::topology::core_id();
if (core_id != 0)
{
this->_open_requests++;
auto *stop_task = mx::tasking::runtime::new_task<StopMeasurementTask>(0U, *this);
stop_task->annotate(static_cast<mx::tasking::TaskInterface::channel>(0));
mx::tasking::runtime::spawn(*stop_task, core_id);
return;
}
// Stop and print time (and performance counter).
//Genode::log("Stopping timer");
const auto result = this->_chronometer.stop(this->_workload.size());
mx::tasking::runtime::stop();
@@ -126,7 +135,7 @@ void Benchmark::requests_finished()
//std::cout << result << std::endl;
//if (mx::system::topology::core_id() == 0)
//std::cout << result << "\t " << (_end - _start) << " cycles" << std::endl;
std::cout << result.to_json().dump() << std::endl;
std::cout << "core: " << mx::system::topology::core_id() << result.to_json().dump() << std::endl;
// std::cout << result << std::endl;

View File

@@ -110,6 +110,7 @@ private:
[[nodiscard]] std::string profile_file_name() const;
friend class StartMeasurementTask;
friend class StopMeasurementTask;
};
class StartMeasurementTask : public mx::tasking::TaskInterface
@@ -123,9 +124,26 @@ class StartMeasurementTask : public mx::tasking::TaskInterface
mx::tasking::TaskResult execute(const std::uint16_t core_id, const std::uint16_t channel_id) override
{
//Genode::log("Starting timer");
_benchmark._chronometer.start(static_cast<std::uint16_t>(static_cast<benchmark::phase>(_benchmark._workload)), _benchmark._current_iteration + 1, _benchmark._cores.current());
//_benchmark._start = Genode::Trace::timestamp();
return mx::tasking::TaskResult::make_remove();
}
};
class StopMeasurementTask : public mx::tasking::TaskInterface
{
private:
Benchmark &_benchmark;
public:
constexpr StopMeasurementTask(Benchmark& benchmark) : _benchmark(benchmark) {}
~StopMeasurementTask() override = default;
mx::tasking::TaskResult execute(const std::uint16_t core_id, const std::uint16_t channel_id) override
{
_benchmark.requests_finished();
return mx::tasking::TaskResult::make_remove();
}
};
} // namespace application::blinktree_benchmark

View File

@@ -9,6 +9,7 @@
#include <tuple>
#include <libc/component.h>
#include <cstring>
#include <cstdio>
using namespace application::blinktree_benchmark;
@@ -202,13 +203,13 @@ void Libc::Component::construct(Libc::Env &env) {
std::uint16_t cores = env.cpu().affinity_space().total();
char cores_arg[10];
snprintf(cores_arg, 9, "1:%d", cores);
sprintf(cores_arg, "%d", cores);
char *args[] = {"blinktree_benchmark", "-i", "4", "-pd", "3", cores_arg};
char *args[] = {"blinktree_benchmark", "-i", "4", "-pd", "3", "-p", cores_arg};
Libc::with_libc([&]()
{
std::cout << "Starting B-link tree benchmark" << std::endl;
bt_main(env, 6, args);
bt_main(env, 7, args);
});
}

View File

@@ -1,4 +1,5 @@
MXINC_DIR=$(REP_DIR)/src/app/blinktree
GENODE_GCC_TOOLCHAIN_DIR ?= /usr/local/genode/tool/21.05
TARGET = blinktree
# source files for benchmark framework
@@ -6,11 +7,18 @@ SRC_MXBENCH = benchmark/workload_set.cpp
SRC_MXBENCH += benchmark/workload.cpp
SRC_MXBENCH += benchmark/cores.cpp
SRC_MXBENCH += benchmark/string_util.cpp
SRC_MXBENCH += benchmark/perf.cpp
# source files for blinktree benchmark
SRC_BTREE += blinktree_benchmark/main.cpp
SRC_BTREE += blinktree_benchmark/benchmark.cpp
SRC_CC = ${SRC_MXBENCH} ${SRC_BTREE}
LIBS += base libc stdcxx mxtasking
CC_OPT += -Wno-error -fno-aligned-new -I$(MXINC_DIR)
EXT_OBJECTS += /usr/local/genode/tool/lib/clang/14.0.5/lib/linux/libclang_rt.builtins-x86_64.a /usr/local/genode/tool/lib/libatomic.a
CUSTOM_CC = /usr/local/genode/tool/bin/clang
CUSTOM_CXX = /usr/local/genode/tool/bin/clang++
CC_OPT += --target=x86_64-genode --sysroot=/does/not/exist --gcc-toolchain=$(GENODE_GCC_TOOLCHAIN_DIR) -Wno-error -O2 -g -fno-aligned-new -DNDEBUG -I$(MXINC_DIR) -std=c++17 #-D_GLIBCXX_ATOMIC_BUILTINS_8 -D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
CC_OPT += -femulated-tls -DCLANG_CXX11_ATOMICS
CC_CXX_WARN_STRICT =
CUSTOM_CXX_LIB := $(CROSS_DEV_PREFIX)g++
#CXX_LD += $(CROSS_DEV_PREFIX)g++

View File

@@ -0,0 +1,89 @@
/**
* @file main.cc
* @author Michael Müller (michael.mueller@uos.de)
* @brief Tests for programming hardware performance counters in NOVA
* @version 0.1
* @date 2022-12-14
*
* @copyright Copyright (c) 2022
*
*/
#include <nova/syscall-generic.h>
#include <nova/syscalls.h>
#include <iostream>
#include <chrono>
#include <thread>
#include <x86intrin.h>
/**
 * Programs three performance counters (indices 0, 1 and 2) through the raw
 * NOVA hpc_* system calls, starts them, and then prints their readings every
 * two seconds in an endless loop.
 *
 * Counter 0 (event 0x26, mask 0x00) is printed as "cache line flushes".
 * Counter 1 (event 0x60, mask 0xfe) is read as the number of L2 requests.
 * Counter 2 (event 0x62, mask 0x01) is read as the L2 latency sum.
 *
 * @return -1 if a counter cannot be set up, -2 if one cannot be started.
 *         Read failures inside the loop are reported but not fatal.
 */
int main(void)
{
    Nova::mword_t event = 0x26;
    Nova::mword_t mask = 0x00;
    Nova::mword_t flags = 0x70000;
    Nova::uint8_t rc;

    if ((rc = Nova::hpc_ctrl(Nova::HPC_SETUP, 0, 1, event, mask, flags)) != Nova::NOVA_OK) {
        std::cerr << "Failed to setup performance counter 0" << std::endl;
        return -1;
    }

    std::cout << "Counter 0 setup" << std::endl;

    event = 0x60;
    mask = 0xfe;
    if ((rc = Nova::hpc_ctrl(Nova::HPC_SETUP, 1, 1, event, mask, flags)) != Nova::NOVA_OK)
    {
        std::cerr << "Failed to setup performance counter 1, rc = " << static_cast<Nova::uint32_t>(rc) << std::endl;
        return -1;
    }

    event = 0x62;
    mask = 0x1;
    if ((rc = Nova::hpc_ctrl(Nova::HPC_SETUP, 2, 1, event, mask, flags)) != Nova::NOVA_OK)
    {
        std::cerr << "Failed to setup performance counter 2, rc = " << static_cast<Nova::uint32_t>(rc) << std::endl;
        return -1;
    }

    // Each message names the counter that actually failed.
    if ((rc = Nova::hpc_start(0, 1)) != Nova::NOVA_OK) {
        std::cerr << "Failed to start counter 0" << std::endl;
        return -2;
    }
    if ((rc = Nova::hpc_start(1, 1)) != Nova::NOVA_OK) {
        std::cerr << "Failed to start counter 1" << std::endl;
        return -2;
    }
    if ((rc = Nova::hpc_start(2, 1)) != Nova::NOVA_OK) {
        std::cerr << "Failed to start counter 2" << std::endl;
        return -2;
    }

    for (;;) {
        std::this_thread::sleep_for(std::chrono::milliseconds(2000));

        Nova::mword_t count = 0;
        // Issue one cache line flush per iteration before sampling counter 0.
        _mm_clflush(&count);
        if ((rc = Nova::hpc_read(0, 1, count)) != Nova::NOVA_OK)
        {
            std::cerr << "Failed to read counter 0" << std::endl;
        }
        std::cout << count << " cache line flushes" << std::endl;

        Nova::mword_t latency = 0;
        if ((rc = Nova::hpc_read(2, 1, latency)) != Nova::NOVA_OK)
        {
            std::cerr << "Failed to read counter 2" << std::endl;
        }

        Nova::mword_t l2_requests = 0;
        if ((rc = Nova::hpc_read(1, 1, l2_requests)) != Nova::NOVA_OK)
        {
            std::cerr << "Failed to read counter 1" << std::endl;
        }

        // l2_requests stays 0 when the read failed or nothing was counted;
        // dividing by it would be an integer division by zero.
        if (l2_requests == 0) {
            std::cout << "L2 latency: n/a (no L2 requests counted)" << std::endl;
        } else {
            // NOTE(review): the factor 4 is taken over unchanged; its meaning
            // is not visible here - confirm against the CPU's event description.
            count = (latency * 4) / l2_requests;
            std::cout << "L2 latency:" << count << " cycles" << std::endl;
        }
    }

    return 0;
}

View File

@@ -0,0 +1,5 @@
TARGET = hpc_test
SRC_CC = trace_pfc.cc
LIBS += base posix libm libc stdcxx
CC_OPT += -Wno-error -Wno-permissive -fpermissive -Wno-error=conversion

View File

@@ -0,0 +1,105 @@
/**
* @file trace_pfc.cc
* @author Michael Müller (michael.mueller@uos.de)
* @brief Tests for Genode wrappers around Performance counter syscalls in NOVA
* @version 0.1
* @date 2022-12-15
*
* @copyright Copyright (c) 2022
*
*/
#include <base/trace/perf.h>
#include <iostream>
#include <chrono>
#include <thread>
#include <x86intrin.h>
using namespace Genode;
int main(void)
{
Trace::Performance_counter::Counter ctr_clflush, ctr_l2_latency, ctr_l2_requests, /*ctr_l3_miss,*/ ctr_l2_prefetch;
try {
ctr_clflush = Trace::Performance_counter::alloc_core();
ctr_l2_latency = Trace::Performance_counter::alloc_core();
ctr_l2_requests = Trace::Performance_counter::alloc_core();
ctr_l2_prefetch = Trace::Performance_counter::acquire(Trace::Performance_counter::Type::CORE);
// ctr_l3_miss = Trace::Performance_counter::alloc_cbo();
}
catch (Trace::Pfc_no_avail)
{
std::cout << "Unable to allocate performance counters." << std::endl;
return -1;
}
std::cout << "Performance counter allocation successful." << std::endl;
try {
Trace::Performance_counter::setup(ctr_clflush, 0x26, 0x00, 0x70000);
Trace::Performance_counter::setup(ctr_l2_latency, 0x62, 0x01, 0x30000);
Trace::Performance_counter::setup(ctr_l2_requests, 0x60, 0xfe, 0x30000);
Trace::Performance_counter::setup(ctr_l2_prefetch, 0xc0, 0x00, 0x30000);
//Trace::Performance_counter::setup(ctr_l3_miss, 0x6, 0xff, 0x550f000000000000);
} catch (Trace::Pfc_access_error &e) {
std::cerr << "PFC access failed. rc=" << e.error_code() << std::endl;
return -1;
}
std::cout << "Performance counters successfully set up." << std::endl;
try {
Trace::Performance_counter::start(ctr_clflush);
Trace::Performance_counter::start(ctr_l2_latency);
Trace::Performance_counter::start(ctr_l2_requests);
Trace::Performance_counter::start(ctr_l2_prefetch);
//Trace::Performance_counter::start(ctr_l3_miss);
} catch (Trace::Pfc_access_error &e) {
std::cerr << "PFC access failed. rc=" << e.error_code() << std::endl;
return -1;
}
std::cout << "Performance counters started." << std::endl;
for (;;) {
Genode::uint64_t clflushes, latency, requests, /*l3_misses,*/ l2_prefetches;
clflushes = latency = requests = l2_prefetches = 0;
std::this_thread::sleep_for(std::chrono::seconds(2));
_mm_clflush(&clflushes);
_mm_clflush(&clflushes);
try {
clflushes = Trace::Performance_counter::read(ctr_clflush);
latency = Trace::Performance_counter::read(ctr_l2_latency);
requests = Trace::Performance_counter::read(ctr_l2_requests);
l2_prefetches = Trace::Performance_counter::read(ctr_l2_prefetch);
//l3_misses = Trace::Performance_counter::read(ctr_l3_miss);
} catch (Trace::Pfc_access_error &e) {
std::cerr << "PFC access failed. rc=" << e.error_code() << std::endl;
return 1;
}
std::cout << clflushes << " cache line flushes." << std::endl;
//std::cout << "L2 latency: " << (latency * 4) / requests << " cycles." << std::endl;
std::cout << l2_prefetches << " L2 prefetch requests." << std::endl;
/*
try {
Trace::Performance_counter::stop(ctr_l2_prefetch);
Trace::Performance_counter::reset(ctr_l2_prefetch, 0xdeadbeef);
Trace::Performance_counter::start(ctr_l2_prefetch);
std::cout << Trace::Performance_counter::read(ctr_l2_prefetch) << " L2 prefetches after context-switch" << std::endl;
Trace::Performance_counter::stop(ctr_l2_prefetch);
Trace::Performance_counter::reset(ctr_l2_prefetch, l2_prefetches);
Trace::Performance_counter::start(ctr_l2_prefetch);
} catch (Trace::Pfc_access_error &e) {
std::cerr << "PFC access failed. rc=" << e.error_code() << std::endl;
}
*/
// std::cout << l3_misses << " L3 misses" << std::endl;
}
return 0;
}

View File

@@ -0,0 +1,174 @@
/*
* check_events.c - show event encoding
*
* Copyright (c) 2009 Google, Inc
* Contributed by Stephane Eranian <eranian@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
* PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file is part of libpfm, a performance monitoring support library for
* applications on Linux.
*/
#include <sys/types.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <perfmon/err.h>
#include <perfmon/pfmlib.h>
/*
 * Ask libpfm for the description of PMU model `p` and report its
 * is_present field. Yields 0 when the lookup itself does not succeed.
 */
int pmu_is_present(pfm_pmu_t p)
{
    pfm_pmu_info_t pmu_info;

    memset(&pmu_info, 0, sizeof(pmu_info));

    if (pfm_get_pmu_info(p, &pmu_info) != PFM_SUCCESS)
        return 0;

    return pmu_info.is_present;
}
/*
 * Lists the PMU models libpfm knows about and those it reports as present,
 * then prints the raw encoding of every event name given on the command
 * line. Without arguments, and if the perf_event PMU is present, two
 * default events are encoded instead.
 *
 * Any libpfm failure other than PFM_ERR_TOOSMALL terminates via errx(1, ...).
 */
int main(int argc, const char **argv)
{
pfm_pmu_info_t pinfo;
pfm_pmu_encode_arg_t e;
const char *arg[3];
const char **p;
char *fqstr;
pfm_event_info_t info;
int j, ret;
pfm_pmu_t i;
int total_supported_events = 0;
int total_available_events = 0;
unsigned long low, high, msr;
/*
 * Read MSR 0xc0010200 into low/high before touching libpfm.
 * NOTE(review): low and high are never used afterwards, and rdmsr normally
 * requires privilege level 0 - confirm this probe is intentional and that
 * the component is permitted to execute it.
 * NOTE(review): 0xc0010200 looks like an AMD performance-event-select MSR -
 * confirm against the processor manual.
 */
msr = 0xc0010200;
asm volatile("rdmsr"
: "=a"(low), "=d"(high)
: "c"(msr)); /*
* Initialize pfm library (required before we can use it)
*/
ret = pfm_initialize();
if (ret != PFM_SUCCESS)
errx(1, "cannot initialize library: %s\n", pfm_strerror(ret));
memset(&pinfo, 0, sizeof(pinfo));
memset(&info, 0, sizeof(info));
/* First pass: print every PMU model for which libpfm returns info. */
printf("Supported PMU models:\n");
for (i = PFM_PMU_NONE; i < PFM_PMU_MAX; i++)
{
ret = pfm_get_pmu_info(i, &pinfo);
if (ret != PFM_SUCCESS)
continue;
printf("\t[%d, %s, \"%s\"]\n", i, pinfo.name, pinfo.desc);
}
/*
 * Second pass: print only the models flagged is_present. Events of present
 * models go into total_supported_events, events of all models into
 * total_available_events.
 */
printf("Detected PMU models:\n");
for (i = PFM_PMU_NONE; i < PFM_PMU_MAX; i++)
{
ret = pfm_get_pmu_info(i, &pinfo);
if (ret != PFM_SUCCESS)
continue;
if (pinfo.is_present)
{
printf("\t[%d, %s, \"%s\"]\n", i, pinfo.name, pinfo.desc);
total_supported_events += pinfo.nevents;
}
total_available_events += pinfo.nevents;
}
printf("Total events: %d available, %d supported\n", total_available_events, total_supported_events);
/*
* be nice to user!
*/
if (argc < 2 && pmu_is_present(PFM_PMU_PERF_EVENT))
{
arg[0] = "PERF_COUNT_HW_CPU_CYCLES";
arg[1] = "PERF_COUNT_HW_INSTRUCTIONS";
arg[2] = NULL;
p = arg;
}
else
{
/* argv is NULL-terminated, so with no arguments *p is NULL below. */
p = argv + 1;
}
if (!*p)
errx(1, "you must pass at least one event");
/* e is zeroed once; e.codes and e.count are reused across iterations. */
memset(&e, 0, sizeof(e));
while (*p)
{
/*
* extract raw event encoding
*
* For perf_event encoding, use
* #include <perfmon/pfmlib_perf_event.h>
* and the function:
* pfm_get_perf_event_encoding()
*/
fqstr = NULL;
e.fstr = &fqstr;
ret = pfm_get_os_event_encoding(*p, PFM_PLM0 | PFM_PLM3, PFM_OS_NONE, &e);
if (ret != PFM_SUCCESS)
{
/*
* codes is too small for this event
* free and let the library resize
*/
if (ret == PFM_ERR_TOOSMALL)
{
free(e.codes);
e.codes = NULL;
e.count = 0;
free(fqstr);
/* p is not advanced, so the same event is encoded again. */
continue;
}
if (ret == PFM_ERR_NOTFOUND && strstr(*p, "::"))
errx(1, "%s: try setting LIBPFM_ENCODE_INACTIVE=1", pfm_strerror(ret));
errx(1, "cannot encode event %s: %s", *p, pfm_strerror(ret));
}
ret = pfm_get_event_info(e.idx, PFM_OS_NONE, &info);
if (ret != PFM_SUCCESS)
errx(1, "cannot get event info: %s", pfm_strerror(ret));
ret = pfm_get_pmu_info(info.pmu, &pinfo);
if (ret != PFM_SUCCESS)
errx(1, "cannot get PMU info: %s", pfm_strerror(ret));
printf("Requested Event: %s\n", *p);
printf("Actual Event: %s\n", fqstr);
printf("PMU : %s\n", pinfo.desc);
printf("IDX : %d\n", e.idx);
printf("Codes :");
for (j = 0; j < e.count; j++)
printf(" 0x%" PRIx64, e.codes[j]);
putchar('\n');
/* fqstr is released here on every successful iteration. */
free(fqstr);
p++;
}
/* e.codes may still hold the buffer from the last encoded event. */
if (e.codes)
free(e.codes);
return 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,5 @@
TARGET = libpfm_test
SRC_CC = check_events.c
LIBS += base posix libm libc stdcxx libpfm4
CC_OPT += -Wno-error -Wno-permissive -fpermissive

View File

@@ -1,4 +1,4 @@
TARGET = thread_test
SRC_CC = thread_test.cc
LIBS += base stdcxx
LIBS += base libc stdcxx
CXXFLAGS += -Wno-error