diff --git a/README.md b/README.md new file mode 100644 index 0000000000..32ba0c54c9 --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +# EalánOS — An Operating System for Heterogeneous Many-core Systems + +EalánOS is a research operating system, based on the [Genode OS Framework](https://genode.org/), that explores new architectural designs and resource management strategies for many-core systems with heterogeneous computing and memory resources. It is a reference implementation of the [MxKernel](https://mxkernel.org/) architecture. + +## MxKernel Architecture +The MxKernel is a new operating system architecture inspired by many-core operating systems, such as [FOS](https://dl.acm.org/doi/abs/10.1145/1531793.1531805) and [Tessellation](https://www.usenix.org/event/hotpar09/tech/full_papers/liu/liu_html/), as well as hypervisors, exokernels and unikernels. +Novel approaches of the MxKernel include the use of tasks, short-lived closed units of work, instead of threads as the control-flow abstraction, and the concept of elastic cells as the process abstraction. The architecture was first described in the paper [MxKernel: Rethinking Operating System Architecture for Many-core Hardware](https://sites.google.com/site/sfma2019eurosys/Program/sfma-mxkernel.pdf?attredirects=0) presented at the [9th Workshop on Systems for Multi-core and Heterogeneous Architectures](https://sites.google.com/site/sfma2019eurosys/). + +## Task-based programming +EalánOS promotes task-parallel programming by including the [MxTasking](https://github.com/jmuehlig/mxtasking.git) task-parallel runtime library. MxTasking improves on the common task-parallel programming paradigm by allowing tasks to be annotated with hints about the tasks' behavior, such as memory accesses. These annotations are used by the runtime environment to implement advanced features, like automatic prefetching of data and automatic synchronization of concurrent memory accesses. 
+ +## Documentation +Because EalánOS is based on Genode, the primary documentation, for now, can be found in the book [Genode Foundations](https://genode.org/documentation/genode-foundations-22-05.pdf). + +## Features added to Genode +EalánOS extends the Genode OS framework with functionality that is needed or helpful for many-core systems with non-uniform memory access (NUMA), such as +- A topology service that allows querying NUMA information from within a Genode component. +- A port of [MxTasking](https://github.com/jmuehlig/mxtasking.git), a task-based framework designed to aid in developing parallel applications. +- (WiP) An extension of Genode's RAM service that enables applications to allocate memory from a specific NUMA region, similar to libnuma's `numa_alloc_on_node`, and thus improve NUMA-locality of internal data objects. +- (WiP) An interface for using Hardware Performance Monitoring Counters inside Genode components. Currently, performance counters are only implemented for AMD's Zen1 microarchitecture. + +### Acknowledgement +The work on EalánOS and the MxKernel architecture is supported by the German Research Foundation (DFG) as part of the priority program 2037 "[Scalable Data Management on Future Hardware](https://dfg-spp2037.de/)" under Grant numbers SP968/9-1 and SP968/9-2. +The MxTasking framework is developed as part of the same DFG project at the [DBIS group at TU Dortmund University](http://dbis.cs.tu-dortmund.de/cms/de/home/index.html) and funded under Grant number TE1117/2-1. 
\ No newline at end of file diff --git a/repos/base-nova/include/nova/syscall-generic.h b/repos/base-nova/include/nova/syscall-generic.h index eed3d3f90d..4eda9e8c54 100644 --- a/repos/base-nova/include/nova/syscall-generic.h +++ b/repos/base-nova/include/nova/syscall-generic.h @@ -3,7 +3,8 @@ * \author Norman Feske * \author Sebastian Sumpf * \author Alexander Boettcher - * \date 2009-12-27 + * \author Michael Müller + * \date 2022-12-13 */ /* @@ -133,11 +134,19 @@ namespace Nova { bool has_feature_svm() const { return feature_flags & (1 << 2); } struct Cpu_desc { + enum Vendor + { + UNKNOWN, + INTEL, + AMD + }; + uint8_t flags; uint8_t thread; uint8_t core; uint8_t package; uint8_t acpi_id; + uint8_t vendor; uint8_t family; uint8_t model; uint8_t stepping:4; @@ -255,6 +264,19 @@ namespace Nova { SC_EC_TIME = 3, }; + /** + * Hpc operations + * + */ + enum Hpc_op + { + HPC_SETUP = 6U, + HPC_START = 7U, + HPC_STOP = 8U, + HPC_RESET = 9U, + HPC_READ = 10U, + }; + /** * Pd operations */ diff --git a/repos/base-nova/include/spec/64bit/nova/syscalls.h b/repos/base-nova/include/spec/64bit/nova/syscalls.h index 9e8fa765fc..cd0204e09a 100644 --- a/repos/base-nova/include/spec/64bit/nova/syscalls.h +++ b/repos/base-nova/include/spec/64bit/nova/syscalls.h @@ -253,6 +253,36 @@ namespace Nova { return util_time(NOVA_EC_CTRL, ec, Ec_op::EC_TIME, time); } + ALWAYS_INLINE + inline uint8_t hpc_ctrl(Hpc_op op, mword_t sel, mword_t type, mword_t &p1, mword_t &p2, mword_t &p3) + { + uint8_t res = syscall_6(NOVA_EC_CTRL, op, sel, type, p1, p2, p3); + return res; + } + + ALWAYS_INLINE + inline uint8_t hpc_read(mword_t sel, mword_t type, mword_t &value) + { + return syscall_5(NOVA_EC_CTRL, HPC_READ, sel, type, value); + } + + ALWAYS_INLINE + inline uint8_t hpc_start(mword_t sel, mword_t type) + { + return syscall_1(NOVA_EC_CTRL, HPC_START, sel, type); + } + + ALWAYS_INLINE + inline uint8_t hpc_stop(mword_t sel, mword_t type) + { + return syscall_1(NOVA_EC_CTRL, HPC_STOP, sel, type); + } 
+ + ALWAYS_INLINE + inline uint8_t hpc_reset(mword_t sel, mword_t type, mword_t val) + { + return syscall_2(NOVA_EC_CTRL, HPC_RESET, sel, type, val); + } ALWAYS_INLINE inline uint8_t create_sc(mword_t sc, mword_t pd, mword_t ec, Qpd qpd) diff --git a/repos/base-nova/lib/mk/base-nova-common.mk b/repos/base-nova/lib/mk/base-nova-common.mk index 81ebc9f3d3..a33b1476b5 100644 --- a/repos/base-nova/lib/mk/base-nova-common.mk +++ b/repos/base-nova/lib/mk/base-nova-common.mk @@ -14,3 +14,4 @@ SRC_CC += stack_area_addr.cc SRC_CC += cap_map.cc SRC_CC += capability.cc SRC_CC += signal_transmitter.cc +SRC_CC += perf.cc diff --git a/repos/base-nova/ports/nova.hash b/repos/base-nova/ports/nova.hash index 96a5286e98..79c825a918 100644 --- a/repos/base-nova/ports/nova.hash +++ b/repos/base-nova/ports/nova.hash @@ -1 +1 @@ -d850a1b6412ce630abedf7b9aa623b5caa994235 +52fcb4b19aa032eaba5484a69c3c4c491c2a6915 diff --git a/repos/base-nova/ports/nova.port b/repos/base-nova/ports/nova.port index c46b0978e5..736bd5faf0 100644 --- a/repos/base-nova/ports/nova.port +++ b/repos/base-nova/ports/nova.port @@ -4,7 +4,7 @@ DOWNLOADS := nova.git # feature/numa branch URL(nova) := https://github.com/mmueller41/NOVA.git -REV(nova) := 6479677bd61db47bcdcb4bd796566f83b9f655ef +REV(nova) := 4707840843206d63f72ba9238756355d16b52be3 DIR(nova) := src/kernel/nova PATCHES := $(sort $(wildcard $(REP_DIR)/patches/*.patch)) diff --git a/repos/base-nova/src/core/include/platform.h b/repos/base-nova/src/core/include/platform.h index c648c84db9..30addd45d3 100644 --- a/repos/base-nova/src/core/include/platform.h +++ b/repos/base-nova/src/core/include/platform.h @@ -20,6 +20,7 @@ #include #include #include +#include namespace Genode { @@ -51,9 +52,13 @@ namespace Genode { /* map of virtual cpu ids in Genode to kernel cpu ids */ uint8_t map_cpu_ids[MAX_SUPPORTED_CPUS]; + + /* map of virtual cpu ids in Genode to kernel NUMA ids */ uint8_t cpu_numa_map[MAX_SUPPORTED_CPUS]; + /* map of kernel NUMA region to Genode 
memory ranges */ Genode::Range_allocator::Range numa_mem_ranges[MAX_SUPPORTED_CPUS]; // TODO: Add new macro for max of numa regions + addr_t _map_pages(addr_t phys_page, addr_t pages, bool guard_page = false); @@ -164,6 +169,17 @@ namespace Genode { } } } + + /** + * @brief Return NUMA-internal vendor code for CPU + * + */ + Nova::Hip::Cpu_desc::Vendor cpu_vendor() { + extern addr_t __initial_sp; + Nova::Hip const &hip = *(Nova::Hip *)__initial_sp; + + return static_cast<Nova::Hip::Cpu_desc::Vendor>(hip.cpu_desc_of_cpu(0)->vendor); + } }; } diff --git a/repos/base-nova/src/core/ram_dataspace_support.cc b/repos/base-nova/src/core/ram_dataspace_support.cc index 066e4ca203..3b7f241f9a 100644 --- a/repos/base-nova/src/core/ram_dataspace_support.cc +++ b/repos/base-nova/src/core/ram_dataspace_support.cc @@ -56,30 +56,35 @@ static inline void * alloc_region(Dataspace_component &ds, const size_t size) void Ram_dataspace_factory::_clear_ds(Dataspace_component &ds) { + size_t const page_rounded_size = align_addr(ds.size(), get_page_size_log2()); - //size_t memset_count = page_rounded_size / 4; - //addr_t memset_ptr = ds.core_local_addr(); + size_t memset_count = page_rounded_size / 8; /* number of 8-byte words: 'rep stosq' stores one quadword per iteration */ + addr_t memset_ptr = ds.core_local_addr(); - /* - if ((memset_count * 4 == page_rounded_size) && !(memset_ptr & 0x3)) - asm volatile ("rep stosl" : "+D" (memset_ptr), "+c" (memset_count) + if ((memset_count * 8 == page_rounded_size) && !(memset_ptr & 0x7)) /* size is a multiple of 8 and the start is 8-byte aligned */ + { + asm volatile ("rep stosq" : "+D" (memset_ptr), "+c" (memset_count) : "a" (0) : "memory"); - else + } else memset(reinterpret_cast<void *>(memset_ptr), 0, page_rounded_size); - */ +} + +void Ram_dataspace_factory::_unmap_ds_from_core(Dataspace_component &ds) +{ + size_t const page_rounded_size = align_addr(ds.size(), get_page_size_log2()); + /* we don't keep any core-local mapping */ unmap_local(*reinterpret_cast<Nova::Utcb *>(Thread::myself()->utcb()), - ds.core_local_addr(), - page_rounded_size >> get_page_size_log2()); + ds.core_local_addr(), + page_rounded_size >> 
get_page_size_log2()); - platform().region_alloc().free((void*)ds.core_local_addr(), - page_rounded_size); + platform().region_alloc().free((void *)ds.core_local_addr(), + page_rounded_size); ds.assign_core_local_addr(nullptr); } - void Ram_dataspace_factory::_export_ram_ds(Dataspace_component &ds) { size_t page_rounded_size = align_addr(ds.size(), get_page_size_log2()); diff --git a/repos/base-nova/src/kernel/nova/target.mk b/repos/base-nova/src/kernel/nova/target.mk index bbaeb9b126..9ff0f3779b 100644 --- a/repos/base-nova/src/kernel/nova/target.mk +++ b/repos/base-nova/src/kernel/nova/target.mk @@ -36,7 +36,7 @@ CC_OPT += -mpreferred-stack-boundary=2 -mregparm=3 else ifeq ($(filter-out $(SPECS),64bit),) override CC_MARCH = -m64 -CC_WARN += -Wframe-larger-than=256 +CC_WARN += -Wframe-larger-than=1024 CC_OPT += -mpreferred-stack-boundary=4 -mcmodel=kernel -mno-red-zone else $(error Unsupported environment) diff --git a/repos/base-nova/src/lib/base/perf.cc b/repos/base-nova/src/lib/base/perf.cc new file mode 100644 index 0000000000..242a3228e7 --- /dev/null +++ b/repos/base-nova/src/lib/base/perf.cc @@ -0,0 +1,86 @@ + +/* + * \brief Performance Counter infrastructure, NOVA-specific implementation + * \author Michael Müller + * \date 2022-12-15 + */ + +#include + +#include +#include +#include + +unsigned long Genode::Trace::Performance_counter::private_freemask { 0xffff }; +unsigned long Genode::Trace::Performance_counter::shared_freemask { 0xffff0000 }; + +void Genode::Trace::Performance_counter::_init_masks() +{ + Nova::Hip::Cpu_desc::Vendor vendor = Nova::Hip::Cpu_desc::AMD; /* NOTE(review): vendor is hard-coded to AMD here, so the INTEL branch below is never taken - confirm whether it should be queried from the HIP */ + if (vendor == Nova::Hip::Cpu_desc::AMD) + { + private_freemask = 0x3f; // 6 core performance counters + shared_freemask = 0x1f0000; // 5 L3 complex performance counters + } + else if (vendor == Nova::Hip::Cpu_desc::INTEL) + { + private_freemask = 0x7fff; + shared_freemask = 0x7fff0000; // 15 CBO performance counters + } +} + +void Genode::Trace::Performance_counter::setup(unsigned counter, 
uint64_t event, uint64_t mask, uint64_t flags) +{ + Nova::mword_t evt = event; + Nova::mword_t msk = mask; + Nova::mword_t flg = flags; + Nova::uint8_t rc; + Nova::mword_t type = (counter >>4); + Nova::mword_t sel = type == Performance_counter::CORE ? counter : counter & 0xf; + + if ((rc = (Nova::hpc_ctrl(Nova::HPC_SETUP, sel, type, evt, msk, flg))) != Nova::NOVA_OK) + throw Genode::Trace::Pfc_access_error(rc); +} + +void Genode::Trace::Performance_counter::start(unsigned counter) +{ + Nova::uint8_t rc; + Nova::mword_t type = (counter >> 4); + Nova::mword_t sel = type == Performance_counter::CORE ? counter : counter & 0xf; /* selector is the low 4 bits, same as in setup/stop/reset */ + + if ((rc = Nova::hpc_start(sel, type)) != Nova::NOVA_OK) + throw Genode::Trace::Pfc_access_error(rc); +} + +void Genode::Trace::Performance_counter::stop(unsigned counter) +{ + Nova::uint8_t rc; + Nova::mword_t type = (counter >>4); + Nova::mword_t sel = type == Performance_counter::CORE ? counter : counter & 0xf; + + if ((rc = Nova::hpc_stop(sel, type)) != Nova::NOVA_OK) + throw Genode::Trace::Pfc_access_error(rc); +} + +void Genode::Trace::Performance_counter::reset(unsigned counter, unsigned val) +{ + Nova::uint8_t rc; + Nova::mword_t type = (counter >>4); + Nova::mword_t sel = type == Performance_counter::CORE ? counter : counter & 0xf; + + if ((rc = Nova::hpc_reset(sel, type, val)) != Nova::NOVA_OK) + throw Genode::Trace::Pfc_access_error(rc); +} + +Genode::uint64_t Genode::Trace::Performance_counter::read(unsigned counter) +{ + Nova::uint8_t rc; + Nova::mword_t value = 0; + Nova::mword_t type = (counter >>4); + Nova::mword_t sel = type == Performance_counter::CORE ? 
counter : counter & 0xf; + + if ((rc = Nova::hpc_read(sel, type, value)) != Nova::NOVA_OK) + throw Genode::Trace::Pfc_access_error(rc); + + return static_cast(value); +} \ No newline at end of file diff --git a/repos/base/include/base/attached_ram_dataspace.h b/repos/base/include/base/attached_ram_dataspace.h index 10f1026cc8..85bf88e220 100644 --- a/repos/base/include/base/attached_ram_dataspace.h +++ b/repos/base/include/base/attached_ram_dataspace.h @@ -14,6 +14,7 @@ #ifndef _INCLUDE__BASE__ATTACHED_RAM_DATASPACE_H_ #define _INCLUDE__BASE__ATTACHED_RAM_DATASPACE_H_ +#include #include #include #include @@ -105,6 +106,7 @@ class Genode::Attached_ram_dataspace _size(size), _ram(&ram), _rm(&rm), _cache(cache) { _alloc_and_attach(); + memset(_local_addr, 0, _size); } /** diff --git a/repos/base/include/base/local_connection.h b/repos/base/include/base/local_connection.h index 56c1d2e721..87876a259a 100644 --- a/repos/base/include/base/local_connection.h +++ b/repos/base/include/base/local_connection.h @@ -93,8 +93,15 @@ struct Genode::Local_connection_base : Noncopyable if (_session_state->phase == Session_state::INSUFFICIENT_RAM_QUOTA || _session_state->phase == Session_state::INSUFFICIENT_CAP_QUOTA) - warning("giving up to increase session quota for ", service.name(), " session " + { + warning("[", label, "] giving up to increase session quota for ", service.name(), " session " "after ", (int)NUM_ATTEMPTS, " attempts"); + if (_session_state->phase == Session_state::INSUFFICIENT_RAM_QUOTA) + warning("Insufficient RAM quota: ", resources.ram_quota.value); + + if (_session_state->phase == Session_state::INSUFFICIENT_CAP_QUOTA) + warning("Insufficient CAP quota ", resources.cap_quota.value); + } } void close() diff --git a/repos/base/include/base/trace/perf.h b/repos/base/include/base/trace/perf.h new file mode 100644 index 0000000000..e2eb6ee81b --- /dev/null +++ b/repos/base/include/base/trace/perf.h @@ -0,0 +1,93 @@ +/* + * \brief Performance Counter infrastructure 
+ * \author Michael Müller + * \date 2022-12-15 + */ + +#pragma once + +#include + +namespace Genode { namespace Trace { + + class Pfc_no_avail { + }; + + class Performance_counter + { + + private: + static unsigned long private_freemask; + static unsigned long shared_freemask; + + static unsigned _alloc(unsigned long *free_mask) + { + unsigned long current_mask, new_mask; + unsigned bit; + + do + { + current_mask = *free_mask; + bit = __builtin_ffsl(current_mask); + new_mask = bit ? (current_mask & ~(1UL << (bit - 1))) : current_mask; /* bit == 0 means no free counter: leave the mask unchanged instead of shifting by -1 */ + } while (!__atomic_compare_exchange(free_mask, &current_mask, &new_mask, true, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED)); + + if (!bit) // Allocation failed + throw Pfc_no_avail(); + + return bit - 1; // number of the allocated counter + } + + static void _init_masks(); + + public: + typedef unsigned int Counter; + + enum Type + { + CORE = 0, + CACHE = 1 + }; + + static unsigned acquire(Type type) { + return (type == Type::CORE) ? alloc_core() : alloc_cbo(); + } + + static unsigned alloc_cbo() { + if (shared_freemask == 0xffff0000) + _init_masks(); + return _alloc(&shared_freemask); + } + + static unsigned alloc_core() { + if (private_freemask == 0xffff) + _init_masks(); + return _alloc(&private_freemask); + } + + static void release(unsigned counter) { + bool core = (counter >> 4) == Type::CORE; /* counter >> 4 is the Type: 0 (CORE) for ids taken from the private mask, non-zero for ids from the shared mask */ + if (core) + __atomic_fetch_or(&private_freemask, 1UL << counter, __ATOMIC_ACQ_REL); /* atomic, to pair with the CAS loop in _alloc */ + else + __atomic_fetch_or(&shared_freemask, 1UL << counter, __ATOMIC_ACQ_REL); + } + + static void setup(unsigned counter, Genode::uint64_t event, Genode::uint64_t mask, Genode::uint64_t flags); + static void start(unsigned counter); + static void stop(unsigned counter); + static void reset(unsigned counter, unsigned val=0); + static uint64_t read(unsigned counter); + }; + + class Pfc_access_error { + private: + Genode::uint8_t _rc; + + public: + Pfc_access_error(uint8_t rc) : _rc(rc) {} + Genode::uint8_t error_code() { return _rc; } + }; + + } +} \ No newline at end of file diff --git a/repos/base/include/topo_session/connection.h 
b/repos/base/include/topo_session/connection.h index f20601f391..f2ff65b994 100644 --- a/repos/base/include/topo_session/connection.h +++ b/repos/base/include/topo_session/connection.h @@ -27,7 +27,7 @@ struct Genode::Topo_connection : Connection, Topo_session_client { enum { - RAM_QUOTA = 262144 + RAM_QUOTA = 2097152UL }; Topo_connection(Env &env, const char *label = "", Affinity const &affinity = Affinity()) diff --git a/repos/base/lib/symbols/ld b/repos/base/lib/symbols/ld index d4f08f65dd..719db66190 100644 --- a/repos/base/lib/symbols/ld +++ b/repos/base/lib/symbols/ld @@ -54,6 +54,9 @@ _ZN5Timer10ConnectionC1ERN6Genode3EnvEPKc T _ZN5Timer10ConnectionC1ERN6Genode3EnvERNS1_10EntrypointEPKc T _ZN5Timer10ConnectionC2ERN6Genode3EnvEPKc T _ZN5Timer10ConnectionC2ERN6Genode3EnvERNS1_10EntrypointEPKc T +_ZN6Genode5Trace19Performance_counter15shared_freemaskE D 8 +_ZN6Genode5Trace19Performance_counter16private_freemaskE D 8 +_ZN6Genode5Trace19Performance_counter11_init_masksEv T _ZN6Genode10Entrypoint16_dispatch_signalERNS_6SignalE T _ZN6Genode10Entrypoint16schedule_suspendEPFvvES2_ T _ZN6Genode10Entrypoint22Signal_proxy_component6signalEv T @@ -274,6 +277,11 @@ _ZN6Genode5Trace6Logger17_evaluate_controlEv T _ZN6Genode5Trace6Logger3logEPKcm T _ZN6Genode5Trace6LoggerC1Ev T _ZN6Genode5Trace6LoggerC2Ev T +_ZN6Genode5Trace19Performance_counter4readEj T +_ZN6Genode5Trace19Performance_counter4stopEj T +_ZN6Genode5Trace19Performance_counter5resetEjj T +_ZN6Genode5Trace19Performance_counter5setupEjyyy T +_ZN6Genode5Trace19Performance_counter5startEj T _ZN6Genode5Trace18Partitioned_buffer4initEm T _ZN6Genode5Trace18Partitioned_buffer6commitEm T _ZN6Genode5Trace18Partitioned_buffer7reserveEm T diff --git a/repos/base/src/core/include/cpu_thread_component.h b/repos/base/src/core/include/cpu_thread_component.h index e1b9652a5e..0360533ab7 100644 --- a/repos/base/src/core/include/cpu_thread_component.h +++ b/repos/base/src/core/include/cpu_thread_component.h @@ -172,6 +172,7 @@ 
class Genode::Cpu_thread_component : public Rpc_object, _address_space_region_map.add_client(_rm_client); _platform_thread.pager(_rm_client); + _platform_thread.affinity(location); _trace_sources.insert(&_trace_source); } diff --git a/repos/base/src/core/include/ram_dataspace_factory.h b/repos/base/src/core/include/ram_dataspace_factory.h index 01defb4302..c0a2294a0c 100644 --- a/repos/base/src/core/include/ram_dataspace_factory.h +++ b/repos/base/src/core/include/ram_dataspace_factory.h @@ -82,6 +82,11 @@ class Genode::Ram_dataspace_factory : public Ram_allocator, */ void _clear_ds(Dataspace_component &ds); + /** + * Remove core-local mappings of dataspace + */ + void _unmap_ds_from_core(Dataspace_component &ds); + public: Ram_dataspace_factory(Rpc_entrypoint &ep, diff --git a/repos/base/src/core/include/topo_session_component.h b/repos/base/src/core/include/topo_session_component.h index 10ffb0e7b0..e86ffd7665 100644 --- a/repos/base/src/core/include/topo_session_component.h +++ b/repos/base/src/core/include/topo_session_component.h @@ -20,6 +20,7 @@ #include #include #include +#include #include namespace Genode { @@ -32,7 +33,7 @@ class Genode::Topo_session_component : public Session_object Genode::Affinity &_affinity; Sliced_heap _md_alloc; - Topology::Numa_region _node_affinities[64][64]; + Topology::Numa_region _node_affinities[Genode::Platform::MAX_SUPPORTED_CPUS][Genode::Platform::MAX_SUPPORTED_CPUS]; unsigned _node_count; Topology::Numa_region _nodes[64]; diff --git a/repos/base/src/core/main.cc b/repos/base/src/core/main.cc index 45c6fd56df..f6e39e028c 100644 --- a/repos/base/src/core/main.cc +++ b/repos/base/src/core/main.cc @@ -286,7 +286,7 @@ int main() size_t const avail_ram_quota = core_pd.avail_ram().value; size_t const avail_cap_quota = core_pd.avail_caps().value; - size_t const preserved_ram_quota = 224*1024; + size_t const preserved_ram_quota = 224*1024+(1<<20); size_t const preserved_cap_quota = 1000; if (avail_ram_quota < preserved_ram_quota) { 
diff --git a/repos/base/src/core/ram_dataspace_factory.cc b/repos/base/src/core/ram_dataspace_factory.cc index 05079bc4eb..a8b8f158f7 100644 --- a/repos/base/src/core/ram_dataspace_factory.cc +++ b/repos/base/src/core/ram_dataspace_factory.cc @@ -123,6 +123,7 @@ Ram_dataspace_factory::try_alloc(size_t ds_size, Cache cache) Dataspace_component &ds = *ds_ptr; /* create native shared memory representation of dataspace */ +#ifdef ZERO_AT_ALLOC try { _export_ram_ds(ds); } catch (Core_virtual_memory_exhausted) { warning("could not export RAM dataspace of size ", ds.size()); @@ -137,8 +138,8 @@ Ram_dataspace_factory::try_alloc(size_t ds_size, Cache cache) * function must also make sure to flush all cache lines related to the * address range used by the dataspace. */ - _clear_ds(ds); - + _unmap_ds_from_core(ds); +#endif Dataspace_capability ds_cap = _ep.manage(&ds); phys_alloc_guard.keep = true; @@ -181,8 +182,25 @@ void Ram_dataspace_factory::free(Ram_dataspace_capability ds_cap) }); /* call dataspace destructor and free memory */ - if (ds) + if (ds) { + try { _export_ram_ds(*ds); } + catch (Core_virtual_memory_exhausted) { + warning("could not export RAM dataspace of size ", ds->size()); + + /* cleanup unneeded resources */ + destroy(_ds_slab, ds); + return; + } + + /* + * Fill new dataspaces with zeros. For non-cached RAM dataspaces, this + * function must also make sure to flush all cache lines related to the + * address range used by the dataspace. 
+ */ + _clear_ds(*ds); + _unmap_ds_from_core(*ds); destroy(_ds_slab, ds); + } } diff --git a/repos/libports/lib/import/import-libpfm4.mk b/repos/libports/lib/import/import-libpfm4.mk new file mode 100644 index 0000000000..ba6094e1cf --- /dev/null +++ b/repos/libports/lib/import/import-libpfm4.mk @@ -0,0 +1 @@ +INC_DIR += $(call select_from_ports,libpfm4)/include \ No newline at end of file diff --git a/repos/libports/lib/mk/libpfm4.mk b/repos/libports/lib/mk/libpfm4.mk new file mode 100644 index 0000000000..c998bfad52 --- /dev/null +++ b/repos/libports/lib/mk/libpfm4.mk @@ -0,0 +1,204 @@ +LIBPFM4_DIR := $(call select_from_ports,libpfm4)/src/lib/libpfm4 + +CC_OPT += -D_REENTRANT -fvisibility=hidden + +SRC_CC = $(LIBPFM4_DIR)/lib/pfmlib_common.c + +# build libpfm only for x86_64 for now +CONFIG_PFMLIB_ARCH_X86_64=y +CONFIG_PFMLIB_ARCH_X86=y + +CONFIG_PFMLIB_SHARED?=n +CONFIG_PFMLIB_DEBUG?=y +CONFIG_PFMLIB_NOPYTHON?=y + +# +# list all library support modules +# +ifeq ($(CONFIG_PFMLIB_ARCH_IA64),y) +INCARCH = $(INC_IA64) +#SRCS += pfmlib_gen_ia64.c pfmlib_itanium.c pfmlib_itanium2.c pfmlib_montecito.c +CFLAGS += -DCONFIG_PFMLIB_ARCH_IA64 +endif + +ifeq ($(CONFIG_PFMLIB_ARCH_X86),y) + +ifeq ($(SYS),Linux) +SRCS += pfmlib_intel_x86_perf_event.c pfmlib_amd64_perf_event.c \ + pfmlib_intel_netburst_perf_event.c \ + pfmlib_intel_snbep_unc_perf_event.c +endif + +INCARCH = $(INC_X86) +SRCS += pfmlib_amd64.c pfmlib_intel_core.c pfmlib_intel_x86.c \ + pfmlib_intel_x86_arch.c pfmlib_intel_atom.c \ + pfmlib_intel_nhm_unc.c pfmlib_intel_nhm.c \ + pfmlib_intel_wsm.c \ + pfmlib_intel_snb.c pfmlib_intel_snb_unc.c \ + pfmlib_intel_ivb.c pfmlib_intel_ivb_unc.c \ + pfmlib_intel_hsw.c \ + pfmlib_intel_bdw.c \ + pfmlib_intel_skl.c \ + pfmlib_intel_icl.c \ + pfmlib_intel_spr.c \ + pfmlib_intel_rapl.c \ + pfmlib_intel_snbep_unc.c \ + pfmlib_intel_snbep_unc_cbo.c \ + pfmlib_intel_snbep_unc_ha.c \ + pfmlib_intel_snbep_unc_imc.c \ + pfmlib_intel_snbep_unc_pcu.c \ + pfmlib_intel_snbep_unc_qpi.c 
\ + pfmlib_intel_snbep_unc_ubo.c \ + pfmlib_intel_snbep_unc_r2pcie.c \ + pfmlib_intel_snbep_unc_r3qpi.c \ + pfmlib_intel_ivbep_unc_cbo.c \ + pfmlib_intel_ivbep_unc_ha.c \ + pfmlib_intel_ivbep_unc_imc.c \ + pfmlib_intel_ivbep_unc_pcu.c \ + pfmlib_intel_ivbep_unc_qpi.c \ + pfmlib_intel_ivbep_unc_ubo.c \ + pfmlib_intel_ivbep_unc_r2pcie.c \ + pfmlib_intel_ivbep_unc_r3qpi.c \ + pfmlib_intel_ivbep_unc_irp.c \ + pfmlib_intel_hswep_unc_cbo.c \ + pfmlib_intel_hswep_unc_ha.c \ + pfmlib_intel_hswep_unc_imc.c \ + pfmlib_intel_hswep_unc_pcu.c \ + pfmlib_intel_hswep_unc_qpi.c \ + pfmlib_intel_hswep_unc_ubo.c \ + pfmlib_intel_hswep_unc_r2pcie.c \ + pfmlib_intel_hswep_unc_r3qpi.c \ + pfmlib_intel_hswep_unc_irp.c \ + pfmlib_intel_hswep_unc_sbo.c \ + pfmlib_intel_bdx_unc_cbo.c \ + pfmlib_intel_bdx_unc_ubo.c \ + pfmlib_intel_bdx_unc_sbo.c \ + pfmlib_intel_bdx_unc_ha.c \ + pfmlib_intel_bdx_unc_imc.c \ + pfmlib_intel_bdx_unc_irp.c \ + pfmlib_intel_bdx_unc_pcu.c \ + pfmlib_intel_bdx_unc_qpi.c \ + pfmlib_intel_bdx_unc_r2pcie.c \ + pfmlib_intel_bdx_unc_r3qpi.c \ + pfmlib_intel_skx_unc_cha.c \ + pfmlib_intel_skx_unc_iio.c \ + pfmlib_intel_skx_unc_imc.c \ + pfmlib_intel_skx_unc_irp.c \ + pfmlib_intel_skx_unc_m2m.c \ + pfmlib_intel_skx_unc_m3upi.c \ + pfmlib_intel_skx_unc_pcu.c \ + pfmlib_intel_skx_unc_ubo.c \ + pfmlib_intel_skx_unc_upi.c \ + pfmlib_intel_knc.c \ + pfmlib_intel_slm.c \ + pfmlib_intel_tmt.c \ + pfmlib_intel_knl.c \ + pfmlib_intel_knl_unc_imc.c \ + pfmlib_intel_knl_unc_edc.c \ + pfmlib_intel_knl_unc_cha.c \ + pfmlib_intel_knl_unc_m2pcie.c \ + pfmlib_intel_glm.c \ + pfmlib_intel_netburst.c \ + pfmlib_amd64_k7.c pfmlib_amd64_k8.c pfmlib_amd64_fam10h.c \ + pfmlib_amd64_fam11h.c pfmlib_amd64_fam12h.c \ + pfmlib_amd64_fam14h.c pfmlib_amd64_fam15h.c \ + pfmlib_amd64_fam17h.c pfmlib_amd64_fam16h.c \ + pfmlib_amd64_fam19h.c pfmlib_amd64_rapl.c \ + pfmlib_amd64_fam19h_l3.c + +CFLAGS += -DCONFIG_PFMLIB_ARCH_X86 + +ifeq ($(CONFIG_PFMLIB_ARCH_I386),y) +SRCS += pfmlib_intel_coreduo.c 
pfmlib_intel_p6.c +CFLAGS += -DCONFIG_PFMLIB_ARCH_I386 +endif + +ifeq ($(CONFIG_PFMLIB_ARCH_X86_64),y) +CFLAGS += -DCONFIG_PFMLIB_ARCH_X86_64 +endif + +endif + +ifeq ($(CONFIG_PFMLIB_ARCH_POWERPC),y) + +ifeq ($(SYS),Linux) +SRCS += pfmlib_powerpc_perf_event.c +endif + +INCARCH = $(INC_POWERPC) +SRCS += pfmlib_powerpc.c pfmlib_power4.c pfmlib_ppc970.c pfmlib_power5.c \ + pfmlib_power6.c pfmlib_power7.c pfmlib_torrent.c pfmlib_power8.c \ + pfmlib_power9.c pfmlib_powerpc_nest.c pfmlib_power10.c +CFLAGS += -DCONFIG_PFMLIB_ARCH_POWERPC +endif + +ifeq ($(CONFIG_PFMLIB_ARCH_S390X),y) + +ifeq ($(SYS),Linux) +SRCS += pfmlib_s390x_perf_event.c +endif + +INCARCH = $(INC_S390X) +SRCS += pfmlib_s390x_cpumf.c +CFLAGS += -DCONFIG_PFMLIB_ARCH_S390X +endif + +ifeq ($(CONFIG_PFMLIB_ARCH_SPARC),y) + +ifeq ($(SYS),Linux) +SRCS += pfmlib_sparc_perf_event.c +endif + +INCARCH = $(INC_SPARC) +SRCS += pfmlib_sparc.c pfmlib_sparc_ultra12.c pfmlib_sparc_ultra3.c pfmlib_sparc_ultra4.c pfmlib_sparc_niagara.c +CFLAGS += -DCONFIG_PFMLIB_ARCH_SPARC +endif + +ifeq ($(CONFIG_PFMLIB_ARCH_ARM),y) + +ifeq ($(SYS),Linux) +SRCS += pfmlib_arm_perf_event.c +endif + +INCARCH = $(INC_ARM) +SRCS += pfmlib_arm.c pfmlib_arm_armv7_pmuv1.c pfmlib_arm_armv6.c pfmlib_arm_armv8.c pfmlib_tx2_unc_perf_event.c pfmlib_kunpeng_unc_perf_event.c +CFLAGS += -DCONFIG_PFMLIB_ARCH_ARM +endif + +ifeq ($(CONFIG_PFMLIB_ARCH_ARM64),y) + +ifeq ($(SYS),Linux) +SRCS += pfmlib_arm_perf_event.c +endif + +INCARCH = $(INC_ARM64) +SRCS += pfmlib_arm.c pfmlib_arm_armv8.c pfmlib_tx2_unc_perf_event.c pfmlib_kunpeng_unc_perf_event.c +CFLAGS += -DCONFIG_PFMLIB_ARCH_ARM64 +endif + +ifeq ($(CONFIG_PFMLIB_ARCH_MIPS),y) + +ifeq ($(SYS),Linux) +SRCS += pfmlib_mips_perf_event.c +endif + +INCARCH = $(INC_MIPS) +SRCS += pfmlib_mips.c pfmlib_mips_74k.c +CFLAGS += -DCONFIG_PFMLIB_ARCH_MIPS +endif + +ifeq ($(CONFIG_PFMLIB_CELL),y) +INCARCH = $(INC_CELL) +#SRCS += pfmlib_cell.c +CFLAGS += -DCONFIG_PFMLIB_CELL +endif + +SRC_CC += $(addprefix 
$(LIBPFM4_DIR)/lib/,$(SRCS)) +vpath %.c $(LIBPFM4_DIR)/lib + +CC_OPT += $(CFLAGS) + +INC_DIR += $(LIBPFM4_DIR)/include $(LIBPFM4_DIR)/lib/events +vpath %.h $(INC_DIR) + +LIBS += base libm libc diff --git a/repos/libports/ports/libpfm4.hash b/repos/libports/ports/libpfm4.hash new file mode 100644 index 0000000000..6eeb6653d7 --- /dev/null +++ b/repos/libports/ports/libpfm4.hash @@ -0,0 +1 @@ +b0ec09148c2be9f4a96203a3d2de4ebed6ce2da0 diff --git a/repos/libports/ports/libpfm4.port b/repos/libports/ports/libpfm4.port new file mode 100644 index 0000000000..f0d7542ca3 --- /dev/null +++ b/repos/libports/ports/libpfm4.port @@ -0,0 +1,13 @@ +LICENSE := PD +DOWNLOADS := libpfm4.git +VERSION := git + +URL(libpfm4) := https://github.com/wcohen/libpfm4.git +REV(libpfm4) := 8aaaf1747e96031a47ed6bd9337ff61a21f8cc64 +DIR(libpfm4) := src/lib/libpfm4 + +DIRS += include +DIRS += include/perfmon + +DIR_CONTENT(include) += src/lib/libpfm4/include/perfmon +DIR_CONTENT(include/perfmon) += src/lib/libpfm4/include/perfmon/*.h \ No newline at end of file diff --git a/repos/libports/ports/mxtasking.hash b/repos/libports/ports/mxtasking.hash index c0e35b1baf..840e0ee902 100644 --- a/repos/libports/ports/mxtasking.hash +++ b/repos/libports/ports/mxtasking.hash @@ -1 +1 @@ -07a3844690ae8eb15832d93e29567a5a8e6e45af +03dc91ed3385b2a62dee0c4f20daf9b5cb29ba24 diff --git a/repos/libports/ports/mxtasking.port b/repos/libports/ports/mxtasking.port index 05ca8e5167..1d63c5d981 100644 --- a/repos/libports/ports/mxtasking.port +++ b/repos/libports/ports/mxtasking.port @@ -3,7 +3,7 @@ DOWNLOADS := mxtasking.git VERSION := git URL(mxtasking) := https://github.com/mmueller41/mxtasking.git -REV(mxtasking) := bfc90d4dcf88b7072c76d70e897cb4072f399248 +REV(mxtasking) := fcf0a2810ba69d1017d6d7d9a5d6e60ac962f9f1 DIR(mxtasking) := src/lib/mxtasking DIRS += include/mx/memory diff --git a/repos/libports/recipes/src/libpfm4/api b/repos/libports/recipes/src/libpfm4/api new file mode 100644 index 0000000000..954b4ab6ae 
--- /dev/null +++ b/repos/libports/recipes/src/libpfm4/api @@ -0,0 +1 @@ +libpfm4 \ No newline at end of file diff --git a/repos/libports/recipes/src/libpfm4/content.mk b/repos/libports/recipes/src/libpfm4/content.mk new file mode 100644 index 0000000000..1301d4f0ba --- /dev/null +++ b/repos/libports/recipes/src/libpfm4/content.mk @@ -0,0 +1,17 @@ +MIRROR_FROM_REP_DIR := lib/mk/libpfm4.mk lib/import/import-libpfm4.mk + +content: src/lib/libpfm4 COPYING $(MIRROR_FROM_REP_DIR) + +PORT_DIR := $(call port_dir,$(REP_DIR)/ports/libpfm4) + +src/lib/libpfm4: + mkdir -p $@ + cp -r $(PORT_DIR)/src/lib/libpfm4/* $@ + rm -rf $@/.git + echo "LIBS = libpfm4" > $@/target.mk + +$(MIRROR_FROM_REP_DIR): + $(mirror_from_rep_dir) + +LICENSE: + echo "libpfm license, see src/lib/libpfm4/COPYING" > $@ \ No newline at end of file diff --git a/repos/libports/recipes/src/libpfm4/used_api b/repos/libports/recipes/src/libpfm4/used_api new file mode 100644 index 0000000000..186e29c4c6 --- /dev/null +++ b/repos/libports/recipes/src/libpfm4/used_api @@ -0,0 +1,3 @@ +base +libm +libc \ No newline at end of file diff --git a/repos/libports/src/lib/libc/component.cc b/repos/libports/src/lib/libc/component.cc index 1adab4e67f..171778cb83 100644 --- a/repos/libports/src/lib/libc/component.cc +++ b/repos/libports/src/lib/libc/component.cc @@ -79,4 +79,4 @@ void Component::construct(Genode::Env &env) * Default stack size for libc-using components */ Genode::size_t Libc::Component::stack_size() __attribute__((weak)); -Genode::size_t Libc::Component::stack_size() { return 32UL*1024*sizeof(long); } +Genode::size_t Libc::Component::stack_size() { return 96UL*1024*sizeof(long); } diff --git a/repos/mml/run/hello_mxtask.run b/repos/mml/run/hello_mxtask.run index de73b12439..3bc51d944f 100644 --- a/repos/mml/run/hello_mxtask.run +++ b/repos/mml/run/hello_mxtask.run @@ -20,7 +20,9 @@ set config { + + @@ -38,7 +40,8 @@ append_platform_drv_config append config { - + + @@ -60,6 +63,6 @@ set boot_modules { 
append_platform_drv_boot_modules build_boot_image $boot_modules -append qemu_args "-nographic -m 64" +append qemu_args "-nographic" run_genode_until forever \ No newline at end of file diff --git a/repos/mml/run/hpc_test.run b/repos/mml/run/hpc_test.run new file mode 100644 index 0000000000..11e9f26d63 --- /dev/null +++ b/repos/mml/run/hpc_test.run @@ -0,0 +1,80 @@ +set build_components { + core init timer app/hpc_test +} + +source ${genode_dir}/repos/base/run/platform_drv.inc +append_platform_drv_build_components + +build $build_components + +create_boot_directory + +set config { + + + + + + + + + + + + + + + + + + + + + + + + + + +} + +append config { + + + + + 2022-07-20 14:30 + + + + + + + + + + + + 2022-07-20 14:30 + + + + + + + + +} + +install_config $config + +set boot_modules { + core init timer vfs.lib.so ld.lib.so posix.lib.so libc.lib.so libm.lib.so stdcxx.lib.so hpc_test +} + +append_platform_drv_boot_modules + +build_boot_image $boot_modules +append qemu_args "-nographic " + +run_genode_until forever \ No newline at end of file diff --git a/repos/mml/run/libpfm_test.run b/repos/mml/run/libpfm_test.run new file mode 100644 index 0000000000..12d1aec044 --- /dev/null +++ b/repos/mml/run/libpfm_test.run @@ -0,0 +1,68 @@ +set build_components { + core init timer app/libpfm_test +} + +source ${genode_dir}/repos/base/run/platform_drv.inc +append_platform_drv_build_components + +build $build_components + +create_boot_directory + +set config { + + + + + + + + + + + + + + + + + + + + + + + + + + + +} + +append config { + + + + 2022-07-20 14:30 + + + + + + + + +} + +install_config $config + +set boot_modules { + core init timer vfs.lib.so ld.lib.so posix.lib.so libc.lib.so libm.lib.so stdcxx.lib.so libpfm_test +} + +append_platform_drv_boot_modules + +build_boot_image $boot_modules +append qemu_args "-nographic " + +run_genode_until forever \ No newline at end of file diff --git a/repos/mml/run/livedemo.run b/repos/mml/run/livedemo.run new file mode 100644 index 
0000000000..5e0aa96dd9 --- /dev/null +++ b/repos/mml/run/livedemo.run @@ -0,0 +1,116 @@ +set build_components { + core init timer app/blinktree +} + +build $build_components + +create_boot_directory + +set config { + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 2022-07-20 14:30 + + + + + + + + + + + + + + + + + + + + + + 2022-07-20 14:30 + + + + + + + + + + + + + + + + + + + + + + 2022-07-20 14:30 + + + + + + + + + + + + + +} + +install_config $config + +set boot_modules { + core init timer vfs.lib.so ld.lib.so libm.lib.so libc.lib.so stdcxx.lib.so posix.lib.so blinktree fill_randint_workloada mixed_randint_workloada +} + +build_boot_image $boot_modules +append qemu_args "-nographic" +run_genode_until forever \ No newline at end of file diff --git a/repos/mml/src/app/blinktree/benchmark/chronometer.h b/repos/mml/src/app/blinktree/benchmark/chronometer.h index 6c6d88a234..7b725251ed 100644 --- a/repos/mml/src/app/blinktree/benchmark/chronometer.h +++ b/repos/mml/src/app/blinktree/benchmark/chronometer.h @@ -1,8 +1,6 @@ #pragma once -#ifdef PERF_SUPPORT #include "perf.h" -#endif #include "phase.h" #include #include @@ -51,7 +49,7 @@ template class InterimResult public: InterimResult(const std::uint64_t operation_count, const P &phase, const std::uint16_t iteration, const std::uint16_t core_count, const std::chrono::milliseconds time, - /*std::vector &counter,*/ + std::vector &counter, std::unordered_map executed_tasks, std::unordered_map executed_reader_tasks, std::unordered_map executed_writer_tasks, @@ -65,12 +63,10 @@ public: _scheduled_tasks_on_core(std::move(scheduled_tasks_on_core)), _scheduled_tasks_off_core(std::move(scheduled_tasks_off_core)), _worker_fills(std::move(worker_fills)) { -#ifdef PERF_SUPPORT for (auto &c : counter) { _performance_counter.emplace_back(std::make_pair(c.name(), c.read())); } -#endif } ~InterimResult() = default; @@ -181,9 +177,7 @@ public: _current_phase = phase; _current_iteration = iteration; 
_core_set = core_set; -#ifdef PERF_SUPPORT _perf.start(); -#endif //_start = std::chrono::steady_clock::now(); _start = Genode::Trace::timestamp(); @@ -193,9 +187,7 @@ public: { const auto end = Genode::Trace::timestamp(); //const auto end = std::chrono::steady_clock::now(); -#ifdef PERF_SUPPORT _perf.stop(); -#endif //const auto milliseconds = std::chrono::duration_cast(end-_start); const auto milliseconds = std::chrono::milliseconds((end-_start)/2000000UL); @@ -205,7 +197,7 @@ public: _current_iteration, _core_set.size(), milliseconds, - //_perf.counter(), + _perf.counter(), statistic_map(mx::tasking::profiling::Statistic::Executed), statistic_map(mx::tasking::profiling::Statistic::ExecutedReader), statistic_map(mx::tasking::profiling::Statistic::ExecutedWriter), @@ -214,16 +206,12 @@ public: statistic_map(mx::tasking::profiling::Statistic::ScheduledOffChannel), statistic_map(mx::tasking::profiling::Statistic::Fill)}; } -#ifdef PERF_SUPPORT void add(PerfCounter &performance_counter) { _perf.add(performance_counter); } -#endif private: std::uint16_t _current_iteration{0U}; P _current_phase; mx::util::core_set _core_set; -#ifdef PERF_SUPPORT alignas(64) Perf _perf; -#endif //alignas(64) std::chrono::steady_clock::time_point _start; alignas(64) size_t _start; diff --git a/repos/mml/src/app/blinktree/benchmark/perf.cpp b/repos/mml/src/app/blinktree/benchmark/perf.cpp index 366e671854..f18167d0da 100644 --- a/repos/mml/src/app/blinktree/benchmark/perf.cpp +++ b/repos/mml/src/app/blinktree/benchmark/perf.cpp @@ -6,29 +6,27 @@ using namespace benchmark; * Counter "Instructions Retired" * Counts when the last uop of an instruction retires. 
*/ -[[maybe_unused]] PerfCounter Perf::INSTRUCTIONS = {"instr", PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS}; +[[maybe_unused]] PerfCounter Perf::INSTRUCTIONS = {"instr", Genode::Trace::Performance_counter::Type::CORE, 0xc0, 0x0}; /** */ -[[maybe_unused]] PerfCounter Perf::CYCLES = {"cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES}; +[[maybe_unused]] PerfCounter Perf::CYCLES = {"cycles", Genode::Trace::Performance_counter::Type::CORE, 0x76, 0x0}; /** */ -[[maybe_unused]] PerfCounter Perf::L1_MISSES = {"l1-miss", PERF_TYPE_HW_CACHE, - PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)}; +[[maybe_unused]] PerfCounter Perf::L1_MISSES = {"l1-miss", Genode::Trace::Performance_counter::Type::CORE, 0x43, 0x5b}; /** * Counter "LLC Misses" * Accesses to the LLC in which the data is not present(miss). */ -[[maybe_unused]] PerfCounter Perf::LLC_MISSES = {"llc-miss", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES}; +[[maybe_unused]] PerfCounter Perf::LLC_MISSES = {"llc-miss", Genode::Trace::Performance_counter::Type::CACHE, 0x6, 0xff}; /** * Counter "LLC Reference" * Accesses to the LLC, in which the data is present(hit) or not present(miss) */ -[[maybe_unused]] PerfCounter Perf::LLC_REFERENCES = {"llc-ref", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES}; +[[maybe_unused]] PerfCounter Perf::LLC_REFERENCES = {"llc-ref", Genode::Trace::Performance_counter::Type::CACHE, 0x4, 0xff}; /** * Micro architecture "Skylake" @@ -36,7 +34,7 @@ using namespace benchmark; * EventSel=A3H,UMask=14H, CMask=20 * Execution stalls while memory subsystem has an outstanding load. 
*/ -PerfCounter Perf::STALLS_MEM_ANY = {"memory-stall", PERF_TYPE_RAW, 0x145314a3}; +//PerfCounter Perf::STALLS_MEM_ANY = {"memory-stall", PERF_TYPE_RAW, 0x145314a3}; /** * Micro architecture "Skylake" @@ -44,7 +42,7 @@ PerfCounter Perf::STALLS_MEM_ANY = {"memory-stall", PERF_TYPE_RAW, 0x145314a3}; * EventSel=32H,UMask=01H * Number of PREFETCHNTA instructions executed. */ -[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_NTA = {"sw-prefetch-nta", PERF_TYPE_RAW, 0x530132}; +[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_NTA = {"sw-prefetch-nta", Genode::Trace::Performance_counter::Type::CORE, 0x4b, 0x4}; /** * Micro architecture "Skylake" @@ -52,7 +50,7 @@ PerfCounter Perf::STALLS_MEM_ANY = {"memory-stall", PERF_TYPE_RAW, 0x145314a3}; * EventSel=32H,UMask=02H * Number of PREFETCHT0 instructions executed. */ -[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_T0 = {"sw-prefetch-t0", PERF_TYPE_RAW, 0x530232}; +//[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_T0 = {"sw-prefetch-t0", Genode::Trace::Performance_counter::Type::CORE, 0x4b, }; /** * Micro architecture "Skylake" @@ -60,7 +58,7 @@ PerfCounter Perf::STALLS_MEM_ANY = {"memory-stall", PERF_TYPE_RAW, 0x145314a3}; * EventSel=32H,UMask=04H * Number of PREFETCHT1 or PREFETCHT2 instructions executed. */ -[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_T1_T2 = {"sw-prefetch-t1t2", PERF_TYPE_RAW, 0x530432}; +//[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_T1_T2 = {"sw-prefetch-t1t2", PERF_TYPE_RAW, 0x530432}; /** * Micro architecture "Skylake" @@ -68,4 +66,4 @@ PerfCounter Perf::STALLS_MEM_ANY = {"memory-stall", PERF_TYPE_RAW, 0x145314a3}; * EventSel=32H,UMask=08H * Number of PREFETCHW instructions executed. 
*/ -[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_WRITE = {"sw-prefetch-w", PERF_TYPE_RAW, 0x530832}; \ No newline at end of file +[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_WRITE = {"sw-prefetch-w", Genode::Trace::Performance_counter::Type::CORE, 0x4b, 0x2}; \ No newline at end of file diff --git a/repos/mml/src/app/blinktree/benchmark/perf.h b/repos/mml/src/app/blinktree/benchmark/perf.h index 544a675fad..2a2ae39c00 100644 --- a/repos/mml/src/app/blinktree/benchmark/perf.h +++ b/repos/mml/src/app/blinktree/benchmark/perf.h @@ -1,12 +1,11 @@ #pragma once #include -#include #include -#include // TODO: Find Genode equivalent +#include #include -#include -#include #include +#include + /* * For more Performance Counter take a look into the Manual from Intel: @@ -28,46 +27,62 @@ namespace benchmark { class PerfCounter { public: - PerfCounter(std::string &&name, const std::uint64_t type, const std::uint64_t event_id) : _name(std::move(name)) + PerfCounter(std::string &&name, const Genode::Trace::Performance_counter::Type type, const std::uint64_t event_id, const std::uint64_t mask) : _name(std::move(name)), _type(type), _event_id(static_cast(event_id)), _mask(static_cast(mask)) { - /*std::memset(&_perf_event_attribute, 0, sizeof(perf_event_attr)); - _perf_event_attribute.type = type; - _perf_event_attribute.size = sizeof(perf_event_attr); - _perf_event_attribute.config = event_id; - _perf_event_attribute.disabled = true; - _perf_event_attribute.inherit = 1; - _perf_event_attribute.exclude_kernel = false; - _perf_event_attribute.exclude_hv = false; - _perf_event_attribute.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;*/ + } ~PerfCounter() = default; bool open() { - /*_file_descriptor = syscall(__NR_perf_event_open, &_perf_event_attribute, 0, -1, -1, 0);*/ - return _file_descriptor >= 0; + try { + _counter = Genode::Trace::Performance_counter::acquire(_type); + } catch (Genode::Trace::Pfc_no_avail) { + std::cerr << 
"Failed to open performance counters." << std::endl; + return false; /* nothing was acquired: _counter is unset and must not be programmed */ + } + try { + Genode::Trace::Performance_counter::setup(_counter, _event_id, _mask, (_type == Genode::Trace::Performance_counter::Type::CORE ? 0x30000 : 0x550f000000000000)); + } catch (Genode::Trace::Pfc_access_error &e) { + std::cerr << "Error while setting up performance counter: " << e.error_code() << std::endl; + return false; /* counter is acquired but was not programmed */ + } + return _counter >= 0; } bool start() { - //ioctl(_file_descriptor, PERF_EVENT_IOC_RESET, 0); - //ioctl(_file_descriptor, PERF_EVENT_IOC_ENABLE, 0); - return ::read(_file_descriptor, &_prev, sizeof(read_format)) == sizeof(read_format); + try { + Genode::Trace::Performance_counter::start(_counter); + _prev.value = static_cast(Genode::Trace::Performance_counter::read(_counter)); + } + catch (Genode::Trace::Pfc_access_error &e) { + std::cerr << "Failed to start counter: " << e.error_code() << std::endl; + return false; /* start or read failed: _prev holds no valid baseline */ + } + return _prev.value >= 0; } bool stop() { - //const auto is_read = ::read(_file_descriptor, &_data, sizeof(read_format)) == sizeof(read_format); - //ioctl(_file_descriptor, PERF_EVENT_IOC_DISABLE, 0); - return false; // is_read; + try { + _data.value = Genode::Trace::Performance_counter::read(_counter); + Genode::Trace::Performance_counter::stop(_counter); + Genode::Trace::Performance_counter::reset(_counter); + } + catch (Genode::Trace::Pfc_access_error &e) { + std::cerr << "Failed to stop counter: " << e.error_code() << std::endl; + return false; /* read, stop or reset did not complete */ + } + // const auto is_read = ::read(_file_descriptor, &_data, sizeof(read_format)) == sizeof(read_format); + // ioctl(_file_descriptor, PERF_EVENT_IOC_DISABLE, 0); + return _data.value >= 0; // is_read; } [[nodiscard]] double read() const { - const auto multiplexing_correction = static_cast(_data.time_enabled - _prev.time_enabled) / - static_cast(_data.time_running - _prev.time_running); - return static_cast(_data.value - _prev.value) * multiplexing_correction; + return static_cast(_data.value - _prev.value); } [[nodiscard]] const std::string &name() const { 
return _name; } @@ -84,8 +99,10 @@ private: }; const std::string _name; - std::int32_t _file_descriptor = -1; - //perf_event_attr _perf_event_attribute{}; + Genode::Trace::Performance_counter::Type _type; + Genode::uint64_t _event_id; + Genode::uint64_t _mask; + Genode::Trace::Performance_counter::Counter _counter; read_format _prev{}; read_format _data{}; }; @@ -101,11 +118,11 @@ public: [[maybe_unused]] static PerfCounter L1_MISSES; [[maybe_unused]] [[maybe_unused]] static PerfCounter LLC_MISSES; [[maybe_unused]] static PerfCounter LLC_REFERENCES; - [[maybe_unused]] static PerfCounter STALLED_CYCLES_BACKEND; - [[maybe_unused]] static PerfCounter STALLS_MEM_ANY; + //[[maybe_unused]] static PerfCounter STALLED_CYCLES_BACKEND; + //[[maybe_unused]] static PerfCounter STALLS_MEM_ANY; [[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_NTA; - [[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_T0; - [[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_T1_T2; + //[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_T0; + //[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_T1_T2; [[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_WRITE; Perf() noexcept = default; diff --git a/repos/mml/src/app/blinktree/blinktree_benchmark/benchmark.cpp b/repos/mml/src/app/blinktree/blinktree_benchmark/benchmark.cpp index d0c124e05e..80c4ef7c7e 100644 --- a/repos/mml/src/app/blinktree/blinktree_benchmark/benchmark.cpp +++ b/repos/mml/src/app/blinktree/blinktree_benchmark/benchmark.cpp @@ -22,16 +22,14 @@ Benchmark::Benchmark(Libc::Env &env, benchmark::Cores &&cores, const std::uint16 _result_file_name(std::move(result_file_name)), _statistic_file_name(std::move(statistic_file_name)), _tree_file_name(std::move(tree_file_name)), _profile(profile), _workload(env) { -#ifdef PERF_SUPPORT if (use_performance_counter) { this->_chronometer.add(benchmark::Perf::CYCLES); this->_chronometer.add(benchmark::Perf::INSTRUCTIONS); - 
this->_chronometer.add(benchmark::Perf::STALLS_MEM_ANY); + //this->_chronometer.add(benchmark::Perf::STALLS_MEM_ANY); this->_chronometer.add(benchmark::Perf::SW_PREFETCH_ACCESS_NTA); this->_chronometer.add(benchmark::Perf::SW_PREFETCH_ACCESS_WRITE); } -#endif std::cout << "core configuration: \n" << this->_cores.dump(2) << std::endl; this->_workload.build(fill_workload_file, mixed_workload_file); @@ -117,7 +115,18 @@ void Benchmark::requests_finished() if (open_requests == 0U) // All request schedulers are done. { + std::uint16_t core_id = mx::system::topology::core_id(); + if (core_id != 0) + { + this->_open_requests++; + auto *stop_task = mx::tasking::runtime::new_task(0U, *this); + stop_task->annotate(static_cast(0)); + mx::tasking::runtime::spawn(*stop_task, core_id); + return; + } + // Stop and print time (and performance counter). + //Genode::log("Stopping timer"); const auto result = this->_chronometer.stop(this->_workload.size()); mx::tasking::runtime::stop(); @@ -126,7 +135,7 @@ void Benchmark::requests_finished() //std::cout << result << std::endl; //if (mx::system::topology::core_id() == 0) //std::cout << result << "\t " << (_end - _start) << " cycles" << std::endl; - std::cout << result.to_json().dump() << std::endl; + std::cout << "core: " << mx::system::topology::core_id() << result.to_json().dump() << std::endl; // std::cout << result << std::endl; diff --git a/repos/mml/src/app/blinktree/blinktree_benchmark/benchmark.h b/repos/mml/src/app/blinktree/blinktree_benchmark/benchmark.h index 44f1cf9454..67cf8157da 100644 --- a/repos/mml/src/app/blinktree/blinktree_benchmark/benchmark.h +++ b/repos/mml/src/app/blinktree/blinktree_benchmark/benchmark.h @@ -110,6 +110,7 @@ private: [[nodiscard]] std::string profile_file_name() const; friend class StartMeasurementTask; + friend class StopMeasurementTask; }; class StartMeasurementTask : public mx::tasking::TaskInterface @@ -123,9 +124,26 @@ class StartMeasurementTask : public mx::tasking::TaskInterface 
mx::tasking::TaskResult execute(const std::uint16_t core_id, const std::uint16_t channel_id) override { + //Genode::log("Starting timer"); _benchmark._chronometer.start(static_cast(static_cast(_benchmark._workload)), _benchmark._current_iteration + 1, _benchmark._cores.current()); //_benchmark._start = Genode::Trace::timestamp(); return mx::tasking::TaskResult::make_remove(); } }; + +class StopMeasurementTask : public mx::tasking::TaskInterface +{ + private: + Benchmark &_benchmark; + + public: + constexpr StopMeasurementTask(Benchmark& benchmark) : _benchmark(benchmark) {} + ~StopMeasurementTask() override = default; + + mx::tasking::TaskResult execute(const std::uint16_t core_id, const std::uint16_t channel_id) override + { + _benchmark.requests_finished(); + return mx::tasking::TaskResult::make_remove(); + } +}; } // namespace application::blinktree_benchmark \ No newline at end of file diff --git a/repos/mml/src/app/blinktree/blinktree_benchmark/main.cpp b/repos/mml/src/app/blinktree/blinktree_benchmark/main.cpp index fe2af33288..de111a6bb6 100644 --- a/repos/mml/src/app/blinktree/blinktree_benchmark/main.cpp +++ b/repos/mml/src/app/blinktree/blinktree_benchmark/main.cpp @@ -9,6 +9,7 @@ #include #include #include +#include using namespace application::blinktree_benchmark; @@ -202,13 +203,13 @@ void Libc::Component::construct(Libc::Env &env) { std::uint16_t cores = env.cpu().affinity_space().total(); char cores_arg[10]; - snprintf(cores_arg, 9, "1:%d", cores); + sprintf(cores_arg, "%d", cores); - char *args[] = {"blinktree_benchmark", "-i", "4", "-pd", "3", cores_arg}; + char *args[] = {"blinktree_benchmark", "-i", "4", "-pd", "3", "-p", cores_arg}; Libc::with_libc([&]() { std::cout << "Starting B-link tree benchmark" << std::endl; - bt_main(env, 6, args); + bt_main(env, 7, args); }); } diff --git a/repos/mml/src/app/blinktree/target.mk b/repos/mml/src/app/blinktree/target.mk index 06ea86154a..6e4c070b3e 100644 --- a/repos/mml/src/app/blinktree/target.mk +++ 
b/repos/mml/src/app/blinktree/target.mk @@ -1,4 +1,5 @@ MXINC_DIR=$(REP_DIR)/src/app/blinktree +GENODE_GCC_TOOLCHAIN_DIR ?= /usr/local/genode/tool/21.05 TARGET = blinktree # soure file for benchmark framework @@ -6,11 +7,18 @@ SRC_MXBENCH = benchmark/workload_set.cpp SRC_MXBENCH += benchmark/workload.cpp SRC_MXBENCH += benchmark/cores.cpp SRC_MXBENCH += benchmark/string_util.cpp +SRC_MXBENCH += benchmark/perf.cpp # source files for blinktree benchmark SRC_BTREE += blinktree_benchmark/main.cpp SRC_BTREE += blinktree_benchmark/benchmark.cpp SRC_CC = ${SRC_MXBENCH} ${SRC_BTREE} -LIBS += base libc stdcxx mxtasking -CC_OPT += -Wno-error -fno-aligned-new -I$(MXINC_DIR) +LIBS += base libc stdcxx mxtasking +EXT_OBJECTS += /usr/local/genode/tool/lib/clang/14.0.5/lib/linux/libclang_rt.builtins-x86_64.a /usr/local/genode/tool/lib/libatomic.a +CUSTOM_CC = /usr/local/genode/tool/bin/clang +CUSTOM_CXX = /usr/local/genode/tool/bin/clang++ +CC_OPT += --target=x86_64-genode --sysroot=/does/not/exist --gcc-toolchain=$(GENODE_GCC_TOOLCHAIN_DIR) -Wno-error -O2 -g -fno-aligned-new -DNDEBUG -I$(MXINC_DIR) -std=c++17 #-D_GLIBCXX_ATOMIC_BUILTINS_8 -D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 +CC_OPT += -femulated-tls -DCLANG_CXX11_ATOMICS CC_CXX_WARN_STRICT = +CUSTOM_CXX_LIB := $(CROSS_DEV_PREFIX)g++ +#CXX_LD += $(CROSS_DEV_PREFIX)g++ diff --git a/repos/mml/src/app/hpc_test/main.cc b/repos/mml/src/app/hpc_test/main.cc new file mode 100644 index 0000000000..c23f10d69e --- /dev/null +++ b/repos/mml/src/app/hpc_test/main.cc @@ -0,0 +1,89 @@ +/** + * @file main.cc + * @author Michael Müller (michael.mueller@uos.de) + * @brief Some test for programming hardware performance counters in NOVA + * @version 0.1 + * @date 2022-12-14 + * + * @copyright Copyright (c) 2022 + * + */ + +#include +#include + +#include +#include +#include +#include + +int main(void) +{ + Nova::mword_t event = 0x26; + Nova::mword_t mask = 0x00; + Nova::mword_t flags = 0x70000; + Nova::uint8_t rc; + + if ((rc = 
Nova::hpc_ctrl(Nova::HPC_SETUP, 0, 1, event, mask, flags)) != Nova::NOVA_OK) { + std::cerr << "Failed to setup performance counter 0" << std::endl; + return -1; + } + + std::cout << "Counter 0 setup" << std::endl; + event = 0x60; + mask = 0xfe; + if ((rc = Nova::hpc_ctrl(Nova::HPC_SETUP, 1, 1, event, mask, flags)) != Nova::NOVA_OK) + { + std::cerr << "Failed to setup performance counter 1, rc = " << static_cast(rc) << std::endl; + return -1; + } + + event = 0x62; + mask = 0x1; + if ((rc = Nova::hpc_ctrl(Nova::HPC_SETUP, 2, 1, event, mask, flags)) != Nova::NOVA_OK) + { + std::cerr << "Failed to setup performance counter 2, rc = " << static_cast(rc) << std::endl; + return -1; + } + if ((rc = Nova::hpc_start(0, 1)) != Nova::NOVA_OK) { + std::cerr << "Failed to start counter 0" << std::endl; + return -2; + } + + if ((rc = Nova::hpc_start(1, 1)) != Nova::NOVA_OK) { + std::cerr << "Failed to start counter 1" << std::endl; + return -2; + } + + if ((rc = Nova::hpc_start(2, 1)) != Nova::NOVA_OK) { + std::cerr << "Failed to start counter 2" << std::endl; + return -2; + } + + for (;;) { + std::this_thread::sleep_for(std::chrono::milliseconds(2000)); + Nova::mword_t count = 0; + + _mm_clflush(&count); + if ((rc = Nova::hpc_read(0, 1, count)) != Nova::NOVA_OK) + { + std::cerr << "Failed to read counter 0" << std::endl; + } + std::cout << count << " cache line flushes" << std::endl; + + Nova::mword_t latency = 0; + if ((rc = Nova::hpc_read(2, 1, latency)) != Nova::NOVA_OK) + { + std::cerr << "Failed to read counter 2" << std::endl; + } + Nova::mword_t l2_requests = 0; + if ((rc = Nova::hpc_read(1, 1, l2_requests)) != Nova::NOVA_OK) + { + std::cerr << "Failed to read counter 1" << std::endl; + } + count = (l2_requests != 0) ? (latency * 4) / l2_requests : 0; /* a failed read leaves l2_requests at 0; do not divide by it */ + std::cout << "L2 latency:" << count << " cycles" << std::endl; + } + + return 0; +} diff --git a/repos/mml/src/app/hpc_test/target.mk b/repos/mml/src/app/hpc_test/target.mk new file mode 100644 index 0000000000..0d72ae45a4 --- /dev/null +++ 
b/repos/mml/src/app/hpc_test/target.mk @@ -0,0 +1,5 @@ +TARGET = hpc_test +SRC_CC = trace_pfc.cc +LIBS += base posix libm libc stdcxx +CC_OPT += -Wno-error -Wno-permissive -fpermissive -Wno-error=conversion + diff --git a/repos/mml/src/app/hpc_test/trace_pfc.cc b/repos/mml/src/app/hpc_test/trace_pfc.cc new file mode 100644 index 0000000000..15fa27beb0 --- /dev/null +++ b/repos/mml/src/app/hpc_test/trace_pfc.cc @@ -0,0 +1,105 @@ +/** + * @file trace_pfc.cc + * @author Michael Müller (michael.mueller@uos.de) + * @brief Tests for Genode wrappers around Performance counter syscalls in NOVA + * @version 0.1 + * @date 2022-12-15 + * + * @copyright Copyright (c) 2022 + * + */ + +#include + +#include +#include +#include +#include + +using namespace Genode; + +int main(void) +{ + Trace::Performance_counter::Counter ctr_clflush, ctr_l2_latency, ctr_l2_requests, /*ctr_l3_miss,*/ ctr_l2_prefetch; + + try { + ctr_clflush = Trace::Performance_counter::alloc_core(); + ctr_l2_latency = Trace::Performance_counter::alloc_core(); + ctr_l2_requests = Trace::Performance_counter::alloc_core(); + ctr_l2_prefetch = Trace::Performance_counter::acquire(Trace::Performance_counter::Type::CORE); + // ctr_l3_miss = Trace::Performance_counter::alloc_cbo(); + } + catch (Trace::Pfc_no_avail) + { + std::cout << "Unable to allocate performance counters." << std::endl; + return -1; + } + + std::cout << "Performance counter allocation successful." << std::endl; + + try { + Trace::Performance_counter::setup(ctr_clflush, 0x26, 0x00, 0x70000); + Trace::Performance_counter::setup(ctr_l2_latency, 0x62, 0x01, 0x30000); + Trace::Performance_counter::setup(ctr_l2_requests, 0x60, 0xfe, 0x30000); + Trace::Performance_counter::setup(ctr_l2_prefetch, 0xc0, 0x00, 0x30000); + //Trace::Performance_counter::setup(ctr_l3_miss, 0x6, 0xff, 0x550f000000000000); + } catch (Trace::Pfc_access_error &e) { + std::cerr << "PFC access failed. 
rc=" << e.error_code() << std::endl; + return -1; + } + + std::cout << "Performance counters successfully set up." << std::endl; + + try { + Trace::Performance_counter::start(ctr_clflush); + Trace::Performance_counter::start(ctr_l2_latency); + Trace::Performance_counter::start(ctr_l2_requests); + Trace::Performance_counter::start(ctr_l2_prefetch); + //Trace::Performance_counter::start(ctr_l3_miss); + } catch (Trace::Pfc_access_error &e) { + std::cerr << "PFC access failed. rc=" << e.error_code() << std::endl; + return -1; + } + + std::cout << "Performance counters started." << std::endl; + + for (;;) { + Genode::uint64_t clflushes, latency, requests, /*l3_misses,*/ l2_prefetches; + clflushes = latency = requests = l2_prefetches = 0; + + std::this_thread::sleep_for(std::chrono::seconds(2)); + _mm_clflush(&clflushes); + _mm_clflush(&clflushes); + + try { + clflushes = Trace::Performance_counter::read(ctr_clflush); + latency = Trace::Performance_counter::read(ctr_l2_latency); + requests = Trace::Performance_counter::read(ctr_l2_requests); + l2_prefetches = Trace::Performance_counter::read(ctr_l2_prefetch); + //l3_misses = Trace::Performance_counter::read(ctr_l3_miss); + } catch (Trace::Pfc_access_error &e) { + std::cerr << "PFC access failed. rc=" << e.error_code() << std::endl; + return 1; + } + + std::cout << clflushes << " cache line flushes." << std::endl; + //std::cout << "L2 latency: " << (latency * 4) / requests << " cycles." << std::endl; + std::cout << l2_prefetches << " L2 prefetch requests." 
<< std::endl; + /* + try { + Trace::Performance_counter::stop(ctr_l2_prefetch); + Trace::Performance_counter::reset(ctr_l2_prefetch, 0xdeadbeef); + Trace::Performance_counter::start(ctr_l2_prefetch); + std::cout << Trace::Performance_counter::read(ctr_l2_prefetch) << " L2 prefetches after context-switch" << std::endl; + Trace::Performance_counter::stop(ctr_l2_prefetch); + Trace::Performance_counter::reset(ctr_l2_prefetch, l2_prefetches); + Trace::Performance_counter::start(ctr_l2_prefetch); + } catch (Trace::Pfc_access_error &e) { + std::cerr << "PFC access failed. rc=" << e.error_code() << std::endl; + } +*/ + // std::cout << l3_misses << " L3 misses" << std::endl; + } + + return 0; +} \ No newline at end of file diff --git a/repos/mml/src/app/libpfm_test/check_events.c b/repos/mml/src/app/libpfm_test/check_events.c new file mode 100644 index 0000000000..9edaebd7e1 --- /dev/null +++ b/repos/mml/src/app/libpfm_test/check_events.c @@ -0,0 +1,174 @@ +/* + * check_events.c - show event encoding + * + * Copyright (c) 2009 Google, Inc + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. + */ +#include +#include +#include +#include +#include +#include +#include + +#include + +int pmu_is_present(pfm_pmu_t p) +{ + pfm_pmu_info_t pinfo; + int ret; + + memset(&pinfo, 0, sizeof(pinfo)); + ret = pfm_get_pmu_info(p, &pinfo); + return ret == PFM_SUCCESS ? pinfo.is_present : 0; +} + +int main(int argc, const char **argv) +{ + pfm_pmu_info_t pinfo; + pfm_pmu_encode_arg_t e; + const char *arg[3]; + const char **p; + char *fqstr; + pfm_event_info_t info; + int j, ret; + pfm_pmu_t i; + int total_supported_events = 0; + int total_available_events = 0; + + unsigned long low, high, msr; + msr = 0xc0010200; + + asm volatile("rdmsr" + : "=a"(low), "=d"(high) + : "c"(msr)); /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFM_SUCCESS) + errx(1, "cannot initialize library: %s\n", pfm_strerror(ret)); + + memset(&pinfo, 0, sizeof(pinfo)); + memset(&info, 0, sizeof(info)); + + printf("Supported PMU models:\n"); + for (i = PFM_PMU_NONE; i < PFM_PMU_MAX; i++) + { + ret = pfm_get_pmu_info(i, &pinfo); + if (ret != PFM_SUCCESS) + continue; + + printf("\t[%d, %s, \"%s\"]\n", i, pinfo.name, pinfo.desc); + } + + printf("Detected PMU models:\n"); + for (i = PFM_PMU_NONE; i < PFM_PMU_MAX; i++) + { + ret = pfm_get_pmu_info(i, &pinfo); + if (ret != PFM_SUCCESS) + continue; + if (pinfo.is_present) + { + printf("\t[%d, %s, \"%s\"]\n", i, pinfo.name, pinfo.desc); + total_supported_events += pinfo.nevents; + } + total_available_events += pinfo.nevents; + } + + printf("Total events: %d available, %d supported\n", total_available_events, 
total_supported_events); + + /* + * be nice to user! + */ + if (argc < 2 && pmu_is_present(PFM_PMU_PERF_EVENT)) + { + arg[0] = "PERF_COUNT_HW_CPU_CYCLES"; + arg[1] = "PERF_COUNT_HW_INSTRUCTIONS"; + arg[2] = NULL; + p = arg; + } + else + { + p = argv + 1; + } + + if (!*p) + errx(1, "you must pass at least one event"); + + memset(&e, 0, sizeof(e)); + while (*p) + { + /* + * extract raw event encoding + * + * For perf_event encoding, use + * #include + * and the function: + * pfm_get_perf_event_encoding() + */ + fqstr = NULL; + e.fstr = &fqstr; + ret = pfm_get_os_event_encoding(*p, PFM_PLM0 | PFM_PLM3, PFM_OS_NONE, &e); + if (ret != PFM_SUCCESS) + { + /* + * codes is too small for this event + * free and let the library resize + */ + if (ret == PFM_ERR_TOOSMALL) + { + free(e.codes); + e.codes = NULL; + e.count = 0; + free(fqstr); + continue; + } + if (ret == PFM_ERR_NOTFOUND && strstr(*p, "::")) + errx(1, "%s: try setting LIBPFM_ENCODE_INACTIVE=1", pfm_strerror(ret)); + errx(1, "cannot encode event %s: %s", *p, pfm_strerror(ret)); + } + ret = pfm_get_event_info(e.idx, PFM_OS_NONE, &info); + if (ret != PFM_SUCCESS) + errx(1, "cannot get event info: %s", pfm_strerror(ret)); + + ret = pfm_get_pmu_info(info.pmu, &pinfo); + if (ret != PFM_SUCCESS) + errx(1, "cannot get PMU info: %s", pfm_strerror(ret)); + + printf("Requested Event: %s\n", *p); + printf("Actual Event: %s\n", fqstr); + printf("PMU : %s\n", pinfo.desc); + printf("IDX : %d\n", e.idx); + printf("Codes :"); + for (j = 0; j < e.count; j++) + printf(" 0x%" PRIx64, e.codes[j]); + putchar('\n'); + + free(fqstr); + p++; + } + if (e.codes) + free(e.codes); + return 0; +} \ No newline at end of file diff --git a/repos/mml/src/app/libpfm_test/showevtinfo.c b/repos/mml/src/app/libpfm_test/showevtinfo.c new file mode 100644 index 0000000000..3c775c8da0 --- /dev/null +++ b/repos/mml/src/app/libpfm_test/showevtinfo.c @@ -0,0 +1,1020 @@ +/* + * showevtinfo.c - show event information + * + * Copyright (c) 2010 Google, Inc + * 
Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. 
+ */
+/*
+ * NOTE(review): the header names of the #include directives below are
+ * missing in this copy of the patch; restore them from the original
+ * file before building.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#define MAXBUF 1024
+#define COMBO_MAX 18
+
+/* command line settings, filled in by main() */
+static struct
+{
+    int compact;
+    int sort;
+    uint8_t encode;
+    uint8_t combo;
+    uint8_t combo_lim;
+    uint8_t name_only;
+    uint8_t desc;
+    char *csv_sep;
+    pfm_event_info_t efilter;
+    pfm_event_attr_info_t ufilter;
+    pfm_os_t os;
+    uint64_t mask;
+} options;
+
+/* (event code, event index) pair used when sorting events by code */
+typedef struct
+{
+    uint64_t code;
+    int idx;
+} code_info_t;
+
+static void show_event_info_compact(pfm_event_info_t *info);
+
+/* printable names of the attribute sources, indexed by ainfo.ctrl */
+static const char *srcs[PFM_ATTR_CTRL_MAX] = {
+    [PFM_ATTR_CTRL_UNKNOWN] = "???",
+    [PFM_ATTR_CTRL_PMU] = "PMU",
+    [PFM_ATTR_CTRL_PERF_EVENT] = "perf_event",
+};
+
+/*
+ * Set environment variable var to value.
+ *
+ * The PFMLIB_WINDOWS variant builds a "var=value" string and hands it to
+ * putenv() (ov is not used there); otherwise this is a thin wrapper
+ * around setenv().
+ */
+#ifdef PFMLIB_WINDOWS
+int set_env_var(const char *var, const char *value, int ov)
+{
+    size_t len;
+    char *str;
+    int ret;
+
+    len = strlen(var) + 1 + strlen(value) + 1;
+
+    str = malloc(len);
+    if (!str)
+        return PFM_ERR_NOMEM;
+
+    sprintf(str, "%s=%s", var, value);
+
+    ret = putenv(str);
+
+    free(str);
+
+    return ret ? PFM_ERR_INVAL : PFM_SUCCESS;
+}
+#else
+static inline int
+set_env_var(const char *var, const char *value, int ov)
+{
+    return setenv(var, value, ov);
+}
+#endif
+
+/*
+ * Return non-zero when the first ':' in s is immediately followed by a
+ * second ':', i.e. the string has the pmu::event form.
+ */
+static int
+event_has_pname(char *s)
+{
+    char *p;
+    return (p = strchr(s, ':')) && *(p + 1) == ':';
+}
+
+/*
+ * Encode event string buf for privilege levels plm and print its codes,
+ * one csv_sep-terminated column per slot, padded with empty columns up
+ * to max_encoding.
+ *
+ * Returns 0 on success and -1 when the event cannot be encoded; exits
+ * when the library answers PFM_ERR_NOTFOUND.
+ */
+static int
+print_codes(char *buf, int plm, int max_encoding)
+{
+    uint64_t *codes = NULL;
+    int j, ret, count = 0;
+
+    /* use the caller's privilege mask rather than a hard-coded one */
+    ret = pfm_get_event_encoding(buf, plm, NULL, NULL, &codes, &count);
+    if (ret != PFM_SUCCESS)
+    {
+        if (ret == PFM_ERR_NOTFOUND)
+            errx(1, "encoding failed, try setting env variable LIBPFM_ENCODE_INACTIVE=1");
+        return -1;
+    }
+    for (j = 0; j < max_encoding; j++)
+    {
+        if (j < count)
+            printf("0x%" PRIx64, codes[j]);
+        printf("%s", options.csv_sep);
+    }
+    free(codes);
+    return 0;
+}
+
+/*
+ * Return 0 when event string buf can be encoded for privilege levels
+ * plm, -1 otherwise. Nothing is printed.
+ */
+static int
+check_valid(char *buf, int plm)
+{
+    uint64_t *codes = NULL;
+    int ret, count = 0;
+
+    /* use the caller's privilege mask rather than a hard-coded one */
+    ret = pfm_get_event_encoding(buf, plm, NULL, NULL, &codes, &count);
+    if (ret != PFM_SUCCESS)
+        return -1;
+    free(codes);
+    return 0;
+}
+
+/*
+ * Compare the is_dfl/is_precise flags of attribute info against the
+ * flags requested in options.ufilter.
+ *
+ * Returns non-zero when no attribute filter is set or when at least one
+ * requested flag is present on the attribute.
+ */
+static int
+match_ufilters(pfm_event_attr_info_t *info)
+{
+    uint32_t ufilter1 = 0;
+    uint32_t ufilter2 = 0;
+
+    if (options.ufilter.is_dfl)
+        ufilter1 |= 0x1;
+
+    if (info->is_dfl)
+        ufilter2 |= 0x1;
+
+    if (options.ufilter.is_precise)
+        ufilter1 |= 0x2;
+
+    if (info->is_precise)
+        ufilter2 |= 0x2;
+
+    if (!ufilter1)
+        return 1;
+
+    /* at least one filter matches */
+    return ufilter1 & ufilter2;
+}
+
+/*
+ * Decide whether event info passes the event and attribute filters.
+ *
+ * Returns 0 when a precise event is requested and the event is not
+ * precise, 1 as soon as one attribute passes match_ufilters(), and
+ * otherwise 1 only when the event has no unit mask at all.
+ */
+static int
+match_efilters(pfm_event_info_t *info)
+{
+    pfm_event_attr_info_t ainfo;
+    int n = 0;
+    int i, ret;
+
+    if (options.efilter.is_precise && !info->is_precise)
+        return 0;
+
+    memset(&ainfo, 0, sizeof(ainfo));
+    ainfo.size = sizeof(ainfo);
+
+    pfm_for_each_event_attr(i, info)
+    {
+        ret = pfm_get_event_attr_info(info->idx, i, options.os, &ainfo);
+        if (ret != PFM_SUCCESS)
+            continue;
+        if (match_ufilters(&ainfo))
+            return 1;
+        if (ainfo.type == PFM_ATTR_UMASK)
+            n++;
+    }
+    return n ? 0 : 1;
+}
+
+/*
+ * Print every unit mask combination of event info that the library
+ * accepts, one "pmu::event:umask[:umask...]" per line, preceded by the
+ * encoding columns when options.encode is set.
+ *
+ * Falls back to show_event_info_compact() when the event has more unit
+ * masks than options.combo_lim.
+ */
+static void
+show_event_info_combo(pfm_event_info_t *info)
+{
+    pfm_event_attr_info_t *ainfo;
+    pfm_pmu_info_t pinfo;
+    char buf[MAXBUF];
+    size_t len;
+    int numasks = 0;
+    int i, j, ret;
+    uint64_t total, m, u;
+
+    memset(&pinfo, 0, sizeof(pinfo));
+
+    pinfo.size = sizeof(pinfo);
+
+    ret = pfm_get_pmu_info(info->pmu, &pinfo);
+    if (ret != PFM_SUCCESS)
+        errx(1, "cannot get PMU info");
+
+    ainfo = calloc(info->nattrs, sizeof(*ainfo));
+    /* calloc(0, ...) may legitimately return NULL, only fail for real requests */
+    if (!ainfo && info->nattrs > 0)
+        err(1, "event %s : ", info->name);
+
+    /*
+     * extract attribute information and count number
+     * of umasks
+     *
+     * we cannot just drop non umasks because we need
+     * to keep attributes in order for the enumeration
+     * of 2^n
+     */
+    pfm_for_each_event_attr(i, info)
+    {
+        ainfo[i].size = sizeof(*ainfo);
+
+        ret = pfm_get_event_attr_info(info->idx, i, options.os, &ainfo[i]);
+        if (ret != PFM_SUCCESS)
+            errx(1, "cannot get attribute info: %s", pfm_strerror(ret));
+
+        if (ainfo[i].type == PFM_ATTR_UMASK)
+            numasks++;
+    }
+    if (numasks > options.combo_lim)
+    {
+        warnx("event %s has too many umasks to print all combinations, dropping to simple enumeration", info->name);
+        free(ainfo);
+        show_event_info_compact(info);
+        return;
+    }
+
+    if (numasks)
+    {
+        /* 1ULL << nattrs is only defined for nattrs < 64 */
+        if (info->nattrs >= (int)((sizeof(total) << 3)))
+        {
+            warnx("too many umasks, cannot show all combinations for event %s", info->name);
+            goto end;
+        }
+        total = 1ULL << info->nattrs;
+
+        /* each set bit of u selects the attribute with that index */
+        for (u = 1; u < total; u++)
+        {
+            ret = snprintf(buf, sizeof(buf), "%s::%s", pinfo.name, info->name);
+            if (ret < 0 || (size_t)ret >= sizeof(buf))
+            {
+                warnx("event name too long: %s", info->name);
+                goto end;
+            }
+            /* len is the number of characters currently used in buf */
+            len = (size_t)ret;
+
+            for (m = u, j = 0; m; m >>= 1, j++)
+            {
+                if (m & 0x1ULL)
+                {
+                    /* we have hit a non umasks attribute, skip */
+                    if (ainfo[j].type != PFM_ATTR_UMASK)
+                        break;
+
+                    ret = snprintf(buf + len, sizeof(buf) - len, ":%s", ainfo[j].name);
+                    if (ret < 0 || (size_t)ret >= sizeof(buf) - len)
+                    {
+                        /* drop the partially appended umask before reporting */
+                        buf[len] = '\0';
+                        warnx("umasks combination too long for event %s", buf);
+                        break;
+                    }
+                    len += (size_t)ret;
+                }
+            }
+            /* if found a valid umask combination, check encoding */
+            if (m == 0)
+            {
+                if (options.encode)
+                    ret = print_codes(buf, PFM_PLM0 | PFM_PLM3, pinfo.max_encoding);
+                else
+                    ret = check_valid(buf, PFM_PLM0 | PFM_PLM3);
+                if (!ret)
+                    printf("%s\n", buf);
+            }
+        }
+    }
+    else
+    {
+        snprintf(buf, sizeof(buf) - 1, "%s::%s", pinfo.name, info->name);
+        buf[sizeof(buf) - 1] = '\0';
+
+        ret = options.encode ? print_codes(buf, PFM_PLM0 | PFM_PLM3, pinfo.max_encoding) : 0;
+        if (!ret)
+            printf("%s\n", buf);
+    }
+end:
+    free(ainfo);
+}
+
+/*
+ * Print event info in compact form: one "pmu::event:umask" line per
+ * unit mask that passes match_ufilters(), or a single "pmu::event" line
+ * when no unit mask was listed and the event passes match_efilters().
+ * Encoding columns and the description are added on request
+ * (options.encode, options.desc).
+ */
+static void
+show_event_info_compact(pfm_event_info_t *info)
+{
+    pfm_event_attr_info_t ainfo;
+    pfm_pmu_info_t pinfo;
+    char buf[MAXBUF];
+    int i, ret, um = 0;
+
+    memset(&ainfo, 0, sizeof(ainfo));
+    memset(&pinfo, 0, sizeof(pinfo));
+
+    pinfo.size = sizeof(pinfo);
+    ainfo.size = sizeof(ainfo);
+
+    ret = pfm_get_pmu_info(info->pmu, &pinfo);
+    if (ret != PFM_SUCCESS)
+        errx(1, "cannot get pmu info: %s", pfm_strerror(ret));
+
+    if (options.name_only)
+    {
+        if (options.encode)
+            printf("0x%-10" PRIx64, info->code);
+        printf("%s\n", info->name);
+        return;
+    }
+    pfm_for_each_event_attr(i, info)
+    {
+        ret = pfm_get_event_attr_info(info->idx, i, options.os, &ainfo);
+        if (ret != PFM_SUCCESS)
+            errx(1, "cannot get attribute info: %s", pfm_strerror(ret));
+
+        if (ainfo.type != PFM_ATTR_UMASK)
+            continue;
+
+        if (!match_ufilters(&ainfo))
+            continue;
+
+        snprintf(buf, sizeof(buf) - 1, "%s::%s:%s", pinfo.name, info->name, ainfo.name);
+        buf[sizeof(buf) - 1] = '\0';
+
+        ret = 0;
+        if (options.encode)
+        {
+            ret = print_codes(buf, PFM_PLM0 | PFM_PLM3, pinfo.max_encoding);
+        }
+        if (!ret)
+        {
+            printf("%s", buf);
+            if (options.desc)
+            {
+                printf("%s", options.csv_sep);
+                printf("\"%s. %s.\"", info->desc, ainfo.desc);
+            }
+            putchar('\n');
+        }
+        um++;
+    }
+    if (um == 0)
+    {
+        if (!match_efilters(info))
+            return;
+
+        snprintf(buf, sizeof(buf) - 1, "%s::%s", pinfo.name, info->name);
+        buf[sizeof(buf) - 1] = '\0';
+        if (options.encode)
+        {
+            ret = print_codes(buf, PFM_PLM0 | PFM_PLM3, pinfo.max_encoding);
+            if (ret)
+                return;
+        }
+        printf("%s", buf);
+        if (options.desc)
+        {
+            printf("%s", options.csv_sep);
+            printf("\"%s.\"", info->desc);
+        }
+        putchar('\n');
+    }
+}
+
+/*
+ * qsort() comparator for code_info_t: orders by event code after
+ * masking both codes with options.mask.
+ */
+int compare_codes(const void *a, const void *b)
+{
+    const code_info_t *aa = a;
+    const code_info_t *bb = b;
+    uint64_t m = options.mask;
+
+    if ((aa->code & m) < (bb->code & m))
+        return -1;
+    if ((aa->code & m) == (bb->code & m))
+        return 0;
+    return 1;
+}
+
+/* Print the bracketed flags of event info, or "None" when it has none. */
+static void
+print_event_flags(pfm_event_info_t *info)
+{
+    int n = 0;
+    int spec = info->is_speculative;
+
+    if (info->is_precise)
+    {
+        printf("[precise] ");
+        n++;
+    }
+
+    if (info->support_hw_smpl)
+    {
+        printf("[hw_smpl] ");
+        n++;
+    }
+
+    if (spec > PFM_EVENT_INFO_SPEC_NA)
+    {
+        printf("[%s] ", spec == PFM_EVENT_INFO_SPEC_TRUE ? "speculative" : "non-speculative");
+        n++;
+    }
+
+    if (!n)
+        printf("None");
+}
+
+/* Print the bracketed flags of attribute info, or "None " when it has none. */
+static void
+print_attr_flags(pfm_event_attr_info_t *info)
+{
+    int n = 0;
+    int spec = info->is_speculative;
+
+    if (info->is_dfl)
+    {
+        printf("[default] ");
+        n++;
+    }
+
+    if (info->is_precise)
+    {
+        printf("[precise] ");
+        n++;
+    }
+
+    if (info->support_hw_smpl)
+    {
+        printf("[hw_smpl] ");
+        n++;
+    }
+
+    if (spec > PFM_EVENT_INFO_SPEC_NA)
+    {
+        printf("[%s] ", spec == PFM_EVENT_INFO_SPEC_TRUE ? "speculative" : "non-speculative");
+        n++;
+    }
+
+    if (!n)
+        printf("None ");
+}
+
+/*
+ * Print the full multi-line description of event info: index, PMU,
+ * name, flags, description, code and one line per attribute. With
+ * options.name_only only the event name is printed. Events rejected by
+ * match_efilters() print nothing.
+ */
+static void
+show_event_info(pfm_event_info_t *info)
+{
+    pfm_event_attr_info_t ainfo;
+    pfm_pmu_info_t pinfo;
+    int mod = 0, um = 0;
+    int i, ret;
+    const char *src;
+
+    if (options.name_only)
+    {
+        printf("%s\n", info->name);
+        return;
+    }
+
+    memset(&ainfo, 0, sizeof(ainfo));
+    memset(&pinfo, 0, sizeof(pinfo));
+
+    pinfo.size = sizeof(pinfo);
+    ainfo.size = sizeof(ainfo);
+
+    if (!match_efilters(info))
+        return;
+    ret = pfm_get_pmu_info(info->pmu, &pinfo);
+    if (ret)
+        errx(1, "cannot get pmu info: %s", pfm_strerror(ret));
+
+    printf("#-----------------------------\n"
+           "IDX : %d\n"
+           "PMU name : %s (%s)\n"
+           "Name : %s\n"
+           "Equiv : %s\n",
+           info->idx,
+           pinfo.name,
+           pinfo.desc,
+           info->name,
+           info->equiv ? info->equiv : "None");
+
+    printf("Flags : ");
+    print_event_flags(info);
+    putchar('\n');
+
+    printf("Desc : %s\n", info->desc ? info->desc : "no description available");
+    printf("Code : 0x%" PRIx64 "\n", info->code);
+
+    pfm_for_each_event_attr(i, info)
+    {
+        ret = pfm_get_event_attr_info(info->idx, i, options.os, &ainfo);
+        if (ret != PFM_SUCCESS)
+            errx(1, "cannot retrieve event %s attribute info: %s", info->name, pfm_strerror(ret));
+
+        if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX)
+        {
+            warnx("event: %s has unsupported attribute source %d", info->name, ainfo.ctrl);
+            ainfo.ctrl = PFM_ATTR_CTRL_UNKNOWN;
+        }
+        src = srcs[ainfo.ctrl];
+        switch (ainfo.type)
+        {
+        case PFM_ATTR_UMASK:
+            if (!match_ufilters(&ainfo))
+                continue;
+
+            printf("Umask-%02u : 0x%02" PRIx64 " : %s : [%s] : ",
+                   um,
+                   ainfo.code,
+                   src,
+                   ainfo.name);
+
+            print_attr_flags(&ainfo);
+
+            putchar(':');
+
+            if (ainfo.equiv)
+                printf(" Alias to %s", ainfo.equiv);
+            else
+                printf(" %s", ainfo.desc);
+
+            putchar('\n');
+            um++;
+            break;
+        case PFM_ATTR_MOD_BOOL:
+            printf("Modif-%02u : 0x%02" PRIx64 " : %s : [%s] : %s (boolean)\n", mod, ainfo.code, src, ainfo.name, ainfo.desc);
+            mod++;
+            break;
+        case PFM_ATTR_MOD_INTEGER:
+            printf("Modif-%02u : 0x%02" PRIx64 " : %s : [%s] : %s (integer)\n", mod, ainfo.code, src, ainfo.name, ainfo.desc);
+            mod++;
+            break;
+        default:
+            /* same column order as the other cases: source, then [name] */
+            printf("Attr-%02u : 0x%02" PRIx64 " : %s : [%s] : %s\n", i, ainfo.code, src, ainfo.name, ainfo.desc);
+        }
+    }
+}
+
+/*
+ * Walk the events of all PMUs and print every event whose
+ * "pmu::event" name matches the compiled regex preg, using the output
+ * mode selected in options. PMUs that are not present are only
+ * considered when event carries a pmu:: prefix.
+ *
+ * Returns the number of events printed.
+ */
+static int
+show_info(char *event, regex_t *preg)
+{
+    pfm_pmu_info_t pinfo;
+    pfm_event_info_t info;
+    pfm_pmu_t j;
+    int i, ret, match = 0, pname;
+    size_t len, l = 0;
+    char *fullname = NULL;
+
+    memset(&pinfo, 0, sizeof(pinfo));
+    memset(&info, 0, sizeof(info));
+
+    pinfo.size = sizeof(pinfo);
+    info.size = sizeof(info);
+
+    pname = event_has_pname(event);
+
+    /*
+     * scan all supported events, incl. those
+     * from undetected PMU models
+     */
+    pfm_for_all_pmus(j)
+    {
+
+        ret = pfm_get_pmu_info(j, &pinfo);
+        if (ret != PFM_SUCCESS)
+            continue;
+
+        /* no pmu prefix, just look for detected PMU models */
+        if (!pname && !pinfo.is_present)
+            continue;
+
+        for (i = pinfo.first_event; i != -1; i = pfm_get_event_next(i))
+        {
+            ret = pfm_get_event_info(i, options.os, &info);
+            if (ret != PFM_SUCCESS)
+                errx(1, "cannot get event info: %s", pfm_strerror(ret));
+
+            /* room for "pmu" + "::" + "event" + NUL, grown on demand */
+            len = strlen(info.name) + strlen(pinfo.name) + 1 + 2;
+            if (len > l)
+            {
+                l = len;
+                fullname = realloc(fullname, l);
+                if (!fullname)
+                    err(1, "cannot allocate memory");
+            }
+            sprintf(fullname, "%s::%s", pinfo.name, info.name);
+
+            if (regexec(preg, fullname, 0, NULL, 0) == 0)
+            {
+                if (options.compact)
+                {
+                    if (options.combo)
+                        show_event_info_combo(&info);
+                    else
+                        show_event_info_compact(&info);
+                }
+                else
+                {
+                    show_event_info(&info);
+                }
+                match++;
+            }
+        }
+    }
+    if (fullname)
+        free(fullname);
+
+    return match;
+}
+
+/*
+ * Same as show_info(), but within each PMU the events are printed in
+ * ascending order of (code & options.mask). All PMUs are scanned.
+ *
+ * Returns the number of events printed.
+ */
+static int
+show_info_sorted(char *event, regex_t *preg)
+{
+    pfm_pmu_info_t pinfo;
+    pfm_event_info_t info;
+    pfm_pmu_t j;
+    int i, ret, n, match = 0;
+    size_t len, l = 0;
+    char *fullname = NULL;
+    code_info_t *codes;
+
+    memset(&pinfo, 0, sizeof(pinfo));
+    memset(&info, 0, sizeof(info));
+
+    pinfo.size = sizeof(pinfo);
+    info.size = sizeof(info);
+
+    pfm_for_all_pmus(j)
+    {
+
+        ret = pfm_get_pmu_info(j, &pinfo);
+        if (ret != PFM_SUCCESS)
+            continue;
+
+        /* nothing to sort; also avoids treating malloc(0) == NULL as fatal */
+        if (pinfo.nevents <= 0)
+            continue;
+
+        codes = malloc(pinfo.nevents * sizeof(*codes));
+        if (!codes)
+            err(1, "cannot allocate memory\n");
+
+        /* scans all supported events */
+        n = 0;
+        for (i = pinfo.first_event; i != -1; i = pfm_get_event_next(i))
+        {
+
+            ret = pfm_get_event_info(i, options.os, &info);
+            if (ret != PFM_SUCCESS)
+                errx(1, "cannot get event info: %s", pfm_strerror(ret));
+
+            if (info.pmu != j)
+                continue;
+
+            codes[n].idx = info.idx;
+            codes[n].code = info.code;
+            n++;
+        }
+        qsort(codes, n, sizeof(*codes), compare_codes);
+        for (i = 0; i < n; i++)
+        {
+            ret = pfm_get_event_info(codes[i].idx, options.os, &info);
+            if (ret != PFM_SUCCESS)
+                errx(1, "cannot get event info: %s", pfm_strerror(ret));
+
+            /* room for "pmu" + "::" + "event" + NUL, grown on demand */
+            len = strlen(info.name) + strlen(pinfo.name) + 1 + 2;
+            if (len > l)
+            {
+                l = len;
+                fullname = realloc(fullname, l);
+                if (!fullname)
+                    err(1, "cannot allocate memory");
+            }
+            sprintf(fullname, "%s::%s", pinfo.name, info.name);
+
+            if (regexec(preg, fullname, 0, NULL, 0) == 0)
+            {
+                if (options.compact)
+                    show_event_info_compact(&info);
+                else
+                    show_event_info(&info);
+                match++;
+            }
+        }
+        free(codes);
+    }
+    if (fullname)
+        free(fullname);
+
+    return match;
+}
+
+/* Print the command line help text to stdout. */
+static void
+usage(void)
+{
+    printf("showevtinfo [-L] [-E] [-h] [-s] [-m mask]\n"
+           "-L\t\tlist one event per line (compact mode)\n"
+           "-E\t\tlist one event per line with encoding (compact mode)\n"
+           "-M\t\tdisplay all valid unit masks combination (use with -L or -E)\n"
+           "-h\t\tget help\n"
+           "-s\t\tsort event by PMU and by code based on -m mask\n"
+           "-l\t\tmaximum number of umasks to list all combinations (default: %d)\n"
+           "-F\t\tshow only events and attributes with certain flags (precise,...)\n"
+           "-m mask\t\thexadecimal event code mask, bits to match when sorting\n"
+           "-x sep\t\tuse sep as field separator in compact mode\n"
+           "-D\t\t\tprint event description in compact mode\n"
+           "-O os\t\tshow attributes for the specific operating system\n",
+           COMBO_MAX);
+}
+
+/*
+ * keep: [pmu::]event
+ * drop everything else
+ *
+ * The string is truncated in place.
+ */
+static void
+drop_event_attributes(char *str)
+{
+    char *p;
+
+    p = strchr(str, ':');
+    if (!p)
+        return;
+
+    str = p + 1;
+    /* keep PMU name */
+    if (*str == ':')
+        str++;
+
+    /* stop string at 1st attribute */
+    p = strchr(str, ':');
+    if (p)
+        *p = '\0';
+}
+
+#define EVENT_FLAGS(n, f, l)            \
+    {                                   \
+        .name = n, .ebit = f, .ubit = l \
+    }
+struct attr_flags
+{
+    const char *name;
+    int ebit; /* bit position in pfm_event_info_t.flags, -1 means ignore */
+    int ubit; /* bit position in pfm_event_attr_info_t.flags, -1 means ignore */
+};
+
+/* filter names accepted by -F, terminated by a NULL name */
+static const struct attr_flags event_flags[] = {
+    EVENT_FLAGS("precise", 0, 1),
+    EVENT_FLAGS("pebs", 0, 1),
+    EVENT_FLAGS("default", -1, 0),
+    EVENT_FLAGS("dfl", -1, 0),
+    EVENT_FLAGS(NULL, 0, 0)};
+
+/*
+ * Parse the comma separated filter list arg (modified in place) and
+ * set the matching bits in options.efilter and options.ufilter. Names
+ * are compared case-insensitively; names not found in event_flags are
+ * ignored.
+ */
+static void
+parse_filters(char *arg)
+{
+    const struct attr_flags *attr;
+    char *p;
+
+    while (arg)
+    {
+        p = strchr(arg, ',');
+        if (p)
+            *p++ = 0;
+
+        for (attr = event_flags; attr->name; attr++)
+        {
+            if (!strcasecmp(attr->name, arg))
+            {
+                switch (attr->ebit)
+                {
+                case 0:
+                    options.efilter.is_precise = 1;
+                    break;
+                case -1:
+                    break;
+                default:
+                    errx(1, "unknown event flag %d", attr->ebit);
+                }
+                switch (attr->ubit)
+                {
+                case 0:
+                    options.ufilter.is_dfl = 1;
+                    break;
+                case 1:
+                    options.ufilter.is_precise = 1;
+                    break;
+                case -1:
+                    break;
+                default:
+                    errx(1, "unknown umask flag %d", attr->ubit);
+                }
+                break;
+            }
+        }
+        arg = p;
+    }
+}
+
+/* OS layer names accepted by -O, terminated by a NULL name */
+static const struct
+{
+    char *name;
+    pfm_os_t os;
+} supported_oses[] = {
+    {.name = "none", .os = PFM_OS_NONE},
+    {.name = "raw", .os = PFM_OS_NONE},
+    {.name = "pmu", .os = PFM_OS_NONE},
+
+    {.name = "perf", .os = PFM_OS_PERF_EVENT},
+    {.name = "perf_ext", .os = PFM_OS_PERF_EVENT_EXT},
+    {
+        .name = NULL,
+    }};
+
+/* printable PMU type names, indexed by pinfo.type */
+static const char *pmu_types[] = {
+    "unknown type",
+    "core",
+    "uncore",
+    "OS generic",
+};
+
+/*
+ * Store the OS layer named ostr in options.os. When the name is not in
+ * supported_oses, the accepted names are printed to stderr and the
+ * program exits with status 1.
+ */
+static void
+setup_os(char *ostr)
+{
+    int i;
+
+    for (i = 0; supported_oses[i].name; i++)
+    {
+        if (!strcmp(supported_oses[i].name, ostr))
+        {
+            options.os = supported_oses[i].os;
+            return;
+        }
+    }
+    fprintf(stderr, "unknown OS layer %s, choose from:", ostr);
+    for (i = 0; supported_oses[i].name; i++)
+    {
+        if (i)
+            fputc(',', stderr);
+        fprintf(stderr, " %s", supported_oses[i].name);
+    }
+    fputc('\n', stderr);
+    exit(1);
+}
+
+/*
+ * Parse the options, initialize libpfm, print the PMU summary (unless
+ * a compact mode is selected) and then show every event matching each
+ * remaining argument, taken as a case-insensitive regular expression.
+ * Without arguments all events (".*") are shown. Exits with an error
+ * when a pattern matches no event.
+ */
+int main(int argc, char **argv)
+{
+    static char *argv_all[2] = {".*", NULL};
+    pfm_pmu_info_t pinfo;
+    char *endptr = NULL;
+    char default_sep[2] = "\t";
+    char *ostr = NULL;
+    char **args;
+    pfm_pmu_t i;
+    int match;
+    regex_t preg;
+    int ret, c;
+
+    memset(&pinfo, 0, sizeof(pinfo));
+
+    pinfo.size = sizeof(pinfo);
+
+    while ((c = getopt(argc, argv, "hELsm:MNl:F:x:DO:")) != -1)
+    {
+        switch (c)
+        {
+        case 'L':
+            options.compact = 1;
+            break;
+        case 'F':
+            parse_filters(optarg);
+            break;
+        case 'E':
+            options.compact = 1;
+            options.encode = 1;
+            break;
+        case 'M':
+            options.combo = 1;
+            break;
+        case 'N':
+            options.name_only = 1;
+            break;
+        case 's':
+            options.sort = 1;
+            break;
+        case 'D':
+            options.desc = 1;
+            break;
+        case 'l':
+            options.combo_lim = atoi(optarg);
+            break;
+        case 'x':
+            options.csv_sep = optarg;
+            break;
+        case 'O':
+            ostr = optarg;
+            break;
+        case 'm':
+            options.mask = strtoull(optarg, &endptr, 16);
+            if (*endptr)
+                errx(1, "mask must be in hexadecimal\n");
+            break;
+        case 'h':
+            usage();
+            exit(0);
+        default:
+            errx(1, "unknown option error");
+        }
+    }
+    /* to allow encoding of events from non detected PMU models */
+    ret = set_env_var("LIBPFM_ENCODE_INACTIVE", "1", 1);
+    if (ret != PFM_SUCCESS)
+        errx(1, "cannot force inactive encoding");
+
+    ret = pfm_initialize();
+    if (ret != PFM_SUCCESS)
+        errx(1, "cannot initialize libpfm: %s", pfm_strerror(ret));
+
+    if (options.mask == 0)
+        options.mask = ~0;
+
+    if (optind == argc)
+    {
+        args = argv_all;
+    }
+    else
+    {
+        args = argv + optind;
+    }
+    if (!options.csv_sep)
+        options.csv_sep = default_sep;
+
+    /* avoid combinatorial explosion */
+    if (options.combo_lim == 0)
+        options.combo_lim = COMBO_MAX;
+
+    if (ostr)
+        setup_os(ostr);
+    else
+        options.os = PFM_OS_NONE;
+
+    if (!options.compact)
+    {
+        int total_supported_events = 0;
+        int total_available_events = 0;
+
+        printf("Supported PMU models:\n");
+        pfm_for_all_pmus(i)
+        {
+            ret = pfm_get_pmu_info(i, &pinfo);
+            if (ret != PFM_SUCCESS)
+                continue;
+
+            printf("\t[%d, %s, \"%s\"]\n", i, pinfo.name, pinfo.desc);
+        }
+
+        printf("Detected PMU models:\n");
+        pfm_for_all_pmus(i)
+        {
+            ret = pfm_get_pmu_info(i, &pinfo);
+            if (ret != PFM_SUCCESS)
+                continue;
+
+            if (pinfo.is_present)
+            {
+                if (pinfo.type >= PFM_PMU_TYPE_MAX)
+                    pinfo.type = PFM_PMU_TYPE_UNKNOWN;
+
+                printf("\t[%d, %s, \"%s\", %d events, %d max encoding, %d counters, %s PMU]\n",
+                       i,
+                       pinfo.name,
+                       pinfo.desc,
+                       pinfo.nevents,
+                       pinfo.max_encoding,
+                       pinfo.num_cntrs + pinfo.num_fixed_cntrs,
+                       pmu_types[pinfo.type]);
+
+                total_supported_events += pinfo.nevents;
+            }
+            total_available_events += pinfo.nevents;
+        }
+        printf("Total events: %d available, %d supported\n", total_available_events, total_supported_events);
+    }
+
+    while (*args)
+    {
+        /* drop umasks and modifiers */
+        drop_event_attributes(*args);
+        /* report the offending pattern, not the program name */
+        if (regcomp(&preg, *args, REG_ICASE))
+            errx(1, "error in regular expression for event \"%s\"", *args);
+
+        if (options.sort)
+            match = show_info_sorted(*args, &preg);
+        else
+            match = show_info(*args, &preg);
+
+        /* one regfree() per successful regcomp(), so no pattern leaks */
+        regfree(&preg);
+
+        if (match == 0)
+            errx(1, "event %s not found", *args);
+
+        args++;
+    }
+
+    pfm_terminate();
+
+    return 0;
+}
diff --git a/repos/mml/src/app/libpfm_test/target.mk b/repos/mml/src/app/libpfm_test/target.mk
new file mode 100644
index 0000000000..5134b51d97
--- /dev/null
+++ b/repos/mml/src/app/libpfm_test/target.mk
@@ -0,0 +1,5 @@
+TARGET = libpfm_test
+SRC_CC = check_events.c
+LIBS += base posix libm libc stdcxx libpfm4
+CC_OPT += -Wno-error -Wno-permissive -fpermissive
+
diff --git a/repos/mml/src/app/thread_test/target.mk b/repos/mml/src/app/thread_test/target.mk
index 79ffb18ea9..55f1259a48 100644
--- a/repos/mml/src/app/thread_test/target.mk
+++ b/repos/mml/src/app/thread_test/target.mk
@@ -1,4 +1,4 @@
 TARGET = thread_test
 SRC_CC = thread_test.cc
-LIBS += base stdcxx
+LIBS += base libc stdcxx
 CXXFLAGS += -Wno-error