Mirror of https://github.com/mmueller41/mxtasking.git (synced 2026-01-21 12:42:57 +01:00)

Commit: Small changes to eliminate warnings, added Hello World example, bugfix in HashJoin benchmark.
@@ -45,6 +45,7 @@ SET(MX_TASKING_SRC
     src/mx/resource/builder.cpp
     src/mx/tasking/scheduler.cpp
     src/mx/tasking/worker.cpp
+    src/mx/tasking/task.cpp
     src/mx/tasking/profiling/profiling_task.cpp
     src/mx/util/core_set.cpp
     src/mx/util/random.cpp
@@ -69,6 +70,7 @@ add_executable(blinktree_benchmark
     src/application/blinktree_benchmark/main.cpp
     src/application/blinktree_benchmark/benchmark.cpp
 )
+target_link_libraries(blinktree_benchmark pthread numa atomic mxtasking mxbenchmarking)
 
 add_executable(hashjoin_benchmark
     src/application/hashjoin_benchmark/main.cpp
@@ -77,11 +79,11 @@ add_executable(hashjoin_benchmark
     src/application/hashjoin_benchmark/tpch_table_reader.cpp
     src/application/hashjoin_benchmark/notifier.cpp
 )
 
-# Link executables
-target_link_libraries(blinktree_benchmark pthread numa atomic mxtasking mxbenchmarking)
 target_link_libraries(hashjoin_benchmark pthread numa atomic mxtasking mxbenchmarking)
 
+add_executable(hello_world src/application/hello_world/main.cpp)
+target_link_libraries(hello_world pthread numa atomic mxtasking mxbenchmarking)
+
 # Add tests
 if (GTEST)
     set(TESTS
@@ -28,6 +28,10 @@ For detailed information please see README files in `src/application/<app>` fold
 * [B Link Tree benchmark](src/application/blinktree_benchmark/README.md) (`src/application/blinktree_benchmark`)
 * [Hash Join benchmark](src/application/hashjoin_benchmark/README.md) (`src/application/hashjoin_benchmark`)
 
+## Example
+We build a small `Hello World!` example, located in `src/application/hello_world`.
+You might take a look to see how to use `MxTasking`.
+
 ### Simple example for B Link Tree
 * Call `make ycsb-a` to generate the default workload
 * Call `./bin/blinktree_benchmark 1:4` to run benchmark for one to four cores.
@@ -50,6 +50,10 @@ Following, the benchmark will be started and print the results for every iterati
 * After that, the time and throughput are written.
 * If `--perf` is enabled, the output will be extended by some perf counters, which are labeled (like throughput).
 
+## Plot the results
+When using `-o FILE`, the results will be written to the given file, using `JSON` format.
+The plot script `scripts/plot_blinktree_benchmark INPUT_FILE [INPUT_FILE ...]` will aggregate and plot the results using one or more of those `JSON` files.
+
 ## Examples
 
 ###### Running workload A using optimistic synchronization
@@ -43,3 +43,7 @@ Following, the benchmark will be started and print the results for every iterati
 * Thirdly, the granularity of how many records per task will be processed.
 * After that, the time and throughput are written.
 * If `--perf` is enabled, the output will be extended by some perf counters, which are labeled (like throughput).
+
+## Plot the results
+When using `-o FILE`, the results will be written to the given file, using `JSON` format.
+The plot script `scripts/plot_hashjoin_benchmark INPUT_FILE` will aggregate and plot the results using one `JSON` file.
@@ -186,11 +186,6 @@ std::uint64_t Benchmark::tuples_per_core(const std::uint64_t count_join_keys, co
 {
     const auto cache_lines = (count_join_keys * sizeof(std::uint32_t)) / 64U;
     const auto cache_lines_per_core = cache_lines / count_cores;
-    auto p = 1U;
-    while (p < cache_lines_per_core)
-    {
-        p += 64U;
-    }
 
-    return p * (64U / sizeof(std::uint32_t));
+    return cache_lines_per_core * (64U / sizeof(std::uint32_t));
 }
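
To make the effect of this fix concrete, here is a minimal standalone sketch of the corrected computation together with a worked example. The removed loop rounded `cache_lines_per_core` up in steps of 64 (1, 65, 129, ...), so a core could be assigned more tuples than its share; the fix hands out exactly `cache_lines_per_core` 64-byte cache lines, i.e. a multiple of 16 keys. The free function below only mirrors the body shown in the hunk and is not the project's actual `Benchmark` member.

```cpp
#include <cstdint>
#include <iostream>

// Sketch of the fixed partitioning: split `count_join_keys` 32-bit keys over
// `count_cores` cores at cache-line (64-byte) granularity, i.e. 16 keys per line.
std::uint64_t tuples_per_core(const std::uint64_t count_join_keys, const std::uint64_t count_cores)
{
    const auto cache_lines = (count_join_keys * sizeof(std::uint32_t)) / 64U;
    const auto cache_lines_per_core = cache_lines / count_cores;
    return cache_lines_per_core * (64U / sizeof(std::uint32_t));
}

int main()
{
    // Worked example: 1,000,000 keys on 4 cores
    //   cache_lines          = 4,000,000 B / 64 B = 62,500
    //   cache_lines_per_core = 62,500 / 4         = 15,625
    //   tuples per core      = 15,625 * 16        = 250,000
    std::cout << tuples_per_core(1'000'000, 4) << '\n'; // prints 250000
}
```
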
src/application/hello_world/main.cpp (new file, 41 lines)
@@ -0,0 +1,41 @@
+#include <iostream>
+#include <mx/tasking/runtime.h>
+
+class HelloWorldTask : public mx::tasking::TaskInterface
+{
+public:
+    constexpr HelloWorldTask() = default;
+    ~HelloWorldTask() override = default;
+
+    mx::tasking::TaskResult execute(const std::uint16_t /*core_id*/, const std::uint16_t /*channel_id*/) override
+    {
+        std::cout << "Hello World" << std::endl;
+
+        // Stop MxTasking runtime after this task.
+        return mx::tasking::TaskResult::make_stop();
+    }
+};
+
+int main()
+{
+    // Define which cores will be used (1 core here).
+    const auto cores = mx::util::core_set::build(1);
+
+    { // Scope for the MxTasking runtime.
+
+        // Create a runtime for the given cores.
+        mx::tasking::runtime_guard _{cores};
+
+        // Create an instance of the HelloWorldTask with the current core as first
+        // parameter. The core is required for memory allocation.
+        auto *hello_world_task = mx::tasking::runtime::new_task<HelloWorldTask>(cores.front());
+
+        // Annotate the task to run on the first core.
+        hello_world_task->annotate(cores.front());
+
+        // Schedule the task.
+        mx::tasking::runtime::spawn(*hello_world_task);
+    }
+
+    return 0;
+}
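
The example above ends by stopping the runtime directly. As a hedged extension (not part of the commit), a task can instead hand over to a successor using the `TaskResult` factories this commit documents in `task.h`; both classes below are made up purely for illustration, and only `new_task`, `annotate`, `make_succeed_and_remove` and `make_stop` from this diff are assumed.

```cpp
#include <cstdint>
#include <iostream>
#include <mx/tasking/runtime.h>

// Hypothetical follow-up task: prints a farewell and then stops the runtime.
class GoodbyeTask : public mx::tasking::TaskInterface
{
public:
    mx::tasking::TaskResult execute(const std::uint16_t /*core_id*/, const std::uint16_t /*channel_id*/) override
    {
        std::cout << "Goodbye" << std::endl;
        return mx::tasking::TaskResult::make_stop();
    }
};

// Variant of HelloWorldTask that chains GoodbyeTask instead of stopping directly.
class ChainedHelloTask : public mx::tasking::TaskInterface
{
public:
    mx::tasking::TaskResult execute(const std::uint16_t core_id, const std::uint16_t /*channel_id*/) override
    {
        std::cout << "Hello World" << std::endl;

        // Allocate the successor on the current core and run it on the same core;
        // make_succeed_and_remove() schedules it and removes the current task.
        auto *goodbye = mx::tasking::runtime::new_task<GoodbyeTask>(core_id);
        goodbye->annotate(core_id);
        return mx::tasking::TaskResult::make_succeed_and_remove(goodbye);
    }
};
```
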
@@ -10,12 +10,6 @@ public:
     */
    static constexpr auto max_numa_nodes() { return 2U; }
 
-   /**
-    * Decreases the use of memory of external NUMA regions within the allocator.
-    * @return True, when memory usage of external NUMA regions should be less.
-    */
-   static constexpr auto low_priority_for_external_numa() { return false; }
-
    /**
     * @return Interval of each epoch, if memory reclamation is used.
     */
@@ -14,10 +14,10 @@ AllocationBlock::AllocationBlock(const std::uint32_t id, const std::uint8_t numa
 }
 
 AllocationBlock::AllocationBlock(AllocationBlock &&other) noexcept
-    : _id(other._id), _numa_node_id(other._numa_node_id), _size(other._size), _allocated_block(other._allocated_block),
-      _free_elements(std::move(other._free_elements)), _available_size(other._available_size)
+    : _id(other._id), _numa_node_id(other._numa_node_id), _size(other._size),
+      _allocated_block(std::exchange(other._allocated_block, nullptr)), _free_elements(std::move(other._free_elements)),
+      _available_size(other._available_size)
 {
-    other._allocated_block = nullptr;
 }
 
 AllocationBlock &AllocationBlock::operator=(AllocationBlock &&other) noexcept
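
The move constructor now uses `std::exchange` to take the raw block and null out the source directly in the member-initializer list, so the constructor body can stay empty. A minimal, self-contained illustration of the idiom with a made-up type:

```cpp
#include <cstddef>
#include <utility> // std::exchange

// Toy owner of a raw buffer, illustrating the move-constructor idiom.
struct Buffer
{
    explicit Buffer(std::size_t size) : _data(new unsigned char[size]), _size(size) {}
    ~Buffer() { delete[] _data; }

    // std::exchange yields the old value of the member and stores the reset
    // value into the moved-from object, so no statement is needed in the body.
    Buffer(Buffer &&other) noexcept
        : _data(std::exchange(other._data, nullptr)), _size(std::exchange(other._size, 0U))
    {
    }

    Buffer(const Buffer &) = delete;
    Buffer &operator=(const Buffer &) = delete;
    Buffer &operator=(Buffer &&) = delete;

    unsigned char *_data;
    std::size_t _size;
};
```
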
@@ -55,7 +55,7 @@
     explicit operator bool() const noexcept { return _memory != nullptr; }
 
 private:
-    void *_memory = nullptr;
+    void *_memory{nullptr};
 };
 
 /**
@@ -64,9 +64,11 @@
  * Internal, the ProcessorHeap bufferes allocated memory
  * to minimize access to the global heap.
  */
-class ProcessorHeap
+class alignas(64) ProcessorHeap
 {
 public:
+    ProcessorHeap() noexcept = default;
+
     explicit ProcessorHeap(const std::uint8_t numa_node_id) noexcept : _numa_node_id(numa_node_id)
     {
         _allocated_chunks.reserve(1024);
@@ -81,10 +83,24 @@
         }
 
         for (const auto free_chunk : _free_chunk_buffer)
         {
+            if (static_cast<bool>(free_chunk))
+            {
                 GlobalHeap::free(static_cast<void *>(free_chunk), Chunk::size());
+            }
         }
     }
+
+    ProcessorHeap &operator=(ProcessorHeap &&other) noexcept
+    {
+        _numa_node_id = std::exchange(other._numa_node_id, std::numeric_limits<std::uint8_t>::max());
+        _free_chunk_buffer = other._free_chunk_buffer;
+        other._free_chunk_buffer.fill(Chunk{});
+        _next_free_chunk.store(other._next_free_chunk.load());
+        _fill_buffer_flag.store(other._fill_buffer_flag.load());
+        _allocated_chunks = std::move(other._allocated_chunks);
+        return *this;
+    }
 
     /**
      * @return ID of the NUMA node the memory is allocated on.
@@ -129,7 +145,7 @@
     inline static constexpr auto CHUNKS = 128U;
 
     // ID of the NUMA node of this ProcessorHeap.
-    alignas(64) const std::uint8_t _numa_node_id;
+    std::uint8_t _numa_node_id{std::numeric_limits<std::uint8_t>::max()};
 
     // Buffer for free chunks.
     std::array<Chunk, CHUNKS> _free_chunk_buffer;
@@ -176,10 +192,7 @@
 template <std::size_t S> class alignas(64) CoreHeap
 {
 public:
-    explicit CoreHeap(ProcessorHeap *processor_heap) noexcept
-        : _processor_heap(processor_heap), _numa_node_id(processor_heap->numa_node_id())
-    {
-    }
+    explicit CoreHeap(ProcessorHeap *processor_heap) noexcept : _processor_heap(processor_heap) { fill_buffer(); }
 
     CoreHeap() noexcept = default;
 
@@ -192,26 +205,15 @@
      *
      * @return Pointer to the new allocated memory.
      */
-    void *allocate() noexcept
+    [[nodiscard]] void *allocate() noexcept
     {
         if (empty())
         {
             fill_buffer();
         }
 
-        auto *free_object = _first;
-        _first = free_object->next();
-
-        if constexpr (config::low_priority_for_external_numa())
-        {
-            free_object->numa_node_id(_numa_node_id);
-
-            return reinterpret_cast<void *>(reinterpret_cast<std::uintptr_t>(free_object) + 64U);
-        }
-        else
-        {
-            return static_cast<void *>(free_object);
-        }
+        auto *free_element = std::exchange(_first, _first->next());
+        return static_cast<void *>(free_element);
     }
 
     /**
@@ -223,31 +225,11 @@
      * @param pointer Pointer to the memory object to be freed.
      */
     void free(void *pointer) noexcept
-    {
-        if constexpr (config::low_priority_for_external_numa())
-        {
-            const auto address = reinterpret_cast<std::uintptr_t>(pointer);
-            auto *free_object = reinterpret_cast<FreeHeader *>(address - 64U);
-
-            if (free_object->numa_node_id() == _numa_node_id)
-            {
-                free_object->next(_first);
-                _first = free_object;
-            }
-            else
-            {
-                _last->next(free_object);
-                free_object->next(nullptr);
-                _last = free_object;
-            }
-        }
-        else
     {
         auto *free_object = static_cast<FreeHeader *>(pointer);
         free_object->next(_first);
         _first = free_object;
     }
-    }
 
     /**
      * Fills the buffer by asking the ProcessorHeap for more memory.
@@ -258,7 +240,7 @@
         auto chunk = _processor_heap->allocate();
         const auto chunk_address = static_cast<std::uintptr_t>(chunk);
 
-        constexpr auto object_size = config::low_priority_for_external_numa() ? S + 64U : S;
+        constexpr auto object_size = S;
         constexpr auto count_objects = std::uint64_t{Chunk::size() / object_size};
 
         auto *first_free = reinterpret_cast<FreeHeader *>(chunk_address);
@@ -274,21 +256,14 @@
 
         last_free->next(nullptr);
         _first = first_free;
-        _last = last_free;
     }
 
 private:
     // Processor heap to allocate new chunks.
-    ProcessorHeap *_processor_heap = nullptr;
+    ProcessorHeap *_processor_heap{nullptr};
 
-    // ID of the NUMA node the core is placed in.
-    std::uint8_t _numa_node_id = 0U;
-
     // First element of the list of free memory objects.
-    FreeHeader *_first = nullptr;
+    FreeHeader *_first{nullptr};
 
-    // Last element of the list of free memory objects.
-    FreeHeader *_last = nullptr;
-
     /**
      * @return True, when the buffer is empty.
@@ -302,33 +277,24 @@
 template <std::size_t S> class Allocator final : public TaskAllocatorInterface
 {
 public:
-    explicit Allocator(const util::core_set &core_set) : _core_heaps(core_set.size())
+    explicit Allocator(const util::core_set &core_set)
     {
-        _processor_heaps.fill(nullptr);
+        for (auto node_id = std::uint8_t(0U); node_id < config::max_numa_nodes(); ++node_id)
+        {
+            if (core_set.has_core_of_numa_node(node_id))
+            {
+                _processor_heaps[node_id] = ProcessorHeap{node_id};
+            }
+        }
 
-        for (auto i = 0U; i < core_set.size(); ++i)
+        for (const auto core_id : core_set)
         {
-            const auto core_id = core_set[i];
             const auto node_id = system::topology::node_id(core_id);
-            if (_processor_heaps[node_id] == nullptr)
-            {
-                _processor_heaps[node_id] =
-                    new (GlobalHeap::allocate_cache_line_aligned(sizeof(ProcessorHeap))) ProcessorHeap(node_id);
-            }
-
-            auto core_heap = CoreHeap<S>{_processor_heaps[node_id]};
-            core_heap.fill_buffer();
-            _core_heaps.insert(std::make_pair(core_id, std::move(core_heap)));
+            _core_heaps[core_id] = CoreHeap<S>{&_processor_heaps[node_id]};
         }
     }
 
-    ~Allocator() override
-    {
-        for (auto *processor_heap : _processor_heaps)
-        {
-            delete processor_heap;
-        }
-    }
+    ~Allocator() override = default;
 
     /**
      * Allocates memory from the given CoreHeap.
@@ -336,7 +302,7 @@
      * @param core_id ID of the core.
      * @return Allocated memory object.
      */
-    void *allocate(const std::uint16_t core_id) override { return _core_heaps[core_id].allocate(); }
+    [[nodiscard]] void *allocate(const std::uint16_t core_id) override { return _core_heaps[core_id].allocate(); }
 
     /**
      * Frees memory.
@@ -348,9 +314,9 @@
 
 private:
     // Heap for every processor socket/NUMA region.
-    std::array<ProcessorHeap *, config::max_numa_nodes()> _processor_heaps;
+    std::array<ProcessorHeap, config::max_numa_nodes()> _processor_heaps;
 
     // Map from core_id to core-local allocator.
-    std::unordered_map<std::uint16_t, CoreHeap<S>> _core_heaps;
+    std::array<CoreHeap<S>, tasking::config::max_cores()> _core_heaps;
 };
 } // namespace mx::memory::fixed
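
The `Allocator` now holds `ProcessorHeap`s by value (one slot per NUMA node) and indexes `CoreHeap`s through a fixed `std::array` instead of an `std::unordered_map`, so the per-allocation lookup is a plain array access. A reduced sketch of that general pattern, with made-up names (`PerCoreState`, `PerCoreRegistry`) and assuming core ids stay below the configured maximum:

```cpp
#include <array>
#include <cstddef>
#include <cstdint>

// Toy per-core state, stored by value.
struct PerCoreState
{
    std::uint64_t allocations{0U};
};

constexpr std::size_t max_cores = 128U; // mirrors tasking::config::max_cores() after this commit

class PerCoreRegistry
{
public:
    // Direct indexing: core ids are small and dense, so an array lookup replaces
    // the hash computation and bucket walk of an unordered_map on the hot path.
    PerCoreState &state(const std::uint16_t core_id) noexcept { return _states[core_id]; }

private:
    std::array<PerCoreState, max_cores> _states{};
};
```
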
@@ -12,7 +12,7 @@ namespace mx::memory {
 template <class T, typename I> class tagged_ptr
 {
 public:
-    constexpr tagged_ptr() noexcept : _object_pointer(0U)
+    constexpr tagged_ptr() noexcept
     {
         static_assert(sizeof(I) == 2U);
         static_assert(sizeof(tagged_ptr) == 8U);
@@ -81,7 +81,7 @@
     /**
      * Pointer to the instance of T, only 48bit are used.
      */
-    std::uintptr_t _object_pointer : 48;
+    std::uintptr_t _object_pointer : 48 {0U};
 
     /**
      * Information stored within this pointer, remaining 16bit are used.
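
Both `tagged_ptr` here and the `information` class further below now rely on default member initializers on bit-fields, a C++20 feature that lets the defaulted constructor produce a zero-initialized object without an initializer list. A tiny sketch with an illustrative type of my own:

```cpp
#include <cstdint>

// C++20: bit-fields may carry default member initializers, so a defaulted
// constructor already yields a fully initialized object.
struct PackedHandle
{
    std::uintptr_t pointer : 48 {0U}; // low 48 bits: address
    std::uintptr_t tag : 16 {0U};     // high 16 bits: user tag

    constexpr PackedHandle() noexcept = default;
};

// Holds on common 64-bit ABIs; tagged_ptr makes the same assumption via its
// static_assert(sizeof(tagged_ptr) == 8U).
static_assert(sizeof(PackedHandle) == sizeof(std::uintptr_t));
```
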
@@ -19,7 +19,7 @@
      * @param core_id Core to allocate memory for.
      * @return Allocated memory.
      */
-    virtual void *allocate(std::uint16_t core_id) = 0;
+    [[nodiscard]] virtual void *allocate(std::uint16_t core_id) = 0;
 
     /**
      * Frees the memory at the given core.
@@ -36,12 +36,12 @@ template <std::size_t S> class SystemTaskAllocator final : public TaskAllocatorI
 {
 public:
     constexpr SystemTaskAllocator() noexcept = default;
-    virtual ~SystemTaskAllocator() noexcept = default;
+    ~SystemTaskAllocator() noexcept override = default;
 
     /**
      * @return Allocated memory using systems malloc (but aligned).
     */
-    void *allocate(const std::uint16_t /*core_id*/) override { return std::aligned_alloc(64U, S); }
+    [[nodiscard]] void *allocate(const std::uint16_t /*core_id*/) override { return std::aligned_alloc(64U, S); }
 
     /**
      * Frees the given memory using systems free.
@@ -164,7 +164,7 @@
 class information
 {
 public:
-    constexpr information() noexcept : _channel_id(0U), _synchronization_primitive(0U) {}
+    constexpr information() noexcept = default;
     explicit information(const std::uint16_t channel_id,
                          const synchronization::primitive synchronization_primitive) noexcept
         : _channel_id(channel_id), _synchronization_primitive(static_cast<std::uint16_t>(synchronization_primitive))
@@ -182,8 +182,8 @@
     information &operator=(const information &other) = default;
 
 private:
-    std::uint16_t _channel_id : 12;
-    std::uint16_t _synchronization_primitive : 4;
+    std::uint16_t _channel_id : 12 {0U};
+    std::uint16_t _synchronization_primitive : 4 {0U};
 } __attribute__((packed));
 
 /**
@@ -12,7 +12,7 @@ public:
     };
 
     // Maximal number of supported cores.
-    static constexpr auto max_cores() { return 64U; }
+    static constexpr auto max_cores() { return 128U; }
 
     // Maximal size for a single task, will be used for task allocation.
     static constexpr auto task_size() { return 64U; }
@@ -260,6 +260,11 @@ public:
         runtime::init(core_set, prefetch_distance, use_system_allocator);
     }
 
+    runtime_guard(const util::core_set &core_set, const std::uint16_t prefetch_distance = 0U) noexcept
+        : runtime_guard(false, core_set, prefetch_distance)
+    {
+    }
+
     ~runtime_guard() noexcept { runtime::start_and_wait(); }
 };
 } // namespace mx::tasking
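
This overload lets callers that keep the default prefetch distance construct the guard from a core set alone, which is exactly what the Hello World example above does. A hedged usage sketch, using only calls visible in this diff:

```cpp
#include <mx/tasking/runtime.h>

void run_with_defaults()
{
    // One core, as in the Hello World example.
    const auto cores = mx::util::core_set::build(1);

    // New convenience constructor: core set only, prefetch distance defaults to 0.
    mx::tasking::runtime_guard guard{cores};

    // ... create, annotate and spawn tasks here; when the guard goes out of
    // scope its destructor calls runtime::start_and_wait().
}
```
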
src/mx/tasking/task.cpp (new file, 18 lines)
@@ -0,0 +1,18 @@
+#include "task.h"
+#include "runtime.h"
+#include <mx/system/topology.h>
+
+using namespace mx::tasking;
+
+TaskResult TaskResult::make_stop() noexcept
+{
+    auto *stop_task = runtime::new_task<StopTaskingTask>(system::topology::core_id());
+    stop_task->annotate(std::uint16_t{0U});
+    return TaskResult::make_succeed_and_remove(stop_task);
+}
+
+TaskResult StopTaskingTask::execute(const std::uint16_t /*core_id*/, const std::uint16_t /*channel_id*/)
+{
+    runtime::stop();
+    return TaskResult::make_remove();
+}
@@ -16,16 +16,68 @@ enum priority : std::uint8_t
 };
 
 class TaskInterface;
+
+/**
+ * The TaskResult is returned by every task to tell the
+ * runtime what happens next. Possibilities are run a
+ * successor task, remove the returning task or stop
+ * the entire runtime.
+ */
 class TaskResult
 {
 public:
+    /**
+     * Let the runtime know that the given task
+     * should be run as a successor of the current
+     * task. The runtime will schedule that task.
+     *
+     * @param successor_task Task to succeed.
+     * @return A TaskResult that tells the
+     * runtime to run the given task.
+     */
     static TaskResult make_succeed(TaskInterface *successor_task) noexcept { return TaskResult{successor_task, false}; }
+
+    /**
+     * Let the runtime know that the given task
+     * should be removed after (successfully)
+     * finishing.
+     *
+     * @return A TaskResult that tells the
+     * runtime to remove the returning task.
+     */
     static TaskResult make_remove() noexcept { return TaskResult{nullptr, true}; }
+
+    /**
+     * Let the runtime know that the given task
+     * should be run as a successor of the current
+     * task and the current task should be removed.
+     *
+     * @param successor_task Task to succeed.
+     * @return A TaskResult that tells the runtime
+     * to run the given task and remove the
+     * returning task.
+     */
     static TaskResult make_succeed_and_remove(TaskInterface *successor_task) noexcept
     {
         return TaskResult{successor_task, true};
     }
-    static TaskResult make_null() noexcept { return TaskResult{nullptr, false}; }
+
+    /**
+     * Nothing will happen
+     *
+     * @return An empty TaskResult.
+     */
+    static TaskResult make_null() noexcept { return {}; }
+
+    /**
+     * Let the runtime know to stop after
+     * the returning task.
+     *
+     * @return A TaskResult that tells the
+     * runtime to top.
+     */
+    static TaskResult make_stop() noexcept;
 
     constexpr TaskResult() = default;
     ~TaskResult() = default;
 
@@ -205,4 +257,13 @@ private:
     // Tasks annotations.
     annotation _annotation;
 };
+
+class StopTaskingTask final : public TaskInterface
+{
+public:
+    constexpr StopTaskingTask() noexcept = default;
+    ~StopTaskingTask() override = default;
+
+    TaskResult execute(std::uint16_t /*core_id*/, std::uint16_t /*channel_id*/) override;
+};
 } // namespace mx::tasking
@@ -28,6 +28,13 @@ public:
     };
 
     constexpr core_set() noexcept : _core_identifier({0U}), _numa_nodes(0U) {}
+    explicit core_set(std::initializer_list<std::uint16_t> &&core_ids) noexcept : core_set()
+    {
+        for (const auto core_id : core_ids)
+        {
+            emplace_back(core_id);
+        }
+    }
     ~core_set() noexcept = default;
 
     core_set &operator=(const core_set &other) noexcept = default;
@@ -43,6 +50,8 @@ public:
     }
 
     std::uint16_t operator[](const std::uint16_t index) const noexcept { return _core_identifier[index]; }
+    std::uint16_t front() const { return _core_identifier.front(); }
+    std::uint16_t back() const { return _core_identifier.back(); }
 
     explicit operator bool() const noexcept { return _size > 0U; }
 
@@ -101,6 +110,9 @@ public:
         return _numa_nodes.test(numa_node_id);
     }
 
+    [[nodiscard]] auto begin() const noexcept { return _core_identifier.begin(); }
+    [[nodiscard]] auto end() const noexcept { return _core_identifier.begin() + _size; }
+
 private:
     // List of core identifiers.
     std::array<std::uint16_t, tasking::config::max_cores()> _core_identifier;
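
Together with the new initializer-list constructor and `front()`/`back()` from the hunks above, these iterators make `core_set` behave like a small read-only container, which is what the fixed-size allocator now relies on with `for (const auto core_id : core_set)`. A hedged usage sketch; the include path is assumed from the source layout and only members visible in this diff are used:

```cpp
#include <cstdio>
#include <mx/util/core_set.h> // assumed header location for mx::util::core_set

void print_cores()
{
    // Explicitly pick cores 0, 2, 4 and 6 via the new initializer_list constructor.
    const auto cores = mx::util::core_set{0U, 2U, 4U, 6U};

    std::printf("first core: %u, last core: %u\n",
                static_cast<unsigned>(cores.front()), static_cast<unsigned>(cores.back()));

    // begin()/end() enable range-based iteration over the selected cores.
    for (const auto core_id : cores)
    {
        std::printf("core %u\n", static_cast<unsigned>(core_id));
    }
}
```
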