Initial commit

Jan Mühlig
2021-03-22 18:38:54 +01:00
commit ad8c48083c
128 changed files with 33166 additions and 0 deletions

127
.clang-format Normal file

@@ -0,0 +1,127 @@
---
Language: Cpp
# BasedOnStyle: Microsoft
AccessModifierOffset: -4
AlignAfterOpenBracket: Align
AlignConsecutiveMacros: false
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Right
AlignOperands: true
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: InlineOnly
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Never
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: MultiLine
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterCaseLabel: false
AfterClass: true
AfterControlStatement: true
AfterEnum: true
AfterFunction: true
AfterNamespace: false
AfterObjCDeclaration: true
AfterStruct: true
AfterUnion: false
AfterExternBlock: true
BeforeCatch: true
BeforeElse: true
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Custom
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 120
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeBlocks: Preserve
IncludeCategories:
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
Priority: 2
- Regex: '^(<|"(gtest|gmock|isl|json)/)'
Priority: 3
- Regex: '.*'
Priority: 1
IncludeIsMainRegex: '(Test)?$'
IndentCaseLabels: false
IndentPPDirectives: None
IndentWidth: 4
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: true
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Auto
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 1000
PointerAlignment: Right
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
TabWidth: 4
UseTab: Never
...

14
.clang-tidy Normal file

@@ -0,0 +1,14 @@
---
Checks: '
-*,
clang-analyzer-*,
modernize-*,
performance-*,
readability-*,
concurrency-*,
-modernize-use-trailing-return-type,
-google-build-using-namespace,
-readability-simplify-boolean-expr,
-readability-magic-numbers,
-clang-analyzer-core.DivideZero,
'

12
.gitignore vendored Normal file

@@ -0,0 +1,12 @@
workloads/
YCSB/
cmake-build-debug/
cmake_install.cmake
CMakeCache.txt
Makefile
CMakeFiles/
bin/
ycsb_binding/bin
lib/*.so
*.cbp
.idea/

107
CMakeLists.txt Normal file

@@ -0,0 +1,107 @@
cmake_minimum_required(VERSION 3.10)
project(mxtasking)
# Check SSE is available
INCLUDE(scripts/FindSSE.cmake)
FindSSE()
# Set compile flags
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_C_COMPILER clang)
set(CMAKE_CXX_COMPILER clang++)
set(CMAKE_CXX_CLANG_TIDY "clang-tidy;--extra-arg-before=-std=c++17 --system-headers=0")
set(CMAKE_CXX_FLAGS "-pedantic -Wall -Wextra -Werror \
-Wno-invalid-offsetof -Wcast-align -Wcast-qual -Wctor-dtor-privacy -Wdisabled-optimization \
-Wformat=2 -Winit-self -Wmissing-declarations -Wmissing-include-dirs -Woverloaded-virtual \
-Wredundant-decls -Wshadow -Wsign-promo -Wstrict-overflow=5 -Wswitch-default -Wundef \
-Wno-unused -Wold-style-cast -Wno-uninitialized")
# Set compile flag for x86_64
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -mtune=native")
endif()
# Set SSE flag if available
IF(SSE4_2_FOUND)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -DUSE_SSE2")
ENDIF(SSE4_2_FOUND)
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g3")
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -DNDEBUG -flto")
set(CMAKE_BUILD_TYPE RELEASE)
# Directories for output binaries and libraries
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
# External libraries
find_library(GTEST gtest)
# Include folders
include_directories(src/ lib/)
# Source files
SET(MX_TASKING_SRC
src/mx/resource/builder.cpp
src/mx/tasking/scheduler.cpp
src/mx/tasking/worker.cpp
src/mx/tasking/profiling/profiling_task.cpp
src/mx/util/core_set.cpp
src/mx/util/random.cpp
src/mx/memory/dynamic_size_allocator.cpp
src/mx/memory/reclamation/epoch_manager.cpp
)
SET(MX_BENCHMARKING_SRC
src/benchmark/workload_set.cpp
src/benchmark/workload.cpp
src/benchmark/cores.cpp
src/benchmark/perf.cpp
src/benchmark/string_util.cpp
)
# Build libraries
add_library(mxtasking SHARED ${MX_TASKING_SRC})
add_library(mxbenchmarking SHARED ${MX_BENCHMARKING_SRC})
# Build executables
add_executable(blinktree_benchmark
src/application/blinktree_benchmark/main.cpp
src/application/blinktree_benchmark/benchmark.cpp
)
add_executable(hashjoin_benchmark
src/application/hashjoin_benchmark/main.cpp
src/application/hashjoin_benchmark/benchmark.cpp
src/application/hashjoin_benchmark/merge_task.cpp
src/application/hashjoin_benchmark/tpch_table_reader.cpp
src/application/hashjoin_benchmark/notifier.cpp
)
# Link executables
target_link_libraries(blinktree_benchmark pthread numa atomic mxtasking mxbenchmarking)
target_link_libraries(hashjoin_benchmark pthread numa atomic mxtasking mxbenchmarking)
# Add tests
if (GTEST)
set(TESTS
test/mx/memory/alignment_helper.test.cpp
test/mx/memory/dynamic_size_allocator.test.cpp
test/mx/memory/fixed_size_allocator.test.cpp
test/mx/memory/tagged_ptr.test.cpp
test/mx/util/aligned_t.test.cpp
test/mx/util/mpsc_queue.test.cpp
test/mx/util/queue.test.cpp
test/mx/util/core_set.test.cpp
test/mx/util/vector.test.cpp
)
add_executable(mxtests test/test.cpp ${TESTS})
target_link_libraries(mxtests pthread numa atomic mxtasking mxbenchmarking gtest)
else()
message("Library 'gtest' not found. Please install 'libgtest-dev' for unit tests.")
endif()
# Custom targets
add_custom_target(ycsb-a ${CMAKE_SOURCE_DIR}/scripts/generate_ycsb a randint)
add_custom_target(ycsb-c ${CMAKE_SOURCE_DIR}/scripts/generate_ycsb c randint)

50
README.md Normal file

@@ -0,0 +1,50 @@
# MxTasking: Task-based framework with built-in prefetching and synchronization
MxTasking is a task-based framework that assists the design of latch-free and parallel data structures.
MxTasking eases the information exchange between applications and the operating system, resulting in novel opportunities to manage resources in a truly hardware- and application-conscious way.
# Cite
The code was used for our SIGMOD'21 paper.
Jan Mühlig and Jens Teubner. 2021. MxTasks: How to Make Efficient Synchronization and Prefetching Easy. In Proceedings of the 2021 International Conference on Management of Data. [[PDF]](http://dbis.cs.tu-dortmund.de/TODO)
@inproceedings{muehlig2021mxtasks,
author = {Jan Mühlig and Jens Teubner},
title = {MxTasks: How to Make Efficient Synchronization and Prefetching Easy},
booktitle = {Proceedings of the 2021 International Conference on Management of Data},
year = {2021}
}
## Dependencies
### For building
#### Required
* `cmake` `>= 3.10`
* `clang` `>= 10`
* `clang-tidy` `>= 10`
* `libnuma` or `libnuma-dev`
#### Optional
* `libgtest-dev` for tests in `test/`
### For generating the YCSB workload
* `python` `>= 3`
* `java`
* `curl`
## How to build
* Call `cmake .` to generate `Makefile`.
* Call `make` to generate all binaries.
## How to run
For detailed information, please see the README files in the `src/application/<app>` folders:
* [B Link Tree benchmark](src/application/blinktree_benchmark/README.md) (`src/application/blinktree_benchmark`)
* [Hash Join benchmark](src/application/hashjoin_benchmark/README.md) (`src/application/hashjoin_benchmark`)
### Simple example for B Link Tree
* Call `make ycsb-a` to generate the default workload.
* Call `./bin/blinktree_benchmark 1:4` to run the benchmark on one to four cores.
## External Libraries
* `argparse` ([view on github](https://github.com/p-ranav/argparse)) under MIT license
* `json` ([view on github](https://github.com/nlohmann/json)) under MIT license
* Yahoo! Cloud Serving Benchmark ([view on github](https://github.com/brianfrankcooper/YCSB)) under Apache License 2.0

619
lib/argparse.hpp Normal file

@@ -0,0 +1,619 @@
/*
__ _ _ __ __ _ _ __ __ _ _ __ ___ ___
/ _` | '__/ _` | '_ \ / _` | '__/ __|/ _ \ Argument Parser for Modern C++
| (_| | | | (_| | |_) | (_| | | \__ \ __/ http://github.com/p-ranav/argparse
\__,_|_| \__, | .__/ \__,_|_| |___/\___|
|___/|_|
Licensed under the MIT License <http://opensource.org/licenses/MIT>.
SPDX-License-Identifier: MIT
Copyright (c) 2019 Pranav Srinivas Kumar <pranav.srinivas.kumar@gmail.com>.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#pragma once
#include <algorithm>
#include <any>
#include <functional>
#include <iostream>
#include <iterator>
#include <list>
#include <map>
#include <numeric>
#include <optional>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <variant>
#include <vector>
namespace argparse {
namespace details { // namespace for helper methods
template <typename... Ts> struct is_container_helper {};
template <typename T, typename _ = void>
struct is_container : std::false_type {};
template <> struct is_container<std::string> : std::false_type {};
template <typename T>
struct is_container<
T,
std::conditional_t<false,
is_container_helper<typename T::value_type,
decltype(std::declval<T>().begin()),
decltype(std::declval<T>().end()),
decltype(std::declval<T>().size())>,
void>> : std::true_type {};
template <typename T>
static constexpr bool is_container_v = is_container<T>::value;
template <typename T>
struct is_string_like
: std::conjunction<std::is_constructible<std::string, T>,
std::is_convertible<T, std::string_view>> {};
template <class F, class Tuple, class Extra, size_t... I>
constexpr decltype(auto) apply_plus_one_impl(F &&f, Tuple &&t, Extra &&x,
std::index_sequence<I...>) {
return std::invoke(std::forward<F>(f), std::get<I>(std::forward<Tuple>(t))...,
std::forward<Extra>(x));
}
template <class F, class Tuple, class Extra>
constexpr decltype(auto) apply_plus_one(F &&f, Tuple &&t, Extra &&x) {
return details::apply_plus_one_impl(
std::forward<F>(f), std::forward<Tuple>(t), std::forward<Extra>(x),
std::make_index_sequence<
std::tuple_size_v<std::remove_reference_t<Tuple>>>{});
}
} // namespace details
class ArgumentParser;
class Argument {
friend class ArgumentParser;
friend auto operator<<(std::ostream &, ArgumentParser const &)
-> std::ostream &;
template <size_t N, size_t... I>
explicit Argument(std::string(&&a)[N], std::index_sequence<I...>)
: mIsOptional((is_optional(a[I]) || ...)), mIsRequired(false),
mIsUsed(false) {
((void)mNames.push_back(std::move(a[I])), ...);
std::sort(
mNames.begin(), mNames.end(), [](const auto &lhs, const auto &rhs) {
return lhs.size() == rhs.size() ? lhs < rhs : lhs.size() < rhs.size();
});
}
public:
Argument() = default;
template <typename... Args,
std::enable_if_t<
std::conjunction_v<details::is_string_like<Args>...>, int> = 0>
explicit Argument(Args &&... args)
: Argument({std::string(std::forward<Args>(args))...},
std::make_index_sequence<sizeof...(Args)>{}) {}
Argument &help(std::string aHelp) {
mHelp = std::move(aHelp);
return *this;
}
Argument &default_value(std::any aDefaultValue) {
mDefaultValue = std::move(aDefaultValue);
return *this;
}
Argument &required() {
mIsRequired = true;
return *this;
}
Argument &implicit_value(std::any aImplicitValue) {
mImplicitValue = std::move(aImplicitValue);
mNumArgs = 0;
return *this;
}
template <class F, class... Args>
auto action(F &&aAction, Args &&... aBound)
-> std::enable_if_t<std::is_invocable_v<F, Args..., std::string const>,
Argument &> {
using action_type = std::conditional_t<
std::is_void_v<std::invoke_result_t<F, Args..., std::string const>>,
void_action, valued_action>;
if constexpr (sizeof...(Args) == 0)
mAction.emplace<action_type>(std::forward<F>(aAction));
else
mAction.emplace<action_type>(
[f = std::forward<F>(aAction),
tup = std::make_tuple(std::forward<Args>(aBound)...)](
std::string const &opt) mutable {
return details::apply_plus_one(f, tup, opt);
});
return *this;
}
Argument &nargs(int aNumArgs) {
if (aNumArgs < 0)
throw std::logic_error("Number of arguments must be non-negative");
mNumArgs = aNumArgs;
return *this;
}
Argument &remaining() {
mNumArgs = -1;
return *this;
}
template <typename Iterator>
Iterator consume(Iterator start, Iterator end, std::string usedName = {}) {
if (mIsUsed) {
throw std::runtime_error("Duplicate argument");
}
mIsUsed = true;
mUsedName = std::move(usedName);
if (mNumArgs == 0) {
mValues.emplace_back(mImplicitValue);
return start;
} else if (mNumArgs <= std::distance(start, end)) {
if (auto expected = maybe_nargs()) {
end = std::next(start, *expected);
if (std::any_of(start, end, Argument::is_optional)) {
throw std::runtime_error("optional argument in parameter sequence");
}
}
struct action_apply {
void operator()(valued_action &f) {
std::transform(start, end, std::back_inserter(self.mValues), f);
}
void operator()(void_action &f) {
std::for_each(start, end, f);
if (!self.mDefaultValue.has_value()) {
if (auto expected = self.maybe_nargs())
self.mValues.resize(*expected);
}
}
Iterator start, end;
Argument &self;
};
std::visit(action_apply{start, end, *this}, mAction);
return end;
} else if (mDefaultValue.has_value()) {
return start;
} else {
throw std::runtime_error("Too few arguments");
}
}
/*
* @throws std::runtime_error if argument values are not valid
*/
void validate() const {
if (auto expected = maybe_nargs()) {
if (mIsOptional) {
if (mIsUsed && mValues.size() != *expected &&
!mDefaultValue.has_value()) {
std::stringstream stream;
stream << mUsedName << ": expected " << *expected << " argument(s). "
<< mValues.size() << " provided.";
throw std::runtime_error(stream.str());
} else {
// TODO: check if an implicit value was programmed for this argument
if (!mIsUsed && !mDefaultValue.has_value() && mIsRequired) {
std::stringstream stream;
stream << mNames[0] << ": required.";
throw std::runtime_error(stream.str());
}
if (mIsUsed && mIsRequired && mValues.size() == 0) {
std::stringstream stream;
stream << mUsedName << ": no value provided.";
throw std::runtime_error(stream.str());
}
}
} else {
if (mValues.size() != expected && !mDefaultValue.has_value()) {
std::stringstream stream;
stream << mUsedName << ": expected " << *expected << " argument(s). "
<< mValues.size() << " provided.";
throw std::runtime_error(stream.str());
}
}
}
}
auto maybe_nargs() const -> std::optional<size_t> {
if (mNumArgs < 0)
return std::nullopt;
else
return static_cast<size_t>(mNumArgs);
}
size_t get_arguments_length() const {
return std::accumulate(std::begin(mNames), std::end(mNames), size_t(0),
[](const auto &sum, const auto &s) {
return sum + s.size() +
1; // +1 for space between names
});
}
friend std::ostream &operator<<(std::ostream &stream,
const Argument &argument) {
std::stringstream nameStream;
std::copy(std::begin(argument.mNames), std::end(argument.mNames),
std::ostream_iterator<std::string>(nameStream, " "));
stream << nameStream.str() << "\t" << argument.mHelp;
if (argument.mIsRequired)
stream << "[Required]";
stream << "\n";
return stream;
}
template <typename T> bool operator!=(const T &aRhs) const {
return !(*this == aRhs);
}
/*
* Compare to an argument value of known type
* @throws std::logic_error in case of incompatible types
*/
template <typename T> bool operator==(const T &aRhs) const {
if constexpr (!details::is_container_v<T>) {
return get<T>() == aRhs;
} else {
using ValueType = typename T::value_type;
auto tLhs = get<T>();
return std::equal(std::begin(tLhs), std::end(tLhs), std::begin(aRhs),
std::end(aRhs), [](const auto &lhs, const auto &rhs) {
return std::any_cast<const ValueType &>(lhs) == rhs;
});
}
}
private:
static bool is_integer(const std::string &aValue) {
if (aValue.empty() ||
((!isdigit(aValue[0])) && (aValue[0] != '-') && (aValue[0] != '+')))
return false;
char *tPtr;
strtol(aValue.c_str(), &tPtr, 10);
return (*tPtr == 0);
}
static bool is_float(const std::string &aValue) {
std::istringstream tStream(aValue);
float tFloat;
// noskipws considers leading whitespace invalid
tStream >> std::noskipws >> tFloat;
// Check the entire string was consumed
// and if either failbit or badbit is set
return tStream.eof() && !tStream.fail();
}
// If an argument starts with "-" or "--", then it's optional
static bool is_optional(const std::string &aName) {
return (aName.size() > 1 && aName[0] == '-' && !is_integer(aName) &&
!is_float(aName));
}
static bool is_positional(const std::string &aName) {
return !is_optional(aName);
}
/*
* Get argument value given a type
* @throws std::logic_error in case of incompatible types
*/
template <typename T> T get() const {
if (!mValues.empty()) {
if constexpr (details::is_container_v<T>)
return any_cast_container<T>(mValues);
else
return std::any_cast<T>(mValues.front());
}
if (mDefaultValue.has_value()) {
return std::any_cast<T>(mDefaultValue);
}
throw std::logic_error("No value provided");
}
template <typename T>
static auto any_cast_container(const std::vector<std::any> &aOperand) -> T {
using ValueType = typename T::value_type;
T tResult;
std::transform(
begin(aOperand), end(aOperand), std::back_inserter(tResult),
[](const auto &value) { return std::any_cast<ValueType>(value); });
return tResult;
}
std::vector<std::string> mNames;
std::string mUsedName;
std::string mHelp;
std::any mDefaultValue;
std::any mImplicitValue;
using valued_action = std::function<std::any(const std::string &)>;
using void_action = std::function<void(const std::string &)>;
std::variant<valued_action, void_action> mAction{
std::in_place_type<valued_action>,
[](const std::string &aValue) { return aValue; }};
std::vector<std::any> mValues;
int mNumArgs = 1;
bool mIsOptional : 1;
bool mIsRequired : 1;
bool mIsUsed : 1; // True if the optional argument is used by user
static constexpr auto mHelpOption = "-h";
static constexpr auto mHelpOptionLong = "--help";
};
class ArgumentParser {
public:
explicit ArgumentParser(std::string aProgramName = {})
: mProgramName(std::move(aProgramName)) {
add_argument(Argument::mHelpOption, Argument::mHelpOptionLong)
.help("show this help message and exit")
.nargs(0)
.default_value(false)
.implicit_value(true);
}
ArgumentParser(ArgumentParser &&) noexcept = default;
ArgumentParser &operator=(ArgumentParser &&) = default;
ArgumentParser(const ArgumentParser &other)
: mProgramName(other.mProgramName),
mPositionalArguments(other.mPositionalArguments),
mOptionalArguments(other.mOptionalArguments) {
for (auto it = begin(mPositionalArguments); it != end(mPositionalArguments);
++it)
index_argument(it);
for (auto it = begin(mOptionalArguments); it != end(mOptionalArguments);
++it)
index_argument(it);
}
ArgumentParser &operator=(const ArgumentParser &other) {
auto tmp = other;
std::swap(*this, tmp);
return *this;
}
// Parameter packing
// Call add_argument with variadic number of string arguments
template <typename... Targs> Argument &add_argument(Targs... Fargs) {
auto tArgument = mOptionalArguments.emplace(cend(mOptionalArguments),
std::move(Fargs)...);
if (!tArgument->mIsOptional)
mPositionalArguments.splice(cend(mPositionalArguments),
mOptionalArguments, tArgument);
index_argument(tArgument);
return *tArgument;
}
// Parameter packed add_parents method
// Accepts a variadic number of ArgumentParser objects
template <typename... Targs> void add_parents(const Targs &... Fargs) {
for (const ArgumentParser &tParentParser : {std::ref(Fargs)...}) {
for (auto &tArgument : tParentParser.mPositionalArguments) {
auto it =
mPositionalArguments.insert(cend(mPositionalArguments), tArgument);
index_argument(it);
}
for (auto &tArgument : tParentParser.mOptionalArguments) {
auto it =
mOptionalArguments.insert(cend(mOptionalArguments), tArgument);
index_argument(it);
}
}
}
/* Call parse_args_internal - which does all the work
* Then, validate the parsed arguments
* This variant is used mainly for testing
* @throws std::runtime_error in case of any invalid argument
*/
void parse_args(const std::vector<std::string> &aArguments) {
parse_args_internal(aArguments);
parse_args_validate();
}
/* Main entry point for parsing command-line arguments using this
* ArgumentParser
* @throws std::runtime_error in case of any invalid argument
*/
void parse_args(int argc, const char *const argv[]) {
std::vector<std::string> arguments;
std::copy(argv, argv + argc, std::back_inserter(arguments));
parse_args(arguments);
}
/* Getter enabled for all template types other than std::vector and std::list
* @throws std::logic_error in case of an invalid argument name
* @throws std::logic_error in case of incompatible types
*/
template <typename T = std::string> T get(std::string_view aArgumentName) {
return (*this)[aArgumentName].get<T>();
}
/* Indexing operator. Return a reference to an Argument object
* Used in conjunction with Argument.operator== e.g., parser["foo"] == true
* @throws std::logic_error in case of an invalid argument name
*/
Argument &operator[](std::string_view aArgumentName) {
auto tIterator = mArgumentMap.find(aArgumentName);
if (tIterator != mArgumentMap.end()) {
return *(tIterator->second);
}
throw std::logic_error("No such argument");
}
// Print help message
friend auto operator<<(std::ostream &stream, const ArgumentParser &parser)
-> std::ostream & {
if (auto sen = std::ostream::sentry(stream)) {
stream.setf(std::ios_base::left);
stream << "Usage: " << parser.mProgramName << " [options] ";
size_t tLongestArgumentLength = parser.get_length_of_longest_argument();
for (const auto &argument : parser.mPositionalArguments) {
stream << argument.mNames.front() << " ";
}
stream << "\n\n";
if (!parser.mPositionalArguments.empty())
stream << "Positional arguments:\n";
for (const auto &mPositionalArgument : parser.mPositionalArguments) {
stream.width(tLongestArgumentLength);
stream << mPositionalArgument;
}
if (!parser.mOptionalArguments.empty())
stream << (parser.mPositionalArguments.empty() ? "" : "\n")
<< "Optional arguments:\n";
for (const auto &mOptionalArgument : parser.mOptionalArguments) {
stream.width(tLongestArgumentLength);
stream << mOptionalArgument;
}
}
return stream;
}
// Format help message
auto help() const -> std::stringstream {
std::stringstream out;
out << *this;
return out;
}
// Printing the one and only help message
// I've stuck with a simple message format, nothing fancy.
[[deprecated("Use cout << program; instead. See also help().")]] std::string
print_help() {
auto out = help();
std::cout << out.rdbuf();
return out.str();
}
private:
/*
* @throws std::runtime_error in case of any invalid argument
*/
void parse_args_internal(const std::vector<std::string> &aArguments) {
if (mProgramName.empty() && !aArguments.empty()) {
mProgramName = aArguments.front();
}
auto end = std::end(aArguments);
auto positionalArgumentIt = std::begin(mPositionalArguments);
for (auto it = std::next(std::begin(aArguments)); it != end;) {
const auto &tCurrentArgument = *it;
if (tCurrentArgument == Argument::mHelpOption ||
tCurrentArgument == Argument::mHelpOptionLong) {
throw std::runtime_error("help called");
}
if (Argument::is_positional(tCurrentArgument)) {
if (positionalArgumentIt == std::end(mPositionalArguments)) {
throw std::runtime_error(
"Maximum number of positional arguments exceeded");
}
auto tArgument = positionalArgumentIt++;
it = tArgument->consume(it, end);
} else if (auto tIterator = mArgumentMap.find(tCurrentArgument);
tIterator != mArgumentMap.end()) {
auto tArgument = tIterator->second;
it = tArgument->consume(std::next(it), end, tCurrentArgument);
} else if (const auto &tCompoundArgument = tCurrentArgument;
tCompoundArgument.size() > 1 && tCompoundArgument[0] == '-' &&
tCompoundArgument[1] != '-') {
++it;
for (size_t j = 1; j < tCompoundArgument.size(); j++) {
auto tHypotheticalArgument = std::string{'-', tCompoundArgument[j]};
auto tIterator2 = mArgumentMap.find(tHypotheticalArgument);
if (tIterator2 != mArgumentMap.end()) {
auto tArgument = tIterator2->second;
it = tArgument->consume(it, end, tHypotheticalArgument);
} else {
throw std::runtime_error("Unknown argument");
}
}
} else {
throw std::runtime_error("Unknown argument");
}
}
}
/*
* @throws std::runtime_error in case of any invalid argument
*/
void parse_args_validate() {
// Check if all arguments are parsed
std::for_each(std::begin(mArgumentMap), std::end(mArgumentMap),
[](const auto &argPair) {
const auto &tArgument = argPair.second;
tArgument->validate();
});
}
// Used by print_help.
size_t get_length_of_longest_argument() const {
if (mArgumentMap.empty())
return 0;
std::vector<size_t> argumentLengths(mArgumentMap.size());
std::transform(std::begin(mArgumentMap), std::end(mArgumentMap),
std::begin(argumentLengths), [](const auto &argPair) {
const auto &tArgument = argPair.second;
return tArgument->get_arguments_length();
});
return *std::max_element(std::begin(argumentLengths),
std::end(argumentLengths));
}
using list_iterator = std::list<Argument>::iterator;
void index_argument(list_iterator argIt) {
for (auto &mName : std::as_const(argIt->mNames))
mArgumentMap.emplace(mName, argIt);
}
std::string mProgramName;
std::list<Argument> mPositionalArguments;
std::list<Argument> mOptionalArguments;
std::map<std::string_view, list_iterator, std::less<>> mArgumentMap;
};
} // namespace argparse
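A minimal usage sketch of this parser (the program and argument names below are illustrative only; `src/application/blinktree_benchmark/main.cpp` further down in this commit uses the same API for the real benchmark):

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <argparse.hpp> // lib/argparse.hpp from this commit

int main(int argc, char **argv)
{
    argparse::ArgumentParser parser("example");

    // Positional argument with a default value.
    parser.add_argument("cores").help("Number of cores to use.").default_value(std::string("1"));

    // Boolean switch: implicit_value(true) makes the flag take no value.
    parser.add_argument("-p", "--perf").help("Use performance counters.").implicit_value(true).default_value(false);

    // Optional argument converted to an integer via a custom action.
    parser.add_argument("-i", "--iterations")
        .help("Number of iterations.")
        .default_value(std::uint16_t(1))
        .action([](const std::string &value) { return std::uint16_t(std::stoi(value)); });

    try
    {
        parser.parse_args(argc, argv);
    }
    catch (std::runtime_error &) // thrown on invalid input and on -h/--help
    {
        std::cout << parser << std::endl;
        return 1;
    }

    std::cout << "cores = " << parser.get<std::string>("cores") << ", iterations = " << parser.get<std::uint16_t>("-i")
              << ", perf = " << parser.get<bool>("-p") << std::endl;
    return 0;
}
```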

20842
lib/json.hpp Normal file

File diff suppressed because it is too large.

110
scripts/FindSSE.cmake Normal file

@@ -0,0 +1,110 @@
MACRO (FindSSE)
IF(CMAKE_SYSTEM_NAME MATCHES "Linux")
EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO)
STRING(REGEX REPLACE "^.*(sse2).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "sse2" "${SSE_THERE}" SSE2_TRUE)
IF (SSE2_TRUE)
set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
ELSE (SSE2_TRUE)
set(SSE2_FOUND false CACHE BOOL "SSE2 available on host")
ENDIF (SSE2_TRUE)
# /proc/cpuinfo apparently omits sse3 :(
STRING(REGEX REPLACE "^.*[^s](sse3).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "sse3" "${SSE_THERE}" SSE3_TRUE)
IF (NOT SSE3_TRUE)
STRING(REGEX REPLACE "^.*(T2300).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "T2300" "${SSE_THERE}" SSE3_TRUE)
ENDIF (NOT SSE3_TRUE)
STRING(REGEX REPLACE "^.*(ssse3).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "ssse3" "${SSE_THERE}" SSSE3_TRUE)
IF (SSE3_TRUE OR SSSE3_TRUE)
set(SSE3_FOUND true CACHE BOOL "SSE3 available on host")
ELSE (SSE3_TRUE OR SSSE3_TRUE)
set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
ENDIF (SSE3_TRUE OR SSSE3_TRUE)
IF (SSSE3_TRUE)
set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host")
ELSE (SSSE3_TRUE)
set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
ENDIF (SSSE3_TRUE)
STRING(REGEX REPLACE "^.*(sse4_1).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "sse4_1" "${SSE_THERE}" SSE41_TRUE)
IF (SSE41_TRUE)
set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host")
ELSE (SSE41_TRUE)
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
ENDIF (SSE41_TRUE)
STRING(REGEX REPLACE "^.*(sse4_2).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "sse4_2" "${SSE_THERE}" SSE42_TRUE)
IF (SSE42_TRUE)
set(SSE4_2_FOUND true CACHE BOOL "SSE4.2 available on host")
ELSE (SSE42_TRUE)
set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host")
ENDIF (SSE42_TRUE)
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin")
EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE
CPUINFO)
STRING(REGEX REPLACE "^.*[^S](SSE2).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "SSE2" "${SSE_THERE}" SSE2_TRUE)
IF (SSE2_TRUE)
set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
ELSE (SSE2_TRUE)
set(SSE2_FOUND false CACHE BOOL "SSE2 available on host")
ENDIF (SSE2_TRUE)
STRING(REGEX REPLACE "^.*[^S](SSE3).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "SSE3" "${SSE_THERE}" SSE3_TRUE)
IF (SSE3_TRUE)
set(SSE3_FOUND true CACHE BOOL "SSE3 available on host")
ELSE (SSE3_TRUE)
set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
ENDIF (SSE3_TRUE)
STRING(REGEX REPLACE "^.*(SSSE3).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "SSSE3" "${SSE_THERE}" SSSE3_TRUE)
IF (SSSE3_TRUE)
set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host")
ELSE (SSSE3_TRUE)
set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
ENDIF (SSSE3_TRUE)
STRING(REGEX REPLACE "^.*(SSE4.1).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "SSE4.1" "${SSE_THERE}" SSE41_TRUE)
IF (SSE41_TRUE)
set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host")
ELSE (SSE41_TRUE)
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
ENDIF (SSE41_TRUE)
STRING(REGEX REPLACE "^.*(SSE4.2).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "SSE4.2" "${SSE_THERE}" SSE42_TRUE)
IF (SSE42_TRUE)
set(SSE4_2_FOUND true CACHE BOOL "SSE4.2 available on host")
ELSE (SSE42_TRUE)
set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host")
ENDIF (SSE42_TRUE)
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows")
# TODO
set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host")
ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux")
set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host")
ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux")
ENDMACRO(FindSSE)

115
scripts/generate_ycsb Executable file

@@ -0,0 +1,115 @@
#!/usr/bin/python3
import sys
import os
from pathlib import Path
script_dir = Path(os.path.realpath(__file__))
ycsb_dir = "YCSB/"
workload_dir = str(script_dir.parent.parent) + "/workloads_specification/"
output_dir = "workloads/"
if not os.path.exists(output_dir):
os.makedirs(output_dir)
if not os.path.exists(ycsb_dir):
os.system("curl -O --location https://github.com/brianfrankcooper/YCSB/releases/download/0.16.0/ycsb-0.16.0.tar.gz")
os.system("tar xfvz ycsb-0.16.0.tar.gz")
os.system("mv ycsb-0.16.0 " + ycsb_dir)
os.system("rm ycsb-0.16.0.tar.gz")
workload = "workloada"
key_type = "randint"
if len(sys.argv) == 1:
print("Usage: python", sys.argv[0], "<workload_type> <key_type>")
print("Example: 'python", sys.argv[0], "a randint' for generating workloada with randint.")
if len(sys.argv) > 1:
workload = str(sys.argv[1])
if not workload.startswith("workload"):
workload = "workload" + workload
if len(sys.argv) > 2:
key_type = sys.argv[2]
print("Generaring YCSB workload", workload, "with key type", key_type)
out_ycsb_load = output_dir + 'ycsb_load_' + key_type + '_' + workload
out_ycsb_txn = output_dir + 'ycsb_txn_' + key_type + '_' + workload
out_load_ycsbkey = output_dir + 'load_' + 'ycsbkey' + '_' + workload
out_txn_ycsbkey = output_dir + 'txn_' + 'ycsbkey' + '_' + workload
out_load = output_dir + 'fill_' + key_type + '_' + workload
out_txn = output_dir + 'mixed_' + key_type + '_' + workload
cmd_ycsb_load = ycsb_dir + 'bin/ycsb.sh load basic -P ' + workload_dir + workload + ' -s > ' + out_ycsb_load
cmd_ycsb_txn = ycsb_dir + 'bin/ycsb.sh run basic -P ' + workload_dir + workload + ' -s > ' + out_ycsb_txn
os.system(cmd_ycsb_load)
os.system(cmd_ycsb_txn)
#####################################################################################
with open(out_ycsb_load, 'r') as f_load:
with open(out_load_ycsbkey, 'w') as f_load_out:
for line in f_load :
cols = line.split()
if len(cols) > 0 and cols[0] == "INSERT":
f_load_out.write (cols[0] + " " + cols[2][4:] + "\n")
with open (out_ycsb_txn, 'r') as f_txn:
with open (out_txn_ycsbkey, 'w') as f_txn_out:
for line in f_txn :
cols = line.split()
if (cols[0] == 'SCAN') or (cols[0] == 'INSERT') or (cols[0] == 'READ') or (cols[0] == 'UPDATE'):
startkey = cols[2][4:]
if cols[0] == 'SCAN' :
numkeys = cols[3]
f_txn_out.write (cols[0] + ' ' + startkey + ' ' + numkeys + '\n')
else :
f_txn_out.write (cols[0] + ' ' + startkey + '\n')
cmd = 'rm -f ' + out_ycsb_load
os.system(cmd)
cmd = 'rm -f ' + out_ycsb_txn
os.system(cmd)
if key_type == 'randint' :
with open (out_load_ycsbkey, 'r') as f_load:
with open (out_load, 'w') as f_load_out:
for line in f_load :
f_load_out.write (line)
with open (out_txn_ycsbkey, 'r') as f_txn:
with open (out_txn, 'w') as f_txn_out:
for line in f_txn :
f_txn_out.write (line)
elif key_type == 'monoint' :
keymap = {}
with open (out_load_ycsbkey, 'r') as f_load:
with open (out_load, 'w') as f_load_out:
count = 0
for line in f_load :
cols = line.split()
keymap[int(cols[1])] = count
f_load_out.write (cols[0] + ' ' + str(count) + '\n')
count += 1
with open (out_txn_ycsbkey, 'r') as f_txn:
with open (out_txn, 'w') as f_txn_out:
for line in f_txn :
cols = line.split()
if cols[0] == 'SCAN' :
f_txn_out.write (cols[0] + ' ' + str(keymap[int(cols[1])]) + ' ' + cols[2] + '\n')
elif cols[0] == 'INSERT' :
keymap[int(cols[1])] = count
f_txn_out.write (cols[0] + ' ' + str(count) + '\n')
count += 1
else :
f_txn_out.write (cols[0] + ' ' + str(keymap[int(cols[1])]) + '\n')
cmd = 'rm -f ' + out_load_ycsbkey
os.system(cmd)
cmd = 'rm -f ' + out_txn_ycsbkey
os.system(cmd)
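The generated files contain one request per line: `INSERT <key>` in the fill file; `READ <key>`, `UPDATE <key>`, `INSERT <key>`, or `SCAN <key> <count>` in the mixed file. Below is a minimal sketch of how such a line could be parsed; the `Request` struct and `parse_request` helper are illustrative, not the benchmark's actual workload loader:

```cpp
#include <cstdint>
#include <optional>
#include <sstream>
#include <string>

// Illustrative request representation; the benchmark's own Workload class is not shown here.
struct Request
{
    enum class Type { Insert, Read, Update, Scan };
    Type type;
    std::uint64_t key;
    std::uint64_t count = 0U; // only meaningful for SCAN
};

std::optional<Request> parse_request(const std::string &line)
{
    std::istringstream stream(line);
    std::string operation;
    Request request{};
    if (!(stream >> operation >> request.key))
    {
        return std::nullopt;
    }

    if (operation == "INSERT") request.type = Request::Type::Insert;
    else if (operation == "READ") request.type = Request::Type::Read;
    else if (operation == "UPDATE") request.type = Request::Type::Update;
    else if (operation == "SCAN")
    {
        request.type = Request::Type::Scan;
        if (!(stream >> request.count)) return std::nullopt;
    }
    else return std::nullopt;

    return request;
}
```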

79
src/application/blinktree_benchmark/README.md Normal file

@@ -0,0 +1,79 @@
# BLinkTree Benchmark
The BLinkTree benchmark stores `8`-byte numeric keys and values.
Call `./bin/blinktree_benchmark -h` for help and parameters.
## How to generate YCSB workload
* Workload specifications are given by the files in `workloads_specification/`.
* Call `make ycsb-a` and `make ycsb-c` to generate workloads **A** and **C**.
* The generated workload files are stored in `workloads/`.
* Use `./bin/blinktree_benchmark -f <fill-file> <mixed-file>` to pass the desired workload.
* Default (if not specified) is `-f workloads/fill_randint_workloada workloads/mixed_randint_workloada`.
## Important CLI arguments
* The first argument is the number of cores:
* `./bin/blinktree_benchmark 1` for using a single core.
* `./bin/blinktree_benchmark 1:24` for using cores `1` up to `24`.
* `-i <NUMBER>` specifies the number of repetitions of each workload.
* `-s <NUMBER>` specifies the step width for increasing the number of cores:
* `-s 1` will increase the used cores by one (core ids: `0,1,2,3,4,5,6,7,..,23`).
* `-s 2` will skip every second core (core ids: `0,1,3,5,7,..23`).
* `-pd <NUMBER>` specifies the prefetch distance.
* `-p` or `--perf` will activate performance counters (results will be printed to the console and the output file).
* `--latched` will enable latches for synchronization (default off).
* `--exclusive` forces the tasks to access tree nodes exclusively (e.g. by using spinlocks or core-based sequencing) (default off).
* `--sync4me` will use built-in synchronization selection to choose the matching primitive based on annotations.
* `-o <FILE>` will write the results in **json** format to the given file.
## Understanding the output
Once started, the benchmark prints a summary of the configured cores and the workload:
core configuration:
1: 0
2: 0 1
4: 0 1 2 3
workload: fill: 5m / readonly: 5m
Here, we configured the benchmark to use one to four cores; each line of the core configuration displays the number of cores and the core identifiers.
Then, the benchmark runs and prints the results for every iteration:
1 1 0 1478 ms 3.38295e+06 op/s
1 1 1 1237 ms 4.04204e+06 op/s
2 1 0 964 ms 5.18672e+06 op/s
2 1 1 675 ms 7.40741e+06 op/s
4 1 0 935 ms 5.34759e+06 op/s
4 1 1 532 ms 9.3985e+06 op/s
* The first column is the number of used cores.
* The second column displays the iteration of the benchmark (configured by `-i X`).
* The third column shows the phase identifier: `0` for the initialization phase (inserts only) and `1` for the workload phase (which is read-only here).
* After that, the time and throughput are written.
* If `--perf` is enabled, the output will be extended by some perf counters, which are labeled (like throughput).
## Plot the results
When using `-o FILE`, the results will be written to the given file, using `JSON` format.
The plot script `scripts/plot_blinktree_benchmark INPUT_FILE [INPUT_FILE ...]` will aggregate and plot the results using one or more of those `JSON` files.
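Each line of the result file holds one self-contained JSON object (see `Benchmark::requests_finished()` further down, which appends `result.to_json().dump()` per measurement). The following is a minimal sketch, assuming only that per-line structure, for reading such a file back with the bundled `nlohmann::json`; the concrete field names are not shown in this excerpt, so the sketch simply iterates over whatever keys are present:

```cpp
#include <fstream>
#include <iostream>
#include <string>
#include <json.hpp> // lib/json.hpp from this commit (nlohmann::json)

int main(int argc, char **argv)
{
    if (argc < 2)
    {
        std::cerr << "Usage: " << argv[0] << " <result-file>" << std::endl;
        return 1;
    }

    std::ifstream result_file(argv[1]);
    std::string line;
    while (std::getline(result_file, line))
    {
        if (line.empty()) continue;

        // One measurement per line; field names are defined by the chronometer
        // result's to_json() and are not part of this excerpt.
        const auto result = nlohmann::json::parse(line);
        for (const auto &[key, value] : result.items())
        {
            std::cout << key << " = " << value << "  ";
        }
        std::cout << "\n";
    }
    return 0;
}
```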
## Examples
###### Running workload A using optimistic synchronization
./bin/blinktree_benchmark 1: -s 2 -i 3 -pd 3 -p -f workloads/fill_randint_workloada workloads/mixed_randint_workloada -o optimistic.json
###### Running workload A using best matching synchronization
./bin/blinktree_benchmark 1: -s 2 -i 3 -pd 3 -p --sync4me -f workloads/fill_randint_workloada workloads/mixed_randint_workloada -o sync4me.json
###### Running workload A using reader/writer-locks
./bin/blinktree_benchmark 1: -s 2 -i 3 -pd 3 -p --latched -f workloads/fill_randint_workloada workloads/mixed_randint_workloada -o rwlocked.json
###### Running workload A using core-based sequencing
./bin/blinktree_benchmark 1: -s 2 -i 3 -pd 3 -p --exclusive -f workloads/fill_randint_workloada workloads/mixed_randint_workloada -o core-sequenced.json
###### Running workload A using spin-locks
./bin/blinktree_benchmark 1: -s 2 -i 3 -pd 3 -p --latched --exclusive -f workloads/fill_randint_workloada workloads/mixed_randint_workloada -o spinlocked.json

199
src/application/blinktree_benchmark/benchmark.cpp Normal file

@@ -0,0 +1,199 @@
#include "benchmark.h"
#include <cstdlib>
#include <iostream>
#include <json.hpp>
#include <memory>
#include <mx/memory/global_heap.h>
using namespace application::blinktree_benchmark;
Benchmark::Benchmark(benchmark::Cores &&cores, const std::uint16_t iterations, std::string &&fill_workload_file,
std::string &&mixed_workload_file, const bool use_performance_counter,
const mx::synchronization::isolation_level node_isolation_level,
const mx::synchronization::protocol preferred_synchronization_method,
const bool print_tree_statistics, const bool check_tree, std::string &&result_file_name,
std::string &&statistic_file_name, std::string &&tree_file_name, const bool profile)
: _cores(std::move(cores)), _iterations(iterations), _node_isolation_level(node_isolation_level),
_preferred_synchronization_method(preferred_synchronization_method),
_print_tree_statistics(print_tree_statistics), _check_tree(check_tree),
_result_file_name(std::move(result_file_name)), _statistic_file_name(std::move(statistic_file_name)),
_tree_file_name(std::move(tree_file_name)), _profile(profile)
{
if (use_performance_counter)
{
this->_chronometer.add(benchmark::Perf::CYCLES);
this->_chronometer.add(benchmark::Perf::INSTRUCTIONS);
this->_chronometer.add(benchmark::Perf::STALLS_MEM_ANY);
this->_chronometer.add(benchmark::Perf::SW_PREFETCH_ACCESS_NTA);
this->_chronometer.add(benchmark::Perf::SW_PREFETCH_ACCESS_WRITE);
}
std::cout << "core configuration: \n" << this->_cores.dump(2) << std::endl;
this->_workload.build(fill_workload_file, mixed_workload_file);
if (this->_workload.empty(benchmark::phase::FILL) && this->_workload.empty(benchmark::phase::MIXED))
{
std::exit(1);
}
std::cout << "workload: " << this->_workload << "\n" << std::endl;
}
void Benchmark::start()
{
// Reset tree.
if (this->_tree == nullptr)
{
this->_tree = std::make_unique<db::index::blinktree::BLinkTree<std::uint64_t, std::int64_t>>(
this->_node_isolation_level, this->_preferred_synchronization_method);
}
// Reset request scheduler.
if (this->_request_scheduler.empty() == false)
{
this->_request_scheduler.clear();
}
// Create one request scheduler per core.
for (auto core_index = 0U; core_index < this->_cores.current().size(); core_index++)
{
const auto channel_id = core_index;
auto *request_scheduler = mx::tasking::runtime::new_task<RequestSchedulerTask>(
0U, core_index, channel_id, this->_workload, this->_cores.current(), this->_tree.get(), this);
mx::tasking::runtime::spawn(*request_scheduler, 0U);
this->_request_scheduler.push_back(request_scheduler);
}
this->_open_requests = this->_request_scheduler.size();
// Start measurement.
if (this->_profile)
{
mx::tasking::runtime::profile(this->profile_file_name());
}
this->_chronometer.start(static_cast<std::uint16_t>(static_cast<benchmark::phase>(this->_workload)),
this->_current_iteration + 1, this->_cores.current());
}
const mx::util::core_set &Benchmark::core_set()
{
if (this->_current_iteration == std::numeric_limits<std::uint16_t>::max())
{
// This is the very first time we start the benchmark.
this->_current_iteration = 0U;
return this->_cores.next();
}
// Switch from fill to mixed phase.
if (this->_workload == benchmark::phase::FILL && this->_workload.empty(benchmark::phase::MIXED) == false)
{
this->_workload.reset(benchmark::phase::MIXED);
return this->_cores.current();
}
this->_workload.reset(benchmark::phase::FILL);
// Run the next iteration.
if (++this->_current_iteration < this->_iterations)
{
return this->_cores.current();
}
this->_current_iteration = 0U;
// At this point, all phases and all iterations for the current core configuration
// are done. Increase the cores.
return this->_cores.next();
}
void Benchmark::requests_finished()
{
const auto open_requests = --this->_open_requests;
if (open_requests == 0U) // All request schedulers are done.
{
// Stop and print time (and performance counter).
const auto result = this->_chronometer.stop(this->_workload.size());
mx::tasking::runtime::stop();
std::cout << result << std::endl;
// Dump results to file.
if (this->_result_file_name.empty() == false)
{
std::ofstream result_file_stream(this->_result_file_name, std::ofstream::app);
result_file_stream << result.to_json().dump() << std::endl;
}
// Dump statistics to file.
if constexpr (mx::tasking::config::task_statistics())
{
if (this->_statistic_file_name.empty() == false)
{
std::ofstream statistic_file_stream(this->_statistic_file_name, std::ofstream::app);
nlohmann::json statistic_json;
statistic_json["iteration"] = result.iteration();
statistic_json["cores"] = result.core_count();
statistic_json["phase"] = result.phase();
statistic_json["scheduled"] = nlohmann::json();
statistic_json["scheduled-on-channel"] = nlohmann::json();
statistic_json["scheduled-off-channel"] = nlohmann::json();
statistic_json["executed"] = nlohmann::json();
statistic_json["executed-reader"] = nlohmann::json();
statistic_json["executed-writer"] = nlohmann::json();
statistic_json["buffer-fills"] = nlohmann::json();
for (auto i = 0U; i < this->_cores.current().size(); i++)
{
const auto core_id = std::int32_t{this->_cores.current()[i]};
const auto core_id_string = std::to_string(core_id);
statistic_json["scheduled"][core_id_string] =
result.scheduled_tasks(core_id) / double(result.operation_count());
statistic_json["scheduled-on-core"][core_id_string] =
result.scheduled_tasks_on_core(core_id) / double(result.operation_count());
statistic_json["scheduled-off-core"][core_id_string] =
result.scheduled_tasks_off_core(core_id) / double(result.operation_count());
statistic_json["executed"][core_id_string] =
result.executed_tasks(core_id) / double(result.operation_count());
statistic_json["executed-reader"][core_id_string] =
result.executed_reader_tasks(core_id) / double(result.operation_count());
statistic_json["executed-writer"][core_id_string] =
result.executed_writer_tasks(core_id) / double(result.operation_count());
statistic_json["fill"][core_id_string] =
result.worker_fills(core_id) / double(result.operation_count());
}
statistic_file_stream << statistic_json.dump(2) << std::endl;
}
}
// Check and print the tree.
if (this->_check_tree)
{
this->_tree->check();
}
if (this->_print_tree_statistics)
{
this->_tree->print_statistics();
}
const auto is_last_phase =
this->_workload == benchmark::phase::MIXED || this->_workload.empty(benchmark::phase::MIXED);
// Dump the tree.
if (this->_tree_file_name.empty() == false && is_last_phase)
{
std::ofstream tree_file_stream(this->_tree_file_name);
tree_file_stream << static_cast<nlohmann::json>(*(this->_tree)).dump() << std::endl;
}
// Delete the tree to free the whole memory.
if (is_last_phase)
{
this->_tree.reset(nullptr);
}
}
}
std::string Benchmark::profile_file_name() const
{
return "profiling-" + std::to_string(this->_cores.current().size()) + "-cores" + "-phase-" +
std::to_string(static_cast<std::uint16_t>(static_cast<benchmark::phase>(this->_workload))) + "-iteration-" +
std::to_string(this->_current_iteration) + ".json";
}

103
src/application/blinktree_benchmark/benchmark.h Normal file

@@ -0,0 +1,103 @@
#pragma once
#include "listener.h"
#include "request_scheduler.h"
#include <array>
#include <atomic>
#include <benchmark/chronometer.h>
#include <benchmark/cores.h>
#include <benchmark/workload.h>
#include <cstdint>
#include <db/index/blinktree/b_link_tree.h>
#include <memory>
#include <mx/util/core_set.h>
#include <string>
#include <vector>
namespace application::blinktree_benchmark {
/**
* Benchmark executing the task-based BLink-Tree.
*/
class Benchmark final : public Listener
{
public:
Benchmark(benchmark::Cores &&, std::uint16_t iterations, std::string &&fill_workload_file,
std::string &&mixed_workload_file, bool use_performance_counter,
mx::synchronization::isolation_level node_isolation_level,
mx::synchronization::protocol preferred_synchronization_method, bool print_tree_statistics,
bool check_tree, std::string &&result_file_name, std::string &&statistic_file_name,
std::string &&tree_file_name, bool profile);
~Benchmark() noexcept override = default;
/**
* @return Core set the benchmark should run in the current iteration.
*/
const mx::util::core_set &core_set();
/**
* Callback for request tasks to notify they are out of
* new requests.
*/
void requests_finished() override;
/**
* Starts the benchmark after initialization.
*/
void start();
private:
// Collection of cores the benchmark should run on.
benchmark::Cores _cores;
// Number of iterations the benchmark should use.
const std::uint16_t _iterations;
// Current iteration within the actual core set.
std::uint16_t _current_iteration = std::numeric_limits<std::uint16_t>::max();
// Workload to get requests from.
benchmark::Workload _workload;
// Tree to run requests on.
std::unique_ptr<db::index::blinktree::BLinkTree<std::uint64_t, std::int64_t>> _tree;
// The synchronization mechanism to use for tree nodes.
const mx::synchronization::isolation_level _node_isolation_level;
// Preferred synchronization method.
const mx::synchronization::protocol _preferred_synchronization_method;
// If true, the tree statistics (height, number of nodes, ...) will be printed.
const bool _print_tree_statistics;
// If true, the tree will be checked for consistency after each iteration.
const bool _check_tree;
// Name of the file to print results to.
const std::string _result_file_name;
// Name of the file to print further statistics.
const std::string _statistic_file_name;
// Name of the file to serialize the tree to.
const std::string _tree_file_name;
// If true, use idle profiling.
const bool _profile;
// Number of open request tasks; used for tracking the benchmark.
alignas(64) std::atomic_uint16_t _open_requests = 0;
// List of request schedulers.
alignas(64) std::vector<RequestSchedulerTask *> _request_scheduler;
// Chronometer for starting/stopping time and performance counter.
alignas(64) benchmark::Chronometer<std::uint16_t> _chronometer;
/**
* @return Name of the file to write profiling results to.
*/
[[nodiscard]] std::string profile_file_name() const;
};
} // namespace application::blinktree_benchmark

17
src/application/blinktree_benchmark/config.h Normal file

@@ -0,0 +1,17 @@
#pragma once
namespace application::blinktree_benchmark {
class config
{
public:
/**
* @return Number of requests that will be started at a time by the request scheduler.
*/
static constexpr auto batch_size() noexcept { return 500U; }
/**
* @return Number of maximal open requests, system-wide.
*/
static constexpr auto max_parallel_requests() noexcept { return 1500U; }
};
} // namespace application::blinktree_benchmark

15
src/application/blinktree_benchmark/listener.h Normal file

@@ -0,0 +1,15 @@
#pragma once
namespace application::blinktree_benchmark {
/**
* The listener will be used to notify the benchmark that request tasks are
* done and no more work is available.
*/
class Listener
{
public:
constexpr Listener() = default;
virtual ~Listener() = default;
virtual void requests_finished() = 0;
};
} // namespace application::blinktree_benchmark

179
src/application/blinktree_benchmark/main.cpp Normal file

@@ -0,0 +1,179 @@
#include "benchmark.h"
#include <argparse.hpp>
#include <benchmark/cores.h>
#include <mx/system/environment.h>
#include <mx/system/thread.h>
#include <mx/tasking/runtime.h>
#include <mx/util/core_set.h>
#include <tuple>
using namespace application::blinktree_benchmark;
/**
* Instantiates the BLink-Tree benchmark with CLI arguments.
* @param count_arguments Number of CLI arguments.
* @param arguments Arguments itself.
*
* @return Instance of the benchmark and parameters for tasking runtime.
*/
std::tuple<Benchmark *, std::uint16_t, bool> create_benchmark(int count_arguments, char **arguments);
/**
* Starts the benchmark.
*
* @param count_arguments Number of CLI arguments.
* @param arguments Arguments itself.
*
* @return Return code of the application.
*/
int main(int count_arguments, char **arguments)
{
if (mx::system::Environment::is_numa_balancing_enabled())
{
std::cout << "[Warn] NUMA balancing may be enabled, set '/proc/sys/kernel/numa_balancing' to '0'" << std::endl;
}
auto [benchmark, prefetch_distance, use_system_allocator] = create_benchmark(count_arguments, arguments);
if (benchmark == nullptr)
{
return 1;
}
mx::util::core_set cores{};
while ((cores = benchmark->core_set()))
{
mx::tasking::runtime_guard _(use_system_allocator, cores, prefetch_distance);
benchmark->start();
}
delete benchmark;
return 0;
}
std::tuple<Benchmark *, std::uint16_t, bool> create_benchmark(int count_arguments, char **arguments)
{
// Set up arguments.
argparse::ArgumentParser argument_parser("blinktree_benchmark");
argument_parser.add_argument("cores")
.help("Range of the number of cores (1 for using 1 core, 1: for using 1 up to available cores, 1:4 for using "
"cores from 1 to 4).")
.default_value(std::string("1"));
/* Not used for the moment.
argument_parser.add_argument("-c", "--channels-per-core")
.help("Number of how many channels used per core.")
.default_value(std::uint16_t(1))
.action([](const std::string &value) { return std::uint16_t(std::stoi(value)); });
*/
argument_parser.add_argument("-s", "--steps")
.help("Steps, how number of cores is increased (1,2,4,6,.. for -s 2).")
.default_value(std::uint16_t(2))
.action([](const std::string &value) { return std::uint16_t(std::stoi(value)); });
argument_parser.add_argument("-i", "--iterations")
.help("Number of iterations for each workload")
.default_value(std::uint16_t(1))
.action([](const std::string &value) { return std::uint16_t(std::stoi(value)); });
argument_parser.add_argument("-sco", "--system-core-order")
.help("Use systems core order. If not, cores are ordered by node id (should be preferred).")
.implicit_value(true)
.default_value(false);
argument_parser.add_argument("-p", "--perf")
.help("Use performance counter.")
.implicit_value(true)
.default_value(false);
argument_parser.add_argument("--exclusive")
.help("Are all node accesses exclusive?")
.implicit_value(true)
.default_value(false);
argument_parser.add_argument("--latched")
.help("Prefer latch for synchronization?")
.implicit_value(true)
.default_value(false);
argument_parser.add_argument("--olfit")
.help("Prefer OLFIT for synchronization?")
.implicit_value(true)
.default_value(false);
argument_parser.add_argument("--sync4me")
.help("Let the tasking layer decide the synchronization primitive.")
.implicit_value(true)
.default_value(false);
argument_parser.add_argument("--print-stats")
.help("Print tree statistics after every iteration.")
.implicit_value(true)
.default_value(false);
argument_parser.add_argument("--disable-check")
.help("Disable tree check while benchmarking.")
.implicit_value(true)
.default_value(false);
argument_parser.add_argument("-f", "--workload-files")
.help("Files containing the workloads (workloads/fill workloads/mixed for example).")
.nargs(2)
.default_value(
std::vector<std::string>{"workloads/fill_randint_workloada", "workloads/mixed_randint_workloada"});
argument_parser.add_argument("-pd", "--prefetch-distance")
.help("Distance of prefetched data objects (0 = disable prefetching).")
.default_value(std::uint16_t(0))
.action([](const std::string &value) { return std::uint16_t(std::stoi(value)); });
argument_parser.add_argument("--system-allocator")
.help("Use the systems malloc interface to allocate tasks (default disabled).")
.implicit_value(true)
.default_value(false);
argument_parser.add_argument("-ot", "--out-tree")
.help("Name of the file, the tree will be written in json format.")
.default_value(std::string(""));
argument_parser.add_argument("-os", "--out-statistics")
.help("Name of the file, the task statistics will be written in json format.")
.default_value(std::string(""));
argument_parser.add_argument("-o", "--out")
.help("Name of the file, the results will be written to.")
.default_value(std::string(""));
argument_parser.add_argument("--profiling")
.help("Enable profiling (default disabled).")
.implicit_value(true)
.default_value(false);
// Parse arguments.
try
{
argument_parser.parse_args(count_arguments, arguments);
}
catch (std::runtime_error &e)
{
std::cout << argument_parser << std::endl;
return {nullptr, 0U, false};
}
auto order =
argument_parser.get<bool>("-sco") ? mx::util::core_set::Order::Ascending : mx::util::core_set::Order::NUMAAware;
auto cores =
benchmark::Cores({argument_parser.get<std::string>("cores"), argument_parser.get<std::uint16_t>("-s"), order});
auto workload_files = argument_parser.get<std::vector<std::string>>("-f");
const auto isolation_level = argument_parser.get<bool>("--exclusive")
? mx::synchronization::isolation_level::Exclusive
: mx::synchronization::isolation_level::ExclusiveWriter;
auto preferred_synchronization_method = mx::synchronization::protocol::Queue;
if (argument_parser.get<bool>("--latched"))
{
preferred_synchronization_method = mx::synchronization::protocol::Latch;
}
else if (argument_parser.get<bool>("--olfit"))
{
preferred_synchronization_method = mx::synchronization::protocol::OLFIT;
}
else if (argument_parser.get<bool>("--sync4me"))
{
preferred_synchronization_method = mx::synchronization::protocol::None;
}
// Create the benchmark.
auto *benchmark =
new Benchmark(std::move(cores), argument_parser.get<std::uint16_t>("-i"), std::move(workload_files[0]),
std::move(workload_files[1]), argument_parser.get<bool>("-p"), isolation_level,
preferred_synchronization_method, argument_parser.get<bool>("--print-stats"),
argument_parser.get<bool>("--disable-check") == false, argument_parser.get<std::string>("-o"),
argument_parser.get<std::string>("-os"), argument_parser.get<std::string>("-ot"),
argument_parser.get<bool>("--profiling"));
return {benchmark, argument_parser.get<std::uint16_t>("-pd"), argument_parser.get<bool>("--system-allocator")};
}

View File

@@ -0,0 +1,252 @@
#pragma once
#include "config.h"
#include "listener.h"
#include <atomic>
#include <benchmark/workload.h>
#include <cstdint>
#include <db/index/blinktree/b_link_tree.h>
#include <db/index/blinktree/config.h>
#include <db/index/blinktree/insert_value_task.h>
#include <db/index/blinktree/lookup_task.h>
#include <db/index/blinktree/update_task.h>
#include <mx/resource/resource.h>
#include <mx/tasking/runtime.h>
#include <mx/tasking/task.h>
#include <mx/util/core_set.h>
#include <mx/util/reference_counter.h>
namespace application::blinktree_benchmark {
class RequestIndex
{
public:
static RequestIndex make_finished() { return RequestIndex{std::numeric_limits<decltype(_index)>::max(), 0UL}; }
static RequestIndex make_no_new() { return RequestIndex{0UL, 0UL}; }
RequestIndex(const std::uint64_t index, const std::uint64_t count) noexcept : _index(index), _count(count) {}
explicit RequestIndex(std::pair<std::uint64_t, std::uint64_t> &&index_and_count) noexcept
: _index(std::get<0>(index_and_count)), _count(std::get<1>(index_and_count))
{
}
RequestIndex(RequestIndex &&) noexcept = default;
RequestIndex(const RequestIndex &) = default;
~RequestIndex() noexcept = default;
RequestIndex &operator=(RequestIndex &&) noexcept = default;
[[nodiscard]] std::uint64_t index() const noexcept { return _index; }
[[nodiscard]] std::uint64_t count() const noexcept { return _count; }
[[nodiscard]] bool is_finished() const noexcept { return _index == std::numeric_limits<decltype(_index)>::max(); }
[[nodiscard]] bool has_new() const noexcept { return _count > 0UL; }
RequestIndex &operator-=(const std::uint64_t count) noexcept
{
_count -= count;
_index += count;
return *this;
}
private:
std::uint64_t _index;
std::uint64_t _count;
};
/**
* The RequestContainer manages the workload and allocates new batches of requests
* that will be scheduled by the request scheduler.
*/
class RequestContainer
{
public:
RequestContainer(const std::uint16_t core_id, const std::uint64_t max_open_requests,
benchmark::Workload &workload) noexcept
: _finished_requests(core_id), _local_buffer(workload.next(config::batch_size())),
_max_pending_requests(max_open_requests), _workload(workload)
{
}
~RequestContainer() noexcept = default;
/**
* Allocates the next requests to spawn.
*
* @return RequestIndex holding the workload index and the number of requests to spawn.
* A count of zero means no new requests may be scheduled right now; is_finished()
* signals that the workload is drained.
*/
RequestIndex next() noexcept
{
const auto finished_requests = _finished_requests.load();
const auto pending_requests = _scheduled_requests - finished_requests;
if (pending_requests >= _max_pending_requests)
{
// Too many open requests somewhere in the system.
return RequestIndex::make_no_new();
}
if (_local_buffer.has_new() == false)
{
_local_buffer = RequestIndex{_workload.next(config::batch_size())};
}
if (_local_buffer.has_new())
{
// How many requests can be scheduled without reaching the request limit?
const auto free_requests = _max_pending_requests - pending_requests;
// Try to spawn all free requests, but at least those in the local buffer.
const auto count = std::min(free_requests, _local_buffer.count());
_scheduled_requests += count;
const auto index = RequestIndex{_local_buffer.index(), count};
_local_buffer -= count;
return index;
}
// Do we have to wait for pending requests or are we finished?
return pending_requests > 0UL ? RequestIndex::make_no_new() : RequestIndex::make_finished();
}
/**
* Callback after a value was inserted.
*/
void inserted(const std::uint16_t core_id, const std::uint64_t /*key*/, const std::int64_t /*value*/) noexcept
{
task_finished(core_id);
}
/**
* Callback after a value was updated.
*/
void updated(const std::uint16_t core_id, const std::uint64_t /*key*/, const std::int64_t /*value*/) noexcept
{
task_finished(core_id);
}
/**
* Callback after a value was removed.
*/
void removed(const std::uint16_t core_id, const std::uint64_t /*key*/) noexcept { task_finished(core_id); }
/**
* Callback after a value was found.
*/
void found(const std::uint16_t core_id, const std::uint64_t /*key*/, const std::int64_t /*value*/) noexcept
{
task_finished(core_id);
}
/**
* Callback when a value was not found.
*/
void missing(const std::uint16_t core_id, const std::uint64_t /*key*/) noexcept { task_finished(core_id); }
const benchmark::NumericTuple &operator[](const std::size_t index) const noexcept { return _workload[index]; }
private:
// Number of requests finished by tasks.
mx::util::reference_counter_64 _finished_requests;
// Number of tasks scheduled by the owning request scheduler.
std::uint64_t _scheduled_requests = 0UL;
// Local buffer of request items already claimed from the global workload but not yet scheduled.
RequestIndex _local_buffer;
// Maximal number of requests this scheduler may keep pending,
// derived from the system-wide limit of parallel requests.
const std::uint64_t _max_pending_requests;
// Workload to get requests from.
benchmark::Workload &_workload;
/**
* Updates the counter of finished requests.
*/
void task_finished(const std::uint16_t core_id) { _finished_requests.add(core_id); }
};
/**
* The RequestSchedulerTask owns its request container and spawns requests against the BLink-Tree.
*/
class RequestSchedulerTask final : public mx::tasking::TaskInterface
{
public:
RequestSchedulerTask(const std::uint16_t core_id, const std::uint16_t channel_id, benchmark::Workload &workload,
const mx::util::core_set &core_set,
db::index::blinktree::BLinkTree<std::uint64_t, std::int64_t> *tree, Listener *listener)
: _tree(tree), _listener(listener)
{
this->annotate(mx::tasking::priority::low);
this->is_readonly(false);
const auto container = mx::tasking::runtime::new_resource<RequestContainer>(
sizeof(RequestContainer), mx::resource::hint{channel_id}, core_id,
config::max_parallel_requests() / core_set.size(), workload);
this->annotate(container, sizeof(RequestContainer));
}
~RequestSchedulerTask() final = default;
mx::tasking::TaskResult execute(const std::uint16_t core_id, const std::uint16_t channel_id) override
{
// Get some new requests from the container.
auto &request_container = *mx::resource::ptr_cast<RequestContainer>(this->annotated_resource());
const auto next_requests = request_container.next();
if (next_requests.has_new())
{
for (auto i = next_requests.index(); i < next_requests.index() + next_requests.count(); ++i)
{
mx::tasking::TaskInterface *task{nullptr};
const auto &tuple = request_container[i];
if (tuple == benchmark::NumericTuple::INSERT)
{
task = mx::tasking::runtime::new_task<
db::index::blinktree::InsertValueTask<std::uint64_t, std::int64_t, RequestContainer>>(
core_id, tuple.key(), tuple.value(), _tree, request_container);
task->is_readonly(_tree->height() > 1U);
}
else if (tuple == benchmark::NumericTuple::LOOKUP)
{
task = mx::tasking::runtime::new_task<
db::index::blinktree::LookupTask<std::uint64_t, std::int64_t, RequestContainer>>(
core_id, tuple.key(), request_container);
task->is_readonly(true);
}
else if (tuple == benchmark::NumericTuple::UPDATE)
{
task = mx::tasking::runtime::new_task<
db::index::blinktree::UpdateTask<std::uint64_t, std::int64_t, RequestContainer>>(
core_id, tuple.key(), tuple.value(), request_container);
task->is_readonly(_tree->height() > 1U);
}
task->annotate(_tree->root(), db::index::blinktree::config::node_size() / 4U);
mx::tasking::runtime::spawn(*task, channel_id);
}
}
else if (next_requests.is_finished())
{
// All requests are done. Notify the benchmark and die.
_listener->requests_finished();
mx::tasking::runtime::delete_resource<RequestContainer>(this->annotated_resource());
return mx::tasking::TaskResult::make_remove();
}
return mx::tasking::TaskResult::make_succeed(this);
}
private:
// The tree to send requests to.
db::index::blinktree::BLinkTree<std::uint64_t, std::int64_t> *_tree;
// Benchmark listener to notify when all requests are done.
Listener *_listener;
};
} // namespace application::blinktree_benchmark
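A condensed sketch of the contract between RequestContainer::next() and its caller (illustration only; drain and spawn_request are hypothetical stand-ins for the task creation done in RequestSchedulerTask::execute() above):
// Hypothetical helper, not part of the benchmark: drains one RequestContainer.
template <typename SpawnFunction>
void drain(application::blinktree_benchmark::RequestContainer &container, SpawnFunction &&spawn_request)
{
    while (true)
    {
        const auto next = container.next();
        if (next.is_finished())
        {
            return; // Workload exhausted and every pending request answered.
        }
        for (auto i = next.index(); i < next.index() + next.count(); ++i)
        {
            spawn_request(container[i]); // Insert/lookup/update task against the tree.
        }
        // A count() of zero means the pending-request limit is reached; answered requests
        // call inserted()/found()/... on the container, which frees capacity again.
    }
}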

View File

@@ -0,0 +1,49 @@
# HashJoin Benchmark
Benchmark of a parallel, task-based HashJoin.
## How to generate workload
* Download TPC-H benchmark and generate tables
* Specify joined tables and key-indices via CLI arguments
## Important CLI arguments
* The first argument is the number of cores:
* `./bin/hashjoin_benchmark 1` for using a single core.
* `./bin/hashjoin_benchmark 1:24` for using cores `1` up to `24`.
* `-i <NUMBER>` specifies the number of repetitions of each workload.
* `-s <NUMBER>` specifies the step width for increasing the number of cores:
* `-s 1` will increase the number of used cores by one per configuration (`1,2,3,...,24` cores).
* `-s 2` will increase the number of used cores in steps of two (`1,2,4,6,...,24` cores).
* `-pd <NUMBER>` specifies the prefetch distance.
* `-p` or `--perf` will activate performance counters (results will be printed to the console and the output file).
* `-R` specifies the TPC-H table file for the left relation.
* `-R-key` specifies the index of the join key for `R`.
* `-S` specifies the TPC-H table file for the right relation.
* `-S-key` specifies the index of the join key for `S`.
* `--batch` specifies the records per task (comma separated: `8,16,64,256`); a full example invocation follows below.
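For example, `./bin/hashjoin_benchmark 1:24 -s 4 -i 3 -R customer.tbl -R-key 0 -S orders.tbl -S-key 1 --batch 64,256 -o results.json` joins column `0` of `customer.tbl` with column `1` of `orders.tbl` on 1, 4, 8, ..., 24 cores, repeats every configuration three times for batch sizes 64 and 256, and appends the results to `results.json`.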
## Understanding the output
After starting, the benchmark prints a summary of the configured cores and the workload:
core configuration:
1: 0
2: 0 1
4: 0 1 2 3
workload: customer.tbl.0 (#3000000) JOIN orders.tbl.1 (#30000000)
Here, we configured the benchmark to use one to four cores; each line of the core configuration displays the number of cores and the core identifiers.
Afterwards, the benchmark starts and prints the results for every iteration:
1 1 64 1478 ms 3.38295e+06 op/s
2 1 64 964 ms 5.18672e+06 op/s
4 1 64 935 ms 5.34759e+06 op/s
* The first column is the number of used cores.
* The second column displays the iteration of the benchmark (configured by `-i X`).
* The third column shows the batch granularity, i.e., how many records are processed per task.
* After that, the time and throughput are written.
* If `--perf` is enabled, the output is extended by the configured performance counters, each labeled like the throughput column.
## Plot the results
When using `-o FILE`, the results will be written to the given file in `JSON` format.
The plot script `scripts/plot_hashjoin_benchmark INPUT_FILE` will aggregate and plot the results from one `JSON` file.

View File

@@ -0,0 +1,196 @@
#include "benchmark.h"
#include "build_task.h"
#include "inline_hashtable.h"
#include "partition_task.h"
#include "tpch_table_reader.h"
#include <mx/memory/global_heap.h>
#include <mx/tasking/runtime.h>
using namespace application::hash_join;
Benchmark::Benchmark(
benchmark::Cores &&cores, const std::uint16_t iterations, std::vector<std::uint32_t> &&batches,
std::tuple<std::pair<std::string, std::uint16_t>, std::pair<std::string, std::uint16_t>> &&join_table_files,
const bool use_performance_counter, std::string &&result_file_name)
: _cores(std::move(cores)), _iterations(iterations), _batches(std::move(batches)),
_result_file_name(std::move(result_file_name))
{
if (use_performance_counter)
{
this->_chronometer.add(benchmark::Perf::CYCLES);
this->_chronometer.add(benchmark::Perf::INSTRUCTIONS);
this->_chronometer.add(benchmark::Perf::STALLS_MEM_ANY);
this->_chronometer.add(benchmark::Perf::SW_PREFETCH_ACCESS_NTA);
this->_chronometer.add(benchmark::Perf::SW_PREFETCH_ACCESS_WRITE);
}
std::cout << "core configuration: \n" << this->_cores.dump(2) << std::endl;
std::vector<std::uint32_t> left_keys;
const auto &left_table = std::get<0>(std::get<0>(join_table_files));
const auto left_column_index = std::get<1>(std::get<0>(join_table_files));
application::hash_join::TPCHTableReader::read(
left_table, [&left_keys, left_column_index](const std::uint16_t index, const std::string &value) {
if (index == left_column_index)
{
left_keys.emplace_back(std::stoul(value));
}
});
std::vector<std::uint32_t> right_keys;
const auto &right_table = std::get<0>(std::get<1>(join_table_files));
const auto right_column_index = std::get<1>(std::get<1>(join_table_files));
application::hash_join::TPCHTableReader::read(
right_table, [&right_keys, right_column_index](const std::uint16_t index, const std::string &value) {
if (index == right_column_index)
{
right_keys.emplace_back(std::stoul(value));
}
});
this->_join_keys = std::make_tuple(std::move(left_keys), std::move(right_keys));
std::cout << "workload: " << left_table << "." << left_column_index << " (#" << std::get<0>(this->_join_keys).size()
<< ")"
<< " JOIN " << right_table << "." << right_column_index << " (#" << std::get<1>(this->_join_keys).size()
<< ")"
<< "\n"
<< std::endl;
}
void Benchmark::start()
{
const auto count_cores = this->_cores.current().size();
const auto count_left_keys = std::get<0>(this->_join_keys).size();
const auto count_left_keys_per_core = Benchmark::tuples_per_core(count_left_keys, count_cores);
const auto count_right_keys = std::get<1>(this->_join_keys).size();
const auto count_right_keys_per_core = Benchmark::tuples_per_core(count_right_keys, count_cores);
this->_merge_task = std::make_unique<MergeTask>(this->_cores.current(), this, count_right_keys_per_core);
// Clear notifications.
this->_build_notification = BuildFinishedNotifier{count_cores};
this->_probe_notification = ProbeFinishedNotifier{this->_merge_task.get()};
this->_build_listener = std::make_unique<Listener<BuildFinishedNotifier>>(count_cores, this->_build_notification);
this->_probe_listener = std::make_unique<Listener<ProbeFinishedNotifier>>(count_cores, this->_probe_notification);
// Build hash_tables.
this->_hash_tables.reset(new mx::resource::ptr[count_cores]); // NOLINT
for (auto channel_id = 0U; channel_id < count_cores; ++channel_id)
{
const auto needed_keys = std::size_t(count_left_keys_per_core * 1.5);
const auto needed_bytes = InlineHashtable<std::uint32_t, std::size_t>::needed_bytes(needed_keys);
this->_hash_tables.get()[channel_id] =
mx::tasking::runtime::new_resource<InlineHashtable<std::uint32_t, std::size_t>>(
needed_bytes,
mx::resource::hint{std::uint16_t(channel_id), mx::synchronization::isolation_level::Exclusive,
mx::synchronization::protocol::Queue},
needed_bytes);
}
/// Dispatch left table
auto partition_build_tasks = std::array<mx::tasking::TaskInterface *, mx::tasking::config::max_cores()>{nullptr};
for (auto i = 0U; i < count_cores; ++i)
{
const auto count_left_keys_for_core = i < count_cores - 1U
? count_left_keys_per_core
: (count_left_keys - (count_cores - 1U) * count_left_keys_per_core);
const auto count_right_keys_for_core =
i < count_cores - 1U ? count_right_keys_per_core
: (count_right_keys - (count_cores - 1U) * count_right_keys_per_core);
// Build chunk for local dispatching
auto left_chunk = mx::tasking::runtime::to_resource(
&std::get<0>(this->_join_keys)[i * count_left_keys_per_core],
mx::resource::hint{std::uint16_t(i), mx::synchronization::isolation_level::Exclusive,
mx::synchronization::protocol::Queue});
auto right_chunk = mx::tasking::runtime::to_resource(
&std::get<1>(this->_join_keys)[i * count_right_keys_per_core],
mx::resource::hint{std::uint16_t(i), mx::synchronization::isolation_level::Exclusive,
mx::synchronization::protocol::Queue});
// Run dispatcher task.
auto *partition_probe_task = mx::tasking::runtime::new_task<PartitionTask<ProbeTask>>(
0U, *this->_probe_listener, this->_batches[this->_current_batch_index], count_right_keys_for_core,
this->_hash_tables.get());
partition_probe_task->annotate(right_chunk, 64U);
this->_build_notification.dispatch_probe_task(i, partition_probe_task);
auto *partition_build_task = mx::tasking::runtime::new_task<PartitionTask<BuildTask>>(
0U, *this->_build_listener, this->_batches[this->_current_batch_index], count_left_keys_for_core,
this->_hash_tables.get());
partition_build_task->annotate(left_chunk, 64U);
partition_build_tasks[i] = partition_build_task;
}
// Here we go
this->_chronometer.start(this->_batches[this->_current_batch_index], this->_current_iteration,
this->_cores.current());
for (auto i = 0U; i < count_cores; ++i)
{
mx::tasking::runtime::spawn(*(partition_build_tasks[i]), 0U);
}
}
void Benchmark::stop()
{
// Stop and print time (and performance counter).
const auto result = this->_chronometer.stop(this->_merge_task->count_tuples());
mx::tasking::runtime::stop();
std::cout << result << std::endl;
// Dump results to file.
if (this->_result_file_name.empty() == false)
{
std::ofstream result_file_stream(this->_result_file_name, std::ofstream::app);
result_file_stream << result.to_json().dump() << std::endl;
}
}
const mx::util::core_set &Benchmark::core_set()
{
if (this->_current_iteration == std::numeric_limits<std::uint16_t>::max())
{
// This is the very first time we start the benchmark.
this->_current_iteration = 0U;
return this->_cores.next();
}
for (auto i = 0U; i < this->_cores.current().size(); ++i)
{
mx::tasking::runtime::delete_resource<InlineHashtable<std::uint32_t, std::size_t>>(this->_hash_tables.get()[i]);
}
// Run the next iteration.
if (++this->_current_iteration < this->_iterations)
{
return this->_cores.current();
}
this->_current_iteration = 0U;
if (++this->_current_batch_index < this->_batches.size())
{
return this->_cores.current();
}
this->_current_batch_index = 0U;
// At this point, all phases and all iterations for the current core configuration
// are done. Increase the cores.
return this->_cores.next();
}
std::uint64_t Benchmark::tuples_per_core(const std::uint64_t count_join_keys, const std::uint16_t count_cores) noexcept
{
const auto cache_lines = (count_join_keys * sizeof(std::uint32_t)) / 64U;
const auto cache_lines_per_core = cache_lines / count_cores;
auto p = 1U;
while (p < cache_lines_per_core)
{
p += 64U;
}
return p * (64U / sizeof(std::uint32_t));
}
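// Worked example (illustration only): for 3,000,000 uint32_t keys on 4 cores this gives
// cache_lines = 3,000,000 * 4 / 64 = 187,500 and cache_lines_per_core = 46,875. The loop rounds
// that up to the next value of the form 1 + 64 * k, i.e. 46,913 cache lines, so every core is
// assigned 46,913 * 16 = 750,608 tuples, slightly more than the naive 750,000, while each core's
// chunk covers whole cache lines.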

View File

@@ -0,0 +1,69 @@
#pragma once
#include "listener.h"
#include "merge_task.h"
#include "notifier.h"
#include <benchmark/chronometer.h>
#include <benchmark/cores.h>
#include <cstdint>
#include <memory>
#include <string>
#include <tuple>
namespace application::hash_join {
class Benchmark
{
public:
Benchmark(
benchmark::Cores &&cores, std::uint16_t iterations, std::vector<std::uint32_t> &&batches,
std::tuple<std::pair<std::string, std::uint16_t>, std::pair<std::string, std::uint16_t>> &&join_table_files,
bool use_performance_counter, std::string &&result_file_name);
~Benchmark() = default;
/**
* @return Core set the benchmark should run in the current iteration.
*/
const mx::util::core_set &core_set();
void start();
void stop();
private:
// Collection of cores the benchmark should run on.
benchmark::Cores _cores;
// Number of iterations the benchmark should use.
const std::uint16_t _iterations;
// Current iteration within the actual core set.
std::uint16_t _current_iteration = std::numeric_limits<std::uint16_t>::max();
// Number of tuples that are probed/build together.
const std::vector<std::uint32_t> _batches;
std::uint16_t _current_batch_index{0U};
// Name of the file to print results to.
const std::string _result_file_name;
// Keys to join.
std::tuple<std::vector<std::uint32_t>, std::vector<std::uint32_t>> _join_keys;
std::unique_ptr<mx::resource::ptr> _hash_tables;
std::unique_ptr<Listener<BuildFinishedNotifier>> _build_listener;
std::unique_ptr<Listener<ProbeFinishedNotifier>> _probe_listener;
std::unique_ptr<MergeTask> _merge_task;
alignas(64) BuildFinishedNotifier _build_notification;
ProbeFinishedNotifier _probe_notification;
// Chronometer for starting/stopping time and performance counter.
alignas(64) benchmark::Chronometer<std::uint32_t> _chronometer;
static std::uint64_t tuples_per_core(std::uint64_t count_join_keys, std::uint16_t count_cores) noexcept;
};
} // namespace application::hash_join

View File

@@ -0,0 +1,42 @@
#pragma once
#include "inline_hashtable.h"
#include <cstdint>
#include <iostream>
#include <mx/tasking/task.h>
#include <vector>
namespace application::hash_join {
/**
* The build task builds the hash table.
*/
class BuildTask final : public mx::tasking::TaskInterface
{
public:
BuildTask(const std::size_t size, const std::uint8_t /*numa_node_id*/) { _keys.reserve(size); }
~BuildTask() override = default;
mx::tasking::TaskResult execute(const std::uint16_t /*core_id*/, const std::uint16_t /*channel_id*/) override
{
auto *hashtable = this->annotated_resource().get<InlineHashtable<std::uint32_t, std::size_t>>();
for (const auto &row : _keys)
{
hashtable->insert(std::get<1>(row), std::get<0>(row));
}
return mx::tasking::TaskResult::make_remove();
}
void emplace_back(const std::size_t row_id, const std::uint32_t key) noexcept
{
_keys.emplace_back(std::make_pair(row_id, key));
}
[[nodiscard]] std::uint64_t size() const noexcept { return _keys.size(); }
[[nodiscard]] bool empty() const noexcept { return _keys.empty(); }
private:
// Keys and row ids to insert into the hashtable.
std::vector<std::pair<std::size_t, std::uint32_t>> _keys;
};
} // namespace application::hash_join

View File

@@ -0,0 +1,97 @@
#pragma once
#include <cstdint>
#include <limits>
#include <mx/memory/alignment_helper.h>
#include <utility>
namespace application::hash_join {
/**
* Open-addressing hashtable (linear probing) for the hash join; entries are stored inline behind the object.
*/
template <typename K, typename V> class InlineHashtable
{
private:
struct Entry
{
constexpr Entry() noexcept : key(std::numeric_limits<K>::max()), value(0) {}
Entry(Entry &&other) noexcept = default;
~Entry() noexcept = default;
Entry &operator=(Entry &&) noexcept = default;
K key;
V value;
};
public:
static std::size_t needed_bytes(const std::size_t slots) noexcept
{
return sizeof(InlineHashtable<K, V>) +
sizeof(InlineHashtable<K, V>::Entry) * mx::memory::alignment_helper::next_power_of_two(slots);
}
InlineHashtable(const std::size_t size)
: _slots((size - sizeof(InlineHashtable<K, V>)) / sizeof(InlineHashtable<K, V>::Entry))
{
for (auto i = 0U; i < _slots; ++i)
{
at(i) = Entry{};
}
}
~InlineHashtable() = default;
void insert(const K key, const V value) noexcept
{
for (auto index = hash(key);; ++index)
{
index &= _slots - 1U;
auto &entry = at(index);
if (entry.key != key && entry.key != std::numeric_limits<K>::max())
{
continue;
}
entry.key = key;
entry.value = value;
return;
}
}
V get(const K key) const noexcept
{
for (auto index = hash(key);; ++index)
{
index &= _slots - 1U;
const auto &entry = at(index);
if (entry.key == key)
{
return entry.value;
}
if (entry.key == std::numeric_limits<K>::max())
{
return std::numeric_limits<V>::max();
}
}
}
const Entry &at(const std::size_t slot) const noexcept { return reinterpret_cast<const Entry *>(this + 1)[slot]; }
Entry &at(const std::size_t slot) noexcept { return reinterpret_cast<Entry *>(this + 1)[slot]; }
private:
const std::size_t _slots;
std::size_t hash(K key) const
{
key ^= key >> 16;
key *= 0x85ebca6b;
key ^= key >> 13;
key *= 0xc2b2ae35;
key ^= key >> 16;
return std::size_t(key);
}
};
} // namespace application::hash_join
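A minimal standalone usage sketch (illustration only; the benchmark itself allocates the table through mx::tasking::runtime::new_resource, see benchmark.cpp above). The entries live inline directly behind the object, so the caller reserves one block of needed_bytes() and placement-constructs the table into it:
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <new>
#include "inline_hashtable.h" // Assumed include path.
int main()
{
    using Hashtable = application::hash_join::InlineHashtable<std::uint32_t, std::size_t>;
    const auto bytes = Hashtable::needed_bytes(1024U); // Slot count is rounded up to a power of two.
    auto *memory = std::malloc(bytes);
    auto *table = new (memory) Hashtable(bytes); // Entries are stored right behind the object.
    table->insert(42U, 7U);                      // key -> row id
    assert(table->get(42U) == 7U);                                       // Hit returns the stored row id.
    assert(table->get(43U) == std::numeric_limits<std::size_t>::max()); // Miss returns the sentinel.
    table->~Hashtable();
    std::free(memory);
    return 0;
}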

View File

@@ -0,0 +1,42 @@
#pragma once
#include "merge_task.h"
#include <algorithm>
#include <array>
#include <cstdint>
#include <iostream>
#include <mx/tasking/runtime.h>
#include <mx/tasking/task.h>
#include <mx/util/aligned_t.h>
#include <mx/util/core_set.h>
#include <mx/util/reference_counter.h>
#include <vector>
namespace application::hash_join {
template <class N> class Listener
{
public:
Listener(const std::uint16_t count_cores, N &notificator) : _count_cores(count_cores), _notificator(notificator)
{
_pending_local_notifications.fill(mx::util::aligned_t<std::uint32_t>{0U});
std::fill_n(_pending_local_notifications.begin(), count_cores, mx::util::aligned_t<std::uint32_t>{count_cores});
_pending_global_notifications.store(count_cores);
}
~Listener() = default;
std::uint16_t count_cores() const noexcept { return _count_cores; }
N &notificator() noexcept { return _notificator; }
std::uint32_t &pending_local(const std::uint16_t channel_id) noexcept
{
return _pending_local_notifications[channel_id].value();
}
std::atomic_uint32_t &pending_global() noexcept { return _pending_global_notifications; }
private:
const std::uint16_t _count_cores;
N &_notificator;
std::array<mx::util::aligned_t<std::uint32_t>, mx::tasking::config::max_cores()> _pending_local_notifications{};
alignas(64) std::atomic_uint32_t _pending_global_notifications{0U};
};
} // namespace application::hash_join

View File

@@ -0,0 +1,117 @@
#include "benchmark.h"
#include <argparse.hpp>
#include <iostream>
#include <mx/system/environment.h>
#include <utility>
#include <vector>
using namespace application::hash_join;
std::pair<Benchmark *, std::uint16_t> create_benchmark(int count_arguments, char **arguments);
int main(int count_arguments, char **arguments)
{
auto [benchmark, prefetch_distance] = create_benchmark(count_arguments, arguments);
if (mx::system::Environment::is_numa_balancing_enabled())
{
std::cout << "[Warn] NUMA balancing may be enabled, set '/proc/sys/kernel/numa_balancing' to '0'" << std::endl;
}
if (benchmark == nullptr)
{
return 1;
}
mx::util::core_set cores{};
while ((cores = benchmark->core_set()))
{
mx::tasking::runtime_guard _(false, cores, prefetch_distance);
benchmark->start();
}
delete benchmark;
return 0;
}
std::pair<Benchmark *, std::uint16_t> create_benchmark(int count_arguments, char **arguments)
{
argparse::ArgumentParser argument_parser("hashjoin_benchmark");
argument_parser.add_argument("cores")
.help("Range of the number of cores (1 for using 1 core, 1: for using 1 up to available cores, 1:4 for using "
"cores from 1 to 4).")
.default_value(std::string("1"));
argument_parser.add_argument("-s", "--steps")
.help("Steps, how number of cores is increased (1,2,4,6,.. for -s 2).")
.default_value(std::uint16_t(2U))
.action([](const std::string &value) { return std::uint16_t(std::stoi(value)); });
argument_parser.add_argument("-i", "--iterations")
.help("Number of iterations for each workload")
.default_value(std::uint16_t(1U))
.action([](const std::string &value) { return std::uint16_t(std::stoi(value)); });
argument_parser.add_argument("-sco", "--system-core-order")
.help("Use systems core order. If not, cores are ordered by node id (should be preferred).")
.implicit_value(true)
.default_value(false);
argument_parser.add_argument("-p", "--perf")
.help("Use performance counter.")
.implicit_value(true)
.default_value(false);
argument_parser.add_argument("-pd", "--prefetch-distance")
.help("Distance of prefetched data objects (0 = disable prefetching).")
.default_value(std::uint16_t(0U))
.action([](const std::string &value) { return std::uint16_t(std::stoi(value)); });
argument_parser.add_argument("-o", "--out")
.help("Name of the file, the results will be written to.")
.default_value(std::string(""));
argument_parser.add_argument("--batch")
.help("Number of tuples build/probed together; comma separated as string (e.g. \"64,128,256\")")
.default_value(std::string("128"));
argument_parser.add_argument("-R").help("Data file of left relation.").default_value(std::string("customer.tbl"));
argument_parser.add_argument("-R-key")
.help("Index of join key of R")
.default_value(std::uint16_t(0U))
.action([](const std::string &value) { return std::uint16_t(std::stoi(value)); });
argument_parser.add_argument("-S").help("Data file of right relation.").default_value(std::string("orders.tbl"));
argument_parser.add_argument("-S-key")
.help("Index of join key of S")
.default_value(std::uint16_t(1U))
.action([](const std::string &value) { return std::uint16_t(std::stoi(value)); });
// Parse arguments.
try
{
argument_parser.parse_args(count_arguments, arguments);
}
catch (std::runtime_error &e)
{
std::cout << argument_parser << std::endl;
return std::make_pair(nullptr, 0U);
}
auto order =
argument_parser.get<bool>("-sco") ? mx::util::core_set::Order::Ascending : mx::util::core_set::Order::NUMAAware;
auto cores =
benchmark::Cores({argument_parser.get<std::string>("cores"), argument_parser.get<std::uint16_t>("-s"), order});
std::vector<std::uint32_t> build_probe_batches;
auto batches = std::stringstream{argument_parser.get<std::string>("--batch")};
std::string batch;
while (std::getline(batches, batch, ','))
{
build_probe_batches.emplace_back(std::stoul(batch));
}
// Join relations
auto r = std::make_pair(argument_parser.get<std::string>("-R"), argument_parser.get<std::uint16_t>("-R-key"));
auto s = std::make_pair(argument_parser.get<std::string>("-S"), argument_parser.get<std::uint16_t>("-S-key"));
// Create the benchmark.
auto *benchmark = new Benchmark(std::move(cores), argument_parser.get<std::uint16_t>("-i"),
std::move(build_probe_batches), std::make_tuple(std::move(r), std::move(s)),
argument_parser.get<bool>("-p"), argument_parser.get<std::string>("-o"));
return {benchmark, argument_parser.get<std::uint16_t>("-pd")};
}

View File

@@ -0,0 +1,31 @@
#include "merge_task.h"
#include "benchmark.h"
using namespace application::hash_join;
MergeTask::MergeTask(const mx::util::core_set &cores, Benchmark *benchmark, const std::uint64_t output_per_core)
: _benchmark(benchmark), _count_cores(cores.size())
{
this->_result_sets = new mx::util::aligned_t<mx::util::vector<std::pair<std::size_t, std::size_t>>>[cores.size()];
for (auto channel_id = 0U; channel_id < cores.size(); ++channel_id)
{
this->_result_sets[channel_id].value().reserve(cores.numa_node_id(channel_id), output_per_core);
}
}
MergeTask::~MergeTask()
{
delete[] this->_result_sets;
}
mx::tasking::TaskResult MergeTask::execute(const std::uint16_t /*core_id*/, const std::uint16_t /*channel_id*/)
{
for (auto channel = 0U; channel < _count_cores; ++channel)
{
_count_output_tuples += result_set(channel).size();
}
_benchmark->stop();
return mx::tasking::TaskResult::make_null();
}

View File

@@ -0,0 +1,41 @@
#pragma once
#include <array>
#include <functional>
#include <mx/memory/global_heap.h>
#include <mx/tasking/task.h>
#include <mx/util/aligned_t.h>
#include <mx/util/core_set.h>
#include <mx/util/vector.h>
#include <utility>
namespace application::hash_join {
class Benchmark;
class MergeTask final : public mx::tasking::TaskInterface
{
public:
MergeTask(const mx::util::core_set &cores, Benchmark *benchmark, std::uint64_t output_per_core);
~MergeTask() override;
mx::tasking::TaskResult execute(std::uint16_t /*core_id*/, std::uint16_t /*channel_id*/) override;
mx::util::vector<std::pair<std::size_t, std::size_t>> &result_set(const std::uint16_t channel_id)
{
return _result_sets[channel_id].value();
}
[[nodiscard]] const mx::util::vector<std::pair<std::size_t, std::size_t>> &result_set(
const std::uint16_t channel_id) const
{
return _result_sets[channel_id].value();
}
[[nodiscard]] std::size_t count_tuples() const noexcept { return _count_output_tuples; }
private:
Benchmark *_benchmark;
const std::uint16_t _count_cores;
std::size_t _count_output_tuples{0U};
mx::util::aligned_t<mx::util::vector<std::pair<std::size_t, std::size_t>>> *_result_sets;
};
} // namespace application::hash_join

View File

@@ -0,0 +1,31 @@
#pragma once
#include "listener.h"
#include <cstdint>
#include <cstdlib>
#include <mx/tasking/task.h>
namespace application::hash_join {
template <class N> class NotificationTask final : public mx::tasking::TaskInterface
{
public:
NotificationTask(Listener<N> &listener) : _listener(listener) {}
~NotificationTask() override = default;
mx::tasking::TaskResult execute(const std::uint16_t /*core_id*/, const std::uint16_t channel_id) override
{
if (--_listener.pending_local(channel_id) == 0U)
{
if (--_listener.pending_global() == 0U)
{
_listener.notificator()(channel_id);
}
}
return mx::tasking::TaskResult::make_remove();
}
private:
Listener<N> &_listener;
};
} // namespace application::hash_join
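To make the two-level countdown concrete: with four cores, every PartitionTask spawns one NotificationTask per channel (routed via its annotation), so each channel eventually executes four of them. The fourth one on a channel drops that channel's pending_local counter to zero and decrements the global atomic counter; the channel that brings the global counter to zero fires the notificator exactly once, which either releases the probe phase (BuildFinishedNotifier) or spawns the MergeTask (ProbeFinishedNotifier, see notifier.h below).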

View File

@@ -0,0 +1,17 @@
#include "notifier.h"
#include <mx/tasking/runtime.h>
using namespace application::hash_join;
void BuildFinishedNotifier::operator()(const std::uint16_t channel_id)
{
for (auto target_channel_id = 0U; target_channel_id < this->_count_cores; ++target_channel_id)
{
mx::tasking::runtime::spawn(*this->_probe_tasks[target_channel_id], channel_id);
}
}
void ProbeFinishedNotifier::operator()(const std::uint16_t channel_id)
{
mx::tasking::runtime::spawn(*this->_merge_task, channel_id);
}

View File

@@ -0,0 +1,56 @@
#pragma once
#include "merge_task.h"
#include <array>
#include <mx/tasking/task.h>
#include <mx/util/vector.h>
namespace application::hash_join {
class BuildFinishedNotifier
{
public:
constexpr BuildFinishedNotifier() = default;
constexpr BuildFinishedNotifier(const std::uint16_t count_cores) : _count_cores(count_cores)
{
_probe_tasks.fill(nullptr);
}
BuildFinishedNotifier &operator=(BuildFinishedNotifier &&) = default;
~BuildFinishedNotifier() = default;
void dispatch_probe_task(const std::uint16_t index, mx::tasking::TaskInterface *task) noexcept
{
_probe_tasks[index] = task;
}
void operator()(std::uint16_t channel_id);
private:
std::uint16_t _count_cores{0U};
std::array<mx::tasking::TaskInterface *, mx::tasking::config::max_cores()> _probe_tasks{};
};
class ProbeFinishedNotifier
{
public:
constexpr ProbeFinishedNotifier() = default;
constexpr ProbeFinishedNotifier(MergeTask *merge_task) : _merge_task(merge_task) {}
ProbeFinishedNotifier &operator=(ProbeFinishedNotifier &&) = default;
~ProbeFinishedNotifier() = default;
void operator()(std::uint16_t channel_id);
mx::util::vector<std::pair<std::size_t, std::size_t>> &result_set(const std::uint16_t channel_id) noexcept
{
return _merge_task->result_set(channel_id);
}
private:
MergeTask *_merge_task{nullptr};
};
} // namespace application::hash_join

View File

@@ -0,0 +1,117 @@
#pragma once
#include "build_task.h"
#include "listener.h"
#include "notification_task.h"
#include "notifier.h"
#include "probe_task.h"
#include <array>
#include <atomic>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <mx/tasking/runtime.h>
#include <mx/tasking/task.h>
#include <mx/util/core_set.h>
#include <vector>
namespace application::hash_join {
template <class T> struct notifier_type
{
using value = BuildFinishedNotifier;
};
template <> struct notifier_type<ProbeTask>
{
using value = ProbeFinishedNotifier;
};
template <typename T> class PartitionTask final : public mx::tasking::TaskInterface
{
public:
constexpr PartitionTask(Listener<typename notifier_type<T>::value> &listener, const std::uint32_t batch_size,
const std::size_t count, const mx::resource::ptr *hash_tables) noexcept
: _listener(listener), _batch_size(batch_size), _count(count), _hash_tables(hash_tables)
{
}
~PartitionTask() override = default;
mx::tasking::TaskResult execute(const std::uint16_t core_id, const std::uint16_t channel_id) override
{
const auto count_cores = _listener.count_cores();
auto build_probe_tasks = std::array<T *, mx::tasking::config::max_cores()>{nullptr};
for (auto target_channel_id = 0U; target_channel_id < count_cores; ++target_channel_id)
{
if constexpr (std::is_same<T, BuildTask>::value)
{
build_probe_tasks[target_channel_id] = mx::tasking::runtime::new_task<T>(
core_id, _batch_size, mx::tasking::runtime::numa_node_id(target_channel_id));
}
else
{
build_probe_tasks[target_channel_id] = mx::tasking::runtime::new_task<T>(
core_id, _listener.notificator().result_set(target_channel_id), _batch_size,
mx::tasking::runtime::numa_node_id(target_channel_id));
}
build_probe_tasks[target_channel_id]->annotate(_hash_tables[target_channel_id], 64U);
}
auto *data = this->annotated_resource().template get<std::uint32_t>();
const auto offset = channel_id * _count;
for (auto data_index = 0U; data_index < _count; ++data_index)
{
const auto key = data[data_index];
// Distribute key to core
const auto target_channel_id = PartitionTask::hash(key) % count_cores;
build_probe_tasks[target_channel_id]->emplace_back(offset + data_index, key);
// Run specific task and create new.
if (build_probe_tasks[target_channel_id]->size() == _batch_size)
{
mx::tasking::runtime::spawn(*build_probe_tasks[target_channel_id], channel_id);
if constexpr (std::is_same<T, BuildTask>::value)
{
build_probe_tasks[target_channel_id] = mx::tasking::runtime::new_task<T>(
core_id, _batch_size, mx::tasking::runtime::numa_node_id(target_channel_id));
}
else
{
build_probe_tasks[target_channel_id] = mx::tasking::runtime::new_task<T>(
core_id, _listener.notificator().result_set(target_channel_id), _batch_size,
mx::tasking::runtime::numa_node_id(target_channel_id));
}
build_probe_tasks[target_channel_id]->annotate(_hash_tables[target_channel_id], 64U);
}
}
for (auto target_channel_id = 0U; target_channel_id < count_cores; ++target_channel_id)
{
// Run last build/probe tasks that are not "full".
mx::tasking::runtime::spawn(*build_probe_tasks[target_channel_id], channel_id);
// Run notification tasks for every core, indicating that all
// build/probe tasks of this core are dispatched.
auto *notification_task =
mx::tasking::runtime::new_task<NotificationTask<typename notifier_type<T>::value>>(core_id, _listener);
notification_task->annotate(std::uint16_t(target_channel_id));
mx::tasking::runtime::spawn(*notification_task, channel_id);
}
return mx::tasking::TaskResult::make_remove();
}
private:
Listener<typename notifier_type<T>::value> &_listener;
const std::uint32_t _batch_size;
const std::size_t _count;
const mx::resource::ptr *_hash_tables;
static std::uint16_t hash(const std::uint32_t key) { return std::hash<std::uint32_t>()(key); }
};
} // namespace application::hash_join

View File

@@ -0,0 +1,51 @@
#pragma once
#include "inline_hashtable.h"
#include <iostream>
#include <mx/tasking/task.h>
#include <mx/util/vector.h>
#include <utility>
#include <vector>
namespace application::hash_join {
class ProbeTask final : public mx::tasking::TaskInterface
{
public:
ProbeTask(mx::util::vector<std::pair<std::size_t, std::size_t>> &result_set, const std::size_t size,
const std::uint8_t /*numa_node_id*/)
: _result_set(result_set)
{
_keys.reserve(size);
}
~ProbeTask() override = default;
mx::tasking::TaskResult execute(const std::uint16_t /*core_id*/, const std::uint16_t /*channel_id*/) override
{
auto *hashtable = this->annotated_resource().get<InlineHashtable<std::uint32_t, std::size_t>>();
for (const auto &[row_id, key] : _keys)
{
const auto row = hashtable->get(key);
if (row != std::numeric_limits<std::size_t>::max())
{
_result_set.emplace_back(std::make_pair(row_id, row));
}
}
return mx::tasking::TaskResult::make_remove();
}
void emplace_back(const std::size_t row_id, const std::uint32_t key) noexcept
{
_keys.emplace_back(std::make_pair(row_id, key));
}
[[nodiscard]] std::uint64_t size() const noexcept { return _keys.size(); }
[[nodiscard]] bool empty() const noexcept { return _keys.empty(); }
private:
std::vector<std::pair<std::size_t, std::uint32_t>> _keys;
mx::util::vector<std::pair<std::size_t, std::size_t>> &_result_set;
};
} // namespace application::hash_join

View File

@@ -0,0 +1,24 @@
#include "tpch_table_reader.h"
#include <fstream>
#include <sstream>
using namespace application::hash_join;
void TPCHTableReader::read(const std::string &file_name,
std::function<void(const std::uint16_t, const std::string &)> &&callback)
{
std::ifstream tpc_file(file_name);
if (tpc_file.good())
{
std::string line;
while (std::getline(tpc_file, line))
{
auto line_stream = std::stringstream{line};
std::string column;
auto index = 0U;
while (std::getline(line_stream, column, '|'))
{
callback(index++, column);
}
}
}
}

View File

@@ -0,0 +1,13 @@
#pragma once
#include <cstdint>
#include <functional>
#include <string>
namespace application::hash_join {
class TPCHTableReader
{
public:
static void read(const std::string &file_name,
std::function<void(const std::uint16_t, const std::string &)> &&callback);
};
} // namespace application::hash_join
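A small usage sketch (illustration only; "region.tbl" is a placeholder file name): the reader streams a '|'-separated TPC-H table line by line and invokes the callback once per column with the column index and its raw string value:
#include <cstdint>
#include <iostream>
#include <string>
#include "tpch_table_reader.h" // Assumed include path.
int main()
{
    std::uint64_t count_rows = 0U;
    application::hash_join::TPCHTableReader::read(
        "region.tbl", [&count_rows](const std::uint16_t column_index, const std::string & /*value*/) {
            if (column_index == 0U) // Count the first column of every row.
            {
                ++count_rows;
            }
        });
    std::cout << count_rows << " rows read" << std::endl;
    return 0;
}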

225
src/benchmark/chronometer.h Normal file
View File

@@ -0,0 +1,225 @@
#pragma once
#include "perf.h"
#include "phase.h"
#include <chrono>
#include <json.hpp>
#include <mx/tasking/config.h>
#include <mx/tasking/profiling/statistic.h>
#include <mx/tasking/runtime.h>
#include <mx/util/core_set.h>
#include <numeric>
#include <ostream>
#include <unordered_map>
#include <utility>
#include <vector>
namespace benchmark {
/**
* The InterimResult is produced by the Chronometer and holds
* all results of a single benchmark iteration.
*/
template <typename P> class InterimResult
{
friend std::ostream &operator<<(std::ostream &stream, const InterimResult &result)
{
stream << result.core_count() << "\t" << result.iteration() << "\t" << result.phase() << "\t"
<< result.time().count() << " ms"
<< "\t" << result.throughput() << " op/s";
for (const auto &[name, value] : result.performance_counter())
{
const auto value_per_operation = value / double(result.operation_count());
stream << "\t" << value_per_operation << " " << name << "/op";
}
if constexpr (mx::tasking::config::task_statistics())
{
stream << "\t" << result.executed_writer_tasks() / double(result.operation_count()) << " writer/op";
stream << "\t" << result.executed_reader_tasks() / double(result.operation_count()) << " reader/op";
stream << "\t" << result.scheduled_tasks_on_core() / double(result.operation_count()) << " on-channel/op";
stream << "\t" << result.scheduled_tasks_off_core() / double(result.operation_count()) << " off-channel/op";
stream << "\t" << result.worker_fills() / double(result.operation_count()) << " fills/op";
}
return stream << std::flush;
}
public:
InterimResult(const std::uint64_t operation_count, const P &phase, const std::uint16_t iteration,
const std::uint16_t core_count, const std::chrono::milliseconds time,
std::vector<PerfCounter> &counter, std::unordered_map<std::uint16_t, std::uint64_t> executed_tasks,
std::unordered_map<std::uint16_t, std::uint64_t> executed_reader_tasks,
std::unordered_map<std::uint16_t, std::uint64_t> executed_writer_tasks,
std::unordered_map<std::uint16_t, std::uint64_t> scheduled_tasks,
std::unordered_map<std::uint16_t, std::uint64_t> scheduled_tasks_on_core,
std::unordered_map<std::uint16_t, std::uint64_t> scheduled_tasks_off_core,
std::unordered_map<std::uint16_t, std::uint64_t> worker_fills)
: _operation_count(operation_count), _phase(phase), _iteration(iteration), _core_count(core_count), _time(time),
_executed_tasks(std::move(executed_tasks)), _executed_reader_tasks(std::move(executed_reader_tasks)),
_executed_writer_tasks(std::move(executed_writer_tasks)), _scheduled_tasks(std::move(scheduled_tasks)),
_scheduled_tasks_on_core(std::move(scheduled_tasks_on_core)),
_scheduled_tasks_off_core(std::move(scheduled_tasks_off_core)), _worker_fills(std::move(worker_fills))
{
for (auto &c : counter)
{
_performance_counter.emplace_back(std::make_pair(c.name(), c.read()));
}
}
~InterimResult() = default;
std::uint64_t operation_count() const noexcept { return _operation_count; }
const P &phase() const noexcept { return _phase; }
std::uint16_t iteration() const noexcept { return _iteration; }
std::uint16_t core_count() const noexcept { return _core_count; }
std::chrono::milliseconds time() const noexcept { return _time; }
double throughput() const { return _operation_count / (_time.count() / 1000.0); }
const std::vector<std::pair<std::string, double>> &performance_counter() const noexcept
{
return _performance_counter;
}
[[maybe_unused]] std::uint64_t executed_tasks() const noexcept { return sum(_executed_tasks); }
[[maybe_unused]] std::uint64_t executed_reader_tasks() const noexcept { return sum(_executed_reader_tasks); }
[[maybe_unused]] std::uint64_t executed_writer_tasks() const noexcept { return sum(_executed_writer_tasks); }
[[maybe_unused]] std::uint64_t scheduled_tasks() const noexcept { return sum(_scheduled_tasks); }
[[maybe_unused]] std::uint64_t scheduled_tasks_on_core() const noexcept { return sum(_scheduled_tasks_on_core); }
[[maybe_unused]] std::uint64_t scheduled_tasks_off_core() const noexcept { return sum(_scheduled_tasks_off_core); }
[[maybe_unused]] std::uint64_t worker_fills() const noexcept { return sum(_worker_fills); }
std::uint64_t executed_tasks(const std::uint16_t channel_id) const noexcept
{
return _executed_tasks.at(channel_id);
}
std::uint64_t executed_reader_tasks(const std::uint16_t channel_id) const noexcept
{
return _executed_reader_tasks.at(channel_id);
}
std::uint64_t executed_writer_tasks(const std::uint16_t channel_id) const noexcept
{
return _executed_writer_tasks.at(channel_id);
}
std::uint64_t scheduled_tasks(const std::uint16_t channel_id) const noexcept
{
return _scheduled_tasks.at(channel_id);
}
std::uint64_t scheduled_tasks_on_core(const std::uint16_t channel_id) const noexcept
{
return _scheduled_tasks_on_core.at(channel_id);
}
std::uint64_t scheduled_tasks_off_core(const std::uint16_t channel_id) const noexcept
{
return _scheduled_tasks_off_core.at(channel_id);
}
std::uint64_t worker_fills(const std::uint16_t channel_id) const noexcept { return _worker_fills.at(channel_id); }
[[nodiscard]] nlohmann::json to_json() const noexcept
{
auto json = nlohmann::json{};
json["iteration"] = iteration();
json["cores"] = core_count();
json["phase"] = phase();
json["throughput"] = throughput();
for (const auto &[name, value] : performance_counter())
{
json[name] = value / double(operation_count());
}
if constexpr (mx::tasking::config::task_statistics())
{
json["executed-writer-tasks"] = executed_writer_tasks() / double(operation_count());
json["executed-reader-tasks"] = executed_reader_tasks() / double(operation_count());
json["scheduled-tasks-on-channel"] = scheduled_tasks_on_core() / double(operation_count());
json["scheduled-tasks-off-channel"] = scheduled_tasks_off_core() / double(operation_count());
json["buffer-fills"] = worker_fills() / double(operation_count());
}
return json;
}
private:
const std::uint64_t _operation_count;
const P &_phase;
const std::uint16_t _iteration;
const std::uint16_t _core_count;
const std::chrono::milliseconds _time;
std::vector<std::pair<std::string, double>> _performance_counter;
const std::unordered_map<std::uint16_t, std::uint64_t> _executed_tasks;
const std::unordered_map<std::uint16_t, std::uint64_t> _executed_reader_tasks;
const std::unordered_map<std::uint16_t, std::uint64_t> _executed_writer_tasks;
const std::unordered_map<std::uint16_t, std::uint64_t> _scheduled_tasks;
const std::unordered_map<std::uint16_t, std::uint64_t> _scheduled_tasks_on_core;
const std::unordered_map<std::uint16_t, std::uint64_t> _scheduled_tasks_off_core;
const std::unordered_map<std::uint16_t, std::uint64_t> _worker_fills;
std::uint64_t sum(const std::unordered_map<std::uint16_t, std::uint64_t> &map) const noexcept
{
return std::accumulate(map.begin(), map.end(), std::uint64_t(0U),
[](const auto &current, const auto &item) { return current + item.second; });
}
};
/**
* The Chronometer is the "benchmark clock", which will be started and stopped
* before and after each benchmark run. On stopping, the chronometer calculates the
* elapsed time and collects performance counter values and mx::tasking statistics.
*/
template <typename P> class Chronometer
{
public:
Chronometer() = default;
~Chronometer() = default;
void start(const P phase, const std::uint16_t iteration, const mx::util::core_set &core_set)
{
_current_phase = phase;
_current_iteration = iteration;
_core_set = core_set;
_perf.start();
_start = std::chrono::steady_clock::now();
}
InterimResult<P> stop(const std::uint64_t count_operations)
{
const auto end = std::chrono::steady_clock::now();
_perf.stop();
const auto milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end - _start);
return {count_operations,
_current_phase,
_current_iteration,
_core_set.size(),
milliseconds,
_perf.counter(),
statistic_map(mx::tasking::profiling::Statistic::Executed),
statistic_map(mx::tasking::profiling::Statistic::ExecutedReader),
statistic_map(mx::tasking::profiling::Statistic::ExecutedWriter),
statistic_map(mx::tasking::profiling::Statistic::Scheduled),
statistic_map(mx::tasking::profiling::Statistic::ScheduledOnChannel),
statistic_map(mx::tasking::profiling::Statistic::ScheduledOffChannel),
statistic_map(mx::tasking::profiling::Statistic::Fill)};
}
void add(PerfCounter &performance_counter) { _perf.add(performance_counter); }
private:
std::uint16_t _current_iteration{0U};
P _current_phase;
mx::util::core_set _core_set;
alignas(64) Perf _perf;
alignas(64) std::chrono::steady_clock::time_point _start;
std::unordered_map<std::uint16_t, std::uint64_t> statistic_map(
const mx::tasking::profiling::Statistic::Counter counter)
{
std::unordered_map<std::uint16_t, std::uint64_t> statistics;
for (auto i = 0U; i < mx::tasking::runtime::channels(); ++i)
{
statistics[i] = mx::tasking::runtime::statistic(counter, i);
}
return statistics;
}
};
} // namespace benchmark
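A sketch of the intended call sequence (illustration only; measure_once is a hypothetical helper mirroring the hash join benchmark, which instantiates Chronometer<std::uint32_t> with the batch size as the phase). It is assumed to run while the mx::tasking runtime is active, since stop() queries the per-channel task statistics:
#include <iostream>
#include <benchmark/chronometer.h> // Assumed include path.
void measure_once(benchmark::Chronometer<std::uint32_t> &chronometer, const mx::util::core_set &cores)
{
    chronometer.start(/*phase, here: batch size*/ 64U, /*iteration*/ 0U, cores);
    // ... execute the measured workload ...
    const auto result = chronometer.stop(/*count_operations*/ 1000000U);
    std::cout << result << std::endl;                  // Tab-separated line: cores, iteration, phase, time, op/s.
    std::cout << result.to_json().dump() << std::endl; // Same data as JSON, as written to the result file.
}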

100
src/benchmark/cores.cpp Normal file
View File

@@ -0,0 +1,100 @@
#include "cores.h"
#include <mx/system/topology.h>
#include <regex>
#include <sstream>
using namespace benchmark;
Cores::Cores(const std::uint16_t min_cores, const std::uint16_t max_cores, const std::uint16_t steps,
const mx::util::core_set::Order order)
{
this->add_for_range(min_cores, max_cores, steps, order);
}
Cores::Cores(const std::string &cores, const std::uint16_t steps, const mx::util::core_set::Order order)
{
const std::regex single_core_regex("(\\d+)$");
const std::regex from_core_regex("(\\d+):$");
const std::regex core_range_regex("(\\d+):(\\d+)");
std::stringstream stream(cores);
std::string token;
while (std::getline(stream, token, ';'))
{
std::smatch match;
if (std::regex_match(token, match, single_core_regex))
{
const auto core = std::stoi(match[1].str());
this->add_for_range(core, core, steps, order);
}
else if (std::regex_match(token, match, from_core_regex))
{
this->add_for_range(std::stoi(match[1].str()), mx::system::topology::count_cores(), steps, order);
}
else if (std::regex_match(token, match, core_range_regex))
{
this->add_for_range(std::stoi(match[1].str()), std::stoi(match[2].str()), steps, order);
}
}
}
void Cores::add_for_range(const std::uint16_t min_cores, const std::uint16_t max_cores, const std::uint16_t steps,
const mx::util::core_set::Order order)
{
if (min_cores == 0U || min_cores == max_cores)
{
this->_core_sets.push_back(mx::util::core_set::build(max_cores, order));
}
else
{
auto cores = min_cores;
if (cores % steps != 0U)
{
this->_core_sets.push_back(mx::util::core_set::build(cores, order));
cores++;
}
for (auto count_cores = cores; count_cores <= max_cores; count_cores++)
{
if (count_cores % steps == 0U)
{
this->_core_sets.push_back(mx::util::core_set::build(count_cores, order));
}
}
if (max_cores % steps != 0U)
{
this->_core_sets.push_back(mx::util::core_set::build(max_cores, order));
}
}
}
std::string Cores::dump(const std::uint8_t indent) const
{
std::stringstream stream;
for (auto i = 0U; i < this->_core_sets.size(); ++i)
{
if (i > 0U)
{
stream << "\n";
}
const auto &core_set = this->_core_sets[i];
if (indent > 0U)
{
stream << std::string(indent, ' ');
}
stream << core_set.size() << ": " << core_set;
}
stream << std::flush;
return stream.str();
}
namespace benchmark {
std::ostream &operator<<(std::ostream &stream, const Cores &cores)
{
return stream << cores.dump(0U) << std::endl;
}
} // namespace benchmark

53
src/benchmark/cores.h Normal file
View File

@@ -0,0 +1,53 @@
#pragma once
#include <cstdint>
#include <mx/util/core_set.h>
#include <ostream>
#include <string>
#include <vector>
namespace benchmark {
/**
* Set of core_sets used for a benchmark that should be performed over
* different core counts to benchmark scalability.
* Can be created from min and max cores (e.g. 1 core to 32 cores) or from a
* string identifying the cores (e.g. "1:32").
*/
class Cores
{
friend std::ostream &operator<<(std::ostream &stream, const Cores &cores);
public:
Cores(std::uint16_t min_cores, std::uint16_t max_cores, std::uint16_t steps, mx::util::core_set::Order order);
Cores(const std::string &cores, std::uint16_t steps, mx::util::core_set::Order order);
Cores(Cores &&) noexcept = default;
~Cores() = default;
const mx::util::core_set &next()
{
const auto current_index = _current_index++;
if (current_index < _core_sets.size())
{
return _core_sets[current_index];
}
return _empty_core_set;
}
[[nodiscard]] const mx::util::core_set &current() const noexcept { return _core_sets[_current_index - 1]; }
[[nodiscard]] std::size_t size() const noexcept { return _core_sets.size(); }
void reset() { _current_index = 0U; }
[[nodiscard]] std::string dump(std::uint8_t indent) const;
private:
std::vector<mx::util::core_set> _core_sets;
std::uint16_t _current_index = 0U;
const mx::util::core_set _empty_core_set;
void add_for_range(std::uint16_t min_cores, std::uint16_t max_cores, std::uint16_t steps,
mx::util::core_set::Order order);
};
} // namespace benchmark
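A small usage sketch (illustration only): the range string is expanded into one core_set per configuration, and the benchmark drivers loop over next() until the returned set is empty:
#include <iostream>
#include <benchmark/cores.h> // Assumed include path.
int main()
{
    // "1:4" with step 2 yields the core counts 1, 2 and 4.
    benchmark::Cores cores{"1:4", 2U, mx::util::core_set::Order::NUMAAware};
    std::cout << cores.dump(2U) << std::endl;
    mx::util::core_set core_set{};
    while ((core_set = cores.next())) // The empty set after the last configuration ends the loop.
    {
        std::cout << "running with " << core_set.size() << " cores" << std::endl;
    }
    return 0;
}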

71
src/benchmark/perf.cpp Normal file
View File

@@ -0,0 +1,71 @@
#include "perf.h"
using namespace benchmark;
/**
* Counter "Instructions Retired"
* Counts when the last uop of an instruction retires.
*/
[[maybe_unused]] PerfCounter Perf::INSTRUCTIONS = {"instr", PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS};
/**
* Counter "CPU Cycles"
* Counts CPU core cycles while the counter is enabled.
*/
[[maybe_unused]] PerfCounter Perf::CYCLES = {"cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES};
/**
* Counter "L1D Read Misses"
* Load requests that miss the level 1 data cache.
*/
[[maybe_unused]] PerfCounter Perf::L1_MISSES = {"l1-miss", PERF_TYPE_HW_CACHE,
PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_MISS << 16)};
/**
* Counter "LLC Misses"
* Accesses to the LLC in which the data is not present (miss).
*/
[[maybe_unused]] PerfCounter Perf::LLC_MISSES = {"llc-miss", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES};
/**
* Counter "LLC Reference"
* Accesses to the LLC in which the data is present (hit) or not present (miss).
*/
[[maybe_unused]] PerfCounter Perf::LLC_REFERENCES = {"llc-ref", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES};
/**
* Micro architecture "Skylake"
* Counter "CYCLE_ACTIVITY.STALLS_MEM_ANY"
* EventSel=A3H,UMask=14H, CMask=20
* Execution stalls while memory subsystem has an outstanding load.
*/
PerfCounter Perf::STALLS_MEM_ANY = {"memory-stall", PERF_TYPE_RAW, 0x145314a3};
/**
* Micro architecture "Skylake"
* Counter "SW_PREFETCH_ACCESS.NTA"
* EventSel=32H,UMask=01H
* Number of PREFETCHNTA instructions executed.
*/
[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_NTA = {"sw-prefetch-nta", PERF_TYPE_RAW, 0x530132};
/**
* Micro architecture "Skylake"
* Counter "SW_PREFETCH_ACCESS.T0"
* EventSel=32H,UMask=02H
* Number of PREFETCHT0 instructions executed.
*/
[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_T0 = {"sw-prefetch-t0", PERF_TYPE_RAW, 0x530232};
/**
* Micro architecture "Skylake"
* Counter "SW_PREFETCH_ACCESS.T1_T2"
* EventSel=32H,UMask=04H
* Number of PREFETCHT1 or PREFETCHT2 instructions executed.
*/
[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_T1_T2 = {"sw-prefetch-t1t2", PERF_TYPE_RAW, 0x530432};
/**
* Micro architecture "Skylake"
* Counter "SW_PREFETCH_ACCESS.PREFETCHW"
* EventSel=32H,UMask=08H
* Number of PREFETCHW instructions executed.
*/
[[maybe_unused]] PerfCounter Perf::SW_PREFETCH_ACCESS_WRITE = {"sw-prefetch-w", PERF_TYPE_RAW, 0x530832};

157
src/benchmark/perf.h Normal file
View File

@@ -0,0 +1,157 @@
#pragma once
#include <algorithm>
#include <asm/unistd.h>
#include <cstring>
#include <linux/perf_event.h>
#include <string>
#include <sys/ioctl.h>
#include <unistd.h>
#include <vector>
/*
* For more performance counters, take a look at the Intel performance monitoring events manual:
* https://software.intel.com/sites/default/files/managed/8b/6e/335279_performance_monitoring_events_guide.pdf
*
* To get event ids from manual specification see libpfm4:
* http://www.bnikolic.co.uk/blog/hpc-prof-events.html
* Clone, build, and use examples/check_events to generate the event id code from an event specification:
* ./check_events <category>:<umask>[:c=<cmask>]
* Example:
* ./check_events cycle_activity:0x14:c=20
*/
namespace benchmark {
/**
* Represents a Linux Performance Counter.
*/
class PerfCounter
{
public:
PerfCounter(std::string &&name, const std::uint64_t type, const std::uint64_t event_id) : _name(std::move(name))
{
std::memset(&_perf_event_attribute, 0, sizeof(perf_event_attr));
_perf_event_attribute.type = type;
_perf_event_attribute.size = sizeof(perf_event_attr);
_perf_event_attribute.config = event_id;
_perf_event_attribute.disabled = true;
_perf_event_attribute.inherit = 1;
_perf_event_attribute.exclude_kernel = false;
_perf_event_attribute.exclude_hv = false;
_perf_event_attribute.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
}
~PerfCounter() = default;
bool open()
{
_file_descriptor = syscall(__NR_perf_event_open, &_perf_event_attribute, 0, -1, -1, 0);
return _file_descriptor >= 0;
}
bool start()
{
ioctl(_file_descriptor, PERF_EVENT_IOC_RESET, 0);
ioctl(_file_descriptor, PERF_EVENT_IOC_ENABLE, 0);
return ::read(_file_descriptor, &_prev, sizeof(read_format)) == sizeof(read_format);
}
bool stop()
{
const auto is_read = ::read(_file_descriptor, &_data, sizeof(read_format)) == sizeof(read_format);
ioctl(_file_descriptor, PERF_EVENT_IOC_DISABLE, 0);
return is_read;
}
[[nodiscard]] double read() const
{
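// Scale the raw delta by the enabled/running time ratio: if the kernel multiplexed this counter,
// this estimates the count as if the counter had been running for the entire interval.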
const auto multiplexing_correction = static_cast<double>(_data.time_enabled - _prev.time_enabled) /
static_cast<double>(_data.time_running - _prev.time_running);
return static_cast<double>(_data.value - _prev.value) * multiplexing_correction;
}
[[nodiscard]] const std::string &name() const { return _name; }
explicit operator const std::string &() const { return name(); }
bool operator==(const std::string &name) const { return _name == name; }
private:
struct read_format
{
std::uint64_t value = 0;
std::uint64_t time_enabled = 0;
std::uint64_t time_running = 0;
};
const std::string _name;
std::int32_t _file_descriptor = -1;
perf_event_attr _perf_event_attribute{};
read_format _prev{};
read_format _data{};
};
/**
* Holds a set of performance counters and starts/stops them together.
*/
class Perf
{
public:
[[maybe_unused]] static PerfCounter INSTRUCTIONS;
[[maybe_unused]] static PerfCounter CYCLES;
[[maybe_unused]] static PerfCounter L1_MISSES;
[[maybe_unused]] static PerfCounter LLC_MISSES;
[[maybe_unused]] static PerfCounter LLC_REFERENCES;
[[maybe_unused]] static PerfCounter STALLED_CYCLES_BACKEND;
[[maybe_unused]] static PerfCounter STALLS_MEM_ANY;
[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_NTA;
[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_T0;
[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_T1_T2;
[[maybe_unused]] static PerfCounter SW_PREFETCH_ACCESS_WRITE;
Perf() noexcept = default;
~Perf() noexcept = default;
bool add(PerfCounter &counter_)
{
if (counter_.open())
{
_counter.push_back(counter_);
return true;
}
return false;
}
void start()
{
for (auto &counter_ : _counter)
{
counter_.start();
}
}
void stop()
{
for (auto &counter_ : _counter)
{
counter_.stop();
}
}
double operator[](const std::string &name) const
{
auto counter_iterator = std::find(_counter.begin(), _counter.end(), name);
if (counter_iterator != _counter.end())
{
return counter_iterator->read();
}
return 0.0;
}
std::vector<PerfCounter> &counter() { return _counter; }
private:
std::vector<PerfCounter> _counter;
};
} // namespace benchmark
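As a usage sketch (not part of the sources), the classes above are combined as follows; run_workload is a hypothetical function standing in for the measured code:

    // Sketch: measure LLC references and misses around a hypothetical workload.
    benchmark::Perf perf;
    perf.add(benchmark::Perf::LLC_REFERENCES); // opens the counter via perf_event_open
    perf.add(benchmark::Perf::LLC_MISSES);

    perf.start();
    run_workload(); // hypothetical function under measurement
    perf.stop();

    const auto references = perf["llc-ref"];
    const auto misses = perf["llc-miss"]; // multiplexing-corrected estimates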

9
src/benchmark/phase.h Normal file
View File

@@ -0,0 +1,9 @@
#pragma once
#include <cstdint>
namespace benchmark {
enum class phase : std::uint8_t
{
FILL = 0U,
MIXED = 1U
};
}

View File

@@ -0,0 +1,15 @@
#include "string_util.h"
#include <sstream>
using namespace benchmark;
void string_util::split(const std::string &text, const char delimiter,
const std::function<void(const std::string &line)> &callback)
{
std::stringstream stream(text);
std::string token;
while (std::getline(stream, token, delimiter))
{
callback(token);
}
}

View File

@@ -0,0 +1,13 @@
#pragma once
#include <functional>
#include <string>
namespace benchmark {
class string_util
{
public:
static void split(const std::string &text, char delimiter,
const std::function<void(const std::string &line)> &callback);
};
} // namespace benchmark
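A short usage sketch for string_util::split (assumes <iostream> for the output; not part of the sources):

    // Sketch: print every comma-separated token of a line.
    benchmark::string_util::split("INSERT 1,READ 2,UPDATE 3", ',',
                                  [](const std::string &token) { std::cout << token << '\n'; });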

View File

@@ -0,0 +1,20 @@
#include "workload.h"
#include <limits>
using namespace benchmark;
std::pair<std::uint64_t, std::uint64_t> Workload::next(const std::uint64_t count) noexcept
{
const auto index = this->_current_index.fetch_add(count, std::memory_order_relaxed);
const auto workload_size = this->_workload_set[this->_current_phase].size();
return index < workload_size ? std::make_pair(index, std::min(count, workload_size - index))
: std::make_pair(std::numeric_limits<std::uint64_t>::max(), 0UL);
}
namespace benchmark {
std::ostream &operator<<(std::ostream &stream, const Workload &workload)
{
return stream << workload._workload_set << std::flush;
}
} // namespace benchmark

58
src/benchmark/workload.h Normal file
View File

@@ -0,0 +1,58 @@
#pragma once
#include "phase.h"
#include "workload_set.h"
#include <array>
#include <atomic>
#include <cstdint>
#include <utility>
namespace benchmark {
class Workload
{
friend std::ostream &operator<<(std::ostream &stream, const Workload &workload);
public:
Workload() noexcept = default;
~Workload() noexcept = default;
[[maybe_unused]] void build(const std::string &fill_workload_file, const std::string &mixed_workload_file)
{
_workload_set.build(fill_workload_file, mixed_workload_file);
}
[[maybe_unused]] void build(const std::uint64_t fill_inserts, const std::uint64_t mixed_inserts,
const std::uint64_t mixed_lookups, const std::uint64_t mixed_updates,
const std::uint64_t mixed_deletes)
{
_workload_set.build(fill_inserts, mixed_inserts, mixed_lookups, mixed_updates, mixed_deletes);
}
[[maybe_unused]] void shuffle() { _workload_set.shuffle(); }
std::pair<std::uint64_t, std::uint64_t> next(std::uint64_t count) noexcept;
[[nodiscard]] std::uint64_t size() const noexcept { return _workload_set[_current_phase].size(); }
[[nodiscard]] bool empty() const noexcept { return _workload_set[_current_phase].empty(); }
[[nodiscard]] bool empty(const phase phase) const noexcept { return _workload_set[phase].empty(); }
void reset(const phase phase) noexcept
{
_current_phase = phase;
_current_index = 0;
}
const NumericTuple &operator[](const std::size_t index) const noexcept
{
return _workload_set[_current_phase][index];
}
bool operator==(const phase phase) const noexcept { return _current_phase == phase; }
explicit operator phase() const noexcept { return _current_phase; }
private:
NumericWorkloadSet _workload_set;
phase _current_phase = phase::FILL;
alignas(64) std::atomic_uint64_t _current_index{0U};
};
} // namespace benchmark
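Since next() atomically claims a contiguous batch of indices, several worker threads can drain a phase without further coordination. A sketch of such a consumer (the batch size of 64 and the process() handler are hypothetical):

    // Sketch: one worker consuming the currently active phase in batches.
    void consume(benchmark::Workload &workload)
    {
        while (true)
        {
            const auto [index, count] = workload.next(64U);
            if (count == 0U) // this phase of the workload is exhausted
            {
                break;
            }
            for (std::uint64_t i = 0U; i < count; ++i)
            {
                process(workload[index + i]); // hypothetical per-tuple handler
            }
        }
    }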

View File

@@ -0,0 +1,168 @@
#include "workload_set.h"
#include <algorithm>
#include <fstream>
#include <iostream>
#include <mutex>
#include <random>
#include <thread>
using namespace benchmark;
void NumericWorkloadSet::build(const std::string &fill_workload_file, const std::string &mixed_workload_file)
{
auto parse = [](auto &file_stream, std::vector<NumericTuple> &data_set) -> bool {
std::srand(1337);
std::string op_name;
std::uint64_t key{};
bool contains_update = false;
while (file_stream >> op_name >> key)
{
if (op_name == "INSERT")
{
contains_update = true;
data_set.emplace_back(NumericTuple{NumericTuple::INSERT, key, std::rand()});
}
else if (op_name == "READ")
{
data_set.emplace_back(NumericTuple{NumericTuple::LOOKUP, key});
}
else if (op_name == "UPDATE")
{
contains_update = true;
data_set.emplace_back(NumericTuple{NumericTuple::UPDATE, key, std::rand()});
}
}
return contains_update;
};
std::mutex out_mutex;
std::thread fill_thread{[this, &out_mutex, &parse, &fill_workload_file]() {
std::ifstream fill_file(fill_workload_file);
if (fill_file.good())
{
parse(fill_file, this->_data_sets[static_cast<std::size_t>(phase::FILL)]);
}
else
{
std::lock_guard lock{out_mutex};
std::cerr << "Could not open workload file '" << fill_workload_file << "'." << std::endl;
}
}};
std::thread mixed_thread{[this, &out_mutex, &parse, &mixed_workload_file]() {
std::ifstream mixed_file(mixed_workload_file);
if (mixed_file.good())
{
this->_mixed_phase_contains_update =
parse(mixed_file, this->_data_sets[static_cast<std::size_t>(phase::MIXED)]);
}
else
{
std::lock_guard lock{out_mutex};
std::cerr << "Could not open workload file '" << mixed_workload_file << "'." << std::endl;
}
}};
fill_thread.join();
mixed_thread.join();
}
void NumericWorkloadSet::build(const std::uint64_t fill_inserts, const std::uint64_t mixed_inserts,
const std::uint64_t mixed_lookups, const std::uint64_t mixed_updates,
const std::uint64_t mixed_deletes)
{
std::srand(1337);
this->_data_sets[static_cast<std::uint8_t>(phase::FILL)].reserve(fill_inserts);
this->_data_sets[static_cast<std::uint8_t>(phase::MIXED)].reserve(mixed_inserts + mixed_lookups + mixed_updates +
mixed_deletes);
for (auto i = 0U; i < fill_inserts; ++i)
{
this->_data_sets[static_cast<std::uint8_t>(phase::FILL)].emplace_back(
NumericTuple{NumericTuple::INSERT, i + 1U, std::rand()});
}
this->_mixed_phase_contains_update = mixed_inserts > 0U || mixed_deletes > 0U || mixed_updates > 0U;
for (auto i = fill_inserts; i < fill_inserts + mixed_inserts; ++i)
{
this->_data_sets[static_cast<std::uint8_t>(phase::MIXED)].emplace_back(
NumericTuple{NumericTuple::INSERT, i + 1U, std::rand()});
}
for (auto i = 0U; i < mixed_lookups; ++i)
{
this->_data_sets[static_cast<std::uint8_t>(phase::MIXED)].push_back(
{NumericTuple::LOOKUP, this->_data_sets[static_cast<std::uint16_t>(phase::FILL)][i % fill_inserts].key()});
}
for (auto i = 0U; i < mixed_updates; ++i)
{
this->_data_sets[static_cast<std::size_t>(phase::MIXED)].push_back(
{NumericTuple::UPDATE, this->_data_sets[static_cast<std::uint16_t>(phase::FILL)][i % fill_inserts].key(),
std::rand()});
}
for (auto i = 0U; i < mixed_deletes; ++i)
{
this->_data_sets[static_cast<std::uint8_t>(phase::MIXED)].push_back(
{NumericTuple::DELETE, this->_data_sets[static_cast<std::uint16_t>(phase::FILL)][i % fill_inserts].key()});
}
}
void NumericWorkloadSet::shuffle()
{
std::srand(1337U + 42U);
std::random_device random_device;
std::mt19937 mersenne_twister_engine(random_device());
std::shuffle(this->_data_sets[static_cast<std::uint8_t>(phase::FILL)].begin(),
this->_data_sets[static_cast<std::uint8_t>(phase::FILL)].end(), mersenne_twister_engine);
std::shuffle(this->_data_sets[static_cast<std::uint8_t>(phase::MIXED)].begin(),
this->_data_sets[static_cast<std::uint8_t>(phase::MIXED)].end(), mersenne_twister_engine);
}
std::ostream &NumericWorkloadSet::nice_print(std::ostream &stream, const std::size_t number) noexcept
{
if (number >= 1000000U)
{
return stream << (number / 1000000U) << "m";
}
if (number >= 1000U)
{
return stream << (number / 1000U) << "k";
}
return stream << number;
}
namespace benchmark {
std::ostream &operator<<(std::ostream &stream, const NumericWorkloadSet &workload)
{
const auto has_fill_and_mixed = workload[phase::FILL].empty() == false && workload[phase::MIXED].empty() == false;
if (workload[phase::FILL].empty() == false)
{
stream << "fill: ";
NumericWorkloadSet::nice_print(stream, workload[phase::FILL].size());
}
if (has_fill_and_mixed)
{
stream << " / ";
}
if (workload[phase::MIXED].empty() == false)
{
stream << (workload._mixed_phase_contains_update ? "mixed: " : "read-only: ");
NumericWorkloadSet::nice_print(stream, workload[phase::MIXED].size());
}
return stream << std::flush;
}
} // namespace benchmark
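For example, a workload set built with 10,000,000 fill inserts and 100,000,000 mixed operations that include updates prints as follows (a mixed phase without inserts, updates, or deletes would be labeled "read-only:" instead):

    fill: 10m / mixed: 100m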

View File

@@ -0,0 +1,74 @@
#pragma once
#include "phase.h"
#include <array>
#include <cstdint>
#include <ostream>
#include <string>
#include <vector>
namespace benchmark {
class NumericTuple
{
public:
enum Type
{
INSERT,
LOOKUP,
UPDATE,
DELETE
};
constexpr NumericTuple(const Type type, const std::uint64_t key) : _type(type), _key(key) {}
constexpr NumericTuple(const Type type, const std::uint64_t key, const std::int64_t value)
: _type(type), _key(key), _value(value)
{
}
NumericTuple(NumericTuple &&) noexcept = default;
NumericTuple(const NumericTuple &) = default;
~NumericTuple() = default;
NumericTuple &operator=(NumericTuple &&) noexcept = default;
[[nodiscard]] std::uint64_t key() const { return _key; };
[[nodiscard]] std::int64_t value() const { return _value; }
bool operator==(const Type type) const { return _type == type; }
private:
Type _type;
std::uint64_t _key;
std::int64_t _value = 0;
};
class NumericWorkloadSet
{
friend std::ostream &operator<<(std::ostream &stream, const NumericWorkloadSet &workload_set);
public:
NumericWorkloadSet() = default;
~NumericWorkloadSet() = default;
void build(const std::string &fill_workload_file, const std::string &mixed_workload_file);
void build(std::uint64_t fill_inserts, std::uint64_t mixed_inserts, std::uint64_t mixed_lookups,
std::uint64_t mixed_updates, std::uint64_t mixed_deletes);
void shuffle();
[[nodiscard]] const std::vector<NumericTuple> &fill() const noexcept { return _data_sets[0]; }
[[nodiscard]] const std::vector<NumericTuple> &mixed() const noexcept { return _data_sets[1]; }
const std::vector<NumericTuple> &operator[](const phase phase) const noexcept
{
return _data_sets[static_cast<std::uint16_t>(phase)];
}
explicit operator bool() const { return fill().empty() == false || mixed().empty() == false; }
private:
std::array<std::vector<NumericTuple>, 2> _data_sets;
bool _mixed_phase_contains_update = false;
static std::ostream &nice_print(std::ostream &stream, std::size_t number) noexcept;
};
} // namespace benchmark

View File

@@ -0,0 +1,366 @@
#pragma once
#include "config.h"
#include "node.h"
#include "node_consistency_checker.h"
#include "node_iterator.h"
#include "node_statistics.h"
#include <atomic>
#include <cassert>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <json.hpp>
#include <mx/resource/resource.h>
#include <mx/tasking/runtime.h>
#include <utility>
#include <vector>
namespace db::index::blinktree {
template <typename K, typename V> class BLinkTree
{
public:
BLinkTree(const mx::synchronization::isolation_level isolation_level,
const mx::synchronization::protocol preferred_synchronization_protocol)
: _isolation_level(isolation_level), _preferred_synchronization_protocol(preferred_synchronization_protocol),
_root(create_node(NodeType::Leaf, mx::resource::ptr{}, true))
{
}
~BLinkTree() { mx::tasking::runtime::delete_resource<Node<K, V>>(_root); }
/**
* @return Root node of the tree.
*/
[[nodiscard]] mx::resource::ptr root() const { return _root; }
/**
* @return Height of the tree.
*/
[[nodiscard]] std::uint16_t height() const { return _height; }
/**
* @return True, when the tree does not contain any value.
*/
[[nodiscard]] bool empty() const
{
return static_cast<bool>(_root) == false || _root.template get<Node<K, V>>()->size() == 0;
}
/**
* Creates a node of type inner.
*
* @param is_branch True, when the children of the new inner node will be leaf nodes.
* @param parent Parent of the new inner node.
* @param is_root True, if the new inner node will be the root.
* @return Inner node.
*/
[[nodiscard]] mx::resource::ptr create_inner_node(const bool is_branch, const mx::resource::ptr parent,
const bool is_root = false) const
{
const auto inner_type = is_branch ? NodeType::Inner | NodeType::Branch : NodeType::Inner;
return create_node(inner_type, parent, is_root);
}
/**
* Creates a node of type leaf.
*
* @param parent Parent of the new leaf node.
* @return Leaf node.
*/
[[nodiscard]] mx::resource::ptr create_leaf_node(const mx::resource::ptr parent) const
{
return create_node(NodeType::Leaf, parent, false);
}
/**
* Creates a new root node, containing two separators (to the left and right).
* The new root node will be set in the tree.
*
* @param left Link to the "smaller" child node.
* @param right Link to the "greater" child node.
* @param key Separator key.
*/
void create_new_root(mx::resource::ptr left, mx::resource::ptr right, K key);
/**
* Splits an inner node.
*
* @param inner_node Node to split.
* @param key Key to insert after split.
* @param separator Separator to insert after split.
* @return Pointer and high key of the new node.
*/
std::pair<mx::resource::ptr, K> split(mx::resource::ptr inner_node, K key, mx::resource::ptr separator) const;
/**
* Splits a leaf node.
*
* @param leaf_node Node to split.
* @param key Key to insert after split.
* @param value Value to insert after split.
* @return Pointer to the leaf node and key for parent.
*/
std::pair<mx::resource::ptr, K> split(mx::resource::ptr leaf_node, K key, V value) const;
/**
* @return Begin iterator for iterating over nodes.
*/
NodeIterator<K, V> begin() const { return NodeIterator(mx::resource::ptr_cast<Node<K, V>>(_root)); }
/**
* @return End iterator (aka empty node iterator).
*/
NodeIterator<K, V> end() const { return {}; }
/**
* Checks the consistency of the tree.
*/
void check() const;
/**
* Dumps statistics such as the height, the number of inner and leaf nodes, and the number of records.
*/
void print_statistics() const;
explicit operator nlohmann::json() const
{
nlohmann::json out;
out["height"] = _height;
out["root"] = node_to_json(_root);
return out;
}
protected:
// Height of the tree.
std::uint8_t _height = 1;
// Isolation of tasks accessing a node.
const mx::synchronization::isolation_level _isolation_level;
// Select a preferred method for synchronization.
const mx::synchronization::protocol _preferred_synchronization_protocol;
// Pointer to the root.
alignas(64) mx::resource::ptr _root;
/**
* Creates a new node.
*
* @param node_type Type of the node.
* @param parent Parent of the node.
* @param is_root True, if the new node will be the root.
* @return Pointer to the new node.
*/
[[nodiscard]] mx::resource::ptr create_node(const NodeType node_type, const mx::resource::ptr parent,
const bool is_root) const
{
const auto is_inner = static_cast<bool>(node_type & NodeType::Inner);
return mx::tasking::runtime::new_resource<Node<K, V>>(
config::node_size(),
mx::resource::hint{_isolation_level, _preferred_synchronization_protocol,
predict_access_frequency(is_inner, is_root), predict_read_write_ratio(is_inner)},
node_type, parent);
}
/**
* Creates a hint for tasking regarding usage of the node.
*
* @param is_inner True, if the node is an inner node.
* @param is_root True, if the node is the root.
* @return Hint for usage prediction which will be used for allocating resources.
*/
[[nodiscard]] static mx::resource::hint::expected_access_frequency predict_access_frequency(const bool is_inner,
const bool is_root)
{
if (is_root)
{
return mx::resource::hint::expected_access_frequency::excessive;
}
if (is_inner)
{
return mx::resource::hint::expected_access_frequency::high;
}
return mx::resource::hint::expected_access_frequency::normal;
}
/**
* Create a hint for the read/write ratio.
* Inner nodes will be written very little while
* leaf nodes will be written more often.
*
* @param is_inner True, when the node is an inner node.
* @return Predicted read/write ratio.
*/
[[nodiscard]] static mx::resource::hint::expected_read_write_ratio predict_read_write_ratio(const bool is_inner)
{
return is_inner ? mx::resource::hint::expected_read_write_ratio::heavy_read
: mx::resource::hint::expected_read_write_ratio::balanced;
}
/**
* Serializes a tree node to json format.
*
* @param node Node to serialize.
* @return JSON representation of the node.
*/
[[nodiscard]] nlohmann::json node_to_json(mx::resource::ptr node) const
{
auto out = nlohmann::json();
auto node_ptr = mx::resource::ptr_cast<Node<K, V>>(node);
out["channel_id"] = node.channel_id();
out["is_leaf"] = node_ptr->is_leaf();
out["size"] = node_ptr->size();
if (node_ptr->is_inner())
{
auto children = nlohmann::json::array();
for (auto i = 0U; i <= node_ptr->size(); ++i)
{
children.push_back(node_to_json(node_ptr->separator(i)));
}
out["children"] = children;
}
return out;
}
};
template <typename K, typename V>
void BLinkTree<K, V>::create_new_root(const mx::resource::ptr left, const mx::resource::ptr right, const K key)
{
const auto is_left_inner = mx::resource::ptr_cast<Node<K, V>>(left)->is_inner();
mx::tasking::runtime::modify_predicted_usage(left, predict_access_frequency(is_left_inner, true),
predict_access_frequency(is_left_inner, false));
auto root = this->create_inner_node(mx::resource::ptr_cast<Node<K, V>>(left)->is_leaf(), mx::resource::ptr{}, true);
left.template get<Node<K, V>>()->parent(root);
right.template get<Node<K, V>>()->parent(root);
root.template get<Node<K, V>>()->separator(0, left);
root.template get<Node<K, V>>()->insert(0, right, key);
this->_height++;
this->_root = root;
}
template <typename K, typename V>
std::pair<mx::resource::ptr, K> BLinkTree<K, V>::split(const mx::resource::ptr inner_node, const K key,
const mx::resource::ptr separator) const
{
constexpr std::uint16_t left_size = InnerNode<K, V>::max_keys / 2;
constexpr std::uint16_t right_size = InnerNode<K, V>::max_keys - left_size;
auto node_ptr = mx::resource::ptr_cast<Node<K, V>>(inner_node);
K key_up;
auto new_inner_node = this->create_inner_node(node_ptr->is_branch(), node_ptr->parent());
auto new_node_ptr = mx::resource::ptr_cast<Node<K, V>>(new_inner_node);
new_node_ptr->high_key(node_ptr->high_key());
if (key < node_ptr->inner_key(left_size - 1))
{
node_ptr->move(new_inner_node, left_size, right_size);
new_node_ptr->separator(0, node_ptr->separator(left_size));
new_node_ptr->size(right_size);
node_ptr->size(left_size - 1);
key_up = node_ptr->inner_key(left_size - 1);
const auto index = node_ptr->index(key);
separator.template get<Node<K, V>>()->parent(inner_node);
node_ptr->insert(index, separator, key);
}
else if (key < node_ptr->inner_key(left_size))
{
node_ptr->move(new_inner_node, left_size, right_size);
new_node_ptr->separator(0, separator);
key_up = key;
node_ptr->size(left_size);
new_node_ptr->size(right_size);
}
else
{
node_ptr->move(new_inner_node, left_size + 1, right_size - 1);
new_node_ptr->separator(0, node_ptr->separator(left_size + 1));
node_ptr->size(left_size);
new_node_ptr->size(right_size - 1);
key_up = node_ptr->inner_key(left_size);
const auto index = new_node_ptr->index(key);
new_node_ptr->insert(index, separator, key);
}
new_node_ptr->right_sibling(node_ptr->right_sibling());
node_ptr->right_sibling(new_inner_node);
node_ptr->high_key(key_up);
for (auto index = 0U; index <= new_node_ptr->size(); ++index)
{
new_node_ptr->separator(index).template get<Node<K, V>>()->parent(new_inner_node);
}
return {new_inner_node, key_up};
}
template <typename K, typename V>
std::pair<mx::resource::ptr, K> BLinkTree<K, V>::split(const mx::resource::ptr leaf_node_ptr, const K key,
const V value) const
{
auto *leaf_node = mx::resource::ptr_cast<Node<K, V>>(leaf_node_ptr);
constexpr std::uint16_t left_size = LeafNode<K, V>::max_items / 2;
constexpr std::uint16_t right_size = LeafNode<K, V>::max_items - left_size;
auto new_leaf_node_ptr = this->create_leaf_node(leaf_node->parent());
auto *new_leaf_node = mx::resource::ptr_cast<Node<K, V>>(new_leaf_node_ptr);
leaf_node->move(new_leaf_node_ptr, left_size, right_size);
if (leaf_node->right_sibling() != nullptr)
{
new_leaf_node->right_sibling(leaf_node->right_sibling());
}
new_leaf_node->high_key(leaf_node->high_key());
new_leaf_node->size(right_size);
leaf_node->size(left_size);
leaf_node->right_sibling(new_leaf_node_ptr);
if (key < new_leaf_node->leaf_key(0))
{
leaf_node->insert(leaf_node->index(key), value, key);
}
else
{
new_leaf_node->insert(new_leaf_node->index(key), value, key);
}
leaf_node->high_key(new_leaf_node->leaf_key(0));
return {new_leaf_node_ptr, new_leaf_node->leaf_key(0)};
}
template <typename K, typename V> void BLinkTree<K, V>::print_statistics() const
{
NodeStatistics<K, V> statistics(this->height());
for (auto node : *this)
{
statistics += node;
}
std::cout << statistics << std::endl;
}
template <typename K, typename V> void BLinkTree<K, V>::check() const
{
for (auto node : *this)
{
NodeConsistencyChecker<K, V>::check_and_print_errors(node, std::cerr);
}
}
} // namespace db::index::blinktree

View File

@@ -0,0 +1,11 @@
#pragma once
#include <mx/synchronization/synchronization.h>
namespace db::index::blinktree {
class config
{
public:
static constexpr auto node_size() { return 1024U; }
};
} // namespace db::index::blinktree

View File

@@ -0,0 +1,63 @@
#pragma once
#include "b_link_tree.h"
#include "node.h"
#include "task.h"
#include <mx/tasking/runtime.h>
namespace db::index::blinktree {
template <typename K, typename V, class L> class InsertSeparatorTask final : public Task<K, V, L>
{
public:
constexpr InsertSeparatorTask(const K key, const mx::resource::ptr separator, BLinkTree<K, V> *tree,
L &listener) noexcept
: Task<K, V, L>(key, listener), _tree(tree), _separator(separator)
{
}
~InsertSeparatorTask() override = default;
mx::tasking::TaskResult execute(std::uint16_t core_id, std::uint16_t channel_id) override;
private:
BLinkTree<K, V> *_tree;
mx::resource::ptr _separator;
};
template <typename K, typename V, class L>
mx::tasking::TaskResult InsertSeparatorTask<K, V, L>::execute(const std::uint16_t core_id,
const std::uint16_t /*channel_id*/)
{
auto *annotated_node = this->annotated_resource().template get<Node<K, V>>();
// Is the node related to the key?
if (annotated_node->high_key() <= this->_key)
{
this->annotate(annotated_node->right_sibling(), config::node_size() / 4U);
return mx::tasking::TaskResult::make_succeed(this);
}
// At this point, we are accessing the related inner node and we are in writer mode.
if (!annotated_node->full())
{
const auto index = annotated_node->index(this->_key);
annotated_node->insert(index, this->_separator, this->_key);
this->_separator.template get<Node<K, V>>()->parent(this->annotated_resource());
this->_listener.inserted(core_id, this->_key, 0U);
return mx::tasking::TaskResult::make_remove();
}
auto [right, key] = this->_tree->split(this->annotated_resource(), this->_key, this->_separator);
if (annotated_node->parent() != nullptr)
{
this->_separator = right;
this->_key = key;
this->annotate(annotated_node->parent(), config::node_size() / 4U);
return mx::tasking::TaskResult::make_succeed(this);
}
this->_tree->create_new_root(this->annotated_resource(), right, key);
this->_listener.inserted(core_id, this->_key, 0U);
return mx::tasking::TaskResult::make_remove();
}
} // namespace db::index::blinktree

View File

@@ -0,0 +1,85 @@
#pragma once
#include "b_link_tree.h"
#include "insert_separator_task.h"
#include "node.h"
#include "task.h"
#include <mx/tasking/runtime.h>
#include <vector>
namespace db::index::blinktree {
template <typename K, typename V, class L> class InsertValueTask final : public Task<K, V, L>
{
public:
constexpr InsertValueTask(const K key, const V value, BLinkTree<K, V> *tree, L &listener) noexcept
: Task<K, V, L>(key, listener), _tree(tree), _value(value)
{
}
~InsertValueTask() override = default;
mx::tasking::TaskResult execute(std::uint16_t core_id, std::uint16_t channel_id) override;
private:
BLinkTree<K, V> *_tree;
const V _value;
};
template <typename K, typename V, class L>
mx::tasking::TaskResult InsertValueTask<K, V, L>::execute(const std::uint16_t core_id,
const std::uint16_t /*channel_id*/)
{
auto *annotated_node = this->annotated_resource().template get<Node<K, V>>();
// Is the node related to the key?
if (annotated_node->high_key() <= this->_key)
{
this->annotate(annotated_node->right_sibling(), config::node_size() / 4U);
return mx::tasking::TaskResult::make_succeed(this);
}
// If we are accessing an inner node, pick the next related child.
if (annotated_node->is_inner())
{
const auto child = annotated_node->child(this->_key);
this->annotate(child, config::node_size() / 4U);
this->is_readonly(!annotated_node->is_branch());
return mx::tasking::TaskResult::make_succeed(this);
}
// Is it a leaf, but we are still reading? Upgrade to writer.
if (annotated_node->is_leaf() && this->is_readonly())
{
this->is_readonly(false);
return mx::tasking::TaskResult::make_succeed(this);
}
// At this point, we are accessing the related leaf and we are in writer mode.
const auto index = annotated_node->index(this->_key);
if (index < annotated_node->size() && annotated_node->leaf_key(index) == this->_key)
{
this->_listener.inserted(core_id, this->_key, this->_value);
return mx::tasking::TaskResult::make_remove();
}
if (annotated_node->full() == false)
{
annotated_node->insert(index, this->_value, this->_key);
this->_listener.inserted(core_id, this->_key, this->_value);
return mx::tasking::TaskResult::make_remove();
}
auto [right, key] = this->_tree->split(this->annotated_resource(), this->_key, this->_value);
if (annotated_node->parent() != nullptr)
{
auto *task = mx::tasking::runtime::new_task<InsertSeparatorTask<K, V, L>>(core_id, key, right, this->_tree,
this->_listener);
task->annotate(annotated_node->parent(), config::node_size() / 4U);
return mx::tasking::TaskResult::make_succeed_and_remove(task);
}
this->_tree->create_new_root(this->annotated_resource(), right, key);
this->_listener.inserted(core_id, this->_key, this->_value);
return mx::tasking::TaskResult::make_remove();
}
} // namespace db::index::blinktree

View File

@@ -0,0 +1,13 @@
#pragma once
#include <cstdint>
namespace db::index::blinktree {
template <typename K, typename V> class Listener
{
public:
virtual void inserted(std::uint16_t core_id, K key, V value) = 0;
virtual void updated(std::uint16_t core_id, K key, V value) = 0;
virtual void removed(std::uint16_t core_id, K key) = 0;
virtual void found(std::uint16_t core_id, K key, V value) = 0;
virtual void missing(std::uint16_t core_id, K key) = 0;
};
} // namespace db::index::blinktree
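For illustration, a minimal listener that merely counts the callbacks (a sketch, not part of the sources; it assumes the Listener header above is included, and the atomic counters assume invocation from multiple cores):

    #include <atomic>
    #include <cstdint>

    template <typename K, typename V> class CountingListener final : public db::index::blinktree::Listener<K, V>
    {
    public:
        void inserted(std::uint16_t /*core_id*/, K /*key*/, V /*value*/) override { ++_inserted; }
        void updated(std::uint16_t /*core_id*/, K /*key*/, V /*value*/) override { ++_updated; }
        void removed(std::uint16_t /*core_id*/, K /*key*/) override { ++_removed; }
        void found(std::uint16_t /*core_id*/, K /*key*/, V /*value*/) override { ++_found; }
        void missing(std::uint16_t /*core_id*/, K /*key*/) override { ++_missing; }

    private:
        std::atomic_uint64_t _inserted{0U};
        std::atomic_uint64_t _updated{0U};
        std::atomic_uint64_t _removed{0U};
        std::atomic_uint64_t _found{0U};
        std::atomic_uint64_t _missing{0U};
    };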

View File

@@ -0,0 +1,54 @@
#pragma once
#include "b_link_tree.h"
#include "insert_separator_task.h"
#include "node.h"
#include "task.h"
#include <optional>
namespace db::index::blinktree {
template <typename K, typename V, class L> class LookupTask final : public Task<K, V, L>
{
public:
LookupTask(const K key, L &listener) noexcept : Task<K, V, L>(key, listener) {}
~LookupTask() override { this->_listener.found(_core_id, this->_key, _value); }
mx::tasking::TaskResult execute(std::uint16_t core_id, std::uint16_t channel_id) override;
private:
V _value{}; // value-initialized: found() in the destructor may read it even if the key was not found
std::uint16_t _core_id{0U};
};
template <typename K, typename V, typename L>
mx::tasking::TaskResult LookupTask<K, V, L>::execute(const std::uint16_t core_id, const std::uint16_t /*channel_id*/)
{
auto *annotated_node = this->annotated_resource().template get<Node<K, V>>();
// Is the node related to the key?
if (annotated_node->high_key() <= this->_key)
{
this->annotate(annotated_node->right_sibling(), config::node_size() / 4U);
return mx::tasking::TaskResult::make_succeed(this);
}
// If we are accessing an inner node, pick the next related child.
if (annotated_node->is_inner())
{
const auto child = annotated_node->child(this->_key);
this->annotate(child, config::node_size() / 4U);
return mx::tasking::TaskResult::make_succeed(this);
}
// We are accessing the correct leaf.
const auto index = annotated_node->index(this->_key);
if (index < annotated_node->size() && annotated_node->leaf_key(index) == this->_key)
{
this->_value = annotated_node->value(index);
}
_core_id = core_id;
return mx::tasking::TaskResult::make_remove();
}
} // namespace db::index::blinktree
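For orientation, a sketch of how these pieces fit together: a tree is created, a task is annotated with the root node and handed to the runtime. The level, protocol, and listener arguments are placeholders (the concrete mx::synchronization enumerators are not spelled out here), and the call pattern follows new_task/annotate/spawn as used elsewhere in this commit:

    // Sketch only: wiring a BLinkTree with an insert task (placeholder arguments).
    using Tree = db::index::blinktree::BLinkTree<std::uint64_t, std::int64_t>;
    using InsertListener = CountingListener<std::uint64_t, std::int64_t>; // sketch listener from above

    auto *tree = new Tree(level, protocol); // level/protocol: concrete mx::synchronization values
    InsertListener listener;

    auto *insert_task = mx::tasking::runtime::new_task<
        db::index::blinktree::InsertValueTask<std::uint64_t, std::int64_t, InsertListener>>(
        /*core_id*/ 0U, /*key*/ 42U, /*value*/ 1L, tree, listener);
    insert_task->annotate(tree->root(), db::index::blinktree::config::node_size() / 4U);
    mx::tasking::runtime::spawn(*insert_task);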

View File

@@ -0,0 +1,388 @@
#pragma once
#include "config.h"
#include <array>
#include <atomic>
#include <cstdint>
#include <cstring>
#include <mx/resource/resource.h>
#include <mx/resource/resource_interface.h>
#include <mx/tasking/runtime.h>
namespace db::index::blinktree {
template <typename K, typename V> class Node;
/**
* Node type.
*/
enum NodeType : std::uint8_t
{
Leaf = 1U << 0U,
Inner = 1U << 1U,
Branch = 1U << 2U
};
inline NodeType operator|(const NodeType a, const NodeType b) noexcept
{
return static_cast<NodeType>(static_cast<std::uint8_t>(a) | static_cast<std::uint8_t>(b));
}
/**
* Header for every node
*/
template <typename K, typename V> struct NodeHeader
{
static constexpr std::uint16_t node_size =
config::node_size() - sizeof(NodeHeader<K, V>) - sizeof(mx::resource::ResourceInterface);
// Type of the node.
const NodeType node_type;
// High key.
K high_key{std::numeric_limits<K>::max()};
// Link to the right sibling.
mx::resource::ptr right_sibling;
// Link to the parent. Alignment needed by some CPU architectures (e.g. arm) because of atomicity.
alignas(8) std::atomic<mx::resource::ptr> parent;
// Number of records in the node.
std::uint16_t size{0U};
[[maybe_unused]] NodeHeader(const NodeType node_type_, const mx::resource::ptr parent_) : node_type(node_type_)
{
this->parent.store(parent_);
}
~NodeHeader() = default;
#ifdef __GNUG__
};
#else
} __attribute__((packed));
#endif
/**
* Representation of an inner node.
*/
template <typename K, typename V> struct InnerNode
{
static constexpr std::uint16_t max_keys =
(NodeHeader<K, V>::node_size - sizeof(mx::resource::ptr)) / (sizeof(K) + sizeof(mx::resource::ptr));
static constexpr std::uint16_t max_separators = max_keys + 1;
// Memory for keys.
std::array<K, InnerNode::max_keys> keys;
// Memory for separators.
std::array<mx::resource::ptr, InnerNode::max_separators> separators;
};
/**
* Representation of a leaf node.
*/
template <typename K, typename V> struct LeafNode
{
static constexpr std::uint16_t max_items = NodeHeader<K, V>::node_size / (sizeof(K) + sizeof(V));
// Memory for keys.
std::array<K, LeafNode::max_items> keys;
// Memory for payloads.
std::array<V, LeafNode::max_items> values;
};
/**
* Abstract node representation.
*/
template <typename K, typename V> class Node final : public mx::resource::ResourceInterface
{
public:
constexpr Node(const NodeType node_type, const mx::resource::ptr parent) : _header(node_type, parent)
{
static_assert(sizeof(Node<K, V>) <= config::node_size());
}
~Node() override
{
if (is_inner())
{
for (auto i = 0U; i <= _header.size; ++i)
{
if (_inner_node.separators[i] != nullptr)
{
mx::tasking::runtime::delete_resource<Node<K, V>>(_inner_node.separators[i]);
}
}
}
}
void on_reclaim() override { this->~Node(); }
/**
* @return True, if this node is a leaf node.
*/
[[nodiscard]] bool is_leaf() const noexcept { return _header.node_type & NodeType::Leaf; }
/**
* @return True, if this node is an inner node.
*/
[[nodiscard]] bool is_inner() const noexcept { return _header.node_type & NodeType::Inner; }
/**
* @return True, if this node is an inner node and children are leaf nodes.
*/
[[nodiscard]] bool is_branch() const noexcept { return _header.node_type & NodeType::Branch; }
/**
* @return Number of records stored in the node.
*/
[[nodiscard]] std::uint16_t size() const noexcept { return _header.size; }
/**
* Updates the number of records stored in the node.
* @param size New number of records.
*/
void size(const std::uint16_t size) noexcept { _header.size = size; }
/**
* @return High key of the node.
*/
K high_key() const noexcept { return _header.high_key; }
/**
* Updates the high key.
* @param high_key New high key.
*/
[[maybe_unused]] void high_key(const K high_key) noexcept { _header.high_key = high_key; }
/**
* @return Pointer to the right sibling.
*/
[[nodiscard]] mx::resource::ptr right_sibling() const noexcept { return _header.right_sibling; }
/**
* Updates the right sibling.
* @param right_sibling Pointer to the new right sibling.
*/
[[maybe_unused]] void right_sibling(const mx::resource::ptr right_sibling) noexcept
{
_header.right_sibling = right_sibling;
}
/**
* @return Pointer to the parent node.
*/
[[nodiscard]] mx::resource::ptr parent() const noexcept { return _header.parent; }
/**
* Updates the parent node.
* @param parent Pointer to the new parent node.
*/
void parent(const mx::resource::ptr parent) noexcept { _header.parent = parent; }
/**
* Read the value at a given index.
* @param index Index.
* @return Value at the index.
*/
V value(const std::uint16_t index) const noexcept { return _leaf_node.values[index]; }
/**
* Update the value at a given index.
* @param index Index.
* @param value New value.
*/
void value(const std::uint16_t index, const V value) noexcept { _leaf_node.values[index] = value; }
/**
* Read the separator at a given index.
* @param index Index.
* @return Separator at the index.
*/
[[nodiscard]] mx::resource::ptr separator(const std::uint16_t index) const noexcept
{
return _inner_node.separators[index];
}
/**
* Update the separator for a given index.
* @param index Index.
* @param separator New separator for the index.
*/
void separator(const std::uint16_t index, const mx::resource::ptr separator) noexcept
{
_inner_node.separators[index] = separator;
}
/**
* Read the key from the leaf node.
* @param index Index.
* @return Key at the index.
*/
K leaf_key(const std::uint16_t index) const noexcept { return _leaf_node.keys[index]; }
/**
* Read the key from the inner node.
* @param index Index.
* @return Key at the index.
*/
K inner_key(const std::uint16_t index) const noexcept { return _inner_node.keys[index]; }
/**
* @return True, if the node can not store further records.
*/
[[nodiscard]] bool full() const noexcept
{
const auto max_size = is_leaf() ? LeafNode<K, V>::max_items : InnerNode<K, V>::max_keys;
return _header.size >= max_size;
}
/**
* Calculates the index for a given key.
* @param key Key.
* @return Index for the key.
*/
std::uint16_t index(K key) const noexcept;
/**
* Calculates the child for a given key using binary search.
* @param key Key.
* @return Child for the key.
*/
mx::resource::ptr child(K key) const noexcept;
/**
* Inserts a record into an inner node.
* @param index Index.
* @param separator Separator.
* @param key Key.
*/
void insert(std::uint16_t index, mx::resource::ptr separator, K key);
/**
* Inserts a record into a leaf node.
* @param index Index.
* @param value Payload.
* @param key Key.
*/
void insert(std::uint16_t index, V value, K key);
/**
* Moves a range of records into another node.
* @param destination Other node.
* @param from_index Start index.
* @param count Number of records to move.
*/
void move(mx::resource::ptr destination, std::uint16_t from_index, std::uint16_t count);
/**
* Searches a separator within an inner node.
* @param separator Separator to search.
* @return True, if the separator was found.
*/
[[nodiscard]] bool contains(mx::resource::ptr separator) const noexcept;
private:
NodeHeader<K, V> _header;
union {
InnerNode<K, V> _inner_node;
LeafNode<K, V> _leaf_node;
};
};
template <typename K, typename V> std::uint16_t Node<K, V>::index(const K key) const noexcept
{
const auto keys = this->is_leaf() ? this->_leaf_node.keys.cbegin() : this->_inner_node.keys.cbegin();
const auto iterator = std::lower_bound(keys, keys + this->size(), key);
return std::distance(keys, iterator);
}
template <typename K, typename V> mx::resource::ptr Node<K, V>::child(const K key) const noexcept
{
std::int16_t low = 0;
std::int16_t high = size() - 1;
while (low <= high)
{
const auto mid = (low + high) >> 1U; // No overflow: both operands are std::int16_t and promoted to int.
if (this->inner_key(mid) <= key)
{
low = mid + 1;
}
else
{
high = mid - 1;
}
}
return this->_inner_node.separators[high + 1U];
}
template <typename K, typename V>
void Node<K, V>::insert(const std::uint16_t index, const mx::resource::ptr separator, const K key)
{
if (index < this->size())
{
const auto offset = this->size() - index;
std::memmove(static_cast<void *>(&this->_inner_node.keys[index + 1]),
static_cast<void *>(&this->_inner_node.keys[index]), offset * sizeof(K));
std::memmove(static_cast<void *>(&this->_inner_node.separators[index + 2]),
static_cast<void *>(&this->_inner_node.separators[index + 1]), offset * sizeof(mx::resource::ptr));
}
this->_inner_node.keys[index] = key;
this->_inner_node.separators[index + 1] = separator;
++this->_header.size;
}
template <typename K, typename V> void Node<K, V>::insert(const std::uint16_t index, const V value, const K key)
{
if (index < this->size())
{
const auto offset = this->size() - index;
std::memmove(static_cast<void *>(&this->_leaf_node.keys[index + 1]),
static_cast<void *>(&this->_leaf_node.keys[index]), offset * sizeof(K));
std::memmove(static_cast<void *>(&this->_leaf_node.values[index + 1]),
static_cast<void *>(&this->_leaf_node.values[index]), offset * sizeof(V));
}
this->_leaf_node.keys[index] = key;
this->_leaf_node.values[index] = value;
++this->_header.size;
}
template <typename K, typename V>
void Node<K, V>::move(const mx::resource::ptr destination, const std::uint16_t from_index, const std::uint16_t count)
{
auto *node = mx::resource::ptr_cast<Node<K, V>>(destination);
if (this->is_leaf())
{
std::memcpy(static_cast<void *>(&node->_leaf_node.keys[0]),
static_cast<void *>(&this->_leaf_node.keys[from_index]), count * sizeof(K));
std::memcpy(static_cast<void *>(&node->_leaf_node.values[0]),
static_cast<void *>(&this->_leaf_node.values[from_index]), count * sizeof(V));
}
else
{
std::memcpy(static_cast<void *>(&node->_inner_node.keys[0]),
static_cast<void *>(&this->_inner_node.keys[from_index]), count * sizeof(K));
std::memcpy(static_cast<void *>(&node->_inner_node.separators[1]),
static_cast<void *>(&this->_inner_node.separators[from_index + 1]),
count * sizeof(mx::resource::ptr));
}
}
template <typename K, typename V> bool Node<K, V>::contains(const mx::resource::ptr separator) const noexcept
{
for (auto i = 0U; i <= this->size(); ++i)
{
if (this->_inner_node.separators[i] == separator)
{
return true;
}
}
return false;
}
} // namespace db::index::blinktree

View File

@@ -0,0 +1,185 @@
#pragma once
#include <ostream>
#include "node.h"
namespace db::index::blinktree {
/**
* Validates tree nodes and checks consistency.
*/
template <typename K, typename V> class NodeConsistencyChecker
{
public:
/**
* Validates the node and prints errors to the given stream.
* @param node Node to validate.
* @param stream Stream to print errors.
*/
static void check_and_print_errors(Node<K, V> *node, std::ostream &stream);
private:
static void check_high_key_valid(Node<K, V> *node, std::ostream &stream);
static void check_key_order_valid(Node<K, V> *node, std::ostream &stream);
static void check_no_null_separator(Node<K, V> *node, std::ostream &stream);
static void check_children_order_valid(Node<K, V> *node, std::ostream &stream);
static void check_level_valid(Node<K, V> *node, std::ostream &stream);
static void check_and_print_parent(Node<K, V> *node, std::ostream &stream);
};
template <typename K, typename V>
void NodeConsistencyChecker<K, V>::check_and_print_errors(Node<K, V> *node, std::ostream &stream)
{
check_high_key_valid(node, stream);
check_key_order_valid(node, stream);
check_no_null_separator(node, stream);
check_children_order_valid(node, stream);
check_level_valid(node, stream);
// check_and_print_parent(node, stream);
}
template <typename K, typename V>
void NodeConsistencyChecker<K, V>::check_high_key_valid(Node<K, V> *node, std::ostream &stream)
{
if (node->is_leaf())
{
if (node->leaf_key(node->size() - 1) >= node->high_key())
{
stream << "[HighKey ] Leaf " << node << ": Key[" << node->size() - 1
<< "] (=" << node->leaf_key(node->size() - 1) << ") >= " << node->high_key() << std::endl;
}
}
else
{
if (node->inner_key(node->size() - 1) >= node->high_key())
{
stream << "[HighKey ] Inner " << node << ": Key[" << node->size() - 1
<< "] (=" << node->inner_key(node->size() - 1) << ") >= " << node->high_key() << std::endl;
}
}
}
template <typename K, typename V>
void NodeConsistencyChecker<K, V>::check_key_order_valid(Node<K, V> *node, std::ostream &stream)
{
for (auto index = 1U; index < node->size(); index++)
{
if (node->is_leaf())
{
if (node->leaf_key(index - 1U) >= node->leaf_key(index))
{
stream << "[KeyOrder ] Leaf " << node << ": Key[" << index - 1U << "] (=" << node->leaf_key(index - 1U)
<< ") >= Key[" << index << "] (=" << node->leaf_key(index) << ")" << std::endl;
}
}
else
{
if (node->inner_key(index - 1) >= node->inner_key(index))
{
stream << "[KeyOrder ] Inner " << node << ": Key[" << index - 1 << "] (=" << node->inner_key(index - 1)
<< ") >= Key[" << index << "] (=" << node->inner_key(index) << ")" << std::endl;
}
}
}
}
template <typename K, typename V>
void NodeConsistencyChecker<K, V>::check_no_null_separator(Node<K, V> *node, std::ostream &stream)
{
if (node->is_inner())
{
for (auto index = 0U; index <= node->size(); index++)
{
if (node->separator(index) == nullptr)
{
stream << "[Separator ] Inner " << node << ": Separator[" << index << "] is empty." << std::endl;
}
}
}
}
template <typename K, typename V>
void NodeConsistencyChecker<K, V>::check_children_order_valid(Node<K, V> *node, std::ostream &stream)
{
if (node->is_inner())
{
for (auto index = 0U; index < node->size(); index++)
{
auto child = node->separator(index).template get<Node<K, V>>();
const auto child_last_key =
child->is_leaf() ? child->leaf_key(child->size() - 1U) : child->inner_key(child->size() - 1U);
if (child_last_key >= node->inner_key(index))
{
stream << "[ChildOrder] Inner " << node << ": Key[" << index << "] (=" << node->inner_key(index)
<< ") <= Separator[" << index << "].Key[" << child->size() - 1U << "] (=" << child_last_key
<< ")" << std::endl;
}
}
}
}
template <typename K, typename V>
void NodeConsistencyChecker<K, V>::check_level_valid(Node<K, V> *node, std::ostream &stream)
{
if (node->right_sibling() && node->is_leaf() != node->right_sibling().template get<Node<K, V>>()->is_leaf())
{
stream << "[Level ] Leaf " << node << ": Is marked as leaf, but right sibling is not" << std::endl;
}
if (node->is_inner())
{
for (auto index = 0U; index < node->size(); index++)
{
if (node->separator(index).template get<Node<K, V>>()->is_leaf() !=
node->separator(index + 1U).template get<Node<K, V>>()->is_leaf())
{
stream << "[Level ] Inner " << node << ": Separator[" << index
<< "] is marked as is_leaf = " << node->separator(index).template get<Node<K, V>>()->is_leaf()
<< " but Separator[" << index + 1U << "] is not" << std::endl;
}
}
}
}
template <typename K, typename V>
void NodeConsistencyChecker<K, V>::check_and_print_parent(Node<K, V> *node, std::ostream &stream)
{
const auto parent = node->parent();
if (parent)
{
if (parent.template get<Node<K, V>>()->contains(mx::resource::ptr(node)) == false)
{
stream << "Wrong parent(1) for node " << node << " (leaf: " << node->is_leaf() << ")" << std::endl;
}
else
{
auto index = 0U;
for (; index <= parent.template get<Node<K, V>>()->size(); index++)
{
if (parent.template get<Node<K, V>>()->separator(index).template get<Node<K, V>>() == node)
{
break;
}
}
if (index < parent.template get<Node<K, V>>()->size())
{
const auto key =
node->is_leaf() ? node->leaf_key(node->size() - 1U) : node->inner_key(node->size() - 1);
if ((key < parent.template get<Node<K, V>>()->inner_key(index)) == false)
{
stream << "Wrong parent(2) for node " << node << " (leaf: " << node->is_leaf() << ")" << std::endl;
}
}
else
{
const auto key = node->is_leaf() ? node->leaf_key(0U) : node->inner_key(0U);
if ((key >= parent.template get<Node<K, V>>()->inner_key(index - 1U)) == false)
{
stream << "Wrong parent(3) for node " << node << " (leaf: " << node->is_leaf() << ")" << std::endl;
}
}
}
}
}
} // namespace db::index::blinktree

View File

@@ -0,0 +1,44 @@
#pragma once
#include "node.h"
#include <mx/resource/resource.h>
namespace db::index::blinktree {
/**
* Iterator for iterating over nodes of a tree.
*/
template <typename K, typename V> class NodeIterator
{
public:
NodeIterator() = default;
explicit NodeIterator(Node<K, V> *root) : _current_node(root), _first_node_in_level(root) {}
~NodeIterator() = default;
Node<K, V> *&operator*() { return _current_node; }
NodeIterator<K, V> &operator++()
{
if (_current_node->right_sibling())
{
_current_node = _current_node->right_sibling().template get<Node<K, V>>();
}
else if (_current_node->is_inner())
{
_first_node_in_level = _first_node_in_level->separator(0).template get<Node<K, V>>();
_current_node = _first_node_in_level;
}
else
{
_current_node = nullptr;
}
return *this;
}
bool operator!=(const NodeIterator<K, V> &other) const { return _current_node != other._current_node; }
private:
Node<K, V> *_current_node = nullptr;
Node<K, V> *_first_node_in_level = nullptr;
};
} // namespace db::index::blinktree

View File

@@ -0,0 +1,72 @@
#pragma once
#include "config.h"
#include "node.h"
#include <cstdint>
#include <ostream>
namespace db::index::blinktree {
/**
* Collects and prints statistics of a set of nodes.
*/
template <typename K, typename V> class NodeStatistics
{
public:
explicit NodeStatistics(const std::uint16_t height) : _tree_height(height) {}
~NodeStatistics() = default;
NodeStatistics &operator+=(Node<K, V> *node)
{
this->_count_inner_nodes += node->is_inner();
this->_count_leaf_nodes += node->is_leaf();
if (node->is_leaf())
{
this->_count_leaf_node_keys += node->size();
}
else
{
this->_count_inner_node_keys += node->size();
}
return *this;
}
friend std::ostream &operator<<(std::ostream &stream, const NodeStatistics<K, V> &tree_statistics)
{
const auto count_nodes = tree_statistics._count_leaf_nodes + tree_statistics._count_inner_nodes;
const auto size_in_bytes = count_nodes * config::node_size();
stream << "Statistics of the Tree: \n"
<< " Node size: " << sizeof(Node<K, V>) << " B\n"
<< " Header size: " << sizeof(NodeHeader<K, V>) << " B\n"
<< " Inner keys: " << InnerNode<K, V>::max_keys << " (" << sizeof(K) * InnerNode<K, V>::max_keys
<< " B)\n"
<< " Leaf keys: " << LeafNode<K, V>::max_items << " (" << sizeof(K) * LeafNode<K, V>::max_items
<< " B)\n"
<< " Tree height: " << tree_statistics._tree_height << "\n"
<< " Inner nodes: " << tree_statistics._count_inner_nodes << "\n"
<< " Inner entries: " << tree_statistics._count_inner_node_keys << "\n"
<< " Leaf nodes: " << tree_statistics._count_leaf_nodes << "\n"
<< " Leaf entries: " << tree_statistics._count_leaf_node_keys << "\n"
<< " Tree size: " << size_in_bytes / 1024.0 / 1024.0 << " MB";
return stream;
}
private:
// Number of inner nodes.
std::uint64_t _count_inner_nodes = 0U;
// Number of leaf nodes.
std::uint64_t _count_leaf_nodes = 0U;
// Number of records located in inner nodes.
std::uint64_t _count_inner_node_keys = 0U;
// Number of records located in leaf nodes.
std::uint64_t _count_leaf_node_keys = 0U;
// Height of the tree.
const std::uint16_t _tree_height;
};
} // namespace db::index::blinktree

View File

@@ -0,0 +1,16 @@
#pragma once
#include <mx/tasking/task.h>
namespace db::index::blinktree {
template <typename K, typename V, class L> class Task : public mx::tasking::TaskInterface
{
public:
constexpr Task(const K key, L &listener) : _listener(listener), _key(key) {}
~Task() override = default;
protected:
L &_listener;
K _key;
};
} // namespace db::index::blinktree

View File

@@ -0,0 +1,69 @@
#pragma once
#include "b_link_tree.h"
#include "insert_separator_task.h"
#include "node.h"
#include "task.h"
#include <iostream>
namespace db::index::blinktree {
template <typename K, typename V, class L> class UpdateTask final : public Task<K, V, L>
{
public:
constexpr UpdateTask(const K key, const V value, L &listener) noexcept : Task<K, V, L>(key, listener), _value(value)
{
}
~UpdateTask() override = default;
mx::tasking::TaskResult execute(std::uint16_t core_id, std::uint16_t channel_id) override;
private:
const V _value;
};
template <typename K, typename V, typename L>
mx::tasking::TaskResult UpdateTask<K, V, L>::execute(const std::uint16_t core_id, const std::uint16_t /*channel_id*/)
{
auto *node = this->annotated_resource().template get<Node<K, V>>();
// Is the node related to the key?
if (node->high_key() <= this->_key)
{
this->annotate(node->right_sibling(), config::node_size() / 4U);
return mx::tasking::TaskResult::make_succeed(this);
}
// If we are accessing an inner node, pick the next related child.
if (node->is_inner())
{
const auto child = node->child(this->_key);
this->annotate(child, config::node_size() / 4U);
this->is_readonly(!node->is_branch());
return mx::tasking::TaskResult::make_succeed(this);
}
// If the task is still reading, but this is a leaf,
// spawn again as writer.
if (node->is_leaf() && this->is_readonly())
{
this->is_readonly(false);
return mx::tasking::TaskResult::make_succeed(this);
}
// We are accessing the correct leaf.
const auto index = node->index(this->_key);
if (index < node->size() && node->leaf_key(index) == this->_key)
{
node->value(index, this->_value);
this->_listener.updated(core_id, this->_key, this->_value);
}
else
{
this->_listener.missing(core_id, this->_key);
}
}
return mx::tasking::TaskResult::make_remove();
}
} // namespace db::index::blinktree

42
src/mx/README.md Normal file
View File

@@ -0,0 +1,42 @@
# How to use `MxTasking`
## Build a simple _Hello World_ task
Every task inherits from `mx::tasking::TaskInterface` and implements the `execute` method, which is called when the task gets executed by the runtime.
#include <mx/tasking/task.h>
#include <iostream>
class HelloWorldTask : public mx::tasking::TaskInterface
{
public:
HelloWorldTask() = default;
virtual ~HelloWorldTask() = default;
mx::tasking::TaskResult execute(const std::uint16_t /*core_id*/, const std::uint16_t /*channel_id*/) override
{
std::cout << "Hello world from MxTasking!" << std::endl;
return mx::tasking::TaskResult::make_remove();
}
};
## Run the _Hello World_ task
#include <mx/tasking/runtime.h>
int main()
{
// Define which cores will be used (1 core here).
auto cores = mx::util::core_set::build(1);
// Create an instance of the task with the current core as first
// parameter (we assume that we start at core 0).
auto *task = mx::tasking::runtime::new_task<HelloWorldTask>(0);
// Create a runtime for the given cores.
mx::tasking::runtime_guard runtime { cores };
// Schedule the task.
mx::tasking::runtime::spawn(*task);
// Will print: "Hello world from MxTasking!"
return 0;
}

View File

@@ -0,0 +1,39 @@
#pragma once
#include <cstdint>
namespace mx::memory {
/**
* Helper for setting the correct size on aligned allocation:
* The allocation size has to be a multiple of the alignment.
*/
class alignment_helper
{
public:
template <typename T> static constexpr T next_multiple(const T value, const T base)
{
if (value > base)
{
const auto mod = value % base;
if (mod == 0U)
{
return value;
}
return value + base - mod;
}
return base;
}
static constexpr bool is_power_of_two(const std::uint64_t value)
{
return ((value != 0U) && ((value & (value - 1U)) == 0U));
}
static constexpr std::uint64_t next_power_of_two(const std::uint64_t value)
{
return is_power_of_two(value) ? value : 1ULL << (sizeof(std::uint64_t) * 8 - __builtin_clzll(value));
}
};
} // namespace mx::memory
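A few worked values for the helpers above (written as a sketch of static_asserts, not part of the sources):

    // Sketch: expected results of the alignment helpers.
    static_assert(mx::memory::alignment_helper::next_multiple(1000UL, 64UL) == 1024UL);
    static_assert(mx::memory::alignment_helper::next_multiple(64UL, 64UL) == 64UL);
    static_assert(mx::memory::alignment_helper::is_power_of_two(1024U));
    static_assert(mx::memory::alignment_helper::next_power_of_two(1000U) == 1024U);
    static_assert(mx::memory::alignment_helper::next_power_of_two(4096U) == 4096U);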

29
src/mx/memory/config.h Normal file
View File

@@ -0,0 +1,29 @@
#pragma once
#include <chrono>
namespace mx::memory {
class config
{
public:
/**
* @return Maximum number of supported NUMA regions.
*/
static constexpr auto max_numa_nodes() { return 2U; }
/**
* Controls whether the allocator should de-prioritize memory from external NUMA regions.
* @return True, if memory from external NUMA regions should be used less.
*/
static constexpr auto low_priority_for_external_numa() { return false; }
/**
* @return Interval of each epoch, if memory reclamation is used.
*/
static constexpr auto epoch_interval() { return std::chrono::milliseconds(50U); }
/**
* @return True, if garbage is removed locally.
*/
static constexpr auto local_garbage_collection() { return false; }
};
} // namespace mx::memory

View File

@@ -0,0 +1,326 @@
#include "dynamic_size_allocator.h"
#include "global_heap.h"
#include <algorithm>
#include <cassert>
#include <mx/system/topology.h>
using namespace mx::memory::dynamic;
AllocationBlock::AllocationBlock(const std::uint32_t id, const std::uint8_t numa_node_id, const std::size_t size)
: _id(id), _numa_node_id(numa_node_id), _size(size), _available_size(size)
{
this->_allocated_block = GlobalHeap::allocate(numa_node_id, size);
this->_free_elements.emplace_back(FreeHeader{reinterpret_cast<std::uintptr_t>(this->_allocated_block), size});
}
AllocationBlock::AllocationBlock(AllocationBlock &&other) noexcept
: _id(other._id), _numa_node_id(other._numa_node_id), _size(other._size), _allocated_block(other._allocated_block),
_free_elements(std::move(other._free_elements)), _available_size(other._available_size)
{
other._allocated_block = nullptr;
}
AllocationBlock &AllocationBlock::operator=(AllocationBlock &&other) noexcept
{
this->_id = other._id;
this->_numa_node_id = other._numa_node_id;
this->_size = other._size;
this->_allocated_block = std::exchange(other._allocated_block, nullptr);
this->_free_elements = std::move(other._free_elements);
this->_available_size = other._available_size;
return *this;
}
AllocationBlock::~AllocationBlock()
{
if (this->_allocated_block != nullptr)
{
GlobalHeap::free(this->_allocated_block, this->_size);
}
}
void *AllocationBlock::allocate(const std::size_t alignment, const std::size_t size) noexcept
{
assert(alignment && (!(alignment & (alignment - 1))) && "Alignment must be > 0 and power of 2");
this->_lock.lock();
if (this->_available_size < size)
{
this->_lock.unlock();
return nullptr;
}
auto [free_element_iterator, aligned_size_including_header] = this->find_block(alignment, size);
if (free_element_iterator == this->_free_elements.end())
{
this->_lock.unlock();
return nullptr;
}
const auto free_block_start = free_element_iterator->start();
const auto free_block_end = free_block_start + free_element_iterator->size();
const auto remaining_size = free_element_iterator->size() - aligned_size_including_header;
std::uint16_t size_before_header{0U};
if (remaining_size >= 256U)
{
const auto index = std::distance(this->_free_elements.begin(), free_element_iterator);
this->_free_elements[index].contract(aligned_size_including_header);
this->_available_size -= aligned_size_including_header;
}
else
{
size_before_header = remaining_size;
// Account for the whole free element before erasing; erase() invalidates the iterator.
this->_available_size -= free_element_iterator->size();
this->_free_elements.erase(free_element_iterator);
}
this->_lock.unlock();
const auto allocation_header_address = free_block_end - aligned_size_including_header;
new (reinterpret_cast<void *>(allocation_header_address)) AllocatedHeader(
aligned_size_including_header - sizeof(AllocatedHeader), size_before_header, this->_numa_node_id, this->_id);
assert((allocation_header_address + sizeof(AllocatedHeader)) % alignment == 0 && "Not aligned");
return reinterpret_cast<void *>(allocation_header_address + sizeof(AllocatedHeader));
}
void AllocationBlock::free(AllocatedHeader *allocation_header) noexcept
{
const auto allocated_size = allocation_header->size;
const auto unused_size_before_header = allocation_header->unused_size_before_header;
const auto block_address = reinterpret_cast<std::uintptr_t>(allocation_header) - unused_size_before_header;
const auto size = allocated_size + unused_size_before_header + sizeof(AllocatedHeader);
const auto free_element = FreeHeader{block_address, size};
this->_lock.lock();
if (this->_free_elements.empty())
{
this->_free_elements.push_back(free_element);
}
else
{
const auto lower_bound_iterator =
std::lower_bound(this->_free_elements.begin(), this->_free_elements.end(), free_element);
const auto index = std::distance(this->_free_elements.begin(), lower_bound_iterator);
assert(index >= 0 && "Index is negative");
const auto real_index = std::size_t(index);
// Try merge to the right.
if (real_index < this->_free_elements.size() && free_element.borders(this->_free_elements[real_index]))
{
this->_free_elements[real_index].merge(free_element);
// The freed element was absorbed by its right neighbor, so no separate insert is needed.
// Try to merge the grown right neighbor with its left neighbor as well.
if (real_index > 0U && this->_free_elements[real_index - 1U].borders(this->_free_elements[real_index]))
{
this->_free_elements[real_index - 1].merge(this->_free_elements[real_index]);
this->_free_elements.erase(this->_free_elements.begin() + real_index);
}
}
else if (real_index > 0U && this->_free_elements[real_index - 1U].borders(free_element))
{
// We could not merge with the right neighbor, but maybe with the left one.
// That way, we avoid the actual insert into the vector.
this->_free_elements[real_index - 1U].merge(free_element);
}
else
{
// We could not merge anything. Just insert.
this->_free_elements.insert(this->_free_elements.begin() + real_index, free_element);
}
}
this->_available_size += free_element.size();
this->_lock.unlock();
}
std::pair<std::vector<FreeHeader>::iterator, std::size_t> AllocationBlock::find_block(const std::size_t alignment,
const std::size_t size) noexcept
{
/**
* Check each block of the free list for enough space to hold the requested allocation.
* If a block is large enough, check the alignment (the allocation is placed at the end of the block).
*
* +----------------------------+
* | 2000byte |
* +----------------------------+
* => wanted: 700byte
* => align border -> 1300 is not aligned, expand to 720byte -> 1280 is aligned
* +----------------------------+
* | 1280byte | 720byte |
* +----------------------------+
*
*/
const auto size_including_header = size + sizeof(AllocatedHeader);
for (auto iterator = this->_free_elements.begin(); iterator != this->_free_elements.end(); iterator++)
{
const auto &free_element = *iterator;
if (free_element >= size_including_header)
{
const auto start = free_element.start();
// The free block ends here.
const auto end = start + free_element.size();
// This is where we would start the memory block on allocation
// But this may be not aligned.
const auto possible_block_begin = end - size;
// This is the size we need to start the block aligned.
const auto aligned_size = size + (possible_block_begin & (alignment - 1U));
// This is the size we need aligned and for header.
const auto aligned_size_including_header = aligned_size + sizeof(AllocatedHeader);
if (free_element >= aligned_size_including_header)
{
// aligned_size_including_header
return std::make_pair(iterator, aligned_size_including_header);
}
}
}
return std::make_pair(this->_free_elements.end(), 0U);
}
Allocator::Allocator()
{
this->initialize_empty();
}
void *Allocator::allocate(const std::uint8_t numa_node_id, const std::size_t alignment, const std::size_t size) noexcept
{
auto &allocation_blocks = this->_numa_allocation_blocks[numa_node_id];
auto *memory = allocation_blocks.back().allocate(alignment, size);
if (memory == nullptr)
{
// This is the default allocation size...
constexpr auto default_alloc_size = 1UL << 28U;
// ... but if the requested size is higher, allocate more.
const auto size_to_alloc = std::max(default_alloc_size, alignment_helper::next_multiple(size, 64UL));
// Try to allocate until the allocation succeeds.
// Another core may be allocating at the same time; the per-region
// allocation flag serializes the creation of new blocks.
auto &flag = this->_numa_allocation_flags[numa_node_id].value();
while (memory == nullptr)
{
allocate_new_block(numa_node_id, size_to_alloc, allocation_blocks, flag);
memory = allocation_blocks.back().allocate(alignment, size);
}
}
return memory;
}
void Allocator::allocate_new_block(const std::uint8_t numa_node_id, const std::size_t size,
std::vector<AllocationBlock> &blocks, std::atomic<bool> &flag)
{
// Acquire the allocation flag to ensure that only one thread allocates.
auto expected = false;
const auto can_allocate = flag.compare_exchange_strong(expected, true);
if (can_allocate)
{
// If this thread acquired the flag, allocate the new block...
const auto next_id = this->_next_allocation_id[numa_node_id].value().fetch_add(1U, std::memory_order_acq_rel);
blocks.emplace_back(AllocationBlock{next_id, numa_node_id, size});
// .. but release the allocation flag afterward.
flag.store(false);
}
else
{
// Otherwise, another thread is allocating; wait until it has finished.
while (flag.load())
{
system::builtin::pause();
}
}
}
void Allocator::free(void *pointer) noexcept
{
// Every allocated object belongs to exactly one allocation block.
// The reason is that we can only return full blocks to
// the global heap, which is managed by the operating system.
const auto address = reinterpret_cast<std::uintptr_t>(pointer);
// Access the header to identify the allocation block.
const auto header_address = address - sizeof(AllocatedHeader);
auto *allocation_header = reinterpret_cast<AllocatedHeader *>(header_address);
// Check all blocks to find the matching one.
for (auto &block : this->_numa_allocation_blocks[allocation_header->numa_node_id])
{
if (allocation_header->allocation_block_id == block.id())
{
block.free(allocation_header);
return;
}
}
}
void Allocator::defragment() noexcept
{
// Remove all blocks that are unused to free as much memory as possible.
for (auto i = 0U; i <= system::topology::max_node_id(); ++i)
{
auto &numa_blocks = this->_numa_allocation_blocks[i];
numa_blocks.erase(
std::remove_if(numa_blocks.begin(), numa_blocks.end(), [](const auto &block) { return block.is_free(); }),
numa_blocks.end());
}
// If all memory was released, acquire new.
this->initialize_empty();
}
void Allocator::initialize_empty()
{
// For performance reasons, each list must contain at least
// one block. This way, we do not have to check for emptiness on every allocation.
for (auto i = 0U; i <= system::topology::max_node_id(); ++i)
{
auto &blocks = this->_numa_allocation_blocks[i];
if (blocks.empty())
{
const auto next_id = this->_next_allocation_id[i].value().fetch_add(1U, std::memory_order_relaxed);
blocks.emplace_back(AllocationBlock{next_id, std::uint8_t(i), 4096U * 4096U});
}
}
}
bool Allocator::is_free() const noexcept
{
for (auto i = 0U; i <= system::topology::max_node_id(); ++i)
{
const auto &numa_blocks = this->_numa_allocation_blocks[i];
const auto iterator = std::find_if(numa_blocks.cbegin(), numa_blocks.cend(), [](const auto &allocation_block) {
return allocation_block.is_free() == false;
});
if (iterator != numa_blocks.cend())
{
return false;
}
}
return true;
}
void Allocator::release_allocated_memory() noexcept
{
for (auto i = 0U; i <= system::topology::max_node_id(); ++i)
{
this->_numa_allocation_blocks[i].clear();
this->_next_allocation_id[i].value().store(0U);
}
}

View File

@@ -0,0 +1,185 @@
#pragma once
#include "config.h"
#include <array>
#include <cassert>
#include <cstdint>
#include <mx/synchronization/spinlock.h>
#include <mx/util/aligned_t.h>
#include <utility>
#include <vector>
namespace mx::memory::dynamic {
/**
* Represents free space within an allocation block.
* Holds the start and the size of a free object.
*/
class FreeHeader
{
public:
constexpr FreeHeader(const std::uintptr_t start, const std::size_t size) noexcept : _start(start), _size(size) {}
constexpr FreeHeader(const FreeHeader &other) noexcept = default;
~FreeHeader() noexcept = default;
void contract(const std::size_t size) noexcept { _size -= size; }
[[nodiscard]] std::uintptr_t start() const noexcept { return _start; }
[[nodiscard]] std::uintptr_t size() const noexcept { return _size; }
bool operator<(const FreeHeader &other) const noexcept { return _start < other._start; }
bool operator>=(const std::size_t size) const noexcept { return _size >= size; }
[[nodiscard]] bool borders(const FreeHeader &other) const noexcept { return (_start + _size) == other._start; }
void merge(const FreeHeader &other) noexcept
{
if (other._start < _start)
{
assert(other.borders(*this) && "Can not merge: Elements are not next to each other");
_start = other._start;
_size += other._size;
}
else
{
assert(borders(other) && "Can not merge: Elements are not next to each other");
_size += other._size;
}
}
private:
std::uintptr_t _start;
std::size_t _size;
};
/**
* Header in front of allocated memory, storing the
* size, the size left unused due to alignment,
* the ID of the NUMA node the memory is allocated on,
* and the source allocation block of this memory.
*/
struct AllocatedHeader
{
constexpr AllocatedHeader(const std::size_t size_, const std::uint16_t unused_size_before_header_,
const std::uint8_t numa_node_id_, const std::uint32_t allocation_block_id_) noexcept
: size(size_), unused_size_before_header(unused_size_before_header_), numa_node_id(numa_node_id_),
allocation_block_id(allocation_block_id_)
{
}
const std::size_t size;
const std::uint16_t unused_size_before_header;
const std::uint8_t numa_node_id;
const std::uint32_t allocation_block_id;
};
/**
* Set of one or more free tiles that can be allocated.
*/
class AllocationBlock
{
public:
AllocationBlock(std::uint32_t id, std::uint8_t numa_node_id, std::size_t size);
AllocationBlock(const AllocationBlock &other) = delete;
AllocationBlock(AllocationBlock &&other) noexcept;
AllocationBlock &operator=(AllocationBlock &&other) noexcept;
~AllocationBlock();
/**
* Allocates memory from the allocation block.
*
* @param alignment Requested alignment.
* @param size Requested size.
* @return Pointer to the allocated memory.
*/
void *allocate(std::size_t alignment, std::size_t size) noexcept;
/**
* Frees memory.
*
* @param allocation_header Pointer to the header of the freed memory.
*/
void free(AllocatedHeader *allocation_header) noexcept;
/**
* @return Unique number of this allocation block.
*/
[[nodiscard]] std::uint32_t id() const noexcept { return _id; }
/**
* @return True, if the full block is free.
*/
[[nodiscard]] bool is_free() const noexcept
{
return _free_elements.empty() || (_free_elements.size() == 1 && _free_elements[0].size() == _size);
}
private:
alignas(64) std::uint32_t _id;
std::uint8_t _numa_node_id;
std::size_t _size;
void *_allocated_block;
std::vector<FreeHeader> _free_elements;
alignas(64) std::size_t _available_size;
synchronization::Spinlock _lock;
std::pair<std::vector<FreeHeader>::iterator, std::size_t> find_block(std::size_t alignment,
std::size_t size) noexcept;
};
/**
* Allocator which holds a set of allocation blocks separated
* for each numa node region.
*/
class Allocator
{
public:
Allocator();
~Allocator() = default;
void *allocate(std::uint8_t numa_node_id, std::size_t alignment, std::size_t size) noexcept;
void free(void *pointer) noexcept;
/**
* Frees unused allocation blocks.
*/
void defragment() noexcept;
/**
* Releases all allocated memory.
*/
void release_allocated_memory() noexcept;
/**
* Adds minimal memory to all numa node regions.
*/
void initialize_empty();
/**
* @return True, if all blocks of all numa regions are free.
*/
[[nodiscard]] bool is_free() const noexcept;
private:
// Allocation blocks per numa node region.
std::array<std::vector<AllocationBlock>, config::max_numa_nodes()> _numa_allocation_blocks;
// Allocation flags, used for synchronization when allocating, per numa node region.
std::array<util::aligned_t<std::atomic<bool>>, config::max_numa_nodes()> _numa_allocation_flags;
// Sequence for block allocation per numa node region.
std::array<util::aligned_t<std::atomic_uint32_t>, config::max_numa_nodes()> _next_allocation_id;
/**
* Allocates a new block of fresh memory for a NUMA region (thread-safe).
* @param numa_node_id NUMA node to allocate the block on.
* @param size Size of the new block.
* @param blocks Block list of the NUMA region; the new block is appended to it.
* @param flag Allocation flag used to serialize concurrent block allocations.
*/
void allocate_new_block(std::uint8_t numa_node_id, std::size_t size, std::vector<AllocationBlock> &blocks,
std::atomic<bool> &flag);
};
} // namespace mx::memory::dynamic
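// ---------------------------------------------------------------------------
// Editorial usage sketch (not part of the original interface): a minimal example
// of how the dynamic allocator above might be driven. Constructing the allocator
// eagerly reserves one block per NUMA node via the GlobalHeap, so libnuma has to
// be available; the node id, alignment, and size below are illustrative values.
inline void dynamic_allocator_usage_example()
{
mx::memory::dynamic::Allocator allocator;
// 256 bytes, cache-line aligned, placed on NUMA node 0.
auto *memory = allocator.allocate(/*numa_node_id*/ 0U, /*alignment*/ 64U, /*size*/ 256U);
allocator.free(memory);
// Drop allocation blocks that became completely unused again.
allocator.defragment();
}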

View File

@@ -0,0 +1,356 @@
#pragma once
#include "alignment_helper.h"
#include "config.h"
#include "global_heap.h"
#include "task_allocator_interface.h"
#include <array>
#include <atomic>
#include <cstdint>
#include <cstring>
#include <memory>
#include <mx/synchronization/spinlock.h>
#include <mx/system/cache.h>
#include <mx/system/topology.h>
#include <mx/tasking/config.h>
#include <mx/util/core_set.h>
#include <unordered_map>
#include <vector>
namespace mx::memory::fixed {
/**
* Represents a free memory object.
*/
class FreeHeader
{
public:
constexpr FreeHeader() noexcept = default;
~FreeHeader() noexcept = default;
[[nodiscard]] FreeHeader *next() const noexcept { return _next; }
void next(FreeHeader *next) noexcept { _next = next; }
void numa_node_id(const std::uint8_t numa_node_id) noexcept { _numa_node_id = numa_node_id; }
[[nodiscard]] std::uint8_t numa_node_id() const noexcept { return _numa_node_id; }
private:
FreeHeader *_next = nullptr;
std::uint8_t _numa_node_id = 0U;
};
/**
* The Chunk holds a fixed size of memory.
*/
class Chunk
{
public:
Chunk() noexcept = default;
explicit Chunk(void *memory) noexcept : _memory(memory) {}
~Chunk() noexcept = default;
static constexpr auto size() { return 4096 * 4096; /* 16 MiB */ }
explicit operator void *() const noexcept { return _memory; }
explicit operator std::uintptr_t() const noexcept { return reinterpret_cast<std::uintptr_t>(_memory); }
explicit operator bool() const noexcept { return _memory != nullptr; }
private:
void *_memory = nullptr;
};
/**
* The ProcessorHeap holds memory for a single socket.
* All cores sitting on this socket can allocate memory.
* Internally, the ProcessorHeap buffers allocated memory
* to minimize access to the global heap.
*/
class ProcessorHeap
{
public:
explicit ProcessorHeap(const std::uint8_t numa_node_id) noexcept : _numa_node_id(numa_node_id)
{
_allocated_chunks.reserve(1024);
fill_buffer<true>();
}
~ProcessorHeap() noexcept
{
for (const auto allocated_chunk : _allocated_chunks)
{
GlobalHeap::free(static_cast<void *>(allocated_chunk), Chunk::size());
}
for (const auto free_chunk : _free_chunk_buffer)
{
GlobalHeap::free(static_cast<void *>(free_chunk), Chunk::size());
}
}
/**
* @return ID of the NUMA node the memory is allocated on.
*/
[[nodiscard]] std::uint8_t numa_node_id() const noexcept { return _numa_node_id; }
/**
* Allocates a chunk of memory from the internal buffer.
* In case the buffer is empty, new Chunks from the GlobalHeap
* will be allocated.
*
* @return A chunk of allocated memory.
*/
Chunk allocate() noexcept
{
const auto next_free_chunk = _next_free_chunk.fetch_add(1, std::memory_order_relaxed);
if (next_free_chunk < _free_chunk_buffer.size())
{
return _free_chunk_buffer[next_free_chunk];
}
auto expect = false;
const auto can_fill = _fill_buffer_flag.compare_exchange_strong(expect, true);
if (can_fill)
{
fill_buffer<false>();
_fill_buffer_flag = false;
}
else
{
while (_fill_buffer_flag)
{
system::builtin::pause();
}
}
return allocate();
}
private:
// Size of the internal chunk buffer.
inline static constexpr auto CHUNKS = 128U;
// ID of the NUMA node of this ProcessorHeap.
alignas(64) const std::uint8_t _numa_node_id;
// Buffer for free chunks.
std::array<Chunk, CHUNKS> _free_chunk_buffer;
// Pointer to the next free chunk in the buffer.
alignas(64) std::atomic_uint8_t _next_free_chunk{0U};
// Flag used for mutual exclusion when refilling the buffer from the global heap.
std::atomic_bool _fill_buffer_flag{false};
// List of all allocated chunks; they will be freed on destruction.
std::vector<Chunk> _allocated_chunks;
/**
* Allocates a very big chunk from the GlobalHeap and
* splits it into smaller chunks to store them in the
* internal buffer.
*/
template <bool IS_FIRST = false> void fill_buffer() noexcept
{
if constexpr (IS_FIRST == false)
{
for (const auto &chunk : _free_chunk_buffer)
{
_allocated_chunks.push_back(chunk);
}
}
auto *heap_memory = GlobalHeap::allocate(_numa_node_id, Chunk::size() * _free_chunk_buffer.size());
auto heap_memory_address = reinterpret_cast<std::uintptr_t>(heap_memory);
for (auto i = 0U; i < _free_chunk_buffer.size(); ++i)
{
_free_chunk_buffer[i] = Chunk(reinterpret_cast<void *>(heap_memory_address + (i * Chunk::size())));
}
_next_free_chunk.store(0U);
}
};
/**
* The CoreHeap is the allocator of a single core;
* therefore, allocations are latch-free.
*/
template <std::size_t S> class alignas(64) CoreHeap
{
public:
explicit CoreHeap(ProcessorHeap *processor_heap) noexcept
: _processor_heap(processor_heap), _numa_node_id(processor_heap->numa_node_id())
{
}
CoreHeap() noexcept = default;
~CoreHeap() noexcept = default;
/**
* Allocates new memory from the CoreHeap.
* When the internal buffer is empty, the CoreHeap
* will allocate new chunks from the ProcessorHeap.
*
* @return Pointer to the newly allocated memory.
*/
void *allocate() noexcept
{
if (empty())
{
fill_buffer();
}
auto *free_object = _first;
_first = free_object->next();
if constexpr (config::low_priority_for_external_numa())
{
free_object->numa_node_id(_numa_node_id);
return reinterpret_cast<void *>(reinterpret_cast<std::uintptr_t>(free_object) + 64U);
}
else
{
return static_cast<void *>(free_object);
}
}
/**
* Frees a memory object. The freed memory location
* is placed at the front of the free list. This way,
* the next allocation reuses the just freed object, which
* may still be in the CPU cache.
*
* @param pointer Pointer to the memory object to be freed.
*/
void free(void *pointer) noexcept
{
if constexpr (config::low_priority_for_external_numa())
{
const auto address = reinterpret_cast<std::uintptr_t>(pointer);
auto *free_object = reinterpret_cast<FreeHeader *>(address - 64U);
if (free_object->numa_node_id() == _numa_node_id)
{
free_object->next(_first);
_first = free_object;
}
else
{
_last->next(free_object);
free_object->next(nullptr);
_last = free_object;
}
}
else
{
auto *free_object = static_cast<FreeHeader *>(pointer);
free_object->next(_first);
_first = free_object;
}
}
/**
* Fills the buffer by asking the ProcessorHeap for more memory.
* This is latch-free since only a single core calls this method.
*/
void fill_buffer()
{
auto chunk = _processor_heap->allocate();
const auto chunk_address = static_cast<std::uintptr_t>(chunk);
constexpr auto object_size = config::low_priority_for_external_numa() ? S + 64U : S;
constexpr auto count_objects = std::uint64_t{Chunk::size() / object_size};
auto *first_free = reinterpret_cast<FreeHeader *>(chunk_address);
auto *last_free = reinterpret_cast<FreeHeader *>(chunk_address + ((count_objects - 1) * object_size));
auto *current_free = first_free;
for (auto i = 0U; i < count_objects - 1U; ++i)
{
auto *next = reinterpret_cast<FreeHeader *>(chunk_address + ((i + 1U) * object_size));
current_free->next(next);
current_free = next;
}
last_free->next(nullptr);
_first = first_free;
_last = last_free;
}
private:
// Processor heap to allocate new chunks.
ProcessorHeap *_processor_heap = nullptr;
// ID of the NUMA node the core is placed in.
std::uint8_t _numa_node_id = 0U;
// First element of the list of free memory objects.
FreeHeader *_first = nullptr;
// Last element of the list of free memory objects.
FreeHeader *_last = nullptr;
/**
* @return True, when the buffer is empty.
*/
[[nodiscard]] bool empty() const noexcept { return _first == nullptr; }
};
/**
* The Allocator is the interface to the internal CoreHeaps.
*/
template <std::size_t S> class Allocator final : public TaskAllocatorInterface
{
public:
explicit Allocator(const util::core_set &core_set) : _core_heaps(core_set.size())
{
_processor_heaps.fill(nullptr);
for (auto i = 0U; i < core_set.size(); ++i)
{
const auto core_id = core_set[i];
const auto node_id = system::topology::node_id(core_id);
if (_processor_heaps[node_id] == nullptr)
{
_processor_heaps[node_id] =
new (GlobalHeap::allocate_cache_line_aligned(sizeof(ProcessorHeap))) ProcessorHeap(node_id);
}
auto core_heap = CoreHeap<S>{_processor_heaps[node_id]};
core_heap.fill_buffer();
_core_heaps.insert(std::make_pair(core_id, std::move(core_heap)));
}
}
~Allocator() override
{
for (auto *processor_heap : _processor_heaps)
{
delete processor_heap;
}
}
/**
* Allocates memory from the given CoreHeap.
*
* @param core_id ID of the core.
* @return Allocated memory object.
*/
void *allocate(const std::uint16_t core_id) override { return _core_heaps[core_id].allocate(); }
/**
* Frees memory.
*
* @param core_id ID of the core to place the free object in.
* @param address Pointer to the memory object.
*/
void free(const std::uint16_t core_id, void *address) noexcept override { _core_heaps[core_id].free(address); }
private:
// Heap for every processor socket/NUMA region.
std::array<ProcessorHeap *, config::max_numa_nodes()> _processor_heaps;
// Map from core_id to core-local allocator.
std::unordered_map<std::uint16_t, CoreHeap<S>> _core_heaps;
};
} // namespace mx::memory::fixed
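// ---------------------------------------------------------------------------
// Editorial usage sketch (not part of the original interface): task-sized objects
// are allocated and freed through the core-local heaps behind the Allocator. How
// the util::core_set passed to the constructor is built is defined elsewhere in
// the repository and simply assumed here.
template <std::size_t S>
inline void *fixed_allocator_usage_example(mx::memory::fixed::Allocator<S> &allocator, const std::uint16_t core_id)
{
void *object = allocator.allocate(core_id); // served from the core-local free list
allocator.free(core_id, object); // freed objects go to the front of that list ...
return allocator.allocate(core_id); // ... so the next allocation likely reuses them while still cached
}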

View File

@@ -0,0 +1,46 @@
#pragma once
#include "alignment_helper.h"
#include <cstdint>
#include <cstdlib>
#include <numa.h>
namespace mx::memory {
/**
* The global heap represents the heap provided by the OS.
*/
class GlobalHeap
{
public:
/**
* Allocates the given size on the given NUMA node.
*
* @param numa_node_id ID of the NUMA node the memory should be allocated on.
* @param size Size of the memory to be allocated.
* @return Pointer to allocated memory.
*/
static void *allocate(const std::uint8_t numa_node_id, const std::size_t size)
{
return numa_alloc_onnode(size, numa_node_id);
}
/**
* Allocates memory aligned to the cache line,
* rounding the size up to the next multiple of the cache-line size.
* The allocated memory is not NUMA-aware.
* @param size Size to be allocated.
* @return Allocated memory
*/
static void *allocate_cache_line_aligned(const std::size_t size)
{
return std::aligned_alloc(64U, alignment_helper::next_multiple(size, 64UL));
}
/**
* Frees the given memory.
*
* @param memory Pointer to memory.
* @param size Size of the allocated object.
*/
static void free(void *memory, const std::size_t size) { numa_free(memory, size); }
};
} // namespace mx::memory
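// ---------------------------------------------------------------------------
// Editorial usage sketch (not part of the original interface): allocating
// NUMA-local memory directly from the OS-backed heap. Assumes libnuma is
// available and that NUMA node 0 exists on the machine.
inline void global_heap_usage_example()
{
constexpr std::size_t size = 4096U;
void *memory = mx::memory::GlobalHeap::allocate(/*numa_node_id*/ 0U, size);
// ... use the memory ...
mx::memory::GlobalHeap::free(memory, size);
}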

View File

@@ -0,0 +1,155 @@
#include "epoch_manager.h"
#include <mx/system/topology.h>
#include <mx/tasking/runtime.h>
#include <mx/util/queue.h>
#include <thread>
using namespace mx::memory::reclamation;
void EpochManager::enter_epoch_periodically()
{
// Wait until the scheduler starts the system.
while (this->_is_running == false)
{
system::builtin::pause();
}
// Enter new epochs and collect garbage periodically
// while the system is running.
while (this->_is_running)
{
// Enter new epoch.
this->_global_epoch.fetch_add(1U);
if constexpr (config::local_garbage_collection())
{
// Collect local garbage.
const auto core_id = mx::system::topology::core_id();
for (auto channel_id = 0U; channel_id < this->_count_channels; ++channel_id)
{
auto *garbage_task =
mx::tasking::runtime::new_task<ReclaimEpochGarbageTask>(core_id, *this, this->_allocator);
garbage_task->annotate(std::uint16_t(channel_id));
mx::tasking::runtime::spawn(*garbage_task);
}
}
else
{
// Collect global garbage of finished epochs.
this->reclaim_epoch_garbage();
}
// Wait some time until next epoch.
std::this_thread::sleep_for(config::epoch_interval()); // NOLINT: sleep_for seems to crash clang-tidy
}
}
void EpochManager::reclaim_epoch_garbage() noexcept
{
// Items logically removed in an epoch earlier than
// this one can be removed physically.
const auto min_epoch = this->min_local_epoch();
// Items that could not be physically removed in this epoch
// and therefore have to be scheduled to the next one.
util::Queue<resource::ResourceInterface> deferred_resources{};
resource::ResourceInterface *resource;
while ((resource = reinterpret_cast<resource::ResourceInterface *>(this->_global_garbage_queue.pop_front())) !=
nullptr)
{
if (resource->remove_epoch() < min_epoch)
{
resource->on_reclaim();
this->_allocator.free(static_cast<void *>(resource));
}
else
{
deferred_resources.push_back(resource);
}
}
// Resources that could not be deleted physically
// need to be deleted in next epochs.
if (deferred_resources.empty() == false)
{
this->_global_garbage_queue.push_back(deferred_resources.begin(), deferred_resources.end());
}
}
void EpochManager::reclaim_all() noexcept
{
if constexpr (config::local_garbage_collection())
{
for (auto channel_id = 0U; channel_id < this->_count_channels; ++channel_id)
{
resource::ResourceInterface *resource;
while ((resource = reinterpret_cast<resource::ResourceInterface *>(
this->_local_garbage_queues[channel_id].value().pop_front())) != nullptr)
{
resource->on_reclaim();
this->_allocator.free(static_cast<void *>(resource));
}
}
}
else
{
resource::ResourceInterface *resource;
while ((resource = reinterpret_cast<resource::ResourceInterface *>(this->_global_garbage_queue.pop_front())) !=
nullptr)
{
resource->on_reclaim();
this->_allocator.free(static_cast<void *>(resource));
}
}
}
void EpochManager::reset() noexcept
{
if (this->_allocator.is_free())
{
this->_global_epoch.store(0U);
for (auto channel_id = 0U; channel_id < tasking::config::max_cores(); ++channel_id)
{
_local_epochs[channel_id] = std::numeric_limits<epoch_t>::max();
}
}
}
mx::tasking::TaskResult ReclaimEpochGarbageTask::execute(const std::uint16_t /*core_id*/,
const std::uint16_t channel_id)
{
// Items logically removed in an epoch earlier than
// this one can be removed physically.
const auto min_epoch = this->_epoch_manager.min_local_epoch();
// Items that could not be physically removed in this epoch
// and therefore have to be scheduled to the next one.
util::Queue<resource::ResourceInterface> deferred_resources{};
// Queue with channel-local garbage.
auto &garbage_queue = this->_epoch_manager.local_garbage(channel_id);
resource::ResourceInterface *resource;
while ((resource = reinterpret_cast<resource::ResourceInterface *>(garbage_queue.pop_front())) != nullptr)
{
if (resource->remove_epoch() < min_epoch)
{
resource->on_reclaim();
this->_allocator.free(static_cast<void *>(resource));
}
else
{
deferred_resources.push_back(resource);
}
}
// Resources that could not be deleted physically
// need to be deleted in next epochs.
if (deferred_resources.empty() == false)
{
garbage_queue.push_back(deferred_resources.begin(), deferred_resources.end());
}
return tasking::TaskResult::make_remove();
}

View File

@@ -0,0 +1,183 @@
#pragma once
#include "epoch_t.h"
#include <array>
#include <atomic>
#include <chrono>
#include <cstdint>
#include <mx/memory/config.h>
#include <mx/memory/dynamic_size_allocator.h>
#include <mx/resource/resource_interface.h>
#include <mx/system/builtin.h>
#include <mx/tasking/config.h>
#include <mx/tasking/task.h>
#include <mx/util/aligned_t.h>
#include <mx/util/core_set.h>
#include <mx/util/maybe_atomic.h>
#include <mx/util/mpsc_queue.h>
#include <thread>
namespace mx::memory::reclamation {
class alignas(64) LocalEpoch
{
public:
constexpr LocalEpoch() noexcept = default;
~LocalEpoch() noexcept = default;
LocalEpoch &operator=(const epoch_t epoch) noexcept
{
_epoch = epoch;
return *this;
}
void enter(const std::atomic<epoch_t> &global_epoch) noexcept
{
_epoch.store(global_epoch.load(std::memory_order_seq_cst), std::memory_order_seq_cst);
}
void leave() noexcept { _epoch.store(std::numeric_limits<epoch_t>::max()); }
[[nodiscard]] epoch_t operator()() const noexcept { return _epoch.load(std::memory_order_seq_cst); }
private:
std::atomic<epoch_t> _epoch{std::numeric_limits<epoch_t>::max()};
};
/**
* The Epoch Manager manages periodic epochs which
* are used to protect reads against concurrent
* delete operations. Therefore, a global epoch
* will be incremented every 50ms (configurable).
* Read operations, on the other hand, will update
* their local epoch every time before reading an
* optimistic resource.
* When an optimistic resource is (logically) deleted,
* it will only be deleted physically once every local
* epoch has advanced past the epoch in which the
* resource was removed.
*/
class EpochManager
{
public:
EpochManager(const std::uint16_t count_channels, dynamic::Allocator &allocator,
util::maybe_atomic<bool> &is_running) noexcept
: _count_channels(count_channels), _is_running(is_running), _allocator(allocator)
{
}
EpochManager(const EpochManager &) = delete;
~EpochManager() = default;
LocalEpoch &operator[](const std::uint16_t channel_id) noexcept { return _local_epochs[channel_id]; }
/**
* @return Read access to the global epoch.
*/
[[nodiscard]] const std::atomic<epoch_t> &global_epoch() const noexcept { return _global_epoch; }
/**
* @return The minimal epoch of all channels.
*/
[[nodiscard]] epoch_t min_local_epoch() const noexcept
{
auto min_epoch = _local_epochs[0U]();
for (auto channel_id = 1U; channel_id < _count_channels; ++channel_id)
{
min_epoch = std::min(min_epoch, _local_epochs[channel_id]());
}
return min_epoch;
}
/**
* Adds an optimistic resource to garbage collection.
* @param resource Resource to logically delete.
* @param owning_channel_id Channel that owns the resource (used for channel-local garbage collection).
*/
void add_to_garbage_collection(resource::ResourceInterface *resource,
[[maybe_unused]] const std::uint16_t owning_channel_id) noexcept
{
resource->remove_epoch(_global_epoch.load(std::memory_order_acquire));
if constexpr (config::local_garbage_collection())
{
_local_garbage_queues[owning_channel_id].value().push_back(resource);
}
else
{
_global_garbage_queue.push_back(resource);
}
}
/**
* Enters new epochs periodically; executed by a separate thread.
*/
void enter_epoch_periodically();
/**
* Reclaims all garbage, mainly right before shutting down the tasking runtime.
*/
void reclaim_all() noexcept;
/**
* Grants access to the local garbage queue of a specific channel.
*
* @param channel_id Channel Id.
* @return Local garbage queue.
*/
[[nodiscard]] util::MPSCQueue<resource::ResourceInterface> &local_garbage(const std::uint16_t channel_id) noexcept
{
return _local_garbage_queues[channel_id].value();
}
/**
* Reset all local and the global epoch to initial values
* if no memory is in use.
*/
void reset() noexcept;
private:
// Number of used channels; important for min-calculation.
const std::uint16_t _count_channels;
// Flag of the scheduler indicating the state of the system.
util::maybe_atomic<bool> &_is_running;
// Allocator to free collected resources.
dynamic::Allocator &_allocator;
// Global epoch, incremented periodically.
std::atomic<epoch_t> _global_epoch{0U};
// Local epochs, one for every channel.
alignas(64) std::array<LocalEpoch, tasking::config::max_cores()> _local_epochs;
// Queue that holds all logically deleted objects in a global space.
alignas(64) util::MPSCQueue<resource::ResourceInterface> _global_garbage_queue;
// Queues for every worker thread. Logically deleted objects are stored here
// whenever local garbage collection is used.
alignas(64) std::array<util::aligned_t<util::MPSCQueue<resource::ResourceInterface>>,
tasking::config::max_cores()> _local_garbage_queues;
/**
* Reclaims resources with regard to the epoch.
*/
void reclaim_epoch_garbage() noexcept;
};
class ReclaimEpochGarbageTask final : public tasking::TaskInterface
{
public:
constexpr ReclaimEpochGarbageTask(EpochManager &epoch_manager, dynamic::Allocator &allocator) noexcept
: _epoch_manager(epoch_manager), _allocator(allocator)
{
}
~ReclaimEpochGarbageTask() noexcept override = default;
tasking::TaskResult execute(std::uint16_t core_id, std::uint16_t channel_id) override;
private:
EpochManager &_epoch_manager;
dynamic::Allocator &_allocator;
};
} // namespace mx::memory::reclamation
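// ---------------------------------------------------------------------------
// Editorial usage sketch (not part of the original interface): the reader-side
// protocol the EpochManager expects. A worker on `channel_id` publishes the
// current global epoch before an optimistic read and leaves it afterwards; a
// logically deleted resource is only reclaimed once no published local epoch is
// older than its removal epoch.
inline void reader_epoch_protocol_example(mx::memory::reclamation::EpochManager &epoch_manager,
const std::uint16_t channel_id)
{
epoch_manager[channel_id].enter(epoch_manager.global_epoch()); // announce the epoch we read in
// ... optimistic read of a resource, validated via its version ...
epoch_manager[channel_id].leave(); // allow reclamation to proceed again
}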

View File

@@ -0,0 +1,5 @@
#pragma once
#include <cstdint>
namespace mx::memory::reclamation {
using epoch_t = std::uint32_t;
}

101
src/mx/memory/tagged_ptr.h Normal file
View File

@@ -0,0 +1,101 @@
#pragma once
#include <cassert>
#include <cstdint>
#include <functional>
namespace mx::memory {
/**
* Holds the memory address of an instance of the class T
* and encodes 16 bit of additional information (e.g., a channel id) within the memory address.
* The size of a tagged_ptr<T, I> is equal to that of a plain T*.
*/
template <class T, typename I> class tagged_ptr
{
public:
constexpr tagged_ptr() noexcept : _object_pointer(0U)
{
static_assert(sizeof(I) == 2U);
static_assert(sizeof(tagged_ptr) == 8U);
}
constexpr explicit tagged_ptr(T *pointer) noexcept : _object_pointer(std::uintptr_t(pointer)) {}
constexpr explicit tagged_ptr(T *pointer, const I information) noexcept
: _object_pointer(std::uintptr_t(pointer)), _information(information)
{
}
~tagged_ptr() noexcept = default;
/**
* @return The decoded info.
*/
inline I info() const noexcept { return _information; }
/**
* @return The memory address without the info.
*/
template <typename S = T> inline S *get() const noexcept { return reinterpret_cast<S *>(_object_pointer); }
/**
* Encodes the given info within the pointer.
*
* @param information Info to store in the tagged pointer.
*/
inline void reset(const I information) noexcept { _information = information; }
/**
* Replaces the internal pointer by a new one.
*
* @param new_pointer Pointer to the new memory object.
*/
inline void reset(T *new_pointer = nullptr) noexcept { _object_pointer = std::uintptr_t(new_pointer); }
T *operator->() const noexcept { return get(); }
explicit operator T *() const noexcept { return get(); }
explicit operator bool() const noexcept { return _object_pointer > 0U; }
tagged_ptr<T, I> &operator=(const tagged_ptr<T, I> &other) noexcept = default;
bool operator==(const tagged_ptr<T, I> &other) const noexcept { return other._object_pointer == _object_pointer; }
bool operator==(const T *other) const noexcept { return other == get(); }
bool operator==(std::nullptr_t) const noexcept { return _object_pointer == 0U; }
bool operator!=(const tagged_ptr<T, I> &other) const noexcept { return other.get() != get(); }
bool operator!=(std::nullptr_t) const noexcept { return _object_pointer != 0U; }
bool operator<(const tagged_ptr<T, I> &other) noexcept { return other.get() < get(); }
bool operator<=(const tagged_ptr<T, I> &other) noexcept { return other.get() <= get(); }
bool operator>(const tagged_ptr<T, I> &other) noexcept { return other.get() > get(); }
bool operator>=(const tagged_ptr<T, I> &other) noexcept { return other.get() >= get(); }
private:
/**
* Pointer to the instance of T; only 48 bits are used.
*/
std::uintptr_t _object_pointer : 48;
/**
* Information stored within this pointer; the remaining 16 bits are used.
*/
I _information{};
} __attribute__((packed));
} // namespace mx::memory
namespace std {
template <class T, typename I> struct hash<mx::memory::tagged_ptr<T, I>>
{
std::size_t operator()(const mx::memory::tagged_ptr<T, I> &ptr) const noexcept
{
return std::hash<T *>().operator()(ptr.get());
}
};
} // namespace std
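// ---------------------------------------------------------------------------
// Editorial usage sketch (not part of the original interface): packing 16 bit of
// extra information (here an arbitrary tag) into the unused upper bits of a
// 64 bit pointer. The tag value and pointee type are illustrative.
inline void tagged_ptr_usage_example()
{
auto *value = new std::uint64_t{42U};
mx::memory::tagged_ptr<std::uint64_t, std::uint16_t> pointer{value, std::uint16_t{7U}};
static_assert(sizeof(pointer) == sizeof(void *)); // the tag travels inside the pointer
*pointer.get() += pointer.info(); // info() yields the stored tag (7)
delete pointer.get();
}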

View File

@@ -0,0 +1,52 @@
#pragma once
#include <cstdint>
#include <cstdlib>
namespace mx::memory {
/**
* Interface for task allocators (e.g., using the system's malloc
* or the internal allocator).
*/
class TaskAllocatorInterface
{
public:
constexpr TaskAllocatorInterface() noexcept = default;
virtual ~TaskAllocatorInterface() noexcept = default;
/**
* Allocates memory for the given core.
* @param core_id Core to allocate memory for.
* @return Allocated memory.
*/
virtual void *allocate(std::uint16_t core_id) = 0;
/**
* Frees the memory at the given core.
* @param core_id Core to store free memory.
* @param address Address to free.
*/
virtual void free(std::uint16_t core_id, void *address) noexcept = 0;
};
/**
* Task allocator using the system's aligned_alloc/free interface.
*/
template <std::size_t S> class SystemTaskAllocator final : public TaskAllocatorInterface
{
public:
constexpr SystemTaskAllocator() noexcept = default;
virtual ~SystemTaskAllocator() noexcept = default;
/**
* @return Memory allocated using the system's allocator (cache-line aligned).
*/
void *allocate(const std::uint16_t /*core_id*/) override { return std::aligned_alloc(64U, S); }
/**
* Frees the given memory using the system's free.
* @param address Memory to free.
*/
void free(const std::uint16_t /*core_id*/, void *address) noexcept override { std::free(address); }
};
} // namespace mx::memory
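// ---------------------------------------------------------------------------
// Editorial usage sketch (not part of the original interface): the system-backed
// fallback implementation of the interface above. The fixed-size allocator in
// mx/memory provides the NUMA-aware, core-local counterpart behind the same
// interface; the object size and core id here are illustrative.
inline void system_task_allocator_example()
{
mx::memory::SystemTaskAllocator<64U> allocator; // hands out 64 byte objects
auto *task_memory = allocator.allocate(/*core_id*/ 0U);
allocator.free(/*core_id*/ 0U, task_memory);
}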

View File

@@ -0,0 +1,66 @@
#include "builder.h"
#include <mx/synchronization/primitive_matrix.h>
using namespace mx::resource;
std::pair<std::uint16_t, std::uint8_t> Builder::schedule(const resource::hint &hint)
{
// Scheduling was done by the hint.
if (hint.has_channel_id())
{
this->_scheduler.predict_usage(hint.channel_id(), hint.access_frequency());
return std::make_pair(hint.channel_id(), this->_scheduler.numa_node_id(hint.channel_id()));
}
// Schedule resources round robin to the channels.
const auto count_channels = this->_scheduler.count_channels();
auto channel_id = this->_round_robin_channel_id.fetch_add(1U, std::memory_order_relaxed) % count_channels;
// If the chosen channel already holds an excessively accessed resource, pick another one.
if (count_channels > 2U && hint.isolation_level() == synchronization::isolation_level::Exclusive &&
this->_scheduler.has_excessive_usage_prediction(channel_id))
{
channel_id = this->_round_robin_channel_id.fetch_add(1U, std::memory_order_relaxed) % count_channels;
}
this->_scheduler.predict_usage(channel_id, hint.access_frequency());
const auto numa_node_id = hint.has_numa_node_id() ? hint.numa_node_id() : this->_scheduler.numa_node_id(channel_id);
return std::make_pair(channel_id, numa_node_id);
}
mx::synchronization::primitive Builder::isolation_level_to_synchronization_primitive(const hint &hint) noexcept
{
// The developer did not define any fixed protocol for
// synchronization; we choose one depending on the hints.
if (hint == synchronization::protocol::None)
{
return synchronization::PrimitiveMatrix::select_primitive(hint.isolation_level(), hint.access_frequency(),
hint.read_write_ratio());
}
// The developer hinted a specific protocol (latched, queued, ...)
// and a relaxed isolation level.
if (hint == synchronization::isolation_level::ExclusiveWriter)
{
switch (hint.preferred_protocol())
{
case synchronization::protocol::Latch:
return synchronization::primitive::ReaderWriterLatch;
case synchronization::protocol::OLFIT:
return synchronization::primitive::OLFIT;
default:
return synchronization::primitive::ScheduleWriter;
}
}
// The developer hinted a specific protocol (latched, queued, ...)
// and a strict isolation level.
if (hint == synchronization::isolation_level::Exclusive)
{
return hint == synchronization::protocol::Latch ? synchronization::primitive::ExclusiveLatch
: synchronization::primitive::ScheduleAll;
}
return mx::synchronization::primitive::None;
}

144
src/mx/resource/builder.h Normal file
View File

@@ -0,0 +1,144 @@
#pragma once
#include "resource.h"
#include <array>
#include <atomic>
#include <cstdint>
#include <mx/memory/dynamic_size_allocator.h>
#include <mx/memory/global_heap.h>
#include <mx/tasking/config.h>
#include <mx/tasking/scheduler.h>
#include <mx/util/aligned_t.h>
#include <type_traits>
#include <utility>
namespace mx::resource {
/**
* The Builder constructs and deletes data objects.
* In addition, the Builder schedules data objects to
* channels.
*/
class Builder
{
public:
Builder(tasking::Scheduler &scheduler, memory::dynamic::Allocator &allocator) noexcept
: _allocator(allocator), _scheduler(scheduler)
{
}
~Builder() noexcept = default;
/**
* Build a data object of given type with given
* size and arguments. The hint defines the synchronization
* requirements and affects scheduling.
*
* @param size Size of the data object.
* @param hint Hint for scheduling and synchronization.
* @param arguments Arguments to the constructor.
* @return Tagged pointer holding the synchronization, assigned channel and pointer.
*/
template <typename T, typename... Args>
ptr build(const std::size_t size, resource::hint &&hint, Args &&... arguments) noexcept
{
#ifndef NDEBUG
if (hint != synchronization::isolation_level::None &&
(hint != synchronization::isolation_level::Exclusive || hint != synchronization::protocol::Queue))
{
if constexpr (std::is_base_of<ResourceInterface, T>::value == false)
{
assert(false && "Type must be inherited from mx::resource::ResourceInterface");
}
}
#endif
const auto synchronization_method = Builder::isolation_level_to_synchronization_primitive(hint);
const auto [channel_id, numa_node_id] = schedule(hint);
const auto resource_information = information{channel_id, synchronization_method};
return ptr{new (_allocator.allocate(numa_node_id, 64U, size)) T(std::forward<Args>(arguments)...),
resource_information};
}
/**
* Builds a data resource from an existing pointer.
* The hint defines the synchronization
* requirements and affects scheduling.
* @param object Pointer to the existing data object.
* @param hint Hint for scheduling and synchronization.
* @return Tagged pointer holding the synchronization, assigned channel and pointer.
*/
template <typename T> ptr build(T *object, resource::hint &&hint) noexcept
{
#ifndef NDEBUG
if (hint != synchronization::isolation_level::None &&
(hint != synchronization::isolation_level::Exclusive || hint != synchronization::protocol::Queue))
{
if constexpr (std::is_base_of<ResourceInterface, T>::value == false)
{
assert(false && "Type must be inherited from mx::resource::ResourceInterface");
}
}
#endif
const auto synchronization_method = Builder::isolation_level_to_synchronization_primitive(hint);
const auto [channel_id, _] = schedule(hint);
return ptr{object, information{channel_id, synchronization_method}};
}
/**
* Destroys the given data object.
* @param resource Tagged pointer to the data object.
*/
template <typename T> void destroy(const ptr resource)
{
// TODO: Revoke usage prediction?
if (resource != nullptr)
{
if constexpr (tasking::config::memory_reclamation() != tasking::config::None)
{
if (synchronization::is_optimistic(resource.synchronization_primitive()))
{
_scheduler.epoch_manager().add_to_garbage_collection(resource.get<resource::ResourceInterface>(),
resource.channel_id());
return;
}
}
// No need to reclaim memory.
resource.get<T>()->~T();
_allocator.free(resource.get<void>());
}
}
private:
// Internal allocator for dynamic sized allocation.
memory::dynamic::Allocator &_allocator;
// Scheduler of MxTasking to get access to channels.
tasking::Scheduler &_scheduler;
// Next channel id for round-robin scheduling.
alignas(64) std::atomic_uint16_t _round_robin_channel_id{0U};
/**
* Schedules the resource to a channel, affected by the given hint.
*
* @param hint Hint for scheduling.
* @return Pair of Channel and NUMA node IDs.
*/
std::pair<std::uint16_t, std::uint8_t> schedule(const resource::hint &hint);
/**
* Determines the best synchronization method based on
* the synchronization requirements given by the hint.
*
* @param hint Hint holding the isolation level, preferred protocol, and access pattern.
* @return Chosen synchronization method.
*/
static synchronization::primitive isolation_level_to_synchronization_primitive(const hint &hint) noexcept;
};
} // namespace mx::resource
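// ---------------------------------------------------------------------------
// Editorial usage sketch (not part of the original interface): creating and
// destroying a data object through the Builder. `Node` is a placeholder type;
// whenever synchronization is requested, it has to derive from
// mx::resource::ResourceInterface.
template <typename Node>
inline void builder_usage_example(mx::resource::Builder &builder)
{
auto node = builder.build<Node>(sizeof(Node), mx::resource::hint{mx::synchronization::isolation_level::Exclusive});
// ... spawn tasks that are annotated with `node` ...
builder.destroy<Node>(node);
}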

223
src/mx/resource/resource.h Normal file
View File

@@ -0,0 +1,223 @@
#pragma once
#include "resource_interface.h"
#include <cassert>
#include <cstdint>
#include <mx/memory/alignment_helper.h>
#include <mx/memory/tagged_ptr.h>
#include <mx/synchronization/synchronization.h>
#include <mx/util/random.h>
#include <new>
namespace mx::resource {
/**
* Hint for creating resources by the resource interface.
* Encapsulates the requested numa region, synchronization requirements
* and expected access frequency.
*/
class hint
{
public:
enum expected_access_frequency : std::uint8_t
{
excessive = 0U,
high = 1U,
normal = 2U,
unused = 3U,
};
enum expected_read_write_ratio : std::uint8_t
{
heavy_read = 0U,
mostly_read = 1U,
balanced = 2U,
mostly_written = 3U,
heavy_written = 4U
};
constexpr explicit hint(const std::uint8_t node_id) noexcept : _numa_node_id(node_id) {}
constexpr explicit hint(const std::uint16_t channel_id) noexcept : _channel_id(channel_id) {}
constexpr explicit hint(const synchronization::isolation_level isolation_level) noexcept
: _isolation_level(isolation_level)
{
}
constexpr explicit hint(const expected_access_frequency access_frequency) noexcept
: _access_frequency(access_frequency)
{
}
constexpr hint(const std::uint16_t channel_id, const synchronization::isolation_level isolation_level) noexcept
: _channel_id(channel_id), _isolation_level(isolation_level)
{
}
constexpr hint(const std::uint8_t node_id, const synchronization::isolation_level isolation_level) noexcept
: _numa_node_id(node_id), _isolation_level(isolation_level)
{
}
constexpr hint(const std::uint8_t node_id, const synchronization::isolation_level isolation_level,
const synchronization::protocol preferred_protocol) noexcept
: _numa_node_id(node_id), _isolation_level(isolation_level), _preferred_protocol(preferred_protocol)
{
}
constexpr hint(const std::uint16_t channel_id, const synchronization::isolation_level isolation_level,
const synchronization::protocol preferred_protocol) noexcept
: _channel_id(channel_id), _isolation_level(isolation_level), _preferred_protocol(preferred_protocol)
{
}
constexpr hint(const std::uint8_t node_id, const expected_access_frequency access_frequency) noexcept
: _numa_node_id(node_id), _access_frequency(access_frequency)
{
}
constexpr hint(const synchronization::isolation_level isolation_level,
const expected_access_frequency access_frequency) noexcept
: _access_frequency(access_frequency), _isolation_level(isolation_level)
{
}
constexpr hint(const synchronization::isolation_level isolation_level,
const synchronization::protocol preferred_protocol,
const expected_access_frequency access_frequency) noexcept
: _access_frequency(access_frequency), _isolation_level(isolation_level),
_preferred_protocol(preferred_protocol)
{
}
constexpr hint(const synchronization::isolation_level isolation_level,
const synchronization::protocol preferred_protocol, const expected_access_frequency access_frequency,
const expected_read_write_ratio read_write_ratio) noexcept
: _access_frequency(access_frequency), _read_write_ratio(read_write_ratio), _isolation_level(isolation_level),
_preferred_protocol(preferred_protocol)
{
}
constexpr hint(const std::uint8_t node_id, const synchronization::isolation_level isolation_level,
const expected_access_frequency access_frequency) noexcept
: _numa_node_id(node_id), _access_frequency(access_frequency), _isolation_level(isolation_level)
{
}
constexpr hint(const std::uint8_t node_id, const synchronization::isolation_level isolation_level,
const synchronization::protocol preferred_protocol,
const expected_access_frequency access_frequency) noexcept
: _numa_node_id(node_id), _access_frequency(access_frequency), _isolation_level(isolation_level),
_preferred_protocol(preferred_protocol)
{
}
constexpr hint(hint &&) noexcept = default;
~hint() = default;
[[nodiscard]] bool has_numa_node_id() const noexcept
{
return _numa_node_id < std::numeric_limits<std::uint8_t>::max();
}
[[nodiscard]] std::uint8_t numa_node_id() const noexcept { return _numa_node_id; }
[[nodiscard]] bool has_channel_id() const noexcept
{
return _channel_id < std::numeric_limits<std::uint16_t>::max();
}
[[nodiscard]] std::uint16_t channel_id() const noexcept { return _channel_id; }
[[nodiscard]] expected_access_frequency access_frequency() const noexcept { return _access_frequency; }
[[nodiscard]] expected_read_write_ratio read_write_ratio() const noexcept { return _read_write_ratio; }
[[nodiscard]] synchronization::isolation_level isolation_level() const noexcept { return _isolation_level; }
[[nodiscard]] synchronization::protocol preferred_protocol() const noexcept { return _preferred_protocol; }
bool operator==(const synchronization::isolation_level isolation_level) const noexcept
{
return _isolation_level == isolation_level;
}
bool operator!=(const synchronization::isolation_level isolation_level) const noexcept
{
return _isolation_level != isolation_level;
}
bool operator==(const synchronization::protocol protocol) const noexcept { return _preferred_protocol == protocol; }
bool operator!=(const synchronization::protocol protocol) const noexcept { return _preferred_protocol != protocol; }
private:
hint() = default;
// Preferred NUMA region; no preference by default.
const std::uint8_t _numa_node_id{std::numeric_limits<std::uint8_t>::max()};
// Preferred channel; no preference by default.
const std::uint16_t _channel_id{std::numeric_limits<std::uint16_t>::max()};
// Expected access frequency; normal by default.
const enum expected_access_frequency _access_frequency { expected_access_frequency::normal };
// Expected read/write ratio; balanced by default.
const expected_read_write_ratio _read_write_ratio{expected_read_write_ratio::balanced};
// Preferred isolation level; no synchronization by default.
const synchronization::isolation_level _isolation_level{synchronization::isolation_level::None};
// Preferred synchronization protocol (queue, latch, ...); no synchronization by default.
const synchronization::protocol _preferred_protocol{synchronization::protocol::None};
};
/**
* Information of a resource, stored within
* the pointer to the resource.
*/
class information
{
public:
constexpr information() noexcept : _channel_id(0U), _synchronization_primitive(0U) {}
explicit information(const std::uint16_t channel_id,
const synchronization::primitive synchronization_primitive) noexcept
: _channel_id(channel_id), _synchronization_primitive(static_cast<std::uint16_t>(synchronization_primitive))
{
}
~information() = default;
[[nodiscard]] std::uint16_t channel_id() const noexcept { return _channel_id; }
[[nodiscard]] synchronization::primitive synchronization_primitive() const noexcept
{
return static_cast<synchronization::primitive>(_synchronization_primitive);
}
information &operator=(const information &other) = default;
private:
std::uint16_t _channel_id : 12;
std::uint16_t _synchronization_primitive : 4;
} __attribute__((packed));
/**
* Pointer to a resource, stores information about
* that resource.
*/
class ptr final : public memory::tagged_ptr<void, information>
{
public:
constexpr ptr() noexcept = default;
explicit ptr(void *ptr_, const information info = {}) noexcept : memory::tagged_ptr<void, information>(ptr_, info)
{
}
~ptr() = default;
ptr &operator=(const ptr &other) noexcept = default;
[[nodiscard]] std::uint16_t channel_id() const noexcept { return info().channel_id(); }
[[nodiscard]] synchronization::primitive synchronization_primitive() const noexcept
{
return info().synchronization_primitive();
}
} __attribute__((packed));
/**
* Casts the internal pointer of the resource pointer
* to a pointer typed by the given template parameter.
*
* @param resource Resource to cast.
* @return Pointer to the requested type.
*/
template <typename S> static auto *ptr_cast(const ptr resource) noexcept
{
return resource.template get<S>();
}
} // namespace mx::resource
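// ---------------------------------------------------------------------------
// Editorial usage sketch (not part of the original interface): typical hints handed
// to the resource Builder. Each constructor fixes only the properties the developer
// cares about; everything else keeps its default. The concrete values are illustrative.
inline void resource_hint_examples()
{
using mx::resource::hint;
// Place the resource on NUMA node 0; no synchronization required.
const auto numa_hint = hint{std::uint8_t{0U}};
// Exclusive access via task scheduling, accessed excessively and heavily written.
const auto contended_hint = hint{mx::synchronization::isolation_level::Exclusive,
mx::synchronization::protocol::Queue, hint::excessive, hint::heavy_written};
(void)numa_hint;
(void)contended_hint;
}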

View File

@@ -0,0 +1,154 @@
#pragma once
#include <atomic>
#include <cstdint>
#include <mx/memory/reclamation/epoch_t.h>
#include <mx/synchronization/optimistic_lock.h>
#include <mx/synchronization/rw_spinlock.h>
#include <mx/synchronization/spinlock.h>
namespace mx::resource {
/**
* The resource interface represents resources that
* need to be synchronized by the tasking engine.
* Supported synchronizations are:
* - Latches (Spinlock, R/W-lock)
* - Optimistic latches + memory reclamation
*/
class ResourceInterface
{
public:
enum SynchronizationType : std::uint8_t
{
Exclusive,
SharedRead,
SharedWrite,
Optimistic,
OLFIT,
};
constexpr ResourceInterface() noexcept = default;
ResourceInterface(const ResourceInterface &) = delete;
ResourceInterface(ResourceInterface &&) = delete;
virtual ~ResourceInterface() = default;
/**
* Called by the epoch manager when this resource can safely be reclaimed.
*/
virtual void on_reclaim() = 0;
/**
* Set the next resource in garbage list.
* @param next Next resource in garbage list.
*/
void next(ResourceInterface *next) noexcept { _next_garbage = next; }
/**
* @return Next resource in garbage list.
*/
[[nodiscard]] ResourceInterface *next() const noexcept { return _next_garbage; }
/**
* @return The current version of the resource.
*/
[[nodiscard]] synchronization::OptimisticLock::version_t version() const noexcept
{
return _optimistic_latch.read_valid();
}
/**
* Checks whether the given version is still valid.
*
* @param version Version to check.
* @return True, when the version is valid.
*/
[[nodiscard]] bool is_version_valid(const synchronization::OptimisticLock::version_t version) const noexcept
{
return _optimistic_latch.is_valid(version);
}
/**
* Tries to acquire the optimistic latch.
* @return True, when latch was acquired.
*/
[[nodiscard]] bool try_acquire_optimistic_latch() noexcept { return _optimistic_latch.try_lock(); }
/**
* Sets the epoch in which this resource was removed.
* @param epoch Epoch in which this resource was removed.
*/
void remove_epoch(const memory::reclamation::epoch_t epoch) noexcept { _remove_epoch = epoch; }
/**
* @return The epoch this resource was removed.
*/
[[nodiscard]] memory::reclamation::epoch_t remove_epoch() const noexcept { return _remove_epoch; }
template <SynchronizationType T> class scoped_latch
{
public:
constexpr inline explicit scoped_latch(ResourceInterface *resource) noexcept : _resource(resource)
{
if constexpr (T == SynchronizationType::Exclusive)
{
_resource->_exclusive_latch.lock();
}
else if constexpr (T == SynchronizationType::SharedRead)
{
_resource->_rw_latch.lock_shared();
}
else if constexpr (T == SynchronizationType::SharedWrite)
{
_resource->_rw_latch.lock();
}
else if constexpr (T == SynchronizationType::Optimistic)
{
_resource->_optimistic_latch.lock<true>();
}
else if constexpr (T == SynchronizationType::OLFIT)
{
_resource->_optimistic_latch.lock<false>();
}
}
inline ~scoped_latch() noexcept
{
if constexpr (T == SynchronizationType::Exclusive)
{
_resource->_exclusive_latch.unlock();
}
else if constexpr (T == SynchronizationType::SharedRead)
{
_resource->_rw_latch.unlock_shared();
}
else if constexpr (T == SynchronizationType::SharedWrite)
{
_resource->_rw_latch.unlock();
}
else if constexpr (T == SynchronizationType::Optimistic || T == SynchronizationType::OLFIT)
{
_resource->_optimistic_latch.unlock();
}
}
private:
ResourceInterface *_resource;
};
using scoped_exclusive_latch = scoped_latch<SynchronizationType::Exclusive>;
using scoped_optimistic_latch = scoped_latch<SynchronizationType::Optimistic>;
using scoped_olfit_latch = scoped_latch<SynchronizationType::OLFIT>;
template <bool WRITER>
using scoped_rw_latch = scoped_latch<WRITER ? SynchronizationType::SharedWrite : SynchronizationType::SharedRead>;
private:
// Encapsulated synchronization primitives.
synchronization::Spinlock _exclusive_latch;
synchronization::RWSpinLock _rw_latch;
synchronization::OptimisticLock _optimistic_latch;
// Epoch and Garbage management.
memory::reclamation::epoch_t _remove_epoch{0U};
ResourceInterface *_next_garbage{nullptr};
};
} // namespace mx::resource
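// ---------------------------------------------------------------------------
// Editorial usage sketch (not part of the original interface): guarding accesses to
// a resource with the scoped latches defined above. `MyResource` is a placeholder
// type that derives from mx::resource::ResourceInterface.
template <typename MyResource>
inline void scoped_latch_example(MyResource *resource)
{
{
// Exclusive access for a writer (spinlock underneath).
mx::resource::ResourceInterface::scoped_exclusive_latch latch{resource};
// ... mutate the resource ...
}
{
// Shared access for a reader (reader/writer spinlock underneath).
mx::resource::ResourceInterface::scoped_rw_latch<false> latch{resource};
// ... read the resource ...
}
}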

View File

@@ -0,0 +1,89 @@
#pragma once
#include <atomic>
#include <cstdint>
#include <limits>
#include <mx/system/builtin.h>
#include <mx/tasking/config.h>
namespace mx::synchronization {
class OptimisticLock
{
public:
using version_t = std::uint32_t;
constexpr OptimisticLock() = default;
~OptimisticLock() = default;
/**
* Guarantees to read a valid version by blocking until
* the version is not locked.
* @return The current version.
*/
[[nodiscard]] version_t read_valid() const noexcept
{
auto version = _version.load(std::memory_order_seq_cst);
while (OptimisticLock::is_locked(version))
{
system::builtin::pause();
version = _version.load(std::memory_order_seq_cst);
}
return version;
}
/**
* Validates the version.
*
* @param version The version to validate.
* @return True, if the version is valid.
*/
[[nodiscard]] bool is_valid(const version_t version) const noexcept
{
return version == _version.load(std::memory_order_seq_cst);
}
/**
* Tries to acquire the lock.
* @return True, when lock was acquired.
*/
[[nodiscard]] bool try_lock() noexcept
{
auto version = read_valid();
return _version.compare_exchange_strong(version, version + 0b10);
}
/**
* Waits until the lock is successfully acquired.
*/
template <bool SINGLE_WRITER> void lock() noexcept
{
if constexpr (SINGLE_WRITER)
{
_version.fetch_add(0b10, std::memory_order_seq_cst);
}
else
{
auto tries = std::uint64_t{1U};
while (this->try_lock() == false)
{
const auto wait = tries++;
for (auto i = 0U; i < wait * 32U; ++i)
{
system::builtin::pause();
std::atomic_thread_fence(std::memory_order_seq_cst);
}
}
}
}
/**
* Unlocks the version lock.
*/
void unlock() noexcept { _version.fetch_add(0b10, std::memory_order_seq_cst); }
private:
std::atomic<version_t> _version{0b100};
[[nodiscard]] static bool is_locked(const version_t version) noexcept { return (version & 0b10) == 0b10; }
};
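/**
 * Minimal usage sketch: an optimistic read that retries until the observed
 * version stays valid, i.e., no writer intervened between reading the version
 * and reading the data. The guarded value passed as a parameter is only an
 * assumption for this example.
 */
inline std::uint64_t example_optimistic_read(const OptimisticLock &lock, const std::uint64_t &value) noexcept
{
std::uint64_t snapshot;
OptimisticLock::version_t version;
do
{
version = lock.read_valid(); // blocks until no writer holds the lock
snapshot = value;            // read the protected data optimistically
} while (lock.is_valid(version) == false);
return snapshot;
}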
} // namespace mx::synchronization

View File

@@ -0,0 +1,68 @@
#pragma once
#include "synchronization.h"
#include <algorithm>
#include <array>
#include <cstdint>
#include <mx/resource/resource.h>
namespace mx::synchronization {
class PrimitiveMatrix
{
public:
static primitive select_primitive(const isolation_level isolation_level,
const resource::hint::expected_access_frequency access_frequency,
const resource::hint::expected_read_write_ratio read_write_ratio) noexcept
{
return isolation_level != isolation_level::None
? matrix()[static_cast<std::uint8_t>(isolation_level)][static_cast<std::uint8_t>(read_write_ratio)]
[static_cast<std::uint8_t>(access_frequency)]
: primitive::None;
}
private:
constexpr static std::array<std::array<std::array<primitive, 4>, 5>, 2> matrix() noexcept
{
return {{// For isolation_level::ExclusiveWriter
{{
// For predicted_read_write_ratio::heavy_read
{{primitive::ScheduleWriter, primitive::ScheduleWriter, primitive::ScheduleWriter,
primitive::ScheduleWriter}},
// For predicted_read_write_ratio::mostly_read
{{primitive::ScheduleWriter, primitive::ScheduleWriter, primitive::OLFIT, primitive::OLFIT}},
// For predicted_read_write_ratio::balanced
{{primitive::OLFIT, primitive::OLFIT, primitive::OLFIT, primitive::OLFIT}},
// For predicted_read_write_ratio::mostly_written
{{primitive::OLFIT, primitive::OLFIT, primitive::ReaderWriterLatch, primitive::ReaderWriterLatch}},
// For predicted_read_write_ratio::heavy_written
{{primitive::ScheduleAll, primitive::ScheduleAll, primitive::ReaderWriterLatch,
primitive::ReaderWriterLatch}},
}},
// For isolation_level::Exclusive
{{
// For predicted_read_write_ratio::heavy_read
{{primitive::ScheduleAll, primitive::ScheduleAll, primitive::ExclusiveLatch,
primitive::ExclusiveLatch}},
// For predicted_read_write_ratio::mostly_read
{{primitive::ScheduleAll, primitive::ScheduleAll, primitive::ExclusiveLatch,
primitive::ExclusiveLatch}},
// For predicted_read_write_ratio::balanced
{{primitive::ScheduleAll, primitive::ScheduleAll, primitive::ExclusiveLatch,
primitive::ExclusiveLatch}},
// For predicted_read_write_ratio::mostly_written
{{primitive::ScheduleAll, primitive::ScheduleAll, primitive::ExclusiveLatch,
primitive::ExclusiveLatch}},
// For predicted_read_write_ratio::heavy_written
{{primitive::ScheduleAll, primitive::ScheduleAll, primitive::ExclusiveLatch,
primitive::ExclusiveLatch}},
}}}};
}
};
} // namespace mx::synchronization

View File

@@ -0,0 +1,292 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* N.B. You most likely do _not_ want to use RWSpinLock or any other
* kind of spinlock. Use SharedMutex instead.
*
* In short, spinlocks in preemptive multi-tasking operating systems
* have serious problems and fast mutexes like SharedMutex are almost
* certainly the better choice, because letting the OS scheduler put a
* thread to sleep is better for system responsiveness and throughput
* than wasting a timeslice repeatedly querying a lock held by a
* thread that's blocked, and you can't prevent userspace
* programs blocking.
*
* Spinlocks in an operating system kernel make much more sense than
* they do in userspace.
*
* -------------------------------------------------------------------
*
* Two Read-Write spin lock implementations.
*
* Ref: http://locklessinc.com/articles/locks
*
* Both locks here are faster than pthread_rwlock and have very low
* overhead (usually 20-30ns). They don't use any system mutexes and
* are very compact (4/8 bytes), so are suitable for per-instance
* based locking, particularly when contention is not expected.
*
* For a spinlock, RWSpinLock is a reasonable choice. (See the note
* above for why a spin lock is frequently a bad idea generally.)
* RWSpinLock has minimal overhead, and comparable contention
* performance when the number of competing threads is less than or
* equal to the number of logical CPUs. Even as the number of
* threads gets larger, RWSpinLock can still be very competitive in
* READ, although it is slower on WRITE, and also inherently unfair
* to writers.
*
* RWTicketSpinLock shows more balanced READ/WRITE performance. If
* your application really needs a lot more threads, and a
* higher-priority writer, prefer one of the RWTicketSpinLock locks.
*
* Caveats:
*
* RWTicketSpinLock locks can only be used with GCC on x86/x86-64
* based systems.
*
* RWTicketSpinLock<32> only allows up to 2^8 - 1 concurrent
* readers and writers.
*
* RWTicketSpinLock<64> only allows up to 2^16 - 1 concurrent
* readers and writers.
*
* RWTicketSpinLock<..., true> (kFavorWriter = true, that is, strict
* writer priority) is NOT reentrant, even for lock_shared().
*
* The lock will not grant any new shared (read) accesses while a thread
* attempting to acquire the lock in write mode is blocked. (That is,
* if the lock is held in shared mode by N threads, and a thread attempts
* to acquire it in write mode, no one else can acquire it in shared mode
* until these N threads release the lock and then the blocked thread
* acquires and releases the exclusive lock.) This also applies for
* attempts to reacquire the lock in shared mode by threads that already
* hold it in shared mode, making the lock non-reentrant.
*
* RWSpinLock handles 2^30 - 1 concurrent readers.
*
* @author Xin Liu <xliux@fb.com>
*/
#pragma once
/*
========================================================================
Benchmark on (Intel(R) Xeon(R) CPU L5630 @ 2.13GHz) 8 cores(16 HTs)
========================================================================
------------------------------------------------------------------------------
1. Single thread benchmark (read/write lock + unlock overhead)
Benchmark Iters Total t t/iter iter/sec
-------------------------------------------------------------------------------
* BM_RWSpinLockRead 100000 1.786 ms 17.86 ns 53.4M
+30.5% BM_RWSpinLockWrite 100000 2.331 ms 23.31 ns 40.91M
+85.7% BM_RWTicketSpinLock32Read 100000 3.317 ms 33.17 ns 28.75M
+96.0% BM_RWTicketSpinLock32Write 100000 3.5 ms 35 ns 27.25M
+85.6% BM_RWTicketSpinLock64Read 100000 3.315 ms 33.15 ns 28.77M
+96.0% BM_RWTicketSpinLock64Write 100000 3.5 ms 35 ns 27.25M
+85.7% BM_RWTicketSpinLock32FavorWriterRead 100000 3.317 ms 33.17 ns 28.75M
+29.7% BM_RWTicketSpinLock32FavorWriterWrite 100000 2.316 ms 23.16 ns 41.18M
+85.3% BM_RWTicketSpinLock64FavorWriterRead 100000 3.309 ms 33.09 ns 28.82M
+30.2% BM_RWTicketSpinLock64FavorWriterWrite 100000 2.325 ms 23.25 ns 41.02M
+ 175% BM_PThreadRWMutexRead 100000 4.917 ms 49.17 ns 19.4M
+ 166% BM_PThreadRWMutexWrite 100000 4.757 ms 47.57 ns 20.05M
------------------------------------------------------------------------------
2. Contention Benchmark 90% read 10% write
Benchmark hits average min max sigma
------------------------------------------------------------------------------
---------- 8 threads ------------
RWSpinLock Write 142666 220ns 78ns 40.8us 269ns
RWSpinLock Read 1282297 222ns 80ns 37.7us 248ns
RWTicketSpinLock Write 85692 209ns 71ns 17.9us 252ns
RWTicketSpinLock Read 769571 215ns 78ns 33.4us 251ns
pthread_rwlock_t Write 84248 2.48us 99ns 269us 8.19us
pthread_rwlock_t Read 761646 933ns 101ns 374us 3.25us
---------- 16 threads ------------
RWSpinLock Write 124236 237ns 78ns 261us 801ns
RWSpinLock Read 1115807 236ns 78ns 2.27ms 2.17us
RWTicketSpinLock Write 81781 231ns 71ns 31.4us 351ns
RWTicketSpinLock Read 734518 238ns 78ns 73.6us 379ns
pthread_rwlock_t Write 83363 7.12us 99ns 785us 28.1us
pthread_rwlock_t Read 754978 2.18us 101ns 1.02ms 14.3us
---------- 50 threads ------------
RWSpinLock Write 131142 1.37us 82ns 7.53ms 68.2us
RWSpinLock Read 1181240 262ns 78ns 6.62ms 12.7us
RWTicketSpinLock Write 83045 397ns 73ns 7.01ms 31.5us
RWTicketSpinLock Read 744133 386ns 78ns 11ms 31.4us
pthread_rwlock_t Write 80849 112us 103ns 4.52ms 263us
pthread_rwlock_t Read 728698 24us 101ns 7.28ms 194us
*/
#include <algorithm>
#include <atomic>
#include <mx/system/builtin.h>
#include <thread>
namespace mx::synchronization {
/*
* A simple, small (4-bytes), but unfair rwlock. Use it when you want
* a nice writer and don't expect a lot of write/read contention, or
* when you need small rwlocks since you are creating a large number
* of them.
*
* Note that the unfairness here is extreme: if the lock is
* continually accessed for read, writers will never get a chance. If
* the lock can be that highly contended this class is probably not an
* ideal choice anyway.
*
* It currently implements most of the Lockable, SharedLockable and
* UpgradeLockable concepts except the TimedLockable related locking/unlocking
* interfaces.
*/
class RWSpinLock
{
enum : int32_t
{
READER = 4,
UPGRADED = 2,
WRITER = 1
};
public:
constexpr RWSpinLock() : bits_(0) {}
RWSpinLock(RWSpinLock const &) = delete;
RWSpinLock &operator=(RWSpinLock const &) = delete;
// Lockable Concept
void lock() noexcept
{
while (!try_lock())
{
mx::system::builtin::pause();
}
}
// Writer is responsible for clearing up both the UPGRADED and WRITER bits.
void unlock() noexcept
{
static_assert(READER > WRITER + UPGRADED, "wrong bits!");
bits_.fetch_and(~(WRITER | UPGRADED), std::memory_order_release);
}
// SharedLockable Concept
void lock_shared() noexcept
{
while (!try_lock_shared())
{
mx::system::builtin::pause();
}
}
void unlock_shared() noexcept { bits_.fetch_add(-READER, std::memory_order_release); }
// Downgrade the lock from writer status to reader status.
void unlock_and_lock_shared() noexcept
{
bits_.fetch_add(READER, std::memory_order_acquire);
unlock();
}
// UpgradeLockable Concept
void lock_upgrade() noexcept
{
while (!try_lock_upgrade())
{
system::builtin::pause();
}
}
void unlock_upgrade() noexcept { bits_.fetch_add(-UPGRADED, std::memory_order_acq_rel); }
// unlock upgrade and try to acquire write lock
void unlock_upgrade_and_lock() noexcept
{
while (!try_unlock_upgrade_and_lock())
{
system::builtin::pause();
}
}
// unlock upgrade and read lock atomically
void unlock_upgrade_and_lock_shared() noexcept { bits_.fetch_add(READER - UPGRADED, std::memory_order_acq_rel); }
// write unlock and upgrade lock atomically
void unlock_and_lock_upgrade() noexcept
{
// need to do it in two steps here -- as the UPGRADED bit might be OR-ed at
// the same time when other threads are trying do try_lock_upgrade().
bits_.fetch_or(UPGRADED, std::memory_order_acquire);
bits_.fetch_add(-WRITER, std::memory_order_release);
}
// Attempt to acquire writer permission. Return false if we didn't get it.
bool try_lock() noexcept
{
int32_t expect = 0;
return bits_.compare_exchange_strong(expect, WRITER, std::memory_order_acq_rel);
}
// Try to get reader permission on the lock. This can fail if we
// find out someone is a writer or upgrader.
// Setting the UPGRADED bit would allow a writer-to-be to indicate
// its intention to write and block any new readers while waiting
// for existing readers to finish and release their read locks. This
// helps avoid starving writers (promoted from upgraders).
bool try_lock_shared() noexcept
{
// fetch_add is considerably (100%) faster than compare_exchange,
// so here we are optimizing for the common (lock success) case.
int32_t value = bits_.fetch_add(READER, std::memory_order_acquire);
if (value & (WRITER | UPGRADED))
{
bits_.fetch_add(-READER, std::memory_order_release);
return false;
}
return true;
}
// try to unlock upgrade and write lock atomically
bool try_unlock_upgrade_and_lock() noexcept
{
int32_t expect = UPGRADED;
return bits_.compare_exchange_strong(expect, WRITER, std::memory_order_acq_rel);
}
// try to acquire an upgradable lock.
bool try_lock_upgrade() noexcept
{
int32_t value = bits_.fetch_or(UPGRADED, std::memory_order_acquire);
// Note: when failed, we cannot flip the UPGRADED bit back,
// as in this case there is either another upgrade lock or a write lock.
// If it's a write lock, the bit will get cleared up when that lock's done
// with unlock().
return ((value & (UPGRADED | WRITER)) == 0);
}
// mainly for debugging purposes.
[[nodiscard]] int32_t bits() const noexcept { return bits_.load(std::memory_order_acquire); }
private:
std::atomic<int32_t> bits_;
};
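/**
 * Minimal usage sketch: a shared (reader) and an exclusive (writer) critical
 * section on the same RWSpinLock. The guarded counter is only an assumption
 * for this example.
 */
inline int example_shared_read(RWSpinLock &latch, const int &counter) noexcept
{
latch.lock_shared();
const auto copy = counter; // several readers may hold the latch concurrently
latch.unlock_shared();
return copy;
}
inline void example_exclusive_write(RWSpinLock &latch, int &counter) noexcept
{
latch.lock();
++counter; // exclusive access: no reader or upgrader is active here
latch.unlock();
}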
} // namespace mx::synchronization

View File

@@ -0,0 +1,59 @@
#pragma once
#include <atomic>
#include <cstdint>
#include <mx/system/builtin.h>
namespace mx::synchronization {
/**
* Simple spinlock for mutual exclusion.
*/
class Spinlock
{
public:
constexpr Spinlock() noexcept = default;
~Spinlock() = default;
/**
* Locks the spinlock by spinning until it is lockable.
*/
void lock() noexcept
{
while (true)
{
while (_flag.load(std::memory_order_relaxed))
{
system::builtin::pause();
}
if (try_lock())
{
return;
}
}
}
/**
* Try to lock the lock.
* @return True, when successfully locked.
*/
bool try_lock() noexcept
{
bool expected = false;
return _flag.compare_exchange_weak(expected, true, std::memory_order_acquire);
}
/**
* Unlocks the spinlock.
*/
void unlock() noexcept { _flag.store(false, std::memory_order_release); }
/**
* @return True, if the lock is in use.
*/
[[nodiscard]] bool is_locked() const noexcept { return _flag.load(std::memory_order_relaxed); }
private:
std::atomic_bool _flag{false};
};
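/**
 * Minimal usage sketch: guarding a critical section with the Spinlock.
 * The shared counter is only an assumption for this example.
 */
inline void example_guarded_increment(Spinlock &latch, std::uint64_t &counter) noexcept
{
latch.lock();
++counter; // critical section
latch.unlock();
}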
} // namespace mx::synchronization

View File

@@ -0,0 +1,57 @@
#pragma once
#include <cstdint>
namespace mx::synchronization {
/**
* Desired isolation level of a resource.
*/
enum class isolation_level : std::uint8_t
{
ExclusiveWriter = 0U, // Reads can be parallel, writes will be synchronized
Exclusive = 1U, // All accesses will be synchronized
None = 2U, // Nothing will be synchronized
};
/**
* Desired protocol of synchronization.
*/
enum class protocol : std::uint8_t
{
None = 0U, // System is free to choose
Queue = 1U, // Choose primitive with queues with respect to isolation level
Latch = 2U, // Choose primitive with latches with respect to isolation level
OLFIT = 3U, // Try to choose olfit
TransactionalMemory = 4U // Try to choose htm
};
/**
* The concrete synchronization method, chosen based on the isolation level
* and the decision by the tasking layer.
*
* Attention: Even though the primitive is 8 bit wide,
*            it is stored within the tagged_ptr
*            using only 4 bit! Therefore, the max.
*            value can be 15.
*/
enum class primitive : std::uint8_t
{
None = 0U, // Nothing will be synchronized
ExclusiveLatch = 1U, // All accesses will use a spinlock
ScheduleAll = 2U, // All accesses will be scheduled to the mapped channel
ReaderWriterLatch = 3U, // Use a reader/writer latch to enable parallel reads
ScheduleWriter = 4U, // Reads can perform anywhere, writes are scheduled to the mapped channel
OLFIT = 5U // Read/write anywhere but use a latch for writers
};
/**
* Checks whether the given primitive is kind of optimistic synchronization
* or not.
* @param primitive_ Primitive to check.
* @return True, if the given primitive is optimistic.
*/
static inline bool is_optimistic(const primitive primitive_) noexcept
{
return primitive_ == primitive::ScheduleWriter || primitive_ == primitive::OLFIT;
}
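/**
 * Minimal usage sketch: checking whether a chosen primitive lets readers
 * proceed optimistically (i.e., without taking a latch).
 */
inline bool example_is_optimistic() noexcept
{
return is_optimistic(primitive::OLFIT); // true: readers validate versions instead of latching
}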
} // namespace mx::synchronization

35
src/mx/system/builtin.h Normal file
View File

@@ -0,0 +1,35 @@
#pragma once
#include <cstdint>
#include <iostream>
namespace mx::system {
/**
* Encapsulates compiler builtins.
*/
class builtin
{
public:
/**
* Generates a pause/yield cpu instruction, independently
* of the hardware.
*/
static void pause() noexcept
{
#if defined(__x86_64__) || defined(__amd64__)
__builtin_ia32_pause();
#elif defined(__arm__) || defined(__aarch64__)
asm("YIELD");
#endif
}
[[maybe_unused]] static bool expect_false(const bool expression) noexcept
{
return __builtin_expect(expression, false);
}
[[maybe_unused]] static bool expect_true(const bool expression) noexcept
{
return __builtin_expect(expression, true);
}
};
} // namespace mx::system

187
src/mx/system/cache.h Normal file
View File

@@ -0,0 +1,187 @@
#pragma once
#include <cstdint>
namespace mx::system {
/**
* Encapsulates cache operations like prefetching.
*
* Further documentation on Intel: https://www.felixcloutier.com/x86/prefetchh
*/
class cache
{
public:
enum level : std::uint8_t
{
L1 = 1U,
L2 = 2U,
LLC = 3U
};
enum access : std::uint8_t
{
read = 0U,
write = 1U
};
/**
* Prefetches a single cache line into a given prefetch level.
*
* @tparam L Wanted cache level.
* @tparam A Access to the cache line whether read or write.
* @param address Address of the memory which should be prefetched.
*/
template <level L, access A = access::read> static void prefetch(void *address) noexcept
{
#ifdef __x86_64
if constexpr (A == access::write)
{
asm volatile("PREFETCHW (%0)\n" ::"r"(address));
}
else if constexpr (L == level::L1)
{
asm volatile("PREFETCHT1 (%0)\n" ::"r"(address));
}
else if constexpr (L == level::L2)
{
asm volatile("PREFETCHT2 (%0)\n" ::"r"(address));
}
else
{
asm volatile("PREFETCHNTA (%0)\n" ::"r"(address));
}
#elif defined(__aarch64__)
if constexpr (L == L1)
{
if constexpr (A == access::read)
{
asm volatile("prfm pldl1keep, %a0\n" : : "p"(address));
}
else
{
asm volatile("prfm pstl1keep, %a0\n" : : "p"(address));
}
}
else if constexpr (L == L2)
{
if constexpr (A == access::read)
{
asm volatile("prfm pldl2keep, %a0\n" : : "p"(address));
}
else
{
asm volatile("prfm pstl2keep, %a0\n" : : "p"(address));
}
}
else
{
if constexpr (A == access::read)
{
asm volatile("prfm pldl3keep, %a0\n" : : "p"(address));
}
else
{
asm volatile("prfm pstl3keep, %a0\n" : : "p"(address));
}
}
#endif
}
/**
* Prefetches a range of cache lines into the given cache level.
*
* @tparam L Wanted cache level.
* @tparam A Access to the cache line whether read or write.
* @param address Address of the memory which should be prefetched.
* @param size Size of the accessed memory.
*/
template <level L, access A = access::read>
static void prefetch_range(void *address, const std::uint32_t size) noexcept
{
auto addr = std::uintptr_t(address);
const auto end = addr + size;
if ((size & 1023U) == 0U)
{
for (; addr < end; addr += 1024U)
{
prefetch_range<L, 1024U, A>(reinterpret_cast<void *>(addr));
}
}
else if ((size & 511U) == 0U)
{
for (; addr < end; addr += 512U)
{
prefetch_range<L, 512U, A>(reinterpret_cast<void *>(addr));
}
}
else if ((size & 255U) == 0U)
{
for (; addr < end; addr += 256U)
{
prefetch_range<L, 256U, A>(reinterpret_cast<void *>(addr));
}
}
else if ((size & 127U) == 0U)
{
for (; addr < end; addr += 128U)
{
prefetch_range<L, 128U, A>(reinterpret_cast<void *>(addr));
}
}
else
{
for (; addr < end; addr += 64U)
{
prefetch<L, A>(reinterpret_cast<void *>(addr));
}
}
}
/**
* Prefetches a range of cache lines into the given cache level.
*
* @tparam L Wanted cache level.
* @tparam S Size of the accessed memory.
* @tparam A Access to the cache line whether read or write.
* @param address Address of the accessed memory.
*/
template <level L, std::uint32_t S, access A = access::read> static void prefetch_range(void *address) noexcept
{
static_assert(S && (!(S & (S - 1))), "Must be power of two.");
const auto addr = std::uintptr_t(address);
if constexpr (S <= 64U)
{
prefetch<L, A>(address);
}
else if constexpr (S == 128U)
{
prefetch<L, A>(address);
prefetch<L, A>(reinterpret_cast<void *>(addr + 64U));
}
else if constexpr (S == 192U)
{
prefetch_range<L, 128U, A>(address);
prefetch<L, A>(reinterpret_cast<void *>(addr + 128U));
}
else if constexpr (S == 256U)
{
prefetch_range<L, 128U, A>(address);
prefetch_range<L, 128U, A>(reinterpret_cast<void *>(addr + 128U));
}
else if constexpr (S == 512U)
{
prefetch_range<L, 256U, A>(address);
prefetch_range<L, 256U, A>(reinterpret_cast<void *>(addr + 256U));
}
else if constexpr (S == 1024U)
{
prefetch_range<L, 512U, A>(address);
prefetch_range<L, 512U, A>(reinterpret_cast<void *>(addr + 512U));
}
else
{
prefetch_range<L, A>(address, S);
}
}
};
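/**
 * Minimal usage sketch: prefetching a cache-line-sized record into the
 * last-level cache before reading it. The record type is only an assumption
 * for this example.
 */
struct alignas(64) example_record
{
std::uint64_t key;
std::uint64_t payload[7];
};
inline void example_prefetch(example_record *record) noexcept
{
cache::prefetch_range<cache::LLC, 64U, cache::read>(record);
}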
} // namespace mx::system

27
src/mx/system/cpuid.h Normal file
View File

@@ -0,0 +1,27 @@
#pragma once
#include <cstdint>
namespace mx::system {
/**
* Encapsulates methods for checking features
* of the system by calling cpuid instruction.
*/
class cpuid
{
public:
/**
* @return True, when restricted transactional memory
*         is supported by the CPU.
*/
static bool is_rtm_provided()
{
std::uint32_t eax = 0x7;
std::uint32_t ebx = 0U;
std::uint32_t ecx = 0x0;
std::uint32_t edx = 0U;
// cpuid clobbers eax, ebx, ecx, and edx; list all of them as outputs.
asm volatile("cpuid" : "+a"(eax), "=b"(ebx), "+c"(ecx), "=d"(edx));
return (ebx & 0b100000000000) != 0U;
}
};
} // namespace mx::system

View File

@@ -0,0 +1,36 @@
#pragma once
#include <cstdint>
#include <fstream>
namespace mx::system {
/**
* Encapsulates functionality of the (Linux) system.
*/
class Environment
{
public:
/**
* @return True, if NUMA balancing is enabled by the system.
*/
static bool is_numa_balancing_enabled()
{
std::ifstream numa_balancing_file("/proc/sys/kernel/numa_balancing");
auto is_enabled = std::int32_t{};
if (numa_balancing_file >> is_enabled)
{
return is_enabled != 0;
}
return true;
}
static constexpr auto is_sse2()
{
#ifdef USE_SSE2
return true;
#else
return false;
#endif
}
};
} // namespace mx::system

36
src/mx/system/thread.h Normal file
View File

@@ -0,0 +1,36 @@
#pragma once
#include <chrono>
#include <cstdint>
#include <iostream>
#include <pthread.h>
#include <sched.h>
#include <thread>
namespace mx::system {
/**
* Encapsulates methods for thread access.
*/
class thread
{
public:
/**
* Pins a thread to a given core.
*
* @param thread Thread to pin.
* @param core_id Core where the thread should be pinned.
* @return True, when pinning was successful.
*/
static bool pin(std::thread &thread, const std::uint16_t core_id)
{
cpu_set_t cpu_set;
CPU_ZERO(&cpu_set);
CPU_SET(core_id, &cpu_set);
if (pthread_setaffinity_np(thread.native_handle(), sizeof(cpu_set_t), &cpu_set) != 0)
{
std::cerr << "Cannot pin thread!" << std::endl;
return false;
}
return true;
}
};
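/**
 * Minimal usage sketch: pinning a freshly created worker thread to core 0.
 * The thread body is only an assumption for this example.
 */
inline bool example_pin_to_first_core()
{
std::thread worker([] { /* worker body */ });
const auto is_pinned = thread::pin(worker, 0U);
worker.join();
return is_pinned;
}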
} // namespace mx::system

40
src/mx/system/topology.h Normal file
View File

@@ -0,0 +1,40 @@
#pragma once
#include <algorithm>
#include <cstdint>
#include <numa.h>
#include <sched.h>
#include <thread>
namespace mx::system {
/**
* Encapsulates methods for retrieving information
* about the hardware landscape.
*/
class topology
{
public:
/**
* @return Core where the caller is running.
*/
static std::uint16_t core_id() { return std::uint16_t(sched_getcpu()); }
/**
* Reads the NUMA region identifier of the given core.
*
* @param core_id Id of the core.
* @return Id of the NUMA region the core stays in.
*/
static std::uint8_t node_id(const std::uint16_t core_id) { return std::max(numa_node_of_cpu(core_id), 0); }
/**
* @return The greatest NUMA region identifier.
*/
static std::uint8_t max_node_id() { return std::uint8_t(numa_max_node()); }
/**
* @return Number of available cores.
*/
static std::uint16_t count_cores() { return std::uint16_t(std::thread::hardware_concurrency()); }
};
} // namespace mx::system

186
src/mx/tasking/channel.h Normal file
View File

@@ -0,0 +1,186 @@
#pragma once
#include "channel_occupancy.h"
#include "load.h"
#include "task.h"
#include "task_buffer.h"
#include <array>
#include <cassert>
#include <cstdint>
#include <mx/memory/config.h>
#include <mx/system/cache.h>
#include <mx/util/mpsc_queue.h>
#include <mx/util/queue.h>
namespace mx::tasking {
/**
* The channel is the central data structure used to pass tasks between worker threads.
* Every worker thread owns its own channel; tasks are popped only by the owning worker,
* but every worker thread (or task) can push further tasks to the channel.
*
* Every channel consists of a handful of queues where the tasks are actually stored;
* different queues provide different guarantees regarding concurrency and locality.
*
* In addition, every channel has its own buffer into which tasks are transferred from the
* queues. If the buffer is empty, the worker thread will refill it with tasks from the
* backend queues.
*
* The buffer gives the worker thread a view of the tasks that are ready for execution;
* this is used e.g. for prefetching.
*/
class Channel
{
public:
constexpr Channel(const std::uint16_t id, const std::uint8_t numa_node_id,
const std::uint8_t prefetch_distance) noexcept
: _remote_queues({}), _local_queues({}), _task_buffer(prefetch_distance), _id(id), _numa_node_id(numa_node_id)
{
}
~Channel() noexcept = default;
/**
* @return Identifier of the channel.
*/
[[nodiscard]] std::uint16_t id() const noexcept { return _id; }
/**
* @return The next task to be executed.
*/
TaskInterface *next() noexcept { return _task_buffer.next(); }
/**
* Schedules the task to thread-safe queue with regard to the NUMA region
* of the producer. Producer of different NUMA regions should not share
* a single queue.
* @param task Task to be scheduled.
* @param numa_node_id NUMA region of the producer.
*/
void push_back_remote(TaskInterface *task, const std::uint8_t numa_node_id) noexcept
{
_remote_queues[task->priority()][numa_node_id].push_back(task);
}
/**
* Schedules a task to the local queue, which is not thread-safe. Only
* the channel owner should spawn tasks this way.
* @param task Task to be scheduled.
*/
void push_back_local(TaskInterface *task) noexcept { _local_queues[task->priority()].push_back(task); }
/**
* Fill the task buffer with tasks from the backend queues.
* @return Size of the buffer after filling it.
*/
std::uint16_t fill() noexcept
{
// Fill with normal prioritized.
auto size = fill<priority::normal>(_task_buffer.available_slots());
// Fill with low prioritized.
if (this->_task_buffer.empty())
{
size = fill<priority::low>(config::task_buffer_size());
}
return size;
}
/**
* Fills the task buffer with tasks scheduled with a given priority.
*
* @tparam P Priority of the tasks.
* @return Size of the task buffer after filling.
*/
template <priority P> std::uint16_t fill() noexcept { return fill<P>(_task_buffer.available_slots()); }
/**
* @return Number of tasks available in the buffer and ready for execution.
*/
[[nodiscard]] std::uint16_t size() const noexcept { return _task_buffer.size(); }
/**
* @return True, when the task buffer is empty. Backend queues may still have tasks.
*/
[[nodiscard]] bool empty() const noexcept { return _task_buffer.empty(); }
/**
* Adds usage prediction of a resource to this channel.
* @param usage Predicted usage.
*/
void predict_usage(const resource::hint::expected_access_frequency usage) noexcept { _occupancy.predict(usage); }
/**
* Updates the usage prediction of this channel.
* @param old_prediction So far predicted usage.
* @param new_prediction New predicted usage.
*/
void modify_predicted_usage(const resource::hint::expected_access_frequency old_prediction,
const resource::hint::expected_access_frequency new_prediction) noexcept
{
_occupancy.revoke(old_prediction);
_occupancy.predict(new_prediction);
}
/**
* @return Aggregated predicted usage.
*/
[[nodiscard]] resource::hint::expected_access_frequency predicted_usage() const noexcept
{
return static_cast<resource::hint::expected_access_frequency>(_occupancy);
}
/**
* @return True, whenever min. one prediction was "excessive".
*/
[[nodiscard]] bool has_excessive_usage_prediction() const noexcept
{
return _occupancy.has_excessive_usage_prediction();
}
private:
// Backend queues for multiple producers in different NUMA regions and different priorities.
alignas(64)
std::array<std::array<util::MPSCQueue<TaskInterface>, memory::config::max_numa_nodes()>, 2> _remote_queues{};
// Backend queues for a single producer (owning worker thread) and different priorities.
alignas(64) std::array<util::Queue<TaskInterface>, 2> _local_queues{};
// Buffer for ready-to-execute tasks.
alignas(64) TaskBuffer<config::task_buffer_size()> _task_buffer;
// Id of this channel.
const std::uint16_t _id;
// NUMA id of the worker thread owning this channel.
const std::uint8_t _numa_node_id;
// Holder of resource predictions of this channel.
alignas(64) ChannelOccupancy _occupancy{};
/**
* Fills the task buffer with tasks scheduled with a given priority.
*
* @tparam P Priority.
* @param available Number of maximal tasks to fill the task buffer.
* @return Size of the task buffer after filling.
*/
template <priority P> std::uint16_t fill(std::uint16_t available) noexcept
{
// 1) Fill up from the local queue.
available -= _task_buffer.fill(_local_queues[P], available);
if (available > 0U)
{
// 2) Fill up from remote queues; start with the NUMA-local one.
for (auto i = 0U; i < _remote_queues[P].max_size(); ++i)
{
const auto numa_node_id = (_numa_node_id + i) & (_remote_queues[P].max_size() - 1U);
available -= _task_buffer.fill(_remote_queues[P][numa_node_id], available);
}
}
return _task_buffer.max_size() - available;
}
};
} // namespace mx::tasking

View File

@@ -0,0 +1,78 @@
#pragma once
#include <array>
#include <atomic>
#include <mx/resource/resource.h>
namespace mx::tasking {
/**
* Stores usage predictions.
*/
class ChannelOccupancy
{
public:
constexpr ChannelOccupancy() = default;
~ChannelOccupancy() = default;
/**
* Adds the given predicted usage.
* @param predicted_usage Predicted usage.
*/
void predict(const resource::hint::expected_access_frequency predicted_usage) noexcept
{
_predicted_usage_counter[static_cast<std::uint8_t>(predicted_usage)].fetch_add(1, std::memory_order_relaxed);
}
/**
* Subtracts the given predicted usage.
* @param predicted_usage Predicted usage.
*/
void revoke(const resource::hint::expected_access_frequency predicted_usage) noexcept
{
_predicted_usage_counter[static_cast<std::uint8_t>(predicted_usage)].fetch_sub(1, std::memory_order_relaxed);
}
/**
* @return True, when at least one prediction was "excessive".
*/
[[nodiscard]] bool has_excessive_usage_prediction() const noexcept
{
return has_at_least_one<resource::hint::expected_access_frequency::excessive>();
}
/**
* @return The highest predicted usage.
*/
explicit operator resource::hint::expected_access_frequency() const noexcept
{
if (has_at_least_one<resource::hint::expected_access_frequency::excessive>())
{
return resource::hint::expected_access_frequency::excessive;
}
if (has_at_least_one<resource::hint::expected_access_frequency::high>())
{
return resource::hint::expected_access_frequency::high;
}
if (has_at_least_one<resource::hint::expected_access_frequency::normal>())
{
return resource::hint::expected_access_frequency::normal;
}
return resource::hint::expected_access_frequency::unused;
}
private:
// Counter of predicted usages.
std::array<std::atomic_uint64_t, 4U> _predicted_usage_counter{0U};
/**
* @return True, when at least one usage as given by the template was predicted.
*/
template <resource::hint::expected_access_frequency U>[[nodiscard]] bool has_at_least_one() const noexcept
{
return _predicted_usage_counter[static_cast<std::uint8_t>(U)].load(std::memory_order_relaxed) > 0;
}
};
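/**
 * Minimal usage sketch: predicting a "high" access frequency for a resource
 * on this channel and reading the aggregated level back.
 */
inline resource::hint::expected_access_frequency example_occupancy() noexcept
{
ChannelOccupancy occupancy;
occupancy.predict(resource::hint::expected_access_frequency::high);
return static_cast<resource::hint::expected_access_frequency>(occupancy);
}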
} // namespace mx::tasking

33
src/mx/tasking/config.h Normal file
View File

@@ -0,0 +1,33 @@
#pragma once
namespace mx::tasking {
class config
{
public:
enum memory_reclamation_scheme
{
None = 0U,
UpdateEpochOnRead = 1U,
UpdateEpochPeriodically = 2U
};
// Maximal number of supported cores.
static constexpr auto max_cores() { return 64U; }
// Maximal size for a single task, will be used for task allocation.
static constexpr auto task_size() { return 64U; }
// The task buffer will hold a set of tasks, fetched from
// queues. This is the size of the buffer.
static constexpr auto task_buffer_size() { return 64U; }
// If enabled, will record the number of executed tasks,
// scheduled tasks, readers and writers per core, and more.
static constexpr auto task_statistics() { return false; }
// If enabled, memory will be reclaimed while using optimistic
// synchronization by epoch-based reclamation. Otherwise, freeing
// memory is unsafe.
static constexpr auto memory_reclamation() { return memory_reclamation_scheme::UpdateEpochPeriodically; }
};
} // namespace mx::tasking

40
src/mx/tasking/load.h Normal file
View File

@@ -0,0 +1,40 @@
#pragma once
#include <bitset>
#include <cstdint>
namespace mx::tasking {
/**
* Persists the channel load for the last 64 requests.
*/
class Load
{
public:
constexpr Load() = default;
~Load() = default;
Load &operator+=(const bool hit) noexcept
{
_hits <<= 1;
_hits.set(0, hit);
return *this;
}
Load &operator|=(const Load &other) noexcept
{
_hits |= other._hits;
return *this;
}
/**
* @return Number of successful requests.
*/
[[nodiscard]] std::size_t count() const noexcept { return _hits.count(); }
bool operator<(const Load &other) const noexcept { return _hits.count() < other._hits.count(); }
bool operator<(const std::size_t other) const noexcept { return _hits.count() < other; }
private:
// Bitvector of the last 64 requests.
std::bitset<64> _hits{0U};
};
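/**
 * Minimal usage sketch: recording two successful and one failed request and
 * reading back the number of hits within the 64-request window.
 */
inline auto example_load() noexcept
{
Load load;
load += true;
load += false;
load += true;
return load.count(); // 2
}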
} // namespace mx::tasking

View File

@@ -0,0 +1,49 @@
#pragma once
#include "task.h"
#include <mx/system/cache.h>
#include <utility>
namespace mx::tasking {
/**
* A prefetch slot is part of the prefetch buffer used for task
* and resource prefetching.
* A slot can contain up to one task and one resource that are
* prefetched by the channel.
*/
class PrefetchSlot
{
public:
constexpr PrefetchSlot() noexcept = default;
~PrefetchSlot() = default;
PrefetchSlot &operator=(TaskInterface *task) noexcept
{
_task = task;
if (task->has_resource_annotated())
{
_resource = std::make_pair(task->annotated_resource().get(), task->annotated_resource_size());
}
return *this;
}
void operator()() noexcept
{
if (_task != nullptr)
{
system::cache::prefetch<system::cache::L1, system::cache::write>(_task);
_task = nullptr;
}
if (std::get<0>(_resource) != nullptr)
{
system::cache::prefetch_range<system::cache::LLC, system::cache::read>(std::get<0>(_resource),
std::get<1>(_resource));
std::get<0>(_resource) = nullptr;
}
}
private:
void *_task = nullptr;
std::pair<void *, std::uint16_t> _resource = std::make_pair(nullptr, 0U);
};
} // namespace mx::tasking

View File

@@ -0,0 +1,107 @@
#include "profiling_task.h"
#include <fstream>
#include <json.hpp>
#include <mx/memory/global_heap.h>
#include <mx/tasking/runtime.h>
using namespace mx::tasking::profiling;
ProfilingTask::ProfilingTask(mx::util::maybe_atomic<bool> &is_running, mx::tasking::Channel &channel)
: _is_running(is_running), _channel(channel)
{
_idle_ranges.reserve(1 << 16);
}
mx::tasking::TaskResult ProfilingTask::execute(const std::uint16_t /*core_id*/, const std::uint16_t /*channel_id*/)
{
IdleRange range;
while (this->_is_running && this->_channel.empty())
{
this->_channel.fill();
}
range.stop();
if (range.nanoseconds() > 10U)
{
this->_idle_ranges.emplace_back(std::move(range));
}
if (this->_is_running)
{
return tasking::TaskResult::make_succeed(this);
}
return tasking::TaskResult::make_null();
}
Profiler::~Profiler()
{
for (auto *task : this->_tasks)
{
delete task;
}
}
void Profiler::profile(const std::string &profiling_output_file)
{
for (auto *task : this->_tasks)
{
delete task;
}
this->_tasks.clear();
this->_profiling_output_file.emplace(profiling_output_file);
this->_start = std::chrono::steady_clock::now();
}
void Profiler::profile(util::maybe_atomic<bool> &is_running, Channel &channel)
{
auto *task =
new (memory::GlobalHeap::allocate_cache_line_aligned(sizeof(ProfilingTask))) ProfilingTask(is_running, channel);
task->annotate(channel.id());
task->annotate(mx::tasking::priority::low);
this->_tasks.push_back(task);
mx::tasking::runtime::spawn(*task);
}
void Profiler::stop()
{
const auto end = std::chrono::steady_clock::now();
const auto end_relative_nanoseconds =
std::chrono::duration_cast<std::chrono::nanoseconds>(end - this->_start).count();
if (this->_profiling_output_file.has_value())
{
auto output = nlohmann::json{};
for (auto *task : this->_tasks)
{
if (task != nullptr && task->idle_ranges().empty() == false)
{
nlohmann::json channel_output;
channel_output["channel"] = task->annotated_channel();
nlohmann::json ranges{};
for (const auto &range : task->idle_ranges())
{
const auto normalized = range.normalize(this->_start);
auto normalized_json = nlohmann::json{};
normalized_json["s"] = std::get<0>(normalized);
normalized_json["e"] = std::get<1>(normalized);
ranges.push_back(std::move(normalized_json));
}
channel_output["ranges"] = std::move(ranges);
output.push_back(std::move(channel_output));
}
}
nlohmann::json end_output;
end_output["end"] = end_relative_nanoseconds;
output.push_back(std::move(end_output));
std::ofstream out_file{this->_profiling_output_file.value()};
out_file << output.dump() << std::endl;
}
this->_profiling_output_file = std::nullopt;
}

View File

@@ -0,0 +1,119 @@
#pragma once
#include <chrono>
#include <mx/tasking/channel.h>
#include <mx/tasking/task.h>
#include <mx/util/maybe_atomic.h>
#include <optional>
#include <utility>
#include <vector>
namespace mx::tasking::profiling {
/**
* Time range (from -- to) of idle time for a single channel.
*/
class IdleRange
{
public:
IdleRange() : _start(std::chrono::steady_clock::now()) {}
IdleRange(IdleRange &&) = default;
~IdleRange() = default;
/**
* Sets the end of the idle range to the current time.
*/
void stop() noexcept { _end = std::chrono::steady_clock::now(); }
/**
* @return Number of nanoseconds idled.
*/
[[nodiscard]] std::uint64_t nanoseconds() const noexcept
{
return std::chrono::duration_cast<std::chrono::nanoseconds>(_end - _start).count();
}
/**
* Normalizes this range with respect to a given point in time.
* @param global_start Point in time to normalize.
* @return Pair of (start, stop) normalized to the given time point.
*/
[[nodiscard]] std::pair<std::uint64_t, std::uint64_t> normalize(
const std::chrono::steady_clock::time_point global_start) const noexcept
{
return {
std::chrono::duration_cast<std::chrono::nanoseconds>(_start - global_start).count(),
std::chrono::duration_cast<std::chrono::nanoseconds>(_end - global_start).count(),
};
}
private:
// Start of idling.
std::chrono::steady_clock::time_point _start;
// End of idling.
std::chrono::steady_clock::time_point _end;
};
/**
* Task that is scheduled with low priority and gets CPU time
* whenever no other task is available.
* Every time the task gets executed, it records the time range
* until the channel has new tasks for execution.
*/
class ProfilingTask final : public TaskInterface
{
public:
ProfilingTask(util::maybe_atomic<bool> &is_running, Channel &channel);
~ProfilingTask() override = default;
TaskResult execute(std::uint16_t core_id, std::uint16_t channel_id) override;
[[nodiscard]] const std::vector<IdleRange> &idle_ranges() const noexcept { return _idle_ranges; }
private:
util::maybe_atomic<bool> &_is_running;
Channel &_channel;
std::vector<IdleRange> _idle_ranges;
};
/**
* Schedules the idle/profiling task to every channel and
* writes the recorded idle times to a given file.
*/
class Profiler
{
public:
Profiler() noexcept = default;
~Profiler();
/**
* Enable profiling and set the result file.
* @param profiling_output_file File, where results should be written to.
*/
void profile(const std::string &profiling_output_file);
/**
* Schedules a new idle/profile task to the given channel.
* @param is_running Reference to the schedulers "is_running" flag.
* @param channel Channel to spawn the task to.
*/
void profile(util::maybe_atomic<bool> &is_running, Channel &channel);
/**
* Normalizes all time ranges and writes them to the specified
* file.
*/
void stop();
private:
// File to write the output.
std::optional<std::string> _profiling_output_file{std::nullopt};
// Time point of the runtime start.
std::chrono::steady_clock::time_point _start;
// List of all idle/profile tasks.
std::vector<ProfilingTask *> _tasks;
};
} // namespace mx::tasking::profiling

View File

@@ -0,0 +1,92 @@
#pragma once
#include <array>
#include <cstdint>
#include <cstring>
#include <mx/memory/global_heap.h>
#include <mx/tasking/config.h>
#include <mx/util/aligned_t.h>
namespace mx::tasking::profiling {
/**
* Collector for tasking statistics (scheduled tasks, executed tasks, ...).
*/
class Statistic
{
public:
using counter_line_t = util::aligned_t<std::array<std::uint64_t, 7>>;
enum Counter : std::uint8_t
{
Scheduled,
ScheduledOnChannel,
ScheduledOffChannel,
Executed,
ExecutedReader,
ExecutedWriter,
Fill
};
explicit Statistic(const std::uint16_t count_channels) noexcept : _count_channels(count_channels)
{
this->_counter = new (memory::GlobalHeap::allocate_cache_line_aligned(sizeof(counter_line_t) * count_channels))
counter_line_t[count_channels];
std::memset(static_cast<void *>(this->_counter), 0, sizeof(counter_line_t) * count_channels);
}
Statistic(const Statistic &) = delete;
~Statistic() noexcept { delete[] this->_counter; }
Statistic &operator=(const Statistic &) = delete;
/**
* Clears all collected statistics.
*/
void clear() noexcept
{
std::memset(static_cast<void *>(this->_counter), 0, sizeof(counter_line_t) * this->_count_channels);
}
/**
* Increment the template-given counter by one for the given channel.
* @param channel_id Channel to increment the statistics for.
*/
template <Counter C> void increment(const std::uint16_t channel_id) noexcept
{
_counter[channel_id].value()[static_cast<std::uint8_t>(C)] += 1;
}
/**
* Read the given counter for a given channel.
* @param counter Counter to read.
* @param channel_id Channel the counter is for.
* @return Value of the counter.
*/
[[nodiscard]] std::uint64_t get(const Counter counter, const std::uint16_t channel_id) const noexcept
{
return _counter[channel_id].value()[static_cast<std::uint8_t>(counter)];
}
/**
* Read and aggregate the counter for all channels.
* @param counter Counter to read.
* @return Value of the counter for all channels.
*/
[[nodiscard]] std::uint64_t get(const Counter counter) const noexcept
{
std::uint64_t sum = 0U;
for (auto i = 0U; i < _count_channels; ++i)
{
sum += get(counter, i);
}
return sum;
}
private:
// Number of channels to monitor.
const std::uint16_t _count_channels;
// Memory for storing the counter.
counter_line_t *_counter = nullptr;
};
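/**
 * Minimal usage sketch: counting one executed task on channel 0 and reading
 * the aggregate over all channels afterwards.
 */
inline std::uint64_t example_count_executed(Statistic &statistic) noexcept
{
statistic.increment<Statistic::Executed>(0U);
return statistic.get(Statistic::Executed);
}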
} // namespace mx::tasking::profiling

265
src/mx/tasking/runtime.h Normal file
View File

@@ -0,0 +1,265 @@
#pragma once
#include "scheduler.h"
#include "task.h"
#include <iostream>
#include <memory>
#include <mx/memory/dynamic_size_allocator.h>
#include <mx/memory/fixed_size_allocator.h>
#include <mx/memory/task_allocator_interface.h>
#include <mx/resource/builder.h>
#include <mx/util/core_set.h>
#include <utility>
namespace mx::tasking {
/**
* The runtime is the central access structure to MxTasking.
* Here, we can initialize MxTasking, spawn and allocate tasks, allocate
* data objects.
*/
class runtime
{
public:
/**
* Initializes the MxTasking runtime.
* @param core_set Cores the runtime should execute on.
* @param prefetch_distance Distance for prefetching.
* @param use_system_allocator Should we use the system's malloc interface or our allocator?
* @return True, when the runtime was started successfully.
*/
static bool init(const util::core_set &core_set, const std::uint16_t prefetch_distance,
const bool use_system_allocator)
{
// Are we ready to re-initialize the scheduler?
if (_scheduler != nullptr && _scheduler->is_running())
{
return false;
}
// Create a new resource allocator.
if (_resource_allocator == nullptr)
{
_resource_allocator.reset(new (memory::GlobalHeap::allocate_cache_line_aligned(
sizeof(memory::dynamic::Allocator))) memory::dynamic::Allocator());
}
else if (_resource_allocator->is_free())
{
_resource_allocator->release_allocated_memory();
_resource_allocator->initialize_empty();
}
else
{
_resource_allocator->defragment();
}
// Create a new task allocator.
if (use_system_allocator)
{
_task_allocator.reset(new (memory::GlobalHeap::allocate_cache_line_aligned(sizeof(
memory::SystemTaskAllocator<config::task_size()>))) memory::SystemTaskAllocator<config::task_size()>());
}
else
{
_task_allocator.reset(new (
memory::GlobalHeap::allocate_cache_line_aligned(sizeof(memory::fixed::Allocator<config::task_size()>)))
memory::fixed::Allocator<config::task_size()>(core_set));
}
// Create a new scheduler.
const auto need_new_scheduler = _scheduler == nullptr || *_scheduler != core_set;
if (need_new_scheduler)
{
_scheduler.reset(new (memory::GlobalHeap::allocate_cache_line_aligned(sizeof(Scheduler)))
Scheduler(core_set, prefetch_distance, *_resource_allocator));
}
else
{
_scheduler->reset();
}
// Create a new resource builder.
if (_resource_builder == nullptr || need_new_scheduler)
{
_resource_builder = std::make_unique<resource::Builder>(*_scheduler, *_resource_allocator);
}
return true;
}
/**
* Start profiling of idle times. Results will be written to the given file.
* @param output_file File for idle-time results.
*/
static void profile(const std::string &output_file) noexcept { _scheduler->profile(output_file); }
/**
* Spawns the given task.
* @param task Task to be scheduled.
* @param current_channel_id Channel, the spawn request came from.
*/
static void spawn(TaskInterface &task, const std::uint16_t current_channel_id) noexcept
{
_scheduler->schedule(task, current_channel_id);
}
/**
* Spawns the given task.
* @param task Task to be scheduled.
*/
static void spawn(TaskInterface &task) noexcept { _scheduler->schedule(task); }
/**
* @return Number of available channels.
*/
static std::uint16_t channels() noexcept { return _scheduler->count_channels(); }
/**
* Starts the runtime and suspends the starting thread until MxTasking is stopped.
*/
static void start_and_wait() { _scheduler->start_and_wait(); }
/**
* Instructs all worker threads to stop their work.
* After all worker threads are stopped, the starting
* thread will be resumed.
*/
static void stop() noexcept { _scheduler->interrupt(); }
/**
* Creates a new task.
* @param core_id Core to allocate memory from.
* @param arguments Arguments for the task.
* @return The new task.
*/
template <typename T, typename... Args> static T *new_task(const std::uint16_t core_id, Args &&... arguments)
{
static_assert(sizeof(T) <= config::task_size(), "Task must not be larger than the configured task size.");
return new (_task_allocator->allocate(core_id)) T(std::forward<Args>(arguments)...);
}
/**
* Frees a given task.
* @param core_id Core id to return the memory to.
* @param task Task to be freed.
*/
template <typename T> static void delete_task(const std::uint16_t core_id, T *task) noexcept
{
task->~T();
_task_allocator->free(core_id, static_cast<void *>(task));
}
/**
* Creates a resource.
* @param size Size of the data object.
* @param hint Hints for allocation and scheduling.
* @param arguments Arguments for the data object.
* @return The resource pointer.
*/
template <typename T, typename... Args>
static resource::ptr new_resource(const std::size_t size, resource::hint &&hint, Args &&... arguments) noexcept
{
return _resource_builder->build<T>(size, std::move(hint), std::forward<Args>(arguments)...);
}
/**
* Creates a resource from a given pointer.
* @param object Pointer to the existing object.
* @param hint Hints for allocation and scheduling.
* @return The resource pointer.
*/
template <typename T> static resource::ptr to_resource(T *object, resource::hint &&hint) noexcept
{
return _resource_builder->build<T>(object, std::move(hint));
}
/**
* Deletes the given data object.
* @param resource Data object to be deleted.
*/
template <typename T> static void delete_resource(const resource::ptr resource) noexcept
{
_resource_builder->destroy<T>(resource);
}
static void *allocate(const std::uint8_t numa_node_id, const std::size_t alignment, const std::size_t size) noexcept
{
return _resource_allocator->allocate(numa_node_id, alignment, size);
}
static void free(void *pointer) noexcept { _resource_allocator->free(pointer); }
/**
* Updates the prediction of a data object.
* @param resource Data object, whose usage should be predicted.
* @param old_prediction Prediction so far.
* @param new_prediction New usage prediction.
*/
static void modify_predicted_usage(const resource::ptr resource,
const resource::hint::expected_access_frequency old_prediction,
const resource::hint::expected_access_frequency new_prediction) noexcept
{
_scheduler->modify_predicted_usage(resource.channel_id(), old_prediction, new_prediction);
}
/**
* ID of the NUMA region of a channel.
* @param channel_id Channel.
* @return ID of the NUMA region.
*/
static std::uint8_t numa_node_id(const std::uint16_t channel_id) noexcept
{
return _scheduler->numa_node_id(channel_id);
}
/**
* Reads the task statistics for a given counter and all channels.
* @param counter Counter to be read.
* @return Aggregated value of all channels.
*/
static std::uint64_t statistic(const profiling::Statistic::Counter counter) noexcept
{
return _scheduler->statistic(counter);
}
/**
* Reads the task statistic for a given counter on a given channel.
* @param counter Counter to be read.
* @param channel_id Channel.
* @return Value of the counter of the given channel.
*/
static std::uint64_t statistic(const profiling::Statistic::Counter counter, const std::uint16_t channel_id) noexcept
{
return _scheduler->statistic(counter, channel_id);
}
private:
// Scheduler to spawn tasks.
inline static std::unique_ptr<Scheduler> _scheduler = {nullptr};
// Allocator to allocate tasks (could be systems malloc or our Multi-level allocator).
inline static std::unique_ptr<memory::TaskAllocatorInterface> _task_allocator = {nullptr};
// Allocator to allocate resources.
inline static std::unique_ptr<memory::dynamic::Allocator> _resource_allocator = {nullptr};
// Builder to create data objects (resources).
inline static std::unique_ptr<resource::Builder> _resource_builder = {nullptr};
};
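/**
 * Minimal usage sketch: a task that runs once on a worker thread, prints a
 * message, and shuts the runtime down. How the util::core_set is built and
 * which channel the task is annotated with are assumptions of this example;
 * spawning could look like: task->annotate(std::uint16_t{0U});
 * runtime::spawn(*task); runtime::start_and_wait();
 */
class ExampleHelloTask final : public TaskInterface
{
public:
~ExampleHelloTask() override = default;
TaskResult execute(const std::uint16_t /*core_id*/, const std::uint16_t /*channel_id*/) override
{
std::cout << "hello from a MxTasking worker" << std::endl;
runtime::stop();                // interrupt all worker threads
return TaskResult::make_null(); // this task has no successor
}
};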
/**
* The runtime_guard initializes the runtime on construction and starts
* the runtime when the object is destroyed. This allows MxTasking to execute
* within a specific scope.
*/
class runtime_guard
{
public:
runtime_guard(const bool use_system_allocator, const util::core_set &core_set,
const std::uint16_t prefetch_distance = 0U) noexcept
{
runtime::init(core_set, prefetch_distance, use_system_allocator);
}
~runtime_guard() noexcept { runtime::start_and_wait(); }
};
} // namespace mx::tasking

View File

@@ -0,0 +1,203 @@
#include "scheduler.h"
#include <cassert>
#include <mx/memory/global_heap.h>
#include <mx/synchronization/synchronization.h>
#include <mx/system/thread.h>
#include <mx/system/topology.h>
#include <thread>
#include <vector>
using namespace mx::tasking;
Scheduler::Scheduler(const mx::util::core_set &core_set, const std::uint16_t prefetch_distance,
memory::dynamic::Allocator &resource_allocator) noexcept
: _core_set(core_set), _count_channels(core_set.size()), _worker({}), _channel_numa_node_map({0U}),
_epoch_manager(core_set.size(), resource_allocator, _is_running), _statistic(_count_channels)
{
this->_worker.fill(nullptr);
this->_channel_numa_node_map.fill(0U);
for (auto worker_id = 0U; worker_id < this->_count_channels; ++worker_id)
{
const auto core_id = this->_core_set[worker_id];
this->_channel_numa_node_map[worker_id] = system::topology::node_id(core_id);
this->_worker[worker_id] =
new (memory::GlobalHeap::allocate(this->_channel_numa_node_map[worker_id], sizeof(Worker)))
Worker(worker_id, core_id, this->_channel_numa_node_map[worker_id], this->_is_running,
prefetch_distance, this->_epoch_manager[worker_id], this->_epoch_manager.global_epoch(),
this->_statistic);
}
}
Scheduler::~Scheduler() noexcept
{
for (auto *worker : this->_worker)
{
worker->~Worker();
memory::GlobalHeap::free(worker, sizeof(Worker));
}
}
void Scheduler::start_and_wait()
{
// Create threads for worker...
std::vector<std::thread> worker_threads(this->_core_set.size() +
static_cast<std::uint16_t>(config::memory_reclamation() != config::None));
for (auto channel_id = 0U; channel_id < this->_core_set.size(); ++channel_id)
{
worker_threads[channel_id] = std::thread([this, channel_id] { this->_worker[channel_id]->execute(); });
system::thread::pin(worker_threads[channel_id], this->_worker[channel_id]->core_id());
}
// ... and epoch management (if enabled).
if constexpr (config::memory_reclamation() != config::None)
{
// In case we enable memory reclamation: Use an additional thread.
worker_threads[this->_core_set.size()] =
std::thread([this] { this->_epoch_manager.enter_epoch_periodically(); });
}
// Turning the flag on starts all worker threads to execute tasks.
this->_is_running = true;
// Wait for the worker threads to end. This will only be
// reached when the _is_running flag is set to false
// from somewhere in the application.
for (auto &worker_thread : worker_threads)
{
worker_thread.join();
}
if constexpr (config::memory_reclamation() != config::None)
{
// At this point, no task will execute on any resource;
// but the epoch manager has joined, too. Therefore,
// we will reclaim all memory manually.
this->_epoch_manager.reclaim_all();
}
}
void Scheduler::schedule(TaskInterface &task, const std::uint16_t current_channel_id) noexcept
{
// Scheduling is based on the annotated resource of the given task.
if (task.has_resource_annotated())
{
const auto annotated_resource = task.annotated_resource();
const auto resource_channel_id = annotated_resource.channel_id();
// For performance reasons, we prefer the local (not synchronized) queue
// whenever possible to spawn the task. The decision is based on the
// synchronization primitive and the access mode of the task (reader/writer).
if (Scheduler::keep_task_local(task.is_readonly(), annotated_resource.synchronization_primitive(),
resource_channel_id, current_channel_id))
{
this->_worker[current_channel_id]->channel().push_back_local(&task);
if constexpr (config::task_statistics())
{
this->_statistic.increment<profiling::Statistic::ScheduledOnChannel>(current_channel_id);
}
}
else
{
this->_worker[resource_channel_id]->channel().push_back_remote(&task,
this->numa_node_id(current_channel_id));
if constexpr (config::task_statistics())
{
this->_statistic.increment<profiling::Statistic::ScheduledOffChannel>(current_channel_id);
}
}
}
// The developer assigned a fixed channel to the task.
else if (task.has_channel_annotated())
{
const auto target_channel_id = task.annotated_channel();
// For performance reasons, we prefer the local (not synchronized) queue
// whenever possible to spawn the task.
if (target_channel_id == current_channel_id)
{
this->_worker[current_channel_id]->channel().push_back_local(&task);
if constexpr (config::task_statistics())
{
this->_statistic.increment<profiling::Statistic::ScheduledOnChannel>(current_channel_id);
}
}
else
{
this->_worker[target_channel_id]->channel().push_back_remote(&task, this->numa_node_id(current_channel_id));
if constexpr (config::task_statistics())
{
this->_statistic.increment<profiling::Statistic::ScheduledOffChannel>(current_channel_id);
}
}
}
// The developer assigned a fixed NUMA region to the task.
else if (task.has_node_annotated())
{
// TODO: Select random channel @ node, based on load
assert(false && "NOT IMPLEMENTED: Task scheduling for node.");
}
// The task can run everywhere.
else
{
this->_worker[current_channel_id]->channel().push_back_local(&task);
if constexpr (config::task_statistics())
{
this->_statistic.increment<profiling::Statistic::ScheduledOnChannel>(current_channel_id);
}
}
if constexpr (config::task_statistics())
{
this->_statistic.increment<profiling::Statistic::Scheduled>(current_channel_id);
}
}
void Scheduler::schedule(TaskInterface &task) noexcept
{
if (task.has_resource_annotated())
{
const auto &annotated_resource = task.annotated_resource();
this->_worker[annotated_resource.channel_id()]->channel().push_back_remote(&task, 0U);
if constexpr (config::task_statistics())
{
this->_statistic.increment<profiling::Statistic::ScheduledOffChannel>(annotated_resource.channel_id());
}
}
else if (task.has_channel_annotated())
{
this->_worker[task.annotated_channel()]->channel().push_back_remote(&task, 0U);
if constexpr (config::task_statistics())
{
this->_statistic.increment<profiling::Statistic::ScheduledOffChannel>(task.annotated_channel());
}
}
else if (task.has_node_annotated())
{
// TODO: Select random channel @ node, based on load
assert(false && "NOT IMPLEMENTED: Task scheduling for node.");
}
else
{
assert(false && "NOT IMPLEMENTED: Task scheduling without channel.");
}
}
void Scheduler::reset() noexcept
{
this->_statistic.clear();
this->_epoch_manager.reset();
}
void Scheduler::profile(const std::string &output_file)
{
this->_profiler.profile(output_file);
for (auto i = 0U; i < this->_count_channels; ++i)
{
this->_profiler.profile(this->_is_running, this->_worker[i]->channel());
}
}

220
src/mx/tasking/scheduler.h Normal file
View File

@@ -0,0 +1,220 @@
#pragma once
#include "channel.h"
#include "task.h"
#include "worker.h"
#include <array>
#include <atomic>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <memory>
#include <mx/memory/config.h>
#include <mx/memory/dynamic_size_allocator.h>
#include <mx/memory/reclamation/epoch_manager.h>
#include <mx/resource/resource.h>
#include <mx/tasking/profiling/profiling_task.h>
#include <mx/tasking/profiling/statistic.h>
#include <mx/util/core_set.h>
#include <mx/util/random.h>
#include <string>
namespace mx::tasking {
/**
* The scheduler is the central (but hidden behind the runtime) data structure used to spawn
* tasks onto the worker threads.
*/
class Scheduler
{
public:
Scheduler(const util::core_set &core_set, std::uint16_t prefetch_distance,
memory::dynamic::Allocator &resource_allocator) noexcept;
~Scheduler() noexcept;
/**
* Schedules a given task.
* @param task Task to be scheduled.
* @param current_channel_id Channel the request came from.
*/
void schedule(TaskInterface &task, std::uint16_t current_channel_id) noexcept;
/**
* Schedules a given task.
* @param task Task to be scheduled.
*/
void schedule(TaskInterface &task) noexcept;
/**
* Starts all worker threads and waits until they finish.
*/
void start_and_wait();
/**
* Interrupts the worker threads. They will finish after executing
* their current tasks.
*/
void interrupt() noexcept
{
_is_running = false;
this->_profiler.stop();
}
/**
* @return Core set of this instance.
*/
[[nodiscard]] const util::core_set &core_set() const noexcept { return _core_set; }
/**
* @return True, when the worker threads are not interrupted.
*/
[[nodiscard]] bool is_running() const noexcept { return _is_running; }
/**
* @return The global epoch manager.
*/
[[nodiscard]] memory::reclamation::EpochManager &epoch_manager() noexcept { return _epoch_manager; }
/**
* @return Number of all channels.
*/
[[nodiscard]] std::uint16_t count_channels() const noexcept { return _count_channels; }
/**
* Reads the NUMA region of a given channel/worker thread.
* @param channel_id Channel.
* @return NUMA region of the given channel.
*/
[[nodiscard]] std::uint8_t numa_node_id(const std::uint16_t channel_id) const noexcept
{
return _channel_numa_node_map[channel_id];
}
/**
* Predicts usage for a given channel.
* @param channel_id Channel.
* @param usage Usage to predict.
*/
void predict_usage(const std::uint16_t channel_id, const resource::hint::expected_access_frequency usage) noexcept
{
_worker[channel_id]->channel().predict_usage(usage);
}
/**
* Updates the predicted usage of a channel.
* @param channel_id Channel.
* @param old_prediction Usage predicted so far.
* @param new_prediction New prediction.
*/
void modify_predicted_usage(const std::uint16_t channel_id,
const resource::hint::expected_access_frequency old_prediction,
const resource::hint::expected_access_frequency new_prediction) noexcept
{
_worker[channel_id]->channel().modify_predicted_usage(old_prediction, new_prediction);
}
/**
* @param channel_id Channel.
* @return True, when at least one usage was predicted to be "excessive" for the given channel.
*/
[[nodiscard]] bool has_excessive_usage_prediction(const std::uint16_t channel_id) const noexcept
{
return _worker[channel_id]->channel().has_excessive_usage_prediction();
}
/**
* Resets the statistics.
*/
void reset() noexcept;
/**
* Aggregates the counter for all cores.
* @param counter Statistic counter.
* @return Aggregated value.
*/
[[nodiscard]] std::uint64_t statistic([[maybe_unused]] const profiling::Statistic::Counter counter) const noexcept
{
if constexpr (config::task_statistics())
{
return this->_statistic.get(counter);
}
else
{
return 0U;
}
}
/**
* Reads the statistics for a given counter on a given channel.
* @param counter Statistic counter.
* @param channel_id Channel.
* @return Value of the counter for the given channel.
*/
[[nodiscard]] std::uint64_t statistic([[maybe_unused]] const profiling::Statistic::Counter counter,
[[maybe_unused]] const std::uint16_t channel_id) const noexcept
{
if constexpr (config::task_statistics())
{
return this->_statistic.get(counter, channel_id);
}
else
{
return 0U;
}
}
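// Usage sketch (illustrative; assumes statistics are enabled via config::task_statistics()
// and that the referenced counters exist, as suggested by the increments in scheduler.cpp):
//
//   const auto scheduled = scheduler.statistic(profiling::Statistic::Scheduled);
//   const auto executed_on_channel_0 = scheduler.statistic(profiling::Statistic::Executed, 0U);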
/**
* Starts profiling of idle times and specifies the results file.
* @param output_file File to write idle times after stopping MxTasking.
*/
void profile(const std::string &output_file);
bool operator==(const util::core_set &cores) const noexcept { return _core_set == cores; }
bool operator!=(const util::core_set &cores) const noexcept { return _core_set != cores; }
private:
// Cores to run the worker threads on.
const util::core_set _core_set;
// Number of all channels.
const std::uint16_t _count_channels;
// Flag for the worker threads. If false, the worker threads will stop.
// This is atomic for hardware that does not guarantee atomic reads/writes of booleans.
alignas(64) util::maybe_atomic<bool> _is_running{false};
// All initialized workers.
alignas(64) std::array<Worker *, config::max_cores()> _worker{nullptr};
// Map of channel id to NUMA region id.
alignas(64) std::array<std::uint8_t, config::max_cores()> _channel_numa_node_map{0U};
// Epoch manager for memory reclamation.
alignas(64) memory::reclamation::EpochManager _epoch_manager;
// Profiler for task statistics.
profiling::Statistic _statistic;
// Profiler for idle times.
profiling::Profiler _profiler{};
/**
* Decides whether a task should be scheduled to the local
* channel or to a remote one.
*
* @param is_readonly Access mode of the task.
* @param primitive The synchronization primitive of the task annotated resource.
* @param resource_channel_id Channel id of the task annotated resource.
* @param current_channel_id Channel id where the spawn() operation is called.
* @return True, if the task should be scheduled locally.
*/
[[nodiscard]] static inline bool keep_task_local(const bool is_readonly, const synchronization::primitive primitive,
const std::uint16_t resource_channel_id,
const std::uint16_t current_channel_id)
{
return (resource_channel_id == current_channel_id) ||
(is_readonly && primitive != synchronization::primitive::ScheduleAll) ||
(primitive != synchronization::primitive::None && primitive != synchronization::primitive::ScheduleAll &&
primitive != synchronization::primitive::ScheduleWriter);
}
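// Illustrative outcomes of keep_task_local() (based solely on the condition above):
//  - resource lives on the spawning channel                      -> local.
//  - read-only task, resource synchronized via OLFIT,
//    spawned from another channel                                 -> local (optimistic read).
//  - writing task, resource synchronized via ScheduleWriter,
//    spawned from another channel                                 -> remote (serialized on the resource's channel).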
};
} // namespace mx::tasking

208
src/mx/tasking/task.h Normal file
View File

@@ -0,0 +1,208 @@
#pragma once
#include "config.h"
#include "task_stack.h"
#include <bitset>
#include <cstdint>
#include <functional>
#include <mx/resource/resource.h>
#include <variant>
namespace mx::tasking {
enum priority : std::uint8_t
{
low = 0,
normal = 1
};
class TaskInterface;
class TaskResult
{
public:
static TaskResult make_succeed(TaskInterface *successor_task) noexcept { return TaskResult{successor_task, false}; }
static TaskResult make_remove() noexcept { return TaskResult{nullptr, true}; }
static TaskResult make_succeed_and_remove(TaskInterface *successor_task) noexcept
{
return TaskResult{successor_task, true};
}
static TaskResult make_null() noexcept { return TaskResult{nullptr, false}; }
constexpr TaskResult() = default;
~TaskResult() = default;
TaskResult &operator=(const TaskResult &) = default;
explicit operator TaskInterface *() const noexcept { return _successor_task; }
[[nodiscard]] bool is_remove() const noexcept { return _remove_task; }
[[nodiscard]] bool has_successor() const noexcept { return _successor_task != nullptr; }
private:
constexpr TaskResult(TaskInterface *successor_task, const bool remove) noexcept
: _successor_task(successor_task), _remove_task(remove)
{
}
TaskInterface *_successor_task = nullptr;
bool _remove_task = false;
};
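// Usage sketch for the factories above (inside a task's execute(); `successor` is a
// hypothetical, already allocated follow-up task):
//
//   return successor != nullptr ? TaskResult::make_succeed_and_remove(successor)
//                               : TaskResult::make_remove();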
/**
* The task is the central execution unit of MxTasking.
* Every task that should be executed has to derive
* from this class.
*/
class TaskInterface
{
public:
using channel = std::uint16_t;
using node = std::uint8_t;
using resource_and_size = std::pair<mx::resource::ptr, std::uint16_t>;
constexpr TaskInterface() = default;
virtual ~TaskInterface() = default;
/**
* Will be executed by a worker when the task gets CPU time.
*
* @param core_id (System-)ID of the core, the task is executed on.
* @param channel_id Channel ID the task is executed on.
* @return Pointer to the follow-up task.
*/
virtual TaskResult execute(std::uint16_t core_id, std::uint16_t channel_id) = 0;
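// A minimal derived task might look like this (sketch; CounterTask and its member
// are hypothetical and not part of MxTasking):
//
//   class CounterTask final : public mx::tasking::TaskInterface
//   {
//   public:
//       mx::tasking::TaskResult execute(std::uint16_t /*core_id*/, std::uint16_t /*channel_id*/) override
//       {
//           ++_counter;
//           return mx::tasking::TaskResult::make_remove();
//       }
//
//   private:
//       std::uint64_t _counter{0U};
//   };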
/**
* Annotate the task with a resource the task will work on.
*
* @param resource Pointer to the resource.
* @param size Size of the resource (that will be prefetched).
*/
void annotate(const mx::resource::ptr resource_, const std::uint16_t size) noexcept
{
_annotation.target = std::make_pair(resource_, size);
}
/**
* Annotate the task with a desired channel the task should be executed on.
*
* @param channel_id ID of the channel.
*/
void annotate(const channel channel_id) noexcept { _annotation.target = channel_id; }
/**
* Annotate the task with a desired NUMA node id the task should be executed on.
*
* @param node_id ID of the NUMA node.
*/
void annotate(const node node_id) noexcept { _annotation.target = node_id; }
/**
* Annotate the task with a run priority (low or normal).
*
* @param priority_ Priority the task should run with.
*/
void annotate(const priority priority_) noexcept { _annotation.priority = priority_; }
/**
* Annotate whether the task is a reading or a writing task.
*
* @param is_readonly True, when the task is read only (false by default).
*/
void is_readonly(const bool is_readonly) noexcept { _annotation.is_readonly = is_readonly; }
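// Annotation sketch (illustrative; `task` is a user-allocated task and `tree_node`
// is an mx::resource::ptr obtained elsewhere):
//
//   task->annotate(tree_node, 64U);                  // run near the resource, prefetch 64 bytes
//   task->is_readonly(true);                         // pure lookup, allows optimistic execution
//   task->annotate(mx::tasking::priority::low);      // de-prioritize against normal tasks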
/**
* @return The annotated resource.
*/
[[nodiscard]] mx::resource::ptr annotated_resource() const noexcept
{
return std::get<0>(std::get<resource_and_size>(_annotation.target));
}
/**
* @return The annotated resource size.
*/
[[nodiscard]] std::uint16_t annotated_resource_size() const noexcept
{
return std::get<1>(std::get<resource_and_size>(_annotation.target));
}
/**
* @return The annotated channel.
*/
[[nodiscard]] channel annotated_channel() const noexcept { return std::get<channel>(_annotation.target); }
/**
* @return The annotated NUMA node id.
*/
[[nodiscard]] node annotated_node() const noexcept { return std::get<node>(_annotation.target); }
/**
* @return Annotated priority.
*/
[[nodiscard]] enum priority priority() const noexcept { return _annotation.priority; }
/**
* @return True, when the task is a read only task.
*/
[[nodiscard]] bool is_readonly() const noexcept { return _annotation.is_readonly; }
/**
* @return True, when the task has a resource annotated.
*/
[[nodiscard]] bool has_resource_annotated() const noexcept
{
return std::holds_alternative<resource_and_size>(_annotation.target);
}
/**
* @return True, when the task has a channel annotated.
*/
[[nodiscard]] bool has_channel_annotated() const noexcept
{
return std::holds_alternative<channel>(_annotation.target);
}
/**
* @return True, when the task has a NUMA node annotated.
*/
[[nodiscard]] bool has_node_annotated() const noexcept { return std::holds_alternative<node>(_annotation.target); }
/**
* @return Pointer to the next task in spawn queue.
*/
[[nodiscard]] TaskInterface *next() const noexcept { return _next; }
/**
* Set the next task for scheduling.
* @param next Task scheduled after this task.
*/
void next(TaskInterface *next) noexcept { _next = next; }
private:
/**
* Annotation of a task.
*/
class annotation
{
public:
constexpr annotation() noexcept = default;
~annotation() = default;
// Is the task just reading?
bool is_readonly{false};
// Priority of a task.
enum priority priority
{
priority::normal
};
// Target the task will run on.
std::variant<channel, node, resource_and_size, bool> target{false};
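// The bool alternative (default: false) encodes "no target annotated"; in that case all
// has_*_annotated() accessors return false and the scheduler falls back to the spawning channel.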
} __attribute__((packed));
// Pointer for next task in queue.
TaskInterface *_next{nullptr};
// The task's annotations.
annotation _annotation;
};
} // namespace mx::tasking

163
src/mx/tasking/task_buffer.h Normal file
View File

@@ -0,0 +1,163 @@
#pragma once
#include "load.h"
#include "prefetch_slot.h"
#include "task.h"
#include <array>
#include <cstdint>
#include <mx/system/cache.h>
#include <utility>
namespace mx::tasking {
/**
* The task buffer holds tasks that are ready to execute.
* The buffer is realized as a ring buffer with a fixed size.
* All empty slots are null pointers.
*/
template <std::size_t S> class TaskBuffer
{
private:
class Slot
{
public:
constexpr Slot() noexcept = default;
~Slot() noexcept = default;
void task(TaskInterface *task) noexcept { _task = task; }
[[nodiscard]] TaskInterface *consume_task() noexcept { return std::exchange(_task, nullptr); }
void prefetch() noexcept { _prefetch_slot(); }
void prefetch(TaskInterface *task) noexcept { _prefetch_slot = task; }
bool operator==(std::nullptr_t) const noexcept { return _task == nullptr; }
bool operator!=(std::nullptr_t) const noexcept { return _task != nullptr; }
private:
TaskInterface *_task{nullptr};
PrefetchSlot _prefetch_slot{};
};
public:
constexpr explicit TaskBuffer(const std::uint8_t prefetch_distance) noexcept : _prefetch_distance(prefetch_distance)
{
}
~TaskBuffer() noexcept = default;
/**
* @return True, when the buffer is empty.
*/
[[nodiscard]] bool empty() const noexcept { return _buffer[_head] == nullptr; }
/**
* @return Number of tasks in the buffer.
*/
[[nodiscard]] std::uint16_t size() const noexcept
{
return _tail >= _head ? (_tail - _head) : (S - (_head - _tail));
}
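// Example: with S = 8, _head = 6 and _tail = 2 the buffer has wrapped around and
// size() = 8 - (6 - 2) = 4 queued tasks.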
/**
* @return Maximal number of tasks the buffer can hold.
*/
constexpr auto max_size() const noexcept { return S; }
/**
* @return Number of free slots.
*/
[[nodiscard]] std::uint16_t available_slots() const noexcept { return S - size(); }
/**
* @return The next task in the buffer; the slot will be available afterwards.
*/
TaskInterface *next() noexcept;
/**
* Takes out tasks from the given queue and inserts them into the buffer.
* @param from_queue Queue to take tasks from.
* @param count Maximal number of tasks to take out of the queue.
* @return Number of retrieved tasks.
*/
template <class Q> std::uint16_t fill(Q &from_queue, std::uint16_t count) noexcept;
private:
// Prefetch distance.
const std::uint8_t _prefetch_distance;
// Index of the first element in the buffer.
std::uint16_t _head{0U};
// Index of the last element in the buffer.
std::uint16_t _tail{0U};
// Array with task-slots.
std::array<Slot, S> _buffer{};
/**
* Normalizes the index with respect to the buffer size; relies on S being a power of two.
* @param index Index.
* @return Normalized index.
*/
static std::uint16_t normalize(const std::uint16_t index) noexcept { return index & (S - 1U); }
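// For example, with S = 64: normalize(64) = 64 & 63 = 0 and normalize(65) = 1;
// the bit mask only wraps correctly because S is a power of two.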
/**
* Normalizes the index backwards with respect to the given offset.
* @param index Index.
* @param offset Offset to index.
* @return Normalized index.
*/
static std::uint16_t normalize_backward(const std::uint16_t index, const std::uint16_t offset) noexcept
{
return index >= offset ? index - offset : S - (offset - index);
}
};
template <std::size_t S> TaskInterface *TaskBuffer<S>::next() noexcept
{
auto &slot = this->_buffer[this->_head];
if (slot != nullptr)
{
slot.prefetch();
this->_head = TaskBuffer<S>::normalize(this->_head + 1U);
return slot.consume_task();
}
return nullptr;
}
template <std::size_t S>
template <class Q>
std::uint16_t TaskBuffer<S>::fill(Q &from_queue, const std::uint16_t count) noexcept
{
if (count == 0U || from_queue.empty())
{
return 0U;
}
const auto size = S - count;
const auto is_prefetching = this->_prefetch_distance > 0U;
auto prefetch_tail = TaskBuffer<S>::normalize_backward(this->_tail, this->_prefetch_distance);
for (auto i = 0U; i < count; ++i)
{
auto *task = static_cast<TaskInterface *>(from_queue.pop_front());
if (task == nullptr)
{
return i;
}
// Schedule prefetch instruction <prefetch_distance> slots before.
if (is_prefetching && (size + i) >= this->_prefetch_distance)
{
this->_buffer[prefetch_tail].prefetch(task);
}
// Schedule task.
this->_buffer[this->_tail].task(task);
// Increment tail.
this->_tail = TaskBuffer<S>::normalize(this->_tail + 1U);
prefetch_tail = TaskBuffer<S>::normalize(prefetch_tail + 1U);
}
return count;
}
} // namespace mx::tasking

162
src/mx/tasking/task_stack.h Normal file
View File

@@ -0,0 +1,162 @@
#pragma once
#include "config.h"
#include <array>
#include <cstddef>
#include <cstring>
#include <mx/system/environment.h>
#ifdef USE_SSE2
#include <emmintrin.h>
#endif
namespace mx::tasking {
/**
* Stack to save/restore tasks before/after optimistic synchronization.
* In case of a failed read, the task will be restored and re-run.
*/
class TaskInterface;
class TaskStack
{
public:
constexpr TaskStack() : _data({}) { _data.fill(std::byte{'\0'}); }
~TaskStack() = default;
/**
* Saves the full task on the stack.
* @param task Task to save.
*/
void save(const TaskInterface *task) noexcept
{
if constexpr (system::Environment::is_sse2() && (config::task_size() == 64U || config::task_size() == 128U))
{
TaskStack::memcpy_simd<config::task_size()>(_data.data(), static_cast<const void *>(task));
}
else if constexpr (config::task_size() == 64U || config::task_size() == 128U)
{
TaskStack::memcpy_tiny<config::task_size()>(_data.data(), static_cast<const void *>(task));
}
else
{
std::memcpy(_data.data(), static_cast<const void *>(task), config::task_size());
}
}
/**
* Restores the full task from the stack.
* @param task Task to restore.
*/
void restore(TaskInterface *task) const noexcept
{
if constexpr (system::Environment::is_sse2() && (config::task_size() == 64U || config::task_size() == 128U))
{
TaskStack::memcpy_simd<config::task_size()>(static_cast<void *>(task), _data.data());
}
else if constexpr (config::task_size() == 64U || config::task_size() == 128U)
{
TaskStack::memcpy_tiny<config::task_size()>(static_cast<void *>(task), _data.data());
}
else
{
std::memcpy(static_cast<void *>(task), _data.data(), config::task_size());
}
}
/**
* Saves some data on the stack.
*
* @param index Index where to store.
* @param data Data to store.
*/
template <typename T> void store(const std::uint16_t index, const T &data)
{
*reinterpret_cast<T *>(&_data[index]) = data;
}
/**
* Restores some data from the stack.
*
* @param index Index where the data is stored.
* @return The restored data.
*/
template <typename T> const T *read(const std::uint16_t index) const
{
return reinterpret_cast<const T *>(&_data[index]);
}
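// Sketch of store()/read() (illustrative; index and type are chosen by the caller and
// must not collide with a task saved via save()):
//
//   std::uint32_t attempts = 3U;
//   stack.store<std::uint32_t>(0U, attempts);
//   const std::uint32_t restored = *stack.read<std::uint32_t>(0U);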
private:
// Data to store tasks or single data on the stack.
std::array<std::byte, config::task_size()> _data;
template <std::size_t S>
static inline void memcpy_simd([[maybe_unused]] void *destination, [[maybe_unused]] const void *src)
{
#ifdef USE_SSE2
if constexpr (S == 64U)
{
__m128i m0 = _mm_loadu_si128(static_cast<const __m128i *>(src) + 0U);
__m128i m1 = _mm_loadu_si128(static_cast<const __m128i *>(src) + 1U);
__m128i m2 = _mm_loadu_si128(static_cast<const __m128i *>(src) + 2U);
__m128i m3 = _mm_loadu_si128(static_cast<const __m128i *>(src) + 3U);
_mm_storeu_si128(static_cast<__m128i *>(destination) + 0U, m0);
_mm_storeu_si128(static_cast<__m128i *>(destination) + 1U, m1);
_mm_storeu_si128(static_cast<__m128i *>(destination) + 2U, m2);
_mm_storeu_si128(static_cast<__m128i *>(destination) + 3U, m3);
}
else if constexpr (S == 128U)
{
__m128i m0 = _mm_loadu_si128(static_cast<const __m128i *>(src) + 0U);
__m128i m1 = _mm_loadu_si128(static_cast<const __m128i *>(src) + 1U);
__m128i m2 = _mm_loadu_si128(static_cast<const __m128i *>(src) + 2U);
__m128i m3 = _mm_loadu_si128(static_cast<const __m128i *>(src) + 3U);
__m128i m4 = _mm_loadu_si128(static_cast<const __m128i *>(src) + 4U);
__m128i m5 = _mm_loadu_si128(static_cast<const __m128i *>(src) + 5U);
__m128i m6 = _mm_loadu_si128(static_cast<const __m128i *>(src) + 6U);
__m128i m7 = _mm_loadu_si128(static_cast<const __m128i *>(src) + 7U);
_mm_storeu_si128(static_cast<__m128i *>(destination) + 0U, m0);
_mm_storeu_si128(static_cast<__m128i *>(destination) + 1U, m1);
_mm_storeu_si128(static_cast<__m128i *>(destination) + 2U, m2);
_mm_storeu_si128(static_cast<__m128i *>(destination) + 3U, m3);
_mm_storeu_si128(static_cast<__m128i *>(destination) + 4U, m4);
_mm_storeu_si128(static_cast<__m128i *>(destination) + 5U, m5);
_mm_storeu_si128(static_cast<__m128i *>(destination) + 6U, m6);
_mm_storeu_si128(static_cast<__m128i *>(destination) + 7U, m7);
}
#endif
}
template <std::size_t S>
static inline void memcpy_tiny([[maybe_unused]] void *destination, [[maybe_unused]] const void *src)
{
if constexpr (S == 64U)
{
static_cast<std::int64_t *>(destination)[0U] = static_cast<const std::int64_t *>(src)[0U];
static_cast<std::int64_t *>(destination)[1U] = static_cast<const std::int64_t *>(src)[1U];
static_cast<std::int64_t *>(destination)[2U] = static_cast<const std::int64_t *>(src)[2U];
static_cast<std::int64_t *>(destination)[3U] = static_cast<const std::int64_t *>(src)[3U];
static_cast<std::int64_t *>(destination)[4U] = static_cast<const std::int64_t *>(src)[4U];
static_cast<std::int64_t *>(destination)[5U] = static_cast<const std::int64_t *>(src)[5U];
static_cast<std::int64_t *>(destination)[6U] = static_cast<const std::int64_t *>(src)[6U];
static_cast<std::int64_t *>(destination)[7U] = static_cast<const std::int64_t *>(src)[7U];
}
else if constexpr (S == 128U)
{
static_cast<std::int64_t *>(destination)[0U] = static_cast<const std::int64_t *>(src)[0U];
static_cast<std::int64_t *>(destination)[1U] = static_cast<const std::int64_t *>(src)[1U];
static_cast<std::int64_t *>(destination)[2U] = static_cast<const std::int64_t *>(src)[2U];
static_cast<std::int64_t *>(destination)[3U] = static_cast<const std::int64_t *>(src)[3U];
static_cast<std::int64_t *>(destination)[4U] = static_cast<const std::int64_t *>(src)[4U];
static_cast<std::int64_t *>(destination)[5U] = static_cast<const std::int64_t *>(src)[5U];
static_cast<std::int64_t *>(destination)[6U] = static_cast<const std::int64_t *>(src)[6U];
static_cast<std::int64_t *>(destination)[7U] = static_cast<const std::int64_t *>(src)[7U];
static_cast<std::int64_t *>(destination)[8U] = static_cast<const std::int64_t *>(src)[8U];
static_cast<std::int64_t *>(destination)[9U] = static_cast<const std::int64_t *>(src)[9U];
static_cast<std::int64_t *>(destination)[10U] = static_cast<const std::int64_t *>(src)[10U];
static_cast<std::int64_t *>(destination)[11U] = static_cast<const std::int64_t *>(src)[11U];
static_cast<std::int64_t *>(destination)[12U] = static_cast<const std::int64_t *>(src)[12U];
static_cast<std::int64_t *>(destination)[13U] = static_cast<const std::int64_t *>(src)[13U];
static_cast<std::int64_t *>(destination)[14U] = static_cast<const std::int64_t *>(src)[14U];
static_cast<std::int64_t *>(destination)[15U] = static_cast<const std::int64_t *>(src)[15U];
}
}
};
} // namespace mx::tasking

231
src/mx/tasking/worker.cpp Normal file
View File

@@ -0,0 +1,231 @@
#include "worker.h"
#include "config.h"
#include "runtime.h"
#include "task.h"
#include <cassert>
#include <mx/system/builtin.h>
#include <mx/system/topology.h>
#include <mx/util/random.h>
using namespace mx::tasking;
Worker::Worker(const std::uint16_t id, const std::uint16_t target_core_id, const std::uint16_t target_numa_node_id,
const util::maybe_atomic<bool> &is_running, const std::uint16_t prefetch_distance,
memory::reclamation::LocalEpoch &local_epoch,
const std::atomic<memory::reclamation::epoch_t> &global_epoch, profiling::Statistic &statistic) noexcept
: _target_core_id(target_core_id), _prefetch_distance(prefetch_distance),
_channel(id, target_numa_node_id, prefetch_distance), _local_epoch(local_epoch), _global_epoch(global_epoch),
_statistic(statistic), _is_running(is_running)
{
}
void Worker::execute()
{
while (this->_is_running == false)
{
system::builtin::pause();
}
TaskInterface *task;
const auto core_id = system::topology::core_id();
assert(this->_target_core_id == core_id && "Worker not pinned to correct core.");
const auto channel_id = this->_channel.id();
while (this->_is_running)
{
if constexpr (config::memory_reclamation() == config::UpdateEpochPeriodically)
{
this->_local_epoch.enter(this->_global_epoch);
}
this->_channel_size = this->_channel.fill();
if constexpr (config::task_statistics())
{
this->_statistic.increment<profiling::Statistic::Fill>(channel_id);
}
while ((task = this->_channel.next()) != nullptr)
{
// Whenever the worker-local task-buffer falls below
// the prefetch distance, we re-fill the buffer to avoid
// empty slots in the prefetch-buffer.
if (--this->_channel_size <= this->_prefetch_distance)
{
if constexpr (config::memory_reclamation() == config::UpdateEpochPeriodically)
{
this->_local_epoch.enter(this->_global_epoch);
}
this->_channel_size = this->_channel.fill();
if constexpr (config::task_statistics())
{
this->_statistic.increment<profiling::Statistic::Fill>(channel_id);
}
}
if constexpr (config::task_statistics())
{
this->_statistic.increment<profiling::Statistic::Executed>(channel_id);
if (task->has_resource_annotated())
{
if (task->is_readonly())
{
this->_statistic.increment<profiling::Statistic::ExecutedReader>(channel_id);
}
else
{
this->_statistic.increment<profiling::Statistic::ExecutedWriter>(channel_id);
}
}
}
// Based on the annotated resource and its synchronization
// primitive, we choose the fitting execution context.
auto result = TaskResult{};
switch (Worker::synchronization_primitive(task))
{
case synchronization::primitive::ScheduleWriter:
result = this->execute_optimistic(core_id, channel_id, task);
break;
case synchronization::primitive::OLFIT:
result = this->execute_olfit(core_id, channel_id, task);
break;
case synchronization::primitive::ScheduleAll:
case synchronization::primitive::None:
result = task->execute(core_id, channel_id);
break;
case synchronization::primitive::ReaderWriterLatch:
result = Worker::execute_reader_writer_latched(core_id, channel_id, task);
break;
case synchronization::primitive::ExclusiveLatch:
result = Worker::execute_exclusive_latched(core_id, channel_id, task);
break;
}
// The task chain may be finished; in that case the
// task has no successor. Otherwise, we spawn
// the successor task.
if (result.has_successor())
{
runtime::spawn(*static_cast<TaskInterface *>(result), channel_id);
}
if (result.is_remove())
{
runtime::delete_task(core_id, task);
}
}
}
}
TaskResult Worker::execute_exclusive_latched(const std::uint16_t core_id, const std::uint16_t channel_id,
mx::tasking::TaskInterface *const task)
{
auto *resource = resource::ptr_cast<resource::ResourceInterface>(task->annotated_resource());
resource::ResourceInterface::scoped_exclusive_latch _{resource};
return task->execute(core_id, channel_id);
}
TaskResult Worker::execute_reader_writer_latched(const std::uint16_t core_id, const std::uint16_t channel_id,
mx::tasking::TaskInterface *const task)
{
auto *resource = resource::ptr_cast<resource::ResourceInterface>(task->annotated_resource());
// Readers only need to acquire a "read-only" latch.
if (task->is_readonly())
{
resource::ResourceInterface::scoped_rw_latch<false> _{resource};
return task->execute(core_id, channel_id);
}
{
resource::ResourceInterface::scoped_rw_latch<true> _{resource};
return task->execute(core_id, channel_id);
}
}
TaskResult Worker::execute_optimistic(const std::uint16_t core_id, const std::uint16_t channel_id,
mx::tasking::TaskInterface *const task)
{
auto *optimistic_resource = resource::ptr_cast<resource::ResourceInterface>(task->annotated_resource());
if (task->is_readonly())
{
// For readers running on a different channel than the writer,
// we need to validate the version of the resource. This
// entails saving the task's state on a stack and
// re-running the task whenever the version check fails.
if (task->annotated_resource().channel_id() != channel_id)
{
return this->execute_optimistic_read(core_id, channel_id, optimistic_resource, task);
}
// Whenever the task is executed on the same channel
// where writing tasks are executed, we do not need to
// synchronize because no concurrent write can happen.
return task->execute(core_id, channel_id);
}
// Writers, however, need to acquire the version to tell readers that
// the resource is being modified. This is done by making the version odd before
// writing to the resource and even afterwards. Here, we can use a simple
// fetch_add operation, because writers are serialized on the channel.
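// Example: version 4 -> 5 while the write is in flight -> 6 when it completes; a reader
// that captured version 4 fails its later validation against 5 or 6 and re-runs.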
{
resource::ResourceInterface::scoped_optimistic_latch _{optimistic_resource};
return task->execute(core_id, channel_id);
}
}
TaskResult Worker::execute_olfit(const std::uint16_t core_id, const std::uint16_t channel_id, TaskInterface *const task)
{
auto *optimistic_resource = resource::ptr_cast<resource::ResourceInterface>(task->annotated_resource());
if (task->is_readonly())
{
return this->execute_optimistic_read(core_id, channel_id, optimistic_resource, task);
}
// Writers, however, need to acquire the version to tell readers that
// the resource is being modified. This is done by making the version odd before
// writing to the resource and even afterwards. Here, we need to use a compare-
// exchange because writers can appear on every channel.
{
resource::ResourceInterface::scoped_olfit_latch _{optimistic_resource};
return task->execute(core_id, channel_id);
}
}
TaskResult Worker::execute_optimistic_read(const std::uint16_t core_id, const std::uint16_t channel_id,
resource::ResourceInterface *optimistic_resource, TaskInterface *const task)
{
if constexpr (config::memory_reclamation() == config::UpdateEpochOnRead)
{
this->_local_epoch.enter(this->_global_epoch);
}
// The current state of the task is saved so that it
// can be restored if the read operation fails after
// the task has already been modified.
this->_task_stack.save(task);
do
{
const auto version = optimistic_resource->version();
const auto result = task->execute(core_id, channel_id);
if (optimistic_resource->is_version_valid(version))
{
if constexpr (config::memory_reclamation() == config::UpdateEpochOnRead)
{
this->_local_epoch.leave();
}
return result;
}
// At this point, the version check failed and we need
// to re-run the read operation.
this->_task_stack.restore(task);
} while (true);
}

130
src/mx/tasking/worker.h Normal file
View File

@@ -0,0 +1,130 @@
#pragma once
#include "channel.h"
#include "config.h"
#include "profiling/statistic.h"
#include "task.h"
#include "task_stack.h"
#include <atomic>
#include <cstddef>
#include <memory>
#include <mx/memory/reclamation/epoch_manager.h>
#include <mx/util/maybe_atomic.h>
#include <variant>
#include <vector>
namespace mx::tasking {
/**
* The worker executes tasks from its own channel until the "running" flag is false.
*/
class alignas(64) Worker
{
public:
Worker(std::uint16_t id, std::uint16_t target_core_id, std::uint16_t target_numa_node_id,
const util::maybe_atomic<bool> &is_running, std::uint16_t prefetch_distance,
memory::reclamation::LocalEpoch &local_epoch, const std::atomic<memory::reclamation::epoch_t> &global_epoch,
profiling::Statistic &statistic) noexcept;
~Worker() noexcept = default;
/**
* Starts the worker (typically in its own thread).
*/
void execute();
/**
* @return Id of the logical core this worker runs on.
*/
[[nodiscard]] std::uint16_t core_id() const noexcept { return _target_core_id; }
[[nodiscard]] Channel &channel() noexcept { return _channel; }
[[nodiscard]] const Channel &channel() const noexcept { return _channel; }
private:
// Id of the logical core.
const std::uint16_t _target_core_id;
// Distance of prefetching tasks.
const std::uint16_t _prefetch_distance;
std::int32_t _channel_size{0U};
// Stack for persisting tasks in optimistic execution. Optimistically
// executed tasks may fail and be restored after execution.
alignas(64) TaskStack _task_stack;
// Channel where tasks are stored for execution.
alignas(64) Channel _channel;
// Local epoch of this worker.
memory::reclamation::LocalEpoch &_local_epoch;
// Global epoch.
const std::atomic<memory::reclamation::epoch_t> &_global_epoch;
// Statistics container.
profiling::Statistic &_statistic;
// Flag for "running" state of MxTasking.
const util::maybe_atomic<bool> &_is_running;
/**
* Analyzes the given task and chooses the execution method regarding synchronization.
* @param task Task to be executed.
* @return Synchronization method.
*/
static synchronization::primitive synchronization_primitive(TaskInterface *task) noexcept
{
return task->has_resource_annotated() ? task->annotated_resource().synchronization_primitive()
: synchronization::primitive::None;
}
/**
* Executes a task with a latch.
* @param core_id Id of the core.
* @param channel_id Id of the channel.
* @param task Task to be executed.
* @return Task to be scheduled after execution.
*/
static TaskResult execute_exclusive_latched(std::uint16_t core_id, std::uint16_t channel_id, TaskInterface *task);
/**
* Executes a task with a reader/writer latch.
* @param core_id Id of the core.
* @param channel_id Id of the channel.
* @param task Task to be executed.
* @return Task to be scheduled after execution.
*/
static TaskResult execute_reader_writer_latched(std::uint16_t core_id, std::uint16_t channel_id,
TaskInterface *task);
/**
* Executes the task optimistically.
* @param core_id Id of the core.
* @param channel_id Id of the channel.
* @param task Task to be executed.
* @return Task to be scheduled after execution.
*/
TaskResult execute_optimistic(std::uint16_t core_id, std::uint16_t channel_id, TaskInterface *task);
/**
* Executes the task using the OLFIT protocol.
* @param core_id Id of the core.
* @param channel_id Id of the channel.
* @param task Task to be executed.
* @return Task to be scheduled after execution.
*/
TaskResult execute_olfit(std::uint16_t core_id, std::uint16_t channel_id, TaskInterface *task);
/**
* Executes the read-only task optimistically.
* @param core_id Id of the core.
* @param channel_id Id of the channel.
* @param resource Resource the task reads.
* @param task Task to be executed.
* @return Task to be scheduled after execution.
*/
TaskResult execute_optimistic_read(std::uint16_t core_id, std::uint16_t channel_id,
resource::ResourceInterface *resource, TaskInterface *task);
};
} // namespace mx::tasking

64
src/mx/util/aligned_t.h Normal file
View File

@@ -0,0 +1,64 @@
#pragma once
#include <type_traits>
namespace mx::util {
/**
* Aligns the given data type to 64 bytes (one cache line) to prevent false sharing.
*/
template <typename T> class alignas(64) aligned_t
{
public:
constexpr aligned_t() noexcept = default;
explicit constexpr aligned_t(const T &value) noexcept : _value(value) {}
constexpr aligned_t(const aligned_t<T> &other) = default;
template <typename... Args> explicit aligned_t(Args &&... args) noexcept : _value(std::forward<Args>(args)...) {}
~aligned_t() noexcept = default;
aligned_t<T> &operator=(const aligned_t<T> &) = default;
aligned_t<T> &operator=(aligned_t<T> &&) noexcept = default;
explicit operator T() const noexcept { return _value; }
T &operator*() noexcept { return _value; };
const T &operator*() const noexcept { return _value; };
T &value() noexcept { return _value; }
const T &value() const noexcept { return _value; }
aligned_t<T> &operator=(const T &value) noexcept
{
_value = value;
return *this;
}
bool operator==(std::nullptr_t) const noexcept
{
if constexpr (std::is_pointer<T>::value)
{
return _value == nullptr;
}
else
{
return false;
}
}
bool operator!=(std::nullptr_t) const noexcept
{
if constexpr (std::is_pointer<T>::value)
{
return _value != nullptr;
}
else
{
return true;
}
}
private:
T _value = T();
};
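// Usage sketch (illustrative): keep per-worker counters on separate cache lines to
// avoid false sharing between cores; `worker_id` is a hypothetical index.
//
//   std::array<mx::util::aligned_t<std::uint64_t>, 64U> counters{};
//   *counters[worker_id] += 1U;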
} // namespace mx::util

Some files were not shown because too many files have changed in this diff