From 6ee6177c9ec87e0d2a145c79b96a16977ddcc0c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20L=C3=BCtke=20Dreimann?= Date: Thu, 30 Jun 2022 12:40:56 +0200 Subject: [PATCH 01/14] added gpgpu driver --- .gitmodules | 3 + repos/dde_uos-intel-gpgpu/README | 0 repos/dde_uos-intel-gpgpu/include/gpgpu.h | 5 + repos/dde_uos-intel-gpgpu/run/gpgpu.run | 67 ++++++ .../src/gpgpu/gpgpu_genode.cc | 136 ++++++++++++ .../src/gpgpu/gpgpu_genode.h | 135 ++++++++++++ repos/dde_uos-intel-gpgpu/src/gpgpu/main.cc | 82 +++++++ repos/dde_uos-intel-gpgpu/src/gpgpu/stubs.cc | 67 ++++++ repos/dde_uos-intel-gpgpu/src/gpgpu/target.mk | 20 ++ repos/dde_uos-intel-gpgpu/src/gpgpu/test.cc | 206 ++++++++++++++++++ repos/dde_uos-intel-gpgpu/src/gpgpu/test.h | 10 + repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu | 1 + 12 files changed, 732 insertions(+) create mode 100644 .gitmodules create mode 100644 repos/dde_uos-intel-gpgpu/README create mode 100644 repos/dde_uos-intel-gpgpu/include/gpgpu.h create mode 100644 repos/dde_uos-intel-gpgpu/run/gpgpu.run create mode 100644 repos/dde_uos-intel-gpgpu/src/gpgpu/gpgpu_genode.cc create mode 100644 repos/dde_uos-intel-gpgpu/src/gpgpu/gpgpu_genode.h create mode 100644 repos/dde_uos-intel-gpgpu/src/gpgpu/main.cc create mode 100644 repos/dde_uos-intel-gpgpu/src/gpgpu/stubs.cc create mode 100644 repos/dde_uos-intel-gpgpu/src/gpgpu/target.mk create mode 100644 repos/dde_uos-intel-gpgpu/src/gpgpu/test.cc create mode 100644 repos/dde_uos-intel-gpgpu/src/gpgpu/test.h create mode 160000 repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000..84f727fe1d --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu"] + path = repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu + url = git@ess-git.inf.uos.de:software/uos-intel-gpgpu.git diff --git a/repos/dde_uos-intel-gpgpu/README b/repos/dde_uos-intel-gpgpu/README new file mode 100644 index 
0000000000..e69de29bb2 diff --git a/repos/dde_uos-intel-gpgpu/include/gpgpu.h b/repos/dde_uos-intel-gpgpu/include/gpgpu.h new file mode 100644 index 0000000000..2f0e46d8b9 --- /dev/null +++ b/repos/dde_uos-intel-gpgpu/include/gpgpu.h @@ -0,0 +1,5 @@ +// driver include +#include "../src/uos-intel-gpgpu/driver/gpgpu_driver.h" + +// include for genode wrapper +#include "../src/gpgpu/gpgpu_genode.h" diff --git a/repos/dde_uos-intel-gpgpu/run/gpgpu.run b/repos/dde_uos-intel-gpgpu/run/gpgpu.run new file mode 100644 index 0000000000..60484570e5 --- /dev/null +++ b/repos/dde_uos-intel-gpgpu/run/gpgpu.run @@ -0,0 +1,67 @@ +# build config +set build_components { core init gpgpu timer } + +# platform config +set use_acpica_as_acpi_drv 0 +source ${genode_dir}/repos/base/run/platform_drv.inc +proc platform_drv_policy {} { + global use_acpica_as_acpi_drv + set policy "" + + append_if $use_acpica_as_acpi_drv policy { + } + + append policy { + } + + return $policy +} +append_platform_drv_build_components +build $build_components + +# boot dir +create_boot_directory + +# other config +append config { + + + + + + + + + + + + + + + + + + + +} + +append_platform_drv_config + +append config { + + + +} + +install_config $config + +# boot modules +set boot_modules { + core ld.lib.so init gpgpu timer +} +append_platform_drv_boot_modules +build_boot_image $boot_modules + +# qemu stuff +append qemu_args "-nographic -m 256" +run_genode_until {This is the UOS Intel GPGPU End!.*\n} 10 diff --git a/repos/dde_uos-intel-gpgpu/src/gpgpu/gpgpu_genode.cc b/repos/dde_uos-intel-gpgpu/src/gpgpu/gpgpu_genode.cc new file mode 100644 index 0000000000..0bb6ba702c --- /dev/null +++ b/repos/dde_uos-intel-gpgpu/src/gpgpu/gpgpu_genode.cc @@ -0,0 +1,136 @@ +#include "gpgpu_genode.h" + +#define GENODE // use genodes stdint header +#include "../uos-intel-gpgpu/driver/gpgpu_driver.h" + +void gpgpu_genode::handleInterrupt() +{ + // handle the gpu interrupt + GPGPU_Driver& gpgpudriver = GPGPU_Driver::getInstance(); 
+ gpgpudriver.handleInterrupt(); + gpgpudriver.runNext(); + + // ack the irq + irq->ack_irq(); +} + +gpgpu_genode::gpgpu_genode(Env& e) : env(e), heap{ e.ram(), e.rm() }, alloc(&heap), ram_cap(), mapped_base(0), base(0), pci(e), dev(), prev_dev(), irq(nullptr), dispatcher(env.ep(), *this, &gpgpu_genode::handleInterrupt) +{ + // size of avaible memory for allocator + const unsigned long size = 0x1000 * 0x1000; + + // allocate chunk of ram + ram_cap = e.ram().alloc(size); + mapped_base = e.rm().attach(ram_cap); + base = Dataspace_client(ram_cap).phys_addr(); + + // use this ram for allocator + alloc.add_range(mapped_base, size); +} + +gpgpu_genode::~gpgpu_genode() +{ + // release pci dev and free allocator memory + pci.release_device(dev); + env.ram().free(ram_cap); +} + +void* gpgpu_genode::aligned_alloc(uint32_t alignment, uint32_t size) +{ + void* ptr; + alloc.alloc_aligned(size, &ptr, alignment); + return ptr; +} + +void gpgpu_genode::free(void* addr) +{ + alloc.free(addr); +} + +void gpgpu_genode::createPCIConnection(uint8_t bus, uint8_t device, uint8_t function) +{ + // get first device + pci.with_upgrade([&] () { dev = pci.first_device(); }); + + while (dev.valid()) { + // release old one + pci.release_device(prev_dev); + prev_dev = dev; + + // get next one + pci.with_upgrade([&] () { dev = pci.next_device(dev); }); + + // check if this is the right one + Platform::Device_client client(dev); + uint8_t b, d, f; + client.bus_address(&b, &d, &f); + if(b == bus && d == device && f == function) + { + break; + } + } + + // we did not find the right one + if (!dev.valid()) + { + Genode::error("[GENODE_GPGPU]: Could not find PCI dev: ", bus, device, function); + return; + } +} + +uint32_t gpgpu_genode::readPCI(uint32_t addr) +{ + Platform::Device_client client(dev); + return client.config_read(addr, Platform::Device::ACCESS_32BIT); +} + +void gpgpu_genode::writePCI(uint32_t addr, uint32_t val) +{ + Platform::Device_client client(dev); + pci.with_upgrade([&] () { + 
client.config_write(addr, val, Platform::Device::ACCESS_32BIT); + }); +} + +addr_t gpgpu_genode::getVirtBarAddr(uint8_t bar_id) const +{ + // get virt bar id (why does this exist?) + Platform::Device_client dc(dev); + Platform::Device::Resource res = dc.resource(bar_id); + uint8_t genodeBarID = dc.phys_bar_to_virt(bar_id); + + // create io mem session + Genode::Io_mem_session_capability cap = dc.io_mem(genodeBarID); + if (!cap.valid()) + { + Genode::error("[GENODE_GPGPU]: IO memory session is not valid"); + return 0; + } + + // get dataspace cap + Genode::Io_mem_session_client mem(cap); + Genode::Io_mem_dataspace_capability mem_ds(mem.dataspace()); + if (!mem_ds.valid()) + { + Genode::error("[GENODE_GPGPU]: IO mem dataspace cap not valid"); + return 0; + } + + // add addr to rm and get virt addr + addr_t vaddr = env.rm().attach(mem_ds); + vaddr |= res.base() & 0xfff; + return vaddr; +} + +void gpgpu_genode::registerInterruptHandler() +{ + Platform::Device_client client(dev); + static Irq_session_client irq_client(client.irq(0)); // 0 ?? 
+ irq = &irq_client; + + // set dispatcher + irq->sigh(dispatcher); + + // initial ack + irq->ack_irq(); +} diff --git a/repos/dde_uos-intel-gpgpu/src/gpgpu/gpgpu_genode.h b/repos/dde_uos-intel-gpgpu/src/gpgpu/gpgpu_genode.h new file mode 100644 index 0000000000..b448f3e988 --- /dev/null +++ b/repos/dde_uos-intel-gpgpu/src/gpgpu/gpgpu_genode.h @@ -0,0 +1,135 @@ +#ifndef GPGPU_GENODE_H +#define GPGPU_GENODE_H + +// stdint +#include +using namespace Genode; + +// allocator +#include +#include +#include + +// pci +#include +#include +#include +#include + +// interrupts +#include + +class gpgpu_genode +{ +private: + // genode enviroment + Env& env; + + // allocator + Heap heap; + Allocator_avl alloc; + Ram_dataspace_capability ram_cap; + addr_t mapped_base; + addr_t base; + + // pci + Platform::Connection pci; + Platform::Device_capability dev; + Platform::Device_capability prev_dev; + + // interrupts + Irq_session_client* irq; + Signal_handler dispatcher; + + // do not allow copies + gpgpu_genode(const gpgpu_genode& copy) = delete; + gpgpu_genode& operator=(const gpgpu_genode& src) = delete; + + /** + * @brief Interrupt handler + * + */ + void handleInterrupt(); + +public: + /** + * @brief Construct a new gpgpu genode object + * + * @param e + */ + gpgpu_genode(Env& e); + + /** + * @brief Destroy the gpgpu genode object + * + */ + ~gpgpu_genode(); + + /** + * @brief allocate aligned memory + * + * @param alignment the alignment + * @param size the size in bytes + * @return void* the address of the allocated memory + */ + void* aligned_alloc(uint32_t alignment, uint32_t size); + + /** + * @brief free memory + * + * @param addr the address of the memory to be freed + */ + void free(void* addr); + + /** + * @brief converts a virtual address into a physical address + * + * @param virt the virtual address + * @return addr_t the physical address + */ + addr_t virt_to_phys(addr_t virt) const + { + return virt - mapped_base + base; + } + + /** + * @brief creates a connection 
to the PCI device. This has to be called before any read/write to the PCI device! + * + * @param bus the bus id + * @param device the device id + * @param function the function id + */ + void createPCIConnection(uint8_t bus, uint8_t device, uint8_t function); + + /** + * @brief read from pci config space + * + * @param addr the address to read from + * @return uint32_t the value + */ + uint32_t readPCI(uint32_t addr); + + /** + * @brief write to pci config space (some register are protected by genode!) + * + * @param addr the address to write to + * @param val the value to write + */ + void writePCI(uint32_t addr, uint32_t val); + + /** + * @brief Get the Virt Bar Addr object + * + * @param bar_id + * @return addr_t + */ + addr_t getVirtBarAddr(uint8_t bar_id) const; + + /** + * @brief register the interrupt handler for the current PCI device + * + */ + void registerInterruptHandler(); +}; + +#endif // GPGPU_GENODE_H diff --git a/repos/dde_uos-intel-gpgpu/src/gpgpu/main.cc b/repos/dde_uos-intel-gpgpu/src/gpgpu/main.cc new file mode 100644 index 0000000000..61eb3d78bd --- /dev/null +++ b/repos/dde_uos-intel-gpgpu/src/gpgpu/main.cc @@ -0,0 +1,82 @@ +#include + +#define GENODE // use genodes stdint header +#include "../uos-intel-gpgpu/driver/gpgpu_driver.h" +#include "gpgpu_genode.h" + +//#define TEST // test stubs only (works with qemu) +#ifdef TEST +#include "../uos-intel-gpgpu/stubs.h" +#else +#include "test.h" +#endif // TEST + +gpgpu_genode* _global_gpgpu_genode; + +void Component::construct(Genode::Env& e) +{ + Genode::log("Hello world: UOS Intel GPGPU!"); + Genode::log("Build: ", __TIMESTAMP__); + + // init globals + static gpgpu_genode gg(e); + _global_gpgpu_genode = &gg; + +#ifdef TEST + // test prink + printk("Hello printk: %d", 42); + + // test alloc + uint8_t* test = (uint8_t*)uos_aligned_alloc(0x1000, 0x1000); + uint64_t addr = (uint64_t)test; + if((addr & 0xFFF) != 0) + { + Genode::error("mem alignment failed: ", addr); + } + for(int i = 0; i < 0x1000; i++) 
+ { + test[i] = 0x42; + } + for(int i = 0; i < 0x1000; i++) + { + if(test[i] != 0x42) + { + Genode::error("mem write or read failed!"); + break; + } + } + free(test); + Genode::log("Allocator test finished!"); + + // test pci + uint32_t base = calculatePCIConfigHeaderAddress(0, 2 , 0); + uint32_t dev_ven = readPCIConfigSpace(base + 0); + if((dev_ven & 0xFFFF) == 0x8086) + { + Genode::log("PCI test successful!"); + } + else + { + Genode::error("PCI test failed!"); + } + + // test pci memory + uint8_t* test2 = (uint8_t*)_global_gpgpu_genode->getVirtBarAddr(0); + test2[0x42] = 0x42; + Genode::log("PCI memory test finished!"); + + // test interrupts + _global_gpgpu_genode->registerInterruptHandler(); + Genode::log("Interrupt test finished!"); +#else + // init driver + GPGPU_Driver& gpgpudriver = GPGPU_Driver::getInstance(); + gpgpudriver.init(0); + _global_gpgpu_genode->registerInterruptHandler(); + + // run the test and hope the best + run_gpgpu_test(); +#endif // TEST + + Genode::log("This is the UOS Intel GPGPU End!"); +} diff --git a/repos/dde_uos-intel-gpgpu/src/gpgpu/stubs.cc b/repos/dde_uos-intel-gpgpu/src/gpgpu/stubs.cc new file mode 100644 index 0000000000..97036aa517 --- /dev/null +++ b/repos/dde_uos-intel-gpgpu/src/gpgpu/stubs.cc @@ -0,0 +1,67 @@ +// stdint +#include +using namespace Genode; + +// printk +#include +#include +#include + +// genode instance +#include "gpgpu_genode.h" +extern gpgpu_genode* _global_gpgpu_genode; + +// printing (optional) +extern "C" int printk(const char* str, ...) 
+{ + va_list list; + va_start(list, str); + + char buff[256]; + String_console sc(buff, sizeof(buff)); + sc.vprintf(str, list); + + va_end(list); + + Genode::log("[GPU] ", Genode::Cstring(buff)); + return 0; +} + +// allocator +extern "C" void* uos_aligned_alloc(uint32_t alignment, uint32_t size) +{ + return _global_gpgpu_genode->aligned_alloc(alignment, size); +} + +extern "C" void free(void* addr) +{ + _global_gpgpu_genode->free(addr); +} + +// pci +extern "C" uint32_t calculatePCIConfigHeaderAddress(uint8_t bus, uint8_t device, uint8_t function) +{ + _global_gpgpu_genode->createPCIConnection(bus, device, function); + return 0; +} + +extern "C" uint32_t readPCIConfigSpace(uint32_t addr) +{ + return _global_gpgpu_genode->readPCI(addr); +} + +extern "C" void writePCIConfigSpace(uint32_t address, uint32_t value) +{ + _global_gpgpu_genode->writePCI(address, value); +} + +// address model +extern "C" void* getVirtBarAddr(uint8_t bar_id) +{ + return (void*)_global_gpgpu_genode->getVirtBarAddr(bar_id); +} + +extern "C" void* virt_to_phys(void* addr) +{ + return (void*)_global_gpgpu_genode->virt_to_phys((addr_t)addr); +} diff --git a/repos/dde_uos-intel-gpgpu/src/gpgpu/target.mk b/repos/dde_uos-intel-gpgpu/src/gpgpu/target.mk new file mode 100644 index 0000000000..dcb68e06d7 --- /dev/null +++ b/repos/dde_uos-intel-gpgpu/src/gpgpu/target.mk @@ -0,0 +1,20 @@ +TARGET = gpgpu +REQUIRES = x86_64 + +SRC_CC = main.cc gpgpu_genode.cc stubs.cc test.cc +LIBS = base + +UOS_INTEL_GPGPU = uos-intel-gpgpu-link-cxx.o +EXT_OBJECTS = $(BUILD_BASE_DIR)/bin/$(UOS_INTEL_GPGPU) + +$(TARGET): $(UOS_INTEL_GPGPU) + +$(UOS_INTEL_GPGPU): $(SRC_CC) + $(MSG_BUILD) "Building uos-intel-gpgpu..." + $(MAKE) -C $(REP_DIR)/src/uos-intel-gpgpu/ + cp $(REP_DIR)/src/uos-intel-gpgpu/build/$(UOS_INTEL_GPGPU) $(BUILD_BASE_DIR)/bin/. 
+ +clean_uos-intel-gpgpu: + $(MAKE) -C $(REP_DIR)/src/uos-intel-gpgpu/ clean + +clean: clean_uos-intel-gpgpu diff --git a/repos/dde_uos-intel-gpgpu/src/gpgpu/test.cc b/repos/dde_uos-intel-gpgpu/src/gpgpu/test.cc new file mode 100644 index 0000000000..ba7d7f921c --- /dev/null +++ b/repos/dde_uos-intel-gpgpu/src/gpgpu/test.cc @@ -0,0 +1,206 @@ +#define GENODE // use genodes stdint header +#include "../uos-intel-gpgpu/driver/gpgpu_driver.h" +#include "../uos-intel-gpgpu/stubs.h" + +#define ELEMENTS 4096 + +uint32_t* in; +uint32_t* out; + +/* +kernel void clmain(global const unsigned int* in, global unsigned int* out) +{ + unsigned int i = get_global_id(0); + out[i] = in[i]; +} +*/ + +static unsigned char test_Gen9core_gen[] = { + 0x43, 0x54, 0x4e, 0x49, 0x2e, 0x04, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x4c, 0x04, 0x96, 0x2a, 0x25, 0xad, 0x06, 0x1f, + 0x99, 0x00, 0x72, 0x8d, 0x08, 0x00, 0x00, 0x00, 0xac, 0x03, 0x00, 0x00, + 0x80, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x88, 0x00, 0x00, 0x00, 0xe0, 0x00, 0x00, 0x00, 0x63, 0x6c, 0x6d, 0x61, + 0x69, 0x6e, 0x00, 0x00, 0x01, 0x00, 0x60, 0x00, 0x0c, 0x02, 0x60, 0x20, + 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x80, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x30, 0x00, 0x10, 0x00, 0x16, 0xc0, 0x04, 0xc0, 0x04, + 0x41, 0x00, 0x00, 0x00, 0x2c, 0x0a, 0x80, 0x20, 0x10, 0x01, 0x00, 0x0a, + 0x64, 0x00, 0x00, 0x00, 0x01, 0x4d, 0x00, 0x20, 0x07, 0x7f, 0x03, 0x00, + 0x40, 0x00, 0x80, 0x00, 0x28, 0x0a, 0xa0, 0x20, 0x80, 0x00, 0x00, 0x12, + 0x20, 0x00, 0xb1, 0x00, 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x20, 0x21, + 0x80, 0x00, 0x00, 0x12, 0x40, 0x00, 0xb1, 0x00, 0x40, 0x96, 0x01, 0x20, + 0x07, 0x05, 0x05, 0x07, 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x20, 0x21, + 0x20, 0x01, 0x8d, 0x0a, 0xe0, 0x00, 0x00, 0x00, 0x09, 0x00, 0x80, 0x00, + 0x28, 0x0a, 0xa0, 0x20, 0xa0, 0x00, 0x8d, 0x1e, 0x02, 0x00, 0x02, 0x00, + 
0x09, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x20, 0x21, 0x20, 0x01, 0x8d, 0x1e, + 0x02, 0x00, 0x02, 0x00, 0x31, 0x00, 0x80, 0x0c, 0x68, 0x02, 0x60, 0x21, + 0xa0, 0x00, 0x00, 0x06, 0x00, 0x5e, 0x20, 0x04, 0x31, 0x20, 0x80, 0x0c, + 0x68, 0x02, 0xa0, 0x21, 0x20, 0x01, 0x00, 0x06, 0x00, 0x5e, 0x20, 0x04, + 0x33, 0x00, 0x80, 0x0c, 0x70, 0xb0, 0x00, 0x00, 0xa2, 0x00, 0x00, 0x00, + 0x01, 0x5e, 0x02, 0x04, 0x33, 0x20, 0x80, 0x0c, 0x70, 0xd0, 0x00, 0x00, + 0x22, 0x01, 0x00, 0x00, 0x01, 0x5e, 0x02, 0x04, 0x31, 0x00, 0x60, 0x07, + 0x04, 0x02, 0x00, 0x20, 0xe0, 0x0f, 0x00, 0x06, 0x10, 0x00, 0x00, 0x82, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0xc0, 0xff, 0x83, 0x00, 0x00, 0x00, 0x03, 0x7f, 0x00, 0xff, 0x1f, + 0x00, 0x00, 0xe0, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xff, 0x83, 0x00, 0x00, 0x00, 0x03, + 0x7f, 0x00, 0xff, 0x1f, 0x00, 0x00, 0xe0, 0x0f, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x2b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 
0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, + 0x00, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, + 0x69, 0x6e, 0x00, 0x00, 0x75, 0x69, 0x6e, 0x74, 0x2a, 0x3b, 0x38, 0x00, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 
0x48, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, + 0x00, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, + 0x6f, 0x75, 0x74, 0x00, 0x75, 0x69, 0x6e, 0x74, 0x2a, 0x3b, 0x38, 0x00, + 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00 +}; + +void cleanUp() +{ + printk("Yeah.. it finished!"); + + // set gpu frequency to minimum + GPGPU_Driver::getInstance().setMinFreq(); + + for(int i = 0; i < ELEMENTS; i++) + { + if(out[i] != in[i]) + { + printk("Error at Item %d (%d != %d)!", i, out[i], in[i]); + } + } + + // free buffers + free((void *)in); + free((void *)out); +} + +void run_gpgpu_test() +{ + // create kernel and buffer config struct + static kernel_config kconf; + static buffer_config buffconf[2]; + + kconf.range[0] = ELEMENTS; // number of executions + kconf.workgroupsize[0] = 0; // 0 = auto + kconf.binary = test_Gen9core_gen; + kconf.finish_callback = cleanUp; + + // allocate buffers + in = (uint32_t*)uos_aligned_alloc(0x1000, kconf.range[0] * sizeof(uint32_t)); + out = (uint32_t*)uos_aligned_alloc(0x1000, kconf.range[0] * sizeof(uint32_t)); + + // config buffers + kconf.buffCount = 2; + kconf.buffConfigs = buffconf; + kconf.buffConfigs[0].buffer = (uint32_t*)in; + kconf.buffConfigs[0].buffer_size = kconf.range[0] * sizeof(uint32_t); + kconf.buffConfigs[1].buffer = (uint32_t*)out; + kconf.buffConfigs[1].buffer_size = kconf.range[0] * sizeof(uint32_t); + + for(int i = 0; i < ELEMENTS; i++) + { + in[i] = 0x42; + } + + // set maximum freuqency + GPGPU_Driver& gpgpudriver = GPGPU_Driver::getInstance(); + gpgpudriver.setMaxFreq(); + + // start gpu task + gpgpudriver.enqueueRun(kconf); +} diff --git a/repos/dde_uos-intel-gpgpu/src/gpgpu/test.h b/repos/dde_uos-intel-gpgpu/src/gpgpu/test.h new file mode 100644 index 0000000000..f4fbc0875e --- /dev/null +++ 
b/repos/dde_uos-intel-gpgpu/src/gpgpu/test.h @@ -0,0 +1,10 @@ +#ifndef TEST_H +#define TEST_H + +/** + * @brief run a test kernel + * + */ +void run_gpgpu_test(); + +#endif // TEST_H diff --git a/repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu b/repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu new file mode 160000 index 0000000000..eca8dac23a --- /dev/null +++ b/repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu @@ -0,0 +1 @@ +Subproject commit eca8dac23a1bd6aec59c3a3142eae8a5f504bc51 From 383ad4ca453905ecb1b4727af7b340342b8e0a2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20L=C3=BCtke=20Dreimann?= Date: Thu, 7 Jul 2022 10:52:44 +0200 Subject: [PATCH 02/14] added simple RPC setup --- repos/dde_uos-intel-gpgpu/include/gpgpu.h | 5 -- .../include/gpgpu/session.h | 26 +++++++ repos/dde_uos-intel-gpgpu/run/gpgpu.run | 1 + repos/dde_uos-intel-gpgpu/src/gpgpu/rpc.cc | 68 +++++++++++++++++++ repos/dde_uos-intel-gpgpu/src/gpgpu/rpc.h | 6 ++ repos/dde_uos-intel-gpgpu/src/gpgpu/target.mk | 2 +- repos/hello_gpgpu/README | 0 .../include/hello_gpgpu_session/client.h | 21 ++++++ .../include/hello_gpgpu_session/connection.h | 21 ++++++ repos/hello_gpgpu/run/hello_gpgpu.run | 42 ++++++++++++ repos/hello_gpgpu/src/hello_gpgpu/main.cc | 12 ++++ repos/hello_gpgpu/src/hello_gpgpu/target.mk | 3 + 12 files changed, 201 insertions(+), 6 deletions(-) delete mode 100644 repos/dde_uos-intel-gpgpu/include/gpgpu.h create mode 100644 repos/dde_uos-intel-gpgpu/include/gpgpu/session.h create mode 100644 repos/dde_uos-intel-gpgpu/src/gpgpu/rpc.cc create mode 100644 repos/dde_uos-intel-gpgpu/src/gpgpu/rpc.h create mode 100644 repos/hello_gpgpu/README create mode 100644 repos/hello_gpgpu/include/hello_gpgpu_session/client.h create mode 100644 repos/hello_gpgpu/include/hello_gpgpu_session/connection.h create mode 100644 repos/hello_gpgpu/run/hello_gpgpu.run create mode 100644 repos/hello_gpgpu/src/hello_gpgpu/main.cc create mode 100644 repos/hello_gpgpu/src/hello_gpgpu/target.mk diff --git 
a/repos/dde_uos-intel-gpgpu/include/gpgpu.h b/repos/dde_uos-intel-gpgpu/include/gpgpu.h deleted file mode 100644 index 2f0e46d8b9..0000000000 --- a/repos/dde_uos-intel-gpgpu/include/gpgpu.h +++ /dev/null @@ -1,5 +0,0 @@ -// driver include -#include "../src/uos-intel-gpgpu/driver/gpgpu_driver.h" - -// include for genode wrapper -#include "../src/gpgpu/gpgpu_genode.h" diff --git a/repos/dde_uos-intel-gpgpu/include/gpgpu/session.h b/repos/dde_uos-intel-gpgpu/include/gpgpu/session.h new file mode 100644 index 0000000000..bacebe2e33 --- /dev/null +++ b/repos/dde_uos-intel-gpgpu/include/gpgpu/session.h @@ -0,0 +1,26 @@ +#ifndef GPGPU_SESSION +#define GPGPU_SESSION + +#include +#include + +namespace gpgpu { struct Session; } + +struct gpgpu::Session : Genode::Session +{ + static const char *service_name() { return "gpgpu"; } + + enum { CAP_QUOTA = 1 }; + + virtual void say_hello() = 0; + + /******************* + ** RPC interface ** + *******************/ + + GENODE_RPC(Rpc_say_hello, void, say_hello); + + GENODE_RPC_INTERFACE(Rpc_say_hello); +}; + +#endif // GPGPU_SESSION diff --git a/repos/dde_uos-intel-gpgpu/run/gpgpu.run b/repos/dde_uos-intel-gpgpu/run/gpgpu.run index 60484570e5..1d4571a0f2 100644 --- a/repos/dde_uos-intel-gpgpu/run/gpgpu.run +++ b/repos/dde_uos-intel-gpgpu/run/gpgpu.run @@ -49,6 +49,7 @@ append_platform_drv_config append config { + } diff --git a/repos/dde_uos-intel-gpgpu/src/gpgpu/rpc.cc b/repos/dde_uos-intel-gpgpu/src/gpgpu/rpc.cc new file mode 100644 index 0000000000..2ec3bd6023 --- /dev/null +++ b/repos/dde_uos-intel-gpgpu/src/gpgpu/rpc.cc @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include + +namespace gpgpu { + struct Session_component; + struct Root_component; + struct Main; +} + +struct gpgpu::Session_component : Genode::Rpc_object +{ + void say_hello() override + { + Genode::log("Hello from uos-intel-gpgpu!"); + } +}; + +class gpgpu::Root_component +: + public Genode::Root_component +{ + protected: + Session_component 
*_create_session(const char *) override + { + return new (md_alloc()) Session_component(); + } + + public: + + Root_component(Genode::Entrypoint &ep, + Genode::Allocator &alloc) + : + Genode::Root_component(ep, alloc) + { + + } +}; + + +struct gpgpu::Main +{ + Genode::Env &env; + + /* + * A sliced heap is used for allocating session objects - thereby we + * can release objects separately. + */ + Genode::Sliced_heap sliced_heap { env.ram(), env.rm() }; + + gpgpu::Root_component root { env.ep(), sliced_heap }; + + Main(Genode::Env &env) : env(env) + { + /* + * Create a RPC object capability for the root interface and + * announce the service to our parent. + */ + env.parent().announce(env.ep().manage(root)); + } +}; + +void construct_RPC(Genode::Env &env) +{ + static gpgpu::Main main(env); +} diff --git a/repos/dde_uos-intel-gpgpu/src/gpgpu/rpc.h b/repos/dde_uos-intel-gpgpu/src/gpgpu/rpc.h new file mode 100644 index 0000000000..b4e0b9a0d5 --- /dev/null +++ b/repos/dde_uos-intel-gpgpu/src/gpgpu/rpc.h @@ -0,0 +1,6 @@ +#ifndef RPC_H +#define RPC_H + + + +#endif // RPC_H diff --git a/repos/dde_uos-intel-gpgpu/src/gpgpu/target.mk b/repos/dde_uos-intel-gpgpu/src/gpgpu/target.mk index dcb68e06d7..ff98c0c7c7 100644 --- a/repos/dde_uos-intel-gpgpu/src/gpgpu/target.mk +++ b/repos/dde_uos-intel-gpgpu/src/gpgpu/target.mk @@ -1,7 +1,7 @@ TARGET = gpgpu REQUIRES = x86_64 -SRC_CC = main.cc gpgpu_genode.cc stubs.cc test.cc +SRC_CC = main.cc gpgpu_genode.cc stubs.cc test.cc rpc.cc LIBS = base UOS_INTEL_GPGPU = uos-intel-gpgpu-link-cxx.o diff --git a/repos/hello_gpgpu/README b/repos/hello_gpgpu/README new file mode 100644 index 0000000000..e69de29bb2 diff --git a/repos/hello_gpgpu/include/hello_gpgpu_session/client.h b/repos/hello_gpgpu/include/hello_gpgpu_session/client.h new file mode 100644 index 0000000000..5de64f27e3 --- /dev/null +++ b/repos/hello_gpgpu/include/hello_gpgpu_session/client.h @@ -0,0 +1,21 @@ +#ifndef HELLO_GPGPU_CLIENT_H +#define HELLO_GPGPU_CLIENT_H + +#include 
+#include +#include + +namespace gpgpu { struct Session_client; } + +struct gpgpu::Session_client : Genode::Rpc_client +{ + Session_client(Genode::Capability cap) + : Genode::Rpc_client(cap) { } + + void say_hello() override + { + call(); + } +}; + +#endif // HELLO_GPGPU_CLIENT_H diff --git a/repos/hello_gpgpu/include/hello_gpgpu_session/connection.h b/repos/hello_gpgpu/include/hello_gpgpu_session/connection.h new file mode 100644 index 0000000000..94facc5536 --- /dev/null +++ b/repos/hello_gpgpu/include/hello_gpgpu_session/connection.h @@ -0,0 +1,21 @@ +#ifndef HELLO_GPGPU_CONNECTION_H +#define HELLO_GPGPU_CONNECTION_H + +#include +#include + +namespace gpgpu { struct Connection; } + +struct gpgpu::Connection : Genode::Connection, Session_client +{ + Connection(Genode::Env &env) + : + /* create session */ + Genode::Connection(env, session(env.parent(), + "ram_quota=6K, cap_quota=4")), + + /* initialize RPC interface */ + Session_client(cap()) { } +}; + +#endif // HELLO_GPGPU_CONNECTION_H diff --git a/repos/hello_gpgpu/run/hello_gpgpu.run b/repos/hello_gpgpu/run/hello_gpgpu.run new file mode 100644 index 0000000000..dfc64f7889 --- /dev/null +++ b/repos/hello_gpgpu/run/hello_gpgpu.run @@ -0,0 +1,42 @@ +# +# Build +# + +build { core init gpgpu hello_gpgpu } + +create_boot_directory + +# +# Generate config +# + +install_config { + + + + + + + + + + + + + + + + + + +} + +# +# Boot image +# + +build_boot_image { core ld.lib.so init gpgpu hello_gpgpu } + +append qemu_args " -nographic " + +run_genode_until "hello gpgpu completed.*\n" 10 diff --git a/repos/hello_gpgpu/src/hello_gpgpu/main.cc b/repos/hello_gpgpu/src/hello_gpgpu/main.cc new file mode 100644 index 0000000000..3b04d77585 --- /dev/null +++ b/repos/hello_gpgpu/src/hello_gpgpu/main.cc @@ -0,0 +1,12 @@ +#include +#include +#include + +void Component::construct(Genode::Env &env) +{ + gpgpu::Connection gpgpu(env); + + gpgpu.say_hello(); + + Genode::log("hello gpgpu completed"); +} diff --git 
a/repos/hello_gpgpu/src/hello_gpgpu/target.mk b/repos/hello_gpgpu/src/hello_gpgpu/target.mk new file mode 100644 index 0000000000..025cc5108e --- /dev/null +++ b/repos/hello_gpgpu/src/hello_gpgpu/target.mk @@ -0,0 +1,3 @@ +TARGET = hello_gpgpu +SRC_CC = main.cc +LIBS = base From 73e34a542eadf73c519828fa55fd818a377a34cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20M=C3=BCller?= Date: Mon, 11 Jul 2022 14:55:34 +0200 Subject: [PATCH 03/14] mml/thread_test: Fixed compiler errors. --- repos/mml/src/app/thread_test/thread_test.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/repos/mml/src/app/thread_test/thread_test.cc b/repos/mml/src/app/thread_test/thread_test.cc index c5ed442b6c..42f2857aa5 100644 --- a/repos/mml/src/app/thread_test/thread_test.cc +++ b/repos/mml/src/app/thread_test/thread_test.cc @@ -11,11 +11,13 @@ struct ThreadTest::Main { Genode::Env &_env; + Main(Genode::Env &env) : _env(env) {} + void execute() { while(true) { std::cout << "Hello world" << std::endl; - std::this_thread::sleep_for(std::chrone::seconds(1)); + std::this_thread::sleep_for(std::chrono::seconds(1)); } } }; @@ -23,6 +25,6 @@ struct ThreadTest::Main void Component::construct(Genode::Env &env) { static ThreadTest::Main main(env); - std::thread([main] - { main->execute(); }); + std::thread([&] + { main.execute(); }); } \ No newline at end of file From b58b34ca7eedee81bf6d0549656c6353cbe56aed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20L=C3=BCtke=20Dreimann?= Date: Mon, 11 Jul 2022 16:19:02 +0200 Subject: [PATCH 04/14] updated gpgpu driver --- repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu b/repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu index eca8dac23a..7b9c018e48 160000 --- a/repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu +++ b/repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu @@ -1 +1 @@ -Subproject commit 
eca8dac23a1bd6aec59c3a3142eae8a5f504bc51 +Subproject commit 7b9c018e4858d08eaa6bb2e12c707e1e45740c81 From f6ba28f53ce5ef273073102b0eebcb7f61c79552 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20L=C3=BCtke=20Dreimann?= Date: Mon, 11 Jul 2022 16:19:12 +0200 Subject: [PATCH 05/14] added opencl test app --- repos/hello_gpgpu/run/hello_gpgpu.run | 2 +- repos/hello_gpgpu/src/hello_gpgpu/CL/cl.cc | 1632 ++++++++++++++ repos/hello_gpgpu/src/hello_gpgpu/CL/cl.h | 1934 +++++++++++++++++ .../src/hello_gpgpu/CL/cl_platform.h | 1432 ++++++++++++ .../src/hello_gpgpu/CL/cl_version.h | 81 + repos/hello_gpgpu/src/hello_gpgpu/main.cc | 17 + repos/hello_gpgpu/src/hello_gpgpu/target.mk | 2 +- repos/hello_gpgpu/src/hello_gpgpu/test.cc | 239 ++ repos/hello_gpgpu/src/hello_gpgpu/test.h | 12 + 9 files changed, 5349 insertions(+), 2 deletions(-) create mode 100644 repos/hello_gpgpu/src/hello_gpgpu/CL/cl.cc create mode 100644 repos/hello_gpgpu/src/hello_gpgpu/CL/cl.h create mode 100644 repos/hello_gpgpu/src/hello_gpgpu/CL/cl_platform.h create mode 100644 repos/hello_gpgpu/src/hello_gpgpu/CL/cl_version.h create mode 100644 repos/hello_gpgpu/src/hello_gpgpu/test.cc create mode 100644 repos/hello_gpgpu/src/hello_gpgpu/test.h diff --git a/repos/hello_gpgpu/run/hello_gpgpu.run b/repos/hello_gpgpu/run/hello_gpgpu.run index dfc64f7889..7d9efb52f0 100644 --- a/repos/hello_gpgpu/run/hello_gpgpu.run +++ b/repos/hello_gpgpu/run/hello_gpgpu.run @@ -27,7 +27,7 @@ install_config { - + } diff --git a/repos/hello_gpgpu/src/hello_gpgpu/CL/cl.cc b/repos/hello_gpgpu/src/hello_gpgpu/CL/cl.cc new file mode 100644 index 0000000000..1757163328 --- /dev/null +++ b/repos/hello_gpgpu/src/hello_gpgpu/CL/cl.cc @@ -0,0 +1,1632 @@ +#define CL_TARGET_OPENCL_VERSION 100 +#include "cl.h" +#include +#include +#define GENODE +#include +#pragma GCC diagnostic ignored "-Wunused-parameter" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Genode */ +static Genode::Allocator_avl* genode_allocator; +extern CL_API_ENTRY void 
CL_API_CALL +clInitGenode(Genode::Allocator_avl& alloc) +{ + genode_allocator = &alloc; +} + +/* Platform API */ +CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformIDs(cl_uint num_entries, + cl_platform_id * platforms, + cl_uint * num_platforms) +{ + if(platforms != NULL) *platforms = 0; /* output is optional: NULL means the caller only wants the count */ + if(num_platforms != NULL) *num_platforms = 1; /* count is optional too; exactly one platform is exposed */ + return CL_SUCCESS; +} + +CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformInfo(cl_platform_id platform, + cl_platform_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + if(platform != 0) + return CL_INVALID_VALUE; + + static char name[] = "UOS-INTEL-GPGPU"; + static char ver[] = "1.0"; + static char vendor[] = "Uni Osnabrueck mld"; + + switch (param_name) + { + case CL_PLATFORM_NAME: + { + char* val = (char*)param_value; + for(size_t i = 0; i < sizeof(name) && i < param_value_size; i++) + val[i] = name[i]; + break; + } + + case CL_PLATFORM_VERSION: + { + char* val = (char*)param_value; + for(size_t i = 0; i < sizeof(ver) && i < param_value_size; i++) + val[i] = ver[i]; + break; + } + + case CL_PLATFORM_VENDOR: + { + char* val = (char*)param_value; + for(size_t i = 0; i < sizeof(vendor) && i < param_value_size; i++) + val[i] = vendor[i]; + break; + } + + default: + return CL_INVALID_VALUE; + break; + } + return CL_SUCCESS; +} + +/* Device APIs */ +CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDs(cl_platform_id platform, + cl_device_type device_type, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) +{ + if(device_type != CL_DEVICE_TYPE_GPU) + return CL_INVALID_VALUE; + + if(devices != NULL) *devices = 0; /* output is optional: NULL means the caller only wants the count */ + if(num_devices != NULL) *num_devices = 1; /* count is optional too; exactly one device is exposed */ + return CL_SUCCESS; +} + +CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceInfo(cl_device_id device, + cl_device_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + if(device != 0) + return CL_INVALID_VALUE; + + static char name[] = "Intel GEN9 GPU"; + static char ver[] = "1.0"; + static char vendor[] = "Uni Osnabrueck mld"; + + switch (param_name) + { + case
CL_DEVICE_NAME: + { + char* val = (char*)param_value; + for(size_t i = 0; i < sizeof(name) && i < param_value_size; i++) + val[i] = name[i]; + break; + } + + case CL_DRIVER_VERSION: + case CL_DEVICE_VERSION: + { + char* val = (char*)param_value; + for(size_t i = 0; i < sizeof(ver) && i < param_value_size; i++) + val[i] = ver[i]; + break; + } + + case CL_DEVICE_VENDOR: + { + char* val = (char*)param_value; + for(size_t i = 0; i < sizeof(vendor) && i < param_value_size; i++) + val[i] = vendor[i]; + break; + } + + default: + return CL_INVALID_VALUE; + break; + } + return CL_SUCCESS; +} + +#ifdef CL_VERSION_1_2 + +CL_API_ENTRY cl_int CL_API_CALL +clCreateSubDevices(cl_device_id in_device, + const cl_device_partition_property * properties, + cl_uint num_devices, + cl_device_id * out_devices, + cl_uint * num_devices_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clRetainDevice(cl_device_id device) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clReleaseDevice(cl_device_id device) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#endif + +#ifdef CL_VERSION_2_1 + +CL_API_ENTRY cl_int CL_API_CALL +clSetDefaultDeviceCommandQueue(cl_context context, + cl_device_id device, + cl_command_queue command_queue) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceAndHostTimer(cl_device_id device, + cl_ulong* device_timestamp, + cl_ulong* host_timestamp) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clGetHostTimer(cl_device_id device, + cl_ulong * host_timestamp) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#endif + +/* Context 
APIs */ +CL_API_ENTRY cl_context CL_API_CALL +clCreateContext(const cl_context_properties * properties, + cl_uint num_devices, + const cl_device_id * devices, + void (CL_CALLBACK * pfn_notify)(const char * errinfo, + const void * private_info, + size_t cb, + void * user_data), + void * user_data, + cl_int * errcode_ret) +{ + if(num_devices != 1 || *devices != 0) + { + *errcode_ret = CL_INVALID_VALUE; + return NULL; + } + + // TODO: RPC: gpgpu_init(); + *errcode_ret = CL_SUCCESS; + return NULL; +} + +CL_API_ENTRY cl_context CL_API_CALL +clCreateContextFromType(const cl_context_properties * properties, + cl_device_type device_type, + void (CL_CALLBACK * pfn_notify)(const char * errinfo, + const void * private_info, + size_t cb, + void * user_data), + void * user_data, + cl_int * errcode_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return NULL; +} + +CL_API_ENTRY cl_int CL_API_CALL +clRetainContext(cl_context context) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clReleaseContext(cl_context context) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clGetContextInfo(cl_context context, + cl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#ifdef CL_VERSION_3_0 + +CL_API_ENTRY cl_int CL_API_CALL +clSetContextDestructorCallback(cl_context context, + void (CL_CALLBACK* pfn_notify)(cl_context context, + void* user_data), + void* user_data) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#endif + +/* Command Queue APIs */ + +#ifdef CL_VERSION_2_0 + +CL_API_ENTRY cl_command_queue CL_API_CALL +clCreateCommandQueueWithProperties(cl_context context, + cl_device_id device, + 
const cl_queue_properties * properties, + cl_int * errcode_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return NULL; +} + +#endif + +CL_API_ENTRY cl_int CL_API_CALL +clRetainCommandQueue(cl_command_queue command_queue) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clReleaseCommandQueue(cl_command_queue command_queue) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clGetCommandQueueInfo(cl_command_queue command_queue, + cl_command_queue_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +/* Memory Object APIs */ +CL_API_ENTRY cl_mem CL_API_CALL +clCreateBuffer(cl_context context, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) +{ + if(host_ptr == NULL) + { + genode_allocator->alloc_aligned(size, &host_ptr, 0x1000); + } + + struct buffer_config* bc; + genode_allocator->alloc(sizeof(struct buffer_config), (void**)&bc); + bc->buffer = host_ptr; + bc->buffer_size = size; + + *errcode_ret = CL_SUCCESS; + return (cl_mem)bc; +} + +#ifdef CL_VERSION_1_1 + +CL_API_ENTRY cl_mem CL_API_CALL +clCreateSubBuffer(cl_mem buffer, + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void * buffer_create_info, + cl_int * errcode_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return NULL; +} + +#endif + +#ifdef CL_VERSION_1_2 + +CL_API_ENTRY cl_mem CL_API_CALL +clCreateImage(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + const cl_image_desc * image_desc, + void * host_ptr, + cl_int * errcode_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return NULL; +} + +#endif + +#ifdef CL_VERSION_2_0 + +CL_API_ENTRY 
cl_mem CL_API_CALL +clCreatePipe(cl_context context, + cl_mem_flags flags, + cl_uint pipe_packet_size, + cl_uint pipe_max_packets, + const cl_pipe_properties * properties, + cl_int * errcode_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return NULL; +} + +#endif + +#ifdef CL_VERSION_3_0 + +CL_API_ENTRY cl_mem CL_API_CALL +clCreateBufferWithProperties(cl_context context, + const cl_mem_properties * properties, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return NULL; +} + +CL_API_ENTRY cl_mem CL_API_CALL +clCreateImageWithProperties(cl_context context, + const cl_mem_properties * properties, + cl_mem_flags flags, + const cl_image_format * image_format, + const cl_image_desc * image_desc, + void * host_ptr, + cl_int * errcode_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return NULL; +} + +#endif + +CL_API_ENTRY cl_int CL_API_CALL +clRetainMemObject(cl_mem memobj) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clReleaseMemObject(cl_mem memobj) +{ + struct buffer_config* bc = (struct buffer_config*)memobj; + genode_allocator->free(bc->buffer); + genode_allocator->free(bc); + return CL_SUCCESS; +} + +CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedImageFormats(cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_image_format * image_formats, + cl_uint * num_image_formats) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clGetMemObjectInfo(cl_mem memobj, + cl_mem_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL 
+clGetImageInfo(cl_mem image, + cl_image_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#ifdef CL_VERSION_2_0 + +CL_API_ENTRY cl_int CL_API_CALL +clGetPipeInfo(cl_mem pipe, + cl_pipe_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#endif + +#ifdef CL_VERSION_1_1 + +CL_API_ENTRY cl_int CL_API_CALL +clSetMemObjectDestructorCallback(cl_mem memobj, + void (CL_CALLBACK * pfn_notify)(cl_mem memobj, + void * user_data), + void * user_data) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#endif + +/* SVM Allocation APIs */ + +#ifdef CL_VERSION_2_0 + +CL_API_ENTRY void * CL_API_CALL +clSVMAlloc(cl_context context, + cl_svm_mem_flags flags, + size_t size, + cl_uint alignment) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return NULL; +} + +CL_API_ENTRY void CL_API_CALL +clSVMFree(cl_context context, + void * svm_pointer) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); +} + +#endif + +/* Sampler APIs */ + +#ifdef CL_VERSION_2_0 + +CL_API_ENTRY cl_sampler CL_API_CALL +clCreateSamplerWithProperties(cl_context context, + const cl_sampler_properties * sampler_properties, + cl_int * errcode_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return NULL; +} + +#endif + +CL_API_ENTRY cl_int CL_API_CALL +clRetainSampler(cl_sampler sampler) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clReleaseSampler(cl_sampler sampler) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clGetSamplerInfo(cl_sampler 
sampler, + cl_sampler_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +/* Program Object APIs */ +CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithSource(cl_context context, + cl_uint count, + const char ** strings, + const size_t * lengths, + cl_int * errcode_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return NULL; +} + +CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBinary(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const size_t * lengths, + const unsigned char ** binaries, + cl_int * binary_status, + cl_int * errcode_ret) +{ + if(*device_list != 0x0) + { + *errcode_ret = CL_INVALID_VALUE; + return NULL; + } + + struct kernel_config* kc; + genode_allocator->alloc(sizeof(struct kernel_config), (void**)&kc); + + kc->binary = (uint8_t*)binaries[0]; + + *errcode_ret = CL_SUCCESS; + return (cl_program)kc; +} + +#ifdef CL_VERSION_1_2 + +CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBuiltInKernels(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const char * kernel_names, + cl_int * errcode_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return NULL; +} + +#endif + +#ifdef CL_VERSION_2_1 + +CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithIL(cl_context context, + const void* il, + size_t length, + cl_int* errcode_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return NULL; +} + +#endif + +CL_API_ENTRY cl_int CL_API_CALL +clRetainProgram(cl_program program) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clReleaseProgram(cl_program program) +{ + struct kernel_config* kc = (struct kernel_config*)program; + genode_allocator->free(kc->buffConfigs); + 
genode_allocator->free(kc); + return CL_SUCCESS; +} + +CL_API_ENTRY cl_int CL_API_CALL +clBuildProgram(cl_program program, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) +{ + return CL_SUCCESS; +} + +#ifdef CL_VERSION_1_2 + +CL_API_ENTRY cl_int CL_API_CALL +clCompileProgram(cl_program program, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + cl_uint num_input_headers, + const cl_program * input_headers, + const char ** header_include_names, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_program CL_API_CALL +clLinkProgram(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + cl_uint num_input_programs, + const cl_program * input_programs, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data, + cl_int * errcode_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return NULL; +} + +#endif + +#ifdef CL_VERSION_2_2 + +CL_API_ENTRY CL_API_PREFIX__VERSION_2_2_DEPRECATED cl_int CL_API_CALL +clSetProgramReleaseCallback(cl_program program, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clSetProgramSpecializationConstant(cl_program program, + cl_uint spec_id, + size_t spec_size, + const void* spec_value) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#endif + +#ifdef CL_VERSION_1_2 + +CL_API_ENTRY cl_int CL_API_CALL +clUnloadPlatformCompiler(cl_platform_id platform) +{ + Genode::log("[OCL] func ", __func__, " 
is not implemented!"); + return CL_INVALID_VALUE; +} + +#endif + +CL_API_ENTRY cl_int CL_API_CALL +clGetProgramInfo(cl_program program, + cl_program_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clGetProgramBuildInfo(cl_program program, + cl_device_id device, + cl_program_build_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +/* Kernel Object APIs */ +CL_API_ENTRY cl_kernel CL_API_CALL +clCreateKernel(cl_program program, + const char * kernel_name, + cl_int * errcode_ret) +{ + struct kernel_config* kc = (struct kernel_config*)program; + + // preallocated 32 buff configs; + genode_allocator->alloc(32 * sizeof(struct buffer_config), (void**)&kc->buffConfigs); + + // set name + kc->kernelName = (char*)kernel_name; + + *errcode_ret = CL_SUCCESS; + return (cl_kernel)kc; +} + +CL_API_ENTRY cl_int CL_API_CALL +clCreateKernelsInProgram(cl_program program, + cl_uint num_kernels, + cl_kernel * kernels, + cl_uint * num_kernels_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#ifdef CL_VERSION_2_1 + +CL_API_ENTRY cl_kernel CL_API_CALL +clCloneKernel(cl_kernel source_kernel, + cl_int* errcode_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return NULL; +} + +#endif + +CL_API_ENTRY cl_int CL_API_CALL +clRetainKernel(cl_kernel kernel) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clReleaseKernel(cl_kernel kernel) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArg(cl_kernel kernel, + 
cl_uint arg_index, + size_t arg_size, + const void * arg_value) +{ + if(arg_index > 31) // preallocated buffConfigs size + return CL_INVALID_ARG_INDEX; + + struct kernel_config* kc = (struct kernel_config*)kernel; + + if(arg_size == sizeof(cl_mem)) + { + struct buffer_config** bc = (struct buffer_config**)arg_value; + kc->buffConfigs[arg_index] = **bc; + } + else + { + struct buffer_config bc; + bc.buffer = (void*)arg_value; + bc.buffer_size = arg_size; + bc.non_pointer_type = true; + kc->buffConfigs[arg_index] = bc; + } + + if(kc->buffCount < (arg_index + 1)) + kc->buffCount = (arg_index + 1); + + return CL_SUCCESS; +} + +#ifdef CL_VERSION_2_0 + +CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArgSVMPointer(cl_kernel kernel, + cl_uint arg_index, + const void * arg_value) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clSetKernelExecInfo(cl_kernel kernel, + cl_kernel_exec_info param_name, + size_t param_value_size, + const void * param_value) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#endif + +CL_API_ENTRY cl_int CL_API_CALL +clGetKernelInfo(cl_kernel kernel, + cl_kernel_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#ifdef CL_VERSION_1_2 + +CL_API_ENTRY cl_int CL_API_CALL +clGetKernelArgInfo(cl_kernel kernel, + cl_uint arg_indx, + cl_kernel_arg_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#endif + +CL_API_ENTRY cl_int CL_API_CALL +clGetKernelWorkGroupInfo(cl_kernel kernel, + cl_device_id device, + cl_kernel_work_group_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + 
Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#ifdef CL_VERSION_2_1 + +CL_API_ENTRY cl_int CL_API_CALL +clGetKernelSubGroupInfo(cl_kernel kernel, + cl_device_id device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void* input_value, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#endif + +/* Event Object APIs */ +CL_API_ENTRY cl_int CL_API_CALL +clWaitForEvents(cl_uint num_events, + const cl_event * event_list) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clGetEventInfo(cl_event event, + cl_event_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#ifdef CL_VERSION_1_1 + +CL_API_ENTRY cl_event CL_API_CALL +clCreateUserEvent(cl_context context, + cl_int * errcode_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return NULL; +} + +#endif + +CL_API_ENTRY cl_int CL_API_CALL +clRetainEvent(cl_event event) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clReleaseEvent(cl_event event) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#ifdef CL_VERSION_1_1 + +CL_API_ENTRY cl_int CL_API_CALL +clSetUserEventStatus(cl_event event, + cl_int execution_status) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clSetEventCallback(cl_event event, + cl_int command_exec_callback_type, + void (CL_CALLBACK * pfn_notify)(cl_event event, + cl_int event_command_status, + void * user_data), + void * user_data) +{ 
+ Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#endif + +/* Profiling APIs */ +CL_API_ENTRY cl_int CL_API_CALL +clGetEventProfilingInfo(cl_event event, + cl_profiling_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +/* Flush and Finish APIs */ +CL_API_ENTRY cl_int CL_API_CALL +clFlush(cl_command_queue command_queue) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +CL_API_ENTRY cl_int CL_API_CALL +clFinish(cl_command_queue command_queue) +{ + // TODO: RPC: gpgpu_wait(); + return CL_SUCCESS; +} + +/* Enqueued Commands APIs */ +CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + size_t offset, + size_t size, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) +{ + if(blocking_read == false) /* only blocking reads are supported: the copy below runs synchronously */ + { + return CL_INVALID_VALUE; + } + + struct buffer_config* bc = (struct buffer_config*)buffer; + uint8_t* src = (uint8_t*)bc->buffer + offset; /* start reading at the caller's byte offset into the buffer object */ + uint8_t* dst = (uint8_t*)ptr; + for(size_t i = 0; i < size; i++) + { + dst[i] = src[i]; + } + + return CL_SUCCESS; +} + +#ifdef CL_VERSION_1_1 + +CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBufferRect(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + const size_t * buffer_origin, + const size_t * host_origin, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#endif + +CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBuffer(cl_command_queue command_queue, + cl_mem
buffer, + cl_bool blocking_write, + size_t offset, + size_t size, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) +{ + if(blocking_write == false) /* only blocking writes are supported: the copy below runs synchronously */ + { + return CL_INVALID_VALUE; + } + + struct buffer_config* bc = (struct buffer_config*)buffer; + uint8_t* src = (uint8_t*)ptr; + uint8_t* dst = (uint8_t*)bc->buffer + offset; /* start writing at the caller's byte offset into the buffer object */ + for(size_t i = 0; i < size; i++) + { + dst[i] = src[i]; + } + + return CL_SUCCESS; +} + +#ifdef CL_VERSION_1_1 + +CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBufferRect(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + const size_t * buffer_origin, + const size_t * host_origin, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#endif + +#ifdef CL_VERSION_1_2 + +CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillBuffer(cl_command_queue command_queue, + cl_mem buffer, + const void * pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#endif + +CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBuffer(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) +{ + Genode::log("[OCL] func ", __func__, " is not implemented!"); + return CL_INVALID_VALUE; +} + +#ifdef CL_VERSION_1_1 + +CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferRect(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + const size_t *
src_origin,
+                        const size_t * dst_origin,
+                        const size_t * region,
+                        size_t src_row_pitch,
+                        size_t src_slice_pitch,
+                        size_t dst_row_pitch,
+                        size_t dst_slice_pitch,
+                        cl_uint num_events_in_wait_list,
+                        const cl_event * event_wait_list,
+                        cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+#endif
+
+/*
+ * Image transfer and mapping entry points.
+ *
+ * None of them is implemented yet: each one logs its own name and reports
+ * failure - CL_INVALID_VALUE for the calls that return cl_int, NULL (plus
+ * CL_INVALID_VALUE in *errcode_ret) for the calls that return a pointer.
+ */
+
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadImage(cl_command_queue command_queue,
+                   cl_mem image,
+                   cl_bool blocking_read,
+                   const size_t * origin,
+                   const size_t * region,
+                   size_t row_pitch,
+                   size_t slice_pitch,
+                   void * ptr,
+                   cl_uint num_events_in_wait_list,
+                   const cl_event * event_wait_list,
+                   cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteImage(cl_command_queue command_queue,
+                    cl_mem image,
+                    cl_bool blocking_write,
+                    const size_t * origin,
+                    const size_t * region,
+                    size_t input_row_pitch,
+                    size_t input_slice_pitch,
+                    const void * ptr,
+                    cl_uint num_events_in_wait_list,
+                    const cl_event * event_wait_list,
+                    cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+#ifdef CL_VERSION_1_2
+
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueFillImage(cl_command_queue command_queue,
+                   cl_mem image,
+                   const void * fill_color,
+                   const size_t * origin,
+                   const size_t * region,
+                   cl_uint num_events_in_wait_list,
+                   const cl_event * event_wait_list,
+                   cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+#endif
+
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyImage(cl_command_queue command_queue,
+                   cl_mem src_image,
+                   cl_mem dst_image,
+                   const size_t * src_origin,
+                   const size_t * dst_origin,
+                   const size_t * region,
+                   cl_uint num_events_in_wait_list,
+                   const cl_event * event_wait_list,
+                   cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyImageToBuffer(cl_command_queue command_queue,
+                           cl_mem src_image,
+                           cl_mem dst_buffer,
+                           const size_t * src_origin,
+                           const size_t * region,
+                           size_t dst_offset,
+                           cl_uint num_events_in_wait_list,
+                           const cl_event * event_wait_list,
+                           cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBufferToImage(cl_command_queue command_queue,
+                           cl_mem src_buffer,
+                           cl_mem dst_image,
+                           size_t src_offset,
+                           const size_t * dst_origin,
+                           const size_t * region,
+                           cl_uint num_events_in_wait_list,
+                           const cl_event * event_wait_list,
+                           cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+/*
+ * Not implemented: logs the call and returns NULL.
+ *
+ * errcode_ret is optional; when the caller passed one it receives
+ * CL_INVALID_VALUE (the code the cl_int stubs in this file return), so the
+ * caller never reads an unset error value after the NULL result.
+ */
+CL_API_ENTRY void * CL_API_CALL
+clEnqueueMapBuffer(cl_command_queue command_queue,
+                   cl_mem buffer,
+                   cl_bool blocking_map,
+                   cl_map_flags map_flags,
+                   size_t offset,
+                   size_t size,
+                   cl_uint num_events_in_wait_list,
+                   const cl_event * event_wait_list,
+                   cl_event * event,
+                   cl_int * errcode_ret)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    if(errcode_ret != NULL)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+    }
+    return NULL;
+}
+
+/*
+ * Not implemented: logs the call and returns NULL.
+ *
+ * As for clEnqueueMapBuffer, a non-NULL errcode_ret is set to
+ * CL_INVALID_VALUE. image_row_pitch and image_slice_pitch are left untouched.
+ */
+CL_API_ENTRY void * CL_API_CALL
+clEnqueueMapImage(cl_command_queue command_queue,
+                  cl_mem image,
+                  cl_bool blocking_map,
+                  cl_map_flags map_flags,
+                  const size_t * origin,
+                  const size_t * region,
+                  size_t * image_row_pitch,
+                  size_t * image_slice_pitch,
+                  cl_uint num_events_in_wait_list,
+                  const cl_event * event_wait_list,
+                  cl_event * event,
+                  cl_int * errcode_ret)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    if(errcode_ret != NULL)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+    }
+    return NULL;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueUnmapMemObject(cl_command_queue command_queue,
+                        cl_mem memobj,
+                        void * mapped_ptr,
+                        cl_uint num_events_in_wait_list,
+                        const cl_event * event_wait_list,
+                        cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+#ifdef CL_VERSION_1_2
+
+/*
+ * Unless documented otherwise, the entry points below are not implemented:
+ * they log their own name and fail with CL_INVALID_VALUE (cl_int results) or
+ * NULL (pointer / handle results).
+ */
+
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueMigrateMemObjects(cl_command_queue command_queue,
+                           cl_uint num_mem_objects,
+                           const cl_mem * mem_objects,
+                           cl_mem_migration_flags flags,
+                           cl_uint num_events_in_wait_list,
+                           const cl_event * event_wait_list,
+                           cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+#endif
+
+/*
+ * Copies the requested global work sizes - and the local ones when
+ * local_work_size is given - into the kernel_config behind `kernel`.
+ *
+ * The launch itself is not wired up yet (see the TODO), so on success the
+ * call only updates that configuration.
+ *
+ * Returns CL_SUCCESS, or
+ *   CL_INVALID_KERNEL           if kernel is NULL,
+ *   CL_INVALID_WORK_DIMENSION   if work_dim is not in 1..3,
+ *   CL_INVALID_GLOBAL_WORK_SIZE if global_work_size is NULL.
+ */
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueNDRangeKernel(cl_command_queue command_queue,
+                       cl_kernel kernel,
+                       cl_uint work_dim,
+                       const size_t * global_work_offset,
+                       const size_t * global_work_size,
+                       const size_t * local_work_size,
+                       cl_uint num_events_in_wait_list,
+                       const cl_event * event_wait_list,
+                       cl_event * event)
+{
+    // NOTE(review): assumes kernel_config::range and ::workgroupsize hold
+    // three entries each - confirm against the driver's kernel_config
+    const cl_uint max_work_dim = 3;
+
+    // reject arguments that would be dereferenced or used as array bounds below
+    if(kernel == NULL)
+    {
+        return CL_INVALID_KERNEL;
+    }
+    if(work_dim < 1 || work_dim > max_work_dim)
+    {
+        return CL_INVALID_WORK_DIMENSION;
+    }
+    if(global_work_size == NULL)
+    {
+        return CL_INVALID_GLOBAL_WORK_SIZE;
+    }
+
+    struct kernel_config* kc = (struct kernel_config*)kernel;
+    for(cl_uint i = 0; i < work_dim; i++)
+    {
+        kc->range[i] = global_work_size[i];
+        // local sizes are optional; keep the stored ones when none are given
+        if(local_work_size != NULL)
+        {
+            kc->workgroupsize[i] = local_work_size[i];
+        }
+    }
+    // TODO: RPC: gpgpu_enqueueRun(kc);
+    return CL_SUCCESS;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueNativeKernel(cl_command_queue command_queue,
+                      void (CL_CALLBACK * user_func)(void *),
+                      void * args,
+                      size_t cb_args,
+                      cl_uint num_mem_objects,
+                      const cl_mem * mem_list,
+                      const void ** args_mem_loc,
+                      cl_uint num_events_in_wait_list,
+                      const cl_event * event_wait_list,
+                      cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+#ifdef CL_VERSION_1_2
+
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueMarkerWithWaitList(cl_command_queue command_queue,
+                            cl_uint num_events_in_wait_list,
+                            const cl_event * event_wait_list,
+                            cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueBarrierWithWaitList(cl_command_queue command_queue,
+                             cl_uint num_events_in_wait_list,
+                             const cl_event * event_wait_list,
+                             cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+#endif
+
+#ifdef CL_VERSION_2_0
+
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueSVMFree(cl_command_queue command_queue,
+                 cl_uint num_svm_pointers,
+                 void * svm_pointers[],
+                 void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue,
+                                                    cl_uint num_svm_pointers,
+                                                    void * svm_pointers[],
+                                                    void * user_data),
+                 void * user_data,
+                 cl_uint num_events_in_wait_list,
+                 const cl_event * event_wait_list,
+                 cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueSVMMemcpy(cl_command_queue command_queue,
+                   cl_bool blocking_copy,
+                   void * dst_ptr,
+                   const void * src_ptr,
+                   size_t size,
+                   cl_uint num_events_in_wait_list,
+                   const cl_event * event_wait_list,
+                   cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueSVMMemFill(cl_command_queue command_queue,
+                    void * svm_ptr,
+                    const void * pattern,
+                    size_t pattern_size,
+                    size_t size,
+                    cl_uint num_events_in_wait_list,
+                    const cl_event * event_wait_list,
+                    cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueSVMMap(cl_command_queue command_queue,
+                cl_bool blocking_map,
+                cl_map_flags flags,
+                void * svm_ptr,
+                size_t size,
+                cl_uint num_events_in_wait_list,
+                const cl_event * event_wait_list,
+                cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueSVMUnmap(cl_command_queue command_queue,
+                  void * svm_ptr,
+                  cl_uint num_events_in_wait_list,
+                  const cl_event * event_wait_list,
+                  cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+#endif
+
+#ifdef CL_VERSION_2_1
+
+CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueSVMMigrateMem(cl_command_queue command_queue,
+                       cl_uint num_svm_pointers,
+                       const void ** svm_pointers,
+                       const size_t * sizes,
+                       cl_mem_migration_flags flags,
+                       cl_uint num_events_in_wait_list,
+                       const cl_event * event_wait_list,
+                       cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+#endif
+
+#ifdef CL_VERSION_1_2
+
+/* Extension function access
+ *
+ * Returns the extension function address for the given function name,
+ * or NULL if a valid function can not be found. The client must
+ * check to make sure the address is not NULL, before using or
+ * calling the returned function address.
+ */
+CL_API_ENTRY void * CL_API_CALL
+clGetExtensionFunctionAddressForPlatform(cl_platform_id platform,
+                                         const char * func_name)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return NULL;
+}
+
+#endif
+
+#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+    /*
+     * WARNING:
+     * This API introduces mutable state into the OpenCL implementation. It has been REMOVED
+     * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the
+     * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
+     * It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
+     *
+     * Software developers previously relying on this API are instructed to set the command queue
+     * properties when creating the queue, instead.
+     */
+    CL_API_ENTRY cl_int CL_API_CALL
+    clSetCommandQueueProperty(cl_command_queue command_queue,
+                              cl_command_queue_properties properties,
+                              cl_bool enable,
+                              cl_command_queue_properties * old_properties) CL_API_SUFFIX__VERSION_1_0_DEPRECATED;
+#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */
+
+/* Deprecated OpenCL 1.1 APIs */
+
+/*
+ * Not implemented: logs the call and returns NULL. A non-NULL errcode_ret is
+ * set to CL_INVALID_VALUE so the caller does not read an unset error value.
+ */
+CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
+clCreateImage2D(cl_context context,
+                cl_mem_flags flags,
+                const cl_image_format * image_format,
+                size_t image_width,
+                size_t image_height,
+                size_t image_row_pitch,
+                void * host_ptr,
+                cl_int * errcode_ret)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    if(errcode_ret != NULL)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+    }
+    return NULL;
+}
+
+/*
+ * Not implemented: logs the call and returns NULL. A non-NULL errcode_ret is
+ * set to CL_INVALID_VALUE so the caller does not read an unset error value.
+ */
+CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
+clCreateImage3D(cl_context context,
+                cl_mem_flags flags,
+                const cl_image_format * image_format,
+                size_t image_width,
+                size_t image_height,
+                size_t image_depth,
+                size_t image_row_pitch,
+                size_t image_slice_pitch,
+                void * host_ptr,
+                cl_int * errcode_ret)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    if(errcode_ret != NULL)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+    }
+    return NULL;
+}
+
+CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
+clEnqueueMarker(cl_command_queue command_queue,
+                cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
+clEnqueueWaitForEvents(cl_command_queue command_queue,
+                       cl_uint num_events,
+                       const cl_event * event_list)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+/* Does nothing and reports success. */
+CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
+clEnqueueBarrier(cl_command_queue command_queue)
+{
+    return CL_SUCCESS;
+}
+
+CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
+clUnloadCompiler(void)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL
+clGetExtensionFunctionAddress(const char * func_name)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return NULL;
+}
+
+/* Deprecated OpenCL 2.0 APIs */
+
+/*
+ * Checks the device handle and reports the outcome through errcode_ret when
+ * the caller supplied one: CL_INVALID_VALUE for any non-zero device handle,
+ * CL_SUCCESS otherwise.
+ *
+ * NOTE(review): no queue object is created yet - NULL is returned on the
+ * success path as well, so callers have to look at errcode_ret.
+ */
+CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL
+clCreateCommandQueue(cl_context context,
+                     cl_device_id device,
+                     cl_command_queue_properties properties,
+                     cl_int * errcode_ret)
+{
+    const cl_int status = (device != 0) ? CL_INVALID_VALUE : CL_SUCCESS;
+
+    // errcode_ret is optional; never write through a NULL pointer
+    if(errcode_ret != NULL)
+    {
+        *errcode_ret = status;
+    }
+    return NULL;
+}
+
+/*
+ * Not implemented: logs the call and returns NULL. A non-NULL errcode_ret is
+ * set to CL_INVALID_VALUE so the caller does not read an unset error value.
+ */
+CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL
+clCreateSampler(cl_context context,
+                cl_bool normalized_coords,
+                cl_addressing_mode addressing_mode,
+                cl_filter_mode filter_mode,
+                cl_int * errcode_ret)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    if(errcode_ret != NULL)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+    }
+    return NULL;
+}
+
+CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_int CL_API_CALL
+clEnqueueTask(cl_command_queue command_queue,
+              cl_kernel kernel,
+              cl_uint num_events_in_wait_list,
+              const cl_event * event_wait_list,
+              cl_event * event)
+{
+    Genode::log("[OCL] func ", __func__, " is not implemented!");
+    return CL_INVALID_VALUE;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#pragma GCC diagnostic pop
diff --git a/repos/hello_gpgpu/src/hello_gpgpu/CL/cl.h b/repos/hello_gpgpu/src/hello_gpgpu/CL/cl.h
new file mode 100644
index 0000000000..d1e46d5963
--- /dev/null
+++ b/repos/hello_gpgpu/src/hello_gpgpu/CL/cl.h
@@ -0,0 +1,1934 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2020 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+
+#ifndef __OPENCL_CL_H
+#define __OPENCL_CL_H
+
+/* NOTE(review): the first #include below has no target in this copy of the
+ * patch - presumably an angle-bracket header name was lost; confirm against
+ * the original commit before applying. */
+#include 
+#include "cl_version.h"
+#include "cl_platform.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************/
+
+/* API object handles: each one is a pointer to a struct type that is only
+ * named here, not defined in this part of the header. */
+typedef struct _cl_platform_id * cl_platform_id;
+typedef struct _cl_device_id * cl_device_id;
+typedef struct _cl_context * cl_context;
+typedef struct _cl_command_queue * cl_command_queue;
+typedef struct _cl_mem * cl_mem;
+typedef struct _cl_program * cl_program;
+typedef struct _cl_kernel * cl_kernel;
+typedef struct _cl_event * cl_event;
+typedef struct _cl_sampler * cl_sampler;
+
+typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
+/* API-specific aliases of the scalar types cl_uint, cl_int, cl_ulong and
+ * intptr_t. Aliases that depend on a particular OpenCL version are only
+ * declared when the matching CL_VERSION_x_y macro is defined. */
+typedef cl_ulong cl_bitfield;
+typedef cl_ulong cl_properties;
+typedef cl_bitfield cl_device_type;
+typedef cl_uint cl_platform_info;
+typedef cl_uint cl_device_info;
+typedef cl_bitfield cl_device_fp_config;
+typedef cl_uint cl_device_mem_cache_type;
+typedef cl_uint cl_device_local_mem_type;
+typedef cl_bitfield cl_device_exec_capabilities;
+#ifdef CL_VERSION_2_0
+typedef cl_bitfield cl_device_svm_capabilities;
+#endif
+typedef cl_bitfield cl_command_queue_properties;
+#ifdef CL_VERSION_1_2
+typedef intptr_t cl_device_partition_property;
+typedef cl_bitfield cl_device_affinity_domain;
+#endif
+
+typedef intptr_t cl_context_properties;
+typedef cl_uint cl_context_info;
+#ifdef CL_VERSION_2_0
+typedef cl_properties cl_queue_properties;
+#endif
+typedef cl_uint cl_command_queue_info;
+typedef cl_uint cl_channel_order;
+typedef cl_uint cl_channel_type;
+typedef cl_bitfield cl_mem_flags;
+#ifdef CL_VERSION_2_0
+typedef cl_bitfield cl_svm_mem_flags;
+#endif
+typedef cl_uint cl_mem_object_type;
+typedef cl_uint cl_mem_info;
+#ifdef CL_VERSION_1_2
+typedef cl_bitfield cl_mem_migration_flags;
+#endif
+typedef cl_uint cl_image_info;
+#ifdef CL_VERSION_1_1
+typedef cl_uint cl_buffer_create_type;
+#endif
+typedef cl_uint cl_addressing_mode;
+typedef cl_uint cl_filter_mode;
+typedef cl_uint cl_sampler_info;
+typedef cl_bitfield cl_map_flags;
+#ifdef CL_VERSION_2_0
+typedef intptr_t cl_pipe_properties;
+typedef cl_uint cl_pipe_info;
+#endif
+typedef cl_uint cl_program_info;
+typedef cl_uint cl_program_build_info;
+#ifdef CL_VERSION_1_2
+typedef cl_uint cl_program_binary_type;
+#endif
+typedef cl_int cl_build_status;
+typedef cl_uint cl_kernel_info;
+#ifdef CL_VERSION_1_2
+typedef cl_uint cl_kernel_arg_info;
+typedef cl_uint cl_kernel_arg_address_qualifier;
+typedef cl_uint cl_kernel_arg_access_qualifier;
+typedef cl_bitfield cl_kernel_arg_type_qualifier;
+#endif
+typedef cl_uint cl_kernel_work_group_info;
+#ifdef CL_VERSION_2_1
+typedef cl_uint cl_kernel_sub_group_info;
+#endif
+typedef cl_uint cl_event_info;
+typedef cl_uint cl_command_type;
+typedef cl_uint cl_profiling_info;
+#ifdef CL_VERSION_2_0
+typedef cl_properties cl_sampler_properties;
+typedef cl_uint cl_kernel_exec_info;
+#endif
+#ifdef CL_VERSION_3_0
+typedef cl_bitfield cl_device_atomic_capabilities;
+typedef cl_bitfield cl_device_device_enqueue_capabilities;
+typedef cl_uint cl_khronos_vendor_id;
+typedef cl_properties cl_mem_properties;
+typedef cl_uint cl_version;
+#endif
+
+/* Image format: a channel order paired with a channel data type. */
+typedef struct _cl_image_format {
+    cl_channel_order image_channel_order;
+    cl_channel_type image_channel_data_type;
+} cl_image_format;
+
+#ifdef CL_VERSION_1_2
+
+/* Image description.
+ *
+ * The last member depends on the build: with CL_VERSION_2_0 defined (and not
+ * an MSVC build with __STDC__ defined) `buffer` and `mem_object` are the two
+ * members of one anonymous union and therefore share storage; in every other
+ * configuration only `buffer` exists. The __extension__ keyword and the MSVC
+ * warning pragmas only silence diagnostics about that anonymous union. */
+typedef struct _cl_image_desc {
+    cl_mem_object_type image_type;
+    size_t image_width;
+    size_t image_height;
+    size_t image_depth;
+    size_t image_array_size;
+    size_t image_row_pitch;
+    size_t image_slice_pitch;
+    cl_uint num_mip_levels;
+    cl_uint num_samples;
+#ifdef CL_VERSION_2_0
+#if defined(__GNUC__)
+    __extension__ /* Prevents warnings about anonymous union in -pedantic builds */
+#endif
+#if defined(_MSC_VER) && !defined(__STDC__)
+#pragma warning( push )
+#pragma warning( disable : 4201 ) /* Prevents warning about nameless struct/union in /W4 builds */
+#endif
+#if defined(_MSC_VER) && defined(__STDC__)
+    /* Anonymous unions are not supported in /Za builds */
+#else
+    union {
+#endif
+#endif
+      cl_mem buffer;
+#ifdef CL_VERSION_2_0
+#if defined(_MSC_VER) && defined(__STDC__)
+    /* Anonymous unions are not supported in /Za builds */
+#else
+      cl_mem mem_object;
+    };
+#endif
+#if defined(_MSC_VER) && !defined(__STDC__)
+#pragma warning( pop )
+#endif
+#endif
+} cl_image_desc;
+
+#endif
+
+#ifdef CL_VERSION_1_1
+
+/* Region of a buffer given as an origin and a size
+ * (presumably both in bytes - confirm against the OpenCL specification). */
+typedef struct _cl_buffer_region {
+    size_t origin;
+    size_t size;
+} cl_buffer_region;
+
+#endif
+
+#ifdef CL_VERSION_3_0
+
+#define CL_NAME_VERSION_MAX_NAME_SIZE 64
+
+/* A cl_version value together with a name held in a fixed-size buffer of
+ * CL_NAME_VERSION_MAX_NAME_SIZE chars. */
+typedef struct _cl_name_version {
+    cl_version version;
+    char name[CL_NAME_VERSION_MAX_NAME_SIZE];
+} cl_name_version;
+
+#endif
+
+/******************************************************************************/ + +/* Error Codes */ +#define CL_SUCCESS 0 +#define CL_DEVICE_NOT_FOUND -1 +#define CL_DEVICE_NOT_AVAILABLE -2 +#define CL_COMPILER_NOT_AVAILABLE -3 +#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 +#define CL_OUT_OF_RESOURCES -5 +#define CL_OUT_OF_HOST_MEMORY -6 +#define CL_PROFILING_INFO_NOT_AVAILABLE -7 +#define CL_MEM_COPY_OVERLAP -8 +#define CL_IMAGE_FORMAT_MISMATCH -9 +#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 +#define CL_BUILD_PROGRAM_FAILURE -11 +#define CL_MAP_FAILURE -12 +#ifdef CL_VERSION_1_1 +#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 +#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 +#endif +#ifdef CL_VERSION_1_2 +#define CL_COMPILE_PROGRAM_FAILURE -15 +#define CL_LINKER_NOT_AVAILABLE -16 +#define CL_LINK_PROGRAM_FAILURE -17 +#define CL_DEVICE_PARTITION_FAILED -18 +#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 +#endif + +#define CL_INVALID_VALUE -30 +#define CL_INVALID_DEVICE_TYPE -31 +#define CL_INVALID_PLATFORM -32 +#define CL_INVALID_DEVICE -33 +#define CL_INVALID_CONTEXT -34 +#define CL_INVALID_QUEUE_PROPERTIES -35 +#define CL_INVALID_COMMAND_QUEUE -36 +#define CL_INVALID_HOST_PTR -37 +#define CL_INVALID_MEM_OBJECT -38 +#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 +#define CL_INVALID_IMAGE_SIZE -40 +#define CL_INVALID_SAMPLER -41 +#define CL_INVALID_BINARY -42 +#define CL_INVALID_BUILD_OPTIONS -43 +#define CL_INVALID_PROGRAM -44 +#define CL_INVALID_PROGRAM_EXECUTABLE -45 +#define CL_INVALID_KERNEL_NAME -46 +#define CL_INVALID_KERNEL_DEFINITION -47 +#define CL_INVALID_KERNEL -48 +#define CL_INVALID_ARG_INDEX -49 +#define CL_INVALID_ARG_VALUE -50 +#define CL_INVALID_ARG_SIZE -51 +#define CL_INVALID_KERNEL_ARGS -52 +#define CL_INVALID_WORK_DIMENSION -53 +#define CL_INVALID_WORK_GROUP_SIZE -54 +#define CL_INVALID_WORK_ITEM_SIZE -55 +#define CL_INVALID_GLOBAL_OFFSET -56 +#define CL_INVALID_EVENT_WAIT_LIST -57 +#define CL_INVALID_EVENT -58 +#define 
CL_INVALID_OPERATION -59 +#define CL_INVALID_GL_OBJECT -60 +#define CL_INVALID_BUFFER_SIZE -61 +#define CL_INVALID_MIP_LEVEL -62 +#define CL_INVALID_GLOBAL_WORK_SIZE -63 +#ifdef CL_VERSION_1_1 +#define CL_INVALID_PROPERTY -64 +#endif +#ifdef CL_VERSION_1_2 +#define CL_INVALID_IMAGE_DESCRIPTOR -65 +#define CL_INVALID_COMPILER_OPTIONS -66 +#define CL_INVALID_LINKER_OPTIONS -67 +#define CL_INVALID_DEVICE_PARTITION_COUNT -68 +#endif +#ifdef CL_VERSION_2_0 +#define CL_INVALID_PIPE_SIZE -69 +#define CL_INVALID_DEVICE_QUEUE -70 +#endif +#ifdef CL_VERSION_2_2 +#define CL_INVALID_SPEC_ID -71 +#define CL_MAX_SIZE_RESTRICTION_EXCEEDED -72 +#endif + + +/* cl_bool */ +#define CL_FALSE 0 +#define CL_TRUE 1 +#ifdef CL_VERSION_1_2 +#define CL_BLOCKING CL_TRUE +#define CL_NON_BLOCKING CL_FALSE +#endif + +/* cl_platform_info */ +#define CL_PLATFORM_PROFILE 0x0900 +#define CL_PLATFORM_VERSION 0x0901 +#define CL_PLATFORM_NAME 0x0902 +#define CL_PLATFORM_VENDOR 0x0903 +#define CL_PLATFORM_EXTENSIONS 0x0904 +#ifdef CL_VERSION_2_1 +#define CL_PLATFORM_HOST_TIMER_RESOLUTION 0x0905 +#endif +#ifdef CL_VERSION_3_0 +#define CL_PLATFORM_NUMERIC_VERSION 0x0906 +#define CL_PLATFORM_EXTENSIONS_WITH_VERSION 0x0907 +#endif + +/* cl_device_type - bitfield */ +#define CL_DEVICE_TYPE_DEFAULT (1 << 0) +#define CL_DEVICE_TYPE_CPU (1 << 1) +#define CL_DEVICE_TYPE_GPU (1 << 2) +#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_TYPE_CUSTOM (1 << 4) +#endif +#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF + +/* cl_device_info */ +#define CL_DEVICE_TYPE 0x1000 +#define CL_DEVICE_VENDOR_ID 0x1001 +#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 +#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 +#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 +#define 
CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B +#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C +#define CL_DEVICE_ADDRESS_BITS 0x100D +#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E +#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F +#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 +#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 +#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 +#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 +#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 +#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 +#define CL_DEVICE_IMAGE_SUPPORT 0x1016 +#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 +#define CL_DEVICE_MAX_SAMPLERS 0x1018 +#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 +#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A +#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B +#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C +#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D +#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E +#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F +#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 +#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 +#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 +#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 +#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 +#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 +#define CL_DEVICE_ENDIAN_LITTLE 0x1026 +#define CL_DEVICE_AVAILABLE 0x1027 +#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 +#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 +#define CL_DEVICE_QUEUE_PROPERTIES 0x102A /* deprecated */ +#ifdef CL_VERSION_2_0 +#define CL_DEVICE_QUEUE_ON_HOST_PROPERTIES 0x102A +#endif +#define CL_DEVICE_NAME 0x102B +#define CL_DEVICE_VENDOR 0x102C +#define CL_DRIVER_VERSION 0x102D +#define CL_DEVICE_PROFILE 0x102E +#define CL_DEVICE_VERSION 0x102F +#define CL_DEVICE_EXTENSIONS 0x1030 +#define CL_DEVICE_PLATFORM 0x1031 +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +#endif +/* 0x1033 reserved for 
CL_DEVICE_HALF_FP_CONFIG which is already defined in "cl_ext.h" */ +#ifdef CL_VERSION_1_1 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 +#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 /* deprecated */ +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C +#define CL_DEVICE_OPENCL_C_VERSION 0x103D +#endif +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_LINKER_AVAILABLE 0x103E +#define CL_DEVICE_BUILT_IN_KERNELS 0x103F +#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 +#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 +#define CL_DEVICE_PARENT_DEVICE 0x1042 +#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 +#define CL_DEVICE_PARTITION_PROPERTIES 0x1044 +#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 +#define CL_DEVICE_PARTITION_TYPE 0x1046 +#define CL_DEVICE_REFERENCE_COUNT 0x1047 +#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 +#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 +#endif +#ifdef CL_VERSION_2_0 +#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A +#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B +#define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS 0x104C +#define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE 0x104D +#define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES 0x104E +#define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE 0x104F +#define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE 0x1050 +#define CL_DEVICE_MAX_ON_DEVICE_QUEUES 0x1051 +#define CL_DEVICE_MAX_ON_DEVICE_EVENTS 0x1052 +#define CL_DEVICE_SVM_CAPABILITIES 0x1053 +#define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE 0x1054 +#define CL_DEVICE_MAX_PIPE_ARGS 0x1055 +#define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS 0x1056 +#define CL_DEVICE_PIPE_MAX_PACKET_SIZE 0x1057 +#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT 0x1058 
+#define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT 0x1059 +#define CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT 0x105A +#endif +#ifdef CL_VERSION_2_1 +#define CL_DEVICE_IL_VERSION 0x105B +#define CL_DEVICE_MAX_NUM_SUB_GROUPS 0x105C +#define CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS 0x105D +#endif +#ifdef CL_VERSION_3_0 +#define CL_DEVICE_NUMERIC_VERSION 0x105E +#define CL_DEVICE_EXTENSIONS_WITH_VERSION 0x1060 +#define CL_DEVICE_ILS_WITH_VERSION 0x1061 +#define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION 0x1062 +#define CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES 0x1063 +#define CL_DEVICE_ATOMIC_FENCE_CAPABILITIES 0x1064 +#define CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT 0x1065 +#define CL_DEVICE_OPENCL_C_ALL_VERSIONS 0x1066 +#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x1067 +#define CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT 0x1068 +#define CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT 0x1069 +/* 0x106A to 0x106E - Reserved for upcoming KHR extension */ +#define CL_DEVICE_OPENCL_C_FEATURES 0x106F +#define CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES 0x1070 +#define CL_DEVICE_PIPE_SUPPORT 0x1071 +#define CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED 0x1072 +#endif + +/* cl_device_fp_config - bitfield */ +#define CL_FP_DENORM (1 << 0) +#define CL_FP_INF_NAN (1 << 1) +#define CL_FP_ROUND_TO_NEAREST (1 << 2) +#define CL_FP_ROUND_TO_ZERO (1 << 3) +#define CL_FP_ROUND_TO_INF (1 << 4) +#define CL_FP_FMA (1 << 5) +#ifdef CL_VERSION_1_1 +#define CL_FP_SOFT_FLOAT (1 << 6) +#endif +#ifdef CL_VERSION_1_2 +#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) +#endif + +/* cl_device_mem_cache_type */ +#define CL_NONE 0x0 +#define CL_READ_ONLY_CACHE 0x1 +#define CL_READ_WRITE_CACHE 0x2 + +/* cl_device_local_mem_type */ +#define CL_LOCAL 0x1 +#define CL_GLOBAL 0x2 + +/* cl_device_exec_capabilities - bitfield */ +#define CL_EXEC_KERNEL (1 << 0) +#define CL_EXEC_NATIVE_KERNEL (1 << 1) + +/* cl_command_queue_properties - bitfield */ +#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 
0) +#define CL_QUEUE_PROFILING_ENABLE (1 << 1) +#ifdef CL_VERSION_2_0 +#define CL_QUEUE_ON_DEVICE (1 << 2) +#define CL_QUEUE_ON_DEVICE_DEFAULT (1 << 3) +#endif + +/* cl_context_info */ +#define CL_CONTEXT_REFERENCE_COUNT 0x1080 +#define CL_CONTEXT_DEVICES 0x1081 +#define CL_CONTEXT_PROPERTIES 0x1082 +#ifdef CL_VERSION_1_1 +#define CL_CONTEXT_NUM_DEVICES 0x1083 +#endif + +/* cl_context_properties */ +#define CL_CONTEXT_PLATFORM 0x1084 +#ifdef CL_VERSION_1_2 +#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_device_partition_property */ +#define CL_DEVICE_PARTITION_EQUALLY 0x1086 +#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 +#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_device_affinity_domain */ +#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) +#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) +#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) +#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) +#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) +#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) + +#endif + +#ifdef CL_VERSION_2_0 + +/* cl_device_svm_capabilities */ +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2) +#define CL_DEVICE_SVM_ATOMICS (1 << 3) + +#endif + +/* cl_command_queue_info */ +#define CL_QUEUE_CONTEXT 0x1090 +#define CL_QUEUE_DEVICE 0x1091 +#define CL_QUEUE_REFERENCE_COUNT 0x1092 +#define CL_QUEUE_PROPERTIES 0x1093 +#ifdef CL_VERSION_2_0 +#define CL_QUEUE_SIZE 0x1094 +#endif +#ifdef CL_VERSION_2_1 +#define CL_QUEUE_DEVICE_DEFAULT 0x1095 +#endif +#ifdef CL_VERSION_3_0 +#define CL_QUEUE_PROPERTIES_ARRAY 0x1098 +#endif + +/* cl_mem_flags and cl_svm_mem_flags - bitfield */ +#define CL_MEM_READ_WRITE (1 << 0) +#define CL_MEM_WRITE_ONLY (1 << 1) +#define CL_MEM_READ_ONLY (1 << 2) +#define 
CL_MEM_USE_HOST_PTR (1 << 3) +#define CL_MEM_ALLOC_HOST_PTR (1 << 4) +#define CL_MEM_COPY_HOST_PTR (1 << 5) +/* reserved (1 << 6) */ +#ifdef CL_VERSION_1_2 +#define CL_MEM_HOST_WRITE_ONLY (1 << 7) +#define CL_MEM_HOST_READ_ONLY (1 << 8) +#define CL_MEM_HOST_NO_ACCESS (1 << 9) +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10) /* used by cl_svm_mem_flags only */ +#define CL_MEM_SVM_ATOMICS (1 << 11) /* used by cl_svm_mem_flags only */ +#define CL_MEM_KERNEL_READ_AND_WRITE (1 << 12) +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_mem_migration_flags - bitfield */ +#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) +#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) + +#endif + +/* cl_channel_order */ +#define CL_R 0x10B0 +#define CL_A 0x10B1 +#define CL_RG 0x10B2 +#define CL_RA 0x10B3 +#define CL_RGB 0x10B4 +#define CL_RGBA 0x10B5 +#define CL_BGRA 0x10B6 +#define CL_ARGB 0x10B7 +#define CL_INTENSITY 0x10B8 +#define CL_LUMINANCE 0x10B9 +#ifdef CL_VERSION_1_1 +#define CL_Rx 0x10BA +#define CL_RGx 0x10BB +#define CL_RGBx 0x10BC +#endif +#ifdef CL_VERSION_1_2 +#define CL_DEPTH 0x10BD +#define CL_DEPTH_STENCIL 0x10BE +#endif +#ifdef CL_VERSION_2_0 +#define CL_sRGB 0x10BF +#define CL_sRGBx 0x10C0 +#define CL_sRGBA 0x10C1 +#define CL_sBGRA 0x10C2 +#define CL_ABGR 0x10C3 +#endif + +/* cl_channel_type */ +#define CL_SNORM_INT8 0x10D0 +#define CL_SNORM_INT16 0x10D1 +#define CL_UNORM_INT8 0x10D2 +#define CL_UNORM_INT16 0x10D3 +#define CL_UNORM_SHORT_565 0x10D4 +#define CL_UNORM_SHORT_555 0x10D5 +#define CL_UNORM_INT_101010 0x10D6 +#define CL_SIGNED_INT8 0x10D7 +#define CL_SIGNED_INT16 0x10D8 +#define CL_SIGNED_INT32 0x10D9 +#define CL_UNSIGNED_INT8 0x10DA +#define CL_UNSIGNED_INT16 0x10DB +#define CL_UNSIGNED_INT32 0x10DC +#define CL_HALF_FLOAT 0x10DD +#define CL_FLOAT 0x10DE +#ifdef CL_VERSION_1_2 +#define CL_UNORM_INT24 0x10DF +#endif +#ifdef CL_VERSION_2_1 +#define CL_UNORM_INT_101010_2 0x10E0 +#endif + +/* cl_mem_object_type */ +#define 
CL_MEM_OBJECT_BUFFER 0x10F0 +#define CL_MEM_OBJECT_IMAGE2D 0x10F1 +#define CL_MEM_OBJECT_IMAGE3D 0x10F2 +#ifdef CL_VERSION_1_2 +#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 +#define CL_MEM_OBJECT_IMAGE1D 0x10F4 +#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 +#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_OBJECT_PIPE 0x10F7 +#endif + +/* cl_mem_info */ +#define CL_MEM_TYPE 0x1100 +#define CL_MEM_FLAGS 0x1101 +#define CL_MEM_SIZE 0x1102 +#define CL_MEM_HOST_PTR 0x1103 +#define CL_MEM_MAP_COUNT 0x1104 +#define CL_MEM_REFERENCE_COUNT 0x1105 +#define CL_MEM_CONTEXT 0x1106 +#ifdef CL_VERSION_1_1 +#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 +#define CL_MEM_OFFSET 0x1108 +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_USES_SVM_POINTER 0x1109 +#endif +#ifdef CL_VERSION_3_0 +#define CL_MEM_PROPERTIES 0x110A +#endif + +/* cl_image_info */ +#define CL_IMAGE_FORMAT 0x1110 +#define CL_IMAGE_ELEMENT_SIZE 0x1111 +#define CL_IMAGE_ROW_PITCH 0x1112 +#define CL_IMAGE_SLICE_PITCH 0x1113 +#define CL_IMAGE_WIDTH 0x1114 +#define CL_IMAGE_HEIGHT 0x1115 +#define CL_IMAGE_DEPTH 0x1116 +#ifdef CL_VERSION_1_2 +#define CL_IMAGE_ARRAY_SIZE 0x1117 +#define CL_IMAGE_BUFFER 0x1118 +#define CL_IMAGE_NUM_MIP_LEVELS 0x1119 +#define CL_IMAGE_NUM_SAMPLES 0x111A +#endif + + +/* cl_pipe_info */ +#ifdef CL_VERSION_2_0 +#define CL_PIPE_PACKET_SIZE 0x1120 +#define CL_PIPE_MAX_PACKETS 0x1121 +#endif +#ifdef CL_VERSION_3_0 +#define CL_PIPE_PROPERTIES 0x1122 +#endif + +/* cl_addressing_mode */ +#define CL_ADDRESS_NONE 0x1130 +#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 +#define CL_ADDRESS_CLAMP 0x1132 +#define CL_ADDRESS_REPEAT 0x1133 +#ifdef CL_VERSION_1_1 +#define CL_ADDRESS_MIRRORED_REPEAT 0x1134 +#endif + +/* cl_filter_mode */ +#define CL_FILTER_NEAREST 0x1140 +#define CL_FILTER_LINEAR 0x1141 + +/* cl_sampler_info */ +#define CL_SAMPLER_REFERENCE_COUNT 0x1150 +#define CL_SAMPLER_CONTEXT 0x1151 +#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 +#define CL_SAMPLER_ADDRESSING_MODE 
0x1153 +#define CL_SAMPLER_FILTER_MODE 0x1154 +#ifdef CL_VERSION_2_0 +/* These enumerants are for the cl_khr_mipmap_image extension. + They have since been added to cl_ext.h with an appropriate + KHR suffix, but are left here for backwards compatibility. */ +#define CL_SAMPLER_MIP_FILTER_MODE 0x1155 +#define CL_SAMPLER_LOD_MIN 0x1156 +#define CL_SAMPLER_LOD_MAX 0x1157 +#endif +#ifdef CL_VERSION_3_0 +#define CL_SAMPLER_PROPERTIES 0x1158 +#endif + +/* cl_map_flags - bitfield */ +#define CL_MAP_READ (1 << 0) +#define CL_MAP_WRITE (1 << 1) +#ifdef CL_VERSION_1_2 +#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) +#endif + +/* cl_program_info */ +#define CL_PROGRAM_REFERENCE_COUNT 0x1160 +#define CL_PROGRAM_CONTEXT 0x1161 +#define CL_PROGRAM_NUM_DEVICES 0x1162 +#define CL_PROGRAM_DEVICES 0x1163 +#define CL_PROGRAM_SOURCE 0x1164 +#define CL_PROGRAM_BINARY_SIZES 0x1165 +#define CL_PROGRAM_BINARIES 0x1166 +#ifdef CL_VERSION_1_2 +#define CL_PROGRAM_NUM_KERNELS 0x1167 +#define CL_PROGRAM_KERNEL_NAMES 0x1168 +#endif +#ifdef CL_VERSION_2_1 +#define CL_PROGRAM_IL 0x1169 +#endif +#ifdef CL_VERSION_2_2 +#define CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT 0x116A +#define CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT 0x116B +#endif + +/* cl_program_build_info */ +#define CL_PROGRAM_BUILD_STATUS 0x1181 +#define CL_PROGRAM_BUILD_OPTIONS 0x1182 +#define CL_PROGRAM_BUILD_LOG 0x1183 +#ifdef CL_VERSION_1_2 +#define CL_PROGRAM_BINARY_TYPE 0x1184 +#endif +#ifdef CL_VERSION_2_0 +#define CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE 0x1185 +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_program_binary_type */ +#define CL_PROGRAM_BINARY_TYPE_NONE 0x0 +#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 +#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 +#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 + +#endif + +/* cl_build_status */ +#define CL_BUILD_SUCCESS 0 +#define CL_BUILD_NONE -1 +#define CL_BUILD_ERROR -2 +#define CL_BUILD_IN_PROGRESS -3 + +/* cl_kernel_info */ +#define CL_KERNEL_FUNCTION_NAME 0x1190 +#define 
CL_KERNEL_NUM_ARGS 0x1191 +#define CL_KERNEL_REFERENCE_COUNT 0x1192 +#define CL_KERNEL_CONTEXT 0x1193 +#define CL_KERNEL_PROGRAM 0x1194 +#ifdef CL_VERSION_1_2 +#define CL_KERNEL_ATTRIBUTES 0x1195 +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_info */ +#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 +#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 +#define CL_KERNEL_ARG_TYPE_NAME 0x1198 +#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 +#define CL_KERNEL_ARG_NAME 0x119A + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_address_qualifier */ +#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B +#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C +#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D +#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_access_qualifier */ +#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 +#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 +#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 +#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_type_qualifier */ +#define CL_KERNEL_ARG_TYPE_NONE 0 +#define CL_KERNEL_ARG_TYPE_CONST (1 << 0) +#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) +#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) +#ifdef CL_VERSION_2_0 +#define CL_KERNEL_ARG_TYPE_PIPE (1 << 3) +#endif + +#endif + +/* cl_kernel_work_group_info */ +#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 +#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 +#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 +#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 +#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 +#ifdef CL_VERSION_1_2 +#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 +#endif + +#ifdef CL_VERSION_2_1 + +/* cl_kernel_sub_group_info */ +#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE 0x2033 +#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE 0x2034 +#define CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT 0x11B8 +#define CL_KERNEL_MAX_NUM_SUB_GROUPS 0x11B9 +#define CL_KERNEL_COMPILE_NUM_SUB_GROUPS 0x11BA + +#endif + 
+#ifdef CL_VERSION_2_0 + +/* cl_kernel_exec_info (these two names exist only when CL_VERSION_2_0 is defined) */ +#define CL_KERNEL_EXEC_INFO_SVM_PTRS 0x11B6 +#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM 0x11B7 + +#endif + +/* cl_event_info (CL_EVENT_CONTEXT is additionally guarded by CL_VERSION_1_1) */ +#define CL_EVENT_COMMAND_QUEUE 0x11D0 +#define CL_EVENT_COMMAND_TYPE 0x11D1 +#define CL_EVENT_REFERENCE_COUNT 0x11D2 +#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 +#ifdef CL_VERSION_1_1 +#define CL_EVENT_CONTEXT 0x11D4 +#endif + +/* cl_command_type (0x11F0 to 0x1200 are unconditional; the later values are guarded by the CL_VERSION_1_1, 1_2, 2_0 and 3_0 macros) */ +#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 +#define CL_COMMAND_TASK 0x11F1 +#define CL_COMMAND_NATIVE_KERNEL 0x11F2 +#define CL_COMMAND_READ_BUFFER 0x11F3 +#define CL_COMMAND_WRITE_BUFFER 0x11F4 +#define CL_COMMAND_COPY_BUFFER 0x11F5 +#define CL_COMMAND_READ_IMAGE 0x11F6 +#define CL_COMMAND_WRITE_IMAGE 0x11F7 +#define CL_COMMAND_COPY_IMAGE 0x11F8 +#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 +#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA +#define CL_COMMAND_MAP_BUFFER 0x11FB +#define CL_COMMAND_MAP_IMAGE 0x11FC +#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD +#define CL_COMMAND_MARKER 0x11FE +#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF +#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 +#ifdef CL_VERSION_1_1 +#define CL_COMMAND_READ_BUFFER_RECT 0x1201 +#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 +#define CL_COMMAND_COPY_BUFFER_RECT 0x1203 +#define CL_COMMAND_USER 0x1204 +#endif +#ifdef CL_VERSION_1_2 +#define CL_COMMAND_BARRIER 0x1205 +#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 +#define CL_COMMAND_FILL_BUFFER 0x1207 +#define CL_COMMAND_FILL_IMAGE 0x1208 +#endif +#ifdef CL_VERSION_2_0 +#define CL_COMMAND_SVM_FREE 0x1209 +#define CL_COMMAND_SVM_MEMCPY 0x120A +#define CL_COMMAND_SVM_MEMFILL 0x120B +#define CL_COMMAND_SVM_MAP 0x120C +#define CL_COMMAND_SVM_UNMAP 0x120D +#endif +#ifdef CL_VERSION_3_0 +#define CL_COMMAND_SVM_MIGRATE_MEM 0x120E +#endif + +/* command execution status (CL_COMPLETE is 0x0; the other three states take the values 0x1 to 0x3) */ +#define CL_COMPLETE 0x0 +#define CL_RUNNING 0x1 +#define CL_SUBMITTED 0x2 +#define CL_QUEUED 0x3 + +/* cl_buffer_create_type (only defined when CL_VERSION_1_1 is defined) */ +#ifdef CL_VERSION_1_1
+#define CL_BUFFER_CREATE_TYPE_REGION 0x1220 +#endif + +/* cl_profiling_info (CL_PROFILING_COMMAND_COMPLETE is additionally guarded by CL_VERSION_2_0) */ +#define CL_PROFILING_COMMAND_QUEUED 0x1280 +#define CL_PROFILING_COMMAND_SUBMIT 0x1281 +#define CL_PROFILING_COMMAND_START 0x1282 +#define CL_PROFILING_COMMAND_END 0x1283 +#ifdef CL_VERSION_2_0 +#define CL_PROFILING_COMMAND_COMPLETE 0x1284 +#endif + +/* cl_device_atomic_capabilities - bitfield (one distinct bit per flag, bits 0 to 6; only defined when CL_VERSION_3_0 is defined) */ +#ifdef CL_VERSION_3_0 +#define CL_DEVICE_ATOMIC_ORDER_RELAXED (1 << 0) +#define CL_DEVICE_ATOMIC_ORDER_ACQ_REL (1 << 1) +#define CL_DEVICE_ATOMIC_ORDER_SEQ_CST (1 << 2) +#define CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM (1 << 3) +#define CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP (1 << 4) +#define CL_DEVICE_ATOMIC_SCOPE_DEVICE (1 << 5) +#define CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES (1 << 6) +#endif + +/* cl_device_device_enqueue_capabilities - bitfield (bits 0 and 1; only defined when CL_VERSION_3_0 is defined) */ +#ifdef CL_VERSION_3_0 +#define CL_DEVICE_QUEUE_SUPPORTED (1 << 0) +#define CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT (1 << 1) +#endif + +/* cl_khronos_vendor_id */ +#define CL_KHRONOS_VENDOR_ID_CODEPLAY 0x10004 + +#ifdef CL_VERSION_3_0 + +/* cl_version: packed version number, only available when CL_VERSION_3_0 is defined. Field layout from the most significant end: major (10 bits), minor (10 bits), patch (12 bits). CL_MAKE_VERSION masks every component to its field width before shifting it into place; CL_VERSION_MAJOR only shifts and applies no mask, while CL_VERSION_MINOR and CL_VERSION_PATCH mask their result. */ +#define CL_VERSION_MAJOR_BITS (10) +#define CL_VERSION_MINOR_BITS (10) +#define CL_VERSION_PATCH_BITS (12) + +#define CL_VERSION_MAJOR_MASK ((1 << CL_VERSION_MAJOR_BITS) - 1) +#define CL_VERSION_MINOR_MASK ((1 << CL_VERSION_MINOR_BITS) - 1) +#define CL_VERSION_PATCH_MASK ((1 << CL_VERSION_PATCH_BITS) - 1) + +#define CL_VERSION_MAJOR(version) \ + ((version) >> (CL_VERSION_MINOR_BITS + CL_VERSION_PATCH_BITS)) + +#define CL_VERSION_MINOR(version) \ + (((version) >> CL_VERSION_PATCH_BITS) & CL_VERSION_MINOR_MASK) + +#define CL_VERSION_PATCH(version) ((version) & CL_VERSION_PATCH_MASK) + +#define CL_MAKE_VERSION(major, minor, patch) \ + ((((major) & CL_VERSION_MAJOR_MASK) \ + << (CL_VERSION_MINOR_BITS + CL_VERSION_PATCH_BITS)) | \ + (((minor) & CL_VERSION_MINOR_MASK) << CL_VERSION_PATCH_BITS) | \ + ((patch) & CL_VERSION_PATCH_MASK)) + +#endif +
+/********************************************************************************************************/ + +/* Genode */ +extern CL_API_ENTRY void CL_API_CALL +clInitGenode(Genode::Allocator_avl& alloc); + +/* Platform API */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformIDs(cl_uint num_entries, + cl_platform_id * platforms, + cl_uint * num_platforms) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformInfo(cl_platform_id platform, + cl_platform_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Device APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDs(cl_platform_id platform, + cl_device_type device_type, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceInfo(cl_device_id device, + cl_device_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateSubDevices(cl_device_id in_device, + const cl_device_partition_property * properties, + cl_uint num_devices, + cl_device_id * out_devices, + cl_uint * num_devices_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetDefaultDeviceCommandQueue(cl_context context, + cl_device_id device, + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceAndHostTimer(cl_device_id device, + cl_ulong* device_timestamp, + cl_ulong* host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +extern CL_API_ENTRY cl_int CL_API_CALL 
+clGetHostTimer(cl_device_id device, + cl_ulong * host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +#endif + +/* Context APIs */ +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContext(const cl_context_properties * properties, + cl_uint num_devices, + const cl_device_id * devices, + void (CL_CALLBACK * pfn_notify)(const char * errinfo, + const void * private_info, + size_t cb, + void * user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContextFromType(const cl_context_properties * properties, + cl_device_type device_type, + void (CL_CALLBACK * pfn_notify)(const char * errinfo, + const void * private_info, + size_t cb, + void * user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainContext(cl_context context) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseContext(cl_context context) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetContextInfo(cl_context context, + cl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_3_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetContextDestructorCallback(cl_context context, + void (CL_CALLBACK* pfn_notify)(cl_context context, + void* user_data), + void* user_data) CL_API_SUFFIX__VERSION_3_0; + +#endif + +/* Command Queue APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_command_queue CL_API_CALL +clCreateCommandQueueWithProperties(cl_context context, + cl_device_id device, + const cl_queue_properties * properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseCommandQueue(cl_command_queue 
command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetCommandQueueInfo(cl_command_queue command_queue, + cl_command_queue_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Memory Object APIs */ +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBuffer(cl_context context, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateSubBuffer(cl_mem buffer, + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void * buffer_create_info, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +#endif + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateImage(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + const cl_image_desc * image_desc, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreatePipe(cl_context context, + cl_mem_flags flags, + cl_uint pipe_packet_size, + cl_uint pipe_max_packets, + const cl_pipe_properties * properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +#ifdef CL_VERSION_3_0 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBufferWithProperties(cl_context context, + const cl_mem_properties * properties, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_3_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateImageWithProperties(cl_context context, + const cl_mem_properties * properties, + cl_mem_flags flags, + const cl_image_format * image_format, + const cl_image_desc * image_desc, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_3_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainMemObject(cl_mem memobj) 
CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedImageFormats(cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_image_format * image_formats, + cl_uint * num_image_formats) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMemObjectInfo(cl_mem memobj, + cl_mem_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetImageInfo(cl_mem image, + cl_image_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPipeInfo(cl_mem pipe, + cl_pipe_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetMemObjectDestructorCallback(cl_mem memobj, + void (CL_CALLBACK * pfn_notify)(cl_mem memobj, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_1; + +#endif + +/* SVM Allocation APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY void * CL_API_CALL +clSVMAlloc(cl_context context, + cl_svm_mem_flags flags, + size_t size, + cl_uint alignment) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY void CL_API_CALL +clSVMFree(cl_context context, + void * svm_pointer) CL_API_SUFFIX__VERSION_2_0; + +#endif + +/* Sampler APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_sampler CL_API_CALL +clCreateSamplerWithProperties(cl_context context, + const cl_sampler_properties * sampler_properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainSampler(cl_sampler sampler) 
CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSamplerInfo(cl_sampler sampler, + cl_sampler_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Program Object APIs */ +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithSource(cl_context context, + cl_uint count, + const char ** strings, + const size_t * lengths, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBinary(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const size_t * lengths, + const unsigned char ** binaries, + cl_int * binary_status, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBuiltInKernels(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const char * kernel_names, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithIL(cl_context context, + const void* il, + size_t length, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clBuildProgram(cl_program program, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clCompileProgram(cl_program program, + cl_uint num_devices, 
+ const cl_device_id * device_list, + const char * options, + cl_uint num_input_headers, + const cl_program * input_headers, + const char ** header_include_names, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_program CL_API_CALL +clLinkProgram(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + cl_uint num_input_programs, + const cl_program * input_programs, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_2 + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_2_2_DEPRECATED cl_int CL_API_CALL +clSetProgramReleaseCallback(cl_program program, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_2_2_DEPRECATED; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetProgramSpecializationConstant(cl_program program, + cl_uint spec_id, + size_t spec_size, + const void* spec_value) CL_API_SUFFIX__VERSION_2_2; + +#endif + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clUnloadPlatformCompiler(cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramInfo(cl_program program, + cl_program_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramBuildInfo(cl_program program, + cl_device_id device, + cl_program_build_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Kernel Object APIs */ +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCreateKernel(cl_program program, + const char * kernel_name, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern 
CL_API_ENTRY cl_int CL_API_CALL +clCreateKernelsInProgram(cl_program program, + cl_uint num_kernels, + cl_kernel * kernels, + cl_uint * num_kernels_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCloneKernel(cl_kernel source_kernel, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArg(cl_kernel kernel, + cl_uint arg_index, + size_t arg_size, + const void * arg_value) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArgSVMPointer(cl_kernel kernel, + cl_uint arg_index, + const void * arg_value) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelExecInfo(cl_kernel kernel, + cl_kernel_exec_info param_name, + size_t param_value_size, + const void * param_value) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelInfo(cl_kernel kernel, + cl_kernel_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelArgInfo(cl_kernel kernel, + cl_uint arg_indx, + cl_kernel_arg_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelWorkGroupInfo(cl_kernel kernel, + cl_device_id device, + cl_kernel_work_group_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelSubGroupInfo(cl_kernel kernel, + 
cl_device_id device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void* input_value, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +/* Event Object APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clWaitForEvents(cl_uint num_events, + const cl_event * event_list) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventInfo(cl_event event, + cl_event_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateUserEvent(cl_context context, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetUserEventStatus(cl_event event, + cl_int execution_status) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetEventCallback(cl_event event, + cl_int command_exec_callback_type, + void (CL_CALLBACK * pfn_notify)(cl_event event, + cl_int event_command_status, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_1; + +#endif + +/* Profiling APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventProfilingInfo(cl_event event, + cl_profiling_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Flush and Finish APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clFlush(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clFinish(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +/* Enqueued Commands APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL 
+clEnqueueReadBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + size_t offset, + size_t size, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBufferRect(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + const size_t * buffer_origin, + const size_t * host_origin, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + size_t offset, + size_t size, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBufferRect(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + const size_t * buffer_origin, + const size_t * host_origin, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillBuffer(cl_command_queue command_queue, + cl_mem buffer, + const void * pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL 
+clEnqueueCopyBuffer(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferRect(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + const size_t * src_origin, + const size_t * dst_origin, + const size_t * region, + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_read, + const size_t * origin, + const size_t * region, + size_t row_pitch, + size_t slice_pitch, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_write, + const size_t * origin, + const size_t * region, + size_t input_row_pitch, + size_t input_slice_pitch, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillImage(cl_command_queue command_queue, + cl_mem image, + const void * fill_color, + const size_t * origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImage(cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_image, + const size_t * src_origin, + 
const size_t * dst_origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImageToBuffer(cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_buffer, + const size_t * src_origin, + const size_t * region, + size_t dst_offset, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferToImage(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_image, + size_t src_offset, + const size_t * dst_origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_map, + cl_map_flags map_flags, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_map, + cl_map_flags map_flags, + const size_t * origin, + const size_t * region, + size_t * image_row_pitch, + size_t * image_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueUnmapMemObject(cl_command_queue command_queue, + cl_mem memobj, + void * mapped_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemObjects(cl_command_queue command_queue, + cl_uint 
num_mem_objects, + const cl_mem * mem_objects, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNDRangeKernel(cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t * global_work_offset, + const size_t * global_work_size, + const size_t * local_work_size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNativeKernel(cl_command_queue command_queue, + void (CL_CALLBACK * user_func)(void *), + void * args, + size_t cb_args, + cl_uint num_mem_objects, + const cl_mem * mem_list, + const void ** args_mem_loc, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMarkerWithWaitList(cl_command_queue command_queue, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueBarrierWithWaitList(cl_command_queue command_queue, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMFree(cl_command_queue command_queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void * user_data), + void * user_data, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemcpy(cl_command_queue command_queue, + cl_bool blocking_copy, + void * 
dst_ptr, + const void * src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemFill(cl_command_queue command_queue, + void * svm_ptr, + const void * pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMap(cl_command_queue command_queue, + cl_bool blocking_map, + cl_map_flags flags, + void * svm_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMUnmap(cl_command_queue command_queue, + void * svm_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMigrateMem(cl_command_queue command_queue, + cl_uint num_svm_pointers, + const void ** svm_pointers, + const size_t * sizes, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_1; + +#endif + +#ifdef CL_VERSION_1_2 + +/* Extension function access + * + * Returns the extension function address for the given function name, + * or NULL if a valid function can not be found. The client must + * check to make sure the address is not NULL, before using or + * calling the returned function address. + */ +extern CL_API_ENTRY void * CL_API_CALL +clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, + const char * func_name) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + /* + * WARNING: + * This API introduces mutable state into the OpenCL implementation. 
It has been REMOVED + * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the + * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably. + * It is likely to be non-performant. Use of this API is not advised. Use at your own risk. + * + * Software developers previously relying on this API are instructed to set the command queue + * properties when creating the queue, instead. + */ + extern CL_API_ENTRY cl_int CL_API_CALL + clSetCommandQueueProperty(cl_command_queue command_queue, + cl_command_queue_properties properties, + cl_bool enable, + cl_command_queue_properties * old_properties) CL_API_SUFFIX__VERSION_1_0_DEPRECATED; +#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ + +/* Deprecated OpenCL 1.1 APIs */ +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage2D(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + size_t image_width, + size_t image_height, + size_t image_row_pitch, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage3D(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + size_t image_width, + size_t image_height, + size_t image_depth, + size_t image_row_pitch, + size_t image_slice_pitch, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueMarker(cl_command_queue command_queue, + cl_event * event) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueWaitForEvents(cl_command_queue command_queue, + cl_uint num_events, + const cl_event * event_list) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL 
+clEnqueueBarrier(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL +clGetExtensionFunctionAddress(const char * func_name) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +/* Deprecated OpenCL 2.0 APIs */ +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL +clCreateCommandQueue(cl_context context, + cl_device_id device, + cl_command_queue_properties properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2_DEPRECATED; + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL +clCreateSampler(cl_context context, + cl_bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2_DEPRECATED; + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_int CL_API_CALL +clEnqueueTask(cl_command_queue command_queue, + cl_kernel kernel, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2_DEPRECATED; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_H */ diff --git a/repos/hello_gpgpu/src/hello_gpgpu/CL/cl_platform.h b/repos/hello_gpgpu/src/hello_gpgpu/CL/cl_platform.h new file mode 100644 index 0000000000..16a2173d18 --- /dev/null +++ b/repos/hello_gpgpu/src/hello_gpgpu/CL/cl_platform.h @@ -0,0 +1,1432 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __CL_PLATFORM_H +#define __CL_PLATFORM_H + +#include "cl_version.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_WIN32) + #if !defined(CL_API_ENTRY) + #define CL_API_ENTRY + #endif + #if !defined(CL_API_CALL) + #define CL_API_CALL __stdcall + #endif + #if !defined(CL_CALLBACK) + #define CL_CALLBACK __stdcall + #endif +#else + #if !defined(CL_API_ENTRY) + #define CL_API_ENTRY + #endif + #if !defined(CL_API_CALL) + #define CL_API_CALL + #endif + #if !defined(CL_CALLBACK) + #define CL_CALLBACK + #endif +#endif + +/* + * Deprecation flags refer to the last version of the header in which the + * feature was not deprecated. + * + * E.g. VERSION_1_1_DEPRECATED means the feature is present in 1.1 without + * deprecation but is deprecated in versions later than 1.1. 
+ */ + +#ifndef CL_API_SUFFIX_USER +#define CL_API_SUFFIX_USER +#endif + +#ifndef CL_API_PREFIX_USER +#define CL_API_PREFIX_USER +#endif + +#define CL_API_SUFFIX_COMMON CL_API_SUFFIX_USER +#define CL_API_PREFIX_COMMON CL_API_PREFIX_USER + +#define CL_API_SUFFIX__VERSION_1_0 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_1_1 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_1_2 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_2_0 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_2_1 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_2_2 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_3_0 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__EXPERIMENTAL CL_API_SUFFIX_COMMON + + +#ifdef __GNUC__ + #define CL_API_SUFFIX_DEPRECATED __attribute__((deprecated)) + #define CL_API_PREFIX_DEPRECATED +#elif defined(_WIN32) + #define CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX_DEPRECATED __declspec(deprecated) +#else + #define CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + #define CL_API_SUFFIX__VERSION_1_0_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_1_0_DEPRECATED CL_API_PREFIX_COMMON +#else + #define CL_API_SUFFIX__VERSION_1_0_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_1_0_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS + #define CL_API_SUFFIX__VERSION_1_1_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_1_1_DEPRECATED CL_API_PREFIX_COMMON +#else + #define CL_API_SUFFIX__VERSION_1_1_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_1_1_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS + #define CL_API_SUFFIX__VERSION_1_2_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_1_2_DEPRECATED CL_API_PREFIX_COMMON +#else + #define 
CL_API_SUFFIX__VERSION_1_2_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_1_2_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED + #endif + +#ifdef CL_USE_DEPRECATED_OPENCL_2_0_APIS + #define CL_API_SUFFIX__VERSION_2_0_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_2_0_DEPRECATED CL_API_PREFIX_COMMON +#else + #define CL_API_SUFFIX__VERSION_2_0_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_2_0_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_2_1_APIS + #define CL_API_SUFFIX__VERSION_2_1_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_2_1_DEPRECATED CL_API_PREFIX_COMMON +#else + #define CL_API_SUFFIX__VERSION_2_1_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_2_1_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_2_2_APIS + #define CL_API_SUFFIX__VERSION_2_2_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_2_2_DEPRECATED CL_API_PREFIX_COMMON +#else + #define CL_API_SUFFIX__VERSION_2_2_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_2_2_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED +#endif + +#define CL_EXT_PREFIX_DEPRECATED CL_API_PREFIX_DEPRECATED +#define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED CL_API_PREFIX__VERSION_1_0_DEPRECATED +#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED CL_API_PREFIX__VERSION_1_1_DEPRECATED +#define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED CL_API_PREFIX__VERSION_1_2_DEPRECATED +#define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED CL_API_PREFIX__VERSION_2_0_DEPRECATED +#define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED CL_API_PREFIX__VERSION_2_1_DEPRECATED +#define CL_EXT_PREFIX__VERSION_2_2_DEPRECATED CL_API_PREFIX__VERSION_2_2_DEPRECATED +#define CL_EXT_SUFFIX_DEPRECATED CL_API_SUFFIX_DEPRECATED +#define 
CL_EXT_SUFFIX__EXPERIMENTAL CL_API_SUFFIX__EXPERIMENTAL +#define CL_EXT_SUFFIX__VERSION_1_0 CL_API_SUFFIX__VERSION_1_0 +#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_API_SUFFIX__VERSION_1_0_DEPRECATED +#define CL_EXT_SUFFIX__VERSION_1_1 CL_API_SUFFIX__VERSION_1_1 +#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_API_SUFFIX__VERSION_1_1_DEPRECATED +#define CL_EXT_SUFFIX__VERSION_1_2 CL_API_SUFFIX__VERSION_1_2 +#define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED CL_API_SUFFIX__VERSION_1_2_DEPRECATED +#define CL_EXT_SUFFIX__VERSION_2_0 CL_API_SUFFIX__VERSION_2_0 +#define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED CL_API_SUFFIX__VERSION_2_0_DEPRECATED +#define CL_EXT_SUFFIX__VERSION_2_1 CL_API_SUFFIX__VERSION_2_1 +#define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED CL_API_SUFFIX__VERSION_2_1_DEPRECATED +#define CL_EXT_SUFFIX__VERSION_2_2 CL_API_SUFFIX__VERSION_2_2 +#define CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED CL_API_SUFFIX__VERSION_2_2_DEPRECATED +#define CL_EXT_SUFFIX__VERSION_3_0 CL_API_SUFFIX__VERSION_3_0 + +#if (defined (_WIN32) && defined(_MSC_VER)) + +/* intptr_t is used in cl.h and provided by stddef.h in Visual C++, but not in clang */ +/* stdint.h was missing before Visual Studio 2010, include it for later versions and for clang */ +#if defined(__clang__) || _MSC_VER >= 1600 + #include +#endif + +/* scalar types */ +typedef signed __int8 cl_char; +typedef unsigned __int8 cl_uchar; +typedef signed __int16 cl_short; +typedef unsigned __int16 cl_ushort; +typedef signed __int32 cl_int; +typedef unsigned __int32 cl_uint; +typedef signed __int64 cl_long; +typedef unsigned __int64 cl_ulong; + +typedef unsigned __int16 cl_half; +typedef float cl_float; +typedef double cl_double; + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define 
CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 340282346638528859811704183484516925440.0f +#define CL_FLT_MIN 1.175494350822287507969e-38f +#define CL_FLT_EPSILON 1.1920928955078125e-7f + +#define CL_HALF_DIG 3 +#define CL_HALF_MANT_DIG 11 +#define CL_HALF_MAX_10_EXP +4 +#define CL_HALF_MAX_EXP +16 +#define CL_HALF_MIN_10_EXP -4 +#define CL_HALF_MIN_EXP -13 +#define CL_HALF_RADIX 2 +#define CL_HALF_MAX 65504.0f +#define CL_HALF_MIN 6.103515625e-05f +#define CL_HALF_EPSILON 9.765625e-04f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 1.7976931348623158e+308 +#define CL_DBL_MIN 2.225073858507201383090e-308 +#define CL_DBL_EPSILON 2.220446049250313080847e-16 + +#define CL_M_E 2.7182818284590452354 +#define CL_M_LOG2E 1.4426950408889634074 +#define CL_M_LOG10E 0.43429448190325182765 +#define CL_M_LN2 0.69314718055994530942 +#define CL_M_LN10 2.30258509299404568402 +#define CL_M_PI 3.14159265358979323846 +#define CL_M_PI_2 1.57079632679489661923 +#define CL_M_PI_4 0.78539816339744830962 +#define CL_M_1_PI 0.31830988618379067154 +#define CL_M_2_PI 0.63661977236758134308 +#define CL_M_2_SQRTPI 1.12837916709551257390 +#define CL_M_SQRT2 1.41421356237309504880 +#define CL_M_SQRT1_2 0.70710678118654752440 + +#define CL_M_E_F 2.718281828f +#define CL_M_LOG2E_F 1.442695041f +#define CL_M_LOG10E_F 0.434294482f +#define CL_M_LN2_F 0.693147181f +#define 
CL_M_LN10_F 2.302585093f +#define CL_M_PI_F 3.141592654f +#define CL_M_PI_2_F 1.570796327f +#define CL_M_PI_4_F 0.785398163f +#define CL_M_1_PI_F 0.318309886f +#define CL_M_2_PI_F 0.636619772f +#define CL_M_2_SQRTPI_F 1.128379167f +#define CL_M_SQRT2_F 1.414213562f +#define CL_M_SQRT1_2_F 0.707106781f + +#define CL_NAN (CL_INFINITY - CL_INFINITY) +#define CL_HUGE_VALF ((cl_float) 1e50) +#define CL_HUGE_VAL ((cl_double) 1e500) +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#else + +// genode stdint +#include +typedef __INTPTR_TYPE__ intptr_t; +using namespace Genode; + +/* scalar types */ +typedef int8_t cl_char; +typedef uint8_t cl_uchar; +typedef int16_t cl_short; +typedef uint16_t cl_ushort; +typedef int32_t cl_int; +typedef uint32_t cl_uint; +typedef int64_t cl_long; +typedef uint64_t cl_ulong; + +typedef uint16_t cl_half; +typedef float cl_float; +typedef double cl_double; + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 340282346638528859811704183484516925440.0f +#define CL_FLT_MIN 1.175494350822287507969e-38f +#define CL_FLT_EPSILON 1.1920928955078125e-7f + +#define CL_HALF_DIG 3 +#define CL_HALF_MANT_DIG 11 +#define CL_HALF_MAX_10_EXP +4 +#define CL_HALF_MAX_EXP +16 +#define 
CL_HALF_MIN_10_EXP -4 +#define CL_HALF_MIN_EXP -13 +#define CL_HALF_RADIX 2 +#define CL_HALF_MAX 65504.0f +#define CL_HALF_MIN 6.103515625e-05f +#define CL_HALF_EPSILON 9.765625e-04f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0 +#define CL_DBL_MIN 2.225073858507201383090e-308 +#define CL_DBL_EPSILON 2.220446049250313080847e-16 + +#define CL_M_E 2.7182818284590452354 +#define CL_M_LOG2E 1.4426950408889634074 +#define CL_M_LOG10E 0.43429448190325182765 +#define CL_M_LN2 0.69314718055994530942 +#define CL_M_LN10 2.30258509299404568402 +#define CL_M_PI 3.14159265358979323846 +#define CL_M_PI_2 1.57079632679489661923 +#define CL_M_PI_4 0.78539816339744830962 +#define CL_M_1_PI 0.31830988618379067154 +#define CL_M_2_PI 0.63661977236758134308 +#define CL_M_2_SQRTPI 1.12837916709551257390 +#define CL_M_SQRT2 1.41421356237309504880 +#define CL_M_SQRT1_2 0.70710678118654752440 + +#define CL_M_E_F 2.718281828f +#define CL_M_LOG2E_F 1.442695041f +#define CL_M_LOG10E_F 0.434294482f +#define CL_M_LN2_F 0.693147181f +#define CL_M_LN10_F 2.302585093f +#define CL_M_PI_F 3.141592654f +#define CL_M_PI_2_F 1.570796327f +#define CL_M_PI_4_F 0.785398163f +#define CL_M_1_PI_F 0.318309886f +#define CL_M_2_PI_F 0.636619772f +#define CL_M_2_SQRTPI_F 1.128379167f +#define CL_M_SQRT2_F 1.414213562f +#define CL_M_SQRT1_2_F 0.707106781f + +#if defined( __GNUC__ ) + #define CL_HUGE_VALF __builtin_huge_valf() + #define CL_HUGE_VAL __builtin_huge_val() + #define CL_NAN __builtin_nanf( "" ) +#else + #define CL_HUGE_VALF 
((cl_float) 1e50) + #define CL_HUGE_VAL ((cl_double) 1e500) + float nanf( const char * ); + #define CL_NAN nanf( "" ) +#endif +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#endif + +#include + +/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */ +typedef unsigned int cl_GLuint; +typedef int cl_GLint; +typedef unsigned int cl_GLenum; + +/* + * Vector types + * + * Note: OpenCL requires that all types be naturally aligned. + * This means that vector types must be naturally aligned. + * For example, a vector of four floats must be aligned to + * a 16 byte boundary (calculated as 4 * the natural 4-byte + * alignment of the float). The alignment qualifiers here + * will only function properly if your compiler supports them + * and if you don't actively work to defeat them. For example, + * in order for a cl_float4 to be 16 byte aligned in a struct, + * the start of the struct must itself be 16-byte aligned. + * + * Maintaining proper alignment is the user's responsibility. + */ + +/* Define basic vector types */ +#if defined( __VEC__ ) + #if !defined(__clang__) + #include /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. 
*/ + #endif + typedef __vector unsigned char __cl_uchar16; + typedef __vector signed char __cl_char16; + typedef __vector unsigned short __cl_ushort8; + typedef __vector signed short __cl_short8; + typedef __vector unsigned int __cl_uint4; + typedef __vector signed int __cl_int4; + typedef __vector float __cl_float4; + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_UINT4__ 1 + #define __CL_INT4__ 1 + #define __CL_FLOAT4__ 1 +#endif + +/*#if defined( __SSE__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef float __cl_float4 __attribute__((vector_size(16))); + #else + typedef __m128 __cl_float4; + #endif + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE2__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16))); + typedef cl_char __cl_char16 __attribute__((vector_size(16))); + typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16))); + typedef cl_short __cl_short8 __attribute__((vector_size(16))); + typedef cl_uint __cl_uint4 __attribute__((vector_size(16))); + typedef cl_int __cl_int4 __attribute__((vector_size(16))); + typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16))); + typedef cl_long __cl_long2 __attribute__((vector_size(16))); + typedef cl_double __cl_double2 __attribute__((vector_size(16))); + #else + typedef __m128i __cl_uchar16; + typedef __m128i __cl_char16; + typedef __m128i __cl_ushort8; + typedef __m128i __cl_short8; + typedef __m128i __cl_uint4; + typedef __m128i __cl_int4; + typedef __m128i __cl_ulong2; + typedef __m128i __cl_long2; + typedef __m128d __cl_double2; + #endif + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_INT4__ 1 + #define __CL_UINT4__ 1 + #define __CL_ULONG2__ 1 + #define __CL_LONG2__ 1 + #define 
__CL_DOUBLE2__ 1 +#endif + +#if defined( __MMX__ ) + #include + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8))); + typedef cl_char __cl_char8 __attribute__((vector_size(8))); + typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8))); + typedef cl_short __cl_short4 __attribute__((vector_size(8))); + typedef cl_uint __cl_uint2 __attribute__((vector_size(8))); + typedef cl_int __cl_int2 __attribute__((vector_size(8))); + typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8))); + typedef cl_long __cl_long1 __attribute__((vector_size(8))); + typedef cl_float __cl_float2 __attribute__((vector_size(8))); + #else + typedef __m64 __cl_uchar8; + typedef __m64 __cl_char8; + typedef __m64 __cl_ushort4; + typedef __m64 __cl_short4; + typedef __m64 __cl_uint2; + typedef __m64 __cl_int2; + typedef __m64 __cl_ulong1; + typedef __m64 __cl_long1; + typedef __m64 __cl_float2; + #endif + #define __CL_UCHAR8__ 1 + #define __CL_CHAR8__ 1 + #define __CL_USHORT4__ 1 + #define __CL_SHORT4__ 1 + #define __CL_INT2__ 1 + #define __CL_UINT2__ 1 + #define __CL_ULONG1__ 1 + #define __CL_LONG1__ 1 + #define __CL_FLOAT2__ 1 +#endif + +#if defined( __AVX__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef cl_float __cl_float8 __attribute__((vector_size(32))); + typedef cl_double __cl_double4 __attribute__((vector_size(32))); + #else + typedef __m256 __cl_float8; + typedef __m256d __cl_double4; + #endif + #define __CL_FLOAT8__ 1 + #define __CL_DOUBLE4__ 1 +#endif */ + +/* Define capabilities for anonymous struct members. */ +#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ +#elif defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ __extension__ +#elif defined( _WIN32) && defined(_MSC_VER) && ! 
defined(__STDC__) + #if _MSC_VER >= 1500 + /* Microsoft Developer Studio 2008 supports anonymous structs, but + * complains by default. */ + #define __CL_HAS_ANON_STRUCT__ 1 + #define __CL_ANON_STRUCT__ + /* Disable warning C4201: nonstandard extension used : nameless + * struct/union */ + #pragma warning( push ) + #pragma warning( disable : 4201 ) + #endif +#else +#define __CL_HAS_ANON_STRUCT__ 0 +#define __CL_ANON_STRUCT__ +#endif + +/* Define alignment keys */ +#if defined( __GNUC__ ) || defined(__INTEGRITY) + #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) +#elif defined( _WIN32) && (_MSC_VER) + /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */ + /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */ + /* #include */ + /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */ + #define CL_ALIGNED(_x) +#else + #warning Need to implement some method to align data here + #define CL_ALIGNED(_x) +#endif + +/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */ +#if __CL_HAS_ANON_STRUCT__ + /* .xyzw and .s0123...{f|F} are supported */ + #define CL_HAS_NAMED_VECTOR_FIELDS 1 + /* .hi and .lo are supported */ + #define CL_HAS_HI_LO_VECTOR_FIELDS 1 +#endif + +/* Define cl_vector types */ + +/* ---- cl_charn ---- */ +typedef union +{ + cl_char CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_char lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2; +#endif +}cl_char2; + +typedef union +{ + cl_char CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_char2 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[2]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4; +#endif +}cl_char4; + +/* cl_char3 is identical in size, 
alignment and behavior to cl_char4. See section 6.1.5. */ +typedef cl_char4 cl_char3; + +typedef union +{ + cl_char CL_ALIGNED(8) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_char4 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[4]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[2]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8; +#endif +}cl_char8; + +typedef union +{ + cl_char CL_ALIGNED(16) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_char8 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[8]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[4]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8[2]; +#endif +#if defined( __CL_CHAR16__ ) + __cl_char16 v16; +#endif +}cl_char16; + + +/* ---- cl_ucharn ---- */ +typedef union +{ + cl_uchar CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_uchar lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) /* upper-case guard, consistent with the __CL_UCHAR2__ test used for v2[] in cl_uchar4 */ + __cl_uchar2 v2; +#endif +}cl_uchar2; + +typedef union +{ + cl_uchar CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_uchar2 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[2]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4; +#endif +}cl_uchar4; + +/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5.
*/ +typedef cl_uchar4 cl_uchar3; /* the 3-element type is a plain alias of the 4-element union */ + +typedef union +{ + cl_uchar CL_ALIGNED(8) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; /* names only the first four of the eight elements */ + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_uchar4 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[4]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[2]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8; +#endif +}cl_uchar8; + +typedef union +{ + cl_uchar CL_ALIGNED(16) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; /* the six __spacer members hold positions 4-9, putting sa..sf at positions 10-15 like sA..sF */ + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_uchar8 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[8]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[4]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8[2]; +#endif +#if defined( __CL_UCHAR16__ ) + __cl_uchar16 v16; +#endif +}cl_uchar16; + + +/* ---- cl_shortn ---- */ +typedef union +{ + cl_short CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_short lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2; +#endif +}cl_short2; + +typedef union +{ + cl_short CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_short2 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[2]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4; +#endif +}cl_short4; + +/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. 
*/ +typedef cl_short4 cl_short3; /* the 3-element type is a plain alias of the 4-element union */ + +typedef union +{ + cl_short CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; /* names only the first four of the eight elements */ + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_short4 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[4]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[2]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8; +#endif +}cl_short8; + +typedef union +{ + cl_short CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; /* the six __spacer members hold positions 4-9, putting sa..sf at positions 10-15 like sA..sF */ + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_short8 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[8]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[4]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8[2]; +#endif +#if defined( __CL_SHORT16__ ) + __cl_short16 v16; +#endif +}cl_short16; + + +/* ---- cl_ushortn ---- */ +typedef union +{ + cl_ushort CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_ushort lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2; +#endif +}cl_ushort2; + +typedef union +{ + cl_ushort CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_ushort2 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[2]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4; +#endif +}cl_ushort4; + +/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. 
*/ +typedef cl_ushort4 cl_ushort3; /* the 3-element type is a plain alias of the 4-element union */ + +typedef union +{ + cl_ushort CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; /* names only the first four of the eight elements */ + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_ushort4 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[4]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[2]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8; +#endif +}cl_ushort8; + +typedef union +{ + cl_ushort CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; /* the six __spacer members hold positions 4-9, putting sa..sf at positions 10-15 like sA..sF */ + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_ushort8 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[8]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[4]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8[2]; +#endif +#if defined( __CL_USHORT16__ ) + __cl_ushort16 v16; +#endif +}cl_ushort16; + + +/* ---- cl_halfn ---- */ +typedef union +{ + cl_half CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_half lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2; +#endif +}cl_half2; + +typedef union +{ + cl_half CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_half2 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[2]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4; +#endif +}cl_half4; + +/* cl_half3 is identical in size, alignment and behavior to cl_half4. See section 6.1.5. 
*/ +typedef cl_half4 cl_half3; /* the 3-element type is a plain alias of the 4-element union */ + +typedef union +{ + cl_half CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; }; /* names only the first four of the eight elements */ + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_half4 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[4]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4[2]; +#endif +#if defined( __CL_HALF8__ ) + __cl_half8 v8; +#endif +}cl_half8; + +typedef union +{ + cl_half CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; /* the six __spacer members hold positions 4-9, putting sa..sf at positions 10-15 like sA..sF */ + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_half8 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[8]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4[4]; +#endif +#if defined( __CL_HALF8__ ) + __cl_half8 v8[2]; +#endif +#if defined( __CL_HALF16__ ) + __cl_half16 v16; +#endif +}cl_half16; + +/* ---- cl_intn ---- */ +typedef union +{ + cl_int CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_int lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2; +#endif +}cl_int2; + +typedef union +{ + cl_int CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_int2 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[2]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4; +#endif +}cl_int4; + +/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. 
*/ +typedef cl_int4 cl_int3; /* the 3-element type is a plain alias of the 4-element union */ + +typedef union +{ + cl_int CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; /* names only the first four of the eight elements */ + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_int4 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[4]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[2]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8; +#endif +}cl_int8; + +typedef union +{ + cl_int CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; /* the six __spacer members hold positions 4-9, putting sa..sf at positions 10-15 like sA..sF */ + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_int8 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[8]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[4]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8[2]; +#endif +#if defined( __CL_INT16__ ) + __cl_int16 v16; +#endif +}cl_int16; + + +/* ---- cl_uintn ---- */ +typedef union +{ + cl_uint CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_uint lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2; +#endif +}cl_uint2; + +typedef union +{ + cl_uint CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_uint2 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[2]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4; +#endif +}cl_uint4; + +/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. 
*/ +typedef cl_uint4 cl_uint3; /* the 3-element type is a plain alias of the 4-element union */ + +typedef union +{ + cl_uint CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; /* names only the first four of the eight elements */ + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_uint4 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[4]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[2]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8; +#endif +}cl_uint8; + +typedef union +{ + cl_uint CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; /* the six __spacer members hold positions 4-9, putting sa..sf at positions 10-15 like sA..sF */ + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_uint8 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[8]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[4]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8[2]; +#endif +#if defined( __CL_UINT16__ ) + __cl_uint16 v16; +#endif +}cl_uint16; + +/* ---- cl_longn ---- */ +typedef union +{ + cl_long CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_long lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2; +#endif +}cl_long2; + +typedef union +{ + cl_long CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_long2 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[2]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4; +#endif +}cl_long4; + +/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. 
*/ +typedef cl_long4 cl_long3; /* the 3-element type is a plain alias of the 4-element union */ + +typedef union +{ + cl_long CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; /* names only the first four of the eight elements */ + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_long4 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[4]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[2]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8; +#endif +}cl_long8; + +typedef union +{ + cl_long CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; /* the six __spacer members hold positions 4-9, putting sa..sf at positions 10-15 like sA..sF */ + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_long8 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[8]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[4]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8[2]; +#endif +#if defined( __CL_LONG16__ ) + __cl_long16 v16; +#endif +}cl_long16; + + +/* ---- cl_ulongn ---- */ +typedef union +{ + cl_ulong CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_ulong lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2; +#endif +}cl_ulong2; + +typedef union +{ + cl_ulong CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_ulong2 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[2]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4; +#endif +}cl_ulong4; + +/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. 
*/ +typedef cl_ulong4 cl_ulong3; /* the 3-element type is a plain alias of the 4-element union */ + +typedef union +{ + cl_ulong CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; /* names only the first four of the eight elements */ + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_ulong4 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[4]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[2]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8; +#endif +}cl_ulong8; + +typedef union +{ + cl_ulong CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; /* the six __spacer members hold positions 4-9, putting sa..sf at positions 10-15 like sA..sF */ + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_ulong8 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[8]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[4]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8[2]; +#endif +#if defined( __CL_ULONG16__ ) + __cl_ulong16 v16; +#endif +}cl_ulong16; + + +/* --- cl_floatn ---- */ + +typedef union +{ + cl_float CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_float lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2; +#endif +}cl_float2; + +typedef union +{ + cl_float CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_float2 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[2]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4; +#endif +}cl_float4; + +/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. 
*/ +typedef cl_float4 cl_float3; /* the 3-element type is a plain alias of the 4-element union */ + +typedef union +{ + cl_float CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; /* names only the first four of the eight elements */ + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_float4 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[4]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[2]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8; +#endif +}cl_float8; + +typedef union +{ + cl_float CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; /* the six __spacer members hold positions 4-9, putting sa..sf at positions 10-15 like sA..sF */ + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_float8 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[8]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[4]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8[2]; +#endif +#if defined( __CL_FLOAT16__ ) + __cl_float16 v16; +#endif +}cl_float16; + +/* --- cl_doublen ---- */ + +typedef union +{ + cl_double CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_double lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2; +#endif +}cl_double2; + +typedef union +{ + cl_double CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_double2 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[2]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4; +#endif +}cl_double4; + +/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. 
*/ +typedef cl_double4 cl_double3; /* the 3-element type is a plain alias of the 4-element union */ + +typedef union +{ + cl_double CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; /* names only the first four of the eight elements */ + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_double4 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[4]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[2]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8; +#endif +}cl_double8; + +typedef union +{ + cl_double CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; /* the six __spacer members hold positions 4-9, putting sa..sf at positions 10-15 like sA..sF */ + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_double8 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[8]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[4]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8[2]; +#endif +#if defined( __CL_DOUBLE16__ ) + __cl_double16 v16; +#endif +}cl_double16; + +/* Macro to facilitate debugging + * Usage: + * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. + * The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \" + * Each line thereafter of OpenCL C source must end with: \n\ + * The last line ends in "; + * + * Example: + * + * const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\ + * kernel void foo( int a, float * b ) \n\ + * { \n\ + * // my comment \n\ + * *b[ get_global_id(0)] = a; \n\ + * } \n\ + * "; + * + * This should correctly set up the line, (column) and file information for your source + * string so you can do source level debugging. 
+ */ +#define __CL_STRINGIFY( _x ) # _x +#define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x ) +#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" + +#ifdef __cplusplus +} +#endif + +#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +#elif defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) +#elif defined( _WIN32) && defined(_MSC_VER) && ! defined(__STDC__) + #if _MSC_VER >=1500 + #pragma warning( pop ) + #endif +#endif + +#endif /* __CL_PLATFORM_H */ diff --git a/repos/hello_gpgpu/src/hello_gpgpu/CL/cl_version.h b/repos/hello_gpgpu/src/hello_gpgpu/CL/cl_version.h new file mode 100644 index 0000000000..3844938d54 --- /dev/null +++ b/repos/hello_gpgpu/src/hello_gpgpu/CL/cl_version.h @@ -0,0 +1,81 @@ +/******************************************************************************* + * Copyright (c) 2018-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __CL_VERSION_H +#define __CL_VERSION_H + +/* Detect which version to target */ +#if !defined(CL_TARGET_OPENCL_VERSION) +#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. 
Defaulting to 300 (OpenCL 3.0)") +#define CL_TARGET_OPENCL_VERSION 300 +#endif +#if CL_TARGET_OPENCL_VERSION != 100 && \ + CL_TARGET_OPENCL_VERSION != 110 && \ + CL_TARGET_OPENCL_VERSION != 120 && \ + CL_TARGET_OPENCL_VERSION != 200 && \ + CL_TARGET_OPENCL_VERSION != 210 && \ + CL_TARGET_OPENCL_VERSION != 220 && \ + CL_TARGET_OPENCL_VERSION != 300 +#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220, 300). Defaulting to 300 (OpenCL 3.0)") +#undef CL_TARGET_OPENCL_VERSION +#define CL_TARGET_OPENCL_VERSION 300 +#endif + + +/* OpenCL Version */ +#if CL_TARGET_OPENCL_VERSION >= 300 && !defined(CL_VERSION_3_0) +#define CL_VERSION_3_0 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2) +#define CL_VERSION_2_2 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1) +#define CL_VERSION_2_1 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0) +#define CL_VERSION_2_0 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2) +#define CL_VERSION_1_2 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1) +#define CL_VERSION_1_1 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0) +#define CL_VERSION_1_0 1 +#endif + +/* Allow deprecated APIs for older OpenCL versions. 
*/ +#if CL_TARGET_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS) +#define CL_USE_DEPRECATED_OPENCL_2_2_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) +#define CL_USE_DEPRECATED_OPENCL_2_1_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) +#define CL_USE_DEPRECATED_OPENCL_2_0_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_0_APIS +#endif + +#endif /* __CL_VERSION_H */ diff --git a/repos/hello_gpgpu/src/hello_gpgpu/main.cc b/repos/hello_gpgpu/src/hello_gpgpu/main.cc index 3b04d77585..bd7f0c3324 100644 --- a/repos/hello_gpgpu/src/hello_gpgpu/main.cc +++ b/repos/hello_gpgpu/src/hello_gpgpu/main.cc @@ -1,12 +1,29 @@ #include #include +#include +#include +#include #include +#include "test.h" void Component::construct(Genode::Env &env) { gpgpu::Connection gpgpu(env); + // allocator + Genode::Heap heap(env.ram(), env.rm()); + Genode::Allocator_avl alloc(&heap); + const unsigned int size = 0x1000 * 0x100; + Genode::Ram_dataspace_capability ram_cap = env.ram().alloc(size); + Genode::addr_t mapped_base = env.rm().attach(ram_cap); + //Genode::addr_t base = Genode::Dataspace_client(ram_cap).phys_addr(); + alloc.add_range(mapped_base, size); + + // test RPC gpgpu.say_hello(); + // run the test and hope the best + run_gpgpu_test(alloc); + Genode::log("hello gpgpu completed"); } diff --git a/repos/hello_gpgpu/src/hello_gpgpu/target.mk b/repos/hello_gpgpu/src/hello_gpgpu/target.mk index 025cc5108e..899fd26bad 100644 --- a/repos/hello_gpgpu/src/hello_gpgpu/target.mk +++ b/repos/hello_gpgpu/src/hello_gpgpu/target.mk @@ 
-1,3 +1,3 @@ TARGET = hello_gpgpu -SRC_CC = main.cc +SRC_CC = main.cc test.cc CL/cl.cc LIBS = base diff --git a/repos/hello_gpgpu/src/hello_gpgpu/test.cc b/repos/hello_gpgpu/src/hello_gpgpu/test.cc new file mode 100644 index 0000000000..4a5ad953dc --- /dev/null +++ b/repos/hello_gpgpu/src/hello_gpgpu/test.cc @@ -0,0 +1,239 @@ +#include +#include +#define CL_TARGET_OPENCL_VERSION 100 +#include "CL/cl.h" + +static unsigned char test_Gen9core_gen[] = { + 0x43, 0x54, 0x4e, 0x49, 0x2e, 0x04, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x4c, 0x04, 0x96, 0x2a, 0x25, 0xad, 0x06, 0x1f, + 0x99, 0x00, 0x72, 0x8d, 0x08, 0x00, 0x00, 0x00, 0xac, 0x03, 0x00, 0x00, + 0x80, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x88, 0x00, 0x00, 0x00, 0xe0, 0x00, 0x00, 0x00, 0x63, 0x6c, 0x6d, 0x61, + 0x69, 0x6e, 0x00, 0x00, 0x01, 0x00, 0x60, 0x00, 0x0c, 0x02, 0x60, 0x20, + 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x80, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x30, 0x00, 0x10, 0x00, 0x16, 0xc0, 0x04, 0xc0, 0x04, + 0x41, 0x00, 0x00, 0x00, 0x2c, 0x0a, 0x80, 0x20, 0x10, 0x01, 0x00, 0x0a, + 0x64, 0x00, 0x00, 0x00, 0x01, 0x4d, 0x00, 0x20, 0x07, 0x7f, 0x03, 0x00, + 0x40, 0x00, 0x80, 0x00, 0x28, 0x0a, 0xa0, 0x20, 0x80, 0x00, 0x00, 0x12, + 0x20, 0x00, 0xb1, 0x00, 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x20, 0x21, + 0x80, 0x00, 0x00, 0x12, 0x40, 0x00, 0xb1, 0x00, 0x40, 0x96, 0x01, 0x20, + 0x07, 0x05, 0x05, 0x07, 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x20, 0x21, + 0x20, 0x01, 0x8d, 0x0a, 0xe0, 0x00, 0x00, 0x00, 0x09, 0x00, 0x80, 0x00, + 0x28, 0x0a, 0xa0, 0x20, 0xa0, 0x00, 0x8d, 0x1e, 0x02, 0x00, 0x02, 0x00, + 0x09, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x20, 0x21, 0x20, 0x01, 0x8d, 0x1e, + 0x02, 0x00, 0x02, 0x00, 0x31, 0x00, 0x80, 0x0c, 0x68, 0x02, 0x60, 0x21, + 0xa0, 0x00, 0x00, 0x06, 0x00, 0x5e, 0x20, 0x04, 0x31, 0x20, 0x80, 0x0c, + 0x68, 0x02, 0xa0, 0x21, 0x20, 0x01, 0x00, 0x06, 0x00, 0x5e, 
0x20, 0x04, + 0x33, 0x00, 0x80, 0x0c, 0x70, 0xb0, 0x00, 0x00, 0xa2, 0x00, 0x00, 0x00, + 0x01, 0x5e, 0x02, 0x04, 0x33, 0x20, 0x80, 0x0c, 0x70, 0xd0, 0x00, 0x00, + 0x22, 0x01, 0x00, 0x00, 0x01, 0x5e, 0x02, 0x04, 0x31, 0x00, 0x60, 0x07, + 0x04, 0x02, 0x00, 0x20, 0xe0, 0x0f, 0x00, 0x06, 0x10, 0x00, 0x00, 0x82, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0xc0, 0xff, 0x83, 0x00, 0x00, 0x00, 0x03, 0x7f, 0x00, 0xff, 0x1f, + 0x00, 0x00, 0xe0, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xff, 0x83, 0x00, 0x00, 0x00, 0x03, + 0x7f, 0x00, 0xff, 0x1f, 0x00, 0x00, 0xe0, 0x0f, 0x00, 0x00, 
0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x2b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 
0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, + 0x00, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, + 0x69, 0x6e, 0x00, 0x00, 0x75, 0x69, 0x6e, 0x74, 0x2a, 0x3b, 0x38, 0x00, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, + 0x00, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 
0x00, 0x00, + 0x6f, 0x75, 0x74, 0x00, 0x75, 0x69, 0x6e, 0x74, 0x2a, 0x3b, 0x38, 0x00, + 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00 +}; +static unsigned int test_Gen9core_gen_len = 1568; + +#define ELEMENTS 4096 +void run_gpgpu_test(Genode::Allocator_avl& alloc) +{ + clInitGenode(alloc); + const int num = 0x42; + volatile uint32_t* m_out; + uint32_t* m_in; + + // allocate buffers + alloc.alloc(ELEMENTS * sizeof(uint32_t), (void**)&m_in); + alloc.alloc(ELEMENTS * sizeof(uint32_t), (void**)&m_out); + + for(int i = 0; i < ELEMENTS; i++) + { + m_in[i] = num; + m_out[i] = 0; + } + + cl_platform_id platform_id; + cl_device_id device_id; + cl_uint num_devices; + cl_uint num_platforms; + cl_int errcode; + cl_context clContext; + cl_kernel clKernel; + cl_command_queue clCommandQue; + cl_program clProgram; + cl_mem clInBuff; + cl_mem clOutBuff; + + // init opencl stuff + errcode = clGetPlatformIDs(1, &platform_id, &num_platforms); + if(errcode != CL_SUCCESS) Genode::log("Error in number of platforms"); + errcode = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &num_devices); + if(errcode != CL_SUCCESS) Genode::log("Error in number of devices"); + clContext = clCreateContext( NULL, 1, &device_id, NULL, NULL, &errcode); + if(errcode != CL_SUCCESS) Genode::log("Error in creating context"); + clCommandQue = clCreateCommandQueue(clContext, device_id, 0, &errcode); + if(errcode != CL_SUCCESS) Genode::log("Error in creating command queue"); + + // allocate opencl buffers + clInBuff = clCreateBuffer(clContext, CL_MEM_READ_WRITE, ELEMENTS * sizeof(uint32_t), NULL, &errcode); + if(errcode != CL_SUCCESS) Genode::log("Error in creating buffer"); + clOutBuff = clCreateBuffer(clContext, CL_MEM_READ_WRITE, ELEMENTS * sizeof(uint32_t), NULL, &errcode); + if(errcode != CL_SUCCESS) Genode::log("Error in creating buffer"); + + // init buffers + errcode = clEnqueueWriteBuffer(clCommandQue, clInBuff, CL_TRUE, 0, ELEMENTS * sizeof(uint32_t), m_in, 0, NULL, NULL); + if(errcode 
!= CL_SUCCESS) Genode::log("Error in writing to buffer"); + errcode = clEnqueueWriteBuffer(clCommandQue, clOutBuff, CL_TRUE, 0, ELEMENTS * sizeof(uint32_t), (uint32_t*)m_out, 0, NULL, NULL); + if(errcode != CL_SUCCESS) Genode::log("Error in writing to buffer"); + + // create a program from the kernel source + const size_t kernel_size = test_Gen9core_gen_len; + const unsigned char* kernel_bin = test_Gen9core_gen; + clProgram = clCreateProgramWithBinary(clContext, 1, &device_id, &kernel_size, &kernel_bin, NULL, &errcode); + if(errcode != CL_SUCCESS) Genode::log("Error in loading binary"); + + // build the program + errcode = clBuildProgram(clProgram, 1, &device_id, NULL, NULL, NULL); + if(errcode != CL_SUCCESS) Genode::log("Error in building program"); + + // create the OpenCL kernel + clKernel = clCreateKernel(clProgram, "clmain", &errcode); + if(errcode != CL_SUCCESS) Genode::log("Error in creating kernel"); + + // set kernel args + errcode = clSetKernelArg(clKernel, 0, sizeof(cl_mem), (void *)&clInBuff); + if(errcode != CL_SUCCESS) Genode::log("Error in setting kernel arg"); + errcode = clSetKernelArg(clKernel, 1, sizeof(cl_mem), (void *)&clOutBuff); + if(errcode != CL_SUCCESS) Genode::log("Error in setting kernel arg"); + + // launch the kernel + size_t globalWorkSize = ELEMENTS; + errcode = clEnqueueNDRangeKernel(clCommandQue, clKernel, 1, NULL, &globalWorkSize, NULL, 0, NULL, NULL); + if(errcode != CL_SUCCESS) Genode::log("Error in launching kernel"); + + // wait for finish + clFinish(clCommandQue); + + // read result back + errcode = clEnqueueReadBuffer(clCommandQue, clOutBuff, CL_TRUE, 0, ELEMENTS * sizeof(uint32_t), (void*)m_out, 0, NULL, NULL); + if(errcode != CL_SUCCESS) Genode::log("Error in reading GPU mem"); + + uint32_t errors = 0; + for(int i = 0; i < ELEMENTS; i++) + { + if(m_out[i] != num) + { + //LOG_INFO("Error at Item " << i << " val: " << m_out[i]); + errors++; + } + } + Genode::log("Task has finished with ", errors, " errors!"); + + // free 
buffers + alloc.free(m_in); + alloc.free((void*)m_out); +} diff --git a/repos/hello_gpgpu/src/hello_gpgpu/test.h b/repos/hello_gpgpu/src/hello_gpgpu/test.h new file mode 100644 index 0000000000..1ecef647b7 --- /dev/null +++ b/repos/hello_gpgpu/src/hello_gpgpu/test.h @@ -0,0 +1,12 @@ +#ifndef TEST_H +#define TEST_H + +#include + +/** + * @brief run a test kernel + * + */ +void run_gpgpu_test(Genode::Allocator_avl& alloc); + +#endif // TEST_H From 4c9678ea55038a5e7a4fd4ed8441548d6e3d00f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20L=C3=BCtke=20Dreimann?= Date: Mon, 11 Jul 2022 16:40:57 +0200 Subject: [PATCH 06/14] added missing header file --- repos/dde_uos-intel-gpgpu/include/gpgpu/gpgpu.h | 1 + 1 file changed, 1 insertion(+) create mode 100644 repos/dde_uos-intel-gpgpu/include/gpgpu/gpgpu.h diff --git a/repos/dde_uos-intel-gpgpu/include/gpgpu/gpgpu.h b/repos/dde_uos-intel-gpgpu/include/gpgpu/gpgpu.h new file mode 100644 index 0000000000..b8b60061bf --- /dev/null +++ b/repos/dde_uos-intel-gpgpu/include/gpgpu/gpgpu.h @@ -0,0 +1 @@ +#include "../../src/uos-intel-gpgpu/driver/gpgpu_driver.h" From 15a01c011fcd4d5e2cdc7342ad7dbcce0bfcba30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20L=C3=BCtke=20Dreimann?= Date: Tue, 12 Jul 2022 15:52:28 +0200 Subject: [PATCH 07/14] patched ocl and example code --- repos/hello_gpgpu/src/hello_gpgpu/CL/cl.cc | 70 ++++++++++++---------- repos/hello_gpgpu/src/hello_gpgpu/test.cc | 12 ++++ 2 files changed, 50 insertions(+), 32 deletions(-) diff --git a/repos/hello_gpgpu/src/hello_gpgpu/CL/cl.cc b/repos/hello_gpgpu/src/hello_gpgpu/CL/cl.cc index 1757163328..e73ecdb28d 100644 --- a/repos/hello_gpgpu/src/hello_gpgpu/CL/cl.cc +++ b/repos/hello_gpgpu/src/hello_gpgpu/CL/cl.cc @@ -10,6 +10,12 @@ extern "C" { #endif +struct _cl_mem +{ + struct buffer_config bc; + bool ocl_allocated; +}; + /* Genode */ static Genode::Allocator_avl* genode_allocator; extern CL_API_ENTRY void CL_API_CALL @@ -246,8 +252,7 @@ clRetainContext(cl_context 
context) CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context context) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); - return CL_INVALID_VALUE; + return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL @@ -301,8 +306,7 @@ clRetainCommandQueue(cl_command_queue command_queue) CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue command_queue) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); - return CL_INVALID_VALUE; + return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL @@ -324,18 +328,23 @@ clCreateBuffer(cl_context context, void * host_ptr, cl_int * errcode_ret) { + cl_mem clmem; + genode_allocator->alloc(sizeof(struct _cl_mem), (void**)&clmem); if(host_ptr == NULL) { genode_allocator->alloc_aligned(size, &host_ptr, 0x1000); + clmem->ocl_allocated = true; + } + else + { + clmem->ocl_allocated = false; } - struct buffer_config* bc; - genode_allocator->alloc(sizeof(struct buffer_config), (void**)&bc); - bc->buffer = host_ptr; - bc->buffer_size = size; + clmem->bc.buffer = host_ptr; + clmem->bc.buffer_size = size; *errcode_ret = CL_SUCCESS; - return (cl_mem)bc; + return clmem; } #ifdef CL_VERSION_1_1 @@ -424,9 +433,11 @@ clRetainMemObject(cl_mem memobj) CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem memobj) { - struct buffer_config* bc = (struct buffer_config*)memobj; - genode_allocator->free(bc->buffer); - genode_allocator->free(bc); + if(memobj->ocl_allocated && !memobj->bc.non_pointer_type) + { + genode_allocator->free(memobj->bc.buffer); + } + genode_allocator->free(memobj); return CL_SUCCESS; } @@ -583,13 +594,8 @@ clCreateProgramWithBinary(cl_context context, return NULL; } - struct kernel_config* kc; - genode_allocator->alloc(sizeof(struct kernel_config), (void**)&kc); - - kc->binary = (uint8_t*)binaries[0]; - *errcode_ret = CL_SUCCESS; - return (cl_program)kc; + return (cl_program)binaries[0]; } #ifdef CL_VERSION_1_2 @@ -631,9 +637,7 @@ clRetainProgram(cl_program program) CL_API_ENTRY 
cl_int CL_API_CALL clReleaseProgram(cl_program program) { - struct kernel_config* kc = (struct kernel_config*)program; - genode_allocator->free(kc->buffConfigs); - genode_allocator->free(kc); + return CL_SUCCESS; } @@ -749,7 +753,10 @@ clCreateKernel(cl_program program, const char * kernel_name, cl_int * errcode_ret) { - struct kernel_config* kc = (struct kernel_config*)program; + // create kernel and set binary + struct kernel_config* kc; + genode_allocator->alloc(sizeof(struct kernel_config), (void**)&kc); + kc->binary = (uint8_t*)program; // preallocated 32 buff configs; genode_allocator->alloc(32 * sizeof(struct buffer_config), (void**)&kc->buffConfigs); @@ -793,8 +800,10 @@ clRetainKernel(cl_kernel kernel) CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel kernel) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); - return CL_INVALID_VALUE; + struct kernel_config* kc = (struct kernel_config*)kernel; + genode_allocator->free(kc->buffConfigs); + genode_allocator->free(kc); + return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL @@ -810,8 +819,8 @@ clSetKernelArg(cl_kernel kernel, if(arg_size == sizeof(cl_mem)) { - struct buffer_config** bc = (struct buffer_config**)arg_value; - kc->buffConfigs[arg_index] = **bc; + cl_mem* clmem = (cl_mem*)arg_value; + kc->buffConfigs[arg_index] = (*clmem)->bc; } else { @@ -994,8 +1003,7 @@ clGetEventProfilingInfo(cl_event event, CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue command_queue) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); - return CL_INVALID_VALUE; + return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL @@ -1022,8 +1030,7 @@ clEnqueueReadBuffer(cl_command_queue command_queue, return CL_INVALID_VALUE; } - struct buffer_config* bc = (struct buffer_config*)buffer; - uint8_t* src = (uint8_t*)bc->buffer; + uint8_t* src = (uint8_t*)buffer->bc.buffer; uint8_t* dst = (uint8_t*)ptr; for(size_t i = 0; i < size; i++) { @@ -1073,9 +1080,8 @@ clEnqueueWriteBuffer(cl_command_queue 
command_queue, return CL_INVALID_VALUE; } - struct buffer_config* bc = (struct buffer_config*)buffer; uint8_t* src = (uint8_t*)ptr; - uint8_t* dst = (uint8_t*)bc->buffer; + uint8_t* dst = (uint8_t*)buffer->bc.buffer; for(size_t i = 0; i < size; i++) { dst[i] = src[i]; diff --git a/repos/hello_gpgpu/src/hello_gpgpu/test.cc b/repos/hello_gpgpu/src/hello_gpgpu/test.cc index 4a5ad953dc..28802d2cd0 100644 --- a/repos/hello_gpgpu/src/hello_gpgpu/test.cc +++ b/repos/hello_gpgpu/src/hello_gpgpu/test.cc @@ -232,6 +232,18 @@ void run_gpgpu_test(Genode::Allocator_avl& alloc) } } Genode::log("Task has finished with ", errors, " errors!"); + + // free stuff + errcode = clReleaseKernel(clKernel); + if(errcode != CL_SUCCESS) Genode::log("Error in releasing kernel"); + errcode = clReleaseMemObject(clInBuff); + if(errcode != CL_SUCCESS) Genode::log("Error in releasing mem obj"); + errcode = clReleaseMemObject(clOutBuff); + if(errcode != CL_SUCCESS) Genode::log("Error in releasing mem obj"); + errcode = clReleaseCommandQueue(clCommandQue); + if(errcode != CL_SUCCESS) Genode::log("Error in releasing command queue"); + errcode = clReleaseContext(clContext); + if(errcode != CL_SUCCESS) Genode::log("Error in releasing context"); // free buffers alloc.free(m_in); From 15a51fc4f2821e73f880a6071112c7acc239274f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20L=C3=BCtke=20Dreimann?= Date: Tue, 12 Jul 2022 17:33:13 +0200 Subject: [PATCH 08/14] clone gpgpu driver via https --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 84f727fe1d..f28f5f244b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu"] path = repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu - url = git@ess-git.inf.uos.de:software/uos-intel-gpgpu.git + url = https://ess.cs.uos.de/git/software/uos-intel-gpgpu.git From 58d8e7ca909cf01a029183ae90e0cac1917e68c3 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Marcel=20L=C3=BCtke=20Dreimann?= Date: Mon, 18 Jul 2022 12:07:19 +0200 Subject: [PATCH 09/14] updated gpgpu driver --- repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu b/repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu index 7b9c018e48..6f6cf6fb0a 160000 --- a/repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu +++ b/repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu @@ -1 +1 @@ -Subproject commit 7b9c018e4858d08eaa6bb2e12c707e1e45740c81 +Subproject commit 6f6cf6fb0abf6442060adbe9a95cb01f5c7aa2d9 From 024e774e46708b32df60fe97091be105ced9605f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20L=C3=BCtke=20Dreimann?= Date: Mon, 18 Jul 2022 12:08:27 +0200 Subject: [PATCH 10/14] fixed code for current genode version --- .../src/gpgpu/gpgpu_genode.cc | 33 +++++++++++++++---- .../src/gpgpu/gpgpu_genode.h | 8 ++--- repos/dde_uos-intel-gpgpu/src/gpgpu/main.cc | 4 +++ repos/dde_uos-intel-gpgpu/src/gpgpu/stubs.cc | 4 +-- 4 files changed, 36 insertions(+), 13 deletions(-) diff --git a/repos/dde_uos-intel-gpgpu/src/gpgpu/gpgpu_genode.cc b/repos/dde_uos-intel-gpgpu/src/gpgpu/gpgpu_genode.cc index 0bb6ba702c..db85399272 100644 --- a/repos/dde_uos-intel-gpgpu/src/gpgpu/gpgpu_genode.cc +++ b/repos/dde_uos-intel-gpgpu/src/gpgpu/gpgpu_genode.cc @@ -20,9 +20,22 @@ gpgpu_genode::gpgpu_genode(Env& e) : env(e), heap{ e.ram(), e.rm() }, alloc(&hea const unsigned long size = 0x1000 * 0x1000; // allocate chunk of ram - ram_cap = e.ram().alloc(size); + //ram_cap = e.ram().alloc(size); + size_t donate = size; + ram_cap = + retry( + [&] () { + return retry( + [&] () { return pci.alloc_dma_buffer(size, UNCACHED); }, + [&] () { pci.upgrade_caps(2); }); + }, + [&] () { + pci.upgrade_ram(donate); + donate = donate * 2 > size ? 
4096 : donate * 2; + }); mapped_base = e.rm().attach(ram_cap); - base = Dataspace_client(ram_cap).phys_addr(); + base = pci.dma_addr(ram_cap); + //base = Dataspace_client(ram_cap).phys_addr(); // use this ram for allocator alloc.add_range(mapped_base, size); @@ -37,9 +50,15 @@ gpgpu_genode::~gpgpu_genode() void* gpgpu_genode::aligned_alloc(uint32_t alignment, uint32_t size) { - void* ptr; - alloc.alloc_aligned(size, &ptr, alignment); - return ptr; + return alloc.alloc_aligned(size, alignment).convert( + + [&] (void *ptr) { return ptr; }, + + [&] (Genode::Range_allocator::Alloc_error) -> void * { + Genode::error("[GPU] Error in driver allocation!"); + return nullptr; + } + ); } void gpgpu_genode::free(void* addr) @@ -78,13 +97,13 @@ void gpgpu_genode::createPCIConnection(uint8_t bus, uint8_t device, uint8_t func } } -uint32_t gpgpu_genode::readPCI(uint32_t addr) +uint32_t gpgpu_genode::readPCI(uint8_t addr) { Platform::Device_client client(dev); return client.config_read(addr, Platform::Device::ACCESS_32BIT); } -void gpgpu_genode::writePCI(uint32_t addr, uint32_t val) +void gpgpu_genode::writePCI(uint8_t addr, uint32_t val) { Platform::Device_client client(dev); pci.with_upgrade([&] () { diff --git a/repos/dde_uos-intel-gpgpu/src/gpgpu/gpgpu_genode.h b/repos/dde_uos-intel-gpgpu/src/gpgpu/gpgpu_genode.h index b448f3e988..be7a60f5f8 100644 --- a/repos/dde_uos-intel-gpgpu/src/gpgpu/gpgpu_genode.h +++ b/repos/dde_uos-intel-gpgpu/src/gpgpu/gpgpu_genode.h @@ -11,8 +11,8 @@ using namespace Genode; #include // pci -#include -#include +#include +#include #include #include @@ -107,7 +107,7 @@ public: * @param addr the address to read from * @return uint32_t the value */ - uint32_t readPCI(uint32_t addr); + uint32_t readPCI(uint8_t addr); /** * @brief write to pci config space (some register are protected by genode!) 
@@ -115,7 +115,7 @@ public: * @param addr the address to write to * @param val the value to write */ - void writePCI(uint32_t addr, uint32_t val); + void writePCI(uint8_t addr, uint32_t val); /** * @brief Get the Virt Bar Addr object diff --git a/repos/dde_uos-intel-gpgpu/src/gpgpu/main.cc b/repos/dde_uos-intel-gpgpu/src/gpgpu/main.cc index 61eb3d78bd..045a24463d 100644 --- a/repos/dde_uos-intel-gpgpu/src/gpgpu/main.cc +++ b/repos/dde_uos-intel-gpgpu/src/gpgpu/main.cc @@ -33,6 +33,10 @@ void Component::construct(Genode::Env& e) { Genode::error("mem alignment failed: ", addr); } + if(virt_to_phys(test) == nullptr) + { + Genode::error("mem phys addr NULL"); + } for(int i = 0; i < 0x1000; i++) { test[i] = 0x42; diff --git a/repos/dde_uos-intel-gpgpu/src/gpgpu/stubs.cc b/repos/dde_uos-intel-gpgpu/src/gpgpu/stubs.cc index 97036aa517..e1eb8ba784 100644 --- a/repos/dde_uos-intel-gpgpu/src/gpgpu/stubs.cc +++ b/repos/dde_uos-intel-gpgpu/src/gpgpu/stubs.cc @@ -47,12 +47,12 @@ extern "C" uint32_t calculatePCIConfigHeaderAddress(uint8_t bus, uint8_t device, extern "C" uint32_t readPCIConfigSpace(uint32_t addr) { - return _global_gpgpu_genode->readPCI(addr); + return _global_gpgpu_genode->readPCI((uint8_t)addr); } extern "C" void writePCIConfigSpace(uint32_t address, uint32_t value) { - _global_gpgpu_genode->writePCI(address, value); + _global_gpgpu_genode->writePCI((uint8_t)address, value); } // address model From b9c3f297401cfea98a388c9062d8409b8df2af52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20M=C3=BCller?= Date: Mon, 18 Jul 2022 12:37:37 +0200 Subject: [PATCH 11/14] Basic structure for GPGPU service. 
--- repos/mml/include/kiihdytin/gpgpu/client.h | 44 +++++++++ .../mml/include/kiihdytin/gpgpu/connection.h | 30 ++++++ repos/mml/include/kiihdytin/gpgpu/kernel.h | 46 ++++++++++ repos/mml/include/kiihdytin/gpgpu/scheduler.h | 92 +++++++++++++++++++ repos/mml/include/kiihdytin/gpgpu/service.h | 44 +++++++++ repos/mml/include/kiihdytin/gpgpu/session.h | 54 +++++++++++ .../kiihdytin/gpgpu/session_component.h | 31 +++++++ repos/mml/include/kiihdytin/gpgpu/vgpu.h | 73 +++++++++++++++ 8 files changed, 414 insertions(+) create mode 100644 repos/mml/include/kiihdytin/gpgpu/client.h create mode 100644 repos/mml/include/kiihdytin/gpgpu/connection.h create mode 100644 repos/mml/include/kiihdytin/gpgpu/kernel.h create mode 100644 repos/mml/include/kiihdytin/gpgpu/scheduler.h create mode 100644 repos/mml/include/kiihdytin/gpgpu/service.h create mode 100644 repos/mml/include/kiihdytin/gpgpu/session.h create mode 100644 repos/mml/include/kiihdytin/gpgpu/session_component.h create mode 100644 repos/mml/include/kiihdytin/gpgpu/vgpu.h diff --git a/repos/mml/include/kiihdytin/gpgpu/client.h b/repos/mml/include/kiihdytin/gpgpu/client.h new file mode 100644 index 0000000000..6965a0ab49 --- /dev/null +++ b/repos/mml/include/kiihdytin/gpgpu/client.h @@ -0,0 +1,44 @@ +/* + * \brief CLient-side interface to a GPGPU session + * \author Michael Müller + * \date 2022-07-17 + */ + +/* + * Copyright (C) 2022 Michael Müller + * + * This file is distributed under the terms of the + * GNU Affero General Public License version 3. 
+ */ +#pragma once + +#include +#include +#include "session.h" +#include "kernel.h" + +namespace Kiihdytin::GPGPU { + struct Session_client; +} + +struct Kiihdytin::GPGPU::Session_client : Genode::Rpc_client +{ + Session_client(Genode::Capability cap) + : Genode::Rpc_client(cap) { } + + void enqueue_kernel(Kernel &kernel) override { + call(kernel); + } + + void wait_for_kernel(Kernel &kernel) override { + call(kernel); + } + + void abort_kernel(Kernel &kernel) override { + call(kernel); + } + + void remove_kernel(Kernel &kernel) override { + call(kernel); + } +}; \ No newline at end of file diff --git a/repos/mml/include/kiihdytin/gpgpu/connection.h b/repos/mml/include/kiihdytin/gpgpu/connection.h new file mode 100644 index 0000000000..aacb9d710b --- /dev/null +++ b/repos/mml/include/kiihdytin/gpgpu/connection.h @@ -0,0 +1,30 @@ +/* + * \brief Connection to GPGPU session + * \author Michael Müller + * \date 2022-07-17 + */ + +/* + * Copyright (C) 2022 Michael Müller + * + * This file is distributed under the terms of the + * GNU Affero General Public License version 3. + */ + +#include "client.h" +#include + +namespace Kiihdytin::GPGPU { struct Connection; } + + +struct Kiihdytin::GPGPU::Connection : Genode::Connection, Session_client +{ + Connection(Genode::Env &env) + : + /* create session */ + Genode::Connection(env, session(env.parent(), + "ram_quota=6K, cap_quota=4")), // TODO: determine correct ram and cap quota + + /* initialize RPC interface */ + Session_client(cap()) { } +}; diff --git a/repos/mml/include/kiihdytin/gpgpu/kernel.h b/repos/mml/include/kiihdytin/gpgpu/kernel.h new file mode 100644 index 0000000000..f1d52bd0d4 --- /dev/null +++ b/repos/mml/include/kiihdytin/gpgpu/kernel.h @@ -0,0 +1,46 @@ +/* + * \brief Definition of a GPGPU kernel, i.e. 
OpenCL-slang for an executable unit of code for an OpenCL device + * \author Michael Müller + * \date 2022-07-15 + */ + +/* + * Copyright (C) 2022 Michael Müller + * + * This file is distributed under the terms of the + * GNU Affero General Public License version 3. + */ +#pragma once + +#include +#include +#include +#include + +namespace Kiihdytin::GPGPU { + +typedef uint8_t Kernel_image; + /** + * @class This class represents an OpenCL kernel + * + */ + class Kernel : public Chain + { + private: + struct kernel_config _configuration; + Kernel_image *_image; + + public: + /** + * @brief get configuration for this kernel + * @return reference to kernel configuration + */ + inline struct kernel_config &get_config() { return _configuration; } + + /** + * @brief get pointer to kernel image + * @return pointer to kernel's binary image + */ + inline Kernel_image *get_image() { return _image; } + }; +} \ No newline at end of file diff --git a/repos/mml/include/kiihdytin/gpgpu/scheduler.h b/repos/mml/include/kiihdytin/gpgpu/scheduler.h new file mode 100644 index 0000000000..137ee4cfdd --- /dev/null +++ b/repos/mml/include/kiihdytin/gpgpu/scheduler.h @@ -0,0 +1,92 @@ +/* + * \brief Scheduler interface for the GPGPU, select which vGPU to choose next. + * \author Michael Müller + * \date 2022-07-15 + */ + +/* + * Copyright (C) 2022 Michael Müller + * + * This file is distributed under the terms of the + * GNU Affero General Public License version 3. + */ +#pragma once + +#include +#include +#include +#include "vgpu.h" +#include "kernel.h" + +namespace Kiihdytin::GPGPU { + + class Scheduler + { + private: + VGpu *_curr_vgpu; + // GPGPU_Driver _driver; /* TODO: Use driver session */ + WFQueue _run_list; + /* TODO: Define handler object for GPGPU driver session to receive interrupts. */ + + public: + + Scheduler() + { + // TODO: Initialize GPU driver + + // TODO: Register interrupt/event handler for the GPGPU driver session. 
+ } + + /** + * @brief Select next vGPU from run list + * @details At the moment, round-robin is the only implemented strategy. + * TODO: Implement interface for strategies and strategies * + */ + void schedule_next() { + VGpu *next; + + if ((next = static_cast(_run_list.dequeue()))) { + this->dispatch(*next); + _curr_vgpu = next; + _run_list.enqueue(next); + } else + _curr_vgpu = nullptr; + } + + /** + * @brief Switch to new vGPU's context + * + * @param vgpu - vGPU to switch to + */ + void dispatch(VGpu &vgpu) { + // TODO: Implement context switch using GPGPU driver + } + + /** + * @brief Implmentation for the handling of events from the GPU + * @details The handler is especially important for scheduling the next vGPU and for + * executing kernels. It is the target for interrupts coming from the GPGPU driver, e.g. when + * a kernel has finished its execution. + */ + void handle_gpu_event() { + // TODO: Check for error conditions + + // TODO: Handle finish of kernel + + /* Switch to next vGPU in the run list */ + schedule_next(); + + /* If no vGPU to schedule, this means that we don't have any clients anymore. + * Thus, there are also no kernels anymore to run. */ + if (_curr_vgpu == nullptr) + return; + + Kernel *next = _curr_vgpu->take_kernel(); + + if (!next) /* If there is no kernel for the vGPU left */ + schedule_next(); /* pick the next vGPU, maybe it has got some kernels for us. */ + + // TODO: execute kernel using GPGPU driver + } + }; +} \ No newline at end of file diff --git a/repos/mml/include/kiihdytin/gpgpu/service.h b/repos/mml/include/kiihdytin/gpgpu/service.h new file mode 100644 index 0000000000..c7d83296d3 --- /dev/null +++ b/repos/mml/include/kiihdytin/gpgpu/service.h @@ -0,0 +1,44 @@ +/* + * \brief Definition of the GPGPU service's root component + * \author Michael Müller + * \date 2022-07-17 + */ + +/* + * Copyright (C) 2022 Michael Müller + * + * This file is distributed under the terms of the + * GNU Affero General Public License version 3. 
+ */ +#pragma once + +#include +#include +#include +#include +#include "session_component.h" + + +namespace Kiihdytin::GPGPU { + class Service; +} + +/** + * @brief The GPGPU service provides multiplexed accesses to GPGPU functionality to its clients. + * + */ + +class Kiihdytin::GPGPU::Session : public Genode::Root_component +{ + private: + Genode::List sessions; + + protected: + + Session_component *_create_session(const char*) override { + return new (md_alloc()) Session_component(); + } + + public: + Session(Genode::Entrypoint &ep, Genode::Allocator &alloc) : Genode::Root_component(ep, alloc) {} +}; \ No newline at end of file diff --git a/repos/mml/include/kiihdytin/gpgpu/session.h b/repos/mml/include/kiihdytin/gpgpu/session.h new file mode 100644 index 0000000000..b795823280 --- /dev/null +++ b/repos/mml/include/kiihdytin/gpgpu/session.h @@ -0,0 +1,54 @@ +/* + * \brief Interface definition of a GPU session + * \author Michael Müller + * \date 2022-07-17 + */ + +/* + * Copyright (C) 2022 Michael Müller + * + * This file is distributed under the terms of the + * GNU Affero General Public License version 3. 
+ */ +#pragma once + +#include +#include +#include "vgpu.h" + + namespace Kiihdytin::GPGPU { + class Session; +} + +class Kiihdytin::GPGPU::Session : Genode::Session +{ + + private: + VGpu &vgpu; + + VGpu& create_vgpu(); + PPGTT32& create_ppgtt(); + + + public: + static const char *service_name() { return "Kiihdytin::GPGPU"; } + + enum { CAP_QUOTA = 2 }; // TODO: determine actual cap quota + + Session() : vgpu(create_vgpu()) {} + + /* Backend methods */ + virtual void enqueue_kernel(Kernel &kernel) = 0; + virtual void wait_for_kernel(Kernel &kernel) = 0; + virtual void abort_kernel(Kernel &kernel) = 0; + virtual void remove_kernel(Kernel &kernel) = 0; + + /* RPC interface */ + + GENODE_RPC(Rpc_enqueue_kernel, void, enqueue_kernel, Kernel&); + GENODE_RPC(Rpc_wait_for_kernel, void, wait_for_kernel, Kernel &); + GENODE_RPC(Rpc_abort_kernel, void, abort_kernel, Kernel &); + GENODE_RPC(Rpc_remove_kernel, void, remove_kernel, Kernel &); + + GENODE_RPC_INTERFACE(Rpc_enqueue_kernel, Rpc_remove_kernel, Rpc_wait_for_kernel, Rpc_abort_kernel); +}; \ No newline at end of file diff --git a/repos/mml/include/kiihdytin/gpgpu/session_component.h b/repos/mml/include/kiihdytin/gpgpu/session_component.h new file mode 100644 index 0000000000..7db29a7369 --- /dev/null +++ b/repos/mml/include/kiihdytin/gpgpu/session_component.h @@ -0,0 +1,31 @@ +/* + * \brief RPC object for a GPGPU session + * \author Michael Müller + * \date 2022-07-17 + */ + +/* + * Copyright (C) 2022 Michael Müller + * + * This file is distributed under the terms of the + * GNU Affero General Public License version 3. 
+ */ +#pragma once + +#include +#include +#include "session.h" + +namespace Kiihdytin::GPGPU { + class Session_component; +} + +class Kiihdytin::GPGPU::Session_component : public Genode::Rpc_object +{ + public: + void enqueue_kernel(Kernel &kernel) override; + void wait_for_kernel(Kernel &kernel) override; + void abort_kernel(Kernel &kernel) override; + void remove_kernel(Kernel &kernel) override; + +}; \ No newline at end of file diff --git a/repos/mml/include/kiihdytin/gpgpu/vgpu.h b/repos/mml/include/kiihdytin/gpgpu/vgpu.h new file mode 100644 index 0000000000..b8e33a2619 --- /dev/null +++ b/repos/mml/include/kiihdytin/gpgpu/vgpu.h @@ -0,0 +1,73 @@ +/* + * \brief Representation of a "virtual" GPU, used as abstraction for the real thing. + * \author Michael Müller + * \date 2022-07-15 + */ + +/* + * Copyright (C) 2022 Michael Müller + * + * This file is distributed under the terms of the + * GNU Affero General Public License version 3. + */ +#pragma once + +#include +#include "kernel.h" +#include +#include +#include + + namespace Kiihdytin::GPGPU { + + class Context; + + class VGpu : public Chain + { + private: + // Context _context; TODO: implement context images + PPGTT32 &_ppgtt; + WFQueue _ready_list; + + public: + /** + * @brief Construct a new VGpu object + * + * @param ppgtt - PPGTT mapping phyisical addresses from the client's rm space to gpu addresses + */ + VGpu(PPGTT32 &ppgtt) : _ppgtt(ppgtt) {} + + /** + * @brief Add a kernel to the vGPU's ready list + * + * @param kernel - the kernel object to enqueue + */ + void add_kernel(Kernel &kernel) { + _ready_list.enqueue(&kernel); + } + + /** + * @brief Get saved GPU context for this VGPU + * + * @return GPU context image for this VGPU + * TODO: implement saving the context of the GPU using the GPGPU driver + */ + Context get_context(); + + /** + * @brief Dequeue a kernel from the ready list + * + * @return First kernel image in ready list + */ + Kernel *take_kernel() { return 
static_cast(_ready_list.dequeue()); } + + /** + * @brief Get the ppgtt object + * + * @return PPGTT + */ + PPGTT32 &get_ppgtt() { return _ppgtt; } + + + }; +} \ No newline at end of file From a8f142eceb2f5347630375a90f764ae00d11e676 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20L=C3=BCtke=20Dreimann?= Date: Wed, 3 Aug 2022 17:31:16 +0200 Subject: [PATCH 12/14] updated ocl for libc version --- repos/hello_gpgpu/src/hello_gpgpu/CL/cl.cc | 226 +++++++++--------- .../src/hello_gpgpu/CL/cl_platform.h | 5 +- 2 files changed, 116 insertions(+), 115 deletions(-) diff --git a/repos/hello_gpgpu/src/hello_gpgpu/CL/cl.cc b/repos/hello_gpgpu/src/hello_gpgpu/CL/cl.cc index e73ecdb28d..6a3d8f1d19 100644 --- a/repos/hello_gpgpu/src/hello_gpgpu/CL/cl.cc +++ b/repos/hello_gpgpu/src/hello_gpgpu/CL/cl.cc @@ -2,7 +2,6 @@ #include "cl.h" #include #include -#define GENODE #include #pragma GCC diagnostic ignored "-Wunused-parameter" @@ -155,21 +154,21 @@ clCreateSubDevices(cl_device_id in_device, cl_device_id * out_devices, cl_uint * num_devices_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } CL_API_ENTRY cl_int CL_API_CALL clRetainDevice(cl_device_id device) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } CL_API_ENTRY cl_int CL_API_CALL clReleaseDevice(cl_device_id device) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -182,7 +181,7 @@ clSetDefaultDeviceCommandQueue(cl_context context, cl_device_id device, cl_command_queue command_queue) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -191,7 +190,7 @@ 
clGetDeviceAndHostTimer(cl_device_id device, cl_ulong* device_timestamp, cl_ulong* host_timestamp) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -199,7 +198,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetHostTimer(cl_device_id device, cl_ulong * host_timestamp) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -219,12 +218,12 @@ clCreateContext(const cl_context_properties * properties, { if(num_devices != 1 || *devices != 0) { - *errcode_ret = CL_INVALID_VALUE; + *errcode_ret |= CL_INVALID_VALUE; return NULL; } // TODO: RPC: gpgpu_init(); - *errcode_ret = CL_SUCCESS; + *errcode_ret |= CL_SUCCESS; return NULL; } @@ -238,14 +237,14 @@ clCreateContextFromType(const cl_context_properties * properties, void * user_data, cl_int * errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } CL_API_ENTRY cl_int CL_API_CALL clRetainContext(cl_context context) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -262,7 +261,7 @@ clGetContextInfo(cl_context context, void * param_value, size_t * param_value_size_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -274,7 +273,7 @@ clSetContextDestructorCallback(cl_context context, void* user_data), void* user_data) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -290,7 +289,7 @@ clCreateCommandQueueWithProperties(cl_context context, const cl_queue_properties * properties, cl_int * 
errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -299,7 +298,7 @@ clCreateCommandQueueWithProperties(cl_context context, CL_API_ENTRY cl_int CL_API_CALL clRetainCommandQueue(cl_command_queue command_queue) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -316,7 +315,7 @@ clGetCommandQueueInfo(cl_command_queue command_queue, void * param_value, size_t * param_value_size_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -328,11 +327,17 @@ clCreateBuffer(cl_context context, void * host_ptr, cl_int * errcode_ret) { - cl_mem clmem; - genode_allocator->alloc(sizeof(struct _cl_mem), (void**)&clmem); + cl_mem clmem = (cl_mem)genode_allocator->alloc(sizeof(struct _cl_mem)); if(host_ptr == NULL) { - genode_allocator->alloc_aligned(size, &host_ptr, 0x1000); + host_ptr = genode_allocator->alloc_aligned(size, 0x1000).convert( + [&] (void *ptr) { return ptr; }, + + [&] (Genode::Range_allocator::Alloc_error) -> void * { + Genode::error("[OCL] Error clCreateBuffer allocation!"); + return nullptr; + } + ); clmem->ocl_allocated = true; } else @@ -341,9 +346,9 @@ clCreateBuffer(cl_context context, } clmem->bc.buffer = host_ptr; - clmem->bc.buffer_size = size; + clmem->bc.buffer_size = (uint32_t)size; - *errcode_ret = CL_SUCCESS; + *errcode_ret |= CL_SUCCESS; return clmem; } @@ -356,7 +361,7 @@ clCreateSubBuffer(cl_mem buffer, const void * buffer_create_info, cl_int * errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -372,7 +377,7 @@ clCreateImage(cl_context context, void * host_ptr, cl_int * errcode_ret) { - Genode::log("[OCL] func ", 
__func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -388,7 +393,7 @@ clCreatePipe(cl_context context, const cl_pipe_properties * properties, cl_int * errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -404,7 +409,7 @@ clCreateBufferWithProperties(cl_context context, void * host_ptr, cl_int * errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -417,7 +422,7 @@ clCreateImageWithProperties(cl_context context, void * host_ptr, cl_int * errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -426,7 +431,7 @@ clCreateImageWithProperties(cl_context context, CL_API_ENTRY cl_int CL_API_CALL clRetainMemObject(cl_mem memobj) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -449,7 +454,7 @@ clGetSupportedImageFormats(cl_context context, cl_image_format * image_formats, cl_uint * num_image_formats) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -460,7 +465,7 @@ clGetMemObjectInfo(cl_mem memobj, void * param_value, size_t * param_value_size_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -471,7 +476,7 @@ clGetImageInfo(cl_mem image, void * param_value, size_t * param_value_size_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -484,7 
+489,7 @@ clGetPipeInfo(cl_mem pipe, void * param_value, size_t * param_value_size_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -498,7 +503,7 @@ clSetMemObjectDestructorCallback(cl_mem memobj, void * user_data), void * user_data) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -514,7 +519,7 @@ clSVMAlloc(cl_context context, size_t size, cl_uint alignment) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -522,7 +527,7 @@ CL_API_ENTRY void CL_API_CALL clSVMFree(cl_context context, void * svm_pointer) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); } #endif @@ -536,7 +541,7 @@ clCreateSamplerWithProperties(cl_context context, const cl_sampler_properties * sampler_properties, cl_int * errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -545,14 +550,14 @@ clCreateSamplerWithProperties(cl_context context, CL_API_ENTRY cl_int CL_API_CALL clRetainSampler(cl_sampler sampler) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } CL_API_ENTRY cl_int CL_API_CALL clReleaseSampler(cl_sampler sampler) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -563,7 +568,7 @@ clGetSamplerInfo(cl_sampler sampler, void * param_value, size_t * param_value_size_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func 
", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -575,7 +580,7 @@ clCreateProgramWithSource(cl_context context, const size_t * lengths, cl_int * errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -590,11 +595,11 @@ clCreateProgramWithBinary(cl_context context, { if(*device_list != 0x0) { - *errcode_ret = CL_INVALID_VALUE; + *errcode_ret |= CL_INVALID_VALUE; return NULL; } - *errcode_ret = CL_SUCCESS; + *errcode_ret |= CL_SUCCESS; return (cl_program)binaries[0]; } @@ -607,7 +612,7 @@ clCreateProgramWithBuiltInKernels(cl_context context, const char * kernel_names, cl_int * errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -621,7 +626,7 @@ clCreateProgramWithIL(cl_context context, size_t length, cl_int* errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -630,7 +635,7 @@ clCreateProgramWithIL(cl_context context, CL_API_ENTRY cl_int CL_API_CALL clRetainProgram(cl_program program) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -667,7 +672,7 @@ clCompileProgram(cl_program program, void * user_data), void * user_data) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -683,7 +688,7 @@ clLinkProgram(cl_context context, void * user_data, cl_int * errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -697,7 +702,7 @@ clSetProgramReleaseCallback(cl_program program, void * user_data), void * 
user_data) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -707,7 +712,7 @@ clSetProgramSpecializationConstant(cl_program program, size_t spec_size, const void* spec_value) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -718,7 +723,7 @@ clSetProgramSpecializationConstant(cl_program program, CL_API_ENTRY cl_int CL_API_CALL clUnloadPlatformCompiler(cl_platform_id platform) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -731,7 +736,7 @@ clGetProgramInfo(cl_program program, void * param_value, size_t * param_value_size_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -743,7 +748,7 @@ clGetProgramBuildInfo(cl_program program, void * param_value, size_t * param_value_size_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -754,17 +759,16 @@ clCreateKernel(cl_program program, cl_int * errcode_ret) { // create kernel and set binary - struct kernel_config* kc; - genode_allocator->alloc(sizeof(struct kernel_config), (void**)&kc); + struct kernel_config* kc = (struct kernel_config*)genode_allocator->alloc(sizeof(struct kernel_config)); kc->binary = (uint8_t*)program; // preallocated 32 buff configs; - genode_allocator->alloc(32 * sizeof(struct buffer_config), (void**)&kc->buffConfigs); + kc->buffConfigs = (struct buffer_config*)genode_allocator->alloc(32 * sizeof(struct buffer_config)); // set name kc->kernelName = (char*)kernel_name; - *errcode_ret = CL_SUCCESS; + *errcode_ret |= CL_SUCCESS; return 
(cl_kernel)kc; } @@ -774,7 +778,7 @@ clCreateKernelsInProgram(cl_program program, cl_kernel * kernels, cl_uint * num_kernels_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -784,7 +788,7 @@ CL_API_ENTRY cl_kernel CL_API_CALL clCloneKernel(cl_kernel source_kernel, cl_int* errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -793,7 +797,7 @@ clCloneKernel(cl_kernel source_kernel, CL_API_ENTRY cl_int CL_API_CALL clRetainKernel(cl_kernel kernel) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -826,13 +830,13 @@ clSetKernelArg(cl_kernel kernel, { struct buffer_config bc; bc.buffer = (void*)arg_value; - bc.buffer_size = arg_size; + bc.buffer_size = (uint32_t)arg_size; bc.non_pointer_type = true; kc->buffConfigs[arg_index] = bc; } if(kc->buffCount < (arg_index + 1)) - kc->buffCount = (arg_index + 1); + kc->buffCount = (uint8_t)(arg_index + 1); return CL_SUCCESS; } @@ -844,7 +848,7 @@ clSetKernelArgSVMPointer(cl_kernel kernel, cl_uint arg_index, const void * arg_value) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -854,7 +858,7 @@ clSetKernelExecInfo(cl_kernel kernel, size_t param_value_size, const void * param_value) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -867,7 +871,7 @@ clGetKernelInfo(cl_kernel kernel, void * param_value, size_t * param_value_size_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return 
CL_INVALID_VALUE; } @@ -881,7 +885,7 @@ clGetKernelArgInfo(cl_kernel kernel, void * param_value, size_t * param_value_size_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -895,7 +899,7 @@ clGetKernelWorkGroupInfo(cl_kernel kernel, void * param_value, size_t * param_value_size_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -911,7 +915,7 @@ clGetKernelSubGroupInfo(cl_kernel kernel, void* param_value, size_t* param_value_size_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -922,7 +926,7 @@ CL_API_ENTRY cl_int CL_API_CALL clWaitForEvents(cl_uint num_events, const cl_event * event_list) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -933,7 +937,7 @@ clGetEventInfo(cl_event event, void * param_value, size_t * param_value_size_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -943,7 +947,7 @@ CL_API_ENTRY cl_event CL_API_CALL clCreateUserEvent(cl_context context, cl_int * errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -952,14 +956,14 @@ clCreateUserEvent(cl_context context, CL_API_ENTRY cl_int CL_API_CALL clRetainEvent(cl_event event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } CL_API_ENTRY cl_int CL_API_CALL clReleaseEvent(cl_event event) { - Genode::log("[OCL] func 
", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -969,7 +973,7 @@ CL_API_ENTRY cl_int CL_API_CALL clSetUserEventStatus(cl_event event, cl_int execution_status) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -981,7 +985,7 @@ clSetEventCallback(cl_event event, void * user_data), void * user_data) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -995,7 +999,7 @@ clGetEventProfilingInfo(cl_event event, void * param_value, size_t * param_value_size_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1058,7 +1062,7 @@ clEnqueueReadBufferRect(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1108,7 +1112,7 @@ clEnqueueWriteBufferRect(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1127,7 +1131,7 @@ clEnqueueFillBuffer(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1144,7 +1148,7 @@ clEnqueueCopyBuffer(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + 
Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1165,7 +1169,7 @@ clEnqueueCopyBufferRect(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1184,7 +1188,7 @@ clEnqueueReadImage(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1201,7 +1205,7 @@ clEnqueueWriteImage(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1217,7 +1221,7 @@ clEnqueueFillImage(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1234,7 +1238,7 @@ clEnqueueCopyImage(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1249,7 +1253,7 @@ clEnqueueCopyImageToBuffer(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1264,7 +1268,7 @@ clEnqueueCopyBufferToImage(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not 
implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1280,7 +1284,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue, cl_event * event, cl_int * errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -1298,7 +1302,7 @@ clEnqueueMapImage(cl_command_queue command_queue, cl_event * event, cl_int * errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -1310,7 +1314,7 @@ clEnqueueUnmapMemObject(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1325,7 +1329,7 @@ clEnqueueMigrateMemObjects(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1345,10 +1349,10 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue, struct kernel_config* kc = (struct kernel_config*)kernel; for(cl_uint i = 0; i < work_dim; i++) { - kc->range[i] = global_work_size[i]; + kc->range[i] = (uint32_t)global_work_size[i]; if(local_work_size != NULL) { - kc->workgroupsize[i] = local_work_size[i]; + kc->workgroupsize[i] = (uint32_t)local_work_size[i]; } } // TODO: RPC: gpgpu_enqueueRun(kc); @@ -1367,7 +1371,7 @@ clEnqueueNativeKernel(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1379,7 +1383,7 @@ clEnqueueMarkerWithWaitList(cl_command_queue 
command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1389,7 +1393,7 @@ clEnqueueBarrierWithWaitList(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1410,7 +1414,7 @@ clEnqueueSVMFree(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1424,7 +1428,7 @@ clEnqueueSVMMemcpy(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1438,7 +1442,7 @@ clEnqueueSVMMemFill(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1452,7 +1456,7 @@ clEnqueueSVMMap(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1463,7 +1467,7 @@ clEnqueueSVMUnmap(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1481,7 +1485,7 @@ clEnqueueSVMMigrateMem(cl_command_queue 
command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1500,7 +1504,7 @@ CL_API_ENTRY void * CL_API_CALL clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, const char * func_name) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -1535,7 +1539,7 @@ clCreateImage2D(cl_context context, void * host_ptr, cl_int * errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -1551,7 +1555,7 @@ clCreateImage3D(cl_context context, void * host_ptr, cl_int * errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -1559,7 +1563,7 @@ CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL clEnqueueMarker(cl_command_queue command_queue, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1568,7 +1572,7 @@ clEnqueueWaitForEvents(cl_command_queue command_queue, cl_uint num_events, const cl_event * event_list) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } @@ -1581,14 +1585,14 @@ clEnqueueBarrier(cl_command_queue command_queue) CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL clUnloadCompiler(void) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED void * 
CL_API_CALL clGetExtensionFunctionAddress(const char * func_name) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -1601,11 +1605,11 @@ clCreateCommandQueue(cl_context context, { if(device != 0) { - *errcode_ret = CL_INVALID_VALUE; + *errcode_ret |= CL_INVALID_VALUE; return NULL; } - *errcode_ret = CL_SUCCESS; + *errcode_ret |= CL_SUCCESS; return NULL; } @@ -1616,7 +1620,7 @@ clCreateSampler(cl_context context, cl_filter_mode filter_mode, cl_int * errcode_ret) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return NULL; } @@ -1627,7 +1631,7 @@ clEnqueueTask(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - Genode::log("[OCL] func ", __func__, " is not implemented!"); + Genode::error("[OCL] func ", __func__, " is not implemented!"); return CL_INVALID_VALUE; } diff --git a/repos/hello_gpgpu/src/hello_gpgpu/CL/cl_platform.h b/repos/hello_gpgpu/src/hello_gpgpu/CL/cl_platform.h index 16a2173d18..f37cdc5771 100644 --- a/repos/hello_gpgpu/src/hello_gpgpu/CL/cl_platform.h +++ b/repos/hello_gpgpu/src/hello_gpgpu/CL/cl_platform.h @@ -264,10 +264,7 @@ typedef double cl_double; #else -// genode stdint -#include -typedef __INTPTR_TYPE__ intptr_t; -using namespace Genode; +#include /* scalar types */ typedef int8_t cl_char; From f3305ee5e1516665a442f8558b3cf49dd6178e22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20L=C3=BCtke=20Dreimann?= Date: Wed, 3 Aug 2022 17:35:40 +0200 Subject: [PATCH 13/14] added libc support and 2mm --- repos/hello_gpgpu/run/hello_gpgpu.run | 26 +- repos/hello_gpgpu/src/hello_gpgpu/2mm.cc | 418 +++++++++++++ repos/hello_gpgpu/src/hello_gpgpu/2mm.h | 73 +++ .../hello_gpgpu/src/hello_gpgpu/2mm_kernel.h | 570 ++++++++++++++++++ repos/hello_gpgpu/src/hello_gpgpu/main.cc | 21 +- .../hello_gpgpu/src/hello_gpgpu/polybench.cc | 402 
++++++++++++ repos/hello_gpgpu/src/hello_gpgpu/polybench.h | 202 +++++++ .../src/hello_gpgpu/polybenchUtilFuncts.h | 36 ++ repos/hello_gpgpu/src/hello_gpgpu/target.mk | 6 +- repos/hello_gpgpu/src/hello_gpgpu/test.cc | 6 +- 10 files changed, 1746 insertions(+), 14 deletions(-) create mode 100644 repos/hello_gpgpu/src/hello_gpgpu/2mm.cc create mode 100644 repos/hello_gpgpu/src/hello_gpgpu/2mm.h create mode 100644 repos/hello_gpgpu/src/hello_gpgpu/2mm_kernel.h create mode 100644 repos/hello_gpgpu/src/hello_gpgpu/polybench.cc create mode 100644 repos/hello_gpgpu/src/hello_gpgpu/polybench.h create mode 100644 repos/hello_gpgpu/src/hello_gpgpu/polybenchUtilFuncts.h diff --git a/repos/hello_gpgpu/run/hello_gpgpu.run b/repos/hello_gpgpu/run/hello_gpgpu.run index 7d9efb52f0..7446aee476 100644 --- a/repos/hello_gpgpu/run/hello_gpgpu.run +++ b/repos/hello_gpgpu/run/hello_gpgpu.run @@ -2,7 +2,7 @@ # Build # -build { core init gpgpu hello_gpgpu } +build { core init gpgpu timer hello_gpgpu } create_boot_directory @@ -17,17 +17,31 @@ install_config { + + + - + + + + + + + + - + + + 2022-07-20 14:30 + + } @@ -35,8 +49,8 @@ install_config { # Boot image # -build_boot_image { core ld.lib.so init gpgpu hello_gpgpu } +build_boot_image { core ld.lib.so libc.lib.so vfs.lib.so init gpgpu timer hello_gpgpu } -append qemu_args " -nographic " +append qemu_args " -nographic -m 4G" -run_genode_until "hello gpgpu completed.*\n" 10 +run_genode_until "hello gpgpu completed.*\n" 15 diff --git a/repos/hello_gpgpu/src/hello_gpgpu/2mm.cc b/repos/hello_gpgpu/src/hello_gpgpu/2mm.cc new file mode 100644 index 0000000000..96e7390760 --- /dev/null +++ b/repos/hello_gpgpu/src/hello_gpgpu/2mm.cc @@ -0,0 +1,418 @@ +/** + * 2mm.c: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Will Killian + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + + +#include +#include +#include +#include +#include + +#define CL_TARGET_OPENCL_VERSION 100 +#ifdef __APPLE__ +#include +#else +#include "CL/cl.h" +#endif + +#define POLYBENCH_TIME 1 + +//select the OpenCL device to use (can be GPU, CPU, or Accelerator such as Intel Xeon Phi) +#define OPENCL_DEVICE_SELECTION CL_DEVICE_TYPE_GPU + +#include "2mm.h" +#include "polybench.h" +#include "polybenchUtilFuncts.h" + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 1.05 + +#define MAX_SOURCE_SIZE (0x100000) + + +#if defined(cl_khr_fp64) // Khronos extension available? +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +#elif defined(cl_amd_fp64) // AMD extension available? +#pragma OPENCL EXTENSION cl_amd_fp64 : enable +#endif + +#include "2mm_kernel.h" + +char str_temp[1024]; + +cl_platform_id platform_id; +cl_device_id device_id; +cl_uint num_devices; +cl_uint num_platforms; +cl_int errcode; +cl_context clGPUContext; +cl_kernel clKernel1; +cl_kernel clKernel2; +cl_command_queue clCommandQue; +cl_program clProgram; +cl_mem tmp_mem_obj; +cl_mem a_mem_obj; +cl_mem b_mem_obj; +cl_mem c_mem_obj; +cl_mem dOutputFromGpu_mem_obj; + +FILE *fp; +char *source_str; +size_t source_size; + +#define RUN_ON_CPU + + +void compareResults(int ni, int nl, DATA_TYPE POLYBENCH_2D(D, NI, NL, ni, nl), DATA_TYPE POLYBENCH_2D(D_outputFromGpu, NI, NL, ni, nl)) +{ + int i,j,fail; + fail = 0; + + for (i=0; i < ni; i++) + { + for (j=0; j < nl; j++) + { + if (percentDiff(D[i][j], D_outputFromGpu[i][j]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + fail++; + } + } + } + + // print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + + +void read_cl_file() +{ + // Load the kernel source code into the array source_str + /*fp = 
fopen("2mm.cl", "r"); + if (!fp) { + fprintf(stderr, "Failed to load kernel.\n"); + exit(1); + } + source_str = (char*)malloc(MAX_SOURCE_SIZE); + source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp); + fclose( fp );*/ +} + + +void init_array(int ni, int nj, int nk, int nl, DATA_TYPE *alpha, DATA_TYPE *beta, DATA_TYPE POLYBENCH_2D(A, NI, NK, ni, nk), + DATA_TYPE POLYBENCH_2D(B, NK, NJ, nk, nj), DATA_TYPE POLYBENCH_2D(C, NL, NJ, nl, nj), + DATA_TYPE POLYBENCH_2D(D, NI, NL, ni, nl), DATA_TYPE POLYBENCH_2D(Dgpu, NI, NL, ni, nl)) +{ + int i, j; + + *alpha = 32412; + *beta = 2123; + + for (i = 0; i < ni; i++) + { + for (j = 0; j < nk; j++) + { + A[i][j] = ((DATA_TYPE) (i*j)) / NI; + } + } + + for (i = 0; i < nk; i++) + { + for (j = 0; j < nj; j++) + { + B[i][j] = ((DATA_TYPE) (i*(j+1))) / NJ; + } + } + + for (i = 0; i < nl; i++) + { + for (j = 0; j < nj; j++) + { + C[i][j] = ((DATA_TYPE) (i*(j+3))) / NL; + } + } + + for (i = 0; i < ni; i++) + { + for (j = 0; j < nl; j++) + { + D[i][j] = ((DATA_TYPE) (i*(j+2))) / NK; + Dgpu[i][j] = ((DATA_TYPE) (i*(j+2))) / NK; + } + } +} + + +void cl_initialization() +{ + // Get platform and device information + errcode = clGetPlatformIDs(1, &platform_id, &num_platforms); + if(errcode == CL_SUCCESS) printf("number of platforms is %d\n",num_platforms); + else printf("Error getting platform IDs\n"); + + errcode = clGetPlatformInfo(platform_id,CL_PLATFORM_NAME, sizeof(str_temp), str_temp,NULL); + if(errcode == CL_SUCCESS) printf("platform name is %s\n",str_temp); + else printf("Error getting platform name\n"); + + errcode = clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, sizeof(str_temp), str_temp,NULL); + if(errcode == CL_SUCCESS) printf("platform version is %s\n",str_temp); + else printf("Error getting platform version\n"); + + errcode = clGetDeviceIDs( platform_id, OPENCL_DEVICE_SELECTION, 1, &device_id, &num_devices); + if(errcode == CL_SUCCESS) printf("number of devices is %d\n", num_devices); + else printf("Error getting 
device IDs\n"); + + errcode = clGetDeviceInfo(device_id,CL_DEVICE_NAME, sizeof(str_temp), str_temp,NULL); + if(errcode == CL_SUCCESS) printf("device name is %s\n",str_temp); + else printf("Error getting device name\n"); + + // Create an OpenCL context + clGPUContext = clCreateContext( NULL, 1, &device_id, NULL, NULL, &errcode); + if(errcode != CL_SUCCESS) printf("Error in creating context\n"); + + //Create a command-queue + clCommandQue = clCreateCommandQueue(clGPUContext, device_id, 0, &errcode); + if(errcode != CL_SUCCESS) printf("Error in creating command queue\n"); +} + + +void cl_mem_init(DATA_TYPE POLYBENCH_2D(tmp, NI,NJ,ni,nj), DATA_TYPE POLYBENCH_2D(A, NI,NK,ni,nk), DATA_TYPE POLYBENCH_2D(B, NK,NJ,nk,nj), + DATA_TYPE POLYBENCH_2D(C, NL,NJ,nl,nj), DATA_TYPE POLYBENCH_2D(D_outputFromGpu,NI,NL,ni,nl)) +{ + tmp_mem_obj = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE, sizeof(DATA_TYPE) * NI * NJ, NULL, &errcode); + a_mem_obj = clCreateBuffer(clGPUContext, CL_MEM_READ_ONLY, sizeof(DATA_TYPE) * NI * NK, NULL, &errcode); + b_mem_obj = clCreateBuffer(clGPUContext, CL_MEM_READ_ONLY, sizeof(DATA_TYPE) * NK * NJ, NULL, &errcode); + c_mem_obj = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE, sizeof(DATA_TYPE) * NL * NJ, NULL, &errcode); + dOutputFromGpu_mem_obj = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE, sizeof(DATA_TYPE) * NI * NL, NULL, &errcode); + if(errcode != CL_SUCCESS) printf("Error in creating buffers\n"); + + errcode = clEnqueueWriteBuffer(clCommandQue, tmp_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NJ, tmp, 0, NULL, NULL); + errcode = clEnqueueWriteBuffer(clCommandQue, a_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NK, A, 0, NULL, NULL); + errcode = clEnqueueWriteBuffer(clCommandQue, b_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NK * NJ, B, 0, NULL, NULL); + errcode = clEnqueueWriteBuffer(clCommandQue, c_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NL * NJ, C, 0, NULL, NULL); + errcode = clEnqueueWriteBuffer(clCommandQue, dOutputFromGpu_mem_obj, CL_TRUE, 
0, sizeof(DATA_TYPE) * NI * NL, D_outputFromGpu, 0, NULL, NULL); + if(errcode != CL_SUCCESS)printf("Error in writing buffers\n"); +} + + +void cl_load_prog() +{ + // Create a program from the kernel source + const size_t kernel_size = __2mm_Gen9core_gen_len; + const unsigned char* kernel_bin = __2mm_Gen9core_gen; + clProgram = clCreateProgramWithBinary(clGPUContext, 1, &device_id, &kernel_size, &kernel_bin, NULL, &errcode); + //clProgram = clCreateProgramWithSource(clGPUContext, 1, (const char **)&source_str, (const size_t *)&source_size, &errcode); + + if(errcode != CL_SUCCESS) printf("Error in creating program\n"); + + // Build the program + errcode = clBuildProgram(clProgram, 1, &device_id, NULL, NULL, NULL); + if(errcode != CL_SUCCESS) printf("Error in building program\n"); + + // Create the OpenCL kernel + clKernel1 = clCreateKernel(clProgram, "mm2_kernel1", &errcode); + if(errcode != CL_SUCCESS) printf("Error in creating kernel\n"); + + clKernel2 = clCreateKernel(clProgram, "mm2_kernel2", &errcode); + if(errcode != CL_SUCCESS) printf("Error in creating kernel\n"); + clFinish(clCommandQue); +} + + +void cl_launch_kernel(int ni, int nj, int nk, int nl, DATA_TYPE alpha, DATA_TYPE beta) +{ + size_t localWorkSize[2], globalWorkSize[2]; + localWorkSize[0] = DIM_LOCAL_WORK_GROUP_X; + localWorkSize[1] = DIM_LOCAL_WORK_GROUP_Y; + globalWorkSize[0] = (size_t)ceil(((float)NI) / ((float)DIM_LOCAL_WORK_GROUP_X)) * DIM_LOCAL_WORK_GROUP_X; + globalWorkSize[1] = (size_t)ceil(((float)NL) / ((float)DIM_LOCAL_WORK_GROUP_Y)) * DIM_LOCAL_WORK_GROUP_Y; + + /* Start timer. 
*/ + polybench_start_instruments; + + // Set the arguments of the kernel + errcode = clSetKernelArg(clKernel1, 0, sizeof(cl_mem), (void *)&tmp_mem_obj); + errcode |= clSetKernelArg(clKernel1, 1, sizeof(cl_mem), (void *)&a_mem_obj); + errcode |= clSetKernelArg(clKernel1, 2, sizeof(cl_mem), (void *)&b_mem_obj); + errcode |= clSetKernelArg(clKernel1, 3, sizeof(int), (void *)&ni); + errcode |= clSetKernelArg(clKernel1, 4, sizeof(int), (void *)&nj); + errcode |= clSetKernelArg(clKernel1, 5, sizeof(int), (void *)&nk); + errcode |= clSetKernelArg(clKernel1, 6, sizeof(int), (void *)&nl); + errcode |= clSetKernelArg(clKernel1, 7, sizeof(DATA_TYPE), (void *)&alpha); + errcode |= clSetKernelArg(clKernel1, 8, sizeof(DATA_TYPE), (void *)&beta); + if(errcode != CL_SUCCESS) printf("Error in seting arguments\n"); + // Execute the OpenCL kernel + errcode = clEnqueueNDRangeKernel(clCommandQue, clKernel1, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL); + if(errcode != CL_SUCCESS) printf("Error in launching kernel\n"); + clEnqueueBarrier(clCommandQue); + + globalWorkSize[0] = (size_t)ceil(((float)NI) / ((float)DIM_LOCAL_WORK_GROUP_X)) * DIM_LOCAL_WORK_GROUP_X; + globalWorkSize[1] = (size_t)ceil(((float)NL) / ((float)DIM_LOCAL_WORK_GROUP_Y)) * DIM_LOCAL_WORK_GROUP_Y; + + errcode = clSetKernelArg(clKernel2, 0, sizeof(cl_mem), (void *)&tmp_mem_obj); + errcode |= clSetKernelArg(clKernel2, 1, sizeof(cl_mem), (void *)&c_mem_obj); + errcode |= clSetKernelArg(clKernel2, 2, sizeof(cl_mem), (void *)&dOutputFromGpu_mem_obj); + errcode |= clSetKernelArg(clKernel2, 3, sizeof(int), (void *)&ni); + errcode |= clSetKernelArg(clKernel2, 4, sizeof(int), (void *)&nj); + errcode |= clSetKernelArg(clKernel2, 5, sizeof(int), (void *)&nk); + errcode |= clSetKernelArg(clKernel2, 6, sizeof(int), (void *)&nl); + errcode |= clSetKernelArg(clKernel2, 7, sizeof(DATA_TYPE), (void *)&alpha); + errcode |= clSetKernelArg(clKernel2, 8, sizeof(DATA_TYPE), (void *)&beta); + if(errcode != CL_SUCCESS) 
printf("Error in seting arguments\n"); + + // Execute the OpenCL kernel + errcode = clEnqueueNDRangeKernel(clCommandQue, clKernel2, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL); + if(errcode != CL_SUCCESS) printf("Error in launching kernel\n"); + clFinish(clCommandQue); + + /* Stop and print timer. */ + printf("GPU Time in seconds:\n"); + polybench_stop_instruments; + polybench_print_instruments; +} + + +void cl_clean_up() +{ + // Clean up + errcode = clFlush(clCommandQue); + errcode = clFinish(clCommandQue); + errcode = clReleaseKernel(clKernel1); + errcode = clReleaseKernel(clKernel2); + errcode = clReleaseProgram(clProgram); + errcode = clReleaseMemObject(tmp_mem_obj); + errcode = clReleaseMemObject(a_mem_obj); + errcode = clReleaseMemObject(b_mem_obj); + errcode = clReleaseMemObject(c_mem_obj); + errcode = clReleaseMemObject(dOutputFromGpu_mem_obj); + errcode = clReleaseCommandQueue(clCommandQue); + errcode = clReleaseContext(clGPUContext); + if(errcode != CL_SUCCESS) printf("Error in cleanup\n"); +} + + +void mm2_cpu(int ni, int nj, int nk, int nl, + DATA_TYPE alpha, + DATA_TYPE beta, + DATA_TYPE POLYBENCH_2D(tmp,NI,NJ,ni,nj), + DATA_TYPE POLYBENCH_2D(A,NI,NK,ni,nk), + DATA_TYPE POLYBENCH_2D(B,NK,NJ,nk,nj), + DATA_TYPE POLYBENCH_2D(C,NL,NJ,nl,nj), + DATA_TYPE POLYBENCH_2D(D,NI,NL,ni,nl)) +{ + int i, j, k; + + /* D := alpha*A*B*C + beta*D */ + for (i = 0; i < _PB_NI; i++) + { + for (j = 0; j < _PB_NJ; j++) + { + tmp[i][j] = 0; + for (k = 0; k < _PB_NK; ++k) + { + tmp[i][j] += alpha * A[i][k] * B[k][j]; + } + } + } + + for (i = 0; i < _PB_NI; i++) + { + for (j = 0; j < _PB_NL; j++) + { + D[i][j] *= beta; + for (k = 0; k < _PB_NJ; ++k) + { + D[i][j] += tmp[i][k] * C[k][j]; + } + } + } +} + + +/* DCE code. Must scan the entire live-out data. + Can be used also to check the correctness of the output. 
*/ +static +void print_array(int ni, int nl, + DATA_TYPE POLYBENCH_2D(D,NI,NL,ni,nl)) +{ + int i, j; + + for (i = 0; i < ni; i++) + for (j = 0; j < nl; j++) { + fprintf (stderr, DATA_PRINTF_MODIFIER, D[i][j]); + if ((i * ni + j) % 20 == 0) fprintf (stderr, "\n"); + } + fprintf (stderr, "\n"); +} + +int main(int argc, char *argv[]) +{ + /* Retrieve problem size. */ + int ni = NI; + int nj = NJ; + int nk = NK; + int nl = NL; + + /* Variable declaration/allocation. */ + DATA_TYPE alpha; + DATA_TYPE beta; + POLYBENCH_2D_ARRAY_DECL(tmp,DATA_TYPE,NI,NJ,ni,nj); + POLYBENCH_2D_ARRAY_DECL(A,DATA_TYPE,NI,NK,ni,nk); + POLYBENCH_2D_ARRAY_DECL(B,DATA_TYPE,NK,NJ,nk,nj); + POLYBENCH_2D_ARRAY_DECL(C,DATA_TYPE,NL,NJ,nl,nj); + POLYBENCH_2D_ARRAY_DECL(D,DATA_TYPE,NI,NL,ni,nl); + POLYBENCH_2D_ARRAY_DECL(D_outputFromGpu,DATA_TYPE,NI,NL,ni,nl); + + /* Initialize array(s). */ + init_array(ni, nj, nk, nl, &alpha, &beta, POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(C), POLYBENCH_ARRAY(D), POLYBENCH_ARRAY(D_outputFromGpu)); + + read_cl_file(); + cl_initialization(); + cl_mem_init(POLYBENCH_ARRAY(tmp), POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(C), POLYBENCH_ARRAY(D_outputFromGpu)); + cl_load_prog(); + + cl_launch_kernel(ni, nj, nk, nl, alpha, beta); + + errcode = clEnqueueReadBuffer(clCommandQue, dOutputFromGpu_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NL, POLYBENCH_ARRAY(D_outputFromGpu), 0, NULL, NULL); + if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n"); + + + #ifdef RUN_ON_CPU + + /* Start timer. */ + polybench_start_instruments; + + mm2_cpu(ni, nj, nk, nl, alpha, beta, POLYBENCH_ARRAY(tmp), POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(C), POLYBENCH_ARRAY(D)); + + /* Stop and print timer. 
*/ + printf("CPU Time in seconds:\n"); + polybench_stop_instruments; + polybench_print_instruments; + + compareResults(ni, nl, POLYBENCH_ARRAY(D), POLYBENCH_ARRAY(D_outputFromGpu)); + + #else //prevent dead code elimination + + polybench_prevent_dce(print_array(ni, nl, POLYBENCH_ARRAY(D_outputFromGpu))); + + #endif //RUN_ON_CPU + + cl_clean_up(); + + POLYBENCH_FREE_ARRAY(tmp); + POLYBENCH_FREE_ARRAY(A); + POLYBENCH_FREE_ARRAY(B); + POLYBENCH_FREE_ARRAY(C); + POLYBENCH_FREE_ARRAY(D); + POLYBENCH_FREE_ARRAY(D_outputFromGpu); + + return 0; +} + +#include "polybench.cc" \ No newline at end of file diff --git a/repos/hello_gpgpu/src/hello_gpgpu/2mm.h b/repos/hello_gpgpu/src/hello_gpgpu/2mm.h new file mode 100644 index 0000000000..3f80f30cc0 --- /dev/null +++ b/repos/hello_gpgpu/src/hello_gpgpu/2mm.h @@ -0,0 +1,73 @@ +/** + * 2mm.h: This file is part of the PolyBench/GPU 1.0 test suite. + * + * + * Contact: Scott Grauer-Gray + * Will Killian + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#ifndef TWOMM_H +# define TWOMM_H + +/* Default to STANDARD_DATASET. */ +# if !defined(MINI_DATASET) && !defined(SMALL_DATASET) && !defined(LARGE_DATASET) && !defined(EXTRALARGE_DATASET) +# define SMALL_DATASET +# endif + +/* Do not define anything if the user manually defines the size. */ +# if !defined(NI) && !defined(NJ) && !defined(NK) && !defined(NL) +/* Define the possible dataset sizes. */ +# ifdef MINI_DATASET +# define NI 256 +# define NJ 256 +# define NK 256 +# define NL 256 +# endif + +# ifdef SMALL_DATASET +# define NI 512 +# define NJ 512 +# define NK 512 +# define NL 512 +# endif + +# ifdef STANDARD_DATASET /* Default if unspecified. 
*/ +# define NI 1024 +# define NJ 1024 +# define NK 1024 +# define NL 1024 +# endif + +# ifdef LARGE_DATASET +# define NI 2048 +# define NJ 2048 +# define NK 2048 +# define NL 2048 +# endif + +# ifdef EXTRALARGE_DATASET +# define NI 4096 +# define NJ 4096 +# define NK 4096 +# define NL 4096 +# endif +# endif /* !N */ + +# define _PB_NI POLYBENCH_LOOP_BOUND(NI,ni) +# define _PB_NJ POLYBENCH_LOOP_BOUND(NJ,nj) +# define _PB_NK POLYBENCH_LOOP_BOUND(NK,nk) +# define _PB_NL POLYBENCH_LOOP_BOUND(NL,nl) + +# ifndef DATA_TYPE +# define DATA_TYPE float +# define DATA_PRINTF_MODIFIER "%0.2lf " +# endif + +/* Thread block dimensions */ +#define DIM_LOCAL_WORK_GROUP_X 32 +#define DIM_LOCAL_WORK_GROUP_Y 8 + + +#endif /* !TWOMM*/ \ No newline at end of file diff --git a/repos/hello_gpgpu/src/hello_gpgpu/2mm_kernel.h b/repos/hello_gpgpu/src/hello_gpgpu/2mm_kernel.h new file mode 100644 index 0000000000..0e76fd2966 --- /dev/null +++ b/repos/hello_gpgpu/src/hello_gpgpu/2mm_kernel.h @@ -0,0 +1,570 @@ +unsigned char __2mm_Gen9core_gen[] = { + 0x43, 0x54, 0x4e, 0x49, 0x39, 0x04, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x88, 0x34, 0xad, 0xe9, 0x49, 0x73, 0x32, 0xcf, + 0xd7, 0x42, 0x73, 0xa9, 0x0c, 0x00, 0x00, 0x00, 0x9c, 0x07, 0x00, 0x00, + 0x80, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0xcc, 0x00, 0x00, 0x00, 0xf0, 0x03, 0x00, 0x00, 0x6d, 0x6d, 0x32, 0x5f, + 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x31, 0x00, 0x01, 0x00, 0x60, 0x00, + 0x0c, 0x02, 0xa0, 0x20, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x80, 0x00, 0x00, 0x04, 0x00, 0x00, 0x30, 0x00, 0x10, 0x00, 0x16, + 0xc0, 0x04, 0xc0, 0x04, 0x41, 0x80, 0x2d, 0x20, 0x00, 0x7e, 0x0a, 0x05, + 0x41, 0x00, 0x00, 0x00, 0x2c, 0x0a, 0xc0, 0x20, 0x44, 0x01, 0x00, 0x0a, + 0xb8, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x4c, 0x16, 0xc4, 0x20, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x40, 0x20, 0x80, 0x00, 
+ 0x28, 0x0a, 0x80, 0x2f, 0xc0, 0x0f, 0x00, 0x12, 0x40, 0x00, 0xb1, 0x00, + 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0xe0, 0x21, 0xc0, 0x00, 0x00, 0x12, + 0x80, 0x00, 0xb1, 0x00, 0x40, 0x00, 0x80, 0x00, 0x28, 0x0a, 0x60, 0x21, + 0xc0, 0x0f, 0x00, 0x12, 0x20, 0x00, 0xb1, 0x00, 0x40, 0x00, 0x80, 0x00, + 0x28, 0x0a, 0x00, 0x2f, 0xc0, 0x00, 0x00, 0x12, 0x60, 0x00, 0xb1, 0x00, + 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x40, 0x2f, 0x80, 0x0f, 0x8d, 0x0a, + 0xe0, 0x00, 0x00, 0x00, 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x20, 0x22, + 0xe0, 0x01, 0x8d, 0x0a, 0xe4, 0x00, 0x00, 0x00, 0x40, 0x96, 0x01, 0x20, + 0x07, 0x0d, 0x0b, 0x07, 0x40, 0x96, 0x2d, 0x20, 0x07, 0x76, 0x78, 0x07, + 0x10, 0x20, 0x80, 0x05, 0x20, 0x0a, 0x00, 0x20, 0x40, 0x0f, 0x8d, 0x0a, + 0x1c, 0x01, 0x00, 0x00, 0x10, 0x20, 0x80, 0x05, 0x22, 0x0a, 0x00, 0x20, + 0x20, 0x02, 0x8d, 0x0a, 0x18, 0x01, 0x00, 0x00, 0x10, 0x00, 0x80, 0x05, + 0x22, 0x0a, 0x00, 0x20, 0xa0, 0x01, 0x8d, 0x0a, 0x1c, 0x01, 0x00, 0x00, + 0x02, 0x20, 0x81, 0x00, 0x48, 0x12, 0xe0, 0x2c, 0xc4, 0x00, 0x00, 0x16, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x20, 0x81, 0x00, 0x4a, 0x12, 0xe0, 0x23, + 0xc4, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x81, 0x05, + 0x22, 0x0a, 0x00, 0x20, 0xc0, 0x0e, 0x8d, 0x0a, 0x18, 0x01, 0x00, 0x00, + 0x05, 0x20, 0x80, 0x02, 0x42, 0x12, 0x00, 0x20, 0xe0, 0x03, 0xb1, 0x12, + 0xe0, 0x0c, 0xb1, 0x00, 0x22, 0x00, 0xa1, 0x00, 0x02, 0x0e, 0x00, 0x20, + 0xa0, 0x02, 0x00, 0x00, 0xa0, 0x02, 0x00, 0x00, 0x41, 0x96, 0x79, 0x20, + 0x07, 0x74, 0x76, 0x08, 0x41, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x60, 0x22, + 0x20, 0x02, 0x8d, 0x0a, 0x1c, 0x01, 0x00, 0x00, 0x01, 0x00, 0x80, 0x00, + 0xe8, 0x3e, 0x40, 0x2e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x20, 0x80, 0x00, 0xe8, 0x3e, 0xa0, 0x22, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x40, 0x96, 0x01, 0x20, 0xe7, 0x74, 0x74, 0x0d, + 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x60, 0x22, 0x60, 0x02, 0x8d, 0x0a, + 0x40, 0x0f, 0x8d, 0x00, 0x10, 0x00, 0x80, 0x03, 0x24, 0x0a, 0x00, 0x20, + 
0x20, 0x01, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x10, 0x20, 0x80, 0x03, + 0x24, 0x0a, 0x00, 0x20, 0x20, 0x01, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x80, 0x00, 0x28, 0x0a, 0x80, 0x2e, 0x80, 0x0e, 0x8d, 0x1e, + 0x02, 0x00, 0x02, 0x00, 0x09, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x60, 0x22, + 0x60, 0x02, 0x8d, 0x1e, 0x02, 0x00, 0x02, 0x00, 0x40, 0x96, 0x6d, 0x20, + 0x07, 0x74, 0x74, 0x09, 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x60, 0x22, + 0x60, 0x02, 0x8d, 0x0a, 0x30, 0x01, 0x00, 0x00, 0x33, 0x00, 0x80, 0x0c, + 0x70, 0x20, 0x07, 0x00, 0x82, 0x0e, 0x00, 0x00, 0x00, 0x5e, 0x02, 0x04, + 0x33, 0x20, 0x80, 0x0c, 0x70, 0x50, 0x01, 0x00, 0x62, 0x02, 0x00, 0x00, + 0x00, 0x5e, 0x02, 0x04, 0x20, 0x00, 0x11, 0x00, 0x04, 0x00, 0x00, 0x34, + 0x00, 0x14, 0x00, 0x0e, 0xb8, 0x01, 0x00, 0x00, 0x41, 0x96, 0x01, 0x20, + 0x07, 0x70, 0x76, 0x09, 0x41, 0x20, 0x80, 0x00, 0x28, 0x0a, 0xe0, 0x22, + 0x20, 0x02, 0x8d, 0x0a, 0x20, 0x01, 0x00, 0x00, 0x01, 0x00, 0x80, 0x00, + 0xe8, 0x3e, 0xc0, 0x2d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x20, 0x80, 0x00, 0xe8, 0x3e, 0x20, 0x23, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2c, 0x1e, 0xc4, 0x2f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x96, 0x2d, 0x20, + 0x07, 0x1b, 0x70, 0x7e, 0x41, 0x00, 0x00, 0x00, 0x2c, 0x0a, 0xc8, 0x2f, + 0xc4, 0x0f, 0x00, 0x0a, 0x1c, 0x01, 0x00, 0x00, 0x40, 0x20, 0x80, 0x00, + 0x28, 0x0a, 0x80, 0x2d, 0xe0, 0x02, 0x8d, 0x0a, 0xc4, 0x0f, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x2c, 0x0a, 0xc4, 0x2f, 0xc4, 0x0f, 0x00, 0x1e, + 0x01, 0x00, 0x01, 0x00, 0x09, 0x00, 0x80, 0x00, 0x28, 0x0a, 0x60, 0x23, + 0x60, 0x03, 0x8d, 0x1e, 0x02, 0x00, 0x02, 0x00, 0x40, 0x96, 0x19, 0x20, + 0xe0, 0x24, 0x7e, 0x0d, 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x00, 0x2d, + 0xc8, 0x0f, 0x00, 0x0a, 0x40, 0x0f, 0x8d, 0x00, 0x09, 0x20, 0x80, 0x00, + 0x28, 0x0a, 0x80, 0x2d, 0x80, 0x0d, 0x8d, 0x1e, 0x02, 0x00, 0x02, 0x00, + 0x40, 0x96, 0x71, 0x20, 0x07, 0x1b, 0x1b, 0x09, 0x09, 0x00, 0x80, 0x00, + 
0x28, 0x0a, 0x80, 0x24, 0x80, 0x04, 0x8d, 0x1e, 0x02, 0x00, 0x02, 0x00, + 0x09, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x00, 0x2d, 0x00, 0x0d, 0x8d, 0x1e, + 0x02, 0x00, 0x02, 0x00, 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x80, 0x2d, + 0x80, 0x0d, 0x8d, 0x0a, 0x34, 0x01, 0x00, 0x00, 0x31, 0x00, 0x80, 0x0c, + 0x68, 0x02, 0xa0, 0x23, 0x60, 0x03, 0x00, 0x06, 0x01, 0x5e, 0x20, 0x04, + 0x40, 0x96, 0x75, 0x20, 0x07, 0x24, 0x24, 0x09, 0x40, 0x20, 0x80, 0x00, + 0x28, 0x0a, 0x00, 0x2d, 0x00, 0x0d, 0x8d, 0x0a, 0x38, 0x01, 0x00, 0x00, + 0x31, 0x20, 0x80, 0x0c, 0x68, 0x02, 0x40, 0x2d, 0x80, 0x0d, 0x00, 0x06, + 0x01, 0x5e, 0x20, 0x04, 0x10, 0x17, 0x17, 0x25, 0x00, 0x00, 0x7e, 0x09, + 0x31, 0x00, 0x80, 0x0c, 0x68, 0x02, 0xa0, 0x2c, 0x80, 0x04, 0x00, 0x06, + 0x02, 0x5e, 0x20, 0x04, 0x31, 0x20, 0x80, 0x0c, 0x68, 0x02, 0x60, 0x2c, + 0x00, 0x0d, 0x00, 0x06, 0x02, 0x5e, 0x20, 0x04, 0x10, 0x20, 0x80, 0x05, + 0x24, 0x0a, 0x00, 0x20, 0xc4, 0x0f, 0x00, 0x0a, 0x20, 0x01, 0x00, 0x00, + 0x41, 0x56, 0x5e, 0x20, 0x07, 0x20, 0x1d, 0x09, 0x41, 0x20, 0x80, 0x00, + 0xe8, 0x3a, 0x40, 0x24, 0x40, 0x0d, 0x8d, 0x3a, 0x28, 0x01, 0x00, 0x00, + 0x5b, 0xe2, 0x06, 0x20, 0x00, 0x70, 0x83, 0xca, 0x5b, 0x93, 0x01, 0x20, + 0x00, 0xc8, 0x88, 0xc6, 0x33, 0x00, 0x80, 0x0c, 0x70, 0xe0, 0x06, 0x00, + 0x82, 0x0e, 0x00, 0x00, 0x00, 0x5e, 0x02, 0x04, 0x33, 0x20, 0x80, 0x0c, + 0x70, 0x90, 0x01, 0x00, 0x62, 0x02, 0x00, 0x00, 0x00, 0x5e, 0x02, 0x04, + 0x20, 0x00, 0x01, 0x00, 0x04, 0x00, 0x00, 0x34, 0x00, 0x14, 0x00, 0x0e, + 0x90, 0xfe, 0xff, 0xff, 0x25, 0x00, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x20, + 0x00, 0x00, 0x00, 0x0e, 0x10, 0x00, 0x00, 0x00, 0x01, 0x4d, 0x00, 0x20, + 0x07, 0x7f, 0x05, 0x00, 0x31, 0x00, 0x60, 0x07, 0x04, 0x02, 0x00, 0x20, + 0xe0, 0x0f, 0x00, 0x06, 0x10, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xff, 0x83, 0x00, 0x00, 0x00, 0x03, + 0x7f, 0x00, 0xff, 0x1f, 0x00, 0x00, 0xe0, 0x0f, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xff, 0x83, + 0x00, 0x00, 0x00, 0x03, 0x7f, 0x00, 0xff, 0x1f, 0x00, 0x00, 0xe0, 0x0f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xc0, 0xff, 0x83, 0x00, 0x00, 0x00, 0x03, 0x7f, 0x00, 0xff, 0x1f, + 0x00, 0x00, 0xe0, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xc0, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0xc0, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x2b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x2a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x58, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1e, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x1e, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x1e, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 
0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x4c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x8c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 
0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x5f, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x00, 0x00, 0x00, 0x00, + 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, 0x74, 0x6d, 0x70, 0x00, + 0x44, 0x41, 0x54, 0x41, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x2a, 0x3b, 0x38, + 0x00, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, + 0x1a, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x67, 0x6c, + 0x6f, 0x62, 0x61, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, + 0x00, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x44, 0x41, 0x54, 0x41, + 0x5f, 0x54, 0x59, 0x50, 0x45, 0x2a, 0x3b, 0x38, 0x00, 0x00, 0x00, 0x00, + 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, + 0x00, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, + 0x42, 0x00, 0x00, 0x00, 0x44, 0x41, 0x54, 0x41, 0x5f, 0x54, 0x59, 0x50, + 0x45, 0x2a, 0x3b, 0x38, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, + 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x5f, 0x5f, 0x70, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x00, 0x00, 0x00, + 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x69, 0x00, 0x00, + 0x69, 0x6e, 0x74, 0x3b, 0x34, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, + 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 
0x5f, 0x5f, 0x70, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x00, 0x00, 0x00, + 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x6a, 0x00, 0x00, + 0x69, 0x6e, 0x74, 0x3b, 0x34, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, + 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x5f, 0x5f, 0x70, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x00, 0x00, 0x00, + 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x6b, 0x00, 0x00, + 0x69, 0x6e, 0x74, 0x3b, 0x34, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, + 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x5f, 0x5f, 0x70, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x00, 0x00, 0x00, + 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x6c, 0x00, 0x00, + 0x69, 0x6e, 0x74, 0x3b, 0x34, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, + 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x5f, 0x5f, 0x70, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x00, 0x00, 0x00, + 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, 0x61, 0x6c, 0x70, 0x68, + 0x61, 0x00, 0x00, 0x00, 0x44, 0x41, 0x54, 0x41, 0x5f, 0x54, 0x59, 0x50, + 0x45, 0x3b, 0x34, 0x00, 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, + 0x1a, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x70, 0x72, + 0x69, 0x76, 0x61, 0x74, 0x65, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, + 0x00, 0x00, 0x00, 0x00, 0x62, 0x65, 0x74, 0x61, 0x00, 0x00, 0x00, 0x00, + 
0x44, 0x41, 0x54, 0x41, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x3b, 0x34, 0x00, + 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xe7, 0x4b, 0xdd, + 0x49, 0x73, 0x32, 0xcf, 0xd7, 0x42, 0x73, 0xa9, 0x0c, 0x00, 0x00, 0x00, + 0x9c, 0x07, 0x00, 0x00, 0x80, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0xcc, 0x00, 0x00, 0x00, 0xd0, 0x03, 0x00, 0x00, + 0x6d, 0x6d, 0x32, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x32, 0x00, + 0x01, 0x00, 0x60, 0x00, 0x0c, 0x02, 0xa0, 0x20, 0x00, 0x00, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x06, 0x80, 0x00, 0x00, 0x04, 0x00, 0x00, 0x30, + 0x00, 0x10, 0x00, 0x16, 0xc0, 0x04, 0xc0, 0x04, 0x41, 0x80, 0x2d, 0x20, + 0x00, 0x7e, 0x0a, 0x05, 0x41, 0x00, 0x00, 0x00, 0x2c, 0x0a, 0xc0, 0x20, + 0x44, 0x01, 0x00, 0x0a, 0xb8, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x4c, 0x16, 0xc4, 0x20, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, + 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x80, 0x2f, 0xc0, 0x0f, 0x00, 0x12, + 0x40, 0x00, 0xb1, 0x00, 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0xe0, 0x21, + 0xc0, 0x00, 0x00, 0x12, 0x80, 0x00, 0xb1, 0x00, 0x40, 0x00, 0x80, 0x00, + 0x28, 0x0a, 0x60, 0x21, 0xc0, 0x0f, 0x00, 0x12, 0x20, 0x00, 0xb1, 0x00, + 0x40, 0x00, 0x80, 0x00, 0x28, 0x0a, 0x00, 0x2f, 0xc0, 0x00, 0x00, 0x12, + 0x60, 0x00, 0xb1, 0x00, 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x40, 0x2f, + 0x80, 0x0f, 0x8d, 0x0a, 0xe0, 0x00, 0x00, 0x00, 0x40, 0x20, 0x80, 0x00, + 0x28, 0x0a, 0x20, 0x22, 0xe0, 0x01, 0x8d, 0x0a, 0xe4, 0x00, 0x00, 0x00, + 0x40, 0x96, 0x01, 0x20, 0x07, 0x0d, 0x0b, 0x07, 0x40, 0x96, 0x2d, 0x20, + 0x07, 0x76, 0x78, 0x07, 0x10, 0x20, 0x80, 0x05, 0x20, 0x0a, 0x00, 0x20, + 0x40, 0x0f, 0x8d, 0x0a, 0x24, 0x01, 0x00, 0x00, 0x10, 0x20, 0x80, 0x05, + 0x22, 0x0a, 0x00, 0x20, 0x20, 0x02, 0x8d, 0x0a, 0x18, 0x01, 0x00, 0x00, + 0x10, 0x00, 0x80, 0x05, 0x22, 0x0a, 0x00, 0x20, 0xa0, 0x01, 0x8d, 0x0a, + 0x24, 0x01, 0x00, 0x00, 0x02, 0x20, 0x81, 0x00, 0x48, 0x12, 0x20, 0x2d, + 0xc4, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x02, 0x20, 0x81, 0x00, + 
0x4a, 0x12, 0xa0, 0x23, 0xc4, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x81, 0x05, 0x22, 0x0a, 0x00, 0x20, 0xc0, 0x0e, 0x8d, 0x0a, + 0x18, 0x01, 0x00, 0x00, 0x05, 0x20, 0x80, 0x02, 0x42, 0x12, 0x00, 0x20, + 0xa0, 0x03, 0xb1, 0x12, 0x20, 0x0d, 0xb1, 0x00, 0x22, 0x00, 0xa1, 0x00, + 0x02, 0x0e, 0x00, 0x20, 0x80, 0x02, 0x00, 0x00, 0x80, 0x02, 0x00, 0x00, + 0x41, 0x96, 0x2d, 0x20, 0x07, 0x74, 0x76, 0x09, 0x41, 0x20, 0x80, 0x00, + 0x28, 0x0a, 0x60, 0x22, 0x20, 0x02, 0x8d, 0x0a, 0x24, 0x01, 0x00, 0x00, + 0x10, 0x00, 0x80, 0x03, 0x24, 0x0a, 0x00, 0x20, 0x1c, 0x01, 0x00, 0x1e, + 0x00, 0x00, 0x00, 0x00, 0x10, 0x20, 0x80, 0x03, 0x24, 0x0a, 0x00, 0x20, + 0x1c, 0x01, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x40, 0x96, 0x01, 0x20, + 0xe7, 0x74, 0x74, 0x0d, 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x60, 0x22, + 0x60, 0x02, 0x8d, 0x0a, 0x40, 0x0f, 0x8d, 0x00, 0x09, 0x00, 0x80, 0x00, + 0x28, 0x0a, 0x80, 0x2e, 0x80, 0x0e, 0x8d, 0x1e, 0x02, 0x00, 0x02, 0x00, + 0x09, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x60, 0x22, 0x60, 0x02, 0x8d, 0x1e, + 0x02, 0x00, 0x02, 0x00, 0x40, 0x96, 0x75, 0x20, 0x07, 0x74, 0x74, 0x09, + 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x60, 0x22, 0x60, 0x02, 0x8d, 0x0a, + 0x38, 0x01, 0x00, 0x00, 0x31, 0x00, 0x80, 0x0c, 0x68, 0x02, 0x40, 0x2e, + 0x80, 0x0e, 0x00, 0x06, 0x02, 0x5e, 0x20, 0x04, 0x31, 0x20, 0x80, 0x0c, + 0x68, 0x02, 0xa0, 0x22, 0x60, 0x02, 0x00, 0x06, 0x02, 0x5e, 0x20, 0x04, + 0x41, 0x56, 0x66, 0x20, 0x07, 0x70, 0x72, 0x09, 0x41, 0x20, 0x80, 0x00, + 0xe8, 0x3a, 0xe0, 0x22, 0xa0, 0x02, 0x8d, 0x3a, 0x2c, 0x01, 0x00, 0x00, + 0x33, 0x00, 0x80, 0x0c, 0x70, 0x00, 0x07, 0x00, 0x82, 0x0e, 0x00, 0x00, + 0x02, 0x5e, 0x02, 0x04, 0x33, 0x20, 0x80, 0x0c, 0x70, 0x70, 0x01, 0x00, + 0x62, 0x02, 0x00, 0x00, 0x02, 0x5e, 0x02, 0x04, 0x20, 0x00, 0x11, 0x00, + 0x04, 0x00, 0x00, 0x34, 0x00, 0x14, 0x00, 0x0e, 0x80, 0x01, 0x00, 0x00, + 0x41, 0x96, 0x79, 0x20, 0x07, 0x6e, 0x76, 0x08, 0x41, 0x20, 0x80, 0x00, + 0x28, 0x0a, 0x20, 0x23, 0x20, 0x02, 0x8d, 0x0a, 0x1c, 0x01, 0x00, 0x00, + 
0x01, 0x00, 0x00, 0x00, 0x2c, 0x1e, 0xc4, 0x2f, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x41, 0x80, 0x49, 0x20, 0x00, 0x7e, 0x7e, 0x09, + 0x40, 0x96, 0x2d, 0x20, 0x07, 0x1b, 0x6e, 0x7e, 0x40, 0x20, 0x80, 0x00, + 0x28, 0x0a, 0x80, 0x2d, 0x20, 0x03, 0x8d, 0x0a, 0xc4, 0x0f, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x2c, 0x0a, 0xc4, 0x2f, 0xc4, 0x0f, 0x00, 0x1e, + 0x01, 0x00, 0x01, 0x00, 0x40, 0x96, 0x19, 0x20, 0xe0, 0x22, 0x7e, 0x0d, + 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x40, 0x2d, 0xc8, 0x0f, 0x00, 0x0a, + 0x40, 0x0f, 0x8d, 0x00, 0x09, 0x00, 0x80, 0x00, 0x28, 0x0a, 0x60, 0x23, + 0x60, 0x03, 0x8d, 0x1e, 0x02, 0x00, 0x02, 0x00, 0x09, 0x20, 0x80, 0x00, + 0x28, 0x0a, 0x80, 0x2d, 0x80, 0x0d, 0x8d, 0x1e, 0x02, 0x00, 0x02, 0x00, + 0x09, 0x00, 0x80, 0x00, 0x28, 0x0a, 0x40, 0x24, 0x40, 0x04, 0x8d, 0x1e, + 0x02, 0x00, 0x02, 0x00, 0x09, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x40, 0x2d, + 0x40, 0x0d, 0x8d, 0x1e, 0x02, 0x00, 0x02, 0x00, 0x40, 0x96, 0x6d, 0x20, + 0x07, 0x1b, 0x1b, 0x09, 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x80, 0x2d, + 0x80, 0x0d, 0x8d, 0x0a, 0x30, 0x01, 0x00, 0x00, 0x40, 0x96, 0x71, 0x20, + 0x07, 0x22, 0x22, 0x09, 0x40, 0x20, 0x80, 0x00, 0x28, 0x0a, 0x40, 0x2d, + 0x40, 0x0d, 0x8d, 0x0a, 0x34, 0x01, 0x00, 0x00, 0x31, 0x00, 0x80, 0x0c, + 0x68, 0x02, 0xc0, 0x23, 0x60, 0x03, 0x00, 0x06, 0x00, 0x5e, 0x20, 0x04, + 0x10, 0x00, 0x80, 0x05, 0x24, 0x0a, 0x00, 0x20, 0xc4, 0x0f, 0x00, 0x0a, + 0x1c, 0x01, 0x00, 0x00, 0x31, 0x00, 0x80, 0x0c, 0x68, 0x02, 0xe0, 0x2c, + 0x40, 0x04, 0x00, 0x06, 0x01, 0x5e, 0x20, 0x04, 0x31, 0x20, 0x80, 0x0c, + 0x68, 0x02, 0x00, 0x24, 0x80, 0x0d, 0x00, 0x06, 0x00, 0x5e, 0x20, 0x04, + 0x31, 0x20, 0x80, 0x0c, 0x68, 0x02, 0xa0, 0x2c, 0x40, 0x0d, 0x00, 0x06, + 0x01, 0x5e, 0x20, 0x04, 0x10, 0x20, 0x80, 0x05, 0x24, 0x0a, 0x00, 0x20, + 0xc4, 0x0f, 0x00, 0x0a, 0x1c, 0x01, 0x00, 0x00, 0x5b, 0x02, 0x07, 0x20, + 0x00, 0x80, 0x7b, 0xce, 0x5b, 0x73, 0x01, 0x20, 0x00, 0xb8, 0x80, 0xca, + 0x33, 0x00, 0x80, 0x0c, 0x70, 0x00, 0x07, 0x00, 0x82, 0x0e, 0x00, 0x00, + 
0x02, 0x5e, 0x02, 0x04, 0x33, 0x20, 0x80, 0x0c, 0x70, 0x70, 0x01, 0x00, + 0x62, 0x02, 0x00, 0x00, 0x02, 0x5e, 0x02, 0x04, 0x20, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x00, 0x34, 0x00, 0x14, 0x00, 0x0e, 0xa8, 0xfe, 0xff, 0xff, + 0x25, 0x00, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0e, + 0x10, 0x00, 0x00, 0x00, 0x01, 0x4d, 0x00, 0x20, 0x07, 0x7f, 0x05, 0x00, + 0x31, 0x00, 0x60, 0x07, 0x04, 0x02, 0x00, 0x20, 0xe0, 0x0f, 0x00, 0x06, + 0x10, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xff, 0x83, + 0x00, 0x00, 0x00, 0x03, 0x7f, 0x00, 0xff, 0x1f, 0x00, 0x00, 0xe0, 0x0f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xc0, 0xff, 0x83, 0x00, 0x00, 0x00, 0x03, 0x7f, 0x00, 0xff, 0x1f, + 0x00, 0x00, 0xe0, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xff, 0x83, 0x00, 0x00, 0x00, 0x03, + 0x7f, 0x00, 0xff, 0x1f, 0x00, 0x00, 0xe0, 0x0f, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 
0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x2b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x54, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x64, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 
0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x3c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x8c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, + 0x00, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, + 0x74, 0x6d, 0x70, 0x00, 0x44, 0x41, 0x54, 0x41, 0x5f, 0x54, 0x59, 0x50, + 0x45, 0x2a, 0x3b, 0x38, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, + 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x5f, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x00, 0x00, 0x00, 0x00, + 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, + 0x44, 0x41, 0x54, 0x41, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x2a, 0x3b, 0x38, + 0x00, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, + 0x1a, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x67, 0x6c, + 
0x6f, 0x62, 0x61, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, + 0x00, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x44, 0x41, 0x54, 0x41, + 0x5f, 0x54, 0x59, 0x50, 0x45, 0x2a, 0x3b, 0x38, 0x00, 0x00, 0x00, 0x00, + 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x70, 0x72, 0x69, 0x76, 0x61, 0x74, + 0x65, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, + 0x6e, 0x69, 0x00, 0x00, 0x69, 0x6e, 0x74, 0x3b, 0x34, 0x00, 0x00, 0x00, + 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x70, 0x72, 0x69, 0x76, 0x61, 0x74, + 0x65, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, + 0x6e, 0x6a, 0x00, 0x00, 0x69, 0x6e, 0x74, 0x3b, 0x34, 0x00, 0x00, 0x00, + 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x70, 0x72, 0x69, 0x76, 0x61, 0x74, + 0x65, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, + 0x6e, 0x6b, 0x00, 0x00, 0x69, 0x6e, 0x74, 0x3b, 0x34, 0x00, 0x00, 0x00, + 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x70, 0x72, 0x69, 0x76, 0x61, 0x74, + 0x65, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, + 0x6e, 0x6c, 0x00, 0x00, 0x69, 0x6e, 0x74, 0x3b, 0x34, 0x00, 0x00, 0x00, + 
0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x70, 0x72, 0x69, 0x76, 0x61, 0x74, + 0x65, 0x00, 0x00, 0x00, 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, + 0x61, 0x6c, 0x70, 0x68, 0x61, 0x00, 0x00, 0x00, 0x44, 0x41, 0x54, 0x41, + 0x5f, 0x54, 0x59, 0x50, 0x45, 0x3b, 0x34, 0x00, 0x4e, 0x4f, 0x4e, 0x45, + 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x5f, 0x5f, 0x70, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x00, 0x00, 0x00, + 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00, 0x62, 0x65, 0x74, 0x61, + 0x00, 0x00, 0x00, 0x00, 0x44, 0x41, 0x54, 0x41, 0x5f, 0x54, 0x59, 0x50, + 0x45, 0x3b, 0x34, 0x00, 0x4e, 0x4f, 0x4e, 0x45, 0x00, 0x00, 0x00, 0x00 +}; +unsigned int __2mm_Gen9core_gen_len = 6804; diff --git a/repos/hello_gpgpu/src/hello_gpgpu/main.cc b/repos/hello_gpgpu/src/hello_gpgpu/main.cc index bd7f0c3324..3dc951de9b 100644 --- a/repos/hello_gpgpu/src/hello_gpgpu/main.cc +++ b/repos/hello_gpgpu/src/hello_gpgpu/main.cc @@ -5,15 +5,19 @@ #include #include #include "test.h" +#include +#include "CL/cl.h" -void Component::construct(Genode::Env &env) +extern int main(int argc, char *argv[]); + +void testvm_construct(Genode::Env &env) { gpgpu::Connection gpgpu(env); // allocator Genode::Heap heap(env.ram(), env.rm()); Genode::Allocator_avl alloc(&heap); - const unsigned int size = 0x1000 * 0x100; + const unsigned int size = 0x10000 * 0x1000; Genode::Ram_dataspace_capability ram_cap = env.ram().alloc(size); Genode::addr_t mapped_base = env.rm().attach(ram_cap); //Genode::addr_t base = Genode::Dataspace_client(ram_cap).phys_addr(); @@ -23,7 +27,18 @@ void Component::construct(Genode::Env &env) 
gpgpu.say_hello(); // run the test and hope the best - run_gpgpu_test(alloc); + //run_gpgpu_test(alloc); + + // run 2mm + Libc::with_libc([&] { + clInitGenode(alloc); + main(0, 0); + }); Genode::log("hello gpgpu completed"); } + +void Libc::Component::construct(Libc::Env &env) +{ + testvm_construct(env); +} diff --git a/repos/hello_gpgpu/src/hello_gpgpu/polybench.cc b/repos/hello_gpgpu/src/hello_gpgpu/polybench.cc new file mode 100644 index 0000000000..6055a4d5a4 --- /dev/null +++ b/repos/hello_gpgpu/src/hello_gpgpu/polybench.cc @@ -0,0 +1,402 @@ +/** + * polybench.c: This file is part of the PolyBench/C 3.2 test suite. + * + * + * Contact: Louis-Noel Pouchet + * Web address: http://polybench.sourceforge.net + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef _OPENMP +# include +#endif + +/* By default, collect PAPI counters on thread 0. */ +#ifndef POLYBENCH_THREAD_MONITOR +# define POLYBENCH_THREAD_MONITOR 0 +#endif + +/* Total LLC cache size. By default 32+MB.. */ +#ifndef POLYBENCH_CACHE_SIZE_KB +# define POLYBENCH_CACHE_SIZE_KB 32770 +#endif + + +int polybench_papi_counters_threadid = POLYBENCH_THREAD_MONITOR; +double polybench_program_total_flops = 0; + +#ifdef POLYBENCH_PAPI +# include +# define POLYBENCH_MAX_NB_PAPI_COUNTERS 96 + char* _polybench_papi_eventlist[] = { +#include "papi_counters.list" + NULL + }; + int polybench_papi_eventset; + int polybench_papi_eventlist[POLYBENCH_MAX_NB_PAPI_COUNTERS]; + long_long polybench_papi_values[POLYBENCH_MAX_NB_PAPI_COUNTERS]; + +#endif + + +/* Timer code (gettimeofday). */ +double polybench_t_start, polybench_t_end; +/* Timer code (RDTSC). 
*/ +unsigned long long int polybench_c_start, polybench_c_end; + +static +double rtclock() +{ +#ifdef POLYBENCH_TIME + struct timeval Tp; + int stat; + stat = gettimeofday (&Tp, NULL); + if (stat != 0) + printf ("Error return from gettimeofday: %d", stat); + return ((double)Tp.tv_sec + (double)Tp.tv_usec * 1.0e-6); +#else + return 0; +#endif +} + + +#ifdef POLYBENCH_CYCLE_ACCURATE_TIMER +static +unsigned long long int rdtsc() +{ + unsigned long long int ret = 0; + unsigned int cycles_lo; + unsigned int cycles_hi; + __asm__ volatile ("RDTSC" : "=a" (cycles_lo), "=d" (cycles_hi)); + ret = (unsigned long long int)cycles_hi << 32 | cycles_lo; + + return ret; +} +#endif + +void polybench_flush_cache() +{ + int cs = POLYBENCH_CACHE_SIZE_KB * 1024 / sizeof(double); + double* flush = (double*) calloc (cs, sizeof(double)); + int i; + double tmp = 0.0; +#ifdef _OPENMP +#pragma omp parallel for +#endif + for (i = 0; i < cs; i++) + tmp += flush[i]; + assert (tmp <= 10.0); + free (flush); +} + + +#ifdef POLYBENCH_LINUX_FIFO_SCHEDULER +void polybench_linux_fifo_scheduler() +{ + /* Use FIFO scheduler to limit OS interference. Program must be run + as root, and this works only for Linux kernels. */ + struct sched_param schedParam; + schedParam.sched_priority = sched_get_priority_max (SCHED_FIFO); + sched_setscheduler (0, SCHED_FIFO, &schedParam); +} + + +void polybench_linux_standard_scheduler() +{ + /* Restore to standard scheduler policy. 
*/ + struct sched_param schedParam; + schedParam.sched_priority = sched_get_priority_max (SCHED_OTHER); + sched_setscheduler (0, SCHED_OTHER, &schedParam); +} +#endif + +#ifdef POLYBENCH_PAPI + +static +void test_fail(char *file, int line, char *call, int retval) +{ + char buf[128]; + + memset(buf, '\0', sizeof(buf)); + if (retval != 0) + fprintf (stdout,"%-40s FAILED\nLine # %d\n", file, line); + else + { + fprintf (stdout,"%-40s SKIPPED\n", file); + fprintf (stdout,"Line # %d\n", line); + } + if (retval == PAPI_ESYS) + { + sprintf (buf, "System error in %s", call); + perror (buf); + } + else if (retval > 0) + fprintf (stdout,"Error: %s\n", call); + else if (retval == 0) + fprintf (stdout,"Error: %s\n", call); + else + { + char errstring[PAPI_MAX_STR_LEN]; + PAPI_perror (retval, errstring, PAPI_MAX_STR_LEN); + fprintf (stdout,"Error in %s: %s\n", call, errstring); + } + fprintf (stdout,"\n"); + if (PAPI_is_initialized ()) + PAPI_shutdown (); + exit (1); +} + + +void polybench_papi_init() +{ +# ifdef _OPENMP +#pragma omp parallel + { +#pragma omp master + { + if (omp_get_max_threads () < polybench_papi_counters_threadid) + polybench_papi_counters_threadid = omp_get_max_threads () - 1; + } +#pragma omp barrier + + if (omp_get_thread_num () == polybench_papi_counters_threadid) + { +# endif + int retval; + polybench_papi_eventset = PAPI_NULL; + if ((retval = PAPI_library_init (PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) + test_fail (__FILE__, __LINE__, "PAPI_library_init", retval); + if ((retval = PAPI_create_eventset (&polybench_papi_eventset)) + != PAPI_OK) + test_fail (__FILE__, __LINE__, "PAPI_create_eventset", retval); + int k; + for (k = 0; _polybench_papi_eventlist[k]; ++k) + { + if ((retval = + PAPI_event_name_to_code (_polybench_papi_eventlist[k], + &(polybench_papi_eventlist[k]))) + != PAPI_OK) + test_fail (__FILE__, __LINE__, "PAPI_event_name_to_code", retval); + } + polybench_papi_eventlist[k] = 0; + + +# ifdef _OPENMP + } + } +#pragma omp barrier +# endif +} 
+ + +void polybench_papi_close() +{ +# ifdef _OPENMP +#pragma omp parallel + { + if (omp_get_thread_num () == polybench_papi_counters_threadid) + { +# endif + int retval; + if ((retval = PAPI_destroy_eventset (&polybench_papi_eventset)) + != PAPI_OK) + test_fail (__FILE__, __LINE__, "PAPI_destroy_eventset", retval); + if (PAPI_is_initialized ()) + PAPI_shutdown (); +# ifdef _OPENMP + } + } +#pragma omp barrier +# endif +} + +int polybench_papi_start_counter(int evid) +{ +# ifndef POLYBENCH_NO_FLUSH_CACHE + polybench_flush_cache(); +# endif + +# ifdef _OPENMP +# pragma omp parallel + { + if (omp_get_thread_num () == polybench_papi_counters_threadid) + { +# endif + + int retval = 1; + char descr[PAPI_MAX_STR_LEN]; + PAPI_event_info_t evinfo; + PAPI_event_code_to_name (polybench_papi_eventlist[evid], descr); + if (PAPI_add_event (polybench_papi_eventset, + polybench_papi_eventlist[evid]) != PAPI_OK) + test_fail (__FILE__, __LINE__, "PAPI_add_event", 1); + if (PAPI_get_event_info (polybench_papi_eventlist[evid], &evinfo) + != PAPI_OK) + test_fail (__FILE__, __LINE__, "PAPI_get_event_info", retval); + if ((retval = PAPI_start (polybench_papi_eventset)) != PAPI_OK) + test_fail (__FILE__, __LINE__, "PAPI_start", retval); +# ifdef _OPENMP + } + } +#pragma omp barrier +# endif + return 0; +} + + +void polybench_papi_stop_counter(int evid) +{ +# ifdef _OPENMP +# pragma omp parallel + { + if (omp_get_thread_num () == polybench_papi_counters_threadid) + { +# endif + int retval; + long_long values[1]; + values[0] = 0; + if ((retval = PAPI_read (polybench_papi_eventset, &values[0])) + != PAPI_OK) + test_fail (__FILE__, __LINE__, "PAPI_read", retval); + + if ((retval = PAPI_stop (polybench_papi_eventset, NULL)) != PAPI_OK) + test_fail (__FILE__, __LINE__, "PAPI_stop", retval); + + polybench_papi_values[evid] = values[0]; + + if ((retval = PAPI_remove_event + (polybench_papi_eventset, + polybench_papi_eventlist[evid])) != PAPI_OK) + test_fail (__FILE__, __LINE__, 
"PAPI_remove_event", retval); +# ifdef _OPENMP + } + } +#pragma omp barrier +# endif +} + + +void polybench_papi_print() +{ + int verbose = 0; +# ifdef _OPENMP +# pragma omp parallel + { + if (omp_get_thread_num() == polybench_papi_counters_threadid) + { +#ifdef POLYBENCH_PAPI_VERBOSE + verbose = 1; +#endif + if (verbose) + printf ("On thread %d:\n", polybench_papi_counters_threadid); +#endif + int evid; + for (evid = 0; polybench_papi_eventlist[evid] != 0; ++evid) + { + if (verbose) + printf ("%s=", _polybench_papi_eventlist[evid]); + printf ("%llu ", polybench_papi_values[evid]); + if (verbose) + printf ("\n"); + } + printf ("\n"); +# ifdef _OPENMP + } + } +#pragma omp barrier +# endif +} + +#endif +/* ! POLYBENCH_PAPI */ + +void polybench_prepare_instruments() +{ +#ifndef POLYBENCH_NO_FLUSH_CACHE + polybench_flush_cache (); +#endif +#ifdef POLYBENCH_LINUX_FIFO_SCHEDULER + polybench_linux_fifo_scheduler (); +#endif +} + + +void polybench_timer_start() +{ + polybench_prepare_instruments (); +#ifndef POLYBENCH_CYCLE_ACCURATE_TIMER + polybench_t_start = rtclock (); +#else + polybench_c_start = rdtsc (); +#endif +} + + +void polybench_timer_stop() +{ +#ifndef POLYBENCH_CYCLE_ACCURATE_TIMER + polybench_t_end = rtclock (); +#else + polybench_c_end = rdtsc (); +#endif +#ifdef POLYBENCH_LINUX_FIFO_SCHEDULER + polybench_linux_standard_scheduler (); +#endif +} + + +void polybench_timer_print() +{ +#ifdef POLYBENCH_GFLOPS + if (__polybench_program_total_flops == 0) + { + printf ("[PolyBench][WARNING] Program flops not defined, use polybench_set_program_flops(value)\n"); + printf ("%0.6lf\n", polybench_t_end - polybench_t_start); + } + else + printf ("%0.2lf\n", + (__polybench_program_total_flops / + (double)(polybench_t_end - polybench_t_start)) / 1000000000); +#else +# ifndef POLYBENCH_CYCLE_ACCURATE_TIMER + printf ("%0.6f\n", polybench_t_end - polybench_t_start); +# else + printf ("%Ld\n", polybench_c_end - polybench_c_start); +# endif +#endif +} + + + +static +void * 
+xmalloc (size_t num) +{ + void* newA = NULL; + int ret = posix_memalign (&newA, 32, num); + if (! newA || ret) + { + fprintf (stderr, "[PolyBench] posix_memalign: cannot allocate memory"); + exit (1); + } + return newA; +} + + +void* polybench_alloc_data(unsigned long long int n, int elt_size) +{ + /// FIXME: detect overflow! + size_t val = n; + val *= elt_size; + void* ret = xmalloc (val); + + return ret; +} diff --git a/repos/hello_gpgpu/src/hello_gpgpu/polybench.h b/repos/hello_gpgpu/src/hello_gpgpu/polybench.h new file mode 100644 index 0000000000..7d092e45d9 --- /dev/null +++ b/repos/hello_gpgpu/src/hello_gpgpu/polybench.h @@ -0,0 +1,202 @@ +/** + * polybench.h: This file is part of the PolyBench/C 3.2 test suite. + * + * + * Contact: Louis-Noel Pouchet + * Web address: http://polybench.sourceforge.net + */ +/* + * Polybench header for instrumentation. + * + * Programs must be compiled with `-I utilities utilities/polybench.c' + * + * Optionally, one can define: + * + * -DPOLYBENCH_TIME, to report the execution time, + * OR (exclusive): + * -DPOLYBENCH_PAPI, to use PAPI H/W counters (defined in polybench.c) + * + * + * See README or utilities/polybench.c for additional options. + * + */ +#ifndef POLYBENCH_H +# define POLYBENCH_H + +# include + +/* Array padding. By default, none is used. */ +# ifndef POLYBENCH_PADDING_FACTOR +/* default: */ +# define POLYBENCH_PADDING_FACTOR 0 +# endif + + +/* C99 arrays in function prototype. By default, do not use. */ +# ifdef POLYBENCH_USE_C99_PROTO +# define POLYBENCH_C99_SELECT(x,y) y +# else +/* default: */ +# define POLYBENCH_C99_SELECT(x,y) x +# endif + + +/* Scalar loop bounds in SCoPs. By default, use parametric loop bounds. */ +# ifdef POLYBENCH_USE_SCALAR_LB +# define POLYBENCH_LOOP_BOUND(x,y) x +# else +/* default: */ +# define POLYBENCH_LOOP_BOUND(x,y) y +# endif + + +/* Macros to reference an array. Generic for heap and stack arrays + (C99). 
Each array dimensionality has his own macro, to be used at + declaration or as a function argument. + Example: + int b[x] => POLYBENCH_1D_ARRAY(b, x) + int A[N][N] => POLYBENCH_2D_ARRAY(A, N, N) +*/ +# ifndef POLYBENCH_STACK_ARRAYS +# define POLYBENCH_ARRAY(x) *x +# define POLYBENCH_FREE_ARRAY(x) free((void*)x); +# define POLYBENCH_DECL_VAR(x) (*x) +# else +# define POLYBENCH_ARRAY(x) x +# define POLYBENCH_FREE_ARRAY(x) +# define POLYBENCH_DECL_VAR(x) x +# endif +/* Macros for using arrays in the function prototypes. */ +# define POLYBENCH_1D(var, dim1,ddim1) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_2D(var, dim1, dim2, ddim1, ddim2) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_3D(var, dim1, dim2, dim3, ddim1, ddim2, ddim3) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_4D(var, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_5D(var, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim5,ddim5) + POLYBENCH_PADDING_FACTOR] + + +/* Macros to allocate heap arrays. 
+ Example: + polybench_alloc_2d_array(N, M, double) => allocates N x M x sizeof(double) + and returns a pointer to the 2d array + */ +# define POLYBENCH_ALLOC_1D_ARRAY(n1, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data (n1 + POLYBENCH_PADDING_FACTOR, sizeof(type)) +# define POLYBENCH_ALLOC_2D_ARRAY(n1, n2, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR), sizeof(type)) +# define POLYBENCH_ALLOC_3D_ARRAY(n1, n2, n3, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR), sizeof(type)) +# define POLYBENCH_ALLOC_4D_ARRAY(n1, n2, n3, n4, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR][n4 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR) * (n4 + POLYBENCH_PADDING_FACTOR), sizeof(type)) +# define POLYBENCH_ALLOC_5D_ARRAY(n1, n2, n3, n4, n5, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR][n4 + POLYBENCH_PADDING_FACTOR][n5 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR) * (n4 + POLYBENCH_PADDING_FACTOR) * (n5 + POLYBENCH_PADDING_FACTOR), sizeof(type)) + +/* Macros for array declaration. 
Without POLYBENCH_STACK_ARRAYS each macro declares the variable and assigns it from the matching POLYBENCH_ALLOC_nD_ARRAY; otherwise only the declaration is emitted. */ +# ifndef POLYBENCH_STACK_ARRAYS +# define POLYBENCH_1D_ARRAY_DECL(var, type, dim1, ddim1) \ + type POLYBENCH_1D(POLYBENCH_DECL_VAR(var), dim1, ddim1); \ + var = POLYBENCH_ALLOC_1D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), type); +# define POLYBENCH_2D_ARRAY_DECL(var, type, dim1, dim2, ddim1, ddim2) \ + type POLYBENCH_2D(POLYBENCH_DECL_VAR(var), dim1, dim2, ddim1, ddim2); \ + var = POLYBENCH_ALLOC_2D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), type); +# define POLYBENCH_3D_ARRAY_DECL(var, type, dim1, dim2, dim3, ddim1, ddim2, ddim3) \ + type POLYBENCH_3D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, ddim1, ddim2, ddim3); \ + var = POLYBENCH_ALLOC_3D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), type); +# define POLYBENCH_4D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) \ + type POLYBENCH_4D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4); \ + var = POLYBENCH_ALLOC_4D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), POLYBENCH_C99_SELECT(dim4, ddim4), type); +# define POLYBENCH_5D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) \ + type POLYBENCH_5D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5); \ + var = POLYBENCH_ALLOC_5D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), POLYBENCH_C99_SELECT(dim4, ddim4), POLYBENCH_C99_SELECT(dim5, ddim5), type); +# else +# define POLYBENCH_1D_ARRAY_DECL(var, type, dim1, ddim1) \ + type POLYBENCH_1D(POLYBENCH_DECL_VAR(var), dim1, ddim1); +# define POLYBENCH_2D_ARRAY_DECL(var, type, dim1, dim2, ddim1, ddim2) \ + type POLYBENCH_2D(POLYBENCH_DECL_VAR(var), dim1, dim2, ddim1, ddim2); +# define POLYBENCH_3D_ARRAY_DECL(var, type, dim1, dim2, dim3, ddim1, ddim2, ddim3) \ + type
POLYBENCH_3D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, ddim1, ddim2, ddim3); +# define POLYBENCH_4D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) \ + type POLYBENCH_4D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4); +# define POLYBENCH_5D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) \ + type POLYBENCH_5D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5); +# endif + + +/* Dead-code elimination macros. Use argc/argv for the run-time check. */ +# ifndef POLYBENCH_DUMP_ARRAYS +# define POLYBENCH_DCE_ONLY_CODE if (argc > 42 && ! strcmp(argv[0], "")) +# else +# define POLYBENCH_DCE_ONLY_CODE +# endif + +# define polybench_prevent_dce(func) \ + POLYBENCH_DCE_ONLY_CODE \ + func + + +/* Performance-related instrumentation. See polybench.c */ +# define polybench_start_instruments +# define polybench_stop_instruments +# define polybench_print_instruments + + +/* PAPI support. */ +# ifdef POLYBENCH_PAPI +extern const unsigned int polybench_papi_eventlist[]; +# undef polybench_start_instruments +# undef polybench_stop_instruments +# undef polybench_print_instruments +# define polybench_set_papi_thread_report(x) \ + polybench_papi_counters_threadid = x; +# define polybench_start_instruments \ + polybench_prepare_instruments(); \ + polybench_papi_init(); \ + int evid; \ + for (evid = 0; polybench_papi_eventlist[evid] != 0; evid++) \ + { \ + if (polybench_papi_start_counter(evid)) \ + continue; \ + +# define polybench_stop_instruments \ + polybench_papi_stop_counter(evid); \ + } \ + polybench_papi_close(); \ + +# define polybench_print_instruments polybench_papi_print(); +# endif + + +/* Timing support. 
*/ +# if defined(POLYBENCH_TIME) || defined(POLYBENCH_GFLOPS) +# undef polybench_start_instruments +# undef polybench_stop_instruments +# undef polybench_print_instruments +# define polybench_start_instruments polybench_timer_start(); +# define polybench_stop_instruments polybench_timer_stop(); +# define polybench_print_instruments polybench_timer_print(); +extern double polybench_program_total_flops; +extern void polybench_timer_start(); +extern void polybench_timer_stop(); +extern void polybench_timer_print(); +# endif + +/* Function declaration. */ +# ifdef POLYBENCH_TIME +extern void polybench_timer_start(); +extern void polybench_timer_stop(); +extern void polybench_timer_print(); +# endif + +# ifdef POLYBENCH_PAPI +extern void polybench_prepare_instruments(); +extern int polybench_papi_start_counter(int evid); +extern void polybench_papi_stop_counter(int evid); +extern void polybench_papi_init(); +extern void polybench_papi_close(); +extern void polybench_papi_print(); +# endif + +/* Function prototypes. 
*/ +extern void* polybench_alloc_data(unsigned long long int n, int elt_size); + + +#endif /* !POLYBENCH_H */ diff --git a/repos/hello_gpgpu/src/hello_gpgpu/polybenchUtilFuncts.h b/repos/hello_gpgpu/src/hello_gpgpu/polybenchUtilFuncts.h new file mode 100644 index 0000000000..cfe335f0f4 --- /dev/null +++ b/repos/hello_gpgpu/src/hello_gpgpu/polybenchUtilFuncts.h @@ -0,0 +1,36 @@ +//polybenchUtilFuncts.h +//Scott Grauer-Gray (sgrauerg@gmail.com) +//Functions used across codes; defined static inline so several translation units can include this header + +#ifndef POLYBENCH_UTIL_FUNCTS_H +#define POLYBENCH_UTIL_FUNCTS_H + +//small float value; percentDiff adds it to val1 so the denominator is not zero when val1 is 0 +#define SMALL_FLOAT_VAL 0.00000001f + +static inline double absVal(double a) //absolute value of a +{ + if(a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +static inline double percentDiff(double val1, double val2) //|val1 - val2| as a percentage of |val1 + SMALL_FLOAT_VAL|; 0 when both magnitudes are below 0.01 +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +#endif //POLYBENCH_UTIL_FUNCTS_H \ No newline at end of file diff --git a/repos/hello_gpgpu/src/hello_gpgpu/target.mk b/repos/hello_gpgpu/src/hello_gpgpu/target.mk index 899fd26bad..d85c6ce52b 100644 --- a/repos/hello_gpgpu/src/hello_gpgpu/target.mk +++ b/repos/hello_gpgpu/src/hello_gpgpu/target.mk @@ -1,3 +1,5 @@ TARGET = hello_gpgpu -SRC_CC = main.cc test.cc CL/cl.cc -LIBS = base +SRC_CC = main.cc test.cc CL/cl.cc 2mm.cc +LIBS = base libc + +CC_CXX_WARN_STRICT = diff --git a/repos/hello_gpgpu/src/hello_gpgpu/test.cc b/repos/hello_gpgpu/src/hello_gpgpu/test.cc index 28802d2cd0..a9cf441656 100644 --- a/repos/hello_gpgpu/src/hello_gpgpu/test.cc +++ b/repos/hello_gpgpu/src/hello_gpgpu/test.cc @@ -143,12 +143,12 @@ void run_gpgpu_test(Genode::Allocator_avl& alloc) { clInitGenode(alloc); const int num = 0x42; - volatile uint32_t* m_out; uint32_t* m_in; + volatile uint32_t* m_out; // allocate buffers - alloc.alloc(ELEMENTS * sizeof(uint32_t), (void**)&m_in); - alloc.alloc(ELEMENTS * sizeof(uint32_t), (void**)&m_out); + m_in =
(uint32_t*)alloc.alloc(ELEMENTS * sizeof(uint32_t)); + m_out = (volatile uint32_t*)alloc.alloc(ELEMENTS * sizeof(uint32_t)); for(int i = 0; i < ELEMENTS; i++) { From 0fc9a061153718d37b3289c2550b18916b1a59fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20L=C3=BCtke=20Dreimann?= Date: Fri, 5 Aug 2022 13:05:28 +0200 Subject: [PATCH 14/14] dummy RPC --- repos/dde_uos-intel-gpgpu/src/gpgpu/main.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/repos/dde_uos-intel-gpgpu/src/gpgpu/main.cc b/repos/dde_uos-intel-gpgpu/src/gpgpu/main.cc index 045a24463d..b194ff0662 100644 --- a/repos/dde_uos-intel-gpgpu/src/gpgpu/main.cc +++ b/repos/dde_uos-intel-gpgpu/src/gpgpu/main.cc @@ -13,11 +13,16 @@ gpgpu_genode* _global_gpgpu_genode; +extern void construct_RPC(Genode::Env &env); + void Component::construct(Genode::Env& e) { Genode::log("Hello world: UOS Intel GPGPU!"); Genode::log("Build: ", __TIMESTAMP__); + construct_RPC(e); + return; + // init globals static gpgpu_genode gg(e); _global_gpgpu_genode = &gg;