mirror of
https://github.com/mmueller41/genode.git
synced 2026-01-21 12:32:56 +01:00
shared gpu memory across cells
This commit is contained in:
@@ -23,6 +23,16 @@ struct Session_client : Genode::Rpc_client<Session>
|
|||||||
call<Rpc_register_vm>(size, ram_cap);
|
call<Rpc_register_vm>(size, ram_cap);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Client-side stub for register_shm: forwards both arguments unchanged
 * to the server by issuing the Rpc_register_shm RPC. ram_cap is taken
 * by reference and handed to the RPC as is.
 */
void register_shm(Genode::size_t size, Genode::Ram_dataspace_capability& ram_cap) override
|
||||||
|
{
|
||||||
|
call<Rpc_register_shm>(size, ram_cap);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Client-side stub for ask_shm: forwards id, size and ram_cap unchanged
 * to the server by issuing the Rpc_ask_shm RPC. size and ram_cap are
 * taken by reference and handed to the RPC as is.
 */
void ask_shm(int id, Genode::size_t &size, Genode::Ram_dataspace_capability& ram_cap) override
|
||||||
|
{
|
||||||
|
call<Rpc_ask_shm>(id, size, ram_cap);
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,6 +14,8 @@ struct Session : Genode::Session
|
|||||||
|
|
||||||
virtual void register_vm(Genode::size_t size, Genode::Ram_dataspace_capability& ram_cap) = 0;
|
virtual void register_vm(Genode::size_t size, Genode::Ram_dataspace_capability& ram_cap) = 0;
|
||||||
virtual void start_task(unsigned long kconf) = 0;
|
virtual void start_task(unsigned long kconf) = 0;
|
||||||
|
virtual void register_shm(Genode::size_t size, Genode::Ram_dataspace_capability& ram_cap) = 0;
|
||||||
|
virtual void ask_shm(int id, Genode::size_t &size, Genode::Ram_dataspace_capability& ram_cap) = 0;
|
||||||
|
|
||||||
/*******************
|
/*******************
|
||||||
** RPC interface **
|
** RPC interface **
|
||||||
@@ -21,9 +23,11 @@ struct Session : Genode::Session
|
|||||||
|
|
||||||
GENODE_RPC(Rpc_register_vm, void, register_vm, Genode::size_t, Genode::Ram_dataspace_capability&);
|
GENODE_RPC(Rpc_register_vm, void, register_vm, Genode::size_t, Genode::Ram_dataspace_capability&);
|
||||||
GENODE_RPC(Rpc_start_task, void, start_task, unsigned long);
|
GENODE_RPC(Rpc_start_task, void, start_task, unsigned long);
|
||||||
|
GENODE_RPC(Rpc_register_shm, void, register_shm, Genode::size_t, Genode::Ram_dataspace_capability&);
|
||||||
|
GENODE_RPC(Rpc_ask_shm, void, ask_shm, int, Genode::size_t&, Genode::Ram_dataspace_capability&);
|
||||||
|
|
||||||
|
|
||||||
GENODE_RPC_INTERFACE(Rpc_register_vm, Rpc_start_task);
|
GENODE_RPC_INTERFACE(Rpc_register_vm, Rpc_start_task, Rpc_register_shm, Rpc_ask_shm);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,11 +1,13 @@
|
|||||||
#ifndef CONFIG_H
|
#ifndef CONFIG_H
|
||||||
#define CONFIG_H
|
#define CONFIG_H
|
||||||
|
|
||||||
#define QEMU_TEST
|
// #define QEMU_TEST
|
||||||
|
|
||||||
//#define VERBOSE
|
#define VERBOSE
|
||||||
|
|
||||||
#define SCHED_CFS
|
#define SCHED_CFS
|
||||||
//#define SCHED_RR // default
|
//#define SCHED_RR // default
|
||||||
|
|
||||||
|
#define MAX_SHM_REGIONS 32
|
||||||
|
|
||||||
#endif // CONFIG_H
|
#endif // CONFIG_H
|
||||||
Submodule repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu updated: 9849813237...6f4658c8e2
@@ -31,6 +31,49 @@ void Session_component::register_vm(Genode::size_t size, Genode::Ram_dataspace_c
|
|||||||
_global_sched->add_vgpu(&vgpu);
|
_global_sched->add_vgpu(&vgpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Allocate a new shared memory region and record it in the next free slot.
 *
 * @param size        requested size of the region; must not be 0
 * @param ram_cap_vm  receives the RAM dataspace capability of the new region
 * @return            id (slot index) of the region, or -1 if size is 0, all
 *                    MAX_SHM_REGIONS slots are taken, or the allocation did
 *                    not yield a valid capability
 */
int SHM_manager::alloc_shm(Genode::size_t size, Genode::Ram_dataspace_capability& ram_cap_vm)
{
	// Session_component::ask_shm treats a stored size of 0 as "slot not
	// allocated", so a zero-sized region could never be looked up.
	if (size == 0)
		return -1;

	// reserve the next shared memory id
	const unsigned int slot = __atomic_fetch_add(&shid, 1u, __ATOMIC_SEQ_CST);

	if (slot >= static_cast<unsigned int>(MAX_SHM_REGIONS)) {
		// give the id back so that repeated failing requests cannot make
		// the counter wrap around into the valid range again
		__atomic_fetch_sub(&shid, 1u, __ATOMIC_SEQ_CST);
		return -1;
	}

	// allocate the backing memory; the driver-local mapping address is not
	// needed here, only the base stored in base[slot]
	Genode::addr_t mapped_base;
	ram_cap[slot] = _global_gpgpu_genode->allocRamCap(size, mapped_base, base[slot]);

	// never advertise a slot whose allocation produced no usable capability
	if (!ram_cap[slot].valid())
		return -1;

	// publish the size last: a non-zero size marks the slot as usable
	sizes[slot] = size;
	ram_cap_vm = ram_cap[slot];

	return static_cast<int>(slot);
}
|
||||||
|
|
||||||
|
void SHM_manager::free_shm(int id)
|
||||||
|
{
|
||||||
|
_global_gpgpu_genode->freeRamCap(ram_cap[id]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Create a new shared memory region and assign it to this session's vGPU.
 *
 * @param size        requested size of the region
 * @param ram_cap_vm  receives the capability of the new region; left
 *                    untouched by this function if the allocation fails
 */
void Session_component::register_shm(Genode::size_t size, Genode::Ram_dataspace_capability& ram_cap_vm)
{
	// create shared mem
	const int shm_id = SHM_manager::getInstance().alloc_shm(size, ram_cap_vm);

	// alloc_shm reports failure with a negative id, which must not be
	// recorded as a region of this vGPU
	if (shm_id < 0) {
		Genode::error("register_shm: could not allocate shared memory region of size ", size);
		return;
	}

	vgpu.assignSHM(shm_id);
}
|
||||||
|
|
||||||
|
void Session_component::ask_shm(int id, Genode::size_t &size, Genode::Ram_dataspace_capability& ram_cap_vm)
|
||||||
|
{
|
||||||
|
// get size
|
||||||
|
size = SHM_manager::getInstance().getSize(id);
|
||||||
|
if(size == 0) // invalid
|
||||||
|
return;
|
||||||
|
|
||||||
|
// get ram cap
|
||||||
|
ram_cap_vm = SHM_manager::getInstance().getCap(id);
|
||||||
|
|
||||||
|
// assign id to vgpu
|
||||||
|
vgpu.assignSHM(id);
|
||||||
|
}
|
||||||
|
|
||||||
void Session_component::start_task(unsigned long kconf)
|
void Session_component::start_task(unsigned long kconf)
|
||||||
{
|
{
|
||||||
// convert offset to driver virt addr
|
// convert offset to driver virt addr
|
||||||
@@ -44,7 +87,8 @@ void Session_component::start_task(unsigned long kconf)
|
|||||||
}
|
}
|
||||||
else // for pointer set phys addr
|
else // for pointer set phys addr
|
||||||
{
|
{
|
||||||
kc->buffConfigs[i].buffer = (void*)((Genode::addr_t)kc->buffConfigs[i].buffer + base);
|
const Genode::addr_t addrBase = kc->buffConfigs[i].shmid == -1 ? base : SHM_manager::getInstance().getBase(kc->buffConfigs[i].shmid);
|
||||||
|
kc->buffConfigs[i].buffer = (void*)((Genode::addr_t)kc->buffConfigs[i].buffer + addrBase);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
kc->kernelName = (char*)((Genode::addr_t)kc->kernelName + mapped_base);
|
kc->kernelName = (char*)((Genode::addr_t)kc->kernelName + mapped_base);
|
||||||
@@ -63,6 +107,7 @@ void Session_component::start_task(unsigned long kconf)
|
|||||||
for(int i = 0; i < kc->buffCount; i++)
|
for(int i = 0; i < kc->buffCount; i++)
|
||||||
{
|
{
|
||||||
Genode::log("\tBuffer ", i);
|
Genode::log("\tBuffer ", i);
|
||||||
|
Genode::log("\t\tshmid: ", (int)kc->buffConfigs[i].shmid);
|
||||||
if(kc->buffConfigs[i].non_pointer_type)
|
if(kc->buffConfigs[i].non_pointer_type)
|
||||||
{
|
{
|
||||||
Genode::log("\t\tvaddr: ", (void*)kc->buffConfigs[i].buffer);
|
Genode::log("\t\tvaddr: ", (void*)kc->buffConfigs[i].buffer);
|
||||||
@@ -72,7 +117,8 @@ void Session_component::start_task(unsigned long kconf)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
Genode::log("\t\tvaddr: ", (void*)((Genode::addr_t)kc->buffConfigs[i].buffer - base + mapped_base));
|
const Genode::addr_t addrBase = kc->buffConfigs[i].shmid == -1 ? base : SHM_manager::getInstance().getBase(kc->buffConfigs[i].shmid);
|
||||||
|
Genode::log("\t\tvaddr: ", (void*)((Genode::addr_t)kc->buffConfigs[i].buffer - addrBase));
|
||||||
Genode::log("\t\tpaddr: ", (void*)kc->buffConfigs[i].buffer);
|
Genode::log("\t\tpaddr: ", (void*)kc->buffConfigs[i].buffer);
|
||||||
//Genode::log("\t\tgpuaddr: ", (void*)((addr_t)kc->buffConfigs[i].ga)); // to print this, temporary make the var public
|
//Genode::log("\t\tgpuaddr: ", (void*)((addr_t)kc->buffConfigs[i].ga)); // to print this, temporary make the var public
|
||||||
//Genode::log("\t\tpos: ", (uint32_t)kc->buffConfigs[i].pos); // to print this, temporary make the var public
|
//Genode::log("\t\tpos: ", (uint32_t)kc->buffConfigs[i].pos); // to print this, temporary make the var public
|
||||||
|
|||||||
@@ -10,6 +10,28 @@
|
|||||||
namespace gpgpu_virt
|
namespace gpgpu_virt
|
||||||
{
|
{
|
||||||
|
|
||||||
|
class SHM_manager
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
Genode::Ram_dataspace_capability ram_cap[MAX_SHM_REGIONS];
|
||||||
|
Genode::addr_t base[MAX_SHM_REGIONS];
|
||||||
|
Genode::size_t sizes[MAX_SHM_REGIONS];
|
||||||
|
unsigned int shid;
|
||||||
|
SHM_manager() : ram_cap{}, base {0, }, sizes {0, }, shid(0) {};
|
||||||
|
|
||||||
|
public:
|
||||||
|
static SHM_manager &getInstance() {
|
||||||
|
static SHM_manager inst;
|
||||||
|
return inst;
|
||||||
|
}
|
||||||
|
|
||||||
|
int alloc_shm(Genode::size_t size, Genode::Ram_dataspace_capability& ram_cap);
|
||||||
|
void free_shm(int id);
|
||||||
|
Genode::Ram_dataspace_capability getCap(int id) const { return ram_cap[id]; }
|
||||||
|
Genode::addr_t getBase(int id) const { return base[id]; }
|
||||||
|
Genode::addr_t getSize(int id) const { return sizes[id]; }
|
||||||
|
};
|
||||||
|
|
||||||
struct Session_component : Genode::Rpc_object<Session>
|
struct Session_component : Genode::Rpc_object<Session>
|
||||||
{
|
{
|
||||||
VGpu vgpu;
|
VGpu vgpu;
|
||||||
@@ -25,6 +47,9 @@ struct Session_component : Genode::Rpc_object<Session>
|
|||||||
|
|
||||||
void start_task(unsigned long kconf) override;
|
void start_task(unsigned long kconf) override;
|
||||||
|
|
||||||
|
void register_shm(Genode::size_t size, Genode::Ram_dataspace_capability& ram_cap) override;
|
||||||
|
|
||||||
|
void ask_shm(int id, Genode::size_t& size, Genode::Ram_dataspace_capability& ram_cap) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class Root_component
|
class Root_component
|
||||||
|
|||||||
@@ -33,11 +33,32 @@ namespace gpgpu_virt {
|
|||||||
/// priority of vgpu
|
/// priority of vgpu
|
||||||
int prio;
|
int prio;
|
||||||
|
|
||||||
|
/// assigned shared memory regions
|
||||||
|
int shm_ids[MAX_SHM_REGIONS];
|
||||||
|
|
||||||
|
/// local shm id counter
|
||||||
|
int curr_shm_id;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
* @brief Construct a new VGpu object
|
* @brief Construct a new VGpu object
|
||||||
*/
|
*/
|
||||||
VGpu() : ctx(nullptr), ready_list(), prio(-1) {}
|
VGpu() : ctx(nullptr), ready_list(), prio(-1), curr_shm_id(0) {}
|
||||||
|
|
||||||
|
void assignSHM(int id)
|
||||||
|
{
|
||||||
|
shm_ids[curr_shm_id++] = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
void removeSHM(int id)
|
||||||
|
{
|
||||||
|
for(int i = 0; i < MAX_SHM_REGIONS; ++i)
|
||||||
|
{
|
||||||
|
if (shm_ids[i] == id){
|
||||||
|
shm_ids[i] = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Set the Priority
|
* @brief Set the Priority
|
||||||
|
|||||||
@@ -62,7 +62,7 @@ append config {
|
|||||||
<start name="hello_gpgpu">
|
<start name="hello_gpgpu">
|
||||||
<binary name="hello_gpgpu"/>
|
<binary name="hello_gpgpu"/>
|
||||||
<resource name="RAM" quantum="1024M"/>
|
<resource name="RAM" quantum="1024M"/>
|
||||||
<config bench="1">
|
<config bench="2097152">
|
||||||
<vfs> <dir name="dev"> <log/> <inline name="rtc">2022-07-20 14:30</inline> </dir> </vfs>
|
<vfs> <dir name="dev"> <log/> <inline name="rtc">2022-07-20 14:30</inline> </dir> </vfs>
|
||||||
<libc stdout="/dev/log" stderr="/dev/log" rtc="/dev/rtc"/>
|
<libc stdout="/dev/log" stderr="/dev/log" rtc="/dev/rtc"/>
|
||||||
</config>
|
</config>
|
||||||
|
|||||||
103
repos/hello_gpgpu/run/shm_gpgpu.run
Normal file
103
repos/hello_gpgpu/run/shm_gpgpu.run
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
# build config
|
||||||
|
build { core init gpgpu timer producer consumer1 consumer2 }
|
||||||
|
|
||||||
|
# platform config
|
||||||
|
set use_acpica_as_acpi_drv 0
|
||||||
|
source ${genode_dir}/repos/base/run/platform_drv.inc
|
||||||
|
# Build and return the platform-driver policy XML for this scenario.
# Starts from an empty string, adds the acpi_drv policy only when
# use_acpica_as_acpi_drv is set, then unconditionally appends one policy
# (pci class="ALL") for each of the label prefixes gpgpu, producer,
# consumer1 and consumer2.
proc platform_drv_policy {} {
|
||||||
|
global use_acpica_as_acpi_drv
|
||||||
|
set policy ""
|
||||||
|
|
||||||
|
append_if $use_acpica_as_acpi_drv policy {
|
||||||
|
<policy label="acpi_drv -> "> <pci class="ALL"/> </policy>}
|
||||||
|
|
||||||
|
append policy {
|
||||||
|
<policy label_prefix="gpgpu"> <pci class="ALL"/> </policy>}
|
||||||
|
append policy {
|
||||||
|
<policy label_prefix="producer"> <pci class="ALL"/> </policy>}
|
||||||
|
# NOTE(review): the start nodes in this script are named "consumer (high)"
# and "consumer (low)" while the policies below use the binary names
# consumer1/consumer2 as label prefix - confirm these policies can match
# (or are needed at all).
append policy {
|
||||||
|
<policy label_prefix="consumer1"> <pci class="ALL"/> </policy>}
|
||||||
|
append policy {
|
||||||
|
<policy label_prefix="consumer2"> <pci class="ALL"/> </policy>}
|
||||||
|
|
||||||
|
return $policy
|
||||||
|
}
|
||||||
|
append_platform_drv_build_components
|
||||||
|
build $build_components
|
||||||
|
|
||||||
|
# boot dir
|
||||||
|
create_boot_directory
|
||||||
|
|
||||||
|
# other config
|
||||||
|
append config {
|
||||||
|
<config>
|
||||||
|
<parent-provides>
|
||||||
|
<service name="ROM"/>
|
||||||
|
<service name="IRQ"/>
|
||||||
|
<service name="IO_MEM"/>
|
||||||
|
<service name="PD"/>
|
||||||
|
<service name="RM"/>
|
||||||
|
<service name="CPU"/>
|
||||||
|
<service name="LOG"/>
|
||||||
|
<service name="RAM"/>
|
||||||
|
<service name="CAP"/>
|
||||||
|
<service name="TOPO"/>
|
||||||
|
</parent-provides>
|
||||||
|
<default-route>
|
||||||
|
<any-service> <parent/> <any-child/> </any-service>
|
||||||
|
</default-route>
|
||||||
|
<default caps="200"/>
|
||||||
|
<start name="timer">
|
||||||
|
<resource name="RAM" quantum="1M"/>
|
||||||
|
<provides><service name="Timer"/></provides>
|
||||||
|
<route>
|
||||||
|
<any-service><parent/><any-child/></any-service>
|
||||||
|
</route>
|
||||||
|
</start>
|
||||||
|
}
|
||||||
|
|
||||||
|
append_platform_drv_config
|
||||||
|
|
||||||
|
append config {
|
||||||
|
<start name="gpgpu" priority="0">
|
||||||
|
<provides> <service name="gpgpu"/> </provides>
|
||||||
|
<resource name="RAM" quantum="12G"/>
|
||||||
|
</start>
|
||||||
|
<start name="producer">
|
||||||
|
<binary name="producer"/>
|
||||||
|
<resource name="RAM" quantum="512M"/>
|
||||||
|
<config>
|
||||||
|
<vfs> <dir name="dev"> <log/> <inline name="rtc">2022-07-20 14:30</inline> </dir> </vfs>
|
||||||
|
<libc stdout="/dev/log" stderr="/dev/log" rtc="/dev/rtc"/>
|
||||||
|
</config>
|
||||||
|
</start>
|
||||||
|
<start name="consumer (high)">
|
||||||
|
<binary name="consumer1"/>
|
||||||
|
<resource name="RAM" quantum="512M"/>
|
||||||
|
<config>
|
||||||
|
<vfs> <dir name="dev"> <log/> <inline name="rtc">2022-07-20 14:30</inline> </dir> </vfs>
|
||||||
|
<libc stdout="/dev/log" stderr="/dev/log" rtc="/dev/rtc"/>
|
||||||
|
</config>
|
||||||
|
</start>
|
||||||
|
<start name="consumer (low)">
|
||||||
|
<binary name="consumer2"/>
|
||||||
|
<resource name="RAM" quantum="512M"/>
|
||||||
|
<config >
|
||||||
|
<vfs> <dir name="dev"> <log/> <inline name="rtc">2022-07-20 14:30</inline> </dir> </vfs>
|
||||||
|
<libc stdout="/dev/log" stderr="/dev/log" rtc="/dev/rtc"/>
|
||||||
|
</config>
|
||||||
|
</start>
|
||||||
|
</config>}
|
||||||
|
|
||||||
|
install_config $config
|
||||||
|
|
||||||
|
# boot modules
|
||||||
|
set boot_modules {
|
||||||
|
core ld.lib.so libc.lib.so vfs.lib.so libm.lib.so init gpgpu timer producer consumer1 consumer2
|
||||||
|
}
|
||||||
|
append_platform_drv_boot_modules
|
||||||
|
build_boot_image $boot_modules
|
||||||
|
|
||||||
|
# qemu stuff
|
||||||
|
append qemu_args " -nographic -m 24G"
|
||||||
|
run_genode_until forever
|
||||||
1233
repos/hello_gpgpu/src/consumer1/OpenSurf.cpp
Normal file
1233
repos/hello_gpgpu/src/consumer1/OpenSurf.cpp
Normal file
File diff suppressed because it is too large
Load Diff
20554
repos/hello_gpgpu/src/consumer1/SURF_noSLM_fixed_kernel.h
Normal file
20554
repos/hello_gpgpu/src/consumer1/SURF_noSLM_fixed_kernel.h
Normal file
File diff suppressed because it is too large
Load Diff
4818
repos/hello_gpgpu/src/consumer1/frac_320_240.h
Normal file
4818
repos/hello_gpgpu/src/consumer1/frac_320_240.h
Normal file
File diff suppressed because it is too large
Load Diff
99
repos/hello_gpgpu/src/consumer1/main.cc
Normal file
99
repos/hello_gpgpu/src/consumer1/main.cc
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
#include <base/log.h>
|
||||||
|
#include <base/heap.h>
|
||||||
|
#include <base/allocator_avl.h>
|
||||||
|
#include <base/attached_rom_dataspace.h>
|
||||||
|
|
||||||
|
#include <libc/component.h>
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
// OpenCL
|
||||||
|
#define CL_TARGET_OPENCL_VERSION 100
|
||||||
|
#include "../hello_gpgpu/CL/cl.h"
|
||||||
|
|
||||||
|
// rpc
|
||||||
|
#include <gpgpu_virt/connection.h>
|
||||||
|
|
||||||
|
// stupid alloc
|
||||||
|
#include "../hello_gpgpu/allocator_stupid.h"
|
||||||
|
|
||||||
|
namespace ns_OpenSurf{int main(int argc, char *argv[]);};
|
||||||
|
|
||||||
|
struct consumer_conv
|
||||||
|
{
|
||||||
|
Genode::Env &env;
|
||||||
|
gpgpu_virt::Connection backend_driver;
|
||||||
|
Genode::Allocator_stupid allocator;
|
||||||
|
|
||||||
|
const unsigned long size = 0x40000000;
|
||||||
|
cl_genode clg;
|
||||||
|
Genode::Ram_dataspace_capability vgpu_mem_ram_cap;
|
||||||
|
Genode::Ram_dataspace_capability vgpu_shm_ram_cap;
|
||||||
|
|
||||||
|
volatile uint8_t *ready;
|
||||||
|
const unsigned long img_size = 320 * 240 * sizeof(float);
|
||||||
|
volatile float *data;
|
||||||
|
|
||||||
|
void init()
|
||||||
|
{
|
||||||
|
Genode::log("===Init Consumer Surf===");
|
||||||
|
clInitGenode(clg);
|
||||||
|
|
||||||
|
// register vgpu (optional?)
|
||||||
|
const unsigned long size_vgpu_mem = 0x1000;
|
||||||
|
backend_driver.register_vm(size_vgpu_mem, vgpu_mem_ram_cap);
|
||||||
|
|
||||||
|
// create shm for gpu
|
||||||
|
const unsigned long id = 0;
|
||||||
|
Genode::size_t total_size = 0;
|
||||||
|
while (total_size == 0)
|
||||||
|
{
|
||||||
|
backend_driver.ask_shm(id, total_size, vgpu_shm_ram_cap);
|
||||||
|
}
|
||||||
|
|
||||||
|
// attach shm to vm
|
||||||
|
Genode::addr_t mapped_base = env.rm().attach(vgpu_shm_ram_cap);
|
||||||
|
clg.add_shm_mapped_base(id, mapped_base);
|
||||||
|
|
||||||
|
// use it in allocator
|
||||||
|
allocator.add_range(mapped_base, total_size);
|
||||||
|
|
||||||
|
// alloc whole data
|
||||||
|
ready = (uint8_t *)allocator.alloc(1);
|
||||||
|
data = (float *)allocator.alloc_aligned(0x10000, img_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void run()
|
||||||
|
{
|
||||||
|
Genode::log("===Run Consumer Surf===");
|
||||||
|
|
||||||
|
Libc::with_libc([&]
|
||||||
|
{
|
||||||
|
for(;;)
|
||||||
|
{
|
||||||
|
while (*ready != 0x42);
|
||||||
|
|
||||||
|
ns_OpenSurf::main(2, (char**)data);
|
||||||
|
|
||||||
|
//Genode::log(data[0]);
|
||||||
|
sleep(3);
|
||||||
|
} });
|
||||||
|
|
||||||
|
Genode::log("===End===");
|
||||||
|
Genode::log("Consumer Surf completed");
|
||||||
|
}
|
||||||
|
|
||||||
|
consumer_conv(Genode::Env &e) : env(e), backend_driver(env), allocator(), clg(env, size), ready(nullptr), data(nullptr)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Component entry point: create the single consumer object, initialise it
 * and enter its processing loop.
 */
void Libc::Component::construct(Libc::Env &env)
{
	static consumer_conv consumer(env);

	consumer.init();
	consumer.run();
}
|
||||||
9
repos/hello_gpgpu/src/consumer1/target.mk
Normal file
9
repos/hello_gpgpu/src/consumer1/target.mk
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
TARGET = consumer1
|
||||||
|
SRC_CC = main.cc \
|
||||||
|
OpenSurf.cpp \
|
||||||
|
../hello_gpgpu/CL/cl.cc ../hello_gpgpu/CL/cl_genode.cc \
|
||||||
|
../hello_gpgpu/allocator_stupid.cc
|
||||||
|
|
||||||
|
LIBS = base libc libm
|
||||||
|
|
||||||
|
CC_CXX_WARN_STRICT =
|
||||||
326
repos/hello_gpgpu/src/consumer2/2DConvolution.cc
Normal file
326
repos/hello_gpgpu/src/consumer2/2DConvolution.cc
Normal file
@@ -0,0 +1,326 @@
|
|||||||
|
/**
|
||||||
|
* 2DConvolution.c: This file is part of the PolyBench/GPU 1.0 test suite.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* Contact: Scott Grauer-Gray <sgrauerg@gmail.com>
|
||||||
|
* Will Killian <killian@udel.edu>
|
||||||
|
* Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
||||||
|
* Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
#define CL_TARGET_OPENCL_VERSION 100
|
||||||
|
#ifdef __APPLE__
|
||||||
|
#include <OpenCL/opencl.h>
|
||||||
|
#else
|
||||||
|
#include "../hello_gpgpu/CL/cl.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define POLYBENCH_TIME 1
|
||||||
|
|
||||||
|
//select the OpenCL device to use (can be GPU, CPU, or Accelerator such as Intel Xeon Phi)
|
||||||
|
#define OPENCL_DEVICE_SELECTION CL_DEVICE_TYPE_GPU
|
||||||
|
|
||||||
|
#include "../hello_gpgpu/benchmark/convolution-2d/2DConvolution.h"
|
||||||
|
#include "../hello_gpgpu/polybench.h"
|
||||||
|
|
||||||
|
//define the error threshold for the results "not matching"
|
||||||
|
#define PERCENT_DIFF_ERROR_THRESHOLD 1.05
|
||||||
|
|
||||||
|
#define MAX_SOURCE_SIZE (0x100000)
|
||||||
|
|
||||||
|
#if defined(cl_khr_fp64) // Khronos extension available?
|
||||||
|
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||||
|
#elif defined(cl_amd_fp64) // AMD extension available?
|
||||||
|
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace ns_convolution_2d {
|
||||||
|
|
||||||
|
#include "../hello_gpgpu/benchmark/convolution-2d/2DConvolution_kernel.h"
|
||||||
|
#include "../hello_gpgpu/polybenchUtilFuncts.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
char str_temp[1024];
|
||||||
|
|
||||||
|
cl_platform_id platform_id;
|
||||||
|
cl_device_id device_id;
|
||||||
|
cl_uint num_devices;
|
||||||
|
cl_uint num_platforms;
|
||||||
|
cl_int errcode;
|
||||||
|
cl_context clGPUContext;
|
||||||
|
cl_kernel clKernel;
|
||||||
|
cl_command_queue clCommandQue;
|
||||||
|
cl_program clProgram;
|
||||||
|
cl_mem a_mem_obj;
|
||||||
|
cl_mem b_mem_obj;
|
||||||
|
cl_mem c_mem_obj;
|
||||||
|
FILE *fp;
|
||||||
|
char *source_str;
|
||||||
|
size_t source_size;
|
||||||
|
|
||||||
|
// patch config for consumer2
|
||||||
|
#undef NI
|
||||||
|
#undef NJ
|
||||||
|
#define NI 320
|
||||||
|
#define NJ 240
|
||||||
|
|
||||||
|
void compareResults(int ni, int nj, DATA_TYPE POLYBENCH_2D(B, NI, NJ, ni, nj), DATA_TYPE POLYBENCH_2D(B_outputFromGpu, NI, NJ, ni, nj))
|
||||||
|
{
|
||||||
|
int i, j, fail;
|
||||||
|
fail = 0;
|
||||||
|
|
||||||
|
// Compare outputs from CPU and GPU
|
||||||
|
for (i=1; i < (ni-1); i++)
|
||||||
|
{
|
||||||
|
for (j=1; j < (nj-1); j++)
|
||||||
|
{
|
||||||
|
if (percentDiff(B[i][j], B_outputFromGpu[i][j]) > PERCENT_DIFF_ERROR_THRESHOLD)
|
||||||
|
{
|
||||||
|
fail++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print results
|
||||||
|
printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Intentionally a no-op. Loading the kernel source from "2DConvolution.cl"
 * is disabled (kept below for reference); cl_load_prog creates the program
 * from an embedded binary instead.
 */
void read_cl_file()
{
	/*
	 * Disabled source-file loader:
	 *
	 *   fp = fopen("2DConvolution.cl", "r");
	 *   if (!fp) {
	 *       fprintf(stdout, "Failed to load kernel.\n");
	 *       exit(1);
	 *   }
	 *   source_str = (char*)malloc(MAX_SOURCE_SIZE);
	 *   source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
	 *   fclose( fp );
	 */
}
|
||||||
|
|
||||||
|
|
||||||
|
void init(int ni, int nj, DATA_TYPE POLYBENCH_2D(A, NI, NJ, ni, nj))
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
|
||||||
|
for (i = 0; i < ni; ++i)
|
||||||
|
{
|
||||||
|
for (j = 0; j < nj; ++j)
|
||||||
|
{
|
||||||
|
A[i][j] = (float)rand()/RAND_MAX;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void cl_initialization()
|
||||||
|
{
|
||||||
|
|
||||||
|
// Get platform and device information
|
||||||
|
errcode = clGetPlatformIDs(1, &platform_id, &num_platforms);
|
||||||
|
if(errcode == CL_SUCCESS) printf("number of platforms is %d\n",num_platforms);
|
||||||
|
else printf("Error getting platform IDs\n");
|
||||||
|
|
||||||
|
errcode = clGetPlatformInfo(platform_id,CL_PLATFORM_NAME, sizeof(str_temp), str_temp,NULL);
|
||||||
|
if(errcode == CL_SUCCESS) printf("platform name is %s\n",str_temp);
|
||||||
|
else printf("Error getting platform name\n");
|
||||||
|
|
||||||
|
errcode = clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, sizeof(str_temp), str_temp,NULL);
|
||||||
|
if(errcode == CL_SUCCESS) printf("platform version is %s\n",str_temp);
|
||||||
|
else printf("Error getting platform version\n");
|
||||||
|
|
||||||
|
errcode = clGetDeviceIDs( platform_id, OPENCL_DEVICE_SELECTION, 1, &device_id, &num_devices);
|
||||||
|
if(errcode == CL_SUCCESS) printf("number of devices is %d\n", num_devices);
|
||||||
|
else printf("Error getting device IDs\n");
|
||||||
|
|
||||||
|
errcode = clGetDeviceInfo(device_id,CL_DEVICE_NAME, sizeof(str_temp), str_temp,NULL);
|
||||||
|
if(errcode == CL_SUCCESS) printf("device name is %s\n",str_temp);
|
||||||
|
else printf("Error getting device name\n");
|
||||||
|
|
||||||
|
// Create an OpenCL context
|
||||||
|
clGPUContext = clCreateContext( NULL, 1, &device_id, NULL, NULL, &errcode);
|
||||||
|
if(errcode != CL_SUCCESS) printf("Error in creating context\n");
|
||||||
|
|
||||||
|
//Create a command-queue
|
||||||
|
clCommandQue = clCreateCommandQueue(clGPUContext, device_id, 0, &errcode);
|
||||||
|
if(errcode != CL_SUCCESS) printf("Error in creating command queue\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Create the two device buffers used by the kernel.
 *
 * a_mem_obj wraps the caller-provided input array A via clCreateBufferSHM
 * (last argument 0 - presumably the shared memory region id, confirm
 * against the CL layer); b_mem_obj is a plain read/write buffer of the same
 * size. An error is reported if either creation fails, and errcode is left
 * holding the first failure.
 */
void cl_mem_init(DATA_TYPE POLYBENCH_2D(A, NI, NJ, ni, nj))
{
	a_mem_obj = clCreateBufferSHM(clGPUContext, CL_MEM_READ_ONLY, sizeof(DATA_TYPE) * NI * NJ, A, &errcode, 0);

	// remember this status: the next call overwrites errcode
	const cl_int errcode_a = errcode;

	b_mem_obj = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE, sizeof(DATA_TYPE) * NI * NJ, NULL, &errcode);

	if (errcode_a != CL_SUCCESS)
		errcode = errcode_a;

	if(errcode != CL_SUCCESS) printf("Error in creating buffers\n");

	//errcode = clEnqueueWriteBuffer(clCommandQue, a_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NJ, A, 0, NULL, NULL);
	//if(errcode != CL_SUCCESS)printf("Error in writing buffers\n");
}
|
||||||
|
|
||||||
|
void cl_load_prog()
|
||||||
|
{
|
||||||
|
// Create a program from the kernel source
|
||||||
|
const size_t kernel_size = __2DConvolution_Gen9core_gen_len;
|
||||||
|
const unsigned char* kernel_bin = __2DConvolution_Gen9core_gen;
|
||||||
|
clProgram = clCreateProgramWithBinary(clGPUContext, 1, &device_id, &kernel_size, &kernel_bin, NULL, &errcode);
|
||||||
|
// clProgram = clCreateProgramWithSource(clGPUContext, 1, (const char **)&source_str, (const size_t *)&source_size, &errcode);
|
||||||
|
|
||||||
|
if(errcode != CL_SUCCESS) printf("Error in creating program\n");
|
||||||
|
|
||||||
|
// Build the program
|
||||||
|
errcode = clBuildProgram(clProgram, 1, &device_id, NULL, NULL, NULL);
|
||||||
|
if(errcode != CL_SUCCESS) printf("Error in building program\n");
|
||||||
|
|
||||||
|
// Create the OpenCL kernel
|
||||||
|
clKernel = clCreateKernel(clProgram, "Convolution2D_kernel", &errcode);
|
||||||
|
if(errcode != CL_SUCCESS) printf("Error in creating kernel\n");
|
||||||
|
|
||||||
|
clFinish(clCommandQue);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void cl_launch_kernel(int ni, int nj)
|
||||||
|
{
|
||||||
|
size_t localWorkSize[2], globalWorkSize[2];
|
||||||
|
localWorkSize[0] = DIM_LOCAL_WORK_GROUP_X;
|
||||||
|
localWorkSize[1] = DIM_LOCAL_WORK_GROUP_Y;
|
||||||
|
globalWorkSize[0] = (size_t)ceil(((float)NI) / ((float)DIM_LOCAL_WORK_GROUP_X)) * DIM_LOCAL_WORK_GROUP_X;
|
||||||
|
globalWorkSize[1] = (size_t)ceil(((float)NJ) / ((float)DIM_LOCAL_WORK_GROUP_Y)) * DIM_LOCAL_WORK_GROUP_Y;
|
||||||
|
|
||||||
|
/* Start timer. */
|
||||||
|
polybench_start_instruments;
|
||||||
|
|
||||||
|
// Set the arguments of the kernel
|
||||||
|
errcode = clSetKernelArg(clKernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);
|
||||||
|
errcode |= clSetKernelArg(clKernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);
|
||||||
|
errcode = clSetKernelArg(clKernel, 2, sizeof(int), &ni);
|
||||||
|
errcode |= clSetKernelArg(clKernel, 3, sizeof(int), &nj);
|
||||||
|
|
||||||
|
if(errcode != CL_SUCCESS) printf("Error in seting arguments\n");
|
||||||
|
// Execute the OpenCL kernel
|
||||||
|
errcode = clEnqueueNDRangeKernel(clCommandQue, clKernel, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
|
||||||
|
|
||||||
|
if(errcode != CL_SUCCESS) printf("Error in launching kernel\n");
|
||||||
|
clFinish(clCommandQue);
|
||||||
|
|
||||||
|
/* Stop and print timer. */
|
||||||
|
polybench_stop_instruments;
|
||||||
|
printf("GPU Time in seconds:\n");
|
||||||
|
polybench_print_instruments;
|
||||||
|
}
|
||||||
|
|
||||||
|
void cl_clean_up()
|
||||||
|
{
|
||||||
|
// Clean up
|
||||||
|
errcode = clFlush(clCommandQue);
|
||||||
|
errcode = clFinish(clCommandQue);
|
||||||
|
errcode = clReleaseKernel(clKernel);
|
||||||
|
errcode = clReleaseProgram(clProgram);
|
||||||
|
errcode = clReleaseMemObject(a_mem_obj);
|
||||||
|
errcode = clReleaseMemObject(b_mem_obj);
|
||||||
|
errcode = clReleaseCommandQueue(clCommandQue);
|
||||||
|
errcode = clReleaseContext(clGPUContext);
|
||||||
|
if(errcode != CL_SUCCESS) printf("Error in cleanup\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void conv2D(int ni, int nj, DATA_TYPE POLYBENCH_2D(A, NI, NJ, ni, nj), DATA_TYPE POLYBENCH_2D(B, NI, NJ, ni, nj))
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33;
|
||||||
|
|
||||||
|
c11 = +0.2; c21 = +0.5; c31 = -0.8;
|
||||||
|
c12 = -0.3; c22 = +0.6; c32 = -0.9;
|
||||||
|
c13 = +0.4; c23 = +0.7; c33 = +0.10;
|
||||||
|
|
||||||
|
|
||||||
|
for (i = 1; i < _PB_NI - 1; ++i) // 0
|
||||||
|
{
|
||||||
|
for (j = 1; j < _PB_NJ - 1; ++j) // 1
|
||||||
|
{
|
||||||
|
B[i][j] = c11 * A[(i - 1)][(j - 1)] + c12 * A[(i + 0)][(j - 1)] + c13 * A[(i + 1)][(j - 1)]
|
||||||
|
+ c21 * A[(i - 1)][(j + 0)] + c22 * A[(i + 0)][(j + 0)] + c23 * A[(i + 1)][(j + 0)]
|
||||||
|
+ c31 * A[(i - 1)][(j + 1)] + c32 * A[(i + 0)][(j + 1)] + c33 * A[(i + 1)][(j + 1)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* DCE code. Must scan the entire live-out data.
|
||||||
|
Can be used also to check the correctness of the output. */
|
||||||
|
static
|
||||||
|
void print_array(int ni, int nj,
|
||||||
|
DATA_TYPE POLYBENCH_2D(B,NI,NJ,ni,nj))
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
|
||||||
|
for (i = 0; i < ni; i++)
|
||||||
|
for (j = 0; j < nj; j++) {
|
||||||
|
fprintf (stderr, DATA_PRINTF_MODIFIER, B[i][j]);
|
||||||
|
if ((i * ni + j) % 20 == 0) fprintf (stderr, "\n");
|
||||||
|
}
|
||||||
|
fprintf (stderr, "\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
/* Retrieve problem size */
|
||||||
|
int ni = NI;
|
||||||
|
int nj = NJ;
|
||||||
|
|
||||||
|
POLYBENCH_2D_ARRAY_DECL(A,DATA_TYPE,NI,NJ,ni,nj);
|
||||||
|
POLYBENCH_2D_ARRAY_DECL(B,DATA_TYPE,NI,NJ,ni,nj);
|
||||||
|
POLYBENCH_2D_ARRAY_DECL(B_outputFromGpu,DATA_TYPE,NI,NJ,ni,nj);
|
||||||
|
|
||||||
|
//init(ni, nj, POLYBENCH_ARRAY(A));
|
||||||
|
A = (float(*)[NI][NJ]) argv;
|
||||||
|
|
||||||
|
read_cl_file();
|
||||||
|
cl_initialization();
|
||||||
|
cl_mem_init(POLYBENCH_ARRAY(A));
|
||||||
|
cl_load_prog();
|
||||||
|
|
||||||
|
cl_launch_kernel(ni, nj);
|
||||||
|
|
||||||
|
errcode = clEnqueueReadBuffer(clCommandQue, b_mem_obj, CL_TRUE, 0, NI*NJ*sizeof(DATA_TYPE), POLYBENCH_ARRAY(B_outputFromGpu), 0, NULL, NULL);
|
||||||
|
if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");
|
||||||
|
|
||||||
|
#ifdef RUN_ON_CPU
|
||||||
|
|
||||||
|
/* Start timer. */
|
||||||
|
polybench_start_instruments;
|
||||||
|
|
||||||
|
conv2D(ni, nj, POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B));
|
||||||
|
|
||||||
|
/* Stop and print timer. */
|
||||||
|
printf("CPU Time in seconds:\n");
|
||||||
|
polybench_stop_instruments;
|
||||||
|
polybench_print_instruments;
|
||||||
|
|
||||||
|
compareResults(ni, nj, POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(B_outputFromGpu));
|
||||||
|
|
||||||
|
#else //prevent dead code elimination
|
||||||
|
|
||||||
|
polybench_prevent_dce(print_array(ni, nj, POLYBENCH_ARRAY(B_outputFromGpu)));
|
||||||
|
|
||||||
|
#endif //RUN_ON_CPU
|
||||||
|
|
||||||
|
cl_clean_up();
|
||||||
|
|
||||||
|
//POLYBENCH_FREE_ARRAY(A);
|
||||||
|
POLYBENCH_FREE_ARRAY(B);
|
||||||
|
POLYBENCH_FREE_ARRAY(B_outputFromGpu);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
99
repos/hello_gpgpu/src/consumer2/main.cc
Normal file
99
repos/hello_gpgpu/src/consumer2/main.cc
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
#include <base/log.h>
|
||||||
|
#include <base/heap.h>
|
||||||
|
#include <base/allocator_avl.h>
|
||||||
|
#include <base/attached_rom_dataspace.h>
|
||||||
|
|
||||||
|
#include <libc/component.h>
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
// OpenCL
|
||||||
|
#define CL_TARGET_OPENCL_VERSION 100
|
||||||
|
#include "../hello_gpgpu/CL/cl.h"
|
||||||
|
|
||||||
|
// rpc
|
||||||
|
#include <gpgpu_virt/connection.h>
|
||||||
|
|
||||||
|
// stupid alloc
|
||||||
|
#include "../hello_gpgpu/allocator_stupid.h"
|
||||||
|
|
||||||
|
// Forward declaration of the convolution benchmark entry point that run()
// calls below; its definition lives in another translation unit
// (presumably 2DConvolution.cc, which this target's SRC_CC lists).
namespace ns_convolution_2d{int main(int argc, char *argv[]);};
|
||||||
|
|
||||||
|
struct consumer_conv
|
||||||
|
{
|
||||||
|
Genode::Env &env;
|
||||||
|
gpgpu_virt::Connection backend_driver;
|
||||||
|
Genode::Allocator_stupid allocator;
|
||||||
|
|
||||||
|
const unsigned long size = 0x40000000;
|
||||||
|
cl_genode clg;
|
||||||
|
Genode::Ram_dataspace_capability vgpu_mem_ram_cap;
|
||||||
|
Genode::Ram_dataspace_capability vgpu_shm_ram_cap;
|
||||||
|
|
||||||
|
volatile uint8_t *ready;
|
||||||
|
const unsigned long img_size = 320 * 240 * sizeof(float);
|
||||||
|
volatile float *data;
|
||||||
|
|
||||||
|
void init()
|
||||||
|
{
|
||||||
|
Genode::log("===Init Consumer Conv===");
|
||||||
|
clInitGenode(clg);
|
||||||
|
|
||||||
|
// register vgpu (optional?)
|
||||||
|
const unsigned long size_vgpu_mem = 0x1000;
|
||||||
|
backend_driver.register_vm(size_vgpu_mem, vgpu_mem_ram_cap);
|
||||||
|
|
||||||
|
// create shm for gpu
|
||||||
|
const unsigned long id = 0;
|
||||||
|
Genode::size_t total_size = 0;
|
||||||
|
while (total_size == 0)
|
||||||
|
{
|
||||||
|
backend_driver.ask_shm(id, total_size, vgpu_shm_ram_cap);
|
||||||
|
}
|
||||||
|
|
||||||
|
// attach shm to vm
|
||||||
|
Genode::addr_t mapped_base = env.rm().attach(vgpu_shm_ram_cap);
|
||||||
|
clg.add_shm_mapped_base(id, mapped_base);
|
||||||
|
|
||||||
|
// use it in allocator
|
||||||
|
allocator.add_range(mapped_base, total_size);
|
||||||
|
|
||||||
|
// alloc whole data
|
||||||
|
ready = (uint8_t *)allocator.alloc(1);
|
||||||
|
data = (float *)allocator.alloc_aligned(0x10000, img_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void run()
|
||||||
|
{
|
||||||
|
Genode::log("===Run Consumer Conv===");
|
||||||
|
|
||||||
|
Libc::with_libc([&]
|
||||||
|
{
|
||||||
|
for(;;)
|
||||||
|
{
|
||||||
|
while (*ready != 0x42);
|
||||||
|
|
||||||
|
ns_convolution_2d::main(2, (char**)data);
|
||||||
|
|
||||||
|
//Genode::log(data[0]);
|
||||||
|
sleep(3);
|
||||||
|
} });
|
||||||
|
|
||||||
|
Genode::log("===End===");
|
||||||
|
Genode::log("Consumer Conv completed");
|
||||||
|
}
|
||||||
|
|
||||||
|
consumer_conv(Genode::Env &e) : env(e), backend_driver(env), allocator(), clg(env, size), ready(nullptr), data(nullptr)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Component entry: build the single consumer instance (function-local
 * static, so it outlives this call), initialise it and enter its loop.
 */
void Libc::Component::construct(Libc::Env &env)
{
    static consumer_conv consumer(env);

    consumer.init();
    consumer.run();
}
|
||||||
10
repos/hello_gpgpu/src/consumer2/target.mk
Normal file
10
repos/hello_gpgpu/src/consumer2/target.mk
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
TARGET = consumer2
|
||||||
|
SRC_CC = main.cc \
|
||||||
|
2DConvolution.cc \
|
||||||
|
../hello_gpgpu/polybench.cc \
|
||||||
|
../hello_gpgpu/CL/cl.cc ../hello_gpgpu/CL/cl_genode.cc \
|
||||||
|
../hello_gpgpu/allocator_stupid.cc
|
||||||
|
|
||||||
|
LIBS = base libc libm
|
||||||
|
|
||||||
|
CC_CXX_WARN_STRICT =
|
||||||
@@ -357,6 +357,26 @@ clCreateBuffer(cl_context context,
|
|||||||
clmem->bc.buffer = host_ptr;
|
clmem->bc.buffer = host_ptr;
|
||||||
clmem->bc.buffer_size = (uint32_t)size;
|
clmem->bc.buffer_size = (uint32_t)size;
|
||||||
clmem->bc.non_pointer_type = false;
|
clmem->bc.non_pointer_type = false;
|
||||||
|
clmem->bc.shmid = -1;
|
||||||
|
|
||||||
|
*errcode_ret |= CL_SUCCESS;
|
||||||
|
return clmem;
|
||||||
|
}
|
||||||
|
|
||||||
|
CL_API_ENTRY cl_mem CL_API_CALL
|
||||||
|
clCreateBufferSHM(cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
size_t size,
|
||||||
|
void * host_ptr,
|
||||||
|
cl_int * errcode_ret,
|
||||||
|
int shid)
|
||||||
|
{
|
||||||
|
cl_mem clmem = (cl_mem)g_cl_genode->alloc(sizeof(struct _cl_mem));
|
||||||
|
clmem->virt_vm = host_ptr;
|
||||||
|
clmem->bc.buffer = host_ptr;
|
||||||
|
clmem->bc.buffer_size = (uint32_t)size;
|
||||||
|
clmem->bc.non_pointer_type = false;
|
||||||
|
clmem->bc.shmid = shid;
|
||||||
|
|
||||||
*errcode_ret |= CL_SUCCESS;
|
*errcode_ret |= CL_SUCCESS;
|
||||||
return clmem;
|
return clmem;
|
||||||
|
|||||||
@@ -1088,6 +1088,14 @@ clCreateBuffer(cl_context context,
|
|||||||
void * host_ptr,
|
void * host_ptr,
|
||||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
/*
 * Non-standard companion of clCreateBuffer: takes the same arguments plus
 * `shid`, the id of the shared-memory region that `host_ptr` points into.
 * The id is stored with the buffer so that addresses can later be translated
 * relative to that region's mapped base.
 */
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateBufferSHM(cl_context context, cl_mem_flags flags, size_t size,
                  void * host_ptr, cl_int * errcode_ret,
                  int shid) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
#ifdef CL_VERSION_1_1
|
#ifdef CL_VERSION_1_1
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#include "cl_genode.h"
|
#include "cl_genode.h"
|
||||||
|
|
||||||
cl_genode::cl_genode(Genode::Env& env, unsigned long size) : env(env), allocator(), mapped_base(0), backend_driver(env)
|
cl_genode::cl_genode(Genode::Env& env, unsigned long size) : env(env), allocator(), mapped_base(0), backend_driver(env), shm_mapped_base{0, }
|
||||||
{
|
{
|
||||||
// get shared memory with driver
|
// get shared memory with driver
|
||||||
Genode::Ram_dataspace_capability ram_cap;
|
Genode::Ram_dataspace_capability ram_cap;
|
||||||
@@ -38,7 +38,8 @@ void cl_genode::enqueue_task(struct kernel_config* kconf)
|
|||||||
// convert virt vm addr to offset
|
// convert virt vm addr to offset
|
||||||
for(int i = 0; i < kconf->buffCount; i++)
|
for(int i = 0; i < kconf->buffCount; i++)
|
||||||
{
|
{
|
||||||
kconf->buffConfigs[i].buffer = (void*)((Genode::addr_t)kconf->buffConfigs[i].buffer - mapped_base);
|
const Genode::addr_t mbase = kconf->buffConfigs[i].shmid == -1 ? mapped_base : shm_mapped_base[kconf->buffConfigs[i].shmid];
|
||||||
|
kconf->buffConfigs[i].buffer = (void*)((Genode::addr_t)kconf->buffConfigs[i].buffer - mbase);
|
||||||
}
|
}
|
||||||
kconf->buffConfigs = (struct buffer_config*)((Genode::addr_t)kconf->buffConfigs - mapped_base);
|
kconf->buffConfigs = (struct buffer_config*)((Genode::addr_t)kconf->buffConfigs - mapped_base);
|
||||||
kconf->kernelName = (char*)((Genode::addr_t)kconf->kernelName - mapped_base);
|
kconf->kernelName = (char*)((Genode::addr_t)kconf->kernelName - mapped_base);
|
||||||
@@ -55,3 +56,8 @@ void cl_genode::wait(struct kernel_config* kconf)
|
|||||||
asm("nop");
|
asm("nop");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void cl_genode::add_shm_mapped_base(int shmid, Genode::addr_t mbase)
|
||||||
|
{
|
||||||
|
shm_mapped_base[shmid] = mbase;
|
||||||
|
}
|
||||||
|
|||||||
@@ -17,6 +17,9 @@
|
|||||||
// driver
|
// driver
|
||||||
#include <gpgpu/gpgpu.h>
|
#include <gpgpu/gpgpu.h>
|
||||||
|
|
||||||
|
// config
|
||||||
|
#include "../../../../dde_uos-intel-gpgpu/src/config.h"
|
||||||
|
|
||||||
class cl_genode
|
class cl_genode
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
@@ -30,6 +33,9 @@ private:
|
|||||||
// rpc
|
// rpc
|
||||||
gpgpu_virt::Connection backend_driver;
|
gpgpu_virt::Connection backend_driver;
|
||||||
|
|
||||||
|
// shm mapped_bases
|
||||||
|
Genode::addr_t shm_mapped_base[MAX_SHM_REGIONS];
|
||||||
|
|
||||||
// do not allow copies
|
// do not allow copies
|
||||||
cl_genode(const cl_genode& copy) = delete;
|
cl_genode(const cl_genode& copy) = delete;
|
||||||
cl_genode& operator=(const cl_genode& src) = delete;
|
cl_genode& operator=(const cl_genode& src) = delete;
|
||||||
@@ -100,6 +106,14 @@ public:
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
void reset() { allocator.reset(); }
|
void reset() { allocator.reset(); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Record the local base address at which a shared-memory region is mapped.
|
||||||
|
*
|
||||||
|
* @param shmid index of the shared-memory region (slot in shm_mapped_base, below MAX_SHM_REGIONS)
|
||||||
|
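* @param mbase local address of the region's mapping; subtracted from buffer pointers carrying this shmid when a task is enqueued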
|
||||||
|
*/
|
||||||
|
void add_shm_mapped_base(int shmid, Genode::addr_t mbase);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // CL_GENODE_H
|
#endif // CL_GENODE_H
|
||||||
|
|||||||
4817
repos/hello_gpgpu/src/producer/frac2_320_240.h
Normal file
4817
repos/hello_gpgpu/src/producer/frac2_320_240.h
Normal file
File diff suppressed because it is too large
Load Diff
4817
repos/hello_gpgpu/src/producer/frac_320_240.h
Normal file
4817
repos/hello_gpgpu/src/producer/frac_320_240.h
Normal file
File diff suppressed because it is too large
Load Diff
155
repos/hello_gpgpu/src/producer/main.cc
Normal file
155
repos/hello_gpgpu/src/producer/main.cc
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
#include <base/log.h>
|
||||||
|
#include <base/heap.h>
|
||||||
|
#include <base/allocator_avl.h>
|
||||||
|
#include <base/attached_rom_dataspace.h>
|
||||||
|
#include <util/misc_math.h>
|
||||||
|
|
||||||
|
#include <libc/component.h>
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
// rpc
|
||||||
|
#include <gpgpu_virt/connection.h>
|
||||||
|
|
||||||
|
// stupid alloc
|
||||||
|
#include "../hello_gpgpu/allocator_stupid.h"
|
||||||
|
|
||||||
|
// imgs
|
||||||
|
#include "frac_320_240.h"
|
||||||
|
#include "frac2_320_240.h"
|
||||||
|
|
||||||
|
namespace fake_cv
|
||||||
|
{
|
||||||
|
class Mat
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
int rows;
|
||||||
|
int cols;
|
||||||
|
int step;
|
||||||
|
float *data;
|
||||||
|
template <typename T>
|
||||||
|
T *ptr(int off)
|
||||||
|
{
|
||||||
|
return (T *)&data[off * sizeof(T)];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Mat imread(const char *img)
|
||||||
|
{
|
||||||
|
Mat m;
|
||||||
|
m.rows = height;
|
||||||
|
m.cols = width;
|
||||||
|
m.step = width * sizeof(float);
|
||||||
|
|
||||||
|
const size_t size = m.rows * m.cols;
|
||||||
|
m.data = (float *)malloc(size * sizeof(float));
|
||||||
|
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
{
|
||||||
|
unsigned int px[3];
|
||||||
|
HEADER_PIXEL(img, px);
|
||||||
|
const unsigned int g = 0.298936021293775 * px[0] + 0.587043074451121 * px[1] + 0.114020904255103 * px[2];
|
||||||
|
m.data[i] = g / 251.; // 255.
|
||||||
|
}
|
||||||
|
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Short alias for the image type; the producer's init() declares its images as Image.
typedef fake_cv::Mat Image;
|
||||||
|
|
||||||
|
/*
 * Placeholder grayscale conversion: hands back a copy of the input
 * descriptor without touching any pixel. The copy is member-wise, so it
 * refers to the same pixel buffer as `img`.
 */
fake_cv::Mat getGray(const fake_cv::Mat &img)
{
    fake_cv::Mat gray = img;
    return gray;
}
|
||||||
|
|
||||||
|
struct producer
|
||||||
|
{
|
||||||
|
Genode::Env &env;
|
||||||
|
gpgpu_virt::Connection backend_driver;
|
||||||
|
Genode::Allocator_stupid allocator;
|
||||||
|
|
||||||
|
float *img1;
|
||||||
|
float *img2;
|
||||||
|
unsigned long img_size;
|
||||||
|
volatile uint8_t *ready;
|
||||||
|
volatile float *data;
|
||||||
|
|
||||||
|
void init()
|
||||||
|
{
|
||||||
|
Genode::log("===Init Producer===");
|
||||||
|
|
||||||
|
// register vgpu (optional?)
|
||||||
|
const unsigned long size_vgpu_mem = 0x1000;
|
||||||
|
Genode::Ram_dataspace_capability vgpu_mem_ram_cap;
|
||||||
|
backend_driver.register_vm(size_vgpu_mem, vgpu_mem_ram_cap);
|
||||||
|
|
||||||
|
// create shm for gpu
|
||||||
|
const unsigned long size_vgpu_shm = 0x100000;
|
||||||
|
Genode::Ram_dataspace_capability vgpu_shm_ram_cap;
|
||||||
|
backend_driver.register_shm(size_vgpu_shm, vgpu_shm_ram_cap);
|
||||||
|
|
||||||
|
// attach shm to vm
|
||||||
|
Genode::addr_t mapped_base = env.rm().attach(vgpu_shm_ram_cap);
|
||||||
|
|
||||||
|
// use it in allocator
|
||||||
|
allocator.add_range(mapped_base, size_vgpu_shm);
|
||||||
|
|
||||||
|
// set not ready
|
||||||
|
ready = (uint8_t *)allocator.alloc(1);
|
||||||
|
|
||||||
|
// load img1 and img2
|
||||||
|
const Image s1 = fake_cv::imread(header_data);
|
||||||
|
Image i1 = getGray(s1);
|
||||||
|
img1 = (float *)i1.ptr<float>(0);
|
||||||
|
const Image s2 = fake_cv::imread(header_data2);
|
||||||
|
Image i2 = getGray(s2);
|
||||||
|
img2 = (float *)i2.ptr<float>(0);
|
||||||
|
|
||||||
|
img_size = Genode::max(i1.rows * i2.cols * sizeof(float), i2.rows * i2.cols * sizeof(float));
|
||||||
|
|
||||||
|
// alloc whole data
|
||||||
|
data = (float *)allocator.alloc_aligned(0x10000, img_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void run()
|
||||||
|
{
|
||||||
|
Genode::log("===Run Producer===");
|
||||||
|
|
||||||
|
Libc::with_libc([&]
|
||||||
|
{
|
||||||
|
srand(time(NULL));
|
||||||
|
|
||||||
|
int flip = 0;
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
// fetch new img
|
||||||
|
*ready = 0x43;
|
||||||
|
memcpy((void*)data, flip ? img1 : img2, img_size);
|
||||||
|
*ready = 0x42;
|
||||||
|
flip = !flip;
|
||||||
|
Genode::log("===New Image ready: ", flip, " ===");
|
||||||
|
|
||||||
|
// sleep for 5s
|
||||||
|
sleep(5);
|
||||||
|
} });
|
||||||
|
|
||||||
|
Genode::log("===End===");
|
||||||
|
Genode::log("Producer completed");
|
||||||
|
}
|
||||||
|
|
||||||
|
producer(Genode::Env &e) : env(e), backend_driver(env), allocator(), img1(nullptr), img2(nullptr), img_size(0), ready(nullptr), data(nullptr)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Component entry: build the single producer instance (function-local
 * static, so it outlives this call), initialise it and enter its loop.
 */
void Libc::Component::construct(Libc::Env &env)
{
    static producer instance(env);

    instance.init();
    instance.run();
}
|
||||||
7
repos/hello_gpgpu/src/producer/target.mk
Normal file
7
repos/hello_gpgpu/src/producer/target.mk
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
TARGET = producer
|
||||||
|
SRC_CC = main.cc \
|
||||||
|
../hello_gpgpu/allocator_stupid.cc
|
||||||
|
|
||||||
|
LIBS = base libc libm
|
||||||
|
|
||||||
|
CC_CXX_WARN_STRICT =
|
||||||
Reference in New Issue
Block a user