mirror of
https://github.com/mmueller41/genode.git
synced 2026-01-21 12:32:56 +01:00
shared gpu memory across cells
This commit is contained in:
@@ -23,6 +23,16 @@ struct Session_client : Genode::Rpc_client<Session>
|
||||
call<Rpc_register_vm>(size, ram_cap);
|
||||
}
|
||||
|
||||
void register_shm(Genode::size_t size, Genode::Ram_dataspace_capability& ram_cap) override
|
||||
{
|
||||
call<Rpc_register_shm>(size, ram_cap);
|
||||
}
|
||||
|
||||
void ask_shm(int id, Genode::size_t &size, Genode::Ram_dataspace_capability& ram_cap) override
|
||||
{
|
||||
call<Rpc_ask_shm>(id, size, ram_cap);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -14,6 +14,8 @@ struct Session : Genode::Session
|
||||
|
||||
virtual void register_vm(Genode::size_t size, Genode::Ram_dataspace_capability& ram_cap) = 0;
|
||||
virtual void start_task(unsigned long kconf) = 0;
|
||||
virtual void register_shm(Genode::size_t size, Genode::Ram_dataspace_capability& ram_cap) = 0;
|
||||
virtual void ask_shm(int id, Genode::size_t &size, Genode::Ram_dataspace_capability& ram_cap) = 0;
|
||||
|
||||
/*******************
|
||||
** RPC interface **
|
||||
@@ -21,9 +23,11 @@ struct Session : Genode::Session
|
||||
|
||||
GENODE_RPC(Rpc_register_vm, void, register_vm, Genode::size_t, Genode::Ram_dataspace_capability&);
|
||||
GENODE_RPC(Rpc_start_task, void, start_task, unsigned long);
|
||||
GENODE_RPC(Rpc_register_shm, void, register_shm, Genode::size_t, Genode::Ram_dataspace_capability&);
|
||||
GENODE_RPC(Rpc_ask_shm, void, ask_shm, int, Genode::size_t&, Genode::Ram_dataspace_capability&);
|
||||
|
||||
|
||||
GENODE_RPC_INTERFACE(Rpc_register_vm, Rpc_start_task);
|
||||
GENODE_RPC_INTERFACE(Rpc_register_vm, Rpc_start_task, Rpc_register_shm, Rpc_ask_shm);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
#ifndef CONFIG_H
|
||||
#define CONFIG_H
|
||||
|
||||
#define QEMU_TEST
|
||||
// #define QEMU_TEST
|
||||
|
||||
//#define VERBOSE
|
||||
#define VERBOSE
|
||||
|
||||
#define SCHED_CFS
|
||||
//#define SCHED_RR // default
|
||||
|
||||
#define MAX_SHM_REGIONS 32
|
||||
|
||||
#endif // CONFIG_H
|
||||
Submodule repos/dde_uos-intel-gpgpu/src/uos-intel-gpgpu updated: 9849813237...6f4658c8e2
@@ -31,6 +31,49 @@ void Session_component::register_vm(Genode::size_t size, Genode::Ram_dataspace_c
|
||||
_global_sched->add_vgpu(&vgpu);
|
||||
}
|
||||
|
||||
int SHM_manager::alloc_shm(Genode::size_t size, Genode::Ram_dataspace_capability& ram_cap_vm)
|
||||
{
|
||||
// get shared memory id
|
||||
const int s = __atomic_fetch_add(&shid, 1, __ATOMIC_SEQ_CST);
|
||||
|
||||
if(s >= MAX_SHM_REGIONS)
|
||||
return -1;
|
||||
|
||||
// alloc shared memory
|
||||
Genode::addr_t mapped_base;
|
||||
ram_cap[s] = _global_gpgpu_genode->allocRamCap(size, mapped_base, base[s]);
|
||||
sizes[s] = size;
|
||||
ram_cap_vm = ram_cap[s];
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
void SHM_manager::free_shm(int id)
|
||||
{
|
||||
_global_gpgpu_genode->freeRamCap(ram_cap[id]);
|
||||
}
|
||||
|
||||
void Session_component::register_shm(Genode::size_t size, Genode::Ram_dataspace_capability& ram_cap_vm)
|
||||
{
|
||||
// create shared mem
|
||||
int shid = SHM_manager::getInstance().alloc_shm(size, ram_cap_vm);
|
||||
vgpu.assignSHM(shid);
|
||||
}
|
||||
|
||||
void Session_component::ask_shm(int id, Genode::size_t &size, Genode::Ram_dataspace_capability& ram_cap_vm)
|
||||
{
|
||||
// get size
|
||||
size = SHM_manager::getInstance().getSize(id);
|
||||
if(size == 0) // invalid
|
||||
return;
|
||||
|
||||
// get ram cap
|
||||
ram_cap_vm = SHM_manager::getInstance().getCap(id);
|
||||
|
||||
// assign id to vgpu
|
||||
vgpu.assignSHM(id);
|
||||
}
|
||||
|
||||
void Session_component::start_task(unsigned long kconf)
|
||||
{
|
||||
// convert offset to driver virt addr
|
||||
@@ -44,7 +87,8 @@ void Session_component::start_task(unsigned long kconf)
|
||||
}
|
||||
else // for pointer set phys addr
|
||||
{
|
||||
kc->buffConfigs[i].buffer = (void*)((Genode::addr_t)kc->buffConfigs[i].buffer + base);
|
||||
const Genode::addr_t addrBase = kc->buffConfigs[i].shmid == -1 ? base : SHM_manager::getInstance().getBase(kc->buffConfigs[i].shmid);
|
||||
kc->buffConfigs[i].buffer = (void*)((Genode::addr_t)kc->buffConfigs[i].buffer + addrBase);
|
||||
}
|
||||
}
|
||||
kc->kernelName = (char*)((Genode::addr_t)kc->kernelName + mapped_base);
|
||||
@@ -63,6 +107,7 @@ void Session_component::start_task(unsigned long kconf)
|
||||
for(int i = 0; i < kc->buffCount; i++)
|
||||
{
|
||||
Genode::log("\tBuffer ", i);
|
||||
Genode::log("\t\tshmid: ", (int)kc->buffConfigs[i].shmid);
|
||||
if(kc->buffConfigs[i].non_pointer_type)
|
||||
{
|
||||
Genode::log("\t\tvaddr: ", (void*)kc->buffConfigs[i].buffer);
|
||||
@@ -72,7 +117,8 @@ void Session_component::start_task(unsigned long kconf)
|
||||
}
|
||||
else
|
||||
{
|
||||
Genode::log("\t\tvaddr: ", (void*)((Genode::addr_t)kc->buffConfigs[i].buffer - base + mapped_base));
|
||||
const Genode::addr_t addrBase = kc->buffConfigs[i].shmid == -1 ? base : SHM_manager::getInstance().getBase(kc->buffConfigs[i].shmid);
|
||||
Genode::log("\t\tvaddr: ", (void*)((Genode::addr_t)kc->buffConfigs[i].buffer - addrBase));
|
||||
Genode::log("\t\tpaddr: ", (void*)kc->buffConfigs[i].buffer);
|
||||
//Genode::log("\t\tgpuaddr: ", (void*)((addr_t)kc->buffConfigs[i].ga)); // to print this, temporary make the var public
|
||||
//Genode::log("\t\tpos: ", (uint32_t)kc->buffConfigs[i].pos); // to print this, temporary make the var public
|
||||
|
||||
@@ -10,6 +10,28 @@
|
||||
namespace gpgpu_virt
|
||||
{
|
||||
|
||||
class SHM_manager
|
||||
{
|
||||
private:
|
||||
Genode::Ram_dataspace_capability ram_cap[MAX_SHM_REGIONS];
|
||||
Genode::addr_t base[MAX_SHM_REGIONS];
|
||||
Genode::size_t sizes[MAX_SHM_REGIONS];
|
||||
unsigned int shid;
|
||||
SHM_manager() : ram_cap{}, base {0, }, sizes {0, }, shid(0) {};
|
||||
|
||||
public:
|
||||
static SHM_manager &getInstance() {
|
||||
static SHM_manager inst;
|
||||
return inst;
|
||||
}
|
||||
|
||||
int alloc_shm(Genode::size_t size, Genode::Ram_dataspace_capability& ram_cap);
|
||||
void free_shm(int id);
|
||||
Genode::Ram_dataspace_capability getCap(int id) const { return ram_cap[id]; }
|
||||
Genode::addr_t getBase(int id) const { return base[id]; }
|
||||
Genode::addr_t getSize(int id) const { return sizes[id]; }
|
||||
};
|
||||
|
||||
struct Session_component : Genode::Rpc_object<Session>
|
||||
{
|
||||
VGpu vgpu;
|
||||
@@ -25,6 +47,9 @@ struct Session_component : Genode::Rpc_object<Session>
|
||||
|
||||
void start_task(unsigned long kconf) override;
|
||||
|
||||
void register_shm(Genode::size_t size, Genode::Ram_dataspace_capability& ram_cap) override;
|
||||
|
||||
void ask_shm(int id, Genode::size_t& size, Genode::Ram_dataspace_capability& ram_cap) override;
|
||||
};
|
||||
|
||||
class Root_component
|
||||
|
||||
@@ -33,11 +33,32 @@ namespace gpgpu_virt {
|
||||
/// priority of vgpu
|
||||
int prio;
|
||||
|
||||
/// assigned shared memory regions
|
||||
int shm_ids[MAX_SHM_REGIONS];
|
||||
|
||||
/// local shm id counter
|
||||
int curr_shm_id;
|
||||
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new VGpu object
|
||||
*/
|
||||
VGpu() : ctx(nullptr), ready_list(), prio(-1) {}
|
||||
VGpu() : ctx(nullptr), ready_list(), prio(-1), curr_shm_id(0) {}
|
||||
|
||||
void assignSHM(int id)
|
||||
{
|
||||
shm_ids[curr_shm_id++] = id;
|
||||
}
|
||||
|
||||
void removeSHM(int id)
|
||||
{
|
||||
for(int i = 0; i < MAX_SHM_REGIONS; ++i)
|
||||
{
|
||||
if (shm_ids[i] == id){
|
||||
shm_ids[i] = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Set the Priority
|
||||
|
||||
@@ -62,7 +62,7 @@ append config {
|
||||
<start name="hello_gpgpu">
|
||||
<binary name="hello_gpgpu"/>
|
||||
<resource name="RAM" quantum="1024M"/>
|
||||
<config bench="1">
|
||||
<config bench="2097152">
|
||||
<vfs> <dir name="dev"> <log/> <inline name="rtc">2022-07-20 14:30</inline> </dir> </vfs>
|
||||
<libc stdout="/dev/log" stderr="/dev/log" rtc="/dev/rtc"/>
|
||||
</config>
|
||||
|
||||
103
repos/hello_gpgpu/run/shm_gpgpu.run
Normal file
103
repos/hello_gpgpu/run/shm_gpgpu.run
Normal file
@@ -0,0 +1,103 @@
|
||||
# build config
|
||||
build { core init gpgpu timer producer consumer1 consumer2 }
|
||||
|
||||
# platform config
|
||||
set use_acpica_as_acpi_drv 0
|
||||
source ${genode_dir}/repos/base/run/platform_drv.inc
|
||||
proc platform_drv_policy {} {
|
||||
global use_acpica_as_acpi_drv
|
||||
set policy ""
|
||||
|
||||
append_if $use_acpica_as_acpi_drv policy {
|
||||
<policy label="acpi_drv -> "> <pci class="ALL"/> </policy>}
|
||||
|
||||
append policy {
|
||||
<policy label_prefix="gpgpu"> <pci class="ALL"/> </policy>}
|
||||
append policy {
|
||||
<policy label_prefix="producer"> <pci class="ALL"/> </policy>}
|
||||
append policy {
|
||||
<policy label_prefix="consumer1"> <pci class="ALL"/> </policy>}
|
||||
append policy {
|
||||
<policy label_prefix="consumer2"> <pci class="ALL"/> </policy>}
|
||||
|
||||
return $policy
|
||||
}
|
||||
append_platform_drv_build_components
|
||||
build $build_components
|
||||
|
||||
# boot dir
|
||||
create_boot_directory
|
||||
|
||||
# other config
|
||||
append config {
|
||||
<config>
|
||||
<parent-provides>
|
||||
<service name="ROM"/>
|
||||
<service name="IRQ"/>
|
||||
<service name="IO_MEM"/>
|
||||
<service name="PD"/>
|
||||
<service name="RM"/>
|
||||
<service name="CPU"/>
|
||||
<service name="LOG"/>
|
||||
<service name="RAM"/>
|
||||
<service name="CAP"/>
|
||||
<service name="TOPO"/>
|
||||
</parent-provides>
|
||||
<default-route>
|
||||
<any-service> <parent/> <any-child/> </any-service>
|
||||
</default-route>
|
||||
<default caps="200"/>
|
||||
<start name="timer">
|
||||
<resource name="RAM" quantum="1M"/>
|
||||
<provides><service name="Timer"/></provides>
|
||||
<route>
|
||||
<any-service><parent/><any-child/></any-service>
|
||||
</route>
|
||||
</start>
|
||||
}
|
||||
|
||||
append_platform_drv_config
|
||||
|
||||
append config {
|
||||
<start name="gpgpu" priority="0">
|
||||
<provides> <service name="gpgpu"/> </provides>
|
||||
<resource name="RAM" quantum="12G"/>
|
||||
</start>
|
||||
<start name="producer">
|
||||
<binary name="producer"/>
|
||||
<resource name="RAM" quantum="512M"/>
|
||||
<config>
|
||||
<vfs> <dir name="dev"> <log/> <inline name="rtc">2022-07-20 14:30</inline> </dir> </vfs>
|
||||
<libc stdout="/dev/log" stderr="/dev/log" rtc="/dev/rtc"/>
|
||||
</config>
|
||||
</start>
|
||||
<start name="consumer (high)">
|
||||
<binary name="consumer1"/>
|
||||
<resource name="RAM" quantum="512M"/>
|
||||
<config>
|
||||
<vfs> <dir name="dev"> <log/> <inline name="rtc">2022-07-20 14:30</inline> </dir> </vfs>
|
||||
<libc stdout="/dev/log" stderr="/dev/log" rtc="/dev/rtc"/>
|
||||
</config>
|
||||
</start>
|
||||
<start name="consumer (low)">
|
||||
<binary name="consumer2"/>
|
||||
<resource name="RAM" quantum="512M"/>
|
||||
<config >
|
||||
<vfs> <dir name="dev"> <log/> <inline name="rtc">2022-07-20 14:30</inline> </dir> </vfs>
|
||||
<libc stdout="/dev/log" stderr="/dev/log" rtc="/dev/rtc"/>
|
||||
</config>
|
||||
</start>
|
||||
</config>}
|
||||
|
||||
install_config $config
|
||||
|
||||
# boot modules
|
||||
set boot_modules {
|
||||
core ld.lib.so libc.lib.so vfs.lib.so libm.lib.so init gpgpu timer producer consumer1 consumer2
|
||||
}
|
||||
append_platform_drv_boot_modules
|
||||
build_boot_image $boot_modules
|
||||
|
||||
# qemu stuff
|
||||
append qemu_args " -nographic -m 24G"
|
||||
run_genode_until forever
|
||||
1233
repos/hello_gpgpu/src/consumer1/OpenSurf.cpp
Normal file
1233
repos/hello_gpgpu/src/consumer1/OpenSurf.cpp
Normal file
File diff suppressed because it is too large
Load Diff
20554
repos/hello_gpgpu/src/consumer1/SURF_noSLM_fixed_kernel.h
Normal file
20554
repos/hello_gpgpu/src/consumer1/SURF_noSLM_fixed_kernel.h
Normal file
File diff suppressed because it is too large
Load Diff
4818
repos/hello_gpgpu/src/consumer1/frac_320_240.h
Normal file
4818
repos/hello_gpgpu/src/consumer1/frac_320_240.h
Normal file
File diff suppressed because it is too large
Load Diff
99
repos/hello_gpgpu/src/consumer1/main.cc
Normal file
99
repos/hello_gpgpu/src/consumer1/main.cc
Normal file
@@ -0,0 +1,99 @@
|
||||
#include <base/log.h>
|
||||
#include <base/heap.h>
|
||||
#include <base/allocator_avl.h>
|
||||
#include <base/attached_rom_dataspace.h>
|
||||
|
||||
#include <libc/component.h>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <string.h>
|
||||
|
||||
// OpenCL
|
||||
#define CL_TARGET_OPENCL_VERSION 100
|
||||
#include "../hello_gpgpu/CL/cl.h"
|
||||
|
||||
// rpc
|
||||
#include <gpgpu_virt/connection.h>
|
||||
|
||||
// stupid alloc
|
||||
#include "../hello_gpgpu/allocator_stupid.h"
|
||||
|
||||
namespace ns_OpenSurf{int main(int argc, char *argv[]);};
|
||||
|
||||
struct consumer_conv
|
||||
{
|
||||
Genode::Env &env;
|
||||
gpgpu_virt::Connection backend_driver;
|
||||
Genode::Allocator_stupid allocator;
|
||||
|
||||
const unsigned long size = 0x40000000;
|
||||
cl_genode clg;
|
||||
Genode::Ram_dataspace_capability vgpu_mem_ram_cap;
|
||||
Genode::Ram_dataspace_capability vgpu_shm_ram_cap;
|
||||
|
||||
volatile uint8_t *ready;
|
||||
const unsigned long img_size = 320 * 240 * sizeof(float);
|
||||
volatile float *data;
|
||||
|
||||
void init()
|
||||
{
|
||||
Genode::log("===Init Consumer Surf===");
|
||||
clInitGenode(clg);
|
||||
|
||||
// register vgpu (optional?)
|
||||
const unsigned long size_vgpu_mem = 0x1000;
|
||||
backend_driver.register_vm(size_vgpu_mem, vgpu_mem_ram_cap);
|
||||
|
||||
// create shm for gpu
|
||||
const unsigned long id = 0;
|
||||
Genode::size_t total_size = 0;
|
||||
while (total_size == 0)
|
||||
{
|
||||
backend_driver.ask_shm(id, total_size, vgpu_shm_ram_cap);
|
||||
}
|
||||
|
||||
// attach shm to vm
|
||||
Genode::addr_t mapped_base = env.rm().attach(vgpu_shm_ram_cap);
|
||||
clg.add_shm_mapped_base(id, mapped_base);
|
||||
|
||||
// use it in allocator
|
||||
allocator.add_range(mapped_base, total_size);
|
||||
|
||||
// alloc whole data
|
||||
ready = (uint8_t *)allocator.alloc(1);
|
||||
data = (float *)allocator.alloc_aligned(0x10000, img_size);
|
||||
}
|
||||
|
||||
void run()
|
||||
{
|
||||
Genode::log("===Run Consumer Surf===");
|
||||
|
||||
Libc::with_libc([&]
|
||||
{
|
||||
for(;;)
|
||||
{
|
||||
while (*ready != 0x42);
|
||||
|
||||
ns_OpenSurf::main(2, (char**)data);
|
||||
|
||||
//Genode::log(data[0]);
|
||||
sleep(3);
|
||||
} });
|
||||
|
||||
Genode::log("===End===");
|
||||
Genode::log("Consumer Surf completed");
|
||||
}
|
||||
|
||||
consumer_conv(Genode::Env &e) : env(e), backend_driver(env), allocator(), clg(env, size), ready(nullptr), data(nullptr)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
void Libc::Component::construct(Libc::Env &env)
|
||||
{
|
||||
static consumer_conv p(env);
|
||||
p.init();
|
||||
p.run();
|
||||
}
|
||||
9
repos/hello_gpgpu/src/consumer1/target.mk
Normal file
9
repos/hello_gpgpu/src/consumer1/target.mk
Normal file
@@ -0,0 +1,9 @@
|
||||
TARGET = consumer1
|
||||
SRC_CC = main.cc \
|
||||
OpenSurf.cpp \
|
||||
../hello_gpgpu/CL/cl.cc ../hello_gpgpu/CL/cl_genode.cc \
|
||||
../hello_gpgpu/allocator_stupid.cc
|
||||
|
||||
LIBS = base libc libm
|
||||
|
||||
CC_CXX_WARN_STRICT =
|
||||
326
repos/hello_gpgpu/src/consumer2/2DConvolution.cc
Normal file
326
repos/hello_gpgpu/src/consumer2/2DConvolution.cc
Normal file
@@ -0,0 +1,326 @@
|
||||
/**
|
||||
* 2DConvolution.c: This file is part of the PolyBench/GPU 1.0 test suite.
|
||||
*
|
||||
*
|
||||
* Contact: Scott Grauer-Gray <sgrauerg@gmail.com>
|
||||
* Will Killian <killian@udel.edu>
|
||||
* Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
||||
* Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <sys/time.h>
|
||||
#include <math.h>
|
||||
|
||||
#define CL_TARGET_OPENCL_VERSION 100
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include "../hello_gpgpu/CL/cl.h"
|
||||
#endif
|
||||
|
||||
#define POLYBENCH_TIME 1
|
||||
|
||||
//select the OpenCL device to use (can be GPU, CPU, or Accelerator such as Intel Xeon Phi)
|
||||
#define OPENCL_DEVICE_SELECTION CL_DEVICE_TYPE_GPU
|
||||
|
||||
#include "../hello_gpgpu/benchmark/convolution-2d/2DConvolution.h"
|
||||
#include "../hello_gpgpu/polybench.h"
|
||||
|
||||
//define the error threshold for the results "not matching"
|
||||
#define PERCENT_DIFF_ERROR_THRESHOLD 1.05
|
||||
|
||||
#define MAX_SOURCE_SIZE (0x100000)
|
||||
|
||||
#if defined(cl_khr_fp64) // Khronos extension available?
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
#elif defined(cl_amd_fp64) // AMD extension available?
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
|
||||
#endif
|
||||
|
||||
namespace ns_convolution_2d {
|
||||
|
||||
#include "../hello_gpgpu/benchmark/convolution-2d/2DConvolution_kernel.h"
|
||||
#include "../hello_gpgpu/polybenchUtilFuncts.h"
|
||||
|
||||
|
||||
|
||||
char str_temp[1024];
|
||||
|
||||
cl_platform_id platform_id;
|
||||
cl_device_id device_id;
|
||||
cl_uint num_devices;
|
||||
cl_uint num_platforms;
|
||||
cl_int errcode;
|
||||
cl_context clGPUContext;
|
||||
cl_kernel clKernel;
|
||||
cl_command_queue clCommandQue;
|
||||
cl_program clProgram;
|
||||
cl_mem a_mem_obj;
|
||||
cl_mem b_mem_obj;
|
||||
cl_mem c_mem_obj;
|
||||
FILE *fp;
|
||||
char *source_str;
|
||||
size_t source_size;
|
||||
|
||||
// patch config for consumer2
|
||||
#undef NI
|
||||
#undef NJ
|
||||
#define NI 320
|
||||
#define NJ 240
|
||||
|
||||
void compareResults(int ni, int nj, DATA_TYPE POLYBENCH_2D(B, NI, NJ, ni, nj), DATA_TYPE POLYBENCH_2D(B_outputFromGpu, NI, NJ, ni, nj))
|
||||
{
|
||||
int i, j, fail;
|
||||
fail = 0;
|
||||
|
||||
// Compare outputs from CPU and GPU
|
||||
for (i=1; i < (ni-1); i++)
|
||||
{
|
||||
for (j=1; j < (nj-1); j++)
|
||||
{
|
||||
if (percentDiff(B[i][j], B_outputFromGpu[i][j]) > PERCENT_DIFF_ERROR_THRESHOLD)
|
||||
{
|
||||
fail++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Print results
|
||||
printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail);
|
||||
}
|
||||
|
||||
|
||||
void read_cl_file()
|
||||
{
|
||||
// Load the kernel source code into the array source_str
|
||||
// fp = fopen("2DConvolution.cl", "r");
|
||||
// if (!fp) {
|
||||
// fprintf(stdout, "Failed to load kernel.\n");
|
||||
// exit(1);
|
||||
// }
|
||||
// source_str = (char*)malloc(MAX_SOURCE_SIZE);
|
||||
// source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
|
||||
// fclose( fp );
|
||||
}
|
||||
|
||||
|
||||
void init(int ni, int nj, DATA_TYPE POLYBENCH_2D(A, NI, NJ, ni, nj))
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < ni; ++i)
|
||||
{
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
A[i][j] = (float)rand()/RAND_MAX;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void cl_initialization()
|
||||
{
|
||||
|
||||
// Get platform and device information
|
||||
errcode = clGetPlatformIDs(1, &platform_id, &num_platforms);
|
||||
if(errcode == CL_SUCCESS) printf("number of platforms is %d\n",num_platforms);
|
||||
else printf("Error getting platform IDs\n");
|
||||
|
||||
errcode = clGetPlatformInfo(platform_id,CL_PLATFORM_NAME, sizeof(str_temp), str_temp,NULL);
|
||||
if(errcode == CL_SUCCESS) printf("platform name is %s\n",str_temp);
|
||||
else printf("Error getting platform name\n");
|
||||
|
||||
errcode = clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, sizeof(str_temp), str_temp,NULL);
|
||||
if(errcode == CL_SUCCESS) printf("platform version is %s\n",str_temp);
|
||||
else printf("Error getting platform version\n");
|
||||
|
||||
errcode = clGetDeviceIDs( platform_id, OPENCL_DEVICE_SELECTION, 1, &device_id, &num_devices);
|
||||
if(errcode == CL_SUCCESS) printf("number of devices is %d\n", num_devices);
|
||||
else printf("Error getting device IDs\n");
|
||||
|
||||
errcode = clGetDeviceInfo(device_id,CL_DEVICE_NAME, sizeof(str_temp), str_temp,NULL);
|
||||
if(errcode == CL_SUCCESS) printf("device name is %s\n",str_temp);
|
||||
else printf("Error getting device name\n");
|
||||
|
||||
// Create an OpenCL context
|
||||
clGPUContext = clCreateContext( NULL, 1, &device_id, NULL, NULL, &errcode);
|
||||
if(errcode != CL_SUCCESS) printf("Error in creating context\n");
|
||||
|
||||
//Create a command-queue
|
||||
clCommandQue = clCreateCommandQueue(clGPUContext, device_id, 0, &errcode);
|
||||
if(errcode != CL_SUCCESS) printf("Error in creating command queue\n");
|
||||
}
|
||||
|
||||
|
||||
void cl_mem_init(DATA_TYPE POLYBENCH_2D(A, NI, NJ, ni, nj))
|
||||
{
|
||||
a_mem_obj = clCreateBufferSHM(clGPUContext, CL_MEM_READ_ONLY, sizeof(DATA_TYPE) * NI * NJ, A, &errcode, 0);
|
||||
b_mem_obj = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE, sizeof(DATA_TYPE) * NI * NJ, NULL, &errcode);
|
||||
|
||||
if(errcode != CL_SUCCESS) printf("Error in creating buffers\n");
|
||||
|
||||
//errcode = clEnqueueWriteBuffer(clCommandQue, a_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NJ, A, 0, NULL, NULL);
|
||||
//if(errcode != CL_SUCCESS)printf("Error in writing buffers\n");
|
||||
}
|
||||
|
||||
void cl_load_prog()
|
||||
{
|
||||
// Create a program from the kernel source
|
||||
const size_t kernel_size = __2DConvolution_Gen9core_gen_len;
|
||||
const unsigned char* kernel_bin = __2DConvolution_Gen9core_gen;
|
||||
clProgram = clCreateProgramWithBinary(clGPUContext, 1, &device_id, &kernel_size, &kernel_bin, NULL, &errcode);
|
||||
// clProgram = clCreateProgramWithSource(clGPUContext, 1, (const char **)&source_str, (const size_t *)&source_size, &errcode);
|
||||
|
||||
if(errcode != CL_SUCCESS) printf("Error in creating program\n");
|
||||
|
||||
// Build the program
|
||||
errcode = clBuildProgram(clProgram, 1, &device_id, NULL, NULL, NULL);
|
||||
if(errcode != CL_SUCCESS) printf("Error in building program\n");
|
||||
|
||||
// Create the OpenCL kernel
|
||||
clKernel = clCreateKernel(clProgram, "Convolution2D_kernel", &errcode);
|
||||
if(errcode != CL_SUCCESS) printf("Error in creating kernel\n");
|
||||
|
||||
clFinish(clCommandQue);
|
||||
}
|
||||
|
||||
|
||||
void cl_launch_kernel(int ni, int nj)
|
||||
{
|
||||
size_t localWorkSize[2], globalWorkSize[2];
|
||||
localWorkSize[0] = DIM_LOCAL_WORK_GROUP_X;
|
||||
localWorkSize[1] = DIM_LOCAL_WORK_GROUP_Y;
|
||||
globalWorkSize[0] = (size_t)ceil(((float)NI) / ((float)DIM_LOCAL_WORK_GROUP_X)) * DIM_LOCAL_WORK_GROUP_X;
|
||||
globalWorkSize[1] = (size_t)ceil(((float)NJ) / ((float)DIM_LOCAL_WORK_GROUP_Y)) * DIM_LOCAL_WORK_GROUP_Y;
|
||||
|
||||
/* Start timer. */
|
||||
polybench_start_instruments;
|
||||
|
||||
// Set the arguments of the kernel
|
||||
errcode = clSetKernelArg(clKernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);
|
||||
errcode |= clSetKernelArg(clKernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);
|
||||
errcode = clSetKernelArg(clKernel, 2, sizeof(int), &ni);
|
||||
errcode |= clSetKernelArg(clKernel, 3, sizeof(int), &nj);
|
||||
|
||||
if(errcode != CL_SUCCESS) printf("Error in seting arguments\n");
|
||||
// Execute the OpenCL kernel
|
||||
errcode = clEnqueueNDRangeKernel(clCommandQue, clKernel, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
|
||||
|
||||
if(errcode != CL_SUCCESS) printf("Error in launching kernel\n");
|
||||
clFinish(clCommandQue);
|
||||
|
||||
/* Stop and print timer. */
|
||||
polybench_stop_instruments;
|
||||
printf("GPU Time in seconds:\n");
|
||||
polybench_print_instruments;
|
||||
}
|
||||
|
||||
void cl_clean_up()
|
||||
{
|
||||
// Clean up
|
||||
errcode = clFlush(clCommandQue);
|
||||
errcode = clFinish(clCommandQue);
|
||||
errcode = clReleaseKernel(clKernel);
|
||||
errcode = clReleaseProgram(clProgram);
|
||||
errcode = clReleaseMemObject(a_mem_obj);
|
||||
errcode = clReleaseMemObject(b_mem_obj);
|
||||
errcode = clReleaseCommandQueue(clCommandQue);
|
||||
errcode = clReleaseContext(clGPUContext);
|
||||
if(errcode != CL_SUCCESS) printf("Error in cleanup\n");
|
||||
}
|
||||
|
||||
void conv2D(int ni, int nj, DATA_TYPE POLYBENCH_2D(A, NI, NJ, ni, nj), DATA_TYPE POLYBENCH_2D(B, NI, NJ, ni, nj))
|
||||
{
|
||||
int i, j;
|
||||
DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33;
|
||||
|
||||
c11 = +0.2; c21 = +0.5; c31 = -0.8;
|
||||
c12 = -0.3; c22 = +0.6; c32 = -0.9;
|
||||
c13 = +0.4; c23 = +0.7; c33 = +0.10;
|
||||
|
||||
|
||||
for (i = 1; i < _PB_NI - 1; ++i) // 0
|
||||
{
|
||||
for (j = 1; j < _PB_NJ - 1; ++j) // 1
|
||||
{
|
||||
B[i][j] = c11 * A[(i - 1)][(j - 1)] + c12 * A[(i + 0)][(j - 1)] + c13 * A[(i + 1)][(j - 1)]
|
||||
+ c21 * A[(i - 1)][(j + 0)] + c22 * A[(i + 0)][(j + 0)] + c23 * A[(i + 1)][(j + 0)]
|
||||
+ c31 * A[(i - 1)][(j + 1)] + c32 * A[(i + 0)][(j + 1)] + c33 * A[(i + 1)][(j + 1)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* DCE code. Must scan the entire live-out data.
|
||||
Can be used also to check the correctness of the output. */
|
||||
static
|
||||
void print_array(int ni, int nj,
|
||||
DATA_TYPE POLYBENCH_2D(B,NI,NJ,ni,nj))
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < ni; i++)
|
||||
for (j = 0; j < nj; j++) {
|
||||
fprintf (stderr, DATA_PRINTF_MODIFIER, B[i][j]);
|
||||
if ((i * ni + j) % 20 == 0) fprintf (stderr, "\n");
|
||||
}
|
||||
fprintf (stderr, "\n");
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
/* Retrieve problem size */
|
||||
int ni = NI;
|
||||
int nj = NJ;
|
||||
|
||||
POLYBENCH_2D_ARRAY_DECL(A,DATA_TYPE,NI,NJ,ni,nj);
|
||||
POLYBENCH_2D_ARRAY_DECL(B,DATA_TYPE,NI,NJ,ni,nj);
|
||||
POLYBENCH_2D_ARRAY_DECL(B_outputFromGpu,DATA_TYPE,NI,NJ,ni,nj);
|
||||
|
||||
//init(ni, nj, POLYBENCH_ARRAY(A));
|
||||
A = (float(*)[NI][NJ]) argv;
|
||||
|
||||
read_cl_file();
|
||||
cl_initialization();
|
||||
cl_mem_init(POLYBENCH_ARRAY(A));
|
||||
cl_load_prog();
|
||||
|
||||
cl_launch_kernel(ni, nj);
|
||||
|
||||
errcode = clEnqueueReadBuffer(clCommandQue, b_mem_obj, CL_TRUE, 0, NI*NJ*sizeof(DATA_TYPE), POLYBENCH_ARRAY(B_outputFromGpu), 0, NULL, NULL);
|
||||
if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");
|
||||
|
||||
#ifdef RUN_ON_CPU
|
||||
|
||||
/* Start timer. */
|
||||
polybench_start_instruments;
|
||||
|
||||
conv2D(ni, nj, POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B));
|
||||
|
||||
/* Stop and print timer. */
|
||||
printf("CPU Time in seconds:\n");
|
||||
polybench_stop_instruments;
|
||||
polybench_print_instruments;
|
||||
|
||||
compareResults(ni, nj, POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(B_outputFromGpu));
|
||||
|
||||
#else //prevent dead code elimination
|
||||
|
||||
polybench_prevent_dce(print_array(ni, nj, POLYBENCH_ARRAY(B_outputFromGpu)));
|
||||
|
||||
#endif //RUN_ON_CPU
|
||||
|
||||
cl_clean_up();
|
||||
|
||||
//POLYBENCH_FREE_ARRAY(A);
|
||||
POLYBENCH_FREE_ARRAY(B);
|
||||
POLYBENCH_FREE_ARRAY(B_outputFromGpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
99
repos/hello_gpgpu/src/consumer2/main.cc
Normal file
99
repos/hello_gpgpu/src/consumer2/main.cc
Normal file
@@ -0,0 +1,99 @@
|
||||
#include <base/log.h>
|
||||
#include <base/heap.h>
|
||||
#include <base/allocator_avl.h>
|
||||
#include <base/attached_rom_dataspace.h>
|
||||
|
||||
#include <libc/component.h>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <string.h>
|
||||
|
||||
// OpenCL
|
||||
#define CL_TARGET_OPENCL_VERSION 100
|
||||
#include "../hello_gpgpu/CL/cl.h"
|
||||
|
||||
// rpc
|
||||
#include <gpgpu_virt/connection.h>
|
||||
|
||||
// stupid alloc
|
||||
#include "../hello_gpgpu/allocator_stupid.h"
|
||||
|
||||
namespace ns_convolution_2d{int main(int argc, char *argv[]);};
|
||||
|
||||
struct consumer_conv
|
||||
{
|
||||
Genode::Env &env;
|
||||
gpgpu_virt::Connection backend_driver;
|
||||
Genode::Allocator_stupid allocator;
|
||||
|
||||
const unsigned long size = 0x40000000;
|
||||
cl_genode clg;
|
||||
Genode::Ram_dataspace_capability vgpu_mem_ram_cap;
|
||||
Genode::Ram_dataspace_capability vgpu_shm_ram_cap;
|
||||
|
||||
volatile uint8_t *ready;
|
||||
const unsigned long img_size = 320 * 240 * sizeof(float);
|
||||
volatile float *data;
|
||||
|
||||
void init()
|
||||
{
|
||||
Genode::log("===Init Consumer Conv===");
|
||||
clInitGenode(clg);
|
||||
|
||||
// register vgpu (optional?)
|
||||
const unsigned long size_vgpu_mem = 0x1000;
|
||||
backend_driver.register_vm(size_vgpu_mem, vgpu_mem_ram_cap);
|
||||
|
||||
// create shm for gpu
|
||||
const unsigned long id = 0;
|
||||
Genode::size_t total_size = 0;
|
||||
while (total_size == 0)
|
||||
{
|
||||
backend_driver.ask_shm(id, total_size, vgpu_shm_ram_cap);
|
||||
}
|
||||
|
||||
// attach shm to vm
|
||||
Genode::addr_t mapped_base = env.rm().attach(vgpu_shm_ram_cap);
|
||||
clg.add_shm_mapped_base(id, mapped_base);
|
||||
|
||||
// use it in allocator
|
||||
allocator.add_range(mapped_base, total_size);
|
||||
|
||||
// alloc whole data
|
||||
ready = (uint8_t *)allocator.alloc(1);
|
||||
data = (float *)allocator.alloc_aligned(0x10000, img_size);
|
||||
}
|
||||
|
||||
void run()
|
||||
{
|
||||
Genode::log("===Run Consumer Conv===");
|
||||
|
||||
Libc::with_libc([&]
|
||||
{
|
||||
for(;;)
|
||||
{
|
||||
while (*ready != 0x42);
|
||||
|
||||
ns_convolution_2d::main(2, (char**)data);
|
||||
|
||||
//Genode::log(data[0]);
|
||||
sleep(3);
|
||||
} });
|
||||
|
||||
Genode::log("===End===");
|
||||
Genode::log("Consumer Conv completed");
|
||||
}
|
||||
|
||||
consumer_conv(Genode::Env &e) : env(e), backend_driver(env), allocator(), clg(env, size), ready(nullptr), data(nullptr)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
void Libc::Component::construct(Libc::Env &env)
|
||||
{
|
||||
static consumer_conv p(env);
|
||||
p.init();
|
||||
p.run();
|
||||
}
|
||||
10
repos/hello_gpgpu/src/consumer2/target.mk
Normal file
10
repos/hello_gpgpu/src/consumer2/target.mk
Normal file
@@ -0,0 +1,10 @@
|
||||
TARGET = consumer2
|
||||
SRC_CC = main.cc \
|
||||
2DConvolution.cc \
|
||||
../hello_gpgpu/polybench.cc \
|
||||
../hello_gpgpu/CL/cl.cc ../hello_gpgpu/CL/cl_genode.cc \
|
||||
../hello_gpgpu/allocator_stupid.cc
|
||||
|
||||
LIBS = base libc libm
|
||||
|
||||
CC_CXX_WARN_STRICT =
|
||||
@@ -357,6 +357,26 @@ clCreateBuffer(cl_context context,
|
||||
clmem->bc.buffer = host_ptr;
|
||||
clmem->bc.buffer_size = (uint32_t)size;
|
||||
clmem->bc.non_pointer_type = false;
|
||||
clmem->bc.shmid = -1;
|
||||
|
||||
*errcode_ret |= CL_SUCCESS;
|
||||
return clmem;
|
||||
}
|
||||
|
||||
CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateBufferSHM(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
size_t size,
|
||||
void * host_ptr,
|
||||
cl_int * errcode_ret,
|
||||
int shid)
|
||||
{
|
||||
cl_mem clmem = (cl_mem)g_cl_genode->alloc(sizeof(struct _cl_mem));
|
||||
clmem->virt_vm = host_ptr;
|
||||
clmem->bc.buffer = host_ptr;
|
||||
clmem->bc.buffer_size = (uint32_t)size;
|
||||
clmem->bc.non_pointer_type = false;
|
||||
clmem->bc.shmid = shid;
|
||||
|
||||
*errcode_ret |= CL_SUCCESS;
|
||||
return clmem;
|
||||
|
||||
@@ -1088,6 +1088,14 @@ clCreateBuffer(cl_context context,
|
||||
void * host_ptr,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateBufferSHM(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
size_t size,
|
||||
void * host_ptr,
|
||||
cl_int * errcode_ret,
|
||||
int shid) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
#ifdef CL_VERSION_1_1
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#include "cl_genode.h"
|
||||
|
||||
cl_genode::cl_genode(Genode::Env& env, unsigned long size) : env(env), allocator(), mapped_base(0), backend_driver(env)
|
||||
cl_genode::cl_genode(Genode::Env& env, unsigned long size) : env(env), allocator(), mapped_base(0), backend_driver(env), shm_mapped_base{0, }
|
||||
{
|
||||
// get shared memory with driver
|
||||
Genode::Ram_dataspace_capability ram_cap;
|
||||
@@ -38,7 +38,8 @@ void cl_genode::enqueue_task(struct kernel_config* kconf)
|
||||
// convert virt vm addr to offset
|
||||
for(int i = 0; i < kconf->buffCount; i++)
|
||||
{
|
||||
kconf->buffConfigs[i].buffer = (void*)((Genode::addr_t)kconf->buffConfigs[i].buffer - mapped_base);
|
||||
const Genode::addr_t mbase = kconf->buffConfigs[i].shmid == -1 ? mapped_base : shm_mapped_base[kconf->buffConfigs[i].shmid];
|
||||
kconf->buffConfigs[i].buffer = (void*)((Genode::addr_t)kconf->buffConfigs[i].buffer - mbase);
|
||||
}
|
||||
kconf->buffConfigs = (struct buffer_config*)((Genode::addr_t)kconf->buffConfigs - mapped_base);
|
||||
kconf->kernelName = (char*)((Genode::addr_t)kconf->kernelName - mapped_base);
|
||||
@@ -55,3 +56,8 @@ void cl_genode::wait(struct kernel_config* kconf)
|
||||
asm("nop");
|
||||
}
|
||||
}
|
||||
|
||||
void cl_genode::add_shm_mapped_base(int shmid, Genode::addr_t mbase)
|
||||
{
|
||||
shm_mapped_base[shmid] = mbase;
|
||||
}
|
||||
|
||||
@@ -17,6 +17,9 @@
|
||||
// driver
|
||||
#include <gpgpu/gpgpu.h>
|
||||
|
||||
// config
|
||||
#include "../../../../dde_uos-intel-gpgpu/src/config.h"
|
||||
|
||||
class cl_genode
|
||||
{
|
||||
private:
|
||||
@@ -30,6 +33,9 @@ private:
|
||||
// rpc
|
||||
gpgpu_virt::Connection backend_driver;
|
||||
|
||||
// shm mapped_bases
|
||||
Genode::addr_t shm_mapped_base[MAX_SHM_REGIONS];
|
||||
|
||||
// do not allow copies
|
||||
cl_genode(const cl_genode& copy) = delete;
|
||||
cl_genode& operator=(const cl_genode& src) = delete;
|
||||
@@ -100,6 +106,14 @@ public:
|
||||
*
|
||||
*/
|
||||
void reset() { allocator.reset(); }
|
||||
|
||||
/**
|
||||
* @brief
|
||||
*
|
||||
* @param shmid
|
||||
* @param mbase
|
||||
*/
|
||||
void add_shm_mapped_base(int shmid, Genode::addr_t mbase);
|
||||
};
|
||||
|
||||
#endif // CL_GENODE_H
|
||||
|
||||
4817
repos/hello_gpgpu/src/producer/frac2_320_240.h
Normal file
4817
repos/hello_gpgpu/src/producer/frac2_320_240.h
Normal file
File diff suppressed because it is too large
Load Diff
4817
repos/hello_gpgpu/src/producer/frac_320_240.h
Normal file
4817
repos/hello_gpgpu/src/producer/frac_320_240.h
Normal file
File diff suppressed because it is too large
Load Diff
155
repos/hello_gpgpu/src/producer/main.cc
Normal file
155
repos/hello_gpgpu/src/producer/main.cc
Normal file
@@ -0,0 +1,155 @@
|
||||
#include <base/log.h>
|
||||
#include <base/heap.h>
|
||||
#include <base/allocator_avl.h>
|
||||
#include <base/attached_rom_dataspace.h>
|
||||
#include <util/misc_math.h>
|
||||
|
||||
#include <libc/component.h>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <string.h>
|
||||
|
||||
// rpc
|
||||
#include <gpgpu_virt/connection.h>
|
||||
|
||||
// stupid alloc
|
||||
#include "../hello_gpgpu/allocator_stupid.h"
|
||||
|
||||
// imgs
|
||||
#include "frac_320_240.h"
|
||||
#include "frac2_320_240.h"
|
||||
|
||||
namespace fake_cv
|
||||
{
|
||||
class Mat
|
||||
{
|
||||
public:
|
||||
int rows;
|
||||
int cols;
|
||||
int step;
|
||||
float *data;
|
||||
template <typename T>
|
||||
T *ptr(int off)
|
||||
{
|
||||
return (T *)&data[off * sizeof(T)];
|
||||
}
|
||||
};
|
||||
|
||||
Mat imread(const char *img)
|
||||
{
|
||||
Mat m;
|
||||
m.rows = height;
|
||||
m.cols = width;
|
||||
m.step = width * sizeof(float);
|
||||
|
||||
const size_t size = m.rows * m.cols;
|
||||
m.data = (float *)malloc(size * sizeof(float));
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
unsigned int px[3];
|
||||
HEADER_PIXEL(img, px);
|
||||
const unsigned int g = 0.298936021293775 * px[0] + 0.587043074451121 * px[1] + 0.114020904255103 * px[2];
|
||||
m.data[i] = g / 251.; // 255.
|
||||
}
|
||||
|
||||
return m;
|
||||
}
|
||||
}
|
||||
|
||||
typedef fake_cv::Mat Image;
|
||||
|
||||
fake_cv::Mat getGray(const fake_cv::Mat &img)
|
||||
{
|
||||
return img;
|
||||
}
|
||||
|
||||
struct producer
|
||||
{
|
||||
Genode::Env &env;
|
||||
gpgpu_virt::Connection backend_driver;
|
||||
Genode::Allocator_stupid allocator;
|
||||
|
||||
float *img1;
|
||||
float *img2;
|
||||
unsigned long img_size;
|
||||
volatile uint8_t *ready;
|
||||
volatile float *data;
|
||||
|
||||
void init()
|
||||
{
|
||||
Genode::log("===Init Producer===");
|
||||
|
||||
// register vgpu (optional?)
|
||||
const unsigned long size_vgpu_mem = 0x1000;
|
||||
Genode::Ram_dataspace_capability vgpu_mem_ram_cap;
|
||||
backend_driver.register_vm(size_vgpu_mem, vgpu_mem_ram_cap);
|
||||
|
||||
// create shm for gpu
|
||||
const unsigned long size_vgpu_shm = 0x100000;
|
||||
Genode::Ram_dataspace_capability vgpu_shm_ram_cap;
|
||||
backend_driver.register_shm(size_vgpu_shm, vgpu_shm_ram_cap);
|
||||
|
||||
// attach shm to vm
|
||||
Genode::addr_t mapped_base = env.rm().attach(vgpu_shm_ram_cap);
|
||||
|
||||
// use it in allocator
|
||||
allocator.add_range(mapped_base, size_vgpu_shm);
|
||||
|
||||
// set not ready
|
||||
ready = (uint8_t *)allocator.alloc(1);
|
||||
|
||||
// load img1 and img2
|
||||
const Image s1 = fake_cv::imread(header_data);
|
||||
Image i1 = getGray(s1);
|
||||
img1 = (float *)i1.ptr<float>(0);
|
||||
const Image s2 = fake_cv::imread(header_data2);
|
||||
Image i2 = getGray(s2);
|
||||
img2 = (float *)i2.ptr<float>(0);
|
||||
|
||||
img_size = Genode::max(i1.rows * i2.cols * sizeof(float), i2.rows * i2.cols * sizeof(float));
|
||||
|
||||
// alloc whole data
|
||||
data = (float *)allocator.alloc_aligned(0x10000, img_size);
|
||||
}
|
||||
|
||||
void run()
|
||||
{
|
||||
Genode::log("===Run Producer===");
|
||||
|
||||
Libc::with_libc([&]
|
||||
{
|
||||
srand(time(NULL));
|
||||
|
||||
int flip = 0;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
// fetch new img
|
||||
*ready = 0x43;
|
||||
memcpy((void*)data, flip ? img1 : img2, img_size);
|
||||
*ready = 0x42;
|
||||
flip = !flip;
|
||||
Genode::log("===New Image ready: ", flip, " ===");
|
||||
|
||||
// sleep for 5s
|
||||
sleep(5);
|
||||
} });
|
||||
|
||||
Genode::log("===End===");
|
||||
Genode::log("Producer completed");
|
||||
}
|
||||
|
||||
producer(Genode::Env &e) : env(e), backend_driver(env), allocator(), img1(nullptr), img2(nullptr), img_size(0), ready(nullptr), data(nullptr)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
void Libc::Component::construct(Libc::Env &env)
|
||||
{
|
||||
static producer p(env);
|
||||
p.init();
|
||||
p.run();
|
||||
}
|
||||
7
repos/hello_gpgpu/src/producer/target.mk
Normal file
7
repos/hello_gpgpu/src/producer/target.mk
Normal file
@@ -0,0 +1,7 @@
|
||||
TARGET = producer
|
||||
SRC_CC = main.cc \
|
||||
../hello_gpgpu/allocator_stupid.cc
|
||||
|
||||
LIBS = base libc libm
|
||||
|
||||
CC_CXX_WARN_STRICT =
|
||||
Reference in New Issue
Block a user