diff --git a/repos/libports/src/lib/libdrm/ioctl_lima.cc b/repos/libports/src/lib/libdrm/ioctl_lima.cc
index 2636ba7950..2415914b76 100644
--- a/repos/libports/src/lib/libdrm/ioctl_lima.cc
+++ b/repos/libports/src/lib/libdrm/ioctl_lima.cc
@@ -38,7 +38,7 @@ extern "C" {
 }
 
 
-enum { verbose_ioctl = false };
+static constexpr bool verbose_ioctl = false;
 
 
 /**
@@ -122,6 +122,17 @@ namespace Lima {
 
 	void serialize(drm_lima_gem_submit *submit, char *content);
 
+	template <typename FN>
+	void for_each_submit_bo(drm_lima_gem_submit const &submit, FN const &fn)
+	{
+		auto access_bo = [&] (drm_lima_gem_submit_bo const *bo) {
+			fn(*bo);
+		};
+
+		for_each_object((drm_lima_gem_submit_bo*)submit.bos,
+		                submit.nr_bos, access_bo);
+	}
+
 	size_t get_payload_size(drm_version const &version);
 
 } /* anonymous namespace */
@@ -189,19 +200,16 @@ namespace Lima {
 	using namespace Gpu;
 	struct Call;
-	struct Buffer;
 
 } /* namespace Lima */
 
 
-namespace Gpu {
-	using Buffer_id = Vram_id;
-}
-
-
-struct Gpu::Vram { };
-
-struct Lima::Buffer : Gpu::Vram
+/*
+ * Gpu::Vram encapsulates a buffer-object allocation
+ */
+struct Gpu::Vram
 {
+	struct Allocation_failed : Genode::Exception { };
+
 	Gpu::Connection &_gpu;
 
 	Vram_id_space::Element const _elem;
@@ -212,22 +220,31 @@ struct Lima::Buffer : Gpu::Vram
 
 	Genode::Constructible<Genode::Attached_dataspace> _attached_buffer { };
 
-	Buffer(Gpu::Connection &gpu,
-	       size_t size,
-	       Gpu::Virtual_address va,
-	       Vram_id_space &space)
+	Vram(Gpu::Connection &gpu,
+	     size_t size,
+	     Gpu::Virtual_address va,
+	     Vram_id_space &space)
 	:
 		_gpu { gpu },
 		_elem { *this, space },
-		size { size }, va { va }
+		size { size },
+		va { va }
 	{
+		/*
+		 * As we cannot easily enforce a GPU virtual address after the
+		 * fact when using the DRM Lima API, we instead use 'map_gpu' to
+		 * create the buffer object.
+		 *
+		 * A valid virtual address needs to be enforced before attempting
+		 * the allocation.
+		 */
 		if (!_gpu.map_gpu(_elem.id(), size, 0, va))
-			throw -1;
+			throw Allocation_failed();
 
 		cap = _gpu.map_cpu(_elem.id(), Gpu::Mapping_attributes());
 	}
 
-	~Buffer()
+	~Vram()
 	{
 		_gpu.unmap_gpu(_elem.id(), 0, Virtual_address());
 	}
@@ -246,7 +263,7 @@ struct Lima::Buffer : Gpu::Vram
 		return reinterpret_cast<void *>(_attached_buffer->local_addr<void>());
 	}
 
-	Gpu::Buffer_id id() const
+	Gpu::Vram_id id() const
 	{
 		return _elem.id();
 	}
@@ -265,12 +282,75 @@ class Lima::Call
 		Genode::Env  &_env { *vfs_gpu_env() };
 		Genode::Heap  _heap { _env.ram(), _env.rm() };
-		Genode::Allocator_avl _va_alloc { &_heap };
+
+		/*
+		 * The virtual-address allocator manages all GPU virtual
+		 * addresses, which are shared by all contexts.
+		 */
+		struct Va_allocator
+		{
+			Genode::Allocator_avl _alloc;
+
+			Va_allocator(Genode::Allocator &alloc)
+			:
+				_alloc { &alloc }
+			{
+				/*
+				 * The end of the range corresponds to LIMA_VA_RESERVE_START
+				 * in Linux, minus the page we omit at the start.
+				 */
+				_alloc.add_range(0x1000, 0xfff00000ul - 0x1000);
			}
+
+			Gpu::Virtual_address alloc(uint32_t size)
+			{
+				return Gpu::Virtual_address { _alloc.alloc_aligned(size, 12).convert<::uint64_t>(
+					[&] (void *ptr) { return (::uint64_t)ptr; },
+					[&] (Range_allocator::Alloc_error) -> ::uint64_t {
+						error("Could not allocate GPU virtual address for size: ", size);
+						return 0;
+					}) };
+			}
+
+			void free(Gpu::Virtual_address va)
+			{
+				_alloc.free((void*)va.value);
+			}
+		};
+
+		Va_allocator _va_alloc { _heap };
 
 
 		/*****************
 		 ** Gpu session **
 		 *****************/
 
+		/*
+		 * A Buffer wraps the actual Vram object and is used
+		 * locally in each Gpu_context.
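+		 * A Buffer borrows the id of its Vram object, so a given GEM
+		 * handle resolves to the same id in every context.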
+		 */
+		struct Buffer
+		{
+			Genode::Id_space<Buffer>::Element const _elem;
+
+			Gpu::Vram const &_vram;
+
+			Buffer(Genode::Id_space<Buffer> &space,
+			       Gpu::Vram const &vram)
+			:
+				_elem { *this, space,
+				        Genode::Id_space<Buffer>::Id { .value = vram.id().value } },
+				_vram { vram },
+				busy  { false }
+			{ }
+
+			~Buffer() { }
+
+			bool busy;
+		};
+
+		using Buffer_space = Genode::Id_space<Buffer>;
+
 		struct Gpu_context
 		{
 			private:
@@ -281,12 +361,14 @@ class Lima::Call
 				Gpu_context(Gpu_context const &) = delete;
 				Gpu_context &operator=(Gpu_context const &) = delete;
 
+				Genode::Allocator &_alloc;
+
 				static int _open_gpu()
 				{
 					int const fd = ::open("/dev/gpu", 0);
 					if (fd < 0) {
-						Genode::error("Failed to open '/dev/gpu': ",
-						              "try configure '<gpu/>' in 'dev' directory of VFS'");
+						error("Failed to open '/dev/gpu': ",
+						      "try configure '<gpu/>' in 'dev' directory of VFS'");
 						throw Gpu::Session::Invalid_state();
 					}
 					return fd;
@@ -296,7 +378,7 @@ class Lima::Call
 				{
 					struct ::stat buf;
 					if (::fstat(fd, &buf) < 0) {
-						Genode::error("Could not stat '/dev/gpu'");
+						error("Could not stat '/dev/gpu'");
 						::close(fd);
 						throw Gpu::Session::Invalid_state();
 					}
@@ -306,33 +388,184 @@ class Lima::Call
 				int           const _fd;
 				unsigned long const _id;
 
+				Gpu::Connection &_gpu { *vfs_gpu_connection(_id) };
+
 				Genode::Id_space<Gpu_context>::Element const _elem;
 
+				/*
+				 * The virtual-address allocator is solely used for
+				 * the construction of the context-local exec buffer,
+				 * as the Gpu session requires a valid Vram object
+				 * for it.
+				 */
+				Va_allocator &_va_alloc;
+
+				/*
+				 * The vram space is used for actual memory allocations.
+				 * Each and every Gpu_context is able to allocate memory
+				 * at the Gpu session, although normally the main context
+				 * allocates the memory for buffer objects.
+				 */
+				Gpu::Vram_id_space _vram_space { };
+
+				template <typename FN>
+				void _try_apply(Gpu::Vram_id id, FN const &fn)
+				{
+					try {
+						_vram_space.apply<Vram>(id, fn);
+					} catch (Vram_id_space::Unknown_id) { }
+				}
+
+				/*
+				 * The buffer space is used to attach a given buffer
+				 * object backed by a vram allocation - normally from the
+				 * main context - to the given context, i.e., it is mapped
+				 * when SUBMIT is executed.
+				 */
+				Buffer_space _buffer_space { };
+
+				template <typename FN>
+				void _try_apply(Buffer_space::Id id, FN const &fn)
+				{
+					try {
+						_buffer_space.apply<Buffer>(id, fn);
+					} catch (Buffer_space::Unknown_id) { }
+				}
+
+				/*
+				 * Each context contains its own exec buffer object that is
+				 * required by the Gpu session to pass on the driver-specific
+				 * command buffer.
+				 */
+				Gpu::Vram *_exec_buffer;
+
 			public:
 
-				Gpu_context(Genode::Id_space<Gpu_context> &space)
+				bool defer_destruction { false };
+
+				static constexpr size_t _exec_buffer_size = { 256u << 10 };
+
+				Gpu_context(Genode::Allocator &alloc,
+				            Genode::Id_space<Gpu_context> &space,
+				            Va_allocator &va_alloc)
 				:
+					_alloc { alloc },
 					_fd { _open_gpu() },
 					_id { _stat_gpu(_fd) },
-					_elem { *this, space }
+					_elem { *this, space },
+					_va_alloc { va_alloc },
+					_exec_buffer { new (alloc) Gpu::Vram(_gpu, _exec_buffer_size,
+					                                     _va_alloc.alloc(_exec_buffer_size),
+					                                     _vram_space) }
 				{ }
 
 				~Gpu_context()
 				{
+					while (_buffer_space.apply_any<Buffer>([&] (Buffer &b) {
+						Genode::destroy(_alloc, &b);
+					})) { ; }
+
+					/*
+					 * 'close' below will destroy the Gpu session belonging
+					 * to this context, so free the exec buffer beforehand.
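+					 * The Buffer objects destroyed above only reference
+					 * vram of other contexts; the context's own allocations
+					 * live in '_vram_space' and are freed here.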
+					 */
+					while (_vram_space.apply_any<Vram>([&] (Vram &v) {
+						Genode::destroy(_alloc, &v);
+					})) { ; }
+
 					::close(_fd);
 				}
 
+				Gpu::Connection &gpu()
+				{
+					return _gpu;
+				}
+
+				Gpu::Vram_capability export_vram(Gpu::Vram_id id)
+				{
+					Gpu::Vram_capability cap { };
+					_try_apply(id, [&] (Gpu::Vram const &b) {
+						cap = _gpu.export_vram(b.id());
+					});
+					return cap;
+				}
+
+				Buffer *import_vram(Gpu::Vram_capability cap, Gpu::Vram const &v)
+				{
+					Buffer *b = nullptr;
+
+					try { _gpu.import_vram(cap, v.id()); }
+					catch (... /* should only throw Invalid_state */) {
+						return nullptr; }
+
+					try { b = new (_alloc) Buffer(_buffer_space, v); }
+					catch (... /* either conflicting id or alloc failure */) {
+						return nullptr; }
+
+					return b;
+				}
+
+				void free_buffer(Buffer_space::Id id)
+				{
+					_try_apply(id, [&] (Buffer &b) {
+
+						/*
+						 * We have to invalidate any imported buffer as otherwise
+						 * the object stays ref-counted in the driver and the
+						 * VA occupied by the object is not freed.
+						 *
+						 * For that we repurpose 'unmap_cpu' that is otherwise
+						 * not used.
+						 */
+						_gpu.unmap_cpu(Gpu::Vram_id { .value = id.value });
+						Genode::destroy(_alloc, &b);
+					});
+				}
+
+				bool buffer_space_contains(Buffer_space::Id id)
+				{
+					bool found = false;
+					_try_apply(id, [&] (Buffer const &) { found = true; });
+					return found;
+				}
+
+				Gpu::Vram_id_space &vram_space()
+				{
+					return _vram_space;
+				}
+
+				template <typename FN>
+				void with_vram_space(Gpu::Vram_id id, FN const &fn)
+				{
+					_try_apply(id, fn);
+				}
+
+				template <typename FN>
+				void access_exec_buffer(Genode::Env &env, FN const &fn)
+				{
+					/*
+					 * 'env' is solely needed for mapping the exec buffer
+					 * and is therefore used here locally.
+					 */
+					if (_exec_buffer->mmap(env)) {
+						char *ptr = (char*)_exec_buffer->mmap_addr();
+						if (ptr)
+							fn(ptr, _exec_buffer_size);
+					}
+				}
+
+				Gpu::Vram_id exec_buffer_id() const
+				{
+					return _exec_buffer->id();
+				}
+
 				unsigned long id() const
 				{
 					return _elem.id().value;
 				}
 
-				Gpu::Connection& gpu()
-				{
-					return *vfs_gpu_connection(_id);
-				}
-
 				int fd() const
 				{
 					return _fd;
@@ -351,33 +584,33 @@ class Lima::Call
 
 			Syncobj &operator=(Syncobj const &) = delete;
 
-			Gpu_context *_gc { nullptr };
+			Gpu_context_space::Id _gc_id { .value = ~0u };
+
 			Gpu::Sequence_number _seqno { 0 };
 
 			Genode::Id_space<Syncobj>::Element const _elem;
 
+			bool sync_fd           { false };
+			bool defer_destruction { false };
+
 			Syncobj(Genode::Id_space<Syncobj> &space)
 			: _elem { *this, space } { }
 
-			unsigned long id() const
+			void adopt(Gpu_context_space::Id id, Gpu::Sequence_number seqno)
 			{
-				return _elem.id().value;
-			}
-
-			void adopt(Gpu_context &gc, Gpu::Sequence_number seqno)
-			{
-				_gc = &gc;
+				_gc_id = id;
 				_seqno = seqno;
 			}
 
-			Gpu_context &gpu_context()
+			Genode::Id_space<Syncobj>::Id id() const
 			{
-				if (!_gc) {
-					struct Invalid_gpu_context { };
-					throw Invalid_gpu_context();
-				}
+				return _elem.id();
+			}
 
-				return *_gc;
+			Gpu_context_space::Id ctx_id() const
+			{
+				return _gc_id;
 			}
 
 			Gpu::Sequence_number seqno() const
@@ -388,46 +621,35 @@ class Lima::Call
 		using Syncobj_space = Genode::Id_space<Syncobj>;
 		Syncobj_space _syncobj_space { };
 
-		Gpu_context _main_ctx { _gpu_context_space };
+		Gpu_context *_main_ctx {
+			new (_heap) Gpu_context(_heap, _gpu_context_space, _va_alloc) };
 
-		Gpu::Connection &_gpu { _main_ctx.gpu() };
 		Gpu::Info_lima const &_gpu_info {
-			*_gpu.attached_info<Gpu::Info_lima>() };
-
-		Gpu::Vram_id_space _buffer_space { };
+			*_main_ctx->gpu().attached_info<Gpu::Info_lima>() };
 
 		template <typename FN>
 		bool _apply_handle(uint32_t handle, FN const &fn)
 		{
-			Buffer_id const id { .value = handle };
+			Vram_id const id { .value = handle };
 
 			bool found = false;
-			try {
-				_buffer_space.apply<Buffer>(id, [&] (Buffer &b) {
-					fn(b);
-					found = true;
-				});
-			} catch (Genode::Id_space<Gpu::Vram>::Unknown_id) { }
+			_main_ctx->with_vram_space(id, [&] (Vram &b) {
+				fn(b);
+				found = true;
+			});
 
 			return found;
 		}
 
-		/*
-		 * Play it safe, glmark2 apparently submits araound 110 KiB at
-		 * some point.
-		 */
-		enum { EXEC_BUFFER_SIZE = 256u << 10 };
-		Constructible<Buffer> _exec_buffer { };
-
 		void _wait_for_mapping(uint32_t handle, unsigned op)
 		{
-			(void)_apply_handle(handle, [&] (Buffer &b) {
+			(void)_apply_handle(handle, [&] (Vram &b) {
 				do {
-					if (_main_ctx.gpu().set_tiling(b.id(), 0, op))
+					if (_main_ctx->gpu().set_tiling_gpu(b.id(), 0, op))
 						break;
 
 					char buf;
-					(void)::read(_main_ctx.fd(), &buf, sizeof(buf));
+					(void)::read(_main_ctx->fd(), &buf, sizeof(buf));
 				} while (true);
 			});
 		}
@@ -445,18 +667,39 @@ class Lima::Call
 			try {
 				auto wait = [&] (Syncobj &sync_obj) {
-					Gpu_context &gc = sync_obj.gpu_context();
-					do {
-						if (gc.gpu().complete(sync_obj.seqno()))
-							break;
+					auto poll_completion = [&] (Gpu_context &gc) {
+						do {
+							if (gc.gpu().complete(sync_obj.seqno()))
+								break;
 
-						char buf;
-						(void)::read(gc.fd(), &buf, sizeof(buf));
-					} while (true);
+							char buf;
+							(void)::read(gc.fd(), &buf, sizeof(buf));
+						} while (true);
+
+						if (gc.defer_destruction)
+							Genode::destroy(_heap, &gc);
+					};
+					_gpu_context_space.apply<Gpu_context>(sync_obj.ctx_id(),
+					                                      poll_completion);
+
+					if (sync_obj.defer_destruction)
+						Genode::destroy(_heap, &sync_obj);
 				};
 				_syncobj_space.apply<Syncobj>(syncobj_id, wait);
-			} catch (Genode::Id_space<Syncobj>::Unknown_id) {
-				Genode::warning("ignore unknown sync fd: ", fd);
+
+			} catch (Syncobj_space::Unknown_id) {
+				/*
+				 * We could end up here on the last wait when Mesa has
+				 * already destroyed the syncobj. For that reason we defer
+				 * the destruction of the syncobj as well as that of the
+				 * referenced ctx.
+				 */
 				return -1;
+			} catch (Gpu_context_space::Unknown_id) {
+				/*
+				 * We will end up here in case the GP or PP job could not
+				 * be submitted, as Mesa does not check for successful
+				 * submission.
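+				 * The syncobj then never adopted a valid context id, so
+				 * the lookup of its initial ~0u id fails here.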
+				 */
+				return -1;
 			}
 
 			return 0;
 		}
@@ -466,7 +709,7 @@ class Lima::Call
 		Dataspace_capability _lookup_cap_from_handle(uint32_t handle)
 		{
 			Dataspace_capability cap { };
-			auto lookup_cap = [&] (Buffer const &b) {
+			auto lookup_cap = [&] (Vram const &b) {
 				cap = b.cap;
 			};
 			(void)_apply_handle(handle, lookup_cap);
@@ -480,7 +723,7 @@ class Lima::Call
 		int _drm_lima_gem_info(drm_lima_gem_info &arg)
 		{
 			int result = -1;
-			(void)_apply_handle(arg.handle, [&] (Buffer &b) {
+			(void)_apply_handle(arg.handle, [&] (Vram &b) {
 				if (!b.mmap(_env))
 					return;
 				arg.offset = reinterpret_cast<::uint64_t>(b.mmap_addr());
@@ -496,41 +739,35 @@ class Lima::Call
 			return result;
 		}
 
-		::uint64_t _alloc_va(::uint64_t const size)
-		{
-			return _va_alloc.alloc_aligned(size, 12).convert<::uint64_t>(
-				[&] (void *ptr) { return (::uint64_t)ptr; },
-				[&] (Range_allocator::Alloc_error) -> ::uint64_t {
-					error("Could not allocate GPU virtual address for size: ", size);
-					return 0;
-				});
-		}
-
 		template <typename FUNC>
 		void _alloc_buffer(::uint64_t const size, FUNC const &fn)
 		{
 			size_t donate = size;
-			Buffer *buffer = nullptr;
+			Vram *buffer = nullptr;
 
-			::uint64_t va = _alloc_va(size);
-			if (!va) return;
+			Gpu::Virtual_address const va = _va_alloc.alloc(size);
+			if (!va.value)
+				return;
 
-			retry<Gpu::Session::Out_of_ram>(
-				[&] () {
-					retry<Gpu::Session::Out_of_caps>(
+			try {
+				retry<Gpu::Session::Out_of_ram>(
 					[&] () {
-						buffer =
-							new (&_heap) Buffer(_gpu, size,
-							                    Gpu::Virtual_address { va },
-							                    _buffer_space);
+						retry<Gpu::Session::Out_of_caps>(
+							[&] () {
+								buffer =
+									new (&_heap) Vram(_main_ctx->gpu(), size, va,
+									                  _main_ctx->vram_space());
+							},
+							[&] () {
+								_main_ctx->gpu().upgrade_caps(2);
+							});
 					},
 					[&] () {
-						_gpu.upgrade_caps(2);
+						_main_ctx->gpu().upgrade_ram(donate);
 					});
-				},
-				[&] () {
-					_gpu.upgrade_ram(donate);
-				});
+			} catch (Gpu::Vram::Allocation_failed) {
+				return;
+			}
 
 			if (buffer)
 				fn(*buffer);
@@ -541,7 +778,7 @@ class Lima::Call
 			::uint64_t const size = arg.size;
 
 			try {
-				_alloc_buffer(size, [&](Buffer const &b) {
+				_alloc_buffer(size, [&](Vram const &b) {
 					arg.handle = b.id().value;
 				});
 				return 0;
@@ -561,31 +798,71 @@ class Lima::Call
 
 			_gpu_context_space.apply<Gpu_context>(ctx_id, [&] (Gpu_context &gc) {
 
-				size_t const payload_size = Lima::get_payload_size(arg);
-				if (payload_size > EXEC_BUFFER_SIZE) {
-					Genode::error(__func__, ": exec buffer too small (",
-					              (unsigned)EXEC_BUFFER_SIZE, ") needed ", payload_size);
-					return;
-				}
-
 				/*
-				 * Copy each array flat to the exec buffer and adjust the
-				 * addresses in the submit object.
+				 * Check if we have access to all needed buffer objects and,
+				 * if not, import them from the main context that normally
+				 * performs all allocations.
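+				 * Once imported, a buffer stays attached to this context
+				 * until its GEM handle is eventually closed.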
 				 */
-				char *local_exec_buffer = (char*)_exec_buffer->mmap_addr();
-				Genode::memset(local_exec_buffer, 0, EXEC_BUFFER_SIZE);
-				Lima::serialize(&arg, local_exec_buffer);
+
+				bool all_buffer_mapped = true;
+				Lima::for_each_submit_bo(arg, [&] (drm_lima_gem_submit_bo const &bo) {
+					Buffer_space::Id const id = { .value = bo.handle };
+					if (!gc.buffer_space_contains(id)) {
+
+						bool imported = false;
+						(void)_apply_handle(bo.handle, [&] (Gpu::Vram const &v) {
+							Gpu::Vram_capability cap = _main_ctx->export_vram(v.id());
+							if (gc.import_vram(cap, v) == nullptr)
+								return;
+
+							imported = true;
+						});
+
+						if (!imported)
+							all_buffer_mapped = false;
+					}
+				});
+
+				if (!all_buffer_mapped)
+					return;
+
+				bool serialized = false;
+				gc.access_exec_buffer(_env, [&] (char *ptr, size_t size) {
+
+					size_t const payload_size = Lima::get_payload_size(arg);
+					if (payload_size > size) {
+						error("exec buffer for context ", ctx_id.value,
+						      " too small, got ", size, " but needed ",
+						      payload_size);
+						return;
+					}
+
+					/*
+					 * Copy each array flat to the exec buffer and adjust the
+					 * addresses in the submit object.
+					 */
+					Genode::memset(ptr, 0, size);
+					Lima::serialize(&arg, ptr);
+
+					serialized = true;
+				});
+
+				if (!serialized)
+					return;
 
 				try {
 					Gpu::Connection &gpu = gc.gpu();
 
 					Gpu::Sequence_number const seqno =
-						gpu.execute(_exec_buffer->id(), 0);
+						gpu.execute(gc.exec_buffer_id(), 0);
 
-					sync_obj.adopt(gc, seqno);
+					sync_obj.adopt(Gpu_context_space::Id { .value = gc.id() },
+					               seqno);
 
 					result = true;
-				} catch (Gpu::Session::Invalid_state) { }
+				} catch (Gpu::Session::Invalid_state) {
+					warning("could not execute: ", gc.exec_buffer_id().value);
+				}
 			});
 		});
@@ -617,7 +894,7 @@ class Lima::Call
 		{
 			try {
 				Gpu_context * ctx =
-					new (_heap) Gpu_context(_gpu_context_space);
+					new (_heap) Gpu_context(_heap, _gpu_context_space, _va_alloc);
 
 				arg.id = ctx->id();
 				return 0;
@@ -633,8 +910,21 @@ class Lima::Call
 			bool result = false;
 			auto free_ctx = [&] (Gpu_context &ctx) {
-				Genode::destroy(_heap, &ctx);
 				result = true;
+
+				/*
+				 * If the ctx is currently referenced by a syncobj, its
+				 * destruction gets deferred until the final wait-for-syncobj
+				 * call.
+				 */
+				_syncobj_space.for_each<Syncobj>([&] (Syncobj &obj) {
+					if (obj.ctx_id().value == ctx.id())
+						ctx.defer_destruction = true;
+				});
+
+				if (!ctx.defer_destruction)
+					Genode::destroy(_heap, &ctx);
 			};
 			_gpu_context_space.apply<Gpu_context>(id, free_ctx);
@@ -675,9 +965,15 @@ class Lima::Call
 
 		int _drm_gem_close(drm_gem_close const &gem_close)
 		{
+			auto free_buffer = [&] (Gpu_context &ctx) {
+				Buffer_space::Id const id { .value = gem_close.handle };
+				ctx.free_buffer(id);
+			};
+			_gpu_context_space.for_each<Gpu_context>(free_buffer);
+
 			return _apply_handle(gem_close.handle,
-				[&] (Lima::Buffer &b) {
-					_va_alloc.free((void *)b.va.value);
+				[&] (Gpu::Vram &b) {
+					_va_alloc.free(b.va);
 					destroy(_heap, &b);
 				}) ? 0 : -1;
 		}
@@ -710,7 +1006,7 @@ class Lima::Call
 		{
 			try {
 				Syncobj *obj = new (_heap) Syncobj(_syncobj_space);
-				arg.handle = (uint32_t)obj->id();
+				arg.handle = (uint32_t)obj->id().value;
 				return 0;
 			} catch (... /* XXX which exceptions can occur? */) { }
 			return -1;
@@ -722,16 +1018,34 @@ class Lima::Call
 
 			bool result = false;
 			_syncobj_space.apply<Syncobj>(id, [&] (Syncobj &obj) {
-				Genode::destroy(_heap, &obj);
 				result = true;
+
+				/*
+				 * In case the syncobj was once exported, destruction
+				 * gets deferred until the final wait-for-syncobj call.
+				 */
+				if (obj.sync_fd) {
+					obj.defer_destruction = true;
+					return;
+				}
+
+				Genode::destroy(_heap, &obj);
 			});
 
 			return result ? 0 : -1;
 		}
 
 		int _drm_syncobj_handle_to_fd(drm_syncobj_handle &arg)
 		{
-			arg.fd = arg.handle + SYNC_FD;
-			return 0;
+			arg.fd = -1;
+
+			Syncobj_space::Id id { .value = arg.handle };
+			_syncobj_space.apply<Syncobj>(id, [&] (Syncobj &obj) {
+				obj.sync_fd = true;
+
+				arg.fd = arg.handle + SYNC_FD;
+			});
+
+			return arg.fd != -1 ? 0 : -1;
 		}
 
 		int _generic_ioctl(unsigned cmd, void *arg)
@@ -766,32 +1080,15 @@ class Lima::Call
 
 		/* arbitrary start value out of the libc's FD alloc range */
 		static constexpr int const SYNC_FD { 10000 };
 
-		Call()
-		{
-			_va_alloc.add_range(0x1000, 0xfff00000ul - 0x1000);
-
-			::uint64_t va = _alloc_va(EXEC_BUFFER_SIZE);
-			if (!va) throw Gpu::Session::Invalid_state();
-
-			try {
-				_exec_buffer.construct(_gpu,
-				                       (size_t)EXEC_BUFFER_SIZE,
-				                       Gpu::Virtual_address { va },
-				                       _buffer_space);
-			} catch (...) {
-				throw Gpu::Session::Invalid_state();
-			}
-			if (!_exec_buffer->mmap(_env))
-				throw Gpu::Session::Invalid_state();
-		}
+		Call() { }
 
 		~Call()
 		{
-			while (_gpu_context_space.apply_any<Gpu_context>([&] (Gpu_context &ctx) {
-				Genode::destroy(_heap, &ctx); })) { ; }
-
 			while (_syncobj_space.apply_any<Syncobj>([&] (Syncobj &obj) {
 				Genode::destroy(_heap, &obj); })) { ; }
+
+			while (_gpu_context_space.apply_any<Gpu_context>([&] (Gpu_context &ctx) {
+				Genode::destroy(_heap, &ctx); })) { ; }
 		}
 
 		int ioctl(unsigned long request, void *arg)
@@ -840,7 +1137,7 @@ void lima_drm_init()
 	struct ::stat buf;
 	if (stat("/dev/gpu", &buf) < 0) {
 		Genode::error("'/dev/gpu' not accessible: ",
-		               "try configure '<gpu/>' in 'dev' directory of VFS'");
+		              "try configure '<gpu/>' in 'dev' directory of VFS'");
 		return;
 	}
 
@@ -879,7 +1176,6 @@ int lima_drm_ioctl(unsigned long request, void *arg)
 		Genode::log("returned ", ret);
 
 	pthread_mutex_unlock(&ioctl_mutex);
-
 	return ret;
 }