Mac merge

Allen Webster 2020-01-19 10:34:52 -08:00
commit c0c3d7b220
6 changed files with 210 additions and 183 deletions

View File

@@ -4,14 +4,10 @@
#undef function
#import <simd/simd.h>
#import <MetalKit/MetalKit.h>
#include "AAPLShaderTypes.h"
#define function static
////////////////////////////////
typedef id<MTLTexture> Metal_Texture;
struct Metal_Buffer{
Node node;
@@ -22,14 +18,57 @@ struct Metal_Buffer{
////////////////////////////////
@interface Metal_Renderer : NSObject<MTKViewDelegate>
@property (nonatomic) Render_Target *target;
typedef id<MTLTexture> Metal_Texture;
// NOTE(yuval): This is a locator that describes where a specific texture slot lives: which bucket it is in, and which slot it occupies within that bucket.
union Metal_Texture_Slot_Locator{
u32 packed;
struct{
u16 bucket_index;
u16 slot_index;
};
};
// NOTE(yuval): This is the ACTUAL texture slot. Each slot contains the texture handle, the slot locator, and a pointer to the next slot in the free list (in case the slot is not occupied).
struct Metal_Texture_Slot{
// NOTE(yuval): This is a pointer to the next texture in the free texture slots list
Metal_Texture_Slot *next;
Metal_Texture texture;
Metal_Texture_Slot_Locator locator;
};
global_const u32 metal__texture_slots_per_bucket = 256;
// NOTE(yuval): This is a bucket of ACTUAL texture slots.
struct Metal_Texture_Slot_Bucket{
Metal_Texture_Slot_Bucket *next;
Metal_Texture_Slot slots[metal__texture_slots_per_bucket];
};
// NOTE(yuval): This is a struct containing all texture slot buckets and a list of the currently free slots.
struct Metal_Texture_Slot_List{
Metal_Texture_Slot_Bucket *first_bucket;
Metal_Texture_Slot_Bucket *last_bucket;
u16 bucket_count;
Metal_Texture_Slot *first_free_slot;
Metal_Texture_Slot *last_free_slot;
};
global_const u32 metal__invalid_texture_slot_locator = (u32)-1;
////////////////////////////////
@interface Metal_Renderer : NSObject<MTKViewDelegate>
- (nonnull instancetype)initWithMetalKitView:(nonnull MTKView*)mtkView target:(Render_Target*)target;
- (u32)get_texture_of_dim:(Vec3_i32)dim kind:(Texture_Kind)kind;
- (b32)fill_texture:(u32)texture kind:(Texture_Kind)kind pos:(Vec3_i32)p dim:(Vec3_i32)dim data:(void*)data;
- (void)bind_texture:(u32)handle encoder:(id<MTLRenderCommandEncoder>)render_encoder;
- (Metal_Texture_Slot*)get_texture_slot_at_locator:(Metal_Texture_Slot_Locator)locator;
- (Metal_Texture_Slot*)get_texture_slot_at_handle:(u32)handle;
- (Metal_Buffer*)get_reusable_buffer_with_size:(NSUInteger)size;
- (void)add_reusable_buffer:(Metal_Buffer*)buffer;
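The handle scheme added in this hunk hinges on Metal_Texture_Slot_Locator: the renderer hands out the 32-bit packed value as the public texture handle and unpacks it back into a (bucket_index, slot_index) pair on lookup, with (u32)-1 reserved as the invalid handle. A minimal standalone sketch of that round trip, with shortened names and <cstdint> stand-ins for u16/u32; it uses the same anonymous-struct union punning as the code above, which is a widely supported compiler idiom rather than strict ISO C++.

#include <cassert>
#include <cstdint>

typedef uint16_t u16;
typedef uint32_t u32;

// Mirrors Metal_Texture_Slot_Locator: one u32 handle overlaid on two u16 indices.
union Texture_Slot_Locator{
    u32 packed;
    struct{
        u16 bucket_index;
        u16 slot_index;
    };
};

static const u32 invalid_texture_slot_locator = (u32)-1;

int main(){
    Texture_Slot_Locator loc = {};
    loc.bucket_index = 3;     // fourth bucket in the bucket list
    loc.slot_index = 17;      // eighteenth slot inside that bucket
    u32 handle = loc.packed;  // this 32-bit value is what the renderer hands out

    Texture_Slot_Locator back;
    back.packed = handle;     // a lookup recovers the same pair of indices
    assert(back.bucket_index == 3);
    assert(back.slot_index == 17);

    // (u32)-1 unpacks to bucket_index == 0xFFFF and slot_index == 0xFFFF, so it can never
    // collide with a real slot (the bucket count is asserted to stay below 0xFFFF).
    assert(handle != invalid_texture_slot_locator);
    return 0;
}
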
@@ -37,10 +76,6 @@ struct Metal_Buffer{
////////////////////////////////
global_const u32 metal__max_textures = 256;
////////////////////////////////
global_const char *metal__shaders_source = R"(
#include <metal_stdlib>
#include <simd/simd.h>
@@ -155,16 +190,17 @@ metal__make_buffer(u32 size, id<MTLDevice> device){
////////////////////////////////
@implementation Metal_Renderer{
id<MTLDevice> device;
id<MTLRenderPipelineState> pipeline_state;
id<MTLCommandQueue> command_queue;
id<MTLCaptureScope> capture_scope;
Render_Target *_target;
Node buffer_cache;
u64 last_buffer_cache_purge_time;
id<MTLDevice> _device;
id<MTLRenderPipelineState> _pipeline_state;
id<MTLCommandQueue> _command_queue;
id<MTLCaptureScope> _capture_scope;
Metal_Texture *textures;
u32 next_texture_handle_index;
Node _buffer_cache;
u64 _last_buffer_cache_purge_time;
Metal_Texture_Slot_List _texture_slots;
}
- (nonnull instancetype)initWithMetalKitView:(nonnull MTKView*)mtk_view target:(Render_Target*)target{
@@ -177,7 +213,7 @@ metal__make_buffer(u32 size, id<MTLDevice> device){
NSError *error = nil;
device = mtk_view.device;
_device = mtk_view.device;
// NOTE(yuval): Compile the shaders
id<MTLFunction> vertex_function = nil;
@@ -188,7 +224,7 @@ metal__make_buffer(u32 size, id<MTLDevice> device){
MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
options.fastMathEnabled = YES;
id<MTLLibrary> shader_library = [device newLibraryWithSource:shaders_source_str
id<MTLLibrary> shader_library = [_device newLibraryWithSource:shaders_source_str
options:options error:&error];
vertex_function = [shader_library newFunctionWithName:@"vertex_shader"];
fragment_function = [shader_library newFunctionWithName:@"fragment_shader"];
@@ -232,22 +268,21 @@ metal__make_buffer(u32 size, id<MTLDevice> device){
pipeline_state_descriptor.colorAttachments[0].sourceAlphaBlendFactor = MTLBlendFactorOne;
pipeline_state_descriptor.colorAttachments[0].destinationAlphaBlendFactor = MTLBlendFactorOneMinusSourceAlpha;
pipeline_state = [device newRenderPipelineStateWithDescriptor:pipeline_state_descriptor
_pipeline_state = [_device newRenderPipelineStateWithDescriptor:pipeline_state_descriptor
error:&error];
}
Assert(error == nil);
// NOTE(yuval): Create the command queue
command_queue = [device newCommandQueue];
_command_queue = [_device newCommandQueue];
// NOTE(yuval): Initialize buffer caching
dll_init_sentinel(&buffer_cache);
last_buffer_cache_purge_time = system_now_time();
dll_init_sentinel(&_buffer_cache);
_last_buffer_cache_purge_time = system_now_time();
// NOTE(yuval): Initialize the textures array
textures = (Metal_Texture*)system_memory_allocate(metal__max_textures * sizeof(Metal_Texture), file_name_line_number_lit_u8);
next_texture_handle_index = 0;
// NOTE(yuval): Initialize the texture slot list
block_zero_struct(&_texture_slots);
// NOTE(yuval): Create the fallback texture
_target->fallback_texture_id = [self get_texture_of_dim:V3i32(2, 2, 1)
@@ -260,9 +295,9 @@ metal__make_buffer(u32 size, id<MTLDevice> device){
data:white_block];
// NOTE(yuval): Create a capture scope for gpu frame capture
capture_scope = [[MTLCaptureManager sharedCaptureManager]
newCaptureScopeWithDevice:device];
capture_scope.label = @"4coder Metal Capture Scope";
_capture_scope = [[MTLCaptureManager sharedCaptureManager]
newCaptureScopeWithDevice:_device];
_capture_scope.label = @"4coder Metal Capture Scope";
return(self);
}
@@ -273,19 +308,30 @@ metal__make_buffer(u32 size, id<MTLDevice> device){
- (void)drawInMTKView:(nonnull MTKView*)view{
#if FRED_INTERNAL
[capture_scope beginScope];
[_capture_scope beginScope];
#endif
// HACK(yuval): This is the best way I found to force valid width and height without drawing on the next draw cycle (1 frame delay).
CGSize drawable_size = [view drawableSize];
i32 width = (i32)Min(_target->width, drawable_size.width);
i32 height = (i32)Min(_target->height, drawable_size.height);
Font_Set *font_set = (Font_Set*)_target->font_set;
// NOTE(yuval): Free any textures in the target's texture free list
for (Render_Free_Texture *free_texture = _target->free_texture_first;
free_texture;
free_texture = free_texture->next){
Metal_Texture_Slot *texture_slot = [self get_texture_slot_at_handle:free_texture->tex_id];
if (texture_slot){
sll_queue_push(_texture_slots.first_free_slot, _texture_slots.last_free_slot, texture_slot);
}
}
_target->free_texture_first = 0;
_target->free_texture_last = 0;
// NOTE(yuval): Create the command buffer
id<MTLCommandBuffer> command_buffer = [command_queue commandBuffer];
id<MTLCommandBuffer> command_buffer = [_command_queue commandBuffer];
command_buffer.label = @"4coder Metal Render Command";
// NOTE(yuval): Obtain the render pass descriptor from the renderer's view
@@ -302,7 +348,7 @@ metal__make_buffer(u32 size, id<MTLDevice> device){
[render_encoder setViewport:(MTLViewport){0.0, 0.0, (double)width, (double)height, 0.0, 1.0}];
// NOTE(yuval): Set the render pipeline to use for drawing
[render_encoder setRenderPipelineState:pipeline_state];
[render_encoder setRenderPipelineState:_pipeline_state];
// NOTE(yuval): Calculate the projection matrix
float left = 0, right = (float)width;
@@ -423,26 +469,55 @@ metal__make_buffer(u32 size, id<MTLDevice> device){
[command_buffer commit];
#if FRED_INTERNAL
[capture_scope endScope];
[_capture_scope endScope];
#endif
}
- (u32)get_texture_of_dim:(Vec3_i32)dim kind:(Texture_Kind)kind{
u32 handle = next_texture_handle_index;
u32 handle = metal__invalid_texture_slot_locator;
// NOTE(yuval): Create a texture descriptor
MTLTextureDescriptor *texture_descriptor = [[MTLTextureDescriptor alloc] init];
texture_descriptor.textureType = MTLTextureType2DArray;
texture_descriptor.pixelFormat = MTLPixelFormatR8Unorm;
texture_descriptor.width = dim.x;
texture_descriptor.height = dim.y;
texture_descriptor.depth = dim.z;
// NOTE(yuval): Check for a free texture slot and allocate another slot bucket if no free slot has been found
if (!_texture_slots.first_free_slot){
// NOTE(yuval): Assert that the next bucket's index can fit in a u16
Assert(_texture_slots.bucket_count < ((u16)-1));
// NOTE(yuval): Create the texture from the device using the descriptor and add it to the textures array
Metal_Texture texture = [device newTextureWithDescriptor:texture_descriptor];
textures[handle] = texture;
Metal_Texture_Slot_Bucket *bucket = (Metal_Texture_Slot_Bucket*)system_memory_allocate(sizeof(Metal_Texture_Slot_Bucket), file_name_line_number_lit_u8);
next_texture_handle_index += 1;
for (u16 slot_index = 0;
slot_index < ArrayCount(bucket->slots);
++slot_index){
Metal_Texture_Slot *slot = &bucket->slots[slot_index];
block_zero_struct(slot);
slot->locator.bucket_index = _texture_slots.bucket_count;
slot->locator.slot_index = slot_index;
sll_queue_push(_texture_slots.first_free_slot, _texture_slots.last_free_slot, slot);
}
sll_queue_push(_texture_slots.first_bucket, _texture_slots.last_bucket, bucket);
_texture_slots.bucket_count += 1;
}
// NOTE(yuval): Get the first free texture slot and remove it from the free list (a slot is guaranteed to exist because a new bucket is allocated above whenever the free list is empty).
if (_texture_slots.first_free_slot){
Metal_Texture_Slot *texture_slot = _texture_slots.first_free_slot;
sll_queue_pop(_texture_slots.first_free_slot, _texture_slots.last_free_slot);
texture_slot->next = 0;
// NOTE(yuval): Create a texture descriptor.
MTLTextureDescriptor *texture_descriptor = [[MTLTextureDescriptor alloc] init];
texture_descriptor.textureType = MTLTextureType2DArray;
texture_descriptor.pixelFormat = MTLPixelFormatR8Unorm;
texture_descriptor.width = dim.x;
texture_descriptor.height = dim.y;
texture_descriptor.depth = dim.z;
// NOTE(yuval): Create the texture from the device using the descriptor and store it in the texture slot.
Metal_Texture texture = [_device newTextureWithDescriptor:texture_descriptor];
texture_slot->texture = texture;
handle = texture_slot->locator.packed;
}
return handle;
}
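Taken together, get_texture_of_dim treats the slots as a bucketed pool: when the free list is empty it allocates one more bucket, stamps every slot in it with its locator, and queues all of them as free; the request is then always served by popping the head of the free list and returning that slot's packed locator as the public handle, while get_texture_slot_at_locator (declared in the interface above) walks bucket_index links and indexes by slot_index to get back to the slot. A self-contained sketch of that pool under simplifying assumptions: 4 slots per bucket instead of 256, calloc instead of system_memory_allocate, an int payload instead of the Metal texture, and a LIFO free list where the real code uses the FIFO sll_queue macros.

#include <cassert>
#include <cstdint>
#include <cstdlib>

typedef uint16_t u16;
typedef uint32_t u32;

enum { slots_per_bucket = 4 };  // 256 in the real renderer

union Locator{
    u32 packed;                 // the public handle
    struct{
        u16 bucket_index;
        u16 slot_index;
    };
};

struct Slot{
    Slot *next;                 // free-list link
    int payload;                // stands in for id<MTLTexture>
    Locator locator;
};

struct Bucket{
    Bucket *next;
    Slot slots[slots_per_bucket];
};

struct Pool{
    Bucket *first_bucket;
    Bucket *last_bucket;
    u16 bucket_count;
    Slot *first_free;
};

// Mirrors get_texture_of_dim: grow by one bucket when the free list is empty,
// then serve the request from the head of the free list.
static u32 pool_alloc(Pool *pool){
    if (pool->first_free == 0){
        Bucket *bucket = (Bucket*)calloc(1, sizeof(Bucket));
        for (u16 i = 0; i < slots_per_bucket; i += 1){
            Slot *slot = &bucket->slots[i];
            slot->locator.bucket_index = pool->bucket_count;
            slot->locator.slot_index = i;
            slot->next = pool->first_free;     // LIFO for brevity; the real code queues FIFO
            pool->first_free = slot;
        }
        if (pool->first_bucket == 0){          // append so bucket_index matches list order
            pool->first_bucket = pool->last_bucket = bucket;
        }
        else{
            pool->last_bucket->next = bucket;
            pool->last_bucket = bucket;
        }
        pool->bucket_count += 1;
    }
    Slot *slot = pool->first_free;             // guaranteed non-null after the grow above
    pool->first_free = slot->next;
    slot->next = 0;
    return slot->locator.packed;
}

// Mirrors get_texture_slot_at_locator: walk bucket_index links, then bounds-check slot_index.
static Slot *pool_lookup(Pool *pool, u32 handle){
    Locator locator;
    locator.packed = handle;
    Bucket *bucket = pool->first_bucket;
    for (u16 i = 0; i < locator.bucket_index && bucket != 0; i += 1){
        bucket = bucket->next;
    }
    Slot *result = 0;
    if (bucket != 0 && locator.slot_index < slots_per_bucket){
        result = &bucket->slots[locator.slot_index];
    }
    return result;
}

int main(){
    Pool pool = {};
    u32 handles[5];
    for (int i = 0; i < 5; i += 1){
        handles[i] = pool_alloc(&pool);        // the fifth allocation forces a second bucket
    }
    assert(pool.bucket_count == 2);
    Slot *slot = pool_lookup(&pool, handles[4]);
    assert(slot != 0 && slot->locator.packed == handles[4]);
    return 0;                                  // buckets intentionally leaked; throwaway sketch
}
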
@@ -451,61 +526,92 @@ metal__make_buffer(u32 size, id<MTLDevice> device){
b32 result = false;
if (data){
Metal_Texture texture = textures[handle];
Metal_Texture_Slot *texture_slot = [self get_texture_slot_at_handle:handle];
if (texture_slot){
Metal_Texture texture = texture_slot->texture;
if (texture != 0){
MTLRegion replace_region = {
{(NSUInteger)p.x, (NSUInteger)p.y, (NSUInteger)p.z},
{(NSUInteger)dim.x, (NSUInteger)dim.y, (NSUInteger)dim.z}
};
if (texture != 0){
MTLRegion replace_region = {
{(NSUInteger)p.x, (NSUInteger)p.y, (NSUInteger)p.z},
{(NSUInteger)dim.x, (NSUInteger)dim.y, (NSUInteger)dim.z}
};
// NOTE(yuval): Fill the texture with data
[texture replaceRegion:replace_region
mipmapLevel:0
withBytes:data
bytesPerRow:dim.x];
// NOTE(yuval): Fill the texture with data
[texture replaceRegion:replace_region
mipmapLevel:0
withBytes:data
bytesPerRow:dim.x];
result = true;
result = true;
}
}
}
return result;
return(result);
}
- (void)bind_texture:(u32)handle encoder:(id<MTLRenderCommandEncoder>)render_encoder{
Metal_Texture texture = textures[handle];
if (texture != 0){
[render_encoder setFragmentTexture:texture
atIndex:0];
Metal_Texture_Slot *texture_slot = [self get_texture_slot_at_handle:handle];
if (texture_slot){
Metal_Texture texture = texture_slot->texture;
if (texture != 0){
[render_encoder setFragmentTexture:texture
atIndex:0];
}
}
}
- (Metal_Texture_Slot*)get_texture_slot_at_locator:(Metal_Texture_Slot_Locator)locator{
Metal_Texture_Slot *result = 0;
if (locator.packed != metal__invalid_texture_slot_locator){
Metal_Texture_Slot_Bucket *bucket = _texture_slots.first_bucket;
for (u16 bucket_index = 0;
(bucket_index < locator.bucket_index) && bucket;
++bucket_index, bucket = bucket->next);
if (bucket && (locator.slot_index < metal__texture_slots_per_bucket)){
result = &bucket->slots[locator.slot_index];
}
}
return(result);
}
- (Metal_Texture_Slot*)get_texture_slot_at_handle:(u32)handle{
Metal_Texture_Slot_Locator locator;
locator.packed = handle;
Metal_Texture_Slot *result = [self get_texture_slot_at_locator:locator];
return(result);
}
- (Metal_Buffer*)get_reusable_buffer_with_size:(NSUInteger)size{
// NOTE(yuval): This routine is a modified version of Dear ImGui's MetalContext::dequeueReusableBufferOfLength in imgui_impl_metal.mm
u64 now = system_now_time();
// NOTE(yuval): Purge old buffers that haven't been useful for a while
if ((now - last_buffer_cache_purge_time) > 1000000){
Node prev_buffer_cache = buffer_cache;
dll_init_sentinel(&buffer_cache);
if ((now - _last_buffer_cache_purge_time) > 1000000){
Node prev_buffer_cache = _buffer_cache;
dll_init_sentinel(&_buffer_cache);
for (Node *node = prev_buffer_cache.next;
node != &buffer_cache;
node != &_buffer_cache;
node = node->next){
Metal_Buffer *candidate = CastFromMember(Metal_Buffer, node, node);
if (candidate->last_reuse_time > last_buffer_cache_purge_time){
dll_insert(&buffer_cache, node);
if (candidate->last_reuse_time > _last_buffer_cache_purge_time){
dll_insert(&_buffer_cache, node);
}
}
last_buffer_cache_purge_time = now;
_last_buffer_cache_purge_time = now;
}
// NOTE(yuval): See if we have a buffer we can reuse
Metal_Buffer *best_candidate = 0;
for (Node *node = buffer_cache.next;
node != &buffer_cache;
for (Node *node = _buffer_cache.next;
node != &_buffer_cache;
node = node->next){
Metal_Buffer *candidate = CastFromMember(Metal_Buffer, node, node);
if ((candidate->size >= size) && ((!best_candidate) || (best_candidate->last_reuse_time > candidate->last_reuse_time))){
@@ -521,15 +627,15 @@ metal__make_buffer(u32 size, id<MTLDevice> device){
result = best_candidate;
} else{
// NOTE(yuval): No luck; make a new buffer.
result = metal__make_buffer(size, device);
result = metal__make_buffer(size, _device);
}
return result;
return(result);
}
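The buffer cache above follows the Dear ImGui scheme it credits: when more than 1000000 ticks of system_now_time have passed since the last purge, it drops every cached buffer that has not been reused since that purge, and a request is served by the large-enough buffer with the smallest last_reuse_time, falling back to a fresh allocation. A rough sketch of just that policy, with a std::vector standing in for the intrusive dll_* sentinel list, a plain struct standing in for Metal_Buffer, and the current time passed in instead of calling system_now_time().

#include <cassert>
#include <cstdint>
#include <vector>

typedef uint64_t u64;

// Hypothetical stand-in for Metal_Buffer: just a size and the time it was last reused.
struct Cached_Buffer{
    u64 size;
    u64 last_reuse_time;
};

struct Buffer_Cache{
    std::vector<Cached_Buffer> buffers;
    u64 last_purge_time;
};

// Sketch of the policy in get_reusable_buffer_with_size; `now` uses the same units as
// the real timestamps.
static Cached_Buffer cache_get(Buffer_Cache *cache, u64 size, u64 now){
    // Purge: drop buffers that were not reused since the last purge, at most once per 1000000 ticks.
    if ((now - cache->last_purge_time) > 1000000){
        std::vector<Cached_Buffer> kept;
        for (const Cached_Buffer &candidate : cache->buffers){
            if (candidate.last_reuse_time > cache->last_purge_time){
                kept.push_back(candidate);
            }
        }
        cache->buffers.swap(kept);
        cache->last_purge_time = now;
    }

    // Reuse: among buffers that are big enough, prefer the least recently reused one.
    int best_index = -1;
    for (int i = 0; i < (int)cache->buffers.size(); i += 1){
        const Cached_Buffer &candidate = cache->buffers[i];
        if (candidate.size >= size &&
            (best_index < 0 ||
             cache->buffers[best_index].last_reuse_time > candidate.last_reuse_time)){
            best_index = i;
        }
    }
    if (best_index >= 0){
        Cached_Buffer result = cache->buffers[best_index];
        cache->buffers.erase(cache->buffers.begin() + best_index);
        result.last_reuse_time = now;          // mark it as just reused
        return result;
    }

    // No luck: hand back a fresh buffer (metal__make_buffer in the real code).
    Cached_Buffer fresh = {size, now};
    return fresh;
}

int main(){
    Buffer_Cache cache = {};
    Cached_Buffer b = cache_get(&cache, 1024, 0);       // empty cache: fresh buffer
    cache.buffers.push_back(b);                          // the add_reusable_buffer step
    Cached_Buffer again = cache_get(&cache, 512, 100);   // big enough: reused and removed
    assert(again.size == 1024);
    assert(cache.buffers.empty());
    return 0;
}
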
- (void)add_reusable_buffer:(Metal_Buffer*)buffer{
// NOTE(yuval): This routine is a modified version of Dear ImGui's MetalContext::enqueueReusableBuffer in imgui_impl_metal.mm
dll_insert(&buffer_cache, &buffer->node);
dll_insert(&_buffer_cache, &buffer->node);
}
@end

View File

@@ -1,25 +0,0 @@
/*
See LICENSE folder for this sample's licensing information.
Abstract:
Header containing types and enum constants shared between Metal shaders and C/ObjC source
*/
#ifndef AAPLShaderTypes_h
#define AAPLShaderTypes_h
#undef clamp
#include <simd/simd.h>
#define clamp(a,x,b) clamp_((a),(x),(b))
// This structure defines the layout of vertices sent to the vertex
// shader. This header is shared between the .metal shader and C code, to guarantee that
// the layout of the vertex array in the C code matches the layout that the .metal
// vertex shader expects.
typedef struct
{
vector_float2 position;
vector_float4 color;
} AAPLVertex;
#endif /* AAPLShaderTypes_h */

View File

@@ -1,64 +0,0 @@
/*
See LICENSE folder for this sample's licensing information.
Abstract:
Metal shaders used for this sample
*/
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Include header shared between this Metal shader code and C code executing Metal API commands.
#import "AAPLShaderTypes.h"
// Vertex shader outputs and fragment shader inputs
typedef struct
{
// The [[position]] attribute of this member indicates that this value
// is the clip space position of the vertex when this structure is
// returned from the vertex function.
float4 position [[position]];
// Since this member does not have a special attribute, the rasterizer
// interpolates its value with the values of the other triangle vertices
// and then passes the interpolated value to the fragment shader for each
// fragment in the triangle.
float4 color;
} RasterizerData;
vertex RasterizerData
vertexShader(uint vertexID [[vertex_id]],
constant AAPLVertex *vertices [[buffer(AAPLVertexInputIndexVertices)]],
constant vector_uint2 *viewportSizePointer [[buffer(AAPLVertexInputIndexViewportSize)]])
{
RasterizerData out;
// Index into the array of positions to get the current vertex.
// The positions are specified in pixel dimensions (i.e. a value of 100
// is 100 pixels from the origin).
float2 pixelSpacePosition = vertices[vertexID].position.xy;
// Get the viewport size and cast to float.
vector_float2 viewportSize = vector_float2(*viewportSizePointer);
// To convert from positions in pixel space to positions in clip-space,
// divide the pixel coordinates by half the size of the viewport.
out.position = vector_float4(0.0, 0.0, 0.0, 1.0);
out.position.xy = pixelSpacePosition / (viewportSize / 2.0);
// Pass the input color directly to the rasterizer.
out.color = vertices[vertexID].color;
return out;
}
fragment float4 fragmentShader(RasterizerData in [[stage_in]])
{
// Return the interpolated color.
return in.color;
}

View File

@@ -285,7 +285,6 @@ gl_render(Render_Target *t){
t->free_texture_first = 0;
t->free_texture_last = 0;
u64 begin_draw = system_now_time();
for (Render_Group *group = t->group_first;
group != 0;
group = group->next){

View File

@@ -67,6 +67,7 @@
#include <sys/mman.h> // NOTE(yuval): Used for mmap, munmap, mprotect
#include <sys/stat.h> // NOTE(yuval): Used for stat
#include <sys/types.h> // NOTE(yuval): Used for struct stat, pid_t
#include <sys/syslimits.h> // NOTE(yuval): Used for PATH_MAX
#include <stdlib.h> // NOTE(yuval): Used for free
@@ -297,6 +298,15 @@ mac_to_object(Plat_Handle handle){
////////////////////////////////
function void
mac_init_recursive_mutex(pthread_mutex_t *mutex){
pthread_mutexattr_t attr;
pthread_mutexattr_init(&attr);
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
pthread_mutex_init(mutex, &attr);
}
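This helper matters because a mutex created with default attributes must not be locked again by the thread that already holds it (depending on the type that is a deadlock or undefined behavior), while PTHREAD_MUTEX_RECURSIVE counts nested locks and requires a matching number of unlocks. A small standalone pthread sketch of a recursive lock being re-entered by the same thread, independent of the 4coder types (link with -lpthread):

#include <pthread.h>

static pthread_mutex_t mutex;

static void init_recursive_mutex(pthread_mutex_t *m){
    pthread_mutexattr_t attr;
    pthread_mutexattr_init(&attr);
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
    pthread_mutex_init(m, &attr);
    pthread_mutexattr_destroy(&attr);
}

static void inner_work(void){
    // Called with the mutex already held; a recursive mutex tolerates the nested lock.
    pthread_mutex_lock(&mutex);
    pthread_mutex_unlock(&mutex);
}

int main(void){
    init_recursive_mutex(&mutex);
    pthread_mutex_lock(&mutex);
    inner_work();                 // a default-attribute mutex would deadlock here
    pthread_mutex_unlock(&mutex);
    pthread_mutex_destroy(&mutex);
    return 0;
}
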
////////////////////////////////
function void
mac_error_box(char *msg, b32 shutdown = true){
NSAlert *alert = [[[NSAlert alloc] init] autorelease];
@@ -584,6 +594,7 @@ mac_toggle_fullscreen(void){
- (void)windowDidResize:(NSNotification*)notification{
mac_resize(mac_vars.window);
[mac_vars.view display];
}
- (void)windowDidMiniaturize:(NSNotification*)notification{
@@ -629,8 +640,7 @@ mac_toggle_fullscreen(void){
mac_resize(mac_vars.window);
}
- (BOOL)wantsUpdateLayer
{
- (BOOL)wantsUpdateLayer{
return YES;
}
@@ -952,11 +962,13 @@ mac_toggle_fullscreen(void){
float dx = event.scrollingDeltaX;
float dy = event.scrollingDeltaY;
i8 scroll_speed = 100;
i8 wheel_delta = 0;
if (dy > 0){
scroll_speed *= -1;
wheel_delta = -100;
} else if (dy < 0){
wheel_delta = 100;
}
mac_vars.input_chunk.trans.mouse_wheel = scroll_speed;
mac_vars.input_chunk.trans.mouse_wheel = wheel_delta;
system_signal_step(0);
}
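The change above replaces the sign-flipped scroll_speed with a direct wheel delta: positive scrollingDeltaY (scrolling up) becomes -100, negative becomes +100, and zero now stays zero instead of defaulting to +100. A tiny sketch of just that mapping, with a plain float standing in for the NSEvent field and int8_t for i8:

#include <cassert>
#include <cstdint>

typedef int8_t i8;

// Positive scrollingDeltaY (scrolling up) maps to -100, negative to +100, zero to 0.
static i8 wheel_delta_from_scrolling_delta(float dy){
    i8 wheel_delta = 0;
    if (dy > 0){
        wheel_delta = -100;
    }
    else if (dy < 0){
        wheel_delta = 100;
    }
    return wheel_delta;
}

int main(){
    assert(wheel_delta_from_scrolling_delta(3.5f) == -100);
    assert(wheel_delta_from_scrolling_delta(-0.25f) == 100);
    assert(wheel_delta_from_scrolling_delta(0.0f) == 0);
    return 0;
}
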
@@ -1095,7 +1107,7 @@ main(int arg_count, char **args){
FCoder_App_Delegate *app_delegate = [[FCoder_App_Delegate alloc] init];
[NSApp setDelegate:app_delegate];
pthread_mutex_init(&memory_tracker_mutex, 0);
mac_init_recursive_mutex(&memory_tracker_mutex);
// NOTE(yuval): Context setup
Thread_Context _tctx = {};
@@ -1122,7 +1134,7 @@ main(int arg_count, char **args){
dll_init_sentinel(&mac_vars.free_mac_objects);
dll_init_sentinel(&mac_vars.timer_objects);
pthread_mutex_init(&mac_vars.thread_launch_mutex, 0);
mac_init_recursive_mutex(&mac_vars.thread_launch_mutex);
pthread_cond_init(&mac_vars.thread_launch_cv, 0);
// NOTE(yuval): Screen scale factor calculation

View File

@@ -29,12 +29,12 @@ system_get_path_sig(){
{
local_persist b32 has_stashed_4ed_path = false;
if (!has_stashed_4ed_path){
local_const i32 binary_path_capacity = KB(32);
local_const u32 binary_path_capacity = PATH_MAX;
u8 *memory = (u8*)system_memory_allocate(binary_path_capacity, file_name_line_number_lit_u8);
pid_t pid = getpid();
i32 size = proc_pidpath(pid, memory, binary_path_capacity);
Assert(size <= binary_path_capacity - 1);
Assert(size < binary_path_capacity);
mac_vars.binary_path = SCu8(memory, size);
mac_vars.binary_path = string_remove_last_folder(mac_vars.binary_path);
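The new capacity works because proc_pidpath fills the caller's buffer (PATH_MAX bytes is enough here) and returns the number of bytes it copied, with a value of zero or less on failure. A minimal macOS-only sketch of the same lookup using getpid, printing with the returned length; the surrounding 4coder allocation and string handling are left out:

#include <libproc.h>        // proc_pidpath
#include <sys/syslimits.h>  // PATH_MAX
#include <unistd.h>         // getpid
#include <stdio.h>

int main(){
    char buffer[PATH_MAX];
    pid_t pid = getpid();
    int size = proc_pidpath(pid, buffer, sizeof(buffer));
    if (size > 0){
        // size is the number of bytes written; print with the length rather than
        // relying on a terminator.
        printf("%.*s\n", size, buffer);
    }
    else{
        fprintf(stderr, "proc_pidpath failed\n");
    }
    return 0;
}
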
@@ -638,7 +638,7 @@ system_thread_get_id_sig(){
function
system_mutex_make_sig(){
Mac_Object *object = mac_alloc_object(MacObjectKind_Mutex);
pthread_mutex_init(&object->mutex, 0);
mac_init_recursive_mutex(&object->mutex);
System_Mutex result = mac_to_plat_handle(object);
return(result);
@@ -742,6 +742,7 @@ function void*
mac_memory_allocate_extended(void *base, u64 size, String_Const_u8 location){
u64 adjusted_size = size + ALLOCATION_SIZE_ADJUSTMENT;
void *memory = mmap(base, adjusted_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
Assert(memory != MAP_FAILED);
Memory_Annotation_Tracker_Node *node = (Memory_Annotation_Tracker_Node*)memory;
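One detail the added assert captures: mmap reports failure by returning MAP_FAILED ((void *)-1), not NULL, so that is the value to compare against. A small hedged sketch of the same allocate-with-header shape, with a plain size_t length header standing in for the Memory_Annotation_Tracker_Node and ALLOCATION_SIZE_ADJUSTMENT:

#include <assert.h>
#include <stddef.h>
#include <string.h>
#include <sys/mman.h>

// Reserve a small header in front of the usable memory, roughly the shape of the
// adjusted-size scheme above (here the header just remembers the mapping length).
static void *allocate_with_header(size_t size){
    size_t adjusted_size = size + sizeof(size_t);
    void *memory = mmap(0, adjusted_size, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    assert(memory != MAP_FAILED);      // mmap failure is MAP_FAILED, not NULL
    *(size_t*)memory = adjusted_size;  // header: how much to munmap later
    return (char*)memory + sizeof(size_t);
}

static void free_with_header(void *ptr){
    char *base = (char*)ptr - sizeof(size_t);
    size_t adjusted_size = *(size_t*)base;
    munmap(base, adjusted_size);
}

int main(void){
    char *block = (char*)allocate_with_header(100);
    memset(block, 0, 100);
    free_with_header(block);
    return 0;
}
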
@@ -844,9 +845,7 @@ system_memory_annotation_sig(){
for (Memory_Annotation_Tracker_Node *node = memory_tracker.first;
node != 0;
node = node->next){
// TODO(yuval): Fix the API so that annotations would not mess with the system memory.
// Memory_Annotation_Node *r_node = push_array(arena, Memory_Annotation_Node, 1);
Memory_Annotation_Node *r_node = (Memory_Annotation_Node*)malloc(sizeof(Memory_Annotation_Node));
Memory_Annotation_Node *r_node = push_array(arena, Memory_Annotation_Node, 1);
sll_queue_push(result.first, result.last, r_node);
result.count += 1;