dmitri.shuralyov.com/gpu/mtl

Add support for rendering very basic geometry.

The Go API tries to follow the Swift Metal API as closely as possible.

Add tests and examples.
dmitshur committed 6 years ago commit 9453c974ce537a33764337d12411404dbb07f718
Showing partial commit. Full Commit
Collapse all
example_test.go
@@ -0,0 +1,214 @@
// +build darwin

package mtl_test

import (
	"encoding/json"
	"fmt"
	"image"
	"image/color"
	"log"
	"os"
	"unsafe"

	"dmitri.shuralyov.com/gpu/mtl"
	"golang.org/x/image/math/f32"
)

func Example_listDevices() {
	allDevices := mtl.CopyAllDevices()
	printJSON("all Metal devices in the system = ", allDevices)

	device, err := mtl.CreateSystemDefaultDevice()
	if err != nil {
		log.Fatalln(err)
	}
	printJSON("preferred system default Metal device = ", device)

	fmt.Println("device supports the macOS GPU family 1, version 1 feature set:", device.SupportsFeatureSet(mtl.MacOSGPUFamily1V1))
	fmt.Println("device supports the macOS GPU family 1, version 2 feature set:", device.SupportsFeatureSet(mtl.MacOSGPUFamily1V2))
	fmt.Println("device supports the macOS GPU family 1, version 3 feature set:", device.SupportsFeatureSet(mtl.MacOSGPUFamily1V3))
	fmt.Println("device supports the macOS GPU family 1, version 4 feature set:", device.SupportsFeatureSet(mtl.MacOSGPUFamily1V4))
	fmt.Println("device supports the macOS GPU family 2, version 1 feature set:", device.SupportsFeatureSet(mtl.MacOSGPUFamily2V1))

	// Sample output:
	// all Metal devices in the system = [
	// 	{
	// 		"Headless": false,
	// 		"LowPower": true,
	// 		"Removable": false,
	// 		"RegistryID": 4294968304,
	// 		"Name": "Intel Iris Pro Graphics"
	// 	},
	// 	{
	// 		"Headless": false,
	// 		"LowPower": false,
	// 		"Removable": false,
	// 		"RegistryID": 4294968344,
	// 		"Name": "AMD Radeon R9 M370X"
	// 	}
	// ]
	// preferred system default Metal device = {
	// 	"Headless": false,
	// 	"LowPower": false,
	// 	"Removable": false,
	// 	"RegistryID": 4294968344,
	// 	"Name": "AMD Radeon R9 M370X"
	// }
	// device supports the macOS GPU family 1, version 1 feature set: true
	// device supports the macOS GPU family 1, version 2 feature set: true
	// device supports the macOS GPU family 1, version 3 feature set: true
	// device supports the macOS GPU family 1, version 4 feature set: false
	// device supports the macOS GPU family 2, version 1 feature set: false
}

// printJSON prints label, then v as JSON encoded with indent to stdout. It panics on any error.
// It's meant to be used by examples to print the output.
func printJSON(label string, v interface{}) {
	fmt.Print(label)
	w := json.NewEncoder(os.Stdout)
	w.SetIndent("", "\t")
	err := w.Encode(v)
	if err != nil {
		panic(err)
	}
}

func Example_renderTriangle() {
	device, err := mtl.CreateSystemDefaultDevice()
	if err != nil {
		log.Fatalln(err)
	}

	// Create a render pipeline state.
	const source = `#include <metal_stdlib>

using namespace metal;

struct Vertex {
	float4 position [[position]];
	float4 color;
};

vertex Vertex VertexShader(
	uint vertexID [[vertex_id]],
	device Vertex * vertices [[buffer(0)]]
) {
	return vertices[vertexID];
}

fragment float4 FragmentShader(Vertex in [[stage_in]]) {
	return in.color;
}
`
	lib, err := device.MakeLibrary(source, mtl.CompileOptions{})
	if err != nil {
		log.Fatalln(err)
	}
	vs, err := lib.MakeFunction("VertexShader")
	if err != nil {
		log.Fatalln(err)
	}
	fs, err := lib.MakeFunction("FragmentShader")
	if err != nil {
		log.Fatalln(err)
	}
	var rpld mtl.RenderPipelineDescriptor
	rpld.VertexFunction = vs
	rpld.FragmentFunction = fs
	rpld.ColorAttachments[0].PixelFormat = mtl.PixelFormatRGBA8UNorm
	rps, err := device.MakeRenderPipelineState(rpld)
	if err != nil {
		log.Fatalln(err)
	}

	// Create a vertex buffer.
	type Vertex struct {
		Position f32.Vec4
		Color    f32.Vec4
	}
	vertexData := [...]Vertex{
		{f32.Vec4{+0.00, +0.75, 0, 1}, f32.Vec4{1, 1, 1, 1}},
		{f32.Vec4{-0.75, -0.75, 0, 1}, f32.Vec4{1, 1, 1, 1}},
		{f32.Vec4{+0.75, -0.75, 0, 1}, f32.Vec4{0, 0, 0, 1}},
	}
	vertexBuffer := device.MakeBuffer(unsafe.Pointer(&vertexData[0]), unsafe.Sizeof(vertexData), mtl.ResourceStorageModeManaged)

	// Create an output texture to render into.
	td := mtl.TextureDescriptor{
		PixelFormat: mtl.PixelFormatRGBA8UNorm,
		Width:       80,
		Height:      20,
		StorageMode: mtl.StorageModeManaged,
	}
	texture := device.MakeTexture(td)

	cq := device.MakeCommandQueue()
	cb := cq.MakeCommandBuffer()

	// Encode all render commands.
	var rpd mtl.RenderPassDescriptor
	rpd.ColorAttachments[0].LoadAction = mtl.LoadActionClear
	rpd.ColorAttachments[0].StoreAction = mtl.StoreActionStore
	rpd.ColorAttachments[0].ClearColor = mtl.ClearColor{Red: 0, Green: 0, Blue: 0, Alpha: 1}
	rpd.ColorAttachments[0].Texture = texture
	rce := cb.MakeRenderCommandEncoder(rpd)
	rce.SetRenderPipelineState(rps)
	rce.SetVertexBuffer(vertexBuffer, 0, 0)
	rce.DrawPrimitives(mtl.PrimitiveTypeTriangle, 0, 3)
	rce.EndEncoding()

	// Encode all blit commands.
	bce := cb.MakeBlitCommandEncoder()
	bce.Synchronize(texture)
	bce.EndEncoding()

	cb.Commit()
	cb.WaitUntilCompleted()

	// Read pixels from output texture into an image.
	img := image.NewNRGBA(image.Rect(0, 0, texture.Width, texture.Height))
	bytesPerRow := 4 * texture.Width
	region := mtl.RegionMake2D(0, 0, texture.Width, texture.Height)
	texture.GetBytes(&img.Pix[0], uintptr(bytesPerRow), region, 0)

	// Output image to stdout as grayscale ASCII art.
	levels := []struct {
		MinY  uint8
		Shade string
	}{{220, " "}, {170, "░"}, {85, "▒"}, {35, "▓"}, {0, "█"}}
	for y := img.Bounds().Min.Y; y < img.Bounds().Max.Y; y++ {
		for x := img.Bounds().Min.X; x < img.Bounds().Max.X; x++ {
			c := color.GrayModel.Convert(img.At(x, y)).(color.Gray)
			for _, l := range levels {
				if c.Y >= l.MinY {
					fmt.Print(l.Shade)
					break
				}
			}
		}
		fmt.Println()
	}

	// Output:
	// ████████████████████████████████████████████████████████████████████████████████
	// ████████████████████████████████████████████████████████████████████████████████
	// ████████████████████████████████████████████████████████████████████████████████
	// ██████████████████████████████████████    ██████████████████████████████████████
	// ████████████████████████████████████        ████████████████████████████████████
	// ██████████████████████████████████        ░░░░██████████████████████████████████
	// ████████████████████████████████        ░░░░░░░░████████████████████████████████
	// ██████████████████████████████        ░░░░░░░░░░░░██████████████████████████████
	// ████████████████████████████        ░░░░░░░░░░░░▒▒▒▒████████████████████████████
	// ██████████████████████████        ░░░░░░░░░░░░▒▒▒▒▒▒▒▒██████████████████████████
	// ████████████████████████        ░░░░░░░░░░░░▒▒▒▒▒▒▒▒▒▒▒▒████████████████████████
	// ██████████████████████        ░░░░░░░░░░░░▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒██████████████████████
	// ████████████████████        ░░░░░░░░░░░░▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒████████████████████
	// ██████████████████        ░░░░░░░░░░░░▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▓▓▓▓██████████████████
	// ████████████████        ░░░░░░░░░░░░▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▓▓▓▓▓▓▓▓████████████████
	// ██████████████        ░░░░░░░░░░░░▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▓▓▓▓▓▓▓▓▓▓▓▓██████████████
	// ████████████        ░░░░░░░░░░░░▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▓▓▓▓▓▓▓▓▓▓▓▓████████████████
	// ████████████████████████████████████████████████████████████████████████████████
	// ████████████████████████████████████████████████████████████████████████████████
	// ████████████████████████████████████████████████████████████████████████████████
}
mtl.go
@@ -1,28 +1,270 @@
// +build darwin

// Package mtl provides access to Apple's Metal API (https://developer.apple.com/documentation/metal).
//
// This package is in very early stages of development.
// Less than 5% of the Metal API surface is implemented.
// Current functionality is sufficient to list Metal-capable devices
// on the system and query basic information about them.
// The API will change when opportunities for improvement are discovered; it is not yet frozen.
// Less than 20% of the Metal API surface is implemented.
// Current functionality is sufficient to render very basic geometry.
package mtl

import (
	"errors"
	"fmt"
	"unsafe"
)

/*
#cgo CFLAGS: -x objective-c
#cgo LDFLAGS: -framework Metal
#cgo LDFLAGS: -framework Metal -framework Foundation
#include <stdlib.h>
#include "mtl.h"
struct Library Go_Device_MakeLibrary(void * device, _GoString_ source) {
	return Device_MakeLibrary(device, _GoStringPtr(source), _GoStringLen(source));
}
*/
import "C"

// FeatureSet defines a specific platform, hardware, and software configuration.
//
// Reference: https://developer.apple.com/documentation/metal/mtlfeatureset.
type FeatureSet uint16

// The device feature sets that define specific platform, hardware, and software configurations.
const (
	MacOSGPUFamily1V1 FeatureSet = 10000 // The GPU family 1, version 1 feature set for macOS.
	MacOSGPUFamily1V2 FeatureSet = 10001 // The GPU family 1, version 2 feature set for macOS.
	MacOSGPUFamily1V3 FeatureSet = 10003 // The GPU family 1, version 3 feature set for macOS.
	MacOSGPUFamily1V4 FeatureSet = 10004 // The GPU family 1, version 4 feature set for macOS.
	MacOSGPUFamily2V1 FeatureSet = 10005 // The GPU family 2, version 1 feature set for macOS.
)

// PixelFormat defines data formats that describe the organization
// and characteristics of individual pixels in a texture.
//
// Reference: https://developer.apple.com/documentation/metal/mtlpixelformat.
type PixelFormat uint8

// The data formats that describe the organization and characteristics
// of individual pixels in a texture.
const (
	PixelFormatRGBA8UNorm PixelFormat = 70 // Ordinary format with four 8-bit normalized unsigned integer components in RGBA order.
	PixelFormatBGRA8UNorm PixelFormat = 80 // Ordinary format with four 8-bit normalized unsigned integer components in BGRA order.
)

// PrimitiveType defines geometric primitive types for drawing commands.
//
// Reference: https://developer.apple.com/documentation/metal/mtlprimitivetype.
type PrimitiveType uint8

// Geometric primitive types for drawing commands.
const (
	PrimitiveTypePoint         PrimitiveType = 0
	PrimitiveTypeLine          PrimitiveType = 1
	PrimitiveTypeLineStrip     PrimitiveType = 2
	PrimitiveTypeTriangle      PrimitiveType = 3
	PrimitiveTypeTriangleStrip PrimitiveType = 4
)

// LoadAction defines actions performed at the start of a rendering pass
// for a render command encoder.
//
// Reference: https://developer.apple.com/documentation/metal/mtlloadaction.
type LoadAction uint8

// Actions performed at the start of a rendering pass for a render command encoder.
const (
	LoadActionDontCare LoadAction = 0
	LoadActionLoad     LoadAction = 1
	LoadActionClear    LoadAction = 2
)

// StoreAction defines actions performed at the end of a rendering pass
// for a render command encoder.
//
// Reference: https://developer.apple.com/documentation/metal/mtlstoreaction.
type StoreAction uint8

// Actions performed at the end of a rendering pass for a render command encoder.
const (
	StoreActionDontCare                   StoreAction = 0
	StoreActionStore                      StoreAction = 1
	StoreActionMultisampleResolve         StoreAction = 2
	StoreActionStoreAndMultisampleResolve StoreAction = 3
	StoreActionUnknown                    StoreAction = 4
	StoreActionCustomSampleDepthStore     StoreAction = 5
)

// StorageMode defines defines the memory location and access permissions of a resource.
//
// Reference: https://developer.apple.com/documentation/metal/mtlstoragemode.
type StorageMode uint8

const (
	// StorageModeShared indicates that the resource is stored in system memory
	// accessible to both the CPU and the GPU.
	StorageModeShared StorageMode = 0

	// StorageModeManaged indicates that the resource exists as a synchronized
	// memory pair with one copy stored in system memory accessible to the CPU
	// and another copy stored in video memory accessible to the GPU.
	StorageModeManaged StorageMode = 1

	// StorageModePrivate indicates that the resource is stored in memory
	// only accessible to the GPU. In iOS and tvOS, the resource is stored in
	// system memory. In macOS, the resource is stored in video memory.
	StorageModePrivate StorageMode = 2

	// StorageModeMemoryless indicates that the resource is stored in on-tile memory,
	// without CPU or GPU memory backing. The contents of the on-tile memory are undefined
	// and do not persist; the only way to populate the resource is to render into it.
	// Memoryless resources are limited to temporary render targets (i.e., Textures configured
	// with a TextureDescriptor and used with a RenderPassAttachmentDescriptor).
	StorageModeMemoryless StorageMode = 3
)

// ResourceOptions defines optional arguments used to create
// and influence behavior of buffer and texture objects.
//
// Reference: https://developer.apple.com/documentation/metal/mtlresourceoptions.
type ResourceOptions uint16

const (
	// ResourceCPUCacheModeDefaultCache is the default CPU cache mode for the resource.
	// Guarantees that read and write operations are executed in the expected order.
	ResourceCPUCacheModeDefaultCache ResourceOptions = ResourceOptions(CPUCacheModeDefaultCache) << resourceCPUCacheModeShift

	// ResourceCPUCacheModeWriteCombined is a write-combined CPU cache mode for the resource.
	// Optimized for resources that the CPU will write into, but never read.
	ResourceCPUCacheModeWriteCombined ResourceOptions = ResourceOptions(CPUCacheModeWriteCombined) << resourceCPUCacheModeShift

	// ResourceStorageModeShared indicates that the resource is stored in system memory
	// accessible to both the CPU and the GPU.
	ResourceStorageModeShared ResourceOptions = ResourceOptions(StorageModeShared) << resourceStorageModeShift

	// ResourceStorageModeManaged indicates that the resource exists as a synchronized
	// memory pair with one copy stored in system memory accessible to the CPU
	// and another copy stored in video memory accessible to the GPU.
	ResourceStorageModeManaged ResourceOptions = ResourceOptions(StorageModeManaged) << resourceStorageModeShift

	// ResourceStorageModePrivate indicates that the resource is stored in memory
	// only accessible to the GPU. In iOS and tvOS, the resource is stored
	// in system memory. In macOS, the resource is stored in video memory.
	ResourceStorageModePrivate ResourceOptions = ResourceOptions(StorageModePrivate) << resourceStorageModeShift

	// ResourceStorageModeMemoryless indicates that the resource is stored in on-tile memory,
	// without CPU or GPU memory backing. The contents of the on-tile memory are undefined
	// and do not persist; the only way to populate the resource is to render into it.
	// Memoryless resources are limited to temporary render targets (i.e., Textures configured
	// with a TextureDescriptor and used with a RenderPassAttachmentDescriptor).
	ResourceStorageModeMemoryless ResourceOptions = ResourceOptions(StorageModeMemoryless) << resourceStorageModeShift

	// ResourceHazardTrackingModeUntracked indicates that the command encoder dependencies
	// for this resource are tracked manually with Fence objects. This value is always set
	// for resources sub-allocated from a Heap object and may optionally be specified for
	// non-heap resources.
	ResourceHazardTrackingModeUntracked ResourceOptions = 1 << resourceHazardTrackingModeShift
)

const (
	resourceCPUCacheModeShift       = 0
	resourceStorageModeShift        = 4
	resourceHazardTrackingModeShift = 8
)

// CPUCacheMode is the CPU cache mode that defines the CPU mapping of a resource.
//
// Reference: https://developer.apple.com/documentation/metal/mtlcpucachemode.
type CPUCacheMode uint8

const (
	// CPUCacheModeDefaultCache is the default CPU cache mode for the resource.
	// Guarantees that read and write operations are executed in the expected order.
	CPUCacheModeDefaultCache CPUCacheMode = 0

	// CPUCacheModeWriteCombined is a write-combined CPU cache mode for the resource.
	// Optimized for resources that the CPU will write into, but never read.
	CPUCacheModeWriteCombined CPUCacheMode = 1
)

// Resource represents a memory allocation for storing specialized data
// that is accessible to the GPU.
//
// Reference: https://developer.apple.com/documentation/metal/mtlresource.
type Resource interface {
	resource() unsafe.Pointer
}

// RenderPipelineDescriptor configures new RenderPipelineState objects.
//
// Reference: https://developer.apple.com/documentation/metal/mtlrenderpipelinedescriptor.
type RenderPipelineDescriptor struct {
	// VertexFunction is a programmable function that processes individual vertices in a rendering pass.
	VertexFunction Function

	// FragmentFunction is a programmable function that processes individual fragments in a rendering pass.
	FragmentFunction Function

	// ColorAttachments is an array of attachments that store color data.
	ColorAttachments [1]RenderPipelineColorAttachmentDescriptor
}

// RenderPipelineColorAttachmentDescriptor describes a color render target that specifies
// the color configuration and color operations associated with a render pipeline.
//
// Reference: https://developer.apple.com/documentation/metal/mtlrenderpipelinecolorattachmentdescriptor.
type RenderPipelineColorAttachmentDescriptor struct {
	// PixelFormat is the pixel format of the color attachment’s texture.
	PixelFormat PixelFormat
}

// RenderPassDescriptor describes a group of render targets that serve as
// the output destination for pixels generated by a render pass.
//
// Reference: https://developer.apple.com/documentation/metal/mtlrenderpassdescriptor.
type RenderPassDescriptor struct {
	// ColorAttachments is array of state information for attachments that store color data.
	ColorAttachments [1]RenderPassColorAttachmentDescriptor
}

// RenderPassColorAttachmentDescriptor describes a color render target that serves
// as the output destination for color pixels generated by a render pass.
//
// Reference: https://developer.apple.com/documentation/metal/mtlrenderpasscolorattachmentdescriptor.
type RenderPassColorAttachmentDescriptor struct {
	RenderPassAttachmentDescriptor
	ClearColor ClearColor
}

// RenderPassAttachmentDescriptor describes a render target that serves
// as the output destination for pixels generated by a render pass.
//
// Reference: https://developer.apple.com/documentation/metal/mtlrenderpassattachmentdescriptor.
type RenderPassAttachmentDescriptor struct {
	LoadAction  LoadAction
	StoreAction StoreAction
	Texture     Texture
}

// ClearColor is an RGBA value used for a color pixel.
//
// Reference: https://developer.apple.com/documentation/metal/mtlclearcolor.
type ClearColor struct {
	Red, Green, Blue, Alpha float64
}

// TextureDescriptor configures new Texture objects.
//
// Reference: https://developer.apple.com/documentation/metal/mtltexturedescriptor.
type TextureDescriptor struct {
	PixelFormat PixelFormat
	Width       int
	Height      int
	StorageMode StorageMode
}

// Device is abstract representation of the GPU that
// serves as the primary interface for a Metal app.
//
// Reference: https://developer.apple.com/documentation/metal/mtldevice.
type Device struct {
@@ -47,41 +289,41 @@ type Device struct {
// CreateSystemDefaultDevice returns the preferred system default Metal device.
//
// Reference: https://developer.apple.com/documentation/metal/1433401-mtlcreatesystemdefaultdevice.
func CreateSystemDefaultDevice() (Device, error) {
	d := C.CreateSystemDefaultDevice()
	if d.device == nil {
	if d.Device == nil {
		return Device{}, errors.New("Metal is not supported on this system")
	}

	return Device{
		device:     d.device,
		Headless:   d.headless != 0,
		LowPower:   d.lowPower != 0,
		Removable:  d.removable != 0,
		RegistryID: uint64(d.registryID),
		Name:       C.GoString(d.name),
		device:     d.Device,
		Headless:   d.Headless != 0,
		LowPower:   d.LowPower != 0,
		Removable:  d.Removable != 0,
		RegistryID: uint64(d.RegistryID),
		Name:       C.GoString(d.Name),
	}, nil
}

// CopyAllDevices returns all Metal devices in the system.
//
// Reference: https://developer.apple.com/documentation/metal/1433367-mtlcopyalldevices.
func CopyAllDevices() []Device {
	d := C.CopyAllDevices()
	defer C.free(unsafe.Pointer(d.devices))
	defer C.free(unsafe.Pointer(d.Devices))

	ds := make([]Device, d.length)
	ds := make([]Device, d.Length)
	for i := 0; i < len(ds); i++ {
		d := (*C.struct_Device)(unsafe.Pointer(uintptr(unsafe.Pointer(d.devices)) + uintptr(i)*C.sizeof_struct_Device))

		ds[i].device = d.device
		ds[i].Headless = d.headless != 0
		ds[i].LowPower = d.lowPower != 0
		ds[i].Removable = d.removable != 0
		ds[i].RegistryID = uint64(d.registryID)
		ds[i].Name = C.GoString(d.name)
		d := (*C.struct_Device)(unsafe.Pointer(uintptr(unsafe.Pointer(d.Devices)) + uintptr(i)*C.sizeof_struct_Device))

		ds[i].device = d.Device
		ds[i].Headless = d.Headless != 0
		ds[i].LowPower = d.LowPower != 0
		ds[i].Removable = d.Removable != 0
		ds[i].RegistryID = uint64(d.RegistryID)
		ds[i].Name = C.GoString(d.Name)
	}
	return ds
}

// SupportsFeatureSet reports whether device d supports feature set fs.
@@ -89,47 +331,310 @@ func CopyAllDevices() []Device {
// Reference: https://developer.apple.com/documentation/metal/mtldevice/1433418-supportsfeatureset.
func (d Device) SupportsFeatureSet(fs FeatureSet) bool {
	return C.Device_SupportsFeatureSet(d.device, C.uint16_t(fs)) != 0
}

// NewCommandQueue creates a new command queue object.
// MakeCommandQueue creates a serial command submission queue.
//
// Reference: https://developer.apple.com/documentation/metal/mtldevice/1433388-newcommandqueue.
func (d Device) NewCommandQueue() CommandQueue {
	return CommandQueue{C.Device_NewCommandQueue(d.device)}
// Reference: https://developer.apple.com/documentation/metal/mtldevice/1433388-makecommandqueue.
func (d Device) MakeCommandQueue() CommandQueue {
	return CommandQueue{C.Device_MakeCommandQueue(d.device)}
}

// FeatureSet defines a specific platform, hardware, and software configuration.
// MakeLibrary creates a new library that contains
// the functions stored in the specified source string.
//
// Reference: https://developer.apple.com/documentation/metal/mtlfeatureset.
type FeatureSet uint16
// Reference: https://developer.apple.com/documentation/metal/mtldevice/1433431-makelibrary.
func (d Device) MakeLibrary(source string, opt CompileOptions) (Library, error) {
	l := C.Go_Device_MakeLibrary(d.device, source) // TODO: opt.
	if l.Library == nil {
		return Library{}, errors.New(C.GoString(l.Error))
	}

const (
	MacOSGPUFamily1V1 FeatureSet = 10000 // The GPU family 1, version 1 feature set for macOS.
	MacOSGPUFamily1V2 FeatureSet = 10001 // The GPU family 1, version 2 feature set for macOS.
	MacOSGPUFamily1V3 FeatureSet = 10003 // The GPU family 1, version 3 feature set for macOS.
	MacOSGPUFamily1V4 FeatureSet = 10004 // The GPU family 1, version 4 feature set for macOS.
	MacOSGPUFamily2V1 FeatureSet = 10005 // The GPU family 2, version 1 feature set for macOS.
)
	return Library{l.Library}, nil
}

// MakeRenderPipelineState creates a render pipeline state object.
//
// Reference: https://developer.apple.com/documentation/metal/mtldevice/1433369-makerenderpipelinestate.
func (d Device) MakeRenderPipelineState(rpd RenderPipelineDescriptor) (RenderPipelineState, error) {
	descriptor := C.struct_RenderPipelineDescriptor{
		VertexFunction:              rpd.VertexFunction.function,
		FragmentFunction:            rpd.FragmentFunction.function,
		ColorAttachment0PixelFormat: C.uint16_t(rpd.ColorAttachments[0].PixelFormat),
	}
	rps := C.Device_MakeRenderPipelineState(d.device, descriptor)
	if rps.RenderPipelineState == nil {
		return RenderPipelineState{}, errors.New(C.GoString(rps.Error))
	}

	return RenderPipelineState{rps.RenderPipelineState}, nil
}

// MakeBuffer allocates a new buffer of a given length
// and initializes its contents by copying existing data into it.
//
// Reference: https://developer.apple.com/documentation/metal/mtldevice/1433429-makebuffer.
func (d Device) MakeBuffer(bytes unsafe.Pointer, length uintptr, opt ResourceOptions) Buffer {
	return Buffer{C.Device_MakeBuffer(d.device, bytes, C.size_t(length), C.uint16_t(opt))}
}

// MakeTexture creates a texture object with privately owned storage
// that contains texture state.
//
// Reference: https://developer.apple.com/documentation/metal/mtldevice/1433425-maketexture.
func (d Device) MakeTexture(td TextureDescriptor) Texture {
	descriptor := C.struct_TextureDescriptor{
		PixelFormat: C.uint16_t(td.PixelFormat),
		Width:       C.uint_t(td.Width),
		Height:      C.uint_t(td.Height),
		StorageMode: C.uint8_t(td.StorageMode),
	}
	return Texture{
		texture: C.Device_MakeTexture(d.device, descriptor),
		Width:   td.Width,  // TODO: Fetch dimensions of actually created texture.
		Height:  td.Height, // TODO: Fetch dimensions of actually created texture.
	}
}

// CompileOptions specifies optional compilation settings for
// the graphics or compute functions within a library.
//
// Reference: https://developer.apple.com/documentation/metal/mtlcompileoptions.
type CompileOptions struct {
	// TODO.
}

// CommandQueue is a queue that organizes the order
// in which command buffers are executed by the GPU.
//
// Reference: https://developer.apple.com/documentation/metal/mtlcommandqueue.
type CommandQueue struct {
	commandQueue unsafe.Pointer
}

// CommandBuffer creates a command buffer.
// MakeCommandBuffer creates a command buffer.
//
// Reference: https://developer.apple.com/documentation/metal/mtlcommandqueue/1508686-commandbuffer.
func (cq CommandQueue) CommandBuffer() CommandBuffer {
	return CommandBuffer{C.CommandQueue_CommandBuffer(cq.commandQueue)}
// Reference: https://developer.apple.com/documentation/metal/mtlcommandqueue/1508686-makecommandbuffer.
func (cq CommandQueue) MakeCommandBuffer() CommandBuffer {
	return CommandBuffer{C.CommandQueue_MakeCommandBuffer(cq.commandQueue)}
}

// CommandBuffer is a container that stores encoded commands
// that are committed to and executed by the GPU.
//
// Reference: https://developer.apple.com/documentation/metal/mtlcommandbuffer.
type CommandBuffer struct {
	commandBuffer unsafe.Pointer
}

// Commit commits this command buffer for execution as soon as possible.
//
// Reference: https://developer.apple.com/documentation/metal/mtlcommandbuffer/1443003-commit.
func (cb CommandBuffer) Commit() {
	C.CommandBuffer_Commit(cb.commandBuffer)
}

// WaitUntilCompleted waits for the execution of this command buffer to complete.
//
// Reference: https://developer.apple.com/documentation/metal/mtlcommandbuffer/1443039-waituntilcompleted.
func (cb CommandBuffer) WaitUntilCompleted() {
	C.CommandBuffer_WaitUntilCompleted(cb.commandBuffer)
}

// MakeRenderCommandEncoder creates an encoder object that can
// encode graphics rendering commands into this command buffer.
//
// Reference: https://developer.apple.com/documentation/metal/mtlcommandbuffer/1442999-makerendercommandencoder.
func (cb CommandBuffer) MakeRenderCommandEncoder(rpd RenderPassDescriptor) RenderCommandEncoder {
	descriptor := C.struct_RenderPassDescriptor{
		ColorAttachment0LoadAction:  C.uint8_t(rpd.ColorAttachments[0].LoadAction),
		ColorAttachment0StoreAction: C.uint8_t(rpd.ColorAttachments[0].StoreAction),
		ColorAttachment0ClearColor: C.struct_ClearColor{
			Red:   C.double(rpd.ColorAttachments[0].ClearColor.Red),
			Green: C.double(rpd.ColorAttachments[0].ClearColor.Green),
			Blue:  C.double(rpd.ColorAttachments[0].ClearColor.Blue),
			Alpha: C.double(rpd.ColorAttachments[0].ClearColor.Alpha),
		},
		ColorAttachment0Texture: rpd.ColorAttachments[0].Texture.texture,
	}
	return RenderCommandEncoder{CommandEncoder{C.CommandBuffer_MakeRenderCommandEncoder(cb.commandBuffer, descriptor)}}
}

// MakeBlitCommandEncoder creates an encoder object that can encode
// memory operation (blit) commands into this command buffer.
//
// Reference: https://developer.apple.com/documentation/metal/mtlcommandbuffer/1443001-makeblitcommandencoder.
func (cb CommandBuffer) MakeBlitCommandEncoder() BlitCommandEncoder {
	return BlitCommandEncoder{CommandEncoder{C.CommandBuffer_MakeBlitCommandEncoder(cb.commandBuffer)}}
}

// CommandEncoder is an encoder that writes sequential GPU commands
// into a command buffer.
//
// Reference: https://developer.apple.com/documentation/metal/mtlcommandencoder.
type CommandEncoder struct {
	commandEncoder unsafe.Pointer
}

// EndEncoding declares that all command generation from this encoder is completed.
//
// Reference: https://developer.apple.com/documentation/metal/mtlcommandencoder/1458038-endencoding.
func (ce CommandEncoder) EndEncoding() {
	C.CommandEncoder_EndEncoding(ce.commandEncoder)
}

// RenderCommandEncoder is an encoder that specifies graphics-rendering commands
// and executes graphics functions.
//
// Reference: https://developer.apple.com/documentation/metal/mtlrendercommandencoder.
type RenderCommandEncoder struct {
	CommandEncoder
}

// SetRenderPipelineState sets the current render pipeline state object.
//
// Reference: https://developer.apple.com/documentation/metal/mtlrendercommandencoder/1515811-setrenderpipelinestate.
func (rce RenderCommandEncoder) SetRenderPipelineState(rps RenderPipelineState) {
	C.RenderCommandEncoder_SetRenderPipelineState(rce.commandEncoder, rps.renderPipelineState)
}

// SetVertexBuffer sets a buffer for the vertex shader function at an index
// in the buffer argument table with an offset that specifies the start of the data.
//
// Reference: https://developer.apple.com/documentation/metal/mtlrendercommandencoder/1515829-setvertexbuffer.
func (rce RenderCommandEncoder) SetVertexBuffer(buf Buffer, offset, index int) {
	C.RenderCommandEncoder_SetVertexBuffer(rce.commandEncoder, buf.buffer, C.uint_t(offset), C.uint_t(index))
}

// DrawPrimitives renders one instance of primitives using vertex data
// in contiguous array elements.
//
// Reference: https://developer.apple.com/documentation/metal/mtlrendercommandencoder/1516326-drawprimitives.
func (rce RenderCommandEncoder) DrawPrimitives(typ PrimitiveType, vertexStart, vertexCount int) {
	C.RenderCommandEncoder_DrawPrimitives(rce.commandEncoder, C.uint8_t(typ), C.uint_t(vertexStart), C.uint_t(vertexCount))
}

// BlitCommandEncoder is an encoder that specifies resource copy
// and resource synchronization commands.
//
// Reference: https://developer.apple.com/documentation/metal/mtlblitcommandencoder.
type BlitCommandEncoder struct {
	CommandEncoder
}

// Synchronize flushes any copy of the specified resource from its corresponding
// Device caches and, if needed, invalidates any CPU caches.
//
// Reference: https://developer.apple.com/documentation/metal/mtlblitcommandencoder/1400775-synchronize.
func (bce BlitCommandEncoder) Synchronize(resource Resource) {
	C.BlitCommandEncoder_Synchronize(bce.commandEncoder, resource.resource())
}

// Library is a collection of compiled graphics or compute functions.
//
// Reference: https://developer.apple.com/documentation/metal/mtllibrary.
type Library struct {
	library unsafe.Pointer
}

// MakeFunction returns a pre-compiled, non-specialized function.
//
// Reference: https://developer.apple.com/documentation/metal/mtllibrary/1515524-makefunction.
func (l Library) MakeFunction(name string) (Function, error) {
	f := C.Library_MakeFunction(l.library, C.CString(name))
	if f == nil {
		return Function{}, fmt.Errorf("function %q not found", name)
	}

	return Function{f}, nil
}

// Texture is a memory allocation for storing formatted
// image data that is accessible to the GPU.
//
// Reference: https://developer.apple.com/documentation/metal/mtltexture.
type Texture struct {
	texture unsafe.Pointer

	// Width is the width of the texture image for the base level mipmap, in pixels.
	Width int

	// Height is the height of the texture image for the base level mipmap, in pixels.
	Height int
}

func (t Texture) resource() unsafe.Pointer { return t.texture }

// GetBytes copies a block of pixels from the storage allocation of texture
// slice zero into system memory at a specified address.
//
// Reference: https://developer.apple.com/documentation/metal/mtltexture/1515751-getbytes.
func (t Texture) GetBytes(pixelBytes *byte, bytesPerRow uintptr, region Region, level int) {
	r := C.struct_Region{
		Origin: C.struct_Origin{
			X: C.uint_t(region.Origin.X),
			Y: C.uint_t(region.Origin.Y),
			Z: C.uint_t(region.Origin.Z),
		},
		Size: C.struct_Size{
			Width:  C.uint_t(region.Size.Width),
			Height: C.uint_t(region.Size.Height),
			Depth:  C.uint_t(region.Size.Depth),
		},
	}
	C.Texture_GetBytes(t.texture, unsafe.Pointer(pixelBytes), C.size_t(bytesPerRow), r, C.uint_t(level))
}

// Buffer is a memory allocation for storing unformatted data
// that is accessible to the GPU.
//
// Reference: https://developer.apple.com/documentation/metal/mtlbuffer.
type Buffer struct {
	buffer unsafe.Pointer
}

// Function represents a programmable graphics or compute function executed by the GPU.
//
// Reference: https://developer.apple.com/documentation/metal/mtlfunction.
type Function struct {
	function unsafe.Pointer
}

// RenderPipelineState contains the graphics functions
// and configuration state used in a render pass.
//
// Reference: https://developer.apple.com/documentation/metal/mtlrenderpipelinestate.
type RenderPipelineState struct {
	renderPipelineState unsafe.Pointer
}

// Region is a rectangular block of pixels in an image or texture,
// defined by its upper-left corner and its size.
//
// Reference: https://developer.apple.com/documentation/metal/mtlregion.
type Region struct {
	Origin Origin // The location of the upper-left corner of the block.
	Size   Size   // The size of the block.
}

// Origin represents the location of a pixel in an image or texture relative
// to the upper-left corner, whose coordinates are (0, 0).
//
// Reference: https://developer.apple.com/documentation/metal/mtlorigin.
type Origin struct{ X, Y, Z int }

// Size represents the set of dimensions that declare the size of an object,
// such as an image, texture, threadgroup, or grid.
//
// Reference: https://developer.apple.com/documentation/metal/mtlsize.
type Size struct{ Width, Height, Depth int }

// RegionMake2D returns a 2D, rectangular region for image or texture data.
//
// Reference: https://developer.apple.com/documentation/metal/1515675-mtlregionmake2d.
func RegionMake2D(x, y, width, height int) Region {
	return Region{
		Origin: Origin{x, y, 0},
		Size:   Size{width, height, 1},
	}
}
mtl.h
@@ -1,25 +1,104 @@
// +build darwin

typedef signed char BOOL;
typedef unsigned long uint_t;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned long long uint64_t;

struct Device {
	void *       device;
	BOOL         headless;
	BOOL         lowPower;
	BOOL         removable;
	uint64_t     registryID;
	const char * name;
	void *       Device;
	BOOL         Headless;
	BOOL         LowPower;
	BOOL         Removable;
	uint64_t     RegistryID;
	const char * Name;
};

struct Devices {
	struct Device * devices;
	int             length;
	struct Device * Devices;
	int             Length;
};

struct Library {
	void *       Library;
	const char * Error;
};

struct RenderPipelineDescriptor {
	void *   VertexFunction;
	void *   FragmentFunction;
	uint16_t ColorAttachment0PixelFormat;
};

struct RenderPipelineState {
	void *       RenderPipelineState;
	const char * Error;
};

struct ClearColor {
	double Red;
	double Green;
	double Blue;
	double Alpha;
};

struct RenderPassDescriptor {
	uint8_t           ColorAttachment0LoadAction;
	uint8_t           ColorAttachment0StoreAction;
	struct ClearColor ColorAttachment0ClearColor;
	void *            ColorAttachment0Texture;
};

struct TextureDescriptor {
	uint16_t PixelFormat;
	uint_t   Width;
	uint_t   Height;
	uint8_t  StorageMode;
};

struct Origin {
	uint_t X;
	uint_t Y;
	uint_t Z;
};

struct Size {
	uint_t Width;
	uint_t Height;
	uint_t Depth;
};

struct Region {
	struct Origin Origin;
	struct Size   Size;
};

struct Device CreateSystemDefaultDevice();
struct Devices CopyAllDevices();
BOOL Device_SupportsFeatureSet(void * device, uint16_t featureSet);
void * Device_NewCommandQueue(void * device);
void * CommandQueue_CommandBuffer(void * commandQueue);

BOOL                       Device_SupportsFeatureSet(void * device, uint16_t featureSet);
void *                     Device_MakeCommandQueue(void * device);
struct Library             Device_MakeLibrary(void * device, const char * source, size_t sourceLength);
struct RenderPipelineState Device_MakeRenderPipelineState(void * device, struct RenderPipelineDescriptor descriptor);
void *                     Device_MakeBuffer(void * device, const void * bytes, size_t length, uint16_t options);
void *                     Device_MakeTexture(void * device, struct TextureDescriptor descriptor);

void * CommandQueue_MakeCommandBuffer(void * commandQueue);

void   CommandBuffer_Commit(void * commandBuffer);
void   CommandBuffer_WaitUntilCompleted(void * commandBuffer);
void * CommandBuffer_MakeRenderCommandEncoder(void * commandBuffer, struct RenderPassDescriptor descriptor);
void * CommandBuffer_MakeBlitCommandEncoder(void * commandBuffer);

void CommandEncoder_EndEncoding(void * commandEncoder);

void RenderCommandEncoder_SetRenderPipelineState(void * renderCommandEncoder, void * renderPipelineState);
void RenderCommandEncoder_SetVertexBuffer(void * renderCommandEncoder, void * buffer, uint_t offset, uint_t index);
void RenderCommandEncoder_DrawPrimitives(void * renderCommandEncoder, uint8_t primitiveType, uint_t vertexStart, uint_t vertexCount);

void BlitCommandEncoder_Synchronize(void * blitCommandEncoder, void * resource);

void * Library_MakeFunction(void * library, const char * name);

void Texture_GetBytes(void * texture, void * pixelBytes, size_t bytesPerRow, struct Region region, uint_t level);
mtl.m
@@ -6,48 +6,157 @@

struct Device CreateSystemDefaultDevice() {
	id<MTLDevice> device = MTLCreateSystemDefaultDevice();
	if (!device) {
		struct Device d;
		d.device = NULL;
		d.Device = NULL;
		return d;
	}

	struct Device d;
	d.device = device;
	d.headless = device.headless;
	d.lowPower = device.lowPower;
	d.removable = device.removable;
	d.registryID = device.registryID;
	d.name = device.name.UTF8String;
	d.Device = device;
	d.Headless = device.headless;
	d.LowPower = device.lowPower;
	d.Removable = device.removable;
	d.RegistryID = device.registryID;
	d.Name = device.name.UTF8String;
	return d;
}

// Caller must call free(d.devices).
struct Devices CopyAllDevices() {
	NSArray<id<MTLDevice>> * devices = MTLCopyAllDevices();

	struct Devices d;
	d.devices = malloc(devices.count * sizeof(struct Device));
	d.Devices = malloc(devices.count * sizeof(struct Device));
	for (int i = 0; i < devices.count; i++) {
		d.devices[i].device = devices[i];
		d.devices[i].headless = devices[i].headless;
		d.devices[i].lowPower = devices[i].lowPower;
		d.devices[i].removable = devices[i].removable;
		d.devices[i].registryID = devices[i].registryID;
		d.devices[i].name = devices[i].name.UTF8String;
		d.Devices[i].Device = devices[i];
		d.Devices[i].Headless = devices[i].headless;
		d.Devices[i].LowPower = devices[i].lowPower;
		d.Devices[i].Removable = devices[i].removable;
		d.Devices[i].RegistryID = devices[i].registryID;
		d.Devices[i].Name = devices[i].name.UTF8String;
	}
	d.length = devices.count;
	d.Length = devices.count;
	return d;
}

BOOL Device_SupportsFeatureSet(void * device, uint16_t featureSet) {
	return [(id<MTLDevice>)device supportsFeatureSet:featureSet];
}

void * Device_NewCommandQueue(void * device) {
void * Device_MakeCommandQueue(void * device) {
	return [(id<MTLDevice>)device newCommandQueue];
};
}

struct Library Device_MakeLibrary(void * device, const char * source, size_t sourceLength) {
	NSError * error;
	id<MTLLibrary> library = [(id<MTLDevice>)device
		newLibraryWithSource:[[NSString alloc] initWithBytes:source length:sourceLength encoding:NSUTF8StringEncoding]
		options:NULL // TODO.
		error:&error];

	struct Library l;
	l.Library = library;
	if (!library) {
		l.Error = error.localizedDescription.UTF8String;
	}
	return l;
}

struct RenderPipelineState Device_MakeRenderPipelineState(void * device, struct RenderPipelineDescriptor descriptor) {
	MTLRenderPipelineDescriptor * renderPipelineDescriptor = [[MTLRenderPipelineDescriptor alloc] init];
	renderPipelineDescriptor.vertexFunction = descriptor.VertexFunction;
	renderPipelineDescriptor.fragmentFunction = descriptor.FragmentFunction;
	renderPipelineDescriptor.colorAttachments[0].pixelFormat = descriptor.ColorAttachment0PixelFormat;

	NSError * error;
	id<MTLRenderPipelineState> renderPipelineState = [(id<MTLDevice>)device newRenderPipelineStateWithDescriptor:renderPipelineDescriptor
	                                                                                                       error:&error];

	struct RenderPipelineState rps;
	rps.RenderPipelineState = renderPipelineState;
	if (!renderPipelineState) {
		rps.Error = error.localizedDescription.UTF8String;
	}
	return rps;
}

void * CommandQueue_CommandBuffer(void * commandQueue) {
void * Device_MakeBuffer(void * device, const void * bytes, size_t length, uint16_t options) {
	return [(id<MTLDevice>)device newBufferWithBytes:(const void *)bytes
	                                          length:(NSUInteger)length
	                                         options:(MTLResourceOptions)options];
}

void * Device_MakeTexture(void * device, struct TextureDescriptor descriptor) {
	MTLTextureDescriptor * textureDescriptor = [[MTLTextureDescriptor alloc] init];
	textureDescriptor.pixelFormat = descriptor.PixelFormat;
	textureDescriptor.width = descriptor.Width;
	textureDescriptor.height = descriptor.Height;
	textureDescriptor.storageMode = descriptor.StorageMode;
	return [(id<MTLDevice>)device newTextureWithDescriptor:textureDescriptor];
}

void * CommandQueue_MakeCommandBuffer(void * commandQueue) {
	return [(id<MTLCommandQueue>)commandQueue commandBuffer];
};
}

void CommandBuffer_Commit(void * commandBuffer) {
	[(id<MTLCommandBuffer>)commandBuffer commit];
}

void CommandBuffer_WaitUntilCompleted(void * commandBuffer) {
	[(id<MTLCommandBuffer>)commandBuffer waitUntilCompleted];
}

void * CommandBuffer_MakeRenderCommandEncoder(void * commandBuffer, struct RenderPassDescriptor descriptor) {
	MTLRenderPassDescriptor * renderPassDescriptor = [[MTLRenderPassDescriptor alloc] init];
	renderPassDescriptor.colorAttachments[0].loadAction = descriptor.ColorAttachment0LoadAction;
	renderPassDescriptor.colorAttachments[0].storeAction = descriptor.ColorAttachment0StoreAction;
	renderPassDescriptor.colorAttachments[0].clearColor = MTLClearColorMake(
		descriptor.ColorAttachment0ClearColor.Red,
		descriptor.ColorAttachment0ClearColor.Green,
		descriptor.ColorAttachment0ClearColor.Blue,
		descriptor.ColorAttachment0ClearColor.Alpha
	);
	renderPassDescriptor.colorAttachments[0].texture = (id<MTLTexture>)descriptor.ColorAttachment0Texture;
	return [(id<MTLCommandBuffer>)commandBuffer renderCommandEncoderWithDescriptor:renderPassDescriptor];
}

void * CommandBuffer_MakeBlitCommandEncoder(void * commandBuffer) {
	return [(id<MTLCommandBuffer>)commandBuffer blitCommandEncoder];
}

void CommandEncoder_EndEncoding(void * commandEncoder) {
	[(id<MTLCommandEncoder>)commandEncoder endEncoding];
}

void RenderCommandEncoder_SetRenderPipelineState(void * renderCommandEncoder, void * renderPipelineState) {
	[(id<MTLRenderCommandEncoder>)renderCommandEncoder setRenderPipelineState:(id<MTLRenderPipelineState>)renderPipelineState];
}

void RenderCommandEncoder_SetVertexBuffer(void * renderCommandEncoder, void * buffer, uint_t offset, uint_t index) {
	[(id<MTLRenderCommandEncoder>)renderCommandEncoder setVertexBuffer:(id<MTLBuffer>)buffer
	                                                            offset:offset
	                                                           atIndex:index];
}

void RenderCommandEncoder_DrawPrimitives(void * renderCommandEncoder, uint8_t primitiveType, uint_t vertexStart, uint_t vertexCount) {
	[(id<MTLRenderCommandEncoder>)renderCommandEncoder drawPrimitives:primitiveType
	                                                      vertexStart:vertexStart
	                                                      vertexCount:vertexCount];
}

void BlitCommandEncoder_Synchronize(void * blitCommandEncoder, void * resource) {
	[(id<MTLBlitCommandEncoder>)blitCommandEncoder synchronizeResource:(id<MTLResource>)resource];
}

void * Library_MakeFunction(void * library, const char * name) {
	return [(id<MTLLibrary>)library newFunctionWithName:[NSString stringWithUTF8String:name]];
}

void Texture_GetBytes(void * texture, void * pixelBytes, size_t bytesPerRow, struct Region region, uint_t level) {
	[(id<MTLTexture>)texture getBytes:(void *)pixelBytes
	                      bytesPerRow:(NSUInteger)bytesPerRow
	                       fromRegion:(MTLRegion){{region.Origin.X, region.Origin.Y, region.Origin.Z}, {region.Size.Width, region.Size.Height, region.Size.Depth}}
	                      mipmapLevel:(NSUInteger)level];
}
mtl_test.go
@@ -0,0 +1,163 @@
// +build darwin

package mtl_test

import (
	"fmt"
	"image"
	"image/color"
	"image/png"
	"os"
	"path/filepath"
	"testing"
	"unsafe"

	"dmitri.shuralyov.com/gpu/mtl"
	"golang.org/x/image/math/f32"
)

func TestRenderTriangle(t *testing.T) {
	device, err := mtl.CreateSystemDefaultDevice()
	if err != nil {
		t.Fatal(err)
	}

	// Create a render pipeline state.
	const source = `#include <metal_stdlib>

using namespace metal;

struct Vertex {
	float4 position [[position]];
	float4 color;
};

vertex Vertex VertexShader(
	uint vertexID [[vertex_id]],
	device Vertex * vertices [[buffer(0)]]
) {
	return vertices[vertexID];
}

fragment float4 FragmentShader(Vertex in [[stage_in]]) {
	return in.color;
}
`
	lib, err := device.MakeLibrary(source, mtl.CompileOptions{})
	if err != nil {
		t.Fatal(err)
	}
	vs, err := lib.MakeFunction("VertexShader")
	if err != nil {
		t.Fatal(err)
	}
	fs, err := lib.MakeFunction("FragmentShader")
	if err != nil {
		t.Fatal(err)
	}
	var rpld mtl.RenderPipelineDescriptor
	rpld.VertexFunction = vs
	rpld.FragmentFunction = fs
	rpld.ColorAttachments[0].PixelFormat = mtl.PixelFormatRGBA8UNorm
	rps, err := device.MakeRenderPipelineState(rpld)
	if err != nil {
		t.Fatal(err)
	}

	// Create a vertex buffer.
	type Vertex struct {
		Position f32.Vec4
		Color    f32.Vec4
	}
	vertexData := [...]Vertex{
		{f32.Vec4{+0.00, +0.75, 0, 1}, f32.Vec4{1, 0, 0, 1}},
		{f32.Vec4{-0.75, -0.75, 0, 1}, f32.Vec4{0, 1, 0, 1}},
		{f32.Vec4{+0.75, -0.75, 0, 1}, f32.Vec4{0, 0, 1, 1}},
	}
	vertexBuffer := device.MakeBuffer(unsafe.Pointer(&vertexData[0]), unsafe.Sizeof(vertexData), mtl.ResourceStorageModeManaged)

	// Create an output texture to render into.
	td := mtl.TextureDescriptor{
		PixelFormat: mtl.PixelFormatRGBA8UNorm,
		Width:       512,
		Height:      512,
		StorageMode: mtl.StorageModeManaged,
	}
	texture := device.MakeTexture(td)

	cq := device.MakeCommandQueue()
	cb := cq.MakeCommandBuffer()

	// Encode all render commands.
	var rpd mtl.RenderPassDescriptor
	rpd.ColorAttachments[0].LoadAction = mtl.LoadActionClear
	rpd.ColorAttachments[0].StoreAction = mtl.StoreActionStore
	rpd.ColorAttachments[0].ClearColor = mtl.ClearColor{Red: 0.35, Green: 0.65, Blue: 0.85, Alpha: 1}
	rpd.ColorAttachments[0].Texture = texture
	rce := cb.MakeRenderCommandEncoder(rpd)
	rce.SetRenderPipelineState(rps)
	rce.SetVertexBuffer(vertexBuffer, 0, 0)
	rce.DrawPrimitives(mtl.PrimitiveTypeTriangle, 0, 3)
	rce.EndEncoding()

	// Encode all blit commands.
	bce := cb.MakeBlitCommandEncoder()
	bce.Synchronize(texture)
	bce.EndEncoding()

	cb.Commit()
	cb.WaitUntilCompleted()

	// Read pixels from output texture into an image.
	got := image.NewNRGBA(image.Rect(0, 0, texture.Width, texture.Height))
	bytesPerRow := 4 * texture.Width
	region := mtl.RegionMake2D(0, 0, texture.Width, texture.Height)
	texture.GetBytes(&got.Pix[0], uintptr(bytesPerRow), region, 0)

	want, err := readPNG(filepath.Join("testdata", "triangle.png"))
	if err != nil {
		t.Fatal(err)
	}

	if err := imageEq(got, want); err != nil {
		t.Errorf("got image != want: %v", err)
	}
}

func readPNG(name string) (image.Image, error) {
	f, err := os.Open(name)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	return png.Decode(f)
}

// imageEq reports whether images m, n are considered equivalent. Two images are considered
// equivalent if they have same bounds, and all pixel colors are within a small margin.
func imageEq(m, n image.Image) error {
	if m.Bounds() != n.Bounds() {
		return fmt.Errorf("bounds don't match: %v != %v", m.Bounds(), n.Bounds())
	}
	for y := m.Bounds().Min.Y; y < m.Bounds().Max.Y; y++ {
		for x := m.Bounds().Min.X; x < m.Bounds().Max.X; x++ {
			c := color.NRGBAModel.Convert(m.At(x, y)).(color.NRGBA)
			d := color.NRGBAModel.Convert(n.At(x, y)).(color.NRGBA)
			if !colorEq(c, d) {
				return fmt.Errorf("pixel (%v, %v) doesn't match: %+v != %+v", x, y, c, d)
			}
		}
	}
	return nil
}

// colorEq reports whether colors c, d are considered equivalent, i.e., within a small margin.
func colorEq(c, d color.NRGBA) bool {
	return eqEpsilon(c.R, d.R) && eqEpsilon(c.G, d.G) && eqEpsilon(c.B, d.B) && eqEpsilon(c.A, d.A)
}

// eqEpsilon reports whether a and b are within epsilon of each other.
func eqEpsilon(a, b uint8) bool {
	const epsilon = 1
	return uint16(a)-uint16(b) <= epsilon || uint16(b)-uint16(a) <= epsilon
}