Hey, I've been struggling with this for some days now. I am trying to write to a sparse texture in a compute shader. I'm performing the following steps:
-
Set up a sparse heap and create a texture from it
-
Map the whole area of the sparse texture using
updateTextureMapping(..)
-
Overwrite every value with the value "4" in a compute shader
-
Blit the texture to a shared buffer
-
Assert that the values in the buffer are "4".
I have a minimal example (which is still pretty long unfortunately).
It works perfectly when removing the line heapDesc.type = .sparse
.
What am I missing? I could not find any information that writes to sparse textures are unsupported. Any help would be greatly appreciated.
import Metal
func sparseTexture64x64Demo() throws {
// ── Metal objects
guard let device = MTLCreateSystemDefaultDevice()
else { throw NSError(domain: "SparseNotSupported", code: -1) }
let queue = device.makeCommandQueue()!
let lib = device.makeDefaultLibrary()!
let pipeline = try device.makeComputePipelineState(function: lib.makeFunction(name: "addOne")!)
// ── Texture descriptor
let width = 64, height = 64
let format: MTLPixelFormat = .r32Uint // 4 B per texel
let desc = MTLTextureDescriptor()
desc.textureType = .type2D
desc.pixelFormat = format
desc.width = width
desc.height = height
desc.storageMode = .private
desc.usage = [.shaderWrite, .shaderRead]
// ── Sparse heap
let bytesPerTile = device.sparseTileSizeInBytes
let meta = device.heapTextureSizeAndAlign(descriptor: desc)
let heapBytes = ((bytesPerTile + meta.size + bytesPerTile - 1) / bytesPerTile) * bytesPerTile
let heapDesc = MTLHeapDescriptor()
heapDesc.type = .sparse
heapDesc.storageMode = .private
heapDesc.size = heapBytes
let heap = device.makeHeap(descriptor: heapDesc)!
let tex = heap.makeTexture(descriptor: desc)!
// ── CPU buffers
let bytesPerPixel = MemoryLayout<UInt32>.stride
let rowStride = width * bytesPerPixel
let totalBytes = rowStride * height
let dstBuf = device.makeBuffer(length: totalBytes, options: .storageModeShared)!
let cb = queue.makeCommandBuffer()!
let fence = device.makeFence()!
// 2. Map the sparse tile, then signal the fence
let rse = cb.makeResourceStateCommandEncoder()!
rse.updateTextureMapping(
tex,
mode: .map,
region: MTLRegionMake2D(0, 0, width, height),
mipLevel: 0,
slice: 0)
rse.update(fence) // ← capture all work so far
rse.endEncoding()
let ce = cb.makeComputeCommandEncoder()!
ce.waitForFence(fence)
ce.setComputePipelineState(pipeline)
ce.setTexture(tex, index: 0)
let threadsPerTG = MTLSize(width: 8, height: 8, depth: 1)
let tgCount = MTLSize(width: (width + 7) / 8,
height: (height + 7) / 8,
depth: 1)
ce.dispatchThreadgroups(tgCount, threadsPerThreadgroup: threadsPerTG)
ce.updateFence(fence)
ce.endEncoding()
// Blit texture into shared buffer
let blit = cb.makeBlitCommandEncoder()!
blit.waitForFence(fence)
blit.copy(
from: tex,
sourceSlice: 0,
sourceLevel: 0,
sourceOrigin: MTLOrigin(x: 0, y: 0, z: 0),
sourceSize: MTLSize(width: width, height: height, depth: 1),
to: dstBuf,
destinationOffset: 0,
destinationBytesPerRow: rowStride,
destinationBytesPerImage: totalBytes)
blit.endEncoding()
cb.commit()
cb.waitUntilCompleted()
assert(cb.error == nil, "GPU error: \(String(describing: cb.error))")
// ── Verify a few texels
let out = dstBuf.contents().bindMemory(to: UInt32.self, capacity: width * height)
print("first three texels:", out[0], out[1], out[width]) // 0 1 64
assert(out[0] == 4 && out[1] == 4 && out[width] == 4)
}
Metal shader:
#include <metal_stdlib>
using namespace metal;
kernel void addOne(texture2d<uint, access::write> tex [[texture(0)]],
uint2 gid [[thread_position_in_grid]])
{
tex.write(4, gid);
}