Thanks for being a part of WWDC25!

How did we do? We’d love to know your thoughts on this year’s conference. Take the survey here

RealityKit Intercept the picture

I'm developing an app in which I need to render a scene containing some models in a RealityView. I want to set up a camera, capture the virtual content through that camera, and save it as an image.

Answered by Vision Pro Engineer in 823129022

Hi @lijiaxu

RealityRenderer enables you to set up a scene with RealityKit entities and a camera. You can render that scene to an image. Here's a snippet I made to demonstrate. There are lots of ways to structure this code, and I wrote this in a hurry, so please don't consider this "best practice". Instead, it's a very rough starting point for you to improve and build on.

ContentView

/// Displays an image rendered offscreen from a RealityKit scene and lets the
/// user move the sphere and re-render.
struct ContentView: View {
    // NOTE(review): the original also declared `@State private var image: Image?`,
    // but the view only ever reads `entityToImage?.image`, so that property was
    // dead state and has been removed.
    @State private var entityToImage: EntityToImage?
    @State private var sphere: Entity

    init() {
        // Build the initial scene: a wireframe sphere one meter in front of
        // the (default, origin-placed) camera.
        let root = Entity()
        let sphere = Entity()
        var wireframeMaterial = UnlitMaterial()
        wireframeMaterial.triangleFillMode = .lines
        sphere.position = [0, 0, -1]
        let modelComponent = ModelComponent(
            mesh: .generateSphere(radius: 0.2),
            // Note, if you use a lit material be sure to
            // add an IBL to EntityToImage.
            materials: [wireframeMaterial]
        )
        sphere.components.set(modelComponent)
        root.addChild(sphere)

        // Initialize the helper with a root entity. (The original assigned
        // `entityToImage` twice — once is enough.) `nil` here means the
        // renderer could not be created; the body shows a fallback message.
        self.sphere = sphere
        self.entityToImage = try? EntityToImage(root: root)
    }

    var body: some View {
        VStack {
            if let image = entityToImage?.image {
                image
                    .resizable()
                    .scaledToFit()
            } else {
                // Shown before the first render completes, or if the
                // renderer failed to initialize.
                Text("Unable to generate image.")
            }

            Button("Move Sphere") {
                Task { @MainActor in
                    // Update the entity, then re-render so the change is visible.
                    sphere.position.x += 0.2
                    try? await entityToImage?.renderImage()
                }
            }
        }
        .task {
            // Render the initial scene once the view appears.
            try? await entityToImage?.renderImage()
        }
    }
}

Helper class

/// Renders a RealityKit entity hierarchy to a SwiftUI `Image` using an
/// offscreen `RealityRenderer` and a `PerspectiveCamera`.
///
/// Usage: initialize with the scene's root entity, then call `renderImage()`;
/// the result is published through the observable `image` property.
@MainActor
@Observable
class EntityToImage {
    private let renderer: RealityRenderer?
    private let cameraEntity = PerspectiveCamera()
    /// Most recently rendered frame; `nil` until `renderImage()` succeeds.
    var image: Image?

    /// Creates the offscreen renderer and wires in the scene and camera.
    /// - Parameter root: Root of the entity hierarchy to render.
    /// - Throws: Rethrows any error from `RealityRenderer()`.
    init(root: Entity) throws {
        do {
            let renderer = try RealityRenderer()
            renderer.entities.append(root)
            renderer.entities.append(cameraEntity)
            cameraEntity.camera.near = 0.01
            cameraEntity.camera.far = 100
            cameraEntity.camera.fieldOfViewOrientation = .horizontal
            cameraEntity.camera.fieldOfViewInDegrees = 105
            renderer.activeCamera = cameraEntity
            // Fully transparent background so the output can be composited.
            renderer.cameraSettings.colorBackground = .color(.init(gray: 0.0, alpha: 0.0))
            renderer.cameraSettings.antialiasing = .none

            self.renderer = renderer
        }
        catch {
            // All stored properties must be initialized before rethrowing.
            renderer = nil
            throw error
        }
    }

    /// Copies the rendered Metal texture's pixels into a CGImage-backed
    /// SwiftUI `Image`. Assumes the texture is 4-channel, 8 bits per
    /// component (matches the `.rgba8Unorm_srgb` descriptor in `renderImage()`).
    private func textureImage(from texture: MTLTexture) -> Image? {
        let componentCount = 4
        let bitmapInfo = CGImageByteOrderInfo.order32Big.rawValue | CGImageAlphaInfo.premultipliedLast.rawValue
        let bitsPerComponent = 8
        // sRGB should always be available, but avoid force-unwrapping.
        guard let colorSpace = CGColorSpace(name: CGColorSpace.sRGB) else {
            return nil
        }

        let bytesPerRow = texture.width * componentCount
        guard let pixelBuffer = malloc(texture.height * bytesPerRow) else {
            return nil
        }
        // Free the CPU-side copy on every exit path.
        defer {
            free(pixelBuffer)
        }

        let region = MTLRegionMake2D(0, 0, texture.width, texture.height)
        texture.getBytes(pixelBuffer, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0)
        let ctx = CGContext(data: pixelBuffer,
                            width: texture.width,
                            height: texture.height,
                            bitsPerComponent: bitsPerComponent,
                            bytesPerRow: bytesPerRow,
                            space: colorSpace,
                            bitmapInfo: bitmapInfo)

        guard let cgImage = ctx?.makeImage() else {
            return nil
        }
        // The original round-tripped this CGImage through CIImage/CIContext,
        // re-encoding it for no benefit — the CGImage is already usable.
        return Image(uiImage: UIImage(cgImage: cgImage))
    }

    /// Renders one frame of the scene to a 1920x1080 texture and publishes
    /// the result via `image` (set to `nil` on failure). Safe no-op when the
    /// renderer failed to initialize.
    func renderImage() async throws {
        guard let renderer else { return }

        // If you use a lit material you'll need an IBL:
//        renderer.lighting.resource = try await EnvironmentResource(named: "ImageBasedLighting")

        let imageWidth: Double = 1920
        let imageHeight: Double = 1080

        let contentSize = CGSize(width: imageWidth, height: imageHeight)
        let descriptor = MTLTextureDescriptor()
        descriptor.width = Int(contentSize.width)
        descriptor.height = Int(contentSize.height)
        descriptor.pixelFormat = .rgba8Unorm_srgb
        descriptor.sampleCount = 1
        descriptor.usage = [.renderTarget, .shaderRead, .shaderWrite]

        guard let texture = MTLCreateSystemDefaultDevice()?.makeTexture(descriptor: descriptor) else {
            return
        }

        // Bridge the callback-based render API into async/await. The
        // continuation resumes exactly once: either in onComplete or in catch.
        image = await withCheckedContinuation { (continuation: CheckedContinuation<Image?, Never>) in
            do {
                let output = try RealityRenderer.CameraOutput(RealityRenderer.CameraOutput.Descriptor.singleProjection(colorTexture: texture))
                try renderer.updateAndRender(deltaTime: 0.1, cameraOutput: output, onComplete: { _ in
                    let textureImage = self.textureImage(from: texture)
                    continuation.resume(returning: textureImage)
                })
            } catch {
                continuation.resume(returning: nil)
            }
        }
    }
}
Accepted Answer

Hi @lijiaxu

RealityRenderer enables you to set up a scene with RealityKit entities and a camera. You can render that scene to an image. Here's a snippet I made to demonstrate. There are lots of ways to structure this code, and I wrote this in a hurry, so please don't consider this "best practice". Instead, it's a very rough starting point for you to improve and build on.

ContentView

/// Displays an image rendered offscreen from a RealityKit scene and lets the
/// user move the sphere and re-render.
struct ContentView: View {
    // NOTE(review): the original also declared `@State private var image: Image?`,
    // but the view only ever reads `entityToImage?.image`, so that property was
    // dead state and has been removed.
    @State private var entityToImage: EntityToImage?
    @State private var sphere: Entity

    init() {
        // Build the initial scene: a wireframe sphere one meter in front of
        // the (default, origin-placed) camera.
        let root = Entity()
        let sphere = Entity()
        var wireframeMaterial = UnlitMaterial()
        wireframeMaterial.triangleFillMode = .lines
        sphere.position = [0, 0, -1]
        let modelComponent = ModelComponent(
            mesh: .generateSphere(radius: 0.2),
            // Note, if you use a lit material be sure to
            // add an IBL to EntityToImage.
            materials: [wireframeMaterial]
        )
        sphere.components.set(modelComponent)
        root.addChild(sphere)

        // Initialize the helper with a root entity. (The original assigned
        // `entityToImage` twice — once is enough.) `nil` here means the
        // renderer could not be created; the body shows a fallback message.
        self.sphere = sphere
        self.entityToImage = try? EntityToImage(root: root)
    }

    var body: some View {
        VStack {
            if let image = entityToImage?.image {
                image
                    .resizable()
                    .scaledToFit()
            } else {
                // Shown before the first render completes, or if the
                // renderer failed to initialize.
                Text("Unable to generate image.")
            }

            Button("Move Sphere") {
                Task { @MainActor in
                    // Update the entity, then re-render so the change is visible.
                    sphere.position.x += 0.2
                    try? await entityToImage?.renderImage()
                }
            }
        }
        .task {
            // Render the initial scene once the view appears.
            try? await entityToImage?.renderImage()
        }
    }
}

Helper class

/// Renders a RealityKit entity hierarchy to a SwiftUI `Image` using an
/// offscreen `RealityRenderer` and a `PerspectiveCamera`.
///
/// Usage: initialize with the scene's root entity, then call `renderImage()`;
/// the result is published through the observable `image` property.
@MainActor
@Observable
class EntityToImage {
    private let renderer: RealityRenderer?
    private let cameraEntity = PerspectiveCamera()
    /// Most recently rendered frame; `nil` until `renderImage()` succeeds.
    var image: Image?

    /// Creates the offscreen renderer and wires in the scene and camera.
    /// - Parameter root: Root of the entity hierarchy to render.
    /// - Throws: Rethrows any error from `RealityRenderer()`.
    init(root: Entity) throws {
        do {
            let renderer = try RealityRenderer()
            renderer.entities.append(root)
            renderer.entities.append(cameraEntity)
            cameraEntity.camera.near = 0.01
            cameraEntity.camera.far = 100
            cameraEntity.camera.fieldOfViewOrientation = .horizontal
            cameraEntity.camera.fieldOfViewInDegrees = 105
            renderer.activeCamera = cameraEntity
            // Fully transparent background so the output can be composited.
            renderer.cameraSettings.colorBackground = .color(.init(gray: 0.0, alpha: 0.0))
            renderer.cameraSettings.antialiasing = .none

            self.renderer = renderer
        }
        catch {
            // All stored properties must be initialized before rethrowing.
            renderer = nil
            throw error
        }
    }

    /// Copies the rendered Metal texture's pixels into a CGImage-backed
    /// SwiftUI `Image`. Assumes the texture is 4-channel, 8 bits per
    /// component (matches the `.rgba8Unorm_srgb` descriptor in `renderImage()`).
    private func textureImage(from texture: MTLTexture) -> Image? {
        let componentCount = 4
        let bitmapInfo = CGImageByteOrderInfo.order32Big.rawValue | CGImageAlphaInfo.premultipliedLast.rawValue
        let bitsPerComponent = 8
        // sRGB should always be available, but avoid force-unwrapping.
        guard let colorSpace = CGColorSpace(name: CGColorSpace.sRGB) else {
            return nil
        }

        let bytesPerRow = texture.width * componentCount
        guard let pixelBuffer = malloc(texture.height * bytesPerRow) else {
            return nil
        }
        // Free the CPU-side copy on every exit path.
        defer {
            free(pixelBuffer)
        }

        let region = MTLRegionMake2D(0, 0, texture.width, texture.height)
        texture.getBytes(pixelBuffer, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0)
        let ctx = CGContext(data: pixelBuffer,
                            width: texture.width,
                            height: texture.height,
                            bitsPerComponent: bitsPerComponent,
                            bytesPerRow: bytesPerRow,
                            space: colorSpace,
                            bitmapInfo: bitmapInfo)

        guard let cgImage = ctx?.makeImage() else {
            return nil
        }
        // The original round-tripped this CGImage through CIImage/CIContext,
        // re-encoding it for no benefit — the CGImage is already usable.
        return Image(uiImage: UIImage(cgImage: cgImage))
    }

    /// Renders one frame of the scene to a 1920x1080 texture and publishes
    /// the result via `image` (set to `nil` on failure). Safe no-op when the
    /// renderer failed to initialize.
    func renderImage() async throws {
        guard let renderer else { return }

        // If you use a lit material you'll need an IBL:
//        renderer.lighting.resource = try await EnvironmentResource(named: "ImageBasedLighting")

        let imageWidth: Double = 1920
        let imageHeight: Double = 1080

        let contentSize = CGSize(width: imageWidth, height: imageHeight)
        let descriptor = MTLTextureDescriptor()
        descriptor.width = Int(contentSize.width)
        descriptor.height = Int(contentSize.height)
        descriptor.pixelFormat = .rgba8Unorm_srgb
        descriptor.sampleCount = 1
        descriptor.usage = [.renderTarget, .shaderRead, .shaderWrite]

        guard let texture = MTLCreateSystemDefaultDevice()?.makeTexture(descriptor: descriptor) else {
            return
        }

        // Bridge the callback-based render API into async/await. The
        // continuation resumes exactly once: either in onComplete or in catch.
        image = await withCheckedContinuation { (continuation: CheckedContinuation<Image?, Never>) in
            do {
                let output = try RealityRenderer.CameraOutput(RealityRenderer.CameraOutput.Descriptor.singleProjection(colorTexture: texture))
                try renderer.updateAndRender(deltaTime: 0.1, cameraOutput: output, onComplete: { _ in
                    let textureImage = self.textureImage(from: texture)
                    continuation.resume(returning: textureImage)
                })
            } catch {
                continuation.resume(returning: nil)
            }
        }
    }
}
RealityKit Intercept the picture
 
 
Q