Memory leak when performing a DetectHumanBodyPose3DRequest

Hi, I'm developing an application for macOS and iOS that has to run a DetectHumanBodyPose3DRequest in real time to retrieve the 3D skeleton from the camera feed.

I'm experiencing a memory leak every time the request is performed (when I comment out the line that performs it, memory stays constant). After about a minute the app is using roughly 1 GB of RAM when running with Mac Catalyst.

I've attached a minimal project that reproduces the problem.
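For reference, the per-frame work boils down to the call sketched below. This is a hypothetical stripped-down version (the pixel-buffer source and the loop driving it are omitted, and the names are placeholders); the perform(on:) call is the line that, when commented out, keeps memory flat.

import Vision
import CoreVideo

// Minimal sketch of the per-frame work (not the attached project itself):
// one long-lived request, performed once per camera frame.
let bodyPoseRequest = DetectHumanBodyPose3DRequest()

func detectPose(in pixelBuffer: CVPixelBuffer) async -> HumanBodyPose3DObservation? {
    do {
        // Commenting out this single call keeps memory constant;
        // with it enabled, memory grows to ~1 GB within a minute under Mac Catalyst.
        let observations = try await bodyPoseRequest.perform(on: pixelBuffer)
        return observations.first
    } catch {
        return nil
    }
}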

Code

Camera View

import SwiftUI
import Combine
import Vision

struct CameraView: View {
    
    @StateObject private var viewModel = CameraViewModel()
    
    var body: some View {
        HStack {
            ZStack {
                GeometryReader { geometry in
                    if let image = viewModel.currentFrame {
                        Image(decorative: image, scale: 1)
                            .resizable()
                            .scaledToFill()
                            .frame(width: geometry.size.width,
                                   height: geometry.size.height)
                            .clipped()
                    } else {
                        ProgressView()
                    }
                }
            }
        }
    }
}

class CameraViewModel: ObservableObject {
    @Published var currentFrame: CGImage?
    @Published var frameRate: Double = 0
    @Published var currentVisionBodyPose: HumanBodyPose3DObservation? // Store current body pose
    @Published var currentImageSize: CGSize? // Store current image size
    
    private var cameraManager: CameraManager?
    private var humanBodyPose = HumanBodyPose3DDetector()
    private var lastClassificationTime = Date()
    private var frameCount = 0
    private var lastFrameTime = Date()

    private let classificationThrottleInterval: TimeInterval = 1.0
    private var lastPoseSendTime: Date = .distantPast
    
    init() {
        cameraManager = CameraManager()
        startPreview()
        startClassification()
    }
    
    private func startPreview() {
        Task {
            guard let previewStream = cameraManager?.previewStream else { return }
            for await frame in previewStream {
                let size = CGSize(width: frame.width, height: frame.height)
                Task { @MainActor in
                    self.currentFrame = frame
                    self.currentImageSize = size
                    self.updateFrameRate()
                }
            }
        }
    }
    
    private func startClassification() {
        Task {
            guard let classificationStream = cameraManager?.classificationStream else { return }
            for await pixelBuffer in classificationStream {
                self.classifyFrame(pixelBuffer: pixelBuffer)
            }
        }
    }
    
    private func classifyFrame(pixelBuffer: CVPixelBuffer) {
        humanBodyPose.runHumanBodyPose3DRequestOnImage(pixelBuffer: pixelBuffer) { [weak self] observation in
            guard let self = self else { return }
            DispatchQueue.main.async {
                if let observation = observation {
                    self.currentVisionBodyPose = observation
                    print(observation)
                } else {
                    self.currentVisionBodyPose = nil
                }
            }
        }
    }
    
    private func updateFrameRate() {
        frameCount += 1
        let now = Date()
        let elapsed = now.timeIntervalSince(lastFrameTime)
        
        if elapsed >= 1.0 {
            frameRate = Double(frameCount) / elapsed
            frameCount = 0
            lastFrameTime = now
        }
    }
}

HumanBodyPose3DDetector

import Foundation
import Vision

class HumanBodyPose3DDetector: NSObject, ObservableObject {
    
    @Published var humanObservation: HumanBodyPose3DObservation? = nil
    private let queue = DispatchQueue(label: "humanbodypose.queue")
    private let request = DetectHumanBodyPose3DRequest()
    
    // Wrapper so the CVPixelBuffer can be captured by the Task below
    private struct SendablePixelBuffer: @unchecked Sendable {
        let buffer: CVPixelBuffer
    }
    
    public func runHumanBodyPose3DRequestOnImage(pixelBuffer: CVPixelBuffer, completion: @escaping (HumanBodyPose3DObservation?) -> Void) {
        
        let sendableBuffer = SendablePixelBuffer(buffer: pixelBuffer)
        
        queue.async { [weak self] in

            Task { [weak self, sendableBuffer] in
                do {
                    guard let self = self else { return }
                    let result = try await self.request.perform(on: sendableBuffer.buffer)
                    
                    //process result
                    DispatchQueue.main.async {
                        if result.isEmpty {
                            completion(nil)
                        } else {
                            completion(result[0])
                        }
                    }
                } catch {
                    DispatchQueue.main.async {
                        completion(nil)
                    }
                }
            }
        }
    }
}

Camera Manager

import AVFoundation
import CoreImage
import UIKit


class CameraManager: NSObject {
    
    private let captureSession = AVCaptureSession()
    private var deviceInput: AVCaptureDeviceInput?
    private var videoOutput: AVCaptureVideoDataOutput?
    private let systemPreferredCamera = AVCaptureDevice.default(for: .video)
    private let ciContext = CIContext()

    private var sessionQueue = DispatchQueue(label: "video.preview.session")
    
    private var addToPreviewStream: ((CGImage) -> Void)?
    
    lazy var previewStream: AsyncStream<CGImage> = {
        AsyncStream(bufferingPolicy: .bufferingNewest(1)) { continuation in
            addToPreviewStream = { cgImage in
                continuation.yield(cgImage)
            }
        }
    }()
    
    private let classificationQueue = DispatchQueue(
        label: "classification.queue",
        qos: .userInitiated,
        attributes: .concurrent
    )

    private var addToClassificationStream: ((CVPixelBuffer) -> Void)?
    private var frameCounter = 0
        
    lazy var classificationStream: AsyncStream<CVPixelBuffer> = {
        AsyncStream(bufferingPolicy: .bufferingNewest(1)) { continuation in
            addToClassificationStream = { pixelBuffer in
                continuation.yield(pixelBuffer)
            }
        }
    }()
    
    private var isAuthorized: Bool {
        get async {
            let status = AVCaptureDevice.authorizationStatus(for: .video)
            var isAuthorized = status == .authorized
            if status == .notDetermined {
                isAuthorized = await AVCaptureDevice.requestAccess(for: .video)
            }
            return isAuthorized
        }
    }
    
    override init() {
        super.init()
        Task {
            await configureSession()
            await startSession()
        }
    }
    
    private func configureSession() async {
        guard await isAuthorized,
              let systemPreferredCamera,
              let deviceInput = try? AVCaptureDeviceInput(device: systemPreferredCamera)
        else { return }
        
        captureSession.beginConfiguration()
        
        if captureSession.canSetSessionPreset(.hd1920x1080) {
            captureSession.sessionPreset = .hd1920x1080
        }
        
        defer {
            self.captureSession.commitConfiguration()
        }
        
        let videoOutput = AVCaptureVideoDataOutput()
       
        videoOutput.setSampleBufferDelegate(self, queue: sessionQueue)
        
        guard captureSession.canAddInput(deviceInput) else {
            return
        }
        
        guard captureSession.canAddOutput(videoOutput) else {
            return
        }
        
        captureSession.addInput(deviceInput)
        captureSession.addOutput(videoOutput)

        //For a vertical orientation of the camera stream
        if let connection = videoOutput.connection(with: .video) {
            connection.videoRotationAngle = 0
        }
    }
    
    
    private func startSession() async {
        guard await isAuthorized else { return }
        captureSession.startRunning()
    }

}

extension CameraManager: AVCaptureVideoDataOutputSampleBufferDelegate {
    func captureOutput(_ output: AVCaptureOutput,
                       didOutput sampleBuffer: CMSampleBuffer,
                       from connection: AVCaptureConnection) {
        
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer),
                  let cgImage = ciContext.createCGImage(
                    CIImage(cvPixelBuffer: pixelBuffer),
                    from: CGRect(x: 0, y: 0,
                    width: CVPixelBufferGetWidth(pixelBuffer),
                    height: CVPixelBufferGetHeight(pixelBuffer)))
            else { return }
        
        addToPreviewStream?(cgImage)
        
        frameCounter += 1
        if frameCounter % 1 == 0 { // % 1 passes every frame; raise the divisor to throttle classification
            addToClassificationStream?(pixelBuffer)
        }
    }
}