Memory leak when performing a DetectHumanBodyPose3DRequest

Hi, I'm developing an application for macOS and iOS that has to run a DetectHumanBodyPose3DRequest in real time to retrieve the 3D skeleton from the camera feed.

I'm experiencing a memory leak every time the request is performed (when I comment out the line that performs it, memory stays constant). After about a minute the app is using roughly 1 GB of RAM when running with Mac Catalyst.

I've attached a minimal project that reproduces the problem.
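For reference, the per-frame work boils down to the call sketched below. This is a hypothetical stripped-down version (the pixel-buffer source and the loop driving it are omitted, and the names are placeholders); the perform(on:) call is the line that, when commented out, keeps memory flat.

import Vision
import CoreVideo

// Minimal sketch of the per-frame work (not the attached project itself):
// one long-lived request, performed once per camera frame.
let bodyPoseRequest = DetectHumanBodyPose3DRequest()

func detectPose(in pixelBuffer: CVPixelBuffer) async -> HumanBodyPose3DObservation? {
    do {
        // Commenting out this single call keeps memory constant;
        // with it enabled, memory grows to ~1 GB within a minute under Mac Catalyst.
        let observations = try await bodyPoseRequest.perform(on: pixelBuffer)
        return observations.first
    } catch {
        return nil
    }
}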

Code

Camera View

import SwiftUI
import Combine
import Vision

struct CameraView: View {
    
    @StateObject private var viewModel = CameraViewModel()
    
    var body: some View {
        HStack {
            ZStack {
                GeometryReader { geometry in
                    if let image = viewModel.currentFrame {
                        Image(decorative: image, scale: 1)
                            .resizable()
                            .scaledToFill()
                            .frame(width: geometry.size.width,
                                   height: geometry.size.height)
                            .clipped()
                    } else {
                        ProgressView()
                    }
                }
            }
        }
    }
}

class CameraViewModel: ObservableObject {
    @Published var currentFrame: CGImage?
    @Published var frameRate: Double = 0
    @Published var currentVisionBodyPose: HumanBodyPose3DObservation? // Store current body pose
    @Published var currentImageSize: CGSize? // Store current image size
    
    private var cameraManager: CameraManager?
    private var humanBodyPose = HumanBodyPose3DDetector()
    private var lastClassificationTime = Date()
    private var frameCount = 0
    private var lastFrameTime = Date()

    private let classificationThrottleInterval: TimeInterval = 1.0
    private var lastPoseSendTime: Date = .distantPast
    
    init() {
        cameraManager = CameraManager()
        startPreview()
        startClassification()
    }
    
    private func startPreview() {
        Task {
            guard let previewStream = cameraManager?.previewStream else { return }
            for await frame in previewStream {
                let size = CGSize(width: frame.width, height: frame.height)
                Task { @MainActor in
                    self.currentFrame = frame
                    self.currentImageSize = size
                    self.updateFrameRate()
                }
            }
        }
    }
    
    private func startClassification() {
        Task {
            guard let classificationStream = cameraManager?.classificationStream else { return }
            for await pixelBuffer in classificationStream {
                self.classifyFrame(pixelBuffer: pixelBuffer)
            }
        }
    }
    
    private func classifyFrame(pixelBuffer: CVPixelBuffer) {
        humanBodyPose.runHumanBodyPose3DRequestOnImage(pixelBuffer: pixelBuffer) { [weak self] observation in
            guard let self = self else { return }
            DispatchQueue.main.async {
                if let observation = observation {
                    self.currentVisionBodyPose = observation
                    print(observation)
                } else {
                    self.currentVisionBodyPose = nil
                }
            }
        }
    }
    
    private func updateFrameRate() {
        frameCount += 1
        let now = Date()
        let elapsed = now.timeIntervalSince(lastFrameTime)
        
        if elapsed >= 1.0 {
            frameRate = Double(frameCount) / elapsed
            frameCount = 0
            lastFrameTime = now
        }
    }
}

HumanBodyPose3DDetector

import Foundation
import Vision

class HumanBodyPose3DDetector: NSObject, ObservableObject {
    
    @Published var humanObservation: HumanBodyPose3DObservation? = nil
    private let queue = DispatchQueue(label: "humanbodypose.queue")
    private let request = DetectHumanBodyPose3DRequest()
    
    // Wrapper so the CVPixelBuffer can be captured by the Task below
    private struct SendablePixelBuffer: @unchecked Sendable {
        let buffer: CVPixelBuffer
    }
    
    public func runHumanBodyPose3DRequestOnImage(pixelBuffer: CVPixelBuffer, completion: @escaping (HumanBodyPose3DObservation?) -> Void) {
        
        let sendableBuffer = SendablePixelBuffer(buffer: pixelBuffer)
        
        queue.async { [weak self] in

            Task { [weak self, sendableBuffer] in
                do {
                    guard let self = self else { return }
                    let result = try await self.request.perform(on: sendableBuffer.buffer)
                    
                    //process result
                    DispatchQueue.main.async {
                        if result.isEmpty {
                            completion(nil)
                        } else {
                            completion(result[0])
                        }
                    }
                } catch {
                    DispatchQueue.main.async {
                        completion(nil)
                    }
                }
            }
        }
    }
}

Camera Manager

import AVFoundation
import CoreImage
import UIKit


class CameraManager: NSObject {
    
    private let captureSession = AVCaptureSession()
    private var deviceInput: AVCaptureDeviceInput?
    private var videoOutput: AVCaptureVideoDataOutput?
    private let systemPreferredCamera = AVCaptureDevice.default(for: .video)
    private let ciContext = CIContext()

    private var sessionQueue = DispatchQueue(label: "video.preview.session")
    
    private var addToPreviewStream: ((CGImage) -> Void)?
    
    lazy var previewStream: AsyncStream<CGImage> = {
        AsyncStream(bufferingPolicy: .bufferingNewest(1)) { continuation in
            addToPreviewStream = { cgImage in
                continuation.yield(cgImage)
            }
        }
    }()
    
    private let classificationQueue = DispatchQueue(
        label: "classification.queue",
        qos: .userInitiated,
        attributes: .concurrent
    )

    private var addToClassificationStream: ((CVPixelBuffer) -> Void)?
    private var frameCounter = 0
        
    lazy var classificationStream: AsyncStream<CVPixelBuffer> = {
        AsyncStream(bufferingPolicy: .bufferingNewest(1)) { continuation in
            addToClassificationStream = { pixelBuffer in
                continuation.yield(pixelBuffer)
            }
        }
    }()
    
    private var isAuthorized: Bool {
        get async {
            let status = AVCaptureDevice.authorizationStatus(for: .video)
            var isAuthorized = status == .authorized
            if status == .notDetermined {
                isAuthorized = await AVCaptureDevice.requestAccess(for: .video)
            }
            return isAuthorized
        }
    }
    
    override init() {
        super.init()
        Task {
            await configureSession()
            await startSession()
        }
    }
    
    private func configureSession() async {
        guard await isAuthorized,
              let systemPreferredCamera,
              let deviceInput = try? AVCaptureDeviceInput(device: systemPreferredCamera)
        else { return }
        
        captureSession.beginConfiguration()
        
        if captureSession.canSetSessionPreset(.hd1920x1080) {
            captureSession.sessionPreset = .hd1920x1080
        }
        
        defer {
            self.captureSession.commitConfiguration()
        }
        
        let videoOutput = AVCaptureVideoDataOutput()
       
        videoOutput.setSampleBufferDelegate(self, queue: sessionQueue)
        
        guard captureSession.canAddInput(deviceInput) else {
            return
        }
        
        guard captureSession.canAddOutput(videoOutput) else {
            return
        }
        
        captureSession.addInput(deviceInput)
        captureSession.addOutput(videoOutput)

        //For a vertical orientation of the camera stream
        if let connection = videoOutput.connection(with: .video) {
            connection.videoRotationAngle = 0
        }
    }
    
    
    private func startSession() async {
        guard await isAuthorized else { return }
        captureSession.startRunning()
    }

}

extension CameraManager: AVCaptureVideoDataOutputSampleBufferDelegate {
    func captureOutput(_ output: AVCaptureOutput,
                       didOutput sampleBuffer: CMSampleBuffer,
                       from connection: AVCaptureConnection) {
        
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer),
                  let cgImage = ciContext.createCGImage(
                    CIImage(cvPixelBuffer: pixelBuffer),
                    from: CGRect(x: 0, y: 0,
                    width: CVPixelBufferGetWidth(pixelBuffer),
                    height: CVPixelBufferGetHeight(pixelBuffer)))
            else { return }
        
        addToPreviewStream?(cgImage)
        
        frameCounter += 1
        if frameCounter % 1 == 0 { // % 1 passes every frame; raise the divisor to throttle classification
            addToClassificationStream?(pixelBuffer)
        }
    }
}