import ARKit
import RealityKit
import SwiftUI
import Vision
import CoreML

/// A `RealityView` that runs YOLOv3 object detection on main-camera frames and
/// draws a labelled green wireframe box for every detected object.
///
/// Pipeline: ARKit camera frames -> Vision/Core ML inference -> one wireframe
/// entity per observation, parented to `contentEntity`.
struct ObjectTrackingWithOverlay: View {
    // NOTE(review): not `@State`, so a new session is created whenever this
    // struct is re-initialised by its parent. Left as-is to keep the
    // initializer surface unchanged; consider `@State` if that is unintended.
    var arkitSession = ARKitSession()

    /// Observations from the most recently processed camera frame.
    @State private var detectionResults: [VNRecognizedObjectObservation] = []
    /// Root entity added to the RealityKit scene; all wireframes are its children.
    @State private var contentEntity = Entity()
    /// Wireframe entities currently in the scene, keyed by observation UUID.
    @State private var boundingBoxEntities: [UUID: ModelEntity] = [:]
    @State private var worldTrackingProvider = WorldTrackingProvider()
    /// Set once `contentEntity` has been added to the scene.
    @State private var isRealityKitInitialized = false
    /// Vision wrapper around the YOLOv3 model, created on first use and reused
    /// for every subsequent frame.
    @State private var visionModel: VNCoreMLModel?

    var body: some View {
        RealityView { content in
            print("🎥 Initializing RealityKit scene...")
            content.add(contentEntity)
            isRealityKitInitialized = true
            print("✅ RealityKit Initialized!")
        } update: { _ in
            // Deferred to a task so that view state is not mutated while
            // SwiftUI is in the middle of a view update.
            Task { @MainActor in
                updateBoundingBoxes()
            }
        }
        // `.task` (rather than `.onAppear` + `Task`) cancels the camera loop
        // automatically when the view disappears.
        .task {
            do {
                try await arkitSession.run([worldTrackingProvider])
                print("✅ World Tracking Provider started")
            } catch {
                print("🚨 Failed to start ARKit session: \(error)")
            }
            await startCameraProcessing()
        }
    }

    // MARK: - Start Camera Processing

    /// Starts the camera frame provider and runs detection on every left-camera
    /// sample until the frame sequence ends or the surrounding task is cancelled.
    @MainActor
    private func startCameraProcessing() async {
        guard CameraFrameProvider.isSupported else {
            print("🚨 CameraFrameProvider is not supported.")
            return
        }

        let cameraFrameProvider = CameraFrameProvider()
        // NOTE(review): this is a second `run` call on the same session —
        // confirm it does not stop `worldTrackingProvider`.
        do {
            try await arkitSession.run([cameraFrameProvider])
        } catch {
            // Without a running provider no frames would ever arrive.
            print("🚨 Failed to start camera frame provider: \(error)")
            return
        }

        // Pick the tallest supported format for the left main camera.
        let formats = CameraVideoFormat.supportedVideoFormats(for: .main, cameraPositions: [.left])
        let highResolutionFormat = formats.max { $0.frameSize.height < $1.frameSize.height }

        guard let highResolutionFormat,
              let cameraFrameUpdates = cameraFrameProvider.cameraFrameUpdates(for: highResolutionFormat) else {
            print("🚨 Failed to get camera updates.")
            return
        }

        print("🎥 Camera processing started.")
        for await cameraFrame in cameraFrameUpdates {
            guard let sample = cameraFrame.sample(for: .left) else { continue }
            processFrame(sample.pixelBuffer)
        }
    }

    // MARK: - Run YOLOv3 Inference

    /// Runs object detection on one camera frame and redraws the overlays.
    ///
    /// A frame with no detections clears the existing overlays. A frame whose
    /// Vision request throws leaves them untouched.
    ///
    /// - Parameter pixelBuffer: The camera image to analyse.
    @MainActor
    private func processFrame(_ pixelBuffer: CVPixelBuffer) {
        guard let model = loadVisionModelIfNeeded() else { return }

        // NOTE(review): `perform` is synchronous, so inference runs on the
        // calling (main) actor; consider moving it off the main actor if the
        // UI stutters.
        let request = VNCoreMLRequest(model: model)
        let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer)
        do {
            try handler.perform([request])
        } catch {
            print("🚨 Vision request failed: \(error)")
            return
        }

        let results = (request.results as? [VNRecognizedObjectObservation]) ?? []
        if results.isEmpty {
            print("🚨 No objects detected.")
        } else {
            print("🟢 Detected \(results.count) objects")
            for result in results {
                print(" - \(result.labels.first?.identifier ?? "Unknown") @ \(result.boundingBox)")
            }
        }

        // Store and render in the same step so the overlays always match the
        // frame that was just analysed.
        detectionResults = results
        renderBoundingBoxes(for: results)
    }

    /// Returns the cached Vision model, creating it on first use.
    ///
    /// - Returns: The model, or `nil` if it could not be created (the attempt
    ///   is repeated on the next call).
    @MainActor
    private func loadVisionModelIfNeeded() -> VNCoreMLModel? {
        if let visionModel { return visionModel }
        do {
            let model = try VNCoreMLModel(for: YOLOv3().model)
            visionModel = model
            return model
        } catch {
            print("🚨 Failed to load YOLOv3 model: \(error)")
            return nil
        }
    }

    // MARK: - Update Bounding Boxes

    /// Rebuilds the overlays from the stored `detectionResults`.
    @MainActor
    private func updateBoundingBoxes() {
        renderBoundingBoxes(for: detectionResults)
    }

    /// Removes every existing wireframe and creates a new one per observation.
    ///
    /// Does nothing until the RealityKit scene has been initialised.
    ///
    /// - Parameter observations: The detections to visualise; an empty array
    ///   clears the scene.
    @MainActor
    private func renderBoundingBoxes(for observations: [VNRecognizedObjectObservation]) {
        guard isRealityKitInitialized else { return }

        for entity in boundingBoxEntities.values {
            entity.removeFromParent()
        }

        var rebuilt: [UUID: ModelEntity] = [:]
        for observation in observations {
            guard let entity = createOrUpdateBoundingBox(for: observation) else { continue }
            contentEntity.addChild(entity)
            rebuilt[observation.uuid] = entity
        }
        boundingBoxEntities = rebuilt
    }

    // MARK: - Create Bounding Box with Estimated Depth

    /// Builds a labelled wireframe box for one observation.
    ///
    /// Depth is a heuristic derived from the bounding-box height, not a
    /// measured distance.
    ///
    /// - Parameter observation: The detection to visualise.
    /// - Returns: The wireframe entity (currently never `nil`).
    @MainActor
    private func createOrUpdateBoundingBox(for observation: VNRecognizedObjectObservation) -> ModelEntity? {
        let boundingBox = observation.boundingBox

        // Map the normalised box centre from [0, 1] to [-1, 1]. Vision's
        // normalised coordinates have their origin at the lower-left corner,
        // so larger `midY` means higher up, matching RealityKit's +Y.
        let screenX = Float((boundingBox.midX - 0.5) * 2)
        let screenY = Float((boundingBox.midY - 0.5) * 2)

        let estimatedDepth: Float = 0.5 + Float(boundingBox.height) * 2

        // NOTE(review): the position is expressed in `contentEntity`'s space
        // and does not use the device pose from `worldTrackingProvider`.
        let worldPosition = SIMD3<Float>(
            screenX * estimatedDepth,
            screenY * estimatedDepth,
            -estimatedDepth
        )

        let width = Float(boundingBox.width) * estimatedDepth
        let height = Float(boundingBox.height) * estimatedDepth
        let depth: Float = 0.01

        let wireframeEntity = ModelEntity()
        wireframeEntity.position = worldPosition

        // Corners of the front face (z = 0), counter-clockwise from
        // bottom-left; the back face is the same rectangle pushed to z = -depth.
        let halfWidth = width / 2
        let halfHeight = height / 2
        let frontCorners: [SIMD3<Float>] = [
            SIMD3<Float>(-halfWidth, -halfHeight, 0),
            SIMD3<Float>(halfWidth, -halfHeight, 0),
            SIMD3<Float>(halfWidth, halfHeight, 0),
            SIMD3<Float>(-halfWidth, halfHeight, 0),
        ]
        let backOffset = SIMD3<Float>(0, 0, -depth)
        let backCorners = frontCorners.map { $0 + backOffset }

        // 12 edges: front rectangle, front-to-back connectors, back rectangle.
        var edges: [(SIMD3<Float>, SIMD3<Float>)] = []
        edges.reserveCapacity(12)
        for index in frontCorners.indices {
            edges.append((frontCorners[index], frontCorners[(index + 1) % frontCorners.count]))
        }
        for (front, back) in zip(frontCorners, backCorners) {
            edges.append((front, back))
        }
        for index in backCorners.indices {
            edges.append((backCorners[index], backCorners[(index + 1) % backCorners.count]))
        }

        let edgeThickness: Float = 0.002
        let material = UnlitMaterial(color: .green)
        for (start, end) in edges {
            let edgeEntity = makeEdge(from: start, to: end, thickness: edgeThickness, material: material)
            wireframeEntity.addChild(edgeEntity)
        }

        // Place the label just above the top edge of the box.
        let labelEntity = createLabel(for: observation)
        labelEntity.position = [0, height / 2 + 0.02, 0]
        wireframeEntity.addChild(labelEntity)

        return wireframeEntity
    }

    /// Creates a thin box mesh spanning `start` to `end`.
    ///
    /// - Parameters:
    ///   - start: One end of the edge, in the parent's local space.
    ///   - end: The other end of the edge.
    ///   - thickness: Cross-section width and height of the edge.
    ///   - material: Material applied to the edge mesh.
    /// - Returns: An entity centred on the midpoint with its local Z axis
    ///   aligned to the edge direction.
    @MainActor
    private func makeEdge(
        from start: SIMD3<Float>,
        to end: SIMD3<Float>,
        thickness: Float,
        material: RealityKit.Material
    ) -> ModelEntity {
        let direction = end - start
        let length = simd_length(direction)

        // The box's depth runs along its local Z axis.
        let edgeMesh = MeshResource.generateBox(width: thickness, height: thickness, depth: length)
        let edgeEntity = ModelEntity(mesh: edgeMesh, materials: [material])
        edgeEntity.position = (start + end) / 2

        // A zero-length edge has no direction; normalising it would yield NaN,
        // so the default orientation is kept in that case.
        if length > 0 {
            edgeEntity.orientation = simd_quatf(from: [0, 0, 1], to: direction / length)
        }
        return edgeEntity
    }

    // MARK: - Create Label

    /// Creates a white text entity reading "<label> (<confidence>%)" for the
    /// observation's top classification, or "Unknown (0%)" if it has none.
    ///
    /// The caller is responsible for positioning the returned entity.
    ///
    /// - Parameter observation: The detection to describe.
    /// - Returns: The text entity.
    @MainActor
    private func createLabel(for observation: VNRecognizedObjectObservation) -> ModelEntity {
        let topLabel = observation.labels.first
        let label = topLabel?.identifier ?? "Unknown"
        let confidence = topLabel?.confidence ?? 0
        let labelText = "\(label) (\(Int(confidence * 100))%)"

        let textMesh = MeshResource.generateText(
            labelText,
            extrusionDepth: 0.01,
            font: .systemFont(ofSize: 0.05),
            containerFrame: .zero,
            alignment: .center,
            lineBreakMode: .byTruncatingTail
        )
        let textMaterial = UnlitMaterial(color: .white)
        return ModelEntity(mesh: textMesh, materials: [textMaterial])
    }
}