ActivityClassifier doesn't classify movement

Question

Adamc93 OP

Created 11h

Replies 0

Boosts 0

Participants 1

I'm using a custom Create ML model to classify the movement of a user's hand in a game.

The classifier has 3 different spell movements, but my code constantly predicts all of them at an equal 1/3 probability regardless of the movement. This leads me to believe that my code, rather than the model, is at fault, since in Create ML the same model gives me a heavily weighted prediction.

My code is below.

After adding debug prints everywhere, all the data looks good to me and closely matches my test CSV data.

So I'm thinking my issue must be in the setup of my model code?

/// Feeds samples into the model and buffers frames until a full prediction window is available.
///
/// Frames are collected with `appendSample(_:)`. Once exactly `windowSize` frames are
/// buffered, `classifyIfReady(threshold:)` runs one prediction, stores the model's
/// `stateOut` for the next call, and consumes the window so that the carried state is
/// only ever combined with frames the model has not seen yet.
final class WandGestureStreamer {
    static let shared = WandGestureStreamer()

    private let model: SpellActivityClassifier
    private var samples: [Transform] = []
    private let windowSize = 100  // number of frames the model expects

    /// RNN hidden state passed between inferences
    private var stateIn: MLMultiArray

    /// The frame immediately preceding `samples.first`, used to compute the first
    /// delta of the window. Set when a frame is dropped from the front of the buffer
    /// and when a window is consumed by a prediction; `nil` until either happens.
    private var lastDropped: Transform?

    /// Loads the classifier and prepares a zeroed initial state.
    ///
    /// Any failure here (model cannot be loaded, or it has no `stateIn` multi-array
    /// input) is treated as a programmer error and stops the process with a
    /// descriptive message.
    private init() {
        do {
            let classifier = try SpellActivityClassifier(configuration: MLModelConfiguration())

            // Size and type the state from the model description instead of hard-coding them.
            guard let constraint = classifier.model.modelDescription
                .inputDescriptionsByName["stateIn"]?
                .multiArrayConstraint
            else {
                fatalError("SpellActivityClassifier has no multi-array input named \"stateIn\"")
            }

            let initialState = try MLMultiArray(shape: constraint.shape, dataType: constraint.dataType)
            // NOTE(review): MLMultiArray(shape:dataType:) is not documented to zero its
            // storage, so clear it explicitly to start from a known state.
            for index in 0..<initialState.count {
                initialState[index] = 0
            }

            model = classifier
            stateIn = initialState
        } catch {
            fatalError("Unable to set up SpellActivityClassifier: \(error)")
        }
    }

    /// Call once per frame with the latest wand position (or any feature vector).
    ///
    /// If the buffer grows beyond `windowSize` (for example because
    /// `classifyIfReady(threshold:)` was not called), the oldest frame is dropped
    /// and remembered in `lastDropped`.
    func appendSample(_ sample: Transform) {
        samples.append(sample)

        // drop oldest frame if over capacity, retaining it for delta at window start
        if samples.count > windowSize {
            lastDropped = samples.removeFirst()
        }
    }

    /// Runs one prediction when a full window of frames is buffered.
    ///
    /// - Parameter threshold: Minimum probability the top label must exceed to be reported.
    /// - Returns: The predicted label and its probability, or `nil` when the window is
    ///   not full, the probability does not exceed `threshold`, or prediction failed.
    ///
    /// After a successful prediction the window is consumed, so the next result is
    /// produced only after another `windowSize` frames have been appended.
    func classifyIfReady(threshold: Double = 0.6) -> (label: String, confidence: Double)? {
        guard samples.count == windowSize else { return nil }

        do {
            let input = try makeInput(initialState: stateIn)
            let output = try model.prediction(input: input)

            // Save state for continuity.
            stateIn = output.stateOut

            // `stateOut` was produced from the frames in this window. Re-using it with a
            // window that overlaps them would feed the same frames to the model again,
            // so start a fresh window and keep only the last frame for the next delta.
            lastDropped = samples.last
            samples.removeAll(keepingCapacity: true)

            let label = output.label
            let confidence = output.labelProbability[label] ?? 0

            guard confidence > threshold else { return nil }
            return (label: label, confidence: confidence)
        } catch {
            print("Error", error.localizedDescription, error)

            return nil
        }
    }

    /// Constructs a SpellActivityClassifierInput from recorded wand transforms.
    ///
    /// Each buffered frame is converted with `WandMovementRecording.DataModel`, using the
    /// previous buffered frame (or `lastDropped` for the first one) as its predecessor.
    ///
    /// - Parameter initialState: Value passed through as the model's `stateIn` input.
    /// - Returns: An input whose feature arrays each hold one value per buffered frame.
    /// - Throws: Any error raised while allocating the feature arrays.
    func makeInput(initialState: MLMultiArray) throws -> SpellActivityClassifierInput {
        let shape = [samples.count as NSNumber]

        let timeArr = try MLMultiArray(shape: shape, dataType: .double)
        let dxArr   = try MLMultiArray(shape: shape, dataType: .double)
        let dyArr   = try MLMultiArray(shape: shape, dataType: .double)
        let dzArr   = try MLMultiArray(shape: shape, dataType: .double)
        let rwArr   = try MLMultiArray(shape: shape, dataType: .double)
        let rxArr   = try MLMultiArray(shape: shape, dataType: .double)
        let ryArr   = try MLMultiArray(shape: shape, dataType: .double)
        let rzArr   = try MLMultiArray(shape: shape, dataType: .double)

        for (i, sample) in samples.enumerated() {
            let previousSample = i > 0 ? samples[i - 1] : lastDropped
            // Named `frame` so it does not shadow the `model` property.
            let frame = WandMovementRecording.DataModel(transform: sample, previous: previousSample)

            // NOTE(review): confirm `timestamp` uses the same time base as the training CSV.
            timeArr[i] = NSNumber(value: frame.timestamp)

            dxArr[i] = NSNumber(value: frame.dx)
            dyArr[i] = NSNumber(value: frame.dy)
            dzArr[i] = NSNumber(value: frame.dz)

            let rot = frame.rotation
            rwArr[i] = NSNumber(value: rot.w)
            rxArr[i] = NSNumber(value: rot.x)
            ryArr[i] = NSNumber(value: rot.y)
            rzArr[i] = NSNumber(value: rot.z)
        }

        return SpellActivityClassifierInput(
            dx: dxArr, dy: dyArr, dz: dzArr,
            rotation_w: rwArr, rotation_x: rxArr, rotation_y: ryArr, rotation_z: rzArr,
            timestamp: timeArr,
            stateIn: initialState
        )
    }
}

Boost