I'm using a custom Create ML model to classify the movement of a user's hand in a game.
The classifier has three different spell movements, but my code constantly predicts all of them at an equal 1/3 probability regardless of the movement. This leads me to believe that my code is incorrect rather than the model, since in Create ML the model at least gives me a heavily weighted prediction.
My code is below.
After adding debug prints everywhere, all the data looks good to me and closely matches my test CSV data.
So I'm thinking my issue must be in the setup of my model code?
/// Feeds wand samples into `SpellActivityClassifier` and keeps a sliding window of the
/// last `windowSize` frames.
///
/// Usage: call `appendSample(_:)` once per frame, then `classifyIfReady(threshold:)`
/// whenever a prediction is wanted.
///
/// Recurrent-state handling: the `stateOut` of a prediction is only reused as the next
/// prediction's `stateIn` when the next window starts exactly where the previous one
/// ended (i.e. exactly `windowSize` new frames were appended in between). Windows that
/// overlap the previous one, or that follow a gap, start from an explicitly zeroed state,
/// so frames are never fed through the recurrent state twice.
final class WandGestureStreamer {
    static let shared = WandGestureStreamer()

    private let model: SpellActivityClassifier
    private var samples: [Transform] = []
    private let windowSize = 100 // number of frames the model expects

    /// RNN hidden state produced by the most recent prediction (all zeros before the first one).
    private var stateIn: MLMultiArray
    /// Shape the model declares for its `stateIn` input.
    private let stateShape: [NSNumber]
    /// Element type the model declares for its `stateIn` input.
    private let stateDataType: MLMultiArrayDataType
    /// Number of frames appended since the last successful prediction; used to decide
    /// whether the current window is contiguous with the previously classified one.
    private var framesSinceLastPrediction = 0
    /// Last transform dropped from the window for continuity
    private var lastDropped: Transform?

    private init() {
        // The model is compiled into the app, so failing to load it is a programmer
        // error; crash early exactly as before.
        let classifier = try! SpellActivityClassifier(configuration: MLModelConfiguration())

        // Read the required state shape/type from the model instead of hard-coding them.
        guard let constraint = classifier.model.modelDescription
            .inputDescriptionsByName["stateIn"]?
            .multiArrayConstraint
        else {
            fatalError("SpellActivityClassifier does not declare a multi-array input named \"stateIn\"")
        }

        self.model = classifier
        self.stateShape = constraint.shape
        self.stateDataType = constraint.dataType
        self.stateIn = try! WandGestureStreamer.makeZeroState(
            shape: constraint.shape,
            dataType: constraint.dataType
        )
    }

    /// Call once per frame with the latest wand position (or any feature vector).
    ///
    /// - Parameter sample: The newest transform; appended to the end of the window.
    func appendSample(_ sample: Transform) {
        samples.append(sample)
        framesSinceLastPrediction += 1
        // drop oldest frame if over capacity, retaining it for delta at window start
        if samples.count > windowSize {
            lastDropped = samples.removeFirst()
        }
    }

    /// Runs the classifier on the current window if it holds exactly `windowSize` frames.
    ///
    /// - Parameter threshold: Minimum probability the top label must exceed to be reported.
    /// - Returns: The predicted label and its probability, or `nil` when the window is not
    ///   full, the confidence does not exceed `threshold`, or the prediction throws
    ///   (the error is printed).
    func classifyIfReady(threshold: Double = 0.6) -> (label: String, confidence: Double)? {
        guard samples.count == windowSize else { return nil }
        do {
            // Only a window that begins right after the previously classified one may
            // continue from that prediction's state. Before the first prediction
            // `stateIn` is still the zero state, so reusing it is equally valid.
            let initialState: MLMultiArray
            if framesSinceLastPrediction == windowSize {
                initialState = stateIn
            } else {
                initialState = try WandGestureStreamer.makeZeroState(
                    shape: stateShape,
                    dataType: stateDataType
                )
            }

            let input = try makeInput(initialState: initialState)
            let output = try model.prediction(input: input)

            // Save state for continuity with the next non-overlapping window.
            stateIn = output.stateOut
            framesSinceLastPrediction = 0

            let best = output.label
            let conf = output.labelProbability[best] ?? 0
            // Report only gestures recognized with high confidence.
            guard conf > threshold else { return nil }
            return (best, conf)
        } catch {
            print("Error", error.localizedDescription, error)
            return nil
        }
    }

    /// Constructs a SpellActivityClassifierInput from recorded wand transforms.
    ///
    /// Each frame is converted with `WandMovementRecording.DataModel`, using the preceding
    /// frame (or `lastDropped` for the first frame of the window) as the `previous` value.
    ///
    /// - Parameter initialState: Recurrent state to pass as the model's `stateIn`.
    /// - Returns: Model input whose feature arrays each hold one value per buffered sample.
    /// - Throws: Any error raised while allocating the `MLMultiArray` feature buffers.
    func makeInput(initialState: MLMultiArray) throws -> SpellActivityClassifierInput {
        let shape = [samples.count as NSNumber]
        let timeArr = try MLMultiArray(shape: shape, dataType: .double)
        let dxArr = try MLMultiArray(shape: shape, dataType: .double)
        let dyArr = try MLMultiArray(shape: shape, dataType: .double)
        let dzArr = try MLMultiArray(shape: shape, dataType: .double)
        let rwArr = try MLMultiArray(shape: shape, dataType: .double)
        let rxArr = try MLMultiArray(shape: shape, dataType: .double)
        let ryArr = try MLMultiArray(shape: shape, dataType: .double)
        let rzArr = try MLMultiArray(shape: shape, dataType: .double)

        for (i, sample) in samples.enumerated() {
            // The first frame has no predecessor inside the window; fall back to the
            // frame that was most recently evicted (nil until the window first overflows).
            let previousSample = i > 0 ? samples[i - 1] : lastDropped
            let frame = WandMovementRecording.DataModel(transform: sample, previous: previousSample)

            timeArr[i] = NSNumber(value: frame.timestamp)
            dxArr[i] = NSNumber(value: frame.dx)
            dyArr[i] = NSNumber(value: frame.dy)
            dzArr[i] = NSNumber(value: frame.dz)

            let rot = frame.rotation
            rwArr[i] = NSNumber(value: rot.w)
            rxArr[i] = NSNumber(value: rot.x)
            ryArr[i] = NSNumber(value: rot.y)
            rzArr[i] = NSNumber(value: rot.z)
        }

        return SpellActivityClassifierInput(
            dx: dxArr, dy: dyArr, dz: dzArr,
            rotation_w: rwArr, rotation_x: rxArr, rotation_y: ryArr, rotation_z: rzArr,
            timestamp: timeArr,
            stateIn: initialState
        )
    }

    /// Allocates a state array of the given shape/type with every element set to zero.
    ///
    /// `MLMultiArray(shape:dataType:)` is not documented to clear its storage, so the
    /// buffer is zeroed explicitly rather than relying on whatever the allocation holds.
    private static func makeZeroState(
        shape: [NSNumber],
        dataType: MLMultiArrayDataType
    ) throws -> MLMultiArray {
        let state = try MLMultiArray(shape: shape, dataType: dataType)
        let zero = NSNumber(value: 0.0)
        for index in 0..<state.count {
            state[index] = zero
        }
        return state
    }
}