I have been working on gaze estimation in a browser environment, and for that I used the following repo: gaze-estimation
For my use case I converted their PyTorch mobileone_S0 model to a TFJS model. During inference time, I am calling the following function in a loop.
async gazeProcessor(humanFaceArray, dataConfig = GAZE360_CONFIG) {
const imageTensor = this.human.tf.tensor(humanFaceArray);
const resizedImage = imageTensor.resizeBilinear([448, 448]);
const normalizedImage = resizedImage.sub(IMAGENET_MEAN).div(IMAGENET_STD);
const imageBatch = normalizedImage.expandDims(0);
const transposedImage = imageBatch.transpose([0, 3, 1, 2]);
try {
const executeGazeModel = async () => {
// Execute the gaze model asynchronously and dispose only after it is done
const [pitch, yaw] = await this.gazeModel.executeAsync(transposedImage);
// Dispose of transposedImage as it is no longer needed
transposedImage.dispose();
return this.human.tf.tidy(() => {
const pitchPredicted = this.human.tf.softmax(pitch, 1);
const yawPredicted = this.human.tf.softmax(yaw, 1);
const bins = dataConfig.bins;
const binWidth = dataConfig.binwidth;
const angle = dataConfig.angle;
const idxTensor = this.human.tf.range(0, bins, 1, 'float32');
const pitchWeightedTensor = pitchPredicted.mul(idxTensor).sum(1).mul(binWidth).sub(angle);
const yawWeightedTensor = yawPredicted.mul(idxTensor).sum(1).mul(binWidth).sub(angle);
// Extract values
const pitchWeighted = pitchWeightedTensor.dataSync()[0];
const yawWeighted = yawWeightedTensor.dataSync()[0];
// Dispose of tensors
pitchPredicted.dispose();
yawPredicted.dispose();
idxTensor.dispose();
pitchWeightedTensor.dispose();
yawWeightedTensor.dispose();
// Dispose tensors returned by the model
pitch.dispose();
yaw.dispose();
return [pitchWeighted.toFixed(2), yawWeighted.toFixed(2)];
});
};
const [pitchPredictedWeighted, yawPredictedWeighted] = await executeGazeModel();
if (yawPredictedWeighted > -YAW_THRESHOLD && yawPredictedWeighted < YAW_THRESHOLD) {
return true;
} else {
return false;
}
} catch (error) {
console.error("Error during gaze prediction:", error);
return false;
} finally {
// Ensure any remaining memory cleanup
transposedImage.dispose();
imageTensor.dispose();
resizedImage.dispose();
normalizedImage.dispose();
imageBatch.dispose();
console.log(this.human.tf.memory());
}
}
PS: The gazeModel has been initialized separately.
When gazeProcessor() is not called, no memory leak happens. However, when it is called, the total number of tensors increases in the following pattern:
Call 1
At start: x tensors
At end: x+1 tensors
Call 2
At start: x+6 tensors
At end: x+7 tensors
Note: I have checked all the other similar questions and applied all the solutions they suggest. I even raised the question with the repo owner, but they replied with something about reparameterization here