I'm trying to build trueskill model with two teams and 5 players each with Infer.Net. However when inferring the skills the means of the distribution get way too big or small.
Below is code of my implementation with made up players and matches that replicates the problem. Even though I'm using made up players and matches here I get similar results with data from the real world.
using Microsoft.ML.Probabilistic.Distributions;
using Microsoft.ML.Probabilistic.Factors;
using Microsoft.ML.Probabilistic.Models;
using Microsoft.ML.Probabilistic.Collections;
using Microsoft.Data.Analysis;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Numerics;
using System.Text;
using System.Threading.Tasks;
using System.Collections;
using System.Security.Cryptography;
using System.IO;
using System.Security.Policy;
using System.Reflection;
namespace ConsoleApp3
{
internal class infernetDemo5
{
static void Main(string[] args)
{
int[][] winnerData = {
new[] { 0, 1, 2, 3, 4 },
new[] { 0, 1, 2, 3, 5 },
new[] { 5, 6, 7, 8, 9 },
new[] { 5, 6, 7, 8, 9 },
new[] { 0, 1, 2, 3, 4 },
new[] { 0, 1, 2, 3, 5 },
new[] { 5, 6, 7, 8, 9 },
new[] { 5, 6, 7, 8, 9 },
new[] { 0, 1, 2, 3, 4 },
new[] { 0, 1, 2, 3, 5 },
new[] { 5, 6, 7, 8, 9 },
new[] { 5, 6, 7, 8, 9 },
new[] { 0, 1, 2, 3, 4 },
new[] { 0, 1, 2, 3, 5 },
new[] { 5, 6, 7, 8, 9 },
new[] { 5, 6, 7, 8, 9 },
new[] { 0, 1, 2, 3, 4 },
new[] { 0, 1, 2, 3, 5 },
new[] { 5, 6, 7, 8, 9 },
new[] { 5, 6, 7, 8, 9 },
new[] { 0, 1, 2, 3, 4 },
new[] { 0, 1, 2, 3, 5 },
new[] { 5, 6, 7, 8, 9 },
new[] { 5, 6, 7, 8, 9 },
new[] { 0, 1, 2, 3, 4 },
new[] { 0, 1, 2, 3, 5 },
new[] { 5, 6, 7, 8, 9 },
new[] { 5, 6, 7, 8, 9 },
new[] { 0, 1, 2, 3, 4 },
new[] { 0, 1, 2, 3, 5 },
new[] { 5, 6, 7, 8, 9 },
new[] { 5, 6, 7, 8, 9 },
};
int[][] loserData = {
new[] { 5, 6, 7, 8, 9 },
new[] { 6, 7, 8, 9, 4 },
new[] { 0, 1, 2, 3, 4 },
new[] { 0, 1, 2, 3, 4 },
new[] { 5, 6, 7, 8, 9 },
new[] { 6, 7, 8, 9, 4 },
new[] { 0, 1, 2, 3, 4 },
new[] { 0, 1, 2, 3, 4 },
new[] { 5, 6, 7, 8, 9 },
new[] { 6, 7, 8, 9, 4 },
new[] { 0, 1, 2, 3, 4 },
new[] { 0, 1, 2, 3, 4 },
new[] { 5, 6, 7, 8, 9 },
new[] { 6, 7, 8, 9, 4 },
new[] { 0, 1, 2, 3, 4 },
new[] { 0, 1, 2, 3, 4 },
new[] { 5, 6, 7, 8, 9 },
new[] { 6, 7, 8, 9, 4 },
new[] { 0, 1, 2, 3, 4 },
new[] { 0, 1, 2, 3, 4 },
new[] { 5, 6, 7, 8, 9 },
new[] { 6, 7, 8, 9, 4 },
new[] { 0, 1, 2, 3, 4 },
new[] { 0, 1, 2, 3, 4 },
new[] { 5, 6, 7, 8, 9 },
new[] { 6, 7, 8, 9, 4 },
new[] { 0, 1, 2, 3, 4 },
new[] { 0, 1, 2, 3, 4 },
new[] { 5, 6, 7, 8, 9 },
new[] { 6, 7, 8, 9, 4 },
new[] { 0, 1, 2, 3, 4 },
new[] { 0, 1, 2, 3, 4 },
};
var nGames = 32;
var nPlayers = 10;
var mu = 25.0;
var sigma = 8.333;
var beta = 4.1667;
var teamSize = new Range(5).Named("TeamSize");
var games = new Range(nGames).Named("Game");
var players = new Range(nPlayers).Named("Player");
var playerSkills = Variable.Array<double>(players).Named("Skill");
var winner_lineups = Variable.Array(Variable.Array<int>(teamSize), games).Named("t1Lineups");
var loser_lineups = Variable.Array(Variable.Array<int>(teamSize), games).Named("t2Lineups");
var w_performances = Variable.Array(Variable.Array<double>(teamSize), games);
var l_performances = Variable.Array(Variable.Array<double>(teamSize), games);
using (Variable.ForEach(players))
{
playerSkills[players] = Variable.GaussianFromMeanAndPrecision(mu, 1 / (sigma* sigma));
}
using (var game = Variable.ForEach(games))
{
var gameIndex = game.Index;
using (var n = Variable.ForEach(teamSize))
{
var playerIndex = n.Index;
w_performances[gameIndex][playerIndex] = Variable.GaussianFromMeanAndPrecision(playerSkills[winner_lineups[gameIndex][playerIndex]], 1 / (beta* beta)).Named("w_player_performance");
l_performances[gameIndex][playerIndex] = Variable.GaussianFromMeanAndPrecision(playerSkills[loser_lineups[gameIndex][playerIndex]], 1 / (beta* beta)).Named("l_player_perfomance");
}
}
var w_performance = Variable.Sum(w_performances[games]);
var l_performance = Variable.Sum(l_performances[games]);
Variable.ConstrainTrue(w_performance > l_performance);
winner_lineups.ObservedValue = winnerData;
loser_lineups.ObservedValue = loserData;
var inferenceEngine = new InferenceEngine();
inferenceEngine.NumberOfIterations = 10;
var inferredSkills = inferenceEngine.Infer<Gaussian[]>(playerSkills);
foreach(var playerSkill in inferredSkills)
{
Console.WriteLine(playerSkill);
}
}
}
}
Output:
Compiling model...done.
Iterating:
.........| 10
Gaussian(25, 69,44)
Gaussian(25, 69,44)
Gaussian(25, 69,44)
Gaussian(25, 69,44)
Gaussian(-6,574e+04, 26,68)
Gaussian(-4,68e+04, 14,8)
Gaussian(-4,889e+04, 10,78)
Gaussian(-4,889e+04, 10,78)
Gaussian(-4,889e+04, 10,78)
Gaussian(-4,889e+04, 10,78)
Using TrueskillThroughTIme .py with same sample data and values which should correspond to the same model gives the following results:
N(mu=26.223, sigma=3.618)
N(mu=26.223, sigma=3.618)
N(mu=26.223, sigma=3.618)
N(mu=26.223, sigma=3.618)
N(mu=16.821, sigma=3.618)
N(mu=33.179, sigma=3.618)
N(mu=23.777, sigma=3.618)
N(mu=23.777, sigma=3.618)
N(mu=23.777, sigma=3.618)
N(mu=23.777, sigma=3.618)
I suspect summing up the teams players performances is the issue as when constraining individual performances according to the results gives reasonable skill estimates.