Training a Bayesian network with Infer.NET without knowing any prior or conditional probabilities
Hi, I am new to Infer.NET. I know this question may have been asked a few years ago, but the links in some of those posts are broken, so I am asking again here. Assume that I have the whole dataset for training and would like to construct a Bayesian network in Infer.NET without knowing any prior or conditional probabilities. How can Infer.NET infer those prior and conditional probabilities (my inferred probabilities were wrong)? Also, if one child node has multiple parents (e.g., 7 parents), what kind of distribution should I use for the prior probabilities of the parents and for the conditional probability of the child? Is it always a Dirichlet distribution represented in terms of Vectors, or should it be something else?
Here is my naive code, which tries to construct a Bayesian network of the form iMay ------> iSex:
using System;
using Microsoft.ML.Probabilistic.Models; //For modelling Bernoulli distribution
using Microsoft.ML.Probabilistic.Math;
using Microsoft.ML.Probabilistic.Distributions;
using Range = Microsoft.ML.Probabilistic.Models.Range;
using System.Data;
using System.IO;
using System.Linq;
namespace learningdotnet
{
/// <summary>
/// Two-node discrete Bayesian network iMay -> iSex. The probability table of each
/// node is itself a random variable with a Dirichlet prior, so the same model can
/// be used to learn the tables from data or to answer queries with given tables.
/// </summary>
/// <remarks>
/// Both nodes have two states. An observed integer value k for a node selects
/// element k of the corresponding probability vector, so the data encoding and the
/// meaning given to the vector elements must agree. <see cref="ProbiSex(int?, Dirichlet, Dirichlet[])"/>
/// reports element 0, which this code treats as the "true" state.
/// </remarks>
public class BNmodel
{
    // Number of data rows; observed before every inference call.
    public Variable<int> NumberOfExamples;

    // Per-row node states (one entry per example).
    public VariableArray<int> iMay;
    public VariableArray<int> iSex;

    // Parameters: the distribution of iMay, and one iSex distribution per iMay state.
    public Variable<Vector> ProbiMay;
    public VariableArray<Vector> CPTiSex;

    // Priors over the parameters; supplied as observed values at inference time.
    public Variable<Dirichlet> ProbiMayPrior;
    public VariableArray<Dirichlet> CPTiSexPrior;

    // Posteriors over the parameters, filled in by LearnParameters.
    public Dirichlet ProbiMayPosterior;
    public Dirichlet[] CPTiSexPosterior;

    public InferenceEngine Engine = new InferenceEngine();

    /// <summary>
    /// Defines the model: ranges, parameter priors, parameters, and the two node arrays.
    /// Nothing is observed here; observations are attached by the query/learning methods.
    /// </summary>
    public BNmodel()
    {
        // Range over the examples; its size is the (observed) number of examples.
        NumberOfExamples = Variable.New<int>().Named("NofE");
        Range N = new Range(NumberOfExamples).Named("N");

        // Each node has just 2 states.
        Range M = new Range(2).Named("M");
        Range S = new Range(2).Named("S");

        // iMay: prior and probability vector.
        ProbiMayPrior = Variable.New<Dirichlet>().Named("ProbiMayPrior");
        ProbiMay = Variable<Vector>.Random(ProbiMayPrior).Named("ProbiMay");
        ProbiMay.SetValueRange(M);

        // iSex: one probability vector (with its own prior) per state of iMay.
        CPTiSexPrior = Variable.Array<Dirichlet>(M).Named("CPTiSexPrior");
        CPTiSex = Variable.Array<Vector>(M).Named("CPTiSex");
        CPTiSex[M] = Variable<Vector>.Random(CPTiSexPrior[M]);
        CPTiSex.SetValueRange(S);

        // Node arrays: iMay drawn from ProbiMay, iSex drawn from the CPT row picked by iMay.
        iMay = Variable.Array<int>(N).Named("iMay");
        iMay[N] = Variable.Discrete(ProbiMay).ForEach(N);
        iSex = AddChildFromOneParent(iMay, CPTiSex).Named("iSex");
    }

    /// <summary>
    /// Creates a child array over the parent's range in which each element is drawn
    /// from the CPT row indexed by the corresponding parent element.
    /// </summary>
    /// <param name="parent">Parent node states, one per example.</param>
    /// <param name="cpt">Conditional probability table, indexed by parent state.</param>
    /// <returns>The new child variable array.</returns>
    public static VariableArray<int> AddChildFromOneParent(
        VariableArray<int> parent,
        VariableArray<Vector> cpt)
    {
        var n = parent.Range;
        var child = Variable.Array<int>(n);

        // 'using' closes the Switch block and then the ForEach block even if the
        // statement inside throws, so a failed definition cannot leave a block open.
        using (Variable.ForEach(n))
        using (Variable.Switch(parent[n]))
        {
            child[n] = Variable.Discrete(cpt[parent[n]]);
        }

        return child;
    }

    /// <summary>
    /// Learns the posteriors over <see cref="ProbiMay"/> and <see cref="CPTiSex"/> from
    /// fully observed data and stores them in <see cref="ProbiMayPosterior"/> and
    /// <see cref="CPTiSexPosterior"/>.
    /// </summary>
    /// <param name="imay">Observed iMay state per example (value k = element k of the probability vector).</param>
    /// <param name="isex">Observed iSex state per example, same length as <paramref name="imay"/>.</param>
    /// <param name="probiMayPrior">Prior over the iMay probability vector.</param>
    /// <param name="cptiSexPrior">Priors over the iSex probability vectors, one per iMay state.</param>
    /// <exception cref="ArgumentNullException">A data array is null.</exception>
    /// <exception cref="ArgumentException">The data arrays differ in length.</exception>
    public void LearnParameters(
        int[] imay,
        int[] isex,
        Dirichlet probiMayPrior,
        Dirichlet[] cptiSexPrior)
    {
        if (imay == null)
        {
            throw new ArgumentNullException(nameof(imay));
        }
        if (isex == null)
        {
            throw new ArgumentNullException(nameof(isex));
        }
        // Both arrays are observed over the same range, whose size is taken from imay.
        if (imay.Length != isex.Length)
        {
            throw new ArgumentException("imay and isex must contain the same number of examples.", nameof(isex));
        }

        NumberOfExamples.ObservedValue = imay.Length;
        iMay.ObservedValue = imay;
        iSex.ObservedValue = isex;
        ProbiMayPrior.ObservedValue = probiMayPrior;
        CPTiSexPrior.ObservedValue = cptiSexPrior;

        // Inference
        ProbiMayPosterior = Engine.Infer<Dirichlet>(ProbiMay);
        CPTiSexPosterior = Engine.Infer<Dirichlet[]>(CPTiSex);
    }

    /// <summary>
    /// Learns the parameters from data using uniform Dirichlet priors.
    /// </summary>
    /// <param name="imay">Observed iMay state per example.</param>
    /// <param name="isex">Observed iSex state per example.</param>
    public void LearnParameters(
        int[] imay,
        int[] isex)
    {
        Dirichlet probiMayPrior = Dirichlet.Uniform(2);
        Dirichlet[] cptiSexPrior = Enumerable.Repeat(Dirichlet.Uniform(2), 2).ToArray();
        LearnParameters(imay, isex, probiMayPrior, cptiSexPrior);
    }

    /// <summary>
    /// Queries the model for a single example and returns the probability of iSex
    /// state 0.
    /// </summary>
    /// <remarks>
    /// Replaces any previous observations: the example count is set to 1 and the
    /// observation on iSex is cleared.
    /// </remarks>
    /// <param name="imay">Observed iMay state, or null to leave iMay unobserved.</param>
    /// <param name="probiMayPrior">Prior over the iMay probability vector.</param>
    /// <param name="cptiSexPrior">Priors over the iSex probability vectors, one per iMay state.</param>
    /// <returns>Element 0 of the posterior distribution of the single iSex example.</returns>
    public double ProbiSex(
        int? imay,
        Dirichlet probiMayPrior,
        Dirichlet[] cptiSexPrior)
    {
        NumberOfExamples.ObservedValue = 1;
        if (imay.HasValue)
        {
            iMay.ObservedValue = new int[] { imay.Value };
        }
        else
        {
            iMay.ClearObservedValue();
        }
        iSex.ClearObservedValue();
        ProbiMayPrior.ObservedValue = probiMayPrior;
        CPTiSexPrior.ObservedValue = cptiSexPrior;

        // Inference
        var iSexPosterior = Engine.Infer<Discrete[]>(iSex);

        // index 0 is true and index 1 is false
        return iSexPosterior[0].GetProbs()[0];
    }

    /// <summary>
    /// Queries the model with exactly known parameters by wrapping each probability
    /// vector in a point-mass Dirichlet prior.
    /// </summary>
    /// <param name="imay">Observed iMay state, or null to leave iMay unobserved.</param>
    /// <param name="probiMay">Known iMay probability vector.</param>
    /// <param name="cptiSex">Known iSex probability vectors, one per iMay state.</param>
    /// <returns>Element 0 of the posterior distribution of the single iSex example.</returns>
    public double ProbiSex(
        int? imay,
        Vector probiMay,
        Vector[] cptiSex)
    {
        var probiMayPrior = Dirichlet.PointMass(probiMay);
        var cptiSexPrior = cptiSex.Select(v => Dirichlet.PointMass(v)).ToArray();
        return ProbiSex(imay, probiMayPrior, cptiSexPrior);
    }
}
public class BN
{
public static void infer()
{
Rand.Restart(12347);
BNmodel model = new BNmodel();
double a = 1 - 0.9895496250434754;
Vector probiMay = Vector.FromArray(a, 0.9895496250434754); //iMay, not iMay
a = 1 - 0.9904474054066148;
Vector[] cptiSex = new Vector[] { Vector.FromArray(0.9904474054066148,a) /* iMay */, Vector.FromArray(0.9999999961141224, (1 - 0.9999999961141224)) /* not iMay */ };
double probNotiSexGiveniMay = model.ProbiSex(0, probiMay, cptiSex);
double probiSexGivenNOTiMay = model.ProbiSex(1, probiMay, cptiSex);
Console.WriteLine("P(Not iSex | iMay) = {0:0.0000}", probNotiSexGiveniMay);
Console.WriteLine("P(iSex | not iMay) = {0:0.0000}", probiSexGivenNOTiMay);
// -------------------------------------------------------------
// Learn posterior distributions for the parameters
// -------------------------------------------------------------
Console.WriteLine("\n*********************************************");
Console.WriteLine("Learning parameters from data ");
Console.WriteLine("*********************************************");
int[] imay_ = new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
int[] isex_ = new int[] { 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0 };
// Now see if we can recover the parameters from the data - assume uniform priors
model.LearnParameters(imay_, isex_);
// The posteriors are distributions over the probabilities and CPTs. Print out the means of these
// distributions, and compare with the ground truth
Console.WriteLine("Prob. iMay: Ground truth: {0:0.00}, Inferred: {1:0.00}", 1 - 0.9895496250434754, model.ProbiMayPosterior.GetMean()[0]);
Console.WriteLine("Prob. iSex | iMay: Ground truth: {0:0.00}, Inferred: {1:0.00}", 0.9904, model.CPTiSexPosterior[0].GetMean()[0]);
Console.WriteLine("Prob. iSex | Not iMay: Ground truth: {0:0.00}, Inferred: {1:0.00}", 1.0000, model.CPTiSexPosterior[1].GetMean()[0]);
}
}
}
In the last few lines of the code, I try to learn the prior and CPT probabilities from the dataset, but the inferred probabilities are very different from the ground truth. The output is:
Prob. iMay: Ground truth: 0.01, Inferred: 0.99
Prob. iSex | iMay: Ground truth: 0.99, Inferred: 0.52
Prob. iSex | Not iMay: Ground truth: 1.00, Inferred: 0.33
I wonder what I am doing wrong here. Thank you so much for your help!
Jingwen