示例#1
0
def TwoCoins():
    """Model two Bernoulli(0.5) coins and query the engine twice.

    First prints the inferred distribution of "both coins are heads". Then
    observes that conjunction to be False and prints the inferred
    distribution of the first coin.
    """
    coin_a = Variable.Bernoulli(0.5)
    coin_b = Variable.Bernoulli(0.5)
    # The `&` of the two variables, spelled as an explicit operator-method call.
    both_heads = coin_a.op_BitwiseAnd(coin_a, coin_b)
    inference = InferenceEngine()

    both_heads_posterior = inference.Infer(both_heads)
    print("Probability both coins are heads: %s" % both_heads_posterior)

    # Condition on "not both heads" and ask about the first coin again.
    both_heads.ObservedValue = False
    first_coin_posterior = inference.Infer(coin_a)
    print("Probability distribution over firstCoin: %s" % first_coin_posterior)
示例#2
0
def TruncatedGaussianEfficient():
    """Infer a Gaussian variable constrained to exceed an observed threshold.

    The threshold is itself a model variable; its observed value is
    reassigned on every pass of the loop and the same engine is queried each
    time. Eleven thresholds are used, computed as ``step * 0.1`` for
    ``step`` in 0..10.
    """
    bound = Variable.New[Double]()
    gaussian = Variable.GaussianFromMeanAndVariance(0.0, 1.0)
    Variable.ConstrainTrue(gaussian.op_GreaterThan(gaussian, bound))
    inference = InferenceEngine()
    for step in range(11):
        level = step * 0.1
        bound.ObservedValue = level
        posterior = inference.Infer(gaussian)
        print("Dist over x given thresh of %s = %s" % (level, posterior))
示例#3
0
def BayesPointMachine(incomes, ages, w, y):
    """Define the Bayes point machine relationship between `w` and labels `y`.

    Each (income, age) pair becomes a feature vector with a trailing constant
    1. For every index of ``y.Range`` the label is defined as "a Gaussian
    whose mean is the inner product of `w` with the feature vector, and whose
    variance is 0.1, is greater than 0".

    Args:
        incomes: Iterable of income values, paired positionally with `ages`.
        ages: Iterable of age values.
        w: Weight variable passed to ``Variable.InnerProduct``.
        y: Variable array of labels; its ``Range`` indexes the data.
    """
    index = y.Range
    features = []
    for income, age in zip(incomes, ages):
        features.append(Vector.FromArray(income, age, 1))
    # VariableObserved is used here because the direct generic call
    #   Variable.Observed[Vector](Array[Vector](xData))
    # does not work in pythonnet.
    observed = VariableObserved(features, index)

    noise_variance = 0.1
    score = Variable.InnerProduct(w, observed.get_Item(index))
    noisy_score = Variable.GaussianFromMeanAndVariance(score, noise_variance)
    is_positive = noisy_score.op_GreaterThan(noisy_score, 0.0)
    y.set_Item(index, is_positive)
示例#4
0
def HelloStrings():
    """Infer two unknown strings whose space-joined concatenation is observed.

    Two uniformly distributed string variables are joined with a single space
    and the result is observed to be "hello uncertain world". Prints the
    posterior over each string, then the probability the first string's
    posterior assigns to three candidate values.
    """
    str1 = Variable.StringUniform()
    str2 = Variable.StringUniform()
    # TODO: text = str1 + " " + str2
    # Until then the concatenation is spelled with explicit operator calls.
    text = str1.op_Addition(str1, " ")
    text = text.op_Addition(text, str2)
    text.ObservedValue = "hello uncertain world"

    engine = InferenceEngine()
    # Infer str1 once and reuse the posterior for both the summary line and
    # the per-candidate probabilities; the model does not change in between.
    dist_of_str1 = engine.Infer(str1)
    print(f"str1: {dist_of_str1}")
    print(f"str2: {engine.Infer(str2)}")
    for s in ("hello", "hello uncertain", "hello uncertain world"):
        print(f"P(str1 = '{s}') = {dist_of_str1.GetProb(s)}")
示例#5
0
def LearningAGaussian():
    """Learn the mean and precision of a Gaussian from 100 sampled points.

    One observed model variable is created per data point (no ranges or
    arrays are used). Prints the inferred posteriors over the mean and the
    precision.
    """
    # Reseed the Infer.NET random number generator before sampling.
    Rand.Restart(12347)
    samples = []
    for _ in range(100):
        samples.append(Rand.Normal(0.0, 1.0))

    mean = Variable.GaussianFromMeanAndVariance(0.0, 1.0).Named("mean")
    precision = Variable.GammaFromShapeAndScale(1.0, 1.0).Named("precision")

    # Each sample gets its own Gaussian variable tied to the shared parameters.
    for sample in samples:
        observation = Variable.GaussianFromMeanAndPrecision(mean, precision)
        observation.ObservedValue = sample

    inference = InferenceEngine()
    mean_posterior = inference.Infer(mean)
    print("mean=%s" % mean_posterior)
    precision_posterior = inference.Infer(precision)
    print("precision=%s" % precision_posterior)
示例#6
0
def LearningAGaussianWithRanges():
    """Learn the mean and precision of a Gaussian using a ranged array.

    Same model as the one-variable-per-point formulation, but all 100 data
    points live in a single variable array indexed by a range named "n".
    Prints the inferred posteriors over the mean and the precision.
    """
    # Reseed the Infer.NET random number generator before sampling.
    Rand.Restart(12347)
    samples = [Rand.Normal(0.0, 1.0) for _ in range(100)]

    mean = Variable.GaussianFromMeanAndVariance(0.0, 1.0).Named("mean")
    precision = Variable.GammaFromShapeAndScale(1.0, 1.0).Named("precision")

    n = Range(len(samples)).Named("n")
    observations = Variable.Array[Double](n)
    # Every element of the array is drawn from the same Gaussian.
    observations.set_Item(
        n,
        Variable.GaussianFromMeanAndPrecision(mean, precision).ForEach(n),
    )
    observations.ObservedValue = samples

    inference = InferenceEngine()
    mean_posterior = inference.Infer(mean)
    print("mean=%s" % mean_posterior)
    precision_posterior = inference.Infer(precision)
    print("precision=%s" % precision_posterior)
示例#7
0
def ClinicalTrial():
    """Compare "treatment is effective" against "treatment has no effect".

    The observed outcomes of a control group and a treated group are
    explained by one of two alternatives, selected by the Boolean variable
    `is_effective` (prior Bernoulli(0.5)):

    * effective: each group has its own success probability, each with a
      Beta(1, 1) prior;
    * not effective: both groups share one success probability with a
      Beta(1, 1) prior.

    Prints the posterior over `is_effective` and the posterior means of the
    two per-group success probabilities.
    """
    # Data from clinical trial
    control_group_data = [False, False, True, False, False]
    control_group = VariableObserved(control_group_data)

    treated_group_data = [True, False, True, True, True]
    treated_group = VariableObserved(treated_group_data)

    i = control_group.Range
    j = treated_group.Range

    # Prior on being effective treatment
    is_effective = Variable.Bernoulli(0.5)

    # Model if treatment is effective. The block is closed in `finally` so an
    # error while defining the branch cannot leave it open (the Python
    # equivalent of a C# `using` statement).
    effective_block = Variable.If(is_effective)
    try:
        probIfControl = Variable.Beta(1.0, 1.0)
        control_group.set_Item(i, Variable.Bernoulli(probIfControl).ForEach(i))

        probIfTreated = Variable.Beta(1.0, 1.0)
        treated_group.set_Item(j, Variable.Bernoulli(probIfTreated).ForEach(j))
    finally:
        effective_block.Dispose()

    # Model if treatment is not effective: one shared success probability.
    not_effective_block = Variable.IfNot(is_effective)
    try:
        prob_all = Variable.Beta(1.0, 1.0)
        control_group.set_Item(i, Variable.Bernoulli(prob_all).ForEach(i))
        treated_group.set_Item(j, Variable.Bernoulli(prob_all).ForEach(j))
    finally:
        not_effective_block.Dispose()

    # Run inference and report the results.
    engine = InferenceEngine()
    print("Probability treatment has an effect = %s" %
          engine.Infer(is_effective))
    print("Probability of good outcome if given treatment = %s" %
          engine.Infer[Beta](probIfTreated).GetMean())
    print("Probability of good outcome if control = %s" %
          engine.Infer[Beta](probIfControl).GetMean())
示例#8
0
def MixtureOfGaussians():
    """Fit a two-component mixture of vector Gaussians to generated data.

    Defines per-component means and precisions, Dirichlet mixture weights and
    a latent component indicator for each of 300 data points, attaches data
    produced by `GenerateData`, initialises the indicators randomly, and
    prints the inferred weights, means and precisions.
    """
    # Define a range for the number of mixture components
    k = Range(2)

    # Mixture component means
    means = Variable.Array[Vector](k)
    means_k = Variable.VectorGaussianFromMeanAndPrecision(
        Vector.FromArray(0.0, 0.0),
        PositiveDefiniteMatrix.IdentityScaledBy(2, 0.01)).ForEach(k)
    means.set_Item(k, means_k)

    # Mixture component precisions
    precs = Variable.Array[PositiveDefiniteMatrix](k).Named("precs")
    precs_k = Variable.WishartFromShapeAndScale(
        100.0, PositiveDefiniteMatrix.IdentityScaledBy(2, 0.01)).ForEach(k)
    precs.set_Item(k, precs_k)

    # Mixture weights
    weights = Variable.Dirichlet(k, [1.0, 1.0]).Named("weights")

    # Create a variable array which will hold the data
    n = Range(300).Named("n")
    data = Variable.Array[Vector](n).Named("x")

    # Create latent indicator variable for each data point
    z = Variable.Array[Int32](n).Named("z")

    # The mixture of Gaussians model. Both blocks are closed in `finally`
    # (innermost first) so an error while defining the model cannot leave a
    # block open; this mirrors nested C# `using` statements.
    forEachBlock = Variable.ForEach(n)
    try:
        z.set_Item(n, Variable.Discrete(weights))
        switchBlock = Variable.Switch(z.get_Item(n))
        try:
            data.set_Item(
                n,
                Variable.VectorGaussianFromMeanAndPrecision(
                    means.get_Item(z.get_Item(n)),
                    precs.get_Item(z.get_Item(n))))
        finally:
            switchBlock.CloseBlock()
    finally:
        forEachBlock.CloseBlock()

    # Attach some generated data
    data.ObservedValue = GenerateData(n.SizeAsInt)

    # Initialise messages randomly to break symmetry
    zInit = Variable.Array[Discrete](n).Named("zInit")
    zInit.ObservedValue = [
        Discrete.PointMass(Rand.Int(k.SizeAsInt), k.SizeAsInt)
        for _ in range(n.SizeAsInt)
    ]
    # The following does not work in pythonnet:
    #   z.get_Item(n).InitialiseTo[Discrete].Overloads[Variable[Discrete]](zInit.get_Item(n))
    # so the InitialiseTo helper is used instead.
    InitialiseTo(z.get_Item(n), zInit.get_Item(n))

    # The inference
    engine = InferenceEngine()
    print("Dist over pi=%s" % engine.Infer(weights))
    print("Dist over means=\n%s" % engine.Infer(means))
    print("Dist over precs=\n%s" % engine.Infer(precs))
示例#9
0
def StringFormat():
    """Run a series of string-format inference queries with one engine.

    In order:
      1. infer the argument given a fixed template and an observed text;
      2. infer both argument and template from an observed text, where the
         template is built as: uniform string + non-word char + "{0}" +
         non-word char + uniform string;
      3. repeat with a different observation;
      4. add a second observed text sharing the same template;
      5. infer the text produced by the template for the argument "Boris".

    Each step prints the relevant posterior(s).
    """
    # Infer argument
    name = Variable.StringCapitalized()
    text = VariableStringFormat("My name is {0}.", name)
    text.ObservedValue = "My name is John."
    engine = InferenceEngine()
    engine.Compiler.RecommendedQuality = QualityBand.Experimental
    print(f"name is '{engine.Infer(name)}'")

    # Infer template
    name = Variable.StringCapitalized()
    # The following does not work in pythonnet:
    #   template = Variable.StringUniform() + Variable.CharNonWord()
    # so each `+` is an explicit op_Addition call. The prefix variable is
    # bound once and used as both receiver and left operand, rather than
    # constructing a throwaway variable just to reach the operator method.
    template = Variable.StringUniform()
    template = template.op_Addition(template, Variable.CharNonWord())
    template = template.op_Addition(template, "{0}")
    template = template.op_Addition(template, Variable.CharNonWord())
    template = template.op_Addition(template, Variable.StringUniform())
    text = VariableStringFormat(template, name)

    text.ObservedValue = "Hello, mate! I'm Dave."
    print(f"name is '{engine.Infer(name)}'")
    print(f"template is '{engine.Infer(template)}'")

    # With a slightly different observation.
    text.ObservedValue = "Hi! My name is John."
    print(f"name is '{engine.Infer(name)}'")
    print(f"template is '{engine.Infer(template)}'")

    # Provide more data to reduce ambiguity.
    name2 = Variable.StringCapitalized()
    text2 = VariableStringFormat(template, name2)
    text2.ObservedValue = "Hi! My name is Tom."
    print(f"name is '{engine.Infer(name)}'")
    print(f"name2 is '{engine.Infer(name2)}'")
    print(f"template is '{engine.Infer(template)}'")

    # Generate text with the learned template.
    text3 = VariableStringFormat(template, "Boris")
    print(f"text3 is '{engine.Infer(text3)}'")
示例#10
0
def MotifFinder():
    """Sample synthetic sequences containing a motif, then infer the motif back.

    Steps, in order:
      1. Reseed the random number generator and sample 50 sequences of length
         25 with `SampleMotifData`, using an 8-position "true" motif and a
         uniform background distribution. A few sampled values are asserted
         as a sanity check.
      2. Build a model in which each sequence either contains the motif at a
         uniformly distributed start position, surrounded by background
         characters, or consists of background characters only.
      3. Observe the sampled sequences and run inference (30 iterations,
         experimental quality band).
      4. Print the true and inferred position frequency matrices, then a
         colour-coded per-character comparison of predicted and ground-truth
         motif locations for the first 30 sequences.

    NOTE(review): the ForEach/If/IfNot blocks below are closed without
    try/finally, so an exception raised while defining the model would leave
    a block open -- confirm whether that matters for this script.
    """
    Rand.Restart(1337)

    # Sizes and probabilities used for sampling the synthetic data.
    SequenceCount = 50
    SequenceLength = 25
    MotifPresenceProbability = 0.8

    # One nucleobase distribution per motif position (8 positions).
    trueMotifNucleobaseDist = [
        NucleobaseDist(a=0.8, c=0.1, g=0.05, t=0.05),
        NucleobaseDist(a=0.0, c=0.9, g=0.05, t=0.05),
        NucleobaseDist(a=0.0, c=0.0, g=0.5, t=0.5),
        NucleobaseDist(a=0.25, c=0.25, g=0.25, t=0.25),
        NucleobaseDist(a=0.1, c=0.1, g=0.1, t=0.7),
        NucleobaseDist(a=0.0, c=0.0, g=0.9, t=0.1),
        NucleobaseDist(a=0.9, c=0.05, g=0.0, t=0.05),
        NucleobaseDist(a=0.5, c=0.5, g=0.0, t=0.0),
    ]

    # Characters outside the motif are drawn uniformly over the four bases.
    backgroundNucleobaseDist = NucleobaseDist(a=0.25, c=0.25, g=0.25, t=0.25)

    sequenceData, motifPositionData = SampleMotifData(SequenceCount, SequenceLength, 
                                                      MotifPresenceProbability, 
                                                      trueMotifNucleobaseDist, 
                                                      backgroundNucleobaseDist)

    # Sanity checks on the sampled data (these values are tied to the seed
    # passed to Rand.Restart above).
    assert(sequenceData[0] == "CTACTTCGAATTTACCCCTATATTT")
    # should be CTACTTCGAATTTACCCCTATATTT
    assert(len(sequenceData) == 50) 
    # In motifPositionData, -1 marks a sequence without a motif (see its use
    # in the printing loop at the end of this function).
    assert(motifPositionData[:10] ==[2, 15, -1, 0, 14, 5, -1, 5, 1, 9])
    assert(len(motifPositionData) == 50)
    # Char.MaxValue is a string '\uffff', so we convert the hex to decimal.
    # The pseudo-count vector has one entry per possible char value, all tiny
    # (1e-6) except for the four nucleobase characters set below.
    motif_nucleobase_pseudo_counts = PiecewiseVector.Constant(int('ffff', 16) + 1, 1e-6)
    # Cannot call managed PiecewiseVector object's indexer with ['A'], i.e. cannot do
    # motif_nucleobase_pseudo_counts['A'] = 2.0
    motif_nucleobase_pseudo_counts[ord('A')] = 2.0
    motif_nucleobase_pseudo_counts[ord('C')] = 2.0
    motif_nucleobase_pseudo_counts[ord('G')] = 2.0
    motif_nucleobase_pseudo_counts[ord('T')] = 2.0

    # --- Model definition ---
    motifLength = len(trueMotifNucleobaseDist)  # Assume we know the true motif length.
    motifCharsRange = Range(motifLength)
    # Per-position character probabilities of the motif, with a Dirichlet prior.
    motifNucleobaseProbs = Variable.Array[Vector](motifCharsRange)
    # Cannot do motifNucleobaseProbs[motifCharsRange] = Variable.Dirichlet...
    motifNucleobaseProbs.set_Item(motifCharsRange, Variable.Dirichlet(motif_nucleobase_pseudo_counts).ForEach(motifCharsRange))
    sequenceRange = Range(SequenceCount)
    sequences = Variable.Array[str](sequenceRange)

    # Motif start position per sequence: uniform over the
    # SequenceLength - motifLength + 1 possible values.
    motifPositions = Variable.Array[int](sequenceRange)
    motifPositions.set_Item(sequenceRange, Variable.DiscreteUniform(SequenceLength - motifLength + 1).ForEach(sequenceRange))

    # Whether each sequence contains the motif at all.
    motifPresence = Variable.Array[bool](sequenceRange)
    motifPresence.set_Item(sequenceRange, Variable.Bernoulli(MotifPresenceProbability).ForEach(sequenceRange))

    forEachBlock = Variable.ForEach(sequenceRange)
    # Branch 1: the sequence contains the motif.
    ifVar = Variable.If(motifPresence.get_Item(sequenceRange))

    motifChars = Variable.Array[Char](motifCharsRange)
    motifChars.set_Item(motifCharsRange, Variable.Char(motifNucleobaseProbs.get_Item(motifCharsRange)))
    motif = Variable.StringFromArray(motifChars)
    motifPos = motifPositions.get_Item(sequenceRange)

    # sequence = background (length = motif position) + motif
    #            + background (length = SequenceLength - motifLength - motif position)
    backgroundLengthRight = motifPos.op_Subtraction(SequenceLength - motifLength, motifPositions.get_Item(sequenceRange))
    backgroundLeft = VariableStringOfLength(motifPositions.get_Item(sequenceRange), backgroundNucleobaseDist)
    backgroundRight = VariableStringOfLength(backgroundLengthRight, backgroundNucleobaseDist)
    added_vars = backgroundLeft.op_Addition(backgroundLeft, motif)
    added_vars = added_vars.op_Addition(added_vars, backgroundRight)
    sequences.set_Item(sequenceRange, added_vars)

    ifVar.Dispose()

    # Branch 2: no motif, the whole sequence is background.
    ifNotVar = Variable.IfNot(motifPresence.get_Item(sequenceRange))

    sequences.set_Item(sequenceRange, VariableStringOfLength(SequenceLength, backgroundNucleobaseDist))

    ifNotVar.Dispose()
    forEachBlock.CloseBlock()

    # --- Inference ---
    sequences.ObservedValue = sequenceData
    engine = InferenceEngine()
    engine.NumberOfIterations = 30  #30
    engine.Compiler.RecommendedQuality = QualityBand.Experimental

    motifNucleobaseProbsPosterior = engine.Infer[Array[Dirichlet]](motifNucleobaseProbs)
    motifPresencePosterior = engine.Infer[Array[Bernoulli]](motifPresence)
    motifPositionPosterior = engine.Infer[Array[Discrete]](motifPositions)

    # --- Reporting ---
    # PrintMotifInferenceResults
    PrintPositionFrequencyMatrix("\nTrue position frequency matrix:",
                                 trueMotifNucleobaseDist,
                                 lambda dist, c: dist[c])  # Distributions.DiscreteChar indexer is implemented.

    PrintPositionFrequencyMatrix("\nInferred position frequency matrix mean:",
                                 motifNucleobaseProbsPosterior, # Array of Distributions.Dirichlet; mean of each is a PiecewiseVector
                                 lambda dist, c: dist.GetMean()[ord(c)])  # PiecewiseVector indexer is implemented, but not for strings...
    # TypeError: No method matches given arguments for get_Item: (<class 'str'>) -> need to do ord(c)
    # Tried importing Console and ConsoleColor from System which works in powershell but not in VS console.

    # Colour legend for the per-sequence comparison printed below.
    printc("\nPREDICTION   ", ConsoleColor.Yellow)
    printc("GROUND TRUTH    ", ConsoleColor.Red)
    printc("OVERLAP    \n\n", ConsoleColor.Green)
    for i in range(min(SequenceCount, 30)):
        # From here on motifPos is a plain Python value (predicted start
        # position, or -1 for "no motif"); it rebinds the name used for the
        # model variable above.
        motifPos = motifPositionPosterior[i].GetMode() if motifPresencePosterior[i].GetProbTrue() > 0.5 else -1

        # Walk the sequence, toggling the flags on at each motif's start index
        # and off again motifLength characters later.
        inPrediction, inGroundTruth = False, False
        for j in range(SequenceLength):
            if j == motifPos:
                inPrediction = True
            elif j == motifPos + motifLength:
                inPrediction = False
            if j == motifPositionData[i]:
                inGroundTruth = True
            elif j == motifPositionData[i] + motifLength:
                inGroundTruth = False

            # Default to the console's current colour when the character is in
            # neither the predicted nor the true motif.
            color = Console.ForegroundColor
            if (inPrediction and inGroundTruth):
                color = ConsoleColor.Green
            elif (inPrediction):
                color = ConsoleColor.Yellow
            elif inGroundTruth:
                color = ConsoleColor.Red
            printc(sequenceData[i][j], color)
        print(f"    P(has motif) = {motifPresencePosterior[i].GetProbTrue():.2f}", end="");
        if (motifPos != -1):
            print(f"   P(pos={motifPos}) = {motifPositionPosterior[i][motifPos]:.2f}", end="");
        print()