Example #1
def test_parallel_structure_learning():
    logps = -19.8282, -345.9527, -4847.59688, -604.0190
    for X, logp in zip(datasets, logps):
        model = BayesianNetwork.from_samples(X, algorithm='exact')
        model2 = BayesianNetwork.from_samples(X, algorithm='exact', n_jobs=2)
        assert_equal(model.log_probability(X).sum(), model2.log_probability(X).sum())
        assert_almost_equal(model.log_probability(X).sum(), logp, 4)
Example #2
    def _likelihoods(cls,
                     real_data,
                     synthetic_data,
                     metadata=None,
                     structure=None):
        metadata = cls._validate_inputs(real_data, synthetic_data, metadata)
        structure = metadata.get('structure', structure)
        fields = cls._select_fields(metadata, ('categorical', 'boolean'))

        if not fields:
            return np.full(len(real_data), np.nan)

        LOGGER.debug('Fitting the BayesianNetwork to the real data')
        if structure:
            if isinstance(structure, dict):
                structure = BayesianNetwork.from_json(
                    json.dumps(structure)).structure

            bn = BayesianNetwork.from_structure(real_data[fields].to_numpy(),
                                                structure)
        else:
            bn = BayesianNetwork.from_samples(real_data[fields].to_numpy(),
                                              algorithm='chow-liu')

        LOGGER.debug('Evaluating likelihood of the synthetic data')
        probabilities = []
        for _, row in synthetic_data[fields].iterrows():
            try:
                probabilities.append(bn.probability([row.to_numpy()]))
            except ValueError:
                probabilities.append(0)

        return np.asarray(probabilities)
Example #3
def _evaluate_bayesian_likelihood(train, test, metadata):
    LOGGER.info('Evaluating using Bayesian Likelihood.')

    train_mapped = _mapper(train, metadata)
    test_mapped = _mapper(test, metadata)

    structure_json = json.dumps(metadata['structure'])
    bn1 = BayesianNetwork.from_json(structure_json)
    bn2 = BayesianNetwork.from_structure(train_mapped, bn1.structure)

    l1 = np.mean(np.log(bn1.probability(train_mapped) + 1e-8))

    l2_probs = []
    failed = 0
    for item in test_mapped:
        try:
            l2_probs.append(bn2.probability([item]))
        except ValueError:
            failed += 1
            l2_probs.append(0)

    l2 = np.mean(np.log(np.asarray(l2_probs) + 1e-8))

    return pd.DataFrame([{
        "name": "Bayesian Likelihood",
        "syn_likelihood": l1,
        "test_likelihood": l2,
    }])
Example #4
def test_parallel_structure_learning():
    logps = -19.8282, -345.9527, -4847.59688, -604.0190
    for X, logp in zip(datasets, logps):
        model = BayesianNetwork.from_samples(X, algorithm='exact')
        model2 = BayesianNetwork.from_samples(X, algorithm='exact', n_jobs=2)
        assert_equal(model.log_probability(X).sum(), model2.log_probability(X).sum())
        assert_almost_equal(model.log_probability(X).sum(), logp, 4)
Example #5
def default_bayesian_likelihood(dataset, trainset, testset, meta):
    struct = glob.glob("data/*/{}_structure.json".format(dataset))
    assert len(struct) == 1
    bn1 = BayesianNetwork.from_json(struct[0])

    trainset_mapped = mapper(trainset, meta)
    testset_mapped = mapper(testset, meta)
    prob = []
    for item in trainset_mapped:
        try:
            prob.append(bn1.probability(item))
        except Exception:
            prob.append(1e-8)
    l1 = np.mean(np.log(np.asarray(prob) + 1e-8))

    bn2 = BayesianNetwork.from_structure(trainset_mapped, bn1.structure)
    prob = []
    for item in testset_mapped:
        try:
            prob.append(bn2.probability(item))
        except Exception:
            prob.append(1e-8)
    l2 = np.mean(np.log(np.asarray(prob) + 1e-8))

    return [{
        "name": "default",
        "syn_likelihood": l1,
        "test_likelihood": l2,
    }]
Example #6
def _evaluate_bayesian_likelihood(train, test, metadata):
    LOGGER.info('Evaluating using Bayesian Likelihood.')
    structure_json = json.dumps(metadata['structure'])
    bn1 = BayesianNetwork.from_json(structure_json)

    train_mapped = _mapper(train, metadata)
    test_mapped = _mapper(test, metadata)
    prob = []
    for item in train_mapped:
        try:
            prob.append(bn1.probability(item))
        except Exception:
            prob.append(1e-8)

    l1 = np.mean(np.log(np.asarray(prob) + 1e-8))

    bn2 = BayesianNetwork.from_structure(train_mapped, bn1.structure)
    prob = []

    for item in test_mapped:
        try:
            prob.append(bn2.probability(item))
        except Exception:
            prob.append(1e-8)

    l2 = np.mean(np.log(np.asarray(prob) + 1e-8))

    return pd.DataFrame([{
        "name": "Bayesian Likelihood",
        "syn_likelihood": l1,
        "test_likelihood": l2,
    }])
Example #7
def test_io_from_samples():
    model1 = BayesianNetwork.from_samples(X, weights=weights)
    model2 = BayesianNetwork.from_samples(data_generator)

    logp1 = model1.log_probability(X)
    logp2 = model2.log_probability(X)

    assert_array_almost_equal(logp1, logp2)
Example #8
def test_exact_nan_structure_learning():
    logps = -6.13764, -159.6505, -2055.76364, -201.73615
    for X, logp in zip(datasets_nan, logps):
        model = BayesianNetwork.from_samples(X, algorithm='exact')
        model2 = BayesianNetwork.from_samples(X, algorithm='exact-dp')

        assert_equal(model.log_probability(X).sum(), model2.log_probability(X).sum())
        assert_almost_equal(model.log_probability(X).sum(), logp, 4)  
Example #9
def test_exact_nan_structure_learning():
    logps = -6.13764, -159.6505, -2055.76364, -201.73615
    for X, logp in zip(datasets_nan, logps):
        model = BayesianNetwork.from_samples(X, algorithm='exact')
        model2 = BayesianNetwork.from_samples(X, algorithm='exact-dp')

        assert_equal(model.log_probability(X).sum(), model2.log_probability(X).sum())
        assert_almost_equal(model.log_probability(X).sum(), logp, 4)
Example #10
def test_from_structure():
    X = datasets[1]
    structure = ((1, 2), (4,), (), (), (3,))
    model = BayesianNetwork.from_structure(X, structure=structure)

    assert_equal(model.structure, structure)
    assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)

    model2 = BayesianNetwork.from_json(model.to_json())
    assert_equal(model2.structure, structure)
    assert_almost_equal(model2.log_probability(X).sum(), -344.38287, 4)
Example #11
def test_from_structure():
    X = datasets[1]
    structure = ((1, 2), (4,), (), (), (3,))
    model = BayesianNetwork.from_structure(X, structure=structure)

    assert_equal(model.structure, structure)
    assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)

    model2 = BayesianNetwork.from_json(model.to_json())
    assert_equal(model2.structure, structure)
    assert_almost_equal(model2.log_probability(X).sum(), -344.38287, 4)
Example #12
def test_io_from_structure():
    structure = ((2,), (0, 2), ())

    model1 = BayesianNetwork.from_structure(X=X, weights=weights,
        structure=structure)
    model2 = BayesianNetwork.from_structure(X=data_generator,
        structure=structure)

    logp1 = model1.log_probability(X)
    logp2 = model2.log_probability(X)

    assert_array_almost_equal(logp1, logp2)
Example #13
    def __init__(self):
        Pollution = DiscreteDistribution({'F': 0.9, 'T': 0.1})
        Smoker = DiscreteDistribution({'T': 0.3, 'F': 0.7})
        print(Smoker)
        Cancer = ConditionalProbabilityTable([
            ['T', 'T', 'T', 0.05],
            ['T', 'T', 'F', 0.95],
            ['T', 'F', 'T', 0.02],
            ['T', 'F', 'F', 0.98],
            ['F', 'T', 'T', 0.03],
            ['F', 'T', 'F', 0.97],
            ['F', 'F', 'T', 0.001],
            ['F', 'F', 'F', 0.999],
        ], [Pollution, Smoker])
        print(Cancer)
        XRay = ConditionalProbabilityTable([
            ['T', 'T', 0.9],
            ['T', 'F', 0.1],
            ['F', 'T', 0.2],
            ['F', 'F', 0.8],
        ], [Cancer])
        Dyspnoea = ConditionalProbabilityTable([
            ['T', 'T', 0.65],
            ['T', 'F', 0.35],
            ['F', 'T', 0.3],
            ['F', 'F', 0.7],
        ], [Cancer])
        s1 = Node(Pollution, name="Pollution")
        s2 = Node(Smoker, name="Smoker")
        s3 = Node(Cancer, name="Cancer")
        s4 = Node(XRay, name="XRay")
        s5 = Node(Dyspnoea, name="Dyspnoea")

        model = BayesianNetwork("Lung Cancer")
        model.add_states(s1, s2, s3, s4, s5)
        model.add_edge(s1, s3)
        model.add_edge(s2, s3)
        model.add_edge(s3, s4)
        model.add_edge(s3, s5)
        model.bake()
        self.model = model

        meta = []
        name_mapper = ["Pollution", "Smoker", "Cancer", "XRay", "Dyspnoea"]
        for i in range(self.model.node_count()):
            meta.append({
                "name": name_mapper[i],
                "type": "categorical",
                "size": 2,
                "i2s": ['T', 'F']
            })
        self.meta = meta
Example #14
def setup_titanic():
    # Build a model of the titanic disaster
    global titanic_network, passenger, gender, tclass

    # Passengers on the Titanic either survive or perish
    passenger = DiscreteDistribution({'survive': 0.6, 'perish': 0.4})

    # Gender, given survival data
    gender = ConditionalProbabilityTable(
        [['survive', 'male', 0.0], ['survive', 'female', 1.0],
         ['perish', 'male', 1.0], ['perish', 'female', 0.0]], [passenger])

    # Class of travel, given survival data
    tclass = ConditionalProbabilityTable(
        [['survive', 'first', 0.0], ['survive', 'second', 1.0],
         ['survive', 'third', 0.0], ['perish', 'first', 1.0],
         ['perish', 'second', 0.0], ['perish', 'third', 0.0]], [passenger])

    # State objects hold both the distribution, and a high level name.
    s1 = State(passenger, name="passenger")
    s2 = State(gender, name="gender")
    s3 = State(tclass, name="class")

    # Create the Bayesian network object with a useful name
    titanic_network = BayesianNetwork("Titanic Disaster")

    # Add the three nodes to the network
    titanic_network.add_nodes(s1, s2, s3)

    # Add transitions which represent conditional dependencies, where the
    # second node is conditionally dependent on the first node (Monty is
    # dependent on both guest and prize)
    titanic_network.add_edge(s1, s2)
    titanic_network.add_edge(s1, s3)
    titanic_network.bake()
Example #15
def test_from_structure():
    X = datasets[1]
    structure = ((1, 2), (4,), (), (), (3,))
    model = BayesianNetwork.from_structure(X, structure=structure)

    assert_equal(model.structure, structure)
    assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)

    model2 = BayesianNetwork.from_json(model.to_json())
    assert_equal(model2.structure, structure)
    assert_almost_equal(model2.log_probability(X).sum(), -344.38287, 4)

    model_dtype = type(model.states[0].distribution.parameters[0][0][0])
    model2_dtype = type(model2.states[0].distribution.parameters[0][0][0])
    assert_equal(model_dtype, model2_dtype)
Example #16
def test_from_json():
    model2 = BayesianNetwork.from_json(model.to_json())

    logp1 = model.log_probability(X)
    logp2 = model2.log_probability(X)
    logp = [
        -2.304186, -1.898721, -1.898721, -2.224144, -1.898721, -1.978764,
        -1.898721, -1.898721, -1.898721, -1.898721, -1.818679, -2.384229,
        -2.304186, -1.978764, -2.304186, -2.384229, -2.304186, -2.384229,
        -2.304186, -1.978764, -2.224144, -1.818679, -1.898721, -2.304186,
        -2.304186, -1.898721, -1.818679, -1.898721, -1.818679, -2.304186,
        -1.978764, -2.224144, -1.898721, -2.304186, -1.898721, -1.818679,
        -2.304186, -1.898721, -1.898721, -2.384229, -2.224144, -1.818679,
        -2.384229, -1.978764, -1.818679, -1.978764, -1.898721, -1.818679,
        -2.224144, -1.898721
    ]

    assert_array_almost_equal(logp1, logp2)
    assert_array_almost_equal(logp1, logp)
    assert_array_almost_equal(logp2, logp)

    model_dtype = type(
        list(model.states[0].distribution.parameters[0].keys())[0])
    model2_dtype = type(
        list(model2.states[0].distribution.parameters[0].keys())[0])
    assert_equal(model_dtype, model2_dtype)
Example #17
def createModel(train, test):
    print("I am in create model")

    header = [
        'acceleration_mean', 'acceleration_stdev', 'pitch1', 'pitch2',
        'pitch3', 'roll1', 'roll2', 'roll3', 'classes', 'total_accel_sensor_1',
        'total_accel_sensor_2', 'total_accel_sensor_4'
    ]

    start_time = datetime.now()
    print("Start time: ", start_time)

    model = BayesianNetwork.from_samples(train,
                                         algorithm='greedy',
                                         state_names=header)

    print("doing model.bake")
    model.bake()

    time = datetime.now() - start_time
    print("Time: ", time)

    predict = test['classes'].tolist()
    test['classes'] = None

    print("Evaluating predict...")
    test = test.to_numpy()
    pred_values = model.predict(test)

    pred_values = [x.item(2) for x in pred_values]
    main.calculate_accuracy(predict, pred_values)
Example #18
def test_guest_titanic():
    male = titanic_network.predict_proba({'gender': 'male'})
    female = titanic_network.predict_proba({'gender': 'female'})

    assert_equal(female[0].log_probability("survive"), 0.0)
    assert_equal(female[0].log_probability("perish"), float("-inf"))

    assert_equal(female[1].log_probability("male"), float("-inf"))
    assert_equal(female[1].log_probability("female"), 0.0)

    assert_equal(female[2].log_probability("first"), float("-inf"))
    assert_equal(female[2].log_probability("second"), 0.0)
    assert_equal(female[2].log_probability("third"), float("-inf"))

    assert_equal(male[0].log_probability("survive"), float("-inf"))
    assert_equal(male[0].log_probability("perish"), 0.0)

    assert_equal(male[1].log_probability("male"), 0.0)
    assert_equal(male[1].log_probability("female"), float("-inf"))

    assert_equal(male[2].log_probability("first"), 0.0)
    assert_equal(male[2].log_probability("second"), float("-inf"))
    assert_equal(male[2].log_probability("third"), float("-inf"))

    titanic_network2 = BayesianNetwork.from_json(titanic_network.to_json())
Example #19
def test_guest_titanic():
    male = titanic_network.predict_proba({'gender': 'male'})
    female = titanic_network.predict_proba({'gender': 'female'})

    assert_equal(female[0].log_probability("survive"), 0.0)
    assert_equal(female[0].log_probability("perish"), float("-inf"))

    assert_equal(female[1].log_probability("male"), float("-inf"))
    assert_equal(female[1].log_probability("female"), 0.0)

    assert_equal(female[2].log_probability("first"), float("-inf"))
    assert_equal(female[2].log_probability("second"), 0.0)
    assert_equal(female[2].log_probability("third"), float("-inf"))

    assert_equal(male[0].log_probability("survive"), float("-inf"))
    assert_equal(male[0].log_probability("perish"), 0.0)

    assert_equal(male[1].log_probability("male"), 0.0)
    assert_equal(male[1].log_probability("female"), float("-inf"))

    assert_equal(male[2].log_probability("first"), 0.0)
    assert_equal(male[2].log_probability("second"), float("-inf"))
    assert_equal(male[2].log_probability("third"), float("-inf"))

    titanic_network2 = BayesianNetwork.from_json(titanic_network.to_json())
Example #20
def train_model(data: np.ndarray,
                clusters: int = 5,
                init_nodes: list = None) -> BayesianNetwork:

    bn = BayesNet()
    # Cluster the initial data in order to fill in a hidden variable based on the distribution of clusters
    kmeans = KMeans(n_clusters=clusters, random_state=0).fit(data)
    labels = kmeans.labels_
    hidden_dist = DiscreteDistribution.from_samples(labels)
    hidden_var = np.array(hidden_dist.sample(data.shape[0]))
    new_data = np.column_stack((data, hidden_var))
    latent = (new_data.shape[1]) - 1

    # Train the network structure on the data, taking the hidden variable into account
    bn = hc_rr(new_data, latent=latent, init_nodes=init_nodes)
    structure = []
    nodes = sorted(list(bn.nodes()))
    for rv in nodes:
        structure.append(tuple(bn.F[rv]['parents']))
    structure = tuple(structure)
    bn = BayesianNetwork.from_structure(new_data, structure)
    bn.bake()
    # Learn the hidden variable
    hidden_var = np.array([np.nan] * (data.shape[0]))
    new_data = np.column_stack((data, hidden_var))
    bn.predict(new_data)
    bn.fit(new_data)
    bn.bake()
    return (bn)
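
The helper above first clusters the data to seed a latent column, learns a structure over the augmented data with hc_rr, and then lets the fitted network impute and re-learn the hidden variable. A minimal usage sketch, with a purely hypothetical binary dataset, might look like this:

# Illustrative call only: `data` below is a made-up discrete dataset.
import numpy as np

rng = np.random.default_rng(0)
data = rng.integers(0, 2, size=(200, 4))     # 200 rows, 4 binary variables

bn = train_model(data, clusters=3)           # the latent variable becomes the last column

# Impute the hidden column for a few new rows, mirroring what train_model does internally.
hidden = np.array([np.nan] * 5)
print(bn.predict(np.column_stack((data[:5], hidden))))
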
Example #21
def launcher(N_jobs,
             model_filename,
             X_test,
             y_test,
             model_dirname='',
             save_dirname=''):
    model = BayesianNetwork.from_json(model_dirname + model_filename)
    model.freeze()
    jobs = []
    Nsamples_test = y_test.size
    Nsamples_per_process = Nsamples_test // N_jobs
    Number_of_hard_workers = Nsamples_test % N_jobs
    ind = 0
    for i in range(N_jobs):
        offset = Nsamples_per_process
        if i < Number_of_hard_workers:
            offset += 1
        X_part = X_test[ind:ind + offset, :].copy()
        y_part = y_test[ind:ind + offset].copy()
        if len(y_part) == 0:
            break
        p = multiprocessing.Process(target=worker,
                                    args=(i, model, X_part, y_part,
                                          save_dirname, 1))
        jobs.append(p)
        p.start()
        print "process %d with %d samples (%d-%d)" % (i + 1, y_part.size, ind +
                                                      1, ind + y_part.size)
        ind += offset
    for p in jobs:
        p.join()
    print "========================================================"
    print "Launcher has successfully finished his work"
Example #22
    def train(input_data, structure, fields, prior_data=None):
        """Creates bayesian networks from the given data with the given structure.

        The given data cannot contain any missing data. If called multiple
        times, the old model will be replaced.  To update the model with new
        data, see `update`.

        Args:
            input_data (SegmentedData): typed data to train on
            structure (iterable(iterable)): structure as returned from
                    define_bayes_net_structure
            fields (list(unicode)): field names to learn
            prior_data (list(data)): optional list of training samples to use
                    as a prior for each network.

        Return:
            BayesianNetworkModel: A predictive model training on the given data

        """
        type_to_network = {}
        for type_, data in input_data.type_to_data.items():
            if prior_data is not None:
                # Make defensive copy
                data = list(data) + list(prior_data)
            bayesian_network = BayesianNetwork.from_structure(data, structure)
            type_to_network[type_] = bayesian_network
        return BayesianNetworkModel(type_to_network,
                                    fields,
                                    segmenter=input_data.segmenter)
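
Since SegmentedData and BayesianNetworkModel are project-specific wrappers, the following is only a hedged sketch of the underlying pomegranate call this method performs for each data type; the toy dataset and structure are illustrative, not taken from the project:

import numpy as np
from pomegranate import BayesianNetwork

# Hypothetical discrete samples over three variables.
X = np.array([[0, 0, 0],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1]])
# Parent indices per node: node 1 depends on node 0, node 2 on nodes 0 and 1.
structure = ((), (0,), (0, 1))

bn = BayesianNetwork.from_structure(X, structure)
print(bn.probability([[1, 1, 1]]))   # likelihood of one fully observed row
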
Example #23
def predict():
    global row_to_predict

    with open('model_pomm.txt', 'r') as f:
        model = BayesianNetwork.from_json(f.read())

    row_to_predict['classes'] = None

    prediction_prob = model.predict_proba(row_to_predict.to_numpy())
    prediction_prob = prediction_prob[2].parameters[0]

    classes = ['sittingdown', 'standingup', 'walking', 'standing', 'sitting']

    result = []
    for item in prediction_prob.items():
        y = [classes[item[0]], round(item[1], 2)]
        result.append(y)

    result = sorted(result, key=itemgetter(1), reverse=True)

    return render_template('index.html',
                           has_sample=True,
                           main=True,
                           has_prediction=True,
                           has_predict_button=False,
                           sample=row,
                           result=result,
                           true_prediction=true_prediction)
Example #24
def read_model(name: str) -> BayesianNetwork:
    with open('models/' + name + '.json') as f:
        string_data = json.load(f)
    bn = BayesianNetwork.from_json(string_data)
    return bn
Example #25
def setup_monty():
    # Build a model of the Monty Hall Problem
    global monty_network, monty_index, prize_index, guest_index

    random.seed(0)

    # The guest's initial choice is completely random
    guest = DiscreteDistribution({'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3})

    # The actual prize is independent of the other distributions
    prize = DiscreteDistribution({'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3})
    # Monty is dependent on both the guest and the prize.
    monty = ConditionalProbabilityTable(
        [['A', 'A', 'A', 0.0],
         ['A', 'A', 'B', 0.5],
         ['A', 'A', 'C', 0.5],
         ['A', 'B', 'A', 0.0],
         ['A', 'B', 'B', 0.0],
         ['A', 'B', 'C', 1.0],
         ['A', 'C', 'A', 0.0],
         ['A', 'C', 'B', 1.0],
         ['A', 'C', 'C', 0.0],
         ['B', 'A', 'A', 0.0],
         ['B', 'A', 'B', 0.0],
         ['B', 'A', 'C', 1.0],
         ['B', 'B', 'A', 0.5],
         ['B', 'B', 'B', 0.0],
         ['B', 'B', 'C', 0.5],
         ['B', 'C', 'A', 1.0],
         ['B', 'C', 'B', 0.0],
         ['B', 'C', 'C', 0.0],
         ['C', 'A', 'A', 0.0],
         ['C', 'A', 'B', 1.0],
         ['C', 'A', 'C', 0.0],
         ['C', 'B', 'A', 1.0],
         ['C', 'B', 'B', 0.0],
         ['C', 'B', 'C', 0.0],
         ['C', 'C', 'A', 0.5],
         ['C', 'C', 'B', 0.5],
         ['C', 'C', 'C', 0.0]], [guest, prize])

    # Make the states
    s1 = State(guest, name="guest")
    s2 = State(prize, name="prize")
    s3 = State(monty, name="monty")

    # Make the bayes net, add the states, and the conditional dependencies.
    monty_network = BayesianNetwork("test")
    monty_network.add_nodes(s1, s2, s3)
    monty_network.add_edge(s1, s3)
    monty_network.add_edge(s2, s3)
    monty_network.bake()

    monty_index = monty_network.states.index(s3)
    prize_index = monty_network.states.index(s2)
    guest_index = monty_network.states.index(s1)
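
As a brief follow-up (assuming setup_monty() has been run), the network built above can be queried with partial evidence; the classic result is that the prize distribution shifts toward the door Monty left unopened:

setup_monty()

# Evidence: the guest picked door A and Monty opened door B.
beliefs = monty_network.predict_proba({'guest': 'A', 'monty': 'B'})
print(beliefs[prize_index].parameters[0])   # posterior over the prize location
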
Example #26
def partial_model_train(bn1: BayesianNetwork,
                        data: np.ndarray,
                        init_nodes: list = None,
                        clusters: int = 5) -> BayesianNetwork:

    hidden_input_var = np.array(bn1.marginal()[-1].sample(data.shape[0]))
    new_data = np.column_stack((hidden_input_var, data))
    bn = train_model(new_data, clusters=clusters, init_nodes=init_nodes)
    return (bn)
Example #27
    def __naive_algorithm(self, X):
        graph = networkx.DiGraph()
        for i in range(1, len(self.state_names)):
            graph.add_edge((0,), (i,))
        return BayesianNetwork.from_samples(
            X,
            algorithm=self.algorithm_name,
            state_names=self.state_names,
            root=0,
            constraint_graph=graph)
Example #28
    def fit_chow_liu(self, X_train, y_train, sequence_length_train):
        # TODO: use sequence_length_train
        self.formatted_labels = self.le.fit_transform(y_train)
        self.formatted_labels = self.formatted_labels.reshape(
            self.formatted_labels.shape[0], 1)
        X = np.concatenate((self.formatted_labels, X_train), axis=1)
        self.model = BayesianNetwork.from_samples(
            X,
            algorithm='chow-liu',
            state_names=self.state_names,
            root=0)
Example #29
    def fit(self, features, prediction, **kwargs):
        """Create a Bayesian network from the given samples"""
        data = pd.concat([features, prediction], axis='columns')

        self.model = BayesianNetwork.from_samples(X=data,
                                                  state_names=data.columns,
                                                  name="Insurance Advisor",
                                                  **kwargs)
        self.model.freeze()
        print("Training finished")
Example #30
    def __init__(self):
        Rain = DiscreteDistribution({'T': 0.2, 'F': 0.8})
        Sprinkler = ConditionalProbabilityTable([
            ['F', 'T', 0.4],
            ['F', 'F', 0.6],
            ['T', 'T', 0.1],
            ['T', 'F', 0.9],
        ], [Rain])
        Wet = ConditionalProbabilityTable([
            ['F', 'F', 'T', 0.01],
            ['F', 'F', 'F', 0.99],
            ['F', 'T', 'T', 0.8],
            ['F', 'T', 'F', 0.2],
            ['T', 'F', 'T', 0.9],
            ['T', 'F', 'F', 0.1],
            ['T', 'T', 'T', 0.99],
            ['T', 'T', 'F', 0.01],
        ], [Sprinkler, Rain])

        s1 = Node(Rain, name="Rain")
        s2 = Node(Sprinkler, name="Sprinkler")
        s3 = Node(Wet, name="Wet")

        model = BayesianNetwork("Simple fully connected")
        model.add_states(s1, s2, s3)
        model.add_edge(s1, s2)
        model.add_edge(s1, s3)
        model.add_edge(s2, s3)
        model.bake()
        self.model = model

        meta = []
        for i in range(self.model.node_count()):
            meta.append({
                "name": None,
                "type": "categorical",
                "size": 2,
                "i2s": ['T', 'F']
            })
        meta[0]['name'] = 'Rain'
        meta[1]['name'] = 'Sprinkler'
        meta[2]['name'] = 'Wet'
        self.meta = meta
Example #31
    def load(cls, data_store, filename):
        pgm_model = None
        if type(data_store) is LocalFileSystem:
            pgm_model = data_store.read_pomegranate_model(filename=filename)
        if type(data_store) is S3DataStore:
            local_filename = "/tmp/kronos.json"
            data_store.download_file(filename, local_filename)
            with open(local_filename, 'rb') as f:
                pgm_model = BayesianNetwork.from_json(f.read())
        return PGMPomegranate(pgm_model)
Example #32
    def __init__(self):
        A = DiscreteDistribution({'1': 1. / 3, '2': 1. / 3, '3': 1. / 3})
        B = ConditionalProbabilityTable([
            ['1', '1', 0.5],
            ['1', '2', 0.5],
            ['1', '3', 0],
            ['2', '1', 0],
            ['2', '2', 0.5],
            ['2', '3', 0.5],
            ['3', '1', 0.5],
            ['3', '2', 0],
            ['3', '3', 0.5],
        ], [A])
        C = ConditionalProbabilityTable([
            ['1', '4', 0.5],
            ['1', '5', 0.5],
            ['1', '6', 0],
            ['2', '4', 0],
            ['2', '5', 0.5],
            ['2', '6', 0.5],
            ['3', '4', 0.5],
            ['3', '5', 0],
            ['3', '6', 0.5],
        ], [A])

        s1 = Node(A, name="A")
        s2 = Node(B, name="B")
        s3 = Node(C, name="C")

        model = BayesianNetwork("tree")
        model.add_states(s1, s2, s3)
        model.add_edge(s1, s2)
        model.add_edge(s1, s3)
        model.bake()
        self.model = model

        meta = []
        for i in range(self.model.node_count() - 1):
            meta.append({
                "name": chr(ord('A') + i),
                "type": "categorical",
                "size": 3,
                "i2s": ['1', '2', '3']
            })
        meta.append({
            "name": "C",
            "type": "categorical",
            "size": 3,
            "i2s": ['4', '5', '6']
        })
        self.meta = meta
Example #33
def setup_monty():
    # Build a model of the Monty Hall Problem
    global monty_network, monty_index, prize_index, guest_index

    random.seed(0)

    # The guest's initial choice is completely random
    guest = DiscreteDistribution({'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3})

    # The actual prize is independent of the other distributions
    prize = DiscreteDistribution({'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3})
    # Monty is dependent on both the guest and the prize.
    monty = ConditionalProbabilityTable(
        [['A', 'A', 'A', 0.0],
         ['A', 'A', 'B', 0.5],
         ['A', 'A', 'C', 0.5],
         ['A', 'B', 'A', 0.0],
         ['A', 'B', 'B', 0.0],
         ['A', 'B', 'C', 1.0],
         ['A', 'C', 'A', 0.0],
         ['A', 'C', 'B', 1.0],
         ['A', 'C', 'C', 0.0],
         ['B', 'A', 'A', 0.0],
         ['B', 'A', 'B', 0.0],
         ['B', 'A', 'C', 1.0],
         ['B', 'B', 'A', 0.5],
         ['B', 'B', 'B', 0.0],
         ['B', 'B', 'C', 0.5],
         ['B', 'C', 'A', 1.0],
         ['B', 'C', 'B', 0.0],
         ['B', 'C', 'C', 0.0],
         ['C', 'A', 'A', 0.0],
         ['C', 'A', 'B', 1.0],
         ['C', 'A', 'C', 0.0],
         ['C', 'B', 'A', 1.0],
         ['C', 'B', 'B', 0.0],
         ['C', 'B', 'C', 0.0],
         ['C', 'C', 'A', 0.5],
         ['C', 'C', 'B', 0.5],
         ['C', 'C', 'C', 0.0]], [guest, prize])

    # Make the states
    s1 = State(guest, name="guest")
    s2 = State(prize, name="prize")
    s3 = State(monty, name="monty")

    # Make the bayes net, add the states, and the conditional dependencies.
    monty_network = BayesianNetwork("test")
    monty_network.add_nodes(s1, s2, s3)
    monty_network.add_edge(s1, s3)
    monty_network.add_edge(s2, s3)
    monty_network.bake()

    monty_index = monty_network.states.index(s3)
    prize_index = monty_network.states.index(s2)
    guest_index = monty_network.states.index(s1)
Example #34
def pomegranate_test():
    mydb = np.array([[1, 1, 1], [1, 1, 1], [0, 1, 1]])

    mymodel = BayesianNetwork.from_samples(mydb)

    # print(mymodel.node_count())

    # mymodel.plot()

    print(mymodel.probability([[1, 1, 1]]))
    print(mymodel.probability([[None, 1, 1]]))
    print(mymodel.predict_proba({}))
Example #35
def generateSkeleton(data):
    config = Config()
    dfrm = getDataFrames(data)
    print('LOG: Generate Skeleton')
    model = BayesianNetwork.from_samples(dfrm,
                                         algorithm='greedy',
                                         state_names=config.variables())
    model.bake()
    with open(
            'generatedSkeleton/skeletonGraph' + str(config.nOfBuckets()) +
            'buckets.txt', "w+") as f:
        f.write(model.to_json())
Example #36
def setup_titanic():
    # Build a model of the titanic disaster
    global titanic_network, passenger, gender, tclass

    # Passengers on the Titanic either survive or perish
    passenger = DiscreteDistribution({'survive': 0.6, 'perish': 0.4})

    # Gender, given survival data
    gender = ConditionalProbabilityTable(
        [['survive', 'male',   0.0],
         ['survive', 'female', 1.0],
         ['perish', 'male',    1.0],
         ['perish', 'female',  0.0]], [passenger])

    # Class of travel, given survival data
    tclass = ConditionalProbabilityTable(
        [['survive', 'first',  0.0],
         ['survive', 'second', 1.0],
         ['survive', 'third',  0.0],
         ['perish', 'first',  1.0],
         ['perish', 'second', 0.0],
         ['perish', 'third',  0.0]], [passenger])

    # State objects hold both the distribution, and a high level name.
    s1 = State(passenger, name="passenger")
    s2 = State(gender, name="gender")
    s3 = State(tclass, name="class")

    # Create the Bayesian network object with a useful name
    titanic_network = BayesianNetwork("Titanic Disaster")

    # Add the three nodes to the network
    titanic_network.add_nodes(s1, s2, s3)

    # Add transitions which represent conditional dependencies, where the
    # second node is conditionally dependent on the first node (Monty is
    # dependent on both guest and prize)
    titanic_network.add_edge(s1, s2)
    titanic_network.add_edge(s1, s3)
    titanic_network.bake()
Example #37
def test_greedy_nan_structure_learning():
    logps = -7.5239, -159.6505, -2058.5706, -203.7662
    for X, logp in zip(datasets_nan, logps):
        model = BayesianNetwork.from_samples(X, algorithm='greedy')
        assert_almost_equal(model.log_probability(X).sum(), logp, 4)
Example #38
def test_greedy_structure_learning():
    logps = -19.8282, -345.9527, -4847.59688, -611.0356
    for X, logp in zip(datasets, logps):
        model = BayesianNetwork.from_samples(X, algorithm='greedy')
        assert_almost_equal(model.log_probability(X).sum(), logp, 4)
Example #39
def test_chow_liu_structure_learning():
    logps = -19.8282, -344.248785, -4842.40158, -603.2370
    for X, logp in zip(datasets, logps):
        model = BayesianNetwork.from_samples(X, algorithm='chow-liu')
        assert_almost_equal(model.log_probability(X).sum(), logp, 4)
Example #40
def setup_huge_monty():
    # Build the huge Monty Hall huge_monty_network. This is a made-up example
    # which may not flow entirely logically, but it tests a variety of table
    # types and ensures that heterogeneous types of data work together.
    global huge_monty_network, huge_monty_friend, huge_monty_guest, huge_monty
    global huge_monty_remaining, huge_monty_randomize, huge_monty_prize

    # Huge_Monty_Friend
    huge_monty_friend = DiscreteDistribution({True: 0.5, False: 0.5})

    # Huge_Monty_Guest emissions are completely random
    huge_monty_guest = ConditionalProbabilityTable(
        [[True, 'A', 0.50],
         [True, 'B', 0.25],
         [True, 'C', 0.25],
         [False, 'A', 0.0],
         [False, 'B', 0.7],
         [False, 'C', 0.3]], [huge_monty_friend])

    # Number of huge_monty_remaining cars
    huge_monty_remaining = DiscreteDistribution({0: 0.1, 1: 0.7, 2: 0.2, })

    # Whether they huge_monty_randomize depends on the number of
    # huge_monty_remaining cars
    huge_monty_randomize = ConditionalProbabilityTable(
        [[0, True, 0.05],
         [0, False, 0.95],
         [1, True, 0.8],
         [1, False, 0.2],
         [2, True, 0.5],
         [2, False, 0.5]], [huge_monty_remaining])

    # Where the huge_monty_prize is depends on whether they huge_monty_randomize
    # and also on the huge_monty_guest's huge_monty_friend
    huge_monty_prize = ConditionalProbabilityTable(
        [[True, True, 'A', 0.3],
         [True, True, 'B', 0.4],
         [True, True, 'C', 0.3],
         [True, False, 'A', 0.2],
         [True, False, 'B', 0.4],
         [True, False, 'C', 0.4],
         [False, True, 'A', 0.1],
         [False, True, 'B', 0.9],
         [False, True, 'C', 0.0],
         [False, False, 'A', 0.0],
         [False, False, 'B', 0.4],
         [False, False, 'C', 0.6]], [huge_monty_randomize, huge_monty_friend])

    # Monty is dependent on both the huge_monty_guest and the huge_monty_prize.
    huge_monty = ConditionalProbabilityTable(
        [['A', 'A', 'A', 0.0],
         ['A', 'A', 'B', 0.5],
         ['A', 'A', 'C', 0.5],
         ['A', 'B', 'A', 0.0],
         ['A', 'B', 'B', 0.0],
         ['A', 'B', 'C', 1.0],
         ['A', 'C', 'A', 0.0],
         ['A', 'C', 'B', 1.0],
         ['A', 'C', 'C', 0.0],
         ['B', 'A', 'A', 0.0],
         ['B', 'A', 'B', 0.0],
         ['B', 'A', 'C', 1.0],
         ['B', 'B', 'A', 0.5],
         ['B', 'B', 'B', 0.0],
         ['B', 'B', 'C', 0.5],
         ['B', 'C', 'A', 1.0],
         ['B', 'C', 'B', 0.0],
         ['B', 'C', 'C', 0.0],
         ['C', 'A', 'A', 0.0],
         ['C', 'A', 'B', 1.0],
         ['C', 'A', 'C', 0.0],
         ['C', 'B', 'A', 1.0],
         ['C', 'B', 'B', 0.0],
         ['C', 'B', 'C', 0.0],
         ['C', 'C', 'A', 0.5],
         ['C', 'C', 'B', 0.5],
         ['C', 'C', 'C', 0.0]], [huge_monty_guest, huge_monty_prize])

    # Make the states
    s0 = State(huge_monty_friend, name="huge_monty_friend")
    s1 = State(huge_monty_guest, name="huge_monty_guest")
    s2 = State(huge_monty_prize, name="huge_monty_prize")
    s3 = State(huge_monty, name="huge_monty")
    s4 = State(huge_monty_remaining, name="huge_monty_remaining")
    s5 = State(huge_monty_randomize, name="huge_monty_randomize")

    # Make the bayes net, add the states, and the conditional dependencies.
    huge_monty_network = BayesianNetwork("test")
    huge_monty_network.add_nodes(s0, s1, s2, s3, s4, s5)
    huge_monty_network.add_transition(s0, s1)
    huge_monty_network.add_transition(s1, s3)
    huge_monty_network.add_transition(s2, s3)
    huge_monty_network.add_transition(s4, s5)
    huge_monty_network.add_transition(s5, s2)
    huge_monty_network.add_transition(s0, s2)
    huge_monty_network.bake()