def test_guest_titanic():
    """Conditioning the Titanic network on gender must make every other
    marginal deterministic (log-probability 0 or -inf)."""
    male = titanic_network.predict_proba({'gender': 'male'})
    female = titanic_network.predict_proba({'gender': 'female'})

    neg_inf = float("-inf")
    # (marginals, distribution index, outcome label, expected log-probability)
    expectations = [
        (female, 0, "survive", 0.0),
        (female, 0, "perish", neg_inf),
        (female, 1, "male", neg_inf),
        (female, 1, "female", 0.0),
        (female, 2, "first", neg_inf),
        (female, 2, "second", 0.0),
        (female, 2, "third", neg_inf),
        (male, 0, "survive", neg_inf),
        (male, 0, "perish", 0.0),
        (male, 1, "male", 0.0),
        (male, 1, "female", neg_inf),
        (male, 2, "first", 0.0),
        (male, 2, "second", neg_inf),
        (male, 2, "third", neg_inf),
    ]
    for marginals, index, outcome, expected in expectations:
        assert_equal(marginals[index].log_probability(outcome), expected)

    # JSON round-trip must not raise.
    titanic_network2 = BayesianNetwork.from_json(titanic_network.to_json())
def _evaluate_bayesian_likelihood(train, test, metadata):
    """Return a one-row DataFrame with mean log-likelihoods.

    ``syn_likelihood`` scores the train rows under the metadata-declared
    structure; ``test_likelihood`` scores the test rows under a network
    refit on the train rows with that same structure.
    """
    LOGGER.info('Evaluating using Bayesian Likelihood.')
    train_mapped = _mapper(train, metadata)
    test_mapped = _mapper(test, metadata)

    bn1 = BayesianNetwork.from_json(json.dumps(metadata['structure']))
    bn2 = BayesianNetwork.from_structure(train_mapped, bn1.structure)

    # Vectorized scoring for the training rows; epsilon guards log(0).
    l1 = np.mean(np.log(bn1.probability(train_mapped) + 1e-8))

    probs = []
    failures = 0
    for row in test_mapped:
        try:
            probs.append(bn2.probability([row]))
        except ValueError:
            # Row contains a category combination the network cannot score.
            failures += 1
            probs.append(0)
    l2 = np.mean(np.log(np.asarray(probs) + 1e-8))

    return pd.DataFrame([{
        "name": "Bayesian Likelihood",
        "syn_likelihood": l1,
        "test_likelihood": l2,
    }])
def test_from_json():
    """A JSON round-trip must preserve log-probabilities and key dtypes."""
    model2 = BayesianNetwork.from_json(model.to_json())
    logp1 = model.log_probability(X)
    logp2 = model2.log_probability(X)

    expected = [
        -2.304186, -1.898721, -1.898721, -2.224144, -1.898721,
        -1.978764, -1.898721, -1.898721, -1.898721, -1.898721,
        -1.818679, -2.384229, -2.304186, -1.978764, -2.304186,
        -2.384229, -2.304186, -2.384229, -2.304186, -1.978764,
        -2.224144, -1.818679, -1.898721, -2.304186, -2.304186,
        -1.898721, -1.818679, -1.898721, -1.818679, -2.304186,
        -1.978764, -2.224144, -1.898721, -2.304186, -1.898721,
        -1.818679, -2.304186, -1.898721, -1.898721, -2.384229,
        -2.224144, -1.818679, -2.384229, -1.978764, -1.818679,
        -1.978764, -1.898721, -1.818679, -2.224144, -1.898721,
    ]

    assert_array_almost_equal(logp1, logp2)
    assert_array_almost_equal(logp1, expected)
    assert_array_almost_equal(logp2, expected)

    # Categorical keys must deserialize with the same dtype they were saved with.
    original_key = list(model.states[0].distribution.parameters[0].keys())[0]
    restored_key = list(model2.states[0].distribution.parameters[0].keys())[0]
    assert_equal(type(original_key), type(restored_key))
def predict():
    """Render the prediction page for the currently selected sample row.

    Loads the serialized Bayesian network from ``model_pomm.txt``,
    conditions it on the sample's features (with the target column
    cleared), and renders the class probabilities sorted descending.
    """
    global row_to_predict
    # BUG FIX: the original used a bare open() with no close(), leaking the
    # file handle; the context manager guarantees it is closed.
    with open('model_pomm.txt', 'r') as f:
        contents = f.read()
    model = BayesianNetwork.from_json(contents)

    # Unknown target: setting it to None makes the network infer it.
    row_to_predict['classes'] = None
    prediction_prob = model.predict_proba(row_to_predict.to_numpy())
    # Index 2 is the target node's distribution; parameters[0] maps
    # class index -> probability.
    prediction_prob = prediction_prob[2].parameters[0]

    classes = ['sittingdown', 'standingup', 'walking', 'standing', 'sitting']
    result = []
    for class_index, probability in prediction_prob.items():
        result.append([classes[class_index], round(probability, 2)])
    # Most probable class first.
    result = sorted(result, key=itemgetter(1), reverse=True)

    return render_template('index.html', has_sample=True, main=True,
                           has_prediction=True, has_predict_button=False,
                           sample=row, result=result,
                           true_prediction=true_prediction)
def _likelihoods(cls, real_data, synthetic_data, metadata=None, structure=None):
    """Score each synthetic row's probability under a BN fit on the real data.

    Returns an array of probabilities (one per synthetic row), or an array
    of NaN when no categorical/boolean fields are available.
    """
    metadata = cls._validate_inputs(real_data, synthetic_data, metadata)
    structure = metadata.get('structure', structure)
    fields = cls._select_fields(metadata, ('categorical', 'boolean'))
    if not fields:
        # Nothing to score; keep output length aligned with the input.
        return np.full(len(real_data), np.nan)

    LOGGER.debug('Fitting the BayesianNetwork to the real data')
    real_values = real_data[fields].to_numpy()
    if structure:
        if isinstance(structure, dict):
            # A dict structure is a serialized network; extract its topology.
            structure = BayesianNetwork.from_json(json.dumps(structure)).structure
        bn = BayesianNetwork.from_structure(real_values, structure)
    else:
        bn = BayesianNetwork.from_samples(real_values, algorithm='chow-liu')

    LOGGER.debug('Evaluating likelihood of the synthetic data')
    probabilities = []
    for _, row in synthetic_data[fields].iterrows():
        try:
            probabilities.append(bn.probability([row.to_numpy()]))
        except ValueError:
            # Category combinations unseen in the real data score as 0.
            probabilities.append(0)
    return np.asarray(probabilities)
def _evaluate_bayesian_likelihood(train, test, metadata):
    """Return a one-row DataFrame of mean log-likelihoods: train rows under
    the metadata-declared structure, test rows under a refit network."""
    LOGGER.info('Evaluating using Bayesian Likelihood.')

    bn1 = BayesianNetwork.from_json(json.dumps(metadata['structure']))
    train_mapped = _mapper(train, metadata)
    test_mapped = _mapper(test, metadata)

    def _mean_log_likelihood(network, rows):
        # Rows the network cannot score get a tiny floor probability;
        # the epsilon inside log() additionally guards log(0).
        probs = []
        for row in rows:
            try:
                probs.append(network.probability(row))
            except Exception:
                probs.append(1e-8)
        return np.mean(np.log(np.asarray(probs) + 1e-8))

    l1 = _mean_log_likelihood(bn1, train_mapped)

    bn2 = BayesianNetwork.from_structure(train_mapped, bn1.structure)
    l2 = _mean_log_likelihood(bn2, test_mapped)

    return pd.DataFrame([{
        "name": "Bayesian Likelihood",
        "syn_likelihood": l1,
        "test_likelihood": l2,
    }])
def default_bayesian_likelihood(dataset, trainset, testset, meta):
    """Compute mean log-likelihood metrics for a dataset.

    ``syn_likelihood`` scores the train rows under the dataset's reference
    structure file; ``test_likelihood`` scores the test rows under a network
    refit on the train rows with that same structure.  Returns a one-element
    list of metric dicts.
    """
    struct = glob.glob("data/*/{}_structure.json".format(dataset))
    assert len(struct) == 1
    bn1 = BayesianNetwork.from_json(struct[0])

    trainset_mapped = mapper(trainset, meta)
    testset_mapped = mapper(testset, meta)

    prob = []
    for item in trainset_mapped:
        # BUG FIX: the original bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt; only genuine scoring failures should be floored.
        try:
            prob.append(bn1.probability(item))
        except Exception:
            prob.append(1e-8)
    l1 = np.mean(np.log(np.asarray(prob) + 1e-8))

    bn2 = BayesianNetwork.from_structure(trainset_mapped, bn1.structure)
    prob = []
    for item in testset_mapped:
        try:
            prob.append(bn2.probability(item))
        except Exception:
            prob.append(1e-8)
    l2 = np.mean(np.log(np.asarray(prob) + 1e-8))

    return [{
        "name": "default",
        "syn_likelihood": l1,
        "test_likelihood": l2,
    }]
def launcher(N_jobs, model_filename, X_test, y_test, model_dirname='', save_dirname=''):
    # NOTE: Python 2 code (print statements, xrange, integer `/` division).
    # Splits the test set across N_jobs worker processes, each scoring its
    # slice against the frozen Bayesian network, then waits for all of them.
    model = BayesianNetwork.from_json(model_dirname + model_filename)
    model.freeze()
    jobs = []
    Nsamples_test = y_test.size
    # Base slice size per worker (Py2 integer division); the first
    # Nsamples_test % N_jobs workers each take one extra sample.
    Nsamples_per_process = Nsamples_test / N_jobs
    Number_of_hard_workers = Nsamples_test % N_jobs
    ind = 0
    for i in xrange(N_jobs):
        offset = Nsamples_per_process
        if i < Number_of_hard_workers:
            offset += 1
        X_part = X_test[ind:ind + offset, :].copy()
        y_part = y_test[ind:ind + offset].copy()
        # More workers than samples: remaining slices would be empty.
        if len(y_part) == 0:
            break
        p = multiprocessing.Process(target=worker, args=(i, model, X_part, y_part, save_dirname, 1))
        jobs.append(p)
        p.start()
        print "process %d with %d samples (%d-%d)" % (i + 1, y_part.size, ind + 1, ind + y_part.size)
        ind += offset
    # Block until every spawned worker has finished.
    for p in jobs:
        p.join()
    print "========================================================"
    print "Launcher has successfully finished his work"
def read_model(name: str) -> BayesianNetwork:
    """Load and deserialize the Bayesian network stored at models/<name>.json."""
    with open('models/' + name + '.json') as source:
        # NOTE(review): json.load parses the file first, so this only works
        # if the stored document is a JSON-encoded *string* (double-encoded)
        # — confirm against the code that writes these files.
        string_data = json.load(source)
    bn = BayesianNetwork.from_json(string_data)
    return bn
def load(cls, data_store, filename):
    """Rebuild a PGMPomegranate wrapper from a stored model.

    Local stores deserialize directly; S3 stores download to a temp file
    first.  Exact `type(...) is` checks are kept deliberately so subclasses
    are not silently matched.
    """
    pgm_model = None
    if type(data_store) is LocalFileSystem:
        pgm_model = data_store.read_pomegranate_model(filename=filename)
    if type(data_store) is S3DataStore:
        local_filename = "/tmp/kronos.json"
        data_store.download_file(filename, local_filename)
        with open(local_filename, 'rb') as model_file:
            pgm_model = BayesianNetwork.from_json(model_file.read())
    return PGMPomegranate(pgm_model)
def test_from_structure():
    """Building from an explicit structure, then round-tripping through JSON,
    must preserve both the topology and the data log-likelihood."""
    X = datasets[1]
    structure = ((1, 2), (4,), (), (), (3,))

    model = BayesianNetwork.from_structure(X, structure=structure)
    assert_equal(model.structure, structure)
    assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)

    model2 = BayesianNetwork.from_json(model.to_json())
    assert_equal(model2.structure, structure)
    # BUG FIX: the original re-checked `model` here, so the reloaded model's
    # log-probability was never actually tested.
    assert_almost_equal(model2.log_probability(X).sum(), -344.38287, 4)
def test_from_structure():
    """Building from an explicit structure, then round-tripping through JSON,
    must preserve the topology, the data log-likelihood, and the key dtype."""
    X = datasets[1]
    structure = ((1, 2), (4,), (), (), (3,))

    model = BayesianNetwork.from_structure(X, structure=structure)
    assert_equal(model.structure, structure)
    assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)

    model2 = BayesianNetwork.from_json(model.to_json())
    assert_equal(model2.structure, structure)
    # BUG FIX: the original re-checked `model` here, so the reloaded model's
    # log-probability was never actually tested.
    assert_almost_equal(model2.log_probability(X).sum(), -344.38287, 4)

    # Distribution keys must deserialize with the dtype they were saved with.
    model_dtype = type(model.states[0].distribution.parameters[0][0][0])
    model2_dtype = type(model2.states[0].distribution.parameters[0][0][0])
    assert_equal(model_dtype, model2_dtype)
def from_json(json_string, segmenter=None):
    """Create BayesianNetworkModel from the given json blob in string format

    Args:
        json_string (unicode): the string created by `from_json`

    Returns:
        BayesianNetworkModel: generative model equivalent to stored model
    """
    payload = json.loads(json_string)
    # Each per-type network is stored as a nested JSON object; re-serialize
    # it because BayesianNetwork.from_json expects a string.
    type_to_network = {
        type_: BayesianNetwork.from_json(json.dumps(network_json))
        for type_, network_json in payload['type_to_network'].items()
    }
    fields = list(payload['fieldnames'])
    return BayesianNetworkModel(type_to_network, fields, segmenter)
def test_from_json():
    """A JSON round-trip must reproduce the model's log-probabilities."""
    model2 = BayesianNetwork.from_json(model.to_json())
    logp1 = model.log_probability(X)
    logp2 = model2.log_probability(X)

    expected = [
        -2.304186, -1.898721, -1.898721, -2.224144, -1.898721,
        -1.978764, -1.898721, -1.898721, -1.898721, -1.898721,
        -1.818679, -2.384229, -2.304186, -1.978764, -2.304186,
        -2.384229, -2.304186, -2.384229, -2.304186, -1.978764,
        -2.224144, -1.818679, -1.898721, -2.304186, -2.304186,
        -1.898721, -1.818679, -1.898721, -1.818679, -2.304186,
        -1.978764, -2.224144, -1.898721, -2.304186, -1.898721,
        -1.818679, -2.304186, -1.898721, -1.898721, -2.384229,
        -2.224144, -1.818679, -2.384229, -1.978764, -1.818679,
        -1.978764, -1.898721, -1.818679, -2.224144, -1.898721,
    ]

    assert_array_almost_equal(logp1, logp2)
    assert_array_almost_equal(logp1, expected)
    assert_array_almost_equal(logp2, expected)
# Pull the train/test feature splits out of the result container `r` and
# build matching label vectors: 1.0 for positives, 0.0 for negatives.
Positive_Features_train = r["P_F_tr"]
Num_Positive_train = Positive_Features_train.shape[0]
Positive_Labels_train = np.ones(Num_Positive_train)

Positive_Features_test = r["P_F_te"]
Num_Positive_test = Positive_Features_test.shape[0]
Positive_Labels_test = np.ones(Num_Positive_test)

Negative_Features_train = r["N_F_tr"]
Num_Negative_train = Negative_Features_train.shape[0]
Negative_Labels_train = np.zeros(Num_Negative_train)

Negative_Features_test = r["N_F_te"]
Num_Negative_test = Negative_Features_test.shape[0]
Negative_Labels_test = np.zeros(Num_Negative_test)

print("Po_tr: ", Num_Positive_train, "Ne_tr: ", Num_Negative_train,
      "Po_te: ", Num_Positive_test, "Ne_te: ", Num_Negative_test)

bayes = BayesianNetwork.from_json(bayesnet_file)

# Negatives can be scored in one vectorized call; positives are scored
# one-by-one because unseen feature values raise KeyError and score as 0.
Negative_Features_train_prob = bayes.probability(Negative_Features_train)
Positive_Features_train_prob = np.zeros(Num_Positive_train)
for k in range(Num_Positive_train):
    try:
        Positive_Features_train_prob[k] = bayes.probability(
            Positive_Features_train[k])
    except KeyError:
        Positive_Features_train_prob[k] = 0

# Sorted negative probabilities, e.g. for threshold selection below.
data = Negative_Features_train_prob
data_sorted = np.sort(data)
# Train/score one strategy selected by `m` (chunk is truncated mid-branch:
# the Bayesian path continues past the visible code).
clf = xgb.XGBClassifier()
if m == "xGBoost":
    # Plain XGBoost on the original (unbalanced) training split.
    Feature_train = Features_train_o
    Label_train = Labels_train_o
    clf.fit(Feature_train, Label_train)
    Label_predict = clf.predict(Feature_test)
    Label_score = clf.predict_proba(Feature_test)
elif m == "SMOTE":
    # Oversample the minority class before fitting.
    sm = SMOTE()
    Feature_train, Label_train = sm.fit_sample(
        Features_train_o, Labels_train_o)
    clf.fit(Feature_train, Label_train)
    Label_predict = clf.predict(Feature_test)
    Label_score = clf.predict_proba(Feature_test)
elif m == "Bayesian":
    # Score samples under a pre-trained Bayesian network instead of xgboost.
    bayes = BayesianNetwork.from_json(bayes_name)
    Negative_Features_train_prob = bayes.probability(
        Negative_Features_train)
    Positive_Features_train_prob = np.zeros(
        (Num_Positive_train, 1))
    # Positives scored one-by-one: unseen feature values raise KeyError
    # and are treated as probability 0.
    for k in range(Num_Positive_train):
        try:
            Positive_Features_train_prob[k] = bayes.probability(
                Positive_Features_train[k])
        except KeyError:
            Positive_Features_train_prob[k] = 0
    max_prob = np.max(Positive_Features_train_prob)
    print(max_prob)
    if max_prob > 0:
def load_model(self, model_id):
    """Load a previously saved advisor model from its JSON snapshot."""
    path = 'model/advisor-{}.json'.format(model_id)
    with open(path, 'r') as model_file:
        self.model = BayesianNetwork.from_json(model_file.read())
def plot_model(model_id):
    """Load a previously saved advisor model and render its graph to PDF."""
    path = 'model/advisor-{}.json'.format(model_id)
    with open(path, 'r') as model_file:
        model = BayesianNetwork.from_json(model_file.read())
    model.plot('graph.pdf')
def read_pomegranate_model(self, filename):
    """Unpickle the stored JSON string and rebuild the Bayesian network."""
    path = os.path.join(self.src_dir, filename)
    with open(path, 'rb') as handle:
        # SECURITY: pickle.load executes arbitrary code if the file is
        # untrusted — only load model files from trusted storage.
        serialized = pickle.load(handle)
    model = BayesianNetwork.from_json(serialized)
    return model