def create(self, network_factory: bayesianpy.network.NetworkFactory):
    """Build a fully disconnected network with one node per data column.

    Continuous columns become continuous variables, discrete columns
    become discrete variables (honouring the configured blank handling),
    and single-state nodes are pruned before returning.
    """
    network = network_factory.create()

    # One continuous variable per continuous column.
    if not dk.empty(self._continuous):
        for column in self._continuous.columns:
            builder.create_continuous_variable(network, column)

    # One discrete variable per discrete column.
    if not dk.empty(self._discrete):
        for column in self._discrete.columns:
            builder.create_discrete_variable(
                network, self._discrete, column, blanks=self._blanks)

    # Single-state nodes carry no information; drop them.
    network = bayesianpy.network.remove_single_state_nodes(network)
    return network
def create(self, network_factory):
    """Build a naive-Bayes style network with a latent cluster parent.

    Reuses an existing "Cluster" node when present, otherwise creates
    the latent variable, then links every continuous and discrete data
    column to it as a child.

    Fixes: ``logger.warn`` is a deprecated alias of ``logger.warning``;
    ``except BaseException`` also swallowed KeyboardInterrupt/SystemExit
    and is narrowed to ``Exception``.
    """
    network = network_factory.create()

    cluster = builder.try_get_node(network, "Cluster")
    if cluster is None:
        cluster = builder.create_cluster_variable(
            network, self._latent_states,
            variable_name=self._latent_variable_name)

    if not dk.empty(self._continuous):
        for c_name in self._continuous.columns:
            self._logger.info("Pre-processing {} column".format(c_name))
            c = builder.create_continuous_variable(network, c_name)
            try:
                builder.create_link(network, cluster, c)
            except ValueError as e:
                # best-effort: an unlinkable node is logged, not fatal
                self._logger.warning(e)

    if not dk.empty(self._discrete):
        for d_name in self._discrete.columns:
            if d_name in self._discrete_states:
                states = self._discrete_states[str(d_name)]
            else:
                # derive the state list from the observed unique values
                states = dk.compute(
                    self._discrete[str(d_name)].dropna().unique()).tolist()
            try:
                c = builder.create_discrete_variable(
                    network, self._discrete, str(d_name), states)
                builder.create_link(network, cluster, c)
            except Exception as e:
                # was BaseException — too broad; keep best-effort semantics
                self._logger.warning(e)

    return network
def create(self, network_factory):
    """Build a network where every node is a child of a named parent node.

    Creates a variable per data column, then links the configured
    parent node to every other node in the network.

    Bug fix: the discrete branch was guarded by ``if dk.empty(...)`` —
    i.e. it only ran when there was NO discrete data — which is inverted
    relative to every sibling builder; corrected to ``if not
    dk.empty(...)``. Also ``logger.warning`` over deprecated ``warn``
    and ``Exception`` over over-broad ``BaseException``.

    Raises:
        ValueError: if the configured parent node is not in the network.
    """
    network = network_factory.create()

    if not dk.empty(self._continuous):
        for c_name in self._continuous.columns:
            c = builder.create_continuous_variable(network, c_name)

    # Fixed inverted guard: process discrete columns when data exists.
    if not dk.empty(self._discrete):
        for d_name in self._discrete.columns:
            if d_name in self._discrete_states:
                states = self._discrete_states[d_name]
            else:
                states = dk.compute(
                    self._discrete[d_name].dropna().unique()).tolist()
            try:
                c = builder.create_discrete_variable(
                    network, self._discrete, d_name, states)
            except Exception as e:
                self._logger.warning(e)

    parent_node = builder.try_get_node(network, self._parent_node)
    if parent_node is None:
        raise ValueError("Parent node: {} not recognised".format(
            self._parent_node))

    # Link the parent to every other node in the network.
    for node in network.getNodes():
        if node == parent_node:
            continue
        builder.create_link(network, parent_node, node)

    return network
def main():
    """Train a cluster model on the iris data and plot each head
    variable's conditional distribution (with variance) given the class."""
    logger = logging.getLogger()
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.INFO)
    bayesianpy.jni.attach(logger)

    db_folder = bayesianpy.utils.get_path_to_parent_dir(__file__)
    iris = pd.read_csv(os.path.join(db_folder, "data/iris.csv"),
                       index_col=False)

    # Latent cluster -> joint multivariate node over all measurements,
    # plus the class variable.
    network = bayesianpy.network.create_network()
    cluster = builder.create_cluster_variable(network, 4)

    joint_node = builder.create_multivariate_continuous_node(
        network,
        iris.drop('iris_class', axis=1).columns.tolist(),
        "joint")
    builder.create_link(network, cluster, joint_node)

    class_variable = builder.create_discrete_variable(
        network, iris, 'iris_class', iris['iris_class'].unique())
    builder.create_link(network, cluster, class_variable)

    head_variables = [
        'sepal_length', 'sepal_width', 'petal_length', 'petal_width'
    ]

    with bayesianpy.data.DataSet(iris, db_folder, logger) as dataset:
        model = bayesianpy.model.NetworkModel(network, logger)
        model.train(dataset)

        # One conditional query per measurement, given the class.
        queries = [
            bayesianpy.model.QueryConditionalJointProbability(
                head_variables=[hv], tail_variables=['iris_class'])
            for hv in head_variables
        ]

        (engine, _, _) = bayesianpy.model.InferenceEngine(network).create()
        query = bayesianpy.model.SingleQuery(network, engine, logger)
        results = query.query(queries, aslist=True)

        jd = bayesianpy.visual.JointDistribution()
        fig = plt.figure(figsize=(10, 10))
        for plot_index, result in enumerate(list(results)):
            ax = fig.add_subplot(2, 2, plot_index + 1)
            jd.plot_distribution_with_variance(
                ax, iris, queries[plot_index].get_head_variables(), result)
        plt.show()
def create(self, network_factory: bayesianpy.network.NetworkFactory):
    """Build a naive-Bayes network whose continuous columns are discretised.

    Every variable (discretised-continuous and discrete) becomes a child
    of a latent cluster node.

    Consistency fix: the unique discrete states are converted with
    ``.tolist()``, matching the sibling ``create()`` builders in this
    file, so downstream code receives a plain Python list rather than
    a numpy/dask array.
    """
    network = network_factory.create()
    cluster = builder.create_cluster_variable(network, self._latent_states)

    if not dk.empty(self._continuous):
        for c_name in self._continuous.columns:
            # Bin each continuous column per the configured binning mode.
            c = builder.create_discretised_variable(
                network, self._continuous, c_name,
                bin_count=self._bin_count,
                mode=self._binning_mode,
                zero_crossing=self._zero_crossing)
            builder.create_link(network, cluster, c)

    if not dk.empty(self._discrete):
        for d_name in self._discrete.columns:
            # .tolist() for consistency with the other builders here.
            states = dk.compute(
                self._discrete[d_name].dropna().unique()).tolist()
            c = builder.create_discrete_variable(
                network, self._discrete, d_name, states)
            builder.create_link(network, cluster, c)

    return network
def main():
    """Train a joint model on iris and plot pairwise covariance
    distributions, conditioned first on class+cluster, then on cluster.

    Bug fix: the subplot grid used ``total = n * (n + 1) / 2`` and
    ``fig.add_subplot(total / 2, 2, k)`` — true division yields a float
    row count, which matplotlib rejects for subplot geometry. Replaced
    with integer arithmetic (ceiling division for odd pair counts).
    """
    logger = logging.getLogger()
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.INFO)
    bayesianpy.jni.attach(logger)

    db_folder = bayesianpy.utils.get_path_to_parent_dir(__file__)
    iris = pd.read_csv(os.path.join(db_folder, "data/iris.csv"),
                       index_col=False)

    network = bayesianpy.network.create_network()
    cluster = builder.create_cluster_variable(network, 4)

    node = builder.create_multivariate_continuous_node(
        network, iris.drop('iris_class', axis=1).columns.tolist(), "joint")
    builder.create_link(network, cluster, node)

    class_variable = builder.create_discrete_variable(
        network, iris, 'iris_class', iris['iris_class'].unique())
    builder.create_link(network, cluster, class_variable)

    jd = bayesianpy.visual.JointDistribution()

    def plot(head_variables, results):
        # One subplot per unordered pair of head variables, in 2 columns.
        fig = plt.figure(figsize=(10, 10))
        n = len(head_variables) - 1
        total = n * (n + 1) // 2           # number of variable pairs
        rows = (total + 1) // 2            # integer row count for the grid
        k = 1
        for i, hv in enumerate(head_variables):
            for j in range(i + 1, len(head_variables)):
                ax = fig.add_subplot(rows, 2, k)
                jd.plot_distribution_with_covariance(
                    ax, iris, (head_variables[i], head_variables[j]),
                    results)
                k += 1
        plt.show()

    with bayesianpy.data.DataSet(iris, db_folder, logger) as dataset:
        model = bayesianpy.model.NetworkModel(network, logger)
        model.train(dataset)

        head_variables = [
            'sepal_length', 'sepal_width', 'petal_length', 'petal_width'
        ]

        query_type_class = bayesianpy.model.QueryConditionalJointProbability(
            head_variables=head_variables,
            tail_variables=['iris_class', 'Cluster'])

        (engine, _, _) = bayesianpy.model.InferenceEngine(network).create()
        query = bayesianpy.model.Query(network, engine, logger)
        results_class = query.execute([query_type_class])
        plot(head_variables, results_class)

        query_type_cluster = bayesianpy.model.QueryConditionalJointProbability(
            head_variables=head_variables, tail_variables=['Cluster'])
        results_cluster = query.execute([query_type_cluster])
        plot(head_variables, results_cluster)
from bayesianpy.network import Builder as builder
import bayesianpy.network

# Build the network structure: GraspPose depends on the task, the
# object size and a handful of discrete object attributes.
nt = bayesianpy.network.create_network()

# 'df' is the training dataframe supplied by the caller.
task = builder.create_discrete_variable(nt, df, 'task')
size = builder.create_continuous_variable(nt, 'size')
grasp_pose = builder.create_continuous_variable(nt, 'GraspPose')

builder.create_link(nt, size, grasp_pose)
builder.create_link(nt, task, grasp_pose)

for attribute in ['fill level', 'object shape', 'side graspable']:
    attribute_node = builder.create_discrete_variable(nt, df, attribute)
    builder.create_link(nt, attribute_node, grasp_pose)
    builder.create_link(nt, task, attribute_node)

# Write df to the data store, then train the model on it.
with bayesianpy.data.DataSet(
        df, bayesianpy.utils.get_path_to_parent_dir(__file__),
        logger) as dataset:
    model = bayesianpy.model.NetworkModel(nt, logger)
    model.train(dataset)

    # Query the trained model (multi-threaded batch query).
    results = model.batch_query(
        dataset, [bayesianpy.model.QueryModelStatistics()],
        append_to_df=False)
def main():
    """Train a cluster model on iris, score model-generated samples by
    cluster-weighted classification accuracy, and plot the scores.

    Pipeline: train on a split of iris -> per-cluster confusion-matrix
    accuracy -> sample new rows from the model -> cross-validation-style
    weighted score per sample -> scatter plots coloured by score.

    Fix: ``DataFrame.set_value()`` was deprecated in pandas 0.21 and
    removed in 1.0; replaced with the supported ``.at[]`` scalar setter.
    """
    logger = logging.getLogger()
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.INFO)
    bayesianpy.jni.attach(logger)

    db_folder = bayesianpy.utils.get_path_to_parent_dir(__file__)
    iris = pd.read_csv(os.path.join(db_folder, "data/iris.csv"),
                       index_col=False)

    network = bayesianpy.network.create_network()
    num_clusters = 3
    cluster = builder.create_cluster_variable(network, num_clusters)

    node = builder.create_multivariate_continuous_node(
        network, iris.drop('iris_class', axis=1).columns.tolist(), "joint")
    builder.create_link(network, cluster, node)

    class_variable = builder.create_discrete_variable(
        network, iris, 'iris_class', iris['iris_class'].unique())
    builder.create_link(network, cluster, class_variable)

    train, test = train_test_split(iris, test_size=0.7)

    # train the model and query the most likely states and probability
    # of each latent state.
    with bayesianpy.data.DataSet(iris, db_folder, logger) as dataset:
        model = bayesianpy.model.NetworkModel(network, logger)
        model.train(dataset.subset(train.index.tolist()))

        test_subset = dataset.subset(test.index.tolist())
        results = model.batch_query(
            test_subset,
            # creates columns Cluster$$Cluster0, Cluster$$Cluster1,
            # Cluster$$Cluster2, as suffix is set to an empty string.
            [
                bayesianpy.model.QueryStateProbability("Cluster", suffix=""),
                # creates column 'iris_class_maxlikelihood'
                bayesianpy.model.QueryMostLikelyState("iris_class"),
                # creates column 'Cluster_maxlikelihood'
                bayesianpy.model.QueryMostLikelyState("Cluster")
            ])

    cluster_accuracy = {}
    # get a list of cluster accuracies, using the Bayes Server
    # ConfusionMatrix class, weighted by the Cluster accuracy.
    with bayesianpy.data.DataSet(results, db_folder, logger) as resultset:
        for c in range(num_clusters):
            matrix = bayesianpy.jni.bayesServerAnalysis()\
                .ConfusionMatrix.create(
                    resultset.create_data_reader_command(), "iris_class",
                    "iris_class_maxlikelihood",
                    "Cluster$$Cluster{}".format(c))
            cluster_accuracy.update(
                {'Cluster{}'.format(c): matrix.getAccuracy()})

    # generate samples from the trained model, to give us some
    # additional testing data.
    samples = bayesianpy.model.Sampling(network).sample(
        num_samples=20).drop(["Cluster", "iris_class"], axis=1)

    reader = bayesianpy.data.DataFrameReader(samples)
    inference = bayesianpy.model.InferenceEngine(network).create_engine()
    evidence = bayesianpy.model.Evidence(network, inference)
    query = bayesianpy.model.SingleQuery(network, inference, logger)
    query_type = [
        bayesianpy.model.QueryStateProbability('Cluster', suffix="")
    ]

    # query the expected Cluster membership, and generate a wrapper for
    # comparing the values, weighted by cluster membership.
    while reader.read():
        result = query.query(query_type,
                             evidence=evidence.apply(reader.to_dict()))

        cv_results = []
        for i, (key, value) in enumerate(result.items()):
            n = bayesianpy.network.Discrete.fromstring(key)
            weighting = cluster_accuracy[n.state]
            cv_results.append(
                bayesianpy.jni.bayesServerAnalysis()
                .DefaultCrossValidationTestResult(
                    jp.JDouble(weighting),
                    jp.JObject(value, jp.java.lang.Object),
                    jp.java.lang.Double(jp.JDouble(value))))

        score = bayesianpy.jni.bayesServerAnalysis().CrossValidation.combine(
            jp.java.util.Arrays.asList(cv_results),
            bayesianpy.jni.bayesServerAnalysis()
            .CrossValidationCombineMethod.WEIGHTED_AVERAGE)

        # append the score on to the existing dataframe;
        # .at[] replaces the removed DataFrame.set_value().
        samples.at[reader.get_index(), 'score'] = score

    variables = [
        'sepal_length', 'sepal_width', 'petal_length', 'petal_width'
    ]
    cmap = plt.cm.get_cmap('Blues')
    fig = plt.figure(figsize=(10, 10))
    k = 1

    # plot each pair of variables: iris data faint, samples coloured
    # by their weighted score.
    for i, v in enumerate(variables):
        for j in range(i + 1, len(variables)):
            v1 = variables[j]
            ax = fig.add_subplot(3, 2, k)
            ax.set_title("{} vs {}".format(v, v1))
            ax.scatter(x=iris[v].tolist(), y=iris[v1].tolist(),
                       facecolors='none', alpha=0.1)
            h = ax.scatter(x=samples[v].tolist(), y=samples[v1].tolist(),
                           c=samples['score'].tolist(),
                           vmin=samples.score.min(),
                           vmax=samples.score.max(), cmap=cmap)
            k += 1

    fig.subplots_adjust(right=0.8)
    cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
    fig.colorbar(h, cax=cbar_ax)
    plt.show()