Python Builder.create_cluster_variable示例，bayesianpy.network.Builder.create_cluster_variable Python示例

示例#1

0

显示文件

文件： template.py 项目： RenHongJia/bayesianpy

    def create(self, network_factory):
        """Build a network whose cluster node is linked to every data column.

        A network is obtained from ``network_factory.create()``. If it has no
        node named "Cluster", a cluster variable with ``self._latent_states``
        states is created. One continuous variable is then created per column
        of ``self._continuous`` and one discrete variable per column of
        ``self._discrete``; each is linked from the cluster node.

        Failures while linking or creating a variable are logged as warnings
        and the offending column is skipped, so one bad column does not abort
        the whole build.

        :param network_factory: object exposing ``create()`` that returns the
            network to populate.
        :return: the populated network.
        """
        network = network_factory.create()
        # NOTE(review): the lookup uses the literal "Cluster" while creation
        # uses self._latent_variable_name -- confirm these are meant to agree.
        cluster = builder.try_get_node(network, "Cluster")
        if cluster is None:
            cluster = builder.create_cluster_variable(
                network,
                self._latent_states,
                variable_name=self._latent_variable_name)

        if not dk.empty(self._continuous):
            for c_name in self._continuous.columns:
                self._logger.info("Pre-processing {} column".format(c_name))
                c = builder.create_continuous_variable(network, c_name)
                try:
                    builder.create_link(network, cluster, c)
                except ValueError as e:
                    # Logger.warn is a deprecated alias; use warning().
                    self._logger.warning(e)

        if not dk.empty(self._discrete):
            for d_name in self._discrete.columns:
                # Use one key form for both the membership test and the
                # lookup, so a non-string column label cannot pass the test
                # and then fail the lookup with KeyError.
                key = str(d_name)
                if key in self._discrete_states:
                    states = self._discrete_states[key]
                else:
                    states = dk.compute(
                        self._discrete[key].dropna().unique()).tolist()

                try:
                    c = builder.create_discrete_variable(
                        network, self._discrete, key, states)

                    builder.create_link(network, cluster, c)
                except Exception as e:
                    # Exception rather than BaseException, so that
                    # KeyboardInterrupt / SystemExit still propagate.
                    self._logger.warning(e)

        return network

示例#2

0

显示文件

文件： iris_gaussian_plot.py 项目： jmsCompany/bayesianpy

def main():
    """Train a four-state cluster model on the iris CSV and plot each feature.

    The network has a cluster node linked to a joint continuous node over all
    non-class columns and to a discrete ``iris_class`` node. After training,
    one conditional query per feature (given ``iris_class``) is run and each
    result is drawn into its own cell of a 2x2 figure.
    """
    log = logging.getLogger()
    log.addHandler(logging.StreamHandler())
    log.setLevel(logging.INFO)

    bayesianpy.jni.attach(log)

    base_dir = bayesianpy.utils.get_path_to_parent_dir(__file__)
    csv_path = os.path.join(base_dir, "data/iris.csv")
    iris = pd.read_csv(csv_path, index_col=False)

    net = bayesianpy.network.create_network()
    latent = builder.create_cluster_variable(net, 4)

    # Joint continuous node over every column except the class label.
    feature_columns = iris.drop('iris_class', axis=1).columns.tolist()
    joint = builder.create_multivariate_continuous_node(
        net, feature_columns, "joint")
    builder.create_link(net, latent, joint)

    species_states = iris['iris_class'].unique()
    species = builder.create_discrete_variable(
        net, iris, 'iris_class', species_states)
    builder.create_link(net, latent, species)

    features = [
        'sepal_length', 'sepal_width', 'petal_length', 'petal_width'
    ]

    with bayesianpy.data.DataSet(iris, base_dir, log) as dataset:
        trainer = bayesianpy.model.NetworkModel(net, log)
        trainer.train(dataset)

        # One query per feature, each conditioned on the class label.
        queries = []
        for feature in features:
            queries.append(
                bayesianpy.model.QueryConditionalJointProbability(
                    head_variables=[feature],
                    tail_variables=['iris_class']))

        engine, _, _ = bayesianpy.model.InferenceEngine(net).create()
        runner = bayesianpy.model.SingleQuery(net, engine, log)
        outcomes = runner.query(queries, aslist=True)

        plotter = bayesianpy.visual.JointDistribution()
        figure = plt.figure(figsize=(10, 10))

        for cell, outcome in enumerate(list(outcomes), start=1):
            axes = figure.add_subplot(2, 2, cell)
            heads = queries[cell - 1].get_head_variables()
            plotter.plot_distribution_with_variance(axes, iris, heads, outcome)

        plt.show()

示例#3

0

显示文件

    def create(self, network_factory: bayesianpy.network.NetworkFactory):
        """Build a cluster-rooted network with discretised continuous columns.

        A fresh network is taken from ``network_factory`` and a cluster
        variable with ``self._latent_states`` states is added. Every column of
        ``self._continuous`` becomes a discretised variable (using the
        configured bin count, binning mode and zero-crossing flag) and every
        column of ``self._discrete`` becomes a discrete variable whose states
        are the column's distinct non-null values. Each variable is linked
        from the cluster node.

        :param network_factory: factory used to obtain the empty network.
        :return: the populated network.
        """
        network = network_factory.create()
        latent = builder.create_cluster_variable(network, self._latent_states)

        has_continuous = not dk.empty(self._continuous)
        if has_continuous:
            for column in self._continuous.columns:
                binned = builder.create_discretised_variable(
                    network,
                    self._continuous,
                    column,
                    bin_count=self._bin_count,
                    mode=self._binning_mode,
                    zero_crossing=self._zero_crossing,
                )
                builder.create_link(network, latent, binned)

        has_discrete = not dk.empty(self._discrete)
        if has_discrete:
            for column in self._discrete.columns:
                distinct = self._discrete[column].dropna().unique()
                states = dk.compute(distinct)
                variable = builder.create_discrete_variable(
                    network, self._discrete, column, states)
                builder.create_link(network, latent, variable)

        return network

示例#4

0

显示文件

文件： analysis.py 项目： jmsCompany/bayesianpy

    def analyse(self, df: pd.DataFrame, continuous_variable_names: List[str]):
        """Flag, per variable, whether a larger cluster count fits it best.

        For every name in ``continuous_variable_names`` a small network
        (cluster node -> one continuous node) is trained for each candidate
        cluster count in ``[1, 5, 10, 20, 30]`` on every fold yielded by the
        k-fold splitter. The fold log-likelihoods are averaged, weighted by
        training-fold size, and the cluster count with the highest average is
        selected.

        :param df: data used to construct the training data set.
        :param continuous_variable_names: names of the variables to analyse.
        :return: dict mapping each variable name to ``True`` when the
            best-scoring cluster count is greater than 5, else ``False``.
        """
        kf = NewKFold(n_splits=3, shuffle=self._shuffle)

        network_factory = bayesianpy.network.NetworkFactory(self._logger)
        variations = [1, 5, 10, 20, 30]
        results = {}
        with bayesianpy.data.DataSet(df, logger=self._logger) as dataset:
            for variable in continuous_variable_names:
                likelihoods = []
                for cluster_count in variations:
                    weighted = []
                    weights = []
                    # Only the training indexes are used; the test half of
                    # each split is ignored here.
                    for train_indexes, _ in kf:
                        nt = network_factory.create()
                        cluster = builder.create_cluster_variable(
                            nt, cluster_count)
                        node = builder.create_continuous_variable(nt, variable)
                        builder.create_link(nt, cluster, node)

                        model = bayesianpy.model.NetworkModel(nt, self._logger)

                        try:
                            fold_ll = model.train(dataset.subset(
                                train_indexes)).get_metrics()['loglikelihood']
                        except Exception as e:
                            # Best effort: a fold that fails to train is
                            # logged and left out of the average. Exception
                            # (not BaseException) lets KeyboardInterrupt and
                            # SystemExit through.
                            self._logger.warning(e)
                            continue

                        weighted.append(fold_ll)
                        weights.append(len(train_indexes))

                    if weights:
                        likelihoods.append(
                            np.average(weighted, weights=weights))
                    else:
                        # Every fold failed: np.average would raise on empty
                        # weights. Record -inf so this cluster count can
                        # never win while the list stays aligned with
                        # `variations`.
                        likelihoods.append(float('-inf'))

                max_index = np.argmax(likelihoods)
                results[variable] = variations[max_index] > 5

        return results

示例#5

0

显示文件

文件： template.py 项目： RenHongJia/bayesianpy

    def create(self, network_factory: bayesianpy.network.NetworkFactory):
        """Wrap the inner template's network with a cluster parent node.

        The wrapped template builds the network; a cluster variable with
        ``self._latent_states`` states is then added and linked to every other
        node. When target nodes are configured, the links leaving each target
        are deleted and, if ``self._remove_target_node`` is set, a node
        removal is requested.

        :param network_factory: factory forwarded to the wrapped template.
        :return: the modified network.
        """
        network = self._template.create(network_factory)

        latent = builder.create_cluster_variable(network, self._latent_states)

        # Link the cluster node to every node except itself.
        others = (candidate
                  for candidate in bayesianpy.network.get_nodes(network)
                  if not candidate == latent)
        for other in others:
            builder.create_link(network, latent, other)

        if self._target_nodes is None:
            return network

        for target_name in self._target_nodes:
            target_node = builder.get_node(network, target_name)
            builder.delete_links_from(network, target_node)

            if self._remove_target_node:
                # NOTE(review): the whole target list, not the current
                # target, is passed on each iteration -- confirm against
                # the remove_node signature.
                bayesianpy.network.remove_node(network, self._target_nodes)

        return network

示例#6

0

显示文件

文件： iris_clustering_visualisation.py 项目： jmsCompany/bayesianpy

def main():
    """Train a cluster model on the iris CSV and plot pairwise distributions.

    The network has a four-state cluster node linked to a joint continuous
    node over all non-class columns and to a discrete ``iris_class`` node.
    After training, the joint distribution of the four features is queried
    twice -- conditioned on (``iris_class``, ``Cluster``) and on ``Cluster``
    alone -- and each result is drawn as a grid of pairwise plots.
    """
    logger = logging.getLogger()
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.INFO)

    bayesianpy.jni.attach(logger)

    db_folder = bayesianpy.utils.get_path_to_parent_dir(__file__)
    iris = pd.read_csv(os.path.join(db_folder, "data/iris.csv"),
                       index_col=False)

    network = bayesianpy.network.create_network()
    cluster = builder.create_cluster_variable(network, 4)
    node = builder.create_multivariate_continuous_node(
        network,
        iris.drop('iris_class', axis=1).columns.tolist(), "joint")
    builder.create_link(network, cluster, node)

    class_variable = builder.create_discrete_variable(
        network, iris, 'iris_class', iris['iris_class'].unique())
    builder.create_link(network, cluster, class_variable)

    jd = bayesianpy.visual.JointDistribution()

    def plot(head_variables, results):
        """Draw one subplot per unordered pair of ``head_variables``."""
        fig = plt.figure(figsize=(10, 10))

        # Number of unordered pairs, laid out two per row. Integer
        # arithmetic is required: add_subplot rejects a float row count,
        # and rounding up keeps the last plot when the pair count is odd.
        count = len(head_variables)
        pair_total = count * (count - 1) // 2
        rows = (pair_total + 1) // 2

        k = 1
        for i, first in enumerate(head_variables):
            for second in head_variables[i + 1:]:
                ax = fig.add_subplot(rows, 2, k)
                jd.plot_distribution_with_covariance(
                    ax, iris, (first, second), results)

                k += 1
        plt.show()

    with bayesianpy.data.DataSet(iris, db_folder, logger) as dataset:
        model = bayesianpy.model.NetworkModel(network, logger)
        model.train(dataset)

        head_variables = [
            'sepal_length', 'sepal_width', 'petal_length', 'petal_width'
        ]

        query_type_class = bayesianpy.model.QueryConditionalJointProbability(
            head_variables=head_variables,
            tail_variables=['iris_class', 'Cluster'])

        (engine, _, _) = bayesianpy.model.InferenceEngine(network).create()
        query = bayesianpy.model.Query(network, engine, logger)
        results_class = query.execute([query_type_class])

        plot(head_variables, results_class)

        query_type_cluster = bayesianpy.model.QueryConditionalJointProbability(
            head_variables=head_variables, tail_variables=['Cluster'])

        results_cluster = query.execute([query_type_cluster])

        plot(head_variables, results_cluster)

示例#7

0

显示文件

文件： iris_anomaly_detection.py 项目： jmsCompany/bayesianpy

def main():
    """Score every iris sample by log-likelihood under a two-class model.

    A cluster model over the four measurement columns is trained only on the
    Iris-versicolor and Iris-virginica rows. The model statistics are then
    queried for every row, and each pair of measurements is scatter-plotted
    with points coloured by their log-likelihood.
    """
    logger = logging.getLogger()
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.INFO)

    bayesianpy.jni.attach(logger)

    db_folder = bayesianpy.utils.get_path_to_parent_dir(__file__)
    iris = pd.read_csv(os.path.join(db_folder, "data/iris.csv"),
                       index_col=False)

    # manually build the network, leaving out the 'iris-class' variable.
    # The class column must actually be dropped here: the data set below
    # does not contain it, so the joint node must not reference it either.
    network = bayesianpy.network.create_network()
    cluster = builder.create_cluster_variable(network, 4)
    node = builder.create_multivariate_continuous_node(
        network,
        iris.drop('iris_class', axis=1).columns.tolist(),
        "joint")
    builder.create_link(network, cluster, node)

    with bayesianpy.data.DataSet(iris.drop('iris_class', axis=1), db_folder,
                                 logger) as dataset:

        # build the 'normal' model on two of the classes
        model = bayesianpy.model.NetworkModel(network, logger)

        subset = dataset.subset(
            iris[(iris.iris_class == "Iris-versicolor") |
                 (iris.iris_class == "Iris-virginica")].index.tolist())

        model.train(subset)

        variables = [
            'sepal_length', 'sepal_width', 'petal_length', 'petal_width'
        ]

        # query the trained model on all the data, including the Iris-setosa class

        # get the loglikelihood value for the whole model on each individual sample,
        # the lower the loglikelihood value the less likely the data point has been
        # generated by the model.
        results = model.batch_query(dataset,
                                    [bayesianpy.model.QueryModelStatistics()])
        # pyplot.get_cmap is used because matplotlib.cm.get_cmap has been
        # removed from recent Matplotlib releases.
        cmap = plt.get_cmap('Blues_r')
        fig = plt.figure(figsize=(10, 10))
        k = 1
        for i, v in enumerate(variables):
            for v1 in variables[i + 1:]:
                ax = fig.add_subplot(3, 2, k)
                ax.set_title("{} vs {}".format(v, v1))
                h = ax.scatter(x=iris[v].tolist(),
                               y=iris[v1].tolist(),
                               c=results['loglikelihood'].tolist(),
                               vmin=results.loglikelihood.min(),
                               vmax=results.loglikelihood.max(),
                               cmap=cmap)
                k += 1

        # Reserve a strip on the right for a colour bar shared by all plots.
        fig.subplots_adjust(right=0.8)
        cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
        fig.colorbar(h, cax=cbar_ax)
        plt.show()

示例#8

0

显示文件

def main():
    """Score generated iris samples by cluster-accuracy-weighted membership.

    A three-state cluster model is trained on a split of the iris data. The
    held-out rows are queried to obtain a per-cluster accuracy from a
    confusion matrix weighted by cluster probability. Twenty samples are then
    drawn from the trained network, each is assigned a score by combining its
    cluster membership probabilities weighted by those accuracies, and the
    scored samples are plotted over the original data for each pair of
    measurements.
    """
    logger = logging.getLogger()
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.INFO)

    bayesianpy.jni.attach(logger)

    db_folder = bayesianpy.utils.get_path_to_parent_dir(__file__)
    iris = pd.read_csv(os.path.join(db_folder, "data/iris.csv"),
                       index_col=False)

    network = bayesianpy.network.create_network()
    num_clusters = 3
    cluster = builder.create_cluster_variable(network, num_clusters)
    node = builder.create_multivariate_continuous_node(
        network,
        iris.drop('iris_class', axis=1).columns.tolist(), "joint")
    builder.create_link(network, cluster, node)

    class_variable = builder.create_discrete_variable(
        network, iris, 'iris_class', iris['iris_class'].unique())
    builder.create_link(network, cluster, class_variable)

    train, test = train_test_split(iris, test_size=0.7)

    # train the model and query the most likely states and probability of each latent state.
    with bayesianpy.data.DataSet(iris, db_folder, logger) as dataset:
        model = bayesianpy.model.NetworkModel(network, logger)
        model.train(dataset.subset(train.index.tolist()))

        test_subset = dataset.subset(test.index.tolist())

        results = model.batch_query(
            test_subset,
            # creates columns Cluster$$Cluster0, Cluster$$Cluster1,
            # Cluster$$Cluster2, as
            # suffix is set to an empty string.
            [
                bayesianpy.model.QueryStateProbability("Cluster", suffix=""),
                # creates column 'iris_class_maxlikelihood'
                bayesianpy.model.QueryMostLikelyState("iris_class"),
                # creates column 'Cluster_maxlikelihood'
                bayesianpy.model.QueryMostLikelyState("Cluster")
            ])

    cluster_accuracy = {}
    # get a list of cluster accuracies, using the Bayes Server Confusion matrix class
    # weighted by the Cluster accuracy.
    with bayesianpy.data.DataSet(results, db_folder, logger) as resultset:
        for c in range(num_clusters):
            matrix = bayesianpy.jni.bayesServerAnalysis()\
                .ConfusionMatrix.create(resultset.create_data_reader_command(), "iris_class",
                                        "iris_class_maxlikelihood", "Cluster$$Cluster{}".format(c))
            cluster_accuracy.update(
                {'Cluster{}'.format(c): matrix.getAccuracy()})

    # generate samples from the trained model, to give us some additional testing data.
    samples = bayesianpy.model.Sampling(network).sample(num_samples=20).drop(
        ["Cluster", "iris_class"], axis=1)
    reader = bayesianpy.data.DataFrameReader(samples)
    inference = bayesianpy.model.InferenceEngine(network).create_engine()
    evidence = bayesianpy.model.Evidence(network, inference)
    query = bayesianpy.model.SingleQuery(network, inference, logger)
    query_type = [bayesianpy.model.QueryStateProbability('Cluster', suffix="")]

    # query the expected Cluster membership, and generate a wrapper for
    # comparing the values, weighted by cluster membership.
    while reader.read():
        result = query.query(query_type,
                             evidence=evidence.apply(reader.to_dict()))
        cv_results = []
        for key, value in result.items():
            n = bayesianpy.network.Discrete.fromstring(key)
            weighting = cluster_accuracy[n.state]
            cv_results.append(bayesianpy.jni.bayesServerAnalysis().
                              DefaultCrossValidationTestResult(
                                  jp.JDouble(weighting),
                                  jp.JObject(value, jp.java.lang.Object),
                                  jp.java.lang.Double(jp.JDouble(value))))

        score = bayesianpy.jni.bayesServerAnalysis().CrossValidation.combine(
            jp.java.util.Arrays.asList(cv_results),
            bayesianpy.jni.bayesServerAnalysis().CrossValidationCombineMethod.
            WEIGHTED_AVERAGE)

        # append the score on to the existing dataframe.
        # DataFrame.set_value no longer exists in pandas; label-based .loc
        # assignment does the same and creates the 'score' column on first use.
        samples.loc[reader.get_index(), 'score'] = score

    variables = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

    # pyplot.get_cmap is used because matplotlib.cm.get_cmap has been
    # removed from recent Matplotlib releases.
    cmap = plt.get_cmap('Blues')
    fig = plt.figure(figsize=(10, 10))
    k = 1
    # plot!
    for i, v in enumerate(variables):
        for v1 in variables[i + 1:]:
            ax = fig.add_subplot(3, 2, k)
            ax.set_title("{} vs {}".format(v, v1))
            # original data as a faint backdrop
            ax.scatter(x=iris[v].tolist(),
                       y=iris[v1].tolist(),
                       facecolors='none',
                       alpha=0.1)
            # generated samples, coloured by their score
            h = ax.scatter(x=samples[v].tolist(),
                           y=samples[v1].tolist(),
                           c=samples['score'].tolist(),
                           vmin=samples.score.min(),
                           vmax=samples.score.max(),
                           cmap=cmap)
            k += 1

    # Reserve a strip on the right for a colour bar shared by all plots.
    fig.subplots_adjust(right=0.8)
    cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
    fig.colorbar(h, cax=cbar_ax)
    plt.show()