Пример #1
0
    def create(self, network_factory: bayesianpy.network.NetworkFactory):
        """Build the template network and replace selected nodes with discretised ones.

        The wrapped template creates the base network. Every column of
        ``self._discretised_variables`` that has a node in that network has
        the node removed, after the nodes on the other end of its links have
        been recorded. Discretised variables are then created for those
        columns from ``self._continuous`` and the recorded links are
        re-created against each new node.

        :param network_factory: factory handed to the wrapped template.
        :return: the modified network.
        """
        network = self._template.create(network_factory)
        column_names = self._discretised_variables.columns.tolist()

        # Bind these up front: when none of the columns has a node in the
        # template network the loop below never assigns them, and the
        # re-linking loop would otherwise fail with UnboundLocalError.
        links_from = []
        links_to = []

        for var in column_names:
            node = builder.get_node(network, str(var))
            if node is None:
                continue

            # Capture the far end of every link before the node is removed.
            # NOTE(review): these lists are overwritten for each matching
            # column, so only the links of the last removed node are
            # re-created below (and they are applied to every new node).
            # Confirm whether per-variable link tracking was intended.
            links_from = [
                link.getFrom() for link in node.getLinks()
                if link.getFrom().getName() != var
            ]
            links_to = [
                link.getTo() for link in node.getLinks()
                if link.getTo().getName() != var
            ]

            network.getNodes().remove(node)

        discretised_nodes = builder.create_discretised_variables(
            network,
            self._continuous,
            column_names,
            bin_count=self._default_bin_count,
            mode=self._mode,
            zero_crossing=self._zero_crossing,
            defined_bins=self._bins)

        for node in discretised_nodes:
            if node is None:
                continue

            for source in links_from:
                builder.create_link(network, source, node)

            for target in links_to:
                builder.create_link(network, node, target)

        return network
Пример #2
0
    def create(self, network_factory):
        """Create a network in which the parent node links to every other node.

        A continuous variable is created for each column of
        ``self._continuous`` and a discrete variable for each column of
        ``self._discrete``. Discrete states come from
        ``self._discrete_states`` when an entry exists for the column,
        otherwise from the column's unique non-null values. Finally a link is
        created from the node named ``self._parent_node`` to every other node.

        :param network_factory: object whose ``create()`` returns an empty
            network.
        :return: the populated network.
        :raises ValueError: if the parent node cannot be found in the network.
        """
        network = network_factory.create()

        if not dk.empty(self._continuous):
            for c_name in self._continuous.columns:
                builder.create_continuous_variable(network, c_name)

        # The original tested ``dk.empty(...)`` without ``not``, so discrete
        # variables were only created when there was no discrete data.
        if not dk.empty(self._discrete):
            for d_name in self._discrete.columns:
                if d_name in self._discrete_states:
                    states = self._discrete_states[d_name]
                else:
                    states = dk.compute(
                        self._discrete[d_name].dropna().unique()).tolist()

                # Best effort: a variable that cannot be created is logged
                # and skipped. Exception (not BaseException) so that
                # KeyboardInterrupt / SystemExit still propagate.
                try:
                    builder.create_discrete_variable(
                        network, self._discrete, d_name, states)
                except Exception as e:
                    self._logger.warn(e)

        parent_node = builder.try_get_node(network, self._parent_node)
        if parent_node is None:
            raise ValueError("Parent node: {} not recognised".format(
                self._parent_node))

        for node in network.getNodes():
            if node == parent_node:
                continue
            builder.create_link(network, parent_node, node)

        return network
Пример #3
0
    def create(self, network_factory: bayesianpy.network.NetworkFactory):
        """Add a two-bin discretised companion for each continuous node.

        The wrapped template creates the base network. For every node whose
        first variable is continuous, a discretised variable named
        ``<node name>_0Node`` is created with two bins split at 0.5, and a
        link is created between it and the continuous node.

        :param network_factory: factory handed to the wrapped template.
        :return: the modified network.
        """
        network = self._template.create(network_factory)

        # Iterate over a snapshot: the helper below is handed the network to
        # create a new variable in (presumably adding a node to it - TODO
        # confirm), and mutating the live node collection while iterating it
        # is unsafe.
        for node in list(network.getNodes()):
            if not bayesianpy.network.is_variable_continuous(
                    node.getVariables().get(0)):
                continue

            discretised = builder.create_discretised_variable(
                network,
                self._template.get_network_factory().get_data(),
                node.getName() + "_0Node",
                bins=[(jp.java.lang.Double.NEGATIVE_INFINITY, 0.5, "closed", "open"),
                      (0.5, jp.java.lang.Double.POSITIVE_INFINITY, "closed", "closed")])

            builder.create_link(network, discretised, node)

        return network
Пример #4
0
    def create(self, network_factory: bayesianpy.network.NetworkFactory):
        """Link each fully connected node to the other nodes of the template network.

        For every entry of ``self._fully_connected_nodes`` a link is created
        to each network node, except nodes reported as cluster variables and
        the node whose name equals the entry itself. A ``ValueError`` raised
        while creating a link is ignored and the next node is tried.

        :param network_factory: factory handed to the wrapped template.
        :return: the modified network.
        """
        network = self._template.create(network_factory)

        for source in self._fully_connected_nodes:
            for candidate in network.getNodes():
                excluded = (
                    bayesianpy.network.is_cluster_variable(candidate)
                    or candidate.getName() == source
                )
                if not excluded:
                    try:
                        builder.create_link(network, source, candidate)
                    except ValueError:
                        # Link could not be created; move on to the next node.
                        pass

        return network
Пример #5
0
    def learn(self, network):
        """Learn the link structure of ``network`` from the dataset.

        Existing links on ``network`` are cleared and structural learning is
        run with the engine selected by ``self._engine`` (``'PC'``, ``'TAN'``
        or ``'Hierarchical'``). The names of the end nodes of every learnt
        link are appended to ``self._links`` as ``(from_name, to_name)``
        tuples.

        When links are already stored and ``self._use_same_model`` is set, the
        stored links are returned without any learning.

        :param network: network whose nodes are used for learning; its links
            are cleared.
        :return: ``self._links``.
        :raises ValueError: if ``self._engine`` is not a supported engine.
        """
        if self._links and self._use_same_model:
            return self._links

        # Validate before touching the network: previously an unknown engine
        # cleared the links and then failed with UnboundLocalError on
        # ``output``.
        supported_engines = ('PC', 'TAN', 'Hierarchical')
        if self._engine not in supported_engines:
            raise ValueError(
                "Unsupported structural learning engine: {!r} "
                "(expected one of: {})".format(
                    self._engine, ", ".join(supported_engines)))

        data_reader_command = self._dataset.create_data_reader_command(
        ).create(None)

        reader_options = self._dataset.get_reader_options().create()
        network.getLinks().clear()

        variable_references = list(
            bayesianpy.network.create_variable_references(
                network, self._dataset.get_dataframe()))

        evidence_reader_command = bayesServer(
        ).data.DefaultEvidenceReaderCommand(
            data_reader_command,
            jp.java.util.Arrays.asList(variable_references), reader_options)

        if self._engine == 'PC':
            options = bayesServerStructure().PCStructuralLearningOptions()
            options.setMaximumConditional(2)
            self._logger.info("Learning structure from {} variables.".format(
                len(variable_references)))
            output = bayesServerStructure().PCStructuralLearning().learn(
                evidence_reader_command,
                jp.java.util.Arrays.asList(network.getNodes().toArray()),
                options)
        elif self._engine == 'TAN':
            options = bayesServerStructure().TANStructuralLearningOptions()
            options.setTarget(
                bayesianpy.network.get_node(network, self._root_node))
            self._logger.info("Learning structure from {} variables.".format(
                len(variable_references)))
            output = bayesServerStructure().TANStructuralLearning().learn(
                evidence_reader_command,
                jp.java.util.Arrays.asList(network.getNodes().toArray()),
                options)
        else:  # 'Hierarchical'

            from sklearn.model_selection import KFold

            class DefaultEvidenceReaderCommandFactory:
                """Python side of the ``EvidenceReaderCommandFactory`` proxy.

                Method names are looked up by the Java proxy and must not be
                renamed.
                """

                def __init__(self, ds: bayesianpy.data.DataSet, options):
                    self._ds = ds
                    self._reader_options = options
                    # All (train, test) index pairs, computed on first use.
                    self._folds = None
                    # Partition number -> (train, test) pair assigned to it.
                    self._partitions = {}

                def create(self, network):
                    """Return an evidence reader command over the whole dataset."""
                    variable_references = list(
                        bayesianpy.network.create_variable_references(
                            network, self._ds.get_dataframe()))

                    # NOTE(review): the enclosing method calls
                    # ``.create(None)`` on the data reader command before
                    # passing it on, this one does not - confirm which form
                    # DefaultEvidenceReaderCommand expects.
                    return bayesServer().data.DefaultEvidenceReaderCommand(
                        self._ds.create_data_reader_command(),
                        jp.java.util.Arrays.asList(variable_references),
                        self._reader_options)

                def createPartitioned(self, network, dataPartitioning,
                                      partitionCount):
                    """Return an evidence reader command over one side of a fold.

                    Each distinct partition number is assigned its own K-fold
                    split the first time it is seen and keeps it on later
                    calls. The train indices are used when the partition's
                    data is to be excluded, the test indices otherwise.
                    """
                    if self._folds is None:
                        kfold = KFold(n_splits=partitionCount, shuffle=False)
                        self._folds = list(kfold.split(self._ds.data))

                    variable_references = list(
                        bayesianpy.network.create_variable_references(
                            network, self._ds.get_dataframe()))

                    partition_number = dataPartitioning.getPartitionNumber()
                    if partition_number not in self._partitions:
                        # Hand out folds in the order partitions are first
                        # requested. The original called ``next()`` on a
                        # fresh ``split()`` generator every time, so every
                        # partition received the first fold. The modulo keeps
                        # this safe should more partition numbers than folds
                        # ever be requested.
                        fold_index = len(self._partitions) % len(self._folds)
                        self._partitions[partition_number] = (
                            self._folds[fold_index])
                    train, test = self._partitions[partition_number]

                    if dataPartitioning.getMethod() == bayesServer(
                    ).data.DataPartitionMethod.EXCLUDE_PARTITION_DATA:
                        print("Excluding")
                        subset = self._ds.subset(train)
                    else:
                        print("Including")
                        subset = self._ds.subset(test)

                    cmd = subset.create_data_reader_command()
                    return bayesServer().data.DefaultEvidenceReaderCommand(
                        cmd, jp.java.util.Arrays.asList(variable_references),
                        self._reader_options)

            ercf = DefaultEvidenceReaderCommandFactory(self._dataset,
                                                       reader_options)
            proxy = jp.JProxy(
                "com.bayesserver.data.EvidenceReaderCommandFactory", inst=ercf)

            options = bayesServerStructure(
            ).HierarchicalStructuralLearningOptions()
            self._logger.info("Learning structure from {} variables.".format(
                len(variable_references)))
            output = bayesServerStructure().HierarchicalStructuralLearning(
            ).learn(proxy,
                    jp.java.util.Arrays.asList(network.getNodes().toArray()),
                    options)

        self._logger.info("Created {} links.".format(
            len(output.getLinkOutputs())))

        # NOTE(review): links are appended to whatever is already stored, so
        # repeated calls with ``_use_same_model`` unset accumulate entries -
        # confirm this is intended.
        for link_output in output.getLinkOutputs():
            link = link_output.getLink()
            self._links.append((link.getFrom().getName(),
                                link.getTo().getName()))

        return self._links