Пример #1
0
    def _tax_properties(tx):
        """
        Returns a dictionary of taxon / sample properties, to be included as taxon metadata.

        For every Property node linked to a Taxon or Agglom_Taxon node, the
        property's 'type' is used as top-level key and every linked taxon name
        is mapped to the property's 'name'.

        :param tx: Neo4j transaction
        :return: Dictionary of dictionary of taxon properties,
                 i.e. {property type: {taxon name: property name}}
        """
        nodes = tx.run("MATCH (n)--(m:Property) WHERE n:Taxon OR n:Agglom_Taxon RETURN m").data()
        nodes = _get_unique(nodes, 'm')
        properties = dict()
        for node in nodes:
            # Look up the property node that belongs to this name (the lookup
            # used to be unfiltered, so every node received the first Property
            # in the database). Query parameters keep names containing quotes
            # from breaking the statement.
            prop_records = tx.run("MATCH (m:Property {name: $name}) RETURN m",
                                  {'name': node}).data()
            if not prop_records:
                continue
            prop_node = prop_records[0]['m']
            property_key = prop_node['type']
            value = prop_node['name']
            # NOTE(review): the length-1 guard is kept from the original code;
            # for string values it only converts single-character values.
            # Confirm whether every numeric value should be converted.
            if len(value) == 1:
                # tries exporting property as float instead of text
                try:
                    value = np.round(float(value), 4)
                except ValueError:
                    pass
            # setdefault keeps taxa already collected for this property type
            # instead of wiping them for every property node of that type.
            taxon_values = properties.setdefault(property_key, dict())
            hits = tx.run("MATCH (b)--(n {name: $name}) "
                          "WHERE b:Taxon OR b:Agglom_Taxon RETURN b",
                          {'name': node}).data()
            for hit in hits:
                taxon_values[hit['b'].get('name')] = value
        return properties
Пример #2
0
    def _get_list(tx, label):
        """
        Fetches all nodes carrying the given label and reduces them with _get_unique.

        :param tx: Neo4j transaction
        :param label: Neo4j database label of nodes
        :return: Result of _get_unique applied to the matched nodes (key 'n').
        """
        # Labels cannot be passed as Cypher parameters, so the label is spliced
        # into the statement text.
        query = "".join(("MATCH (n:", label, ") RETURN n"))
        records = tx.run(query).data()
        return _get_unique(records, key="n")
Пример #3
0
    def _return_networks(self, networks):
        """
        Returns NetworkX networks from the Neo4j database.

        One multigraph is built per network name. Every edge stores the source
        networks (as string), the mean of the parsed weights and the string
        form of all parsed weights. Taxonomy and taxon properties are attached
        as node attributes.

        :param networks: Names of networks to return. If empty or None, the
                         names of all Network and Set nodes are used.
        :return: Dictionary of networks, with network names as keys
        """
        # Raw string: the pattern contains backslash escapes that are invalid
        # in a normal string literal. Compiled once, outside the edge loop.
        weight_pattern = re.compile(
            r"[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?")
        results = dict()
        with self._driver.session() as session:
            tax_dict = session.read_transaction(self._tax_dict)
        with self._driver.session() as session:
            tax_properties = session.read_transaction(self._tax_properties)
        # property values are exported as strings
        for item in tax_properties:
            for taxon in tax_properties[item]:
                tax_properties[item][taxon] = str(tax_properties[item][taxon])
        if not networks:
            with self._driver.session() as session:
                networks = session.read_transaction(self._query,
                                                    "MATCH (n:Network) RETURN n")
                networks.extend(session.read_transaction(self._query,
                                                         "MATCH (n:Set) RETURN n"))
            networks = list(_get_unique(networks, key='n'))
        # create 1 graph per network
        for network in networks:
            g = nx.MultiGraph()
            with self._driver.session() as session:
                edge_list = session.read_transaction(self._association_list, network)
            sources, weights = edge_list
            for edge in sources:
                index_1 = edge[0]
                index_2 = edge[1]
                # Only the first stored weight entry is parsed, as before.
                raw_weight = weights[edge][0]
                if isinstance(raw_weight, str):
                    # text may hold one or several numbers
                    parsed = weight_pattern.findall(raw_weight)
                elif raw_weight is None:
                    # association without weight
                    parsed = []
                elif isinstance(raw_weight, (list, tuple)):
                    parsed = raw_weight
                else:
                    # plain number; previously this crashed when iterated
                    parsed = [raw_weight]
                all_weights = [float(x) for x in parsed]
                # Same value np.mean gives for an empty list, without the warning.
                if all_weights:
                    weight = float(np.mean(all_weights))
                else:
                    weight = float('nan')
                g.add_edge(index_1, index_2, source=str(sources[edge]),
                           weight=weight, all_weights=str(all_weights))
            # necessary for networkx indexing
            for item in tax_dict:
                nx.set_node_attributes(g, tax_dict[item], item)
            for item in tax_properties:
                nx.set_node_attributes(g, tax_properties[item], item)
            g = g.to_undirected()
            results[network] = g
        return results
Пример #4
0
    def _association_list(tx, network):
        """
        Returns the associations linked to a network node, grouped per taxon pair.

        :param tx: Neo4j transaction
        :param network: Name of network node
        :return: Tuple (networks, weights) of two dictionaries keyed by
                 (taxon1, taxon2). 'networks' holds the source network names
                 of the edge, 'weights' the list of association weights.
        """
        # Query parameters are used so that names containing quotes cannot
        # break (or alter) the Cypher statement.
        associations = tx.run("MATCH (n:Association)--(b {name: $name}) RETURN n",
                              {'name': network}).data()
        networks = dict()
        weights = dict()
        for assoc in associations:
            assoc_name = assoc['n'].get('name')
            taxa = tx.run("MATCH (m)--(:Association {name: $name})--(n) "
                          "WHERE (m:Taxon OR m:Agglom_Taxon) AND (n:Taxon OR n:Agglom_Taxon) "
                          "AND m.name <> n.name "
                          "RETURN m, n LIMIT 1",
                          {'name': assoc_name}).data()
            if not taxa:
                # apparently this can happen. Need to figure out why!!
                continue
            edge = (taxa[0]['m'].get('name'), taxa[0]['n'].get('name'))
            # A separate local name is used here; the 'network' parameter is
            # no longer overwritten inside the loop.
            source_records = tx.run("MATCH (:Association {name: $name})-->(n:Network) RETURN n",
                                    {'name': assoc_name})
            network_list = list(_get_unique(source_records, key='n'))
            weight = [assoc['n'].get('weight')]
            # An edge may already be stored in either orientation.
            reverse_edge = (edge[1], edge[0])
            if edge in networks:
                known_edge = edge
            elif reverse_edge in networks:
                known_edge = reverse_edge
            else:
                known_edge = None
            if known_edge is None:
                networks[edge] = network_list
                weights[edge] = weight
            else:
                # it is possible for sets to contain associations with different weights
                # NOTE: merged sources are stored as a set, first-time sources
                # as a list (unchanged from the original behaviour).
                network_list.extend(networks[known_edge])
                networks[known_edge] = set(network_list)
                # newest weight first, followed by the weights seen earlier
                weight.extend(weights[known_edge])
                weights[known_edge] = weight
        edge_list = (networks, weights)
        return edge_list
Пример #5
0
    def _find_nodes(tx, names):
        """
        Returns True if all nodes in the 'names' list are found in the database.

        All names are checked (no early exit), so a duplicate-name warning is
        logged for every affected name.

        :param tx: Neo4j transaction
        :param names: List of names of nodes
        :return: True if every name matches at least one node (also for an
                 empty list), False if at least one name is missing
        """
        # Initialised once: a single missing name must make the result False,
        # and an empty 'names' list no longer leaves 'found' undefined.
        found = True
        for name in names:
            # parameterised so names containing quotes cannot break the query
            netname = tx.run("MATCH (n {name: $name}) RETURN n",
                             {'name': name}).data()
            netname = _get_unique(netname, key='n')
            # only checking node name; should be unique in database!
            if len(netname) == 0:
                found = False
            elif len(netname) > 1:
                logger.warning("Duplicated node name in database! \n")
        return found
Пример #6
0
    def _agglom_list(tx):
        """
        Returns a list of relationships between agglomerated taxa and taxa.

        :param tx: Neo4j transaction
        :return: List of [agglomerated taxon name, taxon name] lists, one per
                 Taxon node connected to an Agglom_Taxon node
        """
        agglom_nodes = tx.run("MATCH (n:Agglom_Taxon) RETURN n")
        agglom_nodes = _get_unique(agglom_nodes, 'n')
        edge_list = list()
        for node in agglom_nodes:
            # parameterised so names containing quotes cannot break the query
            taxa = tx.run("MATCH (:Agglom_Taxon {name: $name})--(n:Taxon) RETURN n",
                          {'name': node})
            edge_list.extend([node, taxon['n'].get('name')] for taxon in taxa)
        return edge_list
Пример #7
0
    def _sample_list(tx):
        """
        Returns a list of Sample relationships between taxa and properties.

        Only Taxon nodes that are connected to an Association node are considered.

        :param tx: Neo4j transaction
        :return: List of lists, each holding the taxon name, the property
                 name, the property type, the string form of the relationship
                 object's Python type and the relationship's 'correlation' value
        """
        tax_nodes = tx.run("MATCH (n)--(:Association) WHERE n:Taxon RETURN n")
        tax_nodes = _get_unique(tax_nodes, 'n')
        edge_list = list()
        for node in tax_nodes:
            # parameterised so names containing quotes cannot break the query
            samples = tx.run("MATCH (:Taxon {name: $name})-[r:Sample]-(n:Property) RETURN n, r",
                             {'name': node})
            for sample in samples:
                prop = sample['n']
                rel = sample['r']
                edge_list.append([node,
                                  prop.get('name'),
                                  prop.get('type'),
                                  str(type(rel)),
                                  rel.get('correlation')])
        return edge_list
Пример #8
0
    def _tax_dict(tx):
        """
        Returns a dictionary of taxonomic values for each node.

        Only Taxon / Agglom_Taxon nodes connected to an Association node are
        considered. A taxon is left out of a level when no node with that
        level's label is connected to it, or when that node has no name.

        :param tx: Neo4j transaction
        :return: Dictionary of taxonomy separated by taxon,
                 i.e. {taxonomic level: {taxon name: level name}}
        """
        taxa = tx.run("MATCH (n)--(:Association) WHERE n:Taxon OR n:Agglom_Taxon RETURN n").data()
        taxa = _get_unique(taxa, 'n')
        tax_levels = ['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']
        tax_dict = {level: dict() for level in tax_levels}
        for item in taxa:
            for level in tax_levels:
                # The label comes from the fixed list above (labels cannot be
                # parameters); the taxon name is passed as a parameter so
                # names containing quotes cannot break the query.
                level_name = tx.run("MATCH (b {name: $name})--(n:" + level + ") RETURN n",
                                    {'name': item}).data()
                if not level_name:
                    continue
                tax = level_name[0]['n'].get('name')
                if tax:
                    tax_dict[level][item] = tax
        return tax_dict