def _tax_properties(tx):
    """
    Returns a dictionary of taxon / sample properties, to be included as taxon metadata.

    All (taxon, property) pairs are fetched in one query. For each pair the
    property's ``type`` is the outer key, the taxon's ``name`` the inner key
    and the property's ``name`` the stored value. If a taxon is linked to
    several properties of the same type, the last record returned wins.

    :param tx: Neo4j transaction
    :return: Dictionary of dictionary of taxon properties,
        ``{property type: {taxon name: property value}}``
    :raises KeyError: If a Property node has no ``type`` or ``name`` entry.
    """
    # One query returns every property together with the taxon it belongs to,
    # so each value is paired with its own property node and no node names
    # need to be spliced into the Cypher text.
    records = tx.run("MATCH (b)--(m:Property) "
                     "WHERE b:Taxon OR b:Agglom_Taxon "
                     "RETURN b, m").data()
    properties = dict()
    for record in records:
        prop = record['m']
        value = prop['name']
        # tries exporting property as float instead of list
        # NOTE(review): the length-1 condition is kept from the previous
        # implementation; it covers one-element sequences and
        # single-character strings. Confirm whether longer numeric strings
        # should be converted as well.
        try:
            if len(value) == 1:
                value = np.round(float(value[0]), 4)
        except (TypeError, ValueError):
            # unsized or non-numeric values are exported unchanged
            pass
        # setdefault keeps taxa already stored for this property type
        properties.setdefault(prop['type'], dict())[record['b'].get('name')] = value
    return properties
def _get_list(tx, label):
    """
    Collects the nodes in the database that carry the given label.

    :param tx: Neo4j transaction
    :param label: Neo4j database label of nodes
    :return: Output of ``_get_unique`` applied to every node matched by the label.
    """
    # labels cannot be passed as Cypher parameters, so the label is part of the query text
    query = "MATCH (n:" + label + ") RETURN n"
    matched = tx.run(query).data()
    return _get_unique(matched, key="n")
def _return_networks(self, networks):
    """
    Returns NetworkX networks from the Neo4j database.

    For every requested network a ``MultiGraph`` is built with one edge per
    taxon pair. Each edge carries ``source`` (stringified collection of
    source networks), ``weight`` (mean of the parsed weights, NaN when no
    weight could be parsed) and ``all_weights`` (stringified list of parsed
    weights). Taxonomy and taxon properties are attached as node attributes;
    property values are converted to strings first.

    :param networks: Names of networks to return. If empty or None, all
        Network and Set nodes in the database are exported.
    :return: Dictionary of networks, keyed by network name
    """
    # Raw string: the pattern contains backslash escapes that are invalid
    # escape sequences in a normal string literal. Compiled once for all edges.
    number_pattern = re.compile(r"[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?")

    def parse_weights(raw):
        """Converts one stored weight (string, number, sequence or None) to a list of floats."""
        if raw is None:
            return []
        if isinstance(raw, str):
            return [float(x) for x in number_pattern.findall(raw)]
        try:
            values = iter(raw)
        except TypeError:
            # plain number stored on the association
            return [float(raw)]
        return [float(x) for x in values]

    results = dict()
    with self._driver.session() as session:
        tax_dict = session.read_transaction(self._tax_dict)
        tax_properties = session.read_transaction(self._tax_properties)
    for item in tax_properties:
        for taxon in tax_properties[item]:
            tax_properties[item][taxon] = str(tax_properties[item][taxon])
    if not networks:
        with self._driver.session() as session:
            networks = session.read_transaction(self._query, "MATCH (n:Network) RETURN n")
            networks.extend(session.read_transaction(self._query, "MATCH (n:Set) RETURN n"))
        networks = list(_get_unique(networks, key='n'))
    # create 1 network per database
    for network in networks:
        g = nx.MultiGraph()
        with self._driver.session() as session:
            edge_sources, edge_weights = session.read_transaction(self._association_list, network)
        for edge in edge_sources:
            stored = edge_weights[edge]
            # only the first stored weight is parsed, as in the previous implementation
            all_weights = parse_weights(stored[0] if stored else None)
            # np.mean of an empty list warns and yields NaN; produce the NaN directly
            weight = float(np.mean(all_weights)) if all_weights else float('nan')
            g.add_edge(edge[0], edge[1], source=str(edge_sources[edge]),
                       weight=weight, all_weights=str(all_weights))
        # necessary for networkx indexing
        for item in tax_dict:
            nx.set_node_attributes(g, tax_dict[item], item)
        for item in tax_properties:
            nx.set_node_attributes(g, tax_properties[item], item)
        g = g.to_undirected()
        results[network] = g
    return results
def _association_list(tx, network):
    """
    Returns a list of associations, as taxon1, taxon2, and, if present, weight.

    Associations linked to the node named ``network`` are grouped per taxon
    pair; a pair is treated as the same edge regardless of direction.
    Associations for which no pair of differently named taxa is found are
    skipped.

    :param tx: Neo4j transaction
    :param network: Name of network node
    :return: Tuple ``(networks, weights)``. Both are dictionaries keyed by a
        ``(taxon name, taxon name)`` tuple. ``networks`` holds the source
        networks of the edge (a list for an edge seen once, a set after
        merging), ``weights`` holds the list of association weights with the
        most recently read weight first.
    """
    # Names are passed as query parameters so that quotes or other special
    # characters in a name cannot break or alter the Cypher statement.
    associations = tx.run("MATCH (n:Association)--(b {name: $name}) RETURN n",
                          {"name": network}).data()
    networks = dict()
    weights = dict()
    for assoc in associations:
        assoc_name = assoc['n'].get('name')
        taxa = tx.run("MATCH (m)--(:Association {name: $name})--(n) "
                      "WHERE (m:Taxon OR m:Agglom_Taxon) AND (n:Taxon OR n:Agglom_Taxon) "
                      "AND m.name <> n.name "
                      "RETURN m, n LIMIT 1", {"name": assoc_name}).data()
        if not taxa:
            # apparently this can happen. Need to figure out why!!
            continue
        edge = (taxa[0]['m'].get('name'), taxa[0]['n'].get('name'))
        sources = tx.run("MATCH (:Association {name: $name})-->(n:Network) RETURN n",
                         {"name": assoc_name})
        network_list = list(_get_unique(sources, key='n'))
        weight = [assoc['n'].get('weight')]
        # it is possible for sets to contain associations with different weights
        reverse = (edge[1], edge[0])
        if edge in networks:
            known = edge
        elif reverse in networks:
            known = reverse
        else:
            known = None
        if known is None:
            networks[edge] = network_list
            weights[edge] = weight
        else:
            # merge into the entry already stored for this taxon pair
            network_list.extend(networks[known])
            networks[known] = set(network_list)
            weight.extend(weights[known])
            weights[known] = weight
    edge_list = (networks, weights)
    return edge_list
def _find_nodes(tx, names):
    """
    Returns True if all nodes in the 'names' list are found in the database.

    Every name is checked, so the duplicate-name warning can be logged for
    each of them; a single missing name makes the result False.

    :param tx: Neo4j transaction
    :param names: List of names of nodes
    :return: True if every name matched at least one node, otherwise False.
        An empty 'names' list returns True.
    """
    # Start from True and only ever switch to False, so that a later hit
    # cannot hide an earlier miss.
    found = True
    for name in names:
        # the name is passed as a parameter instead of being spliced into the query
        netname = tx.run("MATCH (n {name: $name}) RETURN n", {"name": name}).data()
        netname = _get_unique(netname, key='n')
        # only checking node name; should be unique in database!
        if len(netname) == 0:
            found = False
        elif len(netname) > 1:
            logger.warning("Duplicated node name in database! \n")
    return found
def _agglom_list(tx):
    """
    Returns a list of relationships between agglomerated taxa and taxa.

    :param tx: Neo4j transaction
    :return: List of ``[agglomerated taxon name, taxon name]`` pairs, one per
        Taxon node connected to an Agglom_Taxon node.
    """
    agglom_nodes = tx.run("MATCH (n:Agglom_Taxon) RETURN n")
    # presumably _get_unique yields the distinct node names; verify against its definition
    agglom_nodes = _get_unique(agglom_nodes, 'n')
    edge_list = list()
    for node in agglom_nodes:
        # the name is passed as a parameter so special characters cannot break the query
        taxa = tx.run("MATCH (:Agglom_Taxon {name: $name})--(n:Taxon) RETURN n",
                      {"name": node})
        edge_list.extend([node, taxon['n'].get('name')] for taxon in taxa)
    return edge_list
def _sample_list(tx):
    """
    Returns a list of sample occurrences for taxa that take part in an association.

    :param tx: Neo4j transaction
    :return: List of lists, one per Sample relationship between a taxon and a
        Property node: ``[taxon name, property name, property type,
        str(type(relationship)), relationship 'correlation' value]``.
    """
    tax_nodes = tx.run("MATCH (n)--(:Association) WHERE n:Taxon RETURN n")
    # presumably _get_unique yields the distinct node names; verify against its definition
    tax_nodes = _get_unique(tax_nodes, 'n')
    edge_list = list()
    for node in tax_nodes:
        # the name is passed as a parameter so special characters cannot break the query
        samples = tx.run("MATCH (:Taxon {name: $name})-[r:Sample]-(n:Property) RETURN n, r",
                         {"name": node})
        for sample in samples:
            prop = sample['n']
            rel = sample['r']
            edge_list.append([
                node,
                prop.get('name'),
                prop.get('type'),
                # NOTE(review): this is the string form of the Python class of
                # the relationship object, kept as-is; confirm it is the intended value.
                str(type(rel)),
                rel.get('correlation'),
            ])
    return edge_list
def _tax_dict(tx):
    """
    Returns a dictionary of taxonomic values for each node.

    Only Taxon / Agglom_Taxon nodes connected to an Association are included.
    A taxon is left out of a level's dictionary when no node of that level is
    connected to it or when the connected node has no (truthy) name.

    :param tx: Neo4j transaction
    :return: Dictionary of taxonomy separated by taxon,
        ``{taxonomic level: {taxon name: name of the level node}}``
    """
    taxa = tx.run("MATCH (n)--(:Association) WHERE n:Taxon OR n:Agglom_Taxon RETURN n").data()
    # presumably _get_unique yields the distinct node names; verify against its definition
    taxa = _get_unique(taxa, 'n')
    tax_levels = ['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']
    tax_dict = {level: dict() for level in tax_levels}
    for item in taxa:
        for level in tax_levels:
            # The label comes from the fixed list above and cannot be a Cypher
            # parameter; the taxon name is passed as a parameter so special
            # characters in it cannot break the query.
            level_name = tx.run("MATCH (b {name: $name})--(n:" + level + ") RETURN n",
                                {"name": item}).data()
            if not level_name:
                continue
            tax = level_name[0]['n'].get('name')
            if tax:
                tax_dict[level][item] = tax
    return tax_dict