from uk.ac.ebi.vfb.neo4j.flybase2neo.dbtools import dict_cursor, get_fb_conn from uk.ac.ebi.vfb.neo4j.tools import neo4j_connect import re """Populate pub data. Should be run as a final step, once all content added.""" ## TODO: Add microrefs - for more compact views (P1) ## TODO: Add pub types (P2) ## TODO: Add authors (P3) ## TODO: Add pub relationships (P3) base_uri = sys.argv[1] usr = sys.argv[2] pwd = sys.argv[3] nc = neo4j_connect(base_uri, usr, pwd) # Pull all pub FBrfs from graph statements = ['MATCH (pub) RETURN DISTINCT pub.FlyBase'] pub_list_results = nc.commit_list(statements) pub_list = [str(x['row'][0]) for x in pub_list_results[0]['data']] # Parsing returned Json for results. c = get_fb_conn() cursor=c.cursor() def gen_micro_ref_from_miniref(miniref): # Use regex to truncate after year, remove brackets. return def gen_micro_ref_from_db():
import json
import sys
from uk.ac.ebi.vfb.neo4j.flybase2neo.dbtools import dict_cursor, get_fb_conn, FB2Neo, FeatureRelationship
from uk.ac.ebi.vfb.neo4j.tools import neo4j_connect
import re

# neo connection
# Command-line arguments, in order: Neo4j base URI, user name, password.
base_uri = sys.argv[1]
usr = sys.argv[2]
pwd = sys.argv[3]
nc = neo4j_connect(base_uri, usr, pwd)
# Module-level list, created empty here.
statements = []

# fb connection
c = get_fb_conn()
cursor = c.cursor()


def map_feature_type(fbid, ftype):
    # Lookup from feature type names to SO_* identifiers
    # (presumably Sequence Ontology short forms -- TODO confirm).
    # Both insertion-site type names share the same identifier.
    mapping = {
        'transgenic_transposon': 'SO_0000796',
        'insertion_site': 'SO_0001218',
        'transposable_element_insertion_site': 'SO_0001218',
        'natural_transposon_isolate_named': 'SO_0000797',
        'chromosome_structure_variation': 'SO_1000183'
    }
    # see if there is some alternative query method, as classification is on FB site.
Arg2 = usr
Arg3 = pwd

This script relies on a uniqueness constraint being in place for OBO ids.

Created on 4 Feb 2016

@author: davidos"""

# Current version makes all edges. Might want to limit the types of edges made to those needed for graphing purposes.

# TODO: add in check of uniqueness constraint
# Use REST calls to /db/data/schema/

# Connection settings are taken positionally from the command line.
nc = neo4j_connect(base_uri = sys.argv[1], usr = sys.argv[2], pwd = sys.argv[3])


def make_name_edges(typ):
    """Build statements that add a named edge for each existing edge of type ``typ``.

    Queries every ``(n)-[r:typ]->(m)`` pattern for the short_form of both
    end nodes and the label stored on the relationship, then queues one
    MERGE statement per result row. Each statement links the two Class nodes
    with a relationship whose type is that label (spaces replaced by
    underscores) and whose ``type`` property is ``typ``.
    """
    statements = ["MATCH (n)-[r:%s]->(m) RETURN n.short_form, r.label, m.short_form" % typ]
    r = nc.commit_list(statements)
    # One [subject short_form, edge label, object short_form] row per match.
    triples = [x['row'] for x in r[0]['data']]
    # Reuse the name for the list of write statements built below.
    statements = []
    # Iterate over, making named edges for labels (spaces replaced with underscores)
    for t in triples:
        subj = t[0]
        rel = re.sub(' ', '_', t[1])  # In case any labels have spaces
        obj = t[2]
        # Merge ensures this doesn't lead to duplicated edges if already present:
        # NOTE(review): values are interpolated straight into the Cypher text,
        # so a short_form or label containing a quote character would break
        # the statement.
        statements.append("MATCH (n:Class),(m:Class) " \
                          "WHERE n.short_form = '%s' and m.short_form = '%s' " \
                          "MERGE (n)-[r:%s { type: '%s' }]->(m)" % (subj, obj, rel, typ))
Can these be paged or streamed?

@author: davidos
'''

# Using rest API to write (required for 2.n prod)

from neo4j.v1 import GraphDatabase, basic_auth
import sys
from uk.ac.ebi.vfb.neo4j.tools import neo4j_connect

# Two handles on the same host: a Bolt driver (`d`, with `session` opened from
# it) and a neo4j_connect handle on the HTTP endpoint (`nc`). Both take their
# credentials from argv[1] and argv[2] (presumably user name and password).
d = GraphDatabase.driver(url='bolt://blanik.inf.ed.ac.uk:7687', auth=basic_auth(sys.argv[1], sys.argv[2]))
nc = neo4j_connect('http://blanik.inf.ed.ac.uk:7447', sys.argv[1], sys.argv[2])
session = d.session()

# Assuming can handle full dump of triples!
# Assuming everything can be done with short_forms and edge types (can't yet).


class node2cypher:
    """Generate Cypher statements for adding content from nodes returned by Bolt queries.

    Assumes nodes have a short_form property.
    """

    def __init__(self, node):
        # Keep the node itself plus the two values copied off it:
        # its 'short_form' property and its id.
        self.node = node
        self.short_form = node.properties['short_form']
        self.id = node.id

    def gen_node_ref_statement(self, node_key):
        try:
#(ri)-[:INSTANCEOF]->(ab:Class { label: 'adult brain'}) ## NOTE: This glosses over gene product nodes - missing important information about whether gene or transgene expressed! ## Consider adding this back, but will need to settle on correct relationships to do so. # Strategy ## Phase 1a: Add all classes transgenes/genes for which we have expression data ## Phase 1b: Link individuals to expression pattern classes ## Phase 2: Add all expression statements (relying on FBex as unique identifier) ## Phase 3: Link transgenes to expression statement nodes via expression pattern nodes & occurs in edges, adding pubs. #### MATCH (ep:Class { label: 'Expression pattern' })<-[:InstanceOf]-(n:Individual) #### -[:expresses]->(:Class { short_form: 'FBbi1234567' } ) RETURN ep c = get_fb_conn() cursor = c.cursor() nc = neo4j_connect(base_uri=sys.argv[1], usr=sys.argv[2], pwd=sys.argv[3]) ## Phase 1: Add nodes for all transgenes/genes for which we have expression data ### Note: cursor.execute("SELECT DISTINCT tgtyp.name as tg_type, obj2.feature_id as transgene_feature_id, obj2.name as transgene_name, " \ "obj2.uniquename as transgene_uniquename, stype.name as gp_type, " \ "ex.uniquename as expession_id, subj.name as gp_name, subj.uniquename as gp_uname " \ " FROM feature_expression fe " \ " JOIN feature subj ON (fe.feature_id=subj.feature_id) " \ " JOIN cvterm stype ON (subj.type_id=stype.cvterm_id) " \ " JOIN feature_relationship fr1 ON (fe.feature_id = fr1.subject_id) " \ " JOIN cvterm rel1 ON (fr1.type_id = rel1.cvterm_id) " \ " JOIN feature_relationship fr2 ON (fr1.object_id = fr2.subject_id) " \ " JOIN cvterm rel2 ON (fr2.type_id = rel2.cvterm_id) " \ " JOIN feature obj2 ON (fr2.object_id = obj2.feature_id) " \
Can these be paged or streamed?

@author: davidos
'''

# Using rest API to write (required for 2.n prod)

from neo4j.v1 import GraphDatabase, basic_auth
import sys
from uk.ac.ebi.vfb.neo4j.tools import neo4j_connect

# Two handles on the same host: a Bolt driver (`d`, with `session` opened from
# it) and a neo4j_connect handle on the HTTP endpoint (`nc`). Both take their
# credentials from argv[1] and argv[2] (presumably user name and password).
d = GraphDatabase.driver(url='bolt://blanik.inf.ed.ac.uk:7687', auth=basic_auth(sys.argv[1], sys.argv[2]))
nc = neo4j_connect('http://blanik.inf.ed.ac.uk:7447', sys.argv[1], sys.argv[2])
session = d.session()

# Assuming can handle full dump of triples!
# Assuming everything can be done with short_forms and edge types (can't yet).


class node2cypher:
    """Generate Cypher statements for adding content from nodes returned by Bolt queries.

    Assumes nodes have a short_form property.
    """

    def __init__(self, node):
        # Keep the node itself plus the two values copied off it:
        # its 'short_form' property and its id.
        self.node = node
        self.short_form = node.properties['short_form']
        self.id = node.id

    def gen_node_ref_statement(self, node_key):