from uk.ac.ebi.vfb.neo4j.flybase2neo.dbtools import dict_cursor, get_fb_conn
from uk.ac.ebi.vfb.neo4j.tools import neo4j_connect
import re

"""Populate pub data.  Should be run as a final step, once all content added."""

## TODO: Add microrefs - for more compact views (P1)
## TODO: Add pub types (P2)
## TODO: Add authors (P3)
## TODO: Add pub relationships (P3)

import sys  # sys.argv is read below, but this script never imported sys

# Neo4j connection details are positional command-line arguments:
#   argv[1] = base URI, argv[2] = user name, argv[3] = password
base_uri = sys.argv[1]
usr = sys.argv[2]
pwd = sys.argv[3]

nc = neo4j_connect(base_uri, usr, pwd)

# Pull all pub FBrfs from graph
statements = ['MATCH (pub) RETURN DISTINCT pub.FlyBase']
pub_list_results = nc.commit_list(statements)
# Parsing returned Json for results.
# The query matches every node, so nodes without a FlyBase property come back
# as null; skip those rather than storing the literal string 'None'.
pub_list = [str(x['row'][0])
            for x in pub_list_results[0]['data']
            if x['row'][0] is not None]

c = get_fb_conn()
cursor = c.cursor()

def gen_micro_ref_from_miniref(miniref):
    """Placeholder for deriving a micro-reference from a miniref.

    Planned approach (not implemented yet): use a regex to truncate the
    miniref after the year and remove brackets.

    :param miniref: currently ignored.
    :return: always ``None``.
    """
    return None

def gen_micro_ref_from_db():
import json

import sys
from uk.ac.ebi.vfb.neo4j.flybase2neo.dbtools import dict_cursor, get_fb_conn, FB2Neo, FeatureRelationship
from uk.ac.ebi.vfb.neo4j.tools import neo4j_connect
import re

# neo connection: base URI, user name and password are the first three
# command-line arguments, in that order.
base_uri, usr, pwd = sys.argv[1], sys.argv[2], sys.argv[3]
nc = neo4j_connect(base_uri, usr, pwd)

# Statement list starts out empty.
statements = []

# fb connection, plus a cursor on it
c = get_fb_conn()
cursor = c.cursor()


def map_feature_type(fbid, ftype):
    """Set up a lookup from feature type names to ``SO_``-prefixed IDs.

    NOTE(review): only the lookup table is visible here -- neither ``fbid``
    nor ``ftype`` is used and nothing is returned, so the remainder of this
    function appears to be missing.  Presumably ``ftype`` is meant to be
    looked up in ``mapping``; confirm against the full source.
    """
    mapping = {
        'transgenic_transposon': 'SO_0000796',
        # The next two type names share the same ID.
        'insertion_site': 'SO_0001218',
        'transposable_element_insertion_site': 'SO_0001218',
        'natural_transposon_isolate_named': 'SO_0000797',
        'chromosome_structure_variation': 'SO_1000183'
    }
    # see if there is some alternative query method, as classification is on FB site.
Arg2 = usr
Arg3 = pwd

This script relies on a uniqueness constraint being in place for OBO ids.

Created on 4 Feb 2016

@author: davidos"""

# Current version makes all edges.  Might want to limit the types of edges made to those needed for graphing purposes.

# TODO: add in check of uniqueness constraint
# Use REST calls to /db/data/schema/


# Connection used by the functions below; base URI, user name and password are
# the first three command-line arguments.
nc = neo4j_connect(base_uri = sys.argv[1], usr = sys.argv[2], pwd = sys.argv[3])

def make_name_edges(typ):
    """Prepare Cypher statements adding a named edge for every ``typ`` edge.

    Queries the graph through the module-level ``nc`` connection for all
    ``(n)-[r:<typ>]->(m)`` triples, then builds one MATCH/MERGE statement per
    triple.  The new relationship's type is ``r.label`` with spaces replaced
    by underscores, and its ``type`` property is set to ``typ``.

    :param typ: relationship type to match; spliced directly into the Cypher
        text.

    NOTE(review): in the part of the function visible here the generated
    statements are only collected in a local list -- they are neither
    submitted nor returned.  The tail of the function appears to be missing;
    confirm against the full source.

    NOTE(review): short_forms and labels are inserted with ``%`` formatting
    and no escaping, so a value containing a single quote (or a label that is
    not a valid relationship type name) would produce a broken statement.
    """
    # Single read query: subject short_form, edge label, object short_form.
    statements = ["MATCH (n)-[r:%s]->(m) RETURN n.short_form, r.label, m.short_form" % typ]
    r = nc.commit_list(statements)        
    # Each result entry carries its values under 'row', in RETURN order.
    triples = [x['row'] for x in r[0]['data']]
    # Reuse the name for the write statements built below.
    statements = []
    # Iterate over, making named edges for labels (sub space for _)
    for t in triples:
        subj = t[0]
        rel = re.sub(' ', '_', t[1]) # In case any labels have spaces
        obj = t[2]
        # Merge ensures this doesn't lead to duplicated edges if already present:
        statements.append("MATCH (n:Class),(m:Class) " \
                          "WHERE n.short_form = '%s' and m.short_form = '%s' " \
                          "MERGE (n)-[r:%s { type: '%s' }]->(m)" % (subj, obj, rel, typ)) 
示例#4
0
Can these be paged or streamed?


@author: davidos
'''

# Using rest API to write (required for 2.n prod)

from neo4j.v1 import GraphDatabase, basic_auth
import sys
from uk.ac.ebi.vfb.neo4j.tools import neo4j_connect 

# Bolt driver; user name and password are the first two command-line arguments.
d = GraphDatabase.driver(url='bolt://blanik.inf.ed.ac.uk:7687',
                         auth=basic_auth(sys.argv[1], sys.argv[2]))

# Second connection to the same host over HTTP, using the same credentials.
# NOTE(review): port 7447 is not Neo4j's default HTTP port (7474) -- confirm
# this is intentional and not a transposition.
nc = neo4j_connect('http://blanik.inf.ed.ac.uk:7447',
                         sys.argv[1], sys.argv[2])
# Session opened on the bolt driver.
session = d.session()

# Assuming can handle full dump of triples!
# Assuming everything can be done with short_forms and edge types (can't yet).

class node2cypher:
    """A Class for generating Cypher statements for adding content
     from nodes returned by boldt queries.  Assumes nodes have short_form property"""
    def __init__(self, node):
        self.node = node
        self.short_form = node.properties['short_form']
        self.id = node.id
    
    def gen_node_ref_statement(self, node_key):
        try:
#(ri)-[:INSTANCEOF]->(ab:Class { label: 'adult brain'})

## NOTE: This glosses over gene product nodes - missing important information about whether gene or transgene expressed!
## Consider adding this back, but will need to settle on correct relationships to do so.

# Strategy
## Phase 1a: Add all classes transgenes/genes for which we have expression data
## Phase 1b: Link individuals to expression pattern classes
## Phase 2: Add all expression statements (relying on FBex as unique identifier)
## Phase 3: Link transgenes to expression statement nodes via expression pattern nodes & occurs in edges, adding pubs.
#### MATCH (ep:Class { label: 'Expression pattern' })<-[:InstanceOf]-(n:Individual)
#### -[:expresses]->(:Class { short_form: 'FBbi1234567' } )  RETURN  ep

# FlyBase connection and a cursor on it.
c = get_fb_conn()
cursor = c.cursor()
# Neo4j connection; base URI, user name and password are the first three
# command-line arguments.
nc = neo4j_connect(base_uri=sys.argv[1], usr=sys.argv[2], pwd=sys.argv[3])

## Phase 1: Add nodes for all transgenes/genes for which we have expression data
### Note:

cursor.execute("SELECT DISTINCT tgtyp.name as tg_type, obj2.feature_id as transgene_feature_id, obj2.name as transgene_name, " \
               "obj2.uniquename as transgene_uniquename, stype.name as gp_type,  " \
               "ex.uniquename as expession_id, subj.name as gp_name, subj.uniquename as gp_uname " \
               "    FROM feature_expression fe " \
               "    JOIN feature subj ON (fe.feature_id=subj.feature_id) " \
               "    JOIN cvterm stype ON (subj.type_id=stype.cvterm_id) " \
               "    JOIN feature_relationship fr1 ON (fe.feature_id = fr1.subject_id) " \
               "    JOIN cvterm rel1 ON (fr1.type_id = rel1.cvterm_id) " \
               "    JOIN feature_relationship fr2 ON (fr1.object_id = fr2.subject_id) " \
               "    JOIN cvterm rel2 ON (fr2.type_id = rel2.cvterm_id) " \
               "    JOIN feature obj2 ON (fr2.object_id = obj2.feature_id) " \
示例#6
0
Can these be paged or streamed?


@author: davidos
'''

# Using rest API to write (required for 2.n prod)

from neo4j.v1 import GraphDatabase, basic_auth
import sys
from uk.ac.ebi.vfb.neo4j.tools import neo4j_connect

# Bolt driver; user name and password are the first two command-line arguments.
d = GraphDatabase.driver(url='bolt://blanik.inf.ed.ac.uk:7687',
                         auth=basic_auth(sys.argv[1], sys.argv[2]))

# Second connection to the same host over HTTP, using the same credentials.
# NOTE(review): port 7447 is not Neo4j's default HTTP port (7474) -- confirm
# this is intentional and not a transposition.
nc = neo4j_connect('http://blanik.inf.ed.ac.uk:7447', sys.argv[1], sys.argv[2])
# Session opened on the bolt driver.
session = d.session()

# Assuming can handle full dump of triples!
# Assuming everything can be done with short_forms and edge types (can't yet).


class node2cypher:
    """A Class for generating Cypher statements for adding content
     from nodes returned by boldt queries.  Assumes nodes have short_form property"""
    def __init__(self, node):
        self.node = node
        self.short_form = node.properties['short_form']
        self.id = node.id

    def gen_node_ref_statement(self, node_key):