def go_annotations(dburl=config.get("databases.go_url",None), genus="Saccharomyces", species="cerevisiae", include_ancestors=False):
    """Retrieve the GO annotations of one species from a GO MySQL database.

    The database contents can be obtained from the geneontology website.

    :param dburl: database url, e.g.
        "mysql://username:password@hostname:port/go"
    :param genus: genus name used to select the species rows
    :param species: species name used to select the species rows
    :param include_ancestors: if True, each association is expanded through
        ``graph_path`` to the terms at the other end of the path, keeping
        only paths whose relationship type is named "is_a"; if False only
        the directly associated term is reported.

    Associations flagged ``is_not`` and associations with evidence code
    "ND" are left out. The result is a copy named "annotations" with the
    fields gene_symbol, gene_id, go_id, annotation, go_type and evidence.
    """
    go = open_go(dburl)

    # species -> gene_product -> association -> evidence, plus the dbxref
    # record of every gene product
    annots = go.species |Match(_.id, _.species_id)| go.gene_product
    annots = annots |Match(_.gene_product.id, _.gene_product_id)| go.association
    annots = annots |Match(_.association.id, _.association_id)| go.evidence
    annots = annots |Match(_.gene_product.dbxref_id, _.id)| go.dbxref

    if not include_ancestors:
        # only the term that the association itself points to
        annots = annots |Match(_.association.term_id, _.id)| go.term//"annot"
    else:
        # follow graph_path from the associated term (term2) to term1 and
        # keep the paths whose relationship term is called "is_a"
        annots = annots |Match(_.association.term_id, _.term2_id)| go.graph_path
        annots = annots |Match(_.term1_id, _.id)| go.term//"annot"
        annots = annots |Match(_.relationship_type_id, _.id)| go.term//"rel"
        annots = annots[_.rel.name == "is_a"]

    # restrict to the requested species, drop negated and "ND" annotations
    annots = annots[(_.genus==genus) & (_.species == species)]
    annots = annots[_.is_not == False]
    annots = annots[_.evidence.code != "ND"]
    annots = annots.ReplaceMissing()

    selected = annots.Get(_.symbol /"gene_symbol",
                          _.xref_key / "gene_id",
                          _.annot.acc/"go_id",
                          _.annot.name/"annotation",
                          _.annot.term_type/"go_type",
                          _.evidence.code/"evidence")
    named = selected%"annotations"
    return named.Copy()
def string_interaction_types(dburl=config.get('databases.string_url',None), species="Saccharomyces cerevisiae", external_names=False):
    """Return the typed interactions ("actions") of a species from String.

    Given a Postgres db with String data, specified in dburl, and a
    species, returns for every row of ``network.actions`` the two protein
    names together with the mode, action, a_is_acting and score fields.
    The database data can be obtained from String.

    example url: "postgres://username:password@hostname:port/string_dbname"

    Use ``connect`` to access the whole database::

        #Get available species names:
        >>> connect(dburl).items.species.official_name

    :param dburl: url of the String database
    :param species: official species name to select
    :param external_names: if True, name the proteins by their external id
        with everything up to and including the first dot removed;
        otherwise use the preferred name.
    :returns: query result named "interactions" with fields left, right,
        mode, action, a_is_acting and score.
    """
    z = string(dburl)

    # attach both interaction partners and the species of the left partner
    inter = z.network.actions
    inter = inter |Match(_.item_id_a, _.protein_id)| z.items.proteins//"left"
    inter = inter |Match(_.item_id_b, _.protein_id)| z.items.proteins//"right"
    inter = inter |Match(_.left.species_id, _.species_id)| z.items.species
    inter = inter[_.official_name == species]

    if external_names:
        # Presumably external ids look like "<taxon id>.<name>" (confirm
        # against the String schema). Split on the FIRST dot only, so that
        # names which themselves contain dots are not truncated.
        names = inter.Get(_.left.protein_external_id/"left",
                          _.right.protein_external_id/"right").Each(lambda x : x.split('.', 1)[1],dtype="bytes")
    else:
        names = inter.Get(_.left.preferred_name/"left",
                          _.right.preferred_name/"right")

    return inter.Get(names, _.mode, _.action, _.a_is_acting, _.score) % "interactions"
def string_interactions(dburl=config.get('databases.string_url',None), species="Saccharomyces cerevisiae", subscores=False, external_names=False):
    """Return the protein-protein interactions of a species from String.

    Given a Postgres db with String data, specified in dburl, and a
    species, returns all interactions and their score.
    The database data can be obtained from String.

    example url: "postgres://username:password@hostname:port/string_dbname"

    Use ``connect`` to access the whole database::

        #Get available species names:
        >>> connect(dburl).items.species.official_name

    :param dburl: url of the String database
    :param species: official species name to select
    :param subscores: if True, the individual evidence scores are returned
        in addition to ``combined_score``.
    :param external_names: if True, name the proteins by their external id
        with everything up to and including the first dot removed;
        otherwise use the preferred name.
    :returns: query result named "interactions" with fields left, right,
        the optional subscores, and combined_score.
    """
    z = string(dburl)

    # select the links of the requested species, then attach both partners
    inter = z.items.species |Match| z.network.protein_protein_links
    inter = inter[_.official_name == species]
    inter = inter |Match(_.protein_id_a, _.protein_id)| z.items.proteins//"left"
    inter = inter |Match(_.protein_id_b, _.protein_id)| z.items.proteins//"right"

    if external_names:
        # Presumably external ids look like "<taxon id>.<name>" (confirm
        # against the String schema). Split on the FIRST dot only, so that
        # names which themselves contain dots are not truncated.
        names = inter.Get(_.left.protein_external_id/"left",
                          _.right.protein_external_id/"right").Each(lambda x : x.split('.', 1)[1],dtype="bytes")
    else:
        names = inter.Get(_.left.preferred_name/"left",
                          _.right.preferred_name/"right")

    if subscores:
        return inter.Get(names,
                         _.equiv_nscore/"neighborhood_score",
                         _.equiv_nscore_transferred/"neighborhood_score_transferred",
                         _.equiv_fscore/"fusion_score",
                         _.equiv_pscore/"phylo_cooccurence_score",
                         _.equiv_hscore/"homology_score",
                         _.array_score/"coexpression_score",
                         _.array_score_transferred/"coexpression_score_transferred",
                         _.experimental_score/"experimental_score",
                         _.experimental_score_transferred/"experimental_score_transferred",
                         _.database_score/"curated_score",
                         _.database_score_transferred/"curated_score_transferred",
                         _.textmining_score/"textmining_score",
                         _.textmining_score_transferred/"textmining_score_transferred",
                         _.combined_score) % "interactions"
    return inter.Get(names, _.combined_score) % "interactions"
def go_info(dburl=config.get("databases.go_url",None), genus="Saccharomyces", species="cerevisiae", include_ancestors=False):
    """Collect GO term information for one species from a GO MySQL database.

    The database contents can be obtained from the geneontology website.

    :param dburl: database url, e.g.
        "mysql://username:password@hostname:port/go"
    :param genus: genus name used to select the species rows
    :param species: species name used to select the species rows
    :param include_ancestors: if False, term info is built from the
        graph paths of the terms that are directly associated with a gene
        product of the species. If True, it is built from all of
        ``graph_path``, restricted to the terms that received a gene count
        (i.e. the terms occurring as parent of an associated term).

    :returns: a copy of the per-term info (go_id, go_type, annotation,
        depth, and per ancestor the relationship and distance of the
        shortest path) matched with the gene count ``ngenes`` per term.
    """
    go = open_go(dburl)

    # associations of the requested species, joined with the graph paths
    # that end (term2) at the associated term
    annotated = go.species
    annotated = annotated |Match(_.id, _.species_id)| go.gene_product
    annotated = annotated |Match(_.gene_product.id, _.gene_product_id)| go.association
    annotated = annotated |Match(_.association.term_id, _.term2_id)| go.graph_path
    annotated = annotated[(_.genus==genus) & (_.species == species)]

    # attach the term records of both path ends and of the relationship,
    # which carry the actual GO accession ids
    annotated = annotated |Match(_.term2_id, _.id)| go.term//"child"
    annotated = annotated |Match(_.term1_id, _.id)| go.term//"parent"
    annotated = annotated |Match(_.relationship_type_id, _.id)| go.term//"rel"

    # keep only the fields needed further on
    annotated = annotated.ReplaceMissing()
    if include_ancestors:
        annotated = annotated.Get(_.child.acc/"go_id",
                                  _.parent.acc/"parent_id",
                                  _.gene_product.id/"gene_id").Copy()
    else:
        annotated = annotated.Get(_.child.acc/"go_id",
                                  _.parent.acc/"parent_id",
                                  _.child.term_type/"go_type",
                                  _.child.name/"annotation",
                                  _.distance,
                                  _.rel.name/"relationship",
                                  _.gene_product.id/"gene_id").Copy()

    # step A: number of unique genes per (parent) go term
    ngenes = annotated.GroupBy(_.parent_id).Get(_.parent_id/"go_id",_.gene_id.Unique().Count()/"ngenes")

    # choose the path table from which the term info is derived
    if include_ancestors:
        paths = go.graph_path
        paths = paths |Match(_.term2_id, _.id)| go.term//"child"
        paths = paths |Match(_.term1_id, _.id)| go.term//"parent"
        paths = paths |Match(_.relationship_type_id, _.id)| go.term//"rel"
        paths = paths.Get(_.child.acc/"go_id",
                          _.parent.acc/"parent_id",
                          _.child.term_type/"go_type",
                          _.child.name/"annotation",
                          _.distance,
                          _.rel.name/"relationship").Copy()
        term_paths = paths[_.go_id |In| ngenes.go_id]
    else:
        term_paths = annotated

    # step B1: group per term
    goinfo = term_paths.GroupBy(_.go_id, flat=(_.go_type, _.annotation))

    # step B2: for each term, determine type, max depth to root, ancestors
    # (non-unique) and relationship to ancestors
    goinfo = goinfo.Get(_.go_id, _.go_type, _.annotation,
                        _[_.parent_id == "all"].distance.Max()/"depth",
                        _.parent_id / "ancestor",
                        _.relationship,
                        _.distance)

    # step B3: select for each ancestor the minimum path length
    goinfo = goinfo.GroupBy(_.ancestor)[..., _.distance.Argmin()].Copy()

    # step C: combine, copy, return
    combined = goinfo |Match| ngenes
    return combined.Copy()
def open_go(dburl=config.get("databases.go_url",None)):
    """Return the result of ``Connect(dburl)`` for the GO database url.

    :param dburl: database url; defaults to the "databases.go_url"
        configuration entry (None when that entry is absent).
    """
    return Connect(dburl)
def string(dburl=config.get('databases.string_url',None)):
    """Return the result of ``Connect(dburl)`` for the String database url.

    :param dburl: database url; defaults to the "databases.string_url"
        configuration entry (None when that entry is absent).
    """
    connection = Connect(dburl)
    return connection
def run(cls, query, run_manager, portnumber=config.get('debug.cytoscape_port_number',9000)):
    """Send the graph of the CreateGraph pass to a local Cytoscape XML-RPC server.

    NOTE(review): the first parameter is ``cls`` and an instance is created
    through ``cls()``, so this is presumably decorated as a classmethod;
    the decorator is not visible here.

    :param query: not used by this method.
    :param run_manager: object whose ``pass_results`` holds the graph
        under the key ``create_graph.CreateGraph``.
    :param portnumber: port of the Cytoscape XML-RPC server on localhost.

    The graph is pruned, a new network is created, and nodes, edges and
    their attributes are uploaded. Every entry of
    ``graph.node_attributes`` is sent as a FLOATING, INTEGER or STRING
    attribute, depending on the type of its first value. Values that cannot
    be converted are skipped, and strings longer than 500 characters are
    truncated. Finally the visual mappings are configured and a
    hierarchical layout is applied.
    """
    self = cls()
    self.rand = random.randint(0,10000000)
    self.graph = run_manager.pass_results[create_graph.CreateGraph]
    self.graph.pruneGraph()

    self.server = xmlrpclib.ServerProxy("http://localhost:" + str(portnumber)).Cytoscape
    self.network = self.server.createNetwork("network" + str(networkid()))

    # containers that self.node / self.edgeKey are expected to fill
    self.unique_names = defaultdict(int)
    self.names = dict()
    self.node_name = dict()
    self.node_class = dict()
    self.node_type = dict()
    self.node_rep = dict()

    self.edge_from = []
    self.edge_to = []
    self.edge_type = []
    self.edge_attr = []

    for node in self.graph.nodes:
        self.node(node)
    self.server.createNodes(self.network, self.names.values())

    for source,edges in self.graph.edge_source.iteritems():
        for edge in edges:
            assert edge.source is source, "Source in edge and index not equal"
            self.edgeKey(edge.__class__.__name__,edge)
    self.edgeids = self.server.createEdges(self.network,self.edge_from, self.edge_to, self.edge_type,[True] * len(self.edge_type),False)

    self.server.addNodeAttributes("name","STRING",self.node_name,False)
    self.server.addNodeAttributes("type","STRING",self.node_type,False)
    self.server.addNodeAttributes("class","STRING",self.node_class,False)
    self.server.addNodeAttributes("rep","STRING",self.node_rep,False)

    self.server.addEdgeAttributes("type","STRING",dict(zip(self.edgeids,self.edge_type)))
    self.server.addEdgeAttributes("attr","STRING",dict(zip(self.edgeids,self.edge_attr)))

    for attribute,attribute_dict in self.graph.node_attributes.iteritems():
        values = list(attribute_dict.values())
        if not values:
            # No value to derive the attribute type from (and nothing to
            # take a maximum of for 'time'); skip instead of raising
            # IndexError.
            continue

        # The first value decides the Cytoscape type of the whole attribute.
        # 'convert' is deliberately not called 'cls', to avoid shadowing the
        # cls parameter.
        if isinstance(values[0], float):
            xtype = "FLOATING"
            convert = float
        elif isinstance(values[0], int):
            xtype = "INTEGER"
            convert = int
        else:
            xtype = "STRING"
            convert = str

        attribute_name_dict = {}
        for node, node_name in self.names.iteritems():
            try:
                r = attribute_dict.get(node,"")
                if isinstance(r, representor.Representor):
                    r = str(r.__class__.__name__)
                else:
                    r = convert(r)
            except Exception:
                # Best effort: a value that cannot be converted (e.g. the
                # "" default for a numeric attribute) is left out. Unlike a
                # bare except, this lets KeyboardInterrupt/SystemExit through.
                continue
            if isinstance(r,str) and len(r) > 500:
                r = r[:500] + ' ...'
            attribute_name_dict[node_name] = r

        self.server.addNodeAttributes(attribute,xtype,attribute_name_dict,False)

        if attribute == 'links':
            import matplotlib.cm
            cm = discrete_color_map(attribute_name_dict.values(), matplotlib.cm.gist_rainbow)
            self.server.createDiscreteMapper('default','links', 'Node Color','#444444',cm)

        if attribute == "time":
            self.server.createContinuousMapper('default','time', 'Node Size',[0.0, max(values)],[20.0, 20.0, 100.0, 100.0])

    self.server.setNodeLabel(self.network, "name", "","default")
    self.server.setDiscreteNodeShapeMapper(self.network, 'default', 'type', 'diamond', {'else':'ellipse', 'unaryop':'octagon', 'rep':'round_rect'}, True)
    self.server.setEdgeTargetArrowRule(self.network,"type","Arrow",["ParamListEdge","ParamChoiceListEdge"],["T","T"])
    self.server.setEdgeLineStyleRule(self.network,"type","SOLID",["ParamChoiceEdge","ParamChoiceListEdge",'SQLResultEdge'],["DOT","DOT","SINEWAVE"])
    self.server.performLayout(self.network, "hierarchical")