def _parse_relation(element):
    """Build a ``Relation`` from an XML element and add it to the pathway.

    Reads the ``entry1``, ``entry2`` and ``type`` attributes of *element*,
    then records one ``(name, value)`` subtype tuple per child element.

    Note: this function has no ``self`` parameter but uses ``self.pathway``,
    so it relies on ``self`` being available from the enclosing scope.

    Args:
        element: element exposing ``attrib`` and iterable over its children
            (presumably an ``xml.etree.ElementTree.Element`` for a KGML
            relation -- confirm against the caller).

    Raises:
        KeyError: if a required attribute is missing.
        ValueError: if an id-like attribute is not a valid integer.
    """
    new_relation = Relation()
    new_relation.entry1 = int(element.attrib['entry1'])
    new_relation.entry2 = int(element.attrib['entry2'])
    new_relation.type = element.attrib['type']
    # Iterate the element directly: Element.getchildren() was deprecated in
    # Python 3.2 and removed in Python 3.9, where it raises AttributeError.
    for subtype in element:
        name, value = subtype.attrib['name'], subtype.attrib['value']
        if name in ('compound', 'hidden compound'):
            # these subtype values are stored as integers
            new_relation.subtypes.append((name, int(value)))
        else:
            new_relation.subtypes.append((name, value))
    self.pathway.add_relation(new_relation)
def _parse_relation(element):
    """Turn one relation element into a ``Relation`` and register it.

    The ``entry1`` and ``entry2`` attributes are converted to ``int`` and the
    ``type`` attribute is copied as-is. Every child element contributes a
    ``(name, value)`` tuple to the relation's ``subtypes``; the value is
    converted to ``int`` when the name is ``"compound"`` or
    ``"hidden compound"`` and kept as a string otherwise.

    The finished relation is handed to ``self.pathway.add_relation``; ``self``
    is not a parameter here and must come from the enclosing scope.
    """
    integer_valued = ("compound", "hidden compound")

    relation = Relation()
    relation.entry1 = int(element.attrib["entry1"])
    relation.entry2 = int(element.attrib["entry2"])
    relation.type = element.attrib["type"]

    for child in element:
        name = child.attrib["name"]
        value = child.attrib["value"]
        relation.subtypes.append(
            (name, int(value) if name in integer_valued else value)
        )

    self.pathway.add_relation(relation)
def prune_kgml(pathway):
    """
    Prune a KGML pathway down to its entries and the relations among them.

    The pathway's entries and relations are copied first, then:

    1. For every ``group`` entry, each ordered pair of its component ids is
       offered to ``can_add_relation``; accepted pairs are queued as new
       relations.
    2. Relations with an endpoint of type ``ortholog``, ``group``, ``other``
       or ``map`` are dropped, and entries of those types are removed.
    3. For every ``compound`` entry that has at least one incoming and one
       outgoing relation, each (gene source, gene destination) pair around it
       is offered to ``can_add_relation`` and queued if accepted; the
       compound and all relations touching it are then removed. Compounds
       lacking incoming or outgoing relations are left untouched.
    4. Each queued pair becomes a ``Relation`` whose ``_entry1``/``_entry2``
       are the entry objects themselves and whose ``_pamogk`` flag is True.

    Inputs:
        pathway: object with
            entries: mapping whose values are entries (``id``, ``type``,
                and ``components`` for groups)
            relations: iterable of relations (``entry1``, ``entry2``)
    Outputs:
        entries: dict of entry id -> entry after pruning
        relations: list of surviving relations followed by the added ones
    """
    # work on private copies: an id -> entry dict and a plain list
    entries = {entry.id: entry for entry in pathway.entries.values()}
    relations = list(pathway.relations)
    pending_pairs = []  # (source id, destination id) pairs to build at the end

    # groups are replaced by relations between their components
    groups = [entry for entry in entries.values() if entry.type == 'group']
    for group in groups:
        member_ids = [component.id for component in group.components]
        for first_id in member_ids:
            for second_id in member_ids:
                if can_add_relation(first_id, second_id, relations, pending_pairs):
                    pending_pairs.append((first_id, second_id))

    # relations touching a filtered entry type go first, then the entries
    type_filter = ['ortholog', 'group', 'other', 'map']
    relations = [
        rel for rel in relations
        if not (rel.entry1.type in type_filter or rel.entry2.type in type_filter)
    ]
    filtered_ids = [entry.id for entry in entries.values() if entry.type in type_filter]
    for entry_id in filtered_ids:
        del entries[entry_id]

    # bridge over compounds: connect the genes on either side of each one
    gene_ids = {entry.id for entry in entries.values() if entry.type == 'gene'}
    compound_ids = [entry.id for entry in entries.values() if entry.type == 'compound']
    for cid in compound_ids:
        # ids of entries pointing at the compound / pointed at by it
        source_ids = {rel.entry1.id for rel in relations if rel.entry2.id == cid}
        dest_ids = {rel.entry2.id for rel in relations if rel.entry1.id == cid}
        if not (source_ids and dest_ids):
            continue
        print('src =', source_ids, 'dst =', dest_ids)

        # only genes take part in the bridging relations
        gene_sources = source_ids & gene_ids
        gene_dests = dest_ids & gene_ids
        for src in gene_sources:
            for dst in gene_dests:
                if not can_add_relation(src, dst, relations, pending_pairs):
                    print([src, dst], 'already in relations')
                    continue
                pending_pairs.append((src, dst))

        # the compound and every relation touching it are dropped
        relations = [
            rel for rel in relations
            if rel.entry1.id != cid and rel.entry2.id != cid
        ]
        del entries[cid]

    # materialise the queued pairs as relation objects
    for src, dst in pending_pairs:
        added = Relation()
        added._entry1 = entries[src]
        added._entry2 = entries[dst]
        added._pamogk = True  # to mark relation as added by PAMOGK
        relations.append(added)

    return entries, relations