def __init__(self):
    super(NcbiGeneParser, self).__init__()

    # arguments
    self.arguments = ['taxid']

    # output data
    # both gene IDs and GeneSymbols have the label 'Gene'
    # two different NodeSets are used because only the GeneSymbol nodes need the taxid for uniqueness
    self.genes = NodeSet(['Gene'], merge_keys=['sid'], default_props={'source': 'ncbigene'})
    self.genesymbols = NodeSet(['Gene'], merge_keys=['sid', 'taxid'],
                               default_props={'source': 'ncbigene', 'type': 'symbol'})

    self.genesymbol_synonym_genesymbol = RelationshipSet(
        'SYNONYM', ['Gene'], ['Gene'], ['sid', 'taxid'], ['sid', 'taxid'],
        default_props={'source': 'ncbigene'})
    self.gene_maps_genesymbol = RelationshipSet(
        'MAPS', ['Gene'], ['Gene'], ['sid'], ['sid', 'taxid'],
        default_props={'source': 'ncbigene'})
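# A minimal sketch of how the two NodeSets above behave on merge; the gene IDs
# and taxids below are made-up illustration values, not part of the parser.
# Gene ID nodes are unique on 'sid' alone, so adding the same sid twice yields
# a single node after merge:
#
#   parser = NcbiGeneParser()
#   parser.genes.add_node({'sid': '7157'})
#   parser.genes.add_node({'sid': '7157'})  # merged into the node above
#
# GeneSymbol nodes are unique on ('sid', 'taxid'), so the same symbol can
# exist once per organism:
#
#   parser.genesymbols.add_node({'sid': 'TP53', 'taxid': '9606'})
#   parser.genesymbols.add_node({'sid': 'TP53', 'taxid': '10090'})  # distinct node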
def __init__(self): """ :param mesh_instance: NcbiGene Instance :type mesh_instance: DataSourceInstance """ super(GtexMetadataParser, self).__init__() # NodeSets self.tissues = NodeSet(['GtexTissue'], merge_keys=['name']) self.detailed_tissues = NodeSet(['GtexDetailedTissue'], merge_keys=['name']) self.sample = NodeSet(['GtexSample'], merge_keys=['sid']) self.sample_measures_tissue = RelationshipSet('MEASURES', ['GtexSample'], ['GtexTissue'], ['sid'], ['name']) self.sample_measures_detailed_tissue = RelationshipSet( 'MEASURES', ['GtexSample'], ['GtexDetailedTissue'], ['sid'], ['name']) self.tissue_parent_detailed_tissue = RelationshipSet( 'PARENT', ['GtexTissue'], ['GtexDetailedTissue'], ['name'], ['name']) self.tissue_parent_detailed_tissue.unique = True
def __init__(self): """ :param ensembl_instance: The ENSEMBL DataSource instance. """ super(EnsemblEntityParser, self).__init__() # arguments self.arguments = ['taxid'] # NodeSets self.genes = NodeSet(['Gene'], merge_keys=['sid'], default_props={'source': 'ensembl'}) self.transcripts = NodeSet(['Transcript'], merge_keys=['sid'], default_props={'source': 'ensembl'}) self.proteins = NodeSet(['Protein'], merge_keys=['sid'], default_props={'source': 'ensembl'}) # RelationshipSets self.gene_codes_transcript = RelationshipSet( 'CODES', ['Gene'], ['Transcript'], ['sid'], ['sid'], default_props={'source': 'ensembl'}) self.transcript_codes_protein = RelationshipSet( 'CODES', ['Transcript'], ['Protein'], ['sid'], ['sid'], default_props={'source': 'ensembl'})
def __init__(self):
    super(LncipediaParser, self).__init__()

    self.genes = NodeSet(['Gene'], merge_keys=['sid'])
    self.transcripts = NodeSet(['Transcript'], merge_keys=['sid'])

    self.gene_codes_transcripts = RelationshipSet('CODES', ['Gene'], ['Transcript'], ['sid'], ['sid'])
    self.gene_maps_gene = RelationshipSet('MAPS', ['Gene'], ['Gene'], ['sid'], ['sid'])
    self.transcript_maps_transcript = RelationshipSet('MAPS', ['Transcript'], ['Transcript'], ['sid'], ['sid'])
def __init__(self):
    super(SwissLipidsParser, self).__init__()

    # define NodeSet and RelationshipSet
    self.lipids = NodeSet(['Lipid'], merge_keys=['sid'])

    self.lipid_fromclass_lipid = RelationshipSet('FROM_LIPID_CLASS', ['Lipid'], ['Lipid'], ['sid'], ['sid'])
    self.lipid_parent_lipid = RelationshipSet('HAS_PARENT', ['Lipid'], ['Lipid'], ['sid'], ['sid'])
    self.lipid_component_lipid = RelationshipSet('HAS_COMPONENT', ['Lipid'], ['Lipid'], ['sid'], ['sid'])
    self.lipid_maps_metabolite = RelationshipSet('MAPS', ['Lipid'], ['Metabolite'], ['sid'], ['sid'])
    self.lipid_associates_protein = RelationshipSet('HAS_ASSOCIATION', ['Lipid'], ['Protein'], ['sid'], ['sid'])
def __init__(self): """ :param refseq_instance: The RefSeq DataSource instance. """ super(RefseqCodesParser, self).__init__() # arguments self.arguments = ['taxid'] # define NodeSet and RelationshipSet self.gene_codes_transcript = RelationshipSet('CODES', ['Gene'], ['Transcript'], ['sid'], ['sid'], default_props={'source': 'refseq'}) self.transcript_codes_protein = RelationshipSet('CODES', ['Transcript'], ['Protein'], ['sid'], ['sid'], default_props={'source': 'refseq'})
def load_wpp_data(base_path, graph):
    """
    Load UN population data.

    :param base_path: Path where the file was downloaded.
    :param graph: py2neo Graph instance used for merging.
    """
    un_wpp_csv_file = os.path.join(base_path, 'WPP2019_PopulationByAgeSex_Medium.csv')
    log.info('Parse UN population data file: {}'.format(un_wpp_csv_file))

    country = NodeSet(['Country'], ['name'])
    age_group_nodes = NodeSet(['AgeGroup'], ['group'])
    country_total_group = RelationshipSet('CURRENT_TOTAL', ['Country'], ['AgeGroup'], ['name'], ['group'])
    country_male_group = RelationshipSet('CURRENT_MALE', ['Country'], ['AgeGroup'], ['name'], ['group'])
    country_female_group = RelationshipSet('CURRENT_FEMALE', ['Country'], ['AgeGroup'], ['name'], ['group'])

    countries_added = set()
    age_groups_added = set()

    with open(un_wpp_csv_file, 'rt') as f:
        csv_file = csv.reader(f, delimiter=',', quotechar='"')
        # skip header
        next(csv_file)

        for row in csv_file:
            # LocID,Location,VarID,Variant,Time,MidPeriod,AgeGrp,AgeGrpStart,AgeGrpSpan,PopMale,PopFemale,PopTotal
            loc_id = row[0]
            location = row[1]
            time = int(row[4])
            age_group = row[6]
            age_group_start = int(row[7])
            age_group_span = row[8]
            # population counts are given in thousands
            pop_male = int(float(row[9]) * 1000)
            pop_female = int(float(row[10]) * 1000)
            pop_total = int(float(row[11]) * 1000)

            # only take 2019
            if time == 2019:
                if location not in countries_added:
                    country.add_node({'name': location, 'un_id': loc_id})
                    countries_added.add(location)
                if age_group not in age_groups_added:
                    age_group_nodes.add_node({'group': age_group, 'start': age_group_start, 'span': age_group_span})
                    # track the group so the node is only added once
                    age_groups_added.add(age_group)

                country_total_group.add_relationship({'name': location}, {'group': age_group}, {'count': pop_total})
                country_male_group.add_relationship({'name': location}, {'group': age_group}, {'count': pop_male})
                country_female_group.add_relationship({'name': location}, {'group': age_group}, {'count': pop_female})

    log.info('Load data to Neo4j')
    country.merge(graph)
    age_group_nodes.merge(graph)
    country_total_group.merge(graph)
    country_male_group.merge(graph)
    country_female_group.merge(graph)
def read_daily_report_data_csv_JHU(file):
    """
    Extract data from a single daily report file from JHU.

    :param file: Path to the CSV file
    :return: NodeSets and RelationshipSets extracted from the file
    """
    log.info('Read JHU CSV file {}'.format(file))

    countries = NodeSet(['Country'], ['name'])
    provinces = NodeSet(['Province'], ['name'])
    updates = NodeSet(['DailyReport'], ['uuid'])
    province_in_country = RelationshipSet('PART_OF', ['Province'], ['Country'], ['name'], ['name'])
    province_in_country.unique = True
    province_rep_update = RelationshipSet('REPORTED', ['Province'], ['DailyReport'], ['name'], ['uuid'])

    with open(file, 'rt') as csvfile:
        rows = csv.reader(csvfile, delimiter=',', quotechar='"')
        # skip header
        next(rows)

        for row in rows:
            country = row[1]
            province = row[0]
            # if no name for province, use country name
            if not province:
                province = '{}_complete'.format(country)

            date = parse(row[2])
            uuid = country + province + str(date)

            confirmed = int(row[3]) if row[3] else 'na'
            death = int(row[4]) if row[4] else 'na'
            recovered = int(row[5]) if row[5] else 'na'

            lat = row[6] if len(row) >= 7 else None
            long = row[7] if len(row) >= 8 else None

            province_dict = {'name': province}
            if lat and long:
                province_dict['latitude'] = lat
                province_dict['longitude'] = long

            provinces.add_unique(province_dict)
            countries.add_unique({'name': country})
            updates.add_unique(
                {'date': date, 'confirmed': confirmed, 'death': death, 'recovered': recovered, 'uuid': uuid})

            province_in_country.add_relationship({'name': province}, {'name': country}, {'source': 'jhu'})
            province_rep_update.add_relationship({'name': province}, {'uuid': uuid}, {'source': 'jhu'})

    return countries, provinces, updates, province_in_country, province_rep_update
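# A minimal usage sketch for the function above; 'some_graph' and the file
# path are placeholder assumptions, not part of the original code. The
# returned sets are merged with nodes before relationships, so each
# relationship MERGE can match its endpoints:
#
#   countries, provinces, updates, province_in_country, province_rep_update = \
#       read_daily_report_data_csv_JHU('data/03-20-2020.csv')
#   for object_set in (countries, provinces, updates, province_in_country, province_rep_update):
#       object_set.merge(some_graph)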
def __init__(self):
    super(HmdbParser, self).__init__()

    # NodeSets
    self.metabolites = NodeSet(['Metabolite'], merge_keys=['sid'], default_props={'source': 'hmdb'})

    self.metabolite_map_metabolite = RelationshipSet(
        'MAPS', ['Metabolite'], ['Metabolite'], ['sid'], ['sid'], default_props={'source': 'hmdb'})
    self.metabolite_associates_protein = RelationshipSet(
        'HAS_ASSOCIATION', ['Metabolite'], ['Protein'], ['sid'], ['sid'], default_props={'source': 'hmdb'})
def __init__(self):
    super(RefseqRemovedRecordsParser, self).__init__()

    self.arguments = ['taxid']

    self.legacy_ids = set()

    self.legacy_transcripts = NodeSet(['Transcript', 'Legacy'], merge_keys=['sid'],
                                      default_props={'source': 'refseq'})
    self.legacy_transcript_now_transcript = RelationshipSet(
        'REPLACED_BY', ['Transcript'], ['Transcript'], ['sid'], ['sid'], default_props={'source': 'refseq'})
    self.legacy_proteins = NodeSet(['Protein', 'Legacy'], merge_keys=['sid'],
                                   default_props={'source': 'refseq'})
    self.legacy_protein_now_protein = RelationshipSet(
        'REPLACED_BY', ['Protein'], ['Protein'], ['sid'], ['sid'], default_props={'source': 'refseq'})
    self.gene_codes_legacy_transcript = RelationshipSet(
        'CODES', ['Gene'], ['Transcript', 'Legacy'], ['sid'], ['sid'], default_props={'source': 'refseq'})
    self.legacy_transcript_codes_protein = RelationshipSet(
        'CODES', ['Transcript', 'Legacy'], ['Protein'], ['sid'], ['sid'], default_props={'source': 'refseq'})
def __init__(self):
    super(ChebiParser, self).__init__()

    # NodeSets
    self.metabolites = NodeSet(['Metabolite'], merge_keys=['sid'], default_props={'source': 'chebi'})

    self.metabolite_isa_metabolite = RelationshipSet(
        'IS_A', ['Metabolite'], ['Metabolite'], ['sid'], ['sid'], default_props={'source': 'chebi'})
    self.metabolite_rel_metabolite = RelationshipSet(
        'CHEBI_REL', ['Metabolite'], ['Metabolite'], ['sid'], ['sid'], default_props={'source': 'chebi'})
    self.metabolite_maps_metabolite = RelationshipSet(
        'MAPS', ['Metabolite'], ['Metabolite'], ['sid'], ['sid'], default_props={'source': 'chebi'})
def __init__(self):
    super(MirbaseParser, self).__init__()

    # NodeSets
    self.precursor_mirna = NodeSet(['PrecursorMirna'], merge_keys=['sid'])
    self.mature_mirna = NodeSet(['Mirna'], merge_keys=['sid'])

    # RelationshipSets
    self.precursor_codes_mature = RelationshipSet('PRE', ['PrecursorMirna'], ['Mirna'], ['sid'], ['sid'])
    self.transcript_codes_precursor = RelationshipSet('IS', ['Transcript'], ['PrecursorMirna'], ['sid'], ['sid'])
    self.gene_is_precursor = RelationshipSet('IS', ['Gene'], ['PrecursorMirna'], ['sid'], ['sid'])
def __init__(self): """ """ super(MirtarbaseParser, self).__init__() # RelationshipSets self.mirna_targets_gene = RelationshipSet('TARGETS', ['Mirna'], ['Gene'], ['name'], ['sid'])
def __init__(self):
    super(SomeParser, self).__init__()

    self.source = NodeSet(['Source'], merge_keys=['source_id'])
    self.target = NodeSet(['Target'], merge_keys=['target_id'])
    self.rels = RelationshipSet('FOO', ['Source'], ['Target'], ['source_id'], ['target_id'])
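# A minimal end-to-end sketch for a parser like SomeParser; the property
# values and the 'graph' connection are illustrative assumptions, not part
# of the code above. The pattern is: fill the sets while parsing, then merge
# nodes before relationships:
#
#   parser = SomeParser()
#   parser.source.add_node({'source_id': 's1'})
#   parser.target.add_node({'target_id': 't1'})
#   parser.rels.add_relationship({'source_id': 's1'}, {'target_id': 't1'}, {'since': 2020})
#
#   parser.source.merge(graph)
#   parser.target.merge(graph)
#   parser.rels.merge(graph)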
def __init__(self):
    super(GeneOntologyAssociationParser, self).__init__()

    self.arguments = ['taxid']

    # RelationshipSets
    self.protein_associates_goterm = RelationshipSet('ASSOCIATION', ['Protein'], ['Term'], ['sid'], ['sid'])
def __init__(self):
    super(EnsemblMappingParser, self).__init__()

    # arguments
    self.arguments = ['taxid']

    # define NodeSet and RelationshipSet
    self.gene_maps_gene = RelationshipSet(
        'MAPS', ['Gene'], ['Gene'], ['sid'], ['sid'], default_props={'source': 'ensembl'})
    self.transcript_maps_transcript = RelationshipSet(
        'MAPS', ['Transcript'], ['Transcript'], ['sid'], ['sid'], default_props={'source': 'ensembl'})
    self.protein_maps_protein = RelationshipSet(
        'MAPS', ['Protein'], ['Protein'], ['sid'], ['sid'], default_props={'source': 'ensembl'})
def __init__(self): """ :param ncbigene_instance: NcbiGene Instance :type ncbigene_instance: DataSourceInstance :param taxid: """ super(HGNCParser, self).__init__() # output data self.genes = NodeSet(['Gene'], merge_keys=['sid']) self.gene_maps_gene = RelationshipSet('MAPS', ['Gene'], ['Gene'], ['sid'], ['sid']) self.gene_maps_genesymbol = RelationshipSet('MAPS', ['Gene'], ['GeneSymbol'], ['sid'], ['sid', 'taxid'])
def __init__(self): """ :param uniprot_instance: The Uniprot instance :param taxid: The taxid """ super(UniprotKnowledgebaseParser, self).__init__() # arguments self.arguments = ['taxid'] # NodeSet self.proteins = NodeSet(['Protein'], merge_keys=['sid'], default_props={'source': 'uniprot'}) # RelationshipSet self.protein_primary_protein = RelationshipSet('PRIMARY', ['Protein'], ['Protein'], ['sid'], ['sid'], default_props={'source': 'uniprot'}) self.transcript_codes_protein = RelationshipSet('CODES', ['Transcript'], ['Protein'], ['sid'], ['sid'], default_props={'source': 'uniprot'}) self.protein_maps_protein = RelationshipSet('MAPS', ['Protein'], ['Protein'], ['sid'], ['sid'], default_props={'source': 'uniprot'})
def __init__(self):
    super(MirdbParser, self).__init__()

    # arguments
    self.arguments = ['taxid']

    # RelationshipSets
    self.mirna_targets_transcript = RelationshipSet('TARGETS', ['Mirna'], ['Transcript'], ['name'], ['sid'])
def __init__(self):
    super(MeshParser, self).__init__()

    # NodeSets
    self.descriptor = NodeSet(['MeshDescriptor'], merge_keys=['sid'])
    self.qualifier = NodeSet(['MeshQualifier'], merge_keys=['sid'])
    self.concept = NodeSet(['MeshConcept'], merge_keys=['sid'])
    self.term = NodeSet(['MeshTerm'], merge_keys=['sid'])

    self.descriptor_allowed_qualifier = RelationshipSet('ALLOWED', ['MeshDescriptor'], ['MeshQualifier'],
                                                        ['sid'], ['sid'])
    # the unique flag makes a RelationshipSet skip relationships that were already added
    self.descriptor_has_concept = RelationshipSet('HAS', ['MeshDescriptor'], ['MeshConcept'], ['sid'], ['sid'])
    self.descriptor_has_concept.unique = True
    self.concept_has_term = RelationshipSet('HAS', ['MeshConcept'], ['MeshTerm'], ['sid'], ['sid'])
    self.concept_has_term.unique = True
    self.concept_related_concept = RelationshipSet('RELATED', ['MeshConcept'], ['MeshConcept'], ['sid'], ['sid'])
    self.concept_related_concept.unique = True
def __init__(self):
    super(NcbiLegacyGeneParser, self).__init__()

    self.arguments = ['taxid']

    self.legacy_genes = NodeSet(['Gene', 'Legacy'], merge_keys=['sid'], default_props={'source': 'ncbigene'})
    self.legacy_gene_now_gene = RelationshipSet(
        'REPLACED_BY', ['Gene', 'Legacy'], ['Gene'], ['sid'], ['sid'], default_props={'source': 'ncbigene'})
def __init__(self):
    super(DummyParser, self).__init__()

    # arguments
    self.arguments = ['taxid']

    # output data
    self.dummy_nodes = NodeSet(['Dummy'], merge_keys=['sid'])
    self.fummy_nodes = NodeSet(['Fummy'], merge_keys=['sid'])
    self.dummy_knows_fummy = RelationshipSet('KNOWS', ['Dummy'], ['Fummy'], ['sid'], ['sid'])
def __init__(self): """ :param ncbigene_instance: NcbiGene Instance :type ncbigene_instance: DataSourceInstance :param taxid: """ super(NcbiGeneOrthologParser, self).__init__() self.gene_ortholog_gene = RelationshipSet('ORTHOLOG', ['Gene'], ['Gene'], ['sid'], ['sid']) self.object_sets = [self.gene_ortholog_gene] self.container.add_all(self.object_sets)
def __init__(self): """ :param mesh_instance: NcbiGene Instance :type mesh_instance: DataSourceInstance """ super(GtexDataParser, self).__init__() self.gene_expressed_tissue = RelationshipSet('EXPRESSED', ['Gene'], ['GtexDetailedTissue'], ['sid'], ['name']) self.object_sets = [self.gene_expressed_tissue] self.container.add_all(self.object_sets)
def _define_node_and_relationship_sets(self):
    # Define NodeSets
    nodeSets = {}
    relSets = {}
    nodeSets["Papers"] = NodeSet(["Paper"], ["paper_id"])
    nodeSets["PaperIDHubs"] = NodeSet(
        [self.id_node_label, config.JSON2GRAPH_COLLECTION_NODE_LABEL], ["id"])
    nodeSets["Metadata"] = NodeSet(["Metadata"], ["_hash_id"])
    nodeSets["Authors"] = NodeSet(["Author"], ["_hash_id"])
    nodeSets["AuthorHubs"] = NodeSet(
        ["Author", config.JSON2GRAPH_COLLECTION_NODE_LABEL], ["id"])
    nodeSets["Abstracts"] = NodeSet(["Abstract"], ["_hash_id"])
    nodeSets["AbstractHubs"] = NodeSet(
        ["Abstract", config.JSON2GRAPH_COLLECTION_NODE_LABEL], ["id"])

    relSets["PAPER_HAS_PAPERID_COLLECTION"] = RelationshipSet(
        rel_type="PAPER_HAS_PAPERID_COLLECTION",
        start_node_labels=["Paper"],
        end_node_labels=[
            self.id_node_label,
            config.JSON2GRAPH_COLLECTION_NODE_LABEL,
        ],
        start_node_properties=["paper_id"],
        end_node_properties=["id"],
    )
    relSets["PAPER_HAS_METADATA"] = RelationshipSet(
        rel_type="PAPER_HAS_METADATA",
        start_node_labels=["Paper"],
        end_node_labels=["Metadata"],
        start_node_properties=["paper_id"],
        end_node_properties=["_hash_id"],
    )
    relSets["METADATA_HAS_AUTHORHUB"] = RelationshipSet(
        rel_type="METADATA_HAS_AUTHOR",
        start_node_labels=["Metadata"],
        end_node_labels=["Author", config.JSON2GRAPH_COLLECTION_NODE_LABEL],
        start_node_properties=["_hash_id"],
        end_node_properties=["id"],
    )
    relSets["AUTHORHUB_HAS_AUTHOR"] = RelationshipSet(
        rel_type="AUTHOR_HAS_AUTHOR",
        start_node_labels=["Author", config.JSON2GRAPH_COLLECTION_NODE_LABEL],
        end_node_labels=["Author"],
        start_node_properties=["id"],
        end_node_properties=["_hash_id"],
    )
    relSets["PAPER_HAS_ABSTRACTHUB"] = RelationshipSet(
        rel_type="PAPER_HAS_ABSTRACT",
        start_node_labels=["Paper"],
        end_node_labels=["Abstract", config.JSON2GRAPH_COLLECTION_NODE_LABEL],
        start_node_properties=["paper_id"],
        end_node_properties=["id"],
    )
    relSets["ABSTRACTHUB_HAS_ABSTRACT"] = RelationshipSet(
        rel_type="ABSTRACT_HAS_ABSTRACT",
        start_node_labels=["Abstract", config.JSON2GRAPH_COLLECTION_NODE_LABEL],
        end_node_labels=["Abstract"],
        start_node_properties=["id"],
        end_node_properties=["_hash_id"],
    )

    # Define ID nodes and relationships
    for col_name, node_props in self.id_columns.items():
        nodeSets[node_props["label"]] = NodeSet(
            [self.id_node_label, node_props["label"]], ["id"])
        relSets[node_props["label"]] = RelationshipSet(
            rel_type="PAPERID_COLLECTION_HAS_PAPERID",
            start_node_labels=[
                self.id_node_label,
                config.JSON2GRAPH_COLLECTION_NODE_LABEL,
            ],
            end_node_labels=[self.id_node_label, node_props["label"]],
            start_node_properties=["id"],
            end_node_properties=[node_props["attr"]],
        )

    self.nodeSets = nodeSets
    self.relSets = relSets
def read_daily_report_data_csv_JHU(file):
    """
    Extract data from a single daily report file from JHU.

    Old format (until 03-21-2020):
        Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
    New format:
        FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key

    :param file: Path to the CSV file
    :return: NodeSets and RelationshipSets extracted from the file
    """
    log.info('Read JHU CSV file {}'.format(file))

    countries = NodeSet(['Country'], ['name'])
    provinces = NodeSet(['Province'], ['name'])
    updates = NodeSet(['DailyReport'], ['uuid'])
    province_in_country = RelationshipSet('PART_OF', ['Province'], ['Country'], ['name'], ['name'])
    province_in_country.unique = True
    province_rep_update = RelationshipSet('REPORTED', ['Province'], ['DailyReport'], ['name'], ['uuid'])

    with open(file, 'rt') as csvfile:
        rows = csv.reader(csvfile, delimiter=',', quotechar='"')

        # the old format has 8 columns, the new one 12; use the header length
        # to decide which row parser to apply
        header = next(rows)
        if len(header) > 8:
            file_type = 'new'
        else:
            file_type = 'old'
        log.info("File type: {}".format(file_type))

        for row in rows:
            if file_type == 'old':
                country, province, date, confirmed, death, recovered, lat, long = parse_jhu_old_file_row(row)
            elif file_type == 'new':
                country, province, date, confirmed, death, recovered, lat, long = parse_jhu_new_file_row(row)

            province_dict = {'name': province}
            if lat and long:
                province_dict['latitude'] = lat
                province_dict['longitude'] = long

            uuid = country + province + str(date)

            provinces.add_unique(province_dict)
            countries.add_unique({'name': country})
            updates.add_unique({
                'date': date, 'confirmed': confirmed, 'death': death, 'recovered': recovered, 'uuid': uuid
            })

            province_in_country.add_relationship({'name': province}, {'name': country}, {'source': 'jhu'})
            province_rep_update.add_relationship({'name': province}, {'uuid': uuid}, {'source': 'jhu'})

    return countries, provinces, updates, province_in_country, province_rep_update
# imports this snippet relies on
import gzip
import shutil
from urllib.request import urlopen

import py2neo
from graphio import NodeSet, RelationshipSet

graph = py2neo.Graph(host=NEO4J_HOST, user=NEO4J_USER, password=NEO4J_PASSWORD)
# quick connectivity check
graph.run("MATCH (a) RETURN a LIMIT 1")

# Download file from NCBI FTP server
print('Download file from NCBI FTP server')
with urlopen('ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Mammalia/Homo_sapiens.gene_info.gz') as r:
    with open(DOWNLOAD_FILE_PATH, 'wb') as f:
        shutil.copyfileobj(r, f)

# define NodeSet and RelationshipSet
ncbi_gene_nodes = NodeSet(['Gene'], ['gene_id'])
ensembl_gene_nodes = NodeSet(['Gene'], ['gene_id'])
gene_mapping_rels = RelationshipSet('MAPS', ['Gene'], ['Gene'], ['gene_id'], ['gene_id'])

# iterate the data file and extract nodes/relationships
print('Iterate file and create nodes/relationships')

# collect mapped ENSEMBL gene IDs to avoid duplicate genes
ensembl_gene_ids_added = set()

with gzip.open(DOWNLOAD_FILE_PATH, 'rt') as file:
    # skip header line
    next(file)
    # iterate file
    for line in file:
        fields = line.strip().split('\t')
        ncbi_gene_id = fields[1]
        ncbi_gene_nodes.add_node({'gene_id': ncbi_gene_id})
        # get mapping to ENSEMBL Gene IDs
        # (the original snippet ends here; the following completion assumes the
        # standard gene_info layout, where column 6 holds pipe-separated dbXrefs
        # such as 'Ensembl:ENSG00000141510')
        for xref in fields[5].split('|'):
            if xref.startswith('Ensembl:'):
                ensembl_gene_id = xref.split(':', 1)[1]
                if ensembl_gene_id not in ensembl_gene_ids_added:
                    ensembl_gene_nodes.add_node({'gene_id': ensembl_gene_id})
                    ensembl_gene_ids_added.add(ensembl_gene_id)
                gene_mapping_rels.add_relationship({'gene_id': ncbi_gene_id}, {'gene_id': ensembl_gene_id}, {})

# load the collected data to Neo4j
print('Load data to Neo4j')
ncbi_gene_nodes.merge(graph)
ensembl_gene_nodes.merge(graph)
gene_mapping_rels.merge(graph)
def _create_relation(self,
                     parent_node: Node,
                     child_node: Node,
                     relation_props=None,
                     relationshipset_identifier=None):
    if parent_node is None or child_node is None:
        return None
    # avoid the mutable-default-argument pitfall
    if relation_props is None:
        relation_props = {}

    if relationshipset_identifier is None:
        relationshipset_identifier = (
            frozenset(parent_node.labels),
            frozenset(child_node.labels),
        )
    if (hasattr(parent_node, "override_reltype")
            and child_node.__primarylabel__ in parent_node.override_reltype):
        relationshipset_identifier = (
            frozenset(parent_node.labels),
            frozenset(child_node.labels),
            frozenset(parent_node.override_reltype[child_node.__primarylabel__]),
        )

    # Create a new RelationshipSet if necessary
    if relationshipset_identifier not in self.relationshipSets:
        rel_name = None
        if callable(self.config_func_custom_relation_name_generator):
            rel_name = self.config_func_custom_relation_name_generator(
                parent_node, child_node, relation_props)
        if rel_name is None:
            child_node_name = child_node.__primarylabel__.upper()
            parent_node_name = parent_node.__primarylabel__.upper()
            rel_name = "{}_HAS_{}".format(parent_node_name, child_node_name)
            if hasattr(parent_node, "override_reltype"):
                if child_node.__primarylabel__ in parent_node.override_reltype:
                    rel_name = parent_node.override_reltype[
                        child_node.__primarylabel__].upper()
        if rel_name in self.config_dict_reltype_override:
            rel_name = self.config_dict_reltype_override[rel_name]
        if rel_name in self.config_list_drop_reltypes:
            self._blocked_reltypes.append(relationshipset_identifier)
        else:
            self.relationshipSets[relationshipset_identifier] = RelationshipSet(
                rel_type=rel_name,
                start_node_labels=frozenset(parent_node.labels),
                end_node_labels=frozenset(child_node.labels),
                start_node_properties=self._get_merge_keys(parent_node),
                end_node_properties=self._get_merge_keys(child_node),
            )

    # add relationship to set if not blocked by caller config
    if relationshipset_identifier not in self._blocked_reltypes:
        self.relationshipSets[relationshipset_identifier].add_relationship(
            start_node_properties={
                key: val
                for key, val in dict(parent_node).items()
                if key in self._get_merge_keys(parent_node)
            },
            end_node_properties={
                key: val
                for key, val in dict(child_node).items()
                if key in self._get_merge_keys(child_node)
            },
            properties=relation_props,
        )
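# A small illustrative sketch of the override_reltype hook used above; the
# node class and values are hypothetical, not part of the code. A parent node
# can rename the relationship that links it to a child of a given primary
# label, replacing the default '<PARENT>_HAS_<CHILD>' name:
#
#   person = Person()  # py2neo OGM-style node with __primarylabel__ == 'Person'
#   person.override_reltype = {'Address': 'LIVES_AT'}
#   # _create_relation(person, address) now creates a LIVES_AT relationship
#   # instead of the default PERSON_HAS_ADDRESS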
def __init__(self):
    super(NcbiHomoloGeneParser, self).__init__()

    # output data
    self.gene_homolog_gene = RelationshipSet('HOMOLOG', ['Gene'], ['Gene'], ['sid'], ['sid'])
def __init__(self):
    super(DependingTestParser, self).__init__()

    self.rels = RelationshipSet('FOO', ['Source'], ['Target'], ['source_id'], ['target_id'])