async def upload_mappings(file: UploadFile = File(...)):
    """Load an uploaded tab-separated mapping file and import its mapping set.

    The upload is accepted only when its declared content type is exactly
    ``text/tab-separated-values``. The file is parsed with pandas (tab
    separator, ``#`` treated as the comment marker, missing cells replaced by
    empty strings), converted with ``from_dataframe`` and passed to
    ``Importer.import_mapping_set``.

    :param file: the uploaded file.
    :return: a dict with the upload's ``filename`` and the number of
        ``mappings`` found in the parsed mapping set.
    :raises HTTPException: with status 404 when the content type is anything
        other than ``text/tab-separated-values``.
    """
    if file.content_type != 'text/tab-separated-values':
        # NOTE(review): 404 is what this endpoint has always returned for an
        # unsupported upload type; 415 would be the conventional status --
        # confirm with API consumers before changing it.
        raise HTTPException(
            status_code=404,
            detail=f"content type not supported: {file.content_type}")

    frame = pd.read_csv(file.file, sep='\t', comment='#')
    frame = frame.fillna('')
    parsed = from_dataframe(frame, NAMESPACES, {})

    # An earlier variant of this call also passed NAMESPACES to the importer.
    graph_importer = Importer(neo4j_graph())
    graph_importer.import_mapping_set(parsed.mapping_set)

    mapping_count = len(parsed.mapping_set.mappings)
    return {"filename": file.filename, 'mappings': mapping_count}
def import_all():
    """Run every import step, in a fixed order, across all data sources.

    Each step is executed on its own freshly constructed
    ``Importer(neo4j_graph())``. The order is: NCIt, PDC node attributes,
    GDC node attributes, CRDC-H harmonized attributes, then the NCIt
    mappings for the ``'GDC'`` and ``'PDC'`` systems.
    """

    def fresh_importer():
        # One Importer (and one neo4j_graph() call) per step, never shared.
        return Importer(neo4j_graph())

    fresh_importer().import_ncit()

    # Data dictionaries: PDC first, then GDC.
    for source in (PdcImporter, GdcImporter):
        fresh_importer().import_node_attributes(source.read_data_dictionary())

    fresh_importer().import_harmonized_attributes(
        CrdcHImporter.read_harmonized_attributes())

    # NOTE(review): both systems are fed from GdcImporter.read_ncit_mappings(),
    # re-read for each system -- confirm that reusing the GDC mapping source
    # for 'PDC' is intended.
    for system in ('GDC', 'PDC'):
        fresh_importer().import_ncit_mapping(
            GdcImporter.read_ncit_mappings(), system)
"""CCDH Concept References: classes and endpoints"""
from fastapi import Request
from ccdh.api.cache import cache
from starlette.responses import StreamingResponse
from tccm_api.routers import concept_reference
from ccdh.api.routers.mappings import generate_sssom_tsv
from ccdh.api.routers.models import MappingSet
from ccdh.config import neo4j_graph
from ccdh.db.mdr_graph import MdrGraph

# TODO: Architecturally, why is 'concept_reference' in TCCM-API when so much else
# ...is in CCDH terminology service? What is the logic behind what is and isn't
# ...in TCCM? - joeflack4 2021/10/27

# The endpoints below are registered on the router object taken from
# tccm_api's concept_reference module rather than on a new router.
router = concept_reference.router
# Built once, when this module is imported, and used by the handler below.
mdr_graph = MdrGraph(neo4j_graph())


@router.get(
    '/{curie}/mappings',
    description=
    'An SSSOM TSV file depending on request headers, otherwise a list of mappings',
    response_model=MappingSet)
@cache()
def get_concept_reference_mappings(curie: str, request: Request):
    """Get the mappings of the concept reference identified by ``curie``.

    The mappings are looked up with
    ``mdr_graph.find_mappings_of_concept_reference``. When the request's
    ``accept`` header equals ``text/tab-separated-values+sssom``, the result
    is converted to a ``MappingSet`` and streamed through
    ``generate_sssom_tsv`` with that same media type.
    """
    mapping_set = mdr_graph.find_mappings_of_concept_reference(curie)
    # NOTE(review): this is a subscript lookup; presumably it raises KeyError
    # when the client sends no Accept header -- confirm, and consider
    # request.headers.get('accept').
    if request.headers['accept'] == 'text/tab-separated-values+sssom':
        return StreamingResponse(generate_sssom_tsv(
            MappingSet.parse_obj(mapping_set.__dict__)),
                                 media_type='text/tab-separated-values+sssom')
    # NOTE(review): no return for any other Accept value is visible in this
    # chunk, although the route description promises "a list of mappings" --
    # confirm the fall-through branch exists in the full file.
class_values = standard_class_dict.values() # dict_values harmonized_attributes = {} for cls in class_values: for attribute in cls.get('attributes', {}).values(): key = f'{model_name}.{cls["name"]}.{attribute["name"]}' harmonized_attribute = { 'system': model_name, 'entity': cls["name"], 'attribute': attribute["name"], 'definition': attribute["description"], 'node_attributes': [] } for mapping in mapping_types: if mapping in attribute: for m in attribute[mapping]: # TODO: Shouldn't exact_mapping be nested within node_attributes or # ...node_attributes/mappings?= instead? (updated here and in # ...Importer.read_harmonized_attribute() harmonized_attribute['node_attributes'].append(m) harmonized_attributes[key] = harmonized_attribute logger.info("Parsed the content in the CCDH Model YAML") return harmonized_attributes if __name__ == '__main__': from ccdh.importers.importer import Importer Importer(neo4j_graph()).import_harmonized_attributes( CrdcHImporter.read_harmonized_attributes())
# NOTE(review): this chunk begins with the closing statement of a preceding
# method whose `def` line is outside the visible source; the indentation below
# is reconstructed on the assumption that both methods live in one class.
        print(
            f'GDC data dictionary saved to \n{jsonfile}\nand linked to \n{GDC_JSON_FILE}'
        )

    @staticmethod
    def read_ncit_mappings():
        """Read the NCIt mapping CSV into a two-level dictionary.

        Opens ``GDC_MAPING_DIR / 'current.csv'``, discards its first row and
        indexes every remaining row by two of its columns.

        :return: ``{row[3]: {row[4]: row}}`` where ``row`` is the full list of
            cells of a CSV row. When two rows share both keys, the later row
            replaces the earlier one.
        """
        gdc_ncit_map = {}
        gdc_ncit_file = GDC_MAPING_DIR / 'current.csv'
        # NOTE(review): the csv module documentation recommends opening the
        # file with newline=''; no encoding is given, so the platform default
        # is used -- confirm both are acceptable for this file.
        with open(gdc_ncit_file, 'r') as csvfile:
            reader = csv.reader(csvfile)
            # Discard the first row (presumably a header line -- confirm).
            next(reader)
            for row in reader:
                # Column 3 is the outer key, column 4 the inner key.
                target_code = row[3]
                if target_code not in gdc_ncit_map:
                    gdc_ncit_map[target_code] = {}
                gdc_ncit_map[target_code][row[4]] = row
        return gdc_ncit_map


if __name__ == '__main__':
    # Both sections below run unconditionally, one after the other; comment
    # out whichever is not needed.
    # A. Refresh the data dictionary only
    GdcImporter.update_data_dictionary()
    # B. Debug the importer
    from ccdh.importers.importer import Importer
    # Importer(neo4j_graph()).import_node_attributes(GdcImporter.read_data_dictionary())
    Importer(neo4j_graph()).import_ncit_mapping(
        GdcImporter.read_ncit_mappings(), 'GDC')
    # NOTE(review): 'PDC' is loaded from GdcImporter's mappings as well --
    # confirm this is intended.
    Importer(neo4j_graph()).import_ncit_mapping(
        GdcImporter.read_ncit_mappings(), 'PDC')