def import_spreadsheet(self, xls_filename):
    """Dry-run import the given spreadsheet through the ingest API.

    :param xls_filename: path to the .xlsx spreadsheet file to import.
    :return: the result of XlsImporter.dry_run_import_file for the file.
    """
    self._logger.debug("importing spreadsheet")
    ingest_api = IngestApi(url=Config['INGEST_API'])
    importer = XlsImporter(ingest_api)
    # Bug fix: the original ignored the xls_filename argument and read
    # self._xls_filename instead; import the file that was passed in.
    return importer.dry_run_import_file(file_path=xls_filename)
def test_spreadsheet_import(self):
    """Import a downloaded spreadsheet and verify that entity counts
    reported by the ingest API match the entities parsed locally.
    """
    self.metadata_spreadsheet_path = os.path.join(self.test_data_path, SPREADSHEET_FILE)
    download_file(SPREADSHEET_LOCATION, self.metadata_spreadsheet_path)

    importer = XlsImporter(self.ingest_api)
    submission_resource = self.ingest_api.create_submission()
    # Strip the templated suffix (e.g. "{?projection}") from the HAL link.
    submission_url = submission_resource["_links"]["self"]["href"].rsplit("{")[0]
    submission, _ = importer.import_file(self.metadata_spreadsheet_path, submission_url, False)

    # Group the locally parsed entities by their type.
    entities_by_type = {}
    for entity in submission.get_entities():
        entities_by_type.setdefault(entity.type, []).append(entity)

    files = list(self.ingest_api.get_entities(submission_url, 'files'))
    biomaterials = list(self.ingest_api.get_entities(submission_url, 'biomaterials'))
    protocols = list(self.ingest_api.get_entities(submission_url, 'protocols'))
    processes = list(self.ingest_api.get_entities(submission_url, 'processes'))

    # assertEquals is a deprecated alias of assertEqual (removed in 3.12).
    self.assertEqual(len(files), len(entities_by_type['file']))
    self.assertEqual(len(biomaterials), len(entities_by_type['biomaterial']))
    self.assertEqual(len(protocols), len(entities_by_type['protocol']))
    self.assertEqual(len(processes), len(entities_by_type['process']))
def upload_spreadsheet():
    """Handle an uploaded spreadsheet: authenticate, save the file, import
    it via the ingest API and return a success/failure HTTP response.
    """
    try:
        logger.info("Uploading spreadsheet")
        token = _check_token()
        path = _save_spreadsheet()

        ingest_api = IngestApi()
        ingest_api.set_token(token)
        importer = XlsImporter(ingest_api)

        # Resolve the target project uuid, if a project already exists.
        project = _check_for_project(ingest_api)
        project_uuid = (project['uuid'].get('uuid')
                        if project and project.get('uuid') else None)

        submission_url = ingest_api.createSubmission(token)
        _submit_spreadsheet_data(importer, path, submission_url, project_uuid)
        return create_upload_success_response(submission_url)
    except SpreadsheetUploadError as upload_error:
        return create_upload_failure_response(upload_error.http_code,
                                              upload_error.message,
                                              upload_error.details)
    except Exception as err:
        logger.error(traceback.format_exc())
        return create_upload_failure_response(
            500,
            "We experienced a problem while uploading your spreadsheet",
            str(err))
def test_import_spreadsheet(self):
    """Dry-run import of a local spreadsheet should produce a submission."""
    spreadsheet_file = BASE_PATH + '/metadata_spleen_new_protocols.xlsx'
    # Bug fix: import_file returns a (submission, template_manager) tuple
    # (see its other call sites); asserting on the raw tuple is always
    # truthy, so unpack and assert on the submission itself.
    submission, _ = XlsImporter(MagicMock()).import_file(
        file_path=spreadsheet_file, submission_url=None, dry_run=True)
    self.assertTrue(submission)
def main():
    """Interactive entry point for the HCA-to-DSP helper script."""
    dsp = DspCLI()
    print(
        "Welcome to the HCA to DSP easy use script! Please, select the option that better suits your needs:\n"
    )
    print(
        "1 - Submission for dummies: Guided submisssion through the DSP, with indications and questions along the way\n"
        "2 - I want to do my own thing: Access to all the functions the DspCLI object provides\n"
        "3 - I just want to convert a spreadsheet into submittable objects and then exit."
    )

    # Keep prompting until the user types 1, 2 or 3.
    option = None
    while option is None:
        try:
            candidate = int(input())
        except ValueError:
            print("Please select a valid option: 1, 2 or 3\n")
            continue
        if candidate in (1, 2, 3):
            option = candidate
        else:
            print("Please select a valid option: 1, 2 or 3\n")

    if option == 2:
        # Free-form mode: keep offering CLI functions until the user quits.
        while True:
            cli_function = show_cli_options(dsp)
            if not cli_function:
                break
            call_function(cli_function, dsp)

    if option == 3:
        # Convert a spreadsheet using the HCA ingest importer, then exit.
        input_path = input(
            "Please provide with the path to the HCA spreadsheet file: ")
        print("Importing; Might take some time to process...\n\n")
        api = IngestApi(url="https://api.ingest.data.humancellatlas.org/")
        spreadsheet = XlsImporter(api).dry_run_import_file(input_path)
        entity_map = spreadsheet.get_entities()

        # Build the submittable JSON objects, dropping the files section.
        json_list = get_json_from_map(entity_map)
        del json_list['files']

        output_path = input(
            "Please provide with the folder path for the submittable outputs: "
        )
        write_json_to_submit(json_list, output_path)
        print(f"Saved submittable JSONs to folder {output_path}")

    print("Goodbye! :)")
def setUp(self):
    """Create a mocked ingest API, an importer, and shared fixture data."""
    self.mock_ingest_api = MagicMock(spec=IngestApi)
    self.importer = XlsImporter(self.mock_ingest_api)

    # Template manager stub that resolves every schema URL to ''.
    self.mock_template_mgr = Mock()
    self.mock_template_mgr.get_schema_url = Mock(return_value='')

    # Minimal spreadsheet JSON that only links to an existing project.
    self.spreadsheet_json_with_project_reference = {
        'project': {
            'project-uuid': {'is_linking_reference': True},
        },
    }
def __init__(self, file):
    """Dry-run import *file* and prepare the bookkeeping structures used
    when walking the resulting entity graph.
    """
    ingest_url = os.environ.get('INGEST_API',
                                'https://api.ingest.data.humancellatlas.org')
    self.importer = XlsImporter(IngestApi(url=ingest_url))
    self.entity_map = self.importer.dry_run_import_file(file)
    self.node_by_type = self.entity_map.entities_dict_by_type

    # Graph bookkeeping, filled in by the traversal methods.
    self.links = {}
    self.files = []
    self.donors = []
    self.path_dict = {}
    self.process_prot_link = {}
    self.high_level = {}
    self.high_level_paths = []

    # Entity types whose position within a path is fixed vs. variable.
    self.known_position = [
        "donor_organism", "library_preparation_protocol",
        "sequencing_protocol"
    ]
    self.unknown_position = [
        "specimen_from_organism", "cell_line", "organoid",
        "collection_protocol", "enrichment_protocol",
        "dissociation_protocol", "cell_suspension"
    ]
class GraphValidator:
    """Validate the entity graph produced by a dry-run spreadsheet import.

    Builds a link graph between files, biomaterials and processes, folds
    protocols into their processes, finds every file -> donor path (a
    "bundle"), and reports floating (unlinked) entities and per-type
    entity counts.
    """

    def __init__(self, file):
        """Dry-run import *file* and set up the traversal bookkeeping."""
        ingest_url = os.environ.get(
            'INGEST_API', 'https://api.ingest.data.humancellatlas.org')
        ingest_api = IngestApi(url=ingest_url)
        self.importer = XlsImporter(ingest_api)
        self.entity_map = self.importer.dry_run_import_file(file)
        self.node_by_type = self.entity_map.entities_dict_by_type
        # Graph bookkeeping, filled in by the traversal methods below.
        self.links = {}
        self.files = []
        self.donors = []
        self.path_dict = {}
        self.process_prot_link = {}
        self.high_level = {}
        # Entity types whose position within a path is fixed vs. variable.
        self.known_position = [
            "donor_organism", "library_preparation_protocol",
            "sequencing_protocol"
        ]
        self.unknown_position = [
            "specimen_from_organism", "cell_line", "organoid",
            "collection_protocol", "enrichment_protocol",
            "dissociation_protocol", "cell_suspension"
        ]
        self.high_level_paths = []

    def get_all_links(self):
        """Populate self.links (node id -> linked process ids), self.files,
        self.donors and self.process_prot_link from the entity map.
        """
        for entity_type in ('file', 'biomaterial', 'process'):
            for node_id, node in self.node_by_type[entity_type].items():
                has_process_link = False
                if entity_type == 'file':
                    self.files.append(node_id)
                if entity_type == 'biomaterial':
                    # Last path segment of describedBy is the concrete type.
                    concrete = node.content.get('describedBy').split('/')[-1]
                    if concrete == 'donor_organism':
                        self.donors.append(node_id)
                for link in node.direct_links or []:
                    link_id = link.get('id')
                    if link.get('entity') == 'process':
                        has_process_link = True
                        self.links.setdefault(node_id, []).append(link_id)
                    if entity_type == 'process' and \
                            link.get('entity') == 'protocol':
                        protocols = self.process_prot_link.setdefault(
                            node_id, [])
                        if link_id not in protocols:
                            protocols.append(link_id)
                # Nodes without any process link still get an (empty) entry
                # so identify_float() can spot them.
                if not has_process_link:
                    self.links[node_id] = []

    def add_process_links(self):
        """Make links bidirectional: if A links to B, B links back to A."""
        for source, targets in self.links.items():
            for target in targets:
                if target in self.links and source not in self.links[target]:
                    self.links[target].append(source)

    def identify_float(self):
        """Print every entity that ended up with no links at all."""
        for node_id, targets in self.links.items():
            if not targets:
                print('%s is a floating entity' % node_id)

    def find_path(self, s, e, link_graph, path=None):
        """Depth-first search for a path from *s* to *e* in *link_graph*.

        :return: the list of node ids from s to e, or None if unreachable.
        """
        path = (path or []) + [s]
        if s == e:
            return path
        if s not in link_graph:
            return None
        for neighbour in link_graph[s]:
            if neighbour not in path:  # avoid cycles
                extended = self.find_path(neighbour, e, link_graph, path=path)
                if extended:
                    return extended
        return None

    def find_all_paths(self):
        """Record every file -> donor path in self.path_dict.

        Keys are the reversed path joined with ':' (protocols folded into
        their process as "process/prot1/prot2"); values are file ids.
        """
        for file_id in self.files:
            for donor_id in self.donors:
                full_path = self.find_path(file_id, donor_id, self.links)
                if not full_path:
                    continue
                filename = full_path[0]
                entity_path = full_path[1:]
                for process_id, protocols in self.process_prot_link.items():
                    if process_id in entity_path:
                        entity_path[entity_path.index(process_id)] = (
                            process_id + '/' + '/'.join(protocols))
                entity_path.reverse()
                key = ':'.join(entity_path)
                self.path_dict.setdefault(key, []).append(filename)

    def clean_output(self):
        """Print each recorded path with the files that belong to it."""
        for key, filenames in self.path_dict.items():
            print('%s %s' % (key.replace(':', ' -> '), filenames))

    def gather_entity_info(self):
        """Group entity ids by their concrete schema type in self.high_level.

        Note: the original's dedup check was a no-op (both branches
        appended), so duplicates are intentionally kept to preserve counts.
        """
        for node_type in self.node_by_type:
            for node_id, node in self.node_by_type.get(node_type).items():
                concrete = node.content.get('describedBy').split('/')[-1]
                self.high_level.setdefault(concrete, []).append(node_id)

    def summary(self):
        """Print the bundle count and per-type entity counts."""
        print('Number of bundles: %s' % len(self.path_dict))
        for entity_type, ids in self.high_level.items():
            print('Number of %s: %s' % (entity_type, len(ids)))