def transfer_inputs(dataset_ids, results_ids, from_storage, to_storage):
    ''' Transfer a set of input sequence and results datasets between storages. '''
    tantalus_api = TantalusApi()

    for dataset_id in dataset_ids:
        transfer_dataset(tantalus_api, dataset_id, 'sequencedataset', from_storage, to_storage)

    for results_id in results_ids:
        transfer_dataset(tantalus_api, results_id, 'resultsdataset', from_storage, to_storage)
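
# Illustrative sketch, not part of the original pipeline: transferring the inputs of a
# hypothetical analysis between two storages. The primary keys and storage names below
# are placeholders and assume matching records exist in Tantalus.
def _example_transfer_inputs():
    transfer_inputs(
        dataset_ids=[1001, 1002],        # hypothetical sequencedataset primary keys
        results_ids=[2001],              # hypothetical resultsdataset primary key
        from_storage='remote_storage',   # hypothetical storage names
        to_storage='local_storage',
    )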
def download_datasets(results_type, from_storage_name, to_storage_name, dataset_id=None, jira_ticket=None):
    ''' Download a set of results datasets by type. '''
    tantalus_api = TantalusApi()

    if dataset_id is not None:
        datasets = tantalus_api.list('results', id=dataset_id)
    elif jira_ticket is not None:
        datasets = tantalus_api.list('results', results_type=results_type, analysis__jira_ticket=jira_ticket)
    else:
        datasets = tantalus_api.list('results', results_type=results_type)

    dataset_ids = [dataset['id'] for dataset in datasets]

    # Download most recent first
    dataset_ids = sorted(dataset_ids, reverse=True)

    failed = False
    for dataset_id in dataset_ids:
        try:
            transfer_dataset(tantalus_api, dataset_id, 'resultsdataset', from_storage_name, to_storage_name)
        except Exception:
            logging.exception(f'failed to download {dataset_id}')
            failed = True

    if failed:
        raise Exception('one or more downloads failed')
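
# Illustrative sketch, not part of the original pipeline: downloading every results
# dataset of a given type for one analysis ticket from a remote storage to a local one.
# The results type, ticket and storage names below are hypothetical placeholders.
def _example_download_datasets():
    download_datasets(
        results_type='alignment',           # hypothetical results type
        from_storage_name='remote_storage', # hypothetical storage names
        to_storage_name='local_storage',
        jira_ticket='SC-0000',              # hypothetical ticket; omit to fetch all datasets of this type
    )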
def add_generic_results(
        filepaths, storage_name, results_name, results_type, results_version,
        sample_ids=(), library_ids=(), analysis_pk=None, recursive=False,
        tag_name=None, update=False, remote_storage_name=None):
    ''' Add a set of files to Tantalus as a results dataset. '''
    tantalus_api = TantalusApi()

    sample_pks = []
    for sample_id in sample_ids:
        sample = tantalus_api.get(
            "sample",
            sample_id=sample_id,
        )
        sample_pks.append(sample['id'])

    library_pks = []
    for library_id in library_ids:
        library = tantalus_api.get(
            "dna_library",
            library_id=library_id,
        )
        library_pks.append(library['id'])

    # Add the file resources to Tantalus
    file_resource_pks = []
    for filepath in filepaths:
        if recursive:
            logging.info("Recursing directory {}".format(filepath))
            add_filepaths = []
            for (dirpath, dirnames, filenames) in os.walk(filepath):
                for filename in filenames:
                    add_filepaths.append(os.path.join(dirpath, filename))
        else:
            add_filepaths = [filepath]

        for add_filepath in add_filepaths:
            logging.info("Adding file resource for {} to Tantalus".format(add_filepath))
            resource, instance = tantalus_api.add_file(
                storage_name=storage_name,
                filepath=add_filepath,
                update=update,
            )
            file_resource_pks.append(resource["id"])

    results_dataset_fields = dict(
        name=results_name,
        results_type=results_type,
        results_version=results_version,
        analysis=analysis_pk,
        samples=sample_pks,
        libraries=library_pks,
        file_resources=file_resource_pks,
    )

    # Add the results dataset to Tantalus, updating an existing dataset if requested
    try:
        results_id = tantalus_api.get("results", name=results_dataset_fields["name"])["id"]
    except NotFoundError:
        results_id = None

    if update and results_id is not None:
        logging.warning("results dataset {} exists, updating".format(results_dataset_fields["name"]))
        results_dataset = tantalus_api.update("results", id=results_id, **results_dataset_fields)
    else:
        logging.info("creating results dataset {}".format(results_dataset_fields["name"]))
        results_dataset = tantalus_api.get_or_create("results", **results_dataset_fields)

    if tag_name is not None:
        tantalus_api.tag(tag_name, resultsdataset_set=[results_dataset['id']])

    logging.info("Successfully created results dataset with ID {}".format(results_dataset["id"]))

    if remote_storage_name is not None:
        transfer_files.transfer_dataset(
            tantalus_api, results_dataset['id'], "resultsdataset", storage_name, remote_storage_name)

    return results_dataset
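
# Illustrative sketch, not part of the original pipeline: registering every file under a
# local directory as a new results dataset and mirroring it to a remote storage. All
# names, IDs and paths below are hypothetical placeholders.
def _example_add_generic_results():
    add_generic_results(
        filepaths=['/path/to/results_dir'],    # hypothetical local directory
        storage_name='local_storage',          # hypothetical storage names
        results_name='SAMPLE_LIB_custom_results',
        results_type='custom',
        results_version='v1',
        sample_ids=['SAMPLE'],                 # hypothetical sample and library IDs
        library_ids=['LIB'],
        recursive=True,                        # walk the directory and add every file
        tag_name='custom_results_tag',
        remote_storage_name='remote_storage',  # optionally mirror the dataset remotely
    )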