def get_workflow_inputs(l, pkd, gi, git, history_name, library):
    """Build the BioBlend workflow-inputs dict for a Galaxy workflow run.

    Maps each workflow input step to a Galaxy dataset reference. The ``src``
    values follow Galaxy's naming: LibraryDatasetDatasetAssociation (ldda),
    LibraryDataset (ld), HistoryDatasetAssociation (hda), or
    HistoryDatasetCollectionAssociation (hdca).

    Args:
        l: iterable of 5-tuples ``(table, filter, dinput_name, dinput_step,
            dinput_type)`` describing each workflow input.
        pkd: dict mapping ``str(table.prefix)`` to the selected primary keys.
        gi: Galaxy instance handle passed to ``HistoryClient``.
        git: unused here; kept for interface compatibility with callers.
        history_name: base name used when creating per-input data histories.
        library: Galaxy library used to resolve ``galaxyfilelink_set`` rows;
            falsy means the links reference history datasets directly.

    Returns:
        dict mapping ``dinput_step`` to ``{'id': ..., 'src': 'ld'|'hdca'}``,
        suitable for BioBlend's workflow invocation ``inputs`` argument.

    Raises:
        IndexError: if an input has no selected files or no matching
            galaxyfilelink for ``library``.
    """
    st = get_time_stamp()
    hc = HistoryClient(gi)
    workflow_inputs_d = {}

    # NOTE: loop variable renamed from `filter` (shadowed the builtin and read
    # ambiguously next to the queryset .filter() calls); it is unused here.
    for table, _filter, dinput_name, dinput_step, dinput_type in l:
        pks = pkd[str(table.prefix)]
        # Multiple galaxyfilelinks can point at the same file; they are all
        # the same file, so distinct() collapses them to unique rows.
        selected_objects = GenericFile.objects.filter(pk__in=pks).distinct()
        print('PKS', pks, dinput_type)
        print(selected_objects)

        if dinput_type == 'data_input':
            # Can only use the first selection (need a data collection for
            # multiple files; 'multiple files' inputs aren't possible with
            # BioBlend as far as we know).
            s = selected_objects[0]
            gfl = s.galaxyfilelink_set.filter(galaxy_library=library)[0]
            gid = gfl.galaxy_id
            print(gid)
            workflow_inputs_d[dinput_step] = {'id': gid, 'src': 'ld'}

        elif dinput_type == 'data_collection_input':
            element_identifiers = []
            # Dedicated history to hold the datasets backing this collection.
            hist = hc.create_history('{}-(data-history-{})-{}'.format(
                history_name, dinput_name, st))

            for s in selected_objects:
                print(s)
                gfl = s.galaxyfilelink_set.filter(galaxy_library=library)[0]
                if library:
                    # Copy the library dataset into the new history so the
                    # collection can reference it as an hda.
                    dataset = hc.upload_dataset_from_library(
                        hist['id'], lib_dataset_id=gfl.galaxy_id)
                    element_identifiers.append({
                        'id': dataset['id'],
                        'name': os.path.basename(dataset['file_name']),
                        'src': 'hda'
                    })
                else:
                    # Link already points at a history dataset.
                    element_identifiers.append({
                        'id': gfl.galaxy_id,
                        'name': gfl.genericfile.data_file.name,
                        'src': 'hda'
                    })

            c_descript = {
                'collection_type': 'list',
                'element_identifiers': element_identifiers,
                'name': dinput_name,
            }
            dc = hc.create_dataset_collection(hist['id'], c_descript)
            workflow_inputs_d[dinput_step] = {'id': dc['id'], 'src': 'hdca'}

    return workflow_inputs_d
# Re-count how many datasets in the Galaxy library folder have finished
# processing (state == "ok"), then build a 'list' dataset collection from
# the manifest files in a fresh history.
# NOTE(review): `ready`, `fc`, `hc`, `folder`, `now_string`, and `files` are
# defined outside this chunk. The `old_ready = ready` / recount / sleep(5)
# sequence reads like the body of a polling loop whose header is outside
# this view — confirm against the full file.
print("ready files: {}".format(ready))
old_ready = ready
ready = 0
# Count folder contents that Galaxy reports as fully processed.
for f in fc.show_folder(folder["id"], contents=True)["folder_contents"]:
    if f["state"] == "ok":
        ready = ready + 1
sleep(5)  # give Galaxy time between readiness checks
print("...")
print("All {} datasets ready!".format(ready))

# add files to history
history = hc.create_history("{}".format(now_string))
print(history)

# create dataset collection
# Elements reference library datasets directly ('ldda' source).
collection_description = {
    'collection_type': 'list',
    'element_identifiers': [],
    'name': 'manifest collection'
}
for f in files:
    element_identifier = {
        'id': f["id"], 'name': f["name"], 'src': 'ldda'}
    collection_description["element_identifiers"].append(element_identifier)
print(collection_description)
hc.create_dataset_collection(history["id"], collection_description)