Example no. 1
0
def get_workflow_inputs(l, pkd, gi, git, history_name, library):
    """Build the input mapping for a Galaxy workflow invocation.

    Parameters
    ----------
    l : iterable of 5-tuples
        ``(table, table_filter, dinput_name, dinput_step, dinput_type)``
        describing each workflow input step. ``dinput_type`` is either
        ``'data_input'`` or ``'data_collection_input'``.
    pkd : dict
        Maps ``str(table.prefix)`` to the primary keys of the selected files.
    gi : galaxy instance
        Passed to ``HistoryClient`` for history/collection operations.
    git : unused here; kept for interface compatibility with callers.
    history_name : str
        Base name used when creating per-input data histories.
    library : Galaxy library reference
        Used to pick the matching ``galaxyfilelink`` for each file; when
        falsy, collection elements reference the link's galaxy_id directly.

    Returns
    -------
    dict
        ``{dinput_step: {'id': <galaxy id>, 'src': 'ld' | 'hdca'}}`` suitable
        for a workflow-invocation ``inputs`` argument.
    """
    # Galaxy "src" vocabulary: LibraryDatasetDatasetAssociation (ldda),
    # LibraryDataset (ld), HistoryDatasetAssociation (hda),
    # or HistoryDatasetCollectionAssociation (hdca).
    time_stamp = get_time_stamp()

    history_client = HistoryClient(gi)
    workflow_inputs = {}

    # 'table_filter' (was 'filter') avoids shadowing the builtin.
    for table, table_filter, dinput_name, dinput_step, dinput_type in l:
        pks = pkd[str(table.prefix)]

        # Multiple galaxyfilelinks may reference the same file; they are all
        # the same file, so deduplicate with distinct().
        selected_objects = GenericFile.objects.filter(pk__in=pks).distinct()

        print('PKS', pks, dinput_type)
        print(selected_objects)

        if dinput_type == 'data_input':

            # Only the first selection can be used: a plain data_input does
            # not support 'multiple files' via BioBlend (use a data
            # collection input for multi-file cases).
            s = selected_objects[0]
            gid = s.galaxyfilelink_set.filter(
                galaxy_library=library)[0].galaxy_id

            print(gid)

            workflow_inputs[dinput_step] = {'id': gid, 'src': 'ld'}

        elif dinput_type == 'data_collection_input':

            element_identifiers = []
            # Dedicated history to hold the collection's datasets.
            hist = history_client.create_history('{}-(data-history-{})-{}'.format(
                history_name, dinput_name, time_stamp))

            for s in selected_objects:
                print(s)
                gfl = s.galaxyfilelink_set.filter(galaxy_library=library)[0]

                if library:
                    # Copy the library dataset into the new history so it can
                    # be referenced as an 'hda' collection element.
                    dataset = history_client.upload_dataset_from_library(
                        hist['id'], lib_dataset_id=gfl.galaxy_id)
                    element_identifiers.append({
                        'id': dataset['id'],
                        'name': os.path.basename(dataset['file_name']),
                        'src': 'hda',
                    })
                else:
                    # NOTE(review): without a library the link's galaxy_id is
                    # assumed to already be an hda id — confirm against caller.
                    element_identifiers.append({
                        'id': gfl.galaxy_id,
                        'name': gfl.genericfile.data_file.name,
                        'src': 'hda',
                    })

            c_descript = {
                'collection_type': 'list',
                'element_identifiers': element_identifiers,
                'name': dinput_name,
            }

            dc = history_client.create_dataset_collection(hist['id'], c_descript)
            workflow_inputs[dinput_step] = {'id': dc['id'], 'src': 'hdca'}

    return workflow_inputs
Example no. 2
0
        print("ready files: {}".format(ready))
        old_ready = ready
    ready = 0
    for f in fc.show_folder(folder["id"], contents=True)["folder_contents"]:
        if f["state"] == "ok":
            ready = ready + 1
    sleep(5)
    print("...")
# Every dataset reached the "ok" state in the polling loop above.
print("All {} datasets ready!".format(ready))

# Create a fresh history named after the current timestamp.
history = hc.create_history("{}".format(now_string))
print(history)

# Describe a flat ('list') dataset collection with one 'ldda' element per
# manifest file. Key insertion order matches the original description.
collection_description = {
    'collection_type': 'list',
    'element_identifiers': [
        {'id': entry["id"], 'name': entry["name"], 'src': 'ldda'}
        for entry in files
    ],
    'name': 'manifest collection',
}

print(collection_description)

hc.create_dataset_collection(history["id"], collection_description)