Example #1
def get(datasets_identifiers, identifier_type='hid', history_id=None):
    """
        Given the history_id that is displayed to the user, this function will
        download the file[s] from the history and stores them under /import/
        Return value[s] are the path[s] to the dataset[s] stored under /import/
    """
    history_id = history_id or os.environ['HISTORY_ID']
    # The object-oriented bioblend API is too slow at retrieving all datasets
    # from a history, so fall back to the non-object path.
    gi = get_galaxy_connection(history_id=history_id, obj=False)
    file_paths = []
    for dataset_identifier in datasets_identifiers:
        file_path = '/import/%s' % dataset_identifier
        log.debug('Downloading gx=%s history=%s dataset=%s', gi, history_id, dataset_identifier)
        # Cache the file requests: if someone does something silly like
        # calling get() for a Galaxy file in a for-loop, we don't want to
        # re-download it every time and add that overhead.
        if not os.path.exists(file_path):
            hc = HistoryClient(gi)
            dc = DatasetClient(gi)
            history = hc.show_history(history_id, contents=True)
            datasets = {ds[identifier_type]: ds['id'] for ds in history}
            if identifier_type == 'hid':
                dataset_identifier = int(dataset_identifier)
            dc.download_dataset(datasets[dataset_identifier], file_path=file_path, use_default_filename=False)
        else:
            log.debug('Cached, not re-downloading')
        file_paths.append(file_path)

    return file_paths[0] if len(file_paths) == 1 else file_paths
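A minimal usage sketch for this variant (hypothetical: it assumes the helper above is importable, HISTORY_ID is exported by the Galaxy Interactive Environment, and history items with hids 1 and 2 exist):

# '1' and '2' are placeholder history hids.
paths = get(['1', '2'], identifier_type='hid')
print(paths)  # e.g. ['/import/1', '/import/2']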
Example #2
def get(dataset_id, history_id=None):
    """
        Given the history_id that is displayed to the user, this function
        downloads the file from the history and stores it under /import/.
        The return value is the path to the dataset stored under /import/.
    """
    conf = _get_conf()
    gi = get_galaxy_connection()
    hc = HistoryClient( gi )
    dc = DatasetClient( gi )

    file_path = '/import/%s' % dataset_id
    history_id = history_id or _get_history_id()

    # Cache the file requests: if someone does something silly like
    # calling get() for a Galaxy file in a for-loop, we don't want to
    # re-download it every time and add that overhead.
    if not os.path.exists(file_path):
        dataset_mapping = {dataset['hid']: dataset['id'] for dataset in hc.show_history(history_id, contents=True)}
        try:
            # Older bioblend releases exposed downloads on HistoryClient.
            hc.download_dataset(history_id, dataset_mapping[dataset_id], file_path, use_default_filename=False, to_ext=None)
        except Exception:
            # Fall back to the DatasetClient download API.
            dc.download_dataset(dataset_mapping[dataset_id], file_path, use_default_filename=False)

    return file_path
Example #3
def get(dataset_id, history_id=None):
    """
        Given the history_id that is displayed to the user, this function will
        download the file from the history and stores it under /import/
        Return value is the path to the dataset stored under /import/
    """
    conf = _get_conf()
    gi = get_galaxy_connection()
    hc = HistoryClient(gi)
    dc = DatasetClient(gi)

    file_path = '/import/%s' % dataset_id
    history_id = history_id or _get_history_id()

    # Cache the file requests: if someone does something silly like
    # calling get() for a Galaxy file in a for-loop, we don't want to
    # re-download it every time and add that overhead.
    if not os.path.exists(file_path):
        dataset_mapping = dict([
            (dataset['hid'], dataset['id'])
            for dataset in hc.show_history(history_id, contents=True)
        ])
        try:
            hc.download_dataset(history_id,
                                dataset_mapping[dataset_id],
                                file_path,
                                use_default_filename=False,
                                to_ext=None)
        except Exception:
            dc.download_dataset(dataset_mapping[dataset_id],
                                file_path,
                                use_default_filename=False)

    return file_path
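Examples 2 and 3 rely on the same try/except fallback: older bioblend releases exposed a download method on HistoryClient, while newer releases keep it only on DatasetClient. A version-agnostic sketch of that idea (the helper name is illustrative, not part of bioblend):

def _download(hc, dc, history_id, dataset_id, file_path):
    try:
        # Old-style bioblend download via the history client
        hc.download_dataset(history_id, dataset_id, file_path,
                            use_default_filename=False, to_ext=None)
    except Exception:
        # Newer bioblend: download via the dataset client
        dc.download_dataset(dataset_id, file_path, use_default_filename=False)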
Example #4
def get(dataset_id, history_id=None):
    """
        Given the history_id that is displayed to the user, this function will
        download the file from the history and stores it under /import/
        Return value is the path to the dataset stored under /import/
    """
    history_id = history_id or os.environ['HISTORY_ID']
    # The object-oriented bioblend API is too slow at retrieving all datasets
    # from a history, so fall back to the non-object path.
    gi = get_galaxy_connection(history_id=history_id, obj=False)
    file_path = '/import/%s' % dataset_id
    log.debug('Downloading gx=%s history=%s dataset=%s', gi, history_id,
              dataset_id)
    # Cache the file requests: if someone does something silly like
    # calling get() for a Galaxy file in a for-loop, we don't want to
    # re-download it every time and add that overhead.
    if not os.path.exists(file_path):
        hc = HistoryClient(gi)
        dc = DatasetClient(gi)
        history = hc.show_history(history_id, contents=True)
        datasets = {ds['hid']: ds['id'] for ds in history}
        dc.download_dataset(datasets[dataset_id],
                            file_path=file_path,
                            use_default_filename=False)
    else:
        log.debug('Cached, not re-downloading')

    return file_path
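An illustrative sketch of the caching behaviour (assumes a dataset with hid 3 exists in the current history):

p1 = get(3)  # downloads history item 3 to /import/3
p2 = get(3)  # cache hit: /import/3 already exists, nothing is re-downloaded
assert p1 == p2 == '/import/3'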
Example #5
def get_user_history(history_id=None):
    """
       Get info about all visible datasets in the user's history.
       Returns a list with one dict per dataset.
    """
    history_id = history_id or os.environ['HISTORY_ID']
    gi = get_galaxy_connection(history_id=history_id, obj=False)
    hc = HistoryClient(gi)
    history = hc.show_history(history_id, visible=True, contents=True)
    return history
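A short usage sketch; the 'hid', 'name' and 'id' keys are part of the dicts returned by Galaxy's history-contents API:

for ds in get_user_history():
    print(ds['hid'], ds['name'], ds['id'])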
Example #6
def get_user_history(history_id=None):
    """
       Get all visible dataset infos of user history.
       Return a list of dict of each dataset.
    """
    history_id = history_id or os.environ['HISTORY_ID']
    gi = get_galaxy_connection(history_id=history_id, obj=False)
    hc = HistoryClient(gi)
    history = hc.show_history(history_id, visible=True, contents=True)
    return history
def get(dataset_id):
    """
        Given the dataset id that is displayed to the user, this function
        downloads the file from the configured history and stores it under /import/.
        The return value is the path to the dataset stored under /import/.
    """
    conf = _get_conf()
    gi = get_galaxy_connection()
    hc = HistoryClient( gi )
    dc = DatasetClient( gi )

    file_path = '/import/%s' % dataset_id

    dataset_mapping = {dataset['hid']: dataset['id'] for dataset in hc.show_history(conf['history_id'], contents=True)}
    try:
        # Older bioblend releases exposed downloads on HistoryClient.
        hc.download_dataset(conf['history_id'], dataset_mapping[dataset_id], file_path, use_default_filename=False, to_ext=None)
    except Exception:
        # Fall back to the DatasetClient download API.
        dc.download_dataset(dataset_mapping[dataset_id], file_path, use_default_filename=False)

    return file_path
Example #8
def get_workflow_status(user):
    # go through every galaxy instance
    gits = GalaxyInstanceTracking.objects.filter(
        galaxyuser__internal_user=user)
    dj_wfs = Workflow.objects.all()
    # loop through instances
    status = []
    for git in gits:
        # loop through workflows for that instance
        gi, gu = get_gi_gu(user, git)
        wc = WorkflowClient(gi)
        hc = HistoryClient(gi)
        wfs = wc.get_workflows()
        for wf in wfs:
            wfd = wc.show_workflow(wf['id'])
            winvoke = wc.get_invocations(wf['id'])
            for wi in winvoke:
                wid = wc.show_invocation(wf['id'], wi['id'])
                h_l = hc.get_histories(wid['history_id'], deleted=True)

                if h_l:
                    h = h_l[0]
                else:
                    continue
                sd = get_status_d(wid)
                sd['name'] = wfd['name']
                hd = hc.show_history(h['id'])
                sd['history_name'] = h['name']
                datetime_object = datetime.strptime(hd['update_time'],
                                                    '%Y-%m-%dT%H:%M:%S.%f')
                # sd['history_url'] =  '{}{}'.format(git.url, hd['url'])

                sd['update_time'] = datetime_object.strftime(
                    '%Y-%m-%d %H:%M:%S')
                sd['update_time_unix'] = unixtime(datetime_object)
                sd['galaxy_instance'] = git.name
                status.append(sd)

    status = sorted(status, key=lambda k: k['update_time_unix'], reverse=True)

    return status
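A hedged sketch of how this might be wired into a Django view (the view and template names are placeholders; the models and helpers are assumed from the surrounding project):

from django.shortcuts import render

def workflow_status_view(request):
    # Rows are already sorted newest-first by 'update_time_unix'
    rows = get_workflow_status(request.user)
    return render(request, 'workflow_status.html', {'rows': rows})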
def get_history_status(user, hist_id=None):
    # go through every galaxy instance
    gits = GalaxyInstanceTracking.objects.filter(
        galaxyuser__internal_user=user)

    # loop through instances
    status = []
    for git in gits:
        # loop through histories for that instance
        gi, gu = get_gi_gu(user, git)
        hc = HistoryClient(gi)
        hists = hc.get_histories()

        # loop through and create a list of dictionaries for our django table
        for hist in hists:

            sd = {}
            # add useful info
            if hist_id and hist['id'] != hist_id:
                continue

            history_info = hc.show_history(hist['id'])

            # add status info
            sd_bioblend = hc.get_status(hist['id'])
            state_details = sd_bioblend['state_details']
            sd.update(state_details)

            sd['estimated_progress'] = sd_bioblend['percent_complete']
            datetime_object = datetime.strptime(history_info['update_time'],
                                                '%Y-%m-%dT%H:%M:%S.%f')
            sd['update_time'] = datetime_object.strftime('%Y-%m-%d %H:%M:%S')
            sd['update_time_unix'] = unixtime(datetime_object)
            sd['galaxy_instance'] = git.name

            sd['name'] = hist['name']

            hsq = History.objects.filter(galaxy_id=hist['id'],
                                         galaxyinstancetracking=git)

            if hsq:

                hs = hsq[0]
                hs.name = hist['name']
                hs.update_time = datetime_object.strftime('%Y-%m-%d %H:%M:%S')
                hs.empty = state_details['empty']
                hs.error = state_details['error']
                hs.failed_metadata = state_details['failed_metadata']
                hs.new = state_details['new']
                hs.ok = state_details['ok']
                hs.paused = state_details['paused']
                hs.running = state_details['running']
                hs.queued = state_details['queued']
                hs.setting_metadata = state_details['setting_metadata']
                hs.upload = state_details['upload']
                hs.estimated_progress = sd_bioblend['percent_complete']
            else:
                hs = History(
                    galaxyinstancetracking=git,
                    name=hist['name'],
                    update_time=datetime_object.strftime('%Y-%m-%d %H:%M:%S'),
                    empty=state_details['empty'],
                    error=state_details['error'],
                    failed_metadata=state_details['failed_metadata'],
                    new=state_details['new'],
                    ok=state_details['ok'],
                    paused=state_details['paused'],
                    running=state_details['running'],
                    queued=state_details['queued'],
                    setting_metadata=state_details['setting_metadata'],
                    upload=state_details['upload'],
                    galaxy_id=hist['id'],
                    estimated_progress=sd_bioblend['percent_complete'])

            hs.save()
            sd['history_data_bioblend_list'] = '/galaxy/history_data_bioblend_list/{}'.format(
                hs.pk)
            status.append(sd)

    status = sorted(status, key=lambda k: k['update_time_unix'], reverse=True)

    return status
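A usage sketch for polling a single history (the Galaxy id below is a placeholder):

rows = get_history_status(user, hist_id='f2db41e1fa331b3e')
if rows:
    print(rows[0]['name'], rows[0]['estimated_progress'])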
Example #10
class GalaxyHandler:
    '''
    This class represents a Galaxy instance and provides functions to interact with that instance.
    '''
    def __init__(self, url, api_key, container_file=None, oci_bundle=False):
        self.url = url
        self.api_key = api_key
        self.container_file = container_file
        self.oci_bundle = oci_bundle
        # Track whether the containerized Galaxy is currently running
        self.instance_running = False

        # Bioblend GalaxyInstance
        self.instance = None
        # Bioblend Clients
        self.user_client = None
        self.config_client = None
        self.workflow_client = None
        self.tool_client = None
        self.toolshed_client = None
        self.library_client = None
        self.roles_client = None
        self.history_client = None
        self.dataset_client = None

    def start_container_galaxy(self, writable=False, binds=None):
        '''
        Run a containerized Galaxy instance.
        '''
        with open(os.devnull, 'w') as FNULL:
            if self.oci_bundle:
                subprocess.call([
                    "sh", "/galaxy/run.sh", "--log-file", "/output/paster.log",
                    "--pid-file", "/output/paster.pid", "--daemon"
                ],
                                stdout=FNULL,
                                stderr=subprocess.STDOUT)
            else:
                if writable:
                    subprocess.call([
                        "sudo", "singularity", "exec", "-w",
                        self.container_file, "sh", "/galaxy/run.sh", "--daemon"
                    ],
                                    stdout=FNULL,
                                    stderr=subprocess.STDOUT)
                elif binds:
                    subprocess.call([
                        "singularity", "exec", "--bind", binds,
                        self.container_file, "sh", "/galaxy/run.sh",
                        "--log-file", "/output/paster.log", "--pid-file",
                        "/output/paster.pid", "--daemon"
                    ],
                                    stdout=FNULL,
                                    stderr=subprocess.STDOUT)
                else:
                    subprocess.call([
                        "singularity", "exec", self.container_file, "sh",
                        "/galaxy/run.sh", "--daemon"
                    ],
                                    stdout=FNULL,
                                    stderr=subprocess.STDOUT)

            # Wait until the Galaxy instance is available, but do not wait longer than 1 minute
            response = None
            t = 0
            while not response:
                try:
                    # getcode() returns 200 once Galaxy is up
                    response = urllib.request.urlopen(self.url).getcode()
                except Exception:
                    if t > 60:
                        logger.error(
                            "Galaxy is not up after 1 minute. Something went wrong. "
                            "Maybe the container is corrupted. Try to open a shell in "
                            "writable mode in the container and start Galaxy from the shell.")
                        exit(1)
                    else:
                        # Wait 5 seconds and try again
                        logger.info("Galaxy is not up ... waiting 5 seconds before retrying")
                        t = t + 5
                        time.sleep(5)
                        response = None
                        continue
            self.instance_running = True
        return

    def stop_container_galaxy(self, sudo=False, bind_dirs=None, tmp_dir=None):
        '''
        Stop a running containerized Galaxy instance.
        Remove an existing temporary directory
        '''
        with open(os.devnull, 'w') as FNULL:
            if self.oci_bundle:
                # No binds, no Singularity, just plain run.sh stop-daemon
                subprocess.call(["sh", "/galaxy/run.sh", "--stop-daemon"],
                                stdout=FNULL,
                                stderr=subprocess.STDOUT)
                self.instance_running = False
                time.sleep(5)
            else:
                if sudo:
                    # We use sudo only for importing workflows, so no binds.
                    subprocess.call([
                        "sudo", "singularity", "exec", "-w",
                        self.container_file, "sh", "/galaxy/run.sh",
                        "--stop-daemon"
                    ],
                                    stdout=FNULL,
                                    stderr=subprocess.STDOUT)
                    self.instance_running = False
                    time.sleep(5)
                else:
                    # We use this only for workflow execution
                    subprocess.call([
                        "singularity", "exec", "--bind", bind_dirs,
                        self.container_file, "sh", "/galaxy/run.sh",
                        "--log-file", "/output/paster.log", "--pid-file",
                        " /output/paster.pid", "--stop-daemon"
                    ],
                                    stdout=FNULL,
                                    stderr=subprocess.STDOUT)
                    self.instance_running = False
                    time.sleep(5)

        # Remove temporary directories
        if tmp_dir:
            logger.info("Remove temporary directory: %s", tmp_dir)
            shutil.rmtree(tmp_dir)

        return

    def create_galaxy_instance(self, check_admin=False):
        '''
        Create a bioblend GalaxyInstance.
        Returns True on success and False if an error occurs.
        (The check_admin flag is accepted but not implemented yet.)
        '''
        # Check if the URL is valid
        if not check_url(self.url):
            logger.error("URL to galaxy instance is not a valid URL: %s",
                         self.url)
            return False
        # Try to create a bioblend Galaxy instance
        try:
            self.instance = GalaxyInstance(url=self.url, key=self.api_key)
        except Exception:
            logger.error("Cannot create Galaxy instance.")
            return False
        return True

    def create_clients(self):
        '''
        Create bioblend clients for the Galaxy instance.
        '''
        # Create first client and check if the API works
        self.config_client = ConfigClient(self.instance)
        try:
            self.config_client.get_version()
            self.config_client.get_config()
        except Exception:
            logger.error("Provided API-key does not work.")
            return False
        try:
            self.user_client = UserClient(self.instance)
            self.workflow_client = WorkflowClient(self.instance)
            self.tool_client = ToolClient(self.instance)
            self.toolshed_client = ToolShedClient(self.instance)
            self.library_client = LibraryClient(self.instance)
            self.roles_client = RolesClient(self.instance)
            self.history_client = HistoryClient(self.instance)
            self.dataset_client = DatasetClient(self.instance)
        except Exception:
            logger.error("Error initializing other bioblend clients.")
            return False
        return True

    def initialize(self):
        '''
        Initialize bioblend GalaxyInstance, clients, and check if the API works.
        Returns False if something went wrong.
        '''
        if not self.create_galaxy_instance():
            logger.error(
                "Cannot create bioblend GalaxyInstance for the GalaxyHandler")
            return False
        if not self.create_clients():
            logger.error(
                "Cannot create bioblend clients for the GalaxyHandler")
            return False
        return True

    def create_user(self, name, mail, password):
        '''
        Create a new Galaxy user for a specific Galaxy instance.
        Return the user_id and an API key.
        '''
        try:
            new_user = self.user_client.create_local_user(name, mail, password)
        except ConnectionError as e:
            if "already exists" in e.body:
                # User already exists: look the user up instead
                new_user = self.user_client.get_users(f_email=mail)[0]
            else:
                raise
        new_user_id = new_user['id']

        # Create API key for that user
        new_user_api_key = self.user_client.create_user_apikey(new_user_id)

        return (new_user_id, new_user_api_key)

    def create_input_library(self, name, user):
        '''
        Create a dataset library for this instance.
        '''
        try:
            # Create the library
            new_library = self.library_client.create_library(name,
                                                             description=None,
                                                             synopsis=None)
            logger.info("new_library ok")
            # Get the role of the user
            user_role_id = self.roles_client.get_roles()[0]['id']
            logger.info("user_role_id ok")
            # Set permissions for that library.
            # The following settings enable the upload of input data by the user to this library.
            self.library_client.set_library_permissions(
                library_id=new_library['id'],
                access_in=user_role_id,
                modify_in=user_role_id,
                add_in=user_role_id,
                manage_in=user_role_id)
            return True
        except Exception:
            logger.error("Cannot create Galaxy data library")
            return False

    def create_history(self, name):
        '''
        Create a history and return the history id
        '''
        history_dict = self.history_client.create_history(name)
        return history_dict['id']

    def create_folder(self, library_name, user_mail):
        '''
        Create a folder for the files in a library.
        This is used to store files for a Galaxy library.
        Return a tuple containing the library id and the folder id.
        '''
        # Assume that there is just one library with this name
        library = self.library_client.get_libraries(library_id=None,
                                                    name=library_name,
                                                    deleted=False)[0]
        folder = self.library_client.create_folder(library['id'], user_mail)
        return library['id'], folder[0]['id']

    def upload_workflow_input(self,
                              workflow_input,
                              library_id,
                              folder_id,
                              mount_input_dir=True,
                              input_dir=None):
        '''
        Upload the input data for a workflow to Galaxy.
        The files are uploaded from the filesystem to a folder of a Galaxy library.
        The files are not duplicated, because only symbolic links are created.
        If a user provides their own data, the files are 'uploaded' from the /input
        directory, which is just a mount point for a directory outside the container.
        If a user wants to use test data provided with the container, mount_input_dir
        is False and the directory inside the container has to be specified.
        '''
        for step_uuid, step_param in workflow_input.items():
            if step_param['step_type'] == 'data_input':
                if mount_input_dir:
                    # Input data is mounted in the container
                    path = os.path.join('/input', step_param['filename'])
                else:
                    # input_dir exists inside the container (e.g. workflow test data)
                    path = os.path.join(input_dir, step_param['filename'])
                logger.info("Next upload: " + path)
                workflow_input[step_uuid][
                    'dataset_id'] = self.library_client.upload_from_galaxy_filesystem(
                        library_id,
                        path,
                        folder_id=folder_id,
                        file_type=step_param['galaxy_file_type'],
                        link_data_only='link_to_files')

    def export_output_history(self, history_id, output_dir):
        '''
        Export all datasets of a history to the output directory.
        '''
        # Get a list of all datasets in the output history
        history_datasets = self.history_client.show_history(history_id,
                                                            contents=True,
                                                            deleted=None,
                                                            visible=None,
                                                            details=None,
                                                            types=None)

        # Iterate over the datasets of the history and download each dataset in 'ok' state (i.e. the tool completed)
        for dataset in history_datasets:
            # Check the dataset status, e.g. if the corresponding task completed. Do not download input datasets!
            if dataset['state'] == 'ok':
                logger.info("Download dataset %s, state: %s", dataset['name'],
                            dataset['state'])
                self.dataset_client.download_dataset(dataset['id'],
                                                     file_path=output_dir,
                                                     use_default_filename=True,
                                                     wait_for_completion=False,
                                                     maxwait=12000)
            else:
                logger.info("Do not download dataset %s, state: %s",
                            dataset['name'], dataset['state'])
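A minimal, hedged driver for the class above, assuming a Galaxy instance is already reachable (URL and API key are placeholders):

handler = GalaxyHandler('http://localhost:8080', 'API_KEY_PLACEHOLDER')
if handler.initialize():
    history_id = handler.create_history('analysis-run')
    # ... submit tools or workflows into history_id here ...
    handler.export_output_history(history_id, '/output')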
Example #11
#!/usr/bin/env python
import os
import shutil

import galaxy_ie_helpers

from bioblend.galaxy.histories import HistoryClient

hid = os.environ.get('DATASET_HID', None)
history_id = os.environ['HISTORY_ID']
if hid not in ('None', None):
    galaxy_ie_helpers.get(int(hid))
    shutil.copy('/import/%s' % hid, '/import/ipython_galaxy_notebook.ipynb')

additional_ids = os.environ.get("ADDITIONAL_IDS", "")
if additional_ids:
    gi = galaxy_ie_helpers.get_galaxy_connection(history_id=history_id,
                                                 obj=False)
    hc = HistoryClient(gi)
    history = hc.show_history(history_id, contents=True)
    additional_ids = additional_ids.split(",")
    for hda in history:
        if hda["id"] in additional_ids:
            galaxy_ie_helpers.get(int(hda["hid"]))
Example #12
def get(datasets_identifiers,
        identifier_type='hid',
        history_id=None,
        retrieve_datatype=None):
    """
        Given the history_id that is displayed to the user, this function will
        either search for matching files in the history if the identifier_type
        is set to 'regex', otherwise it will directly download the file[s] from
        the history and stores them under /import/.
        Return value[s] are the path[s] to the dataset[s] stored under /import/
    """
    history_id = history_id or os.environ['HISTORY_ID']
    # The object-oriented bioblend API is too slow at retrieving all datasets
    # from a history, so fall back to the non-object path.
    gi = get_galaxy_connection(history_id=history_id, obj=False)
    file_path_all = []
    datatypes_all = []

    if not isinstance(datasets_identifiers, list):
        datasets_identifiers = [datasets_identifiers]

    if identifier_type == "regex":
        datasets_identifiers = find_matching_history_ids(datasets_identifiers)
        identifier_type = "hid"

    for dataset_id in datasets_identifiers:
        file_path = '/import/%s' % dataset_id
        log.debug('Downloading gx=%s history=%s dataset=%s', gi, history_id,
                  dataset_id)
        # Cache the file requests: if someone does something silly like
        # calling get() for a Galaxy file in a for-loop, we don't want to
        # re-download it every time and add that overhead.
        if not os.path.exists(file_path):
            hc = HistoryClient(gi)
            dc = DatasetClient(gi)
            history = hc.show_history(history_id, contents=True)
            datasets = {ds[identifier_type]: ds['id'] for ds in history}
            if retrieve_datatype:
                datatypes_all.append(
                    {ds[identifier_type]: ds['extension']
                     for ds in history})
            if identifier_type == 'hid':
                dataset_id = int(dataset_id)
            dc.download_dataset(datasets[dataset_id],
                                file_path=file_path,
                                use_default_filename=False)
        else:
            hc = HistoryClient(gi)
            dc = DatasetClient(gi)
            history = hc.show_history(history_id, contents=True)
            datatypes_all.append(
                {ds[identifier_type]: ds['extension']
                 for ds in history})
            log.debug('Cached, not re-downloading')

        file_path_all.append(file_path)

    # Return the first path if only one item was given, otherwise all paths.
    # This should not break backwards compatibility.
    if retrieve_datatype:
        if len(file_path_all) == 1:
            dataset_number = int(file_path_all[0].strip().split("/")[-1])
            return file_path_all, datatypes_all[0][dataset_number]
        else:
            datatype_multi = dict()
            for i in file_path_all:
                dataset_number = int(i.strip().split("/")[-1])
                datatype_multi[dataset_number] = datatypes_all[0][
                    dataset_number]
            return file_path_all, datatype_multi
    else:
        return file_path_all[0] if len(file_path_all) == 1 else file_path_all
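A usage sketch of the regex path combined with datatype retrieval (the pattern is illustrative):

# Download every dataset whose name matches the pattern and also return
# the mapping from hid to Galaxy datatype extension.
paths, datatypes = get([r'.*\.fastq'], identifier_type='regex',
                       retrieve_datatype=True)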