Example #1
 def decorator( self, trans, *args, **kwargs ):
     def error( environ, start_response ):
         start_response( error_status, [('Content-type', 'text/plain')] )
         return error_message
     error_status = '403 Forbidden'
     if trans.error_message:
         return trans.error_message
     if user_required and trans.anonymous:
         error_message = "API Authentication Required for this request"
         return error
     if trans.request.body:
         try:
             kwargs['payload'] = __extract_payload_from_request(trans, func, kwargs)
         except ValueError:
             error_status = '400 Bad Request'
             error_message = 'Your request did not appear to be valid JSON, please consult the API documentation'
             return error
     trans.response.set_content_type( "application/json" )
     # send 'do not cache' headers to handle IE's caching of ajax get responses
     trans.response.headers[ 'Cache-Control' ] = "max-age=0,no-cache,no-store"
     # Perform api_run_as processing, possibly changing identity
     if 'payload' in kwargs and 'run_as' in kwargs['payload']:
         if not trans.user_can_do_run_as():
             error_message = 'User does not have permissions to run jobs as another user'
             return error
         try:
             decoded_user_id = trans.security.decode_id( kwargs['payload']['run_as'] )
         except TypeError:
             trans.response.status = 400
             return "Malformed user id ( %s ) specified, unable to decode." % str( kwargs['payload']['run_as'] )
         try:
             user = trans.sa_session.query( trans.app.model.User ).get( decoded_user_id )
             trans.api_inherit_admin = trans.user_is_admin()
             trans.set_user(user)
         except:
             trans.response.status = 400
             return "That user does not exist."
     try:
         rval = func( self, trans, *args, **kwargs)
         if to_json and trans.debug:
             rval = dumps( rval, indent=4, sort_keys=True )
         elif to_json:
             rval = dumps( rval )
         return rval
     except paste.httpexceptions.HTTPException:
         raise  # handled
     except:
         log.exception( 'Uncaught exception in exposed API method:' )
         raise paste.httpexceptions.HTTPServerError()
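
The decorator above combines authentication checks, payload parsing, and JSON serialization of the return value. A minimal standalone sketch of just the serialization step, pretty-printing when a debug flag is set, might look like the following; the expose_json name and the debug flag are illustrative, not part of the Galaxy API.

import functools
from json import dumps

def expose_json(func, debug=False):
    # Hypothetical helper: serialize a controller's return value to JSON,
    # pretty-printed and key-sorted when debugging (as in the decorator above).
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        rval = func(*args, **kwargs)
        if debug:
            return dumps(rval, indent=4, sort_keys=True)
        return dumps(rval)
    return wrapper

def list_datasets():
    return {'datasets': [1, 2, 3]}

print(expose_json(list_datasets, debug=True)())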
Example #2
def add_sequencer( sequencer_index, sequencer_form_definition_id, sequencer_info ):
    '''Adds a new sequencer to the sequencer table along with its form values.'''
    # Create a new form values record with the supplied sequencer information
    values = dumps( { 'field_0': sequencer_info.get( 'host', '' ),
                      'field_1': sequencer_info.get( 'username', '' ),
                      'field_2': sequencer_info.get( 'password', '' ),
                      'field_3': sequencer_info.get( 'data_dir', '' ),
                      'field_4': sequencer_info.get( 'rename_dataset', '' ) } )
    cmd = "INSERT INTO form_values VALUES ( %s, %s, %s, %s, '%s' )" % ( nextval( 'form_values' ),
                                                                        localtimestamp(),
                                                                        localtimestamp(),
                                                                        sequencer_form_definition_id,
                                                                        values )
    migrate_engine.execute(cmd)
    sequencer_form_values_id = get_latest_id( 'form_values' )
    # Create a new sequencer record with reference to the form value created above.
    name = 'Sequencer_%i' % sequencer_index
    desc = ''
    version = ''
    result_datasets = dict()
    sequencer_type_id = 'simple_unknown_sequencer'
    cmd = "INSERT INTO sequencer VALUES ( %s, %s, %s, '%s', '%s', '%s', '%s', %s, %s, %s )"
    cmd = cmd % ( nextval('sequencer'),
                  localtimestamp(),
                  localtimestamp(),
                  name,
                  desc,
                  sequencer_type_id,
                  version,
                  sequencer_form_definition_id,
                  sequencer_form_values_id,
                  boolean( 'false' ) )
    migrate_engine.execute(cmd)
    return get_latest_id( 'sequencer' )
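
The helper above serializes the form field values with dumps and interpolates them directly into the INSERT statement. A rough sketch of the same idea against an in-memory SQLite table, using parameter binding rather than string interpolation, could look like this (table and column names are made up):

import sqlite3
from json import dumps, loads

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE form_values (id INTEGER PRIMARY KEY, content TEXT)')

# Serialize the sequencer form fields to JSON before persisting them.
values = dumps({'field_0': 'sequencer.example.org', 'field_1': 'admin'})
conn.execute('INSERT INTO form_values (content) VALUES (?)', (values,))

stored = conn.execute('SELECT content FROM form_values').fetchone()[0]
print(loads(stored)['field_0'])  # sequencer.example.org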
Example #3
def create_job( trans, params, tool, json_file_path, data_list, folder=None, history=None ):
    """
    Create the upload job.
    """
    job = trans.app.model.Job()
    galaxy_session = trans.get_galaxy_session()
    if type( galaxy_session ) == trans.model.GalaxySession:
        job.session_id = galaxy_session.id
    if trans.user is not None:
        job.user_id = trans.user.id
    if folder:
        job.library_folder_id = folder.id
    else:
        if not history:
            history = trans.history
        job.history_id = history.id
    job.tool_id = tool.id
    job.tool_version = tool.version
    job.state = job.states.UPLOAD
    trans.sa_session.add( job )
    trans.sa_session.flush()
    log.info( 'tool %s created job id %d' % ( tool.id, job.id ) )
    trans.log_event( 'created job id %d' % job.id, tool_id=tool.id )

    for name, value in tool.params_to_strings( params, trans.app ).iteritems():
        job.add_parameter( name, value )
    job.add_parameter( 'paramfile', dumps( json_file_path ) )
    object_store_id = None
    for i, dataset in enumerate( data_list ):
        if folder:
            job.add_output_library_dataset( 'output%i' % i, dataset )
        else:
            job.add_output_dataset( 'output%i' % i, dataset )
        # Create an empty file immediately
        if not dataset.dataset.external_filename:
            dataset.dataset.object_store_id = object_store_id
            try:
                trans.app.object_store.create( dataset.dataset )
            except ObjectInvalid:
                raise Exception('Unable to create output dataset: object store is full')
            object_store_id = dataset.dataset.object_store_id
            trans.sa_session.add( dataset )
            # open( dataset.file_name, "w" ).close()
    job.object_store_id = object_store_id
    job.state = job.states.NEW
    job.set_handler(tool.get_job_handler(None))
    trans.sa_session.add( job )
    trans.sa_session.flush()

    # Queue the job for execution
    trans.app.job_queue.put( job.id, job.tool_id )
    trans.log_event( "Added job to the job queue, id: %s" % str(job.id), tool_id=job.tool_id )
    output = odict()
    for i, v in enumerate( data_list ):
        output[ 'output%i' % i ] = v
    return job, output
Example #4
 def get_account_info(self, trans, key_id, secret):
     """
     Get EC2 Account Info
     """
     account_info = {}
     cml = cloudman.launch.CloudManLauncher(key_id, secret)
     ec2_conn = cml.connect_ec2(key_id, secret)
     kps = ec2_conn.get_all_key_pairs()
     account_info['clusters'] = cml.get_clusters_pd()
     account_info['keypairs'] = [akp.name for akp in kps]
     return dumps(account_info)
Example #5
def job_param_filter(view, left, operator, right):
    view.do_query = True
    alias = aliased( JobParameter )
    param_name = re.sub(r'^param.', '', left)
    view.query = view.query.filter(
        and_(
            Job.id == alias.job_id,
            alias.name == param_name,
            alias.value == dumps(right)
        )
    )
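
The filter compares alias.value against dumps(right) because job parameters are persisted as JSON strings (see params_to_strings in Example #9 below), so the query value has to be encoded the same way before comparing. A small sketch of that matching without the SQLAlchemy machinery, with made-up parameter names:

from json import dumps

# Parameters as they would be stored: (name, JSON-encoded value) pairs.
stored_params = [('threshold', dumps(0.05)), ('mode', dumps('strict'))]

def match(params, name, value):
    # Encode the query value exactly as it was stored, then compare strings.
    encoded = dumps(value)
    return [(n, v) for n, v in params if n == name and v == encoded]

print(match(stored_params, 'threshold', 0.05))    # [('threshold', '0.05')]
print(match(stored_params, 'threshold', '0.05'))  # [] -- '"0.05"' does not equal '0.05'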
Example #6
 def handle( self ):
     request = self.request.recv( 8192 )
     response = {}
     valid, request, response = json.validate_jsonrpc_request( request, ( 'get_state', ), () )
     if valid:
         self.request.send( json.dumps( json.jsonrpc_response( request=request, result=self.server.state_result.result ) ) )
     else:
         error_msg = 'Unable to serve request: %s' % response['error']['message']
         if 'data' in response['error']:
             error_msg += ': %s' % response['error']['data']
         log.error( error_msg )
         log.debug( 'Original request was: %s' % request )
Example #7
def file_err( msg, dataset, json_file ):
    json_file.write( dumps( dict( type='dataset',
                                  ext='data',
                                  dataset_id=dataset.dataset_id,
                                  stderr=msg ) ) + "\n" )
    # never remove a server-side upload
    if dataset.type in ( 'server_dir', 'path_paste' ):
        return
    try:
        os.remove( dataset.path )
    except:
        pass
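
file_err appends one JSON object per line to the job's report stream. A minimal sketch of writing and reading back such newline-delimited JSON records (the file name is illustrative):

import json

records = [
    {'type': 'dataset', 'dataset_id': 1, 'stderr': 'format not recognized'},
    {'type': 'dataset', 'dataset_id': 2, 'stderr': 'empty file'},
]

with open('upload_report.jsonl', 'w') as json_file:
    for rec in records:
        # One JSON document per line, as in file_err above.
        json_file.write(json.dumps(rec) + '\n')

with open('upload_report.jsonl') as json_file:
    for line in json_file:
        print(json.loads(line)['dataset_id'])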
Example #8
def create_archive( history_attrs_file, datasets_attrs_file, jobs_attrs_file, out_file, gzip=False ):
    """ Create archive from the given attribute/metadata files and save it to out_file. """
    tarfile_mode = "w"
    if gzip:
        tarfile_mode += ":gz"
    try:

        history_archive = tarfile.open( out_file, tarfile_mode )

        # Read datasets attributes from file.
        datasets_attr_in = open( datasets_attrs_file, 'rb' )
        datasets_attr_str = ''
        buffsize = 1048576
        try:
            while True:
                datasets_attr_str += datasets_attr_in.read( buffsize )
                if not datasets_attr_str or len( datasets_attr_str ) % buffsize != 0:
                    break
        except OverflowError:
            pass
        datasets_attr_in.close()
        datasets_attrs = loads( datasets_attr_str )

        # Add datasets to archive and update dataset attributes.
        # TODO: security check to ensure that files added are in Galaxy dataset directory?
        for dataset_attrs in datasets_attrs:
            if dataset_attrs['exported']:
                dataset_file_name = dataset_attrs[ 'file_name' ]  # Full file name.
                dataset_archive_name = os.path.join( 'datasets',
                                                     get_dataset_filename( dataset_attrs[ 'name' ], dataset_attrs[ 'extension' ] ) )
                history_archive.add( dataset_file_name, arcname=dataset_archive_name )
                # Update dataset filename to be archive name.
                dataset_attrs[ 'file_name' ] = dataset_archive_name

        # Rewrite dataset attributes file.
        datasets_attrs_out = open( datasets_attrs_file, 'w' )
        datasets_attrs_out.write( dumps( datasets_attrs ) )
        datasets_attrs_out.close()

        # Finish archive.
        history_archive.add( history_attrs_file, arcname="history_attrs.txt" )
        history_archive.add( datasets_attrs_file, arcname="datasets_attrs.txt" )
        if os.path.exists( datasets_attrs_file + ".provenance" ):
            history_archive.add( datasets_attrs_file + ".provenance", arcname="datasets_attrs.txt.provenance" )
        history_archive.add( jobs_attrs_file, arcname="jobs_attrs.txt" )
        history_archive.close()

        # Status.
        return 'Created history archive.'
    except Exception, e:
        return 'Error creating history archive: %s' % str( e ), sys.stderr
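
A compact sketch of the same pattern, dumping attribute metadata to JSON and then packing both the metadata file and the data file into a gzipped tar archive (paths and attribute names are illustrative):

import json
import os
import tarfile
import tempfile

workdir = tempfile.mkdtemp()
attrs_path = os.path.join(workdir, 'datasets_attrs.txt')
data_path = os.path.join(workdir, 'dataset_1.dat')

with open(data_path, 'w') as f:
    f.write('ACGT\n')
with open(attrs_path, 'w') as f:
    # Metadata describing the archived datasets, serialized with dumps.
    f.write(json.dumps([{'name': 'dataset 1', 'file_name': 'datasets/dataset_1.dat'}]))

archive_path = os.path.join(workdir, 'history.tar.gz')
with tarfile.open(archive_path, 'w:gz') as archive:
    archive.add(attrs_path, arcname='datasets_attrs.txt')
    archive.add(data_path, arcname='datasets/dataset_1.dat')

with tarfile.open(archive_path) as archive:
    print(archive.getnames())  # ['datasets_attrs.txt', 'datasets/dataset_1.dat']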
Example #9
def params_to_strings(params, param_values, app):
    """
    Convert a dictionary of parameter values to a dictionary of strings
    suitable for persisting. The `value_to_basic` method of each parameter
    is called to convert its value to basic types, the result of which
    is then json encoded (this allowing complex nested parameters and
    such).
    """
    rval = dict()
    for key, value in param_values.iteritems():
        if key in params:
            value = params[key].value_to_basic(value, app)
        rval[key] = str(dumps(value))
    return rval
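
A round-trip sketch of this persistence scheme: every value is JSON-encoded for storage and decoded again on the way back. The per-parameter value_to_basic step is omitted here, and params_from_strings stands in for the inverse operation:

from json import dumps, loads

def params_to_strings(param_values):
    # Encode every value as a JSON string so nested structures survive storage.
    return dict((key, dumps(value)) for key, value in param_values.items())

def params_from_strings(param_strings):
    # Inverse: decode the stored JSON strings back into values.
    return dict((key, loads(value)) for key, value in param_strings.items())

original = {'threshold': 0.05, 'columns': [1, 2, 3], 'label': 'run-1'}
stored = params_to_strings(original)
print(stored['columns'])                        # '[1, 2, 3]' (a string)
print(params_from_strings(stored) == original)  # True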
Example #10
 def get_chunk(self, trans, dataset, chunk):
     ck_index = int(chunk)
     f = open(dataset.file_name)
     f.seek(ck_index * trans.app.config.display_chunk_size)
     # If we aren't at the start of the file, seek to next newline.  Do this better eventually.
     if f.tell() != 0:
         cursor = f.read(1)
         while cursor and cursor != '\n':
             cursor = f.read(1)
     ck_data = f.read(trans.app.config.display_chunk_size)
     cursor = f.read(1)
     while cursor and ck_data[-1] != '\n':
         ck_data += cursor
         cursor = f.read(1)
     return dumps( { 'ck_data': util.unicodify( ck_data ), 'ck_index': ck_index + 1 } )
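
A self-contained sketch of the chunking approach used above: seek to a byte offset, skip the partial line at the start, then keep reading until the chunk ends on a newline. The chunk size and sample file are made up; the real method takes the size from trans.app.config.display_chunk_size.

import tempfile

CHUNK_SIZE = 16

sample = tempfile.NamedTemporaryFile(mode='w', suffix='.tabular', delete=False)
sample.write('1\talpha\n2\tbeta\n3\tgamma\n4\tdelta\n')
sample.close()

def get_chunk(path, ck_index):
    f = open(path, 'rb')
    f.seek(ck_index * CHUNK_SIZE)
    # If not at the start of the file, skip the remainder of a partial line.
    if f.tell() != 0:
        cursor = f.read(1)
        while cursor and cursor != b'\n':
            cursor = f.read(1)
    ck_data = f.read(CHUNK_SIZE)
    # Extend the chunk until it ends on a newline boundary (or EOF).
    cursor = f.read(1)
    while cursor and not ck_data.endswith(b'\n'):
        ck_data += cursor
        cursor = f.read(1)
    f.close()
    return ck_data.decode('utf-8')

print(repr(get_chunk(sample.name, 0)))  # lines 1-3, ending on a newline
print(repr(get_chunk(sample.name, 1)))  # the remaining line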
Example #11
 def get_account_info(self, trans, key_id, secret):
     """
     Get EC2 Account Info
     """
     try:
         account_info = {}
         cml = cloudman.launch.CloudManLauncher(key_id, secret)
         ec2_conn = cml.connect_ec2(key_id, secret)
         kps = ec2_conn.get_all_key_pairs()
         account_info['clusters'] = cml.get_clusters_pd()
         account_info['keypairs'] = [akp.name for akp in kps]
         return dumps(account_info)
     except EC2ResponseError as e:
         trans.response.status = 400
         return e.message
Example #12
 def launch_instance(self, trans, cluster_name, password, key_id, secret,
                     instance_type, share_string, keypair, ami=None,
                     zone=None, bucket_default=None, **kwargs):
     ami = ami or trans.app.config.cloudlaunch_default_ami
     cfg = cloudman.CloudManConfig(key_id, secret, cluster_name, ami,
                                   instance_type, password, placement=zone)
     cml = cloudman.launch.CloudManLauncher(key_id, secret)
     # This should probably be handled better on the bioblend side, but until
     # an egg update can be made, this needs to conditionally include the
     # parameter or not, even if the value is None.
     if bucket_default:
         result = cml.launch(cluster_name, ami, instance_type, password,
                             cfg.kernel_id, cfg.ramdisk_id, cfg.key_name,
                             cfg.security_groups, cfg.placement,
                             bucket_default=bucket_default)
     else:
         result = cml.launch(cluster_name, ami, instance_type, password,
                             cfg.kernel_id, cfg.ramdisk_id, cfg.key_name,
                             cfg.security_groups, cfg.placement)
     # result is a dict with sg_names, kp_name, kp_material, rs, and instance_id
     if not result['rs']:
         trans.response.status = 400
         return "Instance failure, but no specific error was detected.  Please check your AWS Console."
     instance = result['rs'].instances[0]
     while not instance.public_dns_name:
         try:
             instance.update()
         except EC2ResponseError:
             # This can happen when update is invoked before the instance is fully registered.
             pass
         time.sleep(1)
     if result['kp_material']:
         # We have created a keypair.  Save to tempfile for one time retrieval.
         (fd, fname) = tempfile.mkstemp(prefix=PKEY_PREFIX, dir=trans.app.config.new_file_path)
         f = os.fdopen(fd, 'wt')
         f.write(result['kp_material'])
         f.close()
         kp_material_tag = fname[fname.rfind(PKEY_PREFIX) + len(PKEY_PREFIX):]
     else:
         kp_material_tag = None
     return dumps({'cluster_name': cluster_name,
                   'instance_id': result['rs'].instances[0].id,
                   'image_id': result['rs'].instances[0].image_id,
                   'public_dns_name': result['rs'].instances[0].public_dns_name,
                   'kp_name': result['kp_name'],
                   'kp_material_tag': kp_material_tag})
Example #13
def __api_error_response( trans, **kwds ):
    error_dict = __api_error_message( trans, **kwds )
    exception = kwds.get( "exception", None )
    # If we are given an status code directly - use it - otherwise check
    # the exception for a status_code attribute.
    if "status_code" in kwds:
        status_code = int( kwds.get( "status_code" ) )
    elif hasattr( exception, "status_code" ):
        status_code = int( exception.status_code )
    else:
        status_code = 500
    response = trans.response
    if not response.status or str(response.status).startswith("20"):
        # Unset status code appears to be string '200 OK', if anything
        # non-success (i.e. not 200 or 201) has been set, do not override
        # underlying controller.
        response.status = status_code
    return dumps( error_dict )
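
A simplified sketch of assembling such an error payload: the status code comes from an explicit keyword if given, otherwise from a status_code attribute on the exception, otherwise 500. The exception class and payload keys here are hypothetical.

from json import dumps

class HypotheticalAPIError(Exception):
    status_code = 404

def api_error_response(exception=None, **kwds):
    if 'status_code' in kwds:
        status_code = int(kwds['status_code'])
    elif hasattr(exception, 'status_code'):
        status_code = int(exception.status_code)
    else:
        status_code = 500
    error_dict = {'err_msg': str(exception) if exception else kwds.get('err_msg', 'Unknown error'),
                  'status_code': status_code}
    return status_code, dumps(error_dict)

print(api_error_response(HypotheticalAPIError('dataset not found')))  # picks up 404
print(api_error_response(err_msg='boom', status_code=400))            # explicit 400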
Example #14
 def handle_incoming(cls, incoming):
     npd = {}
     for key, val in incoming.iteritems():
         if key.startswith("pja"):
             sp = key.split("__")
             ao_key = sp[2] + sp[1]
             # flag / output_name / pjatype / desc
             if ao_key not in npd:
                 npd[ao_key] = {"action_type": sp[2], "output_name": sp[1], "action_arguments": {}}
             if len(sp) > 3:
                 if sp[3] == "output_name":
                     npd[ao_key]["output_name"] = val
                 else:
                     npd[ao_key]["action_arguments"][sp[3]] = val
         else:
             # Not pja stuff.
             pass
     return dumps(npd)
Example #15
 def handle_incoming(cls, incoming):
     npd = {}
     for key, val in incoming.iteritems():
         if key.startswith('pja'):
             sp = key.split('__')
             ao_key = sp[2] + sp[1]
             # flag / output_name / pjatype / desc
             if ao_key not in npd:
                 npd[ao_key] = {'action_type': sp[2],
                                'output_name': sp[1],
                                'action_arguments': {}}
             if len(sp) > 3:
                 if sp[3] == 'output_name':
                     npd[ao_key]['output_name'] = val
                 else:
                     npd[ao_key]['action_arguments'][sp[3]] = val
         else:
             # Not pja stuff.
             pass
     return dumps(npd)
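
For illustration, a standalone version of the same key parsing fed with hypothetical form keys shows the structure that gets JSON-encoded; the pja__<output>__<ActionType>__<argument> layout matches the split performed above, while the concrete names are made up.

from json import dumps

def handle_incoming(incoming):
    npd = {}
    for key, val in incoming.items():
        if key.startswith('pja'):
            sp = key.split('__')
            ao_key = sp[2] + sp[1]
            if ao_key not in npd:
                npd[ao_key] = {'action_type': sp[2], 'output_name': sp[1], 'action_arguments': {}}
            if len(sp) > 3:
                if sp[3] == 'output_name':
                    npd[ao_key]['output_name'] = val
                else:
                    npd[ao_key]['action_arguments'][sp[3]] = val
    return dumps(npd)

incoming = {
    'pja__out_file1__RenameDatasetAction': 'on',
    'pja__out_file1__RenameDatasetAction__newname': 'filtered.bed',
    'unrelated_field': 'ignored',
}
# Prints one nested entry keyed by action type plus output name, JSON-encoded.
print(handle_incoming(incoming))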
Example #16
 def get_html( self, prefix="", disabled=False ):
     primary_field = self.primary_field
     html = '<div class="switch-option">'
     html += primary_field.get_html( prefix=prefix, disabled=disabled )
     html += '<input name="__switch_default__" type="hidden" value="%s" />' % self.default_field
     options = []
     for name, delegate_field in self.delegate_fields.items():
         field = escape( dumps( delegate_field.to_dict() ) )
         option = " '%s': %s" % ( name, field )
         options.append( option )
     html += '<script>$(document).ready( function() {\nvar switchOptions = {\n'
     html += ','.join( options )
     html += '};\n'
     html += 'if ( window.enhanced_galaxy_tools ) {\n'
     html += 'require( [ "galaxy.tools" ], function( mod_tools ) { new mod_tools.SwitchSelectView({\n'
     html += 'el: $(\'[name="%s%s"]\').closest( "div.switch-option" ),' % ( prefix, primary_field.name )
     html += 'default_option: "%s",\n' % self.default_field
     html += 'prefix: "%s",\n' % prefix
     html += 'switch_options: switchOptions }); } )\n'
     html += "}"
     html += '});\n</script></div>'
     return html
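
The delegate field descriptions are JSON-encoded with dumps and HTML-escaped before being inlined into the generated script block. A minimal standard-library sketch of that embedding step (the option names are illustrative):

import json
from html import escape  # an escape helper similar to the one used above

options = {'file_type': {'type': 'select', 'value': 'auto'}}
# JSON-encode the field description, then escape &, < and > for inlining.
field = escape(json.dumps(options), quote=False)
html = '<div class="switch-option"><script>var switchOptions = { "primary": %s };</script></div>' % field
print(html)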
Example #17
 def get_state(self, transfer_jobs, via_socket=False):
     transfer_jobs = listify(transfer_jobs)
     rval = []
     for tj in transfer_jobs:
         if via_socket and tj.state not in tj.terminal_states and tj.socket:
             try:
                 request = json.jsonrpc_request(method="get_state", id=True)
                 sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                 sock.settimeout(5)
                 sock.connect(("localhost", tj.socket))
                 sock.send(json.dumps(request))
                 response = sock.recv(8192)
                 valid, response = json.validate_jsonrpc_response(response, id=request["id"])
                 if not valid:
                     # No valid response received, make some pseudo-json-rpc
                     raise Exception(
                         dict(code=128, message="Did not receive valid response from transfer daemon for state")
                     )
                 if "error" in response:
                     # Response was valid but Request resulted in an error
                     raise Exception(response["error"])
                 else:
                     # Request was valid
                     response["result"]["transfer_job_id"] = tj.id
                     rval.append(response["result"])
             except Exception, e:
                 # State checking via the transfer daemon failed, just
                 # return the state from the database instead.  Callers can
                 # look for the 'error' member of the response to see why
                 # the check failed.
                 self.sa_session.refresh(tj)
                 error = e.args
                 if type(error) != dict:
                     error = dict(code=256, message="Error connecting to transfer daemon", data=str(e))
                 rval.append(dict(transfer_job_id=tj.id, state=tj.state, error=error))
         else:
             self.sa_session.refresh(tj)
             rval.append(dict(transfer_job_id=tj.id, state=tj.state))
     return rval
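
A network-free sketch of the JSON-RPC exchange performed above: build a request with dumps, then check that a (canned) response carries the same id and no error before using its result. The transfer daemon and its socket are left out; the response here is hard-coded.

import json

def jsonrpc_request(method, req_id):
    return {'jsonrpc': '2.0', 'method': method, 'id': req_id}

request = jsonrpc_request('get_state', req_id=1)
wire_request = json.dumps(request)  # what would be sent over the socket

# Canned response standing in for what the daemon would send back.
wire_response = json.dumps({'jsonrpc': '2.0', 'id': 1, 'result': {'state': 'done'}})
response = json.loads(wire_response)

if response.get('id') != request['id']:
    raise Exception(dict(code=128, message='Mismatched response id'))
elif 'error' in response:
    raise Exception(response['error'])
else:
    print(response['result']['state'])  # 'done'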
Example #18
 def get_chunk(self, trans, dataset, chunk):
     ck_index = int(chunk)
     f = open(dataset.file_name)
     f.seek(ck_index * trans.app.config.display_chunk_size)
     # If we aren't at the start of the file, seek to next newline.  Do this better eventually.
     if f.tell() != 0:
         cursor = f.read(1)
         while cursor and cursor != '\n':
             cursor = f.read(1)
     ck_data = f.read(trans.app.config.display_chunk_size)
     cursor = f.read(1)
     while cursor and ck_data[-1] != '\n':
         ck_data += cursor
         cursor = f.read(1)
     
     # The ConnectivityTable format has several derivatives of which one is delimited by (multiple) spaces.
     # By converting these spaces back to tabs, chucks can still be interpreted by tab delimited file parsers
     ck_data_header, ck_data_body = ck_data.split('\n', 1)
     ck_data_header = re.sub('^([0-9]+)[ ]+',r'\1\t',ck_data_header)
     ck_data_body = re.sub('\n[ \t]+','\n',ck_data_body)
     ck_data_body = re.sub('[ ]+','\t',ck_data_body)
     
     return dumps( { 'ck_data': util.unicodify(ck_data_header + "\n" + ck_data_body ), 'ck_index': ck_index + 1 } )
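
The three regular expressions normalize a space-delimited ConnectivityTable chunk back to tab-delimited text. Applied to a small made-up chunk they behave like this:

import re

ck_data = '4  sample energy = -1.2\n1   G  0  2  0  1\n2   C  1  3  0  2\n'
ck_data_header, ck_data_body = ck_data.split('\n', 1)

# Header: put a tab between the leading sequence length and the title.
ck_data_header = re.sub('^([0-9]+)[ ]+', r'\1\t', ck_data_header)
# Body: drop whitespace at the start of continuation lines, then collapse runs of spaces to tabs.
ck_data_body = re.sub('\n[ \t]+', '\n', ck_data_body)
ck_data_body = re.sub('[ ]+', '\t', ck_data_body)

print(repr(ck_data_header + '\n' + ck_data_body))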
Example #19
    def load( self, trans, **kwd ):
        """
        Load dataset from the given source into the library.

        :param  encoded_folder_id:      the encoded id of the folder to import dataset to
        :type   encoded_folder_id:      an encoded id string
        :param  source:                 source of the dataset to be loaded
        :type   source:                 str
        :param  link_data:              flag whether to link the dataset to data or copy it to Galaxy
        :type   link_data:              bool
        :param  preserve_dirs:          flag whether to preserver directory structure when importing dir
        :type   preserve_dirs:          bool
        """

        kwd[ 'space_to_tab' ] = 'False'
        kwd[ 'to_posix_lines' ] = 'True'
        
        kwd[ 'dbkey' ] = kwd.get( 'dbkey', '?' )
        kwd[ 'file_type' ] = kwd.get( 'file_type', 'auto' )
        kwd[ 'link_data_only' ] = 'link_to_files' if util.string_as_bool( kwd.get( 'link_data', False ) ) else 'copy_files'
        encoded_folder_id = kwd.get( 'encoded_folder_id', None )
        if encoded_folder_id is not None:
            folder_id = self.folder_manager.cut_and_decode( trans, encoded_folder_id )
        else:
            raise exceptions.RequestParameterMissingException( 'The required attribute encoded_folder_id is missing.' )
        path = kwd.get( 'path', None)
        if path is None:
            raise exceptions.RequestParameterMissingException( 'The required attribute path is missing.' )
        folder = self.folder_manager.get( trans, folder_id )

        source = kwd.get( 'source', None )
        if source not in [ 'userdir_file', 'userdir_folder', 'admin_path' ]:
            raise exceptions.RequestParameterMissingException( 'You have to specify "source" parameter. Possible values are "userdir_file", "userdir_folder" and "admin_path". ')

        if source in [ 'userdir_file', 'userdir_folder' ]:
            user_login = trans.user.email
            user_base_dir = trans.app.config.user_library_import_dir
            if user_base_dir is None:
                raise exceptions.ConfigDoesNotAllowException( 'The configuration of this Galaxy instance does not allow upload from user directories.' )
            full_dir = os.path.join( user_base_dir, user_login )
            # path_to_root_import_folder = None
            if not path.lower().startswith( full_dir.lower() ):
                # path_to_root_import_folder = path
                path = os.path.join( full_dir, path )
            if not os.path.exists( path ):
                raise exceptions.RequestParameterInvalidException( 'Given path does not exist on the host.' )
            if not self.folder_manager.can_add_item( trans, folder ):
                raise exceptions.InsufficientPermissionsException( 'You do not have proper permission to add items to the given folder.' )
        if source == 'admin_path':
            if not trans.app.config.allow_library_path_paste:
                raise exceptions.ConfigDoesNotAllowException( 'The configuration of this Galaxy instance does not allow admins to import into library from path.' )
            if not trans.user_is_admin:
                raise exceptions.AdminRequiredException( 'Only admins can import from path.' )

        # Set up the traditional tool state/params
        tool_id = 'upload1'
        tool = trans.app.toolbox.get_tool( tool_id )
        state = tool.new_state( trans )
        tool.update_state( trans, tool.inputs_by_page[ 0 ], state.inputs, kwd )
        tool_params = state.inputs
        dataset_upload_inputs = []
        for input_name, input in tool.inputs.iteritems():
            if input.type == "upload_dataset":
                dataset_upload_inputs.append( input )
        library_bunch = upload_common.handle_library_params( trans, {}, trans.security.encode_id( folder.id ) )
        abspath_datasets = []
        kwd[ 'filesystem_paths' ] = path
        params = util.Params( kwd )
        # user wants to import one file only
        if source == "userdir_file":
            file = os.path.abspath( path )
            abspath_datasets.append( trans.webapp.controllers[ 'library_common' ].make_library_uploaded_dataset(
                trans, 'api', params, os.path.basename( file ), file, 'server_dir', library_bunch ) )
        # user wants to import whole folder
        if source == "userdir_folder":
            uploaded_datasets_bunch = trans.webapp.controllers[ 'library_common' ].get_path_paste_uploaded_datasets(
                trans, 'api', params, library_bunch, 200, '' )
            uploaded_datasets = uploaded_datasets_bunch[0]
            if uploaded_datasets is None:
                raise exceptions.ObjectNotFound( 'Given folder does not contain any datasets.' )
            for ud in uploaded_datasets:
                ud.path = os.path.abspath( ud.path )
                abspath_datasets.append( ud )
        #  user wants to import from path (admins only)
        if source == "admin_path":
            # validate the path is within root
            uploaded_datasets_bunch = trans.webapp.controllers[ 'library_common' ].get_path_paste_uploaded_datasets(
                trans, 'api', params, library_bunch, 200, '' )
            uploaded_datasets = uploaded_datasets_bunch[0]
            if uploaded_datasets is None:
                raise exceptions.ObjectNotFound( 'Given folder does not contain any datasets.' )
            for ud in uploaded_datasets:
                ud.path = os.path.abspath( ud.path )
                abspath_datasets.append( ud )
        json_file_path = upload_common.create_paramfile( trans, abspath_datasets )
        data_list = [ ud.data for ud in abspath_datasets ]
        job, output = upload_common.create_job( trans, tool_params, tool, json_file_path, data_list, folder=folder )
        # HACK: Prevent outputs_to_working_directory from overwriting inputs when "linking"
        job.add_parameter( 'link_data_only', dumps( kwd.get( 'link_data_only', 'copy_files' ) ) )
        job.add_parameter( 'uuid', dumps( kwd.get( 'uuid', None ) ) )
        trans.sa_session.add( job )
        trans.sa_session.flush()
        job_dict = job.to_dict()
        job_dict[ 'id' ] = trans.security.encode_id( job_dict[ 'id' ] )
        return job_dict
Example #20
    def load( self, trans, payload=None, **kwd ):
        """
        * POST /api/libraries/datasets
        Load dataset from the given source into the library.
        Source can be:
            user directory - root folder specified in galaxy.ini as "$user_library_import_dir"
                example path: path/to/galaxy/$user_library_import_dir/user@example.com/{user can browse everything here}
                the folder with the user login has to be created beforehand
            (admin)import directory - root folder specified in galaxy ini as "$library_import_dir"
                example path: path/to/galaxy/$library_import_dir/{admin can browse everything here}
            (admin)any absolute or relative path - option allowed with "allow_library_path_paste" in galaxy.ini

        :param   payload: dictionary structure containing:
            :param  encoded_folder_id:      the encoded id of the folder to import dataset(s) to
            :type   encoded_folder_id:      an encoded id string
            :param  source:                 source the datasets should be loaded from
            :type   source:                 str
            :param  link_data:              flag whether to link the dataset to data or copy it to Galaxy, defaults to copy
                                            while linking is set to True all symlinks will be resolved _once_
            :type   link_data:              bool
            :param  preserve_dirs:          flag whether to preserve the directory structure when importing dir
                                            if False only datasets will be imported
            :type   preserve_dirs:          bool
            :param  file_type:              file type of the loaded datasets, defaults to 'auto' (autodetect)
            :type   file_type:              str
            :param  dbkey:                  dbkey of the loaded genome, defaults to '?' (unknown)
            :type   dbkey:                  str
        :type   dictionary
        :returns:   dict containing information about the created upload job
        :rtype:     dictionary
        :raises: RequestParameterMissingException, AdminRequiredException, ConfigDoesNotAllowException, RequestParameterInvalidException
                    InsufficientPermissionsException, ObjectNotFound
        """
        if payload:
            kwd.update(payload)
        kwd['space_to_tab'] = False
        kwd['to_posix_lines'] = True
        kwd[ 'dbkey' ] = kwd.get( 'dbkey', '?' )
        kwd[ 'file_type' ] = kwd.get( 'file_type', 'auto' )
        kwd['link_data_only'] = 'link_to_files' if util.string_as_bool( kwd.get( 'link_data', False ) ) else 'copy_files'
        encoded_folder_id = kwd.get( 'encoded_folder_id', None )
        if encoded_folder_id is not None:
            folder_id = self.folder_manager.cut_and_decode( trans, encoded_folder_id )
        else:
            raise exceptions.RequestParameterMissingException( 'The required attribute encoded_folder_id is missing.' )
        path = kwd.get( 'path', None)
        if path is None:
            raise exceptions.RequestParameterMissingException( 'The required attribute path is missing.' )
        folder = self.folder_manager.get( trans, folder_id )

        source = kwd.get( 'source', None )
        if source not in [ 'userdir_file', 'userdir_folder', 'importdir_file', 'importdir_folder', 'admin_path' ]:
            raise exceptions.RequestParameterMissingException( 'You have to specify "source" parameter. Possible values are "userdir_file", "userdir_folder", "admin_path", "importdir_file" and "importdir_folder". ')
        if source in [ 'importdir_file', 'importdir_folder' ]:
            if not trans.user_is_admin:
                raise exceptions.AdminRequiredException( 'Only admins can import from importdir.' )
            if not trans.app.config.library_import_dir:
                raise exceptions.ConfigDoesNotAllowException( 'The configuration of this Galaxy instance does not allow admins to import into library from importdir.' )
            import_base_dir = trans.app.config.library_import_dir
            path = os.path.join( import_base_dir, path )
        if source in [ 'userdir_file', 'userdir_folder' ]:
            user_login = trans.user.email
            user_base_dir = trans.app.config.user_library_import_dir
            if user_base_dir is None:
                raise exceptions.ConfigDoesNotAllowException( 'The configuration of this Galaxy instance does not allow upload from user directories.' )
            full_dir = os.path.join( user_base_dir, user_login )
            if not path.lower().startswith( full_dir.lower() ):
                path = os.path.join( full_dir, path )
            if not os.path.exists( path ):
                raise exceptions.RequestParameterInvalidException( 'Given path does not exist on the host.' )
            if not self.folder_manager.can_add_item( trans, folder ):
                raise exceptions.InsufficientPermissionsException( 'You do not have proper permission to add items to the given folder.' )
        if source == 'admin_path':
            if not trans.app.config.allow_library_path_paste:
                raise exceptions.ConfigDoesNotAllowException( 'The configuration of this Galaxy instance does not allow admins to import into library from path.' )
            if not trans.user_is_admin:
                raise exceptions.AdminRequiredException( 'Only admins can import from path.' )

        # Set up the traditional tool state/params
        tool_id = 'upload1'
        tool = trans.app.toolbox.get_tool( tool_id )
        state = tool.new_state( trans )
        tool.populate_state( trans, tool.inputs, state.inputs, kwd )
        tool_params = state.inputs
        dataset_upload_inputs = []
        for input in tool.inputs.itervalues():
            if input.type == "upload_dataset":
                dataset_upload_inputs.append( input )
        library_bunch = upload_common.handle_library_params( trans, {}, trans.security.encode_id( folder.id ) )
        abspath_datasets = []
        kwd[ 'filesystem_paths' ] = path
        if source in [ 'importdir_folder' ]:
            kwd[ 'filesystem_paths' ] = os.path.join( import_base_dir, path )
        # user wants to import one file only
        if source in [ "userdir_file", "importdir_file" ]:
            file = os.path.abspath( path )
            abspath_datasets.append( trans.webapp.controllers[ 'library_common' ].make_library_uploaded_dataset(
                trans, 'api', kwd, os.path.basename( file ), file, 'server_dir', library_bunch ) )
        # user wants to import whole folder
        if source == "userdir_folder":
            uploaded_datasets_bunch = trans.webapp.controllers[ 'library_common' ].get_path_paste_uploaded_datasets(
                trans, 'api', kwd, library_bunch, 200, '' )
            uploaded_datasets = uploaded_datasets_bunch[ 0 ]
            if uploaded_datasets is None:
                raise exceptions.ObjectNotFound( 'Given folder does not contain any datasets.' )
            for ud in uploaded_datasets:
                ud.path = os.path.abspath( ud.path )
                abspath_datasets.append( ud )
        #  user wants to import from path
        if source in [ "admin_path", "importdir_folder" ]:
            # validate the path is within root
            uploaded_datasets_bunch = trans.webapp.controllers[ 'library_common' ].get_path_paste_uploaded_datasets(
                trans, 'api', kwd, library_bunch, 200, '' )
            uploaded_datasets = uploaded_datasets_bunch[0]
            if uploaded_datasets is None:
                raise exceptions.ObjectNotFound( 'Given folder does not contain any datasets.' )
            for ud in uploaded_datasets:
                ud.path = os.path.abspath( ud.path )
                abspath_datasets.append( ud )
        json_file_path = upload_common.create_paramfile( trans, abspath_datasets )
        data_list = [ ud.data for ud in abspath_datasets ]
        job, output = upload_common.create_job( trans, tool_params, tool, json_file_path, data_list, folder=folder )
        # HACK: Prevent outputs_to_working_directory from overwriting inputs when "linking"
        job.add_parameter( 'link_data_only', dumps( kwd.get( 'link_data_only', 'copy_files' ) ) )
        job.add_parameter( 'uuid', dumps( kwd.get( 'uuid', None ) ) )
        trans.sa_session.add( job )
        trans.sa_session.flush()
        job_dict = job.to_dict()
        job_dict[ 'id' ] = trans.security.encode_id( job_dict[ 'id' ] )
        return job_dict
Example #21
    def load( self, trans, **kwd ):
        """
        load( self, trans, **kwd ):
        * POST /api/libraries/datasets
        Load dataset from the given source into the library. 
        Source can be:
            user directory - root folder specified in galaxy.ini as "$user_library_import_dir"
                example path: path/to/galaxy/$user_library_import_dir/user@example.com/{user can browse everything here}
                the folder with the user login has to be created beforehand
            (admin)import directory - root folder specified in galaxy ini as "$library_import_dir"
                example path: path/to/galaxy/$library_import_dir/{admin can browse everything here}
            (admin)any absolute or relative path - option allowed with "allow_library_path_paste" in galaxy.ini
         
        :param  encoded_folder_id:      the encoded id of the folder to import dataset(s) to
        :type   encoded_folder_id:      an encoded id string
        :param  source:                 source the datasets should be loaded from
        :type   source:                 str
        :param  link_data:              flag whether to link the dataset to data or copy it to Galaxy, defaults to copy
                                        while linking is set to True all symlinks will be resolved _once_
        :type   link_data:              bool
        :param  preserve_dirs:          flag whether to preserve the directory structure when importing dir
                                        if False only datasets will be imported
        :type   preserve_dirs:          bool
        :param  file_type:              file type of the loaded datasets, defaults to 'auto' (autodetect)
        :type   file_type:              str
        :param  dbkey:                  dbkey of the loaded genome, defaults to '?' (unknown)
        :type   dbkey:                  str

        :returns:   dict containing information about the created upload job
        :rtype:     dictionary        
        """

        kwd[ 'space_to_tab' ] = 'False'
        kwd[ 'to_posix_lines' ] = 'True'
        kwd[ 'dbkey' ] = kwd.get( 'dbkey', '?' )
        kwd[ 'file_type' ] = kwd.get( 'file_type', 'auto' )
        kwd[ 'link_data_only' ] = 'link_to_files' if util.string_as_bool( kwd.get( 'link_data', False ) ) else 'copy_files'
        encoded_folder_id = kwd.get( 'encoded_folder_id', None )
        if encoded_folder_id is not None:
            folder_id = self.folder_manager.cut_and_decode( trans, encoded_folder_id )
        else:
            raise exceptions.RequestParameterMissingException( 'The required attribute encoded_folder_id is missing.' )
        path = kwd.get( 'path', None)
        if path is None:
            raise exceptions.RequestParameterMissingException( 'The required attribute path is missing.' )
        folder = self.folder_manager.get( trans, folder_id )

        source = kwd.get( 'source', None )
        if source not in [ 'userdir_file', 'userdir_folder', 'importdir_file', 'importdir_folder', 'admin_path' ]:
            raise exceptions.RequestParameterMissingException( 'You have to specify "source" parameter. Possible values are "userdir_file", "userdir_folder", "admin_path", "importdir_file" and "importdir_folder". ')
        if source in [ 'importdir_file', 'importdir_folder' ]:
            if not trans.user_is_admin:
                raise exceptions.AdminRequiredException( 'Only admins can import from importdir.' )
            if not trans.app.config.library_import_dir:
                raise exceptions.ConfigDoesNotAllowException( 'The configuration of this Galaxy instance does not allow admins to import into library from importdir.' )
            import_base_dir = trans.app.config.library_import_dir
            path = os.path.join( import_base_dir, path )
        if source in [ 'userdir_file', 'userdir_folder' ]:
            user_login = trans.user.email
            user_base_dir = trans.app.config.user_library_import_dir
            if user_base_dir is None:
                raise exceptions.ConfigDoesNotAllowException( 'The configuration of this Galaxy instance does not allow upload from user directories.' )
            full_dir = os.path.join( user_base_dir, user_login )
            if not path.lower().startswith( full_dir.lower() ):
                path = os.path.join( full_dir, path )
            if not os.path.exists( path ):
                raise exceptions.RequestParameterInvalidException( 'Given path does not exist on the host.' )
            if not self.folder_manager.can_add_item( trans, folder ):
                raise exceptions.InsufficientPermissionsException( 'You do not have proper permission to add items to the given folder.' )
        if source == 'admin_path':
            if not trans.app.config.allow_library_path_paste:
                raise exceptions.ConfigDoesNotAllowException( 'The configuration of this Galaxy instance does not allow admins to import into library from path.' )
            if not trans.user_is_admin:
                raise exceptions.AdminRequiredException( 'Only admins can import from path.' )

        # Set up the traditional tool state/params
        tool_id = 'upload1'
        tool = trans.app.toolbox.get_tool( tool_id )
        state = tool.new_state( trans )
        tool.update_state( trans, tool.inputs_by_page[ 0 ], state.inputs, kwd )
        tool_params = state.inputs
        dataset_upload_inputs = []
        for input in tool.inputs.itervalues():
            if input.type == "upload_dataset":
                dataset_upload_inputs.append( input )
        library_bunch = upload_common.handle_library_params( trans, {}, trans.security.encode_id( folder.id ) )
        abspath_datasets = []
        kwd[ 'filesystem_paths' ] = path
        if source in [ 'importdir_folder' ]:
            kwd[ 'filesystem_paths' ] = os.path.join( import_base_dir, path )
        params = util.Params( kwd )
        # user wants to import one file only
        if source in [ "userdir_file", "importdir_file" ]:
            file = os.path.abspath( path )
            abspath_datasets.append( trans.webapp.controllers[ 'library_common' ].make_library_uploaded_dataset(
                trans, 'api', params, os.path.basename( file ), file, 'server_dir', library_bunch ) )
        # user wants to import whole folder
        if source == "userdir_folder":
            uploaded_datasets_bunch = trans.webapp.controllers[ 'library_common' ].get_path_paste_uploaded_datasets(
                trans, 'api', params, library_bunch, 200, '' )
            uploaded_datasets = uploaded_datasets_bunch[ 0 ]
            if uploaded_datasets is None:
                raise exceptions.ObjectNotFound( 'Given folder does not contain any datasets.' )
            for ud in uploaded_datasets:
                ud.path = os.path.abspath( ud.path )
                abspath_datasets.append( ud )
        #  user wants to import from path
        if source in [ "admin_path", "importdir_folder" ]:
            # validate the path is within root
            uploaded_datasets_bunch = trans.webapp.controllers[ 'library_common' ].get_path_paste_uploaded_datasets(
                trans, 'api', params, library_bunch, 200, '' )
            uploaded_datasets = uploaded_datasets_bunch[0]
            if uploaded_datasets is None:
                raise exceptions.ObjectNotFound( 'Given folder does not contain any datasets.' )
            for ud in uploaded_datasets:
                ud.path = os.path.abspath( ud.path )
                abspath_datasets.append( ud )
        json_file_path = upload_common.create_paramfile( trans, abspath_datasets )
        data_list = [ ud.data for ud in abspath_datasets ]
        job, output = upload_common.create_job( trans, tool_params, tool, json_file_path, data_list, folder=folder )
        # HACK: Prevent outputs_to_working_directory from overwriting inputs when "linking"
        job.add_parameter( 'link_data_only', dumps( kwd.get( 'link_data_only', 'copy_files' ) ) )
        job.add_parameter( 'uuid', dumps( kwd.get( 'uuid', None ) ) )
        trans.sa_session.add( job )
        trans.sa_session.flush()
        job_dict = job.to_dict()
        job_dict[ 'id' ] = trans.security.encode_id( job_dict[ 'id' ] )
        return job_dict
Example #22
    def __call__( self, trans, **kwargs ):
        # Get basics.
        # FIXME: pretty sure this is only here to pass along, can likely be eliminated
        status = kwargs.get( 'status', None )
        message = kwargs.get( 'message', None )
        # Build a base filter and sort key that is the combination of the saved state and defaults.
        # Saved state takes preference over defaults.
        base_filter = {}
        if self.default_filter:
            # default_filter is a dictionary that provides a default set of filters based on the grid's columns.
            base_filter = self.default_filter.copy()
        base_sort_key = self.default_sort_key
        if self.preserve_state:
            pref_name = unicode( self.__class__.__name__ + self.cur_filter_pref_name )
            if pref_name in trans.get_user().preferences:
                saved_filter = loads( trans.get_user().preferences[pref_name] )
                base_filter.update( saved_filter )
            pref_name = unicode( self.__class__.__name__ + self.cur_sort_key_pref_name )
            if pref_name in trans.get_user().preferences:
                base_sort_key = loads( trans.get_user().preferences[pref_name] )
        # Build initial query
        query = self.build_initial_query( trans, **kwargs )
        query = self.apply_query_filter( trans, query, **kwargs )
        # Maintain sort state in generated urls
        extra_url_args = {}
        # Determine whether use_default_filter flag is set.
        use_default_filter_str = kwargs.get( 'use_default_filter' )
        use_default_filter = False
        if use_default_filter_str:
            use_default_filter = ( use_default_filter_str.lower() == 'true' )
        # Process filtering arguments to (a) build a query that represents the filter and (b) build a
        # dictionary that denotes the current filter.
        cur_filter_dict = {}
        for column in self.columns:
            if column.key:
                # Get the filter criterion for the column. Precedence is (a) if using default filter, only look there; otherwise,
                # (b) look in kwargs; and (c) look in base filter.
                column_filter = None
                if use_default_filter:
                    if self.default_filter:
                        column_filter = self.default_filter.get( column.key )
                elif "f-" + column.model_class.__name__ + ".%s" % column.key in kwargs:
                    # Queries that include table joins cannot guarantee unique column names.  This problem is
                    # handled by setting the column_filter value to <TableName>.<ColumnName>.
                    column_filter = kwargs.get( "f-" + column.model_class.__name__ + ".%s" % column.key )
                elif "f-" + column.key in kwargs:
                    column_filter = kwargs.get( "f-" + column.key )
                elif column.key in base_filter:
                    column_filter = base_filter.get( column.key )

                # Method (1) combines a mix of strings and lists of strings into a single string and (2) attempts to de-jsonify all strings.
                def loads_recurse(item):
                    decoded_list = []
                    if isinstance( item, basestring):
                        try:
                            # Not clear what we're decoding, so recurse to ensure that we catch everything.
                            decoded_item = loads( item )
                            if isinstance( decoded_item, list):
                                decoded_list = loads_recurse( decoded_item )
                            else:
                                decoded_list = [ unicode( decoded_item ) ]
                        except ValueError:
                            decoded_list = [ unicode( item ) ]
                    elif isinstance( item, list):
                        for element in item:
                            a_list = loads_recurse( element )
                            decoded_list = decoded_list + a_list
                    return decoded_list
                # If column filter found, apply it.
                if column_filter is not None:
                    # TextColumns may have a mix of json and strings.
                    if isinstance( column, TextColumn ):
                        column_filter = loads_recurse( column_filter )
                        if len( column_filter ) == 1:
                            column_filter = column_filter[0]
                    # Interpret ',' as a separator for multiple terms.
                    if isinstance( column_filter, basestring ) and column_filter.find(',') != -1:
                        column_filter = column_filter.split(',')

                    # Check if filter is empty
                    if isinstance( column_filter, list ):
                        # Remove empty strings from filter list
                        column_filter = [x for x in column_filter if x != '']
                        if len(column_filter) == 0:
                            continue
                    elif isinstance(column_filter, basestring):
                        # If filter criterion is empty, do nothing.
                        if column_filter == '':
                            continue

                    # Update query.
                    query = column.filter( trans, trans.user, query, column_filter )
                    # Update current filter dict.
                    # Column filters are rendered in various places, sanitize them all here.
                    cur_filter_dict[ column.key ] = sanitize_text(column_filter)
                    # Carry filter along to newly generated urls; make sure filter is a string so
                    # that we can encode to UTF-8 and thus handle user input to filters.
                    if isinstance( column_filter, list ):
                        # Filter is a list; process each item.
                        for filter in column_filter:
                            if not isinstance( filter, basestring ):
                                filter = unicode( filter ).encode("utf-8")
                        extra_url_args[ "f-" + column.key ] = dumps( column_filter )
                    else:
                        # Process singleton filter.
                        if not isinstance( column_filter, basestring ):
                            column_filter = unicode(column_filter)
                        extra_url_args[ "f-" + column.key ] = column_filter.encode("utf-8")
        # Process sort arguments.
        sort_key = None
        if 'sort' in kwargs:
            sort_key = kwargs['sort']
        elif base_sort_key:
            sort_key = base_sort_key
        if sort_key:
            ascending = not( sort_key.startswith( "-" ) )
            # Queries that include table joins cannot guarantee unique column names.  This problem is
            # handled by setting the column_filter value to <TableName>.<ColumnName>.
            table_name = None
            if sort_key.find( '.' ) > -1:
                a_list = sort_key.split( '.' )
                if ascending:
                    table_name = a_list[0]
                else:
                    table_name = a_list[0][1:]
                column_name = a_list[1]
            elif ascending:
                column_name = sort_key
            else:
                column_name = sort_key[1:]
            # Sort key is a column key.
            for column in self.columns:
                if column.key and column.key.find( '.' ) > -1:
                    column_key = column.key.split( '.' )[1]
                else:
                    column_key = column.key
                if ( table_name is None or table_name == column.model_class.__name__ ) and column_key == column_name:
                    query = column.sort( trans, query, ascending, column_name=column_name )
                    break
            extra_url_args['sort'] = sort_key
        # There might be a current row
        current_item = self.get_current_item( trans, **kwargs )
        # Process page number.
        if self.use_paging:
            if 'page' in kwargs:
                if kwargs['page'] == 'all':
                    page_num = 0
                else:
                    page_num = int( kwargs['page'] )
            else:
                page_num = 1
            if page_num == 0:
                # Show all rows in page.
                total_num_rows = query.count()
                page_num = 1
                num_pages = 1
            else:
                # Show a limited number of rows. Before modifying query, get the total number of rows that query
                # returns so that the total number of pages can be computed.
                total_num_rows = query.count()
                query = query.limit( self.num_rows_per_page ).offset( ( page_num - 1 ) * self.num_rows_per_page )
                num_pages = int( math.ceil( float( total_num_rows ) / self.num_rows_per_page ) )
        else:
            # Defaults.
            page_num = 1
            num_pages = 1
        # There are some places in grid templates where it's useful for a grid
        # to have its current filter.
        self.cur_filter_dict = cur_filter_dict
        # Preserve grid state: save current filter and sort key.
        if self.preserve_state:
            pref_name = unicode( self.__class__.__name__ + self.cur_filter_pref_name )
            trans.get_user().preferences[pref_name] = unicode( dumps( cur_filter_dict ) )
            if sort_key:
                pref_name = unicode( self.__class__.__name__ + self.cur_sort_key_pref_name )
                trans.get_user().preferences[pref_name] = unicode( dumps( sort_key ) )
            trans.sa_session.flush()
        # Log grid view.
        context = unicode( self.__class__.__name__ )
        params = cur_filter_dict.copy()
        params['sort'] = sort_key
        params['async'] = ( 'async' in kwargs )

        # TODO:??
        # commenting this out; when this fn calls session.add( action ) and session.flush the query from this fn
        # is effectively 'wiped' out. Nate believes it has something to do with our use of session( autocommit=True )
        # in mapping.py. If you change that to False, the log_action doesn't affect the query
        # Below, I'm rendering the template first (that uses query), then calling log_action, then returning the page
        # trans.log_action( trans.get_user(), unicode( "grid.view" ), context, params )

        # Render grid.
        def url( *args, **kwargs ):
            # Only include sort/filter arguments if not linking to another
            # page. This is a bit of a hack.
            if 'action' in kwargs:
                new_kwargs = dict()
            else:
                new_kwargs = dict( extra_url_args )
            # Extend new_kwargs with first argument if found
            if len(args) > 0:
                new_kwargs.update( args[0] )
            new_kwargs.update( kwargs )
            # We need to encode item ids
            if 'id' in new_kwargs:
                id = new_kwargs[ 'id' ]
                if isinstance( id, list ):
                    new_kwargs[ 'id' ] = [ trans.security.encode_id( i ) for i in id ]
                else:
                    new_kwargs[ 'id' ] = trans.security.encode_id( id )
            # The url_for invocation *must* include a controller and action.
            if 'controller' not in new_kwargs:
                new_kwargs['controller'] = trans.controller
            if 'action' not in new_kwargs:
                new_kwargs['action'] = trans.action
            return url_for( **new_kwargs)

        self.use_panels = ( kwargs.get( 'use_panels', False ) in [ True, 'True', 'true' ] )
        self.advanced_search = ( kwargs.get( 'advanced_search', False ) in [ True, 'True', 'true' ] )
        async_request = ( ( self.use_async ) and ( kwargs.get( 'async', False ) in [ True, 'True', 'true'] ) )
        # Currently, filling the template returns a str object; this requires decoding the string into a
        # unicode object within mako templates. What probably should be done is to return the template as
        # utf-8 unicode; however, this would require encoding the object as utf-8 before returning the grid
        # results via a controller method, which would require substantial changes. Hence, for now, return the grid
        # as str.
        page = trans.fill_template( iff( async_request, self.async_template, self.template ),
                                    grid=self,
                                    query=query,
                                    cur_page_num=page_num,
                                    num_pages=num_pages,
                                    num_page_links=self.num_page_links,
                                    default_filter_dict=self.default_filter,
                                    cur_filter_dict=cur_filter_dict,
                                    sort_key=sort_key,
                                    current_item=current_item,
                                    ids=kwargs.get( 'id', [] ),
                                    url=url,
                                    status=status,
                                    message=message,
                                    info_text=self.info_text,
                                    use_panels=self.use_panels,
                                    use_hide_message=self.use_hide_message,
                                    advanced_search=self.advanced_search,
                                    show_item_checkboxes=( self.show_item_checkboxes or
                                                           kwargs.get( 'show_item_checkboxes', '' ) in [ 'True', 'true' ] ),
                                    # Pass back kwargs so that grid template can set and use args without
                                    # grid explicitly having to pass them.
                                    kwargs=kwargs )
        trans.log_action( trans.get_user(), unicode( "grid.view" ), context, params )
        return page
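A minimal standalone sketch of the paging arithmetic used above, assuming a 1-based page number and the same ceil(total / rows-per-page) page count; the helper name and the sample numbers are hypothetical.

import math

def page_window(total_num_rows, page_num, num_rows_per_page):
    # Return (offset, limit, num_pages) for a 1-based page number,
    # mirroring the query.limit(...).offset(...) call in the grid code above.
    num_pages = int(math.ceil(float(total_num_rows) / num_rows_per_page))
    offset = (page_num - 1) * num_rows_per_page
    return offset, num_rows_per_page, num_pages

# 95 rows at 25 per page: page 3 starts at offset 50 and there are 4 pages in total.
assert page_window(95, 3, 25) == (50, 25, 4)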
Example #23
        else:
            # This should not happen, but it's here just in case
            shutil.copy( dataset.path, output_path )
    elif link_data_only == 'copy_files':
        shutil.move( dataset.path, output_path )
    # Write the job info
    stdout = stdout or 'uploaded %s file' % data_type
    info = dict( type='dataset',
                 dataset_id=dataset.dataset_id,
                 ext=ext,
                 stdout=stdout,
                 name=dataset.name,
                 line_count=line_count )
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    json_file.write( dumps( info ) + "\n" )

    if link_data_only == 'copy_files' and datatype.dataset_content_needs_grooming( output_path ):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content( output_path )
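
A hedged, standalone sketch of the job-info record written above: one JSON object per line describing an uploaded dataset, with the same keys as the info dict; the helper name and the 'galaxy.json' path are assumptions for illustration.

import json

def write_dataset_record(json_file, dataset_id, ext, name, stdout, line_count, uuid=None):
    # Mirror the shape of the info dict built in the upload code above.
    info = dict(type='dataset',
                dataset_id=dataset_id,
                ext=ext,
                stdout=stdout,
                name=name,
                line_count=line_count)
    if uuid is not None:
        info['uuid'] = uuid
    json_file.write(json.dumps(info) + "\n")

with open('galaxy.json', 'w') as fh:  # hypothetical output path
    write_dataset_record(fh, 1, 'txt', 'example dataset', 'uploaded txt file', 42)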


def add_composite_file( dataset, json_file, output_path, files_path ):
        if dataset.composite_files:
            os.mkdir( files_path )
            for name, value in dataset.composite_files.iteritems():
                value = util.bunch.Bunch( **value )
                if dataset.composite_file_paths[ value.name ] is None and not value.optional:
                    file_err( 'A required composite data file was not provided (%s)' % name, dataset, json_file )
                    break
                elif dataset.composite_file_paths[value.name] is not None:
Example #24
    def execute_via_app( self, tool, app, session_id, history_id, user=None,
                         incoming={}, set_output_hid=False, overwrite=True,
                         history=None, job_params=None ):
        """
        Execute using application.
        """
        for name, value in incoming.iteritems():
            if isinstance( value, app.model.HistoryDatasetAssociation ):
                dataset = value
                dataset_name = name
                type = 'hda'
                break
            elif isinstance( value, app.model.LibraryDatasetDatasetAssociation ):
                dataset = value
                dataset_name = name
                type = 'ldda'
                break
            else:
                raise Exception( 'The dataset to set metadata on could not be determined.' )

        sa_session = app.model.context

        # Create the job object
        job = app.model.Job()
        job.session_id = session_id
        job.history_id = history_id
        job.tool_id = tool.id
        if user:
            job.user_id = user.id
        if job_params:
            job.params = dumps( job_params )
        start_job_state = job.state  # should be job.states.NEW
        try:
            # For backward compatibility, some tools may not have versions yet.
            job.tool_version = tool.version
        except:
            job.tool_version = "1.0.1"
        job.state = job.states.WAITING  # we need to set job state to something other than NEW, or else when tracking jobs in db it will be picked up before we have added input / output parameters
        job.set_handler(tool.get_job_handler( job_params ))
        sa_session.add( job )
        sa_session.flush()  # ensure job.id is available

        # add parameters to job_parameter table
        # Store original dataset state, so we can restore it. A separate table might be better (no chance of 'losing' the original state)?
        incoming[ '__ORIGINAL_DATASET_STATE__' ] = dataset.state
        input_paths = [DatasetPath( dataset.id, real_path=dataset.file_name, mutable=False )]
        app.object_store.create(job, base_dir='job_work', dir_only=True, extra_dir=str(job.id))
        job_working_dir = app.object_store.get_filename(job, base_dir='job_work', dir_only=True, extra_dir=str(job.id))
        external_metadata_wrapper = JobExternalOutputMetadataWrapper( job )
        cmd_line = external_metadata_wrapper.setup_external_metadata( dataset,
                                                                      sa_session,
                                                                      exec_dir=None,
                                                                      tmp_dir=job_working_dir,
                                                                      dataset_files_path=app.model.Dataset.file_path,
                                                                      output_fnames=input_paths,
                                                                      config_root=app.config.root,
                                                                      config_file=app.config.config_file,
                                                                      datatypes_config=app.datatypes_registry.integrated_datatypes_configs,
                                                                      job_metadata=None,
                                                                      include_command=False,
                                                                      max_metadata_value_size=app.config.max_metadata_value_size,
                                                                      kwds={ 'overwrite' : overwrite } )
        incoming[ '__SET_EXTERNAL_METADATA_COMMAND_LINE__' ] = cmd_line
        for name, value in tool.params_to_strings( incoming, app ).iteritems():
            job.add_parameter( name, value )
        # add the dataset to job_to_input_dataset table
        if type == 'hda':
            job.add_input_dataset( dataset_name, dataset )
        elif type == 'ldda':
            job.add_input_library_dataset( dataset_name, dataset )
        # Need a special state here to show that metadata is being set and also allow the job to run
        # i.e. if state was set to 'running' the set metadata job would never run, as it would wait for input (the dataset to set metadata on) to be in a ready state
        dataset._state = dataset.states.SETTING_METADATA
        job.state = start_job_state  # job inputs have been configured, restore initial job state
        sa_session.flush()

        # Queue the job for execution
        app.job_queue.put( job.id, tool.id )
        # FIXME: need to add event logging to app and log events there rather than trans.
        # trans.log_event( "Added set external metadata job to the job queue, id: %s" % str(job.id), tool_id=job.tool_id )

        # clear e.g. converted files
        dataset.datatype.before_setting_metadata( dataset )

        return job, odict()
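
A hedged, standalone sketch of how job-level parameters are serialized above: job_params are stored as a single JSON string on the job, while tool parameters are added one by one; the FakeJob class is a hypothetical stand-in for app.model.Job.

import json

class FakeJob(object):
    # Hypothetical stand-in for app.model.Job, keeping only the two fields used here.
    def __init__(self):
        self.params = None
        self.parameters = {}

    def add_parameter(self, name, value):
        self.parameters[name] = value

job = FakeJob()
job_params = {"source": "set_metadata"}
if job_params:
    job.params = json.dumps(job_params)
for name, value in {"input1": "dataset 12", "overwrite": "true"}.items():
    job.add_parameter(name, value)
assert json.loads(job.params)["source"] == "set_metadata"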
Example #25
    def setup_job(self,
                  trans,
                  jeha,
                  include_hidden=False,
                  include_deleted=False):
        """ Perform setup for job to export a history into an archive. Method generates
            attribute files for export, sets the corresponding attributes in the jeha
            object, and returns a command line for running the job. The command line
            includes the command, inputs, and options; it does not include the output
            file because it must be set at runtime. """

        #
        # Helper methods/classes.
        #

        def get_item_tag_dict(item):
            """ Create dictionary of an item's tags. """
            tags = {}
            for tag in item.tags:
                tag_user_tname = to_unicode(tag.user_tname)
                tag_user_value = to_unicode(tag.user_value)
                tags[tag_user_tname] = tag_user_value
            return tags

        def prepare_metadata(metadata):
            """ Prepare metatdata for exporting. """
            for name, value in metadata.items():
                # Metadata files are not needed for export because they can be
                # regenerated.
                if isinstance(value, trans.app.model.MetadataFile):
                    del metadata[name]
            return metadata

        class HistoryDatasetAssociationEncoder(json.JSONEncoder):
            """ Custom JSONEncoder for a HistoryDatasetAssociation. """
            def default(self, obj):
                """ Encode an HDA, default encoding for everything else. """
                if isinstance(obj, trans.app.model.HistoryDatasetAssociation):
                    rval = {
                        "__HistoryDatasetAssociation__": True,
                        "create_time": obj.create_time.__str__(),
                        "update_time": obj.update_time.__str__(),
                        "hid": obj.hid,
                        "name": to_unicode(obj.name),
                        "info": to_unicode(obj.info),
                        "blurb": obj.blurb,
                        "peek": obj.peek,
                        "extension": obj.extension,
                        "metadata": prepare_metadata(dict(obj.metadata.items())),
                        "parent_id": obj.parent_id,
                        "designation": obj.designation,
                        "deleted": obj.deleted,
                        "visible": obj.visible,
                        "file_name": obj.file_name,
                        "uuid": str(obj.dataset.uuid) if obj.dataset.uuid else None,
                        "annotation": to_unicode(getattr(obj, 'annotation', '')),
                        "tags": get_item_tag_dict(obj),
                        "extra_files_path": obj.extra_files_path
                    }
                    if not obj.visible and not include_hidden:
                        rval['exported'] = False
                    elif obj.deleted and not include_deleted:
                        rval['exported'] = False
                    else:
                        rval['exported'] = True
                    return rval
                if isinstance(obj, UnvalidatedValue):
                    return obj.__str__()
                return json.JSONEncoder.default(self, obj)

        #
        # Create attributes/metadata files for export.
        #
        temp_output_dir = tempfile.mkdtemp()

        # Write history attributes to file.
        history = jeha.history
        history_attrs = {
            "create_time": history.create_time.__str__(),
            "update_time": history.update_time.__str__(),
            "name": to_unicode(history.name),
            "hid_counter": history.hid_counter,
            "genome_build": history.genome_build,
            "annotation": to_unicode(self.get_item_annotation_str(trans.sa_session, history.user, history)),
            "tags": get_item_tag_dict(history),
            "includes_hidden_datasets": include_hidden,
            "includes_deleted_datasets": include_deleted
        }
        history_attrs_filename = tempfile.NamedTemporaryFile(
            dir=temp_output_dir).name
        history_attrs_out = open(history_attrs_filename, 'w')
        history_attrs_out.write(dumps(history_attrs))
        history_attrs_out.close()
        jeha.history_attrs_filename = history_attrs_filename

        # Write datasets' attributes to file.
        datasets = self.get_history_datasets(trans, history)
        included_datasets = []
        datasets_attrs = []
        provenance_attrs = []
        for dataset in datasets:
            dataset.annotation = self.get_item_annotation_str(
                trans.sa_session, history.user, dataset)
            if (not dataset.visible
                    and not include_hidden) or (dataset.deleted
                                                and not include_deleted):
                provenance_attrs.append(dataset)
            else:
                datasets_attrs.append(dataset)
                included_datasets.append(dataset)
        datasets_attrs_filename = tempfile.NamedTemporaryFile(
            dir=temp_output_dir).name
        datasets_attrs_out = open(datasets_attrs_filename, 'w')
        datasets_attrs_out.write(
            dumps(datasets_attrs, cls=HistoryDatasetAssociationEncoder))
        datasets_attrs_out.close()
        jeha.datasets_attrs_filename = datasets_attrs_filename

        provenance_attrs_out = open(datasets_attrs_filename + ".provenance",
                                    'w')
        provenance_attrs_out.write(
            dumps(provenance_attrs, cls=HistoryDatasetAssociationEncoder))
        provenance_attrs_out.close()

        #
        # Write jobs attributes file.
        #

        # Get all jobs associated with included HDAs.
        jobs_dict = {}
        for hda in included_datasets:
            # Get the associated job, if any. If this hda was copied from another,
            # we need to find the job that created the original hda
            job_hda = hda
            while job_hda.copied_from_history_dataset_association:  # should this check library datasets as well?
                job_hda = job_hda.copied_from_history_dataset_association
            if not job_hda.creating_job_associations:
                # No viable HDA found.
                continue

            # Get the job object.
            job = None
            for assoc in job_hda.creating_job_associations:
                job = assoc.job
                break
            if not job:
                # No viable job.
                continue

            jobs_dict[job.id] = job

        # Get jobs' attributes.
        jobs_attrs = []
        for id, job in jobs_dict.items():
            job_attrs = {}
            job_attrs['tool_id'] = job.tool_id
            job_attrs['tool_version'] = job.tool_version
            job_attrs['state'] = job.state
            job_attrs['info'] = job.info
            job_attrs['traceback'] = job.traceback
            job_attrs['command_line'] = job.command_line
            job_attrs['stderr'] = job.stderr
            job_attrs['stdout'] = job.stdout
            job_attrs['exit_code'] = job.exit_code
            job_attrs['create_time'] = job.create_time.isoformat()
            job_attrs['update_time'] = job.update_time.isoformat()

            # Get the job's parameters
            try:
                params_objects = job.get_param_values(trans.app)
            except:
                # Could not get job params.
                continue

            params_dict = {}
            for name, value in params_objects.items():
                params_dict[name] = value
            job_attrs['params'] = params_dict

            # -- Get input, output datasets. --

            input_datasets = []
            input_mapping = {}
            for assoc in job.input_datasets:
                # Optional data inputs will not have a dataset.
                if assoc.dataset:
                    input_datasets.append(assoc.dataset.hid)
                    input_mapping[assoc.name] = assoc.dataset.hid
            job_attrs['input_datasets'] = input_datasets
            job_attrs['input_mapping'] = input_mapping
            output_datasets = [
                assoc.dataset.hid for assoc in job.output_datasets
            ]
            job_attrs['output_datasets'] = output_datasets

            jobs_attrs.append(job_attrs)

        jobs_attrs_filename = tempfile.NamedTemporaryFile(
            dir=temp_output_dir).name
        jobs_attrs_out = open(jobs_attrs_filename, 'w')
        jobs_attrs_out.write(
            dumps(jobs_attrs, cls=HistoryDatasetAssociationEncoder))
        jobs_attrs_out.close()
        jeha.jobs_attrs_filename = jobs_attrs_filename

        #
        # Create and return command line for running tool.
        #
        options = ""
        if jeha.compressed:
            options = "-G"
        return "%s %s %s %s" % (options, history_attrs_filename,
                                datasets_attrs_filename, jobs_attrs_filename)
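A minimal standalone sketch of the custom-encoder pattern used above: json.JSONEncoder.default() is overridden for one model class and everything else falls through to the base implementation; the Record class is a hypothetical stand-in for HistoryDatasetAssociation.

import json

class Record(object):
    def __init__(self, name, hid):
        self.name = name
        self.hid = hid

class RecordEncoder(json.JSONEncoder):
    # Encode Record instances as plain dicts; defer everything else to the base class.
    def default(self, obj):
        if isinstance(obj, Record):
            return {"__Record__": True, "name": obj.name, "hid": obj.hid}
        return json.JSONEncoder.default(self, obj)

print(json.dumps([Record("example", 1)], cls=RecordEncoder))
# e.g. [{"__Record__": true, "name": "example", "hid": 1}]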
Example #26
    def _rerun_tool( self, trans, payload, **kwargs ):
        """
        Rerun a tool to produce a new output dataset that corresponds to a
        dataset that a user is currently viewing.
        """

        #
        # TODO: refactor to use same code as run_tool.
        #

        # Run tool on region if region is specified.
        run_on_regions = False
        regions = payload.get( 'regions', None )
        if regions:
            if isinstance( regions, dict ):
                # Regions is a single region.
                regions = [ GenomeRegion.from_dict( regions ) ]
            elif isinstance( regions, list ):
                # There is a list of regions.
                regions = [ GenomeRegion.from_dict( r ) for r in regions ]

                if len( regions ) > 1:
                    # Sort by chrom name, start so that data is not fetched out of order.
                    regions = sorted(regions, key=lambda r: (r.chrom.lower(), r.start))

                    # Merge overlapping regions so that regions do not overlap
                    # and hence data is not included multiple times.
                    prev = regions[0]
                    cur = regions[1]
                    index = 1
                    while True:
                        if cur.chrom == prev.chrom and cur.start <= prev.end:
                            # Found overlapping regions, so join them into prev.
                            prev.end = cur.end
                            del regions[ index ]
                        else:
                            # No overlap, move to next region.
                            prev = cur
                            index += 1

                        # Get next region or exit.
                        if index == len( regions ):
                            # Done.
                            break
                        else:
                            cur = regions[ index ]

            run_on_regions = True

        # Dataset check.
        decoded_dataset_id = self.decode_id( payload.get( 'target_dataset_id' ) )
        original_dataset = self.hda_manager.get_accessible( trans, decoded_dataset_id, user=trans.user )
        original_dataset = self.hda_manager.error_if_uploading( trans, original_dataset )
        msg = self.hda_manager.data_conversion_status( trans, original_dataset )
        if msg:
            return msg

        # Set tool parameters--except non-hidden dataset parameters--using a combination of
        # job's previous parameters and incoming parameters. Incoming parameters
        # have priority.
        #
        original_job = self.hda_manager.creating_job( original_dataset )
        tool = trans.app.toolbox.get_tool( original_job.tool_id )
        if not tool or not tool.allow_user_access( trans.user ):
            return trans.app.model.Dataset.conversion_messages.NO_TOOL
        tool_params = dict( [ ( p.name, p.value ) for p in original_job.parameters ] )

        # TODO: rather than set new inputs using dict of json'ed value, unpack parameters and set using set_param_value below.
        # TODO: need to handle updates to conditional parameters; conditional
        # params are stored in dicts (and dicts within dicts).
        new_inputs = payload[ 'inputs' ]
        tool_params.update( dict( [ ( key, dumps( value ) ) for key, value in new_inputs.items() if key in tool.inputs and new_inputs[ key ] is not None ] ) )
        tool_params = tool.params_from_strings( tool_params, self.app )

        #
        # If running tool on region, convert input datasets (create indices) so
        # that regions of data can be quickly extracted.
        #
        data_provider_registry = trans.app.data_provider_registry
        messages_list = []
        if run_on_regions:
            for jida in original_job.input_datasets:
                input_dataset = jida.dataset
                data_provider = data_provider_registry.get_data_provider( trans, original_dataset=input_dataset, source='data' )
                if data_provider and ( not data_provider.converted_dataset
                                       or data_provider.converted_dataset.state != trans.app.model.Dataset.states.OK ):
                    # Can convert but no converted dataset yet, so return message about why.
                    data_sources = input_dataset.datatype.data_sources
                    msg = input_dataset.convert_dataset( trans, data_sources[ 'data' ] )
                    if msg is not None:
                        messages_list.append( msg )

        # Return any messages generated during conversions.
        return_message = self._get_highest_priority_msg( messages_list )
        if return_message:
            return return_message

        #
        # Set target history (the history that tool will use for inputs/outputs).
        # If user owns dataset, put new data in original dataset's history; if
        # user does not own dataset (and hence is accessing dataset via sharing),
        # put new data in user's current history.
        #
        if original_dataset.history.user == trans.user:
            target_history = original_dataset.history
        else:
            target_history = trans.get_history( create=True )
        hda_permissions = trans.app.security_agent.history_get_default_permissions( target_history )

        def set_param_value( param_dict, param_name, param_value ):
            """
            Set new parameter value in a tool's parameter dictionary.
            """

            # Recursive function to set param value.
            def set_value( param_dict, group_name, group_index, param_name, param_value ):
                if group_name in param_dict:
                    param_dict[ group_name ][ group_index ][ param_name ] = param_value
                    return True
                elif param_name in param_dict:
                    param_dict[ param_name ] = param_value
                    return True
                else:
                    # Recursive search.
                    return_val = False
                    for value in param_dict.values():
                        if isinstance( value, dict ):
                            return_val = set_value( value, group_name, group_index, param_name, param_value)
                            if return_val:
                                return return_val
                    return False

            # Parse parameter name if necessary.
            if param_name.find( "|" ) == -1:
                # Non-grouping parameter.
                group_name = group_index = None
            else:
                # Grouping parameter.
                group, param_name = param_name.split( "|" )
                index = group.rfind( "_" )
                group_name = group[ :index ]
                group_index = int( group[ index + 1: ] )

            return set_value( param_dict, group_name, group_index, param_name, param_value )

        # Set parameters based on the tool's trackster config.
        params_set = {}
        for action in tool.trackster_conf.actions:
            success = False
            for joda in original_job.output_datasets:
                if joda.name == action.output_name:
                    set_param_value( tool_params, action.name, joda.dataset )
                    params_set[ action.name ] = True
                    success = True
                    break

            if not success:
                return trans.app.model.Dataset.conversion_messages.ERROR

        #
        # Set input datasets for tool. If running on regions, extract and use subset
        # when possible.
        #
        if run_on_regions:
            regions_str = ",".join( [ str( r ) for r in regions ] )
        for jida in original_job.input_datasets:
            # If param set previously by config actions, do nothing.
            if jida.name in params_set:
                continue

            input_dataset = jida.dataset
            if input_dataset is None:  # optional dataset and dataset wasn't selected
                tool_params[ jida.name ] = None
            elif run_on_regions and 'data' in input_dataset.datatype.data_sources:
                # Dataset is indexed and hence a subset can be extracted and used
                # as input.

                # Look for subset.
                subset_dataset_association = trans.sa_session.query( trans.app.model.HistoryDatasetAssociationSubset ) \
                                                             .filter_by( hda=input_dataset, location=regions_str ) \
                                                             .first()
                if subset_dataset_association:
                    # Data subset exists.
                    subset_dataset = subset_dataset_association.subset
                else:
                    # Need to create subset.
                    data_source = input_dataset.datatype.data_sources[ 'data' ]
                    converted_dataset = input_dataset.get_converted_dataset( trans, data_source )
                    deps = input_dataset.get_converted_dataset_deps( trans, data_source )

                    # Create new HDA for input dataset's subset.
                    new_dataset = trans.app.model.HistoryDatasetAssociation( extension=input_dataset.ext, \
                                                                             dbkey=input_dataset.dbkey, \
                                                                             create_dataset=True, \
                                                                             sa_session=trans.sa_session,
                                                                             name="Subset [%s] of data %i" % \
                                                                                 ( regions_str, input_dataset.hid ),
                                                                             visible=False )
                    target_history.add_dataset( new_dataset )
                    trans.sa_session.add( new_dataset )
                    trans.app.security_agent.set_all_dataset_permissions( new_dataset.dataset, hda_permissions )

                    # Write subset of data to new dataset
                    data_provider = data_provider_registry.get_data_provider( trans, original_dataset=input_dataset, source='data' )
                    trans.app.object_store.create( new_dataset.dataset )
                    data_provider.write_data_to_file( regions, new_dataset.file_name )

                    # TODO: (a) size not working; (b) need to set peek.
                    new_dataset.set_size()
                    new_dataset.info = "Data subset for trackster"
                    new_dataset.set_dataset_state( trans.app.model.Dataset.states.OK )

                    # Set metadata.
                    # TODO: set meta internally if dataset is small enough?
                    trans.app.datatypes_registry.set_external_metadata_tool.tool_action.execute( trans.app.datatypes_registry.set_external_metadata_tool,
                                                                                                 trans, incoming={ 'input1': new_dataset },
                                                                                                 overwrite=False, job_params={ "source" : "trackster" } )
                    # Add HDA subset association.
                    subset_association = trans.app.model.HistoryDatasetAssociationSubset( hda=input_dataset, subset=new_dataset, location=regions_str )
                    trans.sa_session.add( subset_association )

                    subset_dataset = new_dataset

                trans.sa_session.flush()

                # Add dataset to tool's parameters.
                if not set_param_value( tool_params, jida.name, subset_dataset ):
                    return { "error" : True, "message" : "error setting parameter %s" % jida.name }

        #
        # Execute tool and handle outputs.
        #
        try:
            subset_job, subset_job_outputs = tool.execute( trans, incoming=tool_params,
                                                           history=target_history,
                                                           job_params={ "source" : "trackster" } )
        except Exception, e:
            # Lots of things can go wrong when trying to execute tool.
            return { "error" : True, "message" : e.__class__.__name__ + ": " + str(e) }
Example #27
    def __call__(self, trans, **kwargs):
        # Get basics.
        # FIXME: pretty sure this is only here to pass along, can likely be eliminated
        status = kwargs.get('status', None)
        message = kwargs.get('message', None)
        # Build a base filter and sort key that is the combination of the saved state and defaults.
        # Saved state takes preference over defaults.
        base_filter = {}
        if self.default_filter:
            # default_filter is a dictionary that provides a default set of filters based on the grid's columns.
            base_filter = self.default_filter.copy()
        base_sort_key = self.default_sort_key
        if self.preserve_state:
            pref_name = unicode(self.__class__.__name__ +
                                self.cur_filter_pref_name)
            if pref_name in trans.get_user().preferences:
                saved_filter = loads(trans.get_user().preferences[pref_name])
                base_filter.update(saved_filter)
            pref_name = unicode(self.__class__.__name__ +
                                self.cur_sort_key_pref_name)
            if pref_name in trans.get_user().preferences:
                base_sort_key = loads(trans.get_user().preferences[pref_name])
        # Build initial query
        query = self.build_initial_query(trans, **kwargs)
        query = self.apply_query_filter(trans, query, **kwargs)
        # Maintain sort state in generated urls
        extra_url_args = {}
        # Determine whether use_default_filter flag is set.
        use_default_filter_str = kwargs.get('use_default_filter')
        use_default_filter = False
        if use_default_filter_str:
            use_default_filter = (use_default_filter_str.lower() == 'true')
        # Process filtering arguments to (a) build a query that represents the filter and (b) build a
        # dictionary that denotes the current filter.
        cur_filter_dict = {}
        for column in self.columns:
            if column.key:
                # Get the filter criterion for the column. Precedence is (a) if using default filter, only look there; otherwise,
                # (b) look in kwargs; and (c) look in base filter.
                column_filter = None
                if use_default_filter:
                    if self.default_filter:
                        column_filter = self.default_filter.get(column.key)
                elif "f-" + column.model_class.__name__ + ".%s" % column.key in kwargs:
                    # Queries that include table joins cannot guarantee unique column names.  This problem is
                    # handled by setting the column_filter value to <TableName>.<ColumnName>.
                    column_filter = kwargs.get("f-" +
                                               column.model_class.__name__ +
                                               ".%s" % column.key)
                elif "f-" + column.key in kwargs:
                    column_filter = kwargs.get("f-" + column.key)
                elif column.key in base_filter:
                    column_filter = base_filter.get(column.key)

                # Method (1) combines a mix of strings and lists of strings into a single list and (2) attempts to de-jsonify all strings.
                def loads_recurse(item):
                    decoded_list = []
                    if isinstance(item, basestring):
                        try:
                            # Not clear what we're decoding, so recurse to ensure that we catch everything.
                            decoded_item = loads(item)
                            if isinstance(decoded_item, list):
                                decoded_list = loads_recurse(decoded_item)
                            else:
                                decoded_list = [unicode(decoded_item)]
                        except ValueError:
                            decoded_list = [unicode(item)]
                    elif isinstance(item, list):
                        for element in item:
                            a_list = loads_recurse(element)
                            decoded_list = decoded_list + a_list
                    return decoded_list

                # If column filter found, apply it.
                if column_filter is not None:
                    # TextColumns may have a mix of json and strings.
                    if isinstance(column, TextColumn):
                        column_filter = loads_recurse(column_filter)
                        if len(column_filter) == 1:
                            column_filter = column_filter[0]
                    # Interpret ',' as a separator for multiple terms.
                    if isinstance(
                            column_filter,
                            basestring) and column_filter.find(',') != -1:
                        column_filter = column_filter.split(',')

                    # Check if filter is empty
                    if isinstance(column_filter, list):
                        # Remove empty strings from filter list
                        column_filter = [x for x in column_filter if x != '']
                        if len(column_filter) == 0:
                            continue
                    elif isinstance(column_filter, basestring):
                        # If filter criterion is empty, do nothing.
                        if column_filter == '':
                            continue

                    # Update query.
                    query = column.filter(trans, trans.user, query,
                                          column_filter)
                    # Update current filter dict.
                    # Column filters are rendered in various places, sanitize them all here.
                    cur_filter_dict[column.key] = sanitize_text(column_filter)
                    # Carry filter along to newly generated urls; make sure filter is a string so
                    # that we can encode to UTF-8 and thus handle user input to filters.
                    if isinstance(column_filter, list):
                        # Filter is a list; process each item.
                        for filter in column_filter:
                            if not isinstance(filter, basestring):
                                filter = unicode(filter).encode("utf-8")
                        extra_url_args["f-" +
                                       column.key] = dumps(column_filter)
                    else:
                        # Process singleton filter.
                        if not isinstance(column_filter, basestring):
                            column_filter = unicode(column_filter)
                        extra_url_args[
                            "f-" + column.key] = column_filter.encode("utf-8")
        # Process sort arguments.
        sort_key = None
        if 'sort' in kwargs:
            sort_key = kwargs['sort']
        elif base_sort_key:
            sort_key = base_sort_key
        if sort_key:
            ascending = not (sort_key.startswith("-"))
            # Queries that include table joins cannot guarantee unique column names.  This problem is
            # handled by setting the column_filter value to <TableName>.<ColumnName>.
            table_name = None
            if sort_key.find('.') > -1:
                a_list = sort_key.split('.')
                if ascending:
                    table_name = a_list[0]
                else:
                    table_name = a_list[0][1:]
                column_name = a_list[1]
            elif ascending:
                column_name = sort_key
            else:
                column_name = sort_key[1:]
            # Sort key is a column key.
            for column in self.columns:
                if column.key and column.key.find('.') > -1:
                    column_key = column.key.split('.')[1]
                else:
                    column_key = column.key
                if (table_name is None
                        or table_name == column.model_class.__name__
                    ) and column_key == column_name:
                    query = column.sort(trans,
                                        query,
                                        ascending,
                                        column_name=column_name)
                    break
            extra_url_args['sort'] = sort_key
        # There might be a current row
        current_item = self.get_current_item(trans, **kwargs)
        # Process page number.
        if self.use_paging:
            if 'page' in kwargs:
                if kwargs['page'] == 'all':
                    page_num = 0
                else:
                    page_num = int(kwargs['page'])
            else:
                page_num = 1
            if page_num == 0:
                # Show all rows in page.
                total_num_rows = query.count()
                page_num = 1
                num_pages = 1
            else:
                # Show a limited number of rows. Before modifying query, get the total number of rows that query
                # returns so that the total number of pages can be computed.
                total_num_rows = query.count()
                query = query.limit(self.num_rows_per_page).offset(
                    (page_num - 1) * self.num_rows_per_page)
                num_pages = int(
                    math.ceil(float(total_num_rows) / self.num_rows_per_page))
        else:
            # Defaults.
            page_num = 1
            num_pages = 1
        # There are some places in grid templates where it's useful for a grid
        # to have its current filter.
        self.cur_filter_dict = cur_filter_dict
        # Preserve grid state: save current filter and sort key.
        if self.preserve_state:
            pref_name = unicode(self.__class__.__name__ +
                                self.cur_filter_pref_name)
            trans.get_user().preferences[pref_name] = unicode(
                dumps(cur_filter_dict))
            if sort_key:
                pref_name = unicode(self.__class__.__name__ +
                                    self.cur_sort_key_pref_name)
                trans.get_user().preferences[pref_name] = unicode(
                    dumps(sort_key))
            trans.sa_session.flush()
        # Log grid view.
        context = unicode(self.__class__.__name__)
        params = cur_filter_dict.copy()
        params['sort'] = sort_key
        params['async'] = ('async' in kwargs)

        #TODO:??
        # commenting this out; when this fn calls session.add( action ) and session.flush the query from this fn
        # is effectively 'wiped' out. Nate believes it has something to do with our use of session( autocommit=True )
        # in mapping.py. If you change that to False, the log_action doesn't affect the query
        # Below, I'm rendering the template first (that uses query), then calling log_action, then returning the page
        #trans.log_action( trans.get_user(), unicode( "grid.view" ), context, params )

        # Render grid.
        def url(*args, **kwargs):
            # Only include sort/filter arguments if not linking to another
            # page. This is a bit of a hack.
            if 'action' in kwargs:
                new_kwargs = dict()
            else:
                new_kwargs = dict(extra_url_args)
            # Extend new_kwargs with first argument if found
            if len(args) > 0:
                new_kwargs.update(args[0])
            new_kwargs.update(kwargs)
            # We need to encode item ids
            if 'id' in new_kwargs:
                id = new_kwargs['id']
                if isinstance(id, list):
                    new_kwargs['id'] = [
                        trans.security.encode_id(i) for i in id
                    ]
                else:
                    new_kwargs['id'] = trans.security.encode_id(id)
            #The url_for invocation *must* include a controller and action.
            if 'controller' not in new_kwargs:
                new_kwargs['controller'] = trans.controller
            if 'action' not in new_kwargs:
                new_kwargs['action'] = trans.action
            return url_for(**new_kwargs)

        self.use_panels = (kwargs.get('use_panels', False)
                           in [True, 'True', 'true'])
        self.advanced_search = (kwargs.get('advanced_search', False)
                                in [True, 'True', 'true'])
        async_request = ((self.use_async) and (kwargs.get('async', False)
                                               in [True, 'True', 'true']))
        # Currently, filling the template returns a str object; this requires decoding the string into a
        # unicode object within mako templates. What probably should be done is to return the template as
        # utf-8 unicode; however, this would require encoding the object as utf-8 before returning the grid
        # results via a controller method, which would require substantial changes. Hence, for now, return the grid
        # as str.
        page = trans.fill_template(
            iff(async_request, self.async_template, self.template),
            grid=self,
            query=query,
            cur_page_num=page_num,
            num_pages=num_pages,
            num_page_links=self.num_page_links,
            default_filter_dict=self.default_filter,
            cur_filter_dict=cur_filter_dict,
            sort_key=sort_key,
            current_item=current_item,
            ids=kwargs.get('id', []),
            url=url,
            status=status,
            message=message,
            info_text=self.info_text,
            use_panels=self.use_panels,
            use_hide_message=self.use_hide_message,
            advanced_search=self.advanced_search,
            show_item_checkboxes=(self.show_item_checkboxes or kwargs.get(
                'show_item_checkboxes', '') in ['True', 'true']),
            # Pass back kwargs so that grid template can set and use args without
            # grid explicitly having to pass them.
            kwargs=kwargs)
        trans.log_action(trans.get_user(), unicode("grid.view"), context,
                         params)
        return page
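A minimal standalone sketch of the recursive de-jsonify done by loads_recurse above: filter values may arrive as plain strings, JSON-encoded strings, or lists of either, and all of them are flattened into a single list of strings; this version uses str instead of unicode/basestring so it also runs on Python 3.

import json

def loads_recurse(item):
    decoded = []
    if isinstance(item, str):
        try:
            # Not clear what we are decoding, so recurse to catch nested lists.
            value = json.loads(item)
            decoded = loads_recurse(value) if isinstance(value, list) else [str(value)]
        except ValueError:
            decoded = [str(item)]
    elif isinstance(item, list):
        for element in item:
            decoded += loads_recurse(element)
    return decoded

assert loads_recurse('["a", "b"]') == ["a", "b"]
assert loads_recurse("plain text") == ["plain text"]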
Example #28
import json


def pretty_print_json(json_data, is_json_string=False):
    if is_json_string:
        json_data = json.loads(json_data)
    return json.dumps(json_data, sort_keys=True, indent=4)
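A hedged usage sketch for pretty_print_json above; the sample data is arbitrary.

print(pretty_print_json({"b": 1, "a": [1, 2]}))
# {
#     "a": [
#         1,
#         2
#     ],
#     "b": 1
# }
print(pretty_print_json('{"a": 1}', is_json_string=True))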
Example #29
    def execute(self,
                tool,
                trans,
                incoming={},
                return_job=False,
                set_output_hid=True,
                set_output_history=True,
                history=None,
                job_params=None,
                rerun_remap_job_id=None,
                mapping_over_collection=False):
        """
        Executes a tool, creating job and tool outputs, associating them, and
        submitting the job to the job queue. If history is not specified, use
        trans.history as destination for tool's output datasets.
        """
        # Set history.
        if not history:
            history = tool.get_default_history_by_trans(trans, create=True)

        out_data = odict()
        out_collections = {}
        out_collection_instances = {}
        # Track input dataset collections - but replace with simply lists so collect
        # input datasets can process these normally.
        inp_dataset_collections = self.collect_input_dataset_collections(
            tool, incoming)
        # Collect any input datasets from the incoming parameters
        inp_data = self.collect_input_datasets(tool, incoming, trans)

        # Deal with input dataset names, 'dbkey' and types
        input_names = []
        input_ext = 'data'
        input_dbkey = incoming.get("dbkey", "?")
        for name, data in inp_data.items():
            if not data:
                data = NoneDataset(
                    datatypes_registry=trans.app.datatypes_registry)
                continue

            # Convert LDDA to an HDA.
            if isinstance(data, LibraryDatasetDatasetAssociation):
                data = data.to_history_dataset_association(None)
                inp_data[name] = data

            else:  # HDA
                if data.hid:
                    input_names.append('data %s' % data.hid)
            input_ext = data.ext

            if data.dbkey not in [None, '?']:
                input_dbkey = data.dbkey

        # Collect chromInfo dataset and add as parameters to incoming
        (chrom_info, db_dataset) = trans.app.genome_builds.get_chrom_info(
            input_dbkey,
            trans=trans,
            custom_build_hack_get_len_from_fasta_conversion=tool.id !=
            'CONVERTER_fasta_to_len')
        if db_dataset:
            inp_data.update({"chromInfo": db_dataset})
        incoming["chromInfo"] = chrom_info

        # Determine output dataset permission/roles list
        existing_datasets = [inp for inp in inp_data.values() if inp]
        if existing_datasets:
            output_permissions = trans.app.security_agent.guess_derived_permissions_for_datasets(
                existing_datasets)
        else:
            # No valid inputs, we will use history defaults
            output_permissions = trans.app.security_agent.history_get_default_permissions(
                history)

        # Build name for output datasets based on tool name and input names
        on_text = on_text_for_names(input_names)

        # Add the dbkey to the incoming parameters
        incoming["dbkey"] = input_dbkey
        # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
        wrapped_params = WrappedParameters(trans, tool, incoming)
        # Keep track of parent / child relationships, we'll create all the
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        object_store_populator = ObjectStorePopulator(trans.app)

        def handle_output(name, output):
            if output.parent:
                parent_to_child_pairs.append((output.parent, name))
                child_dataset_names.add(name)
            ## What is the following hack for? Need to document under what
            ## conditions can the following occur? ([email protected])
            # HACK: the output data has already been created
            #      this happens i.e. as a result of the async controller
            if name in incoming:
                dataid = incoming[name]
                data = trans.sa_session.query(
                    trans.app.model.HistoryDatasetAssociation).get(dataid)
                assert data is not None
                out_data[name] = data
            else:
                ext = determine_output_format(output, wrapped_params.params,
                                              inp_data, input_ext)
                data = trans.app.model.HistoryDatasetAssociation(
                    extension=ext,
                    create_dataset=True,
                    sa_session=trans.sa_session)
                if output.hidden:
                    data.visible = False
                # Commit the dataset immediately so it gets database assigned unique id
                trans.sa_session.add(data)
                trans.sa_session.flush()
                trans.app.security_agent.set_all_dataset_permissions(
                    data.dataset, output_permissions)

            object_store_populator.set_object_store_id(data)

            # This may not be necessary with the new parent/child associations
            data.designation = name
            # Copy metadata from one of the inputs if requested.

            # metadata source can be either a string referencing an input
            # or an actual object to copy.
            metadata_source = output.metadata_source
            if metadata_source:
                if isinstance(metadata_source, basestring):
                    metadata_source = inp_data[metadata_source]

            if metadata_source is not None:
                data.init_meta(copy_from=metadata_source)
            else:
                data.init_meta()
            # Take dbkey from LAST input
            data.dbkey = str(input_dbkey)
            # Set state
            # FIXME: shouldn't this be NEW until the job runner changes it?
            data.state = data.states.QUEUED
            data.blurb = "queued"
            # Set output label
            data.name = self.get_output_name(output, data, tool, on_text,
                                             trans, incoming, history,
                                             wrapped_params.params, job_params)
            # Store output
            out_data[name] = data
            if output.actions:
                #Apply pre-job tool-output-dataset actions; e.g. setting metadata, changing format
                output_action_params = dict(out_data)
                output_action_params.update(incoming)
                output.actions.apply_action(data, output_action_params)
            # Store all changes to database
            trans.sa_session.flush()
            return data

        for name, output in tool.outputs.items():
            if not filter_output(output, incoming):
                if output.collection:
                    collections_manager = trans.app.dataset_collections_service

                    # As far as I can tell - this is always true - but just verify
                    assert set_output_history, "Cannot create dataset collection for this kind of tool."

                    elements = odict()
                    input_collections = dict([
                        (k, v[0])
                        for k, v in inp_dataset_collections.iteritems()
                    ])
                    known_outputs = output.known_outputs(
                        input_collections, collections_manager.type_registry)
                    # Just to echo TODO elsewhere - this should be restructured to allow
                    # nested collections.
                    for output_part_def in known_outputs:
                        effective_output_name = output_part_def.effective_output_name
                        element = handle_output(effective_output_name,
                                                output_part_def.output_def)
                        # Following hack causes dataset to not be added to history...
                        child_dataset_names.add(effective_output_name)

                        if set_output_history:
                            history.add_dataset(element,
                                                set_hid=set_output_hid)
                        trans.sa_session.add(element)
                        trans.sa_session.flush()

                        elements[output_part_def.element_identifier] = element

                    if output.dynamic_structure:
                        assert not elements  # known_outputs must have been empty
                        elements = collections_manager.ELEMENTS_UNINITIALIZED

                    if mapping_over_collection:
                        dc = collections_manager.create_dataset_collection(
                            trans,
                            collection_type=output.structure.collection_type,
                            elements=elements,
                        )
                        out_collections[name] = dc
                    else:
                        hdca_name = self.get_output_name(
                            output, None, tool, on_text, trans, incoming,
                            history, wrapped_params.params, job_params)
                        hdca = collections_manager.create(
                            trans,
                            history,
                            name=hdca_name,
                            collection_type=output.structure.collection_type,
                            elements=elements,
                        )
                        # name here is name of the output element - not name
                        # of the hdca.
                        out_collection_instances[name] = hdca
                else:
                    handle_output(name, output)
        # Add all the top-level (non-child) datasets to the history unless otherwise specified
        for name in out_data.keys():
            if name not in child_dataset_names and name not in incoming:  # don't add children; or already existing datasets, i.e. async created
                data = out_data[name]
                if set_output_history:
                    history.add_dataset(data, set_hid=set_output_hid)
                trans.sa_session.add(data)
                trans.sa_session.flush()
        # Add all the children to their parents
        for parent_name, child_name in parent_to_child_pairs:
            parent_dataset = out_data[parent_name]
            child_dataset = out_data[child_name]
            parent_dataset.children.append(child_dataset)
        # Store data after custom code runs
        trans.sa_session.flush()
        # Create the job object
        job = trans.app.model.Job()

        if hasattr(trans, "get_galaxy_session"):
            galaxy_session = trans.get_galaxy_session()
            # If we're submitting from the API, there won't be a session.
            if type(galaxy_session) == trans.model.GalaxySession:
                job.session_id = galaxy_session.id
        if trans.user is not None:
            job.user_id = trans.user.id
        job.history_id = history.id
        job.tool_id = tool.id
        try:
            # For backward compatibility, some tools may not have versions yet.
            job.tool_version = tool.version
        except:
            job.tool_version = "1.0.0"
        # FIXME: Don't need all of incoming here, just the defined parameters
        #        from the tool. We need to deal with tools that pass all post
        #        parameters to the command as a special case.
        for name, (dataset_collection,
                   reduced) in inp_dataset_collections.iteritems():
            # TODO: Does this work if nested in repeat/conditional?
            if reduced:
                incoming[
                    name] = "__collection_reduce__|%s" % dataset_collection.id
            # Should verify security? We check security of individual
            # datasets below?
            job.add_input_dataset_collection(name, dataset_collection)
        for name, value in tool.params_to_strings(incoming,
                                                  trans.app).iteritems():
            job.add_parameter(name, value)
        current_user_roles = trans.get_current_user_roles()
        for name, dataset in inp_data.iteritems():
            if dataset:
                if not trans.app.security_agent.can_access_dataset(
                        current_user_roles, dataset.dataset):
                    raise "User does not have permission to use a dataset (%s) provided for input." % data.id
                job.add_input_dataset(name, dataset)
            else:
                job.add_input_dataset(name, None)
        for name, dataset in out_data.iteritems():
            job.add_output_dataset(name, dataset)
        for name, dataset_collection in out_collections.iteritems():
            job.add_implicit_output_dataset_collection(name,
                                                       dataset_collection)
        for name, dataset_collection_instance in out_collection_instances.iteritems(
        ):
            job.add_output_dataset_collection(name,
                                              dataset_collection_instance)
        job.object_store_id = object_store_populator.object_store_id
        if job_params:
            job.params = dumps(job_params)
        job.set_handler(tool.get_job_handler(job_params))
        trans.sa_session.add(job)
        # Now that we have a job id, we can remap any outputs if this is a rerun and the user chose to continue dependent jobs
        # This functionality requires tracking jobs in the database.
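        # In outline (implemented below): load the old job, check that its tool and
        # user/session match this run, repoint paused dependent jobs at the new
        # output datasets, and un-pause them; the old outputs are then hidden.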
        if trans.app.config.track_jobs_in_database and rerun_remap_job_id is not None:
            try:
                old_job = trans.sa_session.query(
                    trans.app.model.Job).get(rerun_remap_job_id)
                assert old_job is not None, '(%s/%s): Old job id is invalid' % (
                    rerun_remap_job_id, job.id)
                assert old_job.tool_id == job.tool_id, '(%s/%s): Old tool id (%s) does not match rerun tool id (%s)' % (
                    old_job.id, job.id, old_job.tool_id, job.tool_id)
                if trans.user is not None:
                    assert old_job.user_id == trans.user.id, '(%s/%s): Old user id (%s) does not match rerun user id (%s)' % (
                        old_job.id, job.id, old_job.user_id, trans.user.id)
                elif trans.user is None and type(
                        galaxy_session) == trans.model.GalaxySession:
                    assert old_job.session_id == galaxy_session.id, '(%s/%s): Old session id (%s) does not match rerun session id (%s)' % (
                        old_job.id, job.id, old_job.session_id,
                        galaxy_session.id)
                else:
                    raise Exception(
                        '(%s/%s): Remapping via the API is not (yet) supported'
                        % (old_job.id, job.id))
                for jtod in old_job.output_datasets:
                    for (job_to_remap,
                         jtid) in [(jtid.job, jtid)
                                   for jtid in jtod.dataset.dependent_jobs]:
                        if (trans.user is not None and job_to_remap.user_id
                                == trans.user.id) or (trans.user is None and
                                                      job_to_remap.session_id
                                                      == galaxy_session.id):
                            if job_to_remap.state == job_to_remap.states.PAUSED:
                                job_to_remap.state = job_to_remap.states.NEW
                            for hda in [
                                    dep_jtod.dataset for dep_jtod in
                                    job_to_remap.output_datasets
                            ]:
                                if hda.state == hda.states.PAUSED:
                                    hda.state = hda.states.NEW
                                    hda.info = None
                            for p in job_to_remap.parameters:
                                if p.name == jtid.name and p.value == str(
                                        jtod.dataset.id):
                                    p.value = str(out_data[jtod.name].id)
                            jtid.dataset = out_data[jtod.name]
                            jtid.dataset.hid = jtod.dataset.hid
                            log.info(
                                'Job %s input HDA %s remapped to new HDA %s' %
                                (job_to_remap.id, jtod.dataset.id,
                                 jtid.dataset.id))
                            trans.sa_session.add(job_to_remap)
                            trans.sa_session.add(jtid)
                    jtod.dataset.visible = False
                    trans.sa_session.add(jtod)
            except Exception, e:
                log.exception('Cannot remap rerun dependencies.')
Example #30
0
    def _rerun_tool(self, trans, payload, **kwargs):
        """
        Rerun a tool to produce a new output dataset that corresponds to a
        dataset that a user is currently viewing.
        """

        #
        # TODO: refactor to use same code as run_tool.
        #

        # Run tool on region if a region is specified.
        run_on_regions = False
        regions = payload.get('regions', None)
        if regions:
            if isinstance(regions, dict):
                # Regions is a single region.
                regions = [GenomeRegion.from_dict(regions)]
            elif isinstance(regions, list):
                # There is a list of regions.
                regions = [GenomeRegion.from_dict(r) for r in regions]

                if len(regions) > 1:
                    # Sort by chrom name, start so that data is not fetched out of order.
                    regions = sorted(regions,
                                     key=lambda r: (r.chrom.lower(), r.start))

                    # Merge overlapping regions so that regions do not overlap
                    # and hence data is not included multiple times.
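                    # Illustrative example (hypothetical coordinates):
                    #   [chr1:100-200, chr1:150-300, chr2:50-80]
                    #     -> [chr1:100-300, chr2:50-80]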
                    prev = regions[0]
                    cur = regions[1]
                    index = 1
                    while True:
                        if cur.chrom == prev.chrom and cur.start <= prev.end:
                            # Found overlapping regions, so join them into prev.
                            prev.end = max(prev.end, cur.end)
                            del regions[index]
                        else:
                            # No overlap, move to next region.
                            prev = cur
                            index += 1

                        # Get next region or exit.
                        if index == len(regions):
                            # Done.
                            break
                        else:
                            cur = regions[index]

            run_on_regions = True

        # Dataset check.
        decoded_dataset_id = self.decode_id(payload.get('target_dataset_id'))
        original_dataset = self.hda_manager.get_accessible(decoded_dataset_id,
                                                           user=trans.user)
        original_dataset = self.hda_manager.error_if_uploading(
            original_dataset)
        msg = self.hda_manager.data_conversion_status(original_dataset)
        if msg:
            return msg

        # Set tool parameters--except non-hidden dataset parameters--using combination of
        # job's previous parameters and incoming parameters. Incoming parameters
        # have priority.
        #
        original_job = self.hda_manager.creating_job(trans, original_dataset)
        tool = trans.app.toolbox.get_tool(original_job.tool_id)
        if not tool or not tool.allow_user_access(trans.user):
            return trans.app.model.Dataset.conversion_messages.NO_TOOL
        tool_params = dict([(p.name, p.value)
                            for p in original_job.parameters])

        # TODO: rather than set new inputs using dict of json'ed value, unpack parameters and set using set_param_value below.
        # TODO: need to handle updates to conditional parameters; conditional
        # params are stored in dicts (and dicts within dicts).
        new_inputs = payload['inputs']
        tool_params.update(
            dict([(key, dumps(value)) for key, value in new_inputs.items()
                  if key in tool.inputs and new_inputs[key] is not None]))
        tool_params = tool.params_from_strings(tool_params, self.app)

        #
        # If running the tool on a region, convert input datasets (create indices) so
        # that regions of data can be quickly extracted.
        #
        data_provider_registry = trans.app.data_provider_registry
        messages_list = []
        if run_on_regions:
            for jida in original_job.input_datasets:
                input_dataset = jida.dataset
                data_provider = data_provider_registry.get_data_provider(
                    trans, original_dataset=input_dataset, source='data')
                if data_provider and (not data_provider.converted_dataset
                                      or data_provider.converted_dataset.state
                                      != trans.app.model.Dataset.states.OK):
                    # Can convert but no converted dataset yet, so return message about why.
                    data_sources = input_dataset.datatype.data_sources
                    msg = input_dataset.convert_dataset(
                        trans, data_sources['data'])
                    if msg is not None:
                        messages_list.append(msg)

        # Return any messages generated during conversions.
        return_message = self._get_highest_priority_msg(messages_list)
        if return_message:
            return return_message

        #
        # Set target history (the history that tool will use for inputs/outputs).
        # If user owns dataset, put new data in original dataset's history; if
        # user does not own dataset (and hence is accessing dataset via sharing),
        # put new data in user's current history.
        #
        if original_dataset.history.user == trans.user:
            target_history = original_dataset.history
        else:
            target_history = trans.get_history(create=True)
        hda_permissions = trans.app.security_agent.history_get_default_permissions(
            target_history)

        def set_param_value(param_dict, param_name, param_value):
            """
            Set new parameter value in a tool's parameter dictionary.
            """

            # Recursive function to set param value.
            def set_value(param_dict, group_name, group_index, param_name,
                          param_value):
                if group_name in param_dict:
                    param_dict[group_name][group_index][
                        param_name] = param_value
                    return True
                elif param_name in param_dict:
                    param_dict[param_name] = param_value
                    return True
                else:
                    # Recursive search.
                    return_val = False
                    for value in param_dict.values():
                        if isinstance(value, dict):
                            return_val = set_value(value, group_name,
                                                   group_index, param_name,
                                                   param_value)
                            if return_val:
                                return return_val
                    return False

            # Parse parameter name if necessary.
            if param_name.find("|") == -1:
                # Non-grouping parameter.
                group_name = group_index = None
            else:
                # Grouping parameter.
                group, param_name = param_name.split("|")
                index = group.rfind("_")
                group_name = group[:index]
                group_index = int(group[index + 1:])
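                # e.g. (hypothetical name) "cond_2|my_param" parses to
                #   group_name="cond", group_index=2, param_name="my_param"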

            return set_value(param_dict, group_name, group_index, param_name,
                             param_value)

        # Set parameters based on the tool's trackster config.
        params_set = {}
        for action in tool.trackster_conf.actions:
            success = False
            for joda in original_job.output_datasets:
                if joda.name == action.output_name:
                    set_param_value(tool_params, action.name, joda.dataset)
                    params_set[action.name] = True
                    success = True
                    break

            if not success:
                return trans.app.model.Dataset.conversion_messages.ERROR

        #
        # Set input datasets for tool. If running on regions, extract and use subset
        # when possible.
        #
        if run_on_regions:
            regions_str = ",".join([str(r) for r in regions])
        for jida in original_job.input_datasets:
            # If param set previously by config actions, do nothing.
            if jida.name in params_set:
                continue

            input_dataset = jida.dataset
            if input_dataset is None:  # optional dataset and dataset wasn't selected
                tool_params[jida.name] = None
            elif run_on_regions and 'data' in input_dataset.datatype.data_sources:
                # Dataset is indexed and hence a subset can be extracted and used
                # as input.

                # Look for subset.
                subset_dataset_association = trans.sa_session.query( trans.app.model.HistoryDatasetAssociationSubset ) \
                                                             .filter_by( hda=input_dataset, location=regions_str ) \
                                                             .first()
                if subset_dataset_association:
                    # Data subset exists.
                    subset_dataset = subset_dataset_association.subset
                else:
                    # Need to create subset.
                    data_source = input_dataset.datatype.data_sources['data']
                    converted_dataset = input_dataset.get_converted_dataset(
                        trans, data_source)
                    deps = input_dataset.get_converted_dataset_deps(
                        trans, data_source)

                    # Create new HDA for input dataset's subset.
                    new_dataset = trans.app.model.HistoryDatasetAssociation( extension=input_dataset.ext,
                                                                             dbkey=input_dataset.dbkey,
                                                                             create_dataset=True,
                                                                             sa_session=trans.sa_session,
                                                                             name="Subset [%s] of data %i" % ( regions_str, input_dataset.hid ),
                                                                             visible=False )
                    target_history.add_dataset(new_dataset)
                    trans.sa_session.add(new_dataset)
                    trans.app.security_agent.set_all_dataset_permissions(
                        new_dataset.dataset, hda_permissions)

                    # Write subset of data to new dataset
                    data_provider = data_provider_registry.get_data_provider(
                        trans, original_dataset=input_dataset, source='data')
                    trans.app.object_store.create(new_dataset.dataset)
                    data_provider.write_data_to_file(regions,
                                                     new_dataset.file_name)

                    # TODO: (a) size not working; (b) need to set peek.
                    new_dataset.set_size()
                    new_dataset.info = "Data subset for trackster"
                    new_dataset.set_dataset_state(
                        trans.app.model.Dataset.states.OK)

                    # Set metadata.
                    # TODO: set meta internally if dataset is small enough?
                    trans.app.datatypes_registry.set_external_metadata_tool.tool_action.execute(
                        trans.app.datatypes_registry.
                        set_external_metadata_tool,
                        trans,
                        incoming={'input1': new_dataset},
                        overwrite=False,
                        job_params={"source": "trackster"})
                    # Add HDA subset association.
                    subset_association = trans.app.model.HistoryDatasetAssociationSubset(
                        hda=input_dataset,
                        subset=new_dataset,
                        location=regions_str)
                    trans.sa_session.add(subset_association)

                    subset_dataset = new_dataset

                trans.sa_session.flush()

                # Add dataset to tool's parameters.
                if not set_param_value(tool_params, jida.name, subset_dataset):
                    return {
                        "error": True,
                        "message": "error setting parameter %s" % jida.name
                    }

        #
        # Execute tool and handle outputs.
        #
        try:
            subset_job, subset_job_outputs = tool.execute(
                trans,
                incoming=tool_params,
                history=target_history,
                job_params={"source": "trackster"})
        except Exception, e:
            # Lots of things can go wrong when trying to execute tool.
            return {
                "error": True,
                "message": e.__class__.__name__ + ": " + str(e)
            }
Example #31
0
def create_job(trans,
               params,
               tool,
               json_file_path,
               data_list,
               folder=None,
               history=None):
    """
    Create the upload job.
    """
    job = trans.app.model.Job()
    galaxy_session = trans.get_galaxy_session()
    if type(galaxy_session) == trans.model.GalaxySession:
        job.session_id = galaxy_session.id
    if trans.user is not None:
        job.user_id = trans.user.id
    if folder:
        job.library_folder_id = folder.id
    else:
        if not history:
            history = trans.history
        job.history_id = history.id
    job.tool_id = tool.id
    job.tool_version = tool.version
    job.set_state(job.states.UPLOAD)
    trans.sa_session.add(job)
    trans.sa_session.flush()
    log.info('tool %s created job id %d' % (tool.id, job.id))
    trans.log_event('created job id %d' % job.id, tool_id=tool.id)

    for name, value in tool.params_to_strings(params, trans.app).iteritems():
        job.add_parameter(name, value)
    job.add_parameter('paramfile', dumps(json_file_path))
    object_store_id = None
    for i, dataset in enumerate(data_list):
        if folder:
            job.add_output_library_dataset('output%i' % i, dataset)
        else:
            job.add_output_dataset('output%i' % i, dataset)
        # Create an empty file immediately
        if not dataset.dataset.external_filename:
            dataset.dataset.object_store_id = object_store_id
            try:
                trans.app.object_store.create(dataset.dataset)
            except ObjectInvalid:
                raise Exception(
                    'Unable to create output dataset: object store is full')
            object_store_id = dataset.dataset.object_store_id
            trans.sa_session.add(dataset)
            # open( dataset.file_name, "w" ).close()
    job.object_store_id = object_store_id
    job.set_state(job.states.NEW)
    job.set_handler(tool.get_job_handler(None))
    trans.sa_session.add(job)
    trans.sa_session.flush()

    # Queue the job for execution
    trans.app.job_queue.put(job.id, job.tool_id)
    trans.log_event("Added job to the job queue, id: %s" % str(job.id),
                    tool_id=job.tool_id)
    output = odict()
    for i, v in enumerate(data_list):
        output['output%i' % i] = v
    return job, output
Example #32
0
            test_config.plugins.addPlugin( StructuredTestDataPlugin() )
            test_config.configure( sys.argv )
            result = run_tests( test_config )
            success = result.wasSuccessful()
            return success

        if testing_migrated_tools or testing_installed_tools:
            shed_tools_dict = {}
            if testing_migrated_tools:
                has_test_data, shed_tools_dict = parse_tool_panel_config( migrated_tool_panel_config, shed_tools_dict )
            elif testing_installed_tools:
                for shed_tool_config in installed_tool_panel_configs:
                    has_test_data, shed_tools_dict = parse_tool_panel_config( shed_tool_config, shed_tools_dict )
            # Persist the shed_tools_dict to the galaxy_tool_shed_test_file.
            shed_tools_file = open( galaxy_tool_shed_test_file, 'w' )
            shed_tools_file.write( dumps( shed_tools_dict ) )
            shed_tools_file.close()
            if not os.path.isabs( galaxy_tool_shed_test_file ):
                galaxy_tool_shed_test_file = os.path.join( os.getcwd(), galaxy_tool_shed_test_file )
            os.environ[ 'GALAXY_TOOL_SHED_TEST_FILE' ] = galaxy_tool_shed_test_file
            if testing_installed_tools:
                # Eliminate the migrated_tool_panel_config from the app's tool_configs, append the list of installed_tool_panel_configs,
                # and reload the app's toolbox.
                relative_migrated_tool_panel_config = os.path.join( app.config.root, migrated_tool_panel_config )
                if relative_migrated_tool_panel_config in tool_configs:
                    tool_configs.remove( relative_migrated_tool_panel_config )
                for installed_tool_panel_config in installed_tool_panel_configs:
                    tool_configs.append( installed_tool_panel_config )
                app.toolbox = tools.ToolBox( tool_configs, app.config.tool_path, app )
            success = _run_functional_test( testing_shed_tools=True )
            try:
Example #33
0
def pretty_print_json(json_data, is_json_string=False):
    if is_json_string:
        json_data = json.loads(json_data)
    return json.dumps(json_data, sort_keys=True, indent=4)
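
A minimal usage sketch for the helper above, assuming import json and the pretty_print_json definition are in scope; the sample payloads are illustrative, not from the original code:

# Pretty-print a dict and a JSON string (illustrative inputs):
print(pretty_print_json({'b': 1, 'a': [2, 3]}))
print(pretty_print_json('{"b": 1, "a": 2}', is_json_string=True))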
Example #34
0
            result = run_tests(test_config)
            success = result.wasSuccessful()
            return success

        if testing_migrated_tools or testing_installed_tools:
            shed_tools_dict = {}
            if testing_migrated_tools:
                has_test_data, shed_tools_dict = parse_tool_panel_config(
                    migrated_tool_panel_config, shed_tools_dict)
            elif testing_installed_tools:
                for shed_tool_config in installed_tool_panel_configs:
                    has_test_data, shed_tools_dict = parse_tool_panel_config(
                        shed_tool_config, shed_tools_dict)
            # Persist the shed_tools_dict to the galaxy_tool_shed_test_file.
            shed_tools_file = open(galaxy_tool_shed_test_file, 'w')
            shed_tools_file.write(dumps(shed_tools_dict))
            shed_tools_file.close()
            if not os.path.isabs(galaxy_tool_shed_test_file):
                galaxy_tool_shed_test_file = os.path.join(
                    os.getcwd(), galaxy_tool_shed_test_file)
            os.environ[
                'GALAXY_TOOL_SHED_TEST_FILE'] = galaxy_tool_shed_test_file
            if testing_installed_tools:
                # Eliminate the migrated_tool_panel_config from the app's tool_configs, append the list of installed_tool_panel_configs,
                # and reload the app's toolbox.
                relative_migrated_tool_panel_config = os.path.join(
                    app.config.root, migrated_tool_panel_config)
                if relative_migrated_tool_panel_config in tool_configs:
                    tool_configs.remove(relative_migrated_tool_panel_config)
                for installed_tool_panel_config in installed_tool_panel_configs:
                    tool_configs.append(installed_tool_panel_config)
Example #35
0
 def get_state( self, secure=True ):
     return dumps( self.state )
Example #36
0
                         dataset_id=data.dataset.id,
                         dbkey=uploaded_dataset.dbkey,
                         type=uploaded_dataset.type,
                         is_binary=is_binary,
                         link_data_only=link_data_only,
                         uuid=uuid_str,
                         to_posix_lines=getattr(uploaded_dataset, "to_posix_lines", True),
                         space_to_tab=uploaded_dataset.space_to_tab,
                         in_place=trans.app.config.external_chown_script is None,
                         path=uploaded_dataset.path )
            # TODO: This will have to change when we start bundling inputs.
            # Also, in_place above causes the file to be left behind since the
            # user cannot remove it unless the parent directory is writable.
            if link_data_only == 'copy_files' and trans.app.config.external_chown_script:
                _chown( uploaded_dataset.path )
        json_file.write( dumps( json ) + '\n' )
    json_file.close()
    if trans.app.config.external_chown_script:
        _chown( json_file_path )
    return json_file_path


def create_job( trans, params, tool, json_file_path, data_list, folder=None, history=None ):
    """
    Create the upload job.
    """
    job = trans.app.model.Job()
    galaxy_session = trans.get_galaxy_session()
    if type( galaxy_session ) == trans.model.GalaxySession:
        job.session_id = galaxy_session.id
    if trans.user is not None:
Example #37
0
                dbkey=uploaded_dataset.dbkey,
                type=uploaded_dataset.type,
                is_binary=is_binary,
                link_data_only=link_data_only,
                uuid=uuid_str,
                to_posix_lines=getattr(uploaded_dataset, "to_posix_lines",
                                       True),
                space_to_tab=uploaded_dataset.space_to_tab,
                in_place=trans.app.config.external_chown_script is None,
                path=uploaded_dataset.path)
            # TODO: This will have to change when we start bundling inputs.
            # Also, in_place above causes the file to be left behind since the
            # user cannot remove it unless the parent directory is writable.
            if link_data_only == 'copy_files' and trans.app.config.external_chown_script:
                _chown(uploaded_dataset.path)
        json_file.write(dumps(json) + '\n')
    json_file.close()
    if trans.app.config.external_chown_script:
        _chown(json_file_path)
    return json_file_path


def create_job(trans,
               params,
               tool,
               json_file_path,
               data_list,
               folder=None,
               history=None):
    """
    Create the upload job.
Example #38
0
    def cleanup_after_job(self):
        """ Set history, datasets, and jobs' attributes and clean up archive directory. """

        #
        # Helper methods.
        #

        def file_in_dir(file_path, a_dir):
            """ Returns true if file is in directory. """
            abs_file_path = os.path.abspath(file_path)
            return os.path.split(abs_file_path)[0] == a_dir
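            # e.g. file_in_dir("/tmp/archive/datasets/a.dat", "/tmp/archive/datasets") -> True
            # (paths are illustrative; note this checks the immediate parent directory only)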

        def read_file_contents(file_path):
            """ Read contents of a file. """
            fp = open(file_path, 'rb')
            buffsize = 1048576
            file_contents = ''
            try:
                while True:
                    file_contents += fp.read(buffsize)
                    if not file_contents or len(file_contents) % buffsize != 0:
                        break
            except OverflowError:
                pass
            fp.close()
            return file_contents

        def get_tag_str(tag, value):
            """ Builds a tag string for a tag, value pair. """
            if not value:
                return tag
            else:
                return tag + ":" + value

        #
        # Import history.
        #

        jiha = self.sa_session.query(model.JobImportHistoryArchive).filter_by(
            job_id=self.job_id).first()
        if jiha:
            try:
                archive_dir = jiha.archive_dir
                user = jiha.job.user

                #
                # Create history.
                #
                history_attr_file_name = os.path.join(archive_dir,
                                                      'history_attrs.txt')
                history_attr_str = read_file_contents(history_attr_file_name)
                history_attrs = loads(history_attr_str)

                # Create history.
                new_history = model.History(
                    name='imported from archive: %s' %
                    history_attrs['name'].encode('utf-8'),
                    user=user)
                new_history.importing = True
                new_history.hid_counter = history_attrs['hid_counter']
                new_history.genome_build = history_attrs['genome_build']
                self.sa_session.add(new_history)
                jiha.history = new_history
                self.sa_session.flush()

                # Add annotation, tags.
                if user:
                    self.add_item_annotation(self.sa_session, user,
                                             new_history,
                                             history_attrs['annotation'])
                    """
                    TODO: figure out how to add tags to items.
                    for tag, value in history_attrs[ 'tags' ].items():
                        trans.app.tag_handler.apply_item_tags( trans, trans.user, new_history, get_tag_str( tag, value ) )
                    """

                #
                # Create datasets.
                #
                datasets_attrs_file_name = os.path.join(
                    archive_dir, 'datasets_attrs.txt')
                datasets_attr_str = read_file_contents(
                    datasets_attrs_file_name)
                datasets_attrs = loads(datasets_attr_str)

                if os.path.exists(datasets_attrs_file_name + ".provenance"):
                    provenance_attr_str = read_file_contents(
                        datasets_attrs_file_name + ".provenance")
                    provenance_attrs = loads(provenance_attr_str)
                    datasets_attrs += provenance_attrs

                # Get counts of how often each dataset file is used; a file can
                # be linked to multiple dataset objects (HDAs).
                datasets_usage_counts = {}
                for dataset_attrs in datasets_attrs:
                    temp_dataset_file_name = \
                        os.path.abspath( os.path.join( archive_dir, dataset_attrs['file_name'] ) )
                    if (temp_dataset_file_name not in datasets_usage_counts):
                        datasets_usage_counts[temp_dataset_file_name] = 0
                    datasets_usage_counts[temp_dataset_file_name] += 1

                # Create datasets.
                for dataset_attrs in datasets_attrs:
                    metadata = dataset_attrs['metadata']

                    # Create dataset and HDA.
                    hda = model.HistoryDatasetAssociation(
                        name=dataset_attrs['name'].encode('utf-8'),
                        extension=dataset_attrs['extension'],
                        info=dataset_attrs['info'].encode('utf-8'),
                        blurb=dataset_attrs['blurb'],
                        peek=dataset_attrs['peek'],
                        designation=dataset_attrs['designation'],
                        visible=dataset_attrs['visible'],
                        dbkey=metadata['dbkey'],
                        metadata=metadata,
                        history=new_history,
                        create_dataset=True,
                        sa_session=self.sa_session)
                    if 'uuid' in dataset_attrs:
                        hda.dataset.uuid = dataset_attrs["uuid"]
                    if dataset_attrs.get('exported', True) is False:
                        hda.state = hda.states.DISCARDED
                        hda.deleted = True
                        hda.purged = True
                    else:
                        hda.state = hda.states.OK
                    self.sa_session.add(hda)
                    self.sa_session.flush()
                    new_history.add_dataset(hda, genome_build=None)
                    hda.hid = dataset_attrs[
                        'hid']  # Overwrite default hid set when HDA added to history.
                    # TODO: Is there a way to recover permissions? Is this needed?
                    # permissions = trans.app.security_agent.history_get_default_permissions( new_history )
                    # trans.app.security_agent.set_all_dataset_permissions( hda.dataset, permissions )
                    self.sa_session.flush()
                    if dataset_attrs.get('exported', True) is True:
                        # Do security check and move/copy dataset data.
                        temp_dataset_file_name = \
                            os.path.realpath( os.path.abspath( os.path.join( archive_dir, dataset_attrs['file_name'] ) ) )
                        if not file_in_dir(
                                temp_dataset_file_name,
                                os.path.join(archive_dir, "datasets")):
                            raise MalformedContents(
                                "Invalid dataset path: %s" %
                                temp_dataset_file_name)
                        if datasets_usage_counts[temp_dataset_file_name] == 1:
                            self.app.object_store.update_from_file(
                                hda.dataset,
                                file_name=temp_dataset_file_name,
                                create=True)

                            # Import additional files if present. Histories exported previously might not have this attribute set.
                            dataset_extra_files_path = dataset_attrs.get(
                                'extra_files_path', None)
                            if dataset_extra_files_path:
                                try:
                                    file_list = os.listdir(
                                        os.path.join(archive_dir,
                                                     dataset_extra_files_path))
                                except OSError:
                                    file_list = []

                                if file_list:
                                    for extra_file in file_list:
                                        self.app.object_store.update_from_file(
                                            hda.dataset,
                                            extra_dir='dataset_%s_files' %
                                            hda.dataset.id,
                                            alt_name=extra_file,
                                            file_name=os.path.join(
                                                archive_dir,
                                                dataset_extra_files_path,
                                                extra_file),
                                            create=True)
                        else:
                            datasets_usage_counts[temp_dataset_file_name] -= 1
                            shutil.copyfile(temp_dataset_file_name,
                                            hda.file_name)
                        hda.dataset.set_total_size(
                        )  # update the filesize record in the database

                    # Set tags, annotations.
                    if user:
                        self.add_item_annotation(self.sa_session, user, hda,
                                                 dataset_attrs['annotation'])
                        # TODO: Set tags.
                        """
                        for tag, value in dataset_attrs[ 'tags' ].items():
                            trans.app.tag_handler.apply_item_tags( trans, trans.user, hda, get_tag_str( tag, value ) )
                            self.sa_session.flush()
                        """

                    # Although metadata is set above, need to set metadata to recover BAI for BAMs.
                    if hda.extension == 'bam':
                        self.app.datatypes_registry.set_external_metadata_tool.tool_action.execute_via_app(
                            self.app.datatypes_registry.
                            set_external_metadata_tool,
                            self.app,
                            jiha.job.session_id,
                            new_history.id,
                            jiha.job.user,
                            incoming={'input1': hda},
                            overwrite=False)

                #
                # Create jobs.
                #

                # Read jobs attributes.
                jobs_attr_file_name = os.path.join(archive_dir,
                                                   'jobs_attrs.txt')
                jobs_attr_str = read_file_contents(jobs_attr_file_name)

                # Decode jobs attributes.
                def as_hda(obj_dct):
                    """ Hook to 'decode' an HDA; method uses history and HID to get the HDA represented by
                        the encoded object. This only works because HDAs are created above. """
                    if obj_dct.get('__HistoryDatasetAssociation__', False):
                        return self.sa_session.query(
                            model.HistoryDatasetAssociation).filter_by(
                                history=new_history,
                                hid=obj_dct['hid']).first()
                    return obj_dct
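                    # e.g. {"__HistoryDatasetAssociation__": True, "hid": 7} resolves to the
                    # HDA with hid 7 in new_history (values illustrative); other dicts pass through.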

                jobs_attrs = loads(jobs_attr_str, object_hook=as_hda)

                # Create each job.
                for job_attrs in jobs_attrs:
                    imported_job = model.Job()
                    imported_job.user = user
                    # TODO: set session?
                    # imported_job.session = trans.get_galaxy_session().id
                    imported_job.history = new_history
                    imported_job.imported = True
                    imported_job.tool_id = job_attrs['tool_id']
                    imported_job.tool_version = job_attrs['tool_version']
                    imported_job.set_state(job_attrs['state'])
                    imported_job.info = job_attrs.get('info', None)
                    imported_job.exit_code = job_attrs.get('exit_code', None)
                    imported_job.traceback = job_attrs.get('traceback', None)
                    imported_job.stdout = job_attrs.get('stdout', None)
                    imported_job.stderr = job_attrs.get('stderr', None)
                    imported_job.command_line = job_attrs.get(
                        'command_line', None)
                    try:
                        imported_job.create_time = datetime.datetime.strptime(
                            job_attrs["create_time"], "%Y-%m-%dT%H:%M:%S.%f")
                        imported_job.update_time = datetime.datetime.strptime(
                            job_attrs["update_time"], "%Y-%m-%dT%H:%M:%S.%f")
                    except:
                        pass
                    self.sa_session.add(imported_job)
                    self.sa_session.flush()

                    class HistoryDatasetAssociationIDEncoder(json.JSONEncoder):
                        """ Custom JSONEncoder for a HistoryDatasetAssociation that encodes an HDA as its ID. """
                        def default(self, obj):
                            """ Encode an HDA, default encoding for everything else. """
                            if isinstance(obj,
                                          model.HistoryDatasetAssociation):
                                return obj.id
                            return json.JSONEncoder.default(self, obj)
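                    # e.g. dumps({"input1": some_hda}, cls=HistoryDatasetAssociationIDEncoder)
                    # serializes the HDA as its integer id (illustrative; the actual id depends on the database).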

                    # Set parameters. May be useful to look at metadata.py for creating parameters.
                    # TODO: there may be a better way to set parameters, e.g.:
                    #   for name, value in tool.params_to_strings( incoming, trans.app ).iteritems():
                    #       job.add_parameter( name, value )
                    # to make this work, we'd need to flesh out the HDA objects. The code below is
                    # relatively similar.
                    for name, value in job_attrs['params'].items():
                        # Transform parameter values when necessary.
                        if isinstance(value, model.HistoryDatasetAssociation):
                            # HDA input: use hid to find input.
                            input_hda = self.sa_session.query( model.HistoryDatasetAssociation ) \
                                            .filter_by( history=new_history, hid=value.hid ).first()
                            value = input_hda.id
                        # print "added parameter %s-->%s to job %i" % ( name, value, imported_job.id )
                        imported_job.add_parameter(
                            name,
                            dumps(value,
                                  cls=HistoryDatasetAssociationIDEncoder))

                    # TODO: Connect jobs to input datasets.

                    # Connect jobs to output datasets.
                    for output_hid in job_attrs['output_datasets']:
                        # print "%s job has output dataset %i" % (imported_job.id, output_hid)
                        output_hda = self.sa_session.query(
                            model.HistoryDatasetAssociation).filter_by(
                                history=new_history, hid=output_hid).first()
                        if output_hda:
                            imported_job.add_output_dataset(
                                output_hda.name, output_hda)

                    # Connect jobs to input datasets.
                    if 'input_mapping' in job_attrs:
                        for input_name, input_hid in job_attrs[
                                'input_mapping'].items():
                            input_hda = self.sa_session.query( model.HistoryDatasetAssociation ) \
                                            .filter_by( history=new_history, hid=input_hid ).first()
                            if input_hda:
                                imported_job.add_input_dataset(
                                    input_name, input_hda)

                    self.sa_session.flush()

                # Done importing.
                new_history.importing = False
                self.sa_session.flush()

                # Cleanup.
                if os.path.exists(archive_dir):
                    shutil.rmtree(archive_dir)
            except Exception, e:
                jiha.job.stderr += "Error cleaning up history import job: %s" % e
                self.sa_session.flush()
                raise
Example #39
0
 def log_action( self, user=None, action=None, context=None, params=None):
     """
     Application-level logging of user actions.
     """
     if self.app.config.log_actions:
         action = self.app.model.UserAction(action=action, context=context, params=unicode( dumps( params ) ) )
         try:
             if user:
                 action.user = user
             else:
                 action.user = self.user
         except:
             action.user = None
         try:
             action.session_id = self.galaxy_session.id
         except:
             action.session_id = None
         self.sa_session.add( action )
         self.sa_session.flush()
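     # Typical call from request-handling code (illustrative values):
     #   trans.log_action(user=trans.user, action="tool.run", context="tool",
     #                    params={"tool_id": "cat1"})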
Example #40
0
    def execute_via_app(self,
                        tool,
                        app,
                        session_id,
                        history_id,
                        user=None,
                        incoming={},
                        set_output_hid=False,
                        overwrite=True,
                        history=None,
                        job_params=None):
        """
        Execute using application.
        """
        for name, value in incoming.iteritems():
            if isinstance(value, app.model.HistoryDatasetAssociation):
                dataset = value
                dataset_name = name
                type = 'hda'
                break
            elif isinstance(value, app.model.LibraryDatasetDatasetAssociation):
                dataset = value
                dataset_name = name
                type = 'ldda'
                break
        else:
            # Only reached if no HDA/LDDA was found among the incoming values.
            raise Exception(
                'The dataset to set metadata on could not be determined.')

        sa_session = app.model.context

        # Create the job object
        job = app.model.Job()
        job.session_id = session_id
        job.history_id = history_id
        job.tool_id = tool.id
        if user:
            job.user_id = user.id
        if job_params:
            job.params = dumps(job_params)
        start_job_state = job.state  # should be job.states.NEW
        try:
            # For backward compatibility, some tools may not have versions yet.
            job.tool_version = tool.version
        except:
            job.tool_version = "1.0.1"
        job.state = job.states.WAITING  # we need to set job state to something other than NEW, or else when tracking jobs in db it will be picked up before we have added input / output parameters
        job.set_handler(tool.get_job_handler(job_params))
        sa_session.add(job)
        sa_session.flush()  # ensure job.id is available

        # add parameters to job_parameter table
        # Store original dataset state, so we can restore it. A separate table might be better (no chance of 'losing' the original state)?
        incoming['__ORIGINAL_DATASET_STATE__'] = dataset.state
        input_paths = [
            DatasetPath(dataset.id, real_path=dataset.file_name, mutable=False)
        ]
        app.object_store.create(job,
                                base_dir='job_work',
                                dir_only=True,
                                extra_dir=str(job.id))
        job_working_dir = app.object_store.get_filename(job,
                                                        base_dir='job_work',
                                                        dir_only=True,
                                                        extra_dir=str(job.id))
        external_metadata_wrapper = JobExternalOutputMetadataWrapper(job)
        cmd_line = external_metadata_wrapper.setup_external_metadata(
            dataset,
            sa_session,
            exec_dir=None,
            tmp_dir=job_working_dir,
            dataset_files_path=app.model.Dataset.file_path,
            output_fnames=input_paths,
            config_root=app.config.root,
            config_file=app.config.config_file,
            datatypes_config=app.datatypes_registry.
            integrated_datatypes_configs,
            job_metadata=None,
            include_command=False,
            max_metadata_value_size=app.config.max_metadata_value_size,
            kwds={'overwrite': overwrite})
        incoming['__SET_EXTERNAL_METADATA_COMMAND_LINE__'] = cmd_line
        for name, value in tool.params_to_strings(incoming, app).iteritems():
            job.add_parameter(name, value)
        # add the dataset to job_to_input_dataset table
        if type == 'hda':
            job.add_input_dataset(dataset_name, dataset)
        elif type == 'ldda':
            job.add_input_library_dataset(dataset_name, dataset)
        # Need a special state here to show that metadata is being set and also allow the job to run
        # i.e. if state was set to 'running' the set metadata job would never run, as it would wait for input (the dataset to set metadata on) to be in a ready state
        dataset._state = dataset.states.SETTING_METADATA
        job.state = start_job_state  # job inputs have been configured, restore initial job state
        sa_session.flush()

        # Queue the job for execution
        app.job_queue.put(job.id, tool.id)
        # FIXME: need to add event logging to app and log events there rather than trans.
        # trans.log_event( "Added set external metadata job to the job queue, id: %s" % str(job.id), tool_id=job.tool_id )

        # clear e.g. converted files
        dataset.datatype.before_setting_metadata(dataset)

        return job, odict()
Example #41
0
    def execute(self, tool, trans, incoming={}, return_job=False, set_output_hid=True, set_output_history=True, history=None, job_params=None, rerun_remap_job_id=None, mapping_over_collection=False):
        """
        Executes a tool, creating job and tool outputs, associating them, and
        submitting the job to the job queue. If history is not specified, use
        trans.history as destination for tool's output datasets.
        """
        assert tool.allow_user_access( trans.user ), "User (%s) is not allowed to access this tool." % ( trans.user )
        # Set history.
        if not history:
            history = tool.get_default_history_by_trans( trans, create=True )

        out_data = odict()
        out_collections = {}
        out_collection_instances = {}
        # Track input dataset collections - but replace with simply lists so collect
        # input datasets can process these normally.
        inp_dataset_collections = self.collect_input_dataset_collections( tool, incoming )
        # Collect any input datasets from the incoming parameters
        inp_data = self.collect_input_datasets( tool, incoming, trans )

        # Deal with input dataset names, 'dbkey' and types
        input_names = []
        input_ext = 'data'
        input_dbkey = incoming.get( "dbkey", "?" )
        inp_items = inp_data.items()
        inp_items.reverse()
        for name, data in inp_items:
            if not data:
                data = NoneDataset( datatypes_registry=trans.app.datatypes_registry )
                continue

            # Convert LDDA to an HDA.
            if isinstance(data, LibraryDatasetDatasetAssociation):
                data = data.to_history_dataset_association( None )
                inp_data[name] = data

            else:  # HDA
                if data.hid:
                    input_names.append( 'data %s' % data.hid )
            input_ext = data.ext

            if data.dbkey not in [None, '?']:
                input_dbkey = data.dbkey

            identifier = getattr( data, "element_identifier", None )
            if identifier is not None:
                incoming[ "%s|__identifier__" % name ] = identifier

        # Collect chromInfo dataset and add as parameters to incoming
        ( chrom_info, db_dataset ) = trans.app.genome_builds.get_chrom_info( input_dbkey, trans=trans, custom_build_hack_get_len_from_fasta_conversion=tool.id != 'CONVERTER_fasta_to_len' )
        if db_dataset:
            inp_data.update( { "chromInfo": db_dataset } )
        incoming[ "chromInfo" ] = chrom_info

        # Determine output dataset permission/roles list
        existing_datasets = [ inp for inp in inp_data.values() if inp ]
        if existing_datasets:
            output_permissions = trans.app.security_agent.guess_derived_permissions_for_datasets( existing_datasets )
        else:
            # No valid inputs, we will use history defaults
            output_permissions = trans.app.security_agent.history_get_default_permissions( history )

        # Build name for output datasets based on tool name and input names
        on_text = on_text_for_names( input_names )
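        # e.g. input_names like ["data 1", "data 2"] produce on_text along the lines
        # of "data 1 and data 2" (exact wording comes from on_text_for_names).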

        # Add the dbkey to the incoming parameters
        incoming[ "dbkey" ] = input_dbkey
        # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
        wrapped_params = WrappedParameters( trans, tool, incoming )
        # Keep track of parent / child relationships, we'll create all the
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        object_store_populator = ObjectStorePopulator( trans.app )

        def handle_output( name, output ):
            if output.parent:
                parent_to_child_pairs.append( ( output.parent, name ) )
                child_dataset_names.add( name )
            # What is the following hack for? Need to document under what
            # conditions the following can occur.
            # HACK: the output data has already been created
            #      this happens i.e. as a result of the async controller
            if name in incoming:
                dataid = incoming[name]
                data = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( dataid )
                assert data is not None
                out_data[name] = data
            else:
                ext = determine_output_format( output, wrapped_params.params, inp_data, input_ext )
                data = trans.app.model.HistoryDatasetAssociation( extension=ext, create_dataset=True, sa_session=trans.sa_session )
                if output.hidden:
                    data.visible = False
                # Commit the dataset immediately so it gets database assigned unique id
                trans.sa_session.add( data )
                trans.sa_session.flush()
                trans.app.security_agent.set_all_dataset_permissions( data.dataset, output_permissions )

            object_store_populator.set_object_store_id( data )

            # This may not be necessary with the new parent/child associations
            data.designation = name
            # Copy metadata from one of the inputs if requested.

            # metadata source can be either a string referencing an input
            # or an actual object to copy.
            metadata_source = output.metadata_source
            if metadata_source:
                if isinstance( metadata_source, basestring ):
                    metadata_source = inp_data[metadata_source]

            if metadata_source is not None:
                data.init_meta( copy_from=metadata_source )
            else:
                data.init_meta()
            # Take dbkey from LAST input
            data.dbkey = str(input_dbkey)
            # Set state
            # FIXME: shouldn't this be NEW until the job runner changes it?
            data.state = data.states.QUEUED
            data.blurb = "queued"
            # Set output label
            data.name = self.get_output_name( output, data, tool, on_text, trans, incoming, history, wrapped_params.params, job_params )
            # Store output
            out_data[ name ] = data
            if output.actions:
                # Apply pre-job tool-output-dataset actions; e.g. setting metadata, changing format
                output_action_params = dict( out_data )
                output_action_params.update( incoming )
                output.actions.apply_action( data, output_action_params )
            # Store all changes to database
            trans.sa_session.flush()
            return data

        for name, output in tool.outputs.items():
            if not filter_output(output, incoming):
                if output.collection:
                    collections_manager = trans.app.dataset_collections_service
                    # As far as I can tell - this is always true - but just verify
                    assert set_output_history, "Cannot create dataset collection for this kind of tool."

                    element_identifiers = []
                    input_collections = dict( [ (k, v[0]) for k, v in inp_dataset_collections.iteritems() ] )
                    known_outputs = output.known_outputs( input_collections, collections_manager.type_registry )
                    # Just to echo TODO elsewhere - this should be restructured to allow
                    # nested collections.
                    for output_part_def in known_outputs:
                        # Add elements to top-level collection, unless nested...
                        current_element_identifiers = element_identifiers
                        current_collection_type = output.structure.collection_type

                        for parent_id in (output_part_def.parent_ids or []):
                            # TODO: replace following line with formal abstractions for doing this.
                            current_collection_type = ":".join(current_collection_type.split(":")[1:])
                            name_to_index = dict(map(lambda (index, value): (value["name"], index), enumerate(current_element_identifiers)))
                            if parent_id not in name_to_index:
                                if parent_id not in current_element_identifiers:
                                    index = len(current_element_identifiers)
                                    current_element_identifiers.append(dict(
                                        name=parent_id,
                                        collection_type=current_collection_type,
                                        src="new_collection",
                                        element_identifiers=[],
                                    ))
                                else:
                                    index = name_to_index[parent_id]
                            current_element_identifiers = current_element_identifiers[ index ][ "element_identifiers" ]

                        effective_output_name = output_part_def.effective_output_name
                        element = handle_output( effective_output_name, output_part_def.output_def )
                        # The following hack causes the dataset not to be added to the history...
                        child_dataset_names.add( effective_output_name )

                        if set_output_history:
                            history.add_dataset( element, set_hid=set_output_hid )
                        trans.sa_session.add( element )
                        trans.sa_session.flush()

                        current_element_identifiers.append({
                            "__object__": element,
                            "name": output_part_def.element_identifier,
                        })
                        log.info(element_identifiers)

                    if output.dynamic_structure:
                        assert not element_identifiers  # known_outputs must have been empty
                        element_kwds = dict(elements=collections_manager.ELEMENTS_UNINITIALIZED)
                    else:
                        element_kwds = dict(element_identifiers=element_identifiers)

                    if mapping_over_collection:
                        dc = collections_manager.create_dataset_collection(
                            trans,
                            collection_type=output.structure.collection_type,
                            **element_kwds
                        )
                        out_collections[ name ] = dc
                    else:
                        hdca_name = self.get_output_name( output, None, tool, on_text, trans, incoming, history, wrapped_params.params, job_params )
                        hdca = collections_manager.create(
                            trans,
                            history,
                            name=hdca_name,
                            collection_type=output.structure.collection_type,
                            trusted_identifiers=True,
                            **element_kwds
                        )
                        # name here is the name of the output element, not the
                        # name of the hdca.
                        out_collection_instances[ name ] = hdca
                else:
                    handle_output_timer = ExecutionTimer()
                    handle_output( name, output )
                    log.info("Handled output %s" % handle_output_timer)
        # Add all the top-level (non-child) datasets to the history unless otherwise specified
        for name in out_data.keys():
            if name not in child_dataset_names and name not in incoming:  # don't add children or datasets that already exist (e.g. created by the async controller)
                data = out_data[ name ]
                if set_output_history:
                    history.add_dataset( data, set_hid=set_output_hid )
                trans.sa_session.add( data )
                trans.sa_session.flush()
        # Add all the children to their parents
        for parent_name, child_name in parent_to_child_pairs:
            parent_dataset = out_data[ parent_name ]
            child_dataset = out_data[ child_name ]
            parent_dataset.children.append( child_dataset )
        # Store data after custom code runs
        trans.sa_session.flush()
        # Create the job object
        job = trans.app.model.Job()

        if hasattr( trans, "get_galaxy_session" ):
            galaxy_session = trans.get_galaxy_session()
            # If we're submitting from the API, there won't be a session.
            if type( galaxy_session ) == trans.model.GalaxySession:
                job.session_id = galaxy_session.id
        if trans.user is not None:
            job.user_id = trans.user.id
        job.history_id = history.id
        job.tool_id = tool.id
        try:
            # For backward compatibility, some tools may not have versions yet.
            job.tool_version = tool.version
        except:
            job.tool_version = "1.0.0"
        # FIXME: Don't need all of incoming here, just the defined parameters
        #        from the tool. We need to deal with tools that pass all post
        #        parameters to the command as a special case.
        for name, ( dataset_collection, reduced ) in inp_dataset_collections.iteritems():
            # TODO: Does this work if nested in repeat/conditional?
            if reduced:
                incoming[ name ] = "__collection_reduce__|%s" % dataset_collection.id
            # Should we verify security here? We check security of the individual
            # datasets below.
            job.add_input_dataset_collection( name, dataset_collection )
        for name, value in tool.params_to_strings( incoming, trans.app ).iteritems():
            job.add_parameter( name, value )
        current_user_roles = trans.get_current_user_roles()
        access_timer = ExecutionTimer()
        for name, dataset in inp_data.iteritems():
            if dataset:
                if not trans.app.security_agent.can_access_dataset( current_user_roles, dataset.dataset ):
                    raise Exception("User does not have permission to use a dataset (%s) provided for input." % data.id)
                job.add_input_dataset( name, dataset )
            else:
                job.add_input_dataset( name, None )
        log.info("Verified access to datasets %s" % access_timer)
        for name, dataset in out_data.iteritems():
            job.add_output_dataset( name, dataset )
        for name, dataset_collection in out_collections.iteritems():
            job.add_implicit_output_dataset_collection( name, dataset_collection )
        for name, dataset_collection_instance in out_collection_instances.iteritems():
            job.add_output_dataset_collection( name, dataset_collection_instance )
        job.object_store_id = object_store_populator.object_store_id
        if job_params:
            job.params = dumps( job_params )
        job.set_handler(tool.get_job_handler(job_params))
        trans.sa_session.add( job )
        # Now that we have a job id, we can remap any outputs if this is a rerun and the user chose to continue dependent jobs
        # This functionality requires tracking jobs in the database.
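        # In outline: dependent jobs that were paused on the old job's outputs are
        # re-pointed at this job's new output datasets, their paused output HDAs are
        # reset to NEW, and the old job's outputs are hidden (see the loop below).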
        if trans.app.config.track_jobs_in_database and rerun_remap_job_id is not None:
            try:
                old_job = trans.sa_session.query( trans.app.model.Job ).get(rerun_remap_job_id)
                assert old_job is not None, '(%s/%s): Old job id is invalid' % (rerun_remap_job_id, job.id)
                assert old_job.tool_id == job.tool_id, '(%s/%s): Old tool id (%s) does not match rerun tool id (%s)' % (old_job.id, job.id, old_job.tool_id, job.tool_id)
                if trans.user is not None:
                    assert old_job.user_id == trans.user.id, '(%s/%s): Old user id (%s) does not match rerun user id (%s)' % (old_job.id, job.id, old_job.user_id, trans.user.id)
                elif trans.user is None and type( galaxy_session ) == trans.model.GalaxySession:
                    assert old_job.session_id == galaxy_session.id, '(%s/%s): Old session id (%s) does not match rerun session id (%s)' % (old_job.id, job.id, old_job.session_id, galaxy_session.id)
                else:
                    raise Exception('(%s/%s): Remapping via the API is not (yet) supported' % (old_job.id, job.id))
                for jtod in old_job.output_datasets:
                    for (job_to_remap, jtid) in [(jtid.job, jtid) for jtid in jtod.dataset.dependent_jobs]:
                        if (trans.user is not None and job_to_remap.user_id == trans.user.id) or (trans.user is None and job_to_remap.session_id == galaxy_session.id):
                            if job_to_remap.state == job_to_remap.states.PAUSED:
                                job_to_remap.state = job_to_remap.states.NEW
                            for hda in [ dep_jtod.dataset for dep_jtod in job_to_remap.output_datasets ]:
                                if hda.state == hda.states.PAUSED:
                                    hda.state = hda.states.NEW
                                    hda.info = None
                            for p in job_to_remap.parameters:
                                if p.name == jtid.name and p.value == str(jtod.dataset.id):
                                    p.value = str(out_data[jtod.name].id)
                            jtid.dataset = out_data[jtod.name]
                            jtid.dataset.hid = jtod.dataset.hid
                            log.info('Job %s input HDA %s remapped to new HDA %s' % (job_to_remap.id, jtod.dataset.id, jtid.dataset.id))
                            trans.sa_session.add(job_to_remap)
                            trans.sa_session.add(jtid)
                    jtod.dataset.visible = False
                    trans.sa_session.add(jtod)
            except Exception:
                log.exception('Cannot remap rerun dependencies.')
        trans.sa_session.flush()
        # Some tools are not really executable, but jobs are still created for them ( for record keeping ).
        # Examples include tools that redirect to other applications ( epigraph ).  These special tools must
        # include something that can be retrieved from the params ( e.g., REDIRECT_URL ) to keep the job
        # from being queued.
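        # Illustrative (hypothetical values): with REDIRECT_URL=http://external.example.org/run
        # and GALAXY_URL=http://localhost:8080 in the incoming params, the job below is marked
        # OK immediately and the browser is redirected instead of the job being queued.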
        if 'REDIRECT_URL' in incoming:
            # Get the dataset - there should only be 1
            for name in inp_data.keys():
                dataset = inp_data[ name ]
            redirect_url = tool.parse_redirect_url( dataset, incoming )
            # GALAXY_URL should be included in the tool params to enable the external application
            # to send data back to the current Galaxy instance
            GALAXY_URL = incoming.get( 'GALAXY_URL', None )
            assert GALAXY_URL is not None, "GALAXY_URL parameter missing in tool config."
            redirect_url += "&GALAXY_URL=%s" % GALAXY_URL
            # Job should not be queued, so set state to ok
            job.set_state( trans.app.model.Job.states.OK )
            job.info = "Redirected to: %s" % redirect_url
            trans.sa_session.add( job )
            trans.sa_session.flush()
            trans.response.send_redirect( url_for( controller='tool_runner', action='redirect', redirect_url=redirect_url ) )
        else:
            # Put the job in the queue if tracking in memory
            trans.app.job_queue.put( job.id, job.tool_id )
            trans.log_event( "Added job to the job queue, id: %s" % str(job.id), tool_id=job.tool_id )
            return job, out_data
                                                                            repository,
                                                                            None,
                                                                            as_html=False )
                    results[ 'unsuccessful_count' ] += 1
                else:
                    message = "Successfully reset metadata on repository %s owned by %s" % \
                        ( str( repository.name ), str( repository.owner ) )
                    results[ 'successful_count' ] += 1
            except Exception, e:
                message = "Error resetting metadata on repository %s owned by %s: %s" % \
                    ( str( repository.name ), str( repository.owner ), str( e ) )
                results[ 'unsuccessful_count' ] += 1
            results[ 'repository_status' ].append( message )
        stop_time = strftime( "%Y-%m-%d %H:%M:%S" )
        results[ 'stop_time' ] = stop_time
        return json.dumps( results, sort_keys=True, indent=4 )

    @expose_api
    def show( self, trans, id, **kwd ):
        """
        GET /api/tool_shed_repositories/{encoded_tool_shed_repository_id}
        Display a dictionary containing information about a specified tool_shed_repository.

        :param id: the encoded id of the ToolShedRepository object
        """
        # Example URL: http://localhost:8763/api/tool_shed_repositories/df7a1f0c02a5b08e
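        # A minimal client-side sketch (host, id and key are assumed values); Galaxy API
        # requests are typically authenticated with an API key, e.g.:
        #   curl 'http://localhost:8763/api/tool_shed_repositories/df7a1f0c02a5b08e?key=<api_key>'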
        tool_shed_repository = suc.get_tool_shed_repository_by_id( trans.app, id )
        if tool_shed_repository is None:
            log.debug( "Unable to locate tool_shed_repository record for id %s." % ( str( id ) ) )
            return {}
        tool_shed_repository_dict = tool_shed_repository.as_dict( value_mapper=self.__get_value_mapper( trans, tool_shed_repository ) )
        return tool_shed_repository_dict
Example #43
0
        else:
            # This should not happen, but it's here just in case
            shutil.copy(dataset.path, output_path)
    elif link_data_only == 'copy_files':
        shutil.move(dataset.path, output_path)
    # Write the job info
    stdout = stdout or 'uploaded %s file' % data_type
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout=stdout,
                name=dataset.name,
                line_count=line_count)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    json_file.write(dumps(info) + "\n")
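    # Illustrative (hypothetical values): the JSON line written above might look like
    #   {"type": "dataset", "dataset_id": 7, "ext": "txt", "stdout": "uploaded txt file",
    #    "name": "my_upload.txt", "line_count": 100}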

    if link_data_only == 'copy_files' and datatype.dataset_content_needs_grooming(
            output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)
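        # (e.g. uploaded BAM datasets are typically coordinate-sorted by this grooming step)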


def add_composite_file(dataset, json_file, output_path, files_path):
    if dataset.composite_files:
        os.mkdir(files_path)
        for name, value in dataset.composite_files.iteritems():
            value = util.bunch.Bunch(**value)
            if dataset.composite_file_paths[
                    value.name] is None and not value.optional:
                file_err(
Example #44
0
 cmd = "SELECT f.id, f.fields FROM form_definition AS f"
 result = migrate_engine.execute(cmd)
 for row in result:
     form_definition_id = row[0]
     fields = str(row[1])
     if not fields.strip():
         continue
     fields_list = loads(_sniffnfix_pg9_hex(fields))
     if len(fields_list):
         for index, field in enumerate(fields_list):
             field['name'] = 'field_%i' % index
             field['helptext'] = field['helptext'].replace("'",
                                                           "''").replace(
                                                               '"', "")
             field['label'] = field['label'].replace("'", "''")
         fields_json = dumps(fields_list)
         if migrate_engine.name == 'mysql':
             cmd = "UPDATE form_definition AS f SET f.fields='%s' WHERE f.id=%i" % (
                 fields_json, form_definition_id)
         else:
             cmd = "UPDATE form_definition SET fields='%s' WHERE id=%i" % (
                 fields_json, form_definition_id)
         migrate_engine.execute(cmd)
 # replace the values list in the content field of the form_values table with a name:value dict
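 # Illustrative (hypothetical values): a content value stored as a plain list such as
 #     ["some.host", "someuser", "secret", "/data", "no"]
 # becomes a dict keyed by the generated field names, e.g.
 #     {"field_0": "some.host", "field_1": "someuser", "field_2": "secret",
 #      "field_3": "/data", "field_4": "no"}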
 cmd = "SELECT form_values.id, form_values.content, form_definition.fields" \
       " FROM form_values, form_definition" \
       " WHERE form_values.form_definition_id=form_definition.id" \
       " ORDER BY form_values.id ASC"
 result = migrate_engine.execute(cmd)
 for row in result:
     form_values_id = int(row[0])