Example #1
 def __init__( self, datatypes_registry = None, ext = 'data', dbkey = '?' ):
     self.ext = self.extension = ext
     self.dbkey = dbkey
     if datatypes_registry is None: datatypes_registry = Registry()
     self.datatype = datatypes_registry.get_datatype_by_extension( ext )
     self._metadata = None
     self.metadata = MetadataCollection( self )
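The constructor above only resolves a datatype by extension and expects the registry to be populated already. Below is a minimal sketch of the setup pattern that recurs throughout these examples; the paths are placeholders, and several of the examples also call load_datatypes() with no arguments.

# Minimal sketch; the paths below are placeholders for a real Galaxy checkout.
from galaxy.datatypes.registry import Registry

registry = Registry()
registry.load_datatypes(root_dir='/path/to/galaxy',
                        config='/path/to/galaxy/config/datatypes_conf.xml')
# Look up a datatype object for a known extension, as the constructor above does.
bed_datatype = registry.get_datatype_by_extension('bed')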
Example #2
 def __init__(self, config):
     self.object_store = build_object_store_from_config(config)
     # Setup the database engine and ORM
     self.model = galaxy.config.init_models_from_config(config, object_store=self.object_store)
     registry = Registry()
     registry.load_datatypes()
     galaxy.model.set_datatypes_registry(registry)
Example #3
def __main__():

    if len(sys.argv) < 4:
        print('usage: upload.py <root> <datatypes_conf> <json paramfile> <output spec> ...', file=sys.stderr)
        sys.exit(1)

    output_paths = parse_outputs(sys.argv[4:])

    registry = Registry()
    registry.load_datatypes(root_dir=sys.argv[1], config=sys.argv[2])

    try:
        datasets = __read_paramfile(sys.argv[3])
    except (ValueError, AssertionError):
        datasets = __read_old_paramfile(sys.argv[3])

    metadata = []
    for dataset in datasets:
        dataset = bunch.Bunch(**safe_dict(dataset))
        try:
            output_path = output_paths[int(dataset.dataset_id)][0]
        except Exception:
            print('Output path for dataset %s not found on command line' % dataset.dataset_id, file=sys.stderr)
            sys.exit(1)
        try:
            if dataset.type == 'composite':
                files_path = output_paths[int(dataset.dataset_id)][1]
                metadata.append(add_composite_file(dataset, registry, output_path, files_path))
            else:
                metadata.append(add_file(dataset, registry, output_path))
        except UploadProblemException as e:
            metadata.append(file_err(unicodify(e), dataset))
    __write_job_metadata(metadata)
Example #4
def __main__():

    if len(sys.argv) < 4:
        print >> sys.stderr, 'usage: upload.py <root> <datatypes_conf> <json paramfile> <output spec> ...'
        sys.exit(1)

    output_paths = parse_outputs(sys.argv[4:])
    json_file = open('galaxy.json', 'w')

    registry = Registry()
    registry.load_datatypes(root_dir=sys.argv[1], config=sys.argv[2])

    for line in open(sys.argv[3], 'r'):
        dataset = from_json_string(line)
        dataset = util.bunch.Bunch(**safe_dict(dataset))
        try:
            output_path = output_paths[int(dataset.dataset_id)][0]
        except:
            print >> sys.stderr, 'Output path for dataset %s not found on command line' % dataset.dataset_id
            sys.exit(1)
        if dataset.type == 'composite':
            files_path = output_paths[int(dataset.dataset_id)][1]
            add_composite_file(dataset, registry, json_file, output_path,
                               files_path)
        else:
            add_file(dataset, registry, json_file, output_path)

    # clean up paramfile
    # TODO: this will not work when running as the actual user unless the
    # parent directory is writable by the user.
    try:
        os.remove(sys.argv[3])
    except:
        pass
Example #5
def __main__():

    if len(sys.argv) < 4:
        print('usage: upload.py <root> <datatypes_conf> <json paramfile> <output spec> ...', file=sys.stderr)
        sys.exit(1)

    output_paths = parse_outputs(sys.argv[4:])

    registry = Registry()
    registry.load_datatypes(root_dir=sys.argv[1], config=sys.argv[2])

    try:
        datasets = __read_paramfile(sys.argv[3])
    except (ValueError, AssertionError):
        datasets = __read_old_paramfile(sys.argv[3])

    metadata = []
    for dataset in datasets:
        dataset = bunch.Bunch(**safe_dict(dataset))
        try:
            output_path = output_paths[int(dataset.dataset_id)][0]
        except Exception:
            print('Output path for dataset %s not found on command line' % dataset.dataset_id, file=sys.stderr)
            sys.exit(1)
        try:
            if dataset.type == 'composite':
                files_path = output_paths[int(dataset.dataset_id)][1]
                metadata.append(add_composite_file(dataset, output_path, files_path))
            else:
                metadata.append(add_file(dataset, registry, output_path))
        except UploadProblemException as e:
            metadata.append(file_err(e.message, dataset))
    __write_job_metadata(metadata)
Example #6
def __main__():

    if len(sys.argv) < 4:
        print('usage: upload.py <root> <datatypes_conf> <json paramfile> <output spec> ...', file=sys.stderr)
        sys.exit(1)

    output_paths = parse_outputs(sys.argv[4:])
    json_file = open('galaxy.json', 'w')

    registry = Registry()
    registry.load_datatypes(root_dir=sys.argv[1], config=sys.argv[2])

    for line in open(sys.argv[3], 'r'):
        dataset = loads(line)
        dataset = util.bunch.Bunch(**safe_dict(dataset))
        try:
            output_path = output_paths[int(dataset.dataset_id)][0]
        except:
            print('Output path for dataset %s not found on command line' % dataset.dataset_id, file=sys.stderr)
            sys.exit(1)
        if dataset.type == 'composite':
            files_path = output_paths[int(dataset.dataset_id)][1]
            add_composite_file(dataset, json_file, output_path, files_path)
        else:
            add_file(dataset, registry, json_file, output_path)

    # clean up paramfile
    # TODO: this will not work when running as the actual user unless the
    # parent directory is writable by the user.
    try:
        os.remove(sys.argv[3])
    except:
        pass
Example #7
def __main__():
    filename = sys.argv[1]
    try:
        max_file_size = int( sys.argv[2] )
    except:
        max_file_size = 0
    
    job_params, params = load_input_parameters( filename )
    if job_params is None: #using an older tabular file
        enhanced_handling = False
        job_params = dict( param_dict = params )
        job_params[ 'output_data' ] =  [ dict( out_data_name = 'output',
                                               ext = 'data',
                                               file_name = filename,
                                               files_path = None ) ]
        job_params[ 'job_config' ] = dict( GALAXY_ROOT_DIR=GALAXY_ROOT_DIR, GALAXY_DATATYPES_CONF_FILE=GALAXY_DATATYPES_CONF_FILE, TOOL_PROVIDED_JOB_METADATA_FILE = TOOL_PROVIDED_JOB_METADATA_FILE )
    else:
        enhanced_handling = True
        json_file = open( job_params[ 'job_config' ][ 'TOOL_PROVIDED_JOB_METADATA_FILE' ], 'w' ) #specially named file for output junk to pass onto set metadata

    datatypes_registry = Registry()
    datatypes_registry.load_datatypes( root_dir = job_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config = job_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] )

    URL = params.get( 'URL', None ) #using exactly URL indicates that only one dataset is being downloaded
    URL_method = params.get( 'URL_method', None )
    simpleD = params.get('galaxyData')
    # The Python support for fetching resources from the web is layered. urllib uses the httplib
    # library, which in turn uses the socket library.  As of Python 2.3 you can specify how long
    # a socket should wait for a response before timing out. By default the socket module has no
    # timeout and can hang. Currently, the socket timeout is not exposed at the httplib or urllib2
    # levels. However, you can set the default timeout ( in seconds ) globally for all sockets by
    # doing the following.
    socket.setdefaulttimeout( 600 )
    cur_filename = params.get('output')
    outputfile = open( cur_filename, 'w' ).write( simpleD )
Example #8
 def __init__(self, datatypes_registry=None, ext='data', dbkey='?'):
     self.ext = self.extension = ext
     self.dbkey = dbkey
     if datatypes_registry is None: datatypes_registry = Registry()
     self.datatype = datatypes_registry.get_datatype_by_extension(ext)
     self._metadata = None
     self.metadata = MetadataCollection(self)
Example #9
def __main__():
    filename = sys.argv[1]
    try:
        max_file_size = int( sys.argv[2] )
    except:
        max_file_size = 0

    job_params, params = load_input_parameters( filename )

    if job_params is None: #using an older tabular file
        enhanced_handling = False
        job_params = dict( param_dict = params )
        job_params[ 'output_data' ] =  [ dict( out_data_name = 'output',
                                               ext = 'data',
                                               file_name = filename,
                                               extra_files_path = None ) ]
        job_params[ 'job_config' ] = dict( GALAXY_ROOT_DIR=GALAXY_ROOT_DIR, GALAXY_DATATYPES_CONF_FILE=GALAXY_DATATYPES_CONF_FILE, TOOL_PROVIDED_JOB_METADATA_FILE = TOOL_PROVIDED_JOB_METADATA_FILE )
    else:
        enhanced_handling = True
        json_file = open( job_params[ 'job_config' ][ 'TOOL_PROVIDED_JOB_METADATA_FILE' ], 'w' ) #specially named file for output junk to pass onto set metadata

    datatypes_registry = Registry()
    datatypes_registry.load_datatypes( root_dir = job_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config = job_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] )

    URL = params.get( 'URL', None ) #using exactly URL indicates that only one dataset is being downloaded
    URL_method = params.get( 'URL_method', None )

    # The Python support for fetching resources from the web is layered. urllib uses the httplib
    # library, which in turn uses the socket library.  As of Python 2.3 you can specify how long
    # a socket should wait for a response before timing out. By default the socket module has no
    # timeout and can hang. Currently, the socket timeout is not exposed at the httplib or urllib2
    # levels. However, you can set the default timeout ( in seconds ) globally for all sockets by
    # doing the following.
    socket.setdefaulttimeout( 600 )

    for data_dict in job_params[ 'output_data' ]:
        cur_filename =  data_dict.get( 'file_name', filename )
        cur_URL =  params.get( '%s|%s|URL' % ( GALAXY_PARAM_PREFIX, data_dict[ 'out_data_name' ] ), URL )
        if not cur_URL:
            open( cur_filename, 'w' ).write( "" )
            stop_err( 'The remote data source application has not sent back a URL parameter in the request.' )

        # The following calls to urllib.urlopen() will use the above default timeout
        try:
            if not URL_method or URL_method == 'get':
                page = urllib.urlopen( cur_URL )
            elif URL_method == 'post':
                page = urllib.urlopen( cur_URL, urllib.urlencode( params ) )
        except Exception, e:
            stop_err( 'The remote data source application may be off line, please try again later. Error: %s' % str( e ) )
        if max_file_size:
            file_size = int( page.info().get( 'Content-Length', 0 ) )
            if file_size > max_file_size:
                stop_err( 'The size of the data (%d bytes) you have requested exceeds the maximum allowed (%d bytes) on this server.' % ( file_size, max_file_size ) )
        #do sniff stream for multi_byte
        try:
            cur_filename, is_multi_byte = sniff.stream_to_open_named_file( page, os.open( cur_filename, os.O_WRONLY | os.O_CREAT ), cur_filename, source_encoding=get_charset_from_http_headers( page.headers ) )
        except Exception, e:
            stop_err( 'Unable to fetch %s:\n%s' % ( cur_URL, e ) )
Example #10
 def __init__(self, config):
     self.object_store = build_object_store_from_config(config)
     # Setup the database engine and ORM
     self.model = galaxy.config.init_models_from_config(
         config, object_store=self.object_store)
     registry = Registry()
     registry.load_datatypes()
     galaxy.model.set_datatypes_registry(registry)
Example #11
def __main__():
    filename = sys.argv[1]
    try:
        max_file_size = int( sys.argv[2] )
    except:
        max_file_size = 0

    job_params, params = load_input_parameters( filename )
    if job_params is None: #using an older tabular file
        enhanced_handling = False
        job_params = dict( param_dict = params )
        job_params[ 'output_data' ] =  [ dict( out_data_name = 'output',
                                               ext = 'data',
                                               file_name = filename,
                                               extra_files_path = None ) ]
        job_params[ 'job_config' ] = dict( GALAXY_ROOT_DIR=GALAXY_ROOT_DIR, GALAXY_DATATYPES_CONF_FILE=GALAXY_DATATYPES_CONF_FILE, TOOL_PROVIDED_JOB_METADATA_FILE = TOOL_PROVIDED_JOB_METADATA_FILE )
    else:
        enhanced_handling = True
        json_file = open( job_params[ 'job_config' ][ 'TOOL_PROVIDED_JOB_METADATA_FILE' ], 'w' ) #specially named file for output junk to pass onto set metadata

    datatypes_registry = Registry()
    datatypes_registry.load_datatypes( root_dir = job_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config = job_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] )

    URL = params.get( 'URL', None ) #using exactly URL indicates that only one dataset is being downloaded
    URL_method = params.get( 'URL_method', None )

    # The Python support for fetching resources from the web is layered. urllib uses the httplib
    # library, which in turn uses the socket library.  As of Python 2.3 you can specify how long
    # a socket should wait for a response before timing out. By default the socket module has no
    # timeout and can hang. Currently, the socket timeout is not exposed at the httplib or urllib2
    # levels. However, you can set the default timeout ( in seconds ) globally for all sockets by
    # doing the following.
    socket.setdefaulttimeout( 600 )

    for data_dict in job_params[ 'output_data' ]:
        cur_filename =  data_dict.get( 'file_name', filename )
        cur_URL =  params.get( '%s|%s|URL' % ( GALAXY_PARAM_PREFIX, data_dict[ 'out_data_name' ] ), URL )
        if not cur_URL:
            open( cur_filename, 'w' ).write( "" )
            stop_err( 'The remote data source application has not sent back a URL parameter in the request.' )

        # The following calls to urllib.urlopen() will use the above default timeout
        try:
            if not URL_method or URL_method == 'get':
                page = urllib.urlopen( cur_URL )
            elif URL_method == 'post':
                page = urllib.urlopen( cur_URL, urllib.urlencode( params ) )
        except Exception, e:
            stop_err( 'The remote data source application may be off line, please try again later. Error: %s' % str( e ) )
        if max_file_size:
            file_size = int( page.info().get( 'Content-Length', 0 ) )
            if file_size > max_file_size:
                stop_err( 'The size of the data (%d bytes) you have requested exceeds the maximum allowed (%d bytes) on this server.' % ( file_size, max_file_size ) )
        #do sniff stream for multi_byte
        try:
            cur_filename, is_multi_byte = sniff.stream_to_open_named_file( page, os.open( cur_filename, os.O_WRONLY | os.O_CREAT ), cur_filename, source_encoding=get_charset_from_http_headers( page.headers ) )
        except Exception, e:
            stop_err( 'Unable to fetch %s:\n%s' % ( cur_URL, e ) )
Example #12
def collect_test_data():
    registry = Registry()
    registry.load_datatypes(root_dir=GALAXY_ROOT, config=DATATYPES_CONFIG)
    test_files = os.listdir(TEST_FILE_DIR)
    files = [os.path.join(TEST_FILE_DIR, f) for f in test_files]
    datatypes = [find_datatype(registry, f) for f in test_files]
    uploadable = [datatype.file_ext in registry.upload_file_formats for datatype in datatypes]
    test_data_description = [TEST_DATA(*items) for items in zip(files, datatypes, uploadable)]
    return {os.path.basename(data.path): data for data in test_data_description}
Example #13
 def __init__(self, config):
     if config.database_connection is False:
         config.database_connection = "sqlite:///%s?isolation_level=IMMEDIATE" % config.database
     self.object_store = build_object_store_from_config(config)
     # Setup the database engine and ORM
     self.model = galaxy.model.mapping.init(config.file_path, config.database_connection, engine_options={}, create_tables=False, object_store=self.object_store)
     registry = Registry()
     registry.load_datatypes()
     galaxy.model.set_datatypes_registry(registry)
Example #14
 def __init__( self, datatypes_registry=None, ext='data', dbkey='?' ):
     self.ext = self.extension = ext
     self.dbkey = dbkey
     if datatypes_registry is None:
         # Default Value Required for unit tests
         datatypes_registry = Registry()
         datatypes_registry.load_datatypes()
     self.datatype = datatypes_registry.get_datatype_by_extension( ext )
     self._metadata = None
     self.metadata = MetadataCollection( self )
Example #15
 def __init__(self,
              app_name,
              security,
              model,
              tool_data_path,
              shed_tool_data_path,
              tool_data_tables=None,
              registry=None,
              hgweb_config_manager=None):
     self.name = app_name
     self.security = security
     self.model = model
     self.config = Bunch()
     self.config.tool_data_path = tool_data_path
     self.config.shed_tool_data_path = shed_tool_data_path
     self.temporary_path = tempfile.mkdtemp(prefix='tool_validation_')
     self.config.tool_data_table_config = os.path.join(
         self.temporary_path, 'tool_data_table_conf.xml')
     self.config.shed_tool_data_table_config = os.path.join(
         self.temporary_path, 'shed_tool_data_table_conf.xml')
     self.tool_data_tables = tool_data_tables
     self.datatypes_registry = registry or Registry()
     self.hgweb_config_manager = hgweb_config_manager
     self.config.len_file_path = os.path.join(self.temporary_path,
                                              'chromlen.txt')
     # If the builds file path is set to None, tools/__init__.py will load the default.
     # Otherwise it will attempt to load a nonexistent file and log an error. This does
     # not appear to be an issue with the len_file_path config option.
     self.config.builds_file_path = None
     self.genome_builds = GenomeBuilds(self)
Example #16
def __main__():

    if len( sys.argv ) < 4:
        print >>sys.stderr, 'usage: upload.py <root> <datatypes_conf> <json paramfile> <output spec> ...'
        sys.exit( 1 )

    output_paths = parse_outputs( sys.argv[4:] )
    json_file = open( 'galaxy.json', 'w' )

    registry = Registry( sys.argv[1], sys.argv[2] )

    for line in open( sys.argv[3], 'r' ):
        dataset = from_json_string( line )
        dataset = util.bunch.Bunch( **safe_dict( dataset ) )
        try:
            output_path = output_paths[int( dataset.dataset_id )][0]
        except:
            print >>sys.stderr, 'Output path for dataset %s not found on command line' % dataset.dataset_id
            sys.exit( 1 )
        if dataset.type == 'composite':
            files_path = output_paths[int( dataset.dataset_id )][1]
            add_composite_file( dataset, registry, json_file, output_path, files_path )
        else:
            add_file( dataset, registry, json_file, output_path )
    # clean up paramfile
    try:
        os.remove( sys.argv[3] )
    except:
        pass
Example #17
def sniff_and_handle_data_type(json_params, output_file):
    """
    The sniff.handle_uploaded_dataset_file() method in Galaxy performs two
    functions: it sniffs the file type and, if the file is a compressed archive
    of a non-compressed datatype such as fasta, unpacks it.
    """
    try:
        datatypes_registry = Registry()
        datatypes_registry.load_datatypes(
            root_dir=json_params['job_config']['GALAXY_ROOT_DIR'],
            config=json_params['job_config']['GALAXY_DATATYPES_CONF_FILE'])
        file_type = sniff.handle_uploaded_dataset_file(output_file,
                                                       datatypes_registry)
        return file_type
    except Exception:
        return None
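A hypothetical invocation of the helper above, assuming a json_params dictionary shaped like the job_config blocks used in the other examples (all paths are placeholders):

# Hypothetical usage; the keys mirror the 'job_config' structure seen in these examples.
json_params = {
    'job_config': {
        'GALAXY_ROOT_DIR': '/path/to/galaxy',
        'GALAXY_DATATYPES_CONF_FILE': '/path/to/galaxy/config/datatypes_conf.xml',
    }
}
file_type = sniff_and_handle_data_type(json_params, '/tmp/uploaded_dataset')
print(file_type)  # a Galaxy extension such as 'fasta', or None if sniffing failed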
Example #18
 def get_converters_for_collection(self, trans, id, datatypes_registry: Registry, instance_type="history"):
     dataset_collection_instance = self.get_dataset_collection_instance(
         trans,
         id=id,
         instance_type=instance_type,
         check_ownership=True
     )
     dbkeys_and_extensions = dataset_collection_instance.dataset_dbkeys_and_extensions_summary
     suitable_converters = set()
     first_extension = True
     most_recent_datatype = None
     # TODO error checking
     for datatype in dbkeys_and_extensions[1]:
         new_converters = datatypes_registry.get_converters_by_datatype(datatype)
         set_of_new_converters = set()
         for tgt_type, tgt_val in new_converters.items():
             converter = (tgt_type, tgt_val)
             set_of_new_converters.add(converter)
         if (first_extension is True):
             suitable_converters = set_of_new_converters
             most_recent_datatype = datatype
             first_extension = False
         else:
             suitable_converters = suitable_converters.intersection(set_of_new_converters)
             if suitable_converters:
                 most_recent_datatype = datatype
     suitable_tool_ids = list()
     for tool in suitable_converters:
         tool_info = {"tool_id": tool[1].id, "name": tool[1].name, "target_type": tool[0], "original_type": most_recent_datatype}
         suitable_tool_ids.append(tool_info)
     return suitable_tool_ids
Example #19
def __main__():
    filename = sys.argv[1]
    try:
        int(sys.argv[2])
    except Exception:
        pass

    job_params, params = load_input_parameters(filename)
    if job_params is None:  # using an older tabular file
        job_params = dict(param_dict=params)
        job_params["output_data"] = [
            dict(out_data_name="output",
                 ext="data",
                 file_name=filename,
                 files_path=None)
        ]
        job_params["job_config"] = dict(
            GALAXY_ROOT_DIR=GALAXY_ROOT_DIR,
            GALAXY_DATATYPES_CONF_FILE=GALAXY_DATATYPES_CONF_FILE,
            TOOL_PROVIDED_JOB_METADATA_FILE=TOOL_PROVIDED_JOB_METADATA_FILE,
        )
    # else:
    #     json_file = open(
    #         job_params["job_config"]["TOOL_PROVIDED_JOB_METADATA_FILE"], "w"
    #     )  # specially named file for output junk to pass onto set metadata

    datatypes_registry = Registry()
    datatypes_registry.load_datatypes(
        root_dir=job_params["job_config"]["GALAXY_ROOT_DIR"],
        config=job_params["job_config"]["GALAXY_DATATYPES_CONF_FILE"],
    )

    # URL = params.get(
    #     "URL", None
    # )  # using exactly URL indicates that only one dataset is being downloaded
    params.get("URL_method", None)
    simpleD = params.get("galaxyData")
    # The Python support for fetching resources from the web is layered. urllib uses the httplib
    # library, which in turn uses the socket library.  As of Python 2.3 you can specify how long
    # a socket should wait for a response before timing out. By default the socket module has no
    # timeout and can hang. Currently, the socket timeout is not exposed at the httplib or urllib2
    # levels. However, you can set the default timeout ( in seconds ) globally for all sockets by
    # doing the following.
    socket.setdefaulttimeout(600)
    cur_filename = params.get("output")
    open(cur_filename, "w").write(simpleD)
Example #20
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    args = _arg_parser().parse_args(argv)

    registry = Registry()
    registry.load_datatypes(root_dir=args.galaxy_root, config=args.datatypes_registry)

    request_path = args.request
    assert os.path.exists(request_path)
    with open(request_path) as f:
        request = json.load(f)

    upload_config = UploadConfig(request, registry)
    galaxy_json = _request_to_galaxy_json(upload_config, request)
    with open("galaxy.json", "w") as f:
        json.dump(galaxy_json, f)
Example #21
def __main__():
    filename = sys.argv[1]
    try:
        max_file_size = int(sys.argv[2])
    except:
        max_file_size = 0

    job_params, params = load_input_parameters(filename)
    if job_params is None:  #using an older tabular file
        enhanced_handling = False
        job_params = dict(param_dict=params)
        job_params['output_data'] = [
            dict(out_data_name='output',
                 ext='data',
                 file_name=filename,
                 extra_files_path=None)
        ]
        job_params['job_config'] = dict(
            GALAXY_ROOT_DIR=GALAXY_ROOT_DIR,
            GALAXY_DATATYPES_CONF_FILE=GALAXY_DATATYPES_CONF_FILE,
            TOOL_PROVIDED_JOB_METADATA_FILE=TOOL_PROVIDED_JOB_METADATA_FILE)
    else:
        enhanced_handling = True
        json_file = open(
            job_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'w'
        )  #specially named file for output junk to pass onto set metadata

    datatypes_registry = Registry()
    datatypes_registry.load_datatypes(
        root_dir=job_params['job_config']['GALAXY_ROOT_DIR'],
        config=job_params['job_config']['GALAXY_DATATYPES_CONF_FILE'])

    URL = params.get(
        'URL', None
    )  #using exactly URL indicates that only one dataset is being downloaded
    URL_method = params.get('URL_method', None)
    simpleD = params.get('galaxyData')
    # The Python support for fetching resources from the web is layered. urllib uses the httplib
    # library, which in turn uses the socket library.  As of Python 2.3 you can specify how long
    # a socket should wait for a response before timing out. By default the socket module has no
    # timeout and can hang. Currently, the socket timeout is not exposed at the httplib or urllib2
    # levels. However, you can set the default timeout ( in seconds ) globally for all sockets by
    # doing the following.
    socket.setdefaulttimeout(600)
    cur_filename = params.get('output')
    outputfile = open(cur_filename, 'w').write(simpleD)
Example #22
def sniff_and_handle_data_type(json_params, output_file):
    """
    The sniff.handle_uploaded_dataset_file() method in Galaxy performs two
    functions: it sniffs the file type and, if the file is a compressed archive
    of a non-compressed datatype such as fasta, unpacks it.
    """
    try:
        datatypes_registry = Registry()
        datatypes_registry.load_datatypes(
            root_dir=json_params['job_config']['GALAXY_ROOT_DIR'],
            config=json_params['job_config']['GALAXY_DATATYPES_CONF_FILE'])
        file_type = sniff.handle_uploaded_dataset_file(
            output_file,
            datatypes_registry)
        return file_type
    except:
        return None
Example #23
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    args = _arg_parser().parse_args(argv)

    registry = Registry()
    registry.load_datatypes(root_dir=args.galaxy_root,
                            config=args.datatypes_registry)

    request_path = args.request
    assert os.path.exists(request_path)
    with open(request_path) as f:
        request = json.load(f)

    upload_config = UploadConfig(request, registry)
    galaxy_json = _request_to_galaxy_json(upload_config, request)
    with open("galaxy.json", "w") as f:
        json.dump(galaxy_json, f)
Example #24
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    args = _arg_parser().parse_args(argv)

    registry = Registry()
    registry.load_datatypes(root_dir=args.galaxy_root, config=args.datatypes_registry)

    request_path = args.request
    assert os.path.exists(request_path)
    with open(request_path) as f:
        request = json.load(f)

    working_directory = args.working_directory or os.getcwd()
    allow_failed_collections = request.get("allow_failed_collections", False)
    upload_config = UploadConfig(request, registry, working_directory, allow_failed_collections)
    galaxy_json = _request_to_galaxy_json(upload_config, request)
    galaxy_json_path = os.path.join(working_directory, "galaxy.json")
    with open(galaxy_json_path, "w") as f:
        json.dump(galaxy_json, f)
Example #25
 def _configure_datatypes_registry(self, installed_repository_manager=None):
     # Create an empty datatypes registry.
     self.datatypes_registry = Registry(self.config)
     if installed_repository_manager and self.config.load_tool_shed_datatypes:
         # Load proprietary datatypes defined in datatypes_conf.xml files in all installed tool shed repositories.  We
         # load proprietary datatypes before datatypes in the distribution because Galaxy's default sniffers include some
         # generic sniffers (eg text,xml) which catch anything, so it's impossible for proprietary sniffers to be used.
         # However, if there is a conflict (2 datatypes with the same extension) between a proprietary datatype and a datatype
         # in the Galaxy distribution, the datatype in the Galaxy distribution will take precedence.  If there is a conflict
         # between 2 proprietary datatypes, the datatype from the repository that was installed earliest will take precedence.
         installed_repository_manager.load_proprietary_datatypes()
     # Load the data types in the Galaxy distribution, which are defined in self.config.datatypes_config.
     datatypes_configs = self.config.datatypes_config
     for datatypes_config in listify(datatypes_configs):
         # Setting override=False would make earlier files would take
         # precedence - but then they wouldn't override tool shed
         # datatypes.
         self.datatypes_registry.load_datatypes(self.config.root,
                                                datatypes_config,
                                                override=True)
Example #26
def download_from_genomespace_importer(username, token, json_parameter_file,
                                       genomespace_site, gs_toolname):
    json_params = json.loads(open(json_parameter_file, 'r').read())
    datasource_params = json_params.get('param_dict')
    assert None not in [username,
                        token], "Missing GenomeSpace username or token."
    output_filename = datasource_params.get("output_file1", None)
    dataset_id = base_dataset_id = json_params['output_data'][0]['dataset_id']
    hda_id = json_params['output_data'][0]['hda_id']
    url_opener = get_cookie_opener(username, token, gs_toolname=gs_toolname)
    #load and set genomespace format ids to galaxy exts
    genomespace_site_dict = get_genomespace_site_urls()[genomespace_site]
    set_genomespace_format_identifiers(url_opener,
                                       genomespace_site_dict['dmServer'])
    file_url_name = "URL"
    metadata_parameter_file = open(
        json_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'wb')
    #setup datatypes registry for sniffing
    datatypes_registry = Registry()
    datatypes_registry.load_datatypes(
        root_dir=json_params['job_config']['GALAXY_ROOT_DIR'],
        config=json_params['job_config']['GALAXY_DATATYPES_CONF_FILE'])
    url_param = datasource_params.get(file_url_name, None)
    used_filenames = []
    for download_url in url_param.split(','):
        using_temp_file = False
        parsed_url = urlparse.urlparse(download_url)
        query_params = urlparse.parse_qs(parsed_url[4])
        #write file to disk
        new_file_request = urllib2.Request(download_url)
        new_file_request.get_method = lambda: 'GET'
        target_download_url = url_opener.open(new_file_request)
        filename = None
        if 'Content-Disposition' in target_download_url.info():
            content_disposition = dict(
                map(
                    lambda x: x.strip().split('=')
                    if '=' in x else (x.strip(), ''),
                    target_download_url.info()['Content-Disposition'].split(
                        ';')))
            if 'filename' in content_disposition:
                filename = content_disposition['filename'].strip("\"'")
        if not filename:
            parsed_url = urlparse.urlparse(download_url)
            query_params = urlparse.parse_qs(parsed_url[4])
            filename = urllib.unquote_plus(parsed_url[2].split('/')[-1])
        if not filename:
            filename = download_url
        if output_filename is None:
            #need to use a temp file here, because we do not know the ext yet
            using_temp_file = True
            output_filename = tempfile.NamedTemporaryFile(
                prefix='tmp-genomespace-importer-').name
        output_file = open(output_filename, 'wb')
        chunk_write(target_download_url, output_file)
        output_file.close()

        #determine file format
        file_type = None
        if 'dataformat' in query_params:  #this is a converted dataset
            file_type = query_params['dataformat'][0]
            file_type = get_galaxy_ext_from_genomespace_format_url(
                url_opener, file_type)
        else:
            try:
                #get and use GSMetadata object
                download_file_path = download_url.split(
                    "%s/file/" % (genomespace_site_dict['dmServer']), 1
                )[-1]  #FIXME: This is a very bad way to get the path for determining metadata. There needs to be a way to query the API using the download URL to get to the metadata object
                metadata_request = urllib2.Request(
                    "%s/%s/filemetadata/%s" %
                    (genomespace_site_dict['dmServer'],
                     GENOMESPACE_API_VERSION_STRING, download_file_path))
                metadata_request.get_method = lambda: 'GET'
                metadata_url = url_opener.open(metadata_request)
                file_metadata_dict = json.loads(metadata_url.read())
                metadata_url.close()
                file_type = file_metadata_dict.get('dataFormat', None)
                if file_type and file_type.get('url'):
                    file_type = file_type.get('url')
                    file_type = get_galaxy_ext_from_genomespace_format_url(
                        url_opener, file_type, default=None)
            except:
                pass
        if file_type is None:
            #try to sniff datatype
            try:
                file_type = sniff.handle_uploaded_dataset_file(
                    output_filename, datatypes_registry)
            except:
                pass  #sniff failed
        if file_type is None and '.' in parsed_url[2]:
            #still no known datatype, fall back to using extension
            file_type = parsed_url[2].rsplit('.', 1)[-1]
            file_type = GENOMESPACE_EXT_TO_GALAXY_EXT.get(file_type, file_type)
        if file_type is None:
            #use default extension (e.g. 'data')
            file_type = DEFAULT_GALAXY_EXT

        #save json info for single primary dataset
        if dataset_id is not None:
            metadata_parameter_file.write("%s\n" % json.dumps(
                dict(type='dataset',
                     dataset_id=dataset_id,
                     ext=file_type,
                     name="GenomeSpace importer on %s" % (filename))))
        #if using tmp file, move the file to the new file path dir to get scooped up later
        if using_temp_file:
            original_filename = filename
            filename = ''.join(c in VALID_CHARS and c or '-' for c in filename)
            while filename in used_filenames:
                filename = "-%s" % filename
            used_filenames.append(filename)
            target_output_filename = os.path.join(
                os.getcwd(),
                'primary_%i_%s_visible_%s' % (hda_id, filename, file_type))
            shutil.move(output_filename, target_output_filename)
            metadata_parameter_file.write("%s\n" % json.dumps(
                dict(type='new_primary_dataset',
                     base_dataset_id=base_dataset_id,
                     ext=file_type,
                     filename=target_output_filename,
                     name="GenomeSpace importer on %s" % (original_filename))))
        dataset_id = None  #only one primary dataset available
        output_filename = None  #only have one filename available
    metadata_parameter_file.close()
    return True
Example #27
from sqlalchemy.orm import *  # noqa
from sqlalchemy.exc import *  # noqa
from sqlalchemy.sql import label  # noqa

sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'lib')))

from galaxy.datatypes.registry import Registry
from galaxy.model import *  # noqa
from galaxy.model import set_datatypes_registry  # More explicit than `*` import
from galaxy.model.mapping import init
from galaxy.model.orm.scripts import get_config

if sys.version_info > (3,):
    long = int

registry = Registry()
registry.load_datatypes()
set_datatypes_registry(registry)
db_url = get_config(sys.argv)['db_url']
sa_session = init('/tmp/', db_url).context


# Helper function for debugging sqlalchemy queries...
# http://stackoverflow.com/questions/5631078/sqlalchemy-print-the-actual-query
def printquery(statement, bind=None):
    """
    Print a query, with values filled in
    for debugging purposes *only*
    for security, you should always separate queries from their values
    please also note that this function is quite slow
    """
Example #28
    uploadable = [datatype.file_ext in registry.upload_file_formats for datatype in datatypes]
    test_data_description = [TestData(*items) for items in zip(files, datatypes, uploadable)]
    return {os.path.basename(data.path): data for data in test_data_description}


class UploadTestDatatypeDataTestCase(BaseUploadContentConfigurationInstance):
    framework_tool_and_types = False
    datatypes_conf_override = DATATYPES_CONFIG
    object_store_config = None
    object_store_config_path = None


instance = integration_util.integration_module_instance(UploadTestDatatypeDataTestCase)


registry = Registry()
registry.load_datatypes(root_dir=GALAXY_ROOT, config=DATATYPES_CONFIG)
TEST_CASES = collect_test_data(registry)


@pytest.mark.parametrize('test_data', TEST_CASES.values(), ids=list(TEST_CASES.keys()))
def test_upload_datatype_auto(instance, test_data, temp_file):
    upload_datatype_helper(instance, test_data, temp_file)


def upload_datatype_helper(instance, test_data, temp_file):
    is_compressed = False
    for is_method in (is_bz2, is_gzip, is_zip):
        is_compressed = is_method(test_data.path)
        if is_compressed:
            break
Example #29
def download_from_genomespace_file_browser(json_parameter_file,
                                           genomespace_site):
    json_params = json.loads(open(json_parameter_file, 'r').read())
    datasource_params = json_params.get('param_dict')
    username = datasource_params.get("gs-username", None)
    token = datasource_params.get("gs-token", None)
    assert None not in [username,
                        token], "Missing GenomeSpace username or token."
    output_filename = datasource_params.get("output", None)
    dataset_id = json_params['output_data'][0]['dataset_id']
    hda_id = json_params['output_data'][0]['hda_id']
    url_opener = get_cookie_opener(username, token)
    #load and set genomespace format ids to galaxy exts
    genomespace_site_dict = get_genomespace_site_urls()[genomespace_site]
    set_genomespace_format_identifiers(url_opener,
                                       genomespace_site_dict['dmServer'])

    file_url_prefix = "fileUrl"
    file_type_prefix = "fileFormat"
    metadata_parameter_file = open(
        json_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'wb')

    #setup datatypes registry for sniffing
    datatypes_registry = Registry()
    datatypes_registry.load_datatypes(
        root_dir=json_params['job_config']['GALAXY_ROOT_DIR'],
        config=json_params['job_config']['GALAXY_DATATYPES_CONF_FILE'])

    file_numbers = []
    for name in datasource_params.keys():
        if name.startswith(file_url_prefix):
            name = name[len(file_url_prefix):]
            file_numbers.append(int(name))
    if not file_numbers:
        if output_filename:
            open(output_filename, 'wb')  #erase contents of file
        raise Exception(
            "You must select at least one file to import into Galaxy.")
    file_numbers.sort()
    used_filenames = []
    for file_num in file_numbers:
        url_key = "%s%i" % (file_url_prefix, file_num)
        download_url = datasource_params.get(url_key, None)
        if download_url is None:
            break
        filetype_key = "%s%i" % (file_type_prefix, file_num)
        filetype_url = datasource_params.get(filetype_key, None)
        galaxy_ext = get_galaxy_ext_from_genomespace_format_url(
            url_opener, filetype_url)
        formated_download_url = "%s?%s" % (
            download_url, urllib.urlencode([('dataformat', filetype_url)]))
        new_file_request = urllib2.Request(formated_download_url)
        new_file_request.get_method = lambda: 'GET'
        target_download_url = url_opener.open(new_file_request)
        filename = None
        if 'Content-Disposition' in target_download_url.info():
            # If the response has Content-Disposition, try to get filename from it
            content_disposition = dict(
                map(
                    lambda x: x.strip().split('=')
                    if '=' in x else (x.strip(), ''),
                    target_download_url.info()['Content-Disposition'].split(
                        ';')))
            if 'filename' in content_disposition:
                filename = content_disposition['filename'].strip("\"'")
        if not filename:
            parsed_url = urlparse.urlparse(download_url)
            query_params = urlparse.parse_qs(parsed_url[4])
            filename = urllib.unquote_plus(parsed_url[2].split('/')[-1])
        if not filename:
            filename = download_url
        metadata_dict = None
        original_filename = filename
        if output_filename is None:
            filename = ''.join(c in VALID_CHARS and c or '-' for c in filename)
            while filename in used_filenames:
                filename = "-%s" % filename
            used_filenames.append(filename)
            output_filename = os.path.join(
                os.getcwd(),
                'primary_%i_%s_visible_%s' % (hda_id, filename, galaxy_ext))

            metadata_dict = dict(type='new_primary_dataset',
                                 base_dataset_id=dataset_id,
                                 ext=galaxy_ext,
                                 filename=output_filename,
                                 name="GenomeSpace import on %s" %
                                 (original_filename))
        else:
            if dataset_id is not None:
                metadata_dict = dict(type='dataset',
                                     dataset_id=dataset_id,
                                     ext=galaxy_ext,
                                     name="GenomeSpace import on %s" %
                                     (filename))
        output_file = open(output_filename, 'wb')
        chunk_write(target_download_url, output_file)
        output_file.close()

        if (galaxy_ext == AUTO_GALAXY_EXT or filetype_url
                == GENOMESPACE_FORMAT_IDENTIFIER_UNKNOWN) and metadata_dict:
            #try to sniff datatype
            try:
                galaxy_ext = sniff.handle_uploaded_dataset_file(
                    output_filename, datatypes_registry)
            except:
                #sniff failed
                galaxy_ext = original_filename.rsplit('.', 1)[-1]
                if galaxy_ext not in datatypes_registry.datatypes_by_extension:
                    galaxy_ext = DEFAULT_GALAXY_EXT
            metadata_dict['ext'] = galaxy_ext

        output_filename = None  #only have one filename available

        #write out metadata info
        if metadata_dict:
            metadata_parameter_file.write("%s\n" % json.dumps(metadata_dict))

    metadata_parameter_file.close()
    return True
Example #30
def download_from_genomespace_importer( username, token, json_parameter_file, genomespace_site ):
    json_params = simplejson.loads( open( json_parameter_file, 'r' ).read() )
    datasource_params = json_params.get( 'param_dict' )
    assert None not in [ username, token ], "Missing GenomeSpace username or token."
    output_filename = datasource_params.get( "output_file1", None )
    dataset_id = json_params['output_data'][0]['dataset_id']
    hda_id = json_params['output_data'][0]['hda_id']
    url_opener = get_cookie_opener( username, token )
    #load and set genomespace format ids to galaxy exts
    genomespace_site_dict = get_genomespace_site_urls()[ genomespace_site ]
    set_genomespace_format_identifiers( url_opener, genomespace_site_dict['dmServer'] )
    file_url_name = "URL"
    metadata_parameter_file = open( json_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'wb' )
    #setup datatypes registry for sniffing
    datatypes_registry = Registry()
    datatypes_registry.load_datatypes( root_dir = json_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config = json_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] )
    url_param = datasource_params.get( file_url_name, None )
    for download_url in url_param.split( ',' ):
        using_temp_file = False
        parsed_url = urlparse.urlparse( download_url )
        query_params = urlparse.parse_qs( parsed_url[4] )
        #write file to disk
        new_file_request = urllib2.Request( download_url )
        new_file_request.get_method = lambda: 'GET'
        target_download_url = url_opener.open( new_file_request )
        filename = None
        if 'Content-Disposition' in target_download_url.info():
            content_disposition = dict( map( lambda x: x.strip().split('=') if '=' in x else ( x.strip(),'' ), target_download_url.info()['Content-Disposition'].split( ';' ) ) )
            if 'filename' in content_disposition:
                filename = content_disposition[ 'filename' ].strip( "\"'" )
        if not filename:
            parsed_url = urlparse.urlparse( download_url )
            query_params = urlparse.parse_qs( parsed_url[4] )
            filename = urllib.unquote_plus( parsed_url[2].split( '/' )[-1] )
        if output_filename is None:
            #need to use a temp file here, because we do not know the ext yet
            using_temp_file = True
            output_filename = tempfile.NamedTemporaryFile( prefix='tmp-genomespace-importer-' ).name
        output_file = open( output_filename, 'wb' )
        chunk_write( target_download_url, output_file )
        output_file.close()
        
        #determine file format
        file_type = None
        if 'dataformat' in query_params: #this is a converted dataset
            file_type = query_params[ 'dataformat' ][0]
            file_type = get_galaxy_ext_from_genomespace_format_url( url_opener, file_type )
        else:
            try:
                #get and use GSMetadata object
                download_file_path = download_url.split( "%s/file/" % ( genomespace_site_dict['dmServer'] ), 1)[-1] #FIXME: This is a very bad way to get the path for determining metadata. There needs to be a way to query the API using the download URL to get to the metadata object
                metadata_request = urllib2.Request( "%s/%s/filemetadata/%s" % ( genomespace_site_dict['dmServer'], GENOMESPACE_API_VERSION_STRING, download_file_path ) )
                metadata_request.get_method = lambda: 'GET'
                metadata_url = url_opener.open( metadata_request )
                file_metadata_dict = simplejson.loads( metadata_url.read() )
                metadata_url.close()
                file_type = file_metadata_dict.get( 'dataFormat', None )
                if file_type and file_type.get( 'url' ):
                    file_type = file_type.get( 'url' )
                    file_type = get_galaxy_ext_from_genomespace_format_url( url_opener, file_type, default = None )
            except:
                pass
        if file_type is None:
            #try to sniff datatype
            try:
                file_type = sniff.handle_uploaded_dataset_file( output_filename, datatypes_registry )
            except:
                pass #sniff failed
        if file_type is None and '.' in parsed_url[2]:
            #still no known datatype, fall back to using extension
            file_type = parsed_url[2].rsplit( '.', 1 )[-1]
            file_type = GENOMESPACE_EXT_TO_GALAXY_EXT.get( file_type, file_type )
        if file_type is None:
            #use default extension (e.g. 'data')
            file_type = DEFAULT_GALAXY_EXT
        
        #save json info for single primary dataset
        if dataset_id is not None:
            metadata_parameter_file.write( "%s\n" % simplejson.dumps( dict(
                type = 'dataset',
                dataset_id = dataset_id,
                ext = file_type,
                name = "GenomeSpace importer on %s" % ( filename ) ) ) )
        #if using tmp file, move the file to the new file path dir to get scooped up later
        if using_temp_file:
            shutil.move( output_filename, os.path.join( datasource_params['__new_file_path__'],  'primary_%i_output%s_visible_%s' % ( hda_id, ''.join( c in VALID_CHARS and c or '-' for c in filename ), file_type ) ) )
        
        dataset_id = None #only one primary dataset available
        output_filename = None #only have one filename available
    metadata_parameter_file.close()
    return True
Example #31
def __main__():
    filename = sys.argv[1]
    try:
        max_file_size = int(sys.argv[2])
    except Exception:
        max_file_size = 0

    job_params, params = load_input_parameters(filename)
    if job_params is None:  # using an older tabular file
        enhanced_handling = False
        job_params = dict(param_dict=params)
        job_params['output_data'] = [
            dict(out_data_name='output',
                 ext='data',
                 file_name=filename,
                 extra_files_path=None)
        ]
        job_params['job_config'] = dict(
            GALAXY_ROOT_DIR=GALAXY_ROOT_DIR,
            GALAXY_DATATYPES_CONF_FILE=GALAXY_DATATYPES_CONF_FILE,
            TOOL_PROVIDED_JOB_METADATA_FILE=TOOL_PROVIDED_JOB_METADATA_FILE)
    else:
        enhanced_handling = True
        json_file = open(
            job_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'w'
        )  # specially named file for output junk to pass onto set metadata

    datatypes_registry = Registry()
    datatypes_registry.load_datatypes(
        root_dir=job_params['job_config']['GALAXY_ROOT_DIR'],
        config=job_params['job_config']['GALAXY_DATATYPES_CONF_FILE'])

    URL = params.get(
        'URL', None
    )  # using exactly URL indicates that only one dataset is being downloaded
    URL_method = params.get('URL_method', None)

    for data_dict in job_params['output_data']:
        cur_filename = data_dict.get('file_name', filename)
        cur_URL = params.get(
            '%s|%s|URL' % (GALAXY_PARAM_PREFIX, data_dict['out_data_name']),
            URL)
        if not cur_URL or urlparse(cur_URL).scheme not in ('http', 'https',
                                                           'ftp'):
            open(cur_filename, 'w').write("")
            stop_err(
                'The remote data source application has not sent back a URL parameter in the request.'
            )

        # The following calls to urlopen() will use the above default timeout
        try:
            if not URL_method or URL_method == 'get':
                page = urlopen(cur_URL, timeout=DEFAULT_SOCKET_TIMEOUT)
            elif URL_method == 'post':
                page = urlopen(cur_URL,
                               urlencode(params).encode("utf-8"),
                               timeout=DEFAULT_SOCKET_TIMEOUT)
        except Exception as e:
            stop_err(
                'The remote data source application may be off line, please try again later. Error: %s'
                % str(e))
        if max_file_size:
            file_size = int(page.info().get('Content-Length', 0))
            if file_size > max_file_size:
                stop_err(
                    'The size of the data (%d bytes) you have requested exceeds the maximum allowed (%d bytes) on this server.'
                    % (file_size, max_file_size))
        try:
            cur_filename = sniff.stream_to_open_named_file(
                page,
                os.open(cur_filename, os.O_WRONLY | os.O_CREAT),
                cur_filename,
                source_encoding=get_charset_from_http_headers(page.headers))
        except Exception as e:
            stop_err('Unable to fetch %s:\n%s' % (cur_URL, e))

        # here import checks that upload tool performs
        if enhanced_handling:
            try:
                ext = sniff.handle_uploaded_dataset_file(filename,
                                                         datatypes_registry,
                                                         ext=data_dict['ext'])
            except Exception as e:
                stop_err(str(e))
            info = dict(type='dataset',
                        dataset_id=data_dict['dataset_id'],
                        ext=ext)

            json_file.write("%s\n" % dumps(info))
Example #32
def load_datatypes_registry(job_params):
    datatypes_registry = Registry()
    datatypes_registry.load_datatypes(
        root_dir=job_params['job_config']['GALAXY_ROOT_DIR'],
        config=job_params['job_config']['GALAXY_DATATYPES_CONF_FILE'])
    return datatypes_registry
Example #33
class ConfiguresGalaxyMixin:
    """Shared code for configuring Galaxy-like app objects."""

    config: config.GalaxyAppConfiguration
    tool_cache: ToolCache
    job_config: jobs.JobConfiguration
    toolbox: tools.ToolBox
    toolbox_search: ToolBoxSearch
    container_finder: containers.ContainerFinder

    def _configure_genome_builds(self,
                                 data_table_name="__dbkeys__",
                                 load_old_style=True):
        self.genome_builds = GenomeBuilds(self,
                                          data_table_name=data_table_name,
                                          load_old_style=load_old_style)

    def wait_for_toolbox_reload(self, old_toolbox):
        timer = ExecutionTimer()
        log.debug('Waiting for toolbox reload')
        # Wait till toolbox reload has been triggered (or more than 60 seconds have passed)
        while timer.elapsed < 60:
            if self.toolbox.has_reloaded(old_toolbox):
                log.debug('Finished waiting for toolbox reload %s', timer)
                break
            time.sleep(0.1)
        else:
            log.warning(
                'Waiting for toolbox reload timed out after 60 seconds')

    def _configure_tool_config_files(self):
        if self.config.shed_tool_config_file not in self.config.tool_configs:
            self.config.tool_configs.append(self.config.shed_tool_config_file)
        # The value of migrated_tools_config is the file reserved for containing only those tools that have been
        # eliminated from the distribution and moved to the tool shed. If migration checking is disabled, only add it if
        # it exists (since this may be an existing deployment where migrations were previously run).
        if (os.path.exists(self.config.migrated_tools_config)
                and self.config.migrated_tools_config
                not in self.config.tool_configs):
            self.config.tool_configs.append(self.config.migrated_tools_config)

    def _configure_toolbox(self):
        if not isinstance(self, BasicSharedApp):
            raise Exception("Must inherit from BasicSharedApp")

        self.citations_manager = CitationsManager(self)
        self.biotools_metadata_source = get_galaxy_biotools_metadata_source(
            self.config)

        self.dynamic_tools_manager = DynamicToolManager(self)
        self._toolbox_lock = threading.RLock()
        self.toolbox = tools.ToolBox(self.config.tool_configs,
                                     self.config.tool_path, self)
        galaxy_root_dir = os.path.abspath(self.config.root)
        file_path = os.path.abspath(self.config.file_path)
        app_info = AppInfo(
            galaxy_root_dir=galaxy_root_dir,
            default_file_path=file_path,
            tool_data_path=self.config.tool_data_path,
            shed_tool_data_path=self.config.shed_tool_data_path,
            outputs_to_working_directory=self.config.outputs_to_working_directory,
            container_image_cache_path=self.config.container_image_cache_path,
            library_import_dir=self.config.library_import_dir,
            enable_mulled_containers=self.config.enable_mulled_containers,
            container_resolvers_config_file=self.config.container_resolvers_config_file,
            container_resolvers_config_dict=self.config.container_resolvers,
            involucro_path=self.config.involucro_path,
            involucro_auto_init=self.config.involucro_auto_init,
            mulled_channels=self.config.mulled_channels,
        )
        mulled_resolution_cache = None
        if self.config.mulled_resolution_cache_type:
            cache_opts = {
                "cache.type": self.config.mulled_resolution_cache_type,
                "cache.data_dir": self.config.mulled_resolution_cache_data_dir,
                "cache.lock_dir": self.config.mulled_resolution_cache_lock_dir,
                "cache.expire": self.config.mulled_resolution_cache_expire,
            }
            mulled_resolution_cache = CacheManager(
                **parse_cache_config_options(cache_opts)).get_cache(
                    'mulled_resolution')
        self.container_finder = containers.ContainerFinder(
            app_info, mulled_resolution_cache=mulled_resolution_cache)
        self._set_enabled_container_types()
        index_help = getattr(self.config, "index_tool_help", True)
        self.toolbox_search = ToolBoxSearch(
            self.toolbox,
            index_dir=self.config.tool_search_index_dir,
            index_help=index_help)

    def reindex_tool_search(self):
        # Call this when tools are added or removed.
        self.toolbox_search.build_index(tool_cache=self.tool_cache,
                                        toolbox=self.toolbox)
        self.tool_cache.reset_status()

    def _set_enabled_container_types(self):
        container_types_to_destinations = collections.defaultdict(list)
        for destinations in self.job_config.destinations.values():
            for destination in destinations:
                for enabled_container_type in self.container_finder._enabled_container_types(
                        destination.params):
                    container_types_to_destinations[
                        enabled_container_type].append(destination)
        self.toolbox.dependency_manager.set_enabled_container_types(
            container_types_to_destinations)
        self.toolbox.dependency_manager.resolver_classes.update(
            self.container_finder.default_container_registry.resolver_classes)
        self.toolbox.dependency_manager.dependency_resolvers.extend(
            self.container_finder.default_container_registry.container_resolvers)

    def _configure_tool_data_tables(self, from_shed_config):
        # Initialize tool data tables using the config defined by self.config.tool_data_table_config_path.
        self.tool_data_tables = ToolDataTableManager(
            tool_data_path=self.config.tool_data_path,
            config_filename=self.config.tool_data_table_config_path,
            other_config_dict=self.config)
        # Load additional entries defined by self.config.shed_tool_data_table_config into tool data tables.
        try:
            self.tool_data_tables.load_from_config_file(
                config_filename=self.config.shed_tool_data_table_config,
                tool_data_path=self.tool_data_tables.tool_data_path,
                from_shed_config=from_shed_config)
        except OSError as exc:
            # Missing shed_tool_data_table_config is okay if it's the default
            if exc.errno != errno.ENOENT or self.config.is_set(
                    'shed_tool_data_table_config'):
                raise

    def _configure_datatypes_registry(self, installed_repository_manager=None):
        # Create an empty datatypes registry.
        self.datatypes_registry = Registry(self.config)
        if installed_repository_manager and self.config.load_tool_shed_datatypes:
            # Load proprietary datatypes defined in datatypes_conf.xml files in all installed tool shed
            # repositories. These are loaded before the distribution's datatypes because Galaxy's default
            # sniffers include generic ones (e.g. text, xml) that match almost anything, which would
            # otherwise prevent proprietary sniffers from ever being used. If a proprietary datatype and a
            # distribution datatype share an extension, the distribution datatype takes precedence; if two
            # proprietary datatypes conflict, the one from the earliest-installed repository wins.
            installed_repository_manager.load_proprietary_datatypes()
        # Load the data types in the Galaxy distribution, which are defined in self.config.datatypes_config.
        datatypes_configs = self.config.datatypes_config
        for datatypes_config in listify(datatypes_configs):
            # Setting override=False would make earlier files take precedence,
            # but then they wouldn't override tool shed datatypes.
            self.datatypes_registry.load_datatypes(self.config.root,
                                                   datatypes_config,
                                                   override=True)

    def _configure_object_store(self, **kwds):
        self.object_store = build_object_store_from_config(self.config, **kwds)

    def _configure_security(self):
        self.security = IdEncodingHelper(id_secret=self.config.id_secret)
        BaseDatabaseIdField.security = self.security

    def _configure_tool_shed_registry(self):
        # Set up the tool sheds registry
        if os.path.isfile(self.config.tool_sheds_config_file):
            self.tool_shed_registry = tool_shed_registry.Registry(
                self.config.tool_sheds_config_file)
        else:
            self.tool_shed_registry = tool_shed_registry.Registry()

    def _configure_models(self,
                          check_migrate_databases=False,
                          config_file=None):
        """Preconditions: object_store must be set on self."""
        db_url = self.config.database_connection
        install_db_url = self.config.install_database_connection
        # TODO: Consider more aggressive check here that this is not the same
        # database file under the hood.
        combined_install_database = not (install_db_url
                                         and install_db_url != db_url)
        install_db_url = install_db_url or db_url
        install_database_options = (
            self.config.database_engine_options if combined_install_database
            else self.config.install_database_engine_options)

        if self.config.database_wait:
            self._wait_for_database(db_url)

        if getattr(self.config, "max_metadata_value_size", None):
            custom_types.MAX_METADATA_VALUE_SIZE = self.config.max_metadata_value_size

        if check_migrate_databases:
            # Initialize the database / check for the appropriate schema version.
            # If this is a new installation, we'll restrict the tool migration messaging.
            create_or_verify_database(
                db_url,
                config_file,
                self.config.database_engine_options,
                app=self,
                map_install_models=combined_install_database)
            if not combined_install_database:
                tsi_create_or_verify_database(install_db_url,
                                              install_database_options,
                                              app=self)

        self.model = init_models_from_config(
            self.config,
            map_install_models=combined_install_database,
            object_store=self.object_store,
            trace_logger=getattr(self, "trace_logger", None))
        if combined_install_database:
            log.info(
                "Install database targeting Galaxy's database configuration.")
            self.install_model = self.model
        else:
            install_db_url = self.config.install_database_connection
            log.info(
                f"Install database using its own connection {install_db_url}")
            self.install_model = install_mapping.init(
                install_db_url, install_database_options)

    def _configure_signal_handlers(self, handlers):
        for sig, handler in handlers.items():
            signal.signal(sig, handler)

    def _wait_for_database(self, url):
        attempts = self.config.database_wait_attempts
        pause = self.config.database_wait_sleep
        for i in range(1, attempts):
            try:
                database_exists(url)
                break
            except Exception:
                log.info("Waiting for database: attempt %d of %d" %
                         (i, attempts))
                time.sleep(pause)

    @property
    def tool_dependency_dir(self):
        return self.toolbox.dependency_manager.default_base_path
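

# Standalone sketch of the retry pattern used by _wait_for_database above. The attempt
# count and pause defaults are illustrative here, not Galaxy configuration values.
import logging
import time

log = logging.getLogger(__name__)


def wait_until_available(check, attempts=10, pause=1.0):
    """Call check() until it stops raising or the attempts run out."""
    for attempt in range(1, attempts):
        try:
            check()
            break
        except Exception:
            log.info("Waiting for database: attempt %d of %d", attempt, attempts)
            time.sleep(pause)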
Example #34
0
def download_from_genomespace_file_browser( json_parameter_file, genomespace_site ):
    json_params = json.loads( open( json_parameter_file, 'r' ).read() )
    datasource_params = json_params.get( 'param_dict' )
    username = datasource_params.get( "gs-username", None )
    token = datasource_params.get( "gs-token", None )
    assert None not in [ username, token ], "Missing GenomeSpace username or token."
    output_filename = datasource_params.get( "output", None )
    dataset_id = json_params['output_data'][0]['dataset_id']
    hda_id = json_params['output_data'][0]['hda_id']
    url_opener = get_cookie_opener( username, token )
    #load and set genomespace format ids to galaxy exts
    genomespace_site_dict = get_genomespace_site_urls()[ genomespace_site ]
    set_genomespace_format_identifiers( url_opener, genomespace_site_dict['dmServer'] )
    
    file_url_prefix = "fileUrl"
    file_type_prefix = "fileFormat"
    metadata_parameter_file = open( json_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'wb' )
    
    #setup datatypes registry for sniffing
    datatypes_registry = Registry()
    datatypes_registry.load_datatypes( root_dir = json_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config = json_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] )
    
    file_numbers = []
    for name in datasource_params.keys():
        if name.startswith( file_url_prefix ):
            name = name[len( file_url_prefix ):]
            file_numbers.append( int( name ) )
    if not file_numbers:
        if output_filename:
            open( output_filename, 'wb' ) #erase contents of file
        raise Exception( "You must select at least one file to import into Galaxy." )
    file_numbers.sort()
    used_filenames = []
    for file_num in file_numbers:
        url_key = "%s%i" % ( file_url_prefix, file_num )
        download_url = datasource_params.get( url_key, None )
        if download_url is None:
            break
        filetype_key = "%s%i" % ( file_type_prefix, file_num )
        filetype_url = datasource_params.get( filetype_key, None )
        galaxy_ext = get_galaxy_ext_from_genomespace_format_url( url_opener, filetype_url )
        formatted_download_url = "%s?%s" % ( download_url, urllib.urlencode( [ ( 'dataformat', filetype_url ) ] ) )
        new_file_request = urllib2.Request( formatted_download_url )
        new_file_request.get_method = lambda: 'GET'
        target_download_url = url_opener.open( new_file_request )
        filename = None
        if 'Content-Disposition' in target_download_url.info():
            # If the response has Content-Disposition, try to get filename from it
            content_disposition = dict( map( lambda x: x.strip().split('=') if '=' in x else ( x.strip(),'' ), target_download_url.info()['Content-Disposition'].split( ';' ) ) )
            if 'filename' in content_disposition:
                filename = content_disposition[ 'filename' ].strip( "\"'" )
        if not filename:
            parsed_url = urlparse.urlparse( download_url )
            query_params = urlparse.parse_qs( parsed_url[4] )
            filename = urllib.unquote_plus( parsed_url[2].split( '/' )[-1] )
        if not filename:
            filename = download_url
        metadata_dict = None
        original_filename = filename
        if output_filename is None:
            filename = ''.join( c in VALID_CHARS and c or '-' for c in filename )
            while filename in used_filenames:
                filename = "-%s" % filename
            used_filenames.append( filename )
            output_filename = os.path.join( os.getcwd(),  'primary_%i_%s_visible_%s' % ( hda_id, filename, galaxy_ext ) )
            
            metadata_dict = dict( type = 'new_primary_dataset',
                                base_dataset_id = dataset_id,
                                ext = galaxy_ext,
                                filename = output_filename,
                                name = "GenomeSpace import on %s" % ( original_filename ) )
        else:
            if dataset_id is not None:
                metadata_dict = dict( type = 'dataset',
                                dataset_id = dataset_id,
                                ext = galaxy_ext,
                                name = "GenomeSpace import on %s" % ( filename ) )
        output_file = open( output_filename, 'wb' )
        chunk_write( target_download_url, output_file )
        output_file.close()
        
        if ( galaxy_ext == AUTO_GALAXY_EXT or filetype_url == GENOMESPACE_FORMAT_IDENTIFIER_UNKNOWN ) and metadata_dict:
            #try to sniff datatype
            try:
                galaxy_ext = sniff.handle_uploaded_dataset_file( output_filename, datatypes_registry )
            except:
                #sniff failed
                galaxy_ext = original_filename.rsplit( '.', 1 )[-1]
                if galaxy_ext not in datatypes_registry.datatypes_by_extension:
                    galaxy_ext = DEFAULT_GALAXY_EXT
            metadata_dict[ 'ext' ] = galaxy_ext
        
        output_filename = None #only have one filename available
        
        #write out metadata info
        if metadata_dict:
            metadata_parameter_file.write( "%s\n" % json.dumps( metadata_dict ) )
        
    metadata_parameter_file.close()
    return True
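

# Isolated, hedged sketch of the Content-Disposition parsing performed above; the helper
# name is hypothetical and the header value is assumed to be a plain string.
def filename_from_content_disposition(header_value):
    # Split an "attachment; filename=..." style header into key/value pairs.
    parts = {}
    for item in header_value.split(';'):
        key, _, value = item.strip().partition('=')
        parts[key] = value
    # Strip surrounding quotes, mirroring the strip("\"'") call above.
    filename = parts.get('filename', '').strip("\"'")
    return filename or None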
Example #35
0
                pass
        else:
            # this should not happen, but it's here just in case
            shutil.copy(dataset.path, output_path)
    else:
        shutil.move(dataset.path, output_path)
    # Write the job info
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout='uploaded %s file' % data_type,
                name=dataset.name,
                line_count=line_count)
    json_file.write(to_json_string(info) + "\n")
    # Groom the dataset content if necessary
    datatype = Registry().get_datatype_by_extension(ext)
    datatype.groom_dataset_content(output_path)


def add_composite_file(dataset, json_file, output_path, files_path):
    if dataset.composite_files:
        os.mkdir(files_path)
        for name, value in dataset.composite_files.iteritems():
            value = util.bunch.Bunch(**value)
            if dataset.composite_file_paths[
                    value.name] is None and not value.optional:
                file_err(
                    'A required composite data file was not provided (%s)' %
                    name, dataset, json_file)
                break
            elif dataset.composite_file_paths[value.name] is not None:
Example #36
0
import os
import sys

from sqlalchemy.sql import label  # noqa

sys.path.insert(
    1,
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'lib')))

from galaxy.datatypes.registry import Registry
from galaxy.model import *  # noqa
from galaxy.model import set_datatypes_registry  # More explicit than `*` import
from galaxy.model.mapping import init
from galaxy.model.orm.scripts import get_config

if sys.version_info > (3, ):
    long = int

registry = Registry()
registry.load_datatypes()
set_datatypes_registry(registry)
db_url = get_config(sys.argv)['db_url']
sa_session = init('/tmp/', db_url).context
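

# Hypothetical follow-on to the setup above: with the registry installed and sa_session
# created, the mapped classes pulled in by `from galaxy.model import *` can be queried.
# `Dataset` and the count query are illustrative only.
def count_datasets(session):
    # session is the scoped SQLAlchemy session returned by init(...).context above.
    return session.query(Dataset).count()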


# Helper function for debugging sqlalchemy queries...
# http://stackoverflow.com/questions/5631078/sqlalchemy-print-the-actual-query
def printquery(statement, bind=None):
    """
    Print a query with its values filled in, for debugging purposes *only*.

    For security, you should always separate queries from their values;
    also note that this function is quite slow.
    """
Example #37
0
    elif dataset.type in ("server_dir", "path_paste"):
        shutil.copy(dataset.path, output_path)
    else:
        shutil.move(dataset.path, output_path)
    # Write the job info
    info = dict(
        type="dataset",
        dataset_id=dataset.dataset_id,
        ext=ext,
        stdout="uploaded %s file" % data_type,
        name=dataset.name,
        line_count=line_count,
    )
    json_file.write(to_json_string(info) + "\n")
    # Groom the dataset content if necessary
    datatype = Registry().get_datatype_by_extension(ext)
    datatype.groom_dataset_content(output_path)


def add_composite_file(dataset, json_file, output_path, files_path):
    if dataset.composite_files:
        os.mkdir(files_path)
        for name, value in dataset.composite_files.items():
            value = util.bunch.Bunch(**value)
            if dataset.composite_file_paths[value.name] is None and not value.optional:
                file_err("A required composite data file was not provided (%s)" % name, dataset, json_file)
                break
            elif dataset.composite_file_paths[value.name] is not None:
                if not value.is_binary:
                    # `value` is the Bunch built from this composite file's definition above;
                    # use it directly (`uploaded_dataset` is not defined in this scope).
                    if value.space_to_tab:
                        sniff.convert_newlines_sep2tabs(dataset.composite_file_paths[value.name]["path"])
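

# Hedged standalone sketch (not Galaxy's implementation) of what a space-to-tab
# normalization pass like sniff.convert_newlines_sep2tabs conceptually does.
import re
import shutil
import tempfile


def sep2tabs_sketch(path):
    # Rewrite the file in place with Unix newlines and runs of spaces/tabs
    # collapsed to a single tab.
    with open(path) as src, tempfile.NamedTemporaryFile('w', delete=False) as dst:
        for line in src:
            dst.write(re.sub(r'[ \t]+', '\t', line.rstrip('\r\n')) + '\n')
        tmp_name = dst.name
    shutil.move(tmp_name, path)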