def __init__(self, root=None, **kwargs):
    Bunch.__init__(self, **kwargs)
    root = root or '/tmp'
    self.security = security.SecurityHelper(id_secret='bler')
    self.use_remote_user = kwargs.get('use_remote_user', False)
    self.file_path = '/tmp'
    self.jobs_directory = '/tmp'
    self.new_file_path = '/tmp'
    self.tool_data_path = '/tmp'
    self.object_store_config_file = ''
    self.object_store = 'disk'
    self.object_store_check_old_style = False
    self.user_activation_on = False
    self.new_user_dataset_access_role_default_private = False
    self.expose_dataset_path = True
    self.allow_user_dataset_purge = True
    self.enable_old_display_applications = True
    self.umask = 0o77
    # Following two required by GenomeBuilds
    self.len_file_path = os.path.join('tool-data', 'shared', 'ucsc', 'chrom')
    self.builds_file_path = os.path.join('tool-data', 'shared', 'ucsc', 'builds.txt.sample')
    self.migrated_tools_config = "/tmp/migrated_tools_conf.xml"
    self.preserve_python_environment = "always"
    # set by MockDir
    self.root = root
def mock_trans(has_user=True, is_admin=False):
    trans = Bunch(user_is_admin=lambda: is_admin)
    if has_user:
        trans.user = Bunch(preferences={})
    else:
        trans.user = None
    return trans
def mock_trans( has_user=True ):
    trans = Bunch( )
    if has_user:
        trans.user = Bunch(preferences={})
    else:
        trans.user = None
    return trans
def __init__( self, **kwd ):
    Bunch.__init__( self, **kwd )
    self.primary_file = None
    self.composite_files = odict()
    self.dbkey = None
    self.warnings = []
    self._temp_filenames = []  # store all created filenames here, delete on cleanup
def filter_factory(config_dict=None):
    if config_dict is None:
        config_dict = dict(
            tool_filters=["filtermod:filter_tool"],
            tool_section_filters=["filtermod:filter_section"],
            tool_label_filters=["filtermod:filter_label_1", "filtermod:filter_label_2"],
        )
    config = Bunch(**config_dict)
    config.toolbox_filter_base_modules = "galaxy.tools.filters,unit.tools.filter_modules"
    app = Bunch(config=config)
    toolbox = Bunch(app=app)
    return FilterFactory(toolbox)
def __init__( self, **kwargs ):
    Bunch.__init__( self, **kwargs )
    self.security = security.SecurityHelper( id_secret='bler' )
    self.file_path = '/tmp'
    self.job_working_directory = '/tmp'
    self.new_file_path = '/tmp'
    self.object_store_config_file = ''
    self.object_store = 'disk'
    self.object_store_check_old_style = False
    self.user_activation_on = False
    self.new_user_dataset_access_role_default_private = False
    self.expose_dataset_path = True
    self.allow_user_dataset_purge = True
    self.enable_old_display_applications = True
def get_dataset(filename, index_attr='bam_index', dataset_id=1, has_data=True):
    dataset = Bunch()
    # Honor the has_data argument rather than always reporting True
    dataset.has_data = lambda: has_data
    dataset.id = dataset_id
    dataset.metadata = Bunch()
    with get_input_files(filename) as input_files, get_tmp_path() as index_path:
        dataset.file_name = input_files[0]
        index = Bunch()
        index.file_name = index_path
        setattr(dataset.metadata, index_attr, index)
        yield dataset
def __init__( self, root=None, **kwargs ):
    Bunch.__init__( self, **kwargs )
    self.security = security.SecurityHelper( id_secret='bler' )
    self.use_remote_user = kwargs.get( 'use_remote_user', False )
    self.file_path = '/tmp'
    self.jobs_directory = '/tmp'
    self.new_file_path = '/tmp'
    self.object_store_config_file = ''
    self.object_store = 'disk'
    self.object_store_check_old_style = False
    self.user_activation_on = False
    self.new_user_dataset_access_role_default_private = False
    self.expose_dataset_path = True
    self.allow_user_dataset_purge = True
    self.enable_old_display_applications = True
    self.umask = 0o77
    # set by MockDir
    self.root = root
def init( file_path, url, engine_options={}, create_tables=False ):
    """Connect mappings to the database"""
    # Load the appropriate db module
    load_egg_for_url( url )
    # Create the database engine
    engine = create_engine( url, **engine_options )
    # Connect the metadata to the database.
    metadata.bind = engine
    # Clear any existing contextual sessions and reconfigure
    Session.remove()
    Session.configure( bind=engine )
    # Create tables if needed
    if create_tables:
        metadata.create_all()
    # Pack everything into a bunch
    result = Bunch( **globals() )
    result.engine = engine
    result.session = Session
    result.create_tables = create_tables
    # Load local tool shed security policy
    result.security_agent = CommunityRBACAgent( result )
    result.shed_counter = shed_statistics.ShedCounter( result )
    result.hgweb_config_manager = galaxy.webapps.tool_shed.util.hgweb_config.HgWebConfigManager()
    return result
def main():
    parser = optparse.OptionParser()
    parser.add_option( '-b', '--buffer', dest='buffer', type='int', default=1000000,
                       help='Number of lines to buffer at a time. Default: 1,000,000 lines. A buffer of 0 will attempt to use memory only.' )
    parser.add_option( '-d', '--index_depth', dest='index_depth', type='int', default=3,
                       help='Depth to use on filebased offset indexing. Default: 3.' )
    parser.add_option( '-p', '--keep_partial', action='store_true', dest='keep_partial', default=False,
                       help='Keep rows in first input which are missing identifiers.' )
    parser.add_option( '-u', '--keep_unmatched', action='store_true', dest='keep_unmatched', default=False,
                       help='Keep rows in first input which are not joined with the second input.' )
    parser.add_option( '-f', '--fill_options_file', dest='fill_options_file', type='str', default=None,
                       help='Fill empty columns with values from a JSONified file.' )
    parser.add_option( '-H', '--keep_headers', action='store_true', dest='keep_headers', default=False,
                       help='Keep the headers' )
    options, args = parser.parse_args()
    fill_options = None
    if options.fill_options_file is not None:
        try:
            fill_options = Bunch(**stringify_dictionary_keys(json.load(open(options.fill_options_file))))
        except Exception as e:
            print("Warning: Ignoring fill options due to json error (%s)." % e)
    if fill_options is None:
        fill_options = Bunch()
    if 'fill_unjoined_only' not in fill_options:
        fill_options.fill_unjoined_only = True
    if 'file1_columns' not in fill_options:
        fill_options.file1_columns = None
    if 'file2_columns' not in fill_options:
        fill_options.file2_columns = None
    try:
        filename1 = args[0]
        filename2 = args[1]
        column1 = int(args[2]) - 1
        column2 = int(args[3]) - 1
        out_filename = args[4]
    except Exception:
        print("Error parsing command line.", file=sys.stderr)
        sys.exit()
    # Character for splitting fields and joining lines
    split = "\t"
    return join_files(filename1, column1, filename2, column2, out_filename, split,
                      options.buffer, options.keep_unmatched, options.keep_partial,
                      options.keep_headers, options.index_depth, fill_options=fill_options)
def job_io(self):
    return Bunch(get_output_fnames=lambda: ['output1'], check_job_script_integrity=False)
class ToolDependency(object): installation_status = Bunch(NEVER_INSTALLED='Never installed', INSTALLING='Installing', INSTALLED='Installed', ERROR='Error', UNINSTALLED='Uninstalled') states = Bunch(INSTALLING='running', OK='ok', WARNING='queued', ERROR='error', UNINSTALLED='deleted_new') def __init__(self, tool_shed_repository_id=None, name=None, version=None, type=None, status=None, error_message=None): self.tool_shed_repository_id = tool_shed_repository_id self.name = name self.version = version self.type = type self.status = status self.error_message = error_message @property def can_install(self): return self.status in [self.installation_status.NEVER_INSTALLED, self.installation_status.UNINSTALLED] @property def can_uninstall(self): return self.status in [self.installation_status.ERROR, self.installation_status.INSTALLED] @property def can_update(self): return self.status in [self.installation_status.NEVER_INSTALLED, self.installation_status.INSTALLED, self.installation_status.ERROR, self.installation_status.UNINSTALLED] def get_env_shell_file_path(self, app): installation_directory = self.installation_directory(app) file_path = os.path.join(installation_directory, 'env.sh') if os.path.exists(file_path): return file_path return None @property def in_error_state(self): return self.status == self.installation_status.ERROR def installation_directory(self, app): if self.type == 'package': return os.path.join(app.config.tool_dependency_dir, self.name, self.version, self.tool_shed_repository.owner, self.tool_shed_repository.name, self.tool_shed_repository.installed_changeset_revision) if self.type == 'set_environment': return os.path.join(app.config.tool_dependency_dir, 'environment_settings', self.name, self.tool_shed_repository.owner, self.tool_shed_repository.name, self.tool_shed_repository.installed_changeset_revision) @property def is_installed(self): return self.status == self.installation_status.INSTALLED
def get_uploaded_datasets(self, trans, context, override_name=None, override_info=None): def get_data_file_filename(data_file, override_name=None, override_info=None, purge=True): dataset_name = override_name def get_file_name(file_name): file_name = file_name.split('\\')[-1] file_name = file_name.split('/')[-1] return file_name try: # Use the existing file if not dataset_name and 'filename' in data_file: dataset_name = get_file_name(data_file['filename']) return Bunch(type='file', path=data_file['local_filename'], name=dataset_name, purge_source=purge) except Exception: # The uploaded file should've been persisted by the upload tool action return Bunch(type=None, path=None, name=None) def get_url_paste_urls_or_filename(group_incoming, override_name=None, override_info=None): url_paste_file = group_incoming.get('url_paste', None) if url_paste_file is not None: url_paste = open(url_paste_file).read() def start_of_url(content): start_of_url_paste = content.lstrip()[0:10].lower() looks_like_url = False for url_prefix in URI_PREFIXES: if start_of_url_paste.startswith(url_prefix): looks_like_url = True break return looks_like_url if start_of_url(url_paste): url_paste = url_paste.replace('\r', '').split('\n') for line in url_paste: line = line.strip() if line: if not start_of_url(line): continue # non-url line, ignore if "file://" in line: if not trans.user_is_admin: raise AdminRequiredException() elif not trans.app.config.allow_path_paste: raise ConfigDoesNotAllowException() upload_path = line[len("file://"):] dataset_name = os.path.basename(upload_path) else: dataset_name = line if override_name: dataset_name = override_name yield Bunch(type='url', path=line, name=dataset_name) else: dataset_name = 'Pasted Entry' # we need to differentiate between various url pastes here if override_name: dataset_name = override_name yield Bunch(type='file', path=url_paste_file, name=dataset_name) def get_one_filename(context): data_file = context['file_data'] url_paste = context['url_paste'] ftp_files = context['ftp_files'] name = context.get('NAME', None) info = context.get('INFO', None) uuid = context.get('uuid', None) or None # Turn '' to None file_type = context.get('file_type', None) dbkey = self.get_dbkey(context) warnings = [] to_posix_lines = False if context.get('to_posix_lines', None) not in ["None", None, False]: to_posix_lines = True auto_decompress = False if context.get('auto_decompress', None) not in ["None", None, False]: auto_decompress = True space_to_tab = False if context.get('space_to_tab', None) not in ["None", None, False]: space_to_tab = True file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info) if file_bunch.path: if url_paste is not None and url_paste.strip(): warnings.append( "All file contents specified in the paste box were ignored." 
) if ftp_files: warnings.append( "All FTP uploaded file selections were ignored.") elif url_paste is not None and url_paste.strip( ): # we need to use url_paste for file_bunch in get_url_paste_urls_or_filename( context, override_name=name, override_info=info): if file_bunch.path: break if file_bunch.path and ftp_files is not None: warnings.append( "All FTP uploaded file selections were ignored.") elif ftp_files is not None and trans.user is not None: # look for files uploaded via FTP user_ftp_dir = trans.user_ftp_dir assert not os.path.islink( user_ftp_dir ), "User FTP directory cannot be a symbolic link" for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir): for filename in filenames: for ftp_filename in ftp_files: if ftp_filename == filename: path = relpath(os.path.join(dirpath, filename), user_ftp_dir) if not os.path.islink( os.path.join(dirpath, filename)): ftp_data_file = { 'local_filename': os.path.abspath( os.path.join(user_ftp_dir, path)), 'filename': os.path.basename(path) } purge = getattr(trans.app.config, 'ftp_upload_purge', True) file_bunch = get_data_file_filename( ftp_data_file, override_name=name, override_info=info, purge=purge, ) if file_bunch.path: break if file_bunch.path: break if file_bunch.path: break file_bunch.to_posix_lines = to_posix_lines file_bunch.auto_decompress = auto_decompress file_bunch.space_to_tab = space_to_tab file_bunch.uuid = uuid if file_type is not None: file_bunch.file_type = file_type if dbkey is not None: file_bunch.dbkey = dbkey return file_bunch, warnings def get_filenames(context): rval = [] data_file = context['file_data'] ftp_files = context['ftp_files'] uuid = context.get('uuid', None) or None # Turn '' to None name = context.get('NAME', None) info = context.get('INFO', None) file_type = context.get('file_type', None) dbkey = self.get_dbkey(context) to_posix_lines = False if context.get('to_posix_lines', None) not in ["None", None, False]: to_posix_lines = True auto_decompress = False if context.get('auto_decompress', None) not in ["None", None, False]: auto_decompress = True space_to_tab = False if context.get('space_to_tab', None) not in ["None", None, False]: space_to_tab = True file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info) file_bunch.uuid = uuid if file_bunch.path: file_bunch.to_posix_lines = to_posix_lines file_bunch.auto_decompress = auto_decompress file_bunch.space_to_tab = space_to_tab if file_type is not None: file_bunch.file_type = file_type if dbkey is not None: file_bunch.dbkey = dbkey rval.append(file_bunch) for file_bunch in get_url_paste_urls_or_filename( context, override_name=name, override_info=info): if file_bunch.path: file_bunch.uuid = uuid file_bunch.to_posix_lines = to_posix_lines file_bunch.auto_decompress = auto_decompress file_bunch.space_to_tab = space_to_tab if file_type is not None: file_bunch.file_type = file_type if dbkey is not None: file_bunch.dbkey = dbkey rval.append(file_bunch) # look for files uploaded via FTP valid_files = [] if ftp_files is not None: # Normalize input paths to ensure utf-8 encoding is normal form c. # This allows for comparison when the filesystem uses a different encoding than the browser. 
ftp_files = [ unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, str) ] if trans.user is None: log.warning( 'Anonymous user passed values in ftp_files: %s' % ftp_files) ftp_files = [] # TODO: warning to the user (could happen if session has become invalid) else: user_ftp_dir = trans.user_ftp_dir assert not os.path.islink( user_ftp_dir ), "User FTP directory cannot be a symbolic link" for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir): for filename in filenames: path = relpath(os.path.join(dirpath, filename), user_ftp_dir) if not os.path.islink( os.path.join(dirpath, filename)): # Normalize filesystem paths if isinstance(path, str): valid_files.append( unicodedata.normalize('NFC', path)) else: valid_files.append(path) else: ftp_files = [] for ftp_file in ftp_files: if ftp_file not in valid_files: log.warning( 'User passed an invalid file path in ftp_files: %s' % ftp_file) continue # TODO: warning to the user (could happen if file is already imported) ftp_data_file = { 'local_filename': os.path.abspath(os.path.join(user_ftp_dir, ftp_file)), 'filename': os.path.basename(ftp_file) } purge = getattr(trans.app.config, 'ftp_upload_purge', True) file_bunch = get_data_file_filename(ftp_data_file, override_name=name, override_info=info, purge=purge) if file_bunch.path: file_bunch.to_posix_lines = to_posix_lines file_bunch.auto_decompress = auto_decompress file_bunch.space_to_tab = space_to_tab if file_type is not None: file_bunch.file_type = file_type if dbkey is not None: file_bunch.dbkey = dbkey rval.append(file_bunch) return rval file_type = self.get_file_type(context) file_count = self.get_file_count(trans, context) d_type = self.get_datatype(trans, context) dbkey = self.get_dbkey(context) tag_using_filenames = context.get('tag_using_filenames', False) tags = context.get('tags', False) force_composite = asbool(context.get('force_composite', 'False')) writable_files = d_type.writable_files writable_files_offset = 0 groups_incoming = [None for _ in range(file_count)] for group_incoming in context.get(self.name, []): i = int(group_incoming['__index__']) groups_incoming[i] = group_incoming if d_type.composite_type is not None or force_composite: # handle uploading of composite datatypes # Only one Dataset can be created dataset = Bunch() dataset.type = 'composite' dataset.file_type = file_type dataset.dbkey = dbkey dataset.datatype = d_type dataset.warnings = [] dataset.metadata = {} dataset.composite_files = {} dataset.uuid = None dataset.tag_using_filenames = None dataset.tags = None # load metadata files_metadata = context.get(self.metadata_ref, {}) metadata_name_substition_default_dict = { composite_file.substitute_name_with_metadata: d_type.metadata_spec[ composite_file.substitute_name_with_metadata].default for composite_file in d_type.composite_files.values() if composite_file.substitute_name_with_metadata } for meta_name, meta_spec in d_type.metadata_spec.items(): if meta_spec.set_in_upload: if meta_name in files_metadata: meta_value = files_metadata[meta_name] if meta_name in metadata_name_substition_default_dict: meta_value = sanitize_for_filename( meta_value, default=metadata_name_substition_default_dict[ meta_name]) dataset.metadata[meta_name] = meta_value dataset.name = self.get_composite_dataset_name(context) if dataset.datatype.composite_type == 'auto_primary_file': # replace sniff here with just creating an empty file temp_name = sniff.stream_to_file( io.StringIO(d_type.generate_primary_file(dataset)), prefix='upload_auto_primary_file') dataset.primary_file 
= temp_name dataset.to_posix_lines = True dataset.auto_decompress = True dataset.space_to_tab = False else: file_bunch, warnings = get_one_filename(groups_incoming[0]) writable_files_offset = 1 dataset.primary_file = file_bunch.path dataset.to_posix_lines = file_bunch.to_posix_lines dataset.auto_decompress = file_bunch.auto_decompress dataset.space_to_tab = file_bunch.space_to_tab if file_bunch.file_type: dataset.file_type = file_type if file_bunch.dbkey: dataset.dbkey = dbkey dataset.warnings.extend(warnings) if dataset.primary_file is None: # remove this before finish, this should create an empty dataset raise Exception( 'No primary dataset file was available for composite upload' ) if not force_composite: keys = [value.name for value in writable_files.values()] else: keys = [str(index) for index in range(file_count)] for i, group_incoming in enumerate( groups_incoming[writable_files_offset:]): key = keys[i + writable_files_offset] if not force_composite and group_incoming is None and not writable_files[ list(writable_files.keys())[keys.index(key)]].optional: dataset.warnings.append( "A required composite file (%s) was not specified." % (key)) dataset.composite_files[key] = None else: file_bunch, warnings = get_one_filename(group_incoming) dataset.warnings.extend(warnings) if file_bunch.path: if force_composite: key = group_incoming.get("NAME") or i dataset.composite_files[key] = file_bunch.__dict__ elif not force_composite: dataset.composite_files[key] = None if not writable_files[list(writable_files.keys())[ keys.index(key)]].optional: dataset.warnings.append( "A required composite file (%s) was not specified." % (key)) return [dataset] else: rval = [] for i, file_contexts in enumerate(context[self.name]): datasets = get_filenames(file_contexts) for dataset in datasets: override_file_type = self.get_file_type( context[self.name][i], parent_context=context) d_type = self.get_datatype(trans, context[self.name][i], parent_context=context) dataset.file_type = override_file_type dataset.datatype = d_type dataset.ext = self.get_datatype_ext(trans, context[self.name][i], parent_context=context) dataset.dbkey = self.get_dbkey(context[self.name][i], parent_context=context) dataset.tag_using_filenames = tag_using_filenames dataset.tags = tags rval.append(dataset) return rval
def join_files(filename1, column1, filename2, column2, out_filename, split=None, buffer=1000000,
               keep_unmatched=False, keep_partial=False, index_depth=3, fill_options=None):
    # return identifier based upon line
    def get_identifier_by_line(line, column, split=None):
        if isinstance(line, str):
            fields = line.rstrip('\r\n').split(split)
            if column < len(fields):
                return fields[column]
        return None

    if fill_options is None:
        fill_options = Bunch(fill_unjoined_only=True, file1_columns=None, file2_columns=None)
    # Open both files in text mode so str-based identifier parsing and writes are consistent
    out = open(out_filename, 'w')
    index = BufferedIndex(filename2, column2, split, buffer, index_depth)
    for line1 in open(filename1, 'r'):
        identifier = get_identifier_by_line(line1, column1, split)
        if identifier:
            written = False
            for line2 in index.get_lines_by_identifier(identifier):
                if not fill_options.fill_unjoined_only:
                    out.write("%s%s%s\n" % (fill_empty_columns(line1.rstrip('\r\n'), split, fill_options.file1_columns),
                                            split,
                                            fill_empty_columns(line2.rstrip('\r\n'), split, fill_options.file2_columns)))
                else:
                    out.write("%s%s%s\n" % (line1.rstrip('\r\n'), split, line2.rstrip('\r\n')))
                written = True
            if not written and keep_unmatched:
                out.write(fill_empty_columns(line1.rstrip('\r\n'), split, fill_options.file1_columns))
                if fill_options:
                    if fill_options.file2_columns:
                        out.write("%s%s" % (split, fill_empty_columns("", split, fill_options.file2_columns)))
                out.write("\n")
        elif keep_partial:
            out.write(fill_empty_columns(line1.rstrip('\r\n'), split, fill_options.file1_columns))
            if fill_options:
                if fill_options.file2_columns:
                    out.write("%s%s" % (split, fill_empty_columns("", split, fill_options.file2_columns)))
            out.write("\n")
    out.close()
def wrap_input(input_values, input): value = input_values[input.name] if isinstance(input, DataToolParameter) and input.multiple: dataset_instances = DatasetListWrapper.to_dataset_instances(value) input_values[input.name] = \ DatasetListWrapper(job_working_directory, dataset_instances, compute_environment=self.compute_environment, datatypes_registry=self.app.datatypes_registry, tool=self.tool, name=input.name, formats=input.formats) elif isinstance(input, DataToolParameter): # FIXME: We're populating param_dict with conversions when # wrapping values, this should happen as a separate # step before wrapping (or call this wrapping step # something more generic) (but iterating this same # list twice would be wasteful) # Add explicit conversions by name to current parent for conversion_name, conversion_extensions, conversion_datatypes in input.conversions: # If we are at building cmdline step, then converters # have already executed conv_ext, converted_dataset = input_values[input.name].find_conversion_destination(conversion_datatypes) # When dealing with optional inputs, we'll provide a # valid extension to be used for None converted dataset if not conv_ext: conv_ext = conversion_extensions[0] # input_values[ input.name ] is None when optional # dataset, 'conversion' of optional dataset should # create wrapper around NoneDataset for converter output if input_values[input.name] and not converted_dataset: # Input that converter is based from has a value, # but converted dataset does not exist raise Exception('A path for explicit datatype conversion has not been found: %s --/--> %s' % (input_values[input.name].extension, conversion_extensions)) else: # Trick wrapper into using target conv ext (when # None) without actually being a tool parameter input_values[conversion_name] = \ DatasetFilenameWrapper(converted_dataset, datatypes_registry=self.app.datatypes_registry, tool=Bunch(conversion_name=Bunch(extensions=conv_ext)), name=conversion_name) # Wrap actual input dataset dataset = input_values[input.name] wrapper_kwds = dict( datatypes_registry=self.app.datatypes_registry, tool=self, name=input.name, compute_environment=self.compute_environment ) element_identifier = element_identifier_mapper.identifier(dataset, param_dict) if element_identifier: wrapper_kwds["identifier"] = element_identifier input_values[input.name] = \ DatasetFilenameWrapper(dataset, **wrapper_kwds) elif isinstance(input, DataCollectionToolParameter): dataset_collection = value wrapper_kwds = dict( datatypes_registry=self.app.datatypes_registry, compute_environment=self.compute_environment, tool=self, name=input.name ) wrapper = DatasetCollectionWrapper( job_working_directory, dataset_collection, **wrapper_kwds ) input_values[input.name] = wrapper elif isinstance(input, SelectToolParameter): if input.multiple: value = listify(value) input_values[input.name] = SelectToolParameterWrapper( input, value, other_values=param_dict, compute_environment=self.compute_environment) else: input_values[input.name] = InputValueWrapper( input, value, param_dict)
def _verify_outputs(testdef, history, jobs, tool_id, data_list, data_collection_list, galaxy_interactor, quiet=False): assert len( jobs ) == 1, "Test framework logic error, somehow tool test resulted in more than one job." job = jobs[0] maxseconds = testdef.maxseconds if testdef.num_outputs is not None: expected = testdef.num_outputs actual = len(data_list) if expected != actual: messaage_template = "Incorrect number of outputs - expected %d, found %s." message = messaage_template % (expected, actual) raise Exception(message) found_exceptions = [] def register_exception(e): if not found_exceptions and not quiet: # Only print this stuff out once. for stream in ['stdout', 'stderr']: if stream in job_stdio: print(_format_stream(job_stdio[stream], stream=stream, format=True), file=sys.stderr) found_exceptions.append(e) if testdef.expect_failure: if testdef.outputs: raise Exception( "Cannot specify outputs in a test expecting failure.") # Wait for the job to complete and register expections if the final # status was not what test was expecting. job_failed = False try: galaxy_interactor.wait_for_job(job['id'], history, maxseconds) except Exception as e: job_failed = True if not testdef.expect_failure: found_exceptions.append(e) job_stdio = galaxy_interactor.get_job_stdio(job['id']) if not job_failed and testdef.expect_failure: error = AssertionError( "Expected job to fail but Galaxy indicated the job successfully completed." ) register_exception(error) expect_exit_code = testdef.expect_exit_code if expect_exit_code is not None: exit_code = job_stdio["exit_code"] if str(expect_exit_code) != str(exit_code): error = AssertionError( "Expected job to complete with exit code %s, found %s" % (expect_exit_code, exit_code)) register_exception(error) for output_index, output_dict in enumerate(testdef.outputs): # Get the correct hid name = output_dict["name"] outfile = output_dict["value"] attributes = output_dict["attributes"] output_testdef = Bunch(name=name, outfile=outfile, attributes=attributes) try: output_data = data_list[name] except (TypeError, KeyError): # Legacy - fall back on ordered data list access if data_list is # just a list (case with twill variant or if output changes its # name). if hasattr(data_list, "values"): output_data = list(data_list.values())[output_index] else: output_data = data_list[len(data_list) - len(testdef.outputs) + output_index] assert output_data is not None try: galaxy_interactor.verify_output(history, jobs, output_data, output_testdef=output_testdef, tool_id=tool_id, maxseconds=maxseconds) except Exception as e: register_exception(e) other_checks = { "command_line": "Command produced by the job", "stdout": "Standard output of the job", "stderr": "Standard error of the job", } for what, description in other_checks.items(): if getattr(testdef, what, None) is not None: try: data = job_stdio[what] verify_assertions(data, getattr(testdef, what)) except AssertionError as err: errmsg = '%s different than expected\n' % description errmsg += str(err) register_exception(AssertionError(errmsg)) for output_collection_def in testdef.output_collections: try: name = output_collection_def.name # TODO: data_collection_list is clearly a bad name for dictionary. 
if name not in data_collection_list: template = "Failed to find output [%s], tool outputs include [%s]" message = template % (name, ",".join( data_collection_list.keys())) raise AssertionError(message) # Data collection returned from submission, elements may have been populated after # the job completed so re-hit the API for more information. data_collection_returned = data_collection_list[name] data_collection = galaxy_interactor._get( "dataset_collections/%s" % data_collection_returned["id"], data={ "instance_type": "history" }).json() def get_element(elements, id): for element in elements: if element["element_identifier"] == id: return element return False expected_collection_type = output_collection_def.collection_type if expected_collection_type: collection_type = data_collection["collection_type"] if expected_collection_type != collection_type: template = "Expected output collection [%s] to be of type [%s], was of type [%s]." message = template % (name, expected_collection_type, collection_type) raise AssertionError(message) expected_element_count = output_collection_def.count if expected_element_count: actual_element_count = len(data_collection["elements"]) if expected_element_count != actual_element_count: template = "Expected output collection [%s] to have %s elements, but it had %s." message = template % (name, expected_element_count, actual_element_count) raise AssertionError(message) def verify_elements(element_objects, element_tests): for element_identifier, ( element_outfile, element_attrib) in element_tests.items(): element = get_element(element_objects, element_identifier) if not element: template = "Failed to find identifier [%s] for testing, tool generated collection elements [%s]" message = template % (element_identifier, element_objects) raise AssertionError(message) element_type = element["element_type"] if element_type != "dataset_collection": hda = element["object"] galaxy_interactor.verify_output_dataset( history, hda_id=hda["id"], outfile=element_outfile, attributes=element_attrib, tool_id=tool_id) if element_type == "dataset_collection": elements = element["object"]["elements"] verify_elements(elements, element_attrib.get("elements", {})) verify_elements(data_collection["elements"], output_collection_def.element_tests) except Exception as e: register_exception(e) if found_exceptions: raise JobOutputsError(found_exceptions, job_stdio) else: return job_stdio
def get_permitted_actions(**kwds):
    return Bunch()
def __init__(self, root=None, **kwargs): Bunch.__init__(self, **kwargs) if not root: root = tempfile.mkdtemp() self._remove_root = True else: self._remove_root = False self.schema = self.MockSchema() self.security = idencoding.IdEncodingHelper( id_secret='6e46ed6483a833c100e68cc3f1d0dd76') self.database_connection = kwargs.get('database_connection', "sqlite:///:memory:") self.use_remote_user = kwargs.get('use_remote_user', False) self.enable_celery_tasks = False self.data_dir = os.path.join(root, 'database') self.file_path = os.path.join(self.data_dir, 'files') self.jobs_directory = os.path.join(self.data_dir, 'jobs_directory') self.new_file_path = os.path.join(self.data_dir, 'tmp') self.tool_data_path = os.path.join(root, 'tool-data') self.tool_dependency_dir = None self.metadata_strategy = 'legacy' self.object_store_config_file = '' self.object_store = 'disk' self.object_store_check_old_style = False self.object_store_cache_path = '/tmp/cache' self.umask = os.umask(0o77) self.gid = os.getgid() self.user_activation_on = False self.new_user_dataset_access_role_default_private = False self.expose_dataset_path = True self.allow_user_dataset_purge = True self.enable_old_display_applications = True self.redact_username_in_logs = False self.auth_config_file = "config/auth_conf.xml.sample" self.error_email_to = "*****@*****.**" self.password_expiration_period = 0 self.umask = 0o77 self.flush_per_n_datasets = 0 # Compliance related config self.redact_email_in_job_name = False # Follow two required by GenomeBuilds self.len_file_path = os.path.join('tool-data', 'shared', 'ucsc', 'chrom') self.builds_file_path = os.path.join('tool-data', 'shared', 'ucsc', 'builds.txt.sample') self.shed_tool_config_file = "config/shed_tool_conf.xml" self.shed_tool_config_file_set = False self.enable_beta_edam_toolbox = False self.preserve_python_environment = "always" self.enable_beta_gdpr = False self.version_major = "19.09" # set by MockDir self.root = root self.enable_tool_document_cache = False self.tool_cache_data_dir = os.path.join(root, 'tool_cache') self.delay_tool_initialization = True self.external_chown_script = None self.default_panel_view = "default" self.panel_views_dir = '' self.panel_views = {} self.edam_panel_views = '' self.config_file = None
def __init__(self, test_directory, mock_model=True):
    # The following line is needed in order to create
    # HistoryDatasetAssociations - ideally the model classes would be
    # usable without the ORM infrastructure in place.
    in_memory_model = mapping.init("/tmp", "sqlite:///:memory:", create_tables=True)
    self.datatypes_registry = Bunch(
        integrated_datatypes_configs='/galaxy/integrated_datatypes_configs.xml',
        get_datatype_by_extension=lambda ext: Bunch(),
    )
    self.config = Bunch(
        outputs_to_working_directory=False,
        commands_in_new_shell=True,
        new_file_path=os.path.join(test_directory, "new_files"),
        tool_data_path=os.path.join(test_directory, "tools"),
        root=os.path.join(test_directory, "galaxy"),
        admin_users="*****@*****.**",
        len_file_path=os.path.join('tool-data', 'shared', 'ucsc', 'chrom'),
        builds_file_path=os.path.join('tool-data', 'shared', 'ucsc', 'builds.txt.sample'),
        migrated_tools_config=os.path.join(test_directory, "migrated_tools_conf.xml"),
        server_name="test_server",
    )
    # Setup some attributes for downstream extension by specific tests.
    self.job_config = Bunch(dynamic_params=None)
    # Two ways to handle the model layer: one is to stub out some objects that
    # have an interface similar to the real model (mock_model) and can keep
    # track of 'persisted' objects in a map; the other is to use a real
    # sqlalchemy layer but target an in-memory database, depending on what
    # is being tested.
    if mock_model:
        # Create self.model to mimic app.model.
        self.model = Bunch(context=MockContext())
        for module_member_name in dir(galaxy.model):
            module_member = getattr(galaxy.model, module_member_name)
            if type(module_member) == type:
                self.model[module_member_name] = module_member
    else:
        self.model = in_memory_model
    self.genome_builds = GenomeBuilds(self)
    self.toolbox = None
    self.object_store = None
    self.security = SecurityHelper(id_secret="testing")
    from galaxy.security import GalaxyRBACAgent
    self.job_queue = NoopQueue()
    self.security_agent = GalaxyRBACAgent(self.model)
    self.tool_data_tables = {}
    self.dataset_collections_service = None
    self.container_finder = NullContainerFinder()
    self.name = "galaxy"
    self._toolbox_lock = MockLock()
    self.tool_version_cache = Bunch(
        app=self,
        tool_version_by_id={},
        tool_version_by_tool_id={},
        tool_id_to_parent_id={},
        parent_id_to_tool_id={},
    )
def upload_async_create(self, trans, tool_id=None, **kwd): """ Precreate datasets for asynchronous uploading. """ cntrller = kwd.get('cntrller', '') roles = kwd.get('roles', False) if roles: # The user associated the DATASET_ACCESS permission on the uploaded datasets with 1 or more roles. # We need to ensure that the roles are legitimately derived from the roles associated with the LIBRARY_ACCESS # permission if the library is not public ( this should always be the case since any ill-legitimate roles # were filtered out of the roles displayed on the upload form. In addition, we need to ensure that the user # did not associated roles that would make the dataset in-accessible by everyone. library_id = trans.app.security.decode_id(kwd.get( 'library_id', '')) vars = dict(DATASET_ACCESS_in=roles) permissions, in_roles, error, msg = trans.app.security_agent.derive_roles_from_access( trans, library_id, cntrller, library=True, **vars) if error: return ['error', msg] def create_dataset(name): ud = Bunch(name=name, file_type=None, dbkey=None) if nonfile_params.get('folder_id', False): replace_id = nonfile_params.get('replace_id', None) if replace_id not in [None, 'None']: replace_dataset = trans.sa_session.query( trans.app.model.LibraryDataset).get( trans.security.decode_id(replace_id)) else: replace_dataset = None # FIXME: instead of passing params here ( chiech have been process by util.Params(), the original kwd # should be passed so that complex objects that may have been included in the initial request remain. library_bunch = upload_common.handle_library_params( trans, nonfile_params, nonfile_params.folder_id, replace_dataset) else: library_bunch = None return upload_common.new_upload( trans, cntrller, ud, library_bunch=library_bunch, state=trans.app.model.HistoryDatasetAssociation.states.UPLOAD) tool = self.get_toolbox().get_tool(tool_id) if not tool: return False # bad tool_id nonfile_params = galaxy.util.Params(kwd, sanitize=False) if kwd.get('tool_state', None) not in (None, 'None'): encoded_state = galaxy.util.string_to_object(kwd["tool_state"]) tool_state = DefaultToolState() tool_state.decode(encoded_state, tool, trans.app) else: tool_state = tool.new_state(trans) tool.update_state(trans, tool.inputs, tool_state.inputs, kwd, update_only=True) datasets = [] dataset_upload_inputs = [] for input_name, input in tool.inputs.iteritems(): if input.type == "upload_dataset": dataset_upload_inputs.append(input) assert dataset_upload_inputs, Exception( "No dataset upload groups were found.") for dataset_upload_input in dataset_upload_inputs: d_type = dataset_upload_input.get_datatype(trans, kwd) if d_type.composite_type is not None: datasets.append( create_dataset( dataset_upload_input.get_composite_dataset_name(kwd))) else: params = Bunch( **tool_state.inputs[dataset_upload_input.name][0]) if params.file_data not in [None, ""]: name = params.file_data if name.count('/'): name = name.rsplit('/', 1)[1] if name.count('\\'): name = name.rsplit('\\', 1)[1] datasets.append(create_dataset(name)) if params.url_paste not in [None, ""]: url_paste = params.url_paste.replace('\r', '').split('\n') url = False for line in url_paste: line = line.rstrip('\r\n').strip() if not line: continue elif line.lower().startswith('http://') or line.lower( ).startswith('ftp://') or line.lower().startswith( 'https://'): url = True datasets.append(create_dataset(line)) else: if url: continue # non-url when we've already processed some urls else: # pasted data datasets.append(create_dataset('Pasted Entry')) break return [d.id 
for d in datasets]
def is_filtered(filters, trans, tool):
    context = Bunch(trans=trans)
    return not all(_(context, tool) for _ in filters)
def get_uploaded_datasets( self, trans, context, override_name = None, override_info = None ): def get_data_file_filename( data_file, override_name = None, override_info = None ): dataset_name = override_name dataset_info = override_info def get_file_name( file_name ): file_name = file_name.split( '\\' )[-1] file_name = file_name.split( '/' )[-1] return file_name try: # Use the existing file if not dataset_name and 'filename' in data_file: dataset_name = get_file_name( data_file['filename'] ) if not dataset_info: dataset_info = 'uploaded file' return Bunch( type='file', path=data_file['local_filename'], name=get_file_name( data_file['filename'] ) ) #return 'file', data_file['local_filename'], get_file_name( data_file.filename ), dataset_name, dataset_info except: # The uploaded file should've been persisted by the upload tool action return Bunch( type=None, path=None, name=None ) #return None, None, None, None, None def get_url_paste_urls_or_filename( group_incoming, override_name = None, override_info = None ): filenames = [] url_paste_file = group_incoming.get( 'url_paste', None ) if url_paste_file is not None: url_paste = open( url_paste_file, 'r' ).read( 1024 ) if url_paste.lstrip().lower().startswith( 'http://' ) or url_paste.lstrip().lower().startswith( 'ftp://' ): url_paste = url_paste.replace( '\r', '' ).split( '\n' ) for line in url_paste: line = line.strip() if line: if not line.lower().startswith( 'http://' ) and not line.lower().startswith( 'ftp://' ): continue # non-url line, ignore precreated_name = line dataset_name = override_name if not dataset_name: dataset_name = line dataset_info = override_info if not dataset_info: dataset_info = 'uploaded url' yield Bunch( type='url', path=line, name=precreated_name ) #yield ( 'url', line, precreated_name, dataset_name, dataset_info ) else: dataset_name = dataset_info = precreated_name = 'Pasted Entry' #we need to differentiate between various url pastes here if override_name: dataset_name = override_name if override_info: dataset_info = override_info yield Bunch( type='file', path=url_paste_file, name=precreated_name ) #yield ( 'file', url_paste_file, precreated_name, dataset_name, dataset_info ) def get_one_filename( context ): data_file = context['file_data'] url_paste = context['url_paste'] name = context.get( 'NAME', None ) info = context.get( 'INFO', None ) warnings = [] space_to_tab = False if context.get( 'space_to_tab', None ) not in ["None", None]: space_to_tab = True file_bunch = get_data_file_filename( data_file, override_name = name, override_info = info ) if file_bunch.path and url_paste: if url_paste.strip(): warnings.append( "All file contents specified in the paste box were ignored." 
) else: #we need to use url_paste for file_bunch in get_url_paste_urls_or_filename( context, override_name = name, override_info = info ): if file_bunch.path: break return file_bunch, warnings def get_filenames( context ): rval = [] data_file = context['file_data'] url_paste = context['url_paste'] name = context.get( 'NAME', None ) info = context.get( 'INFO', None ) space_to_tab = False if context.get( 'space_to_tab', None ) not in ["None", None]: space_to_tab = True warnings = [] file_bunch = get_data_file_filename( data_file, override_name = name, override_info = info ) if file_bunch.path: file_bunch.space_to_tab = space_to_tab rval.append( file_bunch ) for file_bunch in get_url_paste_urls_or_filename( context, override_name = name, override_info = info ): if file_bunch.path: file_bunch.space_to_tab = space_to_tab rval.append( file_bunch ) return rval file_type = self.get_file_type( context ) d_type = self.get_datatype( trans, context ) dbkey = context.get( 'dbkey', None ) writable_files = d_type.writable_files writable_files_offset = 0 groups_incoming = [ None for filename in writable_files ] for group_incoming in context.get( self.name, [] ): i = int( group_incoming['__index__'] ) groups_incoming[ i ] = group_incoming if d_type.composite_type is not None: #handle uploading of composite datatypes #Only one Dataset can be created dataset = Bunch() dataset.type = 'composite' dataset.file_type = file_type dataset.dbkey = dbkey dataset.datatype = d_type dataset.warnings = [] dataset.metadata = {} dataset.composite_files = {} #load metadata files_metadata = context.get( self.metadata_ref, {} ) for meta_name, meta_spec in d_type.metadata_spec.iteritems(): if meta_spec.set_in_upload: if meta_name in files_metadata: dataset.metadata[ meta_name ] = files_metadata[ meta_name ] dataset_name = None dataset_info = None if dataset.datatype.composite_type == 'auto_primary_file': #replace sniff here with just creating an empty file temp_name, is_multi_byte = sniff.stream_to_file( StringIO.StringIO( d_type.generate_primary_file() ), prefix='upload_auto_primary_file' ) dataset.primary_file = temp_name dataset.space_to_tab = False dataset.precreated_name = dataset.name = 'Uploaded Composite Dataset (%s)' % ( file_type ) else: file_bunch, warnings = get_one_filename( groups_incoming[ 0 ] ) if dataset.datatype.composite_type: precreated_name = 'Uploaded Composite Dataset (%s)' % ( file_type ) writable_files_offset = 1 dataset.primary_file = file_bunch.path dataset.space_to_tab = file_bunch.space_to_tab dataset.precreated_name = file_bunch.precreated_name dataset.name = file_bunch.precreated_name dataset.warnings.extend( file_bunch.warnings ) if dataset.primary_file is None:#remove this before finish, this should create an empty dataset raise Exception( 'No primary dataset file was available for composite upload' ) keys = [ value.name for value in writable_files.values() ] for i, group_incoming in enumerate( groups_incoming[ writable_files_offset : ] ): key = keys[ i + writable_files_offset ] if group_incoming is None and not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional: dataset.warnings.append( "A required composite file (%s) was not specified." 
% ( key ) ) dataset.composite_files[ key ] = None else: file_bunch, warnings = get_one_filename( group_incoming ) if file_bunch.path: dataset.composite_files[ key ] = file_bunch.__dict__ else: dataset.composite_files[ key ] = None if not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional: dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) ) return [ dataset ] else: datasets = get_filenames( context[ self.name ][0] ) rval = [] for dataset in datasets: dataset.file_type = file_type dataset.datatype = d_type dataset.ext = self.get_datatype_ext( trans, context ) dataset.dbkey = dbkey rval.append( dataset ) return rval
def filter_by(self, **kwds):
    return Bunch(first=lambda: None)
class Repository(Dictifiable): dict_collection_visible_keys = [ 'id', 'name', 'type', 'remote_repository_url', 'homepage_url', 'description', 'user_id', 'private', 'deleted', 'times_downloaded', 'deprecated' ] dict_element_visible_keys = [ 'id', 'name', 'type', 'remote_repository_url', 'homepage_url', 'description', 'long_description', 'user_id', 'private', 'deleted', 'times_downloaded', 'deprecated' ] file_states = Bunch(NORMAL='n', NEEDS_MERGING='m', MARKED_FOR_REMOVAL='r', MARKED_FOR_ADDITION='a', NOT_TRACKED='?') def __init__(self, id=None, name=None, type=None, remote_repository_url=None, homepage_url=None, description=None, long_description=None, user_id=None, private=False, deleted=None, email_alerts=None, times_downloaded=0, deprecated=False): self.id = id self.name = name or "Unnamed repository" self.type = type self.remote_repository_url = remote_repository_url self.homepage_url = homepage_url self.description = description self.long_description = long_description self.user_id = user_id self.private = private self.deleted = deleted self.email_alerts = email_alerts self.times_downloaded = times_downloaded self.deprecated = deprecated @property def admin_role(self): admin_role_name = '%s_%s_admin' % (str( self.name), str(self.user.username)) for rra in self.roles: role = rra.role if str(role.name) == admin_role_name: return role raise Exception( 'Repository %s owned by %s is not associated with a required administrative role.' % (str(self.name), str(self.user.username))) def allow_push(self, app): repo = hg_util.get_repo_for_repository(app, repository=self) return repo.ui.config('web', 'allow_push') def can_change_type(self, app): # Allow changing the type only if the repository has no contents, has never been installed, or has # never been changed from the default type. if self.is_new(app): return True if self.times_downloaded == 0: return True if self.type == rt_util.UNRESTRICTED: return True return False def can_change_type_to(self, app, new_type_label): if self.type == new_type_label: return False if self.can_change_type(app): new_type = app.repository_types_registry.get_class_by_label( new_type_label) if new_type.is_valid_for_type(app, self): return True return False def get_changesets_for_setting_metadata(self, app): type_class = self.get_type_class(app) return type_class.get_changesets_for_setting_metadata(app, self) def get_repository_dependencies(self, app, changeset, toolshed_url): # We aren't concerned with repositories of type tool_dependency_definition here if a # repository_metadata record is not returned because repositories of this type will never # have repository dependencies. However, if a readme file is uploaded, or some other change # is made that does not create a new downloadable changeset revision but updates the existing # one, we still want to be able to get repository dependencies. 
repository_metadata = metadata_util.get_current_repository_metadata_for_changeset_revision( app, self, changeset) if repository_metadata: metadata = repository_metadata.metadata if metadata: rb = relation_builder.RelationBuilder(app, self, repository_metadata, toolshed_url) repository_dependencies = rb.get_repository_dependencies_for_changeset_revision( ) if repository_dependencies: return repository_dependencies return None def get_type_class(self, app): return app.repository_types_registry.get_class_by_label(self.type) def get_tool_dependencies(self, app, changeset_revision): changeset_revision = metadata_util.get_next_downloadable_changeset_revision( app, self, changeset_revision) for downloadable_revision in self.downloadable_revisions: if downloadable_revision.changeset_revision == changeset_revision: return downloadable_revision.metadata.get( 'tool_dependencies', {}) return {} def installable_revisions(self, app, sort_revisions=True): return metadata_util.get_metadata_revisions( app, self, sort_revisions=sort_revisions) def is_new(self, app): repo = hg_util.get_repo_for_repository(app, repository=self) tip_ctx = repo.changectx(repo.changelog.tip()) return tip_ctx.rev() < 0 def repo_path(self, app): return app.hgweb_config_manager.get_entry( os.path.join("repos", self.user.username, self.name)) def revision(self, app): repo = hg_util.get_repo_for_repository(app, repository=self) tip_ctx = repo.changectx(repo.changelog.tip()) return "%s:%s" % (str( tip_ctx.rev()), str(repo.changectx(repo.changelog.tip()))) def set_allow_push(self, app, usernames, remove_auth=''): allow_push = util.listify(self.allow_push(app)) if remove_auth: allow_push.remove(remove_auth) else: for username in util.listify(usernames): if username not in allow_push: allow_push.append(username) allow_push = '%s\n' % ','.join(allow_push) repo = hg_util.get_repo_for_repository(app, repository=self) # Why doesn't the following work? # repo.ui.setconfig( 'web', 'allow_push', allow_push ) lines = repo.opener('hgrc', 'rb').readlines() fp = repo.opener('hgrc', 'wb') for line in lines: if line.startswith('allow_push'): fp.write('allow_push = %s' % allow_push) else: fp.write(line) fp.close() def tip(self, app): repo = hg_util.get_repo_for_repository(app, repository=self) return str(repo.changectx(repo.changelog.tip())) def to_dict(self, view='collection', value_mapper=None): rval = super(Repository, self).to_dict(view=view, value_mapper=value_mapper) if 'user_id' in rval: rval['owner'] = self.user.username return rval
        )
        if self.area:
            as_dict["area"] = True
        if self.input_type == INPUT_TYPE.INTEGER:
            as_dict["value"] = "0"
        if self.input_type == INPUT_TYPE.FLOAT:
            as_dict["value"] = "0.0"
        elif self.input_type == INPUT_TYPE.DATA_COLLECTON:
            as_dict["collection_type"] = self.collection_type
        return as_dict


OUTPUT_TYPE = Bunch(
    GLOB="glob",
    STDOUT="stdout",
)


# TODO: Different subclasses - this is representing different types of things.
class OutputInstance:
    def __init__(self, name, output_data_type, output_type, path=None, fields=None):
        self.name = name
        self.output_data_type = output_data_type
        self.output_type = output_type
        self.path = path
from galaxy import config, tools, web, util
from galaxy.web.base.controller import BaseController, BaseAPIController
from galaxy.util.bunch import Bunch

messages = Bunch(NO_TOOL="no tool")


class ToolsController(BaseAPIController):
    """
    RESTful controller for interactions with tools.
    """

    @web.expose_api
    def index(self, trans, **kwds):
        """
        GET /api/tools: returns a list of tools defined by parameters

        parameters:
            in_panel  - if true, tools are returned in panel structure, including sections and labels
            trackster - if true, only tools that are compatible with Trackster are returned
        """
        # Read params.
        in_panel = util.string_as_bool(kwds.get('in_panel', 'True'))
        trackster = util.string_as_bool(kwds.get('trackster', 'False'))
        # Create return value.
        return self.app.toolbox.to_dict(trans, in_panel=in_panel, trackster=trackster)
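# Example (illustrative only): the index endpoint above can be exercised over plain
# HTTP. This is a minimal sketch, not part of the controller; the server URL and
# API key below are placeholder assumptions, as is the presence of a running
# Galaxy instance that accepts a `key` query parameter.
import requests

GALAXY_URL = "http://localhost:8080"   # hypothetical server address
API_KEY = "your-api-key"               # hypothetical API key

# Ask for the flat tool list rather than the panel structure.
response = requests.get(
    "%s/api/tools" % GALAXY_URL,
    params={"in_panel": "False", "trackster": "False", "key": API_KEY},
)
response.raise_for_status()
print("server reported %d tool entries" % len(response.json()))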
""" There is some shared logic between matching/multiplying inputs in workflows and tools. This module is meant to capture some general permutation logic that can be applicable for both cases but will only be used in the newer tools case first. Maybe this doesn't make sense and maybe much of this stuff could be replaced with itertools product and permutations. These are open questions. """ from galaxy.exceptions import MessageException from galaxy.util.bunch import Bunch input_classification = Bunch( SINGLE="single", MATCHED="matched", MULTIPLIED="multiplied", ) class InputMatchedException(MessageException): """ Indicates problem matching inputs while building up inputs permutations. """ def expand_multi_inputs(inputs, classifier, key_filter=None): key_filter = key_filter or (lambda x: True) single_inputs, matched_multi_inputs, multiplied_multi_inputs = __split_inputs( inputs, classifier, key_filter )
def encode_runtime_state(self, trans, state):
    fake_tool = Bunch(inputs=self.get_runtime_inputs())
    return state.encode(fake_tool, trans.app)
def main(): parser = optparse.OptionParser() parser.add_option( '-b', '--buffer', dest='buffer', type='int', default=1000000, help= 'Number of lines to buffer at a time. Default: 1,000,000 lines. A buffer of 0 will attempt to use memory only.' ) parser.add_option( '-d', '--index_depth', dest='index_depth', type='int', default=3, help='Depth to use on filebased offset indexing. Default: 3.') parser.add_option( '-p', '--keep_partial', action='store_true', dest='keep_partial', default=False, help='Keep rows in first input which are missing identifiers.') parser.add_option( '-u', '--keep_unmatched', action='store_true', dest='keep_unmatched', default=False, help= 'Keep rows in first input which are not joined with the second input.') parser.add_option( '-f', '--fill_options_file', dest='fill_options_file', type='str', default=None, help='Fill empty columns with a values from a JSONified file.') options, args = parser.parse_args() fill_options = None if options.fill_options_file is not None: try: fill_options = Bunch(**stringify_dictionary_keys( json.load(open(options.fill_options_file)) )) # json.load( open( options.fill_options_file ) ) except Exception as e: print("Warning: Ignoring fill options due to json error (%s)." % e) if fill_options is None: fill_options = Bunch() if 'fill_unjoined_only' not in fill_options: fill_options.fill_unjoined_only = True if 'file1_columns' not in fill_options: fill_options.file1_columns = None if 'file2_columns' not in fill_options: fill_options.file2_columns = None try: filename1 = args[0] filename2 = args[1] column1 = int(args[2]) - 1 column2 = int(args[3]) - 1 out_filename = args[4] except: print("Error parsing command line.", file=sys.stderr) sys.exit() # Character for splitting fields and joining lines split = "\t" return join_files(filename1, column1, filename2, column2, out_filename, split, options.buffer, options.keep_unmatched, options.keep_partial, options.index_depth, fill_options=fill_options)
def decode_runtime_state(self, trans, string):
    fake_tool = Bunch(inputs=self.get_runtime_inputs())
    state = galaxy.tools.DefaultToolState()
    state.decode(string, fake_tool, trans.app)
    return state
class ToolShedRepository(object): dict_collection_visible_keys = ['id', 'tool_shed', 'name', 'owner', 'installed_changeset_revision', 'changeset_revision', 'ctx_rev', 'includes_datatypes', 'tool_shed_status', 'deleted', 'uninstalled', 'dist_to_shed', 'status', 'error_message'] dict_element_visible_keys = ['id', 'tool_shed', 'name', 'owner', 'installed_changeset_revision', 'changeset_revision', 'ctx_rev', 'includes_datatypes', 'tool_shed_status', 'deleted', 'uninstalled', 'dist_to_shed', 'status', 'error_message'] installation_status = Bunch(NEW='New', CLONING='Cloning', SETTING_TOOL_VERSIONS='Setting tool versions', INSTALLING_REPOSITORY_DEPENDENCIES='Installing repository dependencies', INSTALLING_TOOL_DEPENDENCIES='Installing tool dependencies', LOADING_PROPRIETARY_DATATYPES='Loading proprietary datatypes', INSTALLED='Installed', DEACTIVATED='Deactivated', ERROR='Error', UNINSTALLED='Uninstalled') states = Bunch(INSTALLING='running', OK='ok', WARNING='queued', ERROR='error', UNINSTALLED='deleted_new') def __init__(self, id=None, create_time=None, tool_shed=None, name=None, description=None, owner=None, installed_changeset_revision=None, changeset_revision=None, ctx_rev=None, metadata=None, includes_datatypes=False, tool_shed_status=None, deleted=False, uninstalled=False, dist_to_shed=False, status=None, error_message=None): self.id = id self.create_time = create_time self.tool_shed = tool_shed self.name = name self.description = description self.owner = owner self.installed_changeset_revision = installed_changeset_revision self.changeset_revision = changeset_revision self.ctx_rev = ctx_rev self.metadata = metadata self.includes_datatypes = includes_datatypes self.tool_shed_status = tool_shed_status self.deleted = deleted self.uninstalled = uninstalled self.dist_to_shed = dist_to_shed self.status = status self.error_message = error_message def as_dict(self, value_mapper=None): return self.to_dict(view='element', value_mapper=value_mapper) @property def can_install(self): return self.status == self.installation_status.NEW @property def can_reset_metadata(self): return self.status == self.installation_status.INSTALLED @property def can_uninstall(self): return self.status != self.installation_status.UNINSTALLED @property def can_deactivate(self): return self.status not in [self.installation_status.DEACTIVATED, self.installation_status.ERROR, self.installation_status.UNINSTALLED] @property def can_reinstall_or_activate(self): return self.deleted def get_sharable_url(self, app): tool_shed_url = common_util.get_tool_shed_url_from_tool_shed_registry(app, self.tool_shed) if tool_shed_url: # Append a slash to the tool shed URL, because urlparse.urljoin will eliminate # the last part of a URL if it does not end with a forward slash. tool_shed_url = '%s/' % tool_shed_url return urljoin(tool_shed_url, 'view/%s/%s' % (self.owner, self.name)) return tool_shed_url def get_shed_config_filename(self): shed_config_filename = None if self.metadata: shed_config_filename = self.metadata.get('shed_config_filename', shed_config_filename) return shed_config_filename def get_shed_config_dict(self, app, default=None): """ Return the in-memory version of the shed_tool_conf file, which is stored in the config_elems entry in the shed_tool_conf_dict. 
""" def _is_valid_shed_config_filename(filename): for shed_tool_conf_dict in app.toolbox.dynamic_confs(include_migrated_tool_conf=True): if filename == shed_tool_conf_dict['config_filename']: return True return False if not self.shed_config_filename or not _is_valid_shed_config_filename(self.shed_config_filename): self.guess_shed_config(app, default=default) if self.shed_config_filename: for shed_tool_conf_dict in app.toolbox.dynamic_confs(include_migrated_tool_conf=True): if self.shed_config_filename == shed_tool_conf_dict['config_filename']: return shed_tool_conf_dict return default def get_tool_relative_path(self, app): shed_conf_dict = self.get_shed_config_dict(app) tool_path = None relative_path = None if shed_conf_dict: tool_path = shed_conf_dict['tool_path'] relative_path = os.path.join(self.tool_shed_path_name, 'repos', self.owner, self.name, self.installed_changeset_revision) return tool_path, relative_path def guess_shed_config(self, app, default=None): tool_ids = [] metadata = self.metadata or {} for tool in metadata.get('tools', []): tool_ids.append(tool.get('guid')) for shed_tool_conf_dict in app.toolbox.dynamic_confs(include_migrated_tool_conf=True): name = shed_tool_conf_dict['config_filename'] for elem in shed_tool_conf_dict['config_elems']: if elem.tag == 'tool': for sub_elem in elem.findall('id'): tool_id = sub_elem.text.strip() if tool_id in tool_ids: self.shed_config_filename = name return shed_tool_conf_dict elif elem.tag == "section": for tool_elem in elem.findall('tool'): for sub_elem in tool_elem.findall('id'): tool_id = sub_elem.text.strip() if tool_id in tool_ids: self.shed_config_filename = name return shed_tool_conf_dict if self.includes_datatypes or self.includes_data_managers: # We need to search by file paths here, which is less desirable. 
tool_shed = common_util.remove_protocol_and_port_from_tool_shed_url(self.tool_shed) for shed_tool_conf_dict in app.toolbox.dynamic_confs(include_migrated_tool_conf=True): tool_path = shed_tool_conf_dict['tool_path'] relative_path = os.path.join(tool_path, tool_shed, 'repos', self.owner, self.name, self.installed_changeset_revision) if os.path.exists(relative_path): self.shed_config_filename = shed_tool_conf_dict['config_filename'] return shed_tool_conf_dict return default @property def has_readme_files(self): if self.metadata: return 'readme_files' in self.metadata return False @property def has_repository_dependencies(self): if self.metadata: repository_dependencies_dict = self.metadata.get('repository_dependencies', {}) repository_dependencies = repository_dependencies_dict.get('repository_dependencies', []) # [["http://localhost:9009", "package_libgtextutils_0_6", "test", "e2003cbf18cd", "True", "True"]] for rd_tup in repository_dependencies: tool_shed, name, owner, changeset_revision, prior_installation_required, only_if_compiling_contained_td = \ common_util.parse_repository_dependency_tuple(rd_tup) if not asbool(only_if_compiling_contained_td): return True return False @property def has_repository_dependencies_only_if_compiling_contained_td(self): if self.metadata: repository_dependencies_dict = self.metadata.get('repository_dependencies', {}) repository_dependencies = repository_dependencies_dict.get('repository_dependencies', []) # [["http://localhost:9009", "package_libgtextutils_0_6", "test", "e2003cbf18cd", "True", "True"]] for rd_tup in repository_dependencies: tool_shed, name, owner, changeset_revision, prior_installation_required, only_if_compiling_contained_td = \ common_util.parse_repository_dependency_tuple(rd_tup) if not asbool(only_if_compiling_contained_td): return False return True return False @property def in_error_state(self): return self.status == self.installation_status.ERROR @property def includes_data_managers(self): if self.metadata: return bool(len(self.metadata.get('data_manager', {}).get('data_managers', {}))) return False @property def includes_tools(self): if self.metadata: return 'tools' in self.metadata return False @property def includes_tools_for_display_in_tool_panel(self): if self.includes_tools: tool_dicts = self.metadata['tools'] for tool_dict in tool_dicts: if tool_dict.get('add_to_tool_panel', True): return True return False @property def includes_tool_dependencies(self): if self.metadata: return 'tool_dependencies' in self.metadata return False @property def includes_workflows(self): if self.metadata: return 'workflows' in self.metadata return False @property def installed_repository_dependencies(self): """Return the repository's repository dependencies that are currently installed.""" installed_required_repositories = [] for required_repository in self.repository_dependencies: if required_repository.status == self.installation_status.INSTALLED: installed_required_repositories.append(required_repository) return installed_required_repositories @property def installed_tool_dependencies(self): """Return the repository's tool dependencies that are currently installed, but possibly in an error state.""" installed_dependencies = [] for tool_dependency in self.tool_dependencies: if tool_dependency.status in [ToolDependency.installation_status.INSTALLED]: installed_dependencies.append(tool_dependency) return installed_dependencies @property def is_deprecated_in_tool_shed(self): if self.tool_shed_status: return 
asbool(self.tool_shed_status.get('repository_deprecated', False)) return False @property def is_deactivated_or_installed(self): return self.status in [self.installation_status.DEACTIVATED, self.installation_status.INSTALLED] @property def is_installed(self): return self.status == self.installation_status.INSTALLED @property def is_latest_installable_revision(self): if self.tool_shed_status: return asbool(self.tool_shed_status.get('latest_installable_revision', False)) return False @property def is_new(self): return self.status == self.installation_status.NEW @property def missing_repository_dependencies(self): """Return the repository's repository dependencies that are not currently installed, and may not ever have been installed.""" missing_required_repositories = [] for required_repository in self.repository_dependencies: if required_repository.status not in [self.installation_status.INSTALLED]: missing_required_repositories.append(required_repository) return missing_required_repositories @property def missing_tool_dependencies(self): """Return the repository's tool dependencies that are not currently installed, and may not ever have been installed.""" missing_dependencies = [] for tool_dependency in self.tool_dependencies: if tool_dependency.status not in [ToolDependency.installation_status.INSTALLED]: missing_dependencies.append(tool_dependency) return missing_dependencies def repo_files_directory(self, app): repo_path = self.repo_path(app) if repo_path: return os.path.join(repo_path, self.name) return None def repo_path(self, app): tool_shed = common_util.remove_protocol_and_port_from_tool_shed_url(self.tool_shed) for shed_tool_conf_dict in app.toolbox.dynamic_confs(include_migrated_tool_conf=True): tool_path = shed_tool_conf_dict['tool_path'] relative_path = os.path.join(tool_path, tool_shed, 'repos', self.owner, self.name, self.installed_changeset_revision) if os.path.exists(relative_path): return relative_path return None @property def repository_dependencies(self): """ Return all of this repository's repository dependencies, ignoring their attributes like prior_installation_required and only_if_compiling_contained_td. 
""" required_repositories = [] for rrda in self.required_repositories: repository_dependency = rrda.repository_dependency required_repository = repository_dependency.repository if required_repository: required_repositories.append(required_repository) return required_repositories @property def repository_dependencies_being_installed(self): """Return the repository's repository dependencies that are currently being installed.""" required_repositories_being_installed = [] for required_repository in self.repository_dependencies: if required_repository.status in [self.installation_status.CLONING, self.installation_status.INSTALLING_REPOSITORY_DEPENDENCIES, self.installation_status.INSTALLING_TOOL_DEPENDENCIES, self.installation_status.LOADING_PROPRIETARY_DATATYPES, self.installation_status.SETTING_TOOL_VERSIONS]: required_repositories_being_installed.append(required_repository) return required_repositories_being_installed @property def repository_dependencies_missing_or_being_installed(self): """Return the repository's repository dependencies that are either missing or currently being installed.""" required_repositories_missing_or_being_installed = [] for required_repository in self.repository_dependencies: if required_repository.status in [self.installation_status.ERROR, self.installation_status.INSTALLING, self.installation_status.NEVER_INSTALLED, self.installation_status.UNINSTALLED]: required_repositories_missing_or_being_installed.append(required_repository) return required_repositories_missing_or_being_installed @property def repository_dependencies_with_installation_errors(self): """Return the repository's repository dependencies that have installation errors.""" required_repositories_with_installation_errors = [] for required_repository in self.repository_dependencies: if required_repository.status == self.installation_status.ERROR: required_repositories_with_installation_errors.append(required_repository) return required_repositories_with_installation_errors @property def requires_prior_installation_of(self): """ Return a list of repository dependency tuples like (tool_shed, name, owner, changeset_revision, prior_installation_required) for this repository's repository dependencies where prior_installation_required is True. By definition, repository dependencies are required to be installed in order for this repository to function correctly. However, those repository dependencies that are defined for this repository with prior_installation_required set to True place them in a special category in that the required repositories must be installed before this repository is installed. Among other things, this enables these "special" repository dependencies to include information that enables the successful installation of this repository. This method is not used during the initial installation of this repository, but only after it has been installed (metadata must be set for this repository in order for this method to be useful). 
""" required_rd_tups_that_must_be_installed = [] if self.has_repository_dependencies: rd_tups = self.metadata['repository_dependencies']['repository_dependencies'] for rd_tup in rd_tups: if len(rd_tup) == 5: tool_shed, name, owner, changeset_revision, prior_installation_required, only_if_compiling_contained_td = \ common_util.parse_repository_dependency_tuple(rd_tup, contains_error=False) if asbool(prior_installation_required): required_rd_tups_that_must_be_installed.append((tool_shed, name, owner, changeset_revision, 'True', 'False')) elif len(rd_tup) == 6: tool_shed, name, owner, changeset_revision, prior_installation_required, only_if_compiling_contained_td = \ common_util.parse_repository_dependency_tuple(rd_tup, contains_error=False) # The repository dependency will only be required to be previously installed if it does not fall into the category of # a repository that must be installed only so that its contained tool dependency can be used for compiling the tool # dependency of the dependent repository. if not asbool(only_if_compiling_contained_td): if asbool(prior_installation_required): required_rd_tups_that_must_be_installed.append((tool_shed, name, owner, changeset_revision, 'True', 'False')) return required_rd_tups_that_must_be_installed @property def revision_update_available(self): # This method should be named update_available, but since it is no longer possible to drop a table column using migration scripts # with the sqlite database (see ~/galaxy/model/migrate/versions/0016_drop_update_available_col_add_tool_shed_status_col.py), we # have to name it in such a way that it will not conflict with the eliminated tool_shed_repository.update_available column (which # cannot be eliminated if using the sqlite database). if self.tool_shed_status: return asbool(self.tool_shed_status.get('revision_update', False)) return False def set_shed_config_filename(self, value): self.metadata['shed_config_filename'] = value shed_config_filename = property(get_shed_config_filename, set_shed_config_filename) def to_dict(self, view='collection', value_mapper=None): if value_mapper is None: value_mapper = {} rval = {} try: visible_keys = self.__getattribute__('dict_' + view + '_visible_keys') except AttributeError: raise Exception('Unknown API view: %s' % view) for key in visible_keys: try: rval[key] = self.__getattribute__(key) if key in value_mapper: rval[key] = value_mapper.get(key, rval[key]) except AttributeError: rval[key] = None return rval @property def tool_dependencies_being_installed(self): dependencies_being_installed = [] for tool_dependency in self.tool_dependencies: if tool_dependency.status == ToolDependency.installation_status.INSTALLING: dependencies_being_installed.append(tool_dependency) return dependencies_being_installed @property def tool_dependencies_installed_or_in_error(self): """Return the repository's tool dependencies that are currently installed, but possibly in an error state.""" installed_dependencies = [] for tool_dependency in self.tool_dependencies: if tool_dependency.status in [ToolDependency.installation_status.INSTALLED, ToolDependency.installation_status.ERROR]: installed_dependencies.append(tool_dependency) return installed_dependencies @property def tool_dependencies_missing_or_being_installed(self): dependencies_missing_or_being_installed = [] for tool_dependency in self.tool_dependencies: if tool_dependency.status in [ToolDependency.installation_status.ERROR, ToolDependency.installation_status.INSTALLING, ToolDependency.installation_status.NEVER_INSTALLED, 
ToolDependency.installation_status.UNINSTALLED]: dependencies_missing_or_being_installed.append(tool_dependency) return dependencies_missing_or_being_installed @property def tool_dependencies_with_installation_errors(self): dependencies_with_installation_errors = [] for tool_dependency in self.tool_dependencies: if tool_dependency.status == ToolDependency.installation_status.ERROR: dependencies_with_installation_errors.append(tool_dependency) return dependencies_with_installation_errors @property def tool_shed_path_name(self): tool_shed_url = self.tool_shed if tool_shed_url.find(':') > 0: # Eliminate the port, if any, since it will result in an invalid directory name. tool_shed_url = tool_shed_url.split(':')[0] return tool_shed_url.rstrip('/') @property def tuples_of_repository_dependencies_needed_for_compiling_td(self): """ Return tuples defining this repository's repository dependencies that are necessary only for compiling this repository's tool dependencies. """ rd_tups_of_repositories_needed_for_compiling_td = [] if self.metadata: repository_dependencies = self.metadata.get('repository_dependencies', None) rd_tups = repository_dependencies['repository_dependencies'] for rd_tup in rd_tups: if len(rd_tup) == 6: tool_shed, name, owner, changeset_revision, prior_installation_required, only_if_compiling_contained_td = rd_tup if asbool(only_if_compiling_contained_td): rd_tups_of_repositories_needed_for_compiling_td.append((tool_shed, name, owner, changeset_revision, 'False', 'True')) return rd_tups_of_repositories_needed_for_compiling_td @property def uninstalled_repository_dependencies(self): """Return the repository's repository dependencies that have been uninstalled.""" uninstalled_required_repositories = [] for required_repository in self.repository_dependencies: if required_repository.status == self.installation_status.UNINSTALLED: uninstalled_required_repositories.append(required_repository) return uninstalled_required_repositories @property def uninstalled_tool_dependencies(self): """Return the repository's tool dependencies that have been uninstalled.""" uninstalled_tool_dependencies = [] for tool_dependency in self.tool_dependencies: if tool_dependency.status == ToolDependency.installation_status.UNINSTALLED: uninstalled_tool_dependencies.append(tool_dependency) return uninstalled_tool_dependencies @property def upgrade_available(self): if self.tool_shed_status: if self.is_deprecated_in_tool_shed: # Only allow revision upgrades if the repository is not deprecated in the tool shed. return False return asbool(self.tool_shed_status.get('revision_upgrade', False)) return False
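# A minimal usage sketch (illustrative values, not real repository data) exercising the
# installation_status helpers and the 'element' view defined on ToolShedRepository above.
repo = ToolShedRepository(id=1,
                          tool_shed='toolshed.g2.bx.psu.edu',
                          name='package_example_1_0',
                          owner='devteam',
                          installed_changeset_revision='abc123de',
                          changeset_revision='abc123de',
                          status=ToolShedRepository.installation_status.NEW)
assert repo.can_install and repo.is_new          # status == 'New'
repo.status = repo.installation_status.INSTALLED
assert repo.is_installed and repo.can_reset_metadata
element_view = repo.as_dict()                    # same as to_dict(view='element')
assert element_view['status'] == 'Installed'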
STAGING_ACTION_REMOTE = "remote" STAGING_ACTION_LOCAL = "local" STAGING_ACTION_NONE = None STAGING_ACTION_DEFAULT = "default" # Poor man's enum. path_type = Bunch( # Galaxy input datasets and extra files. INPUT="input", # Galaxy config and param files. CONFIG="config", # Files from tool's tool_dir (for now just wrapper if available). TOOL="tool", # Input work dir files - e.g. metadata files, task-split input files, etc. WORKDIR="workdir", # Galaxy output datasets in their final home. OUTPUT="output", # Galaxy from_work_dir output paths and other files (e.g. galaxy.json) OUTPUT_WORKDIR="output_workdir", # Other fixed tool parameter paths (likely coming from tool data, but not # necessarily). Not sure this is the best name... UNSTRUCTURED="unstructured", ) ACTION_DEFAULT_PATH_TYPES = [ path_type.INPUT, path_type.CONFIG, path_type.TOOL, path_type.WORKDIR, path_type.OUTPUT,
def get_uploaded_datasets(self, trans, context, override_name=None, override_info=None): def get_data_file_filename(data_file, override_name=None, override_info=None, purge=True): dataset_name = override_name def get_file_name(file_name): file_name = file_name.split('\\')[-1] file_name = file_name.split('/')[-1] return file_name try: # Use the existing file if not dataset_name and 'filename' in data_file: dataset_name = get_file_name(data_file['filename']) return Bunch(type='file', path=data_file['local_filename'], name=dataset_name, purge_source=purge) except Exception: # The uploaded file should've been persisted by the upload tool action return Bunch(type=None, path=None, name=None) def get_url_paste_urls_or_filename(group_incoming, override_name=None, override_info=None): url_paste_file = group_incoming.get('url_paste', None) if url_paste_file is not None: url_paste = open(url_paste_file, 'r').read() def start_of_url(content): start_of_url_paste = content.lstrip()[0:8].lower() looks_like_url = False for url_prefix in ["http://", "https://", "ftp://", "file://"]: if start_of_url_paste.startswith(url_prefix): looks_like_url = True break return looks_like_url if start_of_url(url_paste): url_paste = url_paste.replace('\r', '').split('\n') for line in url_paste: line = line.strip() if line: if not start_of_url(line): continue # non-url line, ignore if "file://" in line: if not trans.user_is_admin: raise AdminRequiredException() elif not trans.app.config.allow_path_paste: raise ConfigDoesNotAllowException() upload_path = line[len("file://"):] dataset_name = os.path.basename(upload_path) else: dataset_name = line if override_name: dataset_name = override_name yield Bunch(type='url', path=line, name=dataset_name) else: dataset_name = 'Pasted Entry' # we need to differentiate between various url pastes here if override_name: dataset_name = override_name yield Bunch(type='file', path=url_paste_file, name=dataset_name) def get_one_filename(context): data_file = context['file_data'] url_paste = context['url_paste'] ftp_files = context['ftp_files'] name = context.get('NAME', None) info = context.get('INFO', None) uuid = context.get('uuid', None) or None # Turn '' to None file_type = context.get('file_type', None) dbkey = self.get_dbkey(context) warnings = [] to_posix_lines = False if context.get('to_posix_lines', None) not in ["None", None, False]: to_posix_lines = True auto_decompress = False if context.get('auto_decompress', None) not in ["None", None, False]: auto_decompress = True space_to_tab = False if context.get('space_to_tab', None) not in ["None", None, False]: space_to_tab = True file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info) if file_bunch.path: if url_paste is not None and url_paste.strip(): warnings.append("All file contents specified in the paste box were ignored.") if ftp_files: warnings.append("All FTP uploaded file selections were ignored.") elif url_paste is not None and url_paste.strip(): # we need to use url_paste for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info): if file_bunch.path: break if file_bunch.path and ftp_files is not None: warnings.append("All FTP uploaded file selections were ignored.") elif ftp_files is not None and trans.user is not None: # look for files uploaded via FTP user_ftp_dir = trans.user_ftp_dir assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link" for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir): for filename in filenames: 
for ftp_filename in ftp_files: if ftp_filename == filename: path = relpath(os.path.join(dirpath, filename), user_ftp_dir) if not os.path.islink(os.path.join(dirpath, filename)): ftp_data_file = {'local_filename' : os.path.abspath(os.path.join(user_ftp_dir, path)), 'filename' : os.path.basename(path)} purge = getattr(trans.app.config, 'ftp_upload_purge', True) file_bunch = get_data_file_filename( ftp_data_file, override_name=name, override_info=info, purge=purge, ) if file_bunch.path: break if file_bunch.path: break if file_bunch.path: break file_bunch.to_posix_lines = to_posix_lines file_bunch.auto_decompress = auto_decompress file_bunch.space_to_tab = space_to_tab file_bunch.uuid = uuid if file_type is not None: file_bunch.file_type = file_type if dbkey is not None: file_bunch.dbkey = dbkey return file_bunch, warnings def get_filenames(context): rval = [] data_file = context['file_data'] ftp_files = context['ftp_files'] uuid = context.get('uuid', None) or None # Turn '' to None name = context.get('NAME', None) info = context.get('INFO', None) file_type = context.get('file_type', None) dbkey = self.get_dbkey(context) to_posix_lines = False if context.get('to_posix_lines', None) not in ["None", None, False]: to_posix_lines = True auto_decompress = False if context.get('auto_decompress', None) not in ["None", None, False]: auto_decompress = True space_to_tab = False if context.get('space_to_tab', None) not in ["None", None, False]: space_to_tab = True file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info) file_bunch.uuid = uuid if file_bunch.path: file_bunch.to_posix_lines = to_posix_lines file_bunch.auto_decompress = auto_decompress file_bunch.space_to_tab = space_to_tab if file_type is not None: file_bunch.file_type = file_type if dbkey is not None: file_bunch.dbkey = dbkey rval.append(file_bunch) for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info): if file_bunch.path: file_bunch.uuid = uuid file_bunch.to_posix_lines = to_posix_lines file_bunch.auto_decompress = auto_decompress file_bunch.space_to_tab = space_to_tab if file_type is not None: file_bunch.file_type = file_type if dbkey is not None: file_bunch.dbkey = dbkey rval.append(file_bunch) # look for files uploaded via FTP valid_files = [] if ftp_files is not None: # Normalize input paths to ensure utf-8 encoding is normal form c. # This allows for comparison when the filesystem uses a different encoding than the browser. 
ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, text_type)] if trans.user is None: log.warning('Anonymous user passed values in ftp_files: %s' % ftp_files) ftp_files = [] # TODO: warning to the user (could happen if session has become invalid) else: user_ftp_dir = trans.user_ftp_dir assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link" for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir): for filename in filenames: path = relpath(os.path.join(dirpath, filename), user_ftp_dir) if not os.path.islink(os.path.join(dirpath, filename)): # Normalize filesystem paths if isinstance(path, text_type): valid_files.append(unicodedata.normalize('NFC', path)) else: valid_files.append(path) else: ftp_files = [] for ftp_file in ftp_files: if ftp_file not in valid_files: log.warning('User passed an invalid file path in ftp_files: %s' % ftp_file) continue # TODO: warning to the user (could happen if file is already imported) ftp_data_file = {'local_filename' : os.path.abspath(os.path.join(user_ftp_dir, ftp_file)), 'filename' : os.path.basename(ftp_file)} purge = getattr(trans.app.config, 'ftp_upload_purge', True) file_bunch = get_data_file_filename(ftp_data_file, override_name=name, override_info=info, purge=purge) if file_bunch.path: file_bunch.to_posix_lines = to_posix_lines file_bunch.auto_decompress = auto_decompress file_bunch.space_to_tab = space_to_tab if file_type is not None: file_bunch.file_type = file_type if dbkey is not None: file_bunch.dbkey = dbkey rval.append(file_bunch) return rval file_type = self.get_file_type(context) file_count = self.get_file_count(trans, context) d_type = self.get_datatype(trans, context) dbkey = self.get_dbkey(context) tag_using_filenames = context.get('tag_using_filenames', False) force_composite = asbool(context.get('force_composite', 'False')) writable_files = d_type.writable_files writable_files_offset = 0 groups_incoming = [None for _ in range(file_count)] for group_incoming in context.get(self.name, []): i = int(group_incoming['__index__']) groups_incoming[i] = group_incoming if d_type.composite_type is not None or force_composite: # handle uploading of composite datatypes # Only one Dataset can be created dataset = Bunch() dataset.type = 'composite' dataset.file_type = file_type dataset.dbkey = dbkey dataset.datatype = d_type dataset.warnings = [] dataset.metadata = {} dataset.composite_files = {} dataset.uuid = None dataset.tag_using_filenames = None # load metadata files_metadata = context.get(self.metadata_ref, {}) metadata_name_substition_default_dict = dict((composite_file.substitute_name_with_metadata, d_type.metadata_spec[composite_file.substitute_name_with_metadata].default) for composite_file in d_type.composite_files.values() if composite_file.substitute_name_with_metadata) for meta_name, meta_spec in d_type.metadata_spec.items(): if meta_spec.set_in_upload: if meta_name in files_metadata: meta_value = files_metadata[meta_name] if meta_name in metadata_name_substition_default_dict: meta_value = sanitize_for_filename(meta_value, default=metadata_name_substition_default_dict[meta_name]) dataset.metadata[meta_name] = meta_value dataset.name = self.get_composite_dataset_name(context) if dataset.datatype.composite_type == 'auto_primary_file': # replace sniff here with just creating an empty file temp_name = sniff.stream_to_file(StringIO(d_type.generate_primary_file(dataset)), prefix='upload_auto_primary_file') dataset.primary_file = temp_name dataset.to_posix_lines = True 
dataset.auto_decompress = True dataset.space_to_tab = False else: file_bunch, warnings = get_one_filename(groups_incoming[0]) writable_files_offset = 1 dataset.primary_file = file_bunch.path dataset.to_posix_lines = file_bunch.to_posix_lines dataset.auto_decompress = file_bunch.auto_decompress dataset.space_to_tab = file_bunch.space_to_tab if file_bunch.file_type: dataset.file_type = file_type if file_bunch.dbkey: dataset.dbkey = dbkey dataset.warnings.extend(warnings) if dataset.primary_file is None: # remove this before finish, this should create an empty dataset raise Exception('No primary dataset file was available for composite upload') if not force_composite: keys = [value.name for value in writable_files.values()] else: keys = [str(index) for index in range(file_count)] for i, group_incoming in enumerate(groups_incoming[writable_files_offset:]): key = keys[i + writable_files_offset] if not force_composite and group_incoming is None and not writable_files[list(writable_files.keys())[keys.index(key)]].optional: dataset.warnings.append("A required composite file (%s) was not specified." % (key)) dataset.composite_files[key] = None else: file_bunch, warnings = get_one_filename(group_incoming) dataset.warnings.extend(warnings) if file_bunch.path: if force_composite: key = group_incoming.get("NAME") or i dataset.composite_files[key] = file_bunch.__dict__ elif not force_composite: dataset.composite_files[key] = None if not writable_files[list(writable_files.keys())[keys.index(key)]].optional: dataset.warnings.append("A required composite file (%s) was not specified." % (key)) return [dataset] else: rval = [] for i, file_contexts in enumerate(context[self.name]): datasets = get_filenames(file_contexts) for dataset in datasets: override_file_type = self.get_file_type(context[self.name][i], parent_context=context) d_type = self.get_datatype(trans, context[self.name][i], parent_context=context) dataset.file_type = override_file_type dataset.datatype = d_type dataset.ext = self.get_datatype_ext(trans, context[self.name][i], parent_context=context) dataset.dbkey = self.get_dbkey(context[self.name][i], parent_context=context) dataset.tag_using_filenames = tag_using_filenames rval.append(dataset) return rval
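# Hedged illustration (not a documented API) of the incoming 'context' mapping that
# get_uploaded_datasets() above walks for a plain, non-composite upload. The key names
# mirror the lookups in the method (file_data, url_paste, ftp_files, NAME, INFO, uuid,
# to_posix_lines, auto_decompress, space_to_tab, __index__, file_type, dbkey); 'files'
# stands in for whatever self.name actually is, and every value is made up.
example_context = {
    'file_type': 'auto',
    'dbkey': 'hg38',
    'files': [
        {
            '__index__': 0,
            'file_data': {'filename': 'reads.fastq',
                          'local_filename': '/tmp/upload_dir/reads.fastq'},
            'url_paste': None,
            'ftp_files': [],
            'NAME': None,
            'INFO': None,
            'uuid': '',
            'to_posix_lines': 'Yes',
            'auto_decompress': 'Yes',
            'space_to_tab': None,
        },
    ],
}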
def test_pulsar_objectstore(self): # Define real object store used by Pulsar server. object_store_config_file = join(self.temp_directory, "object_store_conf.xml") with open(object_store_config_file, "w") as configf: config_template = Template("""<?xml version="1.0"?> <object_store type="disk"> <files_dir path="${temp_directory}"/> <extra_dir type="temp" path="${temp_directory}"/> <extra_dir type="job_work" path="${temp_directory}"/> </object_store> """) config_contents = config_template.safe_substitute( temp_directory=self.temp_directory) configf.write(config_contents) app_conf = dict( object_store_config_file=object_store_config_file, private_token="12345", ) from .test_utils import test_pulsar_server with test_pulsar_server(app_conf=app_conf) as server: url = server.application_url # Define a proxy Pulsar object store. proxy_object_store_config_file = join( self.temp_directory, "proxy_object_store_conf.xml") with open(proxy_object_store_config_file, "w") as configf: config_template = Template("""<?xml version="1.0"?> <object_store type="pulsar" url="$url" private_token="12345" transport="urllib"> <!-- private_token is optional - see Pulsar documentation for more information. --> <!-- transport is optional, set to curl to use libcurl instead of urllib for communication with Pulsar. --> </object_store> """) contents = config_template.safe_substitute(url=url) configf.write(contents) config = Bunch( object_store_config_file=proxy_object_store_config_file) object_store = build_object_store_from_config(config=config) # Test no dataset with id 1 exists. absent_dataset = MockDataset(1) assert not object_store.exists(absent_dataset) # Write empty dataset 2 in second backend, ensure it is empty and # exists. empty_dataset = MockDataset(2) self.__write(b"", "000/dataset_2.dat") assert object_store.exists(empty_dataset) assert object_store.empty(empty_dataset) # Write non-empty dataset in backend 1, test it is not emtpy & exists. hello_world_dataset = MockDataset(3) self.__write(b"Hello World!", "000/dataset_3.dat") assert object_store.exists(hello_world_dataset) assert not object_store.empty(hello_world_dataset) # Test get_data data = object_store.get_data(hello_world_dataset) assert data == b"Hello World!" data = object_store.get_data(hello_world_dataset, start=1, count=6) assert data == b"ello W" # Test Size # Test absent and empty datasets yield size of 0. assert object_store.size(absent_dataset) == 0 assert object_store.size(empty_dataset) == 0 # Elsewise assert object_store.size( hello_world_dataset ) > 0 # Should this always be the number of bytes? # Test percent used (to some degree) percent_store_used = object_store.get_store_usage_percent() assert percent_store_used > 0.0 assert percent_store_used < 100.0 # Test update_from_file test output_dataset = MockDataset(4) output_real_path = join(self.temp_directory, "000", "dataset_4.dat") assert not exists(output_real_path) output_working_path = self.__write( b"NEW CONTENTS", "job_working_directory1/example_output") object_store.update_from_file(output_dataset, file_name=output_working_path, create=True) assert exists(output_real_path) # Test delete to_delete_dataset = MockDataset(5) to_delete_real_path = self.__write(b"content to be deleted!", "000/dataset_5.dat") assert object_store.exists(to_delete_dataset) assert object_store.delete(to_delete_dataset) assert not object_store.exists(to_delete_dataset) assert not exists(to_delete_real_path) # Test json content. 
complex_contents_dataset = MockDataset(6) complex_content = b'{"a":6}' self.__write(complex_content, "000/dataset_6.dat") assert object_store.exists(complex_contents_dataset) data = object_store.get_data(complex_contents_dataset) assert data == complex_content
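# MockDataset is referenced by the test above but defined elsewhere in the test
# utilities; this is only a minimal sketch of what such a stand-in could look like,
# assuming the disk-backed store needs little more than an integer id (the test's
# __write helper creates the matching 000/dataset_<id>.dat files itself).
class MockDataset(object):
    def __init__(self, id):
        self.id = id
        self.object_store_id = None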
def __main__(): #Parse Command Line parser = optparse.OptionParser() parser.add_option( '', '--threads', dest='threads', help='The number of threads to use' ) parser.add_option( '', '--ref_name', dest='ref_name', help='The reference name to change all output matches to' ) parser.add_option( '', '--ref_source', dest='ref_source', help='Whether the reference is self, cached or from the history' ) parser.add_option( '', '--ref_sequences', dest='ref_sequences', help='Number of sequences in the reference dataset' ) parser.add_option( '', '--mirror', dest='mirror', help='Do or do not report mirror image of all gap-free alignments' ) parser.add_option( '', '--source_select', dest='source_select', help='Whether to used pre-set or cached reference file' ) parser.add_option( '', '--input1', dest='input1', help='The name of the reference file if using history or reference base name if using cached' ) parser.add_option( '', '--input2', dest='input2', help='The reads file to align' ) parser.add_option( '', '--strand', dest='strand', help='Which strand of the read to search, if specifying all parameters' ) parser.add_option( '', '--match_reward', dest='match_reward', help='Score values for a match (reward)' ) parser.add_option( '', '--match_penalty', dest='match_penalty', help='Score values for a mismatch (penalty), same as reward when not specified (but reward is)' ) parser.add_option( '', '--gapped', dest='gapped', help='Perform gapped extension of HSPs (or seeds if gapped-free extension is not performed) after first reducing them to anchor points' ) parser.add_option( '', '--gap_open', dest='gap_open', help='Score penalties for opening a gap' ) parser.add_option( '', '--gap_extend', dest='gap_extend', help='Score penalties for extending a gap' ) parser.add_option( '', '--ambiguous', dest='ambiguous', help='Treat as ambiguous nucleotides' ) parser.add_option( '', '--step', dest='step', help='Offset between the starting positions of successive target words considered for potential seeds' ) parser.add_option( '', '--masking', dest='masking', help='Dynamically mask the target sequence by excluding any positions that appear in too many alignments from further consideration for seeds' ) parser.add_option( '', '--seed', dest='seed', help='Offset between the starting positions of successive target words considered for potential seeds' ) parser.add_option( '', '--match_length', dest='match_length', help='Seeds require bp word of this length with matches in all positions' ) parser.add_option( '', '--transition', dest='transition', help='Transition settings, affects the number of allowed transition substitutions in each seed' ) parser.add_option( '', '--xdrop', dest='xdrop', help='Find HSPs using the xdrop extension method with the given termination threshold instead of using the exact match method' ) parser.add_option( '', '--hspthresh', dest='hspthresh', help='Score threshold for the x-drop extension method' ) parser.add_option( '', '--entropy', dest='entropy', help='Whether to adjust for entropy when qualifying HSPs in the x-drop extension method' ) parser.add_option( '', '--chain', dest='chain', help='Perform chaining of HSPs with no penalties' ) parser.add_option( '', '--ydrop', dest='ydrop', help='Set the threshold for terminating gapped extension' ) parser.add_option( '', '--ytrim', dest='ytrim', help='Trim back to peak score if y-drop extension encounters end of sequence' ) parser.add_option( '', '--gappedthresh', dest='gappedthresh', help='Threshold for gapped extension. 
Alignments scoring lower are discarded.' ) parser.add_option( '', '--filter', dest='filter', help='Filter alignments.' ) parser.add_option( '', '--identity_min', dest='identity_min', help='Minimum for filtering alignments by their percent identity.' ) parser.add_option( '', '--identity_max', dest='identity_max', help='Maximum for filtering alignments by their percent identity.' ) parser.add_option( '', '--coverage_min', dest='coverage_min', help='Minimum for filtering alignments by how much of the input sequence they cover.' ) parser.add_option( '', '--coverage_max', dest='coverage_max', help='Maximum for filtering alignments by how much of the input sequence they cover.' ) parser.add_option( '', '--nmatch_min', dest='nmatch_min', help='Minimum for filtering alignments by how many bases they match.' ) parser.add_option( '', '--nmismatch_max', dest='nmismatch_max', help='Maximum for filtering alignments by the number of mismatches.' ) parser.add_option( '', '--trivial', dest='trivial', help='Do or do not output a trivial self-alignment block if the target and query sequences are identical.' ) parser.add_option( '', '--inner', dest='inner', help='Perform additional alignment between the gapped alignment blocks using (presumably) more sensitive alignment parameters.' ) parser.add_option( '', '--shortcuts_for_yasra', dest='shortcuts_for_yasra', help='Shortcut options to support the Yasra mapping assembler' ) parser.add_option( '', '--out_format', dest='format', help='The format of the output file (sam, diffs, or tabular (general))' ) parser.add_option( '', '--output', dest='output', help='The output file' ) parser.add_option( '', '--lastzSeqsFileDir', dest='lastzSeqsFileDir', help='Directory of local lastz_seqs.loc file' ) ( options, args ) = parser.parse_args() # Output version # of tool try: tmp = tempfile.NamedTemporaryFile().name tmp_stdout = open( tmp, 'wb' ) proc = subprocess.Popen( args='lastz -v', shell=True, stdout=tmp_stdout ) tmp_stdout.close() returncode = proc.wait() stdout = None for line in open( tmp_stdout.name, 'rb' ): if line.lower().find( 'version' ) >= 0: stdout = line.strip() break if stdout: sys.stdout.write( '%s\n' % stdout ) else: raise Exception except: sys.stdout.write( 'Could not determine Lastz version\n' ) if options.ref_name: ref_name = '[nickname=%s]' % options.ref_name else: ref_name = '' set_options = '' # Commonly-used preset options if options.source_select == 'pre_set': # Handle ref_source if options.ref_source == 'self': # --mirror is available only if ref_source selection is --self if options.mirror == 'yes': set_options += '--nomirror ' else: # Full set of user-specified options # Handle ref_source if options.ref_source == 'self': # --mirror is available only if ref_source selection is --self if options.mirror == 'yes': set_options += '--nomirror ' else: # Using --self automatically enables this option if options.trivial == 'no': set_options += '--notrivial ' # Handle --match if options.match_reward not in [ "", "0" ]: if options.match_penalty in [ "", "0" ]: match_penalty = options.match_reward else: match_penalty = options.match_penalty set_options += '--match=%s,%s ' % ( options.match_reward, match_penalty ) # Handle --gapped if options.gapped == 'yes': set_options += '--gapped ' if options.gap_open not in [ "" ]: if options.gap_extend in [ "" ]: set_options += '--gap=%s ' % options.gap_open else: set_options += '--gap=%s,%s ' % ( options.gap_open, options.gap_extend ) # Handle --ydrop if options.ydrop not in [ "", "0" ]: set_options += '--ydrop=%s ' % 
options.ydrop # Handle --ytrim if options.ytrim == 'no': set_options += '--noytrim ' # Handle --gappedthresh if options.gappedthresh not in [ "", "0" ]: set_options += '--gappedthresh=%s ' % options.gappedthresh # Handle --inner if options.inner not in [ "" ]: set_options += '--inner=%s ' % options.inner else: set_options += '--nogapped ' # Handle --step if options.step not in [ "", "0" ]: set_options += '--step=%s ' % options.step # Handle --masking if options.masking not in [ '0' ]: set_options += '--masking=%s ' % options.masking # Handle --seed if options.seed not in [ "no" ]: if options.seed == 'match': set_options += '--seed=match%s ' % options.match_length else: set_options += '--seed=%s ' % options.seed # Handle --transition if options.transition == '0': set_options += '--notransition ' else: set_options += '--transition=%s ' % options.transition # Handle --xdrop if options.xdrop not in [ "", "0" ]: set_options += '--xdrop=%s ' % options.xdrop # handle --hspthresh if options.hspthresh not in [ "", "0" ]: set_options += '--hspthresh=%s ' % options.hspthresh # Handle --entropy if options.entropy == 'no': set_options += '--noentropy ' else: set_options += '--entropy ' # Handle --chain if options.chain == 'no': set_options += '--nochain ' else: set_options += '--chain ' # Handle --filter if options.filter not in [ "no" ]: if options.filter == 'identity': identity_min = options.identity_min if options.identity_max in [ "", "0" ] or options.identity_max <= identity_min: identity_max = '100' else: identity_max = options.identity_max set_options += '--filter=identity:%s..%s ' % ( identity_min, identity_max ) elif options.filter == 'coverage': coverage_min = options.coverage_min if options.coverage_max in [ "", "0" ] or options.coverage_max <= coverage_min: coverage_max = '100' else: coverage_max = options.coverage_max set_options += '--filter=coverage:%s..%s ' % ( coverage_min, coverage_max ) elif options.filter == 'nmatch': set_options += '--filter=nmatch:%s% ' % options.nmatch_min elif options.filter == 'nmismatch': set_options += '--filter=nmismatch:0..%s ' % options.nmismatch_max # Handle --strand set_options += '--strand=%s ' % options.strand # Handle --ambiguous if options.ambiguous not in [ "no" ]: set_options += '--ambiguous=%s ' % options.ambiguous # Handle --shortcuts_for_yasra if options.shortcuts_for_yasra not in [ 'none' ]: set_options += '--%s ' % ( options.shortcuts_for_yasra ) # Specify input2 and add [fullnames] modifier if output format is diffs if options.format == 'diffs': input2 = '%s[fullnames]' % options.input2 else: input2 = options.input2 if options.format == 'tabular': # Change output format to general if it's tabular and add field names for tabular output format = 'general-' tabular_fields = ':score,name1,strand1,size1,start1,zstart1,end1,length1,text1,name2,strand2,size2,start2,zstart2,end2,start2+,zstart2+,end2+,length2,text2,diff,cigar,identity,coverage,gaprate,diagonal,shingle' elif options.format == 'sam': # We currently need to keep headers. format = 'sam' tabular_fields = '' else: format = options.format tabular_fields = '' # Set up our queues threads = int( options.threads ) lastz_job_queue = LastzJobQueue( threads, slots=SLOTS ) combine_data_queue = CombineDataQueue( options.output ) if str( options.ref_source ) in [ 'history', 'self' ]: # Reference is a fasta dataset from the history or the dataset containing the target sequence itself, # so split job across the number of sequences in the dataset ( this could be a HUGE number ). 
try: # Ensure there is at least 1 sequence in the dataset ( this may not be necessary ). error_msg = "The reference dataset is missing metadata. Click the pencil icon in the history item and 'auto-detect' the metadata attributes." ref_sequences = int( options.ref_sequences ) if ref_sequences < 1: stop_queues( lastz_job_queue, combine_data_queue ) stop_err( error_msg ) except: stop_queues( lastz_job_queue, combine_data_queue ) stop_err( error_msg ) seqs = 0 fasta_reader = FastaReader( open( options.input1 ) ) while True: # Read the next sequence from the reference dataset seq = fasta_reader.next() if not seq: break seqs += 1 # Create a temporary file to contain the current sequence as input to lastz tmp_in_fd, tmp_in_name = tempfile.mkstemp( suffix='.in' ) tmp_in = os.fdopen( tmp_in_fd, 'wb' ) # Write the current sequence to the temporary input file tmp_in.write( '>%s\n%s\n' % ( seq.name, seq.text ) ) tmp_in.close() # Create a 2nd temporary file to contain the output from lastz execution on the current sequence tmp_out_fd, tmp_out_name = tempfile.mkstemp( suffix='.out' ) os.close( tmp_out_fd ) # Generate the command line for calling lastz on the current sequence command = 'lastz %s%s %s %s --format=%s%s > %s' % ( tmp_in_name, ref_name, input2, set_options, format, tabular_fields, tmp_out_name ) # Create a job object job = Bunch() job.command = command job.output = tmp_out_name job.cleanup = [ tmp_in_name, tmp_out_name ] job.combine_data_queue = combine_data_queue # Add another job to the lastz_job_queue. Execution will wait at this point if the queue is full. lastz_job_queue.put( job, block=True ) # Make sure the value of sequences in the metadata is the same as the number of # sequences read from the dataset. According to Bob, this may not be necessary. if ref_sequences != seqs: stop_queues( lastz_job_queue, combine_data_queue ) stop_err( "The value of metadata.sequences (%d) differs from the number of sequences read from the reference (%d)." % ( ref_sequences, seqs ) ) else: # Reference is a locally cached 2bit file, split job across number of chroms in 2bit file tbf = TwoBitFile( open( options.input1, 'r' ) ) for chrom in tbf.keys(): # Create a temporary file to contain the output from lastz execution on the current chrom tmp_out_fd, tmp_out_name = tempfile.mkstemp( suffix='.out' ) os.close( tmp_out_fd ) command = 'lastz %s/%s%s %s %s --format=%s%s >> %s' % \ ( options.input1, chrom, ref_name, input2, set_options, format, tabular_fields, tmp_out_name ) # Create a job object job = Bunch() job.command = command job.output = tmp_out_name job.cleanup = [ tmp_out_name ] job.combine_data_queue = combine_data_queue # Add another job to the lastz_job_queue. Execution will wait at this point if the queue is full. lastz_job_queue.put( job, block=True ) # Stop the lastz_job_queue. for t in lastz_job_queue.threads: lastz_job_queue.put( STOP_SIGNAL, True ) # Although all jobs are submitted to the queue, we can't shut down the combine_data_queue # until we know that all jobs have been submitted to its queue. We do this by checking # whether all of the threads in the lastz_job_queue have terminated. while threading.activeCount() > 2: time.sleep( 1 ) # Now it's safe to stop the combine_data_queue. combine_data_queue.put( STOP_SIGNAL )
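# Illustrative only: the shape of one command string the reference-splitting loop above
# builds when the Galaxy 'tabular' format is selected (rewritten to lastz's 'general-'
# format plus the explicit field list). The paths, nickname and chosen options below
# are invented; the real field list is the long tabular_fields string defined earlier.
#
#   lastz /tmp/tmpABC.in[nickname=chr21] reads.fasta --strand=both --gapped \
#       --format=general-:score,name1,strand1,...,shingle > /tmp/tmpABC.out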
def __init__(self, trans, plugin): self.trans = trans self.log = log self.attr = Bunch() self.attr.viz_id = plugin.name self.attr.history_id = trans.security.encode_id(trans.history.id) self.attr.galaxy_config = trans.app.config self.attr.redact_username_in_logs = trans.app.config.redact_username_in_logs self.attr.galaxy_root_dir = os.path.abspath( self.attr.galaxy_config.root) self.attr.root = web.url_for("/") self.attr.app_root = self.attr.root + "static/plugins/interactive_environments/" + self.attr.viz_id + "/static/" self.attr.import_volume = True plugin_path = os.path.abspath(plugin.path) # Store our template and configuration path self.attr.our_config_dir = os.path.join(plugin_path, "config") self.attr.our_template_dir = os.path.join(plugin_path, "templates") self.attr.HOST = trans.request.host.rsplit(':', 1)[0] self.load_deploy_config() self.load_allowed_images() self.load_container_interface() self.attr.docker_hostname = self.attr.viz_config.get( "docker", "docker_hostname") raw_docker_connect_port = self.attr.viz_config.get( "docker", "docker_connect_port") self.attr.docker_connect_port = int( raw_docker_connect_port) if raw_docker_connect_port else None # Generate per-request passwords the IE plugin can use to configure # the destination container. self.notebook_pw_salt = self.generate_password(length=12) self.notebook_pw = self.generate_password(length=24) ie_parent_temp_dir = self.attr.viz_config.get( "docker", "docker_galaxy_temp_dir") or None self.temp_dir = os.path.abspath( tempfile.mkdtemp(dir=ie_parent_temp_dir)) if self.attr.viz_config.getboolean("docker", "wx_tempdir"): # Ensure permissions are set try: os.chmod(self.temp_dir, os.stat(self.temp_dir).st_mode | stat.S_IXOTH) except Exception: log.error("Could not change permissions of tmpdir %s" % self.temp_dir) # continue anyway # This duplicates the logic in the proxy manager if self.attr.galaxy_config.dynamic_proxy_external_proxy: self.attr.proxy_prefix = '/'.join(( '', trans.cookie_path.strip('/'), self.attr.galaxy_config.dynamic_proxy_prefix.strip('/'), self.attr.viz_id, )) else: self.attr.proxy_prefix = '' # If cookie_path is unset (thus '/'), the proxy prefix ends up with # multiple leading '/' characters, which will cause the client to # request resources from http://dynamic_proxy_prefix if self.attr.proxy_prefix.startswith('/'): self.attr.proxy_prefix = '/' + self.attr.proxy_prefix.lstrip('/') assert not self.attr.container_interface \ or not self.attr.container_interface.publish_port_list_required \ or (self.attr.container_interface.publish_port_list_required and self.attr.docker_connect_port is not None), \ "Error: Container interface requires publish port list but docker_connect_port is not set"
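# Worked example for the proxy_prefix logic above; the configuration values are
# assumptions, not defaults.
#   trans.cookie_path                  = '/galaxy'
#   galaxy_config.dynamic_proxy_prefix = 'gie_proxy'
#   plugin.name (viz_id)               = 'jupyter'
#   '/'.join(('', 'galaxy', 'gie_proxy', 'jupyter'))  -> '/galaxy/gie_proxy/jupyter'
# With cookie_path left at '/', the join instead yields '//gie_proxy/jupyter', and the
# trailing startswith('/')/lstrip('/') step collapses it back to '/gie_proxy/jupyter'.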
def __build_metadata_configuration(self, client, job_wrapper, remote_metadata, remote_job_config): metadata_kwds = {} if remote_metadata: remote_system_properties = remote_job_config.get( "system_properties", {}) remote_galaxy_home = remote_system_properties.get( "galaxy_home", None) if not remote_galaxy_home: raise Exception(NO_REMOTE_GALAXY_FOR_METADATA_MESSAGE) metadata_kwds['exec_dir'] = remote_galaxy_home outputs_directory = remote_job_config['outputs_directory'] configs_directory = remote_job_config['configs_directory'] working_directory = remote_job_config['working_directory'] # For metadata calculation, we need to build a list of output # file objects with real path indicating location on Galaxy server # and false path indicating location on compute server. Since the # Pulsar disables from_work_dir copying as part of the job command # line we need to take the list of output locations on the Pulsar # server (produced by self.get_output_files(job_wrapper)) and for # each work_dir output substitute the effective path on the Pulsar # server relative to the remote working directory as the # false_path to send to the metadata command generation module. work_dir_outputs = self.get_work_dir_outputs( job_wrapper, job_working_directory=working_directory) outputs = [ Bunch(false_path=os.path.join(outputs_directory, os.path.basename(path)), real_path=path) for path in self.get_output_files(job_wrapper) ] for output in outputs: for pulsar_workdir_path, real_path in work_dir_outputs: if real_path == output.real_path: output.false_path = pulsar_workdir_path metadata_kwds['output_fnames'] = outputs metadata_kwds['compute_tmp_dir'] = working_directory metadata_kwds['config_root'] = remote_galaxy_home default_config_file = os.path.join(remote_galaxy_home, 'config/galaxy.ini') metadata_kwds['config_file'] = remote_system_properties.get( 'galaxy_config_file', default_config_file) metadata_kwds['dataset_files_path'] = remote_system_properties.get( 'galaxy_dataset_files_path', None) if PulsarJobRunner.__use_remote_datatypes_conf(client): remote_datatypes_config = remote_system_properties.get( 'galaxy_datatypes_config_file', None) if not remote_datatypes_config: log.warn(NO_REMOTE_DATATYPES_CONFIG) remote_datatypes_config = os.path.join( remote_galaxy_home, 'datatypes_conf.xml') metadata_kwds['datatypes_config'] = remote_datatypes_config else: integrates_datatypes_config = self.app.datatypes_registry.integrated_datatypes_configs # Ensure this file gets pushed out to the remote config dir. job_wrapper.extra_filenames.append(integrates_datatypes_config) metadata_kwds['datatypes_config'] = os.path.join( configs_directory, os.path.basename(integrates_datatypes_config)) return metadata_kwds
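# Hedged sketch of the remote_job_config mapping consumed above: the keys are exactly
# the ones this method reads; the directory layout and values are invented for
# illustration.
example_remote_job_config = {
    'outputs_directory': '/pulsar/staging/42/outputs',
    'configs_directory': '/pulsar/staging/42/configs',
    'working_directory': '/pulsar/staging/42/working',
    'system_properties': {
        'galaxy_home': '/srv/galaxy',  # required whenever remote_metadata is enabled
        'galaxy_config_file': '/srv/galaxy/config/galaxy.ini',
        'galaxy_dataset_files_path': '/srv/galaxy/database/files',
        'galaxy_datatypes_config_file': '/srv/galaxy/config/datatypes_conf.xml',
    },
}
# Each Galaxy-side output path then becomes Bunch(real_path=<path on the Galaxy
# server>, false_path=<path under outputs_directory, or the Pulsar working directory
# for work_dir outputs>).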
def get_uploaded_datasets( self, trans, context, override_name=None, override_info=None ):
    def get_data_file_filename( data_file, override_name=None, override_info=None ):
        dataset_name = override_name
        dataset_info = override_info

        def get_file_name( file_name ):
            file_name = file_name.split( '\\' )[-1]
            file_name = file_name.split( '/' )[-1]
            return file_name
        try:
            # Use the existing file
            if not dataset_name and 'filename' in data_file:
                dataset_name = get_file_name( data_file['filename'] )
            if not dataset_info:
                dataset_info = 'uploaded file'
            return Bunch( type='file', path=data_file['local_filename'], name=dataset_name )
            # return 'file', data_file['local_filename'], get_file_name( data_file.filename ), dataset_name, dataset_info
        except:
            # The uploaded file should've been persisted by the upload tool action
            return Bunch( type=None, path=None, name=None )
            # return None, None, None, None, None

    def get_url_paste_urls_or_filename( group_incoming, override_name=None, override_info=None ):
        url_paste_file = group_incoming.get( 'url_paste', None )
        if url_paste_file is not None:
            url_paste = open( url_paste_file, 'r' ).read( 1024 )
            if url_paste.lstrip().lower().startswith( 'http://' ) or url_paste.lstrip().lower().startswith( 'ftp://' ) or url_paste.lstrip().lower().startswith( 'https://' ):
                url_paste = url_paste.replace( '\r', '' ).split( '\n' )
                for line in url_paste:
                    line = line.strip()
                    if line:
                        if not line.lower().startswith( 'http://' ) and not line.lower().startswith( 'ftp://' ) and not line.lower().startswith( 'https://' ):
                            continue  # non-url line, ignore
                        dataset_name = override_name
                        if not dataset_name:
                            dataset_name = line
                        dataset_info = override_info
                        if not dataset_info:
                            dataset_info = 'uploaded url'
                        yield Bunch( type='url', path=line, name=dataset_name )
                        # yield ( 'url', line, precreated_name, dataset_name, dataset_info )
            else:
                dataset_name = dataset_info = precreated_name = 'Pasted Entry'  # we need to differentiate between various url pastes here
                if override_name:
                    dataset_name = override_name
                if override_info:
                    dataset_info = override_info
                yield Bunch( type='file', path=url_paste_file, name=precreated_name )
                # yield ( 'file', url_paste_file, precreated_name, dataset_name, dataset_info )

    def get_one_filename( context ):
        data_file = context['file_data']
        url_paste = context['url_paste']
        ftp_files = context['ftp_files']
        name = context.get( 'NAME', None )
        info = context.get( 'INFO', None )
        uuid = context.get( 'uuid', None ) or None  # Turn '' to None
        warnings = []
        to_posix_lines = False
        if context.get( 'to_posix_lines', None ) not in [ "None", None, False ]:
            to_posix_lines = True
        space_to_tab = False
        if context.get( 'space_to_tab', None ) not in [ "None", None, False ]:
            space_to_tab = True
        file_bunch = get_data_file_filename( data_file, override_name=name, override_info=info )
        if file_bunch.path:
            if url_paste is not None and url_paste.strip():
                warnings.append( "All file contents specified in the paste box were ignored." )
            if ftp_files:
                warnings.append( "All FTP uploaded file selections were ignored." )
        elif url_paste is not None and url_paste.strip():  # we need to use url_paste
            for file_bunch in get_url_paste_urls_or_filename( context, override_name=name, override_info=info ):
                if file_bunch.path:
                    break
            if file_bunch.path and ftp_files is not None:
                warnings.append( "All FTP uploaded file selections were ignored." )
        elif ftp_files is not None and trans.user is not None:
            # look for files uploaded via FTP
            user_ftp_dir = trans.user_ftp_dir
            for ( dirpath, dirnames, filenames ) in os.walk( user_ftp_dir ):
                for filename in filenames:
                    for ftp_filename in ftp_files:
                        if ftp_filename == filename:
                            path = relpath( os.path.join( dirpath, filename ), user_ftp_dir )
                            if not os.path.islink( os.path.join( dirpath, filename ) ):
                                ftp_data_file = { 'local_filename' : os.path.abspath( os.path.join( user_ftp_dir, path ) ),
                                                  'filename' : os.path.basename( path ) }
                                file_bunch = get_data_file_filename( ftp_data_file, override_name=name, override_info=info )
                                if file_bunch.path:
                                    break
                    if file_bunch.path:
                        break
                if file_bunch.path:
                    break
        file_bunch.to_posix_lines = to_posix_lines
        file_bunch.space_to_tab = space_to_tab
        file_bunch.uuid = uuid
        return file_bunch, warnings

    def get_filenames( context ):
        rval = []
        data_file = context['file_data']
        ftp_files = context['ftp_files']
        uuid = context.get( 'uuid', None ) or None  # Turn '' to None
        name = context.get( 'NAME', None )
        info = context.get( 'INFO', None )
        to_posix_lines = False
        if context.get( 'to_posix_lines', None ) not in [ "None", None, False ]:
            to_posix_lines = True
        space_to_tab = False
        if context.get( 'space_to_tab', None ) not in [ "None", None, False ]:
            space_to_tab = True
        file_bunch = get_data_file_filename( data_file, override_name=name, override_info=info )
        file_bunch.uuid = uuid
        if file_bunch.path:
            file_bunch.to_posix_lines = to_posix_lines
            file_bunch.space_to_tab = space_to_tab
            rval.append( file_bunch )
        for file_bunch in get_url_paste_urls_or_filename( context, override_name=name, override_info=info ):
            if file_bunch.path:
                file_bunch.uuid = uuid
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.space_to_tab = space_to_tab
                rval.append( file_bunch )
        # look for files uploaded via FTP
        valid_files = []
        if ftp_files is not None:
            # Normalize input paths to ensure utf-8 encoding is normal form c.
            # This allows for comparison when the filesystem uses a different encoding than the browser.
            ftp_files = [ unicodedata.normalize( 'NFC', f ) for f in ftp_files if isinstance( f, unicode ) ]
            if trans.user is None:
                log.warning( 'Anonymous user passed values in ftp_files: %s' % ftp_files )
                ftp_files = []
                # TODO: warning to the user (could happen if session has become invalid)
            else:
                user_ftp_dir = trans.user_ftp_dir
                for ( dirpath, dirnames, filenames ) in os.walk( user_ftp_dir ):
                    for filename in filenames:
                        path = relpath( os.path.join( dirpath, filename ), user_ftp_dir )
                        if not os.path.islink( os.path.join( dirpath, filename ) ):
                            # Normalize filesystem paths
                            if isinstance( path, unicode ):
                                valid_files.append( unicodedata.normalize( 'NFC', path ) )
                            else:
                                valid_files.append( path )
        else:
            ftp_files = []
        for ftp_file in ftp_files:
            if ftp_file not in valid_files:
                log.warning( 'User passed an invalid file path in ftp_files: %s' % ftp_file )
                continue  # TODO: warning to the user (could happen if file is already imported)
            ftp_data_file = { 'local_filename' : os.path.abspath( os.path.join( user_ftp_dir, ftp_file ) ),
                              'filename' : os.path.basename( ftp_file ) }
            file_bunch = get_data_file_filename( ftp_data_file, override_name=name, override_info=info )
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.space_to_tab = space_to_tab
                rval.append( file_bunch )
        return rval

    file_type = self.get_file_type( context )
    d_type = self.get_datatype( trans, context )
    dbkey = context.get( 'dbkey', None )
    writable_files = d_type.writable_files
    writable_files_offset = 0
    groups_incoming = [ None for _ in writable_files ]
    for group_incoming in context.get( self.name, [] ):
        i = int( group_incoming['__index__'] )
        groups_incoming[ i ] = group_incoming
    if d_type.composite_type is not None:
        # handle uploading of composite datatypes
        # Only one Dataset can be created
        dataset = Bunch()
        dataset.type = 'composite'
        dataset.file_type = file_type
        dataset.dbkey = dbkey
        dataset.datatype = d_type
        dataset.warnings = []
        dataset.metadata = {}
        dataset.composite_files = {}
        dataset.uuid = None
        # load metadata
        files_metadata = context.get( self.metadata_ref, {} )
        metadata_name_substition_default_dict = dict( [ ( composite_file.substitute_name_with_metadata, d_type.metadata_spec[ composite_file.substitute_name_with_metadata ].default )
                                                         for composite_file in d_type.composite_files.values() if composite_file.substitute_name_with_metadata ] )
        for meta_name, meta_spec in d_type.metadata_spec.iteritems():
            if meta_spec.set_in_upload:
                if meta_name in files_metadata:
                    meta_value = files_metadata[ meta_name ]
                    if meta_name in metadata_name_substition_default_dict:
                        meta_value = sanitize_for_filename( meta_value, default=metadata_name_substition_default_dict[ meta_name ] )
                    dataset.metadata[ meta_name ] = meta_value
        dataset.precreated_name = dataset.name = self.get_composite_dataset_name( context )
        if dataset.datatype.composite_type == 'auto_primary_file':
            # replace sniff here with just creating an empty file
            temp_name, is_multi_byte = sniff.stream_to_file( StringIO.StringIO( d_type.generate_primary_file( dataset ) ), prefix='upload_auto_primary_file' )
            dataset.primary_file = temp_name
            dataset.to_posix_lines = True
            dataset.space_to_tab = False
        else:
            file_bunch, warnings = get_one_filename( groups_incoming[ 0 ] )
            writable_files_offset = 1
            dataset.primary_file = file_bunch.path
            dataset.to_posix_lines = file_bunch.to_posix_lines
            dataset.space_to_tab = file_bunch.space_to_tab
            dataset.warnings.extend( warnings )
        if dataset.primary_file is None:  # remove this before finish, this should create an empty dataset
            raise Exception( 'No primary dataset file was available for composite upload' )
        keys = [ value.name for value in writable_files.values() ]
        for i, group_incoming in enumerate( groups_incoming[ writable_files_offset: ] ):
            key = keys[ i + writable_files_offset ]
            if group_incoming is None and not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
                dataset.composite_files[ key ] = None
            else:
                file_bunch, warnings = get_one_filename( group_incoming )
                dataset.warnings.extend( warnings )
                if file_bunch.path:
                    dataset.composite_files[ key ] = file_bunch.__dict__
                else:
                    dataset.composite_files[ key ] = None
                    if not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                        dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
        return [ dataset ]
    else:
        datasets = get_filenames( context[ self.name ][0] )
        rval = []
        for dataset in datasets:
            dataset.file_type = file_type
            dataset.datatype = d_type
            dataset.ext = self.get_datatype_ext( trans, context )
            dataset.dbkey = dbkey
            rval.append( dataset )
        return rval
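# Illustration: every code path above reduces an upload source (chosen file,
# pasted URL, or FTP selection) to the same small record, a Bunch carrying
# type/path/name, onto which get_one_filename()/get_filenames() then attach
# to_posix_lines, space_to_tab and uuid. A hedged sketch of that record; the
# concrete values are invented and Galaxy must be importable for the import.
from galaxy.util.bunch import Bunch

file_bunch = Bunch( type='file', path='/tmp/upload_file_1.dat', name='reads.fastq' )
file_bunch.to_posix_lines = True
file_bunch.space_to_tab = False
file_bunch.uuid = None
assert file_bunch.type == 'file' and file_bunch.path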
class DeferredJobQueue( object ):
    job_states = Bunch( READY='ready',
                        WAIT='wait',
                        INVALID='invalid' )

    def __init__( self, app ):
        self.app = app
        self.sa_session = app.model.context.current
        self.queue = Queue()
        self.plugins = {}
        self._load_plugins()
        self.sleeper = Sleeper()
        self.running = True
        self.waiting_jobs = []
        self.__check_jobs_at_startup()
        self.monitor_thread = threading.Thread( target=self.__monitor )
        self.monitor_thread.start()
        log.info( 'Deferred job queue started' )

    def _load_plugins( self ):
        for fname in os.listdir( os.path.dirname( __file__ ) ):
            if not fname.startswith( '_' ) and fname.endswith( '.py' ):
                name = fname[:-3]
                module_name = 'galaxy.jobs.deferred.' + name
                try:
                    module = __import__( module_name )
                except:
                    log.exception( 'Deferred job plugin appears to exist but is not loadable: %s' % module_name )
                    continue
                for comp in module_name.split( "." )[1:]:
                    module = getattr( module, comp )
                if '__all__' not in dir( module ):
                    log.error( 'Plugin "%s" does not contain a list of exported classes in __all__' % module_name )
                    continue
                for obj in module.__all__:
                    display_name = ':'.join( ( module_name, obj ) )
                    plugin = getattr( module, obj )
                    for name in ( 'check_job', 'run_job' ):
                        if name not in dir( plugin ):
                            log.error( 'Plugin "%s" does not contain required method "%s()"' % ( display_name, name ) )
                            break
                    else:
                        self.plugins[obj] = plugin( self.app )
                        self.plugins[obj].job_states = self.job_states
                        log.debug( 'Loaded deferred job plugin: %s' % display_name )

    def __check_jobs_at_startup( self ):
        waiting_jobs = self.sa_session.query( model.DeferredJob ) \
                                      .filter( model.DeferredJob.state == model.DeferredJob.states.WAITING ).all()
        for job in waiting_jobs:
            if not self.__check_job_plugin( job ):
                continue
            if 'check_interval' in dir( self.plugins[job.plugin] ):
                job.check_interval = self.plugins[job.plugin].check_interval
            log.info( 'Recovered deferred job (id: %s) at startup' % job.id )
            # Pass the job ID as opposed to the job, since the monitor thread
            # needs to load it in its own threadlocal scoped session.
            self.waiting_jobs.append( job.id )

    def __monitor( self ):
        while self.running:
            try:
                self.__monitor_step()
            except:
                log.exception( 'Exception in monitor_step' )
            self.sleeper.sleep( 1 )
        log.info( 'job queue stopped' )

    def __monitor_step( self ):
        # TODO: Querying the database with this frequency is bad, we need message passing
        new_jobs = self.sa_session.query( model.DeferredJob ) \
                                  .filter( model.DeferredJob.state == model.DeferredJob.states.NEW ).all()
        for job in new_jobs:
            if not self.__check_job_plugin( job ):
                continue
            job.state = model.DeferredJob.states.WAITING
            self.sa_session.add( job )
            self.sa_session.flush()
            if 'check_interval' in dir( self.plugins[job.plugin] ):
                job.check_interval = self.plugins[job.plugin].check_interval
            self.waiting_jobs.append( job )
        new_waiting = []
        for job in self.waiting_jobs:
            try:
                # Recovered jobs are passed in by ID
                assert type( job ) is int
                job = self.sa_session.query( model.DeferredJob ).get( job )
            except:
                pass
            if job.is_check_time:
                try:
                    job_state = self.plugins[job.plugin].check_job( job )
                except Exception as e:
                    self.__fail_job( job )
                    log.exception( 'Set deferred job %s to error because of an exception in check_job(): %s' % ( job.id, str( e ) ) )
                    continue
                if job_state == self.job_states.READY:
                    try:
                        self.plugins[job.plugin].run_job( job )
                    except Exception as e:
                        self.__fail_job( job )
                        log.exception( 'Set deferred job %s to error because of an exception in run_job(): %s' % ( job.id, str( e ) ) )
                        continue
                elif job_state == self.job_states.INVALID:
                    self.__fail_job( job )
                    log.error( 'Unable to run deferred job (id: %s): Plugin "%s" marked it as invalid' % ( job.id, job.plugin ) )
                    continue
                else:
                    new_waiting.append( job )
                job.last_check = 'now'
            else:
                new_waiting.append( job )
        self.waiting_jobs = new_waiting

    def __check_job_plugin( self, job ):
        if job.plugin not in self.plugins:
            log.error( 'Invalid deferred job plugin: %s' % job.plugin )
            job.state = model.DeferredJob.states.ERROR
            self.sa_session.add( job )
            self.sa_session.flush()
            return False
        return True

    def __check_if_ready_to_run( self, job ):
        return self.plugins[job.plugin].check_job( job )

    def __fail_job( self, job ):
        job.state = model.DeferredJob.states.ERROR
        self.sa_session.add( job )
        self.sa_session.flush()

    def shutdown( self ):
        self.running = False
        self.sleeper.wake()
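# Sketch of a plugin module that the _load_plugins() loop above would accept.
# The loader requires an __all__ list and classes exposing check_job() and
# run_job(); check_interval is optional and, when present, is copied onto each
# job. The module/class names here are hypothetical, not a real Galaxy plugin.
__all__ = [ 'ExampleTransferPlugin' ]


class ExampleTransferPlugin( object ):
    check_interval = 30  # optional; read at startup recovery and in __monitor_step()

    def __init__( self, app ):
        self.app = app
        # self.job_states (READY/WAIT/INVALID) is injected by the queue after instantiation

    def check_job( self, job ):
        # Decide whether the deferred job can run yet
        return self.job_states.READY

    def run_job( self, job ):
        # Perform the actual deferred work here
        pass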
cols = [ int( c ) for c in str( options.columns ).split( ',' ) if int( c ) > hinge ]
inputs = [ options.input1, options.input2 ]
if options.fill_options_file == 'None':
    inputs.extend( args )
elif len( args ) > 0:
    inputs.extend( args )
fill_options = None
if options.fill_options_file != 'None' and options.fill_options_file is not None:
    try:
        if simplejson is None:
            raise simplejson_exception
        fill_options = Bunch( **stringify_dictionary_keys( simplejson.load( open( options.fill_options_file ) ) ) )
    except Exception, e:
        print 'Warning: Ignoring fill options due to simplejson error (%s).' % e
if fill_options is None:
    fill_options = Bunch()
if 'file1_columns' not in fill_options:
    fill_options.file1_columns = None
if fill_options and fill_options.file1_columns:
    fill_empty = {}
    for col in cols:
        fill_empty[ col ] = fill_options.file1_columns[ col - 1 ]
else:
    fill_empty = None
assert len( cols ) > 0, 'You need to select at least one column in addition to the hinge'
delimiter = '\t'
# make sure all files are sorted in same way, ascending
tmp_input_files = []
input_files = inputs[:]
for in_file in input_files:
    tmp_file = tempfile.NamedTemporaryFile()
    default=None,
    help="Fill empty columns with values from a JSONified file.",
)
options, args = parser.parse_args()
fill_options = None
if options.fill_options_file is not None:
    try:
        fill_options = Bunch( **stringify_dictionary_keys( json.load( open( options.fill_options_file ) ) ) )
    except Exception, e:
        print "Warning: Ignoring fill options due to json error (%s)." % e
if fill_options is None:
    fill_options = Bunch()
if "fill_unjoined_only" not in fill_options:
    fill_options.fill_unjoined_only = True
if "file1_columns" not in fill_options:
    fill_options.file1_columns = None
if "file2_columns" not in fill_options:
    fill_options.file2_columns = None
try:
    filename1 = args[0]
    filename2 = args[1]
    column1 = int(args[2]) - 1
    column2 = int(args[3]) - 1
    out_filename = args[4]
except:
    print >> sys.stderr, "Error parsing command line."
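# Sketch of the JSON handed to --fill_options_file above. Only the keys the
# script looks up afterwards matter (fill_unjoined_only, file1_columns,
# file2_columns); anything missing falls back to the defaults set above. The
# fill values themselves are invented for this example.
import json

fill_options_json = json.dumps({
    "fill_unjoined_only": True,
    "file1_columns": ["0", "0", "N/A"],  # one fill value per column of file 1
    "file2_columns": None,               # None means: do not fill file 2
})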
def __init__(self, app):
    self.app = app
    self.options = Bunch(sanitize=False)
class Dataset( object ):
    states = Bunch( NEW='new',
                    UPLOAD='upload',
                    QUEUED='queued',
                    RUNNING='running',
                    OK='ok',
                    EMPTY='empty',
                    ERROR='error',
                    DISCARDED='discarded' )
    permitted_actions = get_permitted_actions( filter='DATASET' )
    file_path = "/tmp/"
    engine = None

    def __init__( self, id=None, state=None, external_filename=None, extra_files_path=None, file_size=None, purgable=True ):
        self.id = id
        self.state = state
        self.deleted = False
        self.purged = False
        self.purgable = purgable
        self.external_filename = external_filename
        self._extra_files_path = extra_files_path
        self.file_size = file_size

    def get_file_name( self ):
        if not self.external_filename:
            assert self.id is not None, "ID must be set before filename used (commit the object)"
            # First try filename directly under file_path
            filename = os.path.join( self.file_path, "dataset_%d.dat" % self.id )
            # Only use that filename if it already exists (backward compatibility),
            # otherwise construct hashed path
            if not os.path.exists( filename ):
                dir = os.path.join( self.file_path, *directory_hash_id( self.id ) )
                # Create directory if it does not exist
                try:
                    os.makedirs( dir )
                except OSError as e:
                    # File Exists is okay, otherwise reraise
                    if e.errno != errno.EEXIST:
                        raise
                # Return filename inside hashed directory
                return os.path.abspath( os.path.join( dir, "dataset_%d.dat" % self.id ) )
        else:
            filename = self.external_filename
        # Make filename absolute
        return os.path.abspath( filename )

    def set_file_name( self, filename ):
        if not filename:
            self.external_filename = None
        else:
            self.external_filename = filename
    file_name = property( get_file_name, set_file_name )

    @property
    def extra_files_path( self ):
        if self._extra_files_path:
            path = self._extra_files_path
        else:
            path = os.path.join( self.file_path, "dataset_%d_files" % self.id )
            # only use path directly under self.file_path if it exists
            if not os.path.exists( path ):
                path = os.path.join( os.path.join( self.file_path, *directory_hash_id( self.id ) ), "dataset_%d_files" % self.id )
        # Make path absolute
        return os.path.abspath( path )

    def get_size( self ):
        """Returns the size of the data on disk"""
        if self.file_size:
            return self.file_size
        else:
            try:
                return os.path.getsize( self.file_name )
            except OSError:
                return 0

    def set_size( self ):
        """Sets the size of the data on disk"""
        try:
            if not self.file_size:
                self.file_size = os.path.getsize( self.file_name )
        except OSError:
            self.file_size = 0

    def has_data( self ):
        """Detects whether there is any data"""
        return self.get_size() > 0

    def mark_deleted( self, include_children=True ):
        self.deleted = True

    # FIXME: sqlalchemy will replace this
    def _delete(self):
        """Remove the file that corresponds to this data"""
        try:
            os.remove(self.data.file_name)
        except OSError as e:
            log.critical('%s delete error %s' % (self.__class__.__name__, e))
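# Small usage sketch of the mock above: the states Bunch provides readable
# string constants for test assertions. The id/state values are arbitrary.
dataset = Dataset( id=1, state=Dataset.states.NEW )
assert dataset.state == 'new'
dataset.state = Dataset.states.OK
assert dataset.state == 'ok'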
def __init__(self, test_directory):
    self.config = Bunch(tool_data_path=test_directory)
""" Model objects for docker objects """ from __future__ import absolute_import import logging try: import docker except ImportError: from galaxy.util.bunch import Bunch docker = Bunch(errors=Bunch(NotFound=None)) from galaxy.containers import (Container, ContainerPort, ContainerVolume) from galaxy.util import ( pretty_print_time_interval, unicodify, ) CPUS_LABEL = '_galaxy_cpus' IMAGE_LABEL = '_galaxy_image' CPUS_CONSTRAINT = 'node.labels.' + CPUS_LABEL IMAGE_CONSTRAINT = 'node.labels.' + IMAGE_LABEL log = logging.getLogger(__name__) class DockerAttributeContainer(object): def __init__(self, members=None): if members is None: members = set()
import sys
sys.path.insert(1, '/galaxy-central')
sys.path.insert(1, '/galaxy-central/lib')

from scripts.db_shell import *
from galaxy.util.bunch import Bunch
from galaxy.security import GalaxyRBACAgent
from sqlalchemy.orm import sessionmaker
from sqlalchemy import *
import argparse

bunch = Bunch( **globals() )
engine = create_engine('postgres://*****:*****@localhost:5432/galaxy')
bunch.session = sessionmaker(bind=engine)
# For backward compatibility with "model.context.current"
bunch.context = sessionmaker(bind=engine)

security_agent = GalaxyRBACAgent( bunch )
security_agent.sa_session = sa_session


def add_user(email, password, key=None):
    """
    Add Galaxy User.
    From John https://gist.github.com/jmchilton/4475646
    """
    query = sa_session.query( User ).filter_by( email=email )
    if query.count() > 0:
        return query.first()
    else:
        User.use_pbkdf2 = False
        user = User(email)
def __main__():
    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option( '', '--ref_name', dest='ref_name', help='The reference name to change all output matches to' )
    parser.add_option( '', '--ref_source', dest='ref_source', help='Whether the reference is cached or from the history' )
    parser.add_option( '', '--ref_sequences', dest='ref_sequences', help='Number of sequences in the reference dataset' )
    parser.add_option( '', '--source_select', dest='source_select', help='Whether to used pre-set or cached reference file' )
    parser.add_option( '', '--input1', dest='input1', help='The name of the reference file if using history or reference base name if using cached' )
    parser.add_option( '', '--input2', dest='input2', help='The reads file to align' )
    parser.add_option( '', '--pre_set_options', dest='pre_set_options', help='Which of the pre set options to use, if using pre-sets' )
    parser.add_option( '', '--strand', dest='strand', help='Which strand of the read to search, if specifying all parameters' )
    parser.add_option( '', '--seed', dest='seed', help='Seeding settings, if specifying all parameters' )
    parser.add_option( '', '--transition', dest='transition', help='Number of transitions to allow in each seed hit, if specifying all parameters' )
    parser.add_option( '', '--gfextend', dest='gfextend', help='Whether to perform gap-free extension of seed hits to HSPs (high scoring segment pairs), if specifying all parameters' )
    parser.add_option( '', '--chain', dest='chain', help='Whether to perform chaining of HSPs, if specifying all parameters' )
    parser.add_option( '', '--O', dest='O', help='Gap opening penalty, if specifying all parameters' )
    parser.add_option( '', '--E', dest='E', help='Gap extension penalty, if specifying all parameters' )
    parser.add_option( '', '--X', dest='X', help='X-drop threshold, if specifying all parameters' )
    parser.add_option( '', '--Y', dest='Y', help='Y-drop threshold, if specifying all parameters' )
    parser.add_option( '', '--K', dest='K', help='Threshold for HSPs, if specifying all parameters' )
    parser.add_option( '', '--L', dest='L', help='Threshold for gapped alignments, if specifying all parameters' )
    parser.add_option( '', '--entropy', dest='entropy', help='Whether to involve entropy when filtering HSPs, if specifying all parameters' )
    parser.add_option( '', '--identity_min', dest='identity_min', help="Minimum identity (don't report matches under this identity)" )
    parser.add_option( '', '--identity_max', dest='identity_max', help="Maximum identity (don't report matches above this identity)" )
    parser.add_option( '', '--coverage', dest='coverage', help="The minimum coverage value (don't report matches covering less than this)" )
    parser.add_option( '', '--unmask', dest='unmask', help='Whether to convert lowercase bases to uppercase' )
    parser.add_option( '', '--out_format', dest='format', help='The format of the output file (sam, diffs, or tabular (general))' )
    parser.add_option( '', '--output', dest='output', help='The output file' )
    parser.add_option( '', '--lastzSeqsFileDir', dest='lastzSeqsFileDir', help='Directory of local lastz_seqs.loc file' )
    ( options, args ) = parser.parse_args()

    # output version # of tool
    try:
        tmp = tempfile.NamedTemporaryFile().name
        tmp_stdout = open( tmp, 'wb' )
        proc = subprocess.Popen( args='lastz -v', shell=True, stdout=tmp_stdout )
        tmp_stdout.close()
        returncode = proc.wait()
        stdout = None
        for line in open( tmp_stdout.name, 'rb' ):
            if line.lower().find( 'version' ) >= 0:
                stdout = line.strip()
                break
        if stdout:
            sys.stdout.write( '%s\n' % stdout )
        else:
            raise Exception
    except:
        sys.stdout.write( 'Could not determine Lastz version\n' )

    if options.unmask == 'yes':
        unmask = '[unmask]'
    else:
        unmask = ''
    if options.ref_name:
        ref_name = '[nickname=%s]' % options.ref_name
    else:
        ref_name = ''
    # Prepare for commonly-used preset options
    if options.source_select == 'pre_set':
        set_options = '--%s' % options.pre_set_options
    # Prepare for user-specified options
    else:
        set_options = '--%s --%s --gapped --strand=%s --seed=%s --%s O=%s E=%s X=%s Y=%s K=%s L=%s --%s' % \
            ( options.gfextend, options.chain, options.strand, options.seed, options.transition,
              options.O, options.E, options.X, options.Y, options.K, options.L, options.entropy )
    # Specify input2 and add [fullnames] modifier if output format is diffs
    if options.format == 'diffs':
        input2 = '%s[fullnames]' % options.input2
    else:
        input2 = options.input2
    if options.format == 'tabular':
        # Change output format to general if it's tabular and add field names for tabular output
        format = 'general-'
        tabular_fields = ':score,name1,strand1,size1,start1,zstart1,end1,length1,text1,name2,strand2,size2,start2,zstart2,end2,start2+,zstart2+,end2+,length2,text2,diff,cigar,identity,coverage,gaprate,diagonal,shingle'
    elif options.format == 'sam':
        # We currently ALWAYS suppress SAM headers.
        format = 'sam-'
        tabular_fields = ''
    else:
        format = options.format
        tabular_fields = ''

    # Set up our queues
    lastz_job_queue = LastzJobQueue( WORKERS, slots=SLOTS )
    combine_data_queue = CombineDataQueue( options.output )

    if options.ref_source == 'history':
        # Reference is a fasta dataset from the history, so split job across
        # the number of sequences in the dataset ( this could be a HUGE number )
        try:
            # Ensure there is at least 1 sequence in the dataset ( this may not be necessary ).
            error_msg = "The reference dataset is missing metadata, click the pencil icon in the history item and 'auto-detect' the metadata attributes."
            ref_sequences = int( options.ref_sequences )
            if ref_sequences < 1:
                stop_queues( lastz_job_queue, combine_data_queue )
                stop_err( error_msg )
        except:
            stop_queues( lastz_job_queue, combine_data_queue )
            stop_err( error_msg )
        seqs = 0
        fasta_reader = FastaReader( open( options.input1 ) )
        while True:
            # Read the next sequence from the reference dataset
            seq = fasta_reader.next()
            if not seq:
                break
            seqs += 1
            # Create a temporary file to contain the current sequence as input to lastz
            tmp_in_fd, tmp_in_name = tempfile.mkstemp( suffix='.in' )
            tmp_in = os.fdopen( tmp_in_fd, 'wb' )
            # Write the current sequence to the temporary input file
            tmp_in.write( '>%s\n%s\n' % ( seq.name, seq.text ) )
            tmp_in.close()
            # Create a 2nd temporary file to contain the output from lastz execution on the current sequence
            tmp_out_fd, tmp_out_name = tempfile.mkstemp( suffix='.out' )
            os.close( tmp_out_fd )
            # Generate the command line for calling lastz on the current sequence
            command = 'lastz %s%s%s %s %s --ambiguousn --nolaj --identity=%s..%s --coverage=%s --format=%s%s > %s' % \
                ( tmp_in_name, unmask, ref_name, input2, set_options, options.identity_min,
                  options.identity_max, options.coverage, format, tabular_fields, tmp_out_name )
            # Create a job object
            job = Bunch()
            job.command = command
            job.output = tmp_out_name
            job.cleanup = [ tmp_in_name, tmp_out_name ]
            job.combine_data_queue = combine_data_queue
            # Add another job to the lastz_job_queue. Execution
            # will wait at this point if the queue is full.
            lastz_job_queue.put( job, block=True )
        # Make sure the value of sequences in the metadata is the same as the
        # number of sequences read from the dataset ( this may not be necessary ).
        if ref_sequences != seqs:
            stop_queues( lastz_job_queue, combine_data_queue )
            stop_err( "The value of metadata.sequences (%d) differs from the number of sequences read from the reference (%d)." % ( ref_sequences, seqs ) )
    else:
        # Reference is a locally cached 2bit file, split job across number of chroms in 2bit file
        tbf = TwoBitFile( open( options.input1, 'r' ) )
        for chrom in tbf.keys():
            # Create a temporary file to contain the output from lastz execution on the current chrom
            tmp_out_fd, tmp_out_name = tempfile.mkstemp( suffix='.out' )
            os.close( tmp_out_fd )
            command = 'lastz %s/%s%s%s %s %s --ambiguousn --nolaj --identity=%s..%s --coverage=%s --format=%s%s >> %s' % \
                ( options.input1, chrom, unmask, ref_name, input2, set_options, options.identity_min,
                  options.identity_max, options.coverage, format, tabular_fields, tmp_out_name )
            # Create a job object
            job = Bunch()
            job.command = command
            job.output = tmp_out_name
            job.cleanup = [ tmp_out_name ]
            job.combine_data_queue = combine_data_queue
            # Add another job to the lastz_job_queue. Execution
            # will wait at this point if the queue is full.
            lastz_job_queue.put( job, block=True )

    # Stop the lastz_job_queue
    for t in lastz_job_queue.threads:
        lastz_job_queue.put( STOP_SIGNAL, True )
    # Although all jobs are submitted to the queue, we can't shut down the combine_data_queue
    # until we know that all jobs have been submitted to its queue. We do this by checking
    # whether all of the threads in the lastz_job_queue have terminated.
    while threading.activeCount() > 2:
        time.sleep( 1 )
    # Now it's safe to stop the combine_data_queue
    combine_data_queue.put( STOP_SIGNAL )
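# The queue entries built above are plain Bunch records; the worker threads
# only rely on four attributes. A minimal illustration with invented paths
# (combine_data_queue would normally be the CombineDataQueue created above):
from galaxy.util.bunch import Bunch

job = Bunch()
job.command = 'lastz /tmp/ref_chunk.in /tmp/reads.fa --ambiguousn > /tmp/chunk.out'
job.output = '/tmp/chunk.out'
job.cleanup = [ '/tmp/ref_chunk.in', '/tmp/chunk.out' ]
job.combine_data_queue = None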