def handle_proxy(proxy_string, client_id, config):
    """If ARC-enabled server: store a proxy certificate.

    Arguments: proxy_string - text extracted from given upload
               client_id    - DN for user just being created
               config       - global configuration
    """

    output = []
    client_dir = client_id_dir(client_id)
    proxy_dir = os.path.join(config.user_home, client_dir)
    proxy_path = os.path.join(config.user_home, client_dir,
                              arcwrapper.Ui.proxy_name)

    if not config.arc_clusters:
        output.append({'object_type': 'error_text',
                       'text': 'No ARC support!'})
        return output

    # store the file

    try:
        write_file(proxy_string, proxy_path, config.logger)
        os.chmod(proxy_path, 0o600)
    except Exception as exc:
        output.append({'object_type': 'error_text',
                       'text': 'Proxy file could not be written (%s)!'
                       % str(exc).replace(proxy_dir, '')})
        return output

    # provide information about the uploaded proxy

    try:
        session_ui = arcwrapper.Ui(proxy_dir)
        proxy = session_ui.getProxy()
        if proxy.IsExpired():
            # can rarely happen, constructor will throw exception
            output.append({'object_type': 'warning',
                           'text': 'Proxy certificate is expired.'})
        else:
            output.append({'object_type': 'text',
                           'text': 'Proxy for %s' % proxy.GetIdentitySN()})
            output.append(
                {'object_type': 'text',
                 'text': 'Proxy certificate will expire on %s (in %s sec.)'
                 % (proxy.Expires(), proxy.getTimeleft())})
    except arcwrapper.NoProxyError as err:
        output.append({'object_type': 'warning',
                       'text': 'No proxy certificate to load: %s'
                       % err.what()})

    return output
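
# Illustrative sketch (not part of the MiG code base): handle_proxy above
# writes the proxy file and only then chmods it to 0o600, which may leave a
# brief window with less restrictive permissions depending on write_file and
# the process umask. An alternative is to create the file with restrictive
# permissions from the start:
def _write_private_file_sketch(data, path):
    """Write data to path without ever exposing loose permissions."""
    # os.open honors the mode argument at creation time (subject to umask)
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, 'w') as handle:
        handle.write(data)
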
def main(client_id, user_arguments_dict):
    """Main function used by front end"""

    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id)
    client_dir = client_id_dir(client_id)
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
        # NOTE: path can use wildcards, dst and current_dir cannot
        typecheck_overrides={'path': valid_path_pattern},
    )
    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)

    flags = ''.join(accepted['flags'])
    pattern_list = accepted['path']
    dst = accepted['dst'][-1]
    current_dir = accepted['current_dir'][-1].lstrip(os.sep)

    # All paths are relative to current_dir

    pattern_list = [os.path.join(current_dir, i) for i in pattern_list]
    if dst:
        dst = os.path.join(current_dir, dst)

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name

    base_dir = os.path.abspath(
        os.path.join(configuration.user_home, client_dir)) + os.sep

    status = returnvalues.OK

    if verbose(flags):
        for flag in flags:
            output_objects.append({'object_type': 'text',
                                   'text': '%s using flag: %s'
                                   % (op_name, flag)})

    # IMPORTANT: path must be expanded to abs for proper chrooting
    abs_dir = os.path.abspath(
        os.path.join(base_dir, current_dir.lstrip(os.sep)))
    if not valid_user_path(configuration, abs_dir, base_dir, True):
        output_objects.append({'object_type': 'error_text',
                               'text': "You're not allowed to work in %s!"
                               % current_dir})
        logger.warning('%s tried to %s restricted path %s ! (%s)'
                       % (client_id, op_name, abs_dir, current_dir))
        return (output_objects, returnvalues.CLIENT_ERROR)

    if verbose(flags):
        output_objects.append({'object_type': 'text',
                               'text': "working in %s" % current_dir})

    if dst:
        if not safe_handler(configuration, 'post', op_name, client_id,
                            get_csrf_limit(configuration), accepted):
            output_objects.append(
                {'object_type': 'error_text', 'text': '''Only accepting
CSRF-filtered POST requests to prevent unintended updates'''})
            return (output_objects, returnvalues.CLIENT_ERROR)

        # NOTE: dst already incorporates current_dir prefix here
        # IMPORTANT: path must be expanded to abs for proper chrooting
        abs_dest = os.path.abspath(os.path.join(base_dir, dst))
        logger.info('%s in %s' % (op_name, abs_dest))

        # Don't use abs_path in output as it may expose underlying
        # fs layout.

        relative_dest = abs_dest.replace(base_dir, '')
        if not valid_user_path(configuration, abs_dest, base_dir, True):
            output_objects.append(
                {'object_type': 'error_text',
                 'text': "Invalid path! (%s expands to an illegal path)"
                 % dst})
            logger.warning('%s tried to %s restricted path %s ! (%s)'
                           % (client_id, op_name, abs_dest, dst))
            return (output_objects, returnvalues.CLIENT_ERROR)
        if not check_write_access(abs_dest, parent_dir=True):
            logger.warning('%s called without write access: %s'
                           % (op_name, abs_dest))
            output_objects.append(
                {'object_type': 'error_text',
                 'text': 'cannot write disk use to "%s": inside a '
                 'read-only location!' % relative_dest})
            return (output_objects, returnvalues.CLIENT_ERROR)

    all_lines = []
    for pattern in pattern_list:

        # Check directory traversal attempts before actual handling to
        # avoid leaking information about file system layout while
        # allowing consistent error messages

        unfiltered_match = glob.glob(base_dir + pattern)
        match = []
        for server_path in unfiltered_match:
            # IMPORTANT: path must be expanded to abs for proper chrooting
            abs_path = os.path.abspath(server_path)
            if not valid_user_path(configuration, abs_path, base_dir,
                                   True):
                # out of bounds - save user warning for later to allow
                # partial match:
                # ../*/* is technically allowed to match own files.
                logger.warning('%s tried to %s restricted path %s ! (%s)'
                               % (client_id, op_name, abs_path, pattern))
                continue
            match.append(abs_path)

        # Now actually treat list of allowed matchings and notify if no
        # (allowed) match

        if not match:
            output_objects.append({'object_type': 'file_not_found',
                                   'name': pattern})
            status = returnvalues.FILE_NOT_FOUND

        # NOTE: we produce output matching an invocation of:
        # du -aL --apparent-size --block-size=1 PATH [PATH ...]

        filedus = []
        summarize_output = summarize(flags)
        for abs_path in match:
            if invisible_path(abs_path):
                continue
            relative_path = abs_path.replace(base_dir, '')
            # cache accumulated sub dir sizes - du sums into parent dir
            # size
            dir_sizes = {}
            try:
                # Assume a directory to walk
                for (root, dirs, files) in walk(abs_path, topdown=False,
                                                followlinks=True):
                    if invisible_path(root):
                        continue
                    dir_bytes = 0
                    for name in files:
                        real_file = os.path.join(root, name)
                        if invisible_path(real_file):
                            continue
                        relative_file = real_file.replace(base_dir, '')
                        size = os.path.getsize(real_file)
                        dir_bytes += size
                        if not summarize_output:
                            filedus.append({'object_type': 'filedu',
                                            'name': relative_file,
                                            'bytes': size})
                    for name in dirs:
                        real_dir = os.path.join(root, name)
                        if invisible_path(real_dir):
                            continue
                        # NOTE: use get with fallback since invisible sub
                        # dirs were skipped above and have no cached size
                        dir_bytes += dir_sizes.get(real_dir, 0)
                    relative_root = root.replace(base_dir, '')
                    dir_bytes += os.path.getsize(root)
                    dir_sizes[root] = dir_bytes
                    if root == abs_path or not summarize_output:
                        filedus.append({'object_type': 'filedu',
                                        'name': relative_root,
                                        'bytes': dir_bytes})
                if os.path.isfile(abs_path):
                    # Fall back to plain file where walk is empty
                    size = os.path.getsize(abs_path)
                    filedus.append({'object_type': 'filedu',
                                    'name': relative_path,
                                    'bytes': size})
            except Exception as exc:
                output_objects.append({'object_type': 'error_text',
                                       'text': "%s: '%s': %s"
                                       % (op_name, relative_path, exc)})
                logger.error("%s: failed on '%s': %s"
                             % (op_name, relative_path, exc))
                status = returnvalues.SYSTEM_ERROR
                continue
        if dst:
            all_lines += ['%(bytes)d\t\t%(name)s\n' % entry
                          for entry in filedus]
        else:
            output_objects.append({'object_type': 'filedus',
                                   'filedus': filedus})

    if dst and not write_file(''.join(all_lines), abs_dest, logger):
        output_objects.append({'object_type': 'error_text',
                               'text': "failed to write disk use to %s"
                               % relative_dest})
        logger.error("writing disk use to %s for %s failed"
                     % (abs_dest, client_id))
        status = returnvalues.SYSTEM_ERROR

    return (output_objects, status)
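
# Illustrative standalone sketch (not part of the MiG code base) of the
# bottom-up size accumulation used in main above, mirroring the semantics
# of `du -aL --apparent-size --block-size=1 PATH`. Note that plain files
# need a separate getsize fallback, exactly as in main:
def _disk_usage_sketch(top):
    """Yield (bytes, path) for every file and dir below top, summing
    child sizes into each parent the way du does."""
    dir_sizes = {}
    # topdown=False guarantees children are visited before their parent,
    # so sub dir totals are already cached when the parent sums them up
    for (root, dirs, files) in os.walk(top, topdown=False,
                                       followlinks=True):
        total = 0
        for name in files:
            path = os.path.join(root, name)
            size = os.path.getsize(path)
            total += size
            yield (size, path)
        for name in dirs:
            total += dir_sizes.get(os.path.join(root, name), 0)
        total += os.path.getsize(root)
        dir_sizes[root] = total
        yield (total, root)
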
def main(client_id, user_arguments_dict):
    """Main function used by front end"""

    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id)
    client_dir = client_id_dir(client_id)
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
        # NOTE: path can use wildcards, dst and current_dir cannot
        typecheck_overrides={'path': valid_path_pattern},
    )
    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)

    flags = ''.join(accepted['flags'])
    algo_list = accepted['hash_algo']
    max_chunks = int(accepted['max_chunks'][-1])
    pattern_list = accepted['path']
    dst = accepted['dst'][-1]
    current_dir = accepted['current_dir'][-1].lstrip(os.sep)

    # All paths are relative to current_dir

    pattern_list = [os.path.join(current_dir, i) for i in pattern_list]
    if dst:
        dst = os.path.join(current_dir, dst)

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name

    base_dir = os.path.abspath(
        os.path.join(configuration.user_home, client_dir)) + os.sep

    status = returnvalues.OK

    if verbose(flags):
        for flag in flags:
            output_objects.append({'object_type': 'text',
                                   'text': '%s using flag: %s'
                                   % (op_name, flag)})

    # IMPORTANT: path must be expanded to abs for proper chrooting
    abs_dir = os.path.abspath(
        os.path.join(base_dir, current_dir.lstrip(os.sep)))
    if not valid_user_path(configuration, abs_dir, base_dir, True):
        output_objects.append({'object_type': 'error_text',
                               'text': "You're not allowed to work in %s!"
                               % current_dir})
        logger.warning('%s tried to %s restricted path %s ! (%s)'
                       % (client_id, op_name, abs_dir, current_dir))
        return (output_objects, returnvalues.CLIENT_ERROR)

    if verbose(flags):
        output_objects.append({'object_type': 'text',
                               'text': "working in %s" % current_dir})

    if dst:
        if not safe_handler(configuration, 'post', op_name, client_id,
                            get_csrf_limit(configuration), accepted):
            output_objects.append(
                {'object_type': 'error_text', 'text': '''Only accepting
CSRF-filtered POST requests to prevent unintended updates'''})
            return (output_objects, returnvalues.CLIENT_ERROR)

        # NOTE: dst already incorporates current_dir prefix here
        # IMPORTANT: path must be expanded to abs for proper chrooting
        abs_dest = os.path.abspath(os.path.join(base_dir, dst))
        logger.info('chksum in %s' % abs_dest)

        # Don't use abs_path in output as it may expose underlying
        # fs layout.

        relative_dest = abs_dest.replace(base_dir, '')
        if not valid_user_path(configuration, abs_dest, base_dir, True):
            output_objects.append(
                {'object_type': 'error_text',
                 'text': "Invalid path! (%s expands to an illegal path)"
                 % dst})
            logger.warning('%s tried to %s restricted path %s ! (%s)'
                           % (client_id, op_name, abs_dest, dst))
            return (output_objects, returnvalues.CLIENT_ERROR)
        if not check_write_access(abs_dest, parent_dir=True):
            logger.warning('%s called without write access: %s'
                           % (op_name, abs_dest))
            output_objects.append(
                {'object_type': 'error_text',
                 'text': 'cannot checksum to "%s": inside a read-only '
                 'location!' % relative_dest})
            return (output_objects, returnvalues.CLIENT_ERROR)

    all_lines = []
    for pattern in pattern_list:

        # Check directory traversal attempts before actual handling to
        # avoid leaking information about file system layout while
        # allowing consistent error messages

        unfiltered_match = glob.glob(base_dir + pattern)
        match = []
        for server_path in unfiltered_match:
            # IMPORTANT: path must be expanded to abs for proper chrooting
            abs_path = os.path.abspath(server_path)
            if not valid_user_path(configuration, abs_path, base_dir,
                                   True):
                # out of bounds - save user warning for later to allow
                # partial match:
                # ../*/* is technically allowed to match own files.
                logger.warning('%s tried to %s restricted path %s ! (%s)'
                               % (client_id, op_name, abs_path, pattern))
                continue
            match.append(abs_path)

        # Now actually treat list of allowed matchings and notify if no
        # (allowed) match

        if not match:
            output_objects.append({'object_type': 'file_not_found',
                                   'name': pattern})
            status = returnvalues.FILE_NOT_FOUND

        for abs_path in match:
            relative_path = abs_path.replace(base_dir, '')
            output_lines = []
            for hash_algo in algo_list:
                try:
                    chksum_helper = _algo_map.get(hash_algo,
                                                  _algo_map["md5"])
                    checksum = chksum_helper(abs_path,
                                             max_chunks=max_chunks)
                    line = "%s %s\n" % (checksum, relative_path)
                    logger.info("%s %s of %s: %s" % (op_name, hash_algo,
                                                     abs_path, checksum))
                    output_lines.append(line)
                except Exception as exc:
                    output_objects.append({'object_type': 'error_text',
                                           'text': "%s: '%s': %s"
                                           % (op_name, relative_path,
                                              exc)})
                    logger.error("%s: failed on '%s': %s"
                                 % (op_name, relative_path, exc))
                    status = returnvalues.SYSTEM_ERROR
                    continue
            entry = {'object_type': 'file_output', 'lines': output_lines}
            output_objects.append(entry)
            all_lines += output_lines

    if dst and not write_file(''.join(all_lines), abs_dest, logger):
        output_objects.append({'object_type': 'error_text',
                               'text': "failed to write checksums to %s"
                               % relative_dest})
        logger.error("writing checksums to %s for %s failed"
                     % (abs_dest, client_id))
        status = returnvalues.SYSTEM_ERROR

    return (output_objects, status)
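
# Illustrative sketch of a chunked checksum helper like the ones _algo_map
# is assumed to hold; only the (path, max_chunks) call signature is taken
# from main above, the helper name and chunk size are invented here:
def _md5sum_file_sketch(path, chunk_size=1024 * 1024, max_chunks=-1):
    """Checksum up to max_chunks chunks of path - all if negative."""
    import hashlib
    checksum = hashlib.md5()
    chunks_read = 0
    with open(path, 'rb') as fd:
        while max_chunks < 0 or chunks_read < max_chunks:
            block = fd.read(chunk_size)
            if not block:
                break
            checksum.update(block)
            chunks_read += 1
    return checksum.hexdigest()
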
def gen_job_script(
    job_dictionary,
    resource_config,
    configuration,
    localjobname,
    path_without_extension,
    client_dir,
    exe,
    logger,
):
    """Generate job script from job_dictionary before handout to resource"""

    script_language = resource_config['SCRIPTLANGUAGE']
    if not script_language in configuration.scriptlanguages:
        print('Unknown script language! (conflict with scriptlanguages in '
              'configuration?) %s not in %s'
              % (script_language, configuration.scriptlanguages))
        return False

    if script_language == 'python':
        generator = genjobscriptpython.GenJobScriptPython(
            job_dictionary,
            resource_config,
            exe,
            configuration.migserver_https_sid_url,
            localjobname,
            path_without_extension,
        )
    elif script_language == 'sh':
        generator = genjobscriptsh.GenJobScriptSh(
            job_dictionary,
            resource_config,
            exe,
            configuration.migserver_https_sid_url,
            localjobname,
            path_without_extension,
        )
    elif script_language == 'java':
        generator = genjobscriptjava.GenJobScriptJava(
            job_dictionary, resource_config,
            configuration.migserver_https_sid_url, localjobname,
            path_without_extension)
    else:
        print('Unknown script language! (is in configuration but not in '
              'jobscriptgenerator) %s ' % script_language)
        return False

    # String concatenation in python: ''.join(list) is much faster
    # than repeated use of s += strings

    getinputfiles_array = []
    getinputfiles_array.append(generator.script_init())
    getinputfiles_array.append(generator.comment('print start'))
    getinputfiles_array.append(generator.print_start('get input files'))
    getinputfiles_array.append(generator.comment('init log'))
    getinputfiles_array.append(generator.init_io_log())
    getinputfiles_array.append(generator.comment('get special inputfiles'))
    getinputfiles_array.append(
        generator.get_special_input_files('get_special_status'))
    getinputfiles_array.append(
        generator.log_io_status('get_special_input_files',
                                'get_special_status'))
    getinputfiles_array.append(
        generator.print_on_error('get_special_status', '0',
                                 'failed to fetch special input files!'))
    getinputfiles_array.append(generator.comment('get input files'))
    getinputfiles_array.append(
        generator.get_input_files('get_input_status'))
    getinputfiles_array.append(
        generator.log_io_status('get_input_files', 'get_input_status'))
    getinputfiles_array.append(
        generator.print_on_error('get_input_status', '0',
                                 'failed to fetch input files!'))
    getinputfiles_array.append(generator.comment('get executables'))
    getinputfiles_array.append(
        generator.get_executables('get_executables_status'))
    getinputfiles_array.append(
        generator.log_io_status('get_executables',
                                'get_executables_status'))
    getinputfiles_array.append(
        generator.print_on_error('get_executables_status', '0',
                                 'failed to fetch executable files!'))
    # client_dir equals empty_job_name for sleep jobs
    getinputfiles_array.append(
        generator.generate_output_filelists(
            client_dir != configuration.empty_job_name,
            'generate_output_filelists'))
    getinputfiles_array.append(
        generator.print_on_error('generate_output_filelists', '0',
                                 'failed to generate output filelists!'))
    getinputfiles_array.append(
        generator.generate_input_filelist('generate_input_filelist'))
    getinputfiles_array.append(
        generator.print_on_error('generate_input_filelist', '0',
                                 'failed to generate input filelist!'))
    getinputfiles_array.append(
        generator.generate_iosessionid_file('generate_iosessionid_file'))
    getinputfiles_array.append(
        generator.print_on_error('generate_iosessionid_file', '0',
                                 'failed to generate iosessionid file!'))
    getinputfiles_array.append(
        generator.total_status(['get_special_status', 'get_input_status',
                                'get_executables_status',
                                'generate_output_filelists'],
                               'total_status'))
    getinputfiles_array.append(
        generator.exit_on_error('total_status', '0', 'total_status'))
    getinputfiles_array.append(generator.comment('exit script'))
    getinputfiles_array.append(
        generator.exit_script('0', 'get input files'))

    job_array = []
    job_array.append(generator.script_init())
    job_array.append(generator.set_core_environments())
    job_array.append(generator.print_start('job'))
    job_array.append(
        generator.comment('TODO: switch to job directory here'))
    job_array.append(generator.comment('make sure job status files exist'))
    job_array.append(
        generator.create_files([job_dictionary['JOB_ID'] + '.stdout',
                                job_dictionary['JOB_ID'] + '.stderr',
                                job_dictionary['JOB_ID'] + '.status']))
    job_array.append(generator.init_status())
    job_array.append(generator.comment('chmod +x'))
    job_array.append(generator.chmod_executables('chmod_status'))
    job_array.append(
        generator.print_on_error(
            'chmod_status', '0',
            'failed to make one or more EXECUTABLES executable'))
    job_array.append(
        generator.log_on_error('chmod_status', '0', 'system: chmod'))
    job_array.append(generator.comment('set environments'))
    job_array.append(generator.set_environments('env_status'))
    job_array.append(
        generator.print_on_error(
            'env_status', '0',
            'failed to initialize one or more ENVIRONMENTs'))
    job_array.append(
        generator.log_on_error('env_status', '0',
                               'system: set environments'))
    job_array.append(generator.comment('set runtimeenvironments'))
    job_array.append(
        generator.set_runtime_environments(
            resource_config['RUNTIMEENVIRONMENT'], 're_status'))
    job_array.append(
        generator.print_on_error(
            're_status', '0',
            'failed to initialize one or more RUNTIMEENVIRONMENTs'))
    job_array.append(
        generator.log_on_error('re_status', '0',
                               'system: set RUNTIMEENVIRONMENTs'))
    job_array.append(generator.comment('enforce some basic job limits'))
    job_array.append(generator.set_limits())
    if job_dictionary.get('MOUNT', []) != []:
        job_array.append(
            generator.generate_mountsshprivatekey_file(
                'generate_mountsshprivatekey_file'))
        job_array.append(
            generator.print_on_error(
                'generate_mountsshprivatekey_file', '0',
                'failed to generate mountsshprivatekey file!'))
        job_array.append(
            generator.generate_mountsshknownhosts_file(
                'generate_mountsshknownhosts_file'))
        job_array.append(
            generator.print_on_error(
                'generate_mountsshknownhosts_file', '0',
                'failed to generate mountsshknownhosts file!'))
        job_array.append(generator.comment('Mount job home'))
        # Use best available sftp implementation - configuration picks it
        sftp_address = configuration.user_sftp_show_address
        sftp_port = configuration.user_sftp_show_port
        job_array.append(
            generator.mount(job_dictionary['SESSIONID'], sftp_address,
                            sftp_port, 'mount_status'))
        job_array.append(
            generator.print_on_error('mount_status', '0',
                                     'failed to mount job home'))
        job_array.append(
            generator.log_on_error('mount_status', '0', 'system: mount'))
    job_array.append(generator.comment('execute!'))
    job_array.append(generator.execute('EXECUTING: ', '--Exit code:'))
    if job_dictionary.get('MOUNT', []) != []:
        job_array.append(generator.comment('Unmount job home'))
        job_array.append(generator.umount('umount_status'))
        job_array.append(
            generator.print_on_error('umount_status', '0',
                                     'failed to umount job home'))
        job_array.append(
            generator.log_on_error('umount_status', '0',
                                   'system: umount'))
    job_array.append(generator.comment('exit script'))
    job_array.append(generator.exit_script('0', 'job'))

    getupdatefiles_array = []

    # We need to make sure that curl failures lead to retry while
    # missing output (from say a failed job) is logged but
    # ignored in relation to getupdatefiles success.

    getupdatefiles_array.append(generator.print_start('get update files'))
    getupdatefiles_array.append(generator.init_io_log())
    getupdatefiles_array.append(generator.comment('get io files'))
    getupdatefiles_array.append(generator.get_io_files('get_io_status'))
    getupdatefiles_array.append(
        generator.log_io_status('get_io_files', 'get_io_status'))
    getupdatefiles_array.append(
        generator.print_on_error('get_io_status', '0',
                                 'failed to get one or more IO files'))
    getupdatefiles_array.append(
        generator.exit_on_error('get_io_status', '0', 'get_io_status'))
    getupdatefiles_array.append(generator.comment('exit script'))
    getupdatefiles_array.append(
        generator.exit_script('0', 'get update files'))

    sendoutputfiles_array = []

    # We need to make sure that curl failures lead to retry while
    # missing output (from say a failed job) is logged but
    # ignored in relation to sendoutputfiles success.

    sendoutputfiles_array.append(
        generator.print_start('send output files'))
    sendoutputfiles_array.append(generator.init_io_log())
    sendoutputfiles_array.append(generator.comment('check output files'))
    sendoutputfiles_array.append(
        generator.output_files_missing('missing_counter'))
    sendoutputfiles_array.append(
        generator.log_io_status('output_files_missing',
                                'missing_counter'))
    sendoutputfiles_array.append(
        generator.print_on_error('missing_counter', '0',
                                 'missing output files'))
    sendoutputfiles_array.append(generator.comment('send output files'))
    sendoutputfiles_array.append(
        generator.send_output_files('send_output_status'))
    sendoutputfiles_array.append(
        generator.log_io_status('send_output_files',
                                'send_output_status'))
    sendoutputfiles_array.append(
        generator.print_on_error('send_output_status', '0',
                                 'failed to send one or more outputfiles'))
    sendoutputfiles_array.append(
        generator.exit_on_error('send_output_status', '0',
                                'send_output_status'))
    sendoutputfiles_array.append(generator.comment('send io files'))
    sendoutputfiles_array.append(
        generator.send_io_files('send_io_status'))
    sendoutputfiles_array.append(
        generator.log_io_status('send_io_files', 'send_io_status'))
    sendoutputfiles_array.append(
        generator.print_on_error('send_io_status', '0',
                                 'failed to send one or more IO files'))
    sendoutputfiles_array.append(
        generator.exit_on_error('send_io_status', '0', 'send_io_status'))
    sendoutputfiles_array.append(generator.comment('send status files'))
    sendoutputfiles_array.append(
        generator.send_status_files(
            [job_dictionary['JOB_ID'] + '.io-status'],
            'send_io_status_status'))
    sendoutputfiles_array.append(
        generator.print_on_error('send_io_status_status', '0',
                                 'failed to send io-status file'))
    sendoutputfiles_array.append(
        generator.exit_on_error('send_io_status_status', '0',
                                'send_io_status_status'))

    # Please note that .status upload marks the end of the
    # session and thus it must be the last uploaded file.

    sendoutputfiles_array.append(
        generator.send_status_files(
            [job_dictionary['JOB_ID'] + '.status'],
            'send_status_status'))
    sendoutputfiles_array.append(
        generator.print_on_error('send_status_status', '0',
                                 'failed to send status file'))
    sendoutputfiles_array.append(
        generator.exit_on_error('send_status_status', '0',
                                'send_status_status'))

    # Note that ID.sendoutputfiles is called from frontend_script
    # so exit on failure can be handled there.
    sendoutputfiles_array.append(generator.comment('exit script'))
    sendoutputfiles_array.append(
        generator.exit_script('0', 'send output files'))

    sendupdatefiles_array = []

    # We need to make sure that curl failures lead to retry while
    # missing output (from say a failed job) is logged but
    # ignored in relation to sendupdatefiles success.

    sendupdatefiles_array.append(generator.print_start('send update files'))
    sendupdatefiles_array.append(generator.init_io_log())
    sendupdatefiles_array.append(generator.comment('send io files'))
    sendupdatefiles_array.append(generator.send_io_files('send_io_status'))
    sendupdatefiles_array.append(
        generator.log_io_status('send_io_files', 'send_io_status'))
    sendupdatefiles_array.append(
        generator.print_on_error('send_io_status', '0',
                                 'failed to send one or more IO files'))
    sendupdatefiles_array.append(
        generator.exit_on_error('send_io_status', '0', 'send_io_status'))
    sendupdatefiles_array.append(generator.comment('exit script'))
    sendupdatefiles_array.append(
        generator.exit_script('0', 'send update files'))

    jobsshpubkey_array = []

    # Save session pub key in SESSIONID.authorized_keys for openssh+subsys
    # use. That is not needed with grid_sftp where we parse job for key.

    if configuration.site_enable_sftp_subsys and \
            job_dictionary['MOUNTSSHPUBLICKEY']:
        # Restrict to access from frontend proxy / resource FQDN
        res_fqdn = resource_config.get('FRONTENDPROXY', '')
        if not res_fqdn:
            res_fqdn = resource_config.get('HOSTURL', '')
        allow_from = '%s' % res_fqdn
        try:
            user_ip = socket.gethostbyname_ex(res_fqdn)[2][0]
            allow_from += ',%s' % user_ip
        except Exception as exc:
            user_ip = None
            logger.warning("Skipping ip in 'from' on job mount key: %s"
                           % exc)
        # Always minimize key access with all restrictions and source
        # address.
        # NOTE: the 'restrict' keyword is only available in new ssh
        # installations, so we manually build the corresponding string
        # for now.
        # restrict_opts = 'restrict'
        restrict_opts = 'no-agent-forwarding,no-port-forwarding,no-pty,'
        restrict_opts += 'no-user-rc,no-X11-forwarding'
        restrictions = 'from="%s",%s' % (allow_from, restrict_opts)
        pub_key = '%(MOUNTSSHPUBLICKEY)s' % job_dictionary
        jobsshpubkey_array.append('%s %s\n' % (restrictions, pub_key))

    # clean up must be done with SSH (when the .status file
    # has been uploaded): Job script can't safely/reliably clean up
    # after itself because of possible user interference.

    if 'JOBTYPE' in job_dictionary and \
            job_dictionary['JOBTYPE'].lower() == 'interactive':

        # interactive jobs have a .job file just containing a curl
        # call to the MiG servers cgi-sid/requestinteractivejob
        # and the usual .job is instead called .interactivejob and
        # is SCP'ed and started by SSH in the requestinteractive.py
        # script

        logger.error('jobtype: interactive')
        interactivejobfile = generator.script_init() + '\n' \
            + generator.request_interactive() + '\n' \
            + generator.exit_script('0', 'interactive job')

        # write the small file containing the requestinteractivejob.py
        # call as .job

        write_file(interactivejobfile, configuration.mig_system_files
                   + job_dictionary['JOB_ID'] + '.job', logger)

        # write the usual .job file as .interactivejob

        write_file('\n'.join(job_array), configuration.mig_system_files
                   + job_dictionary['JOB_ID'] + '.interactivejob', logger)
        print(interactivejobfile)
    else:

        # write files

        write_file('\n'.join(job_array), configuration.mig_system_files
                   + job_dictionary['JOB_ID'] + '.job', logger)

    write_file('\n'.join(getinputfiles_array),
               path_without_extension + '.getinputfiles', logger)
    write_file('\n'.join(getupdatefiles_array),
               configuration.mig_system_files + job_dictionary['JOB_ID']
               + '.getupdatefiles', logger)
    write_file('\n'.join(sendoutputfiles_array),
               configuration.mig_system_files + job_dictionary['JOB_ID']
               + '.sendoutputfiles', logger)
    write_file('\n'.join(sendupdatefiles_array),
               configuration.mig_system_files + job_dictionary['JOB_ID']
               + '.sendupdatefiles', logger)

    # Save session pub key in SID.authorized_keys file for openssh+subsys
    # use

    if jobsshpubkey_array:
        write_file('\n'.join(jobsshpubkey_array),
                   os.path.join(configuration.mig_system_files,
                                'job_mount',
                                job_dictionary['SESSIONID']
                                + '.authorized_keys'), logger, umask=0o27)

    return True
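
# The genjobscript* generator classes used by gen_job_script are defined
# elsewhere in the code base; each method simply returns a script fragment
# as a string and gen_job_script joins the collected fragments with
# newlines. A minimal hypothetical sh-flavoured stub illustrating that
# protocol (method names match the calls above, bodies are invented):
class _StubShGeneratorSketch(object):
    def script_init(self):
        return '#!/bin/sh'

    def comment(self, text):
        return '# %s' % text

    def print_start(self, name):
        return 'echo "starting %s"' % name

    def exit_script(self, exit_code, name):
        return 'echo "exiting %s"\nexit %s' % (name, exit_code)
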
def create_arc_job(
    job,
    configuration,
    logger,
):
    """Analogous to create_job_script, but for ARC jobs: creates symlinks
    for receiving result files, translates the job dict to ARC xRSL, and
    stores the resulting job script (xrsl + sh script) for submission.

    We do _not_ create a separate job_dict with copies and SESSIONID
    inside, as opposed to create_job_script; all we need is the link from
    webserver_home / sessionID into the user's home directory
    ("job_output/job['JOB_ID']" is added to the result upload URLs in the
    translation).

    Returns a tuple (result, msg): the expanded job dict and 'OK' on
    success, or (None, error message) on failure.
    """

    if not configuration.arc_clusters:
        return (None, 'No ARC support!')
    if not job['JOBTYPE'] == 'arc':
        return (None, 'Error. This is not an ARC job')

    # Deep copy job for local changes
    job_dict = deepcopy(job)
    # Finally expand reserved job variables like +JOBID+ and +JOBNAME+
    job_dict = expand_variables(job_dict)
    # ... no more changes to job_dict from here on
    client_id = str(job_dict['USER_CERT'])

    # we do not want to see empty jobs here. Test as done in
    # create_job_script.
    if client_id == configuration.empty_job_name:
        return (None, 'Error. empty job for ARC?')

    # generate random session ID:
    # NOTE: open /dev/urandom explicitly in binary mode so the read works
    # on python3 as well
    sessionid = hexlify(open('/dev/urandom', 'rb').read(session_id_bytes))
    logger.debug('session ID (for creating links): %s' % sessionid)

    client_dir = client_id_dir(client_id)

    # make symbolic links inside webserver_home:
    #
    # we need: link to owner's dir. to receive results,
    #          job mRSL inside sessid_to_mrsl_link_home
    linklist = [(configuration.user_home + client_dir,
                 configuration.webserver_home + sessionid),
                (configuration.mrsl_files_dir + client_dir + '/'
                 + str(job_dict['JOB_ID']) + '.mRSL',
                 configuration.sessid_to_mrsl_link_home + sessionid
                 + '.mRSL')]
    for (dest, loc) in linklist:
        make_symlink(dest, loc, logger)

    # the translation generates an xRSL object which specifies to execute
    # a shell script with script_name. If sessionid != None, results will
    # be uploaded to sid_redirect/sessionid/job_output/job_id

    try:
        (xrsl, script, script_name) = mrsltoxrsl.translate(job_dict,
                                                           sessionid)
        logger.debug('translated to xRSL: %s' % xrsl)
        logger.debug('script:\n %s' % script)
    except Exception as err:
        # error during translation, pass a message
        logger.error('Error during xRSL translation: %s' % err.__str__())
        return (None, err.__str__())

    # we submit directly from here (the other version above does
    # copyFileToResource and gen_job_script generates all files)

    # we have to put the generated script somewhere..., and submit from
    # there. inputfiles are given by the user as relative paths from his
    # home, so we should use that location (and clean up afterwards).

    # write script (to user home)
    user_home = os.path.join(configuration.user_home, client_dir)
    script_path = os.path.abspath(os.path.join(user_home, script_name))
    write_file(script, script_path, logger)

    os.chdir(user_home)
    try:
        logger.debug('submitting job to ARC')
        session = arcwrapper.Ui(user_home)
        arc_job_ids = session.submit(xrsl)

        # if no exception occurred, we are done:
        job_dict['ARCID'] = arc_job_ids[0]
        job_dict['SESSIONID'] = sessionid

        msg = 'OK'
        result = job_dict

    # when errors occurred, pass a message to the caller.
    except arcwrapper.ARCWrapperError as err:
        msg = err.what()
        result = None  # unsuccessful
    except arcwrapper.NoProxyError as err:
        msg = 'No Proxy found: %s' % err.what()
        result = None  # unsuccessful
    except Exception as err:
        msg = err.__str__()
        result = None  # unsuccessful

    # always remove the generated script - the relative name resolves
    # inside user_home thanks to the chdir above
    os.remove(script_name)
    # and remove the created links immediately if failed
    if not result:
        for (_, link) in linklist:
            os.remove(link)
        logger.error('Unsuccessful ARC job submission: %s' % msg)
    else:
        logger.debug('submitted to ARC as job %s' % msg)
    return (result, msg)
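
# Illustrative sketch (not part of the MiG code base): the session ID above
# is just session_id_bytes of OS randomness rendered as hex. On python3 the
# secrets module produces an equivalent token without an explicit
# /dev/urandom file handle:
def _make_session_id_sketch(num_bytes=32):
    """Return a random hex session ID; num_bytes=32 is only an assumed
    stand-in for the real session_id_bytes constant."""
    import secrets
    return secrets.token_hex(num_bytes)
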
def handle_package_upload(
    real_src,
    relative_src,
    client_id,
    configuration,
    submit_mrslfiles,
    dst,
):
    """A file package was uploaded (e.g. a .zip file). Extract the contents
    and submit any mRSL files if submit_mrslfiles is True.
    """
    logger = configuration.logger
    msg = ''
    status = True

    logger.info("handle_package_upload %s %s %s" %
                (real_src, relative_src, dst))

    client_dir = client_id_dir(client_id)

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name

    base_dir = os.path.abspath(os.path.join(configuration.user_home,
                                            client_dir)) + os.sep

    # Unpack in same directory unless real_dst is given

    if not dst:
        real_dst = os.path.abspath(os.path.dirname(real_src))
    elif os.path.isabs(dst):
        real_dst = os.path.abspath(dst)
    else:
        real_dst = os.path.join(base_dir, dst)
    real_dst += os.sep
    mrslfiles_to_parse = []

    real_src_lower = real_src.lower()
    if real_src_lower.endswith('.zip'):

        # Handle .zip file

        msg += "Received '%s' for unpacking. " % relative_src
        try:
            zip_object = zipfile.ZipFile(real_src, 'r', allowZip64=True)
        except Exception as exc:
            logger.error("open zip failed: %s" % exc)
            msg += 'Could not open zipfile: %s! ' % exc
            return (False, msg)

        logger.info("unpack entries of %s to %s" % (real_src, real_dst))
        for zip_entry in zip_object.infolist():
            entry_filename = force_utf8(zip_entry.filename)
            msg += 'Extracting: %s . ' % entry_filename

            # write zip_entry to disk
            # IMPORTANT: we must abs-expand for valid_user_path_name check
            #            otherwise it will incorrectly fail on e.g. abc/
            #            dir entry in archive

            local_zip_entry_name = os.path.join(real_dst, entry_filename)
            valid_status, valid_err = valid_user_path_name(
                entry_filename, os.path.abspath(local_zip_entry_name),
                base_dir)
            if not valid_status:
                status = False
                msg += "Filename validation error: %s! " % valid_err
                continue

            # create sub dir(s) if missing

            zip_entry_dir = os.path.dirname(local_zip_entry_name)
            if not os.path.isdir(zip_entry_dir):
                msg += 'Creating dir %s . ' % entry_filename
                try:
                    os.makedirs(zip_entry_dir, 0o775)
                except Exception as exc:
                    logger.error("create directory failed: %s" % exc)
                    msg += 'Error creating directory: %s! ' % exc
                    status = False
                    continue

            if os.path.isdir(local_zip_entry_name):
                logger.debug("nothing more to do for dir entry: %s"
                             % local_zip_entry_name)
                continue

            try:
                zip_data = zip_object.read(zip_entry.filename)
            except Exception as exc:
                logger.error("read data in %s failed: %s"
                             % (zip_entry.filename, exc))
                msg += 'Error reading %s :: %s! ' % (zip_entry.filename,
                                                     exc)
                status = False
                continue

            # TODO: can we detect and ignore symlinks?
            # Zip format is horribly designed/documented:
            # http://www.pkware.com/documents/casestudies/APPNOTE.TXT
            # I haven't managed to find a way to detect symlinks. Thus
            # they are simply created as files containing the name they
            # were supposed to link to: This is inconsistent but safe :-S

            # write file - symbolic links are written as files! (good for
            # security).
            # NB: we need to use the undecoded filename here

            if not write_file(zip_data, local_zip_entry_name, logger) and \
                    not os.path.exists(local_zip_entry_name):
                msg += 'Error unpacking %s to disk! ' % entry_filename
                status = False
                continue

            # get the size as the OS sees it

            try:
                __ = os.path.getsize(local_zip_entry_name)
            except Exception as exc:
                logger.warning("unpack may have failed: %s" % exc)
                msg += 'File %s unpacked, but could not get file size ' \
                       '%s! ' % (entry_filename, exc)
                status = False
                continue

            # Check if the extension is .mRSL

            if local_zip_entry_name.upper().endswith('.MRSL'):

                # A .mrsl file was included in the package!

                mrslfiles_to_parse.append(local_zip_entry_name)
    elif real_src_lower.endswith('.tar') or \
            real_src_lower.endswith('.tar.gz') or \
            real_src_lower.endswith('.tgz') or \
            real_src_lower.endswith('.tar.bz2') or \
            real_src_lower.endswith('.tbz'):

        # Handle possibly compressed .tar files

        if real_src_lower.endswith('.tar.gz') or \
                real_src_lower.endswith('.tgz'):
            msg += "Received '%s' for unpacking. " % relative_src
            try:
                tar_object = tarfile.open(real_src, 'r:gz')
                tar_file_content = tarfile.TarFile.gzopen(real_src)
            except Exception as exc:
                logger.error("open tar gz failed: %s" % exc)
                msg += 'Could not open .tar.gz file: %s! ' % exc
                return (False, msg)
        elif real_src_lower.endswith('.tar.bz2') or \
                real_src_lower.endswith('.tbz'):
            msg += "Received '%s' for unpacking. " % relative_src
            try:
                tar_object = tarfile.open(real_src, 'r:bz2')
                tar_file_content = tarfile.TarFile.bz2open(real_src)
            except Exception as exc:
                logger.error("open tar bz failed: %s" % exc)
                msg += 'Could not open .tar.bz2 file: %s! ' % exc
                return (False, msg)
        else:
            try:
                tar_object = tarfile.open(real_src, 'r')
                tar_file_content = tarfile.TarFile.open(real_src)
            except Exception as exc:
                logger.error("open tar failed: %s" % exc)
                msg += 'Could not open .tar file: %s! ' % exc
                return (False, msg)

        logger.info("unpack entries of %s to %s" % (real_src, real_dst))
        for tar_entry in tar_object:
            entry_filename = force_utf8(tar_entry.name)
            msg += 'Extracting: %s . ' % entry_filename

            # write tar_entry to disk
            # IMPORTANT: we must abs-expand for valid_user_path_name check
            #            otherwise it will incorrectly fail on e.g. abc/
            #            dir entry in archive

            local_tar_entry_name = os.path.join(real_dst, entry_filename)
            valid_status, valid_err = valid_user_path_name(
                entry_filename, os.path.abspath(local_tar_entry_name),
                base_dir)
            if not valid_status:
                status = False
                msg += "Filename validation error: %s! " % valid_err
                continue

            # Found empty dir - make sure dirname doesn't strip to parent

            if tar_entry.isdir():
                logger.debug("empty dir %s - include in parent creation"
                             % local_tar_entry_name)
                local_tar_entry_name += os.sep

            # create sub dir(s) if missing

            tar_entry_dir = os.path.dirname(local_tar_entry_name)
            if not os.path.isdir(tar_entry_dir):
                logger.debug("make tar parent dir: %s" % tar_entry_dir)
                msg += 'Creating dir %s . ' % entry_filename
                try:
                    os.makedirs(tar_entry_dir, 0o775)
                except Exception as exc:
                    logger.error("create directory failed: %s" % exc)
                    msg += 'Error creating directory %s! ' % exc
                    status = False
                    continue

            if tar_entry.isdir():

                # directory created above - nothing more to do

                continue
            elif not tar_entry.isfile():

                # not a regular file - symlinks are ignored to avoid
                # illegal access

                msg += 'Skipping %s: not a regular file or directory! ' \
                       % entry_filename
                status = False
                continue

            # write file!
            # NB: we need to use the undecoded filename here

            if not write_file(
                    tar_file_content.extractfile(tar_entry).read(),
                    local_tar_entry_name, logger):
                msg += 'Error unpacking file %s to disk! ' % entry_filename
                status = False
                continue

            # get the size as the OS sees it

            try:
                __ = os.path.getsize(local_tar_entry_name)
            except Exception as exc:
                logger.warning("file save may have failed: %s" % exc)
                msg += 'File %s unpacked, but could not get file size ' \
                       '%s! ' % (entry_filename, exc)
                status = False
                continue

            # Check if the extension is .mRSL

            if local_tar_entry_name.upper().endswith('.MRSL'):

                # A .mrsl file was included in the package!

                mrslfiles_to_parse.append(local_tar_entry_name)
    else:
        logger.error("Unpack called on unsupported archive: %s" % real_src)
        msg += "Unknown/unsupported archive format: %s" % relative_src
        return (False, msg)

    if not status:
        msg = """Unpacked archive with one or more errors:
%s""" % msg
        return (status, msg)

    # submit mrsl files to the parser. It should be done from within this
    # function to keep the right order if multiple files are created in
    # the html form.

    submitstatuslist = []
    if configuration.site_enable_jobs and submit_mrslfiles:

        # Please note that base_dir must end in slash to avoid access to
        # other user dirs when own name is a prefix of another user name

        base_dir = os.path.abspath(os.path.join(configuration.user_home,
                                                client_dir)) + os.sep
        for mrslfile in mrslfiles_to_parse:
            (job_status, parse_msg, job_id) = new_job(mrslfile, client_id,
                                                      configuration, False,
                                                      True)
            relative_filename = os.sep + mrslfile.replace(base_dir, '')
            submitstatus = {'object_type': 'submitstatus',
                            'name': relative_filename}
            submitstatus['status'] = job_status
            submitstatus['job_id'] = job_id
            if not job_status:
                submitstatus['message'] = parse_msg
            submitstatuslist.append(submitstatus)

    return (status, submitstatuslist)
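
# Illustrative sketch (not the real valid_user_path_name, which lives in
# MiG's shared validation code): the essential zip-slip guard applied to
# every archive entry above boils down to a realpath containment check:
def _inside_base_sketch(entry_filename, abs_entry, base_dir):
    """Reject archive entries that would escape base_dir, e.g. via ../
    components or absolute paths smuggled into entry names."""
    real_base = os.path.realpath(base_dir)
    real_entry = os.path.realpath(abs_entry)
    # trailing separator prevents /home/userX matching /home/userXY
    if not (real_entry + os.sep).startswith(real_base + os.sep):
        return (False, 'illegal path: %s escapes user home'
                % entry_filename)
    return (True, '')
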