Example #1
def handle_proxy(proxy_string, client_id, config):
    """If ARC-enabled server: store a proxy certificate.
       Arguments: proxy_string - text  extracted from given upload
                  client_id  - DN for user just being created
                  config     - global configuration
    """

    output = []
    client_dir = client_id_dir(client_id)
    proxy_dir = os.path.join(config.user_home, client_dir)
    proxy_path = os.path.join(proxy_dir, arcwrapper.Ui.proxy_name)

    if not config.arc_clusters:
        output.append({'object_type': 'error_text',
                       'text': 'No ARC support!'})
        return output

    # store the file

    try:
        write_file(proxy_string, proxy_path, config.logger)
        os.chmod(proxy_path, 0o600)
    except Exception as exc:
        output.append({'object_type': 'error_text',
                       'text': 'Proxy file could not be written (%s)!'
                       % str(exc).replace(proxy_dir, '')})
        return output

    # provide information about the uploaded proxy

    try:
        session_ui = arcwrapper.Ui(proxy_dir)
        proxy = session_ui.getProxy()
        if proxy.IsExpired():

            # rarely happens, since the Ui constructor normally raises an
            # exception for an expired proxy

            output.append({'object_type': 'warning',
                           'text': 'Proxy certificate is expired.'})
        else:
            output.append({'object_type': 'text', 'text': 'Proxy for %s'
                           % proxy.GetIdentitySN()})
            output.append({'object_type': 'text',
                           'text': 'Proxy certificate will expire on %s (in %s sec.)'
                           % (proxy.Expires(), proxy.getTimeleft())})
    except arcwrapper.NoProxyError as err:

        output.append({'object_type': 'warning',
                       'text': 'No proxy certificate to load: %s'
                       % err.what()})
    return output
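
A minimal sketch of what a write_file helper with the (content, path, logger)
signature used throughout these examples might look like; the helper body and
the umask keyword handling are assumptions here, not the project's actual
implementation.

import os

def write_file(content, path, logger, umask=None):
    """Write text or bytes content to path; return True on success."""
    # NOTE: illustrative sketch only - not the project's actual implementation
    mode = 'wb' if isinstance(content, bytes) else 'w'
    old_umask = os.umask(umask) if umask is not None else None
    try:
        with open(path, mode) as fd:
            fd.write(content)
        return True
    except Exception as exc:
        logger.error('could not write %s: %s' % (path, exc))
        return False
    finally:
        if old_umask is not None:
            os.umask(old_umask)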
Example #2
def main(client_id, user_arguments_dict):
    """Main function used by front end"""

    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id)
    client_dir = client_id_dir(client_id)
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
        # NOTE: path can use wildcards, dst and current_dir cannot
        typecheck_overrides={'path': valid_path_pattern},
    )
    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)

    flags = ''.join(accepted['flags'])
    pattern_list = accepted['path']
    dst = accepted['dst'][-1]
    current_dir = accepted['current_dir'][-1].lstrip(os.sep)

    # All paths are relative to current_dir

    pattern_list = [os.path.join(current_dir, i) for i in pattern_list]
    if dst:
        dst = os.path.join(current_dir, dst)

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name
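    # (e.g. a check against a hypothetical /home/user would otherwise also
    #  accept /home/user2, since the former is a string prefix of the latter)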

    base_dir = os.path.abspath(
        os.path.join(configuration.user_home, client_dir)) + os.sep

    status = returnvalues.OK

    if verbose(flags):
        for flag in flags:
            output_objects.append({
                'object_type': 'text',
                'text': '%s using flag: %s' % (op_name, flag)
            })

    # IMPORTANT: path must be expanded to abs for proper chrooting
    abs_dir = os.path.abspath(
        os.path.join(base_dir, current_dir.lstrip(os.sep)))
    if not valid_user_path(configuration, abs_dir, base_dir, True):
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            "You're not allowed to work in %s!" % current_dir
        })
        logger.warning('%s tried to %s restricted path %s ! (%s)' %
                       (client_id, op_name, abs_dir, current_dir))
        return (output_objects, returnvalues.CLIENT_ERROR)

    if verbose(flags):
        output_objects.append({
            'object_type': 'text',
            'text': "working in %s" % current_dir
        })

    if dst:
        if not safe_handler(configuration, 'post', op_name, client_id,
                            get_csrf_limit(configuration), accepted):
            output_objects.append({
                'object_type':
                'error_text',
                'text':
                '''Only accepting
                CSRF-filtered POST requests to prevent unintended updates'''
            })
            return (output_objects, returnvalues.CLIENT_ERROR)

        # NOTE: dst already incorporates current_dir prefix here
        # IMPORTANT: path must be expanded to abs for proper chrooting
        abs_dest = os.path.abspath(os.path.join(base_dir, dst))
        logger.info('write disk use in %s' % abs_dest)

        # Don't use abs_path in output as it may expose underlying
        # fs layout.

        relative_dest = abs_dest.replace(base_dir, '')
        if not valid_user_path(configuration, abs_dest, base_dir, True):
            output_objects.append({
                'object_type':
                'error_text',
                'text':
                "Invalid path! (%s expands to an illegal path)" % dst
            })
            logger.warning('%s tried to %s restricted path %s !(%s)' %
                           (client_id, op_name, abs_dest, dst))
            return (output_objects, returnvalues.CLIENT_ERROR)
        if not check_write_access(abs_dest, parent_dir=True):
            logger.warning('%s called without write access: %s' %
                           (op_name, abs_dest))
            output_objects.append({
                'object_type':
                'error_text',
                'text':
                'cannot write disk use to "%s": inside a read-only location!' %
                relative_dest
            })
            return (output_objects, returnvalues.CLIENT_ERROR)

    all_lines = []
    for pattern in pattern_list:

        # Check directory traversal attempts before actual handling to avoid
        # leaking information about file system layout while allowing
        # consistent error messages

        unfiltered_match = glob.glob(base_dir + pattern)
        match = []
        for server_path in unfiltered_match:
            # IMPORTANT: path must be expanded to abs for proper chrooting
            abs_path = os.path.abspath(server_path)
            if not valid_user_path(configuration, abs_path, base_dir, True):

                # out of bounds - save user warning for later to allow
                # partial match:
                # ../*/* is technically allowed to match own files.

                logger.warning('%s tried to %s restricted path %s ! (%s)' %
                               (client_id, op_name, abs_path, pattern))
                continue
            match.append(abs_path)

        # Now actually treat list of allowed matchings and notify if no
        # (allowed) match

        if not match:
            output_objects.append({
                'object_type': 'file_not_found',
                'name': pattern
            })
            status = returnvalues.FILE_NOT_FOUND

        # NOTE: we produce output matching an invocation of:
        # du -aL --apparent-size --block-size=1 PATH [PATH ...]
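        # e.g. a hypothetical output line for a sub dir, in the
        # '%(bytes)d\t\t%(name)s\n' format used further below:
        #   4096\t\tmydir/subdir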
        filedus = []
        summarize_output = summarize(flags)
        for abs_path in match:
            if invisible_path(abs_path):
                continue
            relative_path = abs_path.replace(base_dir, '')
            # cache accumulated sub dir sizes - du sums into parent dir size
            dir_sizes = {}
            try:
                # Assume a directory to walk
                for (root, dirs, files) in walk(abs_path,
                                                topdown=False,
                                                followlinks=True):
                    if invisible_path(root):
                        continue
                    dir_bytes = 0
                    for name in files:
                        real_file = os.path.join(root, name)
                        if invisible_path(real_file):
                            continue
                        relative_file = real_file.replace(base_dir, '')
                        size = os.path.getsize(real_file)
                        dir_bytes += size
                        if not summarize_output:
                            filedus.append({
                                'object_type': 'filedu',
                                'name': relative_file,
                                'bytes': size
                            })
                    for name in dirs:
                        real_dir = os.path.join(root, name)
                        if invisible_path(real_dir):
                            continue
                        dir_bytes += dir_sizes[real_dir]
                    relative_root = root.replace(base_dir, '')
                    dir_bytes += os.path.getsize(root)
                    dir_sizes[root] = dir_bytes
                    if root == abs_path or not summarize_output:
                        filedus.append({
                            'object_type': 'filedu',
                            'name': relative_root,
                            'bytes': dir_bytes
                        })
                if os.path.isfile(abs_path):
                    # Fall back to plain file where walk is empty
                    size = os.path.getsize(abs_path)
                    filedus.append({
                        'object_type': 'filedu',
                        'name': relative_path,
                        'bytes': size
                    })
            except Exception as exc:
                output_objects.append({
                    'object_type':
                    'error_text',
                    'text':
                    "%s: '%s': %s" % (op_name, relative_path, exc)
                })
                logger.error("%s: failed on '%s': %s" %
                             (op_name, relative_path, exc))
                status = returnvalues.SYSTEM_ERROR
                continue
        if dst:
            all_lines += [
                '%(bytes)d\t\t%(name)s\n' % entry for entry in filedus
            ]
        else:
            output_objects.append({
                'object_type': 'filedus',
                'filedus': filedus
            })

    if dst and not write_file(''.join(all_lines), abs_dest, logger):
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            "failed to write disk use to %s" % relative_dest
        })
        logger.error("writing disk use to %s for %s failed" %
                     (abs_dest, client_id))
        status = returnvalues.SYSTEM_ERROR

    return (output_objects, status)
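
A minimal standalone sketch of the bottom-up size accumulation used above:
with os.walk(topdown=False) sub directories are visited before their parents,
so each directory can add the cached totals of its children. The function
name and structure here are illustrative assumptions, not project code.

import os
from os import walk

def _hypothetical_disk_usage(top):
    """Return {dir_path: accumulated_bytes} for the tree rooted at top."""
    dir_sizes = {}
    for (root, dirs, files) in walk(top, topdown=False):
        # start from the directory entry itself, as du does
        total = os.path.getsize(root)
        for name in files:
            total += os.path.getsize(os.path.join(root, name))
        for name in dirs:
            # children were already visited because of topdown=False
            total += dir_sizes.get(os.path.join(root, name), 0)
        dir_sizes[root] = total
    return dir_sizes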
Example #3
def main(client_id, user_arguments_dict):
    """Main function used by front end"""

    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id)
    client_dir = client_id_dir(client_id)
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
        # NOTE: path can use wildcards, dst and current_dir cannot
        typecheck_overrides={'path': valid_path_pattern},
    )
    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)

    flags = ''.join(accepted['flags'])
    algo_list = accepted['hash_algo']
    max_chunks = int(accepted['max_chunks'][-1])
    pattern_list = accepted['path']
    dst = accepted['dst'][-1]
    current_dir = accepted['current_dir'][-1].lstrip(os.sep)

    # All paths are relative to current_dir

    pattern_list = [os.path.join(current_dir, i) for i in pattern_list]
    if dst:
        dst = os.path.join(current_dir, dst)

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name

    base_dir = os.path.abspath(
        os.path.join(configuration.user_home, client_dir)) + os.sep

    status = returnvalues.OK

    if verbose(flags):
        for flag in flags:
            output_objects.append({
                'object_type': 'text',
                'text': '%s using flag: %s' % (op_name, flag)
            })

    # IMPORTANT: path must be expanded to abs for proper chrooting
    abs_dir = os.path.abspath(
        os.path.join(base_dir, current_dir.lstrip(os.sep)))
    if not valid_user_path(configuration, abs_dir, base_dir, True):
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            "You're not allowed to work in %s!" % current_dir
        })
        logger.warning('%s tried to %s restricted path %s ! (%s)' %
                       (client_id, op_name, abs_dir, current_dir))
        return (output_objects, returnvalues.CLIENT_ERROR)

    if verbose(flags):
        output_objects.append({
            'object_type': 'text',
            'text': "working in %s" % current_dir
        })

    if dst:
        if not safe_handler(configuration, 'post', op_name, client_id,
                            get_csrf_limit(configuration), accepted):
            output_objects.append({
                'object_type':
                'error_text',
                'text':
                '''Only accepting
                CSRF-filtered POST requests to prevent unintended updates'''
            })
            return (output_objects, returnvalues.CLIENT_ERROR)

        # NOTE: dst already incorporates current_dir prefix here
        # IMPORTANT: path must be expanded to abs for proper chrooting
        abs_dest = os.path.abspath(os.path.join(base_dir, dst))
        logger.info('chksum in %s' % abs_dest)

        # Don't use abs_path in output as it may expose underlying
        # fs layout.

        relative_dest = abs_dest.replace(base_dir, '')
        if not valid_user_path(configuration, abs_dest, base_dir, True):
            output_objects.append({
                'object_type':
                'error_text',
                'text':
                "Invalid path! (%s expands to an illegal path)" % dst
            })
            logger.warning('%s tried to %s restricted path %s !(%s)' %
                           (client_id, op_name, abs_dest, dst))
            return (output_objects, returnvalues.CLIENT_ERROR)
        if not check_write_access(abs_dest, parent_dir=True):
            logger.warning('%s called without write access: %s' %
                           (op_name, abs_dest))
            output_objects.append({
                'object_type':
                'error_text',
                'text':
                'cannot checksum to "%s": inside a read-only location!' %
                relative_dest
            })
            return (output_objects, returnvalues.CLIENT_ERROR)

    all_lines = []
    for pattern in pattern_list:

        # Check directory traversal attempts before actual handling to avoid
        # leaking information about file system layout while allowing
        # consistent error messages

        unfiltered_match = glob.glob(base_dir + pattern)
        match = []
        for server_path in unfiltered_match:
            # IMPORTANT: path must be expanded to abs for proper chrooting
            abs_path = os.path.abspath(server_path)
            if not valid_user_path(configuration, abs_path, base_dir, True):

                # out of bounds - save user warning for later to allow
                # partial match:
                # ../*/* is technically allowed to match own files.

                logger.warning('%s tried to %s restricted path %s ! (%s)' %
                               (client_id, op_name, abs_path, pattern))
                continue
            match.append(abs_path)

        # Now actually treat list of allowed matchings and notify if no
        # (allowed) match

        if not match:
            output_objects.append({
                'object_type': 'file_not_found',
                'name': pattern
            })
            status = returnvalues.FILE_NOT_FOUND

        for abs_path in match:
            relative_path = abs_path.replace(base_dir, '')
            output_lines = []
            for hash_algo in algo_list:
                try:
                    chksum_helper = _algo_map.get(hash_algo, _algo_map["md5"])
                    checksum = chksum_helper(abs_path, max_chunks=max_chunks)
                    line = "%s %s\n" % (checksum, relative_path)
                    logger.info("%s %s of %s: %s" %
                                (op_name, hash_algo, abs_path, checksum))
                    output_lines.append(line)
                except Exception as exc:
                    output_objects.append({
                        'object_type':
                        'error_text',
                        'text':
                        "%s: '%s': %s" % (op_name, relative_path, exc)
                    })
                    logger.error("%s: failed on '%s': %s" %
                                 (op_name, relative_path, exc))
                    status = returnvalues.SYSTEM_ERROR
                    continue
            entry = {'object_type': 'file_output', 'lines': output_lines}
            output_objects.append(entry)
            all_lines += output_lines

    if dst and not write_file(''.join(all_lines), abs_dest, logger):
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            "failed to write checksums to %s" % relative_dest
        })
        logger.error("writing checksums to %s for %s failed" %
                     (abs_dest, client_id))
        status = returnvalues.SYSTEM_ERROR

    return (output_objects, status)
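
A minimal sketch of the kind of chunked checksum helper that the _algo_map
lookup above presumably points at; the helper name, chunk size and exact
max_chunks semantics are assumptions, not the project's actual implementation.

import hashlib

def _hypothetical_chunked_checksum(path, algo='md5',
                                   chunk_size=1024 * 1024, max_chunks=-1):
    """Hash up to max_chunks chunks of path (all chunks if negative)."""
    digest = hashlib.new(algo)
    chunks_read = 0
    with open(path, 'rb') as fd:
        while max_chunks < 0 or chunks_read < max_chunks:
            data = fd.read(chunk_size)
            if not data:
                break
            digest.update(data)
            chunks_read += 1
    return digest.hexdigest()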
Example #4
def gen_job_script(
    job_dictionary,
    resource_config,
    configuration,
    localjobname,
    path_without_extension,
    client_dir,
    exe,
    logger,
):
    """Generate job script from job_dictionary before handout to resource"""

    script_language = resource_config['SCRIPTLANGUAGE']
    if script_language not in configuration.scriptlanguages:
        print('Unknown script language! (conflict with scriptlanguages in ' +
              'configuration?) %s not in %s' %
              (script_language, configuration.scriptlanguages))
        return False

    if script_language == 'python':
        generator = genjobscriptpython.GenJobScriptPython(
            job_dictionary,
            resource_config,
            exe,
            configuration.migserver_https_sid_url,
            localjobname,
            path_without_extension,
        )
    elif script_language == 'sh':
        generator = genjobscriptsh.GenJobScriptSh(
            job_dictionary,
            resource_config,
            exe,
            configuration.migserver_https_sid_url,
            localjobname,
            path_without_extension,
        )
    elif script_language == 'java':
        generator = genjobscriptjava.GenJobScriptJava(
            job_dictionary, resource_config,
            configuration.migserver_https_sid_url, localjobname,
            path_without_extension)
    else:
        print('Unknown script language! (is in configuration but not in ' +
              'jobscriptgenerator) %s ' % script_language)
        return False

    # String concatenation in python: [X].join is much faster
    # than repeated use of s += strings
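    # For illustration (hypothetical names), the pattern used below amounts to:
    #     parts = []
    #     parts.append(first_snippet)
    #     parts.append(second_snippet)
    #     script_text = '\n'.join(parts)
    # rather than growing a string with script_text += snippet in a loop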

    getinputfiles_array = []
    getinputfiles_array.append(generator.script_init())
    getinputfiles_array.append(generator.comment('print start'))
    getinputfiles_array.append(generator.print_start('get input files'))
    getinputfiles_array.append(generator.comment('init log'))
    getinputfiles_array.append(generator.init_io_log())
    getinputfiles_array.append(generator.comment('get special inputfiles'))
    getinputfiles_array.append(
        generator.get_special_input_files('get_special_status'))
    getinputfiles_array.append(
        generator.log_io_status('get_special_input_files',
                                'get_special_status'))
    getinputfiles_array.append(
        generator.print_on_error('get_special_status', '0',
                                 'failed to fetch special input files!'))
    getinputfiles_array.append(generator.comment('get input files'))
    getinputfiles_array.append(generator.get_input_files('get_input_status'))
    getinputfiles_array.append(
        generator.log_io_status('get_input_files', 'get_input_status'))
    getinputfiles_array.append(
        generator.print_on_error('get_input_status', '0',
                                 'failed to fetch input files!'))
    getinputfiles_array.append(generator.comment('get executables'))
    getinputfiles_array.append(
        generator.get_executables('get_executables_status'))
    getinputfiles_array.append(
        generator.log_io_status('get_executables', 'get_executables_status'))
    getinputfiles_array.append(
        generator.print_on_error('get_executables_status', '0',
                                 'failed to fetch executable files!'))

    # client_dir equals empty_job_name for sleep jobs

    getinputfiles_array.append(
        generator.generate_output_filelists(
            client_dir != configuration.empty_job_name,
            'generate_output_filelists'))
    getinputfiles_array.append(
        generator.print_on_error('generate_output_filelists', '0',
                                 'failed to generate output filelists!'))
    getinputfiles_array.append(
        generator.generate_input_filelist('generate_input_filelist'))
    getinputfiles_array.append(
        generator.print_on_error('generate_input_filelist', '0',
                                 'failed to generate input filelist!'))
    getinputfiles_array.append(
        generator.generate_iosessionid_file('generate_iosessionid_file'))
    getinputfiles_array.append(
        generator.print_on_error('generate_iosessionid_file', '0',
                                 'failed to generate iosessionid file!'))
    getinputfiles_array.append(
        generator.total_status([
            'get_special_status', 'get_input_status', 'get_executables_status',
            'generate_output_filelists'
        ], 'total_status'))
    getinputfiles_array.append(
        generator.exit_on_error('total_status', '0', 'total_status'))
    getinputfiles_array.append(generator.comment('exit script'))
    getinputfiles_array.append(generator.exit_script('0', 'get input files'))

    job_array = []
    job_array.append(generator.script_init())
    job_array.append(generator.set_core_environments())
    job_array.append(generator.print_start('job'))
    job_array.append(generator.comment('TODO: switch to job directory here'))
    job_array.append(generator.comment('make sure job status files exist'))
    job_array.append(
        generator.create_files([
            job_dictionary['JOB_ID'] + '.stdout',
            job_dictionary['JOB_ID'] + '.stderr',
            job_dictionary['JOB_ID'] + '.status'
        ]))
    job_array.append(generator.init_status())
    job_array.append(generator.comment('chmod +x'))
    job_array.append(generator.chmod_executables('chmod_status'))
    job_array.append(
        generator.print_on_error(
            'chmod_status', '0',
            'failed to make one or more EXECUTABLES executable'))
    job_array.append(
        generator.log_on_error('chmod_status', '0', 'system: chmod'))

    job_array.append(generator.comment('set environments'))
    job_array.append(generator.set_environments('env_status'))
    job_array.append(
        generator.print_on_error(
            'env_status', '0',
            'failed to initialize one or more ENVIRONMENTs'))
    job_array.append(
        generator.log_on_error('env_status', '0', 'system: set environments'))

    job_array.append(generator.comment('set runtimeenvironments'))
    job_array.append(
        generator.set_runtime_environments(
            resource_config['RUNTIMEENVIRONMENT'], 're_status'))
    job_array.append(
        generator.print_on_error(
            're_status', '0',
            'failed to initialize one or more RUNTIMEENVIRONMENTs'))
    job_array.append(
        generator.log_on_error('re_status', '0',
                               'system: set RUNTIMEENVIRONMENTs'))

    job_array.append(generator.comment('enforce some basic job limits'))
    job_array.append(generator.set_limits())
    if job_dictionary.get('MOUNT', []) != []:
        job_array.append(
            generator.generate_mountsshprivatekey_file(
                'generate_mountsshprivatekey_file'))
        job_array.append(
            generator.print_on_error(
                'generate_mountsshprivatekey_file', '0',
                'failed to generate mountsshprivatekey file!'))
        job_array.append(
            generator.generate_mountsshknownhosts_file(
                'generate_mountsshknownhosts_file'))
        job_array.append(
            generator.print_on_error(
                'generate_mountsshknownhosts_file', '0',
                'failed to generate mountsshknownhosts file!'))
        job_array.append(generator.comment('Mount job home'))
        # Use best available sftp implementation - configuration picks it
        sftp_address = configuration.user_sftp_show_address
        sftp_port = configuration.user_sftp_show_port
        job_array.append(
            generator.mount(job_dictionary['SESSIONID'], sftp_address,
                            sftp_port, 'mount_status'))
        job_array.append(
            generator.print_on_error('mount_status', '0',
                                     'failed to mount job home'))
        job_array.append(
            generator.log_on_error('mount_status', '0', 'system: mount'))
    job_array.append(generator.comment('execute!'))
    job_array.append(generator.execute('EXECUTING: ', '--Exit code:'))
    if job_dictionary.get('MOUNT', []) != []:
        job_array.append(generator.comment('Unmount job home'))
        job_array.append(generator.umount('umount_status'))
        job_array.append(
            generator.print_on_error('umount_status', '0',
                                     'failed to umount job home'))
        job_array.append(
            generator.log_on_error('umount_status', '0', 'system: umount'))
    job_array.append(generator.comment('exit script'))
    job_array.append(generator.exit_script('0', 'job'))

    getupdatefiles_array = []

    # We need to make sure that curl failures lead to retry while
    # missing output (from say a failed job) is logged but
    # ignored in relation to getupdatefiles success.

    getupdatefiles_array.append(generator.print_start('get update files'))
    getupdatefiles_array.append(generator.init_io_log())

    getupdatefiles_array.append(generator.comment('get io files'))
    getupdatefiles_array.append(generator.get_io_files('get_io_status'))
    getupdatefiles_array.append(
        generator.log_io_status('get_io_files', 'get_io_status'))
    getupdatefiles_array.append(
        generator.print_on_error('get_io_status', '0',
                                 'failed to get one or more IO files'))
    getupdatefiles_array.append(
        generator.exit_on_error('get_io_status', '0', 'get_io_status'))

    getupdatefiles_array.append(generator.comment('exit script'))
    getupdatefiles_array.append(generator.exit_script('0', 'get update files'))

    sendoutputfiles_array = []

    # We need to make sure that curl failures lead to retry while
    # missing output (from say a failed job) is logged but
    # ignored in relation to sendoutputfiles success.

    sendoutputfiles_array.append(generator.print_start('send output files'))
    sendoutputfiles_array.append(generator.init_io_log())
    sendoutputfiles_array.append(generator.comment('check output files'))

    sendoutputfiles_array.append(
        generator.output_files_missing('missing_counter'))
    sendoutputfiles_array.append(
        generator.log_io_status('output_files_missing', 'missing_counter'))
    sendoutputfiles_array.append(
        generator.print_on_error('missing_counter', '0',
                                 'missing output files'))
    sendoutputfiles_array.append(generator.comment('send output files'))
    sendoutputfiles_array.append(
        generator.send_output_files('send_output_status'))
    sendoutputfiles_array.append(
        generator.log_io_status('send_output_files', 'send_output_status'))
    sendoutputfiles_array.append(
        generator.print_on_error('send_output_status', '0',
                                 'failed to send one or more outputfiles'))
    sendoutputfiles_array.append(
        generator.exit_on_error('send_output_status', '0',
                                'send_output_status'))

    sendoutputfiles_array.append(generator.comment('send io files'))
    sendoutputfiles_array.append(generator.send_io_files('send_io_status'))
    sendoutputfiles_array.append(
        generator.log_io_status('send_io_files', 'send_io_status'))
    sendoutputfiles_array.append(
        generator.print_on_error('send_io_status', '0',
                                 'failed to send one or more IO files'))
    sendoutputfiles_array.append(
        generator.exit_on_error('send_io_status', '0', 'send_io_status'))
    sendoutputfiles_array.append(generator.comment('send status files'))
    sendoutputfiles_array.append(
        generator.send_status_files([job_dictionary['JOB_ID'] + '.io-status'],
                                    'send_io_status_status'))
    sendoutputfiles_array.append(
        generator.print_on_error('send_io_status_status', '0',
                                 'failed to send io-status file'))
    sendoutputfiles_array.append(
        generator.exit_on_error('send_io_status_status', '0',
                                'send_io_status_status'))

    # Please note that .status upload marks the end of the
    # session and thus it must be the last uploaded file.

    sendoutputfiles_array.append(
        generator.send_status_files([job_dictionary['JOB_ID'] + '.status'],
                                    'send_status_status'))
    sendoutputfiles_array.append(
        generator.print_on_error('send_status_status', '0',
                                 'failed to send status file'))
    sendoutputfiles_array.append(
        generator.exit_on_error('send_status_status', '0',
                                'send_status_status'))

    # Note that ID.sendoutputfiles is called from frontend_script
    # so exit on failure can be handled there.

    sendoutputfiles_array.append(generator.comment('exit script'))
    sendoutputfiles_array.append(
        generator.exit_script('0', 'send output files'))

    sendupdatefiles_array = []

    # We need to make sure that curl failures lead to retry while
    # missing output (from say a failed job) is logged but
    # ignored in relation to sendupdatefiles success.

    sendupdatefiles_array.append(generator.print_start('send update files'))
    sendupdatefiles_array.append(generator.init_io_log())

    sendupdatefiles_array.append(generator.comment('send io files'))
    sendupdatefiles_array.append(generator.send_io_files('send_io_status'))
    sendupdatefiles_array.append(
        generator.log_io_status('send_io_files', 'send_io_status'))
    sendupdatefiles_array.append(
        generator.print_on_error('send_io_status', '0',
                                 'failed to send one or more IO files'))
    sendupdatefiles_array.append(
        generator.exit_on_error('send_io_status', '0', 'send_io_status'))

    sendupdatefiles_array.append(generator.comment('exit script'))
    sendupdatefiles_array.append(
        generator.exit_script('0', 'send update files'))

    jobsshpubkey_array = []
    # Save session pub key in SESSIONID.authorized_keys for openssh+subsys use.
    # That is not needed with grid_sftp where we parse job for key.
    if configuration.site_enable_sftp_subsys and \
            job_dictionary['MOUNTSSHPUBLICKEY']:
        # Restrict to access from frontend proxy / resource FQDN
        res_fqdn = resource_config.get('FRONTENDPROXY', '')
        if not res_fqdn:
            res_fqdn = resource_config.get('HOSTURL', '')
        allow_from = '%s' % res_fqdn
        try:
            user_ip = socket.gethostbyname_ex(res_fqdn)[2][0]
            allow_from += ',%s' % user_ip
        except Exception as exc:
            user_ip = None
            logger.warning("Skipping ip in 'from' on job mount key: %s" % exc)
        # Always minimize key access with all restrictions and source address
        # NOTE: 'restrict' keyword is only available in new ssh installations
        #       we manually build the corresponding string for now.
        #restrict_opts = 'restrict'
        restrict_opts = 'no-agent-forwarding,no-port-forwarding,no-pty,'
        restrict_opts += 'no-user-rc,no-X11-forwarding'
        restrictions = 'from="%s",%s' % (allow_from, restrict_opts)
        pub_key = '%(MOUNTSSHPUBLICKEY)s' % job_dictionary
        jobsshpubkey_array.append('%s %s\n' % (restrictions, pub_key))
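        # The resulting authorized_keys line then takes the form (values are
        # illustrative only):
        #   from="res.example.org,10.0.0.1",no-agent-forwarding,...,no-X11-forwarding ssh-rsa AAAA...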

    # clean up must be done with SSH (when the .status file
    # has been uploaded): Job script can't safely/reliably clean up
    # after itself because of possible user interference.

    if 'JOBTYPE' in job_dictionary and \
            job_dictionary['JOBTYPE'].lower() == 'interactive':

        # interactive jobs have a .job file just containing a curl
        # call to the MiG servers cgi-sid/requestinteractivejob
        # and the usual .job is instead called .interactivejob and
        # is SCP'ed and started by SSH in the requestinteractive.py
        # script

        logger.error('jobtype: interactive')
        interactivejobfile = generator.script_init() + '\n' + \
            generator.request_interactive() + '\n' + \
            generator.exit_script('0', 'interactive job')

        # write the small file containing the requestinteractivejob.py
        # call as .job

        write_file(
            interactivejobfile,
            configuration.mig_system_files + job_dictionary['JOB_ID'] + '.job',
            logger)

        # write the usual .job file as .interactivejob

        write_file(
            '\n'.join(job_array), configuration.mig_system_files +
            job_dictionary['JOB_ID'] + '.interactivejob', logger)
        print(interactivejobfile)
    else:

        # write files

        write_file(
            '\n'.join(job_array),
            configuration.mig_system_files + job_dictionary['JOB_ID'] + '.job',
            logger)

    write_file('\n'.join(getinputfiles_array),
               path_without_extension + '.getinputfiles', logger)
    write_file(
        '\n'.join(getupdatefiles_array), configuration.mig_system_files +
        job_dictionary['JOB_ID'] + '.getupdatefiles', logger)
    write_file(
        '\n'.join(sendoutputfiles_array), configuration.mig_system_files +
        job_dictionary['JOB_ID'] + '.sendoutputfiles', logger)
    write_file(
        '\n'.join(sendupdatefiles_array), configuration.mig_system_files +
        job_dictionary['JOB_ID'] + '.sendupdatefiles', logger)
    # Save session pub key in SID.authorized_keys file for openssh+subsys use
    if jobsshpubkey_array:
        write_file('\n'.join(jobsshpubkey_array),
                   os.path.join(
                       configuration.mig_system_files, 'job_mount',
                       job_dictionary['SESSIONID'] + '.authorized_keys'),
                   logger,
                   umask=0o27)

    return True
Example #5
def create_arc_job(
    job,
    configuration,
    logger,
):
    """Analog to create_job_script for ARC jobs:
    Creates symLinks for receiving result files, translates job dict to ARC
    xrsl, and stores resulting job script (xrsl + sh script) for submitting.

    We do _not_ create a separate job_dict with copies and SESSIONID inside,
    as opposed to create_job_script, all we need is the link from 
    webserver_home / sessionID into the user's home directory 
    ("job_output/job['JOB_ID']" is added to the result upload URLs in the 
    translation). 

    Returns message (ARC job ID if no error) and sessionid (None if error)
    """

    if not configuration.arc_clusters:
        return (None, 'No ARC support!')
    if job['JOBTYPE'] != 'arc':
        return (None, 'Error. This is not an ARC job')

    # Deep copy job for local changes
    job_dict = deepcopy(job)
    # Finally expand reserved job variables like +JOBID+ and +JOBNAME+
    job_dict = expand_variables(job_dict)
    # ... no more changes to job_dict from here on
    client_id = str(job_dict['USER_CERT'])

    # we do not want to see empty jobs here. Test as done in create_job_script.
    if client_id == configuration.empty_job_name:
        return (None, 'Error. empty job for ARC?')

    # generate random session ID:
    # NOTE: read binary random data and hex-encode it as a native string
    sessionid = hexlify(
        open('/dev/urandom', 'rb').read(session_id_bytes)).decode('ascii')
    logger.debug('session ID (for creating links): %s' % sessionid)

    client_dir = client_id_dir(client_id)

    # make symbolic links inside webserver_home:
    #
    # we need: link to owner's dir. to receive results,
    #          job mRSL inside sessid_to_mrsl_link_home
    linklist = [(configuration.user_home + client_dir,
                 configuration.webserver_home + sessionid),
                (configuration.mrsl_files_dir + client_dir + '/' +
                 str(job_dict['JOB_ID']) + '.mRSL',
                 configuration.sessid_to_mrsl_link_home + sessionid + '.mRSL')]

    for (dest, loc) in linklist:
        make_symlink(dest, loc, logger)

    # the translation generates an xRSL object which specifies to execute
    # a shell script with script_name. If sessionid != None, results will
    # be uploaded to sid_redirect/sessionid/job_output/job_id

    try:
        (xrsl, script, script_name) = mrsltoxrsl.translate(job_dict, sessionid)
        logger.debug('translated to xRSL: %s' % xrsl)
        logger.debug('script:\n %s' % script)

    except Exception as err:
        # error during translation, pass a message
        logger.error('Error during xRSL translation: %s' % str(err))
        return (None, str(err))

    # we submit directly from here (the other version above does
    # copyFileToResource and gen_job_script generates all files)

    # we have to put the generated script somewhere..., and submit from there.
    # inputfiles are given by the user as relative paths from his home,
    # so we should use that location (and clean up afterwards).

    # write script (to user home)
    user_home = os.path.join(configuration.user_home, client_dir)
    script_path = os.path.abspath(os.path.join(user_home, script_name))
    write_file(script, script_path, logger)

    os.chdir(user_home)

    try:
        logger.debug('submitting job to ARC')
        session = arcwrapper.Ui(user_home)
        arc_job_ids = session.submit(xrsl)

        # if no exception occurred, we are done:

        job_dict['ARCID'] = arc_job_ids[0]
        job_dict['SESSIONID'] = sessionid

        msg = 'OK'
        result = job_dict

    # when errors occurred, pass a message to the caller.
    except arcwrapper.ARCWrapperError as err:
        msg = err.what()
        result = None  # unsuccessful
    except arcwrapper.NoProxyError as err:
        msg = 'No Proxy found: %s' % err.what()
        result = None  # unsuccessful
    except Exception as err:
        msg = str(err)
        result = None  # unsuccessful

    # always remove the generated script
    os.remove(script_name)
    # and remove the created links immediately if failed
    if not result:
        for (_, link) in linklist:
            os.remove(link)
        logger.error('Unsuccessful ARC job submission: %s' % msg)
    else:
        logger.debug('submitted to ARC as job %s' % job_dict['ARCID'])
    return (result, msg)
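
A minimal sketch of a make_symlink helper matching the (dest, loc, logger)
call signature used above; the body and the force keyword are assumptions
here and may differ from the project's actual helper.

import os

def make_symlink(dest, link, logger, force=False):
    """Create a symlink at link pointing to dest; return True on success."""
    # NOTE: illustrative sketch only - not the project's actual implementation
    try:
        if force and os.path.islink(link):
            os.remove(link)
        os.symlink(dest, link)
        return True
    except Exception as exc:
        logger.error('could not symlink %s -> %s: %s' % (link, dest, exc))
        return False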
Example #6
def handle_package_upload(
    real_src,
    relative_src,
    client_id,
    configuration,
    submit_mrslfiles,
    dst,
):
    """A file package was uploaded (eg. .zip file). Extract the content and
    submit mrsl files if submit_mrsl_files is True.
    """
    logger = configuration.logger
    msg = ''
    status = True

    logger.info("handle_package_upload %s %s %s" %
                (real_src, relative_src, dst))

    client_dir = client_id_dir(client_id)

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name

    base_dir = os.path.abspath(os.path.join(configuration.user_home,
                                            client_dir)) + os.sep

    # Unpack in same directory unless dst is given

    if not dst:
        real_dst = os.path.abspath(os.path.dirname(real_src))
    elif os.path.isabs(dst):
        real_dst = os.path.abspath(dst)
    else:
        real_dst = os.path.join(base_dir, dst)
    real_dst += os.sep
    mrslfiles_to_parse = []

    real_src_lower = real_src.lower()
    if real_src_lower.endswith('.zip'):

        # Handle .zip file

        msg += "Received '%s' for unpacking. " % relative_src
        try:
            zip_object = zipfile.ZipFile(real_src, 'r', allowZip64=True)
        except Exception as exc:
            logger.error("open zip failed: %s" % exc)
            msg += 'Could not open zipfile: %s! ' % exc
            return (False, msg)

        logger.info("unpack entries of %s to %s" %
                    (real_src, real_dst))
        for zip_entry in zip_object.infolist():
            entry_filename = force_utf8(zip_entry.filename)
            msg += 'Extracting: %s . ' % entry_filename

            # write zip_entry to disk

            # IMPORTANT: we must abs-expand for valid_user_path_name check
            #            otherwise it will incorrectly fail on e.g. abc/
            #            dir entry in archive
            local_zip_entry_name = os.path.join(real_dst, entry_filename)
            valid_status, valid_err = valid_user_path_name(
                entry_filename, os.path.abspath(local_zip_entry_name),
                base_dir)
            if not valid_status:
                status = False
                msg += "Filename validation error: %s! " % valid_err
                continue

            # create sub dir(s) if missing

            zip_entry_dir = os.path.dirname(local_zip_entry_name)

            if not os.path.isdir(zip_entry_dir):
                msg += 'Creating dir %s . ' % entry_filename
                try:
                    os.makedirs(zip_entry_dir, 0o775)
                except Exception as exc:
                    logger.error("create directory failed: %s" % exc)
                    msg += 'Error creating directory: %s! ' % exc
                    status = False
                    continue

            if os.path.isdir(local_zip_entry_name):
                logger.debug("nothing more to do for dir entry: %s" %
                             local_zip_entry_name)
                continue

            try:
                zip_data = zip_object.read(zip_entry.filename)
            except Exception as exc:
                logger.error("read data in %s failed: %s" %
                             (zip_entry.filename, exc))
                msg += 'Error reading %s :: %s! ' % (zip_entry.filename, exc)
                status = False
                continue

            # TODO: can we detect and ignore symlinks?
            # Zip format is horribly designed/documented:
            # http://www.pkware.com/documents/casestudies/APPNOTE.TXT
            # I haven't managed to find a way to detect symlinks. Thus
            # they are simply created as files containing the name they
            # were supposed to link to: This is inconsistent but safe :-S
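            # One possible, untested approach is to inspect the unix mode
            # stored in the upper bits of zip_entry.external_attr, e.g.
            #   stat.S_ISLNK(zip_entry.external_attr >> 16)
            # but that only helps for archives written with unix attributes.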

            # write file - symbolic links are written as files! (good for
            # security).

            # NB: Needs to use undecoded filename here

            if not write_file(zip_data, local_zip_entry_name, logger) and \
                    not os.path.exists(local_zip_entry_name):
                msg += 'Error unpacking %s to disk! ' % entry_filename
                status = False
                continue

            # get the size as the OS sees it

            try:
                __ = os.path.getsize(local_zip_entry_name)
            except Exception as exc:
                logger.warning("unpack may have failed: %s" % exc)
                msg += \
                    'File %s unpacked, but could not get file size (%s)! '\
                    % (entry_filename, exc)
                status = False
                continue

            # Check if the extension is .mRSL

            if local_zip_entry_name.upper().endswith('.MRSL'):

                # A .mrsl file was included in the package!

                mrslfiles_to_parse.append(local_zip_entry_name)
    elif real_src_lower.endswith('.tar') or \
            real_src_lower.endswith('.tar.gz') or \
            real_src_lower.endswith('.tgz') or \
            real_src_lower.endswith('.tar.bz2') or \
            real_src_lower.endswith('.tbz'):

        # Handle possibly compressed .tar files

        if real_src_lower.endswith('.tar.gz') or \
                real_src_lower.endswith('.tgz'):
            msg += "Received '%s' for unpacking. " % relative_src
            try:
                tar_object = tarfile.open(real_src, 'r:gz')
                tar_file_content = tarfile.TarFile.gzopen(real_src)
            except Exception as exc:
                logger.error("open tar gz failed: %s" % exc)
                msg += 'Could not open .tar.gz file: %s! ' % exc
                return (False, msg)
        elif real_src_lower.endswith('.tar.bz2') or \
                real_src_lower.endswith('.tbz'):
            msg += "Received '%s' for unpacking. " % relative_src
            try:
                tar_object = tarfile.open(real_src, 'r:bz2')
                tar_file_content = tarfile.TarFile.bz2open(real_src)
            except Exception as exc:
                logger.error("open tar bz failed: %s" % exc)
                msg += 'Could not open .tar.bz2 file: %s! ' % exc
                return (False, msg)
        else:
            try:
                tar_object = tarfile.open(real_src, 'r')
                tar_file_content = tarfile.TarFile.open(real_src)
            except Exception as exc:
                logger.error("open tar failed: %s" % exc)
                msg += 'Could not open .tar file: %s! ' % exc
                return (False, msg)

        logger.info("unpack entries of %s to %s" %
                    (real_src, real_dst))
        for tar_entry in tar_object:
            entry_filename = force_utf8(tar_entry.name)
            msg += 'Extracting: %s . ' % entry_filename

            # write tar_entry to disk

            # IMPORTANT: we must abs-expand for valid_user_path_name check
            #            otherwise it will incorrectly fail on e.g. abc/
            #            dir entry in archive
            local_tar_entry_name = os.path.join(real_dst, entry_filename)
            valid_status, valid_err = valid_user_path_name(
                entry_filename, os.path.abspath(local_tar_entry_name),
                base_dir)
            if not valid_status:
                status = False
                msg += "Filename validation error: %s! " % valid_err
                continue

            # Found (possibly empty) dir entry - append a separator so dirname
            # below keeps the dir itself rather than stripping to its parent

            if tar_entry.isdir():
                logger.debug("empty dir %s - include in parent creation" %
                             local_tar_entry_name)
                local_tar_entry_name += os.sep

            # create sub dir(s) if missing

            tar_entry_dir = os.path.dirname(local_tar_entry_name)

            if not os.path.isdir(tar_entry_dir):
                logger.debug("make tar parent dir: %s" % tar_entry_dir)
                msg += 'Creating dir %s . ' % entry_filename
                try:
                    os.makedirs(tar_entry_dir, 0o775)
                except Exception as exc:
                    logger.error("create directory failed: %s" % exc)
                    msg += 'Error creating directory %s! ' % exc
                    status = False
                    continue

            if tar_entry.isdir():

                # directory created above - nothing more to do

                continue

            elif not tar_entry.isfile():

                # not a regular file - symlinks are ignored to avoid illegal
                # access

                msg += 'Skipping %s: not a regular file or directory! ' % \
                       entry_filename
                status = False
                continue

            # write file!
            # NB: Need to use undecoded filename here

            if not write_file(tar_file_content.extractfile(tar_entry).read(),
                              local_tar_entry_name,
                              logger):
                msg += 'Error unpacking file %s to disk! ' % entry_filename
                status = False
                continue

            # get the size as the OS sees it

            try:
                __ = os.path.getsize(local_tar_entry_name)
            except Exception as exc:
                logger.warning("file save may have failed: %s" % exc)
                msg += \
                    'File %s unpacked, but could not get file size (%s)! ' % \
                    (entry_filename, exc)
                status = False
                continue

            # Check if the extension is .mRSL

            if local_tar_entry_name.upper().endswith('.MRSL'):

                # A .mrsl file was included in the package!

                mrslfiles_to_parse.append(local_tar_entry_name)
    else:
        logger.error("Unpack called on unsupported archive: %s" % real_src)
        msg += "Unknown/unsupported archive format: %s" % relative_src
        return (False, msg)

    if not status:
        msg = """Unpacked archive with one or more errors: 
%s""" % msg
        return (status, msg)

    # submit mrsl files to the parser. It should be done from within this
    # function to keep the right order if multiple files are created in the
    # html form.

    submitstatuslist = []
    if configuration.site_enable_jobs and submit_mrslfiles:

        # Please note that base_dir must end in slash to avoid access to other
        # user dirs when own name is a prefix of another user name

        base_dir = \
            os.path.abspath(os.path.join(configuration.user_home,
                                         client_dir)) + os.sep
        for mrslfile in mrslfiles_to_parse:
            (job_status, parse_msg, job_id) = new_job(mrslfile, client_id,
                                                      configuration, False, True)
            relative_filename = os.sep + mrslfile.replace(base_dir, '')
            submitstatus = {'object_type': 'submitstatus',
                            'name': relative_filename}
            if not job_status:
                submitstatus['status'] = False
                submitstatus['job_id'] = job_id
                submitstatus['message'] = parse_msg
            else:

                # msg += "<h2>Failure</h2><br>\n"
                # msg += parse_msg
                # return(False, msg)

                submitstatus['status'] = True
                submitstatus['job_id'] = job_id

            # msg += "<h2>%s Success</h2><br>\n" % mrslfile
            # msg += parse_msg

            submitstatuslist.append(submitstatus)
    return (status, submitstatuslist)
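
A minimal sketch of the chroot-style prefix check that helpers such as
valid_user_path / valid_user_path_name in these examples build on; the real
helpers also handle symlink resolution, shared folders and invisible paths,
so this is an assumption-laden illustration only.

import os

def _hypothetical_inside_base(path, base_dir):
    """True if the fully resolved path stays inside base_dir."""
    # base_dir must end in a separator so that e.g. /home/user does not
    # prefix-match /home/user2
    base_dir = base_dir.rstrip(os.sep) + os.sep
    real_path = os.path.realpath(path)
    return real_path + os.sep == base_dir or real_path.startswith(base_dir)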