Пример #1
0
def update_section_helper(client_id,
                          configuration,
                          section_filename,
                          changes,
                          defaults,
                          create_missing=True):
    """Load the pickled settings section for client_id, merge in defaults and
    changes, and write it back.

    If the pickle is missing it is created when create_missing is set and an
    Exception is raised otherwise. Entries from defaults only fill gaps while
    entries from changes always win. Returns the resulting dict.
    """

    section_path = os.path.join(configuration.user_settings,
                                client_id_dir(client_id), section_filename)
    if os.path.exists(section_path):
        section_dict = unpickle(section_path, configuration.logger)
    elif create_missing:
        section_dict = {}
    else:
        raise Exception('no %s file to update!' % section_filename)
    # Fill any missing defaults before applying the requested changes
    for (key, val) in defaults.items():
        section_dict.setdefault(key, val)
    section_dict.update(changes)
    if not pickle(section_dict, section_path, configuration.logger):
        raise Exception('could not save updated %s file!' % section_filename)
    return section_dict
Пример #2
0
def save_twofactor_session(configuration,
                           client_id,
                           session_key,
                           user_addr,
                           user_agent,
                           session_start,
                           session_end=-1):
    """Persist the twofactor session dict for client_id.

    A negative session_end means "derive it from the configured cookie TTL".
    In strict address mode an address-qualified symlink is created next to
    the session file; if that fails the session file is deleted again so no
    half-created session remains. Returns the final save status.
    """
    _logger = configuration.logger
    if configuration.site_enable_gdp:
        # GDP sites key sessions on the base (non-alias) client id
        client_id = get_base_client_id(configuration,
                                       client_id,
                                       expand_oid_alias=False)
    if session_end < 0:
        session_end = session_start + twofactor_cookie_ttl
    session_path = os.path.join(configuration.twofactor_home, session_key)
    session_data = {'client_id': client_id,
                    'session_key': session_key,
                    'user_addr': user_addr,
                    'user_agent': user_agent,
                    'session_start': session_start,
                    'session_end': session_end}
    status = pickle(session_data, session_path, configuration.logger)
    if status and configuration.site_twofactor_strict_address:
        link_name = "%s_%s" % (user_addr, session_key)
        session_path_link = os.path.join(configuration.twofactor_home,
                                         link_name)
        status = make_symlink(session_key, session_path_link, _logger,
                              force=False)
        if not status:
            # Roll back so we never leave a session without its strict link
            delete_file(session_path, _logger)
    return status
Пример #3
0
def save_queue(queue, path, logger):
    """Pickle the job queue to path so it can be reloaded quickly later."""

    # The attached logger is not picklable, so detach it before saving
    queue.logger = None
    return io.pickle(queue, path, logger)
Пример #4
0
def save_queue(queue, path, logger):
    """Pickle the job queue to path so it can be reloaded quickly later."""

    # The attached logger is not picklable, so detach it before saving
    queue.logger = None
    return io.pickle(queue, path, logger)
Пример #5
0
def migrate_job(config, job, peer):
    """Migrate job to the peer server by saving and uploading its pickled
    mRSL file.

    Returns True on success and False otherwise. NOTE: the actual upload is
    currently disabled until migration is fully supported, so the upload
    check always fails for now.
    """
    protocol = 'https'
    port = ''

    server = peer['fqdn']

    # Remove schedule hint from job before migration

    del job['SCHEDULE_HINT']

    # Make sure legacy jobs don't fail
    # NOTE: use 'in' instead of dict.has_key, which was removed in python3

    if 'MIGRATE_COUNT' not in job:
        job['MIGRATE_COUNT'] = str(0)

    # Add or increment migration counter

    migrate_count = int(job['MIGRATE_COUNT']) + 1
    job['MIGRATE_COUNT'] = str(migrate_count)

    # TODO: only upload if job is not already replicated at
    # remote server
    # TMP!

    steal_job = False

    if not steal_job:

        # upload pickled job to server
        # NOTE: os.path.join works whether or not mrsl_files_dir has a
        # trailing separator, unlike the old string concatenation

        client_dir = client_id_dir(job['USER_CERT'])
        mrsl_filename = os.path.join(config.mrsl_files_dir, client_dir,
                                     job['JOB_ID'] + '.mRSL')
        result = pickle(job, mrsl_filename, config.logger)
        if not result:
            config.logger.error('Aborting migration of job %s (%s)',
                                job['JOB_ID'], result)
            return False

        dest = mrsl_filename

        # TMP!
        # upload_reply = put_data(config, mrsl_filename, protocol, server, port, dest)

        config.logger.warning('Actual migration disabled until fully supported'
                              )
        upload_reply = (-1,
                        'Actual migration disabled until fully supported'
                        )
        if upload_reply[0] != http_success:
            return False

    # migration_msg = ""
    # migration_reply = put_data(config, protocol, server, port, migration_msg)

    return True
Пример #6
0
def migrate_job(config, job, peer):
    """Migrate job to the peer server by saving and uploading its pickled
    mRSL file.

    Returns True on success and False otherwise. NOTE: the actual upload is
    currently disabled until migration is fully supported, so the upload
    check always fails for now.
    """
    protocol = 'https'
    port = ''

    server = peer['fqdn']

    # Remove schedule hint from job before migration

    del job['SCHEDULE_HINT']

    # Make sure legacy jobs don't fail
    # NOTE: use 'in' instead of dict.has_key, which was removed in python3

    if 'MIGRATE_COUNT' not in job:
        job['MIGRATE_COUNT'] = str(0)

    # Add or increment migration counter

    migrate_count = int(job['MIGRATE_COUNT']) + 1
    job['MIGRATE_COUNT'] = str(migrate_count)

    # TODO: only upload if job is not already replicated at
    # remote server
    # TMP!

    steal_job = False

    if not steal_job:

        # upload pickled job to server
        # NOTE: os.path.join works whether or not mrsl_files_dir has a
        # trailing separator, unlike the old string concatenation

        client_dir = client_id_dir(job['USER_CERT'])
        mrsl_filename = os.path.join(config.mrsl_files_dir, client_dir,
                                     job['JOB_ID'] + '.mRSL')
        result = pickle(job, mrsl_filename, config.logger)
        if not result:
            config.logger.error('Aborting migration of job %s (%s)',
                                job['JOB_ID'], result)
            return False

        dest = mrsl_filename

        # TMP!
        # upload_reply = put_data(config, mrsl_filename, protocol, server, port, dest)

        config.logger.warning('Actual migration disabled until fully supported'
                              )
        upload_reply = (-1,
                        'Actual migration disabled until fully supported'
                        )
        if upload_reply[0] != http_success:
            return False

    # migration_msg = ""
    # migration_reply = put_data(config, protocol, server, port, migration_msg)

    return True
Пример #7
0
    def __flush(self):
        """Write every cached statistics entry to its own pickle file and
        reset the in-memory cache."""

        # Persist each (stat_type, stat_value) pair under its own .pck file
        base_dir = self.__configuration.gridstat_files_dir
        for (stat_type, stat_values) in self.__gridstat_dict.items():
            for (stat_value, entry) in stat_values.items():
                filename = base_dir + stat_type + os.sep + stat_value + '.pck'
                filedir = os.path.dirname(filename)
                if not os.path.exists(filedir):
                    os.makedirs(filedir)
                pickle(entry, filename, self.__logger)

        # When dict has been flushed, clear it to prevent heavy memory load

        self.__gridstat_dict = {}
Пример #8
0
    def __flush(self):
        """Write every cached statistics entry to its own pickle file and
        reset the in-memory cache."""

        # Persist each (stat_type, stat_value) pair under its own .pck file
        base_dir = self.__configuration.gridstat_files_dir
        for (stat_type, stat_values) in self.__gridstat_dict.items():
            for (stat_value, entry) in stat_values.items():
                filename = base_dir + stat_type + os.sep + stat_value + '.pck'
                filedir = os.path.dirname(filename)
                if not os.path.exists(filedir):
                    os.makedirs(filedir)
                pickle(entry, filename, self.__logger)

        # When dict has been flushed, clear it to prevent heavy memory load

        self.__gridstat_dict = {}
Пример #9
0
def initialize_and_get_display_dict_filename(configuration, logger):
    """Return (status, filename) for the pickled live-displays dict,
    creating an empty pickle first if the file does not exist yet.

    On pickle failure the second tuple element is an error message instead
    of the filename.
    """
    filename = os.path.join(configuration.mig_server_home, 'livedisplaysdict')
    if os.path.isfile(filename):
        return (True, filename)
    logger.info(
        'display dict file %s not found, pickling a new with {} as only content'
        % filename)

    # NOTE: renamed from 'dict' to avoid shadowing the builtin
    display_dict = {}
    pickle_status = pickle(display_dict, filename, logger)
    if not pickle_status:
        return (False, 'could not pickle %s when initializing' % filename)
    return (True, filename)
Пример #10
0
def initialize_and_get_display_dict_filename(configuration, logger):
    """Return (status, filename) for the pickled live-displays dict,
    creating an empty pickle first if the file does not exist yet.

    On pickle failure the second tuple element is an error message instead
    of the filename.
    """
    # NOTE: os.path.join instead of manual os.sep concatenation
    filename = os.path.join(configuration.mig_server_home, 'livedisplaysdict')
    if os.path.isfile(filename):
        return (True, filename)
    logger.info('display dict file %s not found, pickling a new with {} as only content'
                 % filename)

    # NOTE: renamed from 'dict' to avoid shadowing the builtin
    display_dict = {}
    pickle_status = pickle(display_dict, filename, logger)
    if not pickle_status:
        return (False, 'could not pickle %s when initializing'
                 % filename)
    return (True, filename)
Пример #11
0
def _save_rate_limits(configuration, proto, rate_limits, do_lock=True):
    """Pickle the rate limits dict for proto, holding the rate-limits lock
    around the write when do_lock is set. Returns the pickle status."""
    logger = configuration.logger
    filename = "%s.%s" % (proto, _rate_limits_filename)
    rate_limits_filepath = os.path.join(configuration.mig_system_run,
                                        filename)
    if do_lock:
        rate_limits_lock = _acquire_rate_limits_lock(configuration, proto)
    result = pickle(rate_limits, rate_limits_filepath, logger)
    if do_lock:
        _release_rate_limits_lock(rate_limits_lock)

    if not result:
        logger.error("failed to save %s rate limits to %s" %
                     (proto, rate_limits_filepath))

    return result
Пример #12
0
def remove_item_from_pickled_list(
    path,
    item,
    logger,
    allow_empty_list=True,
):
    """Remove item from the pickled list stored at path and save it back.

    Returns (status, message). An unpickled empty list is accepted, while a
    falsy non-list result means the unpickle itself failed. When
    allow_empty_list is False the last remaining item cannot be removed.
    """

    list_ = unpickle(path, logger)
    output = ''
    if list_ == []:

        # OK, if the list is empty

        pass
    elif not list_:

        output += 'Failure: could not unpickle current list'
        return (False, output)

    # Check if the item is in the list

    item = item.strip()
    if item not in list_:
        output += '%s not found in list' % item
        return (False, output)

    if not allow_empty_list:
        if len(list_) <= 1:
            output += 'You cannot remove the last item'
            return (False, output)

    # ok, lets remove the item and pickle and save the new list

    try:
        list_.remove(item)
    except Exception:
        # NOTE: narrowed from a bare except that would also swallow
        # SystemExit and KeyboardInterrupt
        output += \
            'Strange error, %s could not be removed, but it seems to be in the list'\
             % item
        return (False, output)

    status = pickle(list_, path, logger)
    if not status:
        output += 'Error pickling new owners file'
        return (False, output)

    return (True, output)
Пример #13
0
def remove_item_from_pickled_list(
    path,
    item,
    logger,
    allow_empty_list=True,
    ):
    """Remove item from the pickled list stored at path and save it back.

    Returns (status, message). An unpickled empty list is accepted, while a
    falsy non-list result means the unpickle itself failed. When
    allow_empty_list is False the last remaining item cannot be removed.
    """

    list_ = unpickle(path, logger)
    output = ''
    if list_ == []:

        # OK, if the list is empty

        pass
    elif not list_:

        output += 'Failure: could not unpickle current list'
        return (False, output)

    # Check if the item is in the list

    item = item.strip()
    if item not in list_:
        output += '%s not found in list' % item
        return (False, output)

    if not allow_empty_list:
        if len(list_) <= 1:
            output += 'You cannot remove the last item'
            return (False, output)

    # ok, lets remove the item and pickle and save the new list

    try:
        list_.remove(item)
    except Exception:
        # NOTE: narrowed from a bare except that would also swallow
        # SystemExit and KeyboardInterrupt
        output += \
            'Strange error, %s could not be removed, but it seems to be in the list'\
             % item
        return (False, output)

    status = pickle(list_, path, logger)
    if not status:
        output += 'Error pickling new owners file'
        return (False, output)

    return (True, output)
Пример #14
0
def migrated_job(filename, client_id, configuration):
    """Load a migrated job file received from another server and hand it to
    grid_script.

    returns a tuple (bool status, str msg)
    """

    logger = configuration.logger
    client_dir = client_id_dir(client_id)
    job_path = os.path.abspath(
        os.path.join(configuration.server_home, client_dir, filename))

    # unpickle and enqueue received job file

    job_path_spaces = job_path.replace('\\ ', '\\\\\\ ')
    job = io.unpickle(job_path_spaces, configuration.logger)

    # TODO: update any fields to mark migration?

    if not job:
        msg = 'Fatal migration error: loading pickled job (%s) failed! ' % \
            job_path_spaces
        return (False, msg)

    job_id = job['JOB_ID']

    # save file with other mRSL files

    mrsl_filename = os.path.abspath(
        os.path.join(configuration.mrsl_files_dir, client_dir,
                     job_id + '.mRSL'))

    if not io.pickle(job, mrsl_filename, configuration.logger):
        return (False, 'Fatal error: Could not write ' + filename)

    # tell 'grid_script'

    message = 'SERVERJOBFILE ' + client_dir + '/' + job_id + '\n'

    if not io.send_message_to_grid_script(message, logger, configuration):
        return (False, 'Fatal error: Could not write to grid stdin')

    # TODO: do we need to wait for grid_script to ack job reception?
    # ... same question applies to new_job, btw.

    return (True, '%s succesfully migrated.' % job_id)
Пример #15
0
def _save_sessions(configuration,
                   proto,
                   sessions,
                   do_lock=True):
    """Save sessions dict for proto.

    When do_lock is set the sessions lock is held exclusively around the
    write. Returns the pickle status.
    """
    logger = configuration.logger
    # NOTE: removed a stray 'do_lock = None' that clobbered the caller's
    # do_lock argument and silently disabled locking entirely
    sessions_filepath = os.path.join(configuration.mig_system_run,
                                     "%s.%s"
                                     % (proto, _sessions_filename))
    if do_lock:
        sessions_lock = _acquire_sessions_lock(
            configuration, proto, exclusive=True)
    result = pickle(sessions, sessions_filepath, logger)
    if do_lock:
        _release_sessions_lock(sessions_lock)

    if not result:
        logger.error("failed to save active %s sessions to %s" %
                     (proto, sessions_filepath))

    return result
Пример #16
0
def add_item_to_pickled_list(path, item, logger):
    """Append item to the pickled list stored at path and save it again.

    Returns (status, message); duplicates and unpickle failures are
    rejected.
    """
    list_ = unpickle(path, logger)
    if list_ == []:
        # an empty list is perfectly fine
        pass
    elif not list_:
        return (False, 'Failure: could not unpickle current list')

    # Check if the item already is in the list

    if item in list_:
        return (False, '%s is already in the list' % item)

    # ok, lets add the new item and pickle and save the new list

    list_.append(item)
    if not pickle(list_, path, logger):
        return (False, 'pickle error')

    return (True, '')
Пример #17
0
def add_item_to_pickled_list(path, item, logger):
    """Append item to the pickled list stored at path and save it again.

    Returns (status, message); duplicates and unpickle failures are
    rejected.
    """
    list_ = unpickle(path, logger)
    if list_ == []:
        # an empty list is perfectly fine
        pass
    elif not list_:
        return (False, 'Failure: could not unpickle current list')

    # Check if the item already is in the list

    if item in list_:
        return (False, '%s is already in the list' % item)

    # ok, lets add the new item and pickle and save the new list

    list_.append(item)
    if not pickle(list_, path, logger):
        return (False, 'pickle error')

    return (True, '')
Пример #18
0
def main():
    """Fill vgrid and trigger dicts from SETTINGS_LIST and pickle the
    combined result to VGRID_DICT_FILE.

    Returns 0 on success and 1 on any failure.
    """
    configuration = get_configuration_object()

    # Overwrite default logger

    logger = configuration.logger = get_logger(logging.INFO)

    vgrids_dict = {}
    # NOTE: initialize status so an empty settings file cannot trigger an
    # UnboundLocalError below
    status = True
    logger.info('==================== Filling vgrids ====================')
    # with-statement guarantees the file is closed even if fill_vgrids raises
    with open(SETTINGS_LIST) as fh:
        for line in fh:
            line = line.strip()
            if line:
                status = fill_vgrids(configuration, line, vgrids_dict)
                if not status:
                    break

    if status:
        logger.info(
            '==================== Filling triggers ====================')

        status = fill_triggers(configuration, vgrids_dict)

    if status:
        logger.info(
            '==================== Writing triggers dict ====================')

        logger.info("'Pickle to file: '%s'" % VGRID_DICT_FILE)

        status = pickle(vgrids_dict, VGRID_DICT_FILE, logger)

    if status:
        return 0
    else:
        return 1
Пример #19
0
def update_section_helper(client_id, configuration, section_filename, changes,
                          defaults, create_missing=True):
    """Load the pickled settings section for client_id, merge in defaults and
    changes, and write it back.

    If the pickle is missing it is created when create_missing is set and an
    Exception is raised otherwise. Entries from defaults only fill gaps while
    entries from changes always win. Returns the resulting dict.
    """

    section_path = os.path.join(configuration.user_settings,
                                client_id_dir(client_id), section_filename)
    if os.path.exists(section_path):
        section_dict = unpickle(section_path, configuration.logger)
    elif create_missing:
        section_dict = {}
    else:
        raise Exception('no %s file to update!' % section_filename)
    # Fill any missing defaults before applying the requested changes
    for (key, val) in defaults.items():
        section_dict.setdefault(key, val)
    section_dict.update(changes)
    if not pickle(section_dict, section_path, configuration.logger):
        raise Exception('could not save updated %s file!' % section_filename)
    return section_dict
Пример #20
0
    # remove entry from dict and pickle it

    dict = unpickle(filename, logger)
    if dict == False:
        return (False, 'could not unpickle %s' % filename)

    if not dict.has_key(display_number):
        return (False, 'display %s not found in dict' % display_number)
    try:
        del dict[display_number]
    except Exception, e:
        return (False,
                'exception trying to remove %s from display dict. Exception %s'
                 % (display_number, e))

    pickle_status = pickle(dict, filename, logger)

    if not pickle_status:
        return (False, 'could not pickle %s when removing %s'
                 % (filename, display_number))
    return (True, '')


def get_dict_from_display_number(display_number, configuration, logger):
    (init_ret, filename) = \
        initialize_and_get_display_dict_filename(configuration, logger)
    if not init_ret:
        return (False, 'could not initialize')

    dict = unpickle(filename, logger)
    if dict == False:
Пример #21
0
def set_user_display_active(
    client_id,
    display_number,
    vnc_port,
    password,
    configuration,
    logger,
):
    """Register display_number as in use by client_id in the pickled
    live-displays dict.

    Fails if another user already occupies the display or if client_id
    already has a different display registered. Returns (status, message).
    """

    (init_ret, filename) = \
        initialize_and_get_display_dict_filename(configuration, logger)
    if not init_ret:
        return (False, 'could not initialize')

    (dis_ret, dis_dict) = get_dict_from_display_number(display_number,
                                                       configuration, logger)
    if not dis_ret:
        return (False, 'dict error, %s' % dis_dict)
    if dis_dict != -1:
        if dis_dict['client_id'] != client_id:

            # display occupied by another user!

            return (False, 'display %s already in use by another user!' %
                    display_number)

    # getting here means display is free or used by client_id

    # NOTE: renamed from 'dict' to avoid shadowing the builtin
    display_dict = unpickle(filename, logger)
    if display_dict == False:
        return (False, 'could not unpickle %s' % filename)

    current_display = get_users_display_number(client_id, configuration,
                                               logger)
    # Shared entry payload for both registration branches below
    entry = {
        'client_id': client_id,
        'vnc_port': vnc_port,
        'password': password
    }
    if not current_display:

        # register display

        display_dict[display_number] = entry
        pickle_status = pickle(display_dict, filename, logger)

        if not pickle_status:
            return (False, 'could not pickle %s when adding %s' %
                    (filename, display_dict[display_number]))
        logger.info(
            'successfuly registered that display %s is in use by %s in %s' %
            (display_number, client_id, filename))
        return (True, '')

    if current_display != display_number and current_display != -1:

        # problems..

        return (
            False,
            'set_user_display_active met a conflict, can not set display %s when user already has %s registered'
            % (display_number, current_display))
    else:

        # add display to dict

        display_dict[display_number] = entry
        pickle_status = pickle(display_dict, filename, logger)

        if not pickle_status:
            return (False, 'could not pickle %s when adding %s' %
                    (filename, display_dict[display_number]))

        logger.info(
            'successfuly registered that display %s is in use by %s in %s %s' %
            (display_number, client_id, display_dict, filename))
        return (True, '')
Пример #22
0
def create_empty_job(
    unique_resource_name, exe, request_cputime, sleep_factor, localjobname, execution_delay, configuration, logger
):
    """Helper to create empty job for idle resources"""

    helper_dict_filename = os.path.join(
        configuration.resource_home, unique_resource_name, "empty_job_helper_dict.%s" % exe
    )

    # Cap the scaled sleep job at the requested cputime
    max_cputime = int(request_cputime)
    scaled_cputime = int(float(configuration.cputime_for_empty_jobs) * sleep_factor)
    if scaled_cputime > max_cputime:
        cputime = max_cputime
        sleep_time = int(0.8 * cputime)
    else:
        cputime = scaled_cputime
        sleep_time = int(float(configuration.sleep_period_for_empty_jobs) * sleep_factor)

    logger.info(
        "request_cputime: %d, sleep_factor: %.1f, cputime: %d, sleep time: %d",
        max_cputime,
        sleep_factor,
        cputime,
        sleep_time,
    )
    job_id = "%s.%s.%s.%s" % (
        configuration.empty_job_name, unique_resource_name, exe, localjobname
    )
    client_id = configuration.empty_job_name

    # sessionid = configuration.empty_job_name

    job_dict = {"": ""}
    job_dict.update({
        "JOB_ID": job_id,
        "EXECUTE": ["sleep " + str(sleep_time)],
        "INPUTFILES": [],
        "OUTPUTFILES": "",
        "ARGUMENTS": "",
        "EXECUTABLES": "",
        "MOUNT": [],
        "CPUTIME": str(cputime),
        "MEMORY": 16,
        "DISK": 1,
        "EXECUTION_DELAY": str(execution_delay),
        "ENVIRONMENT": "",
        "RUNTIMEENVIRONMENT": [],
        "MAXPRICE": "0",
        "JOBNAME": "empty job",
        "USER_CERT": client_id,
    })

    # create mRSL file only containing the unique_resource_name.
    # This is used when the .status file from the empty job is
    # uploaded, to find the unique name of the resource to be able
    # to start the exe again if continuous is True
    # if not os.path.isfile(helper_dict_filename):

    helper_dict = {
        "JOB_ID": job_id,
        "UNIQUE_RESOURCE_NAME": unique_resource_name,
        "EXE": exe,
        "IS_EMPTY_JOB_HELPER_DICT": True,
        "LOCALJOBNAME": localjobname,
    }

    pickle(helper_dict, helper_dict_filename, logger)

    return (job_dict, "OK")
Пример #23
0
def save_schedule_cache(cache, path, logger):
    """Pickle the schedule cache to path so it can be reloaded quickly."""
    return io.pickle(cache, path, logger)
Пример #24
0
def save_schedule_cache(cache, path, logger):
    """Pickle the schedule cache to path so it can be reloaded quickly."""
    return io.pickle(cache, path, logger)
Пример #25
0
                logger.error('Could not remove link %s: %s' % (link, err))


    job_dict['STATUS'] = status
    job_dict[ status + '_TIMESTAMP' ] = timestamp

    if not status == 'FINISHED':
        # Generate execution history

        if not job_dict.has_key('EXECUTION_HISTORY'):
            job_dict['EXECUTION_HISTORY'] = []

        history_dict = {
            'QUEUED_TIMESTAMP': job_dict['QUEUED_TIMESTAMP'],
            'EXECUTING_TIMESTAMP': job_dict['EXECUTING_TIMESTAMP'],
            status + '_TIMESTAMP': timestamp,
            status + '_MESSAGE': msg,
            'UNIQUE_RESOURCE_NAME': job_dict['UNIQUE_RESOURCE_NAME'],
        }

        job_dict['EXECUTION_HISTORY'].append(history_dict)

    # save into mrsl

    mrsl_file = os.path.join(configuration.mrsl_files_dir,
                                 client_dir, 
                                 job_dict['JOB_ID'] + '.mRSL')
    io.pickle(job_dict, mrsl_file, logger)

    return
Пример #26
0
                os.remove(link)
            except Exception, err:
                logger.error('Could not remove link %s: %s' % (link, err))

    job_dict['STATUS'] = status
    job_dict[status + '_TIMESTAMP'] = timestamp

    if not status == 'FINISHED':
        # Generate execution history

        if not job_dict.has_key('EXECUTION_HISTORY'):
            job_dict['EXECUTION_HISTORY'] = []

        history_dict = {
            'QUEUED_TIMESTAMP': job_dict['QUEUED_TIMESTAMP'],
            'EXECUTING_TIMESTAMP': job_dict['EXECUTING_TIMESTAMP'],
            status + '_TIMESTAMP': timestamp,
            status + '_MESSAGE': msg,
            'UNIQUE_RESOURCE_NAME': job_dict['UNIQUE_RESOURCE_NAME'],
        }

        job_dict['EXECUTION_HISTORY'].append(history_dict)

    # save into mrsl

    mrsl_file = os.path.join(configuration.mrsl_files_dir, client_dir,
                             job_dict['JOB_ID'] + '.mRSL')
    io.pickle(job_dict, mrsl_file, logger)

    return
Пример #27
0
        new_dict[key] = value_dict['Value']

    new_dict['CREATOR'] = client_id
    new_dict['CREATED_TIMESTAMP'] = datetime.datetime.now()

    # Create settings dir for any old users
    try:
        settings_dir = os.path.join(configuration.user_settings, client_dir)
        os.mkdir(settings_dir)
    except:
        pass

    pickle_filename = os.path.join(configuration.user_settings, client_dir,
                                   destination)

    if not pickle(new_dict, pickle_filename, configuration.logger):
        msg = 'Error saving pickled data!'
        return (False, msg)

    # everything ok

    return (True, '')


def parse_and_save_settings(filename, client_id, configuration):
    """Validate and write settings entries from filename"""
    status = parse_and_save_pickle(filename, settings_filename,
                                   get_settings_fields(), client_id,
                                   configuration, True, True)
    if status[0]:
        mark_user_modified(configuration, client_id)
Пример #28
0
def create_empty_job(
    unique_resource_name,
    exe,
    request_cputime,
    sleep_factor,
    localjobname,
    execution_delay,
    configuration,
    logger,
):
    """Helper to create empty job for idle resources"""

    helper_dict_filename = os.path.join(configuration.resource_home,
                                        unique_resource_name,
                                        'empty_job_helper_dict.%s' % exe)

    # Cap the scaled sleep job at the requested cputime
    max_cputime = int(request_cputime)
    scaled_cputime = int(float(configuration.cputime_for_empty_jobs)
                         * sleep_factor)
    if scaled_cputime > max_cputime:
        cputime = max_cputime
        sleep_time = int(0.8 * cputime)
    else:
        cputime = scaled_cputime
        sleep_time = int(float(configuration.sleep_period_for_empty_jobs)
                         * sleep_factor)

    logger.info(
        'request_cputime: %d, sleep_factor: %.1f, cputime: %d, sleep time: %d',
        max_cputime, sleep_factor, cputime, sleep_time)
    job_id = '%s.%s.%s.%s' % (configuration.empty_job_name,
                              unique_resource_name, exe, localjobname)
    client_id = configuration.empty_job_name

    # sessionid = configuration.empty_job_name

    job_dict = {'': ''}
    job_dict.update({
        'JOB_ID': job_id,
        'EXECUTE': ['sleep ' + str(sleep_time)],
        'INPUTFILES': [],
        'OUTPUTFILES': '',
        'ARGUMENTS': '',
        'EXECUTABLES': '',
        'MOUNT': [],
        'CPUTIME': str(cputime),
        'MEMORY': 16,
        'DISK': 1,
        'EXECUTION_DELAY': str(execution_delay),
        'ENVIRONMENT': '',
        'RUNTIMEENVIRONMENT': [],
        'MAXPRICE': '0',
        'JOBNAME': 'empty job',
        'USER_CERT': client_id,
    })

    # create mRSL file only containing the unique_resource_name.
    # This is used when the .status file from the empty job is
    # uploaded, to find the unique name of the resource to be able
    # to start the exe again if continuous is True
    # if not os.path.isfile(helper_dict_filename):

    helper_dict = {
        'JOB_ID': job_id,
        'UNIQUE_RESOURCE_NAME': unique_resource_name,
        'EXE': exe,
        'IS_EMPTY_JOB_HELPER_DICT': True,
        'LOCALJOBNAME': localjobname,
    }

    pickle(helper_dict, helper_dict_filename, logger)

    return (job_dict, 'OK')
Пример #29
0
        except arc.ARCWrapperError, err:
            return (False, err.what())
        except arc.NoProxyError, err:
            return (False, 'No Proxy found: %s' % err.what())
        except Exception, err:
            return (False, err.__str__())

    # save file
    if outfile == 'AUTOMATIC':
        filename = \
            os.path.abspath(os.path.join(configuration.mrsl_files_dir,
                            client_dir, job_id + '.mRSL'))
    else:
        filename = outfile

    if not pickle(global_dict, filename, logger):
        return (False, 'Fatal error: Could not write %s' % filename)

    if not outfile == 'AUTOMATIC':

        # an outfile was specified, so this is just for testing - dont tell
        # grid_script

        return (True, '')

    # tell 'grid_script'

    message = 'USERJOBFILE %s/%s\n' % (client_dir, job_id)

    if not send_message_to_grid_script(message, logger, configuration):
        return (False, '''Fatal error: Could not get exclusive access or write
Пример #30
0
def set_user_display_active(
    client_id,
    display_number,
    vnc_port,
    password,
    configuration,
    logger,
    ):
    """Register display_number as in use by client_id in the pickled
    live-displays dict.

    Fails if another user already occupies the display or if client_id
    already has a different display registered. Returns (status, message).
    """

    (init_ret, filename) = \
        initialize_and_get_display_dict_filename(configuration, logger)
    if not init_ret:
        return (False, 'could not initialize')

    (dis_ret, dis_dict) = get_dict_from_display_number(display_number,
            configuration, logger)
    if not dis_ret:
        return (False, 'dict error, %s' % dis_dict)
    if dis_dict != -1:
        if dis_dict['client_id'] != client_id:

            # display occupied by another user!

            return (False, 'display %s already in use by another user!'
                     % display_number)

    # getting here means display is free or used by client_id

    # NOTE: renamed from 'dict' to avoid shadowing the builtin
    display_dict = unpickle(filename, logger)
    if display_dict == False:
        return (False, 'could not unpickle %s' % filename)

    current_display = get_users_display_number(client_id,
            configuration, logger)
    # Shared entry payload for both registration branches below
    entry = {'client_id': client_id,
             'vnc_port': vnc_port,
             'password': password}
    if not current_display:

        # register display

        display_dict[display_number] = entry
        pickle_status = pickle(display_dict, filename, logger)

        if not pickle_status:
            return (False, 'could not pickle %s when adding %s'
                     % (filename, display_dict[display_number]))
        logger.info('successfuly registered that display %s is in use by %s in %s'
                     % (display_number, client_id, filename))
        return (True, '')

    if current_display != display_number and current_display != -1:

        # problems..

        return (False,
                'set_user_display_active met a conflict, can not set display %s when user already has %s registered'
                 % (display_number, current_display))
    else:

        # add display to dict

        display_dict[display_number] = entry
        pickle_status = pickle(display_dict, filename, logger)

        if not pickle_status:
            return (False, 'could not pickle %s when adding %s'
                     % (filename, display_dict[display_number]))

        logger.info('successfuly registered that display %s is in use by %s in %s %s'
                     % (display_number, client_id, display_dict, filename))
        return (True, '')
Пример #31
0
def requeue_job(
    job_dict,
    failed_msg,
    job_queue,
    executing_queue,
    configuration,
    logger,
    ):
    """Requeue a failed job by moving it from executing_queue to job_queue"""
    if not job_dict:
        msg = 'requeue_job: %s is no longer in executing queue'
        print failed_msg
        logger.info(failed_msg)
    else:
        executing_queue.dequeue_job_by_id(job_dict['JOB_ID'])
        failed_timestamp = time.gmtime()

        # Clean up the server for files assosiated with the executing job

        if not job_dict.has_key('SESSIONID')\
             or not job_dict.has_key('IOSESSIONID')\
             or not server_cleanup(
            job_dict['SESSIONID'],
            job_dict['IOSESSIONID'],
            job_dict['LOCALJOBNAME'],
            job_dict['JOB_ID'],
            configuration,
            logger,
            ):
            logger.error('could not clean up MiG server')
            print 'CLEAN UP FAILED'

        client_dir = client_id_dir(job_dict['USER_CERT'])

        # Remove job result files, if they have arrived as the result is not valid
        # This can happen with sandboxes as they can't be stopped serverside

        status_prefix = os.path.join(configuration.user_home, client_dir,
                                     job_dict['JOB_ID'])
        io.delete_file(status_prefix + '.status', logger)
        io.delete_file(status_prefix + '.stdout', logger)
        io.delete_file(status_prefix + '.stderr', logger)

        # Generate execution history

        if not job_dict.has_key('EXECUTION_HISTORY'):
            job_dict['EXECUTION_HISTORY'] = []

        history_dict = {
            'QUEUED_TIMESTAMP': job_dict['QUEUED_TIMESTAMP'],
            'EXECUTING_TIMESTAMP': job_dict['EXECUTING_TIMESTAMP'],
            'FAILED_TIMESTAMP': failed_timestamp,
            'FAILED_MESSAGE': failed_msg,
            'UNIQUE_RESOURCE_NAME': job_dict['UNIQUE_RESOURCE_NAME'],
            'RESOURCE_VGRID': job_dict.get('RESOURCE_VGRID', ''),
            'PUBLICNAME': job_dict.get('PUBLICNAME', 'HIDDEN'),
            }

        job_dict['EXECUTION_HISTORY'].append(history_dict)

        # Retry if retries left

        job_dict['RETRY_COUNT'] = job_dict.get('RETRY_COUNT', 0) + 1

        unique_resource_name = job_dict['UNIQUE_RESOURCE_NAME']

        mrsl_file = os.path.join(configuration.mrsl_files_dir,
                                 client_dir, job_dict['JOB_ID']
                                  + '.mRSL')
        job_retries = job_dict.get('RETRIES', configuration.job_retries)
        if job_dict['RETRY_COUNT'] <= job_retries:
            job_dict['STATUS'] = 'QUEUED'
            job_dict['QUEUED_TIMESTAMP'] = time.gmtime()
            del job_dict['EXECUTING_TIMESTAMP']
            del job_dict['UNIQUE_RESOURCE_NAME']
            del job_dict['EXE']
            del job_dict['RESOURCE_CONFIG']
            del job_dict['LOCALJOBNAME']
            if job_dict.has_key('SESSIONID'):
                del job_dict['SESSIONID']
            if job_dict.has_key('IOSESSIONID'):
                del job_dict['IOSESSIONID']
            if job_dict.has_key('PUBLICNAME'):
                del job_dict['PUBLICNAME']
            if job_dict.has_key('RESOURCE_VGRID'):
                del job_dict['RESOURCE_VGRID']

            io.pickle(job_dict, mrsl_file, logger)

            # Requeue job last in queue for retry later

            job_queue.enqueue_job(job_dict, job_queue.queue_length())

            msg = \
                '%s failed to execute job %s - requeue for retry %d of %d'\
                 % (unique_resource_name, job_dict['JOB_ID'],
                    job_dict['RETRY_COUNT'], job_retries)
            print msg
            logger.info(msg)
        else:

            job_dict['STATUS'] = 'FAILED'
            job_dict['FAILED_TIMESTAMP'] = failed_timestamp
            io.pickle(job_dict, mrsl_file, logger)

            # tell the user the sad news

            msg = 'Gave up on executing job %s after %d retries'\
                 % (job_dict['JOB_ID'], job_retries)
            logger.error(msg)
            print msg
            notify_user_thread(
                job_dict,
                configuration.myfiles_py_location,
                'FAILED',
                logger,
                False,
                configuration,
                )
Пример #32
0
    dict = unpickle(filename, logger)
    if dict == False:
        return (False, 'could not unpickle %s' % filename)

    if not dict.has_key(display_number):
        return (False, 'display %s not found in dict' % display_number)
    try:
        del dict[display_number]
    except Exception, e:
        return (
            False,
            'exception trying to remove %s from display dict. Exception %s' %
            (display_number, e))

    pickle_status = pickle(dict, filename, logger)

    if not pickle_status:
        return (False, 'could not pickle %s when removing %s' %
                (filename, display_number))
    return (True, '')


def get_dict_from_display_number(display_number, configuration, logger):
    (init_ret, filename) = \
        initialize_and_get_display_dict_filename(configuration, logger)
    if not init_ret:
        return (False, 'could not initialize')

    dict = unpickle(filename, logger)
    if dict == False:
Пример #33
0
def main(client_id, user_arguments_dict):
    """Handle live I/O requests for running jobs.

    Presents an interactive request form when called without a job_id (or
    with an interactive action), and otherwise writes a .getupdate or
    .sendupdate request file and copies it to the frontend of the resource
    executing each matching job. Returns the usual (output_objects, status)
    pair for the front end.
    """

    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id, op_header=False)
    client_dir = client_id_dir(client_id)
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
        )
    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)

    # Accepted values are lists; single-value fields use the last entry
    job_ids = accepted['job_id']
    action = accepted['action'][-1]
    src = accepted['src']
    dst = accepted['dst'][-1]

    title_entry = find_entry(output_objects, 'title')
    title_entry['text'] = '%s live I/O' % configuration.short_title
    output_objects.append({'object_type': 'header', 'text'
                           : 'Request live communication with jobs'})

    # Reject unsupported actions up front
    if not action in valid_actions:
        output_objects.append({'object_type': 'error_text', 'text'
                               : 'Invalid action "%s" (supported: %s)' % \
                               (action, ', '.join(valid_actions))})
        return (output_objects, returnvalues.CLIENT_ERROR)

    if action in post_actions and not correct_handler('POST'):
        output_objects.append(
            {'object_type': 'error_text', 'text'
             : 'Only accepting POST requests to prevent unintended updates'})
        return (output_objects, returnvalues.CLIENT_ERROR)

    # No job_id or an interactive action: show the request form and return
    if not job_ids or action in interactive_actions:
        job_id = ''
        if job_ids:
            job_id = job_ids[-1]
        output_objects.append({'object_type': 'text', 'text'
                          : '''
Fill in the live I/O details below to request communication with a running
job.
Job ID can be a full ID or a wild card pattern using "*" and "?" to match one
or more of your job IDs.
Use send output without source and destination paths to request upload of the
default stdio files from the job on the resource to the associated job_output
directory in your MiG home.
Destination is a always handled as a directory path to put source files into.
Source and destination paths are always taken relative to the job execution
directory on the resource and your MiG home respectively.
'''})
        html = '''
<table class="liveio">
<tr>
<td>
<form method="post" action="liveio.py">
<table class="liveio">
<tr><td class=centertext>
</td></tr>
<tr><td>
Action:<br />
<input type=radio name=action checked value="send" />send output
<input type=radio name=action value="get" />get input
</td></tr>
<tr><td>
Job ID:<br />
<input type=text size=60 name=job_id value="%s" />
</td></tr>
<tr><td>
Source path(s):<br />
<div id="srcfields">
<input type=text size=60 name=src value="" /><br />
</div>
</td></tr>
<tr><td>
Destination path:<br />
<input type=text size=60 name=dst value="" />
</td></tr>
<tr><td>
<input type="submit" value="Send request" />
</td></tr>
</table>
</form>
</td>
<td>
<script type="text/javascript">
fields = 1;
max_fields = 64;
function addInput() {
    if (fields < max_fields) {
        document.getElementById("srcfields").innerHTML += "<input type=text size=60 name=src value='' /><br />";
        fields += 1;
    } else {
        alert("Maximum " + max_fields + " source fields allowed!");
        document.form.add.disabled=true;
    }
}
</script>
<form name="addsrcform">
<input type="button" onclick="addInput(); return false;" name="add" value="Add another source field" />
</form>
</td>
</tr>
</table>
''' % job_id
        output_objects.append({'object_type': 'html_form', 'text'
                              : html})
        output_objects.append({'object_type': 'text', 'text': '''
Further live job control is avalable through your personal message queues.
They provide a basic interface for centrally storing messages under your grid
account and can be used to pass messages between jobs or for orchestrating
jobs before and during execution.
'''
                               })
        output_objects.append({'object_type': 'link', 'destination':
                               'mqueue.py',
                               'text': 'Message queue interface'})
        return (output_objects, returnvalues.OK)
    elif action in ['get', 'receive', 'input']:
        # Normalize action aliases to the canonical 'get'/'send'
        action = 'get'
        action_desc = 'will be downloaded to the job on the resource'
    elif action in ['put', 'send', 'output']:
        action = 'send'
        action_desc = 'will be uploaded from the job on the resource'
    else:
        output_objects.append({'object_type': 'error_text', 'text'
                              : 'Invalid live io action: %s' % action})
        return (output_objects, returnvalues.CLIENT_ERROR)

    output_objects.append({'object_type': 'text', 'text'
                          : 'Requesting live I/O for %s'
                           % ', '.join(job_ids)})

    # Live input requires explicit source and destination paths
    if action == 'get' and (not src or not dst):
        output_objects.append(
            {'object_type': 'error_text',
             'text': 'src and dst parameters required for live input'})
        return (output_objects, returnvalues.CLIENT_ERROR)

    # Automatic fall back to stdio files if output with no path provided

    if src:
        src_text = 'The files ' + ' '.join(src)
    else:
        src_text = 'The job stdio files'

    if dst:
        dst_text = 'the ' + dst + ' directory'
    else:
        dst_text = 'the corresponding job_output directory'

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name

    base_dir = \
        os.path.abspath(os.path.join(configuration.mrsl_files_dir,
                        client_dir)) + os.sep

    filelist = []
    for job_id in job_ids:
        job_id = job_id.strip()

        # is job currently being executed?

        # Backward compatibility - all_jobs keyword should match all jobs

        if job_id == all_jobs:
            job_id = '*'

        # Check directory traversal attempts before actual handling to avoid
        # leaking information about file system layout while allowing
        # consistent error messages

        unfiltered_match = glob.glob(base_dir + job_id + '.mRSL')
        match = []
        for server_path in unfiltered_match:
            real_path = os.path.abspath(server_path)
            if not valid_user_path(real_path, base_dir, True):

                # out of bounds - save user warning for later to allow
                # partial match:
                # ../*/* is technically allowed to match own files.

                logger.warning("%s tried to %s restricted path %s ! (%s)" % \
                                (client_id, op_name, real_path, job_id))

                continue

            # Insert valid job files in filelist for later treatment

            match.append(real_path)

        # Now actually treat list of allowed matchings and notify if no
        # (allowed) match....

        if not match:
            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : '%s: You do not have any matching job IDs!' % job_id})
        else:
            filelist += match

    for filepath in filelist:

        # Extract job_id from filepath (replace doesn't modify filepath)

        mrsl_file = filepath.replace(base_dir, '')
        job_id = mrsl_file.replace('.mRSL', '')
        job_dict = unpickle(filepath, logger)
        if not job_dict:
            status = returnvalues.CLIENT_ERROR

            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : ('You can only list status of your own jobs. '
                    'Please verify that you submitted the mRSL file '
                    'with job id "%s" (Could not unpickle mRSL file %s)'
                    ) % (job_id, filepath)})
            continue

        # Live I/O only makes sense for jobs that are currently running
        if job_dict['STATUS'] != 'EXECUTING':
            output_objects.append(
                {'object_type': 'text', 'text'
                 : 'Job %s is not currently being executed! Job status: %s'
                 % (job_id, job_dict['STATUS'])})
            continue

        if job_dict['UNIQUE_RESOURCE_NAME'] == 'ARC':
            output_objects.append(
                {'object_type': 'text', 'text'
                 : 'Job %s is submitted to ARC, details are not available!'
                 % job_id })
            continue

        # Rate limit live update requests per job to protect against
        # client request loops (see DoS note below)
        last_live_update_dict = {}
        last_live_update_file = configuration.mig_system_files + os.sep\
             + job_id + '.last_live_update'
        if os.path.isfile(last_live_update_file):
            last_live_update_dict_unpickled = \
                unpickle(last_live_update_file, logger)
            if not last_live_update_dict_unpickled:
                output_objects.append({'object_type': 'error_text',
                        'text'
                        : 'Could not unpickle %s - skipping request!'
                         % last_live_update_file})
                continue

            if not last_live_update_dict_unpickled.has_key(
                'LAST_LIVE_UPDATE_REQUEST_TIMESTAMP'):
                output_objects.append(
                    {'object_type': 'error_text',
                     'text': 'Could not find needed key in %s.'
                     % last_live_update_file})
                continue

            last_live_update_request = \
                last_live_update_dict_unpickled['LAST_LIVE_UPDATE_REQUEST_TIMESTAMP'
                    ]

            difference = datetime.datetime.now()- last_live_update_request
            try:
                min_delay = \
                    int(configuration.min_seconds_between_live_update_requests)
            except:
                # Fall back to a 30 second minimum delay if unset/invalid
                min_delay = 30

            if difference.seconds < min_delay:
                output_objects.append(
                    {'object_type': 'error_text',
                     'text': ('Request not allowed, you must wait at least ' \
                              '%s seconds between live update requests!'
                              ) % min_delay})
                continue

        # save this request to file to avoid DoS from a client request loop.

        last_live_update_dict['LAST_LIVE_UPDATE_REQUEST_TIMESTAMP'] = \
            datetime.datetime.now()
        pickle_ret = pickle(last_live_update_dict,
                            last_live_update_file, logger)
        if not pickle_ret:
            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : 'Error saving live io request timestamp to last_live_update '
                 'file, request not sent!'})
            continue

        # #
        # ## job is being executed right now, send live io request to frontend
        # #

        # get resource_config, needed by scp_file_to_resource
        #(status, resource_config) = get_resource_configuration(
        #    resource_home, unique_resource_name, logger)

        resource_config = job_dict['RESOURCE_CONFIG']
        (status, exe) = get_resource_exe(resource_config, job_dict['EXE'],
                                         logger)
        if not status:
            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : 'Could not get exe configuration for job %s' % job_id})
            continue

        # Build the .getupdate/.sendupdate request file for the frontend
        # script. NOTE(review): local_file is a relative path created in
        # the process CWD - presumably intended; confirm.
        local_file = '%s.%supdate' % (job_dict['LOCALJOBNAME'], action)
        if not os.path.exists(local_file):

            # create

            try:
                filehandle = open(local_file, 'w')
                filehandle.write('job_id '
                                  + job_dict['JOB_ID'] + '\n')
                filehandle.write('localjobname '
                                  + job_dict['LOCALJOBNAME'] + '\n')
                filehandle.write('execution_user '
                                  + exe['execution_user'] + '\n')
                filehandle.write('execution_node '
                                  + exe['execution_node'] + '\n')
                filehandle.write('execution_dir ' + exe['execution_dir']
                                  + '\n')
                filehandle.write('target liveio\n')

                # Leave defaults src and dst to FE script if not provided

                if src:
                    filehandle.write('source ' + ' '.join(src) + '\n')
                if dst:
                    filehandle.write('destination ' + dst + '\n')

                # Backward compatible test for shared_fs - fall back to scp

                if exe.has_key('shared_fs') and exe['shared_fs']:
                    filehandle.write('copy_command cp\n')
                    filehandle.write('copy_frontend_prefix \n')
                    filehandle.write('copy_execution_prefix \n')
                else:
                    filehandle.write('copy_command scp -B\n')
                    filehandle.write('copy_frontend_prefix ${frontend_user}@${frontend_node}:\n'
                            )
                    filehandle.write('copy_execution_prefix ${execution_user}@${execution_node}:\n'
                            )

                filehandle.write('### END OF SCRIPT ###\n')
                filehandle.close()
            except Exception, exc:
                # Best effort: a partial/missing file is caught by the
                # existence check just below
                pass

        if not os.path.exists(local_file):
            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : '.%supdate file not available on %s server' % \
                 (action, configuration.short_title)})
            continue

        scpstatus = copy_file_to_resource(local_file, '%s.%supdate'
                 % (job_dict['LOCALJOBNAME'], action), resource_config, logger)
        if not scpstatus:
            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : 'Error sending request for live io to resource!'})
            continue
        else:
            output_objects.append(
                {'object_type': 'text', 'text'
                 : 'Request for live io was successfully sent to the resource!'
                 })
            output_objects.append(
                {'object_type': 'text', 'text'
                 : '%s %s and should become available in %s in a minute.' % \
                 (src_text, action_desc, dst_text)
                 })
            if action == 'send':
                if not dst:
                    target_path = '%s/%s/*' % (job_output_dir, job_id)
                else:
                    target_path = dst
                output_objects.append({'object_type': 'link', 'destination'
                                       : 'ls.py?path=%s' % target_path,
                                       'text': 'View uploaded files'})
            else:
                output_objects.append({'object_type': 'link', 'destination'
                                       : 'ls.py?path=%s' % ';path='.join(src),
                                       'text': 'View files for download'})

        # Clean up the local request file again; best effort
        try:
            os.remove(local_file)
        except Exception, exc:
            pass
Пример #34
0
            return (False, err.what())
        except arc.NoProxyError, err:
            return (False, 'No Proxy found: %s' % err.what())
        except Exception, err:
            return (False, err.__str__())
    
    # save file

    if outfile == 'AUTOMATIC':
        filename = \
            os.path.abspath(os.path.join(configuration.mrsl_files_dir,
                            client_dir, job_id + '.mRSL'))
    else:
        filename = outfile

    if not pickle(global_dict, filename, logger):
        return (False, 'Fatal error: Could not write %s' % filename)

    if not outfile == 'AUTOMATIC':

        # an outfile was specified, so this is just for testing - dont tell
        # grid_script

        return (True, '')

    # tell 'grid_script'

    message = 'USERJOBFILE %s/%s\n' % (client_dir, job_id)

    if not send_message_to_grid_script(message, logger, configuration):
        return (False, '''Fatal error: Could not get exclusive access or write
Пример #35
0
def requeue_job(
    job_dict,
    failed_msg,
    job_queue,
    executing_queue,
    configuration,
    logger,
):
    """Requeue a failed job by moving it from executing_queue to job_queue"""
    if not job_dict:
        msg = 'requeue_job: %s is no longer in executing queue'
        print failed_msg
        logger.info(failed_msg)
    else:
        executing_queue.dequeue_job_by_id(job_dict['JOB_ID'])
        failed_timestamp = time.gmtime()

        # Clean up the server for files assosiated with the executing job

        if not job_dict.has_key('SESSIONID')\
             or not job_dict.has_key('IOSESSIONID')\
             or not server_cleanup(
            job_dict['SESSIONID'],
            job_dict['IOSESSIONID'],
            job_dict['LOCALJOBNAME'],
            job_dict['JOB_ID'],
            configuration,
            logger,
            ):
            logger.error('could not clean up MiG server')
            print 'CLEAN UP FAILED'

        client_dir = client_id_dir(job_dict['USER_CERT'])

        # Remove job result files, if they have arrived as the result is not valid
        # This can happen with sandboxes as they can't be stopped serverside

        status_prefix = os.path.join(configuration.user_home, client_dir,
                                     job_dict['JOB_ID'])
        io.delete_file(status_prefix + '.status', logger)
        io.delete_file(status_prefix + '.stdout', logger)
        io.delete_file(status_prefix + '.stderr', logger)

        # Generate execution history

        if not job_dict.has_key('EXECUTION_HISTORY'):
            job_dict['EXECUTION_HISTORY'] = []

        history_dict = {
            'QUEUED_TIMESTAMP': job_dict['QUEUED_TIMESTAMP'],
            'EXECUTING_TIMESTAMP': job_dict['EXECUTING_TIMESTAMP'],
            'FAILED_TIMESTAMP': failed_timestamp,
            'FAILED_MESSAGE': failed_msg,
            'UNIQUE_RESOURCE_NAME': job_dict['UNIQUE_RESOURCE_NAME'],
            'RESOURCE_VGRID': job_dict.get('RESOURCE_VGRID', ''),
            'PUBLICNAME': job_dict.get('PUBLICNAME', 'HIDDEN'),
        }

        job_dict['EXECUTION_HISTORY'].append(history_dict)

        # Retry if retries left

        job_dict['RETRY_COUNT'] = job_dict.get('RETRY_COUNT', 0) + 1

        unique_resource_name = job_dict['UNIQUE_RESOURCE_NAME']

        mrsl_file = os.path.join(configuration.mrsl_files_dir, client_dir,
                                 job_dict['JOB_ID'] + '.mRSL')
        job_retries = job_dict.get('RETRIES', configuration.job_retries)
        if job_dict['RETRY_COUNT'] <= job_retries:
            job_dict['STATUS'] = 'QUEUED'
            job_dict['QUEUED_TIMESTAMP'] = time.gmtime()
            del job_dict['EXECUTING_TIMESTAMP']
            del job_dict['UNIQUE_RESOURCE_NAME']
            del job_dict['EXE']
            del job_dict['RESOURCE_CONFIG']
            del job_dict['LOCALJOBNAME']
            if job_dict.has_key('SESSIONID'):
                del job_dict['SESSIONID']
            if job_dict.has_key('IOSESSIONID'):
                del job_dict['IOSESSIONID']
            if job_dict.has_key('PUBLICNAME'):
                del job_dict['PUBLICNAME']
            if job_dict.has_key('RESOURCE_VGRID'):
                del job_dict['RESOURCE_VGRID']

            io.pickle(job_dict, mrsl_file, logger)

            # Requeue job last in queue for retry later

            job_queue.enqueue_job(job_dict, job_queue.queue_length())

            msg = \
                '%s failed to execute job %s - requeue for retry %d of %d'\
                 % (unique_resource_name, job_dict['JOB_ID'],
                    job_dict['RETRY_COUNT'], job_retries)
            print msg
            logger.info(msg)
        else:

            job_dict['STATUS'] = 'FAILED'
            job_dict['FAILED_TIMESTAMP'] = failed_timestamp
            io.pickle(job_dict, mrsl_file, logger)

            # tell the user the sad news

            msg = 'Gave up on executing job %s after %d retries'\
                 % (job_dict['JOB_ID'], job_retries)
            logger.error(msg)
            print msg
            notify_user_thread(
                job_dict,
                configuration.myfiles_py_location,
                'FAILED',
                logger,
                False,
                configuration,
            )
Пример #36
0
def main(client_id, user_arguments_dict):
    """Main function used by front end"""
    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id, op_header=False)
    client_dir = client_id_dir(client_id)
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
    )

    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)

    logger.debug("User: %s executing %s" % (client_id, op_name))
    if not configuration.site_enable_jupyter:
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            'The Jupyter service is not enabled on the system'
        })
        return (output_objects, returnvalues.SYSTEM_ERROR)

    if not configuration.site_enable_sftp_subsys and not \
            configuration.site_enable_sftp:
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            'The required sftp service is not enabled on the system'
        })
        return (output_objects, returnvalues.SYSTEM_ERROR)

    if configuration.site_enable_sftp:
        sftp_port = configuration.user_sftp_port

    if configuration.site_enable_sftp_subsys:
        sftp_port = configuration.user_sftp_subsys_port

    requested_service = accepted['service'][-1]
    service = {
        k: v
        for options in configuration.jupyter_services
        for k, v in options.items()
        if options['service_name'] == requested_service
    }

    if not service:
        valid_services = [
            options['name'] for options in configuration.jupyter_services
        ]
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            '%s is not a valid jupyter service, '
            'allowed include %s' % (requested_service, valid_services)
        })
        return (output_objects, returnvalues.SYSTEM_ERROR)

    valid_service = valid_jupyter_service(configuration, service)
    if not valid_service:
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            'The service %s appears to be misconfigured, '
            'please contact a system administrator about this issue' %
            requested_service
        })
        return (output_objects, returnvalues.SYSTEM_ERROR)

    host = get_host_from_service(configuration, service)
    # Get an active jupyterhost
    if host is None:
        logger.error("No active jupyterhub host could be found")
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            'Failed to establish connection to the %s Jupyter service' %
            service['service_name']
        })
        output_objects.append({
            'object_type': 'link',
            'destination': 'jupyter.py',
            'text': 'Back to Jupyter services overview'
        })
        return (output_objects, returnvalues.SYSTEM_ERROR)

    remote_user = unescape(os.environ.get('REMOTE_USER', '')).strip()
    if not remote_user:
        logger.error("Can't connect to jupyter with an empty REMOTE_USER "
                     "environment variable")
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            'Failed to establish connection to the Jupyter service'
        })
        return (output_objects, returnvalues.CLIENT_ERROR)
    # Ensure the remote_user dict can be http posted
    remote_user = str(remote_user)

    # TODO, activate admin info
    # remote_user = {'USER': username, 'IS_ADMIN': is_admin(client_id,
    #                                                      configuration,
    # logger)}

    # Regular sftp path
    mnt_path = os.path.join(configuration.jupyter_mount_files_dir, client_dir)
    # Subsys sftp path
    subsys_path = os.path.join(configuration.mig_system_files, 'jupyter_mount')
    # sftp session path
    link_home = configuration.sessid_to_jupyter_mount_link_home

    user_home_dir = os.path.join(configuration.user_home, client_dir)

    # Preparing prerequisites
    if not os.path.exists(mnt_path):
        os.makedirs(mnt_path)

    if not os.path.exists(link_home):
        os.makedirs(link_home)

    if configuration.site_enable_sftp_subsys:
        if not os.path.exists(subsys_path):
            os.makedirs(subsys_path)

    # Make sure ssh daemon does not complain
    tighten_key_perms(configuration, client_id)

    url_base = '/' + service['service_name']
    url_home = url_base + '/home'
    url_auth = host + url_base + '/hub/login'
    url_data = host + url_base + '/hub/user-data'

    # Does the client home dir contain an active mount key
    # If so just keep on using it.
    jupyter_mount_files = [
        os.path.join(mnt_path, jfile) for jfile in os.listdir(mnt_path)
        if jfile.endswith('.jupyter_mount')
    ]

    logger.info("User: %s mount files: %s" %
                (client_id, "\n".join(jupyter_mount_files)))
    logger.debug("Remote-User %s" % remote_user)
    active_mounts = []
    for jfile in jupyter_mount_files:
        jupyter_dict = unpickle(jfile, logger)
        if not jupyter_dict:
            # Remove failed unpickle
            logger.error("Failed to unpickle %s removing it" % jfile)
            remove_jupyter_mount(jfile, configuration)
        else:
            # Mount has been timed out
            if not is_active(jupyter_dict):
                remove_jupyter_mount(jfile, configuration)
            else:
                # Valid mount
                active_mounts.append({'path': jfile, 'state': jupyter_dict})

    logger.debug(
        "User: %s active keys: %s" %
        (client_id, "\n".join([mount['path'] for mount in active_mounts])))

    # If multiple are active, remove oldest
    active_mount, old_mounts = get_newest_mount(active_mounts)
    for mount in old_mounts:
        remove_jupyter_mount(mount['path'], configuration)

    # A valid active key is already present redirect straight to the jupyter
    # service, pass most recent mount information
    if active_mount is not None:
        mount_dict = mig_to_mount_adapt(active_mount['state'])
        user_dict = mig_to_user_adapt(active_mount['state'])
        logger.debug("Existing header values, Mount: %s User: %s" %
                     (mount_dict, user_dict))

        auth_header = {'Remote-User': remote_user}
        json_data = {'data': {'Mount': mount_dict, 'User': user_dict}}

        if configuration.site_enable_workflows:
            workflows_dict = mig_to_workflows_adapt(active_mount['state'])
            if not workflows_dict:
                # No cached workflows session could be found -> refresh with a
                # one
                workflow_session_id = get_workflow_session_id(
                    configuration, client_id)
                if not workflow_session_id:
                    workflow_session_id = create_workflow_session_id(
                        configuration, client_id)
                # TODO get this dynamically
                url = configuration.migserver_https_sid_url + \
                    '/cgi-sid/workflowsjsoninterface.py?output_format=json'
                workflows_dict = {
                    'WORKFLOWS_URL': url,
                    'WORKFLOWS_SESSION_ID': workflow_session_id
                }

            logger.debug("Existing header values, Workflows: %s" %
                         workflows_dict)
            json_data['workflows_data'] = {'Session': workflows_dict}

        with requests.session() as session:
            # Authenticate and submit data
            response = session.post(url_auth, headers=auth_header)
            if response.status_code == 200:
                response = session.post(url_data, json=json_data)
                if response.status_code != 200:
                    logger.error(
                        "Jupyter: User %s failed to submit data %s to %s" %
                        (client_id, json_data, url_data))
            else:
                logger.error(
                    "Jupyter: User %s failed to authenticate against %s" %
                    (client_id, url_auth))

        # Redirect client to jupyterhub
        return jupyter_host(configuration, output_objects, remote_user,
                            url_home)

    # Create a new keyset
    # Create login session id
    session_id = generate_random_ascii(2 * session_id_bytes,
                                       charset='0123456789abcdef')

    # Generate private/public keys
    (mount_private_key,
     mount_public_key) = generate_ssh_rsa_key_pair(encode_utf8=True)

    # Known hosts
    sftp_addresses = socket.gethostbyname_ex(
        configuration.user_sftp_show_address or socket.getfqdn())

    # Subsys sftp support
    if configuration.site_enable_sftp_subsys:
        # Restrict possible mount agent
        auth_content = []
        restrict_opts = 'no-agent-forwarding,no-port-forwarding,no-pty,'
        restrict_opts += 'no-user-rc,no-X11-forwarding'
        restrictions = '%s' % restrict_opts
        auth_content.append('%s %s\n' % (restrictions, mount_public_key))
        # Write auth file
        write_file('\n'.join(auth_content),
                   os.path.join(subsys_path, session_id + '.authorized_keys'),
                   logger,
                   umask=027)

    logger.debug("User: %s - Creating a new jupyter mount keyset - "
                 "private_key: %s public_key: %s " %
                 (client_id, mount_private_key, mount_public_key))

    jupyter_dict = {
        'MOUNT_HOST': configuration.short_title,
        'SESSIONID': session_id,
        'USER_CERT': client_id,
        # don't need fraction precision, also not all systems provide fraction
        # precision.
        'CREATED_TIMESTAMP': int(time.time()),
        'MOUNTSSHPRIVATEKEY': mount_private_key,
        'MOUNTSSHPUBLICKEY': mount_public_key,
        # Used by the jupyterhub to know which host to mount against
        'TARGET_MOUNT_ADDR': "@" + sftp_addresses[0] + ":",
        'PORT': sftp_port
    }
    client_email = extract_field(client_id, 'email')
    if client_email:
        jupyter_dict.update({'USER_EMAIL': client_email})

    if configuration.site_enable_workflows:
        workflow_session_id = get_workflow_session_id(configuration, client_id)
        if not workflow_session_id:
            workflow_session_id = create_workflow_session_id(
                configuration, client_id)
        # TODO get this dynamically
        url = configuration.migserver_https_sid_url + \
            '/cgi-sid/workflowsjsoninterface.py?output_format=json'
        jupyter_dict.update({
            'WORKFLOWS_URL': url,
            'WORKFLOWS_SESSION_ID': workflow_session_id
        })

    # Only post the required keys, adapt to API expectations
    mount_dict = mig_to_mount_adapt(jupyter_dict)
    user_dict = mig_to_user_adapt(jupyter_dict)
    workflows_dict = mig_to_workflows_adapt(jupyter_dict)
    logger.debug("User: %s Mount header: %s" % (client_id, mount_dict))
    logger.debug("User: %s User header: %s" % (client_id, user_dict))
    if workflows_dict:
        logger.debug("User: %s Workflows header: %s" %
                     (client_id, workflows_dict))

    # Auth and pass a new set of valid mount keys
    auth_header = {'Remote-User': remote_user}
    json_data = {'data': {'Mount': mount_dict, 'User': user_dict}}
    if workflows_dict:
        json_data['workflows_data'] = {'Session': workflows_dict}

    # First login
    with requests.session() as session:
        # Authenticate
        response = session.post(url_auth, headers=auth_header)
        if response.status_code == 200:
            response = session.post(url_data, json=json_data)
            if response.status_code != 200:
                logger.error(
                    "Jupyter: User %s failed to submit data %s to %s" %
                    (client_id, json_data, url_data))
        else:
            logger.error("Jupyter: User %s failed to authenticate against %s" %
                         (client_id, url_auth))

    # Update pickle with the new valid key
    jupyter_mount_state_path = os.path.join(mnt_path,
                                            session_id + '.jupyter_mount')

    pickle(jupyter_dict, jupyter_mount_state_path, logger)

    # Link jupyter pickle state file
    linkdest_new_jupyter_mount = os.path.join(mnt_path,
                                              session_id + '.jupyter_mount')

    linkloc_new_jupyter_mount = os.path.join(link_home,
                                             session_id + '.jupyter_mount')
    make_symlink(linkdest_new_jupyter_mount, linkloc_new_jupyter_mount, logger)

    # Link userhome
    linkloc_user_home = os.path.join(link_home, session_id)
    make_symlink(user_home_dir, linkloc_user_home, logger)

    return jupyter_host(configuration, output_objects, remote_user, url_home)
Пример #37
0
        new_dict[key] = value_dict['Value']

    new_dict['CREATOR'] = client_id
    new_dict['CREATED_TIMESTAMP'] = datetime.datetime.now()

    # Create settings dir for any old users
    try:
        settings_dir = os.path.join(configuration.user_settings, client_dir)
        os.mkdir(settings_dir)
    except:
        pass
                                    
    pickle_filename = os.path.join(configuration.user_settings, client_dir,
                                   destination)

    if not pickle(new_dict, pickle_filename, configuration.logger):
        msg = 'Error saving pickled data!'
        return (False, msg)

    # everything ok

    return (True, '')

def parse_and_save_settings(filename, client_id, configuration):
    """Validate and write settings entries from filename"""
    save_status = parse_and_save_pickle(
        filename, settings_filename, get_settings_fields(), client_id,
        configuration, True, True)
    # Only flag the user as modified when the save actually succeeded
    if save_status[0]:
        mark_user_modified(configuration, client_id)
    return save_status