Example #1
    def option_reset(self, request):
        """
        Fetch and overwrite values from subcampaign
        """
        prepid = request.get_prepid()
        request_db = Database('requests')
        with self.locker.get_nonblocking_lock(prepid):
            request_json = request_db.get(prepid)
            request = Request(json_input=request_json)
            if request.get('status') != 'new':
                raise Exception('It is not allowed to option reset '
                                'requests that are not in status "new"')

            subcampaign_db = Database('subcampaigns')
            subcampaign_name = request.get('subcampaign')
            subcampaign_json = subcampaign_db.get(subcampaign_name)
            if not subcampaign_json:
                raise Exception(
                    f'Subcampaign "{subcampaign_name}" does not exist')

            subcampaign = Subcampaign(json_input=subcampaign_json)
            request.set('memory', subcampaign.get('memory'))
            request.set('sequences', subcampaign.get('sequences'))
            request.set('energy', subcampaign.get('energy'))
            request.set('cmssw_release', subcampaign.get('cmssw_release'))
            request_db.save(request.get_json())

        return request
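
Most of the examples on this page follow the same flow: take a per-prepid lock, re-fetch the latest document inside the lock, validate it, mutate it and save the serialized JSON back. A minimal sketch of that recurring pattern, assuming the same Database, Request and locker APIs used in Example #1 (the set_and_save helper name and the locker argument are illustrative, not part of the original code):

def set_and_save(locker, prepid, attribute, value):
    """Hypothetical helper: lock -> fetch -> validate -> mutate -> save"""
    request_db = Database('requests')
    # Serialize concurrent edits of the same document with a per-prepid lock
    with locker.get_nonblocking_lock(prepid):
        # Re-fetch inside the lock so the latest revision is mutated
        request = Request(json_input=request_db.get(prepid))
        # Validate before mutating, like the status check in Example #1
        if request.get('status') != 'new':
            raise Exception('Only requests in status "new" can be edited')

        request.set(attribute, value)
        # Persist the serialized object back to the database
        request_db.save(request.get_json())

    return request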
Example #2
    def update_workflows(self, relval):
        """
        Update computing workflows from Stats2
        """
        prepid = relval.get_prepid()
        relval_db = Database('relvals')
        with self.locker.get_lock(prepid):
            relval = self.get(prepid)
            workflow_names = {w['name'] for w in relval.get('workflows')}
            stats_workflows = get_workflows_from_stats_for_prepid(prepid)
            workflow_names -= {w['RequestName'] for w in stats_workflows}
            self.logger.info('%s workflows that are not in stats: %s',
                             len(workflow_names), workflow_names)
            stats_workflows += get_workflows_from_stats(list(workflow_names))
            all_workflows = {}
            for workflow in stats_workflows:
                if not workflow or not workflow.get('RequestName'):
                    raise Exception('Could not find workflow in Stats2')

                name = workflow.get('RequestName')
                all_workflows[name] = workflow
                self.logger.info('Found workflow %s', name)

            output_datasets = self.get_output_datasets(relval, all_workflows)
            workflows = self.pick_workflows(all_workflows, output_datasets)
            relval.set('output_datasets', output_datasets)
            relval.set('workflows', workflows)
            relval_db.save(relval.get_json())

        return relval
Example #3
    def submit_relval(self, relval, controller):
        """
        Method that is used by submission workers. This is where the actual submission happens
        """
        prepid = relval.get_prepid()
        credentials_file = Config.get('credentials_file')
        workspace_dir = Config.get('remote_path').rstrip('/')
        self.logger.debug('Will try to acquire lock for %s', prepid)
        with Locker().get_lock(prepid):
            self.logger.info('Locked %s for submission', prepid)
            relval_db = Database('relvals')
            relval = controller.get(prepid)
            try:
                self.check_for_submission(relval)
                with SSHExecutor('lxplus.cern.ch', credentials_file) as ssh:
                    # Start executing commands
                    self.prepare_workspace(relval, controller, ssh,
                                           workspace_dir)
                    # Create configs
                    self.generate_configs(relval, ssh, workspace_dir)
                    # Upload configs
                    config_hashes = self.upload_configs(
                        relval, ssh, workspace_dir)
                    # Remove remote relval directory
                    ssh.execute_command([f'rm -rf {workspace_dir}/{prepid}'])

                self.logger.debug(config_hashes)
                # Iterate through uploaded configs and save their hashes in RelVal steps
                self.update_steps_with_config_hashes(relval, config_hashes)
                # Submit job dict to ReqMgr2
                job_dict = controller.get_job_dict(relval)
                cmsweb_url = Config.get('cmsweb_url')
                grid_cert = Config.get('grid_user_cert')
                grid_key = Config.get('grid_user_key')
                connection = ConnectionWrapper(host=cmsweb_url,
                                               cert_file=grid_cert,
                                               key_file=grid_key)
                workflow_name = self.submit_job_dict(job_dict, connection)
                # Update RelVal after successful submission
                relval.set('workflows', [{'name': workflow_name}])
                relval.set('status', 'submitted')
                relval.add_history('submission', 'succeeded', 'automatic')
                relval_db.save(relval.get_json())
                time.sleep(3)
                self.approve_workflow(workflow_name, connection)
                connection.close()
                if not Config.get('development'):
                    refresh_workflows_in_stats([workflow_name])

            except Exception as ex:
                self.__handle_error(relval, str(ex))
                return

            self.__handle_success(relval)

        if not Config.get('development'):
            controller.update_workflows(relval)

        self.logger.info('Successfully finished %s submission', prepid)
Example #4
    def after_update(self, old_obj, new_obj, changed_values):
        self.logger.info('Changed values: %s', changed_values)
        if 'workflow_name' in changed_values:
            new_relval = self.create(new_obj.get_json())
            old_prepid = old_obj.get_prepid()
            new_prepid = new_relval.get_prepid()
            new_relval.set('history', old_obj.get('history'))
            new_relval.add_history('rename', [old_prepid, new_prepid], None)
            relvals_db = Database('relvals')
            relvals_db.save(new_relval.get_json())
            self.logger.info('Created %s as rename of %s', new_prepid,
                             old_prepid)
            new_obj.set('prepid', new_prepid)
            # Update the ticket...
            tickets_db = Database('tickets')
            tickets = tickets_db.query(
                f'created_relvals={old_obj.get_prepid()}')
            self.logger.debug(json.dumps(tickets, indent=2))
            for ticket_json in tickets:
                ticket_prepid = ticket_json['prepid']
                with self.locker.get_lock(ticket_prepid):
                    ticket_json = tickets_db.get(ticket_prepid)
                    ticket = Ticket(json_input=ticket_json)
                    created_relvals = ticket.get('created_relvals')
                    if old_prepid in created_relvals:
                        created_relvals.remove(old_prepid)

                    created_relvals.append(new_prepid)
                    ticket.set('created_relvals', created_relvals)
                    ticket.add_history('rename', [old_prepid, new_prepid],
                                       None)
                    tickets_db.save(ticket.get_json())

            self.delete(old_obj.get_json())
Example #5
 def update_status(self, request, status, timestamp=None):
     """
     Set a new status on the request, update the history accordingly and save to the database
     """
     request_db = Database(self.database_name)
     request.set('status', status)
     request.add_history('status', status, None, timestamp)
     request_db.save(request.get_json())
Example #6
    def create_relvals_for_ticket(self, ticket):
        """
        Create RelVals from given ticket. Return list of relval prepids
        """
        ticket_db = Database('tickets')
        ticket_prepid = ticket.get_prepid()
        ssh_executor = SSHExecutor('lxplus.cern.ch', Config.get('credentials_file'))
        relval_controller = RelValController()
        created_relvals = []
        with self.locker.get_lock(ticket_prepid):
            ticket = self.get(ticket_prepid)
            rewrite_gt_string = ticket.get('rewrite_gt_string')
            recycle_input_of = ticket.get('recycle_input_of')
            try:
                workflows = self.generate_workflows(ticket, ssh_executor)
                # Iterate through workflows and create RelVal objects
                relvals = []
                for workflow_id, workflow_dict in workflows.items():
                    relvals.append(self.create_relval_from_workflow(ticket,
                                                                    workflow_id,
                                                                    workflow_dict))

                # Handle recycling if needed
                if recycle_input_of:
                    if rewrite_gt_string:
                        self.recycle_input_with_gt_rewrite(relvals,
                                                           rewrite_gt_string,
                                                           recycle_input_of)
                    else:
                        self.recycle_input(relvals,
                                           relval_controller,
                                           recycle_input_of)

                for relval in relvals:
                    relval = relval_controller.create(relval.get_json())
                    created_relvals.append(relval)
                    self.logger.info('Created %s', relval.get_prepid())

                created_relval_prepids = [r.get('prepid') for r in created_relvals]
                ticket.set('created_relvals', created_relval_prepids)
                ticket.set('status', 'done')
                ticket.add_history('created_relvals', created_relval_prepids, None)
                ticket_db.save(ticket.get_json())
            except Exception as ex:
                self.logger.error('Error creating RelVal from ticket: %s', ex)
                # Delete created relvals if there was an Exception
                for created_relval in reversed(created_relvals):
                    relval_controller.delete({'prepid': created_relval.get('prepid')})

                # And reraise the exception
                raise ex
            finally:
                # Close all SSH connections
                ssh_executor.close_connections()

        return [r.get('prepid') for r in created_relvals]
Example #7
 def update_status(self, relval, status, timestamp=None):
     """
     Set a new status on the RelVal, update the history accordingly and save to the database
     """
     relval_db = Database(self.database_name)
     relval.set('status', status)
     relval.add_history('status', status, None, timestamp)
     relval_db.save(relval.get_json())
     self.logger.info('Set "%s" status to "%s"', relval.get_prepid(),
                      status)
Example #8
    def __check_for_submission(self, request):
        """
        Perform one last check of values before submitting a request
        """
        prepid = request.get_prepid()
        self.logger.debug('Final check before submission for %s', prepid)
        if request.get('status') != 'submitting':
            raise Exception(
                f'Cannot submit a request with status {request.get("status")}')

        if not request.get('input')['dataset']:
            request_db = Database('requests')
            request.set('status', 'approved')
            request_db.save(request.get_json())
            raise Exception('Cannot submit a request without input dataset')
Example #9
    def after_delete(self, obj):
        prepid = obj.get_prepid()
        tickets_db = Database('tickets')
        tickets = tickets_db.query(f'created_relvals={prepid}')
        self.logger.debug(json.dumps(tickets, indent=2))
        for ticket_json in tickets:
            ticket_prepid = ticket_json['prepid']
            with self.locker.get_lock(ticket_prepid):
                ticket_json = tickets_db.get(ticket_prepid)
                ticket = Ticket(json_input=ticket_json)
                created_relvals = ticket.get('created_relvals')
                if prepid in created_relvals:
                    created_relvals.remove(prepid)

                ticket.set('created_relvals', created_relvals)
                ticket.add_history('remove_relval', prepid, None)
                tickets_db.save(ticket.get_json())
Example #10
 def __handle_error(self, request, error_message):
     """
     Handle error that occurred during submission, modify request accordingly
     """
     request_db = Database('requests')
     request.set('status', 'new')
     request.add_history('submission', 'failed', 'automatic')
     request_db.save(request.get_json())
     service_url = Config.get('service_url')
     emailer = Emailer()
     prepid = request.get_prepid()
     self.logger.warning('Submission of %s failed', prepid)
     subject = f'Request {prepid} submission failed'
     body = f'Hello,\n\nUnfortunately submission of {prepid} failed.\n'
     body += (f'You can find this request at '
              f'{service_url}/requests?prepid={prepid}\n')
     body += f'Error message:\n\n{error_message}'
     recipients = emailer.get_recipients(request)
     emailer.send(subject, body, recipients)
Example #11
    def change_request_priority(self, request, priority):
        """
        Change request priority
        """
        prepid = request.get_prepid()
        request_db = Database('requests')
        cmsweb_url = Config.get('cmsweb_url')
        grid_cert = Config.get('grid_user_cert')
        grid_key = Config.get('grid_user_key')
        self.logger.info('Will try to change %s priority to %s', prepid,
                         priority)
        with self.locker.get_nonblocking_lock(prepid):
            request_json = request_db.get(prepid)
            request = Request(json_input=request_json)
            if request.get('status') != 'submitted':
                raise Exception('It is not allowed to change priority of '
                                'requests that are not in status "submitted"')

            request.set('priority', priority)
            updated_workflows = []
            active_workflows = self.__pick_active_workflows(request)
            connection = ConnectionWrapper(host=cmsweb_url,
                                           keep_open=True,
                                           cert_file=grid_cert,
                                           key_file=grid_key)
            for workflow in active_workflows:
                workflow_name = workflow['name']
                self.logger.info('Changing "%s" priority to %s', workflow_name,
                                 priority)
                response = connection.api(
                    'PUT', f'/reqmgr2/data/request/{workflow_name}',
                    {'RequestPriority': priority})
                updated_workflows.append(workflow_name)
                self.logger.debug(response)

            connection.close()
            # Update priority in Stats2
            self.force_stats_to_refresh(updated_workflows)
            # Finally save the request
            request_db.save(request.get_json())

        return request
Example #12
    def __handle_error(self, relval, error_message):
        """
        Handle error that occurred during submission, modify RelVal accordingly
        """
        self.logger.error(error_message)
        relval_db = Database('relvals')
        relval.set('status', 'new')
        relval.set('campaign_timestamp', 0)
        relval.add_history('submission', 'failed', 'automatic')
        for step in relval.get('steps'):
            step.set('config_id', '')
            step.set('resolved_globaltag', '')

        relval_db.save(relval.get_json())
        service_url = Config.get('service_url')
        emailer = Emailer()
        prepid = relval.get_prepid()
        subject = f'RelVal {prepid} submission failed'
        body = f'Hello,\n\nUnfortunately submission of {prepid} failed.\n'
        body += (f'You can find this relval at '
                 f'{service_url}/relvals?prepid={prepid}\n')
        body += f'Error message:\n\n{error_message}'
        recipients = emailer.get_recipients(relval)
        emailer.send(subject, body, recipients)
Example #13
    def update_workflows(self, request):
        """
        Update computing workflows from Stats2
        """
        prepid = request.get_prepid()
        request_db = Database('requests')
        with self.locker.get_lock(prepid):
            request_json = request_db.get(prepid)
            request = Request(json_input=request_json)
            stats_conn = ConnectionWrapper(host='vocms074.cern.ch',
                                           port=5984,
                                           https=False,
                                           keep_open=True)
            stats_workflows = stats_conn.api(
                'GET',
                f'/requests/_design/_designDoc/_view/prepids?key="{prepid}"&include_docs=True'
            )
            stats_workflows = json.loads(stats_workflows)
            stats_workflows = [x['doc'] for x in stats_workflows['rows']]
            existing_workflows = [x['name'] for x in request.get('workflows')]
            stats_workflows = [x['RequestName'] for x in stats_workflows]
            all_workflow_names = list(
                set(existing_workflows) | set(stats_workflows))
            self.logger.info('All workflows of %s are %s', prepid,
                             ', '.join(all_workflow_names))
            all_workflows = {}
            for workflow_name in all_workflow_names:
                workflow = stats_conn.api('GET', f'/requests/{workflow_name}')
                if not workflow:
                    raise Exception(
                        f'Could not find {workflow_name} in Stats2')

                workflow = json.loads(workflow)
                if not workflow.get('RequestName'):
                    raise Exception(
                        f'Could not find {workflow_name} in Stats2')

                if workflow.get('RequestType', '').lower() == 'resubmission':
                    continue

                all_workflows[workflow_name] = workflow
                self.logger.info('Fetched workflow %s', workflow_name)

            stats_conn.close()
            output_datasets = self.__get_output_datasets(
                request, all_workflows)
            new_workflows = self.__pick_workflows(all_workflows,
                                                  output_datasets)
            all_workflow_names = [x['name'] for x in new_workflows]
            for new_workflow in reversed(new_workflows):
                completed_events = -1
                for output_dataset in new_workflow.get('output_datasets', []):
                    if output_datasets and output_dataset['name'] == output_datasets[-1]:
                        completed_events = output_dataset['events']
                        break

                if completed_events != -1:
                    request.set('completed_events', completed_events)
                    break

            if all_workflow_names:
                newest_workflow = all_workflows[all_workflow_names[-1]]
                if 'RequestPriority' in newest_workflow:
                    request.set('priority', newest_workflow['RequestPriority'])

                if 'TotalEvents' in newest_workflow:
                    request.set('total_events',
                                max(0, newest_workflow['TotalEvents']))

            request.set('output_datasets', output_datasets)
            request.set('workflows', new_workflows)
            request_db.save(request.get_json())

            if output_datasets:
                subsequent_requests = request_db.query(
                    f'input.request={prepid}')
                self.logger.info('Found %s subsequent requests for %s: %s',
                                 len(subsequent_requests), prepid,
                                 [r['prepid'] for r in subsequent_requests])
                for subsequent_request_json in subsequent_requests:
                    subsequent_request_prepid = subsequent_request_json.get(
                        'prepid')
                    self.update_input_dataset(
                        self.get(subsequent_request_prepid))

        return request
Example #14
"""
Script to add run list to relval steps
"""
import sys
import os.path
import os
sys.path.append(os.path.abspath(os.path.pardir))
from core_lib.database.database import Database

Database.set_credentials_file(os.getenv('DB_AUTH'))
Database.set_database_name('relval')

relvals_database = Database('relvals')

total_relvals = relvals_database.get_count()

print('Total relvals: %s' % (total_relvals))

for index, item in enumerate(relvals_database.query(limit=total_relvals)):
    print('Processing entry %s/%s %s' %
          (index + 1, total_relvals, item.get('prepid', '<no-id>')))
    for step in item['steps']:
        step['input']['run'] = step['input'].get('run', [])

    relvals_database.save(item)

print('Done')
Example #15
total_requests = request_db.get_count()
total_old_tickets = old_ticket_db.get_count()
total_new_tickets = new_ticket_db.get_count()

print('Requests: %s' % (total_requests))
print('Subcampaigns: %s' % (total_subcampaigns))
print('(Old) subcampaign tickets: %s' % (total_old_tickets))
print('(New) tickets: %s' % (total_new_tickets))

for index, subcampaign in enumerate(
        subcampaign_db.query(limit=total_subcampaigns)):
    print('Processing subcampaign %s/%s %s' %
          (index + 1, total_subcampaigns, subcampaign['prepid']))
    subcampaign.pop('_rev', None)
    subcampaign.pop('step', None)
    subcampaign_db.save(subcampaign)

for index, request in enumerate(request_db.query(limit=total_requests)):
    print('Processing request %s/%s %s' %
          (index + 1, total_requests, request['prepid']))
    request.pop('_rev', None)
    request.pop('step', None)
    if 'input_dataset' in request:
        request['input'] = {
            'dataset': request.pop('input_dataset'),
            'request': ''
        }

    request_db.save(request)

for index, ticket in enumerate(old_ticket_db.query(limit=total_old_tickets)):
Example #16
Database.set_credentials_file(os.getenv('DB_AUTH'))
Database.set_database_name('rereco')

request_db = Database('requests')
subcampaign_db = Database('subcampaigns')

total_subcampaigns = subcampaign_db.get_count()
total_requests = request_db.get_count()

print('Requests: %s' % (total_requests))
print('Subcampaigns: %s' % (total_subcampaigns))

for index, subcampaign in enumerate(subcampaign_db.query(limit=total_subcampaigns)):
    print('Processing subcampaign %s/%s %s' % (index + 1,
                                               total_subcampaigns,
                                               subcampaign['prepid']))
    subcampaign.pop('scram_arch', None)
    subcampaign_db.save(subcampaign)

for index, request in enumerate(request_db.query(limit=total_requests)):
    print('Processing request %s/%s %s' % (index + 1, total_requests, request['prepid']))
    request.pop('scram_arch', None)
    request_db.save(request)


total_subcampaigns = subcampaign_db.get_count()
total_requests = request_db.get_count()

print('Requests: %s' % (total_requests))
print('Subcampaigns: %s' % (total_subcampaigns))
Example #17
total_relvals = relvals_database.get_count()

print('Total tickets: %s' % (total_tickets))
print('Total relvals: %s' % (total_relvals))

for index, item in enumerate(tickets_database.query(limit=total_tickets)):
    print('Processing entry %s/%s %s' % (index + 1, total_tickets, item.get('prepid', '<no-id>')))
    item['gpu'] = {'requires': 'forbidden',
                   'gpu_memory': '',
                   'cuda_capabilities': [],
                   'cuda_runtime': '',
                   'gpu_name': '',
                   'cuda_driver_version': '',
                   'cuda_runtime_version': ''}
    item['gpu_steps'] = []
    tickets_database.save(item)

for index, item in enumerate(relvals_database.query(limit=total_relvals)):
    print('Processing entry %s/%s %s' % (index + 1, total_relvals, item.get('prepid', '<no-id>')))
    for step in item['steps']:
        step['gpu'] = {'requires': 'forbidden',
                       'gpu_memory': '',
                       'cuda_capabilities': [],
                       'cuda_runtime': '',
                       'gpu_name': '',
                       'cuda_driver_version': '',
                       'cuda_runtime_version': ''}

    relvals_database.save(item)

Example #18
import sys
import os.path
import os
sys.path.append(os.path.abspath(os.path.pardir))
from core_lib.database.database import Database

Database.set_credentials_file(os.getenv('DB_AUTH'))
Database.set_database_name('relval')

database = Database('relvals')

total_entries = database.get_count()

print('Total entries: %s' % (total_entries))

for index, item in enumerate(database.query(limit=total_entries)):
    print('Processing entry %s/%s %s' %
          (index + 1, total_entries, item.get('prepid', '<no-id>')))
    item['job_dict_overwrite'] = {}
    database.save(item)

print('Done')
Example #19
    def create_requests_for_ticket(self, ticket):
        """
        Create requests from given ticket. Return list of request prepids
        """
        database = Database(self.database_name)
        ticket_prepid = ticket.get_prepid()
        created_requests = []
        dataset_blacklist = set(Settings().get('dataset_blacklist'))
        request_controller = RequestController()
        with self.locker.get_lock(ticket_prepid):
            ticket = Ticket(json_input=database.get(ticket_prepid))
            created_requests = ticket.get('created_requests')
            status = ticket.get('status')
            if status != 'new':
                raise Exception(f'Ticket is not new, it already has '
                                f'{len(created_requests)} requests created')

            # In case the blacklist was updated after the ticket was created
            for input_dataset in ticket.get('input_datasets'):
                dataset = input_dataset.split('/')[1]
                if dataset in dataset_blacklist:
                    raise Exception(
                        f'Input dataset {input_dataset} is not '
                        f'allowed because {dataset} is in blacklist')

            try:
                for input_dataset in ticket.get('input_datasets'):
                    last_request_prepid = None
                    for step_index, step in enumerate(ticket.get('steps')):
                        subcampaign_name = step['subcampaign']
                        processing_string = step['processing_string']
                        time_per_event = step['time_per_event']
                        size_per_event = step['size_per_event']
                        priority = step['priority']
                        new_request_json = {
                            'subcampaign': subcampaign_name,
                            'priority': priority,
                            'processing_string': processing_string,
                            'time_per_event': time_per_event,
                            'size_per_event': size_per_event,
                            'input': {
                                'dataset': '',
                                'request': ''
                            }
                        }

                        if step_index == 0:
                            new_request_json['input']['dataset'] = input_dataset
                        else:
                            new_request_json['input']['request'] = last_request_prepid

                        try:
                            runs = request_controller.get_runs(
                                subcampaign_name, input_dataset)
                            new_request_json['runs'] = runs
                            lumis = request_controller.get_lumisections(
                                subcampaign_name, runs)
                            new_request_json['lumisections'] = lumis
                        except Exception as ex:
                            self.logger.error(
                                'Error getting runs or lumis for %s %s %s: \n%s',
                                subcampaign_name, input_dataset,
                                processing_string, ex)

                        request = request_controller.create(new_request_json)
                        created_requests.append(request)
                        last_request_prepid = request.get('prepid')

                        self.logger.info('Created %s', last_request_prepid)

                created_request_prepids = [
                    r.get('prepid') for r in created_requests
                ]
                ticket.set('created_requests', created_request_prepids)
                ticket.set('status', 'done')
                ticket.add_history('create_requests', created_request_prepids,
                                   None)
                database.save(ticket.get_json())
            except Exception as ex:
                # Delete created requests if there was an Exception
                for created_request in reversed(created_requests):
                    request_controller.delete(
                        {'prepid': created_request.get('prepid')})

                # And reraise the exception
                raise ex

        return [r.get('prepid') for r in created_requests]
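
Examples #6 and #19 share the same rollback idiom: if anything raises while requests or RelVals are being created in a loop, the objects created so far are deleted in reverse order and the exception is re-raised. A minimal sketch of that idiom, assuming the controller create/delete API used above (the create_all name is hypothetical):

def create_all(controller, payloads):
    """Hypothetical helper: bulk create with reverse-order rollback on failure"""
    created = []
    try:
        for payload in payloads:
            created.append(controller.create(payload))
    except Exception:
        # Undo in reverse order so later objects are removed before the ones they depend on
        for obj in reversed(created):
            controller.delete({'prepid': obj.get('prepid')})

        # Re-raise so the caller still sees the original failure
        raise

    return [obj.get('prepid') for obj in created]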