Example #1
def write_to_ssm(extract, config):
    """forwards usage records to SSM"""

    # ensure outgoing directory existence
    ssm_input_path = os.path.expanduser(config['ssm_input_path'])
    if not os.access(ssm_input_path, os.F_OK):
        os.makedirs(ssm_input_path, 0o755)
    
    # only write a file when there are usage records
    if len(extract) > 0:
        output = config['ssm_input_header'] + "\n"

        # iterate over VMs
        for vmname in extract.keys():
            logging.debug("generating ssm input file for VM %s" % vmname)
            for item in orderedFields:
                logging.debug("generating record %s: %s" % (item, extract[vmname][item]) )
                output += "%s: %s\n" % ( item, extract[vmname][item] )
            output += config['ssm_input_sep'] + "\n"

        # write file
        try:
            dirq = QueueSimple(ssm_input_path)
            dirq.add(output)
        except Exception:
            logging.error('unable to push message in apel-ssm queue <%s>' % ssm_input_path)
    else:
        logging.debug('no usage records, skip forwarding to SSM')
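Not part of the original example: a minimal consumer sketch showing how a message queued this way could be read back with the same dirq API that the test snippets further down exercise (lock, get, remove). The spool path is illustrative.

from dirq.QueueSimple import QueueSimple

queue = QueueSimple('/var/spool/apel/ssm/outgoing')  # illustrative path
for name in queue:                # iteration yields element names
    if not queue.lock(name):      # skip elements held by another consumer
        continue
    message = queue.get(name)     # header, per-VM records and separators
    print(message)
    queue.remove(name)            # drop the element once it has been handled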
Example #2
def write_to_ssm(extract, config):
    """forwards usage records to SSM"""

    # ensure outgoing directory existence
    ssm_input_path = os.path.expanduser(config['ssm_input_path'])
    if not os.access(ssm_input_path, os.F_OK):
        os.makedirs(ssm_input_path, 0o755)

    # only write a file when there are usage records
    if len(extract) > 0:
        output = config['ssm_input_header'] + "\n"

        # iterate over VMs
        for vmname in extract.keys():
            logging.debug("generating ssm input file for VM %s" % vmname)
            for item in orderedFields:
                logging.debug("generating record %s: %s" %
                              (item, extract[vmname][item]))
                output += "%s: %s\n" % (item, extract[vmname][item])
            output += config['ssm_input_sep'] + "\n"

        # write file
        try:
            dirq = QueueSimple(ssm_input_path)
            dirq.add(output)
        except Exception:
            logging.error('unable to push message in apel-ssm queue <%s>' %
                          ssm_input_path)
    else:
        logging.debug('no usage records, skip forwarding to SSM')
Example #3
class DirectoryQueue(MsgBase):
    def __init__(self, configHolder):
        super(DirectoryQueue, self).__init__(configHolder)

        self.queue = QueueSimple(self.msg_queue)

    def send(self, message):
        self.queue.add(message)
Example #4
    def test_get_delete_messages_one(self):
        qpub = QueueSimple(self.test_dir, umask=0)
        qpub.add('123')

        qsub = DirectoryQueueClient(self.ch)
        qsub.connect()
        assert qsub.get_messages() == [123]
        assert qpub.count() == 0
Example #5
    def test08_count(self):
        'QueueSimple.count()'
        qs = QueueSimple(self.qdir)
        # add "normal" element
        qs.add('foo')
        # simply add a file (fake element) into the elements directory
        fake_elem = os.listdir(self.qdir)[0] + '/' + 'foo.bar'
        open(self.qdir + '/' + fake_elem, 'w').write('')
        self.assertEqual(qs.count(), 1)
Example #6
    def test09_remove(self):
        'QueueSimple.remove()'
        qs = QueueSimple(self.qdir, granularity=1)
        for _ in range(5):
            qs.add('foo')
        assert qs.count() == 5
        for elem in qs:
            qs.lock(elem)
            qs.remove(elem)
        self.assertEqual(qs.count(), 0)
Example #7
    def test10_purge_oneDirOneElement(self):
        'QueueSimple.purge() one directory & element'
        qs = QueueSimple(self.qdir)
        qs.add('foo')
        self.assertEqual(qs.count(), 1)
        elem = qs.first()
        qs.lock(elem)
        elem_path_lock = self.qdir + '/' + elem + LOCKED_SUFFIX
        self.assert_(os.path.exists(elem_path_lock) is True)
        time.sleep(2)
        qs.purge(maxlock=1)
        self.assert_(os.path.exists(elem_path_lock) is False)
        self.assertEqual(qs.count(), 1)
        self.assertEqual(len(os.listdir(self.qdir)), 1)
Example #8
    def test07_get(self):
        'QueueSimple.get()'
        data = 'foo'.encode()
        qs = QueueSimple(self.qdir)
        elem = qs.add(data)
        qs.lock(elem)
        self.assertEqual(qs.get(elem), data)
Example #9
    def test_get_delete_messages_max(self):
        num_messages_half = 5
        num_messages_all = 10

        qpub = QueueSimple(self.test_dir)
        for i in range(num_messages_all):
            qpub.add('{"message" : "%i"}' % i)
        
        qsub = DirectoryQueueClient(self.ch)
        qsub.connect()
        messages = qsub.get_messages(num_messages_half)
        assert len(messages) == num_messages_half
        assert qpub.count() == num_messages_half
        messages_read = qsub.get_messages(num_messages_half)
        messages_test = [{'message': str(i)} for i in range(num_messages_half, num_messages_all)]
        assert messages_read == messages_test
        assert qpub.count() == 0
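DirectoryQueueClient itself is not shown on this page, so the following is only a guess at its behaviour, reconstructed from the assertions above (messages come back as parsed JSON, consumed elements are removed, an optional maximum is honoured) and built on the same QueueSimple calls used elsewhere here.

import json

from dirq.QueueSimple import QueueSimple

def get_messages_sketch(queue_dir, max_messages=None):
    """Hypothetical reader reproducing the behaviour asserted above."""
    queue = QueueSimple(queue_dir)
    messages = []
    for name in queue:
        if max_messages is not None and len(messages) >= max_messages:
            break
        if not queue.lock(name):
            continue
        messages.append(json.loads(queue.get(name)))
        queue.remove(name)
    return messages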
Example #10
    def test11_purge_multDirMultElement(self):
        'QueueSimple.purge() multiple directories & elements'
        qs = QueueSimple(self.qdir, granularity=1)

        qs.add('foo')
        assert qs.count() == 1
        time.sleep(2)
        qs.add('bar')
        assert qs.count() == 2
        assert len(os.listdir(self.qdir)) == 2
        qs.purge()
        assert qs.count() == 2

        elem = qs.first()
        qs.lock(elem)
        qs.remove(elem)
        assert qs.count() == 1
        qs.purge()
        assert len(os.listdir(self.qdir)) == 1

        time.sleep(2)
        qs.add('baz')
        assert len(os.listdir(self.qdir)) == 2
        for elem in qs:
            qs.lock(elem)
        elem1 = qs.first()
        lock_path1 = self.qdir + '/' + elem1 + LOCKED_SUFFIX
        assert os.path.exists(lock_path1) is True
        os.utime(lock_path1, (time.time() - 25, time.time() - 25))
        qs.purge(maxlock=10)
        assert os.path.exists(lock_path1) is False

        elem2 = qs.next()
        lock_path2 = self.qdir + '/' + elem2 + LOCKED_SUFFIX
        assert os.path.exists(lock_path2) is True
Example #11
    def test04_add(self):
        'QueueSimple.add()'
        data = 'foo bar'
        qs = QueueSimple(self.qdir)
        elem = qs.add(data)
        assert open(self.qdir + '/' + elem).read() == data
Example #12
class DbUnloader(object):

    APEL_HEADERS = {
        JobRecord: JOB_MSG_HEADER,
        SummaryRecord: SUMMARY_MSG_HEADER,
        NormalisedSummaryRecord: NORMALISED_SUMMARY_MSG_HEADER,
        SyncRecord: SYNC_MSG_HEADER,
        CloudRecord: CLOUD_MSG_HEADER,
        CloudSummaryRecord: CLOUD_SUMMARY_MSG_HEADER
    }

    RECORD_TYPES = {
        'VJobRecords': JobRecord,
        'VSummaries': SummaryRecord,
        'VSuperSummaries': SummaryRecord,
        'VNormalisedSummaries': NormalisedSummaryRecord,
        'VNormalisedSuperSummaries': NormalisedSummaryRecord,
        'VSyncRecords': SyncRecord,
        'VCloudRecords': CloudRecord,
        'VCloudSummaries': CloudSummaryRecord,
        'VStarRecords': StorageRecord
    }

    # all record types for which withholding DNs is a valid option
    MAY_WITHHOLD_DNS = [JobRecord, SyncRecord, CloudRecord]

    def __init__(self,
                 db,
                 qpath,
                 inc_vos=None,
                 exc_vos=None,
                 local=False,
                 withhold_dns=False):
        self._db = db
        outpath = os.path.join(qpath, "outgoing")
        self._msgq = QueueSimple(outpath)
        self._inc_vos = inc_vos
        self._exc_vos = exc_vos
        self._local = local
        self._withhold_dns = withhold_dns

    def _get_base_query(self, record_type):
        '''
        Set up a query object containing the logic which is common for 
        all users of this DbUnloader.
        '''
        query = Query()

        if record_type in (JobRecord, SummaryRecord, NormalisedSummaryRecord,
                           SyncRecord):
            if self._inc_vos is not None:
                query.VO_in = self._inc_vos
                log.info('Sending only these VOs: ')
                log.info(self._inc_vos)
            elif self._exc_vos is not None:
                log.info('Excluding these VOs: ')
                log.info(self._exc_vos)
                query.VO_notin = self._exc_vos
            if not self._local:
                query.InfrastructureType = 'grid'

        return query

    def unload_all(self, table_name, car=False):
        '''
        Unload all records from the specified table.
        '''
        log.info('Unloading all records from %s.', table_name)

        record_type = self.RECORD_TYPES[table_name]

        query = self._get_base_query(record_type)
        msgs, records = self._write_messages(record_type, table_name, query,
                                             car)
        return msgs, records

    def unload_sync(self):
        '''
        Unload all records from the SyncRecords table or view.
        '''
        log.info('Writing sync messages.')
        query = self._get_base_query(SyncRecord)
        msgs = 0
        records = 0
        for batch in self._db.get_sync_records(query=query):
            records += len(batch)
            self._write_apel(batch)
            msgs += 1
        return msgs, records

    def unload_gap(self, table_name, start, end, ur=False):
        '''
        Unload all records from the JobRecords table whose EndTime falls
        within the provided dates (inclusive).
        '''
        record_type = self.RECORD_TYPES[table_name]

        if record_type != JobRecord:
            raise ApelDbException("Can only gap publish for JobRecords.")

        start_tuple = [int(x) for x in start.split('-')]
        end_tuple = [int(x) for x in end.split('-')]
        # get the start of the start date
        start_date = datetime.date(*start_tuple)
        start_datetime = datetime.datetime.combine(start_date, datetime.time())
        # get the end of the end date
        end_date = datetime.date(*end_tuple)
        end_datetime = datetime.datetime.combine(end_date, datetime.time())
        end_datetime += datetime.timedelta(days=1)

        log.info('Finding records with end times between:')
        log.info(start_datetime)
        log.info(end_datetime)
        query = self._get_base_query(record_type)
        query.EndTime_gt = start_datetime
        query.EndTime_le = end_datetime

        msgs, records = self._write_messages(record_type, table_name, query,
                                             ur)
        return msgs, records

    def unload_latest(self, table_name, ur=False):
        '''
        Unloads any records whose UpdateTime is later than the value 
        in the LastUpdated table.
        
        Returns (number of files, number of records)
        '''
        # Special case for [Normalised]SuperSummaries
        if table_name in ('VSuperSummaries', 'VNormalisedSuperSummaries'):
            msgs, records = self.unload_latest_super_summaries(table_name)
        else:
            record_type = self.RECORD_TYPES[table_name]

            query = self._get_base_query(record_type)
            since = self._db.get_last_updated()

            log.info('Getting records updated since: %s', since)
            if since is not None:
                query.UpdateTime_gt = str(since)

            msgs, records = self._write_messages(record_type, table_name,
                                                 query, ur)

            self._db.set_updated()

        return msgs, records

    def unload_latest_super_summaries(
        self,
        table_name,
        ur=False,
    ):
        """
        Unload (normalised) super summaries for current and preceding month

        Special case for the [Normalised]SuperSummaries table. Since it is
        generally updated by the SummariseJobs() procedure, all records will
        have been updated. Instead, send all records for the current
        month and the preceding month.
        """
        record_type = self.RECORD_TYPES[table_name]

        query = self._get_base_query(record_type)

        # It's actually simpler to use EarliestEndTime or LatestEndTime
        # to deduce the correct records.
        since = get_start_of_previous_month(datetime.datetime.now())

        log.info('Getting summaries for months since: %s', since.date())
        if since is not None:
            query.EarliestEndTime_ge = str(since)

        msgs, records = self._write_messages(record_type, table_name, query,
                                             ur)

        return msgs, records

    def _write_messages(self, record_type, table_name, query, ur):
        '''
        Write messages for all the records found in the specified table,
        according to the logic contained in the query object.
        '''
        if self._withhold_dns and record_type not in self.MAY_WITHHOLD_DNS:
            raise ApelDbException('Cannot withhold DNs for %s' %
                                  record_type.__name__)
        if record_type == StorageRecord and not ur:
            raise ApelDbException(
                'Cannot unload StorageRecords in APEL format')

        msgs = 0
        records = 0
        for batch in self._db.get_records(record_type, table_name,
                                          query=query):
            records += len(batch)
            if ur:
                self._write_xml(batch)
            else:
                self._write_apel(batch)
            msgs += 1

        return msgs, records

    def _write_xml(self, records):
        '''
        Write one message in the appropriate XML format to the outgoing 
        message queue.
        
        This is currently enabled only for CAR.
        '''
        buf = StringIO.StringIO()
        if type(records[0]) == JobRecord:
            XML_HEADER = '<?xml version="1.0" ?>'
            UR_OPEN = (
                '<urf:UsageRecords xmlns:urf="http://eu-emi.eu/namespace'
                's/2012/11/computerecord" xmlns:xsi="http://www.w3.org/2'
                '001/XMLSchema-instance" xsi:schemaLocation="http://eu-e'
                'mi.eu/namespaces/2012/11/computerecord car_v1.2.xsd">')
            UR_CLOSE = '</urf:UsageRecords>'
        # elif type(records[0]) == SummaryRecord:
        #     XML_HEADER = '<?xml version="1.0" ?>'
        #     UR_OPEN = ('<aur:SummaryRecords xmlns:aur="http://eu-emi.eu/names'
        #                'paces/2012/11/aggregatedcomputerecord" xmlns:urf="htt'
        #                'p://eu-emi.eu/namespaces/2012/11/computerecord" xmlns'
        #                ':xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:'
        #                'schemaLocation="http://eu-emi.eu/namespaces/2012/11/a'
        #                'ggregatedcomputerecord ">')
        #     UR_CLOSE = '</aur:SummaryRecords>'
        elif type(records[0]) == StorageRecord:
            XML_HEADER = '<?xml version="1.0" ?>'
            UR_OPEN = (
                '<sr:StorageUsageRecords xmlns:sr="http://eu-emi.eu/namespaces/2011/02/storagerecord">'
            )
            UR_CLOSE = '</sr:StorageUsageRecords>'
        else:
            raise ApelDbException(
                'Can only send URs for JobRecords and StorageRecords.')

        buf.write(XML_HEADER + '\n')
        buf.write(UR_OPEN + '\n')
        buf.write('\n'.join(
            [record.get_ur(self._withhold_dns) for record in records]))
        buf.write('\n' + UR_CLOSE + '\n')

        self._msgq.add(buf.getvalue())
        buf.close()
        del buf

    def _write_apel(self, records):
        '''
        Write one message in the APEL format to the outgoing 
        message queue.
        '''
        record_type = type(records[0])

        buf = StringIO.StringIO()
        buf.write(self.APEL_HEADERS[record_type] + ' \n')
        buf.write('%%\n'.join(
            [record.get_msg(self._withhold_dns) for record in records]))
        buf.write('%%\n')

        self._msgq.add(buf.getvalue())
        buf.close()
        del buf
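A hedged sketch of how a DbUnloader like the one above is typically driven. The ApelDb constructor arguments and the paths are assumptions (they are not part of this example); only the DbUnloader methods shown above are taken from it.

# Assumed setup: an APEL database handle (constructor arguments are a guess)
# and a spool directory whose "outgoing" subdirectory DbUnloader writes to.
db = ApelDb('mysql', 'localhost', 3306, 'apel', 'password', 'apelclient')
unloader = DbUnloader(db, '/var/spool/apel', inc_vos=None, exc_vos=None,
                      local=False, withhold_dns=False)

# Unload anything updated since the last run, in the APEL text format.
msgs, records = unloader.unload_latest('VSummaries', ur=False)
log.info('%d messages written for %d records', msgs, records)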
Example #13
def process_period(config, period):
    period_start = period['instant'] + dateutil.relativedelta.relativedelta(
        seconds=-period['range_sec'])
    print(
        f"Processing year {period['year']}, month {period['month']}, "
        f"querying from {period['instant'].isoformat()} and going back {period['range_sec']} s to {period_start.isoformat()}."
    )
    queries = QueryLogic(queryRange=(str(period['range_sec']) + 's'))

    # SSL generally not used for Prometheus access within a cluster
    # Docs on instant query API: https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries
    prom = PrometheusConnect(url=config.prometheus_server, disable_ssl=True)
    prom_connect_params = {
        'time': period['instant'].isoformat(),
        'timeout': config.query_timeout
    }

    raw_results, results, result_lengths = {}, {}, []
    # iterate over each query (cputime, starttime, endtime, cores) producing raw_results['cputime'] etc.
    for query_name, query_string in vars(queries).items():
        # Each of these raw_results is a list of dicts. Each dict in the list represents an individual data point, and contains:
        # 'metric': a dict of one or more key-value pairs of labels, one of which is the pod name ('exported_pod').
        # 'value': a list in which the 0th element is the timestamp of the value, and the 1st element is the actual value we're interested in.
        print(f'Executing {query_name} query: {query_string}')
        t1 = timer()
        raw_results[query_name] = prom.custom_query(query=query_string,
                                                    params=prom_connect_params)
        t2 = timer()
        results[query_name] = dict(rearrange(raw_results[query_name]))
        result_lengths.append(len(results[query_name]))
        t3 = timer()
        print(
            f'Query finished in {t2 - t1} s, processed in {t3 - t2} s. Got {len(results[query_name])} items from {len(raw_results[query_name])} results. Peak RAM usage: {resource.getrusage(resource.RUSAGE_SELF).ru_maxrss}K.'
        )
        del raw_results[query_name]

    cputime = results['cputime']
    endtime = results['endtime']
    starttime = results['starttime']
    cores = results['cores']

    # Confirm the assumption that cputime should have the fewest entries, while starttime and cores may have additional ones
    # corresponding to jobs that have started but not finished yet, and endtime may have additional ones if there are pods without CPU resource requests.
    # We only want the jobs for which all values are available: start time, end time, CPU request.
    # Note that jobs which started last month and finished this month will be properly included and accounted in this month.
    assert len(cputime) == min(
        result_lengths), "cputime should be the shortest list"
    # However, jobs that finished last month may show up in this month's data if they are still present on the cluster this month (in Completed state).
    # Exclude them by filtering with a lambda (since you can't pass an argument to a function object AFAIK).
    endtime = dict(
        filter(lambda x: x[1] >= datetime.datetime.timestamp(period_start),
               endtime.items()))
    # Prepare to iterate over jobs which meet all criteria.
    valid_jobs = cputime.keys() & endtime.keys()
    # avoid sending empty records
    if len(valid_jobs) == 0:
        print('No records to process.')
        return

    sum_cputime = 0
    t4 = timer()
    for key in valid_jobs:
        assert endtime[key] > starttime[
            key], "job end time is before start time"
        # double check cputime calc of this job
        delta = abs(cputime[key] -
                    (endtime[key] - starttime[key]) * cores[key])
        assert delta < 0.001, "cputime calculation is inaccurate"
        sum_cputime += cputime[key]

    # CPU time as calculated here means (# cores * job duration), which apparently corresponds to
    # the concept of wall time in APEL accounting. It is not clear what CPU time means in APEL;
    # could be the actual CPU usage % integrated over the job (# cores * job duration * usage)
    # but this does not seem to be documented clearly. Some batch systems do not actually measure
    # this so it is not reported consistently or accurately. Some sites report CPU efficiency
    # (presumably defined as CPU time / wall time) of up to ~500% of the walltime, or
    # always fixed at 100%. In Kubernetes, the actual CPU usage % is tracked by metrics server
    # (not KSM), which is not meant to be used for monitoring or accounting purposes and is not
    # scraped by Prometheus. So just use walltime = cputime
    sum_cputime = round(sum_cputime)
    sum_walltime = sum_cputime

    print(f'total cputime: {sum_cputime}, total walltime: {sum_walltime}')
    # Write output to the message queue on local filesystem
    # https://dirq.readthedocs.io/en/latest/queuesimple.html#directory-structure
    dirq = QueueSimple(str(config.output_path))
    summary_output = summary_message(
        config,
        year=period['year'],
        month=period['month'],
        wall_time=sum_walltime,
        cpu_time=sum_cputime,
        n_jobs=len(endtime),
        # this appears faster than getting min/max during the dict iteration above
        first_end=round(min(endtime.values())),
        last_end=round(max(endtime.values())))
    sync_output = sync_message(config,
                               year=period['year'],
                               month=period['month'],
                               n_jobs=len(endtime))
    t5 = timer()
    summary_file = dirq.add(summary_output)
    sync_file = dirq.add(sync_output)
    print(f'Analyzed {len(endtime)} records in {t5 - t4} s.')
    print(f'Writing summary record to {config.output_path}/{summary_file}:')
    print('--------------------------------\n' + summary_output +
          '--------------------------------')
    print(f'Writing sync record to {config.output_path}/{sync_file}:')
    print('--------------------------------\n' + sync_output +
          '--------------------------------')
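The rearrange helper called above is not included in this example; based only on the inline comments about the result shape ('metric' labels with the pod name under 'exported_pod', and a [timestamp, value] pair under 'value'), a plausible sketch could be:

def rearrange(query_results):
    """Possible shape of the helper: yield (pod name, numeric value) pairs.

    Assumes each Prometheus instant-query result carries the pod name in
    item['metric']['exported_pod'] and the value as the second entry of
    item['value'], per the comments in process_period above.
    """
    for item in query_results:
        yield item['metric']['exported_pod'], float(item['value'][1])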
Example #14
class DbUnloader(object):
    
    APEL_HEADERS = {JobRecord: JOB_MSG_HEADER, 
                    SummaryRecord: SUMMARY_MSG_HEADER,
                    SyncRecord: SYNC_MSG_HEADER,
                    CloudRecord: CLOUD_MSG_HEADER}
    
    RECORD_TYPES = {'VJobRecords': JobRecord,
                    'VSummaries': SummaryRecord,
                    'VSuperSummaries': SummaryRecord,
                    'VSyncRecords': SyncRecord,
                    'VCloudRecords': CloudRecord}
    
    def __init__(self, db, qpath, inc_vos=None, exc_vos=None, local=False):
        self._db = db
        outpath = os.path.join(qpath, "outgoing")
        self._msgq = QueueSimple(outpath)
        self._inc_vos = inc_vos
        self._exc_vos = exc_vos
        self._local = local
        
    def _get_base_query(self, record_type):
        '''
        Set up a query object containing the logic which is common for 
        all users of this DbUnloader.
        ''' 
        query = Query()
        
        if record_type in (JobRecord, SummaryRecord, SyncRecord):
            if self._inc_vos is not None:
                query.VO_in = self._inc_vos
                log.info('Sending only these VOs: ')
                log.info(self._inc_vos)
            elif self._exc_vos is not None:
                log.info('Excluding these VOs: ')
                log.info(self._exc_vos)
                query.VO_notin = self._exc_vos
            if not self._local:
                query.InfrastructureType = 'grid'
            
        return query
        
    def unload_all(self, table_name, car=False):
        '''
        Unload all records from the specified table.
        '''
        log.info('Unloading all records from %s.' % table_name)
        
        record_type = self.RECORD_TYPES[table_name]
        
        query = self._get_base_query(record_type)
        msgs, records = self._write_messages(record_type, table_name, query, car)
        return msgs, records
        
    def unload_sync(self):
        log.info('Writing sync messages.')
        query = self._get_base_query(SyncRecord)
        msgs = 0
        records = 0
        for batch in self._db.get_sync_records(query=query):
            records += len(batch)
            self._write_apel(batch)
            msgs += 1
        return msgs, records
        
        
    def unload_gap(self, table_name, start, end, car=False):
        '''
        Unload all records from the JobRecords table whose EndTime falls
        within the provided dates (inclusive).
        '''
        record_type = self.RECORD_TYPES[table_name]
        
        if record_type != JobRecord:
            raise ApelDbException("Can only gap publish for JobRecords.")
        
        start_tuple = [ int(x) for x in start.split('-') ]
        end_tuple = [ int(x) for x in end.split('-') ]
        # get the start of the start date
        start_date = datetime.date(*start_tuple)
        start_datetime = datetime.datetime.combine(start_date, datetime.time())
        # get the end of the end date
        end_date = datetime.date(*end_tuple)
        end_datetime = datetime.datetime.combine(end_date, datetime.time())
        end_datetime += datetime.timedelta(days=1)
        
        log.info('Finding records with end times between:')
        log.info(start_datetime)
        log.info(end_datetime)
        query = self._get_base_query(record_type)
        query.EndTime_gt = start_datetime
        query.EndTime_le = end_datetime
            
        msgs, records = self._write_messages(record_type, table_name, query, car)
        return msgs, records
    
    def unload_latest(self, table_name, car=False):
        '''
        Unloads records from database to file.
        
        Returns (number of files, number of records).
        '''
        record_type = self.RECORD_TYPES[table_name]
        
        query = self._get_base_query(record_type)
        since = self._db.get_last_updated()
        
        log.info('Getting records updated since: %s' % since)
        if since is not None:
            query.UpdateTime_gt = str(since)
            
        msgs, records = self._write_messages(record_type, table_name, query, car)
        
        self._db.set_updated()
        
        return msgs, records
            
    def _write_messages(self, record_type, table_name, query, car):
        '''
        Write messages for all the records found in the specified table,
        according to the logic contained in the query object.
        '''
        msgs = 0
        records = 0
        for batch in self._db.get_records(record_type, table_name, query=query):
            records += len(batch)
            if car:
                self._write_xml(batch)
            else:
                self._write_apel(batch)
            msgs += 1
        
        return msgs, records
    
    def _write_xml(self, records):
        '''
        Write one message in the appropriate XML format to the outgoing 
        message queue.
        
        This currently works only for CAR.
        '''
        buf = StringIO.StringIO()
        if type(records[0]) == JobRecord:
            XML_HEADER = '<?xml version="1.0" ?>'
            UR_OPEN = '<urf:UsageRecords xmlns:urf="http://eu-emi.eu/namespaces/2012/11/computerecord" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://eu-emi.eu/namespaces/2012/11/computerecord car_v1.2.xsd">'
            UR_CLOSE = '</urf:UsageRecords>'
#        elif type(records[0]) == SummaryRecord:
#            XML_HEADER = '<?xml version="1.0" ?>'
#            UR_OPEN = '<aur:SummaryRecords xmlns:aur="http://eu-emi.eu/namespaces/2012/11/aggregatedcomputerecord" xmlns:urf="http://eu-emi.eu/namespaces/2012/11/computerecord" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://eu-emi.eu/namespaces/2012/11/aggregatedcomputerecord ">'
#            UR_CLOSE = '</aur:SummaryRecords>'
        else:
            raise ApelDbException('Can only send URs for JobRecords.')
            
        buf.write(XML_HEADER + '\n')
        buf.write(UR_OPEN + '\n')
        buf.write('\n'.join( [ record.get_ur() for record in records ] ))
        buf.write('\n' + UR_CLOSE + '\n')
        
        self._msgq.add(buf.getvalue())
        buf.close()
        del buf
    
    def _write_apel(self, records):
        '''
        Write one message in the APEL format to the outgoing 
        message queue.
        '''
        record_type = type(records[0])
    
        buf = StringIO.StringIO()
        buf.write(self.APEL_HEADERS[record_type] + ' \n')
        buf.write('%%\n'.join( [ record.get_msg() for record in records ] ))
        buf.write('%%\n')
        
        self._msgq.add(buf.getvalue())
        buf.close()
        del buf
Example #15
def queue(ip, comment, duration):
    from dirq.QueueSimple import QueueSimple
    dirq = QueueSimple(QDIR)
    rec = dict(ip=ip, comment=comment, duration=duration, ts=time.time())
    dirq.add(json.dumps(rec))
    return True
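An illustrative consumer for the queue written by queue() above (not part of the original snippet), using the same dirq and json calls; the record keys match those in the snippet and the queue directory is passed in rather than read from QDIR.

import json
import time

from dirq.QueueSimple import QueueSimple

def drain(qdir):
    """Read back and remove every blocking request queued by queue()."""
    dirq = QueueSimple(qdir)
    for name in dirq:
        if not dirq.lock(name):
            continue
        rec = json.loads(dirq.get(name))
        print('%s blocked for %ss (%s), queued at %s' % (
            rec['ip'], rec['duration'], rec['comment'], time.ctime(rec['ts'])))
        dirq.remove(name)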
Example #16
class DbUnloader(object):
    
    APEL_HEADERS = {JobRecord: JOB_MSG_HEADER, 
                    SummaryRecord: SUMMARY_MSG_HEADER,
                    SyncRecord: SYNC_MSG_HEADER,
                    CloudRecord: CLOUD_MSG_HEADER,
                    CloudSummaryRecord: CLOUD_SUMMARY_MSG_HEADER}
    
    RECORD_TYPES = {'VJobRecords': JobRecord,
                    'VSummaries': SummaryRecord,
                    'VSuperSummaries': SummaryRecord,
                    'VSyncRecords': SyncRecord,
                    'VCloudRecords': CloudRecord,
                    'VCloudSummaries': CloudSummaryRecord}
    
    # all record types for which withholding DNs is a valid option
    MAY_WITHHOLD_DNS = [JobRecord, SyncRecord, CloudRecord]
    
    def __init__(self, db, qpath, inc_vos=None, exc_vos=None, local=False, withhold_dns=False):
        self._db = db
        outpath = os.path.join(qpath, "outgoing")
        self._msgq = QueueSimple(outpath)
        self._inc_vos = inc_vos
        self._exc_vos = exc_vos
        self._local = local
        self._withhold_dns = withhold_dns
        
    def _get_base_query(self, record_type):
        '''
        Set up a query object containing the logic which is common for 
        all users of this DbUnloader.
        ''' 
        query = Query()
        
        if record_type in (JobRecord, SummaryRecord, SyncRecord):
            if self._inc_vos is not None:
                query.VO_in = self._inc_vos
                log.info('Sending only these VOs: ')
                log.info(self._inc_vos)
            elif self._exc_vos is not None:
                log.info('Excluding these VOs: ')
                log.info(self._exc_vos)
                query.VO_notin = self._exc_vos
            if not self._local:
                query.InfrastructureType = 'grid'
            
        return query
        
    def unload_all(self, table_name, car=False):
        '''
        Unload all records from the specified table.
        '''
        log.info('Unloading all records from %s.' % table_name)
        
        record_type = self.RECORD_TYPES[table_name]
        
        query = self._get_base_query(record_type)
        msgs, records = self._write_messages(record_type, table_name, query, car)
        return msgs, records
        
    def unload_sync(self):
        '''
        Unload all records from the SyncRecords table or view.
        '''
        log.info('Writing sync messages.')
        query = self._get_base_query(SyncRecord)
        msgs = 0
        records = 0
        for batch in self._db.get_sync_records(query=query):
            records += len(batch)
            self._write_apel(batch)
            msgs += 1
        return msgs, records
        
        
    def unload_gap(self, table_name, start, end, ur=False):
        '''
        Unload all records from the JobRecords table whose EndTime falls
        within the provided dates (inclusive).
        '''
        record_type = self.RECORD_TYPES[table_name]
        
        if record_type != JobRecord:
            raise ApelDbException("Can only gap publish for JobRecords.")
        
        start_tuple = [ int(x) for x in start.split('-') ]
        end_tuple = [ int(x) for x in end.split('-') ]
        # get the start of the start date
        start_date = datetime.date(*start_tuple)
        start_datetime = datetime.datetime.combine(start_date, datetime.time())
        # get the end of the end date
        end_date = datetime.date(*end_tuple)
        end_datetime = datetime.datetime.combine(end_date, datetime.time())
        end_datetime += datetime.timedelta(days=1)
        
        log.info('Finding records with end times between:')
        log.info(start_datetime)
        log.info(end_datetime)
        query = self._get_base_query(record_type)
        query.EndTime_gt = start_datetime
        query.EndTime_le = end_datetime
            
        msgs, records = self._write_messages(record_type, table_name, query, ur)
        return msgs, records
    
    def unload_latest(self, table_name, ur=False):
        '''
        Unloads any records whose UpdateTime is later than the value 
        in the LastUpdated table.
        
        Returns (number of files, number of records)
        '''
        # special case for SuperSummaries
        if table_name == 'VSuperSummaries':
            msgs, records = self.unload_latest_super_summaries()
        else:
            record_type = self.RECORD_TYPES[table_name]
            
            query = self._get_base_query(record_type)
            since = self._db.get_last_updated()
            
            log.info('Getting records updated since: %s' % since)
            if since is not None:
                query.UpdateTime_gt = str(since)
                
            msgs, records = self._write_messages(record_type, table_name, query, ur)
            
            self._db.set_updated()
        
        return msgs, records
        
    def unload_latest_super_summaries(self, ur=False):
        '''
        Special case for the SuperSummaries table.  Since it is generally
        updated by the SummariseJobs() procedure, all records will 
        have been updated.  Instead, send all records for the current
        month and the preceding month.
        ''' 
        table_name = 'VSuperSummaries'
        
        record_type = self.RECORD_TYPES[table_name]
        
        query = self._get_base_query(record_type)
        
        # It's actually simpler to use EarliestEndTime or LatestEndTime
        # to deduce the correct records.
        since = get_start_of_previous_month(datetime.datetime.now())
        
        log.info('Getting summaries for months since: %s' % since.date())
        if since is not None:
            query.EarliestEndTime_gt = str(since)
            
        msgs, records = self._write_messages(record_type, table_name, query, ur)
        
        return msgs, records
            
    def _write_messages(self, record_type, table_name, query, ur):
        '''
        Write messages for all the records found in the specified table,
        according to the logic contained in the query object.
        '''
        if self._withhold_dns and record_type not in self.MAY_WITHHOLD_DNS:
            raise ApelDbException('Cannot withhold DNs for %s' % record_type.__name__)
        
        msgs = 0
        records = 0
        for batch in self._db.get_records(record_type, table_name, query=query):
            records += len(batch)
            if ur:
                self._write_xml(batch)
            else:
                self._write_apel(batch)
            msgs += 1
        
        return msgs, records
    
    def _write_xml(self, records):
        '''
        Write one message in the appropriate XML format to the outgoing 
        message queue.
        
        This is currently enabled only for CAR.
        '''
        buf = StringIO.StringIO()
        if type(records[0]) == JobRecord:
            XML_HEADER = '<?xml version="1.0" ?>'
            UR_OPEN = '<urf:UsageRecords xmlns:urf="http://eu-emi.eu/namespaces/2012/11/computerecord" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://eu-emi.eu/namespaces/2012/11/computerecord car_v1.2.xsd">'
            UR_CLOSE = '</urf:UsageRecords>'
#        elif type(records[0]) == SummaryRecord:
#            XML_HEADER = '<?xml version="1.0" ?>'
#            UR_OPEN = '<aur:SummaryRecords xmlns:aur="http://eu-emi.eu/namespaces/2012/11/aggregatedcomputerecord" xmlns:urf="http://eu-emi.eu/namespaces/2012/11/computerecord" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://eu-emi.eu/namespaces/2012/11/aggregatedcomputerecord ">'
#            UR_CLOSE = '</aur:SummaryRecords>'
        else:
            raise ApelDbException('Can only send URs for JobRecords.')
            
        buf.write(XML_HEADER + '\n')
        buf.write(UR_OPEN + '\n')
        buf.write('\n'.join( [ record.get_ur(self._withhold_dns) for record in records ] ))
        buf.write('\n' + UR_CLOSE + '\n')
        
        self._msgq.add(buf.getvalue())
        buf.close()
        del buf
    
    def _write_apel(self, records):
        '''
        Write one message in the APEL format to the outgoing 
        message queue.
        '''
        record_type = type(records[0])
    
        buf = StringIO.StringIO()
        buf.write(self.APEL_HEADERS[record_type] + ' \n')
        buf.write('%%\n'.join( [ record.get_msg(self._withhold_dns) for record in records ] ))
        buf.write('%%\n')
        
        self._msgq.add(buf.getvalue())
        buf.close()
        del buf