Example #1
def insert_or_update_alert(alert):
    """Insert the alert into the database unless one with the same candid already exists."""
    try:
        existing_alert = db.session.query(Alert).filter_by(alert_candid=alert.alert_candid).first()
        if not existing_alert:
            db.session.add(alert)
            db.session.commit()
            ingest_delay = datetime.now() - alert.wall_time
            logger.info('Successfully inserted object', extra={'tags': {
                'candid': alert.alert_candid,
                'ingest_delay': str(ingest_delay),
                'ingest_delay_seconds': ingest_delay.total_seconds(),
                'successful_ingest': 'true'
            }})
            return IngestionStatus.SUCCESS
        else:
            logger.info('Alert already exists in database.', extra={'tags': {
                'candid': alert.alert_candid,
                'successful_ingest': 'false'
            }})
            return IngestionStatus.DUPLICATE
    except exc.SQLAlchemyError as e:
        db.session.rollback()
        logger.warning('Failed to insert object', extra={'tags': {
            'candid': alert.alert_candid,
            'sql_error': e.orig.args[0],
            'successful_ingest': 'false'
        }})
        return IngestionStatus.FAILED
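
IngestionStatus is not defined in these excerpts. A minimal sketch consistent with how the examples use it (SUCCESS, DUPLICATE, and FAILED members, plus the not_failed() call in Example #2) might look like this:

from enum import Enum

class IngestionStatus(Enum):
    SUCCESS = 'success'
    DUPLICATE = 'duplicate'
    FAILED = 'failed'

    def not_failed(self):
        # A duplicate still counts as a successful outcome from the
        # caller's point of view; only FAILED is a failure.
        return self is not IngestionStatus.FAILED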
Example #2
def do_ingest(encoded_packet):
    """Decode a base64-encoded Avro alert, ingest it, and upload it to S3 on success."""
    f_data = base64.b64decode(encoded_packet)
    freader = fastavro.reader(io.BytesIO(f_data))
    # The return inside the loop assumes each alert file holds a single record.
    for packet in freader:
        start_ingest = datetime.now()
        ingestion_status, candid = ingest_avro(packet)
        if ingestion_status == IngestionStatus.SUCCESS:
            logger.info('Time to ingest avro', extra={'tags': {
                'ingest_time': (datetime.now() - start_ingest).total_seconds()
            }})
            fname = '{}.avro'.format(packet['candid'])
            start_upload = datetime.now()
            upload_avro(io.BytesIO(f_data), fname, packet)
            logger.info('Time to upload avro', extra={'tags': {
                'upload_time': (datetime.now() - start_upload).total_seconds()
            }})
        return ingestion_status.not_failed(), candid
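
A hypothetical invocation, encoding a local Avro file the same way the Kafka consumer in Example #6 encodes message payloads before handing them to do_ingest (the file path is an assumption):

import base64

with open('candidate.avro', 'rb') as avro_file:  # hypothetical local file
    encoded = base64.b64encode(avro_file.read()).decode('utf-8')
not_failed, candid = do_ingest(encoded)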
Example #3
def upload_avro(f, fname, packet):
    """Upload an Avro alert file to S3 under a date-based key prefix."""
    date_key = packet_path(packet)
    filename = '{0}{1}'.format(date_key, fname)
    try:
        s3.Object(BUCKET_NAME, filename).put(
            Body=f,
            ContentDisposition=f'attachment; filename={filename}',
            ContentType='avro/binary'
        )
        logger.info('Successfully uploaded file to s3', extra={'tags': {
            'filename': filename,
            'successful_upload': 'true'
        }})
    except ClientError:
        logger.warning('Failed to upload file to s3', extra={'tags': {
            'filename': filename,
            'successful_upload': 'false'
        }})
Example #4
def update_topic_list(consumer, current_topic_date=None):
    """Re-subscribe the consumer to the last seven days of ZTF/TESS topics, at most once per UTC day."""
    current_date = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
    if current_topic_date is None or (current_date - current_topic_date).days > 0:
        current_topics = []
        for i in range(0, 7):
            topic_date = current_date - timedelta(days=i)
            current_topics.append('ztf_{}{:02}{:02}_programid1'.format(
                topic_date.year,
                topic_date.month,
                topic_date.day
            ))  # Add ZTF topics
            current_topics.append('ztf_{}{:02}{:02}_programid3_public'.format(
                topic_date.year,
                topic_date.month,
                topic_date.day
            ))  # Add TESS public topics
        consumer.subscribe(current_topics)

        logger.info('New topics', extra={'tags': {
            'subscribed_topics': ['{0} - {1}'.format(topic.topic, topic.partition) for topic in consumer.assignment()],
            'subscribed_topics_count': len(consumer.assignment())
        }})
    return current_date
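
A sketch of the intended call pattern, mirroring Example #6: subscribe once at startup, then feed the returned date back in so the subscription is only rebuilt when the UTC day rolls over:

current_date = update_topic_list(consumer)
# ... later, inside the poll loop ...
current_date = update_topic_list(consumer, current_topic_date=current_date)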
Example #5
def read_avros(url):
    """Stream a gzipped tarball of Avro alerts from url and ingest each member."""
    with requests.get(url, stream=True) as response:
        try:
            with tarfile.open(fileobj=response.raw, mode='r|gz') as tar:
                while True:
                    member = tar.next()
                    if member is None:
                        logger.info('Done ingesting this package')
                        break
                    f = tar.extractfile(member)
                    if f is None:
                        # extractfile() returns None for directories and
                        # other non-file members; skip those.
                        continue
                    with f:
                        fencoded = base64.b64encode(f.read()).decode('UTF-8')
                        do_ingest(fencoded)
                logger.info('done sending tasks',
                            extra={'tags': {
                                'processed_tarfile': url
                            }})
        except tarfile.ReadError:
            logger.info('tarfile is empty', extra={'tags': {'tarfile': url}})
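
A hypothetical call, streaming a nightly tarball of public alerts (the URL is an assumption, not a real endpoint):

read_avros('https://example.com/alerts/ztf_public_20200101.tar.gz')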
Example #6
def start_consumer():
    """Poll the Kafka alert stream indefinitely, committing the offset after each message is processed."""
    logger.info('Starting consumer', extra={'tags': {
        'group_id': GROUP_ID
    }})
    consumer = Consumer({
        'bootstrap.servers': f'{PRODUCER_HOST}:{PRODUCER_PORT}',
        'group.id': GROUP_ID,
        'auto.offset.reset': 'earliest',
        'queued.max.messages.kbytes': 100000,
        'enable.auto.commit': 'false',
        'on_commit': on_commit
    })
    current_date = update_topic_list(consumer)

    try:
        while True:
            # Check roughly every five minutes whether the UTC day has
            # rolled over; update_topic_list() re-subscribes at most daily.
            if int(time.time()) % 300 == 0:
                current_date = update_topic_list(consumer, current_topic_date=current_date)
            msg = consumer.poll(1)
            if msg is None:
                continue
            if msg.error():
                logger.error('Consumer error: {}'.format(msg.error()))
                continue

            process_start_time = datetime.now()
            alert = base64.b64encode(msg.value()).decode('utf-8')
            logger.info('Received alert from stream')
            success, candid = do_ingest(alert)
            logger.info('Finished processing message from {topic} with offset {offset}'.format(
                        topic=msg.topic() + '-' + str(msg.partition()), offset=msg.offset()),
                        extra={'tags': {
                            'candid': candid,
                            'success': success,
                            'record_processing_time': (datetime.now() - process_start_time).total_seconds(),
                            'processing_latency': datetime.now().timestamp() - msg.timestamp()[1] / 1000
                        }}
                        )
            consumer.commit(msg)
    finally:
        # The loop only exits via an exception; close the consumer here so
        # it leaves the consumer group cleanly either way.
        consumer.close()
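
The on_commit callback passed to the Consumer is not shown. confluent-kafka invokes it with an error (or None) and the list of committed TopicPartition objects; a minimal sketch might just log the outcome:

def on_commit(err, partitions):
    if err is not None:
        logger.error('Offset commit failed: {}'.format(err))
    else:
        logger.info('Committed offsets for {} partition(s)'.format(len(partitions)))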