def insert_or_update_alert(alert):
    try:
        existing_alert = db.session.query(Alert).filter_by(
            alert_candid=alert.alert_candid).limit(1).all()
        if not existing_alert:
            db.session.add(alert)
            db.session.commit()
            ingest_delay = datetime.now() - alert.wall_time
            logger.info('Successfully inserted object', extra={'tags': {
                'candid': alert.alert_candid,
                'ingest_delay': str(ingest_delay),
                'ingest_delay_seconds': ingest_delay.total_seconds(),
                'successful_ingest': 'true'
            }})
            return IngestionStatus.SUCCESS
        else:
            logger.info('Alert already exists in database.', extra={'tags': {
                'candid': alert.alert_candid,
                'successful_ingest': 'false'
            }})
            return IngestionStatus.DUPLICATE
    except exc.SQLAlchemyError as e:
        db.session.rollback()
        logger.warning('Failed to insert object', extra={'tags': {
            'candid': alert.alert_candid,
            'sql_error': e.orig.args[0],
            'successful_ingest': 'false'
        }})
        return IngestionStatus.FAILED
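# IngestionStatus is not defined in this excerpt; the following is a minimal
# sketch of the shape insert_or_update_alert() and do_ingest() assume,
# including the not_failed() helper used below. The real definition elsewhere
# in the repo may differ.
from enum import Enum


class IngestionStatus(Enum):
    SUCCESS = 1
    DUPLICATE = 2
    FAILED = 3

    def not_failed(self):
        # A duplicate alert is not treated as a failure, so the consumer can
        # still commit the offset for that message.
        return self is not IngestionStatus.FAILED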
def do_ingest(encoded_packet):
    f_data = base64.b64decode(encoded_packet)
    freader = fastavro.reader(io.BytesIO(f_data))
    for packet in freader:
        start_ingest = datetime.now()
        ingestion_status, candid = ingest_avro(packet)
        if ingestion_status == IngestionStatus.SUCCESS:
            logger.info('Time to ingest avro', extra={'tags': {
                'ingest_time': (datetime.now() - start_ingest).total_seconds()
            }})
            fname = '{}.avro'.format(packet['candid'])
            start_upload = datetime.now()
            upload_avro(io.BytesIO(f_data), fname, packet)
            logger.info('Time to upload avro', extra={'tags': {
                'upload_time': (datetime.now() - start_upload).total_seconds()
            }})
    return ingestion_status.not_failed(), candid
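# Illustrative helper only: running a locally saved alert packet through
# do_ingest() the same way the Kafka consumer below does. The file path and
# helper name are hypothetical, not part of the original module.
def ingest_local_avro(path='example_alert.avro'):
    with open(path, 'rb') as f:
        encoded = base64.b64encode(f.read()).decode('utf-8')
    return do_ingest(encoded)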
def upload_avro(f, fname, packet):
    date_key = packet_path(packet)
    filename = '{0}{1}'.format(date_key, fname)
    try:
        s3.Object(BUCKET_NAME, filename).put(
            Body=f,
            ContentDisposition=f'attachment; filename={filename}',
            ContentType='avro/binary'
        )
        logger.info('Successfully uploaded file to s3', extra={'tags': {
            'filename': filename,
            'successful_upload': 'true'
        }})
    except ClientError:
        logger.warning('Failed to upload file to s3', extra={'tags': {
            'filename': filename,
            'successful_upload': 'false'
        }})
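# Neither packet_path() nor the s3 / BUCKET_NAME globals appear in this
# excerpt; a minimal sketch of what upload_avro() assumes. The bucket name and
# the date-based key scheme here are illustrative, not the real ones.
import os

import boto3

BUCKET_NAME = os.getenv('S3_BUCKET', 'example-alert-archive')
s3 = boto3.resource('s3')


def packet_path(packet):
    # Hypothetical key prefix: group archived packets by ingestion date. The
    # real implementation may instead derive this from the packet contents.
    now = datetime.utcnow()
    return '{}/{:02}/{:02}/'.format(now.year, now.month, now.day)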
def update_topic_list(consumer, current_topic_date=None):
    current_date = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
    if current_topic_date is None or (current_date - current_topic_date).days > 0:
        current_topics = []
        for i in range(0, 7):
            topic_date = current_date - timedelta(days=i)
            # Add ZTF topics
            current_topics.append('ztf_{}{:02}{:02}_programid1'.format(
                topic_date.year, topic_date.month, topic_date.day
            ))
            # Add TESS public topics
            current_topics.append('ztf_{}{:02}{:02}_programid3_public'.format(
                topic_date.year, topic_date.month, topic_date.day
            ))
        consumer.subscribe(current_topics)
        logger.info('New topics', extra={'tags': {
            'subscribed_topics': ['{0} - {1}'.format(topic.topic, topic.partition)
                                  for topic in consumer.assignment()],
            'subscribed_topics_count': len(consumer.assignment())
        }})
    return current_date
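# For example, if update_topic_list() runs on 2023-01-15 (UTC) it subscribes
# the consumer to fourteen topics covering the previous seven days:
#   ztf_20230115_programid1, ztf_20230115_programid3_public,
#   ztf_20230114_programid1, ztf_20230114_programid3_public, ...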
def read_avros(url):
    with requests.get(url, stream=True) as response:
        try:
            with tarfile.open(fileobj=response.raw, mode='r|gz') as tar:
                while True:
                    member = tar.next()
                    if member is None:
                        logger.info('Done ingesting this package')
                        break
                    # extractfile() returns None for non-file members such as directories
                    f = tar.extractfile(member)
                    if f:
                        fencoded = base64.b64encode(f.read()).decode('UTF-8')
                        do_ingest(fencoded)
            logger.info('done sending tasks', extra={'tags': {
                'processed_tarfile': url
            }})
        except tarfile.ReadError:
            logger.info('tarfile is empty', extra={'tags': {'tarfile': url}})
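# The configuration globals and the on_commit callback used by start_consumer()
# below are defined outside this excerpt; a minimal sketch with illustrative
# defaults only:
import os

PRODUCER_HOST = os.getenv('PRODUCER_HOST', 'localhost')
PRODUCER_PORT = os.getenv('PRODUCER_PORT', '9092')
GROUP_ID = os.getenv('GROUP_ID', 'example-ingest-group')


def on_commit(err, partitions):
    # confluent_kafka invokes this with the commit error (or None) and the list
    # of committed TopicPartition objects.
    if err is not None:
        logger.error('Offset commit failed: {}'.format(err))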
def start_consumer():
    logger.info('Starting consumer', extra={'tags': {
        'group_id': GROUP_ID
    }})
    consumer = Consumer({
        'bootstrap.servers': f'{PRODUCER_HOST}:{PRODUCER_PORT}',
        'group.id': GROUP_ID,
        'auto.offset.reset': 'earliest',
        'queued.max.messages.kbytes': 100000,
        'enable.auto.commit': 'false',
        'on_commit': on_commit
    })
    current_date = update_topic_list(consumer)
    while True:
        # Refresh the topic subscription roughly every five minutes
        if int(time.time()) % 300 == 0:
            current_date = update_topic_list(consumer, current_topic_date=current_date)
        msg = consumer.poll(1)
        if msg is None:
            continue
        if msg.error():
            logger.error('Consumer error: {}'.format(msg.error()))
            continue
        process_start_time = datetime.now()
        alert = base64.b64encode(msg.value()).decode('utf-8')
        logger.info('Received alert from stream')
        success, candid = do_ingest(alert)
        logger.info(
            'Finished processing message from {topic} with offset {offset}'.format(
                topic=msg.topic() + '-' + str(msg.partition()),
                offset=msg.offset()),
            extra={'tags': {
                'candid': candid,
                'success': success,
                'record_processing_time': (datetime.now() - process_start_time).total_seconds(),
                'processing_latency': datetime.now().timestamp() - msg.timestamp()[1] / 1000
            }}
        )
        consumer.commit(msg)
    consumer.close()
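# Entry point (assumed): run the Kafka consumer loop when the module is
# executed directly.
if __name__ == '__main__':
    start_consumer()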