import argparse
import asyncio
import glob
import time

# Project-local modules (alertProducer, avroUtils) and the schedule_delays
# coroutine are assumed importable from the package.


def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('broker', type=str,
                        help='Hostname or IP and port of Kafka broker.')
    parser.add_argument('topic', type=str,
                        help='Name of Kafka topic stream to push to.')
    args = parser.parse_args()

    # Configure Avro writer schema and data
    schema_files = ["../sample-avro-alert/schema/diasource.avsc",
                    "../sample-avro-alert/schema/diaobject.avsc",
                    "../sample-avro-alert/schema/ssobject.avsc",
                    "../sample-avro-alert/schema/cutout.avsc",
                    "../sample-avro-alert/schema/alert.avsc"]

    # Configure producer connection to Kafka broker
    conf = {'bootstrap.servers': args.broker}
    streamProducer = alertProducer.AlertProducer(args.topic,
                                                 schema_files, **conf)

    # Scan for avro files
    root = "./data"
    files = [f for f in glob.glob("/".join([root, "*.avro"]))]
    files.sort()

    def send_visit(f):
        print('visit:', f[15:20], '\ttime:', time.time())
        # Load alert contents
        with open(f, mode='rb') as file_data:
            data = avroUtils.readSchemaData(file_data)
            # TODO replace Avro files with visits having better S/N cut
            # for now, limit to first 10,000 alerts (currently have ~70,000)
            alert_count = 0
            for record in data:
                if alert_count < 10000:
                    streamProducer.send(record, encode=True)
                    alert_count += 1
                else:
                    break
        streamProducer.flush()

    loop = asyncio.get_event_loop()
    asyncio.ensure_future(schedule_delays(loop, send_visit, files))
    loop.run_forever()
    loop.close()
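# The visit and batch producer scripts in this section hand control to a
# schedule_delays coroutine that is not reproduced here. A minimal sketch of
# what it might look like, inferred from the call sites: the visit scripts
# pass an iterable of files, while the batch script further below passes a
# batch count. The 39-second cadence mirrors the '--repeat' help text in that
# script; the real implementation may differ.

import asyncio


async def schedule_delays(loop, func, arg, interval=39):
    """Hypothetical sketch: fire func on a fixed cadence, then stop the loop.

    If arg is an int, call func() that many times; otherwise treat arg as an
    iterable and call func(item) once per item.
    """
    if isinstance(arg, int):
        for _ in range(arg):
            func()
            await asyncio.sleep(interval)
    else:
        for item in arg:
            func(item)
            await asyncio.sleep(interval)
    loop.stop()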
import argparse

# Project-local modules (alertProducer, avroUtils) are assumed importable
# from the package.


def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('topic', type=str,
                        help='Name of Kafka topic stream to push to.')
    parser.add_argument('avrofile', type=str,
                        help='File from which to read alerts.')
    avrogroup = parser.add_mutually_exclusive_group()
    avrogroup.add_argument('--encode', dest='avroFlag', action='store_true',
                           help='Encode to Avro format. (default)')
    avrogroup.add_argument('--encode-off', dest='avroFlag',
                           action='store_false',
                           help='Do not encode to Avro format.')
    parser.set_defaults(avroFlag=True)
    args = parser.parse_args()

    # Configure Avro writer schema and data
    schema_files = ["../sample-avro-alert/schema/diasource.avsc",
                    "../sample-avro-alert/schema/diaobject.avsc",
                    "../sample-avro-alert/schema/ssobject.avsc",
                    "../sample-avro-alert/schema/cutout.avsc",
                    "../sample-avro-alert/schema/alert.avsc"]

    # Configure producer connection to Kafka broker
    conf = {'bootstrap.servers': 'kafka:9092'}
    streamProducer = alertProducer.AlertProducer(args.topic,
                                                 schema_files, **conf)

    # Scan for avro files
    root = "../data"
    afile = "/".join((root, args.avrofile))
    print('visit:', args.avrofile[7:12])

    # Load template alert contents
    with open(afile, mode='rb') as file_data:
        data = avroUtils.readSchemaData(file_data)
        for record in data:
            streamProducer.send(record, encode=args.avroFlag)
    # Ensure buffered messages are delivered before exiting
    streamProducer.flush()
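# avroUtils.readSchemaData is a project-local helper not reproduced in this
# section. A minimal sketch, assuming it wraps fastavro's container-file
# reader to iterate decoded records:

import fastavro


def readSchemaData(bytes_io):
    """Sketch: return an iterator over records in an Avro container file.

    fastavro.reader pulls the writer schema from the file header itself.
    """
    return fastavro.reader(bytes_io)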
import argparse
import asyncio
import glob
import time

# The project-local alertProducer module, the retrieve_alerts helper
# (sketched below), and the schedule_delays coroutine are assumed importable.


def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('broker', type=str,
                        help='Hostname or IP and port of Kafka broker.')
    parser.add_argument('topic', type=str,
                        help='Name of Kafka topic stream to push to.')
    args = parser.parse_args()

    # Configure producer connection to Kafka broker
    conf = {'bootstrap.servers': args.broker}
    streamProducer = alertProducer.AlertProducer(args.topic, **conf)

    # Scan for avro files
    root = "./data"
    files = [f for f in glob.glob("/".join([root, "*.avro"]))]
    files.sort()

    def send_visit(f):
        print('visit:', f[15:20], '\ttime:', time.time())
        # Load alert contents
        with open(f, mode='rb') as file_data:
            # TODO replace Avro files with visits having better S/N cut
            # for now, limit to first 10,000 alerts (currently have ~70,000)
            schema, alert_packets = retrieve_alerts(file_data)
            alert_count = 0
            for record in alert_packets:
                if alert_count < 10000:
                    streamProducer.send(schema, record)
                    alert_count += 1
                else:
                    break
        streamProducer.flush()

    loop = asyncio.get_event_loop()
    asyncio.ensure_future(schedule_delays(loop, send_visit, files))
    loop.run_forever()
    loop.close()
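# This variant passes the writer schema to send() alongside each record, so
# retrieve_alerts must return both. The helper's name and return shape come
# from the call site above; the body below is an assumed fastavro sketch.

import fastavro


def retrieve_alerts(fp):
    """Sketch: return (writer_schema, record iterator) for an Avro file object."""
    reader = fastavro.reader(fp)
    return reader.writer_schema, reader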
import argparse
import asyncio
import json
import time

# The project-local alertProducer module, the load_stamp helper (sketched
# below), and the schedule_delays coroutine are assumed importable.


def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('topic', type=str,
                        help='Name of Kafka topic stream to push to.')
    parser.add_argument('alertnum', type=int,
                        help='Number of alerts to send.')
    stampgroup = parser.add_mutually_exclusive_group()
    stampgroup.add_argument('--stamps', dest='stamps', action='store_true',
                            help='Send postage stamp cutouts. (default)')
    stampgroup.add_argument('--no-stamps', dest='stamps',
                            action='store_false',
                            help='Do not send postage stamp cutouts.')
    avrogroup = parser.add_mutually_exclusive_group()
    avrogroup.add_argument('--encode', dest='avroFlag', action='store_true',
                           help='Encode to Avro format. (default)')
    avrogroup.add_argument('--encode-off', dest='avroFlag',
                           action='store_false',
                           help='Do not encode to Avro format.')
    parser.add_argument('--repeat', action='store_true',
                        help='Send alert batches repeating every 39th second.'
                             ' Default of 2215 batches (~24 hours).')
    parser.add_argument('--max-repeats', type=int, dest='batchnum',
                        help='Override default number of batches to send.')
    parser.set_defaults(stamps=True, avroFlag=True, batchnum=2215)
    args = parser.parse_args()

    # Configure producer connection to Kafka broker
    conf = {'bootstrap.servers': 'kafka:9092'}

    # Configure Avro writer schema and data
    schema_files = ["../sample-avro-alert/schema/diasource.avsc",
                    "../sample-avro-alert/schema/diaobject.avsc",
                    "../sample-avro-alert/schema/ssobject.avsc",
                    "../sample-avro-alert/schema/cutout.avsc",
                    "../sample-avro-alert/schema/alert.avsc"]
    json_path = "../sample-avro-alert/data/alert.json"
    cutoutdiff_path = "../sample-avro-alert/examples/stamp-676.fits"
    cutouttemp_path = "../sample-avro-alert/examples/stamp-677.fits"

    # Load template alert contents
    with open(json_path) as file_text:
        json_data = json.load(file_text)

    # Add postage stamp cutouts
    if args.stamps:
        json_data['cutoutDifference'] = load_stamp(cutoutdiff_path)
        json_data['cutoutTemplate'] = load_stamp(cutouttemp_path)

    # Configure Kafka producer with topic and schema
    streamProducer = alertProducer.AlertProducer(args.topic,
                                                 schema_files, **conf)

    def send_batch():
        start_time = time.time()
        print('batch start time:{:.3f}'.format(start_time))
        for i in range(args.alertnum):
            streamProducer.send(json_data, encode=args.avroFlag)
        streamProducer.flush()
        finish_time = time.time()
        print('batch finish time:{:.3f}'.format(finish_time))
        print('batch delta time:{:.3f}'.format(finish_time - start_time))

    # Send alerts to producer
    print('start: {}'.format(time.time()))
    if args.repeat:
        loop = asyncio.get_event_loop()
        asyncio.ensure_future(schedule_delays(loop, send_batch,
                                              args.batchnum))
        loop.run_forever()
        loop.close()
    else:
        send_batch()
    print('finish: {}'.format(time.time()))
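# load_stamp must return an object that slots into the alert's
# cutoutDifference/cutoutTemplate fields. A minimal sketch, assuming the
# cutout record carries a file name plus the raw FITS bytes; the exact field
# names depend on cutout.avsc.

import os


def load_stamp(file_path):
    """Sketch: load a FITS postage stamp into a cutout dict for the alert."""
    _, file_name = os.path.split(file_path)
    with open(file_path, mode='rb') as f:
        stamp_bytes = f.read()
    # Field names are assumed to match cutout.avsc
    return {'fileName': file_name, 'stampData': stamp_bytes}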
import argparse
import inspect
import os
import sys

# The project-local alertConsumer, alertProducer, filterBase, and filters
# modules are assumed importable from the package.


def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('broker', type=str,
                        help='Hostname or IP and port of Kafka broker.')
    parser.add_argument('topic', type=str,
                        help='Name of Kafka topic to listen to.')
    parser.add_argument('filterNum', type=int,
                        help='Number of the filter in range (1-100) '
                             'to deploy.')
    parser.add_argument('--group', type=str,
                        help='Globally unique name of the consumer group. '
                             'Consumers in the same group will share messages '
                             '(i.e., only one consumer will receive a message, '
                             'as in a queue). Default is value of $HOSTNAME.')
    avrogroup = parser.add_mutually_exclusive_group()
    avrogroup.add_argument('--decode', dest='avroFlag', action='store_true',
                           help='Decode from Avro format. (default)')
    avrogroup.add_argument('--decode-off', dest='avroFlag',
                           action='store_false',
                           help='Do not decode from Avro format.')
    parser.set_defaults(avroFlag=True)
    args = parser.parse_args()
    fnum = args.filterNum

    # Configure Avro reader schema
    schema_files = ["../sample-avro-alert/schema/diasource.avsc",
                    "../sample-avro-alert/schema/diaobject.avsc",
                    "../sample-avro-alert/schema/ssobject.avsc",
                    "../sample-avro-alert/schema/cutout.avsc",
                    "../sample-avro-alert/schema/alert.avsc"]

    # Configure consumer connection to Kafka broker
    cconf = {'bootstrap.servers': args.broker,
             'default.topic.config': {'auto.offset.reset': 'smallest'}}
    if args.group:
        cconf['group.id'] = args.group
    else:
        cconf['group.id'] = os.environ['HOSTNAME']

    pconf = {'bootstrap.servers': args.broker}

    # Choose filter class to deploy from filters module
    filter_class = inspect.getmembers(filters, inspect.isclass)[fnum][1]

    # Name output stream using filter class name
    topic_name = filter_class.__name__
    prod = alertProducer.AlertProducer(topic_name, schema_files, **pconf)
    exp = filterBase.StreamExporter(prod)
    apply_filter = filter_class(exp)

    # Start consumer and print alert stream
    with alertConsumer.AlertConsumer(args.topic,
                                     schema_files, **cconf) as streamReader:
        while True:
            try:
                msg = streamReader.poll(decode=args.avroFlag)

                if msg is None:
                    continue
                else:
                    # Apply filter to each alert
                    apply_filter(msg)

            except alertConsumer.EopError as e:
                # Write when reaching end of partition
                sys.stderr.write(e.message)
            except IndexError:
                sys.stderr.write('%% Data cannot be decoded\n')
            except UnicodeDecodeError:
                sys.stderr.write('%% Unexpected data format received\n')
            except KeyboardInterrupt:
                sys.stderr.write('%% Aborted by user\n')
                sys.exit()
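# The script deploys whichever class inspect.getmembers finds at index fnum
# in the filters module. A minimal sketch of the interface that usage
# implies: a filter is constructed with a StreamExporter and called once per
# decoded alert. The exporter is assumed to be callable, and the field names
# used here (diaSource, snr) are illustrative rather than taken from the
# actual schemas.


class HighSnrFilter:
    """Sketch: forward alerts whose source S/N exceeds a threshold."""

    def __init__(self, exporter, snr_cut=5.0):
        self.exporter = exporter  # assumed to forward alerts to a producer
        self.snr_cut = snr_cut

    def __call__(self, alert):
        # Field names are illustrative; adjust to the actual alert schema
        src = alert.get('diaSource', {})
        if src.get('snr', 0.0) > self.snr_cut:
            self.exporter(alert)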