import istarmap # noqa: F401 from utils import ( deg2dms, deg2hms, init_db_sync, load_config, log, Mongo, ) """ load config and secrets """ config = load_config(config_file="config.yaml")["kowalski"] # init db if necessary init_db_sync(config=config) def process_file(file, collection, batch_size): # connect to MongoDB: log("Connecting to DB") mongo = Mongo( host=config["database"]["host"], port=config["database"]["port"], replica_set=config["database"]["replica_set"], username=config["database"]["username"], password=config["database"]["password"], db=config["database"]["db"], verbose=0, )
def test_ingester(self):
    """End-to-end test of the Kafka -> ingester -> MongoDB/SkyPortal pipeline.

    Flow (each stage depends on the previous one — do not reorder):
      1. Ensure the DB is initialized and log/Kafka paths exist.
      2. If the broker is enabled, create test groups/filters in Fritz.
      3. Start ZooKeeper and a Kafka server via the Kafka shell scripts.
      4. (Re)create a per-UTC-date test topic ``ztf_<date>_programid1_test``.
      5. Produce the bundled (plus optionally gsutil-fetched) ZTF Avro alerts.
      6. Run ``watchdog`` to digest/ingest the topic, then stop Kafka/ZooKeeper.
      7. Assert exact alert counts in MongoDB and, if the broker is enabled,
         exact candidate/source counts posted to SkyPortal.
    """
    init_db_sync(config=config, verbose=True)

    log("Setting up paths")
    # path_kafka = pathlib.Path(config["path"]["kafka"])
    path_logs = pathlib.Path(config["path"]["logs"])

    if not path_logs.exists():
        path_logs.mkdir(parents=True, exist_ok=True)

    if config["misc"]["broker"]:
        log("Setting up test groups and filters in Fritz")
        # program: plain pass-through filter — candidates only, nothing saved
        program = Program(group_name="FRITZ_TEST", group_nickname="test")
        Filter(
            collection="ZTF_alerts",
            group_id=program.group_id,
            filter_id=program.filter_id,
        )
        # program2: autosave filter matching a single known object
        program2 = Program(group_name="FRITZ_TEST_AUTOSAVE", group_nickname="test2")
        Filter(
            collection="ZTF_alerts",
            group_id=program2.group_id,
            filter_id=program2.filter_id,
            autosave=True,
            pipeline=[{"$match": {"objectId": "ZTF20aaelulu"}}],
        )
        # program3: filter that updates annotations on repeat alerts
        program3 = Program(
            group_name="FRITZ_TEST_UPDATE_ANNOTATIONS", group_nickname="test3"
        )
        Filter(
            collection="ZTF_alerts",
            group_id=program3.group_id,
            filter_id=program3.filter_id,
            update_annotations=True,
            pipeline=[
                {"$match": {"objectId": "ZTF20aapcmur"}}
            ],  # there are 3 alerts in the test set for this oid
        )

    # clean up old Kafka logs
    log("Cleaning up Kafka logs")
    subprocess.run(["rm", "-rf", path_logs / "kafka-logs", "/tmp/zookeeper"])

    log("Starting up ZooKeeper at localhost:2181")
    # start ZooKeeper in the background
    cmd_zookeeper = [
        os.path.join(config["path"]["kafka"], "bin", "zookeeper-server-start.sh"),
        "-daemon",
        os.path.join(config["path"]["kafka"], "config", "zookeeper.properties"),
    ]

    with open(path_logs / "zookeeper.stdout", "w") as stdout_zookeeper:
        # p_zookeeper = \
        subprocess.run(
            cmd_zookeeper, stdout=stdout_zookeeper, stderr=subprocess.STDOUT
        )

    # take a nap while it fires up
    time.sleep(3)

    log("Starting up Kafka Server at localhost:9092")
    # start the Kafka server:
    cmd_kafka_server = [
        os.path.join(config["path"]["kafka"], "bin", "kafka-server-start.sh"),
        "-daemon",
        os.path.join(config["path"]["kafka"], "config", "server.properties"),
    ]
    with open(
        os.path.join(config["path"]["logs"], "kafka_server.stdout"), "w"
    ) as stdout_kafka_server:
        # p_kafka_server = subprocess.Popen(cmd_kafka_server, stdout=stdout_kafka_server, stderr=subprocess.STDOUT)
        # p_kafka_server = \
        subprocess.run(cmd_kafka_server)
    # take a nap while it fires up
    time.sleep(3)

    # get kafka topic names with kafka-topics command
    cmd_topics = [
        os.path.join(config["path"]["kafka"], "bin", "kafka-topics.sh"),
        "--zookeeper",
        config["kafka"]["zookeeper.test"],
        "-list",
    ]

    topics = (
        subprocess.run(cmd_topics, stdout=subprocess.PIPE)
        .stdout.decode("utf-8")
        .split("\n")[:-1]
    )
    log(f"Found topics: {topics}")

    # create a test ZTF topic for the current UTC date
    date = datetime.datetime.utcnow().strftime("%Y%m%d")
    topic_name = f"ztf_{date}_programid1_test"

    if topic_name in topics:
        # topic previously created? remove first
        cmd_remove_topic = [
            os.path.join(config["path"]["kafka"], "bin", "kafka-topics.sh"),
            "--zookeeper",
            config["kafka"]["zookeeper.test"],
            "--delete",
            "--topic",
            topic_name,
        ]
        # print(kafka_cmd)
        remove_topic = (
            subprocess.run(cmd_remove_topic, stdout=subprocess.PIPE)
            .stdout.decode("utf-8")
            .split("\n")[:-1]
        )
        log(f"{remove_topic}")
        log(f"Removed topic: {topic_name}")
        time.sleep(1)

    if topic_name not in topics:
        log(f"Creating topic {topic_name}")
        cmd_create_topic = [
            os.path.join(config["path"]["kafka"], "bin", "kafka-topics.sh"),
            "--create",
            "--bootstrap-server",
            config["kafka"]["bootstrap.test.servers"],
            "--replication-factor",
            "1",
            "--partitions",
            "1",
            "--topic",
            topic_name,
        ]
        with open(
            os.path.join(config["path"]["logs"], "create_topic.stdout"), "w"
        ) as stdout_create_topic:
            # p_create_topic = \
            subprocess.run(
                cmd_create_topic,
                stdout=stdout_create_topic,
                stderr=subprocess.STDOUT,
            )

    log("Starting up Kafka Producer")

    # spin up Kafka producer
    producer = Producer(
        {"bootstrap.servers": config["kafka"]["bootstrap.test.servers"]}
    )

    # small number of alerts that come with kowalski
    path_alerts = pathlib.Path("/app/data/ztf_alerts/20200202/")
    # grab some more alerts from gs://ztf-fritz/sample-public-alerts
    # (best-effort: failure here only leaves ids empty, the test continues)
    try:
        log("Grabbing more alerts from gs://ztf-fritz/sample-public-alerts")
        r = requests.get("https://www.googleapis.com/storage/v1/b/ztf-fritz/o")
        aa = r.json()["items"]
        ids = [pathlib.Path(a["id"]).parent for a in aa if "avro" in a["id"]]
    except Exception as e:
        log(
            "Grabbing alerts from gs://ztf-fritz/sample-public-alerts failed, but it is ok"
        )
        log(f"{e}")
        ids = []
    # -n: no-clobber, so alerts already on disk are not re-downloaded
    subprocess.run(
        [
            "gsutil",
            "-m",
            "cp",
            "-n",
            "gs://ztf-fritz/sample-public-alerts/*.avro",
            "/app/data/ztf_alerts/20200202/",
        ]
    )
    log(f"Fetched {len(ids)} alerts from gs://ztf-fritz/sample-public-alerts")

    # push!
    for p in path_alerts.glob("*.avro"):
        with open(str(p), "rb") as data:
            # Trigger any available delivery report callbacks from previous produce() calls
            producer.poll(0)

            log(f"Pushing {p}")

            # Asynchronously produce a message, the delivery report callback
            # will be triggered from poll() above, or flush() below, when the message has
            # been successfully delivered or failed permanently.
            producer.produce(topic_name, data.read(), callback=delivery_report)

    # Wait for any outstanding messages to be delivered and delivery report
    # callbacks to be triggered.
    producer.flush()

    log("Starting up Ingester")
    # digest and ingest
    watchdog(obs_date=date, test=True)
    log("Digested and ingested: all done!")

    # shut down Kafka server and ZooKeeper
    time.sleep(20)

    log("Shutting down Kafka Server at localhost:9092")
    # stop the Kafka server:
    cmd_kafka_server_stop = [
        os.path.join(config["path"]["kafka"], "bin", "kafka-server-stop.sh"),
        os.path.join(config["path"]["kafka"], "config", "server.properties"),
    ]
    with open(
        os.path.join(config["path"]["logs"], "kafka_server.stdout"), "w"
    ) as stdout_kafka_server:
        # p_kafka_server_stop = \
        subprocess.run(
            cmd_kafka_server_stop,
            stdout=stdout_kafka_server,
            stderr=subprocess.STDOUT,
        )

    log("Shutting down ZooKeeper at localhost:2181")
    cmd_zookeeper_stop = [
        os.path.join(config["path"]["kafka"], "bin", "zookeeper-server-stop.sh"),
        os.path.join(config["path"]["kafka"], "config", "zookeeper.properties"),
    ]
    with open(
        os.path.join(config["path"]["logs"], "zookeeper.stdout"), "w"
    ) as stdout_zookeeper:
        # p_zookeeper_stop = \
        subprocess.run(
            cmd_zookeeper_stop, stdout=stdout_zookeeper, stderr=subprocess.STDOUT
        )

    log("Checking the ZTF alert collection states")
    mongo = Mongo(
        host=config["database"]["host"],
        port=config["database"]["port"],
        replica_set=config["database"]["replica_set"],
        username=config["database"]["username"],
        password=config["database"]["password"],
        db=config["database"]["db"],
        verbose=True,
    )
    collection_alerts = config["database"]["collections"]["alerts_ztf"]
    collection_alerts_aux = config["database"]["collections"]["alerts_ztf_aux"]
    # exact counts expected from the bundled + fetched test alert set
    n_alerts = mongo.db[collection_alerts].count_documents({})
    assert n_alerts == 313
    n_alerts_aux = mongo.db[collection_alerts_aux].count_documents({})
    assert n_alerts_aux == 145

    if config["misc"]["broker"]:
        log("Checking that posting to SkyPortal succeeded")

        # check number of candidates that passed the first filter
        resp = requests.get(
            program.base_url + f"/api/candidates?groupIDs={program.group_id}",
            headers=program.headers,
            timeout=3,
        )
        assert resp.status_code == requests.codes.ok
        result = resp.json()
        assert result["status"] == "success"
        assert "data" in result
        assert "totalMatches" in result["data"]
        assert result["data"]["totalMatches"] == 88

        # check that the only candidate that passed the second filter (ZTF20aaelulu) got saved as Source
        resp = requests.get(
            program2.base_url + f"/api/sources?group_ids={program2.group_id}",
            headers=program2.headers,
            timeout=3,
        )
        assert resp.status_code == requests.codes.ok
        result = resp.json()
        assert result["status"] == "success"
        assert "data" in result
        assert "totalMatches" in result["data"]
        assert result["data"]["totalMatches"] == 1
        assert "sources" in result["data"]
        assert result["data"]["sources"][0]["id"] == "ZTF20aaelulu"