def test_match_dns_fingerprints_match_country():
    """A TR measurement whose DNS answer is 195.175.254.2 gives a country match.

    The expected result is a single ``dns_full`` match with locality
    ``country``.
    """
    fp.setup_fingerprints()

    # System-resolver query for beeg.com: a CNAME answer followed by an A
    # answer carrying the IP the assertion below expects to be matched.
    cname_answer = {"hostname": "beeg.com", "answer_type": "CNAME", "ttl": 0}
    a_answer = {"ipv4": "195.175.254.2", "answer_type": "A", "ttl": 0}
    query = {
        "engine": "system",
        "resolver_hostname": None,
        "query_type": "A",
        "hostname": "beeg.com",
        "answers": [cname_answer, a_answer],
        "failure": None,
        "resolver_port": None,
    }
    msm = {"probe_cc": "TR", "test_keys": {"queries": [query]}}

    expected = [{"dns_full": "195.175.254.2", "locality": "country"}]
    found = fp.match_fingerprints(msm)
    assert found == expected
def main():
    """Process every can listed on S3 for ``conf.day``.

    Steps, as performed below:

    1. Parse the configuration, create the signed (write) and unsigned
       (read) S3 clients, connect to the database and load fingerprints.
    2. List the cans for ``conf.day``; download each one from
       ``conf.src_bucket`` to a local path identical to its S3 key.
    3. Hand every measurement tuple yielded by ``s3f.load_multiple`` to
       ``process_measurement`` and then delete the local copy of the can.
    4. Call ``finalize_jsonl`` on every entry in ``buf`` whose file
       descriptor is still open.

    Any exception raised while processing a can propagates to the caller;
    the local copy of that can is removed first.
    """
    conf = parse_args()
    log.info(f"From bucket {conf.src_bucket} to {conf.dst_bucket}")
    s3sig = create_s3_client(conf)  # signed client for writing
    db_conn = psycopg2.connect(conf.db_uri)
    db.setup(conf)  # setup db conn inside db module
    setup_fingerprints()

    # Fetch msmts for one day

    buf = {}  # "<cc> <testname>" -> jsonlf / fd / jsonl_s3path
    seen_uids = set()  # Avoid uploading duplicates

    # raw/20210601/00/SA/webconnectivity/2021060100_SA_webconnectivity.n0.0.jsonl.gz
    # jsonl_s3path = f"raw/{ts}/00/{cc}/{testname}/{jsonlf.name}"

    s3uns = s3f.create_s3_client()  # unsigned client for reading

    cans_fns = s3f.list_cans_on_s3_for_a_day(s3uns, conf.day)
    cans_fns = sorted(cans_fns)  # this is not enough to sort by time
    tot_size = sum(size for _, size in cans_fns)
    processed_size = 0
    log.info(f"{tot_size/1024/1024/1024} GB to process")
    log.info(f"{len(cans_fns)} cans to process")

    # TODO make assertions on msmt
    # TODO add consistency check on trivial id found in fastpath table
    for can_fn, size in cans_fns:
        # The listed sizes can add up to zero even when cans are present;
        # report 0.0 in that case instead of raising ZeroDivisionError.
        percentage = 100 * processed_size / tot_size if tot_size else 0.0
        log.info(f"Processed percentage: {percentage}")
        log.info(f"Opening can {can_fn}")

        # The can is stored locally under the same relative path as its key.
        local_can = Path(can_fn)
        local_can.parent.mkdir(parents=True, exist_ok=True)
        s3uns.download_file(conf.src_bucket, can_fn, can_fn)
        try:
            for msm_tup in s3f.load_multiple(can_fn):
                process_measurement(
                    msm_tup, buf, seen_uids, conf, s3sig, db_conn
                )
            processed_size += size
        finally:
            # Remove the downloaded can even when processing fails, so a
            # crash does not leave it behind on local disk.
            local_can.unlink()

    log.info("Finish jsonl files still open")
    for json_entities in buf.values():
        for e in json_entities:
            if e.fd.closed:
                continue
            finalize_jsonl(s3sig, db_conn, conf, e)

    log.info("Exiting")
def test_match_fingerprints_match_country():
    """An MY response body containing "Makluman/Notification" gives a country match.

    The expected result is a single ``body_match`` entry with locality
    ``country``.
    """
    fp.setup_fingerprints()

    response = {"body": "foo ... Makluman/Notification ... foo"}
    msm = {
        "probe_cc": "MY",
        "test_keys": {"requests": [{"response": response}]},
    }

    expected = [
        {"body_match": "Makluman/Notification", "locality": "country"},
    ]
    found = fp.match_fingerprints(msm)
    assert found == expected
def test_match_fingerprints_dict_body():
    """A response body given as a base64 dict (not a string) yields no match."""
    fp.setup_fingerprints()

    # from 20200108T054856Z-web_connectivity-20200109T102441Z_AS42610_613KNyjuQqiuloY1a391dhZccSDz9M1MD30P6EpUIWSByjcq4T-AS42610-RU-probe-0.2.0.json
    encoded_body = {
        "data": "q82BgAABAAEAAAAAA3d3dwdleGFtcGxlA2NvbQAAAQABwAwAAQABAAA/+AAEXbjYIg==",
        "format": "base64",
    }
    request = {"response": {"body": encoded_body}}
    msm = {"probe_cc": "MY", "test_keys": {"requests": [request]}}

    found = fp.match_fingerprints(msm)
    assert found == []
def test_match_fingerprints_match_zz():
    """A "Kerio Control Embedded Web Server" Server header gives a local match.

    The probe country is IE and the body is empty; the expected result is a
    single ``header_full`` match on the lowercased header name ``server``
    with locality ``local``.
    """
    fp.setup_fingerprints()

    response = {
        "body": "",
        "headers": {"Server": "Kerio Control Embedded Web Server"},
    }
    msm = {
        "probe_cc": "IE",
        "test_keys": {"requests": [{"response": response}]},
    }

    expected = [
        {
            "header_full": "Kerio Control Embedded Web Server",
            "header_name": "server",
            "locality": "local",
        },
    ]
    matches = fp.match_fingerprints(msm)
    # Show the actual matches in the failure message for easier debugging.
    assert matches == expected, matches
def test_match_fingerprints_no_match():
    """A measurement with an empty request list yields no matches."""
    fp.setup_fingerprints()

    test_keys = {"requests": []}
    msm = {"probe_cc": "IE", "test_keys": test_keys}

    found = fp.match_fingerprints(msm)
    assert found == []
def setup_module(module):
    """Module-level pytest setup: configure ``fp`` and load the fingerprints.

    Sets the ``devel``, ``update`` and ``interact`` flags on ``fp.conf``,
    sets up directories relative to the current working directory, then
    loads the fingerprints.
    """
    settings = fp.conf
    settings.devel = True
    settings.update = False
    settings.interact = False

    working_dir = Path(os.getcwd())
    fp.setup_dirs(fp.conf, working_dir)
    fp.setup_fingerprints()