def api_private_website_stats() -> Response:
    """Measurement counts for a given URL, probe_cc and probe_asn, grouped by day
    ---
    responses:
      '200':
        description: TODO
    """
    # uses_pg_index counters_day_cc_asn_input_idx a BRIN index was not used at
    # all, but BTREE on (measurement_start_day, probe_cc, probe_asn, input)
    # made queries go from full scan to 50ms
    url = request.args.get("input")
    probe_cc = validate_probe_cc_query_param()
    probe_asn = validate_probe_asn_query_param()

    s = """SELECT
        toDate(measurement_start_time) AS test_day,
        countIf(anomaly = 't') as anomaly_count,
        countIf(confirmed = 't') as confirmed_count,
        countIf(msm_failure = 't') as failure_count,
        COUNT() AS total_count
    FROM fastpath
    WHERE measurement_start_time >= today() - interval '31 day'
        AND measurement_start_time < today()
        AND probe_cc = :probe_cc
        AND probe_asn = :probe_asn
        AND input = :input
    GROUP BY test_day ORDER BY test_day
    """
    d = {"probe_cc": probe_cc, "probe_asn": probe_asn, "input": url}
    results = query_click(sql.text(s), d)
    return cachedjson("0s", results=results)

def api_private_test_names():
    """List known test names with human-readable labels
    ---
    responses:
      '200':
        description: TODO
    """
    TEST_NAMES = {
        "bridge_reachability": "Bridge Reachability",
        "dash": "DASH",
        "dns_consistency": "DNS Consistency",
        "facebook_messenger": "Facebook Messenger",
        "http_header_field_manipulation": "HTTP Header Field Manipulation",
        "http_host": "HTTP Host",
        "http_invalid_request_line": "HTTP Invalid Request Line",
        "http_requests": "HTTP Requests",
        "meek_fronted_requests_test": "Meek Fronted Requests",
        "multi_protocol_traceroute": "Multi Protocol Traceroute",
        "ndt": "NDT",
        "psiphon": "Psiphon",
        "tcp_connect": "TCP Connect",
        "telegram": "Telegram",
        "tor": "Tor",
        "vanilla_tor": "Vanilla Tor",
        "web_connectivity": "Web Connectivity",
        "whatsapp": "WhatsApp",
        "riseupvpn": "RiseUp VPN",
    }
    test_names = [{"id": k, "name": v} for k, v in TEST_NAMES.items()]
    return cachedjson(1, test_names=test_names)

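# Illustrative only: a minimal client-side sketch of how the payload built above
# can be consumed. The route path and host are assumptions (they are not defined
# in this file), so adjust them to the actual deployment.
def _example_fetch_test_names() -> None:
    import requests  # assumed to be available in the client environment

    resp = requests.get("https://api.ooni.io/api/_/test_names", timeout=10)
    for entry in resp.json()["test_names"]:
        # each entry has the shape {"id": "web_connectivity", "name": "Web Connectivity"}
        print(entry["id"], "->", entry["name"])
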
def check_report_id() -> Response:
    """Legacy. Used to check if a report_id existed in the fastpath table.
    Used by https://github.com/ooni/probe/issues/1034
    ---
    produces:
      - application/json
    parameters:
      - name: report_id
        in: query
        type: string
    responses:
      200:
        description: Always returns True.
        schema:
          type: object
          properties:
            v:
              type: integer
              description: version number of this response
            found:
              type: boolean
              description: True
        example: { "found": true, "v": 0 }
    """
    return cachedjson("1s", v=0, found=True)

def api_private_global_by_month() -> Response:
    """Provide global summary of measurements
    Sources: fastpath table
    ---
    responses:
      '200':
        description: JSON struct TODO
    """
    q = """SELECT
        COUNT(DISTINCT probe_asn) AS networks_by_month,
        COUNT(DISTINCT probe_cc) AS countries_by_month,
        COUNT() AS measurements_by_month,
        toStartOfMonth(measurement_start_time) AS month
    FROM fastpath
    WHERE measurement_start_time > toStartOfMonth(today() - interval 2 year)
        AND measurement_start_time < toStartOfMonth(today() + interval 1 month)
    GROUP BY month ORDER BY month
    """
    rows = query_click(sql.text(q), {})
    rows = list(rows)

    n = [{"date": r["month"], "value": r["networks_by_month"]} for r in rows]
    c = [{"date": r["month"], "value": r["countries_by_month"]} for r in rows]
    m = [{"date": r["month"], "value": r["measurements_by_month"]} for r in rows]
    expand_dates(n)
    expand_dates(c)
    expand_dates(m)
    return cachedjson(
        "1d", networks_by_month=n, countries_by_month=c, measurements_by_month=m
    )

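# Illustrative only: a self-contained sketch of the row-to-series pivot done
# above, run on hand-made sample rows. The numbers are placeholders, not real
# data, and expand_dates (defined elsewhere in this module) is not applied here.
def _example_global_by_month_pivot() -> dict:
    sample_rows = [
        {"month": "2021-01-01", "networks_by_month": 3, "countries_by_month": 2, "measurements_by_month": 10},
        {"month": "2021-02-01", "networks_by_month": 4, "countries_by_month": 2, "measurements_by_month": 12},
    ]
    return {
        "networks_by_month": [{"date": r["month"], "value": r["networks_by_month"]} for r in sample_rows],
        "countries_by_month": [{"date": r["month"], "value": r["countries_by_month"]} for r in sample_rows],
        "measurements_by_month": [{"date": r["month"], "value": r["measurements_by_month"]} for r in sample_rows],
    }
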
def api_private_website_network_tests() -> Response:
    """Measurement counts by ASN for a given probe_cc over the last 31 days
    ---
    parameters:
      - name: probe_cc
        in: query
        type: string
        description: The two letter country code
    responses:
      '200':
        description: TODO
    """
    probe_cc = validate_probe_cc_query_param()
    s = """SELECT
        COUNT() AS count,
        probe_asn
    FROM fastpath
    WHERE measurement_start_time >= today() - interval '31 day'
        AND measurement_start_time < today()
        AND probe_cc = :probe_cc
    GROUP BY probe_asn
    ORDER BY count DESC
    """
    results = query_click(sql.text(s), {"probe_cc": probe_cc})
    return cachedjson("0s", results=results)

def api_private_country_overview() -> Response:
    """Country-specific overview
    ---
    responses:
      '200':
        description: { "first_bucket_date":"2012-12-01", "measurement_count":6659891, "network_count":333}
    """
    # TODO: add circumvention_tools_blocked im_apps_blocked
    #       middlebox_detected_networks websites_confirmed_blocked
    probe_cc = validate_probe_cc_query_param()

    s = """SELECT
        toDate(MIN(measurement_start_time)) AS first_bucket_date,
        COUNT() AS measurement_count,
        COUNT(DISTINCT probe_asn) AS network_count
    FROM fastpath
    WHERE probe_cc = :probe_cc
        AND measurement_start_time > '2012-12-01'
    """
    r = query_click_one_row(sql.text(s), {"probe_cc": probe_cc})
    assert r
    # FIXME: websites_confirmed_blocked
    return cachedjson("1d", **r)

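# Illustrative only: a minimal client-side sketch for the country overview
# above. The route path and host are assumptions (they are not defined in this
# file).
def _example_fetch_country_overview(probe_cc: str = "IT") -> dict:
    import requests  # assumed to be available in the client environment

    resp = requests.get(
        "https://api.ooni.io/api/_/country_overview",
        params={"probe_cc": probe_cc},
        timeout=10,
    )
    # expected keys: first_bucket_date, measurement_count, network_count
    return resp.json()
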
def api_private_country_overview():
    """Country-specific overview
    ---
    responses:
      '200':
        description: { "first_bucket_date":"2012-12-01", "measurement_count":6659891, "network_count":333}
    """
    # TODO: add circumvention_tools_blocked im_apps_blocked
    #       middlebox_detected_networks websites_confirmed_blocked
    probe_cc = validate_probe_cc_query_param()

    # OLAP-use-case
    s = sql.text(
        """SELECT
        MIN(measurement_start_day) AS first_bucket_date,
        SUM(measurement_count) AS measurement_count,
        COUNT(DISTINCT probe_asn) AS network_count
    FROM counters_asn_noinput
    WHERE probe_cc = :probe_cc
    """
    )
    q = current_app.db_session.execute(s, {"probe_cc": probe_cc})
    r = q.fetchone()
    # NOTE: we are hardcoding a fixed first_bucket_date
    # FIXME: websites_confirmed_blocked
    return cachedjson(
        24,
        first_bucket_date=r.first_bucket_date,
        measurement_count=r.measurement_count,
        network_count=r.network_count,
    )

def api_private_network_stats() -> Response:
    """Network speed statistics - not implemented
    ---
    parameters:
      - name: probe_cc
        in: query
        type: string
        description: The two letter country code
    responses:
      '200':
        description: TODO
    """
    # TODO: implement the stats from NDT in fastpath and then here
    probe_cc = validate_probe_cc_query_param()
    return cachedjson(
        "1d",
        {
            "metadata": {
                "current_page": 1,
                "limit": 10,
                "next_url": None,
                "offset": 0,
                "total_count": 0,
            },
            "results": [],
        },
    )

def list_collectors() -> Response:
    """Probe Services: list collectors
    ---
    responses:
      '200':
        description: List available collectors
    """
    # TODO load from configuration file
    j = [
        {"address": "httpo://guegdifjy7bjpequ.onion", "type": "onion"},
        {"address": "https://ams-pg.ooni.org:443", "type": "https"},
        {
            "address": "https://dkyhjv0wpi2dk.cloudfront.net",
            "front": "dkyhjv0wpi2dk.cloudfront.net",
            "type": "cloudfront",
        },
        {"address": "httpo://guegdifjy7bjpequ.onion", "type": "onion"},
        {"address": "https://ams-pg.ooni.org:443", "type": "https"},
        {
            "address": "https://dkyhjv0wpi2dk.cloudfront.net",
            "front": "dkyhjv0wpi2dk.cloudfront.net",
            "type": "cloudfront",
        },
    ]
    return cachedjson("1h", j)

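# Illustrative only: a sketch of how a probe-side client might pick a collector
# from the list returned above. The route path and host are assumptions (not
# defined in this file), and the sketch assumes the endpoint returns the list
# directly as the JSON body.
def _example_pick_https_collector() -> str:
    import requests  # assumed to be available in the client environment

    collectors = requests.get(
        "https://api.ooni.io/api/v1/collectors", timeout=10
    ).json()
    # prefer a plain https collector, e.g. "https://ams-pg.ooni.org:443"
    for c in collectors:
        if c["type"] == "https":
            return c["address"]
    raise RuntimeError("no https collector available")
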
def api_private_circumvention_runtime_stats() -> Response:
    """Runtime statistics on protocols used for circumvention,
    grouped by date, country, test_name.
    ---
    responses:
      200:
        description: Runtime quantiles (p50, p90) and counts grouped by
          date, probe_cc and test_name
    """
    q = """SELECT
        toDate(measurement_start_time) AS date,
        test_name,
        probe_cc,
        quantile(.5)(JSONExtractFloat(scores, 'extra', 'test_runtime')) AS p50,
        quantile(.9)(JSONExtractFloat(scores, 'extra', 'test_runtime')) AS p90,
        count() as cnt
    FROM fastpath
    WHERE test_name IN ['torsf', 'tor', 'stunreachability', 'psiphon', 'riseupvpn']
        AND measurement_start_time > today() - interval 6 month
        AND measurement_start_time < today() - interval 1 day
        AND JSONHas(scores, 'extra', 'test_runtime')
    GROUP BY date, probe_cc, test_name
    """
    try:
        r = query_click(sql.text(q), {})
        result = pivot_circumvention_runtime_stats(r)
        return cachedjson("1d", v=0, results=result)
    except Exception as e:
        return jsonify({"v": 0, "error": str(e)})

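# Illustrative only: the query above extracts the optional test runtime from
# the `scores` JSON column. A hypothetical value (an assumption, not taken from
# real data) that would match JSONHas(scores, 'extra', 'test_runtime') is:
_EXAMPLE_SCORES_JSON = """{"extra": {"test_runtime": 3.7}}"""
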
def check_report_id():
    """Check if a report_id exists in the fastpath table
    Used by https://github.com/ooni/probe/issues/1034
    ---
    responses:
      '200':
        description: TODO
    """
    report_id = request.args.get("report_id")
    s = sql.text("SELECT 1 FROM fastpath WHERE report_id = :rid LIMIT 1")
    try:
        q = current_app.db_session.execute(s, dict(rid=report_id))
        found = q.fetchone() is not None
        return cachedjson(5 / 3600, v=0, found=found)  # cache for 5 seconds
    except Exception as e:
        return cachedjson(0, v=0, error=str(e))

def close_report(report_id) -> Response:
    """Probe Services: Close report
    ---
    responses:
      '200':
        description: Close a report
    """
    return cachedjson("1h")

def api_private_vanilla_tor_stats() -> Response:
    """Tor statistics over ASN for a given CC
    ---
    parameters:
      - name: probe_cc
        in: query
        type: string
        minLength: 2
        required: true
    responses:
      '200':
        description: TODO
    """
    probe_cc = validate_probe_cc_query_param()
    blocked = 0
    nets = []
    s = """SELECT
        countIf(msm_failure = 't') as failure_count,
        toDate(MAX(measurement_start_time)) AS last_tested,
        probe_asn,
        COUNT() as total_count,
        total_count - countIf(anomaly = 't') AS success_count
    FROM fastpath
    WHERE measurement_start_time > today() - INTERVAL 6 MONTH
        AND probe_cc = :probe_cc
    GROUP BY probe_asn
    """
    q = query_click(sql.text(s), {"probe_cc": probe_cc})
    extras = {
        "test_runtime_avg": None,
        "test_runtime_max": None,
        "test_runtime_min": None,
    }
    for n in q:
        n.update(extras)
        nets.append(n)
        if n["total_count"] > 5:
            if float(n["success_count"]) / float(n["total_count"]) < 0.6:
                blocked += 1

    if not nets:
        return cachedjson("0s", networks=[], notok_networks=0, last_tested=None)

    lt = max(n["last_tested"] for n in nets)
    return cachedjson("0s", networks=nets, notok_networks=blocked, last_tested=lt)

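# Illustrative only: a self-contained restatement of the "blocked network"
# heuristic applied above, useful for eyeballing the threshold on sample numbers.
def _example_is_network_marked_blocked(success_count: int, total_count: int) -> bool:
    """A network is counted as not-OK when it has more than 5 measurements and
    fewer than 60% of them succeeded, e.g. (2, 10) -> True, (8, 10) -> False."""
    if total_count <= 5:
        return False
    return float(success_count) / float(total_count) < 0.6
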
def api_private_circumvention_stats_by_country() -> Response:
    """Aggregated statistics on protocols used for circumvention,
    grouped by country.
    ---
    responses:
      200:
        description: List of dicts with keys probe_cc and cnt
    """
    q = """SELECT probe_cc, COUNT(*) as cnt
    FROM fastpath
    WHERE measurement_start_time > today() - interval 6 month
        AND measurement_start_time < today() - interval 1 day
        AND test_name IN ['torsf', 'tor', 'stunreachability', 'psiphon', 'riseupvpn']
    GROUP BY probe_cc ORDER BY probe_cc
    """
    try:
        result = query_click(sql.text(q), {})
        return cachedjson("1d", v=0, results=result)
    except Exception as e:
        return cachedjson("0d", v=0, error=str(e))

def api_private_im_networks():
    """Instant messaging networks statistics
    ---
    responses:
      '200':
        description: TODO
    """
    log = current_app.logger
    probe_cc = validate_probe_cc_query_param()
    test_names = [sql.literal_column("test_name") == t for t in TEST_GROUPS["im"]]
    s = (
        select(
            [
                sql.text("SUM(measurement_count) as msm_count"),
                sql.text("MAX(measurement_start_day) AS last_tested"),
                sql.text("probe_asn"),
                sql.text("test_name"),
            ]
        )
        .where(
            and_(
                sql.text("measurement_start_day >= current_date - interval '31 day'"),
                # We exclude the last day to wait for the pipeline
                sql.text("measurement_start_day < current_date"),
                sql.text("probe_cc = :probe_cc"),
                or_(*test_names),
            )
        )
        .group_by(sql.text("test_name, probe_asn"))
        .select_from(sql.table("counters_asn_noinput"))
        .order_by(sql.text("test_name, msm_count DESC"))
    )
    results = {}
    q = current_app.db_session.execute(s, {"probe_cc": probe_cc})
    for r in q:
        e = results.get(
            r.test_name,
            {
                "anomaly_networks": [],
                "ok_networks": [],
                "last_tested": r.last_tested,
            },
        )
        e["ok_networks"].append(
            {
                "asn": r.probe_asn,
                "name": "",
                "total_count": r.msm_count,
                "last_tested": r.last_tested,
            }
        )
        if e["last_tested"] < r.last_tested:
            e["last_tested"] = r.last_tested
        results[r.test_name] = e
    return cachedjson(1, **results)  # TODO caching

def api_private_im_stats():
    """Instant messaging statistics
    ---
    responses:
      '200':
        description: TODO
    """
    test_name = request.args.get("test_name")
    if not test_name or test_name not in TEST_GROUPS["im"]:
        raise BadRequest("invalid test_name")

    probe_cc = validate_probe_cc_query_param()
    probe_asn = validate_probe_asn_query_param()
    probe_asn = int(probe_asn.replace("AS", ""))

    s = sql.text(
        """SELECT
        SUM(measurement_count) as total_count,
        measurement_start_day
    FROM counters_asn_noinput
    WHERE probe_cc = :probe_cc
        AND test_name = :test_name
        AND probe_asn = :probe_asn
        AND measurement_start_day >= current_date - interval '31 day'
        AND measurement_start_day < current_date
    GROUP BY measurement_start_day
    ORDER BY measurement_start_day
    """
    )
    query_params = {
        "probe_cc": probe_cc,
        "probe_asn": probe_asn,
        "test_name": test_name,
    }
    q = current_app.db_session.execute(s, query_params)
    tmp = {r.measurement_start_day: r for r in q}
    results = []
    days = [date.today() + timedelta(days=(d - 31)) for d in range(32)]
    for d in days:
        if d in tmp:
            e = {
                "test_day": isomid(tmp[d].measurement_start_day),
                "total_count": tmp[d].total_count,
            }
        else:
            e = {"test_day": isomid(d), "total_count": 0}
        e["anomaly_count"] = None
        results.append(e)

    return cachedjson(1, results=results)  # TODO caching

def api_private_countries_by_month():
    """Countries count by month
    ---
    responses:
      '200':
        description: TODO
    """
    # OLAP-use-case
    cols = [sql.text("countries_by_month"), sql.text("month")]
    q = select(cols).select_from(sql.table("global_by_month"))
    q = current_app.db_session.execute(q)
    li = [dict(date=r[1], value=r[0]) for r in q]
    return cachedjson(24, li)

def api_private_asn_by_month():
    """Network count by month
    ---
    responses:
      '200':
        description: [{"date":"2018-08-31","value":4411}, ... ]
    """
    # OLAP-use-case
    cols = [sql.text("networks_by_month"), sql.text("month")]
    q = select(cols).select_from(sql.table("global_by_month"))
    q = current_app.db_session.execute(q)
    li = [dict(date=r[1], value=r[0]) for r in q]
    return cachedjson(24, li)

def check_report_id():
    """Check if a report_id exists in the fastpath table
    Used by https://github.com/ooni/probe/issues/1034
    ---
    parameters:
      - name: report_id
        in: query
        example: 20210208T162755Z_ndt_DZ_36947_n1_8swgXi7xNuRUyO9a
        type: string
        minLength: 10
        required: true
    responses:
      200:
        description: Check if a report_id exists in the fastpath table.
          Cached for a short time.
        schema:
          type: object
          properties:
            v:
              type: integer
              description: version number of this response
            found:
              type: boolean
              description: True if found
            error:
              type: string
              description: error message
          example: { "found": true, "v": 0 }
    """
    report_id = request.args.get("report_id")
    s = sql.text("SELECT 1 FROM fastpath WHERE report_id = :rid LIMIT 1")
    try:
        q = current_app.db_session.execute(s, dict(rid=report_id))
        found = q.fetchone() is not None
        return cachedjson(2 / 60, v=0, found=found)  # cache for 2min
    except Exception as e:
        return cachedjson(0, v=0, error=str(e))

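# Illustrative only: a client-side sketch of checking a report_id. The route
# path and host are assumptions (they are not defined in this file); the
# report_id value is the example from the docstring above.
def _example_check_report_id() -> bool:
    import requests  # assumed to be available in the client environment

    resp = requests.get(
        "https://api.ooni.io/api/_/check_report_id",
        params={"report_id": "20210208T162755Z_ndt_DZ_36947_n1_8swgXi7xNuRUyO9a"},
        timeout=10,
    )
    return bool(resp.json().get("found"))
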
def api_private_im_stats() -> Response:
    """Instant messaging statistics
    ---
    responses:
      '200':
        description: TODO
    """
    test_name = request.args.get("test_name")
    if not test_name or test_name not in TEST_GROUPS["im"]:
        raise BadRequest("invalid test_name")

    probe_cc = validate_probe_cc_query_param()
    probe_asn = validate_probe_asn_query_param()
    probe_asn = int(probe_asn.replace("AS", ""))

    s = """SELECT
        COUNT() as total_count,
        toDate(measurement_start_time) AS test_day
    FROM fastpath
    WHERE probe_cc = :probe_cc
        AND test_name = :test_name
        AND probe_asn = :probe_asn
        AND measurement_start_time >= today() - interval '31 day'
        AND measurement_start_time < today()
    GROUP BY test_day
    ORDER BY test_day
    """
    query_params = {
        "probe_cc": probe_cc,
        "probe_asn": probe_asn,
        "test_name": test_name,
    }
    q = query_click(sql.text(s), query_params)
    tmp = {r["test_day"]: r for r in q}
    results = []
    days = [date.today() + timedelta(days=(d - 31)) for d in range(32)]
    for d in days:
        if d in tmp:
            test_day = isomid(tmp[d]["test_day"])
            total_count = tmp[d]["total_count"]
        else:
            test_day = isomid(d)
            total_count = 0
        e = dict(anomaly_count=None, test_day=test_day, total_count=total_count)
        results.append(e)

    return cachedjson("1h", results=results)  # TODO caching

def api_private_im_networks() -> Response:
    """Instant messaging networks statistics
    ---
    responses:
      '200':
        description: TODO
    """
    probe_cc = validate_probe_cc_query_param()
    s = """SELECT
        COUNT() AS total_count,
        '' AS name,
        toDate(MAX(measurement_start_time)) AS last_tested,
        probe_asn,
        test_name
    FROM fastpath
    WHERE measurement_start_time >= today() - interval 31 day
        AND measurement_start_time < today()
        AND probe_cc = :probe_cc
        AND test_name IN :test_names
    GROUP BY test_name, probe_asn
    ORDER BY test_name ASC, total_count DESC
    """
    results: Dict[str, Dict] = {}
    test_names = sorted(TEST_GROUPS["im"])
    q = query_click(sql.text(s), {"probe_cc": probe_cc, "test_names": test_names})
    for r in q:
        e = results.get(
            r["test_name"],
            {
                "anomaly_networks": [],
                "ok_networks": [],
                "last_tested": r["last_tested"],
            },
        )
        e["ok_networks"].append(
            {
                "asn": r["probe_asn"],
                "name": "",
                "total_count": r["total_count"],
                "last_tested": r["last_tested"],
            }
        )
        if e["last_tested"] < r["last_tested"]:
            e["last_tested"] = r["last_tested"]
        results[r["test_name"]] = e
    return cachedjson("1h", **results)  # TODO caching

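# Illustrative only: the response built above is keyed by test_name. A hand-made
# example with placeholder values (not real data) for a single IM test:
_EXAMPLE_IM_NETWORKS_RESPONSE = {
    "telegram": {
        "anomaly_networks": [],
        "ok_networks": [
            {"asn": 12345, "name": "", "total_count": 42, "last_tested": "2021-05-01"}
        ],
        "last_tested": "2021-05-01",
    }
}
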
def api_private_global_overview() -> Response:
    """Provide global summary of measurements
    Sources: fastpath table
    ---
    responses:
      '200':
        description: JSON struct TODO
    """
    q = """SELECT
        COUNT(DISTINCT(probe_asn)) AS network_count,
        COUNT(DISTINCT probe_cc) AS country_count,
        COUNT(*) AS measurement_count
    FROM fastpath
    """
    r = query_click_one_row(q, {})
    assert r
    return cachedjson("1d", **r)

def api_private_asn_by_month() -> Response:
    """Network count by month
    ---
    responses:
      '200':
        description: [{"date":"2018-08-31","value":4411}, ... ]
    """
    q = """SELECT
        COUNT(DISTINCT(probe_asn)) AS value,
        toStartOfMonth(measurement_start_time) AS date
    FROM fastpath
    WHERE measurement_start_time < toStartOfMonth(addMonths(now(), 1))
        AND measurement_start_time > toStartOfMonth(subtractMonths(now(), 24))
    GROUP BY date
    ORDER BY date
    """
    li = list(query_click(q, {}))
    expand_dates(li)
    return cachedjson("1d", li)

def api_private_test_coverage():
    """Return number of measurements per day across test categories
    ---
    responses:
      '200':
        description: '{"network_coverage: [...], "test_coverage": [...]}'
    """
    # TODO: merge the two queries into one?
    # TODO: remove test categories or move aggregation to the front-end?
    # OLAP-use-case
    probe_cc = validate_probe_cc_query_param()
    test_groups = request.args.get("test_groups")
    if test_groups is not None:
        test_groups = test_groups.split(",")

    tc = get_recent_test_coverage(probe_cc)
    nc = get_recent_network_coverage(probe_cc, test_groups)
    return cachedjson(24, network_coverage=nc, test_coverage=tc)

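# Illustrative only: a client-side sketch of the test_coverage endpoint above.
# The route path, host and the group names used here are assumptions (they are
# not defined in this file); test_groups is passed as a comma-separated list,
# matching the split(",") parsing above.
def _example_fetch_test_coverage(probe_cc: str = "IT") -> dict:
    import requests  # assumed to be available in the client environment

    resp = requests.get(
        "https://api.ooni.io/api/_/test_coverage",
        params={"probe_cc": probe_cc, "test_groups": "websites,im"},
        timeout=10,
    )
    # expected keys: network_coverage, test_coverage
    return resp.json()
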
def api_private_global_overview():
    """Provide global summary of measurements
    Sources: global_stats db table
    ---
    responses:
      '200':
        description: JSON struct TODO
    """
    # OLAP-use-case
    # CREATE MATERIALIZED VIEW global_stats AS
    # SELECT
    #   COUNT(DISTINCT probe_asn) AS network_count,
    #   COUNT(DISTINCT probe_cc) AS country_count,
    #   SUM(measurement_count) AS measurement_count
    # FROM counters;
    q = select([sql.text("*")]).select_from(sql.table("global_stats"))
    r = current_app.db_session.execute(q).fetchone()
    return cachedjson(24, **dict(r))

def api_private_countries_by_month() -> Response:
    """Countries count by month
    ---
    responses:
      '200':
        description: TODO
    """
    q = """SELECT
        COUNT(DISTINCT(probe_cc)) AS value,
        toStartOfMonth(measurement_start_time) AS date
    FROM fastpath
    WHERE measurement_start_time < toStartOfMonth(addMonths(now(), 1))
        AND measurement_start_time > toStartOfMonth(subtractMonths(now(), 24))
    GROUP BY date
    ORDER BY date
    """
    li = list(query_click(q, {}))
    expand_dates(li)
    return cachedjson("1d", li)

def api_private_website_stats():
    """Measurement counts for a given URL, probe_cc and probe_asn, grouped by day
    ---
    responses:
      '200':
        description: TODO
    """
    # uses_pg_index counters_day_cc_asn_input_idx a BRIN index was not used at
    # all, but BTREE on (measurement_start_day, probe_cc, probe_asn, input)
    # made queries go from full scan to 50ms
    url = request.args.get("input")
    probe_cc = validate_probe_cc_query_param()
    probe_asn = validate_probe_asn_query_param()

    # disable bitmapscan otherwise PG uses the BRIN indexes instead of BTREE
    s = sql.text(
        """
    SET enable_bitmapscan = off;
    SELECT
        measurement_start_day,
        SUM(anomaly_count) as anomaly_count,
        SUM(confirmed_count) as confirmed_count,
        SUM(failure_count) as failure_count,
        SUM(measurement_count) as measurement_count
    FROM counters
    WHERE measurement_start_day >= current_date - interval '31 day'
        AND measurement_start_day < current_date
        AND probe_cc = :probe_cc
        AND probe_asn = :probe_asn
        AND input = :input
    GROUP BY measurement_start_day
    """
    )
    q = current_app.db_session.execute(
        s, {"probe_cc": probe_cc, "probe_asn": probe_asn, "input": url}
    )
    results = [
        {
            "test_day": r.measurement_start_day,
            "anomaly_count": int(r.anomaly_count or 0),
            "confirmed_count": int(r.confirmed_count or 0),
            "failure_count": int(r.failure_count or 0),
            "total_count": int(r.measurement_count or 0),
        }
        for r in q
    ]
    return cachedjson(1, results=results)

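# Illustrative only: the comments above describe a BTREE index on
# (measurement_start_day, probe_cc, probe_asn, input). A sketch of the DDL is
# given below; the index name is an assumption, not taken from this file.
_EXAMPLE_BTREE_INDEX_DDL = """
CREATE INDEX counters_day_cc_asn_input_btree_idx
    ON counters USING btree (measurement_start_day, probe_cc, probe_asn, input);
"""
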
def api_private_countries():
    """Summary of countries
    ---
    responses:
      '200':
        description: TODO
    """
    cols = [sql.text("measurement_count"), sql.text("probe_cc")]
    q = select(cols).select_from(sql.table("country_stats"))
    c = []
    for r in current_app.db_session.execute(q):
        try:
            name = lookup_country(r.probe_cc)
            c.append(dict(alpha_2=r.probe_cc, name=name, count=r.measurement_count))
        except KeyError:
            pass
    return cachedjson(24, countries=c)

def api_private_vanilla_tor_stats():
    """Tor statistics over ASN for a given CC
    ---
    responses:
      '200':
        description: TODO
    """
    probe_cc = validate_probe_cc_query_param()
    s = sql.text(
        """SELECT
        SUM(failure_count) as failure_count,
        MAX(measurement_start_day) AS last_tested,
        probe_asn,
        SUM(anomaly_count) as anomaly_count,
        SUM(measurement_count) as total_count
    FROM counters_asn_noinput
    WHERE measurement_start_day > (current_date - interval '6 months')
        AND probe_cc = :probe_cc
    GROUP BY probe_asn
    """
    )
    q = current_app.db_session.execute(s, {"probe_cc": probe_cc})
    nets = []
    blocked = 0
    for n in q:
        total_count = n.total_count or 0
        success_count = total_count - (n.anomaly_count or 0)
        nets.append(
            {
                "failure_count": n.failure_count or 0,
                "last_tested": n.last_tested,
                "probe_asn": n.probe_asn,
                "success_count": success_count,
                "test_runtime_avg": None,
                "test_runtime_max": None,
                "test_runtime_min": None,
                "total_count": total_count,
            }
        )
        if total_count > 5 and float(success_count) / float(total_count) < 0.6:
            blocked += 1

    if not nets:
        # avoid max() on an empty sequence when there are no measurements
        return cachedjson(0, networks=[], notok_networks=0, last_tested=None)

    lt = max(n["last_tested"] for n in nets)
    return cachedjson(0, networks=nets, notok_networks=blocked, last_tested=lt)

def api_private_website_network_tests():
    """Measurement counts by ASN for a given probe_cc over the last 31 days
    ---
    responses:
      '200':
        description: TODO
    """
    probe_cc = validate_probe_cc_query_param()
    s = sql.text(
        """SELECT
        SUM(measurement_count) AS count,
        probe_asn
    FROM counters_asn_noinput
    WHERE measurement_start_day >= current_date - interval '31 day'
        AND measurement_start_day < current_date
        AND probe_cc = :probe_cc
    GROUP BY probe_asn
    ORDER BY count DESC
    """
    )
    q = current_app.db_session.execute(s, {"probe_cc": probe_cc})
    results = [dict(r) for r in q]
    return cachedjson(1, results=results)