def api_private_website_stats() -> Response:
    """TODO
    ---
    responses:
      '200':
        description: TODO
    """
    # uses_pg_index counters_day_cc_asn_input_idx
    # A BRIN index was not used at all, but a BTREE on
    # (measurement_start_day, probe_cc, probe_asn, input) made queries go
    # from full scan to 50ms
    url = request.args.get("input")
    probe_cc = validate_probe_cc_query_param()
    probe_asn = validate_probe_asn_query_param()

    s = """SELECT
        toDate(measurement_start_time) AS test_day,
        countIf(anomaly = 't') as anomaly_count,
        countIf(confirmed = 't') as confirmed_count,
        countIf(msm_failure = 't') as failure_count,
        COUNT() AS total_count
    FROM fastpath
    WHERE measurement_start_time >= today() - interval '31 day'
    AND measurement_start_time < today()
    AND probe_cc = :probe_cc
    AND probe_asn = :probe_asn
    AND input = :input
    GROUP BY test_day ORDER BY test_day
    """
    d = {"probe_cc": probe_cc, "probe_asn": probe_asn, "input": url}
    results = query_click(sql.text(s), d)
    return cachedjson("0s", results=results)
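# The cachedjson() helper used throughout this module is defined elsewhere.
# A minimal sketch of its assumed behavior: wrap flask.jsonify() and set the
# Cache-Control max-age from a short TTL string such as "0s", "1h" or "1d".
# The real helper may differ; the name below is illustrative only.
def cachedjson_sketch(interval: str, *args, **kwargs) -> Response:
    seconds = {"s": 1, "m": 60, "h": 3600, "d": 86400}
    resp = jsonify(*args, **kwargs)
    resp.cache_control.max_age = int(interval[:-1]) * seconds[interval[-1]]
    return resp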
def api_private_circumvention_runtime_stats() -> Response:
    """Runtime statistics on protocols used for circumvention,
    grouped by date, country, test_name.
    ---
    responses:
      200:
        description: List of dicts with keys probe_cc and cnt
    """
    q = """SELECT
        toDate(measurement_start_time) AS date,
        test_name,
        probe_cc,
        quantile(.5)(JSONExtractFloat(scores, 'extra', 'test_runtime')) AS p50,
        quantile(.9)(JSONExtractFloat(scores, 'extra', 'test_runtime')) AS p90,
        count() as cnt
    FROM fastpath
    WHERE test_name IN ['torsf', 'tor', 'stunreachability', 'psiphon', 'riseupvpn']
    AND measurement_start_time > today() - interval 6 month
    AND measurement_start_time < today() - interval 1 day
    AND JSONHas(scores, 'extra', 'test_runtime')
    GROUP BY date, probe_cc, test_name
    """
    try:
        r = query_click(sql.text(q), {})
        result = pivot_circumvention_runtime_stats(r)
        return cachedjson("1d", v=0, results=result)
    except Exception as e:
        return jsonify({"v": 0, "error": str(e)})
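# pivot_circumvention_runtime_stats() is defined elsewhere. A hedged sketch of
# what it is assumed to do with the flat rows above: group them into one time
# series per (test_name, probe_cc) pair. The exact output shape is an
# assumption for illustration only.
def pivot_circumvention_runtime_stats_sketch(rows) -> Dict:
    out: Dict[tuple, List[Dict]] = {}
    for r in rows:
        key = (r["test_name"], r["probe_cc"])
        out.setdefault(key, []).append(
            dict(date=r["date"], p50=r["p50"], p90=r["p90"], cnt=r["cnt"])
        )
    return out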
def fetch_reactive_url_list(cc: str, probe_asn: int):
    """Select all citizenlab URLs for the given probe_cc + ZZ
    Select measurements count from the current and previous week
    using a left outer join (without any info about priority)
    """
    q = """
SELECT category_code, domain, url, cc, COALESCE(msmt_cnt, 0) AS msmt_cnt
FROM (
    SELECT domain, url, cc, category_code
    FROM citizenlab
    WHERE citizenlab.cc = :cc_low
    OR citizenlab.cc = :cc
    OR citizenlab.cc = 'ZZ'
) AS citiz
LEFT OUTER JOIN (
    SELECT input, SUM(msmt_cnt) AS msmt_cnt
    FROM counters_asn_test_list
    WHERE probe_cc = :cc
    AND (week IN (toStartOfWeek(now()), toStartOfWeek(now() - interval 1 week)))
    --asn-filter--
    GROUP BY input
) AS cnt
ON (citiz.url = cnt.input)
"""
    if probe_asn != 0:
        q = q.replace("--asn-filter--", "AND probe_asn = :asn")
    # support uppercase or lowercase match
    r = query_click(sa.text(q), dict(cc=cc, cc_low=cc.lower(), asn=probe_asn))
    return tuple(r)
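# Example usage of fetch_reactive_url_list() with hypothetical values: fetch
# the citizenlab URLs for Italian probes on AS12345; passing probe_asn=0 would
# leave the --asn-filter-- placeholder as a SQL comment and skip the per-ASN
# filter. Each row is assumed to be a dict with keys category_code, domain,
# url, cc and msmt_cnt.
#
#   rows = fetch_reactive_url_list("IT", 12345)
#   for r in rows:
#       print(r["url"], r["msmt_cnt"])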
def api_private_website_network_tests() -> Response:
    """TODO
    ---
    parameters:
      - name: probe_cc
        in: query
        type: string
        description: The two letter country code
    responses:
      '200':
        description: TODO
    """
    probe_cc = validate_probe_cc_query_param()
    s = """SELECT
        COUNT() AS count,
        probe_asn
    FROM fastpath
    WHERE measurement_start_time >= today() - interval '31 day'
    AND measurement_start_time < today()
    AND probe_cc = :probe_cc
    GROUP BY probe_asn
    ORDER BY count DESC
    """
    results = query_click(sql.text(s), {"probe_cc": probe_cc})
    return cachedjson("0s", results=results)
def get_recent_network_coverage_ch(probe_cc, test_groups):
    """Count ASNs with at least one measurement, grouped by day,
    for a given CC, and filtered by test groups
    Return [{"count": 58, "test_day": "2021-10-16" }, ... ]
    """
    s = """SELECT
        toDate(measurement_start_time) AS test_day,
        COUNT(DISTINCT probe_asn) as count
    FROM fastpath
    WHERE test_day >= today() - interval 31 day
    AND test_day < today() - interval 1 day
    AND probe_cc = :probe_cc
    --mark--
    GROUP BY test_day ORDER BY test_day
    WITH FILL FROM today() - interval 31 day TO today() - interval 1 day
    """
    if test_groups:
        assert isinstance(test_groups, list)
        test_names = set()
        for tg in test_groups:
            tnames = TEST_GROUPS.get(tg, [])
            test_names.update(tnames)
        test_names = sorted(test_names)
        s = s.replace("--mark--", "AND test_name IN :test_names")
        d = {"probe_cc": probe_cc, "test_names": test_names}
    else:
        s = s.replace("--mark--", "")
        d = {"probe_cc": probe_cc}

    return query_click(sql.text(s), d)
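# TEST_GROUPS (imported from elsewhere) is assumed to map a test group name to
# its member test names, roughly like the hypothetical excerpt below; the real
# mapping lives outside this excerpt.
#
#   TEST_GROUPS = {
#       "websites": ["web_connectivity"],
#       "im": ["facebook_messenger", "signal", "telegram", "whatsapp"],
#       "circumvention": ["torsf", "tor", "psiphon", "riseupvpn"],
#       ...
#   }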
def api_private_global_by_month() -> Response:
    """Provide global summary of measurements
    Sources: fastpath table
    ---
    responses:
      '200':
        description: JSON struct TODO
    """
    q = """SELECT
        COUNT(DISTINCT probe_asn) AS networks_by_month,
        COUNT(DISTINCT probe_cc) AS countries_by_month,
        COUNT() AS measurements_by_month,
        toStartOfMonth(measurement_start_time) AS month
    FROM fastpath
    WHERE measurement_start_time > toStartOfMonth(today() - interval 2 year)
    AND measurement_start_time < toStartOfMonth(today() + interval 1 month)
    GROUP BY month ORDER BY month
    """
    rows = query_click(sql.text(q), {})
    rows = list(rows)

    n = [{"date": r["month"], "value": r["networks_by_month"]} for r in rows]
    c = [{"date": r["month"], "value": r["countries_by_month"]} for r in rows]
    m = [{"date": r["month"], "value": r["measurements_by_month"]} for r in rows]
    expand_dates(n)
    expand_dates(c)
    expand_dates(m)
    return cachedjson(
        "1d", networks_by_month=n, countries_by_month=c, measurements_by_month=m
    )
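# expand_dates() is defined elsewhere. A minimal sketch of its assumed
# behavior: make the "date" field JSON-friendly by converting date objects
# into "YYYY-MM-DD" strings, in place. The real helper may differ.
def expand_dates_sketch(rows: List[Dict]) -> None:
    for r in rows:
        if isinstance(r["date"], date):
            r["date"] = r["date"].strftime("%Y-%m-%d")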
def get_recent_test_coverage_ch(probe_cc):
    """Returns
    [{"count": 4888, "test_day": "2021-10-16", "test_group": "websites"}, ... ]
    """
    q = "SELECT DISTINCT(test_group) FROM test_groups ORDER BY test_group"
    rows = query_click(sql.text(q), {})
    test_group_names = [r["test_group"] for r in rows]

    q = """SELECT
        toDate(measurement_start_time) as measurement_start_day,
        test_group,
        COUNT() as msmt_cnt
    FROM fastpath
    ANY LEFT JOIN test_groups USING (test_name)
    WHERE measurement_start_day >= today() - interval 32 day
    AND measurement_start_day < today() - interval 2 day
    AND probe_cc = :probe_cc
    GROUP BY measurement_start_day, test_group
    """
    rows = query_click(sql.text(q), dict(probe_cc=probe_cc))
    rows = tuple(rows)
    l30d = tuple(last_30days(32, 2))
    return pivot_test_coverage(rows, test_group_names, l30d)
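# last_30days(start, end) is assumed to yield the days between today-start and
# today-end as "YYYY-MM-DD" strings, matching the SQL interval above;
# pivot_test_coverage() then fills the missing (day, test_group) combinations
# with zero counts. A hedged stand-in for the generator:
def last_30days_sketch(start: int, end: int):
    for n in range(start, end, -1):
        yield (date.today() - timedelta(days=n)).strftime("%Y-%m-%d")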
def api_private_im_stats() -> Response:
    """Instant messaging statistics
    ---
    responses:
      '200':
        description: TODO
    """
    test_name = request.args.get("test_name")
    if not test_name or test_name not in TEST_GROUPS["im"]:
        raise BadRequest("invalid test_name")

    probe_cc = validate_probe_cc_query_param()
    probe_asn = validate_probe_asn_query_param()
    probe_asn = int(probe_asn.replace("AS", ""))

    s = """SELECT
        COUNT() as total_count,
        toDate(measurement_start_time) AS test_day
    FROM fastpath
    WHERE probe_cc = :probe_cc
    AND test_name = :test_name
    AND probe_asn = :probe_asn
    AND measurement_start_time >= today() - interval '31 day'
    AND measurement_start_time < today()
    GROUP BY test_day ORDER BY test_day
    """
    query_params = {
        "probe_cc": probe_cc,
        "probe_asn": probe_asn,
        "test_name": test_name,
    }
    q = query_click(sql.text(s), query_params)
    tmp = {r["test_day"]: r for r in q}
    results = []
    days = [date.today() + timedelta(days=(d - 31)) for d in range(32)]
    for d in days:
        if d in tmp:
            test_day = isomid(tmp[d]["test_day"])
            total_count = tmp[d]["total_count"]
        else:
            test_day = isomid(d)
            total_count = 0
        e = dict(anomaly_count=None, test_day=test_day, total_count=total_count)
        results.append(e)

    return cachedjson("1h", results=results)  # TODO caching
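# isomid() is defined elsewhere; from its usage above it is assumed to render
# a date as an ISO-8601 timestamp pinned to midnight, since this endpoint
# returns per-day buckets. A hedged stand-in:
def isomid_sketch(d) -> str:
    return f"{d}T00:00:00+00:00"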
def api_private_im_networks() -> Response:
    """Instant messaging networks statistics
    ---
    responses:
      '200':
        description: TODO
    """
    probe_cc = validate_probe_cc_query_param()
    s = """SELECT
        COUNT() AS total_count,
        '' AS name,
        toDate(MAX(measurement_start_time)) AS last_tested,
        probe_asn,
        test_name
    FROM fastpath
    WHERE measurement_start_time >= today() - interval 31 day
    AND measurement_start_time < today()
    AND probe_cc = :probe_cc
    AND test_name IN :test_names
    GROUP BY test_name, probe_asn
    ORDER BY test_name ASC, total_count DESC
    """
    results: Dict[str, Dict] = {}
    test_names = sorted(TEST_GROUPS["im"])
    q = query_click(sql.text(s), {"probe_cc": probe_cc, "test_names": test_names})
    for r in q:
        e = results.get(
            r["test_name"],
            {
                "anomaly_networks": [],
                "ok_networks": [],
                "last_tested": r["last_tested"],
            },
        )
        e["ok_networks"].append(
            {
                "asn": r["probe_asn"],
                "name": "",
                "total_count": r["total_count"],
                "last_tested": r["last_tested"],
            }
        )
        if e["last_tested"] < r["last_tested"]:
            e["last_tested"] = r["last_tested"]
        results[r["test_name"]] = e

    return cachedjson("1h", **results)  # TODO caching
def api_private_vanilla_tor_stats() -> Response:
    """Tor statistics over ASN for a given CC
    ---
    parameters:
      - name: probe_cc
        in: query
        type: string
        minLength: 2
        required: true
    responses:
      '200':
        description: TODO
    """
    probe_cc = validate_probe_cc_query_param()
    blocked = 0
    nets = []
    s = """SELECT
        countIf(msm_failure = 't') as failure_count,
        toDate(MAX(measurement_start_time)) AS last_tested,
        probe_asn,
        COUNT() as total_count,
        total_count - countIf(anomaly = 't') AS success_count
    FROM fastpath
    WHERE measurement_start_time > today() - INTERVAL 6 MONTH
    AND probe_cc = :probe_cc
    GROUP BY probe_asn
    """
    q = query_click(sql.text(s), {"probe_cc": probe_cc})
    extras = {
        "test_runtime_avg": None,
        "test_runtime_max": None,
        "test_runtime_min": None,
    }
    for n in q:
        n.update(extras)
        nets.append(n)
        if n["total_count"] > 5:
            if float(n["success_count"]) / float(n["total_count"]) < 0.6:
                blocked += 1

    if not nets:
        return cachedjson("0s", networks=[], notok_networks=0, last_tested=None)

    lt = max(n["last_tested"] for n in nets)
    return cachedjson("0s", networks=nets, notok_networks=blocked, last_tested=lt)
def api_private_asn_by_month() -> Response:
    """Network count by month
    ---
    responses:
      '200':
        description: [{"date":"2018-08-31","value":4411}, ... ]
    """
    q = """SELECT
        COUNT(DISTINCT(probe_asn)) AS value,
        toStartOfMonth(measurement_start_time) AS date
    FROM fastpath
    WHERE measurement_start_time < toStartOfMonth(addMonths(now(), 1))
    AND measurement_start_time > toStartOfMonth(subtractMonths(now(), 24))
    GROUP BY date ORDER BY date
    """
    li = list(query_click(q, {}))
    expand_dates(li)
    return cachedjson("1d", li)
def failover_fetch_citizenlab_data() -> Dict[str, List[CTZ]]:
    """Fetches the citizenlab table from the database.
    Used only once at startup for failover."""
    log = current_app.logger
    log.info("Started failover_fetch_citizenlab_data")
    sql = """SELECT category_code, url FROM citizenlab WHERE cc = 'ZZ'"""
    out: Dict[str, List[CTZ]] = {}
    query = query_click(sql, {})
    for e in query:
        catcode = e["category_code"]
        c = CTZ(e["url"], catcode)
        out.setdefault(catcode, []).append(c)

    log.info("Fetch done: %d" % len(out))
    return out
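# CTZ is defined elsewhere; from its usage above it is assumed to be a small
# record pairing a URL with its category code, along the lines of:
#
#   from collections import namedtuple
#   CTZ = namedtuple("CTZ", ["url", "category_code"])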
def api_private_countries_by_month() -> Response:
    """Countries count by month
    ---
    responses:
      '200':
        description: TODO
    """
    q = """SELECT
        COUNT(DISTINCT(probe_cc)) AS value,
        toStartOfMonth(measurement_start_time) AS date
    FROM fastpath
    WHERE measurement_start_time < toStartOfMonth(addMonths(now(), 1))
    AND measurement_start_time > toStartOfMonth(subtractMonths(now(), 24))
    GROUP BY date ORDER BY date
    """
    li = list(query_click(q, {}))
    expand_dates(li)
    return cachedjson("1d", li)
def list_url_priorities() -> Response:
    """List URL priority rules
    ---
    responses:
      200:
        type: string
    """
    global log
    log = current_app.logger
    log.debug("listing URL prio rules")
    query = """SELECT category_code, cc, domain, url, priority
    FROM url_priorities FINAL
    ORDER BY category_code, cc, domain, url, priority
    """
    # The url_priorities table is CollapsingMergeTree
    q = query_click(sql.text(query), {})
    rows = list(q)
    try:
        return make_response(jsonify(rules=rows))
    except BaseOONIException as e:
        return jerror(e)
def api_private_circumvention_stats_by_country() -> Response:
    """Aggregated statistics on protocols used for circumvention,
    grouped by country.
    ---
    responses:
      200:
        description: List of dicts with keys probe_cc and cnt
    """
    q = """SELECT probe_cc, COUNT(*) as cnt
    FROM fastpath
    WHERE measurement_start_time > today() - interval 6 month
    AND measurement_start_time < today() - interval 1 day
    AND test_name IN ['torsf', 'tor', 'stunreachability', 'psiphon', 'riseupvpn']
    GROUP BY probe_cc ORDER BY probe_cc
    """
    try:
        result = query_click(sql.text(q), {})
        return cachedjson("1d", v=0, results=result)
    except Exception as e:
        return cachedjson("0d", v=0, error=str(e))
def api_private_countries() -> Response:
    """Summary of countries
    ---
    responses:
      '200':
        description: {"countries": [{"alpha_2": x, "count": y, "name": z}, ... ]}
    """
    q = """
    SELECT probe_cc, COUNT() AS measurement_count
    FROM fastpath
    GROUP BY probe_cc ORDER BY probe_cc
    """
    c = []
    rows = query_click(q, {})
    for r in rows:
        try:
            name = lookup_country(r["probe_cc"])
            c.append(
                dict(alpha_2=r["probe_cc"], name=name, count=r["measurement_count"])
            )
        except KeyError:
            pass

    return cachedjson("1d", countries=c)
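# lookup_country() is assumed to resolve an ISO 3166-1 alpha-2 code to a
# country name, raising KeyError for unknown codes (which the loop above
# silently skips). A hedged stand-in backed by a plain dict:
def lookup_country_sketch(alpha_2: str) -> str:
    names = {"IT": "Italy", "US": "United States"}  # illustrative subset
    return names[alpha_2]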
def fetch_prioritization_rules(cc: str) -> tuple:
    sql = """SELECT category_code, cc, domain, url, priority
    FROM url_priorities
    WHERE cc = :cc OR cc = '*'
    """
    q = query_click(sa.text(sql), dict(cc=cc))
    return tuple(q)
def api_private_website_test_urls() -> Response:
    """TODO
    ---
    responses:
      '200':
        description: TODO
    """
    limit = int(request.args.get("limit", 10))
    if limit <= 0:
        limit = 10
    offset = int(request.args.get("offset", 0))

    probe_cc = validate_probe_cc_query_param()
    probe_asn = validate_probe_asn_query_param()
    probe_asn = int(probe_asn.replace("AS", ""))

    # Count how many distinct inputs we have in this CC / ASN / period
    s = """
        SELECT COUNT(DISTINCT(input)) as input_count
        FROM fastpath
        WHERE measurement_start_time >= today() - interval '31 day'
        AND measurement_start_time < today()
        AND test_name = 'web_connectivity'
        AND probe_cc = :probe_cc
        AND probe_asn = :probe_asn
    """
    q = query_click_one_row(sql.text(s), dict(probe_cc=probe_cc, probe_asn=probe_asn))
    total_count = q["input_count"] if q else 0

    # Group msmts by CC / ASN / period with LIMIT and OFFSET
    s = """SELECT input,
        countIf(anomaly = 't') as anomaly_count,
        countIf(confirmed = 't') as confirmed_count,
        countIf(msm_failure = 't') as failure_count,
        COUNT() AS total_count
    FROM fastpath
    WHERE measurement_start_time >= today() - interval '31 day'
    AND measurement_start_time < today()
    AND test_name = 'web_connectivity'
    AND probe_cc = :probe_cc
    AND probe_asn = :probe_asn
    GROUP BY input
    ORDER BY confirmed_count DESC, total_count DESC,
             anomaly_count DESC, input ASC
    LIMIT :limit OFFSET :offset
    """
    d = {
        "probe_cc": probe_cc,
        "probe_asn": probe_asn,
        "limit": limit,
        "offset": offset,
    }
    results = query_click(sql.text(s), d)

    current_page = math.ceil(offset / limit) + 1
    metadata = {
        "offset": offset,
        "limit": limit,
        "current_page": current_page,
        "total_count": total_count,
        "next_url": None,
    }

    # Create next_url
    if len(results) >= limit:
        args = dict(
            limit=limit,
            offset=offset + limit,
            probe_asn=probe_asn,
            probe_cc=probe_cc,
        )
        # TODO: remove BASE_URL?
        next_url = urljoin(
            current_app.config["BASE_URL"],
            "/api/_/website_urls?%s" % urlencode(args),
        )
        metadata["next_url"] = next_url

    return cachedjson("1h", metadata=metadata, results=results)  # TODO caching
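# Worked example of the pagination math above: with limit=10 and offset=20,
# current_page = ceil(20 / 10) + 1 = 3, and if a full page of 10 rows comes
# back, next_url points at offset=30 with the same probe_cc / probe_asn.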
def _list_measurements_click(
    since,
    until,
    report_id: ostr,
    probe_cc: ostr,
    probe_asn: ostr,
    test_name: ostr,
    anomaly,
    confirmed,
    failure,
    input_: ostr,
    domain: ostr,
    category_code,
    order: ostr,
    order_by,
    limit,
    offset,
) -> Any:
    INULL = ""  # Special value for input = NULL to merge rows with FULL OUTER JOIN

    ## Create fastpath columns for query
    # TODO: cast scores, coalesce input as ""
    fpwhere = []
    query_params = {}

    # Populate WHERE clauses and query_params dict
    if since is not None:
        query_params["since"] = since
        fpwhere.append(sql.text("measurement_start_time > :since"))

    if until is not None:
        query_params["until"] = until
        fpwhere.append(sql.text("measurement_start_time <= :until"))

    if report_id:
        query_params["report_id"] = report_id
        fpwhere.append(sql.text("report_id = :report_id"))

    if probe_cc:
        if probe_cc == "ZZ":
            log.info("Refusing list_measurements with probe_cc set to ZZ")
            abort(403)
        query_params["probe_cc"] = probe_cc
        fpwhere.append(sql.text("probe_cc = :probe_cc"))
    else:
        fpwhere.append(sql.text("probe_cc != 'ZZ'"))

    if probe_asn is not None:
        if probe_asn == 0:
            log.info("Refusing list_measurements with probe_asn set to 0")
            abort(403)
        query_params["probe_asn"] = probe_asn
        fpwhere.append(sql.text("probe_asn = :probe_asn"))
    else:
        # https://ooni.org/post/2020-ooni-probe-asn-incident-report/
        # https://github.com/ooni/explorer/issues/495
        fpwhere.append(sql.text("probe_asn != 0"))

    if test_name is not None:
        query_params["test_name"] = test_name
        fpwhere.append(sql.text("test_name = :test_name"))

    # Filter on anomaly, confirmed and failure:
    # The database stores anomaly and confirmed as boolean + NULL and stores
    # failures in different columns. This leads to many possible combinations
    # but only a subset is used.
    # On anomaly and confirmed: any value != TRUE is treated as FALSE
    # See test_list_measurements_filter_flags_fastpath

    if anomaly is True:
        fpwhere.append(sql.text("fastpath.anomaly = 't'"))
    elif anomaly is False:
        fpwhere.append(sql.text("fastpath.anomaly = 'f'"))

    if confirmed is True:
        fpwhere.append(sql.text("fastpath.confirmed = 't'"))
    elif confirmed is False:
        fpwhere.append(sql.text("fastpath.confirmed = 'f'"))

    if failure is True:
        # residual_no is never NULL, msm_failure is always NULL
        fpwhere.append(sql.text("fastpath.msm_failure = 't'"))
    elif failure is False:
        # on success measurement.exc is NULL
        fpwhere.append(sql.text("fastpath.msm_failure = 'f'"))

    fpq_table = sql.table("fastpath")

    if input_:
        # input_ overrides domain and category_code
        query_params["input"] = input_
        fpwhere.append(sql.text("input = :input"))
    elif domain or category_code:
        # both domain and category_code can be set at the same time
        if domain:
            query_params["domain"] = domain
            fpwhere.append(sql.text("domain = :domain"))
        if category_code:
            query_params["category_code"] = category_code
            fpq_table = fpq_table.join(
                sql.table("citizenlab"),
                sql.text("citizenlab.url = fastpath.input"),
            )
            fpwhere.append(sql.text("citizenlab.category_code = :category_code"))

    fp_query = select("*").where(and_(*fpwhere)).select_from(fpq_table)
    # .limit(offset + limit)

    # SELECT * FROM fastpath WHERE measurement_start_time <= '2019-01-01T00:00:00'::timestamp
    # AND probe_cc = 'YT' ORDER BY test_start_time desc LIMIT 100 OFFSET 0;
    # is using BRIN and running slowly

    if order_by is None:
        order_by = "measurement_start_time"

    fp_query = fp_query.order_by(text("{} {}".format(order_by, order)))

    # Assemble the "external" query. Run a final order by followed by limit
    # and offset
    query = fp_query.offset(offset).limit(limit)
    query_params["param_1"] = limit
    query_params["param_2"] = offset

    # Run the query, generate the results list
    iter_start_time = time.time()

    try:
        rows = query_click(query, query_params)
        tmpresults = []
        for row in rows:
            if row["input"] in (None, ""):
                url = genurl("/api/v1/raw_measurement", report_id=row["report_id"])
            else:
                url = genurl(
                    "/api/v1/raw_measurement",
                    report_id=row["report_id"],
                    input=row["input"],
                )
            tmpresults.append(
                {
                    "measurement_url": url,
                    "report_id": row["report_id"],
                    "probe_cc": row["probe_cc"],
                    "probe_asn": "AS{}".format(row["probe_asn"]),
                    "test_name": row["test_name"],
                    "measurement_start_time": row["measurement_start_time"],
                    "input": row["input"],
                    "anomaly": row["anomaly"] == "t",
                    "confirmed": row["confirmed"] == "t",
                    "failure": row["msm_failure"] == "t",
                    "scores": json.loads(row["scores"]),
                }
            )
    except OperationalError as exc:
        log.error(exc)
        if isinstance(exc.orig, QueryCanceledError):
            # Timeout due to a slow query. Generate metric and do not feed it
            # to Sentry.
            abort(504)

        raise exc

    # For each report_id / input tuple, we want at most one entry.
    results = _merge_results(tmpresults)

    # Replace the special value INULL for "input" with None
    for i, r in enumerate(results):
        if r["input"] == INULL:
            results[i]["input"] = None

    pages = -1
    count = -1
    current_page = math.ceil(offset / limit) + 1

    # We got fewer results than expected: we know the count and that we are
    # done
    if len(tmpresults) < limit:
        count = offset + len(results)
        pages = math.ceil(count / limit)
        next_url = None
    else:
        # XXX this is too intensive. find a workaround
        # count_start_time = time.time()
        # count = q.count()
        # pages = math.ceil(count / limit)
        # current_page = math.ceil(offset / limit) + 1
        # query_time += time.time() - count_start_time
        next_args = request.args.to_dict()
        next_args["offset"] = str(offset + limit)
        next_args["limit"] = str(limit)
        next_url = genurl("/api/v1/measurements", **next_args)

    query_time = time.time() - iter_start_time
    metadata = {
        "offset": offset,
        "limit": limit,
        "count": count,
        "pages": pages,
        "current_page": current_page,
        "next_url": next_url,
        "query_time": query_time,
    }
    response = jsonify({"metadata": metadata, "results": results[:limit]})
    response.cache_control.max_age = 1
    return response
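# _merge_results() is defined elsewhere. A hedged sketch of its assumed
# behavior: keep at most one entry per (report_id, input) pair, preserving
# the first occurrence. The real implementation may differ.
def _merge_results_sketch(tmpresults: List[Dict]) -> List[Dict]:
    resultsmap: Dict[tuple, Dict] = {}
    for r in tmpresults:
        k = (r["report_id"], r["input"])
        if k not in resultsmap:
            resultsmap[k] = r
    return list(resultsmap.values())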
def get_torsf_stats() -> Response:
    """Tor Pluggable Transports statistics
    Average / percentiles / total_count grouped by day
    Either group-by or filter by probe_cc
    Returns a format similar to get_aggregated
    ---
    parameters:
      - name: probe_cc
        in: query
        type: string
        description: The two letter country code
        minLength: 2
      - name: since
        in: query
        type: string
        description: >-
          The start date of when measurements were run (ex.
          "2016-10-20T10:30:00")
      - name: until
        in: query
        type: string
        description: >-
          The end date of when measurements were run (ex.
          "2016-10-20T10:30:00")
    responses:
      '200':
        description: Returns aggregated counters
    """
    log = current_app.logger
    param = request.args.get
    probe_cc = param("probe_cc")
    since = param("since")
    until = param("until")
    cacheable = False

    cols = [
        sql.text("toDate(measurement_start_time) AS measurement_start_day"),
        column("probe_cc"),
        sql.text("countIf(anomaly = 't') AS anomaly_count"),
        sql.text("countIf(confirmed = 't') AS confirmed_count"),
        sql.text("countIf(msm_failure = 't') AS failure_count"),
        # measurement_count is needed below to compute anomaly_rate
        sql.text("COUNT() AS measurement_count"),
    ]
    table = sql.table("fastpath")
    where = [sql.text("test_name = 'torsf'")]
    query_params = {}

    if probe_cc:
        where.append(sql.text("probe_cc = :probe_cc"))
        query_params["probe_cc"] = probe_cc

    if since:
        since = parse_date(since)
        where.append(sql.text("measurement_start_time > :since"))
        query_params["since"] = since

    if until:
        until = parse_date(until)
        where.append(sql.text("measurement_start_time <= :until"))
        query_params["until"] = until
        cacheable = until < datetime.now() - timedelta(hours=72)

    # Assemble query
    where_expr = and_(*where)
    query = select(cols).where(where_expr).select_from(table)
    query = query.group_by(column("measurement_start_day"), column("probe_cc"))
    query = query.order_by(column("measurement_start_day"), column("probe_cc"))

    try:
        q = query_click(query, query_params)
        result = []
        for row in q:
            row = dict(row)
            row["anomaly_rate"] = row["anomaly_count"] / row["measurement_count"]
            result.append(row)
        response = jsonify({"v": 0, "result": result})
        if cacheable:
            response.cache_control.max_age = 3600 * 24
        return response
    except Exception as e:
        return jsonify({"v": 0, "error": str(e)})
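# parse_date() is assumed to behave like dateutil.parser.parse, accepting
# timestamps such as "2016-10-20T10:30:00". A minimal stand-in covering the
# ISO-8601 case only:
def parse_date_sketch(s: str) -> datetime:
    return datetime.fromisoformat(s)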
def _clickhouse_aggregation(
    resp_format: str,
    download: bool,
    since,
    until,
    inp: ostr,
    domain: ostr,
    category_code: ostr,
    probe_cc: ostr,
    probe_asn: Optional[int],
    test_name: ostr,
    axis_x: ostr,
    axis_y: ostr,
):
    log = current_app.logger
    dimension_cnt = int(bool(axis_x)) + int(bool(axis_y))
    cacheable = until and until < datetime.now() - timedelta(hours=72)
    cacheable = False  # FIXME

    # Assemble query
    colnames = [
        "anomaly_count",
        "confirmed_count",
        "failure_count",
        "ok_count",
        "measurement_count",
    ]
    cols = [
        sql.text(
            "countIf(anomaly = 't' AND confirmed = 'f' AND msm_failure = 'f') AS anomaly_count"
        ),
        sql.text("countIf(confirmed = 't' AND msm_failure = 'f') AS confirmed_count"),
        sql.text("countIf(msm_failure = 't') AS failure_count"),
        sql.text(
            "countIf(anomaly = 'f' AND confirmed = 'f' AND msm_failure = 'f') AS ok_count"
        ),
        sql.text("COUNT(*) AS measurement_count"),
    ]
    table = sql.table("fastpath")
    where = []
    query_params = {}

    if domain:
        where.append(sql.text("domain = :domain"))
        query_params["domain"] = domain

    if inp:
        where.append(sql.text("input = :input"))
        query_params["input"] = inp

    if category_code:
        where.append(sql.text("citizenlab.category_code = :category_code"))
        query_params["category_code"] = category_code
        if probe_cc:
            where.append(sql.text("(citizenlab.cc = :lcc OR citizenlab.cc = 'ZZ')"))
            query_params["lcc"] = probe_cc.lower()
        else:
            where.append(sql.text("citizenlab.cc = 'ZZ'"))

    if probe_cc:
        where.append(sql.text("probe_cc = :probe_cc"))
        query_params["probe_cc"] = probe_cc

    if probe_asn is not None:
        where.append(sql.text("probe_asn = :probe_asn"))
        query_params["probe_asn"] = str(probe_asn)

    if since:
        where.append(sql.text("measurement_start_time > :since"))
        query_params["since"] = since

    if until:
        where.append(sql.text("measurement_start_time <= :until"))
        query_params["until"] = until

    if test_name:
        where.append(sql.text("test_name = :test_name"))
        query_params["test_name"] = test_name

    if axis_x == axis_y and axis_x is not None:
        raise ValueError("Axis X and Y cannot be the same")

    def group_by_date(since, until, cols, colnames, group_by):
        if since and until:
            delta = abs((until - since).days)
        else:
            delta = 0

        # TODO: add a granularity parameter and a check around query weight /
        # response size. Also add support in CSV format.
        granularity = ("day", "toDate")
        tcol, fun = granularity
        tcol = "measurement_start_day"  # TODO: support dynamic axis names
        cols.append(sql.text(f"{fun}(measurement_start_time) AS {tcol}"))
        colnames.append(tcol)
        group_by.append(column(tcol))

    def add_axis(axis, cols, colnames, group_by):
        if axis == "blocking_type":
            t = "JSONExtractString(scores, 'analysis', 'blocking_type') AS blocking_type"
            cols.append(sql.text(t))
        else:
            validate_axis_name(axis)
            cols.append(sql.text(axis))
        colnames.append(axis)
        group_by.append(column(axis))

    group_by: List = []
    if axis_x == "measurement_start_day":
        group_by_date(since, until, cols, colnames, group_by)
    elif axis_x:
        add_axis(axis_x, cols, colnames, group_by)

    if axis_y == "measurement_start_day":
        group_by_date(since, until, cols, colnames, group_by)
    elif axis_y:
        add_axis(axis_y, cols, colnames, group_by)

    # Assemble query
    if category_code or axis_x == "category_code" or axis_y == "category_code":
        # Join in the citizenlab table if we need to filter on category_code
        # or perform group-by on it
        table = table.join(
            sql.table("citizenlab"),
            sql.text("citizenlab.url = fastpath.input"),
        )

    where_expr = and_(*where)
    query = select(cols).where(where_expr).select_from(table)

    # Add group-by
    for g in group_by:
        query = query.group_by(g).order_by(g)

    try:
        if dimension_cnt > 0:
            r: Any = list(query_click(query, query_params))
        else:
            r = query_click_one_row(query, query_params)

        pq = current_app.click.last_query
        msg = f"Stats: {pq.progress.rows} {pq.progress.bytes} {pq.progress.total_rows} {pq.elapsed}"
        log.info(msg)

        if resp_format == "CSV":
            csv_data = _convert_to_csv(r)
            response = make_response(csv_data)
            response.headers["Content-Type"] = "text/csv"
            if download:
                set_dload(response, "ooni-aggregate-data.csv")
        else:
            resp_d = {
                "v": 0,
                "dimension_count": dimension_cnt,
                "result": r,
                "db_stats": {
                    "row_count": pq.progress.rows,
                    "bytes": pq.progress.bytes,
                    "total_row_count": pq.progress.total_rows,
                    "elapsed_seconds": pq.elapsed,
                },
            }
            response = jsonify(resp_d)
            if download:
                set_dload(response, "ooni-aggregate-data.json")

        if cacheable:
            response.cache_control.max_age = 3600 * 24
        return response

    except Exception as e:
        return jsonify({"v": 0, "error": str(e)})