def get_values( taxonomy: str, tag: str, limit: int = config['default_limit'], offset: int = config['default_offset'] ): """ Returns all categories optionally filtered by taxonomy. **GET Parameters:** * limit ....... how many entries should we return? * offset....... starting at {offset} * taxonomy_id.. ID of the taxonomy to be looked up **Output (JSON List):** * category_name (string) """ params = { 'tag': tag, 'taxonomy': taxonomy, 'limit': limit, 'offset': offset } SQL = """ SELECT DISTINCT taxonomy_tag_val.id AS value_id, taxonomy_tag_val.value AS value FROM taxonomy_tag_val JOIN tags USING (tag_id) JOIN taxonomy ON (tags.taxonomy_id = taxonomy.id) WHERE (taxonomy.name = %(taxonomy)s) AND (tags.tag_name = %(tag)s) LIMIT %(limit)s OFFSET %(offset)s """ rows = execute_db(SQL, params, dict_=True) return rows
def test_execute_db_correct_stmt_on_closed_connection(self): config, _ = parse_test_db_config() connect_db(config) tag2domain_api.app.util.db._db_conn.close() rows = execute_db("SELECT 1;") assert len(rows) == 1 assert rows[0][0] == 1 disconnect_db()
def test_execute_db_correct_stmt(self): config, _ = parse_test_db_config() connection_args = Psycopg2Adapter.to_psycopg_args(config) conn = psycopg2.connect(**connection_args) set_db(conn) rows = execute_db("SELECT 1;") assert len(rows) == 1 assert rows[0][0] == 1 disconnect_db()
def get_types(): """ Returns filter categories defined in the filter table. **Output (JSON list):** str - name of filter """ rows = execute_db(""" SELECT DISTINCT ON (tag_name) tag_name FROM v_tag2domain_domain_filter ORDER BY tag_name """, ()) return [name for name, in rows]
def get_tags_by_domain(domain: str, at_time: datetime.datetime = None, limit: int = config['default_limit'], offset: int = config['default_offset']): """ Returns all taxonomies and tags of a given {domain} **GET Parameters:** * domain ... the domain name to query (required) * at_time .. reference time to look at. If empty, open tags are returned. (YYYY-MM-DDTHH:mm:ss) * limit .... how many entries should we return? * offset.... starting at {offset} **Output (JSON):** * domain name... string * domain_id ... ID of the domain * tag_id... int * tag_name ... name of the tag * taxonomy_id ... int * taxonomy_name ... name of the linked taxonomy """ parameters = { "domain": domain, "limit": limit, "offset": offset, "at_time": at_time } base_table, base_table_params = get_sql_base_table(at_time, domain=domain) parameters.update(base_table_params) SQL = """ SELECT taxonomy_id, taxonomy.name AS taxonomy_name, tag_table.tag_id, tag_name, value_id, value, start_time, measured_at, end_time FROM %s AS tag_table -- base_table JOIN tags USING (tag_id) JOIN taxonomy ON (tags.taxonomy_id = taxonomy.id) LEFT JOIN taxonomy_tag_val ON (tag_table.value_id = taxonomy_tag_val.id) ORDER BY domain_id, tag_table.tag_id asc LIMIT %%(limit)s OFFSET %%(offset)s""" % (base_table) rows = execute_db(SQL, parameters, dict_=True) return rows
def get_stats_taxonomies( at_time: datetime.datetime = None, filter: str = Query(None, regex=RE_FILTER), limit: int = config['default_limit'], offset: int = config['default_offset'] ): """ Returns all taxonomies with the number of domains that are associated with at least one tag from this taxonomy. A filter is a string of the form Tag=Value. The available tags and values can be fetched using the /api/v1/filters/types and /api/v1/filters/values endpoints, respectively. **GET Parameters:** | parameter | description | | --------------------------- | ------------------------------------------------------------------------------- | | at_time | point in time to query | | filter | filter domains by entries in the filter table before counting | | limit | limit answer to {limit} entries | | offset | start answer at the {offset}-th entry | **Output (JSON):** JSON list of objects, ordered by count, descending. Each object has the following keys: | key | description | | --------------------------- | ------------------------------------------------------------------------------- | | taxonomy_name | name of the taxonomy | | count | number of domains labeled by at least one tag from this taxonomy | """ base_table, base_table_params = get_sql_base_table(at_time, filter) # Build the SQL statement SQL = """ SELECT COUNT(DISTINCT domain_id), taxonomy.name AS taxonomy_name FROM %s AS tag_table -- base_table JOIN tags USING (tag_id) JOIN taxonomy ON (tags.taxonomy_id = taxonomy.id) GROUP by taxonomy.id, taxonomy.name ORDER BY count DESC LIMIT %%(limit)s OFFSET %%(offset)s """ % (base_table) params = { 'at_time': at_time, 'limit': limit, 'offset': offset } params.update(base_table_params) rows = execute_db(SQL, params, dict_=True) return rows
def get_categories( taxonomy: Optional[str] = None, limit: int = config['default_limit'], offset: int = config['default_offset'] ): """ Returns all categories optionally filtered by taxonomy. **GET Parameters:** * limit ....... how many entries should we return? * offset....... starting at {offset} * taxonomy_id.. ID of the taxonomy to be looked up **Output (JSON List):** * category_name (string) """ params = { 'limit': limit, 'offset': offset, 'taxonomy': taxonomy } if taxonomy is None: taxonomy_clause = "TRUE" else: taxonomy_clause = "(taxonomy.name = %(taxonomy)s)" SQL = """ SELECT DISTINCT ON (category) (REGEXP_REPLACE(tag_name, '^(.+)::.+$', '\\1')) AS category FROM tags JOIN taxonomy ON (tags.taxonomy_id = taxonomy.id) WHERE STRPOS(tag_name, '::') != 0 AND {0} UNION ALL SELECT category FROM ( SELECT '' AS category FROM tags JOIN taxonomy ON (tags.taxonomy_id = taxonomy.id) WHERE STRPOS(tag_name, '::') = 0 AND {0} LIMIT 1 ) AS t LIMIT %(limit)s OFFSET %(offset)s """.format(taxonomy_clause) rows = execute_db(SQL, params, dict_=False) return [_elem[0] for _elem in rows]
def test_reconnect_db(self): config, _ = parse_test_db_config() old_db_conn = connect_db(config) self.assertIsInstance(old_db_conn, psycopg2.extensions.connection) db_conn = connect_db(config) self.assertIsInstance(db_conn, psycopg2.extensions.connection) self.assertRaisesRegex( psycopg2.Error, "connection already closed", old_db_conn.cursor ) rows = execute_db("SELECT 1;") assert len(rows) == 1 assert rows[0][0] == 1 disconnect_db()
def get_values(filter: str): """ Returns filter values found in the filter table. **Output (JSON list):** str - value """ rows = execute_db( """ SELECT DISTINCT ON (value) value FROM v_tag2domain_domain_filter WHERE (tag_name = %s) AND (value IS NOT NULL) ORDER BY value """, (filter, ) ) return [name for name, in rows]
def get_tag_history_by_domain(domain: str, limit: int = config['default_limit'], offset: int = config['default_offset']): """ Returns the tag history of a single domain. **GET Parameters:** * domain ... the domain name to query (required) * limit .... how many entries should we return? * offset.... starting at {offset} **Output (JSON):** * domain name... string * domain_id ... ID of the domain * tag_id... int * tag_name ... name of the tag * value_id ... ID of the value associated with the tag * value ... value associated with the tag * taxonomy_id ... int * taxonomy_name ... name of the linked taxonomy """ parameters = {"domain": domain, "limit": limit, "offset": offset} SQL = """ SELECT taxonomy_id, taxonomy.name AS taxonomy_name, tag_table.tag_id, tag_name, value_id, value, start_time, measured_at, end_time FROM tag2domain_get_all_tags_domain(%(domain)s) AS tag_table JOIN tags USING (tag_id) JOIN taxonomy ON (tags.taxonomy_id = taxonomy.id) LEFT JOIN taxonomy_tag_val ON (tag_table.value_id = taxonomy_tag_val.id) ORDER BY domain_id, tag_id ASC LIMIT %(limit)s OFFSET %(offset)s """ rows = execute_db(SQL, parameters, dict_=True) return rows
def get_taxonomies( limit: int = config['default_limit'], offset: int = config['default_offset'] ): """ Returns all known taxonomies. **GET Parameters:** * limit .... how many entries should we return? * offset.... starting at {offset} **Output (JSON):** * id... integer * name ... name of the tag * description ... long description * is_actionable ... value between 0 and 1 on how actionable this is on data (can we automatically do an action based on the tag?) * is_automatically_classifiable... bool. Can we automatically tag a domain based on this taxonomy? * is_stable... bool. Does this taxonomy change a lot? * allows_auto_tags... bool. If a new tag appears, may it be automatically added to the taxonomy? * allows_auto_values... bool. If a new tag appears, may it be automatically added to the taxonomy? * for_numbers... bool. Is this taxonomy meant for numbers (IP addresses)? * for_domains... bool. Same but is it meant for domains? * url ... string. URL to further documentation. """ SQL = """SELECT id, name, description, is_actionable, is_automatically_classifiable, is_stable,allows_auto_tags, for_numbers, for_domains, url FROM taxonomy ORDER BY id asc LIMIT %s OFFSET %s""" rows = execute_db(SQL, (limit, offset), dict_=True) return rows
def get_stats_values( taxonomy: str, tag: str, at_time: datetime.datetime = None, filter: str = Query(None, regex=RE_FILTER), limit: int = config['default_limit'], offset: int = config['default_offset'] ): """ Returns stats on the values associated with a tag in a taxonomy. A filter is a string of the form Tag=Value. The available tags and values can be fetched using the /api/v1/filters/types and /api/v1/filters/values endpoints, respectively. **GET Parameters:** | parameter | description | | --------------------------- | ------------------------------------------------------------------------------- | | taxonomy | name of taxonomy the tag to be looked up belongs to | | tag | name of the tag to be looked up | | at_time | point in time to query | | filter | filter domains by entries in the filter table before counting | | limit | limit answer to {limit} entries | | offset | start answer at the {offset}-th entry | **Output (JSON):** JSON list of objects, ordered by count, descending. Each object has the following keys: | key | description | | --------------------------- | ------------------------------------------------------------------------------- | | value | value | | count | number of domains labeled by value and tag {tag} | """ base_table, base_table_params = get_sql_base_table(at_time, filter) SQL = """ SELECT value, COUNT(domain_id) AS count FROM ( SELECT DISTINCT domain_id, value FROM %s AS tag_table -- base_table JOIN tags USING (tag_id) JOIN taxonomy ON (tags.taxonomy_id = taxonomy.id) JOIN taxonomy_tag_val ON (tag_table.value_id = taxonomy_tag_val.id) WHERE (taxonomy.name = %%(taxonomy)s) AND (tag_name = %%(tag)s) ) AS t GROUP BY value ORDER BY count DESC """ % base_table params = { 'taxonomy': taxonomy, 'tag': tag, 'at_time': at_time, 'limit': limit, 'offset': offset } params.update(base_table_params) rows = execute_db(SQL, params, dict_=True) return rows
async def selftest(): try: execute_db("SELECT 1", ()) except Exception: raise HTTPException(status_code=503, detail="failed DB execute") return {"message": "OK"}
def get_domains_by_tag(tag: str, at_time: datetime.datetime = None, filter_by_value: Optional[bool] = False, value: Optional[str] = None, filter: str = Query(None, regex=RE_FILTER), limit: int = config['default_limit'], offset: int = config['default_offset']): """ Returns all domains of a given {tag} A filter is a string of the form Tag=Value. The available tags and values can be fetched using the /api/v1/filters/types and /api/v1/filters/values endpoints, respectively. **GET Parameters:** * tag ... the tag name to query (required) * at_time .. reference time to look at. If empty, open tags are returned. (YYYY-MM-DDTHH:mm:ss) * limit .... how many entries should we return? * offset.... starting at {offset} **Output (JSON):** * domain name... string * domain_id ... ID of the domain * tag_id... int * tag_name ... name of the tag :type tag: str """ base_table, base_table_params = get_sql_base_table(at_time, filter) parameters = { "tag_name": tag, "limit": limit, "offset": offset, "at_time": at_time, "value": value } parameters.update(base_table_params) whereclause_list = [ "(tag_name = %(tag_name)s)", ] if filter_by_value: if value is None: whereclause_list.append("(value IS NULL)") elif isinstance(value, str): whereclause_list.append("(value = %(value)s)") else: raise ValueError("expected value to be str, got '%s'" % str(type(value))) whereclause = ' AND '.join(whereclause_list) SQL = (""" SELECT domain_id, domain_name, tag_type, value, start_time, measured_at, end_time FROM %s AS tag_table -- base_table JOIN tags USING (tag_id) LEFT JOIN taxonomy_tag_val ON (tag_table.value_id = taxonomy_tag_val.id) WHERE (%s) -- whereclause ORDER BY domain_id, tag_table.tag_id ASC LIMIT %%(limit)s OFFSET %%(offset)s""" % (base_table, whereclause)) rows = execute_db(SQL, parameters, dict_=True) return rows
def get_domains_by_version( taxonomy: str, tag: str, version: Optional[str] = Query( None, regex="^(?:[0-9]+)(?:\.[0-9]+)*$" # noqa: W605 ), operator: VersionComparisonOperatorParameter = "=", at_time: datetime.datetime = None, filter: str = Query(None, regex=RE_FILTER), limit: int = config['default_limit'], offset: int = config['default_offset']): """ Return domains by the version of the tag {tag} in taxonomy {taxonomy}. Only tags with a value that looks like a version number (e.g. 1.2.3.4) will be returned. Versioning schemes with letters are not considered. A filter is a string of the form Tag=Value. The available tags and values can be fetched using the /api/v1/filters/types and /api/v1/filters/values endpoints, respectively. **GET Parameters:** | parameter | description | | ----------------- | -------------------------------------------------------------------------------- | | taxonomy | taxonomy to query from | | tag | tag to query from | | version | version to compare to. Must be in format 1.2.3.4 ... or null | | operator | operator used to compare version strings. One of =, <, <=, >, >= | | at_time | point in time to query | | filter | filter domains by entries in the filter table before counting | | limit | limit answer to {limit} entries | | offset | start answer at the {offset}-th entry | **Output (JSON):** JSON list of objects, ordered by count, descending. Each object has the following keys: | key | description | | ----------------| ------------------------------------------------------------------------------- | | domain_id | ID of the found domain | | domain_name | name of the found domain | | version | version found in the value field | | start_time | start time of the tag | | measured_at | time of last measurement of the tag | | end_time | end time of the tag | """ if version is None and operator != VersionComparisonOperatorParameter.equal: raise HTTPException( status_code=400, detail="null tags can only be searched with operator '='") parameters = { "taxonomy": taxonomy, "tag_name": tag, "version": version, "limit": limit, "offset": offset, "at_time": at_time } base_table, base_table_params = get_sql_base_table(at_time, filter) parameters.update(base_table_params) if version is None: value_clause = "(value IS NULL)" else: if operator == VersionComparisonOperatorParameter.equal: op = "=" elif operator == VersionComparisonOperatorParameter.lessthan: op = "<" elif operator == VersionComparisonOperatorParameter.lessthanequal: op = "<=" elif operator == VersionComparisonOperatorParameter.greaterthan: op = ">" elif operator == VersionComparisonOperatorParameter.greaterthanequal: op = ">=" else: raise HTTPException(status_code=400, detail="invalid operator") if op == "=": value_clause = "(value = %(version)s)" else: value_clause = ( "(value ~ '^(?:[0-9]+)(?:\.[0-9]+)*$')" # noqa: W605 " AND (string_to_array(value, '.')::bigint[] %s " "string_to_array(%%(version)s, '.')::bigint[])" % op) SQL = """ SELECT domain_id, domain_name, start_time, measured_at, end_time, value AS version FROM %s AS tag_table -- base_table JOIN tags USING (tag_id) JOIN taxonomy ON (tags.taxonomy_id = taxonomy.id) LEFT JOIN taxonomy_tag_val ON (tag_table.value_id = taxonomy_tag_val.id) WHERE (taxonomy.name = %%(taxonomy)s) AND (tag_name = %%(tag_name)s) AND (%s) -- value_clause ORDER BY domain_id, tag_table.tag_id ASC LIMIT %%(limit)s OFFSET %%(offset)s """ % (base_table, value_clause) rows = execute_db(SQL, parameters, dict_=True) return rows
def get_domains_by_category(taxonomy: str, category: Optional[str] = '', at_time: datetime.datetime = None, filter: str = Query(None, regex=RE_FILTER), limit: int = config['default_limit'], offset: int = config['default_offset']): """ Returns all domains of a given {taxonomy} A filter is a string of the form Tag=Value. The available tags and values can be fetched using the /api/v1/filters/types and /api/v1/filters/values endpoints, respectively. **GET Parameters:** * taxonomy ... the taxonomy name to query (required) * at_time .. reference time to look at. If empty, open tags are returned. (YYYY-MM-DDTHH:mm:ss) * limit .... how many entries should we return? * offset.... starting at {offset} **Output (JSON):** * domain name... string * domain_id ... ID of the domain * taxonomy_id... int * taxonomy_name ... name of the taxonomy :type taxonomy: str """ parameters = { "taxonomy_name": taxonomy, "category": category, "limit": limit, "offset": offset, "at_time": at_time } base_table, base_table_params = get_sql_base_table(at_time, filter) parameters.update(base_table_params) SQL = """SELECT domain_id, domain_name, tag_id, tag_name, start_time, measured_at, end_time FROM %s AS tag_table -- base_table JOIN tags USING (tag_id) JOIN taxonomy ON (tags.taxonomy_id = taxonomy.id) WHERE (taxonomy.name = %%(taxonomy_name)s) AND ( ( -- tags with a category (STRPOS(tag_name, '::') != 0) AND ( (REGEXP_REPLACE(tag_name, '^(.+)::.+$', '\\1')) = %%(category)s) ) OR ( -- tags without a category (STRPOS(tag_name, '::') = 0) AND (%%(category)s = '') ) ) ORDER BY domain_id, tag_id asc LIMIT %%(limit)s OFFSET %%(offset)s""" % base_table rows = execute_db(SQL, parameters, dict_=True) start = time.time() ret = [] cur_domain = None for row in rows: if cur_domain is None or row["domain_id"] != cur_domain["domain_id"]: if cur_domain is not None: ret.append(cur_domain) cur_domain = { "domain_id": row["domain_id"], "domain_name": row["domain_name"], "tags": [] } cur_domain["tags"].append({ "tag_id": row["tag_id"], "tag_name": row["tag_name"], "start_time": row["start_time"], "measured_at": row["measured_at"], "end_time": row["end_time"] }) if cur_domain is not None: ret.append(cur_domain) logger.debug("reshuffled results in %f s", time.time() - start) logger.debug("preparing response...") start = time.time() response = ORJSONResponse(content=ret) logger.debug("response prepared in %f s", time.time() - start) return response
def get_stats_categories( taxonomy: str, at_time: datetime.datetime = None, filter: str = Query(None, regex=RE_FILTER), limit: int = config['default_limit'], offset: int = config['default_offset'] ): """Returns the categories within a taxonomy with the number of domains that are associated with at least one tag from the category. A filter is a string of the form Tag=Value. The available tags and values can be fetched using the /api/v1/filters/types and /api/v1/filters/values endpoints, respectively. **GET Parameters:** | parameter | description | | --------------------------- | ------------------------------------------------------------------------------- | | taxonomy | name of taxonomy to be looked up | | at_time | point in time to query | | filter | filter domains by entries in the filter table before counting | | limit | limit answer to {limit} entries | | offset | start answer at the {offset}-th entry | **Output (JSON):** JSON list of objects, ordered by count, descending. Each object has the following keys: | key | description | | --------------------------- | ------------------------------------------------------------------------------- | | category | name of the category | | count | number of domains labeled by at least one tag from this category | """ base_table, base_table_params = get_sql_base_table(at_time, filter) SQL = """ WITH t AS ( SELECT domain_id, REGEXP_REPLACE(tag_name, '^(.+)::.+$', '\\1') AS category FROM %s AS tag_table -- base_table JOIN tags USING (tag_id) JOIN taxonomy ON (tags.taxonomy_id = taxonomy.id) WHERE (taxonomy.name = %%(taxonomy)s) AND (STRPOS(tag_name, '::') != 0) UNION ALL SELECT domain_id, '' AS category FROM %s AS tag_table -- base_table JOIN tags USING (tag_id) JOIN taxonomy ON (tags.taxonomy_id = taxonomy.id) WHERE (taxonomy.name = %%(taxonomy)s) AND (STRPOS(tag_name, '::') = 0) ) SELECT category, COUNT(*) AS count FROM ( SELECT DISTINCT ON (domain_id, category) category FROM t ) AS subquery GROUP BY category ORDER BY count DESC LIMIT %%(limit)s OFFSET %%(offset)s""" % (base_table, base_table) params = { 'taxonomy': taxonomy, 'at_time': at_time, 'limit': limit, 'offset': offset } params.update(base_table_params) rows = execute_db(SQL, params, dict_=True) return rows
def get_tag_info( taxonomy: str, tag: str, ): """ Returns information about a single tag. **GET Parameters:** + taxonomy - name of the taxonomy the tag is in + tag - name of the tag to be fetched **Output (JSON Object):** + tag + name + description + category + extras + taxonomy + name + description + url + flags + is_actionable + is_automatically_classifiable + is_stable + for_numbers + for_domains + allows_auto_tags + allows_auto_values + values + count - number of distinct values that are associated with the tag See the [tag2domain docs](https://docu.labs.nic.at/doku.php?id=rd:topics:tag2domain:konzept) for further information about the fields. """ params = { 'tag': tag, 'taxonomy': taxonomy } SQL = """ SELECT tags.tag_name AS tag_name, tags.tag_description AS tag_description, REGEXP_REPLACE(tags.tag_name, '^(.+)::.+$', '\\1') AS tag_category, tags.extras AS tag_extras, taxonomy.name AS taxonomy_name, taxonomy.description AS taxonomy_description, taxonomy.url AS taxonomy_url, taxonomy.is_actionable AS taxonomy_flags_is_actionable, taxonomy.is_automatically_classifiable AS taxonomy_flags_is_automatically_classifiable, taxonomy.is_stable AS taxonomy_flags_is_stable, taxonomy.for_numbers AS taxonomy_flags_for_numbers, taxonomy.for_domains AS taxonomy_flags_for_domains, taxonomy.allows_auto_tags AS taxonomy_flags_allows_auto_tags, taxonomy.allows_auto_values AS taxonomy_flags_allows_auto_values, COUNT(DISTINCT taxonomy_tag_val.id) AS values_count FROM tags JOIN taxonomy ON (taxonomy.id = tags.taxonomy_id) LEFT JOIN taxonomy_tag_val ON (taxonomy_tag_val.tag_id = tags.tag_id) WHERE tags.tag_name = %(tag)s AND taxonomy.name = %(taxonomy)s AND (STRPOS(tag_name, '::') != 0) GROUP BY tags.tag_name, tags.tag_description, tags.extras, taxonomy.name, taxonomy.description, taxonomy.description, taxonomy.url, taxonomy.is_actionable, taxonomy.is_automatically_classifiable, taxonomy.is_stable, taxonomy.for_numbers, taxonomy.for_domains, taxonomy.allows_auto_tags, taxonomy.allows_auto_values UNION ALL SELECT tags.tag_name AS tag_name, tags.tag_description AS tag_description, NULL AS tag_category, tags.extras AS tag_extras, taxonomy.name AS taxonomy_name, taxonomy.description AS taxonomy_description, taxonomy.url AS taxonomy_url, taxonomy.is_actionable AS taxonomy_flags_is_actionable, taxonomy.is_automatically_classifiable AS taxonomy_flags_is_automatically_classifiable, taxonomy.is_stable AS taxonomy_flags_is_stable, taxonomy.for_numbers AS taxonomy_flags_for_numbers, taxonomy.for_domains AS taxonomy_flags_for_domains, taxonomy.allows_auto_tags AS taxonomy_flags_allows_auto_tags, taxonomy.allows_auto_values AS taxonomy_flags_allows_auto_values, COUNT(DISTINCT taxonomy_tag_val.id) AS values_count FROM tags JOIN taxonomy ON (taxonomy.id = tags.taxonomy_id) LEFT JOIN taxonomy_tag_val ON (taxonomy_tag_val.tag_id = tags.tag_id) WHERE tags.tag_name = %(tag)s AND taxonomy.name = %(taxonomy)s AND (STRPOS(tag_name, '::') = 0) GROUP BY tags.tag_name, tags.tag_description, tags.extras, taxonomy.name, taxonomy.description, taxonomy.description, taxonomy.url, taxonomy.is_actionable, taxonomy.is_automatically_classifiable, taxonomy.is_stable, taxonomy.for_numbers, taxonomy.for_domains, taxonomy.allows_auto_tags, taxonomy.allows_auto_values ; """ rows = execute_db(SQL, params, dict_=True) if len(rows) == 0: raise HTTPException( status_code=404, detail="no tag '%s' in taxonomy '%s' found" % (tag, taxonomy) ) elif len(rows) > 1: raise HTTPException(status_code=500, detail="more than 1 tag found") row, = rows ret = { 'tag': { 'name': row['tag_name'], 'description': row['tag_description'], 'category': row['tag_category'], 'extras': row['tag_extras'], }, 'taxonomy': { 'name': row['taxonomy_name'], 'description': row['taxonomy_description'], 'url': row['taxonomy_url'], 'flags': { 'is_actionable': row['taxonomy_flags_is_actionable'], 'is_automatically_classifiable': row['taxonomy_flags_is_automatically_classifiable'], 'is_stable': row['taxonomy_flags_is_actionable'], 'for_numbers': row['taxonomy_flags_for_numbers'], 'for_domains': row['taxonomy_flags_for_domains'], 'allows_auto_tags': row['taxonomy_flags_allows_auto_tags'], 'allows_auto_values': row['taxonomy_flags_allows_auto_values'] } }, 'values': { 'count': row["values_count"] } } return ret
def get_stats_bycategory( taxonomy: str, category: Optional[str] = None, at_time: datetime.datetime = None, filter: str = Query(None, regex=RE_FILTER), limit: int = config['default_limit'], offset: int = config['default_offset'] ): """ Returns stats on all tags that are within a taxonomy, optionally filtered by categories. A filter is a string of the form Tag=Value. The available tags and values can be fetched using the /api/v1/filters/types and /api/v1/filters/values endpoints, respectively. **GET Parameters:** | parameter | description | | ------------------ | -------------------------------------------------------------------------------------------------------------------- | | taxonomy | taxonomy to calculate stats for | | category | filter for this category. If not given all tags in taxonomy are returned. Set empty string for tags in root category | | at_time | point in time to query | | filter | filter domains by entries in the filter table before counting | | limit | limit answer to {limit} entries | | offset | start answer at the {offset}-th entry | **Output (JSON):** JSON list of objects, ordered by count, descending. Each object has the following keys: | key | description | | ------------------ | ------------------------------------------------------------------------------- | | tag_name | name of the tag | | count | number of domains labeled by this tag in taxonomy {taxonomy} | """ base_table, base_table_params = get_sql_base_table(at_time, filter) if category is not None: category_clause = """ ( ( -- tags with a category (STRPOS(tag_name, '::') != 0) AND ( (REGEXP_REPLACE(tag_name, '^(.+)::.+$', '\\1')) = %(category)s ) ) OR ( -- tags without a category (STRPOS(tag_name, '::') = 0) AND (%(category)s = '') ) ) """ else: category_clause = "TRUE" SQL = """ SELECT tag_name, COUNT(DISTINCT domain_id) AS count FROM %s AS tag_table -- base_table JOIN tags USING (tag_id) JOIN taxonomy ON (tags.taxonomy_id = taxonomy.id) WHERE (taxonomy.name = %%(taxonomy)s) AND (%s) -- category clause GROUP BY tag_id, tag_name ORDER BY count DESC """ % (base_table, category_clause) params = { 'taxonomy': taxonomy, 'category': category, 'at_time': at_time, 'limit': limit, 'offset': offset } params.update(base_table_params) rows = execute_db(SQL, params, dict_=True) return rows
def get_tags( taxonomy: str = None, category: str = None, limit: int = config['default_limit'], offset: int = config['default_offset'] ): """ Returns all known tags. **GET Parameters:** * limit .... how many entries should we return? * offset.... starting at {offset} **Output (JSON):** * tag_id... integer * tag_name ... name of the tag * tag_description ... long description * taxonomy_id ... id of the linked taxonomy * extras... JSON dict of potential extra information """ params = { 'taxonomy': taxonomy, 'category': category, 'limit': limit, 'offset': offset } if taxonomy is not None: taxonomy_where_clause = "(taxonomy.name = %(taxonomy)s)" else: taxonomy_where_clause = "True" if category is not None: if taxonomy is None: raise HTTPException( status_code=400, detail="querying category tags requires taxonomy to be set" ) if category == "": category_where_clause = "(STRPOS(tag_name, '::') = 0)" else: category_where_clause = ( "(STRPOS(tag_name, '::') != 0) " "AND (" "(REGEXP_REPLACE(tag_name,'^(.+)::.+$', '\\1')) = %(category)s" ")" ) else: category_where_clause = "True" SQL = """ SELECT tag_id,tag_name,tag_description,taxonomy_id,extras FROM tags JOIN taxonomy ON (tags.taxonomy_id = taxonomy.id) WHERE ( %s ) AND ( %s ) -- taxonomy_where_clause, category_where_clause ORDER BY tag_id ASC LIMIT %%(limit)s OFFSET %%(offset)s """ % (taxonomy_where_clause, category_where_clause) rows = execute_db(SQL, params, dict_=True) return rows