def post_apply(self, updated: t.Optional[t.Dict] = None):
    """
    After the fetcher applies an update, check for matches to any of the
    signals in data_store_table and if found update their tags.

    Args:
        updated: mapping of update-key -> update object exposing
            as_csv_row(); defaults to no updates.
    """
    # Fix: the original default `updated: t.Dict = {}` is a shared mutable
    # default, evaluated once and reused across every call.
    updated = updated if updated is not None else {}
    table = dynamodb.Table(self.data_store_table)
    for update in updated.values():
        row = update.as_csv_row()
        # example row format: ('<signal>', '<id>', '<time added>', '<tag1 tags2>')
        # e.g ('096a6f9...064f', 1234567891234567, '2020-07-31T18:47:45+0000', 'true_positive hma_test')
        if PDQSignalMetadata(
            signal_id=int(row[1]),
            ds_id=str(self.privacy_group),
            updated_at=datetime.now(),
            signal_source=S3ThreatDataConfig.SOURCE_STR,
            signal_hash=row[0],  # note: not used by update_tags_in_table_if_exists
            tags=row[3].split(" ") if row[3] else [],
        ).update_tags_in_table_if_exists(table):
            logger.info(
                "Updated Signal Tags in DB for signal id: %s source: %s for privacy group: %d",
                row[1],
                S3ThreatDataConfig.SOURCE_STR,
                self.privacy_group,
            )
def post_apply(self, updated: t.Optional[t.Dict] = None):
    """
    After the fetcher applies an update, check for matches to any of the
    signals in data_store_table and if found update their tags.

    TODO: Additionally, if writebacks are enabled for this privacy group
    write back INGESTED to ThreatExchange

    Args:
        updated: mapping of update-key -> update object exposing
            as_csv_row(); defaults to no updates.
    """
    # Fix: the original default `updated: t.Dict = {}` is a shared mutable
    # default, evaluated once and reused across every call.
    updated = updated if updated is not None else {}
    table = dynamodb.Table(self.data_store_table)
    for update in updated.values():
        row: t.List[str] = update.as_csv_row()
        # example row format: ('<raw_indicator>', '<indicator-id>', '<descriptor-id>', '<time added>', '<space-separated-tags>')
        # e.g (10736405276340','096a6f9...064f', '1234567890', '2020-07-31T18:47:45+0000', 'true_positive hma_test')
        new_tags = row[4].split(" ") if row[4] else []
        metadata = PDQSignalMetadata.get_from_signal_and_ds_id(
            table,
            int(row[1]),
            S3ThreatDataConfig.SOURCE_STR,
            str(self.privacy_group),
        )
        if not metadata:
            # New indicator without metadata: nothing to update for this
            # row. Fix: the original used `return` here, which silently
            # aborted processing of every remaining update in the batch.
            continue
        new_pending_opinion_change = self.get_new_pending_opinion_change(
            metadata, new_tags)
        metadata = PDQSignalMetadata(
            signal_id=row[1],
            ds_id=str(self.privacy_group),
            updated_at=datetime.now(),
            signal_source=S3ThreatDataConfig.SOURCE_STR,
            signal_hash=row[0],  # note: not used by update_tags_in_table_if_exists
            tags=new_tags,
            pending_opinion_change=new_pending_opinion_change,
        )
        # NOTE(review): signal_id is passed as str here but int(row[1]) is
        # used for the lookup above — confirm which type the table expects.
        # TODO: Combine 2 update functions into single function
        if metadata.update_tags_in_table_if_exists(table):
            logger.info(
                "Updated Signal Tags in DB for indicator id: %s source: %s for privacy group: %d",
                row[1],
                S3ThreatDataConfig.SOURCE_STR,
                self.privacy_group,
            )
        if metadata.update_pending_opinion_change_in_table_if_exists(table):
            logger.info(
                "Updated Pending Opinion in DB for indicator id: %s source: %s for privacy group: %d",
                row[1],
                S3ThreatDataConfig.SOURCE_STR,
                self.privacy_group,
            )
def extract_orgs(domains: typing.Dict) -> typing.Dict:
    """Aggregate domain documents into per-organization summaries.

    Args:
        domains: mapping of domain -> document dict; each document carries
            'organization_slug', 'organization_name_en' and
            'organization_name_fr' keys.

    Returns:
        Mapping of organization slug -> {name_en, name_fr, slug,
        total_domains}, where total_domains counts the documents seen for
        that slug (names come from the first document seen).
    """
    orgs: typing.Dict = {}
    for record in domains.values():
        key = record['organization_slug']
        if key not in orgs:
            orgs[key] = {
                "name_en": record['organization_name_en'],
                "name_fr": record['organization_name_fr'],
                "slug": key,
                "total_domains": 0,
            }
        orgs[key]["total_domains"] += 1
    return orgs
def compute_idcg(gain_map: typing.Dict, cutoff: int) -> float:
    """Compute the Ideal Discounted Cumulative Gain (IDCG).

    The ideal ordering places the largest gains first, so the sum walks
    the `n` largest gains in descending order, discounting each by rank.

    :param gain_map: mapping of item -> relevance gain
    :param cutoff: maximum number of ranks to include (the "@k" cutoff)
    :return: the IDCG value
    """
    gains: typing.List = sorted(gain_map.values())
    m: int = len(gains)
    n: int = min(m, cutoff)
    # gains is ascending, so gains[m - r - 1] is the (r+1)-th largest gain.
    # Fix: the original used map() over two iterables with an unused lambda
    # parameter `g`, and wrapped the already-iterable values in list().
    return sum(gains[m - r - 1] * compute_dicount(r) for r in range(n))
def fill_table_fields(self, fields_dict: typing.Dict) -> None:
    """Populate self.fields, self.types and self.fields_count from a
    mapping of field name -> type name.

    Each type name is validated with __check_type_name and resolved via
    self.types_dict, which is keyed on the type name's first 3 characters.

    Fix: the original compared len(keys) with len(values) of the same dict
    and raised exception.DifferentCount — that branch is unreachable, so it
    was removed. The return annotation was also corrected from
    typing.NoReturn (which means "never returns") to None. Validation now
    happens before any attribute is assigned, so a failure no longer
    leaves the instance partially updated.

    :param fields_dict: mapping of field name -> type name
    :raises exception.TypeNotExists: if a type name fails validation
    """
    fields_list = list(fields_dict.keys())
    types_list = list(fields_dict.values())
    resolved_types = []
    for type_name in types_list:
        if not self.__check_type_name(type_name):
            raise exception.TypeNotExists(type_name)
        resolved_types.append(self.types_dict[type_name[:3]])
    self.fields = fields_list
    self.types = resolved_types
    self.fields_count = len(self.fields)
def set_alias(datasets: typing.Dict):
    """Assign a short, unique 'alias' string to every dataset.

    see esmvalcore._recipe.Recipe.set_alias method.

    Mutates each dataset dict in place: first tags it with a tuple key
    built from INFO_KEYS, then replaces that tuple with a minimal
    human-readable string that distinguishes it from all other datasets.
    """
    datasets_info = set()

    def _key_str(obj):
        # Normalize a dataset attribute into a hashable string:
        # strings pass through, iterables are dash-joined, anything
        # non-iterable falls back to str().
        if isinstance(obj, str):
            return obj
        try:
            return '-'.join(obj)
        except TypeError:
            return str(obj)

    # First pass: build the identifying tuple for every dataset and stash
    # it in dataset['alias'] (only if no alias was set explicitly).
    for variables in datasets.values():
        for dataset in variables:
            alias = tuple(
                _key_str(dataset.get(key, None)) for key in INFO_KEYS)
            datasets_info.add(alias)
            if 'alias' not in dataset:
                dataset['alias'] = alias

    # Map each info tuple to the list of components that will form its
    # final alias; _get_next_alias fills these in (presumably keeping only
    # the components needed to disambiguate — defined elsewhere, confirm).
    alias = dict()
    for info in datasets_info:
        alias[info] = []

    datasets_info = list(datasets_info)
    _get_next_alias(alias, datasets_info, 0)

    # Join the selected components; if nothing distinguishes a dataset,
    # fall back to its 'dataset' entry from the info tuple.
    for info in datasets_info:
        alias[info] = '_'.join(
            [str(value) for value in alias[info] if value is not None])
        if not alias[info]:
            alias[info] = info[INFO_KEYS.index('dataset')]

    # Second pass: swap each dataset's tuple key for the final string.
    # Explicit aliases (not present in the map) are left untouched.
    for variable in datasets.values():
        for dataset in variable:
            dataset['alias'] = alias.get(dataset['alias'], dataset['alias'])
def extract_orgs(domain_map: typing.Dict) -> typing.Dict:
    """Roll domain documents up into one summary entry per organization.

    Args:
        domain_map: mapping of domain -> document dict carrying
            'organization_slug', 'organization_name_en' and
            'organization_name_fr' keys.

    Returns:
        Mapping of slug -> {name_en, name_fr, slug, total_domains}; the
        names come from the first document seen for each slug and
        total_domains counts all of its documents.
    """
    summaries: typing.Dict = {}
    for doc in domain_map.values():
        slug = doc['organization_slug']
        entry = summaries.setdefault(slug, {
            "name_en": doc['organization_name_en'],
            "name_fr": doc['organization_name_fr'],
            "slug": slug,
            "total_domains": 0,
        })
        entry['total_domains'] += 1
    return summaries
def alerts_to_speech_output(alerts: typing.Dict) -> typing.AnyStr:
    """
    Checks whether the alert dictionary contains any entries. Returns a string
    that contains all alerts or a message that city services are operating
    normally.

    NOTE: pops the header entry from `alerts`, mutating the caller's dict.

    :param alerts: pruned alert dictionary
    :return: a string containing all alerts, or if no alerts are found,
        a message indicating there are no alerts at this time
    """
    logger.debug('alerts: ' + str(alerts))
    all_alerts = ""
    # Fix: the original tested `in all_alerts` (the just-initialized empty
    # output string) instead of `in alerts`, so the header was never
    # promoted to the front of the output.
    if Services.ALERT_HEADER.value in alerts:
        all_alerts += alerts.pop(Services.ALERT_HEADER.value)
    for alert in alerts.values():
        all_alerts += alert + ' '
    if all_alerts.strip() == "":  # this is a kludgy fix for the {'alert header': ''} bug
        return constants.NO_ALERTS
    else:
        return all_alerts.rstrip()
def v(self):
    """Return this mapping's values unpacked into a List."""
    values = Dict.values(self)
    return List(*values)
def values_view(self):
    """Return the values view of this mapping (delegates to Dict.values)."""
    return Dict.values(self)
def from_schema(schema_dict: typing.Dict) -> "AirtableSchema":
    """Build an AirtableSchema from a raw schema mapping.

    Each value of *schema_dict* is parsed into a table via
    Airtable.from_schema; keys are ignored.
    """
    parsed_tables = [
        Airtable.from_schema(raw_table)
        for raw_table in schema_dict.values()
    ]
    return AirtableSchema(tables=parsed_tables)
def v(self) -> List[B]:
    """Return this mapping's values wrapped via Lists.wrap."""
    return Lists.wrap(Dict.values(self))
def values_view(self):
    """Return the values view of this mapping (delegates to Dict.values)."""
    return Dict.values(self)