def create_privacy_group_if_not_exists(
    privacy_group_id: str,
    privacy_group_name: str,
    description: str = "",
    in_use: bool = True,
    fetcher_active: bool = FETCHER_ACTIVE_DEFAULT,
    matcher_active: bool = MATCHER_ACTIVE_DEFAULT,
    write_back: bool = WRITE_BACK_DEFAULT,
):
    """
    Create a ThreatExchange collaboration config, tolerating duplicates.

    If a config with the same privacy_group_id already exists, log a
    warning and (when a non-empty description was supplied) refresh the
    stored description instead of failing. Any other ClientError is
    re-raised.
    """
    logger.info("Adding collaboration name %s", privacy_group_name)
    new_config = ThreatExchangeConfig(
        privacy_group_id,
        fetcher_active=fetcher_active,
        privacy_group_name=privacy_group_name,
        in_use=in_use,
        description=description,
        matcher_active=matcher_active,
        write_back=write_back,
    )
    try:
        hmaconfig.create_config(new_config)
    except ClientError as err:
        # A conditional-check failure means a config with this id already
        # exists; anything else is unexpected and propagates to the caller.
        if err.response["Error"]["Code"] != "ConditionalCheckFailedException":
            raise
        logger.warning(
            "Can't insert duplicated config, %s",
            err.response["Error"]["Message"],
        )
        if description:
            update_privacy_group_description(privacy_group_id, description)
def get_privacy_group_matcher_active(privacy_group_id: str, _) -> bool:
    """
    Return whether matching is enabled for the given privacy group.

    Falls back to False (and logs a warning) when no config exists for
    the id. The unused second parameter is kept for interface
    compatibility with callers.
    """
    config = ThreatExchangeConfig.get(privacy_group_id)
    if not config:
        logger.warning("Privacy group %s is not found!", privacy_group_id)
        return False

    active = config.matcher_active
    logger.info("matcher_active for %s is %s", privacy_group_id, active)
    return active
def delete_dataset(key=None) -> DeleteDatasetResponse:
    """
    Delete the dataset (privacy group config) identified by ``key``.

    Raises via ``getx`` if no such config exists.
    """
    dataset_config = ThreatExchangeConfig.getx(str(key))
    hmaconfig.delete_config(dataset_config)
    return DeleteDatasetResponse(response="The privacy group is deleted")
def update_dataset(request: UpdateDatasetRequest) -> Dataset:
    """
    Update dataset values: fetcher_active, write_back, and matcher_active.

    Also creates, updates, or deletes the optional pdq_match_threshold
    stored in a parallel AdditionalMatchSettingsConfig record.
    """
    pg_id = str(request.privacy_group_id)

    te_config = ThreatExchangeConfig.getx(pg_id)
    te_config.fetcher_active = request.fetcher_active
    te_config.write_back = request.write_back
    te_config.matcher_active = request.matcher_active

    updated = hmaconfig.update_config(te_config).__dict__
    # Dataset.from_dict expects privacy_group_id; the config stores it as name.
    updated["privacy_group_id"] = updated["name"]

    match_settings = AdditionalMatchSettingsConfig.get(pg_id)
    if request.pdq_match_threshold:
        threshold = int(request.pdq_match_threshold)
        if match_settings:
            match_settings.pdq_match_threshold = threshold
            hmaconfig.update_config(match_settings)
        else:
            hmaconfig.create_config(AdditionalMatchSettingsConfig(pg_id, threshold))
    elif match_settings:
        # pdq_match_threshold was set and now should be removed
        hmaconfig.delete_config(match_settings)

    return Dataset.from_dict(updated)
def writeback_is_enabled(self, writeback_signal: BankedSignal) -> bool:
    """
    Return the write_back flag of the privacy group backing this signal.

    The signal's bank_id is used as the privacy group id. If no
    ThreatExchangeConfig exists for it, default to NOT writing back.
    """
    privacy_group_id = writeback_signal.bank_id
    privacy_group_config = ThreatExchangeConfig.cached_get(privacy_group_id)
    if isinstance(privacy_group_config, ThreatExchangeConfig):
        return privacy_group_config.write_back

    # If no config, dont write back.
    # FIX: logger.warn is a deprecated alias of logger.warning; also use
    # lazy %-style args instead of eager string concatenation.
    logger.warning("No config found for privacy group %s", privacy_group_id)
    return False
def _get_all_matcher_active_privacy_groups(cache_buster) -> t.List[str]:
    """
    Return the names of all configs whose matcher_active flag is set.

    ``cache_buster`` is unused here; it exists so callers can defeat
    memoization layers.
    """
    return [
        config.name
        for config in ThreatExchangeConfig.get_all()
        if config.matcher_active
    ]
def update_dataset(request: UpdateDatasetRequest) -> Dataset:
    """
    Update dataset values: fetcher_active, write_back, and matcher_active.
    """
    dataset_config = ThreatExchangeConfig.getx(str(request.privacy_group_id))

    # Copy the three mutable flags from the request onto the config.
    for flag in ("fetcher_active", "write_back", "matcher_active"):
        setattr(dataset_config, flag, getattr(request, flag))

    updated = hmaconfig.update_config(dataset_config).__dict__
    # Dataset.from_dict expects privacy_group_id; the config stores it as name.
    updated["privacy_group_id"] = updated["name"]
    return Dataset.from_dict(updated)
def _create_privacy_groups(self):
    """Seed the mocked config table with one active and one inactive privacy group."""
    # Since we already have a mock_dynamodb2 courtesy BanksTableTestBase,
    # re-use it for initing configs. Requires some clever hot-wiring.
    config_test_mock = config_test.ConfigTest()
    config_test_mock.mock_dynamodb2 = self.__class__.mock_dynamodb2
    config_test_mock.create_mocked_table()
    HMAConfig.initialize(config_test_mock.TABLE_NAME)
    # Hot wiring ends...

    # NOTE(review): args are positional; presumably they mirror
    # ThreatExchangeConfig's constructor order — confirm against its
    # definition before editing these literals.
    self.active_pg = ThreatExchangeConfig(
        "ACTIVE_PG", True, "", True, True, True, "ACTIVE_PG"
    )
    create_config(self.active_pg)

    # Active PG has a distance threshold of 31.
    create_config(AdditionalMatchSettingsConfig("ACTIVE_PG", 31))

    self.inactive_pg = ThreatExchangeConfig(
        "INACTIVE_PG", True, "", True, True, False, "INACTIVE_PG"
    )
    create_config(self.inactive_pg)
def _get_threat_exchange_datasets(
    table: Table,
    threat_exchange_data_bucket_name: str,
    threat_exchange_data_folder: str,
) -> t.List[ThreatExchangeDatasetSummary]:
    """
    Build a ThreatExchangeDatasetSummary for every collaboration config.

    Hash counts come from S3 object metadata; match_count is a -1
    placeholder until the new count system lands. pdq_match_threshold is
    filled from AdditionalMatchSettingsConfig when one exists, else "".
    """
    collaborations = ThreatExchangeConfig.get_all()
    hash_counts: t.Dict[
        str, t.Tuple[int, str]
    ] = _get_signal_hash_count_and_last_modified(
        threat_exchange_data_bucket_name,
        threat_exchange_data_folder,
    )

    summaries = []
    for collab in collaborations:
        if additional_config := AdditionalMatchSettingsConfig.get(
            str(collab.privacy_group_id)
        ):
            pdq_match_threshold = str(additional_config.pdq_match_threshold)
        else:
            pdq_match_threshold = ""

        summaries.append(
            ThreatExchangeDatasetSummary(
                collab.privacy_group_id,
                collab.privacy_group_name,
                collab.description,
                collab.fetcher_active,
                collab.matcher_active,
                collab.write_back,
                collab.in_use,
                hash_count=t.cast(
                    int,
                    hash_counts.get(
                        collab.privacy_group_id,
                        [-1, ""],
                    )[0],
                ),
                match_count=-1,  # fix will be based on new count system
                pdq_match_threshold=pdq_match_threshold,
            )
        )
    # FIX: the original built `summaries` but never returned it, so the
    # function returned None despite its t.List[...] annotation.
    return summaries
def _get_threat_exchange_datasets(
    table: Table,
    threat_exchange_data_bucket_name: str,
    threat_exchange_data_folder: str,
    threat_exchange_pdq_file_extension: str,
) -> t.List[ThreatExchangeDatasetSummary]:
    """
    Build a ThreatExchangeDatasetSummary for every collaboration config,
    combining S3 hash counts with per-privacy-group match counts.
    """
    hash_counts: t.Dict[
        str, t.Tuple[int, str]
    ] = _get_signal_hash_count_and_last_modified(
        threat_exchange_data_bucket_name,
        threat_exchange_data_folder,
        threat_exchange_pdq_file_extension,
    )
    match_counts: t.Dict[str, int] = MatchByPrivacyGroupCounter.get_all_counts(table)

    summaries = []
    for collab in ThreatExchangeConfig.get_all():
        # hash_counts is keyed by the collab's full S3 object key.
        s3_key = (
            f"{threat_exchange_data_folder}"
            f"{collab.privacy_group_id}"
            f"{threat_exchange_pdq_file_extension}"
        )
        hash_count = t.cast(int, hash_counts.get(s3_key, [0, ""])[0])
        summaries.append(
            ThreatExchangeDatasetSummary(
                collab.privacy_group_id,
                collab.privacy_group_name,
                collab.description,
                collab.fetcher_active,
                collab.matcher_active,
                collab.write_back,
                collab.in_use,
                hash_count=hash_count,
                match_count=match_counts.get(collab.privacy_group_id, 0),
            )
        )
    return summaries
def load_defaults(_args):
    """
    Load a hardcoded set of defaults which are useful in testing
    """
    # Could also put the default on the class, but seems too fancy
    configs = [
        ThreatExchangeConfig(
            name="303636684709969",
            fetcher_active=True,
            privacy_group_name="Test Config 1",
            write_back=True,
            in_use=True,
            description="test description",
            matcher_active=True,
        ),
        ThreatExchangeConfig(
            name="258601789084078",
            fetcher_active=True,
            privacy_group_name="Test Config 2",
            write_back=True,
            in_use=True,
            description="test description",
            matcher_active=True,
        ),
        # monitoring page:
        # https://webhook.site/#!/ff7ebc37-514a-439e-9a03-46f86989e195
        WebhookPostActionPerformer(
            name="EnqueueForReview",
            url="https://webhook.site/ff7ebc37-514a-439e-9a03-46f86989e195",
            headers='{"Connection":"keep-alive"}',
        ),
        # monitoring page:
        # https://webhook.site/#!/01cef721-bdcc-4681-8430-679c75659867
        WebhookPostActionPerformer(
            name="EnqueueMiniCastleForReview",
            url="https://webhook.site/01cef721-bdcc-4681-8430-679c75659867",
            headers='{"Connection":"keep-alive"}',
        ),
        # monitoring page:
        # https://webhook.site/#!/fa5c5ad5-f5cc-4692-bf03-a03a4ae3f714
        WebhookPostActionPerformer(
            name="EnqueueSailboatForReview",
            url="https://webhook.site/fa5c5ad5-f5cc-4692-bf03-a03a4ae3f714",
            headers='{"Connection":"keep-alive"}',
        ),
        ActionRule(
            name="Enqueue Mini-Castle for Review",
            action_label=ActionLabel("EnqueueMiniCastleForReview"),
            must_have_labels={
                BankIDClassificationLabel("303636684709969"),
                ClassificationLabel("true_positive"),
            },
            must_not_have_labels={
                BankedContentIDClassificationLabel("3364504410306721"),
            },
        ),
        ActionRule(
            name="Enqueue Sailboat for Review",
            action_label=ActionLabel("EnqueueSailboatForReview"),
            must_have_labels={
                BankIDClassificationLabel("303636684709969"),
                ClassificationLabel("true_positive"),
                BankedContentIDClassificationLabel("3364504410306721"),
            },
            must_not_have_labels=set(),
        ),
    ]

    # Someday maybe can do filtering or something, I dunno
    for cfg in configs:
        # Tolerate re-runs: an already-existing config is reported, not fatal.
        try:
            hmaconfig.create_config(cfg)
        except ClientError as err:
            error = err.response["Error"]
            if error["Code"] != "ConditionalCheckFailedException":
                raise
            print(f"Can't insert duplicated config, {error['Message']}")
        print(cfg)
class WritebackerTestCase(unittest.TestCase):
    """Tests for the writebacker lambda against the mocked TE API.

    NOTE(review): the expected strings below (including the misspellings
    "becuase" and "decriptor") must match the mock API's output
    byte-for-byte — do not "fix" them here without changing the mock.
    """

    # Four matched signals: two in "pg 4", one in "pg 3", and one from a
    # non-ThreatExchange source (which should be ignored by TE writebacks).
    banked_signals = [
        BankedSignal("2862392437204724", "pg 4", "te"),
        BankedSignal("4194946153908639", "pg 4", "te"),
        BankedSignal("3027465034605137", "pg 3", "te"),
        BankedSignal("evil.jpg", "bank 4", "non-te-source"),
    ]
    match_message = MatchMessage("key", "hash", banked_signals)

    # Writebacks are enabled for the trustworth privacy group not for
    # the untrustworthy one
    configs = [
        ThreatExchangeConfig("pg 4", True, "Trustworthy PG", "test description", True, True, True),
        ThreatExchangeConfig("pg 3", True, "UnTrustworthy PG", "test description", True, False, True),
    ]
    # Runs at class-definition time so the mocked configs exist for all tests.
    for config in configs:
        hmaconfig.mock_create_config(config)

    def test_saw_this_too(self):
        # SawThisToo should react on both pg 4 descriptors and skip pg 3
        # (writebacks disabled there).
        os.environ["MOCK_TE_API"] = "True"
        os.environ["CONFIG_TABLE_NAME"] = "test-HMAConfig"

        writeback = WritebackTypes.SawThisToo
        writeback_message = WritebackMessage.from_match_message_and_type(
            self.match_message, writeback)
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}

        result = lambda_handler(event, None)
        assert result == {
            "writebacks_performed": {
                "te": [
                    "Reacted SAW_THIS_TOO to descriptor a2|2862392437204724\nReacted SAW_THIS_TOO to descriptor a3|2862392437204724",
                    "Reacted SAW_THIS_TOO to descriptor a2|4194946153908639\nReacted SAW_THIS_TOO to descriptor a3|4194946153908639",
                    "No writeback performed for banked content id 3027465034605137 becuase writebacks were disabled",
                ]
            }
        }

        os.environ["MOCK_TE_API"] = "False"

    def test_false_positive(self):
        # FalsePositive maps to DISAGREE_WITH_TAGS reactions.
        os.environ["MOCK_TE_API"] = "True"
        os.environ["CONFIG_TABLE_NAME"] = "test-HMAConfig"

        writeback = WritebackTypes.FalsePositive
        writeback_message = WritebackMessage.from_match_message_and_type(
            self.match_message, writeback)
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}

        result = lambda_handler(event, None)
        assert result == {
            "writebacks_performed": {
                "te": [
                    "Reacted DISAGREE_WITH_TAGS to descriptor a2|2862392437204724\nReacted DISAGREE_WITH_TAGS to descriptor a3|2862392437204724",
                    "Reacted DISAGREE_WITH_TAGS to descriptor a2|4194946153908639\nReacted DISAGREE_WITH_TAGS to descriptor a3|4194946153908639",
                    "No writeback performed for banked content id 3027465034605137 becuase writebacks were disabled",
                ]
            }
        }

        os.environ["MOCK_TE_API"] = "False"

    def test_true_positve(self):
        # NOTE(review): method name misspells "positive"; renaming would
        # change which tests run under name-based filters — left as-is.
        # TruePositive writes back by building new descriptors.
        os.environ["MOCK_TE_API"] = "True"
        os.environ["CONFIG_TABLE_NAME"] = "test-HMAConfig"

        writeback = WritebackTypes.TruePositive
        writeback_message = WritebackMessage.from_match_message_and_type(
            self.match_message, writeback)
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}

        result = lambda_handler(event, None)
        assert result == {
            "writebacks_performed": {
                "te": [
                    "Wrote back TruePositive for indicator 2862392437204724\nBuilt descriptor a1|2862392437204724 with privacy groups pg 4",
                    "Wrote back TruePositive for indicator 4194946153908639\nBuilt descriptor a1|4194946153908639 with privacy groups pg 4",
                    "No writeback performed for banked content id 3027465034605137 becuase writebacks were disabled",
                ]
            }
        }

        os.environ["MOCK_TE_API"] = "False"

    def test_remove_opinion(self):
        # RemoveOpinion deletes our descriptor and removes reactions from
        # the others.
        os.environ["MOCK_TE_API"] = "True"
        os.environ["CONFIG_TABLE_NAME"] = "test-HMAConfig"

        writeback = WritebackTypes.RemoveOpinion
        writeback_message = WritebackMessage.from_match_message_and_type(
            self.match_message, writeback)
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}

        result = lambda_handler(event, None)
        assert result == {
            "writebacks_performed": {
                "te": [
                    "\n".join((
                        "Deleted decriptor a1|2862392437204724 for indicator 2862392437204724",
                        "Removed reaction DISAGREE_WITH_TAGS from descriptor a2|2862392437204724",
                        "Removed reaction DISAGREE_WITH_TAGS from descriptor a3|2862392437204724",
                    )),
                    "\n".join((
                        "Deleted decriptor a1|4194946153908639 for indicator 4194946153908639",
                        "Removed reaction DISAGREE_WITH_TAGS from descriptor a2|4194946153908639",
                        "Removed reaction DISAGREE_WITH_TAGS from descriptor a3|4194946153908639",
                    )),
                    "No writeback performed for banked content id 3027465034605137 becuase writebacks were disabled",
                ]
            }
        }

        os.environ["MOCK_TE_API"] = "False"
def lambda_handler(event, context):
    """
    Fetch ThreatExchange updates for every collaboration config.

    For each collab with an integer privacy_group_id, loads (or resets a
    stale) ThreatUpdateS3Store checkpoint, performs an incremental sync,
    and applies whatever updates were fetched — even when the sync raised.
    """
    lambda_init_once()
    config = FetcherConfig.get()
    collabs = ThreatExchangeConfig.get_all()

    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")

    # Log at most the first five collab names so the message stays short.
    names = [collab.privacy_group_name for collab in collabs[:5]]
    if len(names) < len(collabs):
        names[-1] = "..."

    data = f"Triggered at time {current_time}, found {len(collabs)} collabs: {', '.join(names)}"
    logger.info(data)

    api_key = AWSSecrets().te_api_key()
    api = ThreatExchangeAPI(api_key)

    for collab in collabs:
        logger.info(
            "Processing updates for collaboration %s", collab.privacy_group_name
        )

        if not is_int(collab.privacy_group_id):
            logger.info(
                f"Fetch skipped because privacy_group_id({collab.privacy_group_id}) is not an int"
            )
            continue

        indicator_store = ThreatUpdateS3Store(
            int(collab.privacy_group_id),
            api.app_id,
            s3_client=get_s3_client(),
            s3_bucket_name=config.s3_bucket,
            s3_te_data_folder=config.s3_te_data_folder,
            data_store_table=config.data_store_table,
            supported_signal_types=[VideoMD5Signal, PdqSignal],
        )

        indicator_store.load_checkpoint()

        if indicator_store.stale:
            logger.warning(
                "Store for %s - %d stale! Resetting.",
                collab.privacy_group_name,
                int(collab.privacy_group_id),
            )
            indicator_store.reset()

        # Checkpoint already at/after "now" — nothing new to fetch.
        if indicator_store.fetch_checkpoint >= now.timestamp():
            continue

        delta = indicator_store.next_delta
        try:
            delta.incremental_sync_from_threatexchange(
                api,
            )
        # FIX: was a bare `except:`; BaseException keeps the identical
        # catch-everything-and-re-raise semantics while being explicit.
        except BaseException:
            # Don't need to call .exception() here because we're just re-raising
            logger.error("Exception occurred! Attempting to save...")
            # Force delta to show finished
            delta.end = delta.current
            raise
        finally:
            # FIX: use the module `logger` (was bare `logging.*`) for
            # consistency with the rest of this function.
            if delta:
                logger.info("Fetch complete, applying %d updates", len(delta.updates))
                indicator_store.apply_updates(
                    delta, post_apply_fn=indicator_store.post_apply
                )
            else:
                logger.error("Failed before fetching any records")
def update_privacy_groups_in_use(priavcy_group_id_in_use: set) -> None:
    """
    Mark any config whose id is absent from the given set as not in use.

    NOTE(review): the parameter name misspells "privacy"; renaming it
    would break keyword-argument callers, so it is kept.
    """
    for collab in ThreatExchangeConfig.get_all():
        if str(collab.privacy_group_id) in priavcy_group_id_in_use:
            continue
        collab.in_use = False
        hmaconfig.update_config(collab)
def update_privacy_group_description(privacy_group_id: str, description: str) -> None:
    """Overwrite the stored description of an existing privacy group config."""
    pg_config = ThreatExchangeConfig.getx(privacy_group_id)
    pg_config.description = description
    hmaconfig.update_config(pg_config)
def lambda_handler(_event, _context):
    """
    Run through threatexchange privacy groups and fetch updates to them.
    If this is the first time for a privacy group, will fetch from the
    start, else only updates since the last time.

    Note: since this is a scheduled job, we swallow all exceptions. We
    only log exceptions and move on.
    """
    lambda_init_once()
    config = FetcherConfig.get()
    collabs = ThreatExchangeConfig.get_all()

    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")

    # Log at most the first five collab names so the message stays short.
    names = [collab.privacy_group_name for collab in collabs[:5]]
    if len(names) < len(collabs):
        names[-1] = "..."

    data = f"Triggered at time {current_time}, found {len(collabs)} collabs: {', '.join(names)}"
    logger.info(data)

    api_token = AWSSecrets().te_api_token()
    api = ThreatExchangeAPI(api_token)

    for collab in collabs:
        logger.info(
            "Processing updates for collaboration %s", collab.privacy_group_name
        )

        if not is_int(collab.privacy_group_id):
            logger.info(
                f"Fetch skipped because privacy_group_id({collab.privacy_group_id}) is not an int"
            )
            continue

        if not collab.fetcher_active:
            logger.info(
                f"Fetch skipped because configs has `fetcher_active` set to false for privacy_group_id({collab.privacy_group_id})"
            )
            continue

        indicator_store = ThreatUpdateS3Store(
            int(collab.privacy_group_id),
            api.app_id,
            s3_client=get_s3_client(),
            s3_bucket_name=config.s3_bucket,
            s3_te_data_folder=config.s3_te_data_folder,
            data_store_table=config.data_store_table,
            supported_signal_types=[VideoMD5Signal, PdqSignal],
        )

        # FIX: `delta` must be (re)initialized every iteration. Previously,
        # if load_checkpoint()/reset() raised before `delta` was assigned,
        # the except/finally blocks hit UnboundLocalError; worse, after a
        # `continue` the `finally` could see a *previous* iteration's delta
        # and re-apply its updates.
        delta = None
        try:
            indicator_store.load_checkpoint()

            if indicator_store.stale:
                logger.warning(
                    "Store for %s - %d stale! Resetting.",
                    collab.privacy_group_name,
                    int(collab.privacy_group_id),
                )
                indicator_store.reset()

            # Checkpoint already at/after "now" — nothing new to fetch.
            if indicator_store.fetch_checkpoint >= now.timestamp():
                continue

            delta = indicator_store.next_delta
            delta.incremental_sync_from_threatexchange(
                api, limit=MAX_DESCRIPTORS_UPDATED, progress_fn=ProgressLogger()
            )
        except Exception:  # pylint: disable=broad-except
            logger.exception(
                "Encountered exception while getting updates. Will attempt saving.."
            )
            # Force delta to show finished (only if we got one at all).
            if delta:
                delta.end = delta.current
        finally:
            if delta:
                logging.info("Fetch complete, applying %d updates", len(delta.updates))
                indicator_store.apply_updates(
                    delta, post_apply_fn=indicator_store.post_apply
                )
            else:
                logging.error("Failed before fetching any records")