def _get_threat_exchange_datasets(
    table: Table,
    threat_exchange_data_bucket_name: str,
    threat_exchange_data_folder: str,
    threat_exchange_pdq_file_extension: str,
) -> t.List[ThreatExchangeDatasetSummary]:
    """
    Build one ThreatExchangeDatasetSummary per configured collaboration.

    Each summary combines the collaboration's own config fields with two
    looked-up counters:

    * hash_count - first element of the entry returned by
      _get_signal_hash_count_and_last_modified() for the key
      "<data_folder><privacy_group_id><pdq_file_extension>"; 0 when no entry
      exists for that key.
    * match_count - value from MatchByPrivacyGroupCounter.get_all_counts()
      for the collaboration's privacy_group_id; 0 when absent.

    The bucket name, folder and extension are passed through unchanged to
    _get_signal_hash_count_and_last_modified(); the folder and extension are
    additionally used here to build the lookup key.
    """
    collaborations = ThreatExchangeConfig.get_all()
    hash_counts: t.Dict[
        str, t.Tuple[int, str]
    ] = _get_signal_hash_count_and_last_modified(
        threat_exchange_data_bucket_name,
        threat_exchange_data_folder,
        threat_exchange_pdq_file_extension,
    )
    match_counts: t.Dict[str, int] = MatchByPrivacyGroupCounter.get_all_counts(table)

    # Placeholder entry for collaborations with no hash-count record; only
    # its first element (the count) is ever read.
    no_hashes: t.Tuple[int, str] = (0, "")

    summaries: t.List[ThreatExchangeDatasetSummary] = []
    for collab in collaborations:
        lookup_key = (
            f"{threat_exchange_data_folder}"
            f"{collab.privacy_group_id}"
            f"{threat_exchange_pdq_file_extension}"
        )
        count_entry = hash_counts.get(lookup_key, no_hashes)
        summaries.append(
            ThreatExchangeDatasetSummary(
                collab.privacy_group_id,
                collab.privacy_group_name,
                collab.description,
                collab.fetcher_active,
                collab.matcher_active,
                collab.write_back,
                collab.in_use,
                hash_count=count_entry[0],
                match_count=match_counts.get(collab.privacy_group_id, 0),
            )
        )
    return summaries
def update_privacy_groups_in_use(priavcy_group_id_in_use: set) -> None:
    """
    Flag collaborations that are missing from the given id set as not in use.

    Every config returned by ThreatExchangeConfig.get_all() whose
    privacy_group_id (compared as a string) is not a member of
    `priavcy_group_id_in_use` gets `in_use = False` and is written back via
    hmaconfig.update_config(). Configs whose id is in the set are left
    untouched.

    NOTE: the parameter name is misspelled ("priavcy"); it is kept as-is so
    keyword callers keep working.
    """
    for group_config in ThreatExchangeConfig.get_all():
        # Membership is tested on the string form of the id.
        if str(group_config.privacy_group_id) in priavcy_group_id_in_use:
            continue
        group_config.in_use = False
        hmaconfig.update_config(group_config)
def lambda_handler(event, context):
    """
    Fetch pending ThreatExchange updates for every configured collaboration.

    For each collaboration returned by ThreatExchangeConfig.get_all():

    1. Skip it if its privacy_group_id does not pass is_int().
    2. Build a ThreatUpdateS3PDQStore for it and load its checkpoint,
       resetting the store first when it reports itself stale.
    3. Skip it if the store's fetch_checkpoint is not earlier than the time
       this invocation started.
    4. Run an incremental sync on the store's next delta, then apply the
       delta to the store.

    The apply step lives in a `finally` clause, so it is attempted even when
    the sync raised; in that case the delta's `end` is first set to its
    `current` value and the original exception is re-raised afterwards,
    which stops processing of the remaining collaborations.

    Neither `event` nor `context` is read by this handler.
    """
    lambda_init_once()
    config = FetcherConfig.get()
    collabs = ThreatExchangeConfig.get_all()

    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")

    # Log at most five names; when there are more collaborations than that,
    # the fifth slot is replaced by "..." to signal truncation.
    names = [collab.privacy_group_name for collab in collabs[:5]]
    if len(names) < len(collabs):
        names[-1] = "..."

    logger.info(
        "Triggered at time %s, found %d collabs: %s",
        current_time,
        len(collabs),
        ", ".join(names),
    )

    api_key = AWSSecrets().te_api_key()
    api = ThreatExchangeAPI(api_key)

    te_data_bucket = s3.Bucket(config.s3_bucket)

    # NOTE(review): nothing in the visible part of this handler reads
    # `stores`; kept in case later code relies on it.
    stores = []
    for collab in collabs:
        logger.info("Processing updates for collaboration %s", collab.privacy_group_name)

        if not is_int(collab.privacy_group_id):
            logger.info(
                "Fetch skipped because privacy_group_id(%s) is not an int",
                collab.privacy_group_id,
            )
            continue
        privacy_group_id = int(collab.privacy_group_id)

        indicator_store = ThreatUpdateS3PDQStore(
            privacy_group_id,
            api.app_id,
            te_data_bucket,
            config.s3_te_data_folder,
            config.data_store_table,
        )
        stores.append(indicator_store)

        indicator_store.load_checkpoint()
        if indicator_store.stale:
            logger.warning(
                "Store for %s - %d stale! Resetting.",
                collab.privacy_group_name,
                privacy_group_id,
            )
            indicator_store.reset()

        # Checkpoint is at or past the start of this invocation: nothing to fetch.
        if indicator_store.fetch_checkpoint >= now.timestamp():
            continue

        delta = indicator_store.next_delta

        try:
            delta.incremental_sync_from_threatexchange(api)
        except BaseException:
            # Deliberately as broad as a bare `except:` so that whatever was
            # fetched can still be saved by the `finally` clause; the error
            # is always re-raised. No .exception() call is needed here
            # because we're just re-raising.
            logger.error("Exception occurred! Attempting to save...")
            # Force delta to show finished
            delta.end = delta.current
            raise
        finally:
            # Use the module logger here too, consistent with the rest of
            # the handler, instead of the root-logger `logging.*` helpers.
            if delta:
                logger.info("Fetch complete, applying %d updates", len(delta.updates))
                indicator_store.apply_updates(
                    delta, post_apply_fn=indicator_store.post_apply
                )
            else:
                logger.error("Failed before fetching any records")