Пример #1
0
 def delete_dataset(key=None) -> DeleteDatasetResponse:
     """
     Delete dataset
     """
     config = ThreatExchangeConfig.getx(str(key))
     hmaconfig.delete_config(config)
     return DeleteDatasetResponse(response="The privacy group is deleted")
Пример #2
0
def create_privacy_group_if_not_exists(
    privacy_group_id: str,
    privacy_group_name: str,
    description: str = "",
    in_use: bool = True,
    fetcher_active: bool = FETCHER_ACTIVE_DEFAULT,
    matcher_active: bool = MATCHER_ACTIVE_DEFAULT,
    write_back: bool = WRITE_BACK_DEFAULT,
):
    logger.info("Adding collaboration name %s", privacy_group_name)
    config = ThreatExchangeConfig(
        privacy_group_id,
        fetcher_active=fetcher_active,
        privacy_group_name=privacy_group_name,
        in_use=in_use,
        description=description,
        matcher_active=matcher_active,
        write_back=write_back,
    )
    try:
        hmaconfig.create_config(config)
    except ClientError as e:
        if e.response["Error"]["Code"] == "ConditionalCheckFailedException":
            logger.warning(
                "Can't insert duplicated config, %s",
                e.response["Error"]["Message"],
            )
            if description:
                update_privacy_group_description(privacy_group_id, description)
        else:
            raise
Пример #3
0
def get_privacy_group_matcher_active(privacy_group_id: str, _) -> bool:
    config = ThreatExchangeConfig.get(privacy_group_id)
    if not config:
        logger.warning("Privacy group %s is not found!", privacy_group_id)
        return False
    logger.info("matcher_active for %s is %s", privacy_group_id,
                config.matcher_active)
    return config.matcher_active
Пример #4
0
 def update_dataset(request: UpdateDatasetRequest) -> Dataset:
     """
     Update dataset fetcher_active, write_back and matcher_active
     """
     config = ThreatExchangeConfig.getx(str(request.privacy_group_id))
     config.fetcher_active = request.fetcher_active
     config.write_back = request.write_back
     config.matcher_active = request.matcher_active
     updated_config = hmaconfig.update_config(config).__dict__
     updated_config["privacy_group_id"] = updated_config["name"]
     return Dataset.from_dict(updated_config)
Пример #5
0
def _get_threat_exchange_datasets(
    table: Table,
    threat_exchange_data_bucket_name: str,
    threat_exchange_data_folder: str,
    threat_exchange_pdq_file_extension: str,
) -> t.List[ThreatExchangeDatasetSummary]:
    collaborations = ThreatExchangeConfig.get_all()
    hash_counts: t.Dict[
        str, t.Tuple[int, str]
    ] = _get_signal_hash_count_and_last_modified(
        threat_exchange_data_bucket_name,
        threat_exchange_data_folder,
        threat_exchange_pdq_file_extension,
    )

    match_counts: t.Dict[str, int] = MatchByPrivacyGroupCounter.get_all_counts(table)

    return [
        ThreatExchangeDatasetSummary(
            collab.privacy_group_id,
            collab.privacy_group_name,
            collab.description,
            collab.fetcher_active,
            collab.matcher_active,
            collab.write_back,
            collab.in_use,
            hash_count=t.cast(
                int,
                hash_counts.get(
                    f"{threat_exchange_data_folder}{collab.privacy_group_id}{threat_exchange_pdq_file_extension}",
                    [0, ""],
                )[0],
            ),
            match_count=match_counts.get(collab.privacy_group_id, 0),
        )
        for collab in collaborations
    ]
Пример #6
0
def update_privacy_groups_in_use(priavcy_group_id_in_use: set) -> None:
    collabs = ThreatExchangeConfig.get_all()
    for collab in collabs:
        if str(collab.privacy_group_id) not in priavcy_group_id_in_use:
            collab.in_use = False
            hmaconfig.update_config(collab)
Пример #7
0
def update_privacy_group_description(privacy_group_id: str,
                                     description: str) -> None:
    config = ThreatExchangeConfig.getx(privacy_group_id)
    config.description = description
    hmaconfig.update_config(config)
Пример #8
0
class WritebackerTestCase(unittest.TestCase):

    banked_signals = [
        BankedSignal("2862392437204724", "pg 4", "te"),
        BankedSignal("4194946153908639", "pg 4", "te"),
        BankedSignal("3027465034605137", "pg 3", "te"),
        BankedSignal("evil.jpg", "bank 4", "non-te-source"),
    ]

    match_message = MatchMessage("key", "hash", banked_signals)

    # Writebacks are enabled for the trustworth privacy group not for
    # the untrustworthy one
    configs = [
        ThreatExchangeConfig(
            "pg 4", True, "Trustworthy PG", "test description", True, True, True
        ),
        ThreatExchangeConfig(
            "pg 3", True, "UnTrustworthy PG", "test description", True, False, True
        ),
    ]

    for config in configs:
        hmaconfig.mock_create_config(config)

    def test_saw_this_too(self):
        os.environ["MOCK_TE_API"] = "True"
        os.environ["CONFIG_TABLE_NAME"] = "test-HMAConfig"

        writeback = WritebackTypes.SawThisToo
        writeback_message = WritebackMessage.from_match_message_and_type(
            self.match_message, writeback
        )
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}

        result = lambda_handler(event, None)
        assert result == {
            "writebacks_performed": {
                "te": [
                    "Reacted SAW_THIS_TOO to descriptor a2|2862392437204724\nReacted SAW_THIS_TOO to descriptor a3|2862392437204724",
                    "Reacted SAW_THIS_TOO to descriptor a2|4194946153908639\nReacted SAW_THIS_TOO to descriptor a3|4194946153908639",
                    "No writeback performed for banked content id 3027465034605137 becuase writebacks were disabled",
                ]
            }
        }

        os.environ["MOCK_TE_API"] = "False"

    def test_false_positive(self):
        os.environ["MOCK_TE_API"] = "True"
        os.environ["CONFIG_TABLE_NAME"] = "test-HMAConfig"

        writeback = WritebackTypes.FalsePositive
        writeback_message = WritebackMessage.from_match_message_and_type(
            self.match_message, writeback
        )
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}

        result = lambda_handler(event, None)
        assert result == {
            "writebacks_performed": {
                "te": [
                    "Reacted DISAGREE_WITH_TAGS to descriptor a2|2862392437204724\nReacted DISAGREE_WITH_TAGS to descriptor a3|2862392437204724",
                    "Reacted DISAGREE_WITH_TAGS to descriptor a2|4194946153908639\nReacted DISAGREE_WITH_TAGS to descriptor a3|4194946153908639",
                    "No writeback performed for banked content id 3027465034605137 becuase writebacks were disabled",
                ]
            }
        }

        os.environ["MOCK_TE_API"] = "False"

    def test_true_positve(self):
        os.environ["MOCK_TE_API"] = "True"
        os.environ["CONFIG_TABLE_NAME"] = "test-HMAConfig"

        writeback = WritebackTypes.TruePositive
        writeback_message = WritebackMessage.from_match_message_and_type(
            self.match_message, writeback
        )
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}

        result = lambda_handler(event, None)
        assert result == {
            "writebacks_performed": {
                "te": [
                    "Wrote back TruePositive for indicator 2862392437204724\nBuilt descriptor a1|2862392437204724 with privacy groups pg 4",
                    "Wrote back TruePositive for indicator 4194946153908639\nBuilt descriptor a1|4194946153908639 with privacy groups pg 4",
                    "No writeback performed for banked content id 3027465034605137 becuase writebacks were disabled",
                ]
            }
        }

        os.environ["MOCK_TE_API"] = "False"

    def test_remove_opinion(self):
        os.environ["MOCK_TE_API"] = "True"
        os.environ["CONFIG_TABLE_NAME"] = "test-HMAConfig"

        writeback = WritebackTypes.RemoveOpinion
        writeback_message = WritebackMessage.from_match_message_and_type(
            self.match_message, writeback
        )
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}

        result = lambda_handler(event, None)
        assert result == {
            "writebacks_performed": {
                "te": [
                    "\n".join(
                        (
                            "Deleted decriptor a1|2862392437204724 for indicator 2862392437204724",
                            "Removed reaction DISAGREE_WITH_TAGS from descriptor a2|2862392437204724",
                            "Removed reaction DISAGREE_WITH_TAGS from descriptor a3|2862392437204724",
                        )
                    ),
                    "\n".join(
                        (
                            "Deleted decriptor a1|4194946153908639 for indicator 4194946153908639",
                            "Removed reaction DISAGREE_WITH_TAGS from descriptor a2|4194946153908639",
                            "Removed reaction DISAGREE_WITH_TAGS from descriptor a3|4194946153908639",
                        )
                    ),
                    "No writeback performed for banked content id 3027465034605137 becuase writebacks were disabled",
                ]
            }
        }

        os.environ["MOCK_TE_API"] = "False"
Пример #9
0
def lambda_handler(event, context):
    lambda_init_once()
    config = FetcherConfig.get()
    collabs = ThreatExchangeConfig.get_all()

    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")

    names = [collab.privacy_group_name for collab in collabs[:5]]
    if len(names) < len(collabs):
        names[-1] = "..."

    data = f"Triggered at time {current_time}, found {len(collabs)} collabs: {', '.join(names)}"
    logger.info(data)

    api_key = AWSSecrets().te_api_key()
    api = ThreatExchangeAPI(api_key)

    te_data_bucket = s3.Bucket(config.s3_bucket)

    stores = []
    for collab in collabs:
        logger.info("Processing updates for collaboration %s",
                    collab.privacy_group_name)

        if not is_int(collab.privacy_group_id):
            logger.info(
                f"Fetch skipped because privacy_group_id({collab.privacy_group_id}) is not an int"
            )
            continue

        indicator_store = ThreatUpdateS3PDQStore(
            int(collab.privacy_group_id),
            api.app_id,
            te_data_bucket,
            config.s3_te_data_folder,
            config.data_store_table,
        )
        stores.append(indicator_store)
        indicator_store.load_checkpoint()
        if indicator_store.stale:
            logger.warning(
                "Store for %s - %d stale! Resetting.",
                collab.privacy_group_name,
                int(collab.privacy_group_id),
            )
            indicator_store.reset()

        if indicator_store.fetch_checkpoint >= now.timestamp():
            continue

        delta = indicator_store.next_delta

        try:
            delta.incremental_sync_from_threatexchange(api, )
        except:
            # Don't need to call .exception() here because we're just re-raising
            logger.error("Exception occurred! Attempting to save...")
            # Force delta to show finished
            delta.end = delta.current
            raise
        finally:
            if delta:
                logging.info("Fetch complete, applying %d updates",
                             len(delta.updates))
                indicator_store.apply_updates(
                    delta, post_apply_fn=indicator_store.post_apply)
            else:
                logging.error("Failed before fetching any records")
Пример #10
0
def load_defaults(_args):
    """
    Load a hardcoded set of defaults which are useful in testing
    """

    # Could also put the default on the class, but seems too fancy

    configs = [
        ThreatExchangeConfig(
            name="303636684709969",
            fetcher_active=True,
            privacy_group_name="Test Config 1",
            write_back=True,
            in_use=True,
            description="test description",
            matcher_active=True,
        ),
        ThreatExchangeConfig(
            name="258601789084078",
            fetcher_active=True,
            privacy_group_name="Test Config 2",
            write_back=True,
            in_use=True,
            description="test description",
            matcher_active=True,
        ),
        WebhookPostActionPerformer(
            name="EnqueueForReview",
            url="https://webhook.site/ff7ebc37-514a-439e-9a03-46f86989e195",
            headers='{"Connection":"keep-alive"}',
            # monitoring page:
            # https://webhook.site/#!/ff7ebc37-514a-439e-9a03-46f86989e195
        ),
        WebhookPostActionPerformer(
            name="EnqueueMiniCastleForReview",
            url="https://webhook.site/01cef721-bdcc-4681-8430-679c75659867",
            headers='{"Connection":"keep-alive"}',
            # monitoring page:
            # https://webhook.site/#!/01cef721-bdcc-4681-8430-679c75659867
        ),
        WebhookPostActionPerformer(
            name="EnqueueSailboatForReview",
            url="https://webhook.site/fa5c5ad5-f5cc-4692-bf03-a03a4ae3f714",
            headers='{"Connection":"keep-alive"}',
            # monitoring page:
            # https://webhook.site/#!/fa5c5ad5-f5cc-4692-bf03-a03a4ae3f714
        ),
        ActionRule(
            name="Enqueue Mini-Castle for Review",
            action_label=ActionLabel("EnqueueMiniCastleForReview"),
            must_have_labels=set([
                BankIDClassificationLabel("303636684709969"),
                ClassificationLabel("true_positive"),
            ]),
            must_not_have_labels=set(
                [BankedContentIDClassificationLabel("3364504410306721")]),
        ),
        ActionRule(
            name="Enqueue Sailboat for Review",
            action_label=ActionLabel("EnqueueSailboatForReview"),
            must_have_labels=set([
                BankIDClassificationLabel("303636684709969"),
                ClassificationLabel("true_positive"),
                BankedContentIDClassificationLabel("3364504410306721"),
            ]),
            must_not_have_labels=set(),
        ),
    ]

    for config in configs:
        # Someday maybe can do filtering or something, I dunno
        # Add try catch block to avoid test failure

        try:
            hmaconfig.create_config(config)
        except ClientError as e:
            if e.response["Error"][
                    "Code"] == "ConditionalCheckFailedException":
                print(
                    "Can't insert duplicated config, " +
                    e.response["Error"]["Message"], )
            else:
                raise
        print(config)