def get_example_action_event():
        """
        Build a sample ActionEvent from fixed example data.

        A single "Enqueue Mini-Castle for Review" rule, one banked signal
        classified as "true_positive" and a webhook POST performer are
        combined into an ActionMessage, which is then flattened into the
        returned ActionEvent.
        """
        review_label = ActionLabel(TestContentModels.TEST_ACTION_LABEL)

        # The only rule: requires the "true_positive" classification and
        # forbids nothing.
        review_rule = ActionRule(
            name="Enqueue Mini-Castle for Review",
            action_label=review_label,
            must_have_labels={ClassificationLabel("true_positive")},
            must_not_have_labels=set(),
        )

        matched_signal = BankedSignal(
            banked_content_id="4169895076385542",
            bank_id="303636684709969",
            bank_source="te",
        )
        matched_signal.add_classification("true_positive")

        # monitoring page:
        # https://webhook.site/#!/ff7ebc37-514a-439e-9a03-46f86989e195
        webhook_performer = WebhookPostActionPerformer(
            name="EnqueueForReview",
            url="https://webhook.site/ff7ebc37-514a-439e-9a03-46f86989e195",
            headers='{"Connection":"keep-alive"}',
        )

        message = ActionMessage(
            content_key=TestContentModels.TEST_CONTENT_ID,
            content_hash="361da9e6cf1b72f5cea0344e5bb6e70939f4c70328ace762529cac704297354a",
            matching_banked_signals=[matched_signal],
            action_label=review_label,
            action_rules=[review_rule],
        )

        # Serialize the performer first, then the rules, before assembling
        # the event.
        performer_json = webhook_performer.to_aws_json()
        rules_json = [rule.to_aws_json() for rule in message.action_rules]

        return ActionEvent(
            content_id=message.content_key,
            performed_at=TestContentModels.TEST_TIME,
            action_label=message.action_label.value,
            action_performer=performer_json,
            action_rules=rules_json,
        )  # .write_to_table(table)
    def test_action_message_serialization_and_deserialization(self):
        """
        Round-trip an ActionMessage through to_aws_json / from_aws_json and
        check that the action label of the restored message is unchanged.
        """
        review_label = ActionLabel("EnqueueMiniCastleForReview")

        required_labels = {
            BankIDClassificationLabel("303636684709969"),
            ClassificationLabel("true_positive"),
        }
        forbidden_labels = {
            BankedContentIDClassificationLabel("3364504410306721"),
        }
        rules = [
            ActionRule(
                name="Enqueue Mini-Castle for Review",
                action_label=review_label,
                must_have_labels=required_labels,
                must_not_have_labels=forbidden_labels,
            ),
        ]

        signal = BankedSignal(
            banked_content_id="4169895076385542",
            bank_id="303636684709969",
            bank_source="te",
        )
        signal.add_classification("true_positive")

        original_message = ActionMessage(
            content_key="images/mini-castle.jpg",
            content_hash="361da9e6cf1b72f5cea0344e5bb6e70939f4c70328ace762529cac704297354a",
            matching_banked_signals=[signal],
            action_label=review_label,
            action_rules=rules,
        )

        # Serialize, then rebuild a second message from the serialized form.
        serialized = original_message.to_aws_json()
        restored_message = ActionMessage.from_aws_json(serialized)

        self.assertEqual(restored_message.action_label, review_label)
示例#3
0
    def request_signal_opinion_change() -> ChangeSignalOpinionResponse:
        """
        Request a change to the opinion for a signal in a dataset.

        Reads ``signal_q``, ``signal_source``, ``dataset_q`` and
        ``opinion_change`` from the request query string. If any of them is
        missing or empty, responds with ``ChangeSignalOpinionResponse(False)``
        without doing anything else. Otherwise a writeback message is put on
        the queue and the pending opinion change is recorded on the signal's
        table entry; the response carries the outcome of that table update.
        """
        query = bottle.request.query
        signal_id = query.signal_q or None
        signal_source = query.signal_source or None
        ds_id = query.dataset_q or None
        opinion_change = query.opinion_change or None

        # All four query parameters are mandatory.
        if not all((signal_id, signal_source, ds_id, opinion_change)):
            return ChangeSignalOpinionResponse(False)

        signal_id = str(signal_id)
        pending_opinion_change = PendingOpinionChange(opinion_change)

        # Enqueue the writeback before touching the table.
        banked_signal = BankedSignal(signal_id, ds_id, signal_source)
        WritebackMessage.from_banked_signal_and_opinion_change(
            banked_signal, pending_opinion_change
        ).send_to_queue()
        logger.info(
            f"Opinion change enqueued for {signal_source}:{signal_id} in {ds_id} change={opinion_change}"
        )

        signal = PDQSignalMetadata(
            signal_id=signal_id,
            ds_id=ds_id,
            updated_at=datetime.datetime.now(),
            signal_source=signal_source,
            signal_hash="",  # SignalHash not needed for update
            tags=[],  # Tags not needed for update
            pending_opinion_change=pending_opinion_change,
        )
        success = signal.update_pending_opinion_change_in_table_if_exists(
            dynamodb_table
        )
        if not success:
            logger.info(f"Attempting to update {signal} in db failed")

        return ChangeSignalOpinionResponse(success)
    """Hit an arbitrary endpoint with a PUT"""
    def call(self, data: str) -> Response:
        """Pass ``self.url`` and ``data`` to ``put`` and return its response."""
        response = put(self.url, data)
        return response


@dataclass
class WebhookDeleteActionPerformer(WebhookActionPerformer):
    """Webhook performer that hits its endpoint with a DELETE."""

    def call(self, _data: str) -> Response:
        """Call ``delete`` on ``self.url``; the ``_data`` argument is not used."""
        response = delete(self.url)
        return response


if __name__ == "__main__":
    # Manual debugging run: build a sample match message and hand it to a
    # DELETE performer and a PUT performer that share one webhook.site URL.
    webhook_url = "https://webhook.site/ff7ebc37-514a-439e-9a03-46f86989e195"

    banked_signals = [
        BankedSignal(content_id, "bank 4", "te")
        for content_id in ("2862392437204724", "4194946153908639")
    ]
    match_message = MatchMessage("key", "hash", banked_signals)

    configs: t.List[ActionPerformer] = [
        WebhookDeleteActionPerformer("DeleteWebhook", webhook_url),
        WebhookPutActionPerformer("PutWebook", webhook_url),
    ]

    for performer in configs:
        performer.perform_action(match_message)
def lambda_handler(event, context):
    """
    Listens to SQS events fired when new hash is generated. Loads the index
    stored in an S3 bucket and looks for a match.

    As per the default configuration
    - the index data bucket is INDEXES_BUCKET_NAME
    - the key name must be PDQ_INDEX_KEY

    When matched, publishes a notification to an SNS endpoint. Note this is in
    contrast with hasher and indexer. They publish to SQS directly. Publishing
    to SQS implies there can be only one consumer.

    Because, here, in the matcher, we publish to SNS, we can plug multiple
    queues behind it and profit!

    Per record in ``event["Records"]``:
    - the JSON ``body`` is parsed; records whose ``"Event"`` is
      ``"TestEvent"`` are skipped,
    - the index is queried with the message's ``"hash"``,
    - for every match, ``metadata["privacy_groups"]`` is narrowed to the
      groups accepted by ``get_privacy_group_matcher_active``; matches left
      with no groups are ignored,
    - a PDQMatchRecord is written, and a PDQSignalMetadata per remaining
      privacy group is written if not already present,
    - at most one MatchMessage, carrying all collected banked signals, is
      published to OUTPUT_TOPIC_ARN.

    ``context`` is not used. Nothing is returned; metrics are flushed once
    after all records have been processed.
    """
    records_table = dynamodb.Table(DYNAMODB_TABLE)

    # The index is loaded once per invocation and reused for every record.
    hash_index: PDQIndex = get_index(INDEXES_BUCKET_NAME, PDQ_INDEX_KEY)
    logger.info("loaded_hash_index")

    for sqs_record in event["Records"]:
        message = json.loads(sqs_record["body"])
        if message.get("Event") == "TestEvent":
            logger.info("Disregarding Test Event")
            continue

        hash_str = message["hash"]
        key = message["key"]
        # One timestamp is shared by every record written for this message.
        current_datetime = datetime.datetime.now()

        with metrics.timer(metrics.names.pdq_matcher_lambda.search_index):
            results = hash_index.query(hash_str)

        if results:
            # NOTE(review): match_ids is appended to below but never read
            # anywhere in this function.
            match_ids = []
            matching_banked_signals: t.List[BankedSignal] = []
            for match in results:
                metadata = match.metadata
                logger.info("Match found for key: %s, hash %s -> %s", key,
                            hash_str, metadata)
                privacy_group_list = metadata.get("privacy_groups", [])
                # Overwrite the match's privacy groups in place, keeping only
                # those for which get_privacy_group_matcher_active is truthy.
                metadata["privacy_groups"] = list(
                    filter(
                        lambda x: get_privacy_group_matcher_active(
                            str(x),
                            time.time() // CACHED_TIME,
                            # Per the original note, CACHED_TIME defaults to 300 seconds. Floor-dividing time.time() by it gives a value that only changes once per CACHED_TIME window (presumably used as a cache key by the callee - verify).
                        ),
                        privacy_group_list,
                    ))
                if metadata["privacy_groups"]:
                    signal_id = str(metadata["id"])

                    with metrics.timer(metrics.names.pdq_matcher_lambda.
                                       write_match_record):
                        # TODO: Add source (threatexchange) tags to match record
                        PDQMatchRecord(
                            key,
                            hash_str,
                            current_datetime,
                            signal_id,
                            metadata["source"],
                            metadata["hash"],
                        ).write_to_table(records_table)

                    for pg in metadata.get("privacy_groups", []):
                        # Only write the metadata if it is not found in the table;
                        # once initially created it is the fetcher's job to keep the item up to date
                        PDQSignalMetadata(
                            signal_id,
                            pg,
                            current_datetime,
                            metadata["source"],
                            metadata["hash"],
                            metadata["tags"].get(pg, []),
                        ).write_to_table_if_not_found(records_table)

                    match_ids.append(signal_id)

                    # TODO: change naming upstream and here from privacy_group[s]
                    # to dataset[s]
                    # One BankedSignal per (signal, privacy group) pair, carrying
                    # that group's tags as classifications.
                    for privacy_group in metadata.get("privacy_groups", []):
                        banked_signal = BankedSignal(str(signal_id),
                                                     str(privacy_group),
                                                     str(metadata["source"]))
                        for tag in metadata["tags"].get(privacy_group, []):
                            banked_signal.add_classification(tag)
                        matching_banked_signals.append(banked_signal)

            # TODO: Add source (threatexchange) tags to match message
            # Nothing is published when every match was filtered out above.
            if matching_banked_signals:
                match_message = MatchMessage(
                    content_key=key,
                    content_hash=hash_str,
                    matching_banked_signals=matching_banked_signals,
                )

                logger.info(f"Publishing match_message: {match_message}")

                # Publish one message for the set of matches.
                sns_client.publish(TopicArn=OUTPUT_TOPIC_ARN,
                                   Message=match_message.to_aws_json())

        else:
            logger.info(f"No matches found for key: {key} hash: {hash_str}")

    metrics.flush()
示例#6
0
class WritebackerTestCase(unittest.TestCase):
    """
    Runs ``lambda_handler`` once per writeback type against a fixed match
    message and checks the reported ``writebacks_performed``.

    The environment is prepared in ``setUp`` and ``MOCK_TE_API`` is reset in
    ``tearDown``, so a failing assertion in one test cannot leave the mocked
    API switched on for whatever runs next.
    """

    banked_signals = [
        BankedSignal("2862392437204724", "pg 4", "te"),
        BankedSignal("4194946153908639", "pg 4", "te"),
        BankedSignal("3027465034605137", "pg 3", "te"),
        BankedSignal("evil.jpg", "bank 4", "non-te-source"),
    ]

    match_message = MatchMessage("key", "hash", banked_signals)

    # Writebacks are enabled for the trustworthy privacy group, not for
    # the untrustworthy one
    configs = [
        ThreatExchangeConfig(
            "pg 4", True, "Trustworthy PG", "test description", True, True, True
        ),
        ThreatExchangeConfig(
            "pg 3", True, "UnTrustworthy PG", "test description", True, False, True
        ),
    ]

    # Registered at class-definition time, before any test runs.
    for config in configs:
        hmaconfig.mock_create_config(config)

    # Result expected for the "pg 3" signal in every test. The spelling is
    # part of the compared value and must not be "corrected" here.
    _WRITEBACKS_DISABLED = "No writeback performed for banked content id 3027465034605137 becuase writebacks were disabled"

    def setUp(self):
        """Switch on the mocked TE API and point at the test config table."""
        os.environ["MOCK_TE_API"] = "True"
        os.environ["CONFIG_TABLE_NAME"] = "test-HMAConfig"

    def tearDown(self):
        """Switch the mocked TE API back off, whether or not the test passed."""
        os.environ["MOCK_TE_API"] = "False"

    def _run_writeback(self, writeback):
        """Wrap a writeback message of the given type in an event and run the handler."""
        writeback_message = WritebackMessage.from_match_message_and_type(
            self.match_message, writeback
        )
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}
        return lambda_handler(event, None)

    def test_saw_this_too(self):
        result = self._run_writeback(WritebackTypes.SawThisToo)
        self.assertEqual(
            result,
            {
                "writebacks_performed": {
                    "te": [
                        "Reacted SAW_THIS_TOO to descriptor a2|2862392437204724\nReacted SAW_THIS_TOO to descriptor a3|2862392437204724",
                        "Reacted SAW_THIS_TOO to descriptor a2|4194946153908639\nReacted SAW_THIS_TOO to descriptor a3|4194946153908639",
                        self._WRITEBACKS_DISABLED,
                    ]
                }
            },
        )

    def test_false_positive(self):
        result = self._run_writeback(WritebackTypes.FalsePositive)
        self.assertEqual(
            result,
            {
                "writebacks_performed": {
                    "te": [
                        "Reacted DISAGREE_WITH_TAGS to descriptor a2|2862392437204724\nReacted DISAGREE_WITH_TAGS to descriptor a3|2862392437204724",
                        "Reacted DISAGREE_WITH_TAGS to descriptor a2|4194946153908639\nReacted DISAGREE_WITH_TAGS to descriptor a3|4194946153908639",
                        self._WRITEBACKS_DISABLED,
                    ]
                }
            },
        )

    def test_true_positve(self):
        result = self._run_writeback(WritebackTypes.TruePositive)
        self.assertEqual(
            result,
            {
                "writebacks_performed": {
                    "te": [
                        "Wrote back TruePositive for indicator 2862392437204724\nBuilt descriptor a1|2862392437204724 with privacy groups pg 4",
                        "Wrote back TruePositive for indicator 4194946153908639\nBuilt descriptor a1|4194946153908639 with privacy groups pg 4",
                        self._WRITEBACKS_DISABLED,
                    ]
                }
            },
        )

    def test_remove_opinion(self):
        result = self._run_writeback(WritebackTypes.RemoveOpinion)
        self.assertEqual(
            result,
            {
                "writebacks_performed": {
                    "te": [
                        "\n".join(
                            (
                                "Deleted decriptor a1|2862392437204724 for indicator 2862392437204724",
                                "Removed reaction DISAGREE_WITH_TAGS from descriptor a2|2862392437204724",
                                "Removed reaction DISAGREE_WITH_TAGS from descriptor a3|2862392437204724",
                            )
                        ),
                        "\n".join(
                            (
                                "Deleted decriptor a1|4194946153908639 for indicator 4194946153908639",
                                "Removed reaction DISAGREE_WITH_TAGS from descriptor a2|4194946153908639",
                                "Removed reaction DISAGREE_WITH_TAGS from descriptor a3|4194946153908639",
                            )
                        ),
                        self._WRITEBACKS_DISABLED,
                    ]
                }
            },
        )
    """
    return action_label_to_action_rules


if __name__ == "__main__":
    # For basic debugging: evaluate the configured action rules against one
    # hand-built match message.
    HMAConfig.initialize(os.environ["CONFIG_TABLE_NAME"])
    action_rules = get_action_rules()

    debug_banked_signal = BankedSignal(
        banked_content_id="3534976909868947",
        bank_id="303636684709969",
        bank_source="te",
        classifications={
            Label(key="BankIDClassification", value="303636684709969"),
            Label(key="Classification", value="true_positive"),
            Label(key="BankSourceClassification", value="te"),
            Label(key="BankedContentIDClassification", value="3534976909868947"),
        },
    )
    match_message = MatchMessage(
        content_key="images/200200.jpg",
        content_hash="20f66f3a2e6eff06d895a8f421c045e1c76f0bf87652d72ce7249412d8d52acc",
        matching_banked_signals=[debug_banked_signal],
    )

    action_label_to_action_rules = get_actions_to_take(
        match_message, action_rules
    )
    action_labels = list(action_label_to_action_rules)
    def test_get_action_labels(self):
        """
        Check which action labels get_actions_to_take selects.

        Part one: a rule that requires a bank id and forbids the "Foo"
        classification is selected for a signal without "Foo" and not
        selected for a signal with it.
        Part two: two rules that differ on one banked content id each pick
        up only their own match message.
        """
        # --- Part one: must-not-have label excludes a match -------------
        bank_id = "12345"
        review_label = ActionLabel("EnqueueForReview")

        signal_without_foo = BankedSignal("67890", bank_id, "Test")
        for tag in ("Bar", "Xyz"):
            signal_without_foo.add_classification(tag)

        signal_with_foo = BankedSignal("67890", bank_id, "Test")
        for tag in ("Foo", "Bar", "Xyz"):
            signal_with_foo.add_classification(tag)

        message_without_foo = MatchMessage("key", "hash", [signal_without_foo])
        message_with_foo = MatchMessage("key", "hash", [signal_with_foo])

        rules = [
            ActionRule(
                review_label.value,
                review_label,
                {BankIDClassificationLabel(bank_id)},
                {ClassificationLabel("Foo")},
            )
        ]

        selected: t.Dict[ActionLabel, t.List[ActionRule]] = get_actions_to_take(
            message_without_foo, rules
        )
        assert len(selected) == 1
        self.assertIn(
            review_label,
            selected,
            "enqueue_for_review_action_label should be in action_label_to_action_rules",
        )

        selected = get_actions_to_take(message_with_foo, rules)
        assert len(selected) == 0

        # --- Part two: rules keyed on a specific banked content id ------
        mini_castle_label = ActionLabel("EnqueueMiniCastleForReview")
        sailboat_label = ActionLabel("EnqueueSailboatForReview")

        mini_castle_rule = ActionRule(
            name="Enqueue Mini-Castle for Review",
            action_label=mini_castle_label,
            must_have_labels={
                BankIDClassificationLabel("303636684709969"),
                ClassificationLabel("true_positive"),
            },
            must_not_have_labels={
                BankedContentIDClassificationLabel("3364504410306721"),
            },
        )
        sailboat_rule = ActionRule(
            name="Enqueue Sailboat for Review",
            action_label=sailboat_label,
            must_have_labels={
                BankIDClassificationLabel("303636684709969"),
                ClassificationLabel("true_positive"),
                BankedContentIDClassificationLabel("3364504410306721"),
            },
            must_not_have_labels=set(),
        )
        rules = [mini_castle_rule, sailboat_rule]

        mini_castle_signal = BankedSignal(
            banked_content_id="4169895076385542",
            bank_id="303636684709969",
            bank_source="te",
        )
        mini_castle_signal.add_classification("true_positive")
        mini_castle_message = MatchMessage(
            content_key="images/mini-castle.jpg",
            content_hash="361da9e6cf1b72f5cea0344e5bb6e70939f4c70328ace762529cac704297354a",
            matching_banked_signals=[mini_castle_signal],
        )

        sailboat_signal = BankedSignal(
            banked_content_id="3364504410306721",
            bank_id="303636684709969",
            bank_source="te",
        )
        sailboat_signal.add_classification("true_positive")
        sailboat_message = MatchMessage(
            content_key="images/sailboat-mast-and-sun.jpg",
            content_hash="388ff5e1084efef10096df9cb969296dff2b04d67a94065ecd292129ef6b1090",
            matching_banked_signals=[sailboat_signal],
        )

        selected = get_actions_to_take(mini_castle_message, rules)
        assert len(selected) == 1
        self.assertIn(
            mini_castle_label,
            selected,
            "enqueue_mini_castle_for_review_action_label should be in action_label_to_action_rules",
        )

        selected = get_actions_to_take(sailboat_message, rules)
        assert len(selected) == 1
        self.assertIn(
            sailboat_label,
            selected,
            "enqueue_sailboat_for_review_action_label should be in action_label_to_action_rules",
        )
if __name__ == "__main__":
    # For basic debugging: run one hard-coded rule against one hard-coded
    # match and print which action labels were selected.

    required_labels = {
        BankIDClassificationLabel("303636684709969"),
        ClassificationLabel("true_positive"),
    }
    forbidden_labels = {
        BankedContentIDClassificationLabel("3364504410306721"),
    }
    action_rules = [
        ActionRule(
            name="Enqueue Mini-Castle for Review",
            action_label=ActionLabel("EnqueueMiniCastleForReview"),
            must_have_labels=required_labels,
            must_not_have_labels=forbidden_labels,
        ),
    ]

    banked_signal = BankedSignal("4169895076385542", "303636684709969", "te")
    banked_signal.add_classification("true_positive")

    match_message = MatchMessage("key", "hash", [banked_signal])

    action_label_to_action_rules = get_actions_to_take(
        match_message, action_rules
    )

    print(action_label_to_action_rules)
示例#10
0
        sources = {
            banked_signal.bank_source
            for banked_signal in writeback_message.banked_signals
        }
        source_writebackers = [
            Writebacker.get_writebacker_for_source(source)
            for source in sources
            if Writebacker.get_writebacker_for_source(source)
        ]
        for writebacker in source_writebackers:
            result = writebacker.perform_writeback(writeback_message)
            logger.info("Writeback result: %s", result)
            writebacks_performed[writebacker.source] = result

    return {"writebacks_performed": writebacks_performed}


if __name__ == "__main__":
    # For basic debugging
    # This will react to real descriptors if WRITEBACK_LOCAL is on
    run_locally = os.environ.get("WRITEBACK_LOCAL")
    if run_locally:
        signals = [
            BankedSignal("2915547128556957", "303636684709969", "te"),
        ]
        writeback_message = WritebackMessage(
            signals, WritebackTypes.RemoveOpinion
        )
        # Same shape the handler reads: a list of records, each with a body.
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}
        print(lambda_handler(event, None))