def test_match_same_uuid(self):
        """Check how unique identities carrying the same UUID are matched.

        Two unique identities with the same non-empty UUID are expected
        to match in both directions, whereas a pair whose UUIDs are both
        ``None`` is expected not to match.
        """
        # Same UUID on both sides: a match is expected in either order
        first = UniqueIdentity(uuid='John Smith')
        second = UniqueIdentity(uuid='John Smith')
        matcher = EmailNameMatcher()

        for left, right in ((first, second), (second, first)):
            self.assertEqual(matcher.match(left, right), True)

        # None UUIDs do not produce a positive match
        first = UniqueIdentity(uuid=None)
        second = UniqueIdentity(uuid=None)
        matcher = EmailNameMatcher()

        for left, right in ((first, second), (second, first)):
            self.assertEqual(matcher.match(left, right), False)
    def test_match_with_sources_list(self):
        """Check matching when the matcher is limited to a list of sources.

        The first unique identity only carries 'scm' identities and the
        second only 'alt' ones. No match is expected when the matcher is
        restricted to 'github' or to 'scm' alone; a match is expected once
        both 'scm' and 'alt' are allowed.
        """
        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith', email='*****@*****.**', source='scm'),
            Identity(name='John Smith', source='scm'),
            Identity(username='******', source='scm'),
            Identity(email='', source='scm'),
        ]

        jsmith_alt = UniqueIdentity(uuid='J. Smith')
        jsmith_alt.identities = [
            Identity(name='John Smith JR', username='******', source='alt'),
            Identity(name='John Smith', username='******', source='alt'),
            Identity(email='', source='alt'),
            Identity(email='jsmith', source='alt'),
        ]

        # With these source lists there are no matches
        for sources in (['github'], ['scm']):
            matcher = EmailNameMatcher(sources=sources)
            self.assertEqual(matcher.match(jsmith, jsmith_alt), False)

        # Only when scm and alt are combined is there a match
        matcher = EmailNameMatcher(sources=['scm', 'alt'])
        self.assertEqual(matcher.match(jsmith, jsmith_alt), True)
    def test_match_same_uuid(self):
        """Check how unique identities carrying the same UUID are matched.

        Two unique identities with the same non-empty UUID are expected
        to match in both directions, whereas a pair whose UUIDs are both
        ``None`` is expected not to match.
        """
        # Same UUID on both sides: a match is expected in either order
        first = UniqueIdentity(uuid='John Smith')
        second = UniqueIdentity(uuid='John Smith')
        matcher = EmailNameMatcher()

        self.assertEqual(matcher.match(first, second), True)
        self.assertEqual(matcher.match(second, first), True)

        # None UUIDs do not produce a positive match
        first = UniqueIdentity(uuid=None)
        second = UniqueIdentity(uuid=None)
        matcher = EmailNameMatcher()

        self.assertEqual(matcher.match(first, second), False)
        self.assertEqual(matcher.match(second, first), False)
    # Example #4
    # 0
    def test_match_with_sources_list(self):
        """Check matching when the matcher is limited to a list of sources.

        The first unique identity only carries 'scm' identities and the
        second only 'alt' ones. Each entry of the table below pairs a
        ``sources`` filter with the result expected from the matcher.
        """
        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith', email='*****@*****.**', source='scm'),
            Identity(name='John Smith', source='scm'),
            Identity(username='******', source='scm'),
            Identity(email='', source='scm'),
        ]

        jsmith_alt = UniqueIdentity(uuid='J. Smith')
        jsmith_alt.identities = [
            Identity(name='John Smith JR', username='******', source='alt'),
            Identity(name='John Smith', username='******', source='alt'),
            Identity(email='', source='alt'),
            Identity(email='jsmith', source='alt'),
        ]

        expectations = (
            # With these source lists there are no matches
            (['github'], False),
            (['scm'], False),
            # Only when scm and alt are combined is there a match
            (['scm', 'alt'], True),
        )

        for sources, expected in expectations:
            matcher = EmailNameMatcher(sources=sources)
            self.assertEqual(matcher.match(jsmith, jsmith_alt), expected)
    def test_match_strict(self):
        """Compare strict and non-strict matching on the same identities.

        Each pair checked below is expected not to match when the matcher
        is built with ``strict=True`` and to match when it is built with
        ``strict=False``.
        """
        # Let's define some identities first
        jsmith_alt = UniqueIdentity(uuid='J. Smith')
        jsmith_alt.identities = [
            Identity(name='J. Smith', username='******', source='alt'),
            Identity(name='John Smith', username='******', source='alt'),
            Identity(email='', source='alt'),
            Identity(email='jsmith', source='alt')
        ]

        jsmith_not_email = UniqueIdentity(uuid='John Smith')
        jsmith_not_email.identities = [Identity(email='jsmith', source='mls')]

        jrae_doe = UniqueIdentity(uuid='jraedoe')
        jrae_doe.identities = [
            Identity(name='Jane Rae Doe',
                     email='*****@*****.**',
                     source='mls'),
            Identity(name='jrae', source='scm')
        ]

        jrae_no_name = UniqueIdentity(uuid='Jane Rae')
        jrae_no_name.identities = [Identity(name='jrae', source='scm')]

        # Tests
        strict_matcher = EmailNameMatcher(strict=True)
        no_strict_matcher = EmailNameMatcher(strict=False)

        # These two unique identities share the same email value, but
        # since 'jsmith' is not a valid email address they do not match
        result = strict_matcher.match(jsmith_alt, jsmith_not_email)
        self.assertEqual(result, False)

        # But with strict mode off they do
        result = no_strict_matcher.match(jsmith_alt, jsmith_not_email)
        self.assertEqual(result, True)

        # These two do not match although they share the same name.
        # In this case the name is invalid because it is not formed
        # like "firstname lastname"
        result = strict_matcher.match(jrae_doe, jrae_no_name)
        self.assertEqual(result, False)

        # But with strict mode off they do
        result = no_strict_matcher.match(jrae_doe, jrae_no_name)
        self.assertEqual(result, True)
    def test_match_same_identity(self):
        """Check that a unique identity compared with itself is a match."""
        identity = UniqueIdentity(uuid='John Smith')

        outcome = EmailNameMatcher().match(identity, identity)

        self.assertEqual(outcome, True)
    def test_match_same_identity(self):
        """Check that a unique identity compared with itself is a match."""
        same = UniqueIdentity(uuid='John Smith')
        matcher = EmailNameMatcher()

        # The very same object is passed on both sides of the comparison
        self.assertEqual(matcher.match(same, same), True)
    def test_match_with_blacklist(self):
        """Check matching when the matcher is given blacklist entries.

        The matcher is built with a blacklist of two excluded values and
        every pair of unique identities is compared in both directions
        against the expected result listed in the table below.
        """
        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith', email='*****@*****.**', source='scm'),
            Identity(name='John Smith', source='scm'),
            Identity(username='******', source='scm'),
            Identity(email='', source='scm'),
        ]

        john_smith = UniqueIdentity(uuid='js')
        john_smith.identities = [
            Identity(name='John Smith JR', username='******', source='scm'),
            Identity(username='******', source='scm'),
            Identity(name='Smith. J', source='mls'),
            Identity(name='Smith. J', email='*****@*****.**', source='mls'),
        ]

        jsmith_alt = UniqueIdentity(uuid='J. Smith')
        jsmith_alt.identities = [
            Identity(name='John Smith JR', username='******', source='alt'),
            Identity(name='John Smith', username='******', source='alt'),
            Identity(email='', source='alt'),
            Identity(email='jsmith', source='alt'),
        ]

        # Tests
        excluded_entries = [
            MatchingBlacklist(excluded='John Smith'),
            MatchingBlacklist(excluded='*****@*****.**'),
        ]
        matcher = EmailNameMatcher(blacklist=excluded_entries)

        expectations = (
            (jsmith, john_smith, False),
            (john_smith, jsmith, False),
            # John Smith is blacklisted, so no match
            (jsmith, jsmith_alt, False),
            (jsmith_alt, jsmith, False),
            (john_smith, jsmith_alt, True),
            (jsmith_alt, john_smith, True),
        )

        for left, right, expected in expectations:
            self.assertEqual(matcher.match(left, right), expected)
    def test_match_with_blacklist(self):
        """Check matching when the matcher is given blacklist entries.

        The matcher is built with a blacklist of two excluded values. The
        pairs involving ``jsmith`` are expected not to match, while
        ``john_smith`` and ``jsmith_alt`` are still expected to match.
        """
        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith', email='*****@*****.**', source='scm'),
            Identity(name='John Smith', source='scm'),
            Identity(username='******', source='scm'),
            Identity(email='', source='scm'),
        ]

        john_smith = UniqueIdentity(uuid='js')
        john_smith.identities = [
            Identity(name='John Smith JR', username='******', source='scm'),
            Identity(username='******', source='scm'),
            Identity(name='Smith. J', source='mls'),
            Identity(name='Smith. J', email='*****@*****.**', source='mls'),
        ]

        jsmith_alt = UniqueIdentity(uuid='J. Smith')
        jsmith_alt.identities = [
            Identity(name='John Smith JR', username='******', source='alt'),
            Identity(name='John Smith', username='******', source='alt'),
            Identity(email='', source='alt'),
            Identity(email='jsmith', source='alt'),
        ]

        # Tests
        blacklist = [
            MatchingBlacklist(excluded='John Smith'),
            MatchingBlacklist(excluded='*****@*****.**'),
        ]
        matcher = EmailNameMatcher(blacklist=blacklist)

        self.assertEqual(matcher.match(jsmith, john_smith), False)
        self.assertEqual(matcher.match(john_smith, jsmith), False)

        # John Smith is blacklisted, so no match
        self.assertEqual(matcher.match(jsmith, jsmith_alt), False)
        self.assertEqual(matcher.match(jsmith_alt, jsmith), False)

        # These two are still expected to match, in both directions
        self.assertEqual(matcher.match(john_smith, jsmith_alt), True)
        self.assertEqual(matcher.match(jsmith_alt, john_smith), True)
    def test_match(self):
        """Check the ``match`` method of a default ``EmailNameMatcher``.

        A set of unique identities is built and compared pairwise with a
        matcher created without arguments. Each entry of the expectation
        table holds the two unique identities to compare and the result
        the matcher is expected to return for them.
        """
        # Let's define some identities first
        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith',
                     email='*****@*****.**',
                     source='scm'),
            Identity(name='John Smith', source='scm'),
            Identity(username='******', source='scm'),
            Identity(email='', source='scm')
        ]

        john_smith = UniqueIdentity(uuid='js')
        john_smith.identities = [
            Identity(name='J. Smith', username='******', source='scm'),
            Identity(username='******', source='scm'),
            Identity(name='Smith. J', source='mls'),
            Identity(name='Smith. J', email='*****@*****.**', source='mls')
        ]

        jsmith_alt = UniqueIdentity(uuid='J. Smith')
        jsmith_alt.identities = [
            Identity(name='J. Smith', username='******', source='alt'),
            Identity(name='John Smith', username='******', source='alt'),
            Identity(email='', source='alt'),
            Identity(email='jsmith', source='alt')
        ]

        jsmith_not_email = UniqueIdentity(uuid='John Smith')
        jsmith_not_email.identities = [Identity(email='jsmith', source='mls')]

        jrae = UniqueIdentity(uuid='jrae')
        jrae.identities = [
            Identity(name='Jane Rae', source='scm'),
            Identity(name='Jane Rae Doe',
                     email='*****@*****.**',
                     source='mls')
        ]

        jrae_doe = UniqueIdentity(uuid='jraedoe')
        jrae_doe.identities = [
            Identity(name='Jane Rae Doe',
                     email='*****@*****.**',
                     source='mls'),
            Identity(name='jrae', source='scm')
        ]

        jrae_no_name = UniqueIdentity(uuid='Jane Rae')
        jrae_no_name.identities = [Identity(name='jrae', source='scm')]

        # Tests
        matcher = EmailNameMatcher()

        expectations = (
            # First two unique identities must match
            (jsmith, john_smith, True),
            (john_smith, jsmith, True),
            # Comparing with the third, only the first one
            # produces a match because of the "John Smith" name
            (jsmith, jsmith_alt, True),
            (jsmith_alt, jsmith, True),
            (john_smith, jsmith_alt, False),
            (jsmith_alt, john_smith, False),
            # Jane Rae matches Jane Rae Doe because they share
            # the same name "Jane Rae Doe". The second entry checks the
            # reverse direction (it used to repeat the first comparison).
            (jrae, jrae_doe, True),
            (jrae_doe, jrae, True),
            # No match with Jane Rae
            (jsmith, jrae, False),
            (jsmith, jrae_doe, False),
            (john_smith, jrae, False),
            (john_smith, jrae_doe, False),
            (jsmith_alt, jrae, False),
            (jsmith_alt, jrae_doe, False),
            # These two unique identities have the same email value,
            # but since 'jsmith' is not a valid email address they
            # do not match
            (jsmith_alt, jsmith_not_email, False),
            # These two do not match although they share the same name.
            # In this case the name is invalid because it is not formed
            # like "firstname lastname"
            (jrae_doe, jrae_no_name, False),
        )

        for left, right, expected in expectations:
            self.assertEqual(matcher.match(left, right), expected)
    def test_match(self):
        """Check the ``match`` method of a default ``EmailNameMatcher``.

        A set of unique identities is built and compared pairwise with a
        matcher created without arguments; matching pairs are checked in
        both directions.
        """
        # Let's define some identities first
        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [Identity(name='John Smith', email='*****@*****.**', source='scm'),
                             Identity(name='John Smith', source='scm'),
                             Identity(username='******', source='scm'),
                             Identity(email='', source='scm')]

        john_smith = UniqueIdentity(uuid='js')
        john_smith.identities = [Identity(name='J. Smith', username='******', source='scm'),
                                 Identity(username='******', source='scm'),
                                 Identity(name='Smith. J', source='mls'),
                                 Identity(name='Smith. J', email='*****@*****.**', source='mls')]

        jsmith_alt = UniqueIdentity(uuid='J. Smith')
        jsmith_alt.identities = [Identity(name='J. Smith', username='******', source='alt'),
                                 Identity(name='John Smith', username='******', source='alt'),
                                 Identity(email='', source='alt'),
                                 Identity(email='jsmith', source='alt')]

        jsmith_not_email = UniqueIdentity(uuid='John Smith')
        jsmith_not_email.identities = [Identity(email='jsmith', source='mls')]

        jrae = UniqueIdentity(uuid='jrae')
        jrae.identities = [Identity(name='Jane Rae', source='scm'),
                           Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls')]

        jrae_doe = UniqueIdentity(uuid='jraedoe')
        jrae_doe.identities = [Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls'),
                               Identity(name='jrae', source='scm')]

        jrae_no_name = UniqueIdentity(uuid='Jane Rae')
        jrae_no_name.identities = [Identity(name='jrae', source='scm')]

        # Tests
        matcher = EmailNameMatcher()

        # First two unique identities must match
        result = matcher.match(jsmith, john_smith)
        self.assertEqual(result, True)

        result = matcher.match(john_smith, jsmith)
        self.assertEqual(result, True)

        # Comparing with the third, only the first one
        # produces a match because of the "John Smith" name
        result = matcher.match(jsmith, jsmith_alt)
        self.assertEqual(result, True)

        result = matcher.match(jsmith_alt, jsmith)
        self.assertEqual(result, True)

        result = matcher.match(john_smith, jsmith_alt)
        self.assertEqual(result, False)

        result = matcher.match(jsmith_alt, john_smith)
        self.assertEqual(result, False)

        # Jane Rae matches Jane Rae Doe because they share
        # the same name "Jane Rae Doe"
        result = matcher.match(jrae, jrae_doe)
        self.assertEqual(result, True)

        # Reverse direction (this used to repeat the comparison above)
        result = matcher.match(jrae_doe, jrae)
        self.assertEqual(result, True)

        # No match with Jane Rae
        result = matcher.match(jsmith, jrae)
        self.assertEqual(result, False)

        result = matcher.match(jsmith, jrae_doe)
        self.assertEqual(result, False)

        result = matcher.match(john_smith, jrae)
        self.assertEqual(result, False)

        result = matcher.match(john_smith, jrae_doe)
        self.assertEqual(result, False)

        result = matcher.match(jsmith_alt, jrae)
        self.assertEqual(result, False)

        result = matcher.match(jsmith_alt, jrae_doe)
        self.assertEqual(result, False)

        # These two unique identities have the same email value,
        # but since 'jsmith' is not a valid email address they
        # do not match
        result = matcher.match(jsmith_alt, jsmith_not_email)
        self.assertEqual(result, False)

        # These two do not match although they share the same name.
        # In this case the name is invalid because it is not formed
        # like "firstname lastname"
        result = matcher.match(jrae_doe, jrae_no_name)
        self.assertEqual(result, False)