Пример #1
0
 def test_mismatch(self):
     """Each of these name pairs is expected to get a similarity of 0."""
     mismatched = (
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
         (('Amanda P.', 'Brown'), ('Patrick', 'Brown')),
     )
     for left, right in mismatched:
         self.assertAlmostEqual(name_similarity(left, right), 0)
Пример #2
0
 def test_mismatch(self):
     """Check that incompatible name pairs are scored 0 by name_similarity."""
     incompatible_pairs = [
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
         (('Amanda P.', 'Brown'), ('Patrick', 'Brown')),
     ]
     for name_a, name_b in incompatible_pairs:
         similarity = name_similarity(name_a, name_b)
         self.assertAlmostEqual(similarity, 0)
Пример #3
0
 def test_symmetric(self):
     """name_similarity must give the same score when its arguments are swapped."""
     samples = (
         (('Robin', 'Ryder'), ('Robin', 'Ryder')),
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('R. J.', 'Ryder'), ('J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('W. Timothy', 'Gowers'), ('Timothy', 'Gowers')),
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
     )
     for left, right in samples:
         forward = name_similarity(left, right)
         backward = name_similarity(right, left)
         self.assertAlmostEqual(forward, backward)
Пример #4
0
 def test_symmetric(self):
     """Swapping the two names must not change the similarity score."""
     name_pairs = (
         (('Robin', 'Ryder'), ('Robin', 'Ryder')),
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('R. J.', 'Ryder'), ('J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('W. Timothy', 'Gowers'), ('Timothy', 'Gowers')),
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
     )
     for first_name, second_name in name_pairs:
         in_order = name_similarity(first_name, second_name)
         swapped = name_similarity(second_name, first_name)
         self.assertAlmostEqual(in_order, swapped)
Пример #5
0
 def score(self, dataA, dataB):
     """
     Score two names by delegating to name_similarity.

     Both arguments are unpacked as two-item sequences before the call,
     so anything else is rejected by the unpacking itself.
     """
     # TODO: this score function is far from optimal
     # refine it so that 'Claire Mathieu' and 'Claire Mathieu-Kenyon' get
     # a decent score

     # The unpacked parts are not used afterwards; the unpacking is kept
     # only because it fails on inputs that are not two-item sequences.
     _first_a, _last_a = dataA
     _first_b, _last_b = dataB
     similarity = name_similarity(dataA, dataB)
     return similarity
Пример #6
0
    def get_or_create_by_orcid(cls, orcid, profile=None, user=None):
        """
        Return the researcher holding this ORCID id, creating it if needed.

        :param orcid: the ORCID id used to look up the researcher
        :param profile: data handed to :class:`OrcidProfile` as ``json``;
                        when None, the profile is built from the id alone
        :param user: forwarded to ``Researcher.create_by_name``
        :returns: the existing or newly created :class:`Researcher`
        """
        try:
            researcher = Researcher.objects.get(orcid=orcid)
        except Researcher.DoesNotExist:
            if profile is None:
                orcid_profile = OrcidProfile(id=orcid)
            else:
                orcid_profile = OrcidProfile(json=profile)
            full_name = orcid_profile.name
            homepage = orcid_profile.homepage
            email = orcid_profile.email
            researcher = Researcher.create_by_name(
                full_name[0], full_name[1],
                orcid=orcid, user=user, homepage=homepage, email=email)

            # Fill in whichever of these fields are still empty on the
            # researcher, and save only if something was actually set.
            extra_info = (
                ('homepage', homepage),
                ('orcid', orcid),
                ('email', email),
            )
            needs_save = False
            for field, value in extra_info:
                if not researcher.__dict__[field] and value:
                    researcher.__dict__[field] = value
                    needs_save = True
            if needs_save:
                researcher.save()

            for other_name in orcid_profile.other_names:
                # NOTE(review): the variant is scored against itself, not
                # against the profile's main name -- confirm this is intended.
                confidence = name_similarity(other_name, other_name)
                researcher.add_name_variant(Name.lookup_name(other_name),
                                            confidence)

        return researcher
Пример #7
0
    def update_variants(self, reset=False):
        """
        Sets the variants of this name to the candidates returned by variants_queryset
        and which have a positive name similarity with the reference name.

        :param reset: when True, every existing name variant is deleted first
                      (calling ``update_best_confidence`` on each affected name)
                      and candidates are re-added with ``force_update=True``;
                      when False, names that already have a variant are skipped

        .. todo::
            This should rather rely on the name variants with confidence 1.0
        """
        existing_variants = self.namevariant_set.all()
        if reset:
            for variant in existing_variants:
                name = variant.name
                variant.delete()
                name.update_best_confidence()

            known_name_ids = set()
        else:
            known_name_ids = {variant.name_id for variant in existing_variants}

        # Reference name compared against every candidate; built once,
        # outside the loop, instead of on each iteration.
        reference = (self.name.first, self.name.last)
        for name in self.variants_queryset():
            sim = name_similarity((name.first, name.last), reference)
            if sim > 0 and name.id not in known_name_ids:
                self.add_name_variant(name, sim, force_update=reset)
Пример #8
0
    def get_or_create_by_orcid(cls, orcid, profile=None, user=None):
        """
        Fetch the researcher registered under an ORCID id, or create one.

        :param orcid: the ORCID id to look up
        :param profile: when given, passed as ``json`` to :class:`OrcidProfile`;
                        otherwise the profile is built from ``orcid`` only
        :param user: forwarded to ``Researcher.create_by_name``
        :returns: the matching or freshly created :class:`Researcher`
        """
        try:
            researcher = Researcher.objects.get(orcid=orcid)
        except Researcher.DoesNotExist:
            if profile is None:
                fetched = OrcidProfile(id=orcid)
            else:
                fetched = OrcidProfile(json=profile)
            main_name = fetched.name
            homepage = fetched.homepage
            email = fetched.email
            researcher = Researcher.create_by_name(
                main_name[0],
                main_name[1],
                orcid=orcid,
                user=user,
                homepage=homepage,
                email=email)

            # Complete the fields that are still empty on the researcher;
            # the record is saved only when at least one was filled in.
            candidates = [
                ('homepage', homepage),
                ('orcid', orcid),
                ('email', email),
            ]
            changed = False
            for attribute, new_value in candidates:
                if not researcher.__dict__[attribute] and new_value:
                    researcher.__dict__[attribute] = new_value
                    changed = True
            if changed:
                researcher.save()

            for alias in fetched.other_names:
                # NOTE(review): the alias is compared with itself rather
                # than with the main name -- confirm this is intended.
                confidence = name_similarity(alias, alias)
                alias_name = Name.lookup_name(alias)
                researcher.add_name_variant(alias_name, confidence)

        return researcher
Пример #9
0
    def update_variants(self, reset=False):
        """
        Sets the variants of this name to the candidates returned by variants_queryset
        and which have a positive name similarity with the reference name.

        :param reset: when True, all current name variants are deleted first
                      (``update_best_confidence`` is called on each of their
                      names) and candidates are added with ``force_update=True``;
                      when False, names already present as variants are skipped

        .. todo::
            This should rather rely on the name variants with confidence 1.0
        """
        current_variants = self.namevariant_set.all()
        if reset:
            for nv in current_variants:
                name = nv.name
                nv.delete()
                name.update_best_confidence()

            current_name_ids = set()
        else:
            current_name_ids = {nv.name_id for nv in current_variants}

        # The reference name is the same for all candidates, so it is
        # assembled once before the loop rather than per candidate.
        reference_name = (self.name.first, self.name.last)
        for name in self.variants_queryset():
            candidate_name = (name.first, name.last)
            sim = name_similarity(candidate_name, reference_name)
            if sim > 0 and name.id not in current_name_ids:
                self.add_name_variant(name, sim, force_update=reset)
Пример #10
0
 def update_variants(self):
     """
     Sets the variants of this name to the candidates returned by variants_queryset

     For every candidate researcher whose name has a positive similarity
     with this name, a NameVariant is fetched or created (the similarity
     being the default confidence of a new one).

     ``best_confidence`` is only ever raised to a higher similarity: a
     weaker match seen later no longer overwrites a better one.
     """
     own_name = (self.first, self.last)
     for researcher in self.variants_queryset():
         sim = name_similarity(
             (researcher.name.first, researcher.name.last), own_name)
         if sim <= 0:
             continue

         improved = self.best_confidence < sim
         if improved:
             self.best_confidence = sim
         # An unsaved name is saved even without improvement, presumably
         # so that the NameVariant below can reference it.
         if improved or self.pk is None:
             self.save()
         NameVariant.objects.get_or_create(name=self,
                                           researcher=researcher,
                                           defaults={'confidence': sim})
Пример #11
0
 def score(self, dataA, dataB):
     """
     Sum name_similarity over every pair (a, b), a from dataA and b from dataB.

     Returns 0. when either argument is None.
     """
     if dataA is None or dataB is None:
         return 0.

     # Earlier versions scored each pair with name_tools.match on the
     # full names, or added 1. for every pair accepted by match_names.
     total = 0.
     for name_a in dataA:
         for name_b in dataB:
             # The unpacked parts are unused; the unpacking is kept only
             # because it fails on items that are not two-item sequences.
             _first_a, _last_a = name_a
             _first_b, _last_b = name_b
             total += name_similarity(name_a, name_b)
     return total
Пример #12
0
 def update_variants(self):
     """
     Sets the variants of this name to the candidates returned by variants_queryset

     A NameVariant is fetched or created for each candidate researcher
     whose name has a positive similarity with this name; a newly created
     one gets that similarity as its confidence.

     ``best_confidence`` keeps the highest similarity encountered: it is
     never replaced by a lower value from a later candidate.
     """
     this_name = (self.first, self.last)
     for researcher in self.variants_queryset():
         researcher_name = (researcher.name.first, researcher.name.last)
         sim = name_similarity(researcher_name, this_name)
         if sim <= 0:
             continue

         is_better = self.best_confidence < sim
         if is_better:
             self.best_confidence = sim
         # A name without a primary key is saved in any case, presumably
         # because the NameVariant created below has to point to it.
         if is_better or self.pk is None:
             self.save()
         NameVariant.objects.get_or_create(
             name=self,
             researcher=researcher,
             defaults={'confidence': sim})
Пример #13
0
 def test_matching(self):
     """Check the expected similarity score of each compatible name pair."""
     expectations = (
         (('Robin', 'Ryder'), ('Robin', 'Ryder'), 0.8),
         (('Robin', 'Ryder'), ('R.', 'Ryder'), 0.4),
         (('R.', 'Ryder'), ('R.', 'Ryder'), 0.4),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder'), 0.3),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder'), 0.8),
         (('R. J.', 'Ryder'), ('J.', 'Ryder'), 0.3),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder'), 0.7),
     )
     for left, right, expected in expectations:
         self.assertAlmostEqual(name_similarity(left, right), expected)
Пример #14
0
 def test_matching(self):
     """Compatible name pairs must get the listed similarity scores."""
     cases = [
         (0.8, ('Robin', 'Ryder'), ('Robin', 'Ryder')),
         (0.4, ('Robin', 'Ryder'), ('R.', 'Ryder')),
         (0.4, ('R.', 'Ryder'), ('R.', 'Ryder')),
         (0.3, ('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (0.8, ('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (0.3, ('R. J.', 'Ryder'), ('J.', 'Ryder')),
         (0.7, ('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
     ]
     for expected, name_a, name_b in cases:
         similarity = name_similarity(name_a, name_b)
         self.assertAlmostEqual(similarity, expected)
Пример #15
0
    def get_or_create_by_orcid(cls, orcid, profile=None, user=None):
        """
        Return the researcher with the given ORCID id, creating it if absent.

        :param orcid: the ORCID id to look up; must not be None
        :param profile: when given, passed as ``json`` to :class:`OrcidProfile`;
                        otherwise the profile is built from ``orcid`` alone
        :param user: forwarded to ``Researcher.create_by_name``
        :returns: the existing or newly created :class:`Researcher`
        :raises MetadataSourceException: if ``orcid`` is None
        """
        if orcid is None:
            raise MetadataSourceException('Invalid ORCID id')

        try:
            researcher = Researcher.objects.get(orcid=orcid)
        except Researcher.DoesNotExist:
            if profile is None:
                orcid_profile = OrcidProfile(id=orcid)
            else:
                orcid_profile = OrcidProfile(json=profile)
            full_name = orcid_profile.name
            homepage = orcid_profile.homepage
            email = orcid_profile.email
            researcher = Researcher.create_by_name(
                full_name[0], full_name[1],
                orcid=orcid, user=user, homepage=homepage, email=email)

            # Fill in whichever of these fields are still empty on the
            # researcher, and save only if something was actually set.
            extra_info = (
                ('homepage', homepage),
                ('orcid', orcid),
                ('email', email),
            )
            needs_save = False
            for field, value in extra_info:
                if not researcher.__dict__[field] and value:
                    researcher.__dict__[field] = value
                    needs_save = True
            if needs_save:
                researcher.save()

            for other_name in orcid_profile.other_names:
                # NOTE(review): the variant is scored against itself, not
                # against the profile's main name -- confirm this is intended.
                confidence = name_similarity(other_name, other_name)
                researcher.add_name_variant(Name.lookup_name(other_name),
                                            confidence)

        return researcher
Пример #16
0
    def get_or_create_by_orcid(cls, orcid, profile=None, user=None):
        """
        Look up a researcher by ORCID id and create it when none exists.

        :param orcid: the ORCID id of the researcher; must not be None
        :param profile: data handed to :class:`OrcidProfile` as ``json``;
                        when None, the profile is built from the id only
        :param user: forwarded to ``Researcher.create_by_name``
        :returns: the matching or freshly created :class:`Researcher`
        :raises MetadataSourceException: if ``orcid`` is None
        """
        if orcid is None:
            raise MetadataSourceException('Invalid ORCID id')

        try:
            researcher = Researcher.objects.get(orcid=orcid)
        except Researcher.DoesNotExist:
            if profile is None:
                fetched = OrcidProfile(id=orcid)
            else:
                fetched = OrcidProfile(json=profile)
            main_name = fetched.name
            homepage = fetched.homepage
            email = fetched.email
            researcher = Researcher.create_by_name(
                main_name[0],
                main_name[1],
                orcid=orcid,
                user=user,
                homepage=homepage,
                email=email)

            # Complete the fields that are still empty on the researcher;
            # the record is saved only when at least one was filled in.
            candidates = [
                ('homepage', homepage),
                ('orcid', orcid),
                ('email', email),
            ]
            changed = False
            for attribute, new_value in candidates:
                if not researcher.__dict__[attribute] and new_value:
                    researcher.__dict__[attribute] = new_value
                    changed = True
            if changed:
                researcher.save()

            for alias in fetched.other_names:
                # NOTE(review): the alias is compared with itself rather
                # than with the main name -- confirm this is intended.
                confidence = name_similarity(alias, alias)
                alias_name = Name.lookup_name(alias)
                researcher.add_name_variant(alias_name, confidence)

        return researcher
Пример #17
0
 def test_multiple(self):
     """A two-part first name and its partly abbreviated form score 1.0."""
     spelled_out = ('Juan Pablo', 'Corella')
     abbreviated = ('J. Pablo', 'Corella')
     similarity = name_similarity(spelled_out, abbreviated)
     self.assertAlmostEqual(similarity, 1.0)
Пример #18
0
 def test_multiple(self):
     """'Juan Pablo' against 'J. Pablo' (same last name) must score 1.0."""
     full_form = ('Juan Pablo', 'Corella')
     short_form = ('J. Pablo', 'Corella')
     self.assertAlmostEqual(name_similarity(full_form, short_form), 1.0)
Пример #19
0
 def test_reverse(self):
     """'W. Timothy' against 'Timothy' (same last name) must score 0.7."""
     with_initial = ('W. Timothy', 'Gowers')
     without_initial = ('Timothy', 'Gowers')
     similarity = name_similarity(with_initial, without_initial)
     self.assertAlmostEqual(similarity, 0.7)
Пример #20
0
 def test_reverse(self):
     """A leading initial before the first name still gives a score of 0.7."""
     longer_name = ('W. Timothy', 'Gowers')
     shorter_name = ('Timothy', 'Gowers')
     self.assertAlmostEqual(name_similarity(longer_name, shorter_name), 0.7)