Пример #1
0
def check_player_against_rankings(player, rankings):
    """Return the stored player name in a rankings CSV that best matches ``player``.

    Args:
        player: Free-form player name to look up.
        rankings: Path to a CSV file that has a ``playername`` column.

    Returns:
        The stored name of the first row scoring exactly 1.0 with
        ``name_tools.match``; otherwise the stored name with the highest
        score above 0.61 (the earliest row wins ties); ``None`` when no row
        qualifies.
    """
    # The search tokens do not depend on the row, so build them once.
    # Single-character tokens are dropped because they would make almost
    # every row a candidate for the (more expensive) scoring step.
    tokens = [part for part in player.lower().split(" ") if len(part) > 1]

    best_score = None
    best_name = None
    # The context manager closes the file on every exit path, including the
    # early return on a perfect match.
    with open(rankings, newline="") as rankings_file:
        for row in csv.DictReader(rankings_file):
            stored_name = row['playername']
            stored_name_lower = stored_name.lower()
            # Cheap substring pre-filter before scoring the full name.
            if not any(token in stored_name_lower for token in tokens):
                continue
            match_score = name_tools.match(player, stored_name)
            # A perfect match cannot be beaten; return it immediately.
            if match_score == 1.0:
                return stored_name
            # Strict ">" keeps the earliest row when scores tie.
            if match_score > 0.61 and (best_score is None or match_score > best_score):
                best_score = match_score
                best_name = stored_name
    return best_name
Пример #2
0
    def get(self, name):
        """Score ``name`` against every node id in the configured JSON database.

        Prints ``name``, loads the JSON document at ``config.database`` and
        compares the ``id`` of each entry under ``nodes`` with ``name`` using
        ``name_tools.match``.

        Returns:
            Ten times the largest score found, where 0.1 is used as the
            floor when no score exceeds it.
        """
        print(name)

        with open(config.database) as db_file:
            document = json.load(db_file)
            scores = [name_tools.match(node['id'], name) for node in document['nodes']]

        # Seeding the candidates with 0.1 makes it the minimum result.
        best = max([0.1] + scores)
        return best * 10
Пример #3
0
def check_player_by_nickname(player, nicknames):
    """Resolve ``player`` to a stored player name via a nickname CSV.

    Args:
        player: Name or nickname to look up.
        nicknames: Path to a CSV file with ``nickname`` and ``playername``
            columns.

    Returns:
        The ``playername`` of the first row whose ``nickname`` scores at
        least 0.95 against ``player`` with ``name_tools.match``, or ``None``
        when no row does.
    """
    # The context manager closes the file even when we return mid-iteration.
    with open(nicknames, newline="") as nicknames_file:
        for row in csv.DictReader(nicknames_file):
            if name_tools.match(player, row['nickname']) >= 0.95:
                return row['playername']
    return None
Пример #4
0
    def first_pass(self):
        """Decide whether ``value1`` and ``value2`` refer to the same person.

        Evidence is checked in order: an explicit sameas link, then name
        similarity, then dates. Each step appends a ``'match-...--'``
        message string to ``self.match``.

        Side effects: appends to ``self.match`` and may set
        ``self.overallconfidence``, ``self.dates1`` and ``self.dates2``.

        Returns:
            ``self.match``, the accumulated list of message strings.
        """
        # Without two values there is nothing to compare.
        if not (self.value1 and self.value2):
            count = self.listcount - 1
            if count < 2:
                self.match.append('match--insufficient items in the list count=' + str(count))
            return self.match

        # "<label> (<value>)" fragments shared by most of the messages below.
        subject1 = self.value1label + ' (' + self.value1 + ')'
        subject2 = self.value2label + ' (' + self.value2 + ')'

        self.match.append('match= ' + self.value1 + ' and ' + self.value2 + ' --')

        # First check for an explicit same as.
        if self.sameas(self.value1, self.value2):
            self.match.append(
                'match-sameas--confidence=' + str(1.0)
                + '--There is an explicit sameas (or equivalent) relationship between '
                + subject1 + ' and ' + subject2 + '. These are the same person.--')
            self.overallconfidence = 1.0
            return self.match

        # If no explicit sameas is in place, check the names.
        self.match.append(
            'match-sameas--There is no explicit sameas relationship between '
            + subject1 + ' and ' + subject2 + '. Invstigate further ... --')
        names1 = self.checkname(self.value1, 1)
        names2 = self.checkname(self.value2, 2)

        # If one or both names are missing there is not much more we can do.
        if not (names1 and names2):
            if not names1 and not names2:
                self.match.append('match-name--There are no names specified--')
            elif not names1:
                self.match.append('match-name--There is no name for ' + subject1 + '--')
            else:
                self.match.append('match-name--There is no name for ' + subject2 + '--')
            return self.match

        # Score every name pairing with name_tools and keep the best score.
        # The maximum must be known before anything is reported, so the
        # reporting and date checks run once, after scoring is complete.
        scores = [
            name_tools.match(name_tools.canonicalize(nm), name_tools.canonicalize(n))
            for nm in names1
            for n in names2
        ]
        confidence = max([0] + scores)

        # Each band includes its lower bound, so every value gets a message.
        if confidence >= 0.75:
            self.match.append(
                'match-name--confidence=' + str(confidence)
                + '--The names are alike. Checking for dates ... --')
        elif confidence >= 0.5:
            self.match.append(
                'match-name--confidence=' + str(confidence)
                + '--The names have some similarities, they could be the same person. Checking dates to be sure ... --')
        else:
            self.match.append(
                'match-name--confidence=' + str(confidence)
                + '--The names are different, indicating that these are not likely to be the same person. Checking dates to be sure ... --')
        self.overallconfidence = confidence

        # Check the dates.
        self.dates1 = self.checkdates(self.value1)
        self.dates2 = self.checkdates(self.value2)

        # If there are no dates for either, check for sameas links to other sources.
        if not self.dates1 and not self.dates2:
            self.match.append('match-dates--Neither of the names have associated dates. Checking for sameas links to other sources ... )' + '--')

            # Run a second pass on value 1.
            dates = self.secondpass(self.value1)
            if dates:
                for d in dates:
                    # NOTE(review): only the last date found is kept in
                    # self.dates1, so the `in` tests below look like substring
                    # checks against a single string - confirm this is intended.
                    self.dates1 = d
                    self.match.append(
                        'match-dates--These dates were found: ' + d + ' but there is nothing to compare them to--')
            else:
                self.match.append('match-dates--No dates were found for ' + subject1 + '--')

            # Run a second pass on value 2 and compare with what value 1 gave us.
            dates = self.secondpass(self.value2)
            if dates:
                for d in dates:
                    self.dates2 = d
                    if self.dates1 and d in self.dates1:
                        self.match.append(
                            'match-dates--confidence=1.0--There is an exact match with birth and death dates (' + str(d) + ' and ' + str(self.dates1) + '). These are very likely the same person.--')
                    elif self.dates1 and d not in self.dates1:
                        self.match.append(
                            'match-dates--confidence=0--The dates do not match (' + str(d) + ' and ' + str(self.dates1) + '). These are not likely to be the same person.--')
                    else:
                        self.match.append(
                            'match-dates--These dates were found: ' + d + ' but there is nothing to compare them to--')
            else:
                self.match.append('match-dates--No dates were found for ' + subject2 + '--')
            return self.match

        # Only value 2 has dates: look for sameas links for value 1.
        if not self.dates1:
            self.match.append('match-dates--There are no birth or death dates for ' + subject1 + '. Checking for sameas links to other sources ... --')
            # We could have more than one.
            dates = self.secondpass(self.value1)
            if not dates:
                self.match.append(
                    'match-dates--confidence=0--We only have one date (' + str(self.dates2) + ') and nothing to compare it to.--')
                self.overallconfidence = 0
                return self.match
            for d in dates:
                if d in self.dates2:
                    self.match.append(
                        'match-dates--confidence=1.0--There is an exact match with birth and death dates (' + str(d) + ' and ' + str(self.dates2) + '). These are very likely the same person.--')
                    self.overallconfidence = 1.0
                    return self.match
            # No candidate matched; `d` is the last date that was examined.
            self.match.append(
                'match-dates--confidence=0--The dates do not match (' + str(d) + ' and ' + str(self.dates2) + '). These are not likely to be the same person.--')
            self.overallconfidence = 0
            return self.match

        # Only value 1 has dates: look for sameas links for value 2.
        if not self.dates2:
            self.match.append('match-dates--There are no birth or death dates for ' + subject2 + '. Checking for sameas links to other sources ... --')
            dates = self.secondpass(self.value2)
            if not dates:
                self.match.append(
                    'match-dates--confidence=0--We only have one date (' + str(self.dates1) + ') and nothing to compare it to.--')
                # Mirror the value-1 branch: a lone date gives no confidence.
                self.overallconfidence = 0
                return self.match
            for d in dates:
                if d in self.dates1:
                    self.match.append(
                        'match-dates--confidence=1.0--There is an exact match with birth and death dates (' + str(self.dates1) + ' and ' + str(d) + '). These are very likely the same person.--')
                    self.overallconfidence = 1.0
                    return self.match
            # Report a mismatch only after every candidate has been tried;
            # `d` is the last date that was examined.
            self.match.append(
                'match-dates--confidence=0--The dates do not match (' + str(self.dates1) + ' and ' + str(d) + '). These are not likely to be the same person.--')
            self.overallconfidence = 0
            return self.match

        # Otherwise both sides have dates: any equal pair counts as a match,
        # and a mismatch is reported only once every pair has been compared.
        if any(dt == d for dt in self.dates1 for d in self.dates2):
            self.match.append(
                'match-dates--confidence=' + str(1.0)
                + '--There is an exact match with birth and death dates (' + str(self.dates1) + ' and ' + str(self.dates2) + '). These are very likely the same person.--')
            self.overallconfidence = 1.0
            return self.match

        self.match.append(
            'match-dates--confidence=' + str(0)
            + '--The dates do not match (' + str(self.dates1) + ' and ' + str(self.dates2) + '). These are not likely to be the same person.--')
        self.overallconfidence = 0
        return self.match