def _updateLabelersStats(self, expertise, expertId, regionName, userData):
    '''
    Records whether a labeling user lies inside or outside a region.

    The haversine distance between the center of the region ``regionName``
    and the user's location (``userData[2]``, ``userData[3]``) is compared
    with ``self._getEffectiveRadius(regionName)``. The 'inside' counter of
    ``self._labelerStatsDict[expertise][expertId][regionName]`` is
    incremented when the distance is strictly smaller than that radius,
    otherwise the 'outside' counter is incremented.

    The first time an (expertise, expertId) pair is seen, zeroed
    'inside'/'outside' counters are created for every region in
    ``self._regions``.

    Raises KeyError if ``regionName`` is not a key of ``self._regions``.
    '''
    userLocation = userData[2], userData[3]
    regionCenter = self._regions[regionName]['center']
    distance = Utility.haversine(regionCenter[0], regionCenter[1],
                                 userLocation[0], userLocation[1])

    expertiseStats = self._labelerStatsDict.setdefault(expertise, {})
    if expertId not in expertiseStats:
        expertStats = {}
        # A dedicated loop variable is used on purpose: reusing the name
        # ``regionName`` here would rebind the parameter, and the update
        # below would then be applied to the last region iterated instead
        # of the region the caller asked for.
        for knownRegion in self._regions:
            expertStats[knownRegion] = {'inside': 0, 'outside': 0}
        expertiseStats[expertId] = expertStats

    regionStats = expertiseStats[expertId][regionName]
    if distance < self._getEffectiveRadius(regionName):
        regionStats['inside'] += 1
    else:
        regionStats['outside'] += 1
def _getScore(self, userData, regionName):
    '''
    Returns the distance-discounted score a user contributes for a region.

    The score is computed as

        (effectiveRadius / (Dmin + distance)) ** alpha

    where ``distance`` is the haversine distance between the center of the
    region ``regionName`` and the user's location (``userData[2]``,
    ``userData[3]``), ``effectiveRadius`` comes from
    ``self._getEffectiveRadius(regionName)``, and ``Dmin`` / ``alpha`` are
    read from ``self._Dmin`` / ``self._alpha``. For a positive ``alpha`` the
    score therefore shrinks as the user gets farther from the region center.
    '''
    location = userData[2], userData[3]
    center = self._regions[regionName]['center']
    separation = Utility.haversine(center[0], center[1],
                                   location[0], location[1])

    # Ratio of the region's effective radius to the (offset) distance;
    # presumably self._Dmin keeps the denominator away from zero -- confirm.
    ratio = self._getEffectiveRadius(regionName) / (self._Dmin + separation)
    return ratio ** self._alpha
def _bucketUserData(self):
    '''
    Aggregates the users in ``self._usersData`` into distance buckets.

    Only users whose record is accepted by ``boundsLocation`` of the root
    region are considered. The root region is ``self._region`` itself when
    it is a parent, otherwise its parent. For each accepted user the
    haversine distance from ``self._center`` to the user's location
    (``userData[2]``, ``userData[3]``) selects a bucket through
    ``self._getBucketKey``. ``self._getKeys`` is given whether the region's
    expertise equals ``userData[4]`` and returns the key to update and the
    complementary key.

    ``self._bucketedUserData[bucketKey]`` holds, for both keys, a dict with
    'distanceSum', 'confidenceSum' (sum of ``userData[1]``) and
    'usersCount'. A new bucket is seeded with the current user under the
    key to update and with zeroed totals under the complementary key.
    '''
    regionExpertise = self._region.getExpertise()
    for record in self._usersData:
        location = (record[2], record[3])

        # Filtering is done against the root expertise region from which
        # this region descends (or the region itself if it is the root).
        rootRegion = (self._region if self._region.isParent()
                      else self._region.getParent())
        if not rootRegion.boundsLocation(record):
            continue

        confidence = record[1]
        # NOTE(review): index 1 is passed before index 0 for both points;
        # presumably this matches the argument order haversine expects --
        # confirm against Utility.haversine.
        distance = Utility.haversine(self._center[1], self._center[0],
                                     location[1], location[0])
        matchesExpertise = (regionExpertise == record[4])
        bucketKey = self._getBucketKey(distance)
        activeKey, idleKey = self._getKeys(matchesExpertise)

        if bucketKey in self._bucketedUserData:
            # Existing bucket: accumulate this user's distance and
            # confidence into the running totals.
            totals = self._bucketedUserData[bucketKey][activeKey]
            totals['distanceSum'] += distance
            totals['confidenceSum'] += confidence
            totals['usersCount'] += 1
            continue

        # First user for this bucket: seed both entries.
        self._bucketedUserData[bucketKey] = {}
        self._bucketedUserData[bucketKey][activeKey] = {
            'distanceSum': distance,
            'confidenceSum': confidence,
            'usersCount': 1,
        }
        self._bucketedUserData[bucketKey][idleKey] = {
            'distanceSum': 0.0,
            'confidenceSum': 0.0,
            'usersCount': 0,
        }