def generateModelForExpertise(self, expertise):
    """Iteratively refine the multiple-center model for ``expertise``.

    After ``_optimizeParameters`` has run, every one of the
    ``Settings.numberOfIterations`` rounds:

    1. re-partitions the users against the current models,
    2. rebuilds and displays the expertise regions,
    3. clears the stored models and recomputes them through the processes
       returned by ``_computeModelsParallely``, collecting the results
       from a queue.

    Progress and timing information is printed to stdout.

    Args:
        expertise: key used to index ``self._dictExpertiseRegions`` and
            ``self._dictExpertModels``.
    """
    # Every print below passes one pre-formatted string in parentheses:
    # that emits the same text with the Python 2 print statement and with
    # the Python 3 print function.
    print("Generating model for %s" % (expertise,))
    self._optimizeParameters(expertise)

    # The result queue is sized once, from the regions known before the
    # refinement loop starts.
    # NOTE(review): the regions are rebuilt on every iteration; confirm
    # the queue capacity is still sufficient if their number can grow.
    dataQueue = Queue(len(self._dictExpertiseRegions[expertise]))

    print("Initially computed models:-")
    pprint(self._dictExpertModels)

    iternum = 0
    while iternum < Settings.numberOfIterations:
        UsersData.partitionUsers(self._dictExpertModels[expertise], expertise)

        print("Recomputed Bounding boxes for regions-----")
        self._updateExpertiseRegions(expertise)
        self._displayRegionsInfo(expertise)

        # NOTE(review): this drops the models of *every* expertise, not
        # only the one being regenerated - confirm that is intended.
        self._dictExpertModels.clear()
        expertiseRegions = self._dictExpertiseRegions[expertise]

        print("---- Recomputing the centers------------")
        start = time.time()
        processes = self._computeModelsParallely(expertiseRegions, dataQueue)
        self._waitForProcesses(processes)
        self._populateResultsFromQueue(expertise, dataQueue)
        # Elapsed time is "now - start"; the previous "start - now"
        # always reported a negative duration.
        elapsed = time.time() - start
        print("It took  %s  seconds" % (elapsed,))

        print("---------------Generated multiple center model----------")
        pprint(self._dictExpertModels)
        iternum += 1
def main():
    """Script entry point: load the experts data and print user buckets.

    Builds one hard-coded region for the 'vc' expertise, registers the
    experts data with ``UsersData``, buckets the users around that region
    with an interval of 50 and prints the buckets. The reported time covers
    everything from obtaining the data extractor up to the construction of
    the ``BucketUsers`` object.
    """
    # A parenthesised single-argument print produces the same output under
    # the Python 2 print statement and the Python 3 print function.
    print('Main')
    dataDirectory = 'data/'
    startedAt = time.time()

    extractor = DataExtractorFactory.getDataExtractor('expertisemodel', dataDirectory)
    expertsData = extractor.getAllExpertsData()

    # Corner points and center of the region being bucketed.
    leftTop = (50, -125)
    rightBottom = (25.255, -60)
    region = Region(leftTop, rightBottom, center=(30, -60), expertise='vc')

    UsersData.addUserDataToRegions(expertsData)
    usersBucket = BucketUsers(region, 50)

    elapsed = time.time() - startedAt
    print('%s %s' % (elapsed, ' is the time taken'))
    usersBucket.printBuckets()
def __init__(self, dataDirectory, dataExtractor=None):
    """Load the experts data and register it with the regions.

    Args:
        dataDirectory: directory handed to the data extractor factory and
            to ``populateData`` when no extractor is supplied.
        dataExtractor: optional ready-to-use extractor. When omitted, an
            "expertisemodel" extractor is obtained from
            ``DataExtractorFactory`` and populated from ``dataDirectory``.
            A supplied extractor is used as-is; ``populateData`` is not
            called on it.
    """
    # Both dictionaries must already exist when __init__ runs (nothing
    # here creates them); they are emptied rather than replaced.
    self._dictExpertiseRegions.clear()
    self._dictExpertModels.clear()
    self._dataDirectory = dataDirectory

    # Identity test: "== None" would defer to a custom __eq__ on the
    # extractor object.
    if dataExtractor is None:
        dataExtractor = DataExtractorFactory.getDataExtractor(
            "expertisemodel", dataDirectory)
        dataExtractor.populateData(dataDirectory)
    self._expertDataExtractor = dataExtractor

    self._dictExpertUsersData = self._expertDataExtractor.getAllExpertsData()
    self._createParentRegions()
    UsersData.addUserDataToRegions(self._dictExpertUsersData)
def __init__(self, region, interval):
    """Bucket the currently registered users data around ``region``.

    Args:
        region: object providing ``getCenter()``; kept as ``self._region``
            and its center as ``self._center``.
        interval: kept as ``self._interval``.
            NOTE(review): presumably the bucket width used by
            ``_bucketUserData`` - confirm.
    """
    # The bucket dictionary has to exist before __init__ runs (it is not
    # created here); it is emptied so no earlier buckets survive.
    self._bucketedUserData.clear()

    self._interval = interval
    self._region = region
    self._center = region.getCenter()
    self._usersData = UsersData.getUsersData()

    # Fill the buckets first, then normalise them.
    self._bucketUserData()
    self._normalizeBuckets()
def _updateExpertiseRegions(self, expertise):
    """Rebuild the regions of ``expertise`` from the current user partition.

    Users are grouped by the region label stored at index 5 of each user
    record. For every label a bounding box is computed over its users and
    a parent ``Region`` is created, centered on the center of the model
    carrying the same region name. Labels for which the region cannot be
    built (for example, no model has that region name) are reported and
    skipped. The resulting list replaces
    ``self._dictExpertiseRegions[expertise]``.

    Args:
        expertise: key into ``self._dictExpertModels`` and
            ``self._dictExpertiseRegions``.
    """
    # Fetched once: the collection is used both for grouping and for the
    # per-region "out of N users" report.
    allUsers = UsersData.getUsersData()
    totalUsers = len(allUsers)

    usersByRegion = {}
    for userData in allUsers:
        usersByRegion.setdefault(userData[5], []).append(userData)

    # Single-argument print(...) emits the same text under Python 2 and 3;
    # the spacing reproduces the former comma-separated print output.
    print("Regions:  %s" % (list(usersByRegion),))

    centerByRegion = dict(
        (model["regionName"], model["center"])
        for model in self._dictExpertModels[expertise]
    )

    expertRegions = []
    for regionName, regionUsers in usersByRegion.items():
        print("%s  has  %s  users assigned to it out of %s  users"
              % (regionName, len(regionUsers), totalUsers))
        leftTop, rightBottom = self._getBoundingBox(regionUsers)
        try:
            expertRegion = Region(
                leftTop,
                rightBottom,
                center=centerByRegion[regionName],
                name=regionName,
                isParent=True,
                expertise=expertise,
            )
        except Exception:
            # Best effort: an unbuildable region is dropped, but unlike a
            # bare "except:" this lets KeyboardInterrupt/SystemExit through.
            print("Region invalid.. discarded!!")
        else:
            expertRegions.append(expertRegion)

    self._dictExpertiseRegions[expertise] = expertRegions
def boundsLocation(self, userData):
    """Tell whether ``userData`` belongs to this region.

    Once the users data has been partitioned, a user whose expertise
    (index 4) equals this region's expertise is matched purely by its
    region label (index 5) against this region's name. Every other user is
    matched by location (indices 2 and 3) against the bounding box spanned
    by ``self._leftTop`` and ``self._rightBottom``, borders included.

    Args:
        userData: indexable user record.
            NOTE(review): indices 2 and 3 are presumably latitude and
            longitude - confirm against the data extractor.

    Returns:
        True when the user belongs to the region, False otherwise. A
        record that cannot be evaluated is printed and treated as not
        belonging.
    """
    try:
        if UsersData.isPartitioned() and userData[4] == self._expertise:
            # Experts carry the label of the region they were assigned
            # to, so their location is not consulted.
            return bool(userData[5] == self._name)

        lat, lon = userData[2], userData[3]
        return bool(
            self._rightBottom[0] <= lat <= self._leftTop[0]
            and self._leftTop[1] <= lon <= self._rightBottom[1]
        )
    except Exception:
        # Best effort: report the offending record and reject it. Unlike a
        # bare "except:", KeyboardInterrupt/SystemExit still propagate.
        print(userData)
        return False