def recentSocialHealthScores2():
    profiles = Profile.objects.all()
    startTime = getStartTime(6, True)
    currentTime = time.time()
    # 4-hour windows covering the period from startTime up to now
    timeRanges = [(start, start + 3600*4) for start in range(int(startTime), int(currentTime), 3600*4)]

    sums = {"activity": 0, "social": 0, "focus": 0}
    activeUsers = []
    data = {}

    for profile in profiles:
        token = getToken(profile, "app-uuid")
        internalDataStore = getInternalDataStore(profile, "Living Lab", "Social Health Tracker", token)

        activityLevels = aggregateForUser(internalDataStore, "RecentActivityByHour", timeRanges, activityForTimeRange, False)
        # Only count users who actually have activity data in the window
        if len(activityLevels) > 0:
            socialLevels = aggregateForUser(internalDataStore, "RecentSocialByHour", timeRanges, socialForTimeRange, True)
            focusLevels = aggregateForUser(internalDataStore, "RecentFocusByHour", timeRanges, focusForTimeRange, True)

            activityScore = computeActivityScore(activityLevels)
            socialScore = computeSocialScore(socialLevels)
            focusScore = computeFocusScore(focusLevels)

            sums["activity"] += activityScore
            sums["social"] += socialScore
            sums["focus"] += focusScore

            activeUsers.append(profile)
            data[profile.uuid] = {}
            data[profile.uuid]["user"] = {
                "activity": activityScore,
                "social": socialScore,
                "focus": focusScore
            }

    numUsers = len(activeUsers)
    if numUsers > 0:
        # Population mean and standard deviation across active users, per dimension
        averages = { k: sums[k] / numUsers for k in sums }
        variances = { k: [(data[p.uuid]["user"][k] - averages[k])**2 for p in activeUsers] for k in averages }
        stdDevs = { k: math.sqrt(sum(variances[k]) / len(variances[k])) for k in variances }

        for profile in activeUsers:
            token = getToken(profile, "app-uuid")
            internalDataStore = getInternalDataStore(profile, "Living Lab", "Social Health Tracker", token)
            # Band of +/- one standard deviation around the average, clamped to the 0-10 score range
            data[profile.uuid]["averageLow"] = { k: max(0, averages[k] - stdDevs[k]) for k in stdDevs }
            data[profile.uuid]["averageHigh"] = { k: min(averages[k] + stdDevs[k], 10) for k in stdDevs }
            internalDataStore.saveAnswer("socialhealth", data[profile.uuid])

    return data
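# Illustrative sketch only (not called by the task above): the per-dimension banding that
# recentSocialHealthScores2 computes inline can be summarized as a single helper. The
# 0-10 clamp range mirrors the code above; the helper itself and its name are assumptions.
def _scoreBandSketch(scores, floor = 0, ceiling = 10):
    average = sum(scores) / float(len(scores))
    # Population (not sample) variance, matching the stdDevs computation above
    variance = sum((s - average) ** 2 for s in scores) / len(scores)
    stdDev = math.sqrt(variance)
    # Band of +/- one standard deviation around the mean, clamped to the displayable range
    return (max(floor, average - stdDev), min(average + stdDev, ceiling))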
def findRecentPlaces():
    currentTime = time.time()
    today = date.fromtimestamp(currentTime)
    startTime = time.mktime((today - timedelta(days=14)).timetuple())

    # Note: we're not taking the full 9-5 sampling. Clustering is expensive, so anything we can leave out helps...
    # Combined with the fact that "lunch" time might not be indicative of work locations, this might be more accurate anyway
    nineToFives = [(nine, nine + 3600*8) for nine in range(int(startTime + 3600*9), int(currentTime), 3600*24)]
    #nineToFives.extend([(two, two + 3600*2) for two in range(int(startTime + 3600*14), int(currentTime), 3600*24)])
    midnightToSixes = [(midnight, midnight + 3600*6) for midnight in range(int(startTime), int(currentTime), 3600*24)]

    data = {}
    profiles = Profile.objects.all()

    for profile in profiles:
        ids = getInternalDataStore(profile, "Living Lab", "My Places", "")
        #ids.saveAnswer("RecentPlaces", [])
        # Work is the densest daytime region; home is the densest overnight region
        work = findTopBoundingRegion(ids, nineToFives)
        home = findTopBoundingRegion(ids, midnightToSixes)
        data[profile.uuid] = []
        if work is not None:
            data[profile.uuid].append({ "key": "work", "bounds": work })
        if home is not None:
            data[profile.uuid].append({ "key": "home", "bounds": home })
        ids.saveAnswer("RecentPlaces", data[profile.uuid])

    #print "Finding work locations..."
    #data = findRecentPlaceBounds("work", nineToFives)
    #print "Finding home locations..."
    #data = findRecentPlaceBounds("home", midnightToSixes)
    print "... done with RecentPlaces"
    return data
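# Illustrative example (an assumption, not part of the task above): for one hypothetical day
# whose midnight timestamp is exampleMidnight, the sampling windows built above look like this.
def _exampleDayWindows(exampleMidnight):
    workWindow = (exampleMidnight + 3600*9, exampleMidnight + 3600*9 + 3600*8)  # 9am - 5pm
    homeWindow = (exampleMidnight, exampleMidnight + 3600*6)                    # midnight - 6am
    return workWindow, homeWindow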
def recentSocialHealthScores():
    profiles = Profile.objects.all()
    data = {}

    activityScores = recentActivityScore()
    socialScores = recentSocialScore()
    focusScores = recentFocusScore()

    scoresList = [activityScores.values(), socialScores.values(), focusScores.values()]
    print scoresList
    # scoresList = [[d for d in scoreList if d > 0.0] for scoreList in scoresList]

    # Population mean and standard deviation per dimension (activity, social, focus)
    averages = [sum(scores) / len(scores) if len(scores) > 0 else 0 for scores in scoresList]
    variances = [map(lambda x: (x - averages[i]) * (x - averages[i]), scoresList[i]) for i in range(len(scoresList))]
    stdDevs = [math.sqrt(sum(variances[i]) / len(scoresList[i])) if len(scoresList[i]) > 0 else 0 for i in range(len(scoresList))]

    activityStdDev = stdDevs[0]
    socialStdDev = stdDevs[1]
    focusStdDev = stdDevs[2]

    print "Averages (activity, social, focus):"
    print averages
    print "Standard Deviations (activity, social, focus):"
    print stdDevs

    for profile in [p for p in profiles if p.uuid in activityScores.keys()]:
        print "storing %s" % profile.uuid
        internalDataStore = getInternalDataStore(profile, "Living Lab", "Social Health Tracker", "")
        data[profile.uuid] = []
        #pdb.set_trace()
        #data[profile.uuid].append({ "key": "activity", "layer": "User", "value": activityScores.get(profile.uuid, 0) })
        data[profile.uuid].append({ "key": "social", "layer": "User", "value": socialScores.get(profile.uuid, 0) })
        #data[profile.uuid].append({ "key": "focus", "layer": "User", "value": focusScores.get(profile.uuid, 0) })
        #data[profile.uuid].append({ "key": "activity", "layer": "averageLow", "value": max(0, averages[0] - stdDevs[0]) })
        data[profile.uuid].append({ "key": "social", "layer": "averageLow", "value": max(0, averages[1] - stdDevs[1]) })
        #data[profile.uuid].append({ "key": "focus", "layer": "averageLow", "value": max(0, averages[2] - stdDevs[2]) })
        #data[profile.uuid].append({ "key": "activity", "layer": "averageHigh", "value": min(averages[0] + stdDevs[0], 10) })
        data[profile.uuid].append({ "key": "social", "layer": "averageHigh", "value": min(averages[1] + stdDevs[1], 10) })
        #data[profile.uuid].append({ "key": "focus", "layer": "averageHigh", "value": min(averages[2] + stdDevs[2], 10) })
        data[profile.uuid].append({ "key": "regularity", "layer": "User", "value": focusScores.get(profile.uuid, 0) })
        data[profile.uuid].append({ "key": "regularity", "layer": "averageLow", "value": max(0, averages[2] - stdDevs[2]) })
        data[profile.uuid].append({ "key": "regularity", "layer": "averageHigh", "value": min(averages[2] + stdDevs[2], 10) })
        data[profile.uuid].append({ "key": "physical activity", "layer": "User", "value": activityScores.get(profile.uuid, 0) })
        data[profile.uuid].append({ "key": "physical activity", "layer": "averageLow", "value": max(0, averages[0] - stdDevs[0]) })
        data[profile.uuid].append({ "key": "physical activity", "layer": "averageHigh", "value": min(averages[0] + stdDevs[0], 10) })
        internalDataStore.saveAnswer("socialhealth", data[profile.uuid])

    # After we're done, re-compute the time graph data to include zeros for blanks.
    # Not ideal to compute this twice, but it gets the job done.
    recentActivityLevels(True)
    # Purposely excluding social and focus scores - blanks are included in their calculations, as a blank could imply an actual zero rather than missing data
    #recentSocialLevels(True)
    #recentFocusLevels(True)

    return data
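# For reference, a sketch of the answer list saved under "socialhealth" for each user above.
# The keys and layers come from the code; the numeric values are hypothetical placeholders.
#
#   [
#     { "key": "social",            "layer": "User",        "value": 4.2 },
#     { "key": "social",            "layer": "averageLow",  "value": 2.1 },
#     { "key": "social",            "layer": "averageHigh", "value": 6.3 },
#     { "key": "regularity",        "layer": "User",        "value": ... },
#     { "key": "physical activity", "layer": "User",        "value": ... },
#     ...
#   ]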
def aggregateForAllUsers(answerKey, timeRanges, aggregator, serviceId, includeBlanks = False, mean = None, dev = None):
    profiles = Profile.objects.all()
    aggregates = {}

    for profile in profiles:
        # NOTE: need a means of getting at a token for authorizing this task to run. For now, we're not checking anyway, so it's blank
        internalDataStore = getInternalDataStore(profile, "Living Lab", "Social Health Tracker", "")
        #if mean is None or dev is None:
        data = aggregateForUser(internalDataStore, answerKey, timeRanges, aggregator, includeBlanks)
        #else:
        #    data = aggregateForUser(profile, answerKey, timeRanges, aggregator, includeBlanks, mean.get(profile.uuid), dev.get(profile.uuid))
        if data is not None and len(data) > 0:
            aggregates[profile.uuid] = data

    return aggregates
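# Hypothetical usage sketch for aggregateForAllUsers - the real call sites live elsewhere in
# this module, and the hourly ranges and "app-uuid" service id below are assumptions:
#
#   hours = [(start, start + 3600) for start in range(int(getStartTime(6, True)), int(time.time()), 3600)]
#   levels = aggregateForAllUsers("RecentActivityByHour", hours, activityForTimeRange, "app-uuid", includeBlanks = True)
#   # levels maps each profile uuid to that user's per-hour aggregates over the window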
def recentGfsaScores():
    for profile in Profile.objects.all():
        ids = getInternalDataStore(profile, "Living Lab", "gfsa", "")
        recentGfsaScore(ids)
def findRecentPlaceBounds(recentPlaceKey, timeRanges, numPlaces = 1, answerKey = "RecentPlaces"):
    profiles = Profile.objects.all()
    data = {}

    for profile in profiles:
        # TODO: figure out how to get at a token here...
        internalDataStore = getInternalDataStore(profile, "Living Lab", "My Places", "")
        #dbName = profile.getDBName()
        #collection = connection[dbName]["funf"]
        locations = []

        # An explanation for why we're doing things the way we are below
        # (there are a few obvious strategies for finding places in location data):
        # 1) Naive approach - take all location samples in all time ranges, find clusters within them,
        #    and take the one with the most points in it.
        # 2) Faster, but more complicated - do 1) for each time range individually to get candidate regions.
        #    Loop over candidate regions, collapsing and "voting" for those that overlap. Take the one with the most votes.
        #    Notes: This is essentially 2 levels of clustering, with the simplification that overlapping regions would
        #        have been clustered together anyway (i.e., bounding boxes should be similar, but not the same as, strategy 1).
        #    Pros: Faster - each clustering is limited to 100 entries. In practice, this is more than enough.
        #        If this poses an issue, time ranges can be chosen more carefully (more / shorter time ranges).
        #    Cons: Bounding boxes aren't the same as 1). In particular, two candidate boxes may not overlap, but should
        #        have been clustered together anyway.
        # 3) Binning pre-process - Same as 1), but perform a binning pre-process on the location data, collapsing multiple
        #    samples into single entries, with associated weights.
        #    Notes: This is essentially a lower-resolution version of strategy 1. Bounding boxes should be lower-resolution
        #        versions of those from strategy 1.
        #    Pros: Bounding boxes should be the same as 1). Takes into account all entries when clustering.
        #    Cons: Less fine-grained control over the number of entries per cluster than 2). In particular, for sparse
        #        location data, this may not reduce the number of entries we must cluster.
        # The following is an implementation of method 2):
        potentialRegions = []
        #pdb.set_trace()
        for timeRange in timeRanges:
            # NOTE: is a limit on the number of entries still necessary, if we're choosing the timeRanges carefully?
            values = [entry["value"] for entry in internalDataStore.getData("LocationProbe", timeRange[0], timeRange[1]) or []]
            # Use all locations except the most gratuitously inaccurate ones
            values = [value for value in values if float(value["maccuracy"]) < 100]
            clusters = clusterFunfLocations(values, 100)
            if len(clusters) > 0:
                #clusters.sort(key = lambda cluster: -len(cluster))
                #topClusters = clusters[:min(len(clusters), numPlaces)]
                clusterLocations = max(clusters, key = lambda cluster: len(cluster))
                if isinstance(clusterLocations, list):
                    lats = [loc[0] for loc in clusterLocations]
                    longs = [loc[1] for loc in clusterLocations]
                    if min(lats) != max(lats) and min(longs) != max(longs):
                        # Only add regions that aren't degenerate (single points)
                        potentialRegions.append([min(lats), min(longs), max(lats), max(longs)])

        if len(potentialRegions) > 0:
            # Vote: each candidate region gets one vote per other candidate it overlaps, and overlapping candidates are merged
            overlaps = [{ "region": r1, "overlapList": [r2 for r2 in potentialRegions if r2 is not r1 and boundsOverlap(r1, r2)] } for r1 in potentialRegions]
            reduced = [{ "region": reduce(lambda r1, r2: mergeBoxes(r1, r2), r["overlapList"], r["region"]), "votes": len(r["overlapList"]) } for r in overlaps]
            reduced.sort(key = lambda r: -r["votes"])

            # Keep only the highest-voted regions that don't overlap ones already chosen
            final = []
            for r in reduced:
                if not listContainsOverlap([f["region"] for f in final], r["region"]):
                    final.append(r)

            mostOverlap = final[:min(len(final), numPlaces)]
            mostVoted = [r["region"] for r in mostOverlap]
            if numPlaces == 1:
                mostVoted = mostVoted[0]

            # Replace any previous entry for this place key, keeping the rest of the stored answer
            answer = internalDataStore.getAnswerList(answerKey)
            answer = answer[0]["value"] if answer.count() > 0 else []
            data[profile.uuid] = [datum for datum in answer if datum["key"] != recentPlaceKey]
            data[profile.uuid].append({ "key": recentPlaceKey, "bounds": mostVoted })
            answer = data[profile.uuid]
            internalDataStore.saveAnswer(answerKey, answer)

    return data
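# findRecentPlaceBounds relies on boundsOverlap, mergeBoxes and listContainsOverlap, which are
# defined elsewhere. As a rough sketch of the geometry they imply (an assumption, not the
# actual implementations), with boxes stored as [minLat, minLong, maxLat, maxLong]:
def _boundsOverlapSketch(b1, b2):
    # Axis-aligned boxes overlap iff their extents overlap on both axes
    return not (b1[2] < b2[0] or b2[2] < b1[0] or b1[3] < b2[1] or b2[3] < b1[1])

def _mergeBoxesSketch(b1, b2):
    # Smallest box containing both inputs
    return [min(b1[0], b2[0]), min(b1[1], b2[1]), max(b1[2], b2[2]), max(b1[3], b2[3])]

def _listContainsOverlapSketch(boxes, candidate):
    return any(_boundsOverlapSketch(box, candidate) for box in boxes)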