def join_on_field(self, other_getter, map_fxn=None, new_field_name=None, join_field='id',
                  join_field_1=None, join_field_2=None, discard_orphans=True):
    """
    Join this getter to another by indexing both on a field.

    Elements whose join key appears in both getters are combined, either by
    `map_fxn` or, when no mapping function is given, by `merge_dicts`.

    other_getter    : the getter to join against
    map_fxn         : optional callable `(element, other_element) -> value`;
                      when given, each joined row holds only `new_field_name`
                      (the mapped value) and `join_field`
    new_field_name  : key under which the mapped value is stored
    join_field      : field to index both getters on (default 'id')
    join_field_1    : overrides `join_field` for this getter
    join_field_2    : overrides `join_field` for `other_getter`
    discard_orphans : do we discard elements in one that don't match the other?

    Returns a new getter built with `self.from_fields`.
    """
    if join_field != 'id' or join_field_1 or join_field_2:
        # Non-default join: index both sides by the requested field(s)
        join_field_1 = join_field_1 or join_field
        join_field_2 = join_field_2 or join_field
        getter_by_join_field = {element[join_field_1]: element for element in self.fields}
        other_by_join_field = {element[join_field_2]: element for element in other_getter.fields}
    else:
        # Default join on 'id': reuse the indexes both getters already hold
        getter_by_join_field = self._fields_by_id
        other_by_join_field = other_getter._fields_by_id

    keys_in_both = set(getter_by_join_field) & set(other_by_join_field)

    if map_fxn:
        # Run the mapping function
        # NOTE(review): the row reads element[join_field] even when the join
        # was performed on join_field_1 -- confirm that is intended.
        joined = [{
            new_field_name: map_fxn(getter_by_join_field[key], other_by_join_field[key]),
            # join field is same in both, by definition
            join_field: getter_by_join_field[key][join_field]
        } for key in keys_in_both]
    else:
        # If no mapping function, just take all existing fields
        joined = [merge_dicts(getter_by_join_field[key], other_by_join_field[key])
                  for key in keys_in_both]

    if not discard_orphans:
        # Append the orphans
        # Note that this may cause problems if the
        # mapping function introduced any new fields
        joined.extend(getter_by_join_field[key]
                      for key in set(getter_by_join_field) - keys_in_both)
        joined.extend(other_by_join_field[key]
                      for key in set(other_by_join_field) - keys_in_both)

    # Return a new getter
    return self.from_fields(joined)
def get_top_friends_and_groups(results, user):
    """
    This takes input from PhotosOfMe and TaggedWithMe,
    TaggedWithThisYear, and TopPostersFromYear

    For convenience, it fires off the group that depends on PhotosOfMe

    results : iterable of result dicts returned from the group; they are
              flattened into a single dict with `merge_dicts`
    user    : the user the results belong to; `user.family` and
              `user.profile.significant_other_id` are read

    Returns the flattened results dict with three keys added:
    'top_friends' (list of per-friend photo getters), 'group_shots' and
    'on_photos_of_me_async' (the async result of the fired-off group).
    """
    # Flatten the results returned from the group
    results = merge_dicts(*results)

    on_photos_of_me = group([
        get_photos_by_year.subtask((results, user,)) |
        get_top_albums.subtask((user,)) |
        get_top_albums_photos.subtask((user,))
    ])
    on_photos_of_me_async = on_photos_of_me.apply_async()

    # Combine tag counts and post counts into a single 'count' per friend
    top_friends = results['most_tagged_recently'].join_on_field(
        results['top_posters_from_year'],
        map_fxn=lambda x, y: x['count'] + y['count'],
        new_field_name='count',
        discard_orphans=False
    )

    # Get ids of gf/bf and immediate family
    # (a set, since it is only used for membership tests in the loop below)
    family_ids = {
        family_member.facebook_id
        for family_member in user.family.all()
        if family_member.relationship in IMMEDIATE_FAMILY
    }

    # For each top friend, pull the photos they are tagged in
    # Gf/bf and immediate family to the front, the rest in top friends order
    top_friend_photos = []
    pulled_gfbf = False
    pulled_gfbf_family = 0
    for friend in top_friends.order_by('count'):
        friend_tags = results['tagged_with_me']['tagged_with_me'].filter(
            lambda x: x['subject'] == friend['id']
        )
        if len(friend_tags) <= TOP_FRIEND_MIN_PHOTOS:
            # Not enough photos with this friend
            continue

        # Perform a join on `photos_of_me` to get the photo scores,
        # and sort by year, then score
        friend_photos = friend_tags.join_on_field(
            results['photos_of_me'], join_field_1='object_id'
        ).get_in_decending_year_score_order()
        if len(friend_tags) != len(friend_photos):
            logger.warning('Received a top friend photo that wasn\'t in \'photos_of_me\'. Odd.')

        # Bring photos of gf/bf and immediate family to the front
        if user.profile.significant_other_id and friend['id'] == user.profile.significant_other_id:
            top_friend_photos.insert(0, friend_photos)
            pulled_gfbf_family += 1
            pulled_gfbf = True
        elif friend['id'] in family_ids and pulled_gfbf_family < NUM_GFBF_FAMILY_FIRST:
            if pulled_gfbf:
                # Insert behind their gfbf
                top_friend_photos.insert(1, friend_photos)
            else:
                top_friend_photos.insert(0, friend_photos)
            pulled_gfbf_family += 1
        else:
            top_friend_photos.append(friend_photos)

    # For each group photo, grab its score from 'photos_of_me'
    # and filter to photos from this year
    group_photos = []
    for group_photo in results['tagged_with_me']['group_photos'].fields:
        group_photo_id = group_photo['id']
        if group_photo_id in results['photos_of_me'].fields_by_id:
            group_photos.append(results['photos_of_me'].fields_by_id[group_photo_id])
        else:
            logger.warning(
                'Received a group photo %s that wasn\'t in \'photos_of_me\'. Odd.',
                group_photo_id
            )

    # Sort by year, score
    group_photos_getter = ResultGetter.from_fields(group_photos)
    group_shots = group_photos_getter.get_in_decending_year_score_order()

    # Return the lists and the subtask
    results['top_friends'] = top_friend_photos
    results['group_shots'] = group_shots
    results['on_photos_of_me_async'] = on_photos_of_me_async
    return results
def run_book(user, results):
    """
    Build and save a `Yearbook` (and its `PhotoRankings`) for `user`.

    Steps, in order:
      1. Fire a group of `rt` subtasks, wait for them, and merge their
         output into `results`.
      2. Score friends (comments, posts, shared photos), photos, group
         photos, albums and posts.
      3. Store the rankings on a `PhotoRankings` instance.
      4. Assign photos to the `Yearbook` fields (top photos, group photos,
         top friends, top albums, year photos, back-in-time photos).
      5. Save both objects and report the run time through `tracker`.

    user    : the yearbook owner; `user.id`, `user.username`, `user.profile`
              and `user.family` are read
    results : dict of results gathered so far; the subtask results are
              merged on top of it

    Returns the saved `Yearbook`.
    """
    runtime_start = time.time()

    # Run separate, async tasks to facebook
    # "rt.s" == "run_task.subtask"
    fql_job = group([
        rt.subtask(kwargs={'task_cls': PhotosOfMeTask, 'end_time': UNIX_THIS_YEAR_END, 'user_id': user.id, }),
        rt.subtask(kwargs={'task_cls': CommentsOnPhotosOfMeTask, 'end_time': UNIX_THIS_YEAR_END, 'user_id': user.id, }),
        rt.subtask(kwargs={'task_cls': OwnerPostsFromYearTask, 'user_id': user.id, }),
        rt.subtask(kwargs={'task_cls': OthersPostsFromYearTask, 'user_id': user.id, }),
        rt.subtask(kwargs={'task_cls': FamilyTask, 'user_id': user.id, }),
    ])
    job_async = fql_job.apply_async()
    # NOTE(review): this blocks on subtask results from inside the function
    job_results = job_async.get()

    results = merge_dicts(results, *job_results)

    ## Results contains
    #   'get_friends'               all friends (already saved to db)
    #   'tagged_with_me'            `subject, object_id, created` from tags of photos I am in
    #   'comments_on_photos_of_me'
    #   'others_posts_from_year'
    #   'owner_posts_from_year'
    #   'photos_of_me'

    # Toss any results in 'tagged_with_me' that aren't in 'photos_of_me'
    results['tagged_with_me'] = results['tagged_with_me'].filter(
        lambda x: x['object_id'] in results['photos_of_me'].ids
    )

    # Get number of people in each photo
    num_tags_by_photo_id = FreqDistResultGetter(results['tagged_with_me'], id_field='object_id')

    comments_by_photo_id = defaultdict(list)
    comments_score_by_user_id = defaultdict(lambda: 0)
    for comment in results['comments_on_photos_of_me']:
        # Get the comments in each photo
        comments_by_photo_id[comment['object_id']].append(comment)
        # Get the number of commments by each user, discounted by year
        comment_age = max((THIS_YEAR.year - comment['time'].year + 1.0), 1.0)
        comments_score_by_user_id[comment['fromid']] += \
            TOP_FRIEND_POINTS_FOR_PHOTO_COMMENT / comment_age

    # Save the photos to the database
    photos_of_me = []
    for photo in results['photos_of_me']:
        if photo['id'] in num_tags_by_photo_id.ids:
            # num tags + me
            people_in_photo = num_tags_by_photo_id.fields_by_id[photo['id']]['count'] + 1
        else:
            people_in_photo = 0
        photos_of_me.append(FacebookPhoto(
            facebook_id=photo['id'],
            created=photo['created'],
            people_in_photo=people_in_photo,
            height=photo['height'],
            width=photo['width'],
            fb_url=photo['fb_url'],
            comments=comments_by_photo_id[photo['id']],  # it's a defaultdict
            caption=photo['caption']
        ))

    # Save photos, profile fields, and family to db
    save_to_db_async = save_to_db.delay(user, results['family'], photos_of_me)

    ## Calculate top friends

    # Combine the lists of posts
    all_posts_this_year = ResultGetter.from_fields(itertools.chain(
        results['others_posts_from_year'],
        results['owner_posts_from_year'],
    ))

    # Assign each friend points for each post they made
    posts_score_by_user_id = defaultdict(lambda: 0)
    for post in all_posts_this_year:
        posts_score_by_user_id[post['actor_id']] += TOP_FRIEND_POINTS_FOR_POST

    # Calculate photo score for each user, discounted by year
    tags_by_user_id = defaultdict(list)
    for tag in results['tagged_with_me']:
        tags_by_user_id[tag['subject']].append(tag)

    photos_score_by_user_id = defaultdict(lambda: 0.0)
    for friend_id, tag_list in tags_by_user_id.items():
        for tag in tag_list:
            photo_id = tag['object_id']
            peeps_in_photo = num_tags_by_photo_id.fields_by_id[photo_id]['count'] + 1   # num tags + me
            photo = results['photos_of_me'].fields_by_id[photo_id]
            photo_age = datetime.date.today().year - photo['created'].year + 1.0
            if peeps_in_photo == 2:
                photos_score_by_user_id[friend_id] += TOP_FRIEND_POINTS_FOR_PHOTO_OF_2 / photo_age
            elif peeps_in_photo == 3:
                photos_score_by_user_id[friend_id] += TOP_FRIEND_POINTS_FOR_PHOTO_OF_3 / photo_age
            elif peeps_in_photo >= 4:
                photos_score_by_user_id[friend_id] += TOP_FRIEND_POINTS_FOR_PHOTO_OF_4 / photo_age

    # Add em up
    top_friend_ids = (set(comments_score_by_user_id)
                      | set(posts_score_by_user_id)
                      | set(photos_score_by_user_id))
    # `discard`, not `remove`: the owner is absent from the set when they
    # never commented, posted or appeared as a tag subject
    top_friend_ids.discard(user.profile.facebook_id)

    top_friend_score_by_id = {
        friend_id: comments_score_by_user_id[friend_id] +
                   posts_score_by_user_id[friend_id] +
                   photos_score_by_user_id[friend_id]
        for friend_id in top_friend_ids
    }
    top_20_friends_score_by_id = dict(
        sorted(top_friend_score_by_id.items(), key=lambda x: x[1], reverse=True)[:20]
    )

    ## Calculate top photos

    # For each photo, get the number of top friends in the photo
    num_top_friends_by_photo_id = defaultdict(lambda: 0)
    for tag in results['tagged_with_me']:
        if tag['subject'] in top_friend_ids:
            points = 1
            # Double points if the user in top-20
            # TODO: does this make sense?
            if tag['subject'] in top_20_friends_score_by_id:
                points += 1
            num_top_friends_by_photo_id[tag['object_id']] += points

    # Photos of all time
    top_photo_score_by_id = {}
    for photo in results['photos_of_me']:
        # How many comments by friends of mine?
        comments_from_friends = 0
        for comment in comments_by_photo_id[photo['id']]:
            if comment['fromid'] in results['get_friends'].ids:
                comments_from_friends += 1
        if photo['id'] in num_tags_by_photo_id.fields_by_id:
            score = (
                (TOP_PHOTO_POINTS_FOR_TOP_FRIENDS * num_top_friends_by_photo_id[photo['id']] +
                 TOP_PHOTO_POINTS_FOR_COMMENT * comments_from_friends +
                 TOP_PHOTO_POINTS_FOR_LIKE * photo['like_count']) /
                max(num_tags_by_photo_id.fields_by_id[photo['id']]['count'] - 2.0, 1.0)
            )
        else:
            # Photos with no tags get a flat score of 1
            score = 1
        top_photo_score_by_id[photo['id']] = photo['score'] = score

    ## Calculate top group photos
    # group_photos_this_year is only 1 for me
    group_photos = results['photos_of_me'] \
        .filter(lambda x: x['id'] in num_tags_by_photo_id.fields_by_id) \
        .filter(lambda x: num_tags_by_photo_id.fields_by_id[x['id']]['count'] >= GROUP_PHOTO_IS) \
        .filter(lambda x: x['created'] > GROUP_PHOTO_CUTOFF)
    group_photo_score_by_id = {}
    for photo in group_photos:
        score = GROUP_PHOTO_POINTS_FOR_TOP_FRIENDS * num_top_friends_by_photo_id[photo['id']] + \
                GROUP_PHOTO_POINTS_FOR_COMMENT * photo['comment_count'] + \
                GROUP_PHOTO_POINTS_FOR_LIKE * photo['like_count']
        group_photo_score_by_id[photo['id']] = {'score': score, 'created': photo['created']}

    ## Calculate top albums
    album_score_and_date_by_id = defaultdict(lambda: {'score': 0, 'created': None})
    for photo in results['photos_of_me']:
        album_score_and_date_by_id[photo['album_object_id']]['score'] += photo['score']
        # Also tag with the date
        album_score_and_date_by_id[photo['album_object_id']]['created'] = photo['created']

    ## Calculate top post
    for post in all_posts_this_year:
        top_friend_comments = 0
        for comment in post['comments']['comment_list']:
            if comment['fromid'] in top_friend_ids:
                top_friend_comments += 1
        post['score'] = \
            (COMMENT_POINTS_FOR_MADE_BY_ME * 1 if post['actor_id'] == user.profile.facebook_id else 0) + \
            COMMENT_POINTS_FOR_COMMENT * top_friend_comments + \
            COMMENT_POINTS_FOR_LIKE * post['like_count']

    ## Pull out birthday posts
    # Stays an empty list when the user has no date of birth; the original
    # read `.fields` off a plain list in that case and raised AttributeError
    birthday_post_fields = []
    if user.profile.date_of_birth:
        birthday = user.profile.date_of_birth
        birthday_this_year = datetime.datetime(2012, birthday.month, birthday.day, 0, 0, 0, tzinfo=utc)
        start_time = birthday_this_year - datetime.timedelta(days=1)
        end_time = birthday_this_year + datetime.timedelta(days=3)
        birthday_posts = all_posts_this_year.filter(
            lambda x: start_time < x['created_time'] < end_time and
                      x['message'] and
                      x['actor_id'] in results['get_friends'].ids
        )
        birthday_post_fields = list(birthday_posts.fields)

    ## Save fields to the PhotoRankings class
    rankings = PhotoRankings(user=user)

    top_photos_this_year = results['photos_of_me'] \
        .filter(lambda x: x['created'] > THIS_YEAR) \
        .order_by('score')
    rankings.top_photos = top_photos_this_year
    rankings.group_shots = [
        k for k, v in sorted(
            group_photo_score_by_id.items(),
            # Sort by year, score
            key=lambda x: (x[1]['created'].year, x[1]['score']),
            reverse=True
        )
    ]
    rankings.top_posts = all_posts_this_year.order_by('score')[:10]

    # Back in time
    _, photos_of_me_by_year = results['photos_of_me'].bucket_by_year()
    years = sorted(photos_of_me_by_year.keys(), reverse=True)
    # Skip the most recent year, keep up to NUM_PREV_YEARS before it
    back_in_time = [
        [photo['id'] for photo in photos_of_me_by_year[year].order_by('score')]
        for year in years[1:NUM_PREV_YEARS + 1]
    ]
    rankings.back_in_time = back_in_time

    ## Assign photos to the Yearbook, avoiding duplicates
    yb = Yearbook(rankings=rankings)
    yb.top_post = 0
    yb.birthday_posts = birthday_post_fields
    yb.top_photo_1 = yb.get_first_unused_photo_landscape(rankings.top_photos)  # landscape
    yb.top_photo_2 = yb.get_first_unused_photo(rankings.top_photos)
    yb.top_photo_3 = yb.get_first_unused_photo(rankings.top_photos)
    yb.top_photo_4 = yb.get_first_unused_photo(rankings.top_photos)
    yb.top_photo_5 = yb.get_first_unused_photo(rankings.top_photos)

    # `assign_group_photos` uses FacebookPhoto classes to determine portrait/landscape
    # make sure they finished saving to the db
    save_to_db_async.get()

    # Assign the group photos from different albums, if possible
    # Make one pass assigning from different albums,
    # then a second filling in the gaps
    assigned_group_photos = assign_group_photos(yb, rankings, results['photos_of_me'], do_unique_albums=True)
    if assigned_group_photos < NUM_GROUP_PHOTOS:
        assign_group_photos(yb, rankings, results['photos_of_me'], do_unique_albums=False)

    ## Top friends
    # Do this after we assign the top photos and top group photos,
    # so we can make sure there are enough unused photos of them

    # We need to make sure the user exists in the db
    # Users that came back from the db are still in results['get_friends']
    saved_friends_ids = results['get_friends'].ids
    # A set: only used for membership tests below
    family_ids = set(user.family.all().values_list('facebook_id', flat=True))
    ranked_friend_ids = []
    gfbf_added = False
    for user_id, _score in sorted(top_friend_score_by_id.items(), key=lambda x: x[1], reverse=True):
        if yb.num_unused_photos(tags_by_user_id[user_id]) >= TOP_FRIEND_MIN_UNUSED_PHOTOS \
                and user_id in saved_friends_ids:
            # If user is family or gfbf, insert at front
            if user_id == user.profile.significant_other_id:
                ranked_friend_ids.insert(0, user_id)
                gfbf_added = True
            elif user_id in family_ids:
                ranked_friend_ids.insert(1 if gfbf_added else 0, user_id)
            else:
                ranked_friend_ids.append(user_id)

    # Need to build another list that combines tag and photo score
    rankings.top_friends_ids = ranked_friend_ids[:NUM_TOP_FRIENDS_STORED]
    top_friends_photos = []
    for friend_id in ranked_friend_ids:
        friend_photos = []
        for tag in tags_by_user_id[friend_id]:
            tag_id = tag['object_id']
            photo = results['photos_of_me'].fields_by_id[tag_id]
            friend_photos.append({
                'id': tag_id,
                'score': top_photo_score_by_id[tag_id],
                'width': photo['width'],
                'height': photo['height'],
            })
        friend_photos.sort(key=lambda x: x['score'], reverse=True)
        top_friends_photos.append(friend_photos)
    rankings.top_friends_photos = top_friends_photos

    ## Assign the top friends
    # Bounded by the number of friends actually found, so fewer than
    # NUM_TOP_FRIENDS qualifying friends no longer raises IndexError
    for index in range(min(NUM_TOP_FRIENDS, len(ranked_friend_ids))):
        # Index
        setattr(yb, 'top_friend_%d' % (index + 1), index)
        # Friend stat
        if ranked_friend_ids[index] == user.profile.significant_other_id:
            friend_stat = SIGNIFICANT_OTHER_STAT
        elif ranked_friend_ids[index] in family_ids:
            friend_stat = FAMILY_STAT
        else:
            num_tags = len(rankings.top_friends_photos[index])
            friend_stat = 'Tagged in %d photo%s with you' % (num_tags, 's' if num_tags > 1 else '')
        setattr(yb, 'top_friend_%d_stat' % (index + 1), friend_stat)
        # Set photo
        tf_photo_index = yb.get_first_unused_photo_landscape(rankings.top_friends_photos[index])
        setattr(yb, 'top_friend_%d_photo_1' % (index + 1), tf_photo_index)

    ## Top albums

    # Start pulling album names, photos
    # Can't pickle defaultdict? so just call it here, wouldn't save us much time anyway
    # NOTE(review): `album_photo_score` is not defined inside this function;
    # it must come from module scope -- confirm it exists.
    album_photos_by_score, albums_ranked = pull_album_photos(user, album_score_and_date_by_id, album_photo_score)
    rankings.top_albums_photos = album_photos_by_score
    rankings.top_albums_ranked = albums_ranked

    albums_assigned = 0
    for curr_album_index, curr_album in enumerate(rankings.top_albums_photos):
        photos_to_show = []
        no_more_pics_of_user = False
        while True:
            if len(photos_to_show) < PICS_OF_USER_TO_PROMOTE and not no_more_pics_of_user:
                # Want a pic of the user, loop through album photos looking for one
                photo_of_user = get_next_unused_photo_of_user(
                    yb,
                    curr_album,
                    results['photos_of_me'],
                    used_indices=photos_to_show
                )
                # NOTE(review): truthiness test -- if the helper returns an
                # index, index 0 would be treated as "none found"; confirm.
                if photo_of_user:
                    photos_to_show.append(photo_of_user)
                else:
                    # No more pics of user, just take the next highest unused photo
                    no_more_pics_of_user = True
            else:
                next_photo = yb.get_first_unused_photo(curr_album, used_indices=photos_to_show)
                if next_photo is None:
                    # No photos left, break
                    break
                photos_to_show.append(next_photo)
            if len(photos_to_show) >= ALBUM_PHOTOS_TO_SHOW:
                break
        if len(photos_to_show) < ALBUM_MIN_PHOTOS:
            # Didn't have enough photos, try the next album
            continue

        # Save the fields
        album_str = 'top_album_%d' % (albums_assigned + 1)
        setattr(yb, album_str, curr_album_index)
        for field_num, photo_index in enumerate(photos_to_show):
            setattr(yb, album_str + '_photo_%d' % (field_num + 1), photo_index)
        albums_assigned += 1
        if albums_assigned >= NUM_TOP_ALBUMS:
            break

    ## Throughout the year photos
    # Statement order is kept exactly as in the original
    yb.year_photo_1 = yb.get_first_unused_photo_landscape(rankings.top_photos)
    yb.year_photo_2 = yb.get_first_unused_photo(rankings.top_photos)
    yb.year_photo_6 = get_unused_if_portrait(yb.year_photo_2, rankings.top_photos, yb, results['photos_of_me'])
    yb.year_photo_3 = yb.get_first_unused_photo(rankings.top_photos)
    yb.year_photo_7 = get_unused_if_portrait(yb.year_photo_3, rankings.top_photos, yb, results['photos_of_me'])
    yb.year_photo_4 = yb.get_first_unused_photo(rankings.top_photos)
    yb.year_photo_8 = get_unused_if_portrait(yb.year_photo_4, rankings.top_photos, yb, results['photos_of_me'])
    yb.year_photo_5 = yb.get_first_unused_photo(rankings.top_photos)
    yb.year_photo_9 = get_unused_if_portrait(yb.year_photo_5, rankings.top_photos, yb, results['photos_of_me'])

    ## Back in time photos
    years_to_show = []
    for year_index, year in enumerate(back_in_time):
        curr_year_unused = yb.get_first_unused_photo(year)
        if curr_year_unused is None:
            continue
        years_to_show.append({'year_index': year_index, 'photo_index': curr_year_unused})
        if len(years_to_show) > NUM_PREV_YEARS:
            break

    # Special case: if only found one year, pull an additional photo from that year
    if len(years_to_show) == 1:
        # The entries are keyed 'year_index' (the original read 'index',
        # which always raised KeyError)
        that_year_index = years_to_show[0]['year_index']
        unused_photo_2 = yb.get_first_unused_photo(back_in_time[that_year_index])
        if unused_photo_2 is not None:
            years_to_show.append({'year_index': that_year_index, 'photo_index': unused_photo_2})

    # Save
    for year_num, year_entry in enumerate(years_to_show):
        field_str = 'back_in_time_%d' % (year_num + 1)
        setattr(yb, field_str, year_entry['year_index'])
        setattr(yb, field_str + '_photo_1', year_entry['photo_index'])

    # Save everything
    rankings.save()
    yb.rankings = rankings
    yb.run_time = time.time() - runtime_start
    yb.save()

    # Log the yearbook run time to mixpanel
    tracker.delay('Book Created', properties={
        'distinct_id': user.username,
        'mp_name_tag': user.username,
        'time': time.time(),
        'Book': 'Yearbook 2012',
        'Run Time (sec)': '%.1f' % yb.run_time
    })

    # Initiate a task to start downloading user's yearbook phointos?
    return yb