def fanout_to_followers(cls, tweet):
    """Create a NewsFeed row for every follower of the tweet's author, and for the author.

    Args:
        tweet: The tweet to distribute; ``tweet.user`` is used as its author.
    """
    author = tweet.user
    # The author goes last so the tweet also shows up in their own feed.
    recipients = [*FriendshipService.get_followers(author), author]
    rows = [NewsFeed(user=recipient, tweet=tweet) for recipient in recipients]
    # One bulk insert instead of one INSERT per recipient.
    NewsFeed.objects.bulk_create(rows)
def fanout_to_followers(cls, tweet):
    """Fan a tweet out to its author's followers and to the author, then cache each row.

    Args:
        tweet: The tweet to distribute; ``tweet.user`` is used as its author.
    """
    author = tweet.user
    feed_rows = [
        NewsFeed(user=owner, tweet=tweet)
        for owner in (*FriendshipService.get_followers(author), author)
    ]
    NewsFeed.objects.bulk_create(feed_rows)

    # bulk_create does not fire the post_save signal, so each row has to be
    # pushed into the cache by hand.
    for row in feed_rows:
        cls.push_newsfeed_to_cache(row)
def fan_out_to_followers(cls, tweet):
    """Create NewsFeed rows for the author's followers and the author.

    Every row copies the tweet's ``created_at`` value.

    Args:
        tweet: The tweet to distribute; ``tweet.user`` is used as its author.
    """
    author, posted_at = tweet.user, tweet.created_at
    audience = list(FriendshipService.get_followers(author))
    # Authors should be able to see their own posts in their feed.
    audience.append(author)
    NewsFeed.objects.bulk_create([
        NewsFeed(tweet=tweet, user=member, created_at=posted_at)
        for member in audience
    ])
def fanout_to_followers(self, tweet):
    """Create a NewsFeed row for each follower of the tweet's author, and for the author.

    Args:
        tweet: The tweet to distribute; ``tweet.user`` is used as its author.
    """
    def build_row(owner):
        # Instantiating the model does not touch the database.
        return NewsFeed(user=owner, tweet=tweet)

    pending = [
        build_row(follower)
        for follower in FriendshipService.get_followers(tweet.user)
    ]
    pending.append(build_row(tweet.user))

    # Do not call NewsFeed.objects.create() once per follower: a database
    # operation inside a loop is very inefficient. bulk_create merges the
    # INSERT statements into one.
    NewsFeed.objects.bulk_create(pending)
def batch_create(cls, batch_params):
    """Create newsfeeds in bulk from keyword-argument dicts and cache each one.

    The 'switch_newsfeed_to_hbase' gatekeeper switch chooses the storage
    backend: HBase when on, the relational ``NewsFeed`` model otherwise.

    Args:
        batch_params: Iterable of dicts; each dict is expanded as the keyword
            arguments of one ``NewsFeed`` (or handed to HBase as-is).

    Returns:
        The created newsfeed objects.
    """
    use_hbase = GateKeeper.is_switch_on('switch_newsfeed_to_hbase')
    if not use_hbase:
        # Never issue one create() per row inside a loop, which is very
        # inefficient; bulk_create merges the INSERT statements into one.
        created = [NewsFeed(**fields) for fields in batch_params]
        NewsFeed.objects.bulk_create(created)
    else:
        created = HBaseNewsFeed.batch_create(batch_params)

    # bulk_create does not fire the post_save signal, so each newsfeed has to
    # be pushed into the cache by hand.
    for item in created:
        NewsFeedService.push_newsfeed_to_cache(item)
    return created
def batch_create(cls, batch_params):
    """Bulk-create newsfeeds on the active backend, then push them to the cache.

    Args:
        batch_params: Iterable of dicts of newsfeed fields, one dict per
            newsfeed to create.

    Returns:
        The newsfeed objects that were created.
    """
    hbase_enabled = GateKeeper.is_switch_on('switch_newsfeed_to_hbase')
    if hbase_enabled:
        feeds = HBaseNewsFeed.batch_create(batch_params)
    else:
        feeds = [NewsFeed(**kwargs) for kwargs in batch_params]
        NewsFeed.objects.bulk_create(feeds)

    # post_save is not sent by bulk_create, so the cache is filled manually.
    for feed in feeds:
        NewsFeedService.push_newsfeed_to_cache(feed)

    return feeds
def fanout_to_followers(cls, tweet):
    """Create NewsFeed rows for the author's followers and the author in one batch.

    Args:
        tweet: The tweet to distribute; ``tweet.user`` is used as its author.
    """
    author = tweet.user
    followers = FriendshipService.get_followers(author)

    # Building the instances sends nothing to the database; that would only
    # happen on .save().
    batch = [NewsFeed(user=follower, tweet=tweet) for follower in followers]
    batch += [NewsFeed(user=author, tweet=tweet)]

    # Creating rows one at a time inside a loop is very inefficient;
    # bulk_create combines all the INSERTs into one statement.
    NewsFeed.objects.bulk_create(batch)
def fanout_to_followers(cls, tweet):
    """Create a NewsFeed row for each follower of the tweet's author, and for the author.

    Args:
        tweet: The tweet to distribute; ``tweet.user`` is used as its author.
    """
    author = tweet.user
    # Users are not their own followers, so the author is added explicitly
    # in order to see their own tweet in their newsfeed.
    feed_owners = [*FriendshipService.get_followers(author), author]

    # A query per follower inside a loop is not acceptable in production: the
    # web server and the database usually sit on different machines, so every
    # extra query adds round-trip latency. Constructing the instances issues
    # no write (nothing is saved yet); bulk_create then merges the INSERTs
    # into one statement.
    NewsFeed.objects.bulk_create(
        [NewsFeed(user=owner, tweet=tweet) for owner in feed_owners]
    )
def fanout_newsfeeds_batch_task(tweet_id, follower_ids):
    """Create one NewsFeed row per follower id for a tweet and cache each row.

    Args:
        tweet_id: Id of the tweet being distributed.
        follower_ids: Iterable of user ids that should receive the tweet.

    Returns:
        A summary string of the form "<count> newsfeeds created".
    """
    # Imported at call time rather than at module level
    # (presumably to avoid a circular import; confirm).
    from newsfeeds.services import NewsFeedService

    created = [
        NewsFeed(user_id=user_id, tweet_id=tweet_id)
        for user_id in follower_ids
    ]
    NewsFeed.objects.bulk_create(created)

    # bulk_create does not send post_save, so the cache is updated explicitly.
    for row in created:
        NewsFeedService.push_newsfeed_to_cache(row)

    return "{} newsfeeds created".format(len(created))
def populate_newsfeed_for_friendship(cls, friendship):
    """Backfill the follower's newsfeed with the followed user's existing tweets.

    For every tweet by ``friendship.to_user`` a NewsFeed row owned by
    ``friendship.from_user`` is created, carrying the tweet's ``created_at``.

    Args:
        friendship: Object exposing ``from_user`` (the follower) and
            ``to_user`` (the user being followed).
    """
    follower = friendship.from_user
    followee = friendship.to_user

    backfill = [
        NewsFeed(tweet=tweet, user=follower, created_at=tweet.created_at)
        for tweet in Tweet.objects.filter(user=followee)
    ]
    # Insert (and cache) the rows in the reverse of the query order.
    backfill.reverse()
    NewsFeed.objects.bulk_create(backfill)

    # bulk_create does not send post_save, so push each row to the cache here.
    for row in backfill:
        cls.push_newsfeed_to_cache(row)
def fanout_newsfeeds_batch_task(tweet_id, follower_ids):
    """Write NewsFeed rows linking a tweet to each given follower, then cache them.

    Args:
        tweet_id: Id of the tweet being distributed.
        follower_ids: Iterable of recipient user ids.

    Returns:
        A summary string of the form "<count> newsfeeds created".
    """
    # Imported at call time rather than at module level
    # (presumably to avoid a circular import; confirm).
    from newsfeeds.services import NewsFeedService

    feed_rows = [
        NewsFeed(user_id=recipient_id, tweet_id=tweet_id)
        for recipient_id in follower_ids
    ]
    NewsFeed.objects.bulk_create(feed_rows)

    # bulk_create will not invoke the post_save signal, so push every
    # newsfeed into the cache manually.
    for feed_row in feed_rows:
        NewsFeedService.push_newsfeed_to_cache(feed_row)

    total = len(feed_rows)
    return "{} newsfeeds created".format(total)
def fanout_newsfeeds_batch_task(tweet_id, follower_ids):
    """Create and cache NewsFeed rows for one tweet and a batch of follower ids.

    Args:
        tweet_id: Id of the tweet being distributed.
        follower_ids: Iterable of user ids that should receive the tweet.

    Returns:
        A summary string of the form "<count> newsfeeds created".
    """
    # The import lives inside the function to avoid a circular dependency.
    from newsfeeds.services import NewsFeedService

    # Do not call NewsFeed.objects.create() per follower id: a database
    # operation inside a loop is very inefficient. bulk_create merges the
    # INSERT statements into one.
    batch = [
        NewsFeed(user_id=fid, tweet_id=tweet_id)
        for fid in follower_ids
    ]
    NewsFeed.objects.bulk_create(batch)

    # bulk_create does not trigger the post_save signal, so each row must be
    # pushed into the cache by hand.
    for entry in batch:
        NewsFeedService.push_newsfeed_to_cache(entry)

    return "{} newsfeeds created".format(len(batch))
def fanout_newsfeeds_batch_task(tweet_id, follower_ids):
    """Bulk-insert a NewsFeed row per follower id for a tweet and cache each row.

    Args:
        tweet_id: Id of the tweet being distributed.
        follower_ids: Iterable of user ids that should receive the tweet.

    Returns:
        A summary string of the form '<count> newsfeeds created'.
    """
    # Imported inside the function to avoid a circular import.
    from newsfeeds.services import NewsFeedService

    # One bulk insert instead of a create() call per follower: putting a
    # database operation in a loop is very slow.
    inserted = [
        NewsFeed(user_id=follower_id, tweet_id=tweet_id)
        for follower_id in follower_ids
    ]
    NewsFeed.objects.bulk_create(inserted)

    # bulk_create does not emit post_save, so each newsfeed is pushed to the
    # cache manually.
    for feed in inserted:
        NewsFeedService.push_newsfeed_to_cache(feed)

    created_count = len(inserted)
    return '{} newsfeeds created'.format(created_count)