def save_source_user(is_update, user, user_recent_media):
    """Compute the derived values for a user and save it as a source user.

    Args:
        is_update: True when a source-user row already exists for this user;
            in that case ``user.created_time`` is carried over.
        user: Object exposing the profile attributes copied below
            (``user_id``, ``username``, ``full_name``, ``bio``, ``website``,
            ``media_count``, ``follows``, ``followers`` and, when updating,
            ``created_time``).
        user_recent_media: Sequence of the user's recent media objects; each
            item must expose ``id``.
    """
    # The first slice element alone drives the "most recent" rating, while the
    # full sequence drives the averaged one.
    most_recent_engagement_rating = engagement_rating(
        user_recent_media[0:1], user.followers)
    averaged_engagement_rating = engagement_rating(
        user_recent_media, user.followers)
    is_trending, trending_value = trending(user_recent_media, user.followers)
    locations = find_location(user_recent_media)

    # Build a concrete list: on Python 3 ``map`` returns a lazy one-shot
    # iterator, which is not a suitable value to hand to the entity.
    recent_media_ids = [media.id for media in user_recent_media]

    # Take the timestamp once so created/updated agree on a fresh insert.
    now = datetime.datetime.now()
    updated_time = now
    created_time = user.created_time if is_update else now
    # In the event the column is Null because this field was added after the
    # initial data entry, fall back to the current time.
    if created_time is None:
        created_time = now

    source_user = SourceUserEntity(
        user_id=user.user_id,
        username=user.username,
        full_name=user.full_name,
        bio=user.bio,
        locations=locations,
        website=user.website,
        media_count=user.media_count,
        follows=user.follows,
        followers=user.followers,
        recent_media_ids=recent_media_ids,
        most_recent_engagement_rating=most_recent_engagement_rating,
        averaged_engagement_rating=averaged_engagement_rating,
        trending=is_trending,
        trending_value=trending_value,
        created_time=created_time,
        updated_time=updated_time)

    # Log which kind of write is about to happen.
    if is_update:
        print("UPDATE user: " + repr(source_user))
    else:
        print("INSERT user: " + repr(source_user))
    source_user.save()
def open_cassandra_session():
    """Prepare the environment, configure the connection and sync all tables.

    Calls ``setup_env()``, then ``connection.setup`` with the host and
    keyspace read from ``CASSANDRA_CONFIG`` (protocol version 3), and finally
    invokes ``sync_table()`` on each entity class in a fixed order.
    """
    setup_env()

    hosts = [CASSANDRA_CONFIG['ip']]
    keyspace = CASSANDRA_CONFIG['keyspace']
    connection.setup(hosts, keyspace, protocol_version=3)

    # Same order as the tables were originally synced.
    entity_classes = (
        SourceUserEntity,
        RawRecentMediaEntity,
        RawUserEntity,
        RawUserRecentMediaEntity,
    )
    for entity_class in entity_classes:
        entity_class.sync_table()
def handle_user_info(parsed_media, user_recent_media_added, users_added,
                     users_updated):
    """Fetch, store and summarise the author of ``parsed_media``.

    The user's profile and up to 64 recent media items are requested from
    ``api``, each is parsed and saved as a raw entity, and a source-user row
    is then inserted or updated via ``save_source_user``.

    Args:
        parsed_media: Object exposing ``user_id`` of the user to process.
        user_recent_media_added: Running count of recent-media rows saved.
        users_added: Running count of source users inserted.
        users_updated: Running count of source users updated.

    Returns:
        Tuple ``(user_recent_media_added, users_added, users_updated)`` with
        the counters incremented for the work done in this call.
    """
    # Find the user info
    user = RawUserEntity.parse(api.user(parsed_media.user_id))
    user.save()

    # Find and parse the user's recent media
    user_recent_media = []
    recents = api.user_recent_media(user_id=parsed_media.user_id, count=64)
    # Only the first element of the response is iterated -- presumably the
    # media list of a (media, pagination) pair; confirm against the client.
    for recent in recents[0]:
        parsed_user_recent_media = RawUserRecentMediaEntity.parse(recent)
        parsed_user_recent_media.save()
        user_recent_media.append(parsed_user_recent_media)
        user_recent_media_added += 1

    # Look the existing row up once; calling ``first()`` twice repeats the
    # lookup and the second result may not match the one that was tested.
    existing_source_user = SourceUserEntity.objects(
        SourceUserEntity.user_id == user.user_id).first()
    if existing_source_user:
        # NOTE(review): the stored row, not the freshly fetched ``user``, is
        # passed on update, so profile fields come from the existing row --
        # confirm this is intended.
        save_source_user(True, existing_source_user, user_recent_media)
        users_updated += 1
    else:
        save_source_user(False, user, user_recent_media)
        users_added += 1

    return user_recent_media_added, users_added, users_updated
def handle_user_info(parsed_media, user_recent_media_added, users_added,
                     users_updated):
    """Process the user who owns ``parsed_media`` and update the counters.

    Requests the user's profile and up to 64 recent media items from ``api``,
    parses and saves each as a raw entity, then inserts or updates the
    matching source-user row through ``save_source_user``.

    Args:
        parsed_media: Object exposing ``user_id`` of the user to process.
        user_recent_media_added: Running count of recent-media rows saved.
        users_added: Running count of source users inserted.
        users_updated: Running count of source users updated.

    Returns:
        Tuple ``(user_recent_media_added, users_added, users_updated)`` with
        the counters incremented for the work done in this call.
    """
    # Find the user info
    user = RawUserEntity.parse(api.user(parsed_media.user_id))
    user.save()

    # Find and parse the user's recent media
    user_recent_media = []
    recents = api.user_recent_media(user_id=parsed_media.user_id, count=64)
    # Only index 0 of the response is consumed -- presumably the media list
    # of a (media, pagination) pair; verify against the API client.
    for recent in recents[0]:
        parsed_user_recent_media = RawUserRecentMediaEntity.parse(recent)
        parsed_user_recent_media.save()
        user_recent_media.append(parsed_user_recent_media)
        user_recent_media_added += 1

    # Resolve the query a single time instead of calling ``first()`` twice,
    # so the row that is tested is the same row that is passed on.
    existing_source_user = SourceUserEntity.objects(
        SourceUserEntity.user_id == user.user_id).first()
    if existing_source_user:
        # NOTE(review): on update the stored row is handed over rather than
        # the freshly fetched ``user`` -- confirm the profile fields are not
        # meant to be refreshed from the API here.
        save_source_user(True, existing_source_user, user_recent_media)
        users_updated += 1
    else:
        save_source_user(False, user, user_recent_media)
        users_added += 1

    return user_recent_media_added, users_added, users_updated
def save_source_user(is_update, user, user_recent_media):
    """Build a ``SourceUserEntity`` from ``user`` plus derived values and save it.

    Args:
        is_update: True when a source-user row already exists for this user;
            in that case ``user.created_time`` is carried over.
        user: Object exposing the profile attributes copied below
            (``user_id``, ``username``, ``full_name``, ``bio``, ``website``,
            ``media_count``, ``follows``, ``followers`` and, when updating,
            ``created_time``).
        user_recent_media: Sequence of the user's recent media objects; each
            item must expose ``id``.
    """
    followers = user.followers

    # "Most recent" uses only the first slice element; "averaged" uses all.
    most_recent_engagement_rating = engagement_rating(
        user_recent_media[0:1], followers)
    averaged_engagement_rating = engagement_rating(
        user_recent_media, followers)
    is_trending, trending_value = trending(user_recent_media, followers)
    locations = find_location(user_recent_media)

    # A list comprehension yields a real list on both Python 2 and 3, whereas
    # ``map`` is a lazy one-shot iterator on Python 3.
    recent_media_ids = [media.id for media in user_recent_media]

    # A single timestamp keeps created/updated identical for new rows.
    now = datetime.datetime.now()
    created_time = user.created_time if is_update else now
    # In the event the column is Null because this field was added after the
    # initial data entry, fall back to the current time.
    if created_time is None:
        created_time = now

    source_user = SourceUserEntity(
        user_id=user.user_id,
        username=user.username,
        full_name=user.full_name,
        bio=user.bio,
        locations=locations,
        website=user.website,
        media_count=user.media_count,
        follows=user.follows,
        followers=followers,
        recent_media_ids=recent_media_ids,
        most_recent_engagement_rating=most_recent_engagement_rating,
        averaged_engagement_rating=averaged_engagement_rating,
        trending=is_trending,
        trending_value=trending_value,
        created_time=created_time,
        updated_time=now)

    # Log which kind of write is about to happen.
    if is_update:
        print("UPDATE user: " + repr(source_user))
    else:
        print("INSERT user: " + repr(source_user))
    source_user.save()
import datetime

from src.models.source.source_user import SourceUserEntity
from src.utils.connection import open_cassandra_session

# Script: write every SourceUserEntity row to a dated TSV file, echoing each
# row to stdout as it is written.
if __name__ == '__main__':
    open_cassandra_session()

    # File name carries today's date, e.g. SOURCE_USERS_2016-01-31.txt
    file_name = 'SOURCE_USERS_%s.txt' % datetime.datetime.now().strftime("%Y-%m-%d")

    # ``with`` guarantees the file is closed even if the export fails midway.
    with open(file_name, 'w') as tsv_file:
        tsv_file.write(SourceUserEntity.tsv_header())
        for user in SourceUserEntity.all():
            # Render the row once and reuse it for both outputs.
            row = user.tsv_repr()
            print(row)
            tsv_file.write(row)
            # Flush per row so partial output is on disk during a long export.
            tsv_file.flush()
import datetime

from src.models.source.source_user import SourceUserEntity
from src.utils.connection import open_cassandra_session

# Script: export all SourceUserEntity rows to a TSV file named after the
# current date, printing each row as it goes.
if __name__ == '__main__':
    open_cassandra_session()

    date_stamp = datetime.datetime.now().strftime("%Y-%m-%d")
    # The context manager closes the file even when a row fails to export.
    with open('SOURCE_USERS_%s.txt' % date_stamp, 'w') as tsv_file:
        tsv_file.write(SourceUserEntity.tsv_header())
        for user in SourceUserEntity.all():
            # Build the TSV line once; it is both echoed and written.
            line = user.tsv_repr()
            print(line)
            tsv_file.write(line)
            # Keep the per-row flush so progress is visible on disk.
            tsv_file.flush()