Пример #1
0
def save_source_user(is_update, user, user_recent_media):
    """Build a SourceUserEntity from ``user`` plus derived metrics and save it.

    Args:
        is_update: True when a source-user row already exists; its
            ``created_time`` is then carried over instead of being reset.
        user: Object exposing ``user_id``, ``username``, ``full_name``,
            ``bio``, ``website``, ``media_count``, ``follows`` and
            ``followers`` (plus ``created_time`` when ``is_update`` is true).
        user_recent_media: Sequence of recent media objects, each with ``id``.
    """
    # Compute the additional values stored alongside the plain user fields.
    most_recent_engagement_rating = engagement_rating(user_recent_media[0:1], user.followers)
    averaged_engagement_rating = engagement_rating(user_recent_media, user.followers)
    is_trending, trending_value = trending(user_recent_media, user.followers)
    locations = find_location(user_recent_media)
    # Build a real list: on Python 3 ``map`` returns a lazy iterator.
    recent_media_ids = [media.id for media in user_recent_media]

    # Take one timestamp so a fresh insert gets identical created/updated times.
    now = datetime.datetime.now()
    updated_time = now
    created_time = user.created_time if is_update else now

    # The column may be NULL because it was added after the initial data entry.
    if created_time is None:
        created_time = now

    source_user = SourceUserEntity(user_id=user.user_id, username=user.username, full_name=user.full_name,
                                   bio=user.bio, locations=locations, website=user.website,
                                   media_count=user.media_count,
                                   follows=user.follows, followers=user.followers, recent_media_ids=recent_media_ids,
                                   most_recent_engagement_rating=most_recent_engagement_rating,
                                   averaged_engagement_rating=averaged_engagement_rating, trending=is_trending,
                                   trending_value=trending_value, created_time=created_time, updated_time=updated_time)

    # NOTE(review): the original log/save statement was garbled in this copy;
    # reconstructed as "log the action, then save" for both the update and the
    # insert case -- confirm against the upstream source.
    action = "UPDATE" if is_update else "INSERT"
    print(action + " user: " + repr(source_user))
    source_user.save()
Пример #2
0
def open_cassandra_session():
    """Prepare the environment, set up the Cassandra connection and sync tables.

    Calls ``setup_env()``, then ``connection.setup`` with the host and keyspace
    read from ``CASSANDRA_CONFIG`` (protocol version 3), and finally calls
    ``sync_table()`` on each entity class in a fixed order.
    """
    setup_env()

    hosts = [CASSANDRA_CONFIG['ip']]
    keyspace = CASSANDRA_CONFIG['keyspace']
    connection.setup(hosts, keyspace, protocol_version=3)

    # Sync order matches the original sequence of calls.
    entities = (SourceUserEntity, RawRecentMediaEntity, RawUserEntity, RawUserRecentMediaEntity)
    for entity in entities:
        entity.sync_table()
Пример #3
0
def open_cassandra_session():
    """Prepare the environment, set up the Cassandra connection and sync tables.

    Calls ``setup_env()``, then ``connection.setup`` with the host and keyspace
    read from ``CASSANDRA_CONFIG`` (protocol version 3), and finally calls
    ``sync_table()`` on each entity class in a fixed order.
    """
    setup_env()

    contact_points = [CASSANDRA_CONFIG['ip']]
    default_keyspace = CASSANDRA_CONFIG['keyspace']
    connection.setup(contact_points, default_keyspace, protocol_version=3)

    # Sync order matches the original sequence of calls.
    for model in (SourceUserEntity,
                  RawRecentMediaEntity,
                  RawUserEntity,
                  RawUserRecentMediaEntity):
        model.sync_table()
Пример #4
0
def handle_user_info(parsed_media, user_recent_media_added, users_added,
                     users_updated):
    """Fetch and store the author of ``parsed_media`` and their recent media.

    The raw user and each recent media item are parsed and saved, then the
    matching source user is either updated or inserted via
    ``save_source_user``.

    Args:
        parsed_media: Media object exposing ``user_id``.
        user_recent_media_added: Running count of recent media items saved.
        users_added: Running count of source users inserted.
        users_updated: Running count of source users updated.

    Returns:
        Tuple ``(user_recent_media_added, users_added, users_updated)`` with
        the counters incremented for the work done in this call.
    """
    # Find the user info
    user = RawUserEntity.parse(api.user(parsed_media.user_id))
    user.save()

    # Find and parse the user's recent media
    user_recent_media = []
    recents = api.user_recent_media(user_id=parsed_media.user_id, count=64)
    # Only element 0 of the response is iterated -- presumably the media list
    # of a (media, pagination) pair; TODO confirm against the API client.
    for recent in recents[0]:
        parsed_user_recent_media = RawUserRecentMediaEntity.parse(recent)
        parsed_user_recent_media.save()
        user_recent_media.append(parsed_user_recent_media)
        user_recent_media_added += 1

    # Fetch the existing row once and reuse it: calling ``first()`` twice
    # costs a second query and may see different data than the check did.
    existing_source_user = SourceUserEntity.objects(
        SourceUserEntity.user_id == user.user_id).first()
    if existing_source_user:
        # NOTE(review): the stored row, not the freshly fetched ``user``, is
        # passed on update -- verify that this is intended.
        save_source_user(True, existing_source_user, user_recent_media)
        users_updated += 1
    else:
        save_source_user(False, user, user_recent_media)
        users_added += 1

    return user_recent_media_added, users_added, users_updated
Пример #5
0
def handle_user_info(parsed_media, user_recent_media_added, users_added, users_updated):
    """Fetch and store the author of ``parsed_media`` and their recent media.

    The raw user and each recent media item are parsed and saved, then the
    matching source user is either updated or inserted via
    ``save_source_user``.

    Args:
        parsed_media: Media object exposing ``user_id``.
        user_recent_media_added: Running count of recent media items saved.
        users_added: Running count of source users inserted.
        users_updated: Running count of source users updated.

    Returns:
        Tuple ``(user_recent_media_added, users_added, users_updated)`` with
        the counters incremented for the work done in this call.
    """
    # Find the user info
    user = RawUserEntity.parse(api.user(parsed_media.user_id))
    user.save()

    # Find and parse the user's recent media
    user_recent_media = []
    recents = api.user_recent_media(user_id=parsed_media.user_id, count=64)
    # Only element 0 of the response is iterated -- presumably the media list
    # of a (media, pagination) pair; TODO confirm against the API client.
    for recent in recents[0]:
        parsed_user_recent_media = RawUserRecentMediaEntity.parse(recent)
        parsed_user_recent_media.save()
        user_recent_media.append(parsed_user_recent_media)
        user_recent_media_added += 1

    # Fetch the existing row once and reuse it: calling ``first()`` twice
    # costs a second query and may see different data than the check did.
    existing_source_user = SourceUserEntity.objects(SourceUserEntity.user_id == user.user_id).first()
    if existing_source_user:
        # NOTE(review): the stored row, not the freshly fetched ``user``, is
        # passed on update -- verify that this is intended.
        save_source_user(True, existing_source_user, user_recent_media)
        users_updated += 1
    else:
        save_source_user(False, user, user_recent_media)
        users_added += 1

    return user_recent_media_added, users_added, users_updated
Пример #6
0
def save_source_user(is_update, user, user_recent_media):
    """Build a SourceUserEntity from ``user`` plus derived metrics and save it.

    Args:
        is_update: True when a source-user row already exists; its
            ``created_time`` is then carried over instead of being reset.
        user: Object exposing ``user_id``, ``username``, ``full_name``,
            ``bio``, ``website``, ``media_count``, ``follows`` and
            ``followers`` (plus ``created_time`` when ``is_update`` is true).
        user_recent_media: Sequence of recent media objects, each with ``id``.
    """
    # Compute the additional values stored alongside the plain user fields.
    most_recent_engagement_rating = engagement_rating(user_recent_media[0:1],
                                                      user.followers)
    averaged_engagement_rating = engagement_rating(user_recent_media,
                                                   user.followers)
    is_trending, trending_value = trending(user_recent_media, user.followers)
    locations = find_location(user_recent_media)
    # Build a real list: on Python 3 ``map`` returns a lazy iterator.
    recent_media_ids = [media.id for media in user_recent_media]

    # Take one timestamp so a fresh insert gets identical created/updated times.
    now = datetime.datetime.now()
    updated_time = now
    created_time = user.created_time if is_update else now

    # The column may be NULL because it was added after the initial data entry.
    if created_time is None:
        created_time = now

    source_user = SourceUserEntity(
        user_id=user.user_id,
        username=user.username,
        full_name=user.full_name,
        bio=user.bio,
        locations=locations,
        website=user.website,
        media_count=user.media_count,
        follows=user.follows,
        followers=user.followers,
        recent_media_ids=recent_media_ids,
        most_recent_engagement_rating=most_recent_engagement_rating,
        averaged_engagement_rating=averaged_engagement_rating,
        trending=is_trending,
        trending_value=trending_value,
        created_time=created_time,
        updated_time=updated_time)

    # NOTE(review): the original log/save statement was garbled in this copy;
    # reconstructed as "log the action, then save" for both the update and the
    # insert case -- confirm against the upstream source.
    action = "UPDATE" if is_update else "INSERT"
    print(action + " user: " + repr(source_user))
    source_user.save()
import datetime
from src.models.source.source_user import SourceUserEntity
from src.utils.connection import open_cassandra_session

if __name__ == '__main__':
    # Export every source user to a date-stamped text file, echoing each row.
    open_cassandra_session()

    export_name = 'SOURCE_USERS_%s.txt' % datetime.datetime.now().strftime(
        "%Y-%m-%d")
    # The context manager flushes and closes the file even when writing a row
    # raises, which the manual flush()/close() pair did not guarantee.
    with open(export_name, 'w') as tsv_file:
        tsv_file.write(SourceUserEntity.tsv_header())
        for user in SourceUserEntity.all():
            # Serialise once and reuse the text for both console and file.
            row = user.tsv_repr()
            print(row)
            tsv_file.write(row)
import datetime
from src.models.source.source_user import SourceUserEntity
from src.utils.connection import open_cassandra_session

if __name__ == '__main__':
    # Export every source user to a date-stamped text file, echoing each row.
    open_cassandra_session()

    export_name = 'SOURCE_USERS_%s.txt' % datetime.datetime.now().strftime("%Y-%m-%d")
    # The context manager flushes and closes the file even when writing a row
    # raises, which the manual flush()/close() pair did not guarantee.
    with open(export_name, 'w') as tsv_file:
        tsv_file.write(SourceUserEntity.tsv_header())
        for user in SourceUserEntity.all():
            # Serialise once and reuse the text for both console and file.
            row = user.tsv_repr()
            print(row)
            tsv_file.write(row)