def merge_articles():
    """ Merge cached articles into the working set """
    load_working_set()
    load_registry('Article', 'article_id')
    for cached in tqdm(TC['Article.article_id'], '[MERGE] Merging Articles',
                       bar_format=PROGRESS_FORMAT):
        steam_data = cached.steam_data
        newsapi_data = cached.newsapi_data
        # Nothing to merge if neither source produced a payload
        if steam_data is None and newsapi_data is None:
            continue
        # Prefer the Steam title when both sources are present
        title = steam_data['title'] if steam_data is not None \
            else newsapi_data['title']
        article = WS.build_article(cached.article_id, title, condition(title))
        steam.build_article(article, steam_data)
        newsapi.build_article(article, newsapi_data)
        # Attach the article to its game and that game's developers
        game = WS.games.get(cached.game_id)
        if game is not None:
            xappend(game.articles, article)
            for dev in game.developers:
                xappend(dev.articles, article)
def collect_developers():
    """ Download missing developers from IGDB. """
    load_registry('Developer', 'igdb_id')
    registry = TC['Developer.igdb_id']
    # Only request IDs in DEV_RANGE that the cache does not already hold
    missing = [igdb_id for igdb_id in DEV_RANGE
               if not registry.exists(igdb_id)]
    generic_collect(rq_developers, registry,
                    '[COLLECT] Downloading Developers', missing)
def collect_games():
    """ Download missing games from IGDB. """
    load_registry('Game', 'igdb_id')
    registry = TC['Game.igdb_id']
    # Only request IDs in GAME_RANGE that the cache does not already hold
    missing = [igdb_id for igdb_id in GAME_RANGE
               if not registry.exists(igdb_id)]
    generic_collect(rq_games, registry,
                    '[COLLECT] Downloading Games', missing)
def gather_tweets():
    """ Search for tweets related to games and download them to the cache """
    load_working_set()
    load_registry('Tweet', 'game_id')
    registry = TC['Tweet.game_id']
    # Gather only for games with no cached tweet entry yet
    pending = [game for game in WS.games.values()
               if not registry.exists(game.game_id)]
    generic_gather(rq_tweets, registry, '[GATHER] Downloading Tweets', pending)
def gather_videos():
    """ Download videos from YouTube by game """
    load_working_set()
    load_registry('Video', 'game_id')
    registry = TC['Video.game_id']
    # Gather only for games with no cached video entry yet
    pending = [game for game in WS.games.values()
               if not registry.exists(game.game_id)]
    generic_gather(rq_videos, registry, '[GATHER] Downloading Videos', pending)
def gather_articles():
    """ Search for articles related to games and download them to the cache """
    load_working_set()
    load_registry('Article', 'game_id')
    registry = TC['Article.game_id']
    # Gather only for games with no cached article entry yet
    pending = [game for game in WS.games.values()
               if not registry.exists(game.game_id)]
    generic_gather(rq_articles, registry,
                   '[GATHER] Downloading Articles', pending)
def clean_videos():
    """ Remove unwanted videos from the registry """
    load_registry('Video', 'video_id')
    # Collect every cached video that fails validation
    flagged = [cached for cached in
               tqdm(TC['Video.video_id'], '[CLEAN] Scanning Videos',
                    bar_format=PROGRESS_FORMAT)
               if not google.validate_video(cached.youtube_data)]
    # Deletion is destructive, so require interactive confirmation
    if input("Delete %d low quality videos? " % len(flagged)) == 'y':
        for cached in flagged:
            db.session.delete(cached)
        db.session.commit()
def clean_articles():
    """ Remove unwanted articles from the registry """
    load_registry('Article', 'article_id')
    # Flag articles that fail validation against both sources
    flagged = [cached for cached in
               tqdm(TC['Article.article_id'], '[CLEAN] Scanning Articles',
                    bar_format=PROGRESS_FORMAT)
               if not newsapi.validate_article(cached.newsapi_data)
               and not steam.validate_article(cached.steam_data)]
    # Deletion is destructive, so require interactive confirmation
    if input("Delete %d low quality articles? " % len(flagged)) == 'y':
        for cached in flagged:
            db.session.delete(cached)
        db.session.commit()
def clean_tweets():
    """ Remove unwanted tweets from the registry """
    load_working_set()
    load_registry('Tweet', 'tweet_id')
    flagged = []
    for cached in tqdm(TC['Tweet.tweet_id'], '[CLEAN] Scanning Tweets',
                       bar_format=PROGRESS_FORMAT):
        game = WS.games.get(cached.game_id)
        data = cached.twitter_data
        # Drop the tweet when it is invalid, or irrelevant to its game
        # (relevance is only checked if validation passes — short-circuit)
        if not twitter.validate_tweet(data) or \
                not twitter.relevant_tweet(game, data):
            flagged.append(cached)
    # Deletion is destructive, so require interactive confirmation
    if input("Delete %d low quality tweets? " % len(flagged)) == 'y':
        for cached in flagged:
            db.session.delete(cached)
        db.session.commit()
def merge_developers():
    """ Merge cached developers into the working set """
    load_working_set()
    load_registry('Developer', 'igdb_id')
    for cached in tqdm(TC['Developer.igdb_id'], '[MERGE] Merging Developers',
                       bar_format=PROGRESS_FORMAT):
        data = cached.igdb_data
        # Skip cache entries with no IGDB payload
        if data is None:
            continue
        name = data['name']
        developer = WS.build_developer(cached.developer_id, cached.igdb_id,
                                       name, condition_developer(name))
        igdb.build_developer(developer, data)
def link_developers():
    """ Compute Game-Developer links according to IGDB ID for IGDB games """
    load_working_set()
    load_registry('Developer', 'igdb_id')
    for developer in tqdm(WS.developers.values(), '[LINK] Linking Developers',
                          bar_format=PROGRESS_FORMAT):
        # NOTE(review): assumes every working-set developer has a registry
        # entry with igdb_data — .get() returning None would raise here;
        # confirm against the merge step's filtering
        dev_json = TC['Developer.igdb_id'].get(developer.igdb_id).igdb_data
        related = chain(dev_json.get('published', []),
                        dev_json.get('developed', []))
        for igdb_id in related:
            game = WS.games_igdb.get(igdb_id)
            if game is None:
                continue
            # The first developer linked becomes the game's primary developer
            if game.developer is None:
                game.developer = developer.name
            # Link the models
            xappend(developer.games, game)
def merge_games():
    """ Merge cached games into the working set """
    load_working_set()
    load_registry('Game', 'game_id')
    for cached in tqdm(TC['Game.game_id'], '[MERGE] Merging Games',
                       bar_format=PROGRESS_FORMAT):
        steam_data = cached.steam_data
        igdb_data = cached.igdb_data
        # Nothing to merge if neither source produced a payload
        if steam_data is None and igdb_data is None:
            continue
        # Prefer the Steam name when both sources are present
        name = steam_data['name'] if steam_data is not None \
            else igdb_data['name']
        game = WS.build_game(cached.game_id, cached.steam_id,
                             cached.igdb_id, name, condition(name))
        steam.build_game(game, steam_data)
        igdb.build_game(game, igdb_data)
def merge_tweets():
    """ Merge cached tweets into the working set """
    load_working_set()
    load_registry('Tweet', 'tweet_id')
    for cached in tqdm(TC['Tweet.tweet_id'], '[MERGE] Merging Tweets',
                       bar_format=PROGRESS_FORMAT):
        data = cached.twitter_data
        # Skip cache entries with no Twitter payload
        if data is None:
            continue
        tweet = WS.build_tweet(cached.tweet_id, data['user']['name'],
                               data['text'])
        twitter.build_tweet(tweet, data)
        game = WS.games.get(cached.game_id)
        # Cap each game at 75 attached tweets
        if game is not None and len(game.tweets) < 75:
            xappend(game.tweets, tweet)
    # NOTE(review): unlike the developer/game merges, this registry is
    # unloaded afterwards — confirm whether that asymmetry is intentional
    unload_registry('Tweet', 'tweet_id')
def merge_videos():
    """ Merge cached videos into the working set """
    load_working_set()
    load_registry('Video', 'video_id')
    for cached in tqdm(TC['Video.video_id'], '[MERGE] Merging Videos',
                       bar_format=PROGRESS_FORMAT):
        data = cached.youtube_data
        # Skip cache entries with no YouTube payload
        if data is None:
            continue
        video = WS.build_video(cached.video_id, data['snippet']['title'])
        google.build_video(video, data)
        # Attach the video to its game when that game is in the working set
        game = WS.games.get(cached.game_id)
        if game is not None:
            xappend(game.videos, video)
    unload_registry('Video', 'video_id')