def __init__(self):
    """
    Sets up collaborators, timing settings, the daily-log directory and
    the Twitter credentials.
    Raises KeyError when one of the TWITTER_* environment variables is unset.
    """
    # Collaborators used to fetch game data and to build search keywords.
    self.sports_data = SportsData()
    self.keyword_generator = KeywordGenerator()

    # Loop period in seconds and the 'HH:MM' time at which the day's games are fetched.
    self.tick_time_in_seconds = 60.0
    self.time_to_check_games_for_the_day = '09:30'

    # Directory prefix of the per-day JSON files, relative to the working directory.
    working_dir = os.getcwd()
    self.base_path = working_dir + '/Twitter_Utils/data/daily-logs/'

    # Twitter credentials come from the environment.
    environ = os.environ
    self.APP_KEY = environ['TWITTER_APP_KEY']
    self.APP_SECRET = environ['TWITTER_APP_SECRET']
    self.OAUTH_TOKEN = environ['TWITTER_OAUTH_TOKEN']
    self.OAUTH_TOKEN_SECRET = environ['TWITTER_OAUTH_TOKEN_SECRET']

    # Parallel lists: stream_list[i] is stopped at end_times_list[i].
    self.stream_list = []
    self.end_times_list = []
def __init__(self):
    """
    Sets up the logger, an empty team data path and the sports data source.
    """
    # Filled in by generate_search_terms before the data file is read.
    self.team_data_path = ''
    self.logger = logging.getLogger(__name__)
    self.sports_data = SportsData()
class KeywordGenerator:
    """
    Builds the list of Twitter search keywords for a team from the
    per-sport team data JSON file and the team's player names.
    """

    def __init__(self):
        """
        Sets up the logger, an empty team data path and the sports data source.
        """
        self.logger = logging.getLogger(__name__)
        self.team_data_path = ''
        self.sports_data = SportsData()

    @staticmethod
    def get_team_data_path(sport):
        """
        Builds the path of the team data file for a sport.
        :param sport: "nhl" or "nba"; any other value yields the data
                      directory path with no file name appended
        :return: path string
        """
        marker = "BigDataMonsters"
        wd = os.getcwd()
        pos = wd.find(marker)
        # find() returns -1 on a miss, so 0 is a valid hit as well.
        if pos >= 0:  # pragma: no cover
            # Cut the working directory right after the project folder name.
            path = wd[:pos + len(marker)]
        else:
            path = wd

        file_names = {
            "nhl": 'nhl-teams-data.json',
            "nba": 'nba-teams-data.json',
        }
        return path + '/Twitter_Utils/data/' + file_names.get(sport, '')

    def generate_search_terms(self, team_id, sport="nba"):
        """
        Creates list of key words to search for.
        :param team_id: id of team, given by Stattleship API
        :param sport: currently "nba" or "nhl"; defaults to "nba"
        :return: returns list of key words; empty when no team matches
        :raises IOError: when the team data file cannot be read
        """
        self.team_data_path = self.get_team_data_path(sport)
        try:
            with open(self.team_data_path, 'r') as f:
                data = json.load(f)
        except IOError:
            # Log the traceback of the real error and re-raise it unchanged so
            # callers still see the errno and file name.
            self.logger.exception('Search terms not found at ' + self.team_data_path)
            raise

        search_terms_list = []
        players_list = []
        for team in data['teams']:
            if team['id'] != team_id:
                continue

            if team['hashtag']:
                search_terms_list.append(team['hashtag'])
            search_terms_list.extend(team['hashtags'])
            if team['nickname']:
                search_terms_list.append(team['nickname'])
            if team['slug']:
                players_list = self.append_players_name(team['slug'], team_id)
                # Player names without spaces, e.g. for hashtag-style matches.
                search_terms_list.extend(
                    name.replace(" ", "") for name in players_list)

        search_terms_list = self.append_word_with_go_to_list(search_terms_list)

        # Full player names are added last so they get no "go" variant.
        search_terms_list.extend(players_list)
        return search_terms_list

    @staticmethod
    def append_word_with_go_to_list(word_list):
        """
        Appends go to word, ex Bulldogs - goBulldogs
        :param word_list: words to append go to; this list is extended in place
        :return: the same list, now holding the original words followed by
                 their go-prefixed variants
        """
        # The comprehension is fully built before extend() runs, so the list
        # is not iterated while it grows.
        word_list.extend(['go' + word for word in word_list])
        return word_list

    def append_players_name(self, team_slug_name, team_id):
        """
        Looks up the players of a team through the sports data source.
        :param team_slug_name: slug of the team from the team data file
        :param team_id: id of team
        :return: whatever get_nba_players_for_today returns; iterated as
                 name strings by generate_search_terms
        """
        # NOTE(review): always uses the NBA lookup, whatever sport was
        # requested in generate_search_terms - confirm this is intended.
        return self.sports_data.get_nba_players_for_today(team_slug_name, team_id)
class EternalProcess:
    """
    Endless polling loop that fetches the day's games once a day, starts a
    tweet stream when a game begins and disconnects streams at their end time.
    """

    def __init__(self):
        """
        Sets up collaborators, timing settings, the daily-log directory and
        the Twitter credentials.
        Raises KeyError when one of the TWITTER_* environment variables is unset.
        """
        self.sports_data = SportsData()
        self.keyword_generator = KeywordGenerator()
        # Loop period in seconds.
        self.tick_time_in_seconds = 60.0
        # 'HH:MM' time at which the day's games are fetched.
        self.time_to_check_games_for_the_day = '09:30'
        self.base_path = os.getcwd() + '/Twitter_Utils/data/daily-logs/'
        self.APP_KEY = os.environ['TWITTER_APP_KEY']
        self.APP_SECRET = os.environ['TWITTER_APP_SECRET']
        self.OAUTH_TOKEN = os.environ['TWITTER_OAUTH_TOKEN']
        self.OAUTH_TOKEN_SECRET = os.environ['TWITTER_OAUTH_TOKEN_SECRET']
        # Parallel lists: stream_list[i] is stopped at end_times_list[i].
        self.stream_list = []
        self.end_times_list = []

    def start_process(self):
        """
        This process is our workhorse, it has to check if it should log games.
        On every tick it stops streams whose end time is reached, fetches the
        day's games at the configured time, and starts a tweet stream for each
        game whose start time equals the current 'HH:MM' and which is not
        marked as being streamed yet. Never returns.
        """
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(50 * '*' + '\n' + 10 * '*' + ' STARTING SCANNING PROCESS ' +
              10 * '*' + '\n' + 50 * '*')
        while True:
            print(str(self.stream_list) + str(self.end_times_list))
            self.check_if_stream_should_end()

            # The method must be called; the bare bound method is always truthy.
            if self.is_time_to_get_game_data_for_day():
                self.write_days_games_data()

            # Read in file to see if it is time to analyze twitter
            read_path = self.get_write_path_for_days_games()
            try:
                # Close the file before update_is_streamed_json rewrites it.
                with open(read_path) as f:
                    data = json.load(f)

                current_time = datetime.datetime.now().strftime('%H:%M')
                for idx, game in enumerate(data):
                    game_time = dateutil.parser.parse(
                        game['start_time']).strftime('%H:%M')
                    if game_time == current_time and not game['being_streamed']:
                        # TODO - Figure out how to call a fork or child process for a certain amount of time
                        self.update_is_streamed_json(index=idx)
                        print('Time to get twitter data.')
                        self._start_stream_for_game(game)
            except IOError:
                print('File not found')

            # restart loop after sleep, given by our tick_time
            self.sleep_for(self.tick_time_in_seconds)

    def _start_stream_for_game(self, game):
        """
        Opens a tweet stream for both teams of a game and records its end time.
        :param game: game dict with 'home_team_id', 'away_team_id', 'title'
                     and 'uuid' keys
        """
        # generate_search_terms needs the sport; the games come from
        # get_nba_games_for_today, so it is 'nba'.
        search_terms_home = self.keyword_generator.generate_search_terms(
            game['home_team_id'], 'nba')
        search_terms_away = self.keyword_generator.generate_search_terms(
            game['away_team_id'], 'nba')
        keyword_string = (','.join(search_terms_home) + ',' +
                          ','.join(search_terms_away))

        game_name = (datetime.datetime.now().strftime('%Y-%m-%d') + '-' +
                     game['title'].replace(' ', '-'))

        data_gatherer = DataGatherer()
        stream = data_gatherer.get_tweet_stream(
            keyword_string, game['uuid'], game_name)
        self.stream_list.append(stream)
        self.end_times_list.append(self.get_time_to_end_stream(1))

    def update_is_streamed_json(self, index):
        """
        Replaces json file to reflect that game is being streamed
        :param index: index within JSON object
        """
        read_path = self.get_write_path_for_days_games()
        try:
            with open(read_path, 'r') as json_file:
                data = json.load(json_file)

            data[index]['being_streamed'] = True

            with open(read_path, 'w+') as json_file:
                json_file.write(json.dumps(data))
        except IOError:
            print('File not found')

    @staticmethod
    def get_time_to_end_stream(minutes):
        """
        Function creates a time to end stream, currently in minutes
        :param minutes: number of minutes from now at which the stream ends
        :return: end time formatted as an 'HH:MM' string
        """
        end_time = datetime.datetime.now() + datetime.timedelta(minutes=minutes)
        return end_time.strftime('%H:%M')

    def check_if_stream_should_end(self):
        """
        Disconnects and forgets every stream whose end time equals the
        current 'HH:MM'.
        :return: True if at least one stream was stopped, otherwise False
        """
        if not self.end_times_list:
            return False

        hour_min_time_now = datetime.datetime.now().strftime('%H:%M')
        did_delete = False
        # Walk backwards so deleting entries does not shift pending indexes.
        for i in reversed(range(len(self.end_times_list))):
            if self.end_times_list[i] == hour_min_time_now:
                stream = self.stream_list[i]
                print('Stopping: ' + str(stream))
                stream.disconnect()
                del self.stream_list[i]
                del self.end_times_list[i]
                did_delete = True
        return did_delete

    def get_write_path_for_days_games(self):
        """
        :return: path of today's games file, base_path + 'YYYY-MM-DD.json'
        """
        return self.base_path + datetime.datetime.now().strftime(
            '%Y-%m-%d') + '.json'

    def is_time_to_get_game_data_for_day(self):
        """
        :return: True when the current 'HH:MM' equals the configured check time
        """
        current_time = datetime.datetime.now().strftime('%H:%M')
        return self.time_to_check_games_for_the_day == current_time

    def write_days_games_data(self):
        """
        Fetches today's games from the sports data source and writes them to
        today's games file, overwriting any existing content.
        """
        write_path = self.get_write_path_for_days_games()
        data_to_write = self.sports_data.get_nba_games_for_today()
        try:
            with open(write_path, 'w+') as f:
                f.write(data_to_write)
        except IOError:
            print('File not found')

    @staticmethod
    def sleep_for(tick_time):
        """
        Sleeps for about one tick.
        :param tick_time: tick length in seconds
        """
        # start_time is taken right before the sleep, so the subtracted
        # remainder is close to zero.
        start_time = time.time()
        time.sleep(tick_time - ((time.time() - start_time) % tick_time))