def __init__(self, game, streamer_dicts=None):
    """Open (or create and initialise) the per-game database.

    :param game: short game name, used for the data directory and DB file name
    :param streamer_dicts: pre-computed per-streamer stat dicts to be stored later
    """
    self.path = os.path.join(os.getcwd(), 'data', game, '{}_data.db'.format(game))
    self.game = game
    db_label = '{} Stream Database'.format(game)
    if not self.db_exists():
        # First run for this game: copy the base DB file, then build the schema.
        self.create_db()
        self.db = Pysqlite(db_label, self.path, verbose=False)
        self.create_global_data_table()
        self.create_streamers_data_table()
        self.create_tier_bounds_table()
        self.create_tier_data_table()
    else:
        self.db = Pysqlite(db_label, self.path, verbose=False)
    self.streamer_dicts = streamer_dicts
class AllStreamerOverviewsDataPagination:
    """Paginated access to a game's streamer overview rows, ordered by
    average viewership (descending)."""

    def __init__(self, game_name, per_page=10):
        self.game_name = game_name
        self.page = 1  # current page, 1-indexed
        self.per_page = per_page
        self.max_page = 0
        self.data_list_length = 0
        self.pages = []
        self.db = None  # opened lazily by run()

    def run(self):
        # Open a DB connection
        db_path = os.path.join(os.getcwd(), 'data', self.game_name, '{}_data.db'.format(self.game_name))
        self.db = Pysqlite(database_name='{} Page DB'.format(self.game_name), database_file=db_path)

    def get_page(self, page_number):
        """Return the streamer overview dicts for the given 1-indexed page.

        Side effect: refreshes self.data_list_length with the total row count.
        """
        # page_number is NOT zero indexed, so subtract 1 to make it zero indexed.
        # BUG FIX: the upper bound used to be lower + (per_page - 1) and was used
        # directly as the slice stop; Python slices exclude the stop index, so
        # every page silently dropped its last row (per_page - 1 results). Using
        # lower + per_page as the exclusive stop yields exactly per_page rows.
        lower = (page_number - 1) * self.per_page
        upper = lower + self.per_page
        # get the streamer overviews, ordered by average viewership
        ordered_data = self.db.get_specific_rows(
            table='streamers_data',
            filter_string='id IS NOT NULL ORDER BY viewers_average DESC')
        self.data_list_length = len(ordered_data)
        page_data = ordered_data[lower:upper]
        # map each DB row to a template-friendly dictionary
        streamer_dicts = [
            {
                'name': streamer[1],
                'last_update': streamer[2],
                'viewers_average': streamer[3],
                'viewers_peak': streamer[4],
                'followers': streamer[5],
                'stream_count': streamer[6],
                'viewer_reach': int(streamer[3] * convert_to_hours(streamer[8])),
                'duration_average': convert_to_hours(streamer[7]),
                'duration_total': convert_to_hours(streamer[8]),
                'partnership': streamer[10]
            } for streamer in page_data
        ]
        return streamer_dicts

    def get_page_count(self):
        """Pages needed for the row count seen by the last get_page call."""
        return int(ceil(self.data_list_length / float(self.per_page)))

    def has_previous_page(self):
        return self.page > 1

    def has_next_page(self):
        return self.page < self.get_page_count()
def __init__(self, game, streamer_name, stream_dicts):
    """Open (or create and initialise) the per-streamer stream database.

    :param game: short game name (data directory)
    :param streamer_name: streamer whose DB file is opened
    :param stream_dicts: parsed stream data awaiting import
    """
    self.path = os.path.join(os.getcwd(), 'data', game, 'streamers', '{}.db'.format(streamer_name))
    self.game = game
    self.streamer_name = streamer_name
    self.next_stream_count = 0
    db_label = '{} {} Stream Database'.format(game, streamer_name)
    if not self.db_exists():
        self.create_db()
        self.db = Pysqlite(db_label, self.path, verbose=False)
        # A fresh DB still needs its overview and streams tables built.
        self.create_streams_table()
        self.create_overview_table()
        self.next_stream_count = len(self.db.get_table_names()) - 3
    else:
        self.db = Pysqlite(db_label, self.path, verbose=False)
        self.next_stream_count = len(self.db.get_table_names()) - 3
        print('DB for: {} already exists and already holds {} stream tables'.format(streamer_name, self.next_stream_count))
    # assumes 3 bookkeeping (non-stream) tables in each DB — TODO confirm
    self.last_stream_stored = len(self.db.get_table_names()) - 3
    self.stream_dicts = stream_dicts
class GameGlobalData:
    """Reads a game's global overview, tier bounds and tier membership
    out of its per-game database."""

    def __init__(self, game_url_name):
        self.game_url_name = game_url_name
        self.global_data_list = []
        short_name = convert_name('url', self.game_url_name, 'short')
        self.db_path = os.path.join(os.getcwd(), 'data', short_name, '{}_data.db'.format(short_name))
        self.db = Pysqlite(database_name='{} Global Overview DB'.format(self.game_url_name), database_file=self.db_path)

    def return_global_overview_dict(self):
        """Latest global_data row mapped to a dict (durations in hours)."""
        row = self.db.get_specific_rows(
            table='global_data',
            filter_string='id = (SELECT MAX(id) FROM global_data)')
        latest = list(row[0])
        return {
            'last_updated': latest[1],
            'streamer_count': latest[2],
            'stream_count': latest[3],
            'stream_duration_average': convert_to_hours(latest[4]),
            'stream_duration_total': convert_to_hours(latest[5]),
            'stream_duration_max': convert_to_hours(latest[6])
        }

    def return_tier_bounds(self):
        """All tier bounds as a list of {tier, upper, lower} dicts."""
        return [
            {'tier': row[1], 'upper': row[2], 'lower': row[3]}
            for row in self.db.get_all_rows(table='tier_bounds')
        ]

    def return_tier_streamers(self):
        """Mapping of streamer name -> tier number."""
        return {name: tier for _, name, tier in self.db.get_all_rows(table='tier_data')}

    def return_tier_count(self, tier_number=0):
        """Number of streamers currently assigned to the given tier."""
        rows = self.db.get_all_rows(table='tier_data')
        return len([row for row in rows if row[2] == tier_number])

    def return_tier_counts(self):
        """Streamer counts for tiers 1..N, where N = number of tier bounds."""
        bounds = self.db.get_all_rows(table='tier_bounds')
        return [self.return_tier_count(tier_number=tier) for tier in range(1, len(bounds) + 1)]
class NewsArticlesPagination:
    """Paginated access to news articles, newest first."""

    def __init__(self, per_page=4):
        self.page = 1  # current page, 1-indexed
        self.per_page = per_page
        self.data_list_length = 0
        self.db = None  # opened lazily by run()

    def run(self):
        # Open a DB connection
        db_path = os.path.join(os.getcwd(), 'meta', 'news.db')
        self.db = Pysqlite(database_name='News DB', database_file=db_path)

    def get_page(self, page_number):
        """Return the article dicts for the given 1-indexed page.

        Side effect: refreshes self.data_list_length with the total row count.
        """
        # page_number is NOT zero indexed, so subtract 1 to make it zero indexed.
        # BUG FIX: the upper bound used to be lower + (per_page - 1) and was used
        # directly as the slice stop; slices exclude the stop index, so each page
        # dropped its last article. lower + per_page gives exactly per_page rows.
        lower = (page_number - 1) * self.per_page
        upper = lower + self.per_page
        # get an ordered list of articles, newest first
        ordered_data = self.db.get_specific_rows(
            table='articles',
            filter_string='id IS NOT NULL ORDER BY timestamp DESC')
        self.data_list_length = len(ordered_data)
        page_data = ordered_data[lower:upper]
        # map each DB row to a template-friendly dictionary
        article_dicts = [
            {
                'id': article[0],
                'date_written': article[1].split(' ')[0],  # pass only the date part and not the time
                'title': article[2],
                'contents': article[3][:150] + '...',  # truncate the contents string up to the first 150 characters
                'word_count': int(article[4]),
                # TODO: Implement not showing the article if it is not marked as published
                # BUG FIX: was `True if int(...) == 1 else 0` — mixed bool/int type
                'published': int(article[5]) == 1
            } for article in page_data
        ]
        return article_dicts

    def get_page_count(self):
        """Pages needed for the row count seen by the last get_page call."""
        return int(ceil(self.data_list_length / float(self.per_page)))

    def has_previous_page(self):
        return self.page > 1

    def has_next_page(self):
        return self.page < self.get_page_count()
class StreamData:
    """Read-only access to a single recorded stream of one streamer."""

    def __init__(self, streamer_name, game_name, stream_id):
        self.streamer_name = streamer_name
        self.game_name = game_name
        # Backend is zero indexed, frontend is not
        self.stream_id = int(stream_id) - 1
        self.max_stream_id = 0
        self.db = None

    def run(self):
        """Open the streamer's database and determine the highest stream id."""
        db_file = os.path.join(
            os.getcwd(), 'data', self.game_name, 'streamers',
            '{}.db'.format(self.streamer_name))
        self.db = Pysqlite(
            database_name='{} {} DB'.format(self.game_name, self.streamer_name),
            database_file=db_file)
        # assumes 3 bookkeeping (non-stream) tables in the DB — TODO confirm
        self.max_stream_id = len(self.db.get_table_names()) - 3

    def get_stream_data(self):
        """Return this stream's overview row from 'streams' as a dict."""
        # the db index is also not zero indexed... an oversight I know
        overview = self.db.get_specific_rows(
            table='streams',
            filter_string='id IS {}'.format(self.stream_id + 1))
        row = overview[0]
        return {
            'id': self.stream_id + 1,
            'max_id': self.max_stream_id,
            'time_start': row[1],
            'duration': convert_to_hours(row[2]),
            'viewers_average': row[3],
            'viewers_peak': row[4],
            'follower_delta': row[5]
        }

    def get_stream_raw_data(self):
        """Return every raw sample row recorded during this stream."""
        return self.db.get_all_rows(table='stream_{}'.format(self.stream_id))

    def get_stream_viewer_data_json(self):
        """JSON array of [timestamp, viewer count] pairs for charting."""
        return json.dumps([[sample[1], sample[2]] for sample in self.get_stream_raw_data()])
class NewsArticle:
    """Fetches a single news article row from meta/news.db."""

    def __init__(self, article_number=1):
        # article_number is the article's DB id (1-indexed)
        self.article_number = article_number
        self.db_path = os.path.join(os.getcwd(), 'meta', 'news.db')
        self.db = Pysqlite(database_name='News DB', database_file=self.db_path)

    def get_article(self):
        """Return the article with id == article_number as a dict."""
        # get the article data by the ID
        article = self.db.get_specific_rows(
            table='articles',
            filter_string='id IS {}'.format(self.article_number))[0]
        # map that data to a dictionary
        return {
            'id': article[0],
            'date_written': article[1].split(' ')[0],  # pass only the date part and not the time
            'title': article[2],
            'contents': article[3],
            'word_count': int(article[4]),
            # TODO: Implement not showing the article if it is not marked as published
            # BUG FIX: was `True if int(...) == 1 else 0` — mixed bool/int type
            'published': int(article[5]) == 1
        }
def run(self):
    """Open the connection to the news database under meta/."""
    news_db = os.path.join(os.getcwd(), 'meta', 'news.db')
    self.db = Pysqlite(database_name='News DB', database_file=news_db)
def run(self):
    """Open this streamer's database and record the highest stream id."""
    db_file = os.path.join(
        os.getcwd(), 'data', self.game_name, 'streamers',
        '{}.db'.format(self.streamer_name))
    self.db = Pysqlite(
        database_name='{} {} DB'.format(self.game_name, self.streamer_name),
        database_file=db_file)
    # assumes 3 bookkeeping (non-stream) tables in the DB — TODO confirm
    self.max_stream_id = len(self.db.get_table_names()) - 3
def run(self):
    """Open the per-game database used for paging streamer overviews."""
    game_db = os.path.join(
        os.getcwd(), 'data', self.game_name,
        '{}_data.db'.format(self.game_name))
    self.db = Pysqlite(database_name='{} Page DB'.format(self.game_name), database_file=game_db)
class StreamsDataPagination:
    """Paginated access to one streamer's stream overview rows, newest first."""

    def __init__(self, game_name, streamer_name, per_page=10):
        self.game_name = game_name
        self.streamer_name = streamer_name
        self.page = 1  # current page, 1-indexed
        self.per_page = per_page
        self.data_list_length = 0
        self.db = None  # opened lazily by run()

    def run(self):
        # Open a DB connection
        db_path = os.path.join(os.getcwd(), 'data', self.game_name, 'streamers', '{}.db'.format(self.streamer_name))
        self.db = Pysqlite(database_name='{} {} DB'.format(self.game_name, self.streamer_name), database_file=db_path)

    @staticmethod
    def _row_to_dict(stream):
        # map one 'streams' table row to a template-friendly dictionary
        return {
            'id': stream[0],
            'start_time': stream[1],
            'duration': convert_to_hours(stream[2]),
            'viewers_average': stream[3],
            'viewers_peak': stream[4],
            'follower_delta': stream[5],
        }

    def _get_ordered_streams(self):
        # all stream overview rows, ordered newest first
        return self.db.get_specific_rows(
            table='streams',
            filter_string='id IS NOT NULL ORDER BY timestamp DESC')

    def get_page(self, page_number):
        """Return the stream dicts for the given 1-indexed page.

        Side effect: refreshes self.data_list_length with the total row count.
        """
        # page_number is NOT zero indexed, so subtract 1 to make it zero indexed.
        # BUG FIX: the upper bound used to be lower + (per_page - 1) and was used
        # directly as the slice stop; slices exclude the stop index, so each page
        # dropped its last stream. lower + per_page gives exactly per_page rows.
        lower = (page_number - 1) * self.per_page
        upper = lower + self.per_page
        ordered_data = self._get_ordered_streams()
        self.data_list_length = len(ordered_data)
        return [self._row_to_dict(stream) for stream in ordered_data[lower:upper]]

    def get_all_streams_dicts(self):
        """Return every stream as a dict, newest first."""
        return [self._row_to_dict(stream) for stream in self._get_ordered_streams()]

    def get_average_viewer_count_dicts(self):
        """Return (start_time, viewers_average) pairs for every stream."""
        stream_dicts = self.get_all_streams_dicts()
        return [
            {'start_time': stream['start_time'], 'viewers_average': stream['viewers_average']}
            for stream in stream_dicts
        ]

    def get_page_count(self):
        """Pages needed for the row count seen by the last get_page call."""
        return int(ceil(self.data_list_length / float(self.per_page)))

    def has_previous_page(self):
        return self.page > 1

    def has_next_page(self):
        return self.page < self.get_page_count()
def __init__(self, game_name, game_shorthand, db_mid_directory, db_name_format='{}_stats.db', verbose=False):
    """Store game metadata and open the per-game statistics database.

    :param db_mid_directory: directory (relative to cwd) holding the DB file
    :param db_name_format: filename template, filled with the shorthand
    """
    self.name = game_name
    self.shorthand = game_shorthand
    self.verbose = verbose
    self.db_file_path = os.path.join(
        os.getcwd(), db_mid_directory, db_name_format.format(game_shorthand))
    self.db = Pysqlite(database_name='twitch_stats', database_file=self.db_file_path)
class TwitchStatisticsOutput:
    """Builds a plain-text statistics report for every recorded streamer of a game."""

    # bounds for the tiers of streamers (both bounds inclusive)
    tier_one_bounds = {'upper': 999999, 'lower': 100}
    tier_two_bounds = {'upper': 99, 'lower': 50}
    tier_three_bounds = {'upper': 49, 'lower': 15}
    tier_four_bounds = {'upper': 14, 'lower': 0}

    def __init__(self, game_name, game_shorthand, db_mid_directory, db_name_format='{}_stats.db', verbose=False):
        self.name = game_name
        self.shorthand = game_shorthand
        self.db_file_path = os.path.join(os.getcwd(), db_mid_directory, db_name_format.format(game_shorthand))
        self.db = Pysqlite(database_name='twitch_stats', database_file=self.db_file_path)
        self.verbose = verbose

    def run(self):
        """Collect every streamer's statistics and write the text report."""
        if self.verbose:
            print('Processing data for game: {}'.format(self.name))
        tables = self.db.get_table_names()
        tables = [table for table in tables if table not in ['test', 'sqlite_sequence']]
        # get the table names which do not start with a number
        # (original used table[0][0], which is the same first character)
        valid_named_tables = [table for table in tables if not table[0].isdigit()]
        # get the table names which start with a number
        number_start_tables = [table for table in tables if table[0].isdigit()]
        # re-add the number-starting names with a leading underscore
        # (original comment was garbled: "reod the original table names")
        valid_named_tables.extend(['_' + table for table in number_start_tables])
        # initialise list for all the data
        all_streamer_data = []
        # list any streamers to ignore
        streamers_to_ignore = ['legenddolby1986']
        for streamer in tqdm(valid_named_tables):
            if streamer in streamers_to_ignore:
                # skip if its on the ignore list
                continue
            # get the db data from the table of the same name as the streamer
            all_streamer_data.append(self.get_streamer_dict(streamer))
        # write the data to the text file
        self.write_text_file(streamer_data=all_streamer_data)

    def return_streamer_tier(self, average_viewers):
        """Return the tier (1-4) whose inclusive bounds contain average_viewers, else 0."""
        if self.tier_one_bounds['upper'] >= average_viewers >= self.tier_one_bounds['lower']:
            return 1
        if self.tier_two_bounds['upper'] >= average_viewers >= self.tier_two_bounds['lower']:
            return 2
        if self.tier_three_bounds['upper'] >= average_viewers >= self.tier_three_bounds['lower']:
            return 3
        if self.tier_four_bounds['upper'] >= average_viewers >= self.tier_four_bounds['lower']:
            return 4
        return 0

    def get_streamer_dict(self, streamer):
        """Collect one streamer's recorded statistics into a dict.

        Returns zeroed-out stats when the streamer's table cannot be read.
        """
        streamer_dict = dict()
        streamer_dict['name'] = streamer
        # catch an exception where the table cannot be found and return an empty dictionary instead
        try:
            data = self.db.get_all_rows(table=streamer)
        except PysqliteCouldNotRetrieveData:
            streamer_dict['partnership'] = False
            streamer_dict['tier'] = 4
            streamer_dict['viewers'] = []
            streamer_dict['viewers_max'] = 0
            streamer_dict['viewers_average'] = 0.0
            streamer_dict['followers'] = []
            streamer_dict['followers_max'] = 0
            streamer_dict['times'] = []
            streamer_dict['durations'] = []
            streamer_dict['durations_max'] = 0
            streamer_dict['durations_average'] = 0.0
            streamer_dict['durations_total'] = 0.0
            streamer_dict['stream_count'] = 0
            return streamer_dict
        streamer_dict['partnership'] = data[-1][3] == 1
        viewers = [field[1] for field in data]
        streamer_dict['viewers'] = viewers
        streamer_dict['viewers_max'] = max(viewers)
        streamer_dict['viewers_average'] = sum(viewers) // len(viewers)
        streamer_dict['tier'] = self.return_streamer_tier(streamer_dict['viewers_average'])
        followers = [field[2] for field in data]
        streamer_dict['followers'] = followers
        streamer_dict['followers_max'] = followers[-1]
        streamer_dict['times'] = [field[4] for field in data]  # times
        durations = get_stream_durations(streamer_dict['times'])
        streamer_dict['durations'] = durations
        # BUG FIX: durations_average used to be round(sum(durations), 2) — i.e.
        # identical to durations_total, not an average. Divide by the stream
        # count; also guard against an empty duration list (max() would raise).
        streamer_dict['durations_max'] = max(durations) if durations else 0
        streamer_dict['durations_average'] = round(sum(durations) / len(durations), 2) if durations else 0.0
        streamer_dict['durations_total'] = round(sum(durations), 2)
        streamer_dict['stream_count'] = len(durations)
        return streamer_dict

    def write_text_file(self, streamer_data):
        """Write the human-readable report to data/<shorthand>_Twitch_Stats.txt."""
        durations = [streamer['durations'] for streamer in streamer_data]
        # get the longest consecutive stream (guard against no recorded streams)
        non_empty_durations = [duration for duration in durations if not duration == []]
        longest_stream = max([max(duration_set) for duration_set in non_empty_durations]) if non_empty_durations else 0
        # calculate total time streamed over all streamers
        total_duration_sums = sum([sum(duration_set) for duration_set in non_empty_durations])
        total_duration = round(total_duration_sums, 2)
        # calculate the total number of discrete streams
        total_streams = sum(streamer['stream_count'] for streamer in streamer_data)
        text_file_path = os.path.join(os.getcwd(), 'data', '{}_Twitch_Stats.txt'.format(self.shorthand))
        with open(text_file_path, mode='w', encoding='utf-8') as file:
            file.write('{} Twitch Streamer Statistics\n'.format(self.name))
            file.write('Data recorded 24/7 via twitch\'s public API every ~20 seconds\n')
            file.write('Script written by Simon Agius Muscat / CMDR Purrcat\n')
            file.write('More information can be found at: https://github.com/purrcat259/twitch-statistics\n')
            file.write('Total streamers recorded: {}\n'.format(len(streamer_data)))
            file.write('Total streams recorded: {}\n'.format(total_streams))
            file.write('Total time streamed: {} hours\n'.format(total_duration))
            file.write('Longest single stream: {} hours\n'.format(round(longest_stream, 2)))
            file.write('Tier One Bounds: {} >= Average Viewers >= {}\n'.format(self.tier_one_bounds['upper'], self.tier_one_bounds['lower']))
            file.write('Tier One Streamers: {}\n'.format(len([s for s in streamer_data if self.return_streamer_tier(s['viewers_average']) == 1])))
            file.write('Tier Two Bounds: {} >= Average Viewers >= {}\n'.format(self.tier_two_bounds['upper'], self.tier_two_bounds['lower']))
            file.write('Tier Two Streamers: {}\n'.format(len([s for s in streamer_data if self.return_streamer_tier(s['viewers_average']) == 2])))
            file.write('Tier Three Bounds: {} >= Average Viewers >= {}\n'.format(self.tier_three_bounds['upper'], self.tier_three_bounds['lower']))
            file.write('Tier Three Streamers: {}\n'.format(len([s for s in streamer_data if self.return_streamer_tier(s['viewers_average']) == 3])))
            file.write('Tier Four Bounds: {} >= Average Viewers >= {}\n'.format(self.tier_four_bounds['upper'], self.tier_four_bounds['lower']))
            file.write('Tier Four Streamers: {}\n'.format(len([s for s in streamer_data if self.return_streamer_tier(s['viewers_average']) == 4])))
            for streamer in streamer_data:
                # skip streamers with total durations less than 10 minutes
                if streamer['durations_total'] < 0.2:
                    continue
                file.write('\nStreamer: {} (T{})\n'.format(streamer['name'], streamer['tier']))
                file.write('Partnered: {} \n'.format(streamer['partnership']))
                file.write('Average Viewers: {}\n'.format(streamer['viewers_average']))
                file.write('Peak Viewers: {}\n'.format(streamer['viewers_max']))
                file.write('Followers: {}\n'.format(streamer['followers_max']))
                file.write('Stream count: {}\n'.format(streamer['stream_count']))
                file.write('Average Stream duration: {} hours\n'.format(streamer['durations_average']))
                file.write('Longest Stream duration: {} hours\n'.format(streamer['durations_max']))
                file.write('Total time streamed: {} hours\n'.format(streamer['durations_total']))
                time_percentage = round((streamer['durations_total'] / total_duration) * 100, 3)
                file.write('Percentage streamed of total duration: {}%\n'.format(time_percentage))
                file.write('Stream durations:\n')
                for duration in streamer['durations']:
                    if duration < 1.0:
                        duration = round(duration * 60, 2)
                        # skip stream durations less than 5 minutes
                        if duration < 5.0:
                            continue
                        file.write('\t{} minutes\n'.format(duration))
                    else:
                        file.write('\t{} hours\n'.format(duration))
from neopysqlite.neopysqlite import Pysqlite
from tqdm import tqdm

# One-off maintenance script: walks every streamer DB for each game and parses
# the timestamp of every row so it can be rewritten in ISO order.
# NOTE(review): `os` is used below but not imported here — presumably imported
# earlier in the file; confirm.
fix_stream_tables = True
fix_game_tables = True
games = ['ED', 'PC']
for game in games:
    # every file under data/<game>/streamers is a per-streamer DB, except 'base'
    streamers = os.listdir(os.path.join(os.getcwd(), 'data', game, 'streamers'))
    streamers.remove('base')
    print('Processing timestamps for streamers of game: {}'.format(game))
    if fix_stream_tables:
        for streamer in tqdm(streamers):
            streamer_db_path = os.path.join(os.getcwd(), 'data', game, 'streamers', streamer)
            db = Pysqlite(database_name='{} DB'.format(streamer), database_file=streamer_db_path)
            # table count minus bookkeeping tables — assumes 3 non-stream tables; TODO confirm
            table_count = len(db.get_table_names()) - 3
            # print('{} has {} stream tables'.format(streamer, table_count))
            table_names = ['stream_{}'.format(number) for number in range(0, table_count)]
            table_names.append('overview')
            table_names.append('streams')
            for table_name in table_names:
                rows = db.get_all_rows(table=table_name)
                for row in tqdm(rows):
                    # convert anything in DD-MM-YYYY HH:MM:SS to YYYY-MM-DD HH:MM:SS
                    old_timestamp = row[1]
                    split_string = old_timestamp.split(' ')
                    date_part = split_string[0].split('-')
                    time_part = split_string[1].split(':')
                    # NOTE(review): the names below read date_part[0] as the year,
                    # i.e. they assume YYYY-MM-DD input, while the comment above says
                    # DD-MM-YYYY — the unpacking order looks suspect; confirm.
                    year, month, day = int(date_part[0]), int(date_part[1]), int(date_part[2])
                    # NOTE(review): the parsed values are never used in this visible
                    # chunk — the write-back step appears to be truncated here.
                    hour, minute, second = int(time_part[0]), int(time_part[1]), int(time_part[2])
class StreamerDB:
    """Per-streamer stream database: imports parsed CSV stream data into
    per-stream tables and maintains the 'streams' and 'overview' summary tables."""

    def __init__(self, game, streamer_name, stream_dicts):
        # Path of the per-streamer SQLite file under data/<game>/streamers/.
        self.path = os.path.join(os.getcwd(), 'data', game, 'streamers', '{}.db'.format(streamer_name))
        self.game = game
        self.streamer_name = streamer_name
        self.next_stream_count = 0
        if self.db_exists():
            self.db = Pysqlite('{} {} Stream Database'.format(game, streamer_name), self.path, verbose=False)
            # table count minus bookkeeping tables gives the next stream index
            # (assumes exactly 3 non-stream tables — TODO confirm)
            self.next_stream_count = len(self.db.get_table_names()) - 3
            print('DB for: {} already exists and already holds {} stream tables'.format(streamer_name, self.next_stream_count))
        else:
            self.create_db()
            self.db = Pysqlite('{} {} Stream Database'.format(game, streamer_name), self.path, verbose=False)
            # This means that the overview and the streams table need to be created
            self.create_streams_table()
            self.create_overview_table()
            self.next_stream_count = len(self.db.get_table_names()) - 3
        self.last_stream_stored = len(self.db.get_table_names()) - 3
        self.stream_dicts = stream_dicts

    def run(self):
        """Import all pending stream dicts, then refresh the overview table."""
        self.import_csv_data()
        self.generate_overview_for_all_streams()

    def db_exists(self):
        # True when the per-streamer DB file already exists on disk.
        return os.path.isfile(self.path)

    def create_db(self):
        """Create the DB file by copying the base template DB, if missing."""
        if not self.db_exists():
            print('Database for {} does not exist. Creating DB now.'.format(self.streamer_name))
            copy_file(
                src=os.path.join(os.getcwd(), 'data', self.game, 'streamers', 'base', 'test_streamer.db'),
                dst=self.path
            )
        else:
            print('Database for {} already exists'.format(self.streamer_name))

    def create_overview_table(self):
        """Create the single-row-per-refresh 'overview' summary table."""
        print('Creating the overview table for: {}'.format(self.streamer_name))
        time.sleep(1)
        create_statement = 'CREATE TABLE `overview` (`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,' \
                           '`timestamp` TEXT NOT NULL,' \
                           '`viewers_average` INTEGER NOT NULL,' \
                           '`viewers_peak` INTEGER NOT NULL,' \
                           '`followers` INTEGER NOT NULL,' \
                           '`average_time_streamed` INTEGER,' \
                           '`total_time_streamed` INTEGER NOT NULL,' \
                           '`partnership` INTEGER NOT NULL DEFAULT 0);'
        self.db.execute_sql(create_statement)

    def create_streams_table(self):
        """Create the 'streams' table (one row per discrete stream)."""
        print('Creating the streams table for: {}'.format(self.streamer_name))
        time.sleep(1)
        create_statement = 'CREATE TABLE `streams` (`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, ' \
                           '`timestamp` TEXT NOT NULL, `duration` INTEGER NOT NULL, `viewers_average` ' \
                           'INTEGER NOT NULL, `viewers_peak` INTEGER NOT NULL, `follower_increase` INTEGER NOT NULL)'
        self.db.execute_sql(create_statement)

    def create_stream_table(self):
        """Create the raw-sample table stream_<next_stream_count>."""
        print('Creating stream_{} table for: {}'.format(self.next_stream_count, self.streamer_name))
        time.sleep(1)
        # NOTE(review): '`partnership`INTEGER' lacks a space after the closing
        # backtick; SQLite appears to tolerate it, but confirm.
        create_statement = 'CREATE TABLE "stream_{}" (`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,' \
                           '`timestamp` TEXT NOT NULL, `viewers` INTEGER NOT NULL, `followers` INTEGER NOT NULL, ' \
                           '`partnership`INTEGER NOT NULL DEFAULT 0)'.format(self.next_stream_count)
        self.db.execute_sql(create_statement)

    def import_csv_data(self):
        """Insert each pending stream's raw rows into its own stream_N table."""
        print('Importing CSV data into stream tables for: {}'.format(self.streamer_name))
        for stream_dict in self.stream_dicts:
            # create a table for each CSV
            self.create_stream_table()
            # CSV schema is NAME, VIEWERS, FOLLOWERS, PARTNERSHIP, TIMESTAMP
            # DB schema is ID, TIMESTAMP, VIEWERS, FOLLOWERS, PARTNERSHIP
            raw_data_list = stream_dict['raw_data']
            fixed_schema_list = [[row[4], row[1], row[2], row[3]] for row in raw_data_list]
            """
            self.db.insert_rows(
                table='stream_{}'.format(self.next_stream_count),
                row_string='(NULL, ?, ?, ?, ?)',
                row_data_list=fixed_schema_list
            )
            """
            for row in tqdm(fixed_schema_list):
                self.db.insert_row(
                    table='stream_{}'.format(self.next_stream_count),
                    row_string='(NULL, ?, ?, ?, ?)',
                    row_data=row)
            # generate a stream data row for the streams table
            self.generate_stream_data_row(stream_dict=stream_dict)
            # iterate the stream counter
            self.next_stream_count += 1
        # update the number of streams stored
        self.last_stream_stored = len(self.db.get_table_names()) - 3

    def generate_stream_data_row(self, stream_dict):
        """Summarise one stream dict into a row of the 'streams' table."""
        # print('Generating stream overview')
        # Streams table schema:
        # ID, Date + start time, duration (seconds), average viewership, peak viewership, follower differential
        timestamp = stream_dict['start_timestamp']
        duration = stream_dict['duration']
        viewers_list = [row[1] for row in stream_dict['raw_data']]
        viewers_average = calculate_average_from_list(viewers_list)
        viewers_peak = max(viewers_list)
        # last follower count - first follower count
        follower_delta = int(stream_dict['raw_data'][-1][2]) - int(stream_dict['raw_data'][0][2])
        self.db.insert_row(
            table='streams',
            row_string='(NULL, ?, ?, ?, ?, ?)',
            row_data=[timestamp, duration, viewers_average, viewers_peak, follower_delta]
        )

    def generate_overview_for_all_streams(self):
        """Aggregate every 'streams' row into a new 'overview' row."""
        print('Generating overview for all streams so far')
        # Streams table schema:
        # ID, Date + start time, duration (seconds), average viewership, peak viewership, follower differential
        data = self.db.get_all_rows('streams')
        # get the duration data
        durations = [int(field[2]) for field in data]
        total_duration = sum(durations)
        total_average_duration = calculate_average_from_list(durations)
        # get the viewer data
        average_viewers_list = [int(field[3]) for field in data]
        total_average_viewers = calculate_average_from_list(average_viewers_list)
        peak_viewers_list = [int(field[4]) for field in data]
        try:
            highest_peak_viewers = max(peak_viewers_list)
        except ValueError:
            # no streams recorded yet
            highest_peak_viewers = 0
        # get the follower data from the latest stream table and not the overview data
        data = self.db.get_all_rows('stream_{}'.format(self.last_stream_stored - 1))
        last_follower_count = data[-1][3]
        # get last partnership data from the latest stream table too
        partnered = data[-1][4]
        # Overview table schema:
        # ID, CURRENT TIMESTAMP, AVERAGE VIEWERS, PEAK VIEWERS, FOLLOWERS, AVERAGE STREAM DURATION,
        # TOTAL STREAM DURATION, PARTNERSHIP
        self.db.insert_row(
            table='overview',
            row_string='(NULL, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?)',
            row_data=[total_average_viewers, highest_peak_viewers, last_follower_count, total_average_duration, total_duration, partnered])

    def return_last_overview(self):
        """Return the most recent overview row, or [] when none exist."""
        overviews = self.db.get_all_rows(table='overview')
        if len(overviews) == 0:
            return []
        return overviews[-1]

    def return_stream_count(self):
        # number of stream tables after the pending imports
        return self.next_stream_count
def __init__(self, article_number=1):
    """Remember which article id to fetch and open the news database."""
    self.article_number = article_number
    meta_dir = os.path.join(os.getcwd(), 'meta')
    self.db_path = os.path.join(meta_dir, 'news.db')
    self.db = Pysqlite(database_name='News DB', database_file=self.db_path)
def run(self):
    """Open a connection to this streamer's per-streamer database."""
    file_name = '{}.db'.format(self.streamer_name)
    db_file = os.path.join(os.getcwd(), 'data', self.game_name, 'streamers', file_name)
    self.db = Pysqlite(
        database_name='{} {} DB'.format(self.game_name, self.streamer_name),
        database_file=db_file)
def __init__(self, game_url_name):
    """Resolve the game's short name and open its global-overview database."""
    self.game_url_name = game_url_name
    self.global_data_list = []
    short = convert_name('url', self.game_url_name, 'short')
    self.db_path = os.path.join(
        os.getcwd(), 'data', short, '{}_data.db'.format(short))
    self.db = Pysqlite(
        database_name='{} Global Overview DB'.format(self.game_url_name),
        database_file=self.db_path)
class GameDB:
    """Per-game aggregate database: stores one row per streamer plus global
    totals and tier assignments, and keeps them up to date."""

    def __init__(self, game, streamer_dicts=None):
        self.path = os.path.join(os.getcwd(), 'data', game, '{}_data.db'.format(game))
        self.game = game
        if self.db_exists():
            self.db = Pysqlite('{} Stream Database'.format(game), self.path, verbose=False)
        else:
            self.create_db()
            self.db = Pysqlite('{} Stream Database'.format(game), self.path, verbose=False)
            # If the DB does not exist, then create the tables
            self.create_global_data_table()
            self.create_streamers_data_table()
            self.create_tier_bounds_table()
            self.create_tier_data_table()
        self.streamer_dicts = streamer_dicts

    def run(self):
        """Insert/update every streamer dict, refresh global data, then VACUUM."""
        # update the streamers data
        streamers_to_update = self.get_streamers_already_stored()
        print('Additions: {}'.format(len(self.streamer_dicts) - len(streamers_to_update)))
        print('Updates: {}'.format(len(streamers_to_update)))
        time.sleep(0.1)  # avoids same line progress bar
        for streamer_dict in tqdm(self.streamer_dicts):
            if streamer_dict['name'] in streamers_to_update:
                self.update_streamer_data(streamer_dict)
                self.update_streamer_tier(streamer_dict)
            else:
                self.insert_streamer_data(streamer_dict)
                self.add_streamer_tier(streamer_dict)
        # commit the data after updating as it does not do so itself
        self.db.dbcon.commit()
        # update the global data
        self.update_global_data()
        print('Vacuuming Database to retrieve space')
        # vacuum the old space now
        self.db.execute_sql('VACUUM')
        # commit the vacuum
        self.db.dbcon.commit()

    def db_exists(self):
        # True when the per-game DB file already exists on disk.
        return os.path.isfile(self.path)

    def create_db(self):
        """Create the DB file by copying the base template DB, if missing."""
        if not self.db_exists():
            print('Database for the game: {} does not exist. Creating DB now.'.format(self.game))
            copy_file(
                src=os.path.join(os.getcwd(), 'data', 'base', 'test_game.db'),
                dst=self.path
            )
        else:
            print('Database for game: {} already exists'.format(self.game))

    def create_global_data_table(self):
        """Create the global_data table (one row per refresh)."""
        print('Creating global data table for: {}'.format(self.game))
        time.sleep(1)
        create_statement = 'CREATE TABLE "global_data" (' \
                           '`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,' \
                           '`timestamp` TEXT NOT NULL,' \
                           '`streamer_count` INTEGER NOT NULL,' \
                           '`stream_count` INTEGER NOT NULL,' \
                           '`average_time_streamed` INTEGER NOT NULL,' \
                           '`total_time_streamed` INTEGER NOT NULL,' \
                           '`longest_stream` INTEGER NOT NULL)'
        self.db.execute_sql(create_statement)

    def create_streamers_data_table(self):
        """Create the streamers_data table (one row per streamer)."""
        print('Creating streamers data table for: {}'.format(self.game))
        time.sleep(1)
        create_statement = 'CREATE TABLE "streamers_data" (`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, ' \
                           '`name` TEXT NOT NULL, ' \
                           '`last_updated` TEXT NOT NULL, ' \
                           '`viewers_average` INTEGER NOT NULL, ' \
                           '`viewers_peak` INTEGER NOT NULL, ' \
                           '`followers` INTEGER NOT NULL, ' \
                           '`stream_count` INTEGER NOT NULL, ' \
                           '`average_time_streamed` INTEGER NOT NULL, ' \
                           '`total_time_streamed` INTEGER NOT NULL, ' \
                           '`percentage_duration` REAL NOT NULL,' \
                           '`partnership` INTEGER NOT NULL)'
        self.db.execute_sql(create_statement)

    def create_tier_bounds_table(self):
        """Create tier_bounds and interactively prompt for each tier's bounds."""
        print('Creating tier bounds table for: {}'.format(self.game))
        time.sleep(1)
        create_statement = 'CREATE TABLE "tier_bounds" (`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,' \
                           '`number` INTEGER NOT NULL, ' \
                           '`upper_bound` INTEGER NOT NULL, ' \
                           '`lower_bound` INTEGER NOT NULL)'
        self.db.execute_sql(create_statement)
        time.sleep(1)
        tier_amount = int(input('Please enter the number of tiers that will be present: '))
        # tiers are numbered from 1 (original looped range(0, n) and bumped i)
        for i in range(1, tier_amount + 1):
            print('BOUND NUMBERS ARE BOTH INCLUSIVE. FOR 100 TO 50, ENTER 100 AS UPPER AND 50 AS LOWER')
            upper_bound = int(input('Please enter the upper bound for tier {}: '.format(i)))
            lower_bound = int(input('Please enter the lower bound for tier {}: '.format(i)))
            self.db.insert_row(
                table='tier_bounds',
                row_string='(NULL, ?, ?, ?)',
                row_data=[i, upper_bound, lower_bound])

    def create_tier_data_table(self):
        """Create the tier_data table mapping streamer names to tiers."""
        print('Creating tier data table for: {}'.format(self.game))
        time.sleep(1)
        create_statement = 'CREATE TABLE `tier_data` (`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, ' \
                           '`streamer_name` TEXT NOT NULL, ' \
                           '`streamer_tier` INTEGER NOT NULL)'
        self.db.execute_sql(create_statement)

    def return_streamer_tier(self, average_viewers):
        """Return the tier whose inclusive bounds contain average_viewers, or 0."""
        bounds = self.db.get_all_rows('tier_bounds')
        for _, tier, upper, lower in bounds:
            if upper >= average_viewers >= lower:
                return tier
        # no tier matched
        return 0

    # return the names of the streamers already stored
    def get_streamers_already_stored(self):
        streamers = self.db.get_all_rows('streamers_data')
        return [row[1] for row in streamers]

    def insert_streamer_data(self, streamer_dict):
        """Insert a brand-new streamers_data row."""
        # print('Adding row for: {}'.format(streamer_dict['name']))
        self.db.insert_row(
            table='streamers_data',
            row_string='(NULL, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
            row_data=[
                streamer_dict['name'],
                streamer_dict['last_update'],
                streamer_dict['viewers_average'],
                streamer_dict['viewers_peak'],
                streamer_dict['followers'],
                streamer_dict['stream_count'],
                streamer_dict['average_duration'],
                streamer_dict['total_duration'],
                streamer_dict['percentage_duration'],
                streamer_dict['partnership']
            ]
        )

    def update_streamer_data(self, streamer_dict):
        """Update an existing streamers_data row, matched by name."""
        # no neopysqlite method for updating rows yet :(
        # UPDATE table_name SET column1 = value1, columnN = valueN... WHERE name = `streamer_name`
        # BUG FIX: 'partnership = ?' was concatenated directly against
        # 'WHERE name = ?', yielding '...partnership = ?WHERE name = ?' —
        # invalid SQL (the sibling update_streamer_tier has the space).
        self.db.dbcur.execute('UPDATE streamers_data SET '
                              'last_updated = ?,'
                              'viewers_average = ?,'
                              'viewers_peak = ?,'
                              'followers = ?,'
                              'stream_count = ?,'
                              'total_time_streamed = ?,'
                              'average_time_streamed = ?,'
                              'percentage_duration = ?,'
                              'partnership = ? '
                              'WHERE name = ?', (
                                  streamer_dict['last_update'],
                                  streamer_dict['viewers_average'],
                                  streamer_dict['viewers_peak'],
                                  streamer_dict['followers'],
                                  streamer_dict['stream_count'],
                                  streamer_dict['total_duration'],
                                  streamer_dict['average_duration'],
                                  streamer_dict['percentage_duration'],
                                  streamer_dict['partnership'],
                                  streamer_dict['name']
                              ))

    def add_streamer_tier(self, streamer_dict):
        """Insert the tier assignment for a new streamer."""
        self.db.insert_row(
            table='tier_data',
            row_string='(NULL, ?, ?)',
            row_data=[
                streamer_dict['name'],
                self.return_streamer_tier(average_viewers=streamer_dict['viewers_average'])
            ])

    def update_streamer_tier(self, streamer_dict):
        """Update the tier assignment of an existing streamer."""
        self.db.dbcur.execute('UPDATE tier_data SET '
                              'streamer_tier = ? '
                              'WHERE streamer_name = ?', (
                                  self.return_streamer_tier(average_viewers=streamer_dict['viewers_average']),
                                  streamer_dict['name']
                              ))

    def update_global_data(self):
        """Recompute the global_data row from all streamers_data rows and append it."""
        streamers_data = self.db.get_all_rows(table='streamers_data')
        # GLOBAL DATA SCHEMA:
        # ID, TIMESTAMP, STREAMER COUNT, STREAM COUNT, AVERAGE GLOBAL DURATION, TOTAL TIME STREAMED, LONGEST STREAM
        streamer_count = len(streamers_data)
        stream_count = sum([int(row[6]) for row in streamers_data])
        durations = [int(row[8]) for row in streamers_data]
        total_global_duration = sum(durations)
        average_global_duration = calculate_average_from_list(durations)
        # guard against an empty table (max() would raise)
        longest_stream = max(durations) if durations else 0
        self.db.insert_row(
            table='global_data',
            row_string='(NULL, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?)',
            row_data=[
                streamer_count,
                stream_count,
                average_global_duration,
                total_global_duration,
                longest_stream
            ]
        )