def __init__(self, game, streamer_dicts=None):
     """Open the game-wide database, building it first when the file is missing.

     :param game: game name; selects ``data/<game>/<game>_data.db``
     :param streamer_dicts: optional streamer data, stored as given for later use
     """
     self.path = os.path.join(os.getcwd(), 'data', game, '{}_data.db'.format(game))
     self.game = game
     must_build = not self.db_exists()
     if must_build:
         self.create_db()
     self.db = Pysqlite('{} Stream Database'.format(game), self.path, verbose=False)
     if must_build:
         # A freshly created database still needs all of its tables
         self.create_global_data_table()
         self.create_streamers_data_table()
         self.create_tier_bounds_table()
         self.create_tier_data_table()
     self.streamer_dicts = streamer_dicts
示例#2
0
class AllStreamerOverviewsDataPagination:
    """Serve the streamer overviews of one game a page at a time.

    Rows come from the ``streamers_data`` table, ordered by average viewership
    (highest first). ``run()`` must be called before ``get_page()`` because it
    is what opens the database connection.
    """

    def __init__(self, game_name, per_page=10):
        """Store the paging settings; no database access happens here.

        :param game_name: game name used to build ``data/<game>/<game>_data.db``
        :param per_page: number of streamer overviews on each page
        """
        self.game_name = game_name
        self.page = 1
        self.per_page = per_page
        self.max_page = 0
        self.data_list_length = 0
        self.pages = []
        self.db = None

    def run(self):
        """Open the connection to the game's database and keep it on ``self.db``."""
        db_path = os.path.join(os.getcwd(), 'data', self.game_name, '{}_data.db'.format(self.game_name))
        self.db = Pysqlite(database_name='{} Page DB'.format(self.game_name), database_file=db_path)

    def get_page(self, page_number):
        """Return the streamer overview dictionaries shown on ``page_number``.

        Also refreshes ``self.data_list_length`` with the total number of rows,
        which ``get_page_count()`` relies on.

        :param page_number: page to fetch, counted from 1
        :return: list of at most ``per_page`` dictionaries; empty when the page
            lies beyond the available data
        """
        # page_number is NOT zero indexed, so we subtract 1 to find the first row.
        # EXAMPLE: page 2 at 10 per page covers row indices 10 -> 19 inclusive.
        lower = (page_number - 1) * self.per_page
        # Slice ends are exclusive, so the end must be lower + per_page. Using
        # the inclusive upper bound here silently dropped the last row of a page.
        upper = lower + self.per_page
        # get the streamer overviews, ordered by average viewership
        ordered_data = self.db.get_specific_rows(
                table='streamers_data',
                filter_string='id IS NOT NULL ORDER BY viewers_average DESC')
        self.data_list_length = len(ordered_data)
        page_data = ordered_data[lower:upper]
        # map that data to dictionaries
        streamer_dicts = [
            {
                'name': streamer[1],
                'last_update': streamer[2],
                'viewers_average': streamer[3],
                'viewers_peak': streamer[4],
                'followers': streamer[5],
                'stream_count': streamer[6],
                # average viewers multiplied by the total hours streamed
                'viewer_reach': int(streamer[3] * convert_to_hours(streamer[8])),
                'duration_average': convert_to_hours(streamer[7]),
                'duration_total': convert_to_hours(streamer[8]),
                'partnership': streamer[10]
            } for streamer in page_data
        ]
        return streamer_dicts

    def get_page_count(self):
        """Return the number of pages for the row count seen by the last ``get_page()`` call."""
        return int(ceil(self.data_list_length / float(self.per_page)))

    def has_previous_page(self):
        """Return True when ``self.page`` is not the first page."""
        return self.page > 1

    def has_next_page(self):
        """Return True when ``self.page`` is before the last page."""
        return self.page < self.get_page_count()
 def __init__(self, game, streamer_name, stream_dicts):
     """Open the streamer's own database, creating it and its base tables when missing.

     :param game: game name; part of the ``data/<game>/streamers/`` directory
     :param streamer_name: streamer whose ``<streamer_name>.db`` file is used
     :param stream_dicts: stream data, stored as given for later use
     """
     self.path = os.path.join(os.getcwd(), 'data', game, 'streamers', '{}.db'.format(streamer_name))
     self.game = game
     self.streamer_name = streamer_name
     self.next_stream_count = 0
     db_label = '{} {} Stream Database'.format(game, streamer_name)
     found_on_disk = self.db_exists()
     if not found_on_disk:
         self.create_db()
     self.db = Pysqlite(db_label, self.path, verbose=False)
     if found_on_disk:
         # table count minus 3 -- presumably the three non-stream tables; TODO confirm
         self.next_stream_count = len(self.db.get_table_names()) - 3
         print('DB for: {} already exists and already holds {} stream tables'.format(streamer_name, self.next_stream_count))
     else:
         # A new database still lacks the streams and overview tables
         self.create_streams_table()
         self.create_overview_table()
         self.next_stream_count = len(self.db.get_table_names()) - 3
     self.last_stream_stored = len(self.db.get_table_names()) - 3
     self.stream_dicts = stream_dicts
示例#4
0
class GameGlobalData:
    """Read the game-wide overview and streamer tier tables of one game."""

    def __init__(self, game_url_name):
        """Open the database of the game identified by its URL name.

        :param game_url_name: URL form of the game name; converted to the short
            name that the ``data/<short>/<short>_data.db`` path is built from
        """
        self.game_url_name = game_url_name
        self.global_data_list = []
        short_name = convert_name('url', self.game_url_name, 'short')
        self.db_path = os.path.join(os.getcwd(), 'data', short_name, '{}_data.db'.format(short_name))
        self.db = Pysqlite(database_name='{} Global Overview DB'.format(self.game_url_name), database_file=self.db_path)

    def return_global_overview_dict(self):
        """Return the newest ``global_data`` row (highest id) as a dictionary.

        The three duration columns are passed through ``convert_to_hours``.

        :raises IndexError: when the ``global_data`` table holds no rows
        """
        row = self.db.get_specific_rows(table='global_data', filter_string='id = (SELECT MAX(id) FROM global_data)')
        data_list = list(row[0])
        game_dict = {
            'last_updated': data_list[1],
            'streamer_count': data_list[2],
            'stream_count': data_list[3],
            'stream_duration_average': convert_to_hours(data_list[4]),
            'stream_duration_total': convert_to_hours(data_list[5]),
            'stream_duration_max': convert_to_hours(data_list[6])
        }
        return game_dict

    def return_tier_bounds(self):
        """Return one ``{'tier', 'upper', 'lower'}`` dictionary per ``tier_bounds`` row."""
        tier_bounds = self.db.get_all_rows(table='tier_bounds')
        return [{'tier': bound[1], 'upper': bound[2], 'lower': bound[3]} for bound in tier_bounds]

    def return_tier_streamers(self):
        """Return a ``{streamer: tier}`` mapping built from the ``tier_data`` table."""
        streamer_tiers = self.db.get_all_rows(table='tier_data')
        # each row is (id, streamer, tier); the id is not needed
        return {streamer: tier for _, streamer, tier in streamer_tiers}

    def return_tier_count(self, tier_number=0):
        """Return how many ``tier_data`` rows belong to ``tier_number``."""
        streamer_tiers = self.db.get_all_rows(table='tier_data')
        return len([tier for tier in streamer_tiers if tier[2] == tier_number])

    def return_tier_counts(self):
        """Return the streamer count of every tier, ordered from tier 1 upwards.

        The list has one entry per row of ``tier_bounds``. ``tier_data`` is read
        a single time and tallied in memory instead of once per tier.
        """
        tier_total = len(self.db.get_all_rows(table='tier_bounds'))
        tally = {}
        for row in self.db.get_all_rows(table='tier_data'):
            tally[row[2]] = tally.get(row[2], 0) + 1
        return [tally.get(tier_number, 0) for tier_number in range(1, tier_total + 1)]
示例#5
0
class NewsArticlesPagination:
    """Serve news article previews a page at a time, newest first.

    ``run()`` must be called before ``get_page()`` because it is what opens
    the database connection.
    """

    def __init__(self, per_page=4):
        """Store the paging settings; no database access happens here.

        :param per_page: number of article previews on each page
        """
        self.page = 1
        self.per_page = per_page
        self.data_list_length = 0
        self.db = None

    def run(self):
        """Open the connection to ``meta/news.db`` and keep it on ``self.db``."""
        db_path = os.path.join(os.getcwd(), 'meta', 'news.db')
        self.db = Pysqlite(database_name='News DB', database_file=db_path)

    def get_page(self, page_number):
        """Return the article preview dictionaries shown on ``page_number``.

        Also refreshes ``self.data_list_length`` with the total number of
        articles, which ``get_page_count()`` relies on.

        :param page_number: page to fetch, counted from 1
        :return: list of at most ``per_page`` dictionaries; empty when the page
            lies beyond the available data
        """
        # page_number is NOT zero indexed, so we subtract 1 to find the first row.
        # EXAMPLE: page 2 at 10 per page covers row indices 10 -> 19 inclusive.
        lower = (page_number - 1) * self.per_page
        # Slice ends are exclusive, so the end must be lower + per_page. Using
        # the inclusive upper bound here silently dropped the last row of a page.
        upper = lower + self.per_page
        # get the articles, newest first
        ordered_data = self.db.get_specific_rows(
                table='articles',
                filter_string='id IS NOT NULL ORDER BY timestamp DESC')
        self.data_list_length = len(ordered_data)
        page_data = ordered_data[lower:upper]
        # map that data to dictionaries
        article_dicts = [
            {
                'id': article[0],
                'date_written': article[1].split(' ')[0],  # pass only the date part and not the time
                'title': article[2],
                'contents': article[3][:150] + '...',  # truncate the contents string up to the first 150 characters
                'word_count': int(article[4]),
                # TODO: Implement not showing the article if it is not marked as published
                'published': int(article[5]) == 1  # always a bool (was True or 0)
            } for article in page_data
        ]
        return article_dicts

    def get_page_count(self):
        """Return the number of pages for the article count seen by the last ``get_page()`` call."""
        return int(ceil(self.data_list_length / float(self.per_page)))

    def has_previous_page(self):
        """Return True when ``self.page`` is not the first page."""
        return self.page > 1

    def has_next_page(self):
        """Return True when ``self.page`` is before the last page."""
        return self.page < self.get_page_count()
示例#6
0
class StreamData:
    """Read the overview and the raw samples of a single recorded stream."""

    def __init__(self, streamer_name, game_name, stream_id):
        """Remember which stream to read; the database is opened later by ``run()``.

        :param streamer_name: streamer whose database file is used
        :param game_name: game the streamer's database is filed under
        :param stream_id: stream number as shown by the frontend (counted from 1)
        """
        self.streamer_name = streamer_name
        self.game_name = game_name
        # Backend is zero indexed, frontend is not
        self.stream_id = int(stream_id) - 1
        self.max_stream_id = 0
        self.db = None

    def run(self):
        """Open the streamer's database and record how many stream tables it holds."""
        streamer_db_file = os.path.join(
            os.getcwd(), 'data', self.game_name, 'streamers', '{}.db'.format(self.streamer_name))
        db_label = '{} {} DB'.format(self.game_name, self.streamer_name)
        self.db = Pysqlite(database_name=db_label, database_file=streamer_db_file)
        # table count minus 3 -- presumably the three non-stream tables; TODO confirm
        table_names = self.db.get_table_names()
        self.max_stream_id = len(table_names) - 3

    def get_stream_data(self):
        """Return the ``streams`` table row of this stream as a dictionary.

        :raises IndexError: when no row with the requested id exists
        """
        # ids in the streams table count from 1, unlike self.stream_id
        frontend_id = self.stream_id + 1
        matching_rows = self.db.get_specific_rows(
            table='streams',
            filter_string='id IS {}'.format(frontend_id))
        overview = matching_rows[0]
        return {
            'id': frontend_id,
            'max_id': self.max_stream_id,
            'time_start': overview[1],
            'duration': convert_to_hours(overview[2]),
            'viewers_average': overview[3],
            'viewers_peak': overview[4],
            'follower_delta': overview[5]
        }

    def get_stream_raw_data(self):
        """Return every row of this stream's own ``stream_<n>`` table."""
        stream_table = 'stream_{}'.format(self.stream_id)
        return self.db.get_all_rows(table=stream_table)

    def get_stream_viewer_data_json(self):
        """Return the stream's samples as a JSON list of ``[timestamp, viewers]`` pairs."""
        # Timestamp in the X axis, viewer count in the Y axis
        points = []
        for sample in self.get_stream_raw_data():
            points.append([sample[1], sample[2]])
        return json.dumps(points)
示例#7
0
class NewsArticle:
    """Read one full news article from ``meta/news.db``."""

    def __init__(self, article_number=1):
        """Open the news database.

        :param article_number: id of the article row to read
        """
        self.article_number = article_number
        self.db_path = os.path.join(os.getcwd(), 'meta', 'news.db')
        self.db = Pysqlite(database_name='News DB', database_file=self.db_path)

    def get_article(self):
        """Return the article whose id is ``self.article_number`` as a dictionary.

        :return: dictionary with the keys ``id``, ``date_written``, ``title``,
            ``contents``, ``word_count`` and ``published`` (a bool)
        :raises IndexError: when no article with that id exists
        """
        # get the article data by the ID
        article = self.db.get_specific_rows(
                table='articles',
                filter_string='id IS {}'.format(self.article_number))[0]
        # map that data to a dictionary
        article_dict = {
            'id': article[0],
            'date_written': article[1].split(' ')[0],  # pass only the date part and not the time
            'title': article[2],
            'contents': article[3],
            'word_count': int(article[4]),
            # TODO: Implement not showing the article if it is not marked as published
            'published': int(article[5]) == 1  # always a bool (was True or 0)
        }
        return article_dict
示例#8
0
 def run(self):
     """Open the connection to ``meta/news.db`` and keep it on ``self.db``."""
     news_db_file = os.path.join(os.getcwd(), 'meta', 'news.db')
     self.db = Pysqlite(database_file=news_db_file, database_name='News DB')
示例#9
0
 def run(self):
     """Open the streamer's database and record how many stream tables it holds."""
     streamer_db_file = os.path.join(
         os.getcwd(), 'data', self.game_name, 'streamers', '{}.db'.format(self.streamer_name))
     db_label = '{} {} DB'.format(self.game_name, self.streamer_name)
     self.db = Pysqlite(database_name=db_label, database_file=streamer_db_file)
     # table count minus 3 -- presumably the three non-stream tables; TODO confirm
     table_names = self.db.get_table_names()
     self.max_stream_id = len(table_names) - 3
示例#10
0
 def run(self):
     """Open the connection to the game's database and keep it on ``self.db``."""
     game_db_file = os.path.join(
         os.getcwd(), 'data', self.game_name, '{}_data.db'.format(self.game_name))
     db_label = '{} Page DB'.format(self.game_name)
     self.db = Pysqlite(database_name=db_label, database_file=game_db_file)
示例#11
0
class StreamsDataPagination:
    """Serve the stream overviews of one streamer a page at a time, newest first.

    ``run()`` must be called before any of the data methods because it is what
    opens the database connection.
    """

    def __init__(self, game_name, streamer_name, per_page=10):
        """Store the paging settings; no database access happens here.

        :param game_name: game the streamer's database is filed under
        :param streamer_name: streamer whose ``<streamer_name>.db`` file is used
        :param per_page: number of stream overviews on each page
        """
        self.game_name = game_name
        self.streamer_name = streamer_name
        self.page = 1
        self.per_page = per_page
        self.data_list_length = 0
        self.db = None

    def run(self):
        """Open the connection to the streamer's database and keep it on ``self.db``."""
        db_path = os.path.join(os.getcwd(), 'data', self.game_name, 'streamers', '{}.db'.format(self.streamer_name))
        self.db = Pysqlite(database_name='{} {} DB'.format(self.game_name, self.streamer_name), database_file=db_path)

    @staticmethod
    def _stream_row_to_dict(stream):
        """Map one row of the ``streams`` table to the dictionary handed to callers."""
        return {
            'id': stream[0],
            'start_time': stream[1],
            'duration': convert_to_hours(stream[2]),
            'viewers_average': stream[3],
            'viewers_peak': stream[4],
            'follower_delta': stream[5],
        }

    def _get_ordered_streams(self):
        """Return every row of the ``streams`` table, newest timestamp first."""
        return self.db.get_specific_rows(
            table='streams',
            filter_string='id IS NOT NULL ORDER BY timestamp DESC')

    def get_page(self, page_number):
        """Return the stream overview dictionaries shown on ``page_number``.

        Also refreshes ``self.data_list_length`` with the total number of
        streams, which ``get_page_count()`` relies on.

        :param page_number: page to fetch, counted from 1
        :return: list of at most ``per_page`` dictionaries; empty when the page
            lies beyond the available data
        """
        # page_number is NOT zero indexed, so we subtract 1 to find the first row.
        # EXAMPLE: page 2 at 10 per page covers row indices 10 -> 19 inclusive.
        lower = (page_number - 1) * self.per_page
        # Slice ends are exclusive, so the end must be lower + per_page. Using
        # the inclusive upper bound here silently dropped the last row of a page.
        upper = lower + self.per_page
        ordered_data = self._get_ordered_streams()
        self.data_list_length = len(ordered_data)
        return [self._stream_row_to_dict(stream) for stream in ordered_data[lower:upper]]

    def get_all_streams_dicts(self):
        """Return the overview dictionaries of every stream, newest first."""
        return [self._stream_row_to_dict(stream) for stream in self._get_ordered_streams()]

    def get_average_viewer_count_dicts(self):
        """Return only the start time and average viewers of every stream, newest first."""
        stream_dicts = self.get_all_streams_dicts()
        return [
            {
                'start_time': stream['start_time'],
                'viewers_average': stream['viewers_average']
            } for stream in stream_dicts
        ]

    def get_page_count(self):
        """Return the number of pages for the stream count seen by the last ``get_page()`` call."""
        return int(ceil(self.data_list_length / float(self.per_page)))

    def has_previous_page(self):
        """Return True when ``self.page`` is not the first page."""
        return self.page > 1

    def has_next_page(self):
        """Return True when ``self.page`` is before the last page."""
        return self.page < self.get_page_count()
示例#12
0
 def __init__(self, game_name, game_shorthand, db_mid_directory, db_name_format='{}_stats.db', verbose=False):
     """Open the statistics database of one game.

     :param game_name: name of the game, kept on ``self.name``
     :param game_shorthand: short name that ``db_name_format`` is filled with
     :param db_mid_directory: directory below the working directory that holds the database
     :param db_name_format: format string giving the database file name
     :param verbose: verbosity flag, kept on ``self.verbose``
     """
     self.name = game_name
     self.shorthand = game_shorthand
     db_file_name = db_name_format.format(game_shorthand)
     self.db_file_path = os.path.join(os.getcwd(), db_mid_directory, db_file_name)
     self.db = Pysqlite(database_file=self.db_file_path, database_name='twitch_stats')
     self.verbose = verbose
示例#13
0
class TwitchStatisticsOutput:
    """Turn the raw per-streamer tables of one game into a plain-text statistics report.

    Every table in the game's database other than ``test`` and
    ``sqlite_sequence`` is treated as the recording of one streamer.
    """

    # bounds for the tiers of streamers (inclusive, compared against average viewers)
    tier_one_bounds = {'upper': 999999, 'lower': 100}
    tier_two_bounds = {'upper': 99, 'lower': 50}
    tier_three_bounds = {'upper': 49, 'lower': 15}
    tier_four_bounds = {'upper': 14, 'lower': 0}

    def __init__(self, game_name, game_shorthand, db_mid_directory, db_name_format='{}_stats.db', verbose=False):
        """Open the statistics database of one game.

        :param game_name: display name of the game, used in the report and log output
        :param game_shorthand: short name used to build the database and report file names
        :param db_mid_directory: directory below the working directory that holds the database
        :param db_name_format: format string that receives ``game_shorthand`` and gives the database file name
        :param verbose: when true, ``run()`` prints which game is being processed
        """
        self.name = game_name
        self.shorthand = game_shorthand
        self.db_file_path = os.path.join(os.getcwd(), db_mid_directory, db_name_format.format(game_shorthand))
        self.db = Pysqlite(database_name='twitch_stats', database_file=self.db_file_path)
        self.verbose = verbose

    def run(self):
        """Collect the statistics of every streamer table and write the report file."""
        if self.verbose:
            print('Processing data for game: {}'.format(self.name))
        tables = self.db.get_table_names()
        tables = [table for table in tables if table not in ['test', 'sqlite_sequence']]
        # get the table names which do not start with a number
        valid_named_tables = [table for table in tables if not table[0][0].isdigit()]
        # get the table names which start with a number
        number_start_tables = [table for table in tables if table[0][0].isdigit()]
        # names starting with a digit are looked up with a leading underscore
        valid_named_tables.extend(['_' + table for table in number_start_tables])
        # initialise list for all the data
        all_streamer_data = []
        # list any streamers to ignore
        streamers_to_ignore = ['legenddolby1986']
        for streamer in tqdm(valid_named_tables):
            if streamer in streamers_to_ignore:
                # skip if its on the ignore list
                continue
            # get the db data from the table of the same name as the streamer
            all_streamer_data.append(self.get_streamer_dict(streamer))
        # write the data to the text file
        self.write_text_file(streamer_data=all_streamer_data)

    def return_streamer_tier(self, average_viewers):
        """Return the tier (1-4) whose inclusive bounds contain ``average_viewers``.

        :return: the tier number, or 0 when the value lies outside every tier
        """
        all_bounds = (self.tier_one_bounds, self.tier_two_bounds, self.tier_three_bounds, self.tier_four_bounds)
        for tier, bounds in enumerate(all_bounds, start=1):
            if bounds['upper'] >= average_viewers >= bounds['lower']:
                return tier
        return 0

    @staticmethod
    def _empty_streamer_dict(streamer):
        """Return the statistics dictionary used for a streamer without any data."""
        return {
            'name': streamer,
            'partnership': False,
            'tier': 4,
            'viewers': [],
            'viewers_max': 0,
            'viewers_average': 0.0,
            'followers': [],
            'followers_max': 0,
            'times': [],
            'durations': [],
            'durations_max': 0,
            'durations_average': 0.0,
            'durations_total': 0.0,
            'stream_count': 0,
        }

    def get_streamer_dict(self, streamer):
        """Return the statistics of one streamer, read from the table named ``streamer``.

        Column 1 of each row is read as the viewer count, column 2 as the
        follower count, column 3 as the partnership flag and column 4 as the
        sample time.

        :param streamer: table name of the streamer
        :return: statistics dictionary; the all-empty defaults are returned when
            the table cannot be read or holds no rows
        """
        streamer_dict = self._empty_streamer_dict(streamer)
        # a table that cannot be found yields the empty defaults instead of an error
        try:
            data = self.db.get_all_rows(table=streamer)
        except PysqliteCouldNotRetrieveData:
            return streamer_dict
        if not data:
            # nothing recorded: data[-1] and max() below would fail on an empty list
            return streamer_dict
        streamer_dict['partnership'] = data[-1][3] == 1
        viewers = [field[1] for field in data]
        streamer_dict['viewers'] = viewers
        streamer_dict['viewers_max'] = max(viewers)
        streamer_dict['viewers_average'] = sum(viewers) // len(viewers)
        streamer_dict['tier'] = self.return_streamer_tier(streamer_dict['viewers_average'])
        followers = [field[2] for field in data]
        streamer_dict['followers'] = followers
        # the most recent follower count, not the numeric maximum
        streamer_dict['followers_max'] = followers[-1]
        streamer_dict['times'] = [field[4] for field in data]  # times
        durations = get_stream_durations(streamer_dict['times'])
        streamer_dict['durations'] = durations
        total_duration = sum(durations)
        streamer_dict['durations_max'] = max(durations) if durations else 0
        # the average is the total divided by the stream count (it used to repeat the total)
        streamer_dict['durations_average'] = round(total_duration / float(len(durations)), 2) if durations else 0.0
        streamer_dict['durations_total'] = round(total_duration, 2)
        streamer_dict['stream_count'] = len(durations)
        return streamer_dict

    def write_text_file(self, streamer_data):
        """Write the report to ``data/<shorthand>_Twitch_Stats.txt``, replacing any previous file.

        :param streamer_data: list of dictionaries as returned by ``get_streamer_dict``
        """
        # only streamers with at least one stream contribute to the totals
        non_empty_durations = [streamer['durations'] for streamer in streamer_data if streamer['durations']]
        # get the longest consecutive stream (0 when nothing was recorded at all)
        longest_stream = max((max(duration_set) for duration_set in non_empty_durations), default=0)
        # calculate total time streamed over all streamers
        total_duration = round(sum(sum(duration_set) for duration_set in non_empty_durations), 2)
        # calculate the total number of discrete streams
        total_streams = sum(streamer['stream_count'] for streamer in streamer_data)
        tier_report = (
            ('One', 1, self.tier_one_bounds),
            ('Two', 2, self.tier_two_bounds),
            ('Three', 3, self.tier_three_bounds),
            ('Four', 4, self.tier_four_bounds),
        )
        text_file_path = os.path.join(os.getcwd(), 'data', '{}_Twitch_Stats.txt'.format(self.shorthand))
        with open(text_file_path, mode='w', encoding='utf-8') as file:
            file.write('{} Twitch Streamer Statistics\n'.format(self.name))
            file.write('Data recorded 24/7 via twitch\'s public API every ~20 seconds\n')
            file.write('Script written by Simon Agius Muscat / CMDR Purrcat\n')
            file.write('More information can be found at: https://github.com/purrcat259/twitch-statistics\n')
            file.write('Total streamers recorded: {}\n'.format(len(streamer_data)))
            file.write('Total streams recorded: {}\n'.format(total_streams))
            file.write('Total time streamed: {} hours\n'.format(total_duration))
            file.write('Longest single stream: {} hours\n'.format(round(longest_stream, 2)))
            for label, tier, bounds in tier_report:
                tier_size = len([s for s in streamer_data if self.return_streamer_tier(s['viewers_average']) == tier])
                file.write('Tier {} Bounds: {} >= Average Viewers >= {}\n'.format(label, bounds['upper'], bounds['lower']))
                file.write('Tier {} Streamers: {}\n'.format(label, tier_size))
            for streamer in streamer_data:
                # skip streamers with total durations less than 0.2 hours (12 minutes)
                if streamer['durations_total'] < 0.2:
                    continue
                file.write('\nStreamer: {} (T{})\n'.format(streamer['name'], streamer['tier']))
                file.write('Partnered: {} \n'.format(streamer['partnership']))
                file.write('Average Viewers: {}\n'.format(streamer['viewers_average']))
                file.write('Peak Viewers: {}\n'.format(streamer['viewers_max']))
                file.write('Followers: {}\n'.format(streamer['followers_max']))
                file.write('Stream count: {}\n'.format(streamer['stream_count']))
                file.write('Average Stream duration: {} hours\n'.format(streamer['durations_average']))
                file.write('Longest Stream duration: {} hours\n'.format(streamer['durations_max']))
                file.write('Total time streamed: {} hours\n'.format(streamer['durations_total']))
                time_percentage = round((streamer['durations_total'] / total_duration) * 100, 3)
                file.write('Percentage streamed of total duration: {}%\n'.format(time_percentage))
                file.write('Stream durations:\n')
                for duration in streamer['durations']:
                    if duration < 1.0:
                        # durations are in hours; anything under an hour is shown in minutes
                        duration = round(duration * 60, 2)
                        # skip stream durations less than 5 minutes
                        if duration < 5.0:
                            continue
                        file.write('\t{} minutes\n'.format(duration))
                    else:
                        file.write('\t{} hours\n'.format(duration))
from neopysqlite.neopysqlite import Pysqlite
from tqdm import tqdm


# One-off maintenance script: walks every streamer database of every listed
# game and splits the timestamp of each stored row into its numeric parts.
# Switches selecting which groups of tables to process. Only fix_stream_tables
# is read in the portion of the script shown here.
fix_stream_tables = True
fix_game_tables = True

# Game directory names found under data/
games = ['ED',  'PC']
for game in games:
    # every entry of the streamers directory except 'base' is used as a streamer database file
    streamers = os.listdir(os.path.join(os.getcwd(), 'data', game, 'streamers'))
    streamers.remove('base')
    print('Processing timestamps for streamers of game: {}'.format(game))
    if fix_stream_tables:
        for streamer in tqdm(streamers):
            streamer_db_path = os.path.join(os.getcwd(), 'data', game, 'streamers', streamer)
            db = Pysqlite(database_name='{} DB'.format(streamer), database_file=streamer_db_path)
            # table count minus 3 -- presumably the three non-stream tables; TODO confirm
            table_count = len(db.get_table_names()) - 3
            # print('{} has {} stream tables'.format(streamer, table_count))
            # process every stream_<n> table plus the two summary tables
            table_names = ['stream_{}'.format(number) for number in range(0, table_count)]
            table_names.append('overview')
            table_names.append('streams')
            for table_name in table_names:
                rows = db.get_all_rows(table=table_name)
                for row in tqdm(rows):
                    # convert anything in DD-MM-YYYY HH:MM:SS to YYYY-MM-DD HH:MM:SS
                    # NOTE(review): the unpacking below reads the FIRST date field as the
                    # year, which does not match a DD-MM-YYYY input -- confirm the format
                    old_timestamp = row[1]  # column 1 holds the timestamp text
                    split_string = old_timestamp.split(' ')
                    date_part = split_string[0].split('-')
                    time_part = split_string[1].split(':')
                    year, month, day = int(date_part[0]), int(date_part[1]), int(date_part[2])
                    hour, minute, second = int(time_part[0]), int(time_part[1]), int(time_part[2])
class StreamerDB:
    """Store the recorded streams of one streamer in that streamer's own SQLite file.

    The file lives at ``data/<game>/streamers/<streamer_name>.db``. It is opened
    as soon as the object is built, after being created from a template when
    it does not exist yet.
    """

    def __init__(self, game, streamer_name, stream_dicts):
        """Open the streamer's database, creating it and its base tables when missing.

        :param game: game name; part of the ``data/<game>/streamers/`` directory
        :param streamer_name: streamer whose ``<streamer_name>.db`` file is used
        :param stream_dicts: list of stream dictionaries that ``run()`` imports
        """
        self.path = os.path.join(os.getcwd(), 'data', game, 'streamers', '{}.db'.format(streamer_name))
        self.game = game
        self.streamer_name = streamer_name
        self.next_stream_count = 0
        db_label = '{} {} Stream Database'.format(game, streamer_name)
        found_on_disk = self.db_exists()
        if not found_on_disk:
            self.create_db()
        self.db = Pysqlite(db_label, self.path, verbose=False)
        if found_on_disk:
            # table count minus 3 -- presumably the three non-stream tables; TODO confirm
            self.next_stream_count = len(self.db.get_table_names()) - 3
            print('DB for: {} already exists and already holds {} stream tables'.format(streamer_name, self.next_stream_count))
        else:
            # A new database still lacks the streams and overview tables
            self.create_streams_table()
            self.create_overview_table()
            self.next_stream_count = len(self.db.get_table_names()) - 3
        self.last_stream_stored = len(self.db.get_table_names()) - 3
        self.stream_dicts = stream_dicts

    def run(self):
        """Import all pending streams, then append a fresh overview row."""
        self.import_csv_data()
        self.generate_overview_for_all_streams()

    def db_exists(self):
        """Return True when the streamer's database file is present on disk."""
        return os.path.isfile(self.path)

    def create_db(self):
        """Copy the template database into place unless the file already exists."""
        if self.db_exists():
            print('Database for {} already exists'.format(self.streamer_name))
            return
        print('Database for {} does not exist. Creating DB now.'.format(self.streamer_name))
        template_path = os.path.join(os.getcwd(), 'data', self.game, 'streamers', 'base', 'test_streamer.db')
        copy_file(src=template_path, dst=self.path)

    def create_overview_table(self):
        """Create the ``overview`` table that holds the streamer-wide summaries."""
        print('Creating the overview table for: {}'.format(self.streamer_name))
        time.sleep(1)
        columns = [
            '`id`\tINTEGER NOT NULL PRIMARY KEY AUTOINCREMENT',
            '`timestamp`\tTEXT NOT NULL',
            '`viewers_average` INTEGER NOT NULL',
            '`viewers_peak` INTEGER NOT NULL',
            '`followers`\tINTEGER NOT NULL',
            '`average_time_streamed`\tINTEGER',
            '`total_time_streamed`\tINTEGER NOT NULL',
            '`partnership`\tINTEGER NOT NULL DEFAULT 0',
        ]
        self.db.execute_sql('CREATE TABLE `overview` (' + ','.join(columns) + ');')

    def create_streams_table(self):
        """Create the ``streams`` table that holds one summary row per stream."""
        print('Creating the streams table for: {}'.format(self.streamer_name))
        time.sleep(1)
        columns = [
            '`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT',
            '`timestamp`\tTEXT NOT NULL',
            '`duration` INTEGER NOT NULL',
            '`viewers_average` INTEGER NOT NULL',
            '`viewers_peak` INTEGER NOT NULL',
            '`follower_increase` INTEGER NOT NULL',
        ]
        self.db.execute_sql('CREATE TABLE `streams` (' + ', '.join(columns) + ')')

    def create_stream_table(self):
        """Create the raw-sample table ``stream_<next_stream_count>`` for the next stream."""
        print('Creating stream_{} table for: {}'.format(self.next_stream_count, self.streamer_name))
        time.sleep(1)
        template = ('CREATE TABLE "stream_{}" '
                    '(`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,'
                    '`timestamp`\tTEXT NOT NULL, '
                    '`viewers` INTEGER NOT NULL, '
                    '`followers` INTEGER NOT NULL, '
                    '`partnership`INTEGER NOT NULL DEFAULT 0)')
        self.db.execute_sql(template.format(self.next_stream_count))

    def import_csv_data(self):
        """Store every stream of ``self.stream_dicts`` in a table of its own.

        Each stream also gets one summary row in the ``streams`` table.
        """
        print('Importing CSV data into stream tables for: {}'.format(self.streamer_name))
        for stream_dict in self.stream_dicts:
            # every stream gets a table of its own
            self.create_stream_table()
            target_table = 'stream_{}'.format(self.next_stream_count)
            # CSV schema is NAME, VIEWERS, FOLLOWERS, PARTNERSHIP, TIMESTAMP
            # DB schema is ID, TIMESTAMP, VIEWERS, FOLLOWERS, PARTNERSHIP
            reordered_rows = []
            for csv_row in stream_dict['raw_data']:
                reordered_rows.append([csv_row[4], csv_row[1], csv_row[2], csv_row[3]])
            # rows are inserted one at a time (the bulk insert_rows call is not used)
            for db_row in tqdm(reordered_rows):
                self.db.insert_row(
                    table=target_table,
                    row_string='(NULL, ?, ?, ?, ?)',
                    row_data=db_row)
            # add the summary row of this stream to the streams table
            self.generate_stream_data_row(stream_dict=stream_dict)
            # move on to the next stream table number
            self.next_stream_count += 1
        # update the number of streams stored
        self.last_stream_stored = len(self.db.get_table_names()) - 3

    def generate_stream_data_row(self, stream_dict):
        """Insert the summary row of one stream into the ``streams`` table.

        :param stream_dict: dictionary with the keys ``start_timestamp``,
            ``duration`` and ``raw_data`` (rows in the CSV schema)
        """
        # Streams table schema:
        # ID, Date + start time, duration (seconds), average viewership, peak viewership, follower differential
        samples = stream_dict['raw_data']
        viewer_counts = [sample[1] for sample in samples]
        average_viewers = calculate_average_from_list(viewer_counts)
        peak_viewers = max(viewer_counts)
        # last follower count - first follower count
        followers_gained = int(samples[-1][2]) - int(samples[0][2])
        summary_row = [
            stream_dict['start_timestamp'],
            stream_dict['duration'],
            average_viewers,
            peak_viewers,
            followers_gained,
        ]
        self.db.insert_row(
            table='streams',
            row_string='(NULL, ?, ?, ?, ?, ?)',
            row_data=summary_row
        )

    def generate_overview_for_all_streams(self):
        """Append a new ``overview`` row summarising every stream stored so far."""
        print('Generating overview for all streams so far')
        # Streams table schema:
        # ID, Date + start time, duration (seconds), average viewership, peak viewership, follower differential
        stream_rows = self.db.get_all_rows('streams')
        # duration figures
        durations = [int(stream[2]) for stream in stream_rows]
        total_duration = sum(durations)
        average_duration = calculate_average_from_list(durations)
        # viewer figures
        average_viewers = calculate_average_from_list([int(stream[3]) for stream in stream_rows])
        peaks = [int(stream[4]) for stream in stream_rows]
        highest_peak = max(peaks) if peaks else 0
        # followers and partnership come from the last sample of the newest stream table
        newest_samples = self.db.get_all_rows('stream_{}'.format(self.last_stream_stored - 1))
        final_sample = newest_samples[-1]
        follower_count = final_sample[3]
        partnered = final_sample[4]
        # Overview table schema:
        # ID, CURRENT TIMESTAMP, AVERAGE VIEWERS, PEAK VIEWERS, FOLLOWERS, AVERAGE STREAM DURATION,
        # TOTAL STREAM DURATION, PARTNERSHIP
        self.db.insert_row(
            table='overview',
            row_string='(NULL, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?)',
            row_data=[average_viewers, highest_peak, follower_count, average_duration, total_duration, partnered])

    def return_last_overview(self):
        """Return the newest ``overview`` row, or an empty list when there is none."""
        overviews = self.db.get_all_rows(table='overview')
        return overviews[-1] if len(overviews) != 0 else []

    def return_stream_count(self):
        """Return the number that the next stream table will be given."""
        return self.next_stream_count
示例#16
0
 def __init__(self, article_number=1):
     """Open the news database.

     :param article_number: article number, kept on ``self.article_number``
     """
     self.article_number = article_number
     news_db_file = os.path.join(os.getcwd(), 'meta', 'news.db')
     self.db_path = news_db_file
     self.db = Pysqlite(database_file=self.db_path, database_name='News DB')
示例#17
0
 def run(self):
     """Open the connection to the streamer's database and keep it on ``self.db``."""
     streamer_db_file = os.path.join(
         os.getcwd(), 'data', self.game_name, 'streamers', '{}.db'.format(self.streamer_name))
     db_label = '{} {} DB'.format(self.game_name, self.streamer_name)
     self.db = Pysqlite(database_name=db_label, database_file=streamer_db_file)
示例#18
0
 def __init__(self, game_url_name):
     """Open the database of the game identified by its URL name.

     :param game_url_name: URL form of the game name; converted to the short
         name that the ``data/<short>/<short>_data.db`` path is built from
     """
     self.game_url_name = game_url_name
     self.global_data_list = []
     game_short = convert_name('url', self.game_url_name, 'short')
     db_file_name = '{}_data.db'.format(game_short)
     self.db_path = os.path.join(os.getcwd(), 'data', game_short, db_file_name)
     db_label = '{} Global Overview DB'.format(self.game_url_name)
     self.db = Pysqlite(database_name=db_label, database_file=self.db_path)
class GameDB:
    def __init__(self, game, streamer_dicts=None):
        """Open the game's database, creating it and its tables on first use.

        Args:
            game: Game name; used for the data directory, the database file
                name and the connection label.
            streamer_dicts: Stored unchanged on ``self.streamer_dicts`` for
                ``run`` to process (defaults to ``None``).
        """
        self.path = os.path.join(os.getcwd(), 'data', game, '{}_data.db'.format(game))
        self.game = game
        # Decide once, before any file is created, whether this is a first run.
        is_first_run = not self.db_exists()
        if is_first_run:
            self.create_db()
        self.db = Pysqlite('{} Stream Database'.format(game), self.path, verbose=False)
        if is_first_run:
            # The database did not exist before this call, so create the tables.
            self.create_global_data_table()
            self.create_streamers_data_table()
            self.create_tier_bounds_table()
            self.create_tier_data_table()
        self.streamer_dicts = streamer_dicts

    def run(self):
        """Store every streamer in ``self.streamer_dicts`` and refresh the global data.

        Streamers whose name is already present in ``streamers_data`` are
        updated (row and tier); all others are inserted (row and tier). The
        changes are committed, a new ``global_data`` row is written, and the
        database is vacuumed.
        """
        # ``streamer_dicts`` defaults to None in __init__; treat that as
        # "nothing to process" instead of failing on len()/iteration.
        streamer_dicts = self.streamer_dicts if self.streamer_dicts is not None else []
        # A set keeps the per-streamer membership test O(1).
        stored_names = set(self.get_streamers_already_stored())
        # Report what the loop below will actually do, not how many rows
        # happen to be stored already.
        update_count = sum(
            1 for streamer_dict in streamer_dicts if streamer_dict['name'] in stored_names
        )
        print('Additions: {}'.format(len(streamer_dicts) - update_count))
        print('Updates: {}'.format(update_count))
        time.sleep(0.1)  # avoids same line progress bar
        for streamer_dict in tqdm(streamer_dicts):
            if streamer_dict['name'] in stored_names:
                self.update_streamer_data(streamer_dict)
                self.update_streamer_tier(streamer_dict)
            else:
                self.insert_streamer_data(streamer_dict)
                self.add_streamer_tier(streamer_dict)
        # commit the data after updating as it does not do so itself
        self.db.dbcon.commit()
        # update the global data
        self.update_global_data()
        print('Vacuuming Database to retrieve space')
        # vacuum the old space now
        self.db.execute_sql('VACUUM')
        # commit the vacuum
        self.db.dbcon.commit()

    def db_exists(self):
        return os.path.isfile(self.path)

    def create_db(self):
        if not self.db_exists():
            print('Database for the game: {} does not exist. Creating DB now.'.format(self.game))
            copy_file(
                src=os.path.join(os.getcwd(), 'data', 'base', 'test_game.db'),
                dst=self.path
            )
        else:
            print('Database for game: {} already exists'.format(self.game))

    def create_global_data_table(self):
        """Create the ``global_data`` table in the game's database."""
        print('Creating global data table for: {}'.format(self.game))
        time.sleep(1)
        column_definitions = (
            '`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT',
            '`timestamp`\tTEXT NOT NULL',
            '`streamer_count` INTEGER NOT NULL',
            '`stream_count` INTEGER NOT NULL',
            '`average_time_streamed`\tINTEGER NOT NULL',
            '`total_time_streamed` INTEGER NOT NULL',
            '`longest_stream` INTEGER NOT NULL',
        )
        # Columns are separated by a bare comma so the statement text stays
        # exactly what it has always been.
        create_statement = 'CREATE TABLE "global_data" (' + ','.join(column_definitions) + ')'
        self.db.execute_sql(create_statement)

    def create_streamers_data_table(self):
        """Create the ``streamers_data`` table in the game's database."""
        print('Creating streamers data table for: {}'.format(self.game))
        time.sleep(1)
        # Fragments carry their own separators; the one after
        # `percentage_duration` has no trailing space, as before.
        statement_fragments = (
            'CREATE TABLE "streamers_data" (',
            '`id`\tINTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, ',
            '`name`\tTEXT NOT NULL, ',
            '`last_updated` TEXT NOT NULL, ',
            '`viewers_average` INTEGER NOT NULL, ',
            '`viewers_peak` INTEGER NOT NULL, ',
            '`followers`\tINTEGER NOT NULL, ',
            '`stream_count` INTEGER NOT NULL, ',
            '`average_time_streamed` INTEGER NOT NULL, ',
            '`total_time_streamed` INTEGER NOT NULL, ',
            '`percentage_duration` REAL NOT NULL,',
            '`partnership` INTEGER NOT NULL',
            ')',
        )
        create_statement = ''.join(statement_fragments)
        self.db.execute_sql(create_statement)

    def create_tier_bounds_table(self):
        """Create the ``tier_bounds`` table and fill it from interactive input.

        Prompts on stdin for the number of tiers and then for the inclusive
        upper and lower bound of each tier, inserting one row per tier with
        tier numbers starting at 1.
        """
        print('Creating tier bounds table for: {}'.format(self.game))
        time.sleep(1)
        statement_fragments = (
            'CREATE TABLE "tier_bounds" (`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,',
            '`number` INTEGER NOT NULL, ',
            '`upper_bound` INTEGER NOT NULL, ',
            '`lower_bound` INTEGER NOT NULL)',
        )
        self.db.execute_sql(''.join(statement_fragments))
        time.sleep(1)
        tier_amount = int(input('Please enter the number of tiers that will be present: '))
        # Tier numbers are 1-based.
        for tier_number in range(1, tier_amount + 1):
            print('BOUND NUMBERS ARE BOTH INCLUSIVE. FOR 100 TO 50, ENTER 100 AS UPPER AND 50 AS LOWER')
            upper_bound = int(input('Please enter the upper bound for tier {}: '.format(tier_number)))
            lower_bound = int(input('Please enter the lower bound for tier {}: '.format(tier_number)))
            self.db.insert_row(
                table='tier_bounds',
                row_string='(NULL, ?, ?, ?)',
                row_data=[tier_number, upper_bound, lower_bound],
            )

    def create_tier_data_table(self):
        """Create the ``tier_data`` table in the game's database."""
        print('Creating tier data table for: {}'.format(self.game))
        time.sleep(1)
        statement_fragments = (
            'CREATE TABLE `tier_data` (`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, ',
            '`streamer_name`\tTEXT NOT NULL, ',
            '`streamer_tier` INTEGER NOT NULL)',
        )
        self.db.execute_sql(''.join(statement_fragments))

    def return_streamer_tier(self, average_viewers):
        bounds = self.db.get_all_rows('tier_bounds')
        for i, tier, upper, lower in bounds:
            if upper >= average_viewers >= lower:
                return tier
        else:
            return 0

    # return the names of the streamers already stored
    def get_streamers_already_stored(self):
        streamers = self.db.get_all_rows('streamers_data')
        return [row[1] for row in streamers]

    def insert_streamer_data(self, streamer_dict):
        # print('Adding row for: {}'.format(streamer_dict['name']))
        self.db.insert_row(
            table='streamers_data',
            row_string='(NULL, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
            row_data=[
                streamer_dict['name'],
                streamer_dict['last_update'],
                streamer_dict['viewers_average'],
                streamer_dict['viewers_peak'],
                streamer_dict['followers'],
                streamer_dict['stream_count'],
                streamer_dict['average_duration'],
                streamer_dict['total_duration'],
                streamer_dict['percentage_duration'],
                streamer_dict['partnership']
            ]
        )

    def update_streamer_data(self, streamer_dict):
        # no neopysqlite method for updating rows yet :(
        # UPDATE table_name SET column1 = value1, columnN = valueN... WHERE name = `streamer_name`
        self.db.dbcur.execute('UPDATE streamers_data SET '
                              'last_updated = ?,'
                              'viewers_average = ?,'
                              'viewers_peak = ?,'
                              'followers = ?,'
                              'stream_count = ?,'
                              'total_time_streamed = ?,'
                              'average_time_streamed = ?,'
                              'percentage_duration = ?,'
                              'partnership = ?'
                              'WHERE name = ?',
                              (
                                  streamer_dict['last_update'],
                                  streamer_dict['viewers_average'],
                                  streamer_dict['viewers_peak'],
                                  streamer_dict['followers'],
                                  streamer_dict['stream_count'],
                                  streamer_dict['total_duration'],
                                  streamer_dict['average_duration'],
                                  streamer_dict['percentage_duration'],
                                  streamer_dict['partnership'],
                                  streamer_dict['name']
                              ))

    def add_streamer_tier(self, streamer_dict):
        self.db.insert_row(
                    table='tier_data',
                    row_string='(NULL, ?, ?)',
                    row_data=[
                        streamer_dict['name'],
                        self.return_streamer_tier(average_viewers=streamer_dict['viewers_average'])
                    ])

    def update_streamer_tier(self, streamer_dict):
        self.db.dbcur.execute('UPDATE tier_data SET '
                              'streamer_tier = ? '
                              'WHERE streamer_name = ?',
                              (
                                  self.return_streamer_tier(average_viewers=streamer_dict['viewers_average']),
                                  streamer_dict['name']
                              ))

    def update_global_data(self):
        # update the global data table from all the new streamer data
        streamers_data = self.db.get_all_rows(table='streamers_data')
        # GLOBAL DATA SCHEMA:
        # ID, TIMESTAMP, STREAMER COUNT, STREAM COUNT, AVERAGE GLOBAL DURATION, TOTAL TIME STREAMED, LONGEST STREAM
        streamer_count = len(streamers_data)
        stream_count = sum([int(row[6]) for row in streamers_data])
        durations = [int(row[8]) for row in streamers_data]
        total_global_duration = sum(durations)
        average_global_duration = calculate_average_from_list(durations)
        longest_stream = max(durations)
        self.db.insert_row(
                table='global_data',
                row_string='(NULL, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?)',
                row_data=[
                    streamer_count,
                    stream_count,
                    average_global_duration,
                    total_global_duration,
                    longest_stream
                ]
        )