def write_network_table(update=True):
    '''
    Write the network-level downloads table to Postgres.

    https://stackoverflow.com/questions/23103962/how-to-write-dataframe-to-postgres-table

    Parameters
    ----------
    update : bool
        True (default) appends only intervals starting today;
        False rewrites the table with all-time data.
    '''
    engine = create_engine(dns)
    if update:
        # Incremental update: only pull intervals starting today.
        start_date = datetime.datetime.now().date()
        dat = json.loads(
            getSimplecastResponse(
                f'/analytics/downloads?account={account_id}&start_date={start_date}'
            ))
        df = pd.DataFrame(dat['by_interval'])
        print(df)
        df.to_sql('network_level', con=engine, if_exists='append')
    else:
        # Full refresh with all-time data. The original call omitted
        # if_exists, so pandas' default ('fail') raised once the table
        # already existed; 'replace' matches the intent of a full rewrite.
        dat = json.loads(
            getSimplecastResponse(
                f'/analytics/downloads?account={account_id}'))
        df = pd.DataFrame(dat['by_interval'])
        df.to_sql('network_level', con=engine, if_exists='replace')
    print('Database Network table updated!')
def network_pod_table():
    '''
    Build the per-podcast network summary table.

    Returns a DataFrame with columns ['Podcast Title', 'Total Downloads',
    'Total Listeners', 'Average Downloads', 'Podcast ID'] (one row per pod)
    and also writes it to ./db/podcast-table.csv.
    '''
    pod_ids = [x['value'] for x in podIDs()]
    print('Getting Network Table Data')
    podcasts = []
    for p in pod_ids:
        # One dict per pod: title, downloads, listeners, avg downloads, id.
        pod_data = {}
        # Podcast title
        title = json.loads(getSimplecastResponse(f'/podcasts/{p}'))['title']
        pod_data['Podcast Title'] = title
        # Total downloads for the pod
        downloads = json.loads(
            getSimplecastResponse(
                f'/analytics/downloads?podcast={p}'))['total']
        pod_data['Total Downloads'] = downloads
        # Unique listener count
        listeners = json.loads(
            getSimplecastResponse(
                f'/analytics/episodes/listeners?podcast={p}'))['total']
        pod_data['Total Listeners'] = listeners
        # Average downloads per episode
        avg_downloads = json.loads(
            getSimplecastResponse(
                f'/analytics/episodes/average_downloads?podcast={p}'))['total']
        pod_data['Average Downloads'] = avg_downloads
        # Keep the pod ID so rows can be joined back to the API later.
        pod_data['Podcast ID'] = p
        print('Pod Data:', pod_data)
        podcasts.append(pod_data)
        print('######################')
    df = pd.DataFrame(podcasts)
    print('Final Dataframe:\n', df)
    csv_path = os.path.join('.', 'db', 'podcast-table.csv')
    df.to_csv(csv_path, index=False)
    return df
def format_response(query, tag):
    '''
    Format a Simplecast response into row tuples for update_db.

    Parameters
    ----------
    query : str
        Simplecast API path, passed straight to getSimplecastResponse.
    tag : str
        Key into the response payload, typically 'by_interval' or
        'collection' depending on the endpoint.

    Returns
    -------
    list[tuple]
        One tuple per item; each tuple's length should match the number
        of columns in the target datatable for that stat.
    '''
    res = json.loads(getSimplecastResponse(query))
    # Each response item is a dict; its values become one table row.
    # (The original also kept a running counter `n` that was never used.)
    new_data = [tuple(d.values()) for d in res[tag]]
    print(new_data[0:10], '...')
    return new_data
def get_network_downloads():
    '''
    Grab network-wide download data.

    Returns
    -------
    (df, total_downloads)
        df has columns ['interval', 'downloads_total', 'downloads_percent'];
        total_downloads is the network-wide total. df is also written to
        ./db/network-downloads.csv.
    '''
    response = getSimplecastResponse(
        f'/analytics/downloads?account={account_id}')  # &limit=1000
    # Parse once — the original decoded the same JSON payload twice and
    # left a debug print(type(total_downloads)) behind.
    payload = json.loads(response)
    total_downloads = payload['total']
    df = pd.DataFrame(payload['by_interval'])
    df.to_csv(os.path.join('.', 'db', 'network-downloads.csv'), index=False)
    return df, total_downloads
def network_pull():
    '''
    Pull network-wide podcast analytics with a single request from the
    BW Simplecast account. (Per Lem's email, '/current_user' can be
    appended to the query string.)
    '''
    # TODO: figure out how to set this as an env variable
    raw = getSimplecastResponse(
        f'/analytics/podcasts?account={account_id}&limit=1000')
    collection = json.loads(raw)['collection']
    df = pd.DataFrame(collection)
    print(df)
    return df
def get_listeners():
    '''
    Aggregate unique-listener counts per day across every pod in the
    network. Writes ./db/network-listeners-by-date.csv and returns the
    grouped frame (columns: 'interval', 'total').
    '''
    frames = []
    for pod in pod_ids:
        payload = json.loads(
            getSimplecastResponse(f'/analytics/listeners?podcast={pod}'))
        print(payload['by_interval'])
        frames.append(pd.DataFrame(payload['by_interval']))
    listeners = pd.concat(frames)
    listener_path = os.path.join('.', 'db', 'network-listeners-by-date.csv')
    # Collapse per-pod rows onto shared interval dates.
    listeners = listeners.groupby('interval', as_index=False)['total'].sum()
    print('Listeners by date:\n', listeners)
    listeners.to_csv(listener_path, index=False)
    return listeners
def update_podcast_table():
    '''
    Update the podcasts_by_interval table schema and (eventually) its rows.

    NOTE(review): still work-in-progress — the row-insert loop remains
    stubbed out; only the ALTER TABLE schema migration runs.
    '''
    for p in pod_ids:
        print(p)
    # The analytics query is account-wide, so fetch it once instead of
    # once per pod as the original loop did.
    res = json.loads(
        getSimplecastResponse(
            f'/analytics/podcasts?account={account_id}&limit=1000'))
    # Must be a tuple of statements: the original was a bare parenthesized
    # string, so `for command in commands` iterated single characters
    # (the source of the linked "can't execute an empty query" confusion).
    # The trailing comma after the last ADD COLUMN was also invalid SQL.
    # NOTE(review): ADD COLUMN ... NOT NULL without a DEFAULT fails on a
    # non-empty table — confirm the table is empty or add defaults.
    commands = ("""
        ALTER TABLE podcasts_by_interval
        ADD COLUMN IF NOT EXISTS interval DATE NOT NULL,
        ADD COLUMN IF NOT EXISTS downloads_total INT NOT NULL
        """,)
    conn = None
    try:
        # https://stackoverflow.com/questions/61022590/psycopg2-cant-execute-an-empty-query
        print('Connecting to db...')
        # connect to the PostgreSQL server
        conn = psycopg2.connect(dns)  # , sslmode='require')
        cur = conn.cursor()
        print('DB connection established!')
        # run the migration statements one by one
        for command in commands:
            cur.execute(command)
        print('Table cols created!')
        # TODO: insert rows from res['collection'] once the schema settles.
        # for d in res['collection']:
        #     cur.execute(...)
        cur.close()
        # commit the changes
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
def network_data():
    '''
    Collect network-level data (# pods, total downloads, total episodes).

    Intended as a scheduled job (e.g. once per day/hour); refreshes
    ./db/network-downloads.csv and ./db/network-stats.csv, which the app
    reads from.
    '''
    episode_counts = []
    download_counts = []
    interval_frames = []
    for pod_id in pod_ids:
        print(f'Getting data for podcast: {pod_id}')
        # Per-pod download stats
        dl_payload = json.loads(
            getSimplecastResponse(f'/analytics/downloads?podcast={pod_id}'))
        print(type(dl_payload['by_interval']), dl_payload)
        interval_frames.append(pd.DataFrame(dl_payload['by_interval']))
        pod_downloads = dl_payload['total']
        print(f'Total # of downloads: {pod_downloads}')
        # Per-pod episode count
        ep_payload = json.loads(
            getSimplecastResponse(f'/podcasts/{pod_id}/episodes'))
        n_episodes = ep_payload['count']
        print(f'Number of episodes: {n_episodes}')
        print('#########################')
        download_counts.append(pod_downloads)
        episode_counts.append(n_episodes)
    # Network-wide totals
    downloads_total = np.sum(download_counts)
    episodes_total = np.sum(episode_counts)
    network_stats = pd.DataFrame.from_dict({
        'Number of Podcasts': [len(pod_ids)],
        'Total Downloads': [downloads_total],
        'Total Episodes': [episodes_total]
    })
    print('#########################')
    print('NETWORK TOTALS')
    print(f'Total Downloads: {downloads_total}')
    print(f'Total Episodes: {episodes_total}', '\n')
    # Sum the per-pod interval downloads onto shared interval dates.
    combined = pd.concat(interval_frames)
    grouped = combined.groupby(
        'interval', as_index=False)['downloads_total'].sum()
    print('NETWORK DOWNLOADS BY DAY:')
    print(grouped, grouped.columns, type(grouped))
    # Persist both outputs for the app to pick up.
    grouped.to_csv(os.path.join('.', 'db', 'network-downloads.csv'),
                   index=False)
    network_stats.to_csv(os.path.join('.', 'db', 'network-stats.csv'),
                         index=False)