# Build the sustainer segment from df_mem: keep only the member id and the
# sustainer flag, working on a copy so df_mem itself is left untouched.
df_sust = df_mem[['id', 'sustainer']].copy()
# Strip surrounding whitespace so padded values still match 'ACT' below.
df_sust['sustainer'] = df_sust['sustainer'].str.strip()
df_sust = df_sust[df_sust['sustainer'] == 'ACT']

# Clean dups: drop repeated (id, sustainer) rows, then index by member id.
df_sust = df_sust.drop_duplicates()
df_sust = df_sust.set_index('id')

# Run the search based on the segmented member ids; get_views receives them
# as a single comma-separated string.
df_sust_ids = df_sust.index.values.tolist()
df_sust_ids = ','.join(str(x) for x in df_sust_ids)
# From here on df_sust holds the get_views result, not the member table, so
# the shape printed below is the shape of the views table.
df_sust = get_views(date_start, date_end, df_sust_ids)
df_sust.columns = ['channel', 'id', 'count']
# Single-argument print(...) emits the same text as the old multi-argument
# print statement under Python 2 and also parses under Python 3.
print('\n\nSUSTAINERS: {}'.format(df_sust.shape))
print('TOTAL VIEWS: {:,}'.format(df_sust['count'].sum()))

# Sum views per channel, largest first, print the top ten and export it all.
df_sust = df_sust.groupby(['channel'])[['count']].sum()
df_sust = df_sust.sort_values('count', ascending=False)
print(df_sust.head(10))
# Build the path with os.path.join, matching how outputf is built below.
df_sust.to_csv(os.path.join(root_tables, 'df_sust_{}.csv'.format(date_naming)), encoding='utf-8')

# Plot results: the plot input is the per-channel table with 'channel' moved
# back from the index into a column.
inputf = df_sust.reset_index()
outputf = os.path.join(root_graphics, 'Sustainer_Channels.jpg')
get segmented channel and episode viewers ''' df = df_mem_pass.copy() #clean dups output += '\n\nDUPS IN df: ' + str(df.set_index('id').index.get_duplicates()) df = df.drop_duplicates() df = df.set_index('id') cols = df.columns.tolist() #print '\nCHECK DEDUP:\n',df.loc[[290825, 451922, 1237064, 1499961]] #run search based on segmented member ids mem_pass_ids = df.index.values.tolist() mem_pass_ids = ','.join(str(x) for x in mem_pass_ids) df_views = get_views(date_start, date_end, mem_pass_ids) df_views.columns = ['channel', 'title', 'id', 'count'] ''' get top channel views ''' df_channels = df_views.copy() df_channels = df_channels.groupby(['channel'])[['count']].sum() df_channels = df_channels.sort_values('count', ascending=False) output += '\n\n\nTOP CHANNELS:\n' + df_channels.head(10).to_string() df_channels.to_csv(output_folder + output_head + '_channels_' + output_tail + '.csv', encoding='utf-8-sig') ''' get top episode views
''' other setup '''
# Output locations: one folder for graphics, one for exported tables.
root_graphics = 'output_graphics'
root_tables = 'output_tables'

# Create the needed folders if they don't exist yet.
if not os.path.isdir(root_graphics):
    os.mkdir(root_graphics)
if not os.path.isdir(root_tables):
    os.mkdir(root_tables)

''' process '''
# Pull the views for the date window, then pass them through
# normalize_shows (keyed on 'content_channel') and clean_views.
df = clean_views(normalize_shows(get_views(date_start, date_end), 'content_channel'))
cols, plot_devices, aggreg_clust = set_aggregate(df, include_age=False)

# Get rid of unique ids/viewers: 'viewers' is removed from both the column
# list and the aggregation mapping, and the 'id' column is dropped.
cols.remove('viewers')
del aggreg_clust['viewers']
df = df.drop('id', axis=1)

# Structure df for the plot: aggregate per show, order by views (largest
# first) and keep only the selected columns, in the order given by cols.
sort_by = 'views'
df = df.groupby('show').agg(aggreg_clust).sort_values(by=sort_by, ascending=False)[cols]