# Build the sustainer segment from df_mem: keep only the id and sustainer
# columns and trim surrounding whitespace from the sustainer flag so the
# equality test below is not defeated by padding.
df_sust = df_mem.loc[:, ['id', 'sustainer']].copy()
df_sust['sustainer'] = df_sust['sustainer'].str.strip()

# Keep rows flagged 'ACT', drop fully duplicated (id, sustainer) rows, then
# index the segment by member id.
# Debug aid: df_sust.loc[[290825, 451922, 1237064, 1499961]] spot-checks the
# dedup result.
df_sust = (df_sust.loc[df_sust['sustainer'] == 'ACT']
           .drop_duplicates()
           .set_index('id'))

# Run the view search restricted to the segmented member ids.
# The ids (the index of df_sust) are passed to get_views as a single
# comma-separated string.
df_sust_ids = ','.join(str(x) for x in df_sust.index.values.tolist())

# NOTE: df_sust is rebound here, from the member segment to the frame
# returned by get_views; its three columns are renamed positionally.
df_sust = get_views(date_start, date_end, df_sust_ids)
df_sust.columns = ['channel', 'id', 'count']

# Single-argument print(...) emits exactly the same text under the Python 2
# print statement and the Python 3 print function, so this block no longer
# fails to parse on Python 3.
print('\n\nSUSTAINERS: {}'.format(df_sust.shape))
print('TOTAL VIEWS: {:,}'.format(df_sust['count'].sum()))

# Total view count per channel, largest first.
df_sust = df_sust.groupby(['channel'])[['count']].sum()
df_sust = df_sust.sort_values('count', ascending=False)

# Parenthesised single-argument print behaves identically on Python 2 and 3.
print(df_sust.head(10))

# Save the full ranking; the file name carries date_naming. The path is built
# with os.path.join for consistency with the graphics path below.
df_sust.to_csv(os.path.join(root_tables,
                            'df_sust_{}.csv'.format(date_naming)),
               encoding='utf-8')

# Plot inputs: channel becomes a regular column again, and the chart is
# written under root_graphics.
inputf = df_sust.reset_index()
outputf = os.path.join(root_graphics, 'Sustainer_Channels.jpg')
'''
get segmented channel and episode viewers
'''

df = df_mem_pass.copy()

# Report member ids that occur more than once before deduplicating.
# Index.get_duplicates() was deprecated in pandas 0.23 and removed in 1.0;
# duplicated() + unique() is the documented replacement. The result is
# converted to a sorted plain list so the logged text matches what the legacy
# method produced (a sorted list of the repeated ids).
id_index = df.set_index('id').index
dup_ids = sorted(id_index[id_index.duplicated()].unique().tolist())
output += '\n\nDUPS IN df: ' + str(dup_ids)

# drop_duplicates() removes only rows that are identical in every column, so
# an id repeated with differing values in other columns is still kept.
df = df.drop_duplicates()
df = df.set_index('id')
cols = df.columns.tolist()
#print '\nCHECK DEDUP:\n',df.loc[[290825, 451922, 1237064, 1499961]]

# Run the view search restricted to the segmented member ids, passed to
# get_views as a single comma-separated string.
mem_pass_ids = ','.join(str(x) for x in df.index.values.tolist())
df_views = get_views(date_start, date_end, mem_pass_ids)

# The four returned columns are renamed positionally.
df_views.columns = ['channel', 'title', 'id', 'count']
'''
get top channel views 
'''

# Total view count per channel, ranked from most to least viewed. Works on a
# copy so df_views stays available, untouched, for later sections.
df_channels = (df_views.copy()
               .groupby(['channel'])[['count']]
               .sum()
               .sort_values('count', ascending=False))

# Append the ten most-viewed channels to the running text report.
output += ''.join(['\n\n\nTOP CHANNELS:\n', df_channels.head(10).to_string()])

# Write the full ranking next to the other outputs; the file name is
# <output_head>_channels_<output_tail>.csv inside output_folder.
df_channels.to_csv(
    ''.join([output_folder, output_head, '_channels_', output_tail, '.csv']),
    encoding='utf-8-sig')
'''
get top episode views
'''
'''
other setup
'''

# Destination folders (relative to the working directory) for generated
# charts and tables.
root_graphics = 'output_graphics'
root_tables = 'output_tables'

# Create each destination folder on first run; existing folders are left
# as they are.
for _out_dir in (root_graphics, root_tables):
    if not os.path.isdir(_out_dir):
        os.mkdir(_out_dir)
'''
process
'''

# Column the final table is ranked by.
sort_by = 'views'

# Fetch the views for the date range, normalise show names taken from the
# 'content_channel' column, then clean the result.
df = clean_views(
    normalize_shows(get_views(date_start, date_end), 'content_channel'))

# set_aggregate supplies the output column order, the device columns to plot
# and the per-column aggregation spec (age is excluded).
cols, plot_devices, aggreg_clust = set_aggregate(df, include_age=False)

# Unique ids/viewers are not wanted here: remove 'viewers' from both the
# column list and the aggregation spec, and drop the raw id column.
cols.remove('viewers')
aggreg_clust.pop('viewers')

# One row per show, aggregated per the spec, ranked by sort_by (descending)
# and restricted to the requested columns, ready for plotting.
df = (df.drop(['id'], axis=1)
      .groupby('show')
      .agg(aggreg_clust)
      .sort_values(by=sort_by, ascending=False)[cols])