from ast import literal_eval from anon_id_utils import get_anon_id get_all = 0 print "Getting users..." if get_all: # this is for ALL users in the zooniverse #and is gonna take a loooooong time cursor = the_cnx.cursor() getusers = "select login as user_name, id as zooniverse_id from zoohome_300315.users" cursor.execute(getusers) the_users = pd.DataFrame(cursor.fetchall()) the_users.columns = cursor.column_names the_users['anon_id'] = [get_anon_id(q) for q in the_users.zooniverse_id] cursor.close() else: user_file = '/Volumes/Brooke_SD/VOLCROWE/survey/survey_answered_ids_CONFIDENTIAL.csv' the_users = pd.read_csv(user_file) the_users.set_index('zooniverse_id', drop=False, inplace=True) # We are going to loop through users and check each project for the existence of a user stats file for that project. # paths and project names come from sessions_allproj_byuser.py all_poss_proj = 'galaxy_zoo galaxy_zoo_starburst solarstormwatch moon_zoo ancient_lives whales milky_way oldweather sea_floor cyclone_center bat_detective cancer_cells andromeda serengeti planet_four notes_from_nature spacewarp leaf worms plankton radio war_diary m83 wise sunspot condor asteroid kelp penguin higgs_hunter chicago planet_hunter illustratedlife chimp orchid wisconsin crater'.split( ) # just make sure you're starting clean here (since this appends later) for thisproj in all_poss_proj:
# cursor = the_cnx.cursor()

# Switch for where the user list comes from: a truthy value queries the
# database for every user, a falsy value reads the survey-respondent CSV.
get_all = 0

# Parenthesised so the single-argument print behaves the same on Python 2 and 3.
print("Getting users...")
if get_all:
    # this is for ALL users in the zooniverse
    # and is gonna take a loooooong time
    cursor = the_cnx.cursor()
    try:
        # No comma before FROM: a trailing comma after the last selected
        # column is a SQL syntax error.
        getusers = "select login as user_name, id as zooniverse_id from zoohome_300315.users"
        cursor.execute(getusers)
        # Passing the column names to the constructor also copes with an
        # empty result set, where assigning .columns afterwards would fail
        # with a length mismatch.
        the_users = pd.DataFrame(cursor.fetchall(), columns=list(cursor.column_names))
    finally:
        # Release the cursor even when the query or fetch raises.
        cursor.close()
    # One anonymised ID per row, derived from the numeric Zooniverse ID.
    the_users['anon_id'] = [get_anon_id(q) for q in the_users.zooniverse_id]
else:
    user_file = '/Volumes/Brooke_SD/VOLCROWE/survey/survey_answered_ids_CONFIDENTIAL.csv'
    the_users = pd.read_csv(user_file)

# Index by zooniverse_id while keeping it available as a regular column.
# NOTE(review): the original indentation was lost; this is assumed to apply
# to both branches above -- confirm.
the_users.set_index('zooniverse_id', drop=False, inplace=True)

# Base path for the per-user stats output.
filestr_base = "/Volumes/Brooke_SD/VOLCROWE/user_stats_out"

# if you're not starting from scratch, this != 0
starting_row = 0
# if you only need to go up to a certain point
ending_row = 10000
elif '_body' in discussions_all.columns: body = '_body' # if this isn't a column then we have to extract it from the body column tags = 'none' if 'tags' in discussions_all.columns: tags = 'tags' elif '_tags' in discussions_all.columns: tags = '_tags' anon_id = 'anon_id' if not anon_id in discussions_all.columns: print project_name,"Warning: anon_id wasn't defined, defining from ",userid," i.e. numeric Zooniverse user ID" discussions_all['anon_id'] = (discussions_all[userid] < 1).astype(str) discussions_all.anon_id = [get_anon_id(q) for q in discussions_all[userid]] createdat = 'created_at' # not sure what happens if this isn't a list, as it isn't with the ouroboros talk projects if body != 'none' and (project_name == 'sea_floor' or project_name == 'planet_hunters'): discussions_all['body_arr'] = discussions_all[body].apply(literal_eval) elif body != 'none': discussions_all['body_arr'] = discussions_all[body].astype(str) discussions_all['body_arr'] = discussions_all['body_arr'].apply(lambda x:x.split(' ')) else: discussions_all['body_arr'] = ['' for q in discussions_all.anon_id]