def main(args):
    """Build the CVD user table, save it, optionally plot, and print the ids.

    Reads these attributes from ``args``: ``outdir``, ``database``,
    ``userids``, ``n_users``, ``min_obs``, ``alpha``, ``plot``, ``combine``
    and ``sep``.

    When ``args.userids`` is falsy the users come from ``get_cvd`` and the
    ranked table is cut to ``args.n_users`` rows. Otherwise ``args.userids``
    is split on commas (and stored back on ``args`` as a list) and the info
    is computed for exactly those ids, with no truncation.

    The ranked table is written to ``<outdir>/cvd_users_info.csv`` and the
    user ids are printed on one line joined by ``args.sep``.
    """
    outdir = args.outdir
    # one database handle shared by every query below
    db = s_qs.SpecimenQueries(database_path=args.database)

    if args.userids:
        # explicit, comma-separated list of ids supplied by the caller
        args.userids = args.userids.split(',')
        user_table = compute_cvd_info(db, args.userids)
        user_table = rank_cvd(user_table, alpha=args.alpha)
    else:
        user_table = get_cvd(db, args.n_users, min_obs=args.min_obs)
        user_table = rank_cvd(user_table, alpha=args.alpha).head(args.n_users)

    # keep a copy of the table next to the plots for later reference
    user_table.to_csv(os.path.join(outdir, 'cvd_users_info.csv'), index=False)

    user_ids = user_table['userid'].values

    if args.plot:
        # one set of diagnostic plots per user
        for user in user_ids:
            print("Plotting %s" % user)
            figures = plot_cvd_diagnostics(
                db, user, combined_plots=args.combine)
            for fig_name, fig in figures.items():
                fig_file = 'cvd_diagnostics_%s_%s.pdf' % (fig_name, user)
                fig.savefig(os.path.join(outdir, fig_file))
            # release this user's figures before moving on to the next one
            plt.close('all')

    print(args.sep.join(map(str, user_ids)))
def _get_userid(self, n=0):
    """
    Return the ``userid`` value found at position ``n`` once the per-user
    selection counts are ordered from largest to smallest.

    A new ``SpecimenQueries`` handle is opened on ``self.database_path``
    for the query. ``n`` is used as a positional index (``iloc``) into the
    sorted counts; ``n=0`` is the user with the highest count.
    """
    query = """
        select selectionevents.uniqueId as userid, count(*) as ct
        from selectionevents
        inner join users on selectionevents.userid = users.id
        where userid > 1
        group by selectionevents.uniqueId
    """
    db = s_qs.SpecimenQueries(database_path=self.database_path)
    counts = db.execute_adhoc(query)
    # largest counts first, then pick the requested position
    ranked = counts.sort_values('ct', ascending=False)
    nth_row = ranked.iloc[n]
    return nth_row['userid']
def main(database_path, targetdir, specimendir, scorer, userid, outdir,
         target_ncolors):
    """Compare per-hue accuracies from images against one user's observed data.

    For every entry in ``target_ncolors`` a frame of hue accuracies is
    computed with ``compute_hue_accuracies``. The user's own data is pulled
    from the database, averaged per ``target_hue_bucket`` and labelled with
    ``ncolors == 'Specimen'``.

    Outputs written under ``outdir`` (all prefixed with ``scorer``):
      * ``<scorer>_image_hue_accuracies.pdf`` - plot of the combined table
      * ``<scorer>_image_hue_accuracies.csv`` - the combined table itself
      * one scatter-plot pdf per ``ncolors`` value, normalized and raw

    The correlation matrix between the ``ncolors`` groups is printed.
    """
    image_frames = [
        compute_hue_accuracies(targetdir, specimendir, scorer, userid, k)
        for k in target_ncolors
    ]
    db = s_qs.SpecimenQueries(database_path=database_path)
    # image-only table; taken before the observed data is added
    images_df = pd.concat(image_frames, axis=0)

    # mean correctness per hue bucket for the requested user
    user_rows = get_user_data(db, userid)
    per_hue = user_rows.groupby('target_hue_bucket')['correct'].mean()
    observed = per_hue.reset_index().rename(
        columns={'target_hue_bucket': 'hue', 'correct': 'acc'})
    observed['ncolors'] = 'Specimen'

    image_frames.append(observed)
    combined = pd.concat(image_frames, axis=0)

    print("Correlation of accuracies")
    accuracy_by_group = combined.pivot_table(
        index='hue', columns='ncolors', values='acc')
    print(accuracy_by_group.corr())

    accuracy_plot = plot_hue_accuracies(combined)
    accuracy_plot.get_figure().savefig(
        os.path.join(outdir, '%s_image_hue_accuracies.pdf' % scorer))
    combined.to_csv(
        os.path.join(outdir, '%s_image_hue_accuracies.csv' % scorer),
        index=False)

    # scatter plots: image accuracy against the specimen accuracy per hue
    specimen_df = observed[['hue', 'acc']].rename(columns={'acc': 'specimen'})
    images_df = pd.merge(images_df, specimen_df, on='hue', how='left')
    for ncolors in images_df['ncolors'].unique():
        for norm in (True, False):
            scatter_plot = plot_hue_accuracies_lm(
                images_df, ncolors, norm=norm)
            if norm:
                norm_str = 'normalized'
            else:
                norm_str = 'raw'
            scatter_name = (
                '%s-%d-image-hue-accuracies-scatter-plot-%s.pdf' %
                (scorer, ncolors, norm_str))
            scatter_plot.savefig(os.path.join(outdir, scatter_name))
            plt.close('all')
def _get_user_data(self):
    """
    Load the selection events of ``self.userid`` as a DataFrame.

    A new ``SpecimenQueries`` handle is opened on ``self.database_path``.
    The result holds the user's unique id (as ``userid``), the specimen and
    target hue, RGB and Lab columns, and ``correct`` converted to a boolean
    column.

    The user id is placed inside a quoted SQL string literal. Any single
    quote in it is doubled first, so an id containing an apostrophe is
    matched literally instead of terminating the literal and corrupting
    (or altering) the query.
    """
    db = s_qs.SpecimenQueries(database_path=self.database_path)
    # Only the SQL-string form of execute_adhoc is visible here, so the
    # value is escaped by hand: '' is the SQL spelling of one quote inside
    # a string literal.
    quoted_userid = str(self.userid).replace("'", "''")
    df = db.execute_adhoc("""
        select
            users.uniqueid as userid,
            specimen_h, target_h,
            specimen_r, specimen_g, specimen_b,
            target_r, target_g, target_b,
            specimen_lab_l, specimen_lab_a, specimen_lab_b,
            target_lab_l, target_lab_a, target_lab_b,
            correct
        from selectionevents
        inner join users on selectionevents.userid = users.id
        where users.uniqueId = '%s'
    """ % quoted_userid)
    # make sure correct is boolean, sqlite represents as integer, which
    # messes pandas up sometimes when using as a predicate for row selection
    df['correct'] = df['correct'] == 1
    return df
lambda args: cvd_user_accuracy(db, args.input_path, args.output_path), 'average_absolute_filesizes': lambda args: average_absolute_filesizes(args.input_path, args. output_path) } available_actions_str = ','.join(list(available_actions.keys())) parser = argparse.ArgumentParser(description=description) parser.add_argument('database', type=str, help='Path to Specimen database file') parser.add_argument('input_path', type=str, help="Path to results relevant for given function") parser.add_argument( 'output_path', type=str, help="Path to save down plot/result relevant for given function") parser.add_argument('action', type=str, help='One of [%s]' % available_actions_str) args = parser.parse_args() db = s_qs.SpecimenQueries(database_path=args.database) if not args.action in available_actions: raise ValueError("Undefined action: %s, must be one of [%s]" % (args.action, available_actions_str)) available_actions.get(args.action)(args)