示例#1
0
def main(args):
    """Pick a set of CVD users, save their ranked info and optionally plot them.

    When ``args.userids`` is empty, candidate users are fetched with
    ``get_cvd`` and the ranked table is truncated to ``args.n_users`` rows.
    Otherwise ``args.userids`` is treated as a comma-separated string, split
    into a list (which is stored back on ``args``) and info is computed for
    exactly those users.

    The ranked table is written to ``cvd_users_info.csv`` inside
    ``args.outdir``. If ``args.plot`` is set, every diagnostic figure returned
    by ``plot_cvd_diagnostics`` is saved as a PDF in the same folder. Finally
    the selected user ids are printed, joined by ``args.sep``.
    """
    out_folder = args.outdir
    # handle on the global database
    db = s_qs.SpecimenQueries(database_path=args.database)

    if args.userids:
        # explicit list of users supplied as a comma-separated string
        args.userids = args.userids.split(',')
        ranked = rank_cvd(compute_cvd_info(db, args.userids),
                          alpha=args.alpha)
    else:
        candidates = get_cvd(db, args.n_users, min_obs=args.min_obs)
        ranked = rank_cvd(candidates, alpha=args.alpha).head(args.n_users)

    # keep a copy of the table next to the plots for later reference
    ranked.to_csv(os.path.join(out_folder, 'cvd_users_info.csv'), index=False)

    # only the ids are needed from here on
    user_ids = ranked['userid'].values

    if args.plot:
        # one set of diagnostic figures per user
        for uid in user_ids:
            print("Plotting %s" % uid)
            figures = plot_cvd_diagnostics(db,
                                           uid,
                                           combined_plots=args.combine)
            for fig_name, fig in figures.items():
                pdf_name = 'cvd_diagnostics_%s_%s.pdf' % (fig_name, uid)
                fig.savefig(os.path.join(out_folder, pdf_name))
            # release this user's figures before moving on to the next one
            plt.close('all')

    print(args.sep.join(str(uid) for uid in user_ids))
示例#2
0
 def _get_userid(self, n=0):
     """Return the userid at position ``n`` once users are ordered by
     descending number of selection events (``n=0`` is the most active)."""
     queries = s_qs.SpecimenQueries(database_path=self.database_path)
     # per-user selection counts, one row per uniqueId
     count_sql = """
   select selectionevents.uniqueId as userid, count(*) as ct 
   from selectionevents inner join users on selectionevents.userid = users.id
   where userid > 1 group by selectionevents.uniqueId
   """
     counts = queries.execute_adhoc(count_sql)
     ranked = counts.sort_values('ct', ascending=False)
     nth_row = ranked.iloc[n]
     return nth_row['userid']
示例#3
0
def main(database_path, targetdir, specimendir, scorer, userid, outdir,
         target_ncolors):
    """Compare per-hue accuracies computed from images with a user's observed ones.

    For every entry of ``target_ncolors`` a table is produced by
    ``compute_hue_accuracies``. The user's observed data (``get_user_data``)
    is averaged per ``target_hue_bucket`` and appended under the ``ncolors``
    label ``'Specimen'``. The function then:

    * prints the correlation matrix of accuracies across ``ncolors`` values,
    * saves the combined plot and table to ``outdir`` as
      ``<scorer>_image_hue_accuracies.pdf`` / ``.csv``,
    * saves one scatter plot per ``ncolors`` value, both normalized and raw.
    """
    image_tables = []
    for n in target_ncolors:
        image_tables.append(
            compute_hue_accuracies(targetdir, specimendir, scorer, userid, n))
    db = s_qs.SpecimenQueries(database_path=database_path)
    images_df = pd.concat(image_tables, axis=0)

    # mean correctness per hue bucket, reshaped to match the image tables
    user_rows = get_user_data(db, userid)
    per_hue = user_rows.groupby('target_hue_bucket')['correct'].mean()
    observed = per_hue.reset_index().rename(columns={
        'target_hue_bucket': 'hue',
        'correct': 'acc'
    })
    observed['ncolors'] = 'Specimen'
    combined = pd.concat(image_tables + [observed], axis=0)

    print("Correlation of accuracies")
    accuracy_by_ncolors = combined.pivot_table(index='hue',
                                               columns='ncolors',
                                               values='acc')
    print(accuracy_by_ncolors.corr())

    accuracy_plot = plot_hue_accuracies(combined)
    accuracy_plot.get_figure().savefig(
        os.path.join(outdir, '%s_image_hue_accuracies.pdf' % scorer))
    combined.to_csv(os.path.join(outdir,
                                 '%s_image_hue_accuracies.csv' % scorer),
                    index=False)

    # scatter plots: image-based accuracy against the specimen accuracy
    # for the same hue
    specimen_df = observed[['hue', 'acc']].rename(columns={'acc': 'specimen'})
    images_df = pd.merge(images_df, specimen_df, on='hue', how='left')
    for ncolors in images_df['ncolors'].unique():
        for norm in (True, False):
            if norm:
                norm_str = 'normalized'
            else:
                norm_str = 'raw'
            scatter_plot = plot_hue_accuracies_lm(images_df,
                                                  ncolors,
                                                  norm=norm)
            file_name = ('%s-%d-image-hue-accuracies-scatter-plot-%s.pdf' %
                         (scorer, ncolors, norm_str))
            scatter_plot.savefig(os.path.join(outdir, file_name))
    plt.close('all')
示例#4
0
 def _get_user_data(self):
     """Fetch the selection events of the user identified by ``self.userid``.

     Opens the database at ``self.database_path`` and returns the result of
     the ad hoc query: one row per selection event with the user's unique
     id, specimen/target hue, RGB and Lab components, and a ``correct``
     column converted to boolean.
     """
     db = s_qs.SpecimenQueries(database_path=self.database_path)
     # The id is spliced into a single-quoted SQL string literal, so any
     # embedded single quote must be doubled; otherwise it would terminate
     # the literal early and break (or alter) the query.
     # NOTE(review): if execute_adhoc supports bound parameters, prefer
     # those over string interpolation.
     quoted_userid = str(self.userid).replace("'", "''")
     df = db.execute_adhoc("""
   select users.uniqueid as userid,
     specimen_h,
     target_h,
     specimen_r, specimen_g, specimen_b,
     target_r, target_g, target_b,
     specimen_lab_l, specimen_lab_a, specimen_lab_b,
     target_lab_l, target_lab_a, target_lab_b,
     correct
     from selectionevents inner join users 
     on selectionevents.userid = users.id
     where users.uniqueId = '%s'
     """ % quoted_userid)
     # make sure correct is boolean, sqlite represents as integer, which messes pandas up
     # sometimes when using as a predicate for row selection
     df['correct'] = df['correct'] == 1
     return df
示例#5
0
        lambda args: cvd_user_accuracy(db, args.input_path, args.output_path),
        'average_absolute_filesizes':
        lambda args: average_absolute_filesizes(args.input_path, args.
                                                output_path)
    }
    available_actions_str = ','.join(list(available_actions.keys()))

    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('database',
                        type=str,
                        help='Path to Specimen database file')
    parser.add_argument('input_path',
                        type=str,
                        help="Path to results relevant for given function")
    parser.add_argument(
        'output_path',
        type=str,
        help="Path to save down plot/result relevant for given function")
    parser.add_argument('action',
                        type=str,
                        help='One of [%s]' % available_actions_str)
    args = parser.parse_args()

    db = s_qs.SpecimenQueries(database_path=args.database)

    if not args.action in available_actions:
        raise ValueError("Undefined action: %s, must be one of [%s]" %
                         (args.action, available_actions_str))

    available_actions.get(args.action)(args)