def get_user_data(db, userid):
    # note: plain string interpolation into the query assumes userid is a
    # trusted value (e.g. generated internally), not raw user input
    cvd_data_query = """
    select
        users.uniqueid as userid,
        target_r, target_g, target_b,
        specimen_r, specimen_g, specimen_b,
        correct,
        target_h, specimen_h,
        target_lab_l, target_lab_a, target_lab_b,
        specimen_lab_l, specimen_lab_a, specimen_lab_b
    from selectionevents
    inner join users on selectionevents.userid = users.id
    where users.uniqueid = '%s'
    """
    selections = db.execute_adhoc(cvd_data_query % userid)
    # bucket raw hues into munsell hue buckets, then map each
    # (target, specimen) bucket pair to its canonical pair label
    selections['target_hue_bucket'] = s_utils.munsell_buckets(
        selections['target_h'], labels=True)[1]
    selections['specimen_hue_bucket'] = s_utils.munsell_buckets(
        selections['specimen_h'], labels=True)[1]
    selections['hue_pair'] = selections[[
        'target_hue_bucket', 'specimen_hue_bucket'
    ]].apply(tuple, axis=1).map(s_utils.munsell_pair_map)
    selections['dist'] = get_LAB_distance(selections)
    # make sure we cast to boolean, to avoid issues with pandas and the
    # integer representation (sqlite) of booleans
    selections['correct'] = (selections['correct'] == 1)
    return selections
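# Usage sketch (hypothetical: assumes a db wrapper exposing execute_adhoc and
# a known participant uniqueid; neither is defined in this module):
#
#     selections = get_user_data(db, 'participant-uniqueid')
#     # per hue-pair accuracy for this participant
#     print(selections.groupby('hue_pair')['correct'].mean())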
def wrong_hue(df):
    """
    Fraction of selections in the wrong hue bucket vs the target
    (over all selections, not just the wrong ones)
    """
    df['target_hue_bucket'] = s_utils.munsell_buckets(
        df['target_h'], labels=True)[1]
    df['specimen_hue_bucket'] = s_utils.munsell_buckets(
        df['specimen_h'], labels=True)[1]
    df['wrong_hue'] = df['target_hue_bucket'] != df['specimen_hue_bucket']
    return df.groupby('userid')[['wrong_hue']].mean()
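def _demo_wrong_hue():
    # Illustrative sketch, not part of the original module: wrong_hue only
    # needs userid, target_h, and specimen_h columns, so a tiny synthetic
    # frame with hues in [0.0, 1.0] is enough to exercise it.
    demo = pd.DataFrame({
        'userid': ['u1', 'u1', 'u2', 'u2'],
        'target_h': [0.02, 0.35, 0.60, 0.85],
        'specimen_h': [0.03, 0.80, 0.61, 0.30],
    })
    # one row per userid: fraction of selections whose specimen landed in a
    # different munsell bucket than its target
    return wrong_hue(demo)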
def _score_samples(self, dists, colors1, colors2):
    # TODO: this is very slow!
    rgb_to_hue = lambda x: colorsys.rgb_to_hsv(*x)[0]
    # note that colorsys needs RGB to be in [0.0, 1.0], so divide by 255
    hue1 = np.apply_along_axis(rgb_to_hue, 1, colors1 / 255.0)
    hue2 = np.apply_along_axis(rgb_to_hue, 1, colors2 / 255.0)
    hue_bucket1 = s_utils.munsell_buckets(hue1, labels=True)[1]
    hue_bucket2 = s_utils.munsell_buckets(hue2, labels=True)[1]
    # get the appropriate pair-specific estimator, or fall back on the
    # general one
    get_kde = lambda h1, h2: self.hue_kdes.get(
        s_utils.munsell_pair_map[(h1, h2)], self.kde)
    # get the actual density estimate for each observation; score_samples
    # expects a 2d array and returns an array, so wrap the scalar distance
    # and unwrap the result
    get_density = np.vectorize(
        lambda d, h1, h2: get_kde(h1, h2).score_samples([[d]])[0])
    return get_density(dists, hue_bucket1, hue_bucket2)
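def _demo_kde_scoring():
    # Standalone check (illustrative, not in the original module) of the
    # sklearn convention _score_samples relies on: KernelDensity.score_samples
    # takes a 2d (n_samples, n_features) array and returns *log* densities,
    # hence the [[d]] wrapping and [0] unwrapping above.
    kde = KernelDensity(kernel='gaussian')
    kde.fit(np.array([0.0, 1.0, 2.0]).reshape(-1, 1))
    return kde.score_samples([[0.5]])[0]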
def images_to_specimen(target_path, specimen_path):
    print("Comparing %s[target] vs %s[specimen]" % (target_path, specimen_path))
    target = Image.open(target_path)
    specimen = Image.open(specimen_path)
    pixels_t = image_to_pixels(target)
    pixels_s = image_to_pixels(specimen)
    # a pixel is correct if the specimen matches the target exactly
    correct = (pixels_t == pixels_s).all(axis=1)
    rgb_to_hue = lambda x: colorsys.rgb_to_hsv(*x)[0]
    # divide pixels (which are in RGB) by 255 to be within 0 and 1.0
    hues_t = np.apply_along_axis(rgb_to_hue, 1, pixels_t / 255.)
    hue_bucket = s_utils.munsell_buckets(hues_t, labels=True)[1]
    df = pd.DataFrame({'hue': hue_bucket, 'correct': correct})
    # named aggregation (dict-of-functions .agg on a SeriesGroupBy is
    # deprecated in pandas): ct = pixels per bucket, correct = exact matches
    return df.groupby('hue')['correct'].agg(
        ct='size', correct='sum').reset_index()
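def _demo_hue_accuracy_table():
    # Illustrative only (synthetic data, not in the original module): the
    # named-aggregation step at the end of images_to_specimen. 'ct' is the
    # number of pixels per hue bucket, 'correct' how many matched exactly.
    df = pd.DataFrame({
        'hue': ['blue', 'red', 'red'],
        'correct': [True, True, False],
    })
    return df.groupby('hue')['correct'].agg(ct='size', correct='sum').reset_index()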
def _fit(self, df, kernel='gaussian', plot=False, **kwargs):
    """
    Estimate density for errors as a function of perceptual distance
    """
    df = df.copy()
    # add hue pairs
    df['target_hue'] = s_utils.munsell_buckets(df['target_h'], labels=True)[1]
    df['specimen_hue'] = s_utils.munsell_buckets(
        df['specimen_h'], labels=True)[1]
    df['hue_pair'] = df[[
        'target_hue', 'specimen_hue'
    ]].apply(tuple, axis=1).map(s_utils.munsell_pair_map)
    errors = df[~df['correct']]
    # only fit a pair-specific kde where we have enough error observations
    hue_pair_cts = errors.groupby('hue_pair').size().reset_index(name='ct')
    enough_cts = hue_pair_cts[hue_pair_cts['ct'] >= self.min_obs]
    for pair in enough_cts['hue_pair']:
        pair_errors = errors[errors['hue_pair'] == pair]
        pair_kde = KernelDensity(kernel=kernel)
        pair_kde.fit(pair_errors['distance'].values.reshape(-1, 1))
        self.hue_kdes[pair] = pair_kde
    # fit a global kde to fall back on
    self.kde = KernelDensity(kernel=kernel)
    self.kde.fit(errors['distance'].values.reshape(-1, 1))
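def _demo_bandwidth_search(distances):
    # Hedged sketch (not in the original module): KernelDensity defaults to
    # bandwidth=1.0, which may not match the scale of the perceptual
    # distances _fit trains on. Since KernelDensity.score returns the total
    # log-likelihood, it can be plugged into GridSearchCV directly to pick a
    # bandwidth by cross-validation.
    from sklearn.model_selection import GridSearchCV
    grid = GridSearchCV(KernelDensity(kernel='gaussian'),
                        {'bandwidth': np.logspace(-1, 1, 20)}, cv=5)
    grid.fit(np.asarray(distances, dtype=float).reshape(-1, 1))
    return grid.best_estimator_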