def morph_table_gz2():
    """Print LaTeX rows of plurality vote counts and save them as pie charts.

    The catalogue to read is selected by the hard-coded ``survey`` and
    ``overlap`` switches at the top of the body. For every task column of
    the arrays returned by ``plurality``, the answers are tallied, one
    LaTeX-formatted row is printed per answer (label, count, fraction of the
    whole catalogue, fraction within the task) and one pie chart is drawn.
    The figure is written to ``<gzpath>/decals/plots/pie_<survey><suffix>.eps``.

    Returns:
        None. If a catalogue file cannot be opened (``IOError``), a message
        is printed and the function returns without plotting.
    """
    overlap = True
    survey = 'decals'

    overlap_files = {
        'gz2': "{0}/decals/csv/decals_gz2_main.csv",
        'stripe82': "{0}/decals/csv/decals_gz2_stripe82c1.csv",
        'decals': "{0}/decals/csv/decals_gz2_union.csv",
    }
    standalone_files = {
        'gz2': "{0}/dr10/dr10_gz2_main_specz.csv",
        'stripe82': "{0}/dr10/dr10_gz2_stripe82_coadd1.csv",
    }

    # Load the collated catalogue
    try:
        # The FITS header supplies column names for the header-less DR10 CSVs.
        fitsfile = "{0}/dr10/dr10_gz2_main_specz.fits".format(gzpath)
        hdr = fits.getheader(fitsfile, 1)
        colnames = [hdr['TTYPE{0}'.format(i + 1)]
                    for i in range(hdr['TFIELDS'])]

        if overlap:
            collation_file = overlap_files[survey].format(gzpath)
            collated = pd.read_csv(collation_file)
        else:
            collation_file = standalone_files[survey].format(gzpath)
            collated = pd.read_csv(collation_file, names=colnames)
    except IOError:
        print("Collation file for {0:} does not exist. Aborting.".format(
            survey))
        return None

    # Pick out the vote-fraction columns and the task identifiers (tNN)
    fraccols, colnames = [], []
    if survey == 'decals':
        for c in collated.columns:
            # Columns look like 'decals_tNN_..._frac'; the length guard keeps
            # the fixed-position checks below in range.
            if len(c) > 10:
                if c.endswith('frac') and c.startswith('decals'):
                    fraccols.append(c)
                if c[7] == 't' and is_number(c[8:10]):
                    colnames.append(c[7:10])
    else:
        for c in collated.columns:
            if c.endswith('weighted_fraction'):
                fraccols.append(c)
            if c.startswith('t') and is_number(c[1:3]):
                colnames.append(c[:3])

    collist = sorted(set(colnames))

    # Grid layout: one panel per task
    ntasks = len(collist)
    ncols = 4 if ntasks > 9 else int(np.sqrt(ntasks))
    nrows = ntasks // ncols + (1 if ntasks % ncols else 0)

    survey_name = survey_dict()[survey]['name']

    def unique_in_order(seq):
        """Return the distinct items of seq, keeping first-seen order."""
        seen = set()
        kept = []
        for item in seq:
            if item not in seen:
                seen.add(item)
                kept.append(item)
        return kept

    # Task and answer labels are sliced out of the column names.
    if survey == 'decals':
        tasklabels = unique_in_order(
            [re.split("[ax][0-9]", f)[0][11:-1] for f in fraccols])
        labels = [re.split("[ax][0-9]", f)[-1][1:-5] for f in fraccols]
    else:
        tasklabels = unique_in_order(
            [re.split("[ax][0-9]", f)[0][4:-1] for f in fraccols])
        labels = [re.split("[ax][0-9]", f[4:-18])[-1][2:] for f in fraccols]

    # Make pie charts of the plurality votes
    task_arr, task_ans = [], []
    for v in np.array(collated[fraccols]):
        e, a = plurality(v, survey)
        task_arr.append(e)
        task_ans.append(a)
    task_arr = np.array(task_arr)
    task_ans = np.array(task_ans)

    # squeeze=False keeps axarr two-dimensional even for a 1x1 grid, so
    # .ravel() and .size below always work.
    fig, axarr = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 12),
                              squeeze=False)
    axes = axarr.ravel()
    colors = [
        u'#377EB8', u'#E41A1C', u'#4DAF4A', u'#984EA3', u'#FF7F00',
        u'#A6761D', u'#1B9E77'
    ]

    for i in range(task_arr.shape[1]):
        ax = axes[i]
        # Element-wise comparison on a NumPy array: keep the rows whose flag
        # from plurality() equals True for this task.
        selected = task_arr[:, i] == True  # noqa: E712
        c = Counter(task_ans[:, i][selected])
        task_total = sum(c.values())

        pv, pl = [], []
        for k in c:
            pv.append(c[k])
            pl.append(labels[k])
            # Print to screen in LaTeX format
            print("{0:20} & {1:6} & {3:.2f} & {2:.2f}".format(
                labels[k], c[k], c[k] * 1. / task_total,
                c[k] * 1. / len(collated)))
        print("")

        # autopct receives the wedge percentage; convert it back to a count.
        ax.pie(pv, labels=pl, colors=colors,
               autopct=lambda p, total=task_total:
               '{:.0f}'.format(p * total / 100))
        if i == 0:
            title = '{0:} - t{1:02} {2:}'.format(survey_name, i, tasklabels[i])
        else:
            title = 't{0:02} {1:}'.format(i, tasklabels[i])
        ax.set_title(title)
        ax.set_aspect('equal')

    # Remove empty axes from subplots
    for ax in axes[ntasks:]:
        ax.set_axis_off()

    fig.set_tight_layout(True)
    suffix = '_overlap' if overlap else ''
    plt.savefig('{1}/decals/plots/pie_{0}{2}.eps'.format(
        survey, gzpath, suffix))
    plt.close()

    return None
def morphology_distribution(survey, absolute_counts=False):
    """Plot vote-fraction histograms and plurality pie charts for a survey.

    Reads
    ``<gzdir>/gz_reduction_sandbox/data/<survey>_unweighted_classifications_00.csv``
    and writes two PNG files into ``<gzdir>/progress/<survey>/``:
    ``votefractions_<survey>.png`` (one histogram panel per task) and
    ``pie_<survey>.png`` (one pie chart per task column returned by
    ``plurality``).

    Args:
        survey: Survey key. Used to build the file paths, to look up the
            display name in ``survey_dict()`` and passed on to ``plurality``.
        absolute_counts: If true, pie wedges are annotated with counts;
            otherwise with percentages.

    Returns:
        None. If the collation file cannot be opened (``IOError``), a message
        is printed and the function returns without plotting.
    """
    # Planning notes kept from the original author:
    #   - Get the updated version of Brooke's aggregation and weighting code
    #   - Try running it on the DECaLS and Illustris data
    #   - Assuming it works, match positionally against GZ2 in TOPCAT (both
    #     samples). This might have to be manual unless I find a faster
    #     implementation of positional matching in Python.
    #   - Adapt the GZ2 code for plurality classifications and run on GZ4 data
    #   - Summarize overall results and split by luminosity
    #   - List galaxies with large changes?

    # Load the collated classifications
    try:
        collation_file = "{0:}/gz_reduction_sandbox/data/{1:}_unweighted_classifications_00.csv".format(gzdir, survey)
        collated = pd.read_csv(collation_file)
    except IOError:
        print("Collation file for {0:} does not exist. Aborting.".format(survey))
        return None

    # Vote-fraction columns and task identifiers (tNN)
    fraccols, colnames = [], []
    for c in collated.columns:
        if c.endswith('frac'):
            fraccols.append(c)
        if c.startswith('t') and is_number(c[1:3]):
            colnames.append(c[:3])

    collist = sorted(set(colnames))

    # Grid layout: one panel per task
    ntasks = len(collist)
    ncols = 4 if ntasks > 9 else int(np.sqrt(ntasks))
    nrows = ntasks // ncols + (1 if ntasks % ncols else 0)

    survey_name = survey_dict()[survey]['name']

    def unique_in_order(seq):
        """Return the distinct items of seq, keeping first-seen order."""
        seen = set()
        kept = []
        for item in seq:
            if item not in seen:
                seen.add(item)
                kept.append(item)
        return kept

    def panel_title(i):
        """Title for panel i; only the first panel carries the survey name."""
        if i == 0:
            return '{0:} - t{1:02} {2:}'.format(survey_name, i, tasklabels[i])
        return 't{0:02} {1:}'.format(i, tasklabels[i])

    # Task and answer labels are sliced out of the column names.
    tasklabels = unique_in_order(
        [re.split("[ax][0-9]", f)[0][4:-1] for f in fraccols])
    labels = [re.split("[ax][0-9]", f[4:-5])[-1][1:] for f in fraccols]

    # Plot distribution of vote fractions for each task.
    # squeeze=False keeps axarr two-dimensional even for a 1x1 grid.
    fig, axarr = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 12),
                              squeeze=False)
    axes = axarr.ravel()
    for i, c in enumerate(collist):
        ax = axes[i]
        ax.set_title(panel_title(i))

        for f in fraccols:
            # Overlay every answer of this task except the 'discuss' one.
            if f[:3] == c and f[-15:-5] != 'a0_discuss':
                label = re.split("[ax][0-9]", f[4:-5])[-1][1:]
                ax.hist(collated[f], alpha=0.7, label=label)

        ax.set_xlim(0, 1)
        ax.legend(loc='upper left', fontsize=6)

    # Remove empty axes from subplots
    for ax in axes[ntasks:]:
        ax.set_axis_off()

    fig.set_tight_layout(True)
    plt.savefig('{0:}/progress/{1:}/votefractions_{1:}.png'.format(gzdir, survey))
    # Release the histogram figure before the pie-chart figure is created.
    plt.close(fig)

    # Make pie charts of the plurality votes
    task_arr, task_ans = [], []
    for v in np.array(collated[fraccols]):
        e, a = plurality(v, survey)
        task_arr.append(e)
        task_ans.append(a)
    task_arr = np.array(task_arr)
    task_ans = np.array(task_ans)

    fig, axarr = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 12),
                              squeeze=False)
    axes = axarr.ravel()
    colors = [u'#377EB8', u'#E41A1C', u'#4DAF4A', u'#984EA3', u'#FF7F00',
              u'#A6761D', u'#1B9E77']

    for i in range(task_arr.shape[1]):
        ax = axes[i]
        # Element-wise comparison on a NumPy array: keep the rows whose flag
        # from plurality() equals True for this task.
        selected = task_arr[:, i] == True  # noqa: E712
        c = Counter(task_ans[:, i][selected])
        pv = [c[k] for k in c]
        pl = [labels[k] for k in c]

        if absolute_counts:
            # autopct receives the wedge percentage; convert back to a count.
            ax.pie(pv, labels=pl, colors=colors,
                   autopct=lambda p, total=sum(pv):
                   '{:.0f}'.format(p * total / 100))
        else:
            ax.pie(pv, labels=pl, colors=colors, autopct='%1.0f%%')
        ax.set_title(panel_title(i))
        ax.set_aspect('equal')

    # Remove empty axes from subplots
    for ax in axes[ntasks:]:
        ax.set_axis_off()

    fig.set_tight_layout(True)
    plt.savefig('{0:}/progress/{1:}/pie_{1:}.png'.format(gzdir, survey))
    plt.close()

    # Open questions kept from the original author:
    #   - How does the distribution compare to GZ1 and GZ2?
    #   - Have they discovered anything interesting?
    #   - Look at long discussions? Maybe tag the ones that science team
    #     hasn't participated in yet?

    return None
def feature_comparison(savefig=False):
    """Plot GZ2 against DECaLS vote fractions for the matched galaxies.

    Reads ``<decals_path>/fits/decals_gz2_union.fits`` and draws one 2-D
    histogram per matched answer (GZ2 fraction on x, DECaLS fraction on y)
    on a 4x8 grid. A panel only includes the rows for which ``plurality``
    flags the corresponding task as true in both surveys.

    Args:
        savefig: If true, write the figure to
            ``<plot_path>/decals_gz2_feature_comparison.pdf`` and close it;
            otherwise show it with ``plt.show()``.

    Returns:
        None.
    """
    filename = '{0}/fits/decals_gz2_union.fits'.format(decals_path)
    data = fits.getdata(filename, 1)

    # (panel title, GZ2 column, DECaLS column) for each matched answer, in
    # plotting order.
    answer_map = (
        ('smooth',
         "gz2_t01_smooth_or_features_a01_smooth_fraction",
         "decals_t00_smooth_or_features_a0_smooth_frac"),
        ('features/disk',
         "gz2_t01_smooth_or_features_a02_features_or_disk_fraction",
         "decals_t00_smooth_or_features_a1_features_frac"),
        ('star',
         "gz2_t01_smooth_or_features_a03_star_or_artifact_fraction",
         "decals_t00_smooth_or_features_a2_artifact_frac"),
        ('edge-on',
         "gz2_t02_edgeon_a04_yes_fraction",
         "decals_t01_disk_edge_on_a0_yes_frac"),
        ('not edge-on',
         "gz2_t02_edgeon_a05_no_fraction",
         "decals_t01_disk_edge_on_a1_no_frac"),
        ('bar',
         "gz2_t03_bar_a06_bar_fraction",
         "decals_t02_bar_a0_bar_frac"),
        ('no bar',
         "gz2_t03_bar_a07_no_bar_fraction",
         "decals_t02_bar_a1_no_bar_frac"),
        ('spiral',
         "gz2_t04_spiral_a08_spiral_fraction",
         "decals_t03_spiral_a0_spiral_frac"),
        ('no spiral',
         "gz2_t04_spiral_a09_no_spiral_fraction",
         "decals_t03_spiral_a1_no_spiral_frac"),
        ('no bulge',
         "gz2_t05_bulge_prominence_a10_no_bulge_fraction",
         "decals_t04_bulge_prominence_a0_no_bulge_frac"),
        ('medium bulge',
         "gz2_t05_bulge_prominence_a11_just_noticeable_fraction",
         "decals_t04_bulge_prominence_a1_obvious_frac"),
        ('obvious bulge',
         "gz2_t05_bulge_prominence_a12_obvious_fraction",
         "decals_t04_bulge_prominence_a2_dominant_frac"),
        ('completely round',
         "gz2_t07_rounded_a16_completely_round_fraction",
         "decals_t08_rounded_a0_completely_round_frac"),
        ('in between',
         "gz2_t07_rounded_a17_in_between_fraction",
         "decals_t08_rounded_a1_in_between_frac"),
        ('cigar shaped',
         "gz2_t07_rounded_a18_cigar_shaped_fraction",
         "decals_t08_rounded_a2_cigar_shaped_frac"),
        ('ring',
         "gz2_t08_odd_feature_a19_ring_fraction",
         "decals_t10_odd_feature_x1_ring_frac"),
        ('lens/arc',
         "gz2_t08_odd_feature_a20_lens_or_arc_fraction",
         "decals_t10_odd_feature_x2_lens_frac"),
        ('irregular',
         "gz2_t08_odd_feature_a22_irregular_fraction",
         "decals_t10_odd_feature_x4_irregular_frac"),
        ('other',
         "gz2_t08_odd_feature_a23_other_fraction",
         "decals_t10_odd_feature_x5_other_frac"),
        ('dust lane',
         "gz2_t08_odd_feature_a38_dust_lane_fraction",
         "decals_t10_odd_feature_x3_dustlane_frac"),
        ('rounded bulge',
         "gz2_t09_bulge_shape_a25_rounded_fraction",
         "decals_t07_bulge_shape_a0_rounded_frac"),
        ('boxy bulge',
         "gz2_t09_bulge_shape_a26_boxy_fraction",
         "decals_t07_bulge_shape_a1_boxy_frac"),
        ('no bulge',
         "gz2_t09_bulge_shape_a27_no_bulge_fraction",
         "decals_t07_bulge_shape_a2_no_bulge_frac"),
        ('tight arms',
         "gz2_t10_arms_winding_a28_tight_fraction",
         "decals_t05_arms_winding_a0_tight_frac"),
        ('medium arms',
         "gz2_t10_arms_winding_a29_medium_fraction",
         "decals_t05_arms_winding_a1_medium_frac"),
        ('loose arms',
         "gz2_t10_arms_winding_a30_loose_fraction",
         "decals_t05_arms_winding_a2_loose_frac"),
        ('1 arm',
         "gz2_t11_arms_number_a31_1_fraction",
         "decals_t06_arms_number_a0_1_frac"),
        ('2 arms',
         "gz2_t11_arms_number_a32_2_fraction",
         "decals_t06_arms_number_a1_2_frac"),
        ('3 arms',
         "gz2_t11_arms_number_a33_3_fraction",
         "decals_t06_arms_number_a2_3_frac"),
        ('4 arms',
         "gz2_t11_arms_number_a34_4_fraction",
         "decals_t06_arms_number_a3_4_frac"),
        ('5+ arms',
         "gz2_t11_arms_number_a36_more_than_4_fraction",
         "decals_t06_arms_number_a4_more_than_4_frac"),
    )
    matched_cols = [{'title': title, 'gz2': gz2_col, 'decals': decals_col}
                    for title, gz2_col, decals_col in answer_map]

    # Working, but still needs to sort for questions that are ACTUALLY
    # ANSWERED. Lots of pileup at 0,0.

    # Split the table columns into DECaLS fractions and unweighted GZ2
    # fractions.
    decals_fraccols, gz2_fraccols = [], []
    for col in data.columns:
        colname = col.name
        if (len(colname) > 6 and colname.endswith('frac')
                and colname.startswith('decals')):
            decals_fraccols.append(col)
        if (len(colname) > 17 and colname.endswith('fraction')
                and not colname.endswith("weighted_fraction")
                and colname.startswith('gz2')):
            gz2_fraccols.append(col)

    decals_votearr = data.from_columns(decals_fraccols)
    gz2_votearr = data.from_columns(gz2_fraccols)

    # Only the first array returned by plurality() (per-task flags) is used.
    decals_tasks, gz2_tasks = [], []
    for v in decals_votearr:
        flags, _ = plurality(np.array(list(v)), 'decals')
        decals_tasks.append(flags)
    for v in gz2_votearr:
        flags, _ = plurality(np.array(list(v)), 'gz2')
        gz2_tasks.append(flags)

    # Convert once instead of on every pass through the plotting loop.
    decals_flags = np.array(decals_tasks)
    gz2_flags = np.array(gz2_tasks)

    fig, axarr = plt.subplots(num=1, nrows=4, ncols=8, figsize=(16, 10))
    axes = axarr.ravel()

    def plot_features(ax, taskno, indices):
        """Draw the GZ2-vs-DECaLS 2-D histogram for matched answer taskno."""
        plotind = indices.flatten()
        entry = matched_cols[taskno]
        ax.hist2d(data[entry['gz2']][plotind],
                  data[entry['decals']][plotind],
                  bins=(20, 20),
                  range=[[0, 1], [0, 1]],
                  norm=LogNorm(),
                  cmap=cm.viridis)
        # One-to-one reference line
        ax.plot([0, 1], [0, 1], linestyle='--', color='red', lw=2)
        ax.set_title(entry['title'], fontsize=8)
        ax.get_xaxis().set_ticks([])
        ax.get_yaxis().set_ticks([])
        ax.set_xlabel(r'$f_{GZ2}$', fontsize=10)
        ax.set_ylabel(r'$f_{DECaLS}$', fontsize=10)
        ax.set_aspect('equal')

    # answers_per_task[j] consecutive entries of matched_cols belong to the
    # task pair match_tasks[j] = [GZ2 task column, DECaLS task column].
    answers_per_task = [3, 2, 2, 2, 3, 3, 5, 3, 3, 5]
    match_tasks = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [6, 8], [7, 10],
                   [8, 7], [9, 5], [10, 6]]

    n = 0
    for n_answers, (gz2_task, decals_task) in zip(answers_per_task,
                                                  match_tasks):
        # Element-wise comparisons on NumPy arrays.
        inds = ((decals_flags[:, decals_task] == True)  # noqa: E712
                & (gz2_flags[:, gz2_task] == True))  # noqa: E712
        for _ in range(n_answers):
            plot_features(axes[n], n, inds)
            n += 1

    # Drop the grid cells that did not receive a panel.
    for ax in axes[n:]:
        fig.delaxes(ax)

    fig.tight_layout()
    if savefig:
        plt.savefig('{0}/decals_gz2_feature_comparison.pdf'.format(plot_path))
        # The figure number is fixed (num=1); close it so that a later call
        # starts from an empty figure instead of drawing over this one.
        plt.close(fig)
    else:
        plt.show()

    return None
def morphology_distribution(survey='decals'):
    """Save pie charts of the plurality morphology answers, one per task.

    Reads
    ``<gzpath>/gz_reduction_sandbox/data/decals_unweighted_classifications_00.csv``
    (the file name does not depend on ``survey``), tallies the answers
    returned by ``plurality`` for every task column and writes the figure to
    ``<gzpath>/decals/plots/pie_<survey>.eps``. Wedges are annotated with
    counts.

    Args:
        survey: Survey key used to look up the display name in
            ``survey_dict()``, passed to ``plurality`` and used in the output
            file name.

    Returns:
        None. If the collation file cannot be opened (``IOError``), a message
        is printed and the function returns without plotting.
    """
    try:
        collation_file = "{0}/gz_reduction_sandbox/data/decals_unweighted_classifications_00.csv".format(
            gzpath)
        collated = pd.read_csv(collation_file)
    except IOError:
        print("Collation file for {0:} does not exist. Aborting.".format(
            survey))
        return None

    # Vote-fraction columns and task identifiers (tNN)
    fraccols, colnames = [], []
    for c in collated.columns:
        if c.endswith('frac'):
            fraccols.append(c)
        if c.startswith('t') and is_number(c[1:3]):
            colnames.append(c[:3])

    collist = sorted(set(colnames))

    # Grid layout: one panel per task
    ntasks = len(collist)
    ncols = 4 if ntasks > 9 else int(np.sqrt(ntasks))
    nrows = ntasks // ncols + (1 if ntasks % ncols else 0)

    survey_name = survey_dict()[survey]['name']

    def unique_in_order(seq):
        """Return the distinct items of seq, keeping first-seen order."""
        seen = set()
        kept = []
        for item in seq:
            if item not in seen:
                seen.add(item)
                kept.append(item)
        return kept

    # Task and answer labels are sliced out of the column names.
    tasklabels = unique_in_order(
        [re.split("[ax][0-9]", f)[0][11:-1] for f in fraccols])
    labels = [re.split("[ax][0-9]", f)[-1][1:-5] for f in fraccols]

    # Make pie charts of the plurality votes
    task_arr, task_ans = [], []
    for v in np.array(collated[fraccols]):
        e, a = plurality(v, survey)
        task_arr.append(e)
        task_ans.append(a)
    task_arr = np.array(task_arr)
    task_ans = np.array(task_ans)

    # squeeze=False keeps axarr two-dimensional even for a 1x1 grid, so
    # .ravel() below always works.
    fig, axarr = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 12),
                              squeeze=False)
    axes = axarr.ravel()
    colors = [
        u'#377EB8', u'#E41A1C', u'#4DAF4A', u'#984EA3', u'#FF7F00',
        u'#A6761D', u'#1B9E77'
    ]

    for i in range(task_arr.shape[1]):
        ax = axes[i]
        # Element-wise comparison on a NumPy array: keep the rows whose flag
        # from plurality() equals True for this task.
        selected = task_arr[:, i] == True  # noqa: E712
        c = Counter(task_ans[:, i][selected])
        pv = [c[k] for k in c]
        pl = [labels[k] for k in c]

        # autopct receives the wedge percentage; convert it back to a count.
        ax.pie(pv, labels=pl, colors=colors,
               autopct=lambda p, total=sum(pv):
               '{:.0f}'.format(p * total / 100))
        if i == 0:
            title = '{0:} - t{1:02} {2:}'.format(survey_name, i, tasklabels[i])
        else:
            title = 't{0:02} {1:}'.format(i, tasklabels[i])
        ax.set_title(title)
        ax.set_aspect('equal')

    # Remove empty axes from subplots
    for ax in axes[ntasks:]:
        ax.set_axis_off()

    fig.set_tight_layout(True)
    plt.savefig('{1}/decals/plots/pie_{0:}.eps'.format(survey, gzpath))
    plt.close()

    return None
def morphology_distribution(survey, absolute_counts=False, dr=None):
    """Plot vote-fraction histograms and plurality pie charts for a survey.

    ``survey_dr(survey, dr)`` supplies the string used in the file names.
    Reads
    ``<gzdir>/gz_reduction_sandbox/data/<survey_str>_unweighted_classifications_00.csv``
    and writes two PNG files into ``<gzdir>/progress/<survey_str>/``:
    ``votefractions_<survey_str>.png`` (one histogram panel per task) and
    ``pie_<survey_str>.png`` (one pie chart per task column returned by
    ``plurality``).

    Args:
        survey: Survey key. Used to look up the display name in
            ``survey_dict()`` and passed on to ``plurality``.
        absolute_counts: If true, pie wedges are annotated with counts;
            otherwise with percentages.
        dr: Passed unchanged to ``survey_dr`` together with ``survey``.

    Returns:
        None. If the collation file cannot be opened (``IOError``), a message
        is printed and the function returns without plotting.
    """
    # Planning notes kept from the original author:
    #   - Get the updated version of Brooke's aggregation and weighting code
    #   - Try running it on the DECaLS and Illustris data
    #   - Assuming it works, match positionally against GZ2 in TOPCAT (both
    #     samples). This might have to be manual unless I find a faster
    #     implementation of positional matching in Python.
    #   - Adapt the GZ2 code for plurality classifications and run on GZ4 data
    #   - Summarize overall results and split by luminosity
    #   - List galaxies with large changes?

    survey_str = survey_dr(survey, dr)

    # Load the collated classifications
    try:
        collation_file = "{0:}/gz_reduction_sandbox/data/{1:}_unweighted_classifications_00.csv".format(gzdir, survey_str)
        collated = pd.read_csv(collation_file)
    except IOError:
        print("Collation file for {0:} does not exist. Aborting.".format(survey_str))
        return None

    # Vote-fraction columns and task identifiers (tNN)
    fraccols, colnames = [], []
    for c in collated.columns:
        if c.endswith('frac'):
            fraccols.append(c)
        if c.startswith('t') and is_number(c[1:3]):
            colnames.append(c[:3])

    collist = sorted(set(colnames))

    # Grid layout: one panel per task
    ntasks = len(collist)
    ncols = 4 if ntasks > 9 else int(np.sqrt(ntasks))
    nrows = ntasks // ncols + (1 if ntasks % ncols else 0)

    survey_name = survey_dict()[survey]['name']

    def unique_in_order(seq):
        """Return the distinct items of seq, keeping first-seen order."""
        seen = set()
        kept = []
        for item in seq:
            if item not in seen:
                seen.add(item)
                kept.append(item)
        return kept

    def panel_title(i):
        """Title for panel i; only the first panel carries the survey name."""
        if i == 0:
            return '{0:} - t{1:02} {2:}'.format(survey_name, i, tasklabels[i])
        return 't{0:02} {1:}'.format(i, tasklabels[i])

    # Task and answer labels are sliced out of the column names.
    tasklabels = unique_in_order(
        [re.split("[ax][0-9]", f)[0][4:-1] for f in fraccols])
    labels = [re.split("[ax][0-9]", f[4:-5])[-1][1:] for f in fraccols]

    # Plot distribution of vote fractions for each task.
    # squeeze=False keeps axarr two-dimensional even for a 1x1 grid.
    fig, axarr = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 12),
                              squeeze=False)
    axes = axarr.ravel()
    for i, c in enumerate(collist):
        ax = axes[i]
        ax.set_title(panel_title(i))

        for f in fraccols:
            # Overlay every answer of this task except the 'discuss' one.
            if f[:3] == c and f[-15:-5] != 'a0_discuss':
                label = re.split("[ax][0-9]", f[4:-5])[-1][1:]
                ax.hist(collated[f], alpha=0.7, label=label)

        ax.set_xlim(0, 1)
        ax.legend(loc='upper left', fontsize=6)

    # Remove empty axes from subplots
    for ax in axes[ntasks:]:
        ax.set_axis_off()

    fig.set_tight_layout(True)
    plt.savefig('{0:}/progress/{1:}/votefractions_{1:}.png'.format(gzdir, survey_str))
    # Release the histogram figure before the pie-chart figure is created.
    plt.close(fig)

    # Make pie charts of the plurality votes
    task_arr, task_ans = [], []
    for v in np.array(collated[fraccols]):
        e, a = plurality(v, survey)
        task_arr.append(e)
        task_ans.append(a)
    task_arr = np.array(task_arr)
    task_ans = np.array(task_ans)

    fig, axarr = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 12),
                              squeeze=False)
    axes = axarr.ravel()
    colors = [u'#377EB8', u'#E41A1C', u'#4DAF4A', u'#984EA3', u'#FF7F00',
              u'#A6761D', u'#1B9E77']

    for i in range(task_arr.shape[1]):
        ax = axes[i]
        # Element-wise comparison on a NumPy array: keep the rows whose flag
        # from plurality() equals True for this task.
        selected = task_arr[:, i] == True  # noqa: E712
        c = Counter(task_ans[:, i][selected])
        pv = [c[k] for k in c]
        pl = [labels[k] for k in c]

        if absolute_counts:
            # autopct receives the wedge percentage; convert back to a count.
            ax.pie(pv, labels=pl, colors=colors,
                   autopct=lambda p, total=sum(pv):
                   '{:.0f}'.format(p * total / 100))
        else:
            ax.pie(pv, labels=pl, colors=colors, autopct='%1.0f%%')
        ax.set_title(panel_title(i))
        ax.set_aspect('equal')

    # Remove empty axes from subplots
    for ax in axes[ntasks:]:
        ax.set_axis_off()

    fig.set_tight_layout(True)
    plt.savefig('{0:}/progress/{1:}/pie_{1:}.png'.format(gzdir, survey_str))
    plt.close()

    # Open questions kept from the original author:
    #   - How does the distribution compare to GZ1 and GZ2?
    #   - Have they discovered anything interesting?
    #   - Look at long discussions? Maybe tag the ones that science team
    #     hasn't participated in yet?

    return None
# Count, for every subhalo, how many distinct plurality classifications its
# background == 0 rows received, then plot how often each number occurs.
from matplotlib import pyplot as plt

b0 = df[df['background'] == 0]
grouped = b0.groupby(['subhalo_id'])

# Vote-fraction columns
fraccols = [c for c in b0.columns if c[-4:] == 'frac']

# d maps "number of distinct classifications" -> "number of subhalos".
# Keys 1-4 are pre-seeded so that they appear in the plot even at zero.
d = {1: 0, 2: 0, 3: 0, 4: 0}
for name, group in grouped:
    answers = []
    for v in np.array(group[fraccols]):
        e, a = plurality(v, 'illustris')
        # Keep only the answers whose flag from plurality() equals 1.
        answers.append(np.array(a)[np.array(e) == 1])

    # Arrays are not hashable; compare the classifications as tuples.
    n_distinct = len(set(tuple(ans) for ans in answers))
    # .get() avoids a KeyError when a subhalo has more than four distinct
    # classifications.
    d[n_distinct] = d.get(n_distinct, 0) + 1

# Explicit lists: dict views are not accepted as array data on Python 3.
n_values = sorted(d)
n_counts = [d[k] for k in n_values]

fig = plt.figure(figsize=(6, 6))
ax = fig.add_subplot(111)
ax.scatter(n_values, n_counts, s=40)
# Equals (0, 5) for the pre-seeded keys; widens if larger keys were added.
ax.set_xlim(0, max(n_values) + 1)
ax.set_xlabel('Number of different GZ classifications', fontsize=16)
ax.set_ylabel('Count', fontsize=20)
ax.set_title("Illustris - fixed_mass")

plt.savefig('initial_exploration_diffs.png')
def morph_table_gz2():
    """Print LaTeX rows of plurality vote counts and save them as pie charts.

    The catalogue to read is selected by the hard-coded ``survey`` and
    ``overlap`` switches at the top of the body. For every task column of
    the arrays returned by ``plurality``, the answers are tallied, one
    LaTeX-formatted row is printed per answer (label, count, fraction of the
    whole catalogue, fraction within the task) and one pie chart is drawn.
    The figure is written to ``<gzpath>/decals/plots/pie_<survey><suffix>.eps``.

    Returns:
        None. If a catalogue file cannot be opened (``IOError``), a message
        is printed and the function returns without plotting.
    """
    overlap = True
    survey = 'decals'

    overlap_files = {
        'gz2': "{0}/decals/csv/decals_gz2_main.csv",
        'stripe82': "{0}/decals/csv/decals_gz2_stripe82c1.csv",
        'decals': "{0}/decals/csv/decals_gz2_union.csv",
    }
    standalone_files = {
        'gz2': "{0}/dr10/dr10_gz2_main_specz.csv",
        'stripe82': "{0}/dr10/dr10_gz2_stripe82_coadd1.csv",
    }

    # Load the collated catalogue
    try:
        # The FITS header supplies column names for the header-less DR10 CSVs.
        fitsfile = "{0}/dr10/dr10_gz2_main_specz.fits".format(gzpath)
        hdr = fits.getheader(fitsfile, 1)
        colnames = [hdr['TTYPE{0}'.format(i + 1)]
                    for i in range(hdr['TFIELDS'])]

        if overlap:
            collation_file = overlap_files[survey].format(gzpath)
            collated = pd.read_csv(collation_file)
        else:
            collation_file = standalone_files[survey].format(gzpath)
            collated = pd.read_csv(collation_file, names=colnames)
    except IOError:
        print("Collation file for {0:} does not exist. Aborting.".format(survey))
        return None

    # Pick out the vote-fraction columns and the task identifiers (tNN)
    fraccols, colnames = [], []
    if survey == 'decals':
        for c in collated.columns:
            # Columns look like 'decals_tNN_..._frac'; the length guard keeps
            # the fixed-position checks below in range.
            if len(c) > 10:
                if c.endswith('frac') and c.startswith('decals'):
                    fraccols.append(c)
                if c[7] == 't' and is_number(c[8:10]):
                    colnames.append(c[7:10])
    else:
        for c in collated.columns:
            if c.endswith('weighted_fraction'):
                fraccols.append(c)
            if c.startswith('t') and is_number(c[1:3]):
                colnames.append(c[:3])

    collist = sorted(set(colnames))

    # Grid layout: one panel per task
    ntasks = len(collist)
    ncols = 4 if ntasks > 9 else int(np.sqrt(ntasks))
    nrows = ntasks // ncols + (1 if ntasks % ncols else 0)

    survey_name = survey_dict()[survey]['name']

    def unique_in_order(seq):
        """Return the distinct items of seq, keeping first-seen order."""
        seen = set()
        kept = []
        for item in seq:
            if item not in seen:
                seen.add(item)
                kept.append(item)
        return kept

    # Task and answer labels are sliced out of the column names.
    if survey == 'decals':
        tasklabels = unique_in_order(
            [re.split("[ax][0-9]", f)[0][11:-1] for f in fraccols])
        labels = [re.split("[ax][0-9]", f)[-1][1:-5] for f in fraccols]
    else:
        tasklabels = unique_in_order(
            [re.split("[ax][0-9]", f)[0][4:-1] for f in fraccols])
        labels = [re.split("[ax][0-9]", f[4:-18])[-1][2:] for f in fraccols]

    # Make pie charts of the plurality votes
    task_arr, task_ans = [], []
    for v in np.array(collated[fraccols]):
        e, a = plurality(v, survey)
        task_arr.append(e)
        task_ans.append(a)
    task_arr = np.array(task_arr)
    task_ans = np.array(task_ans)

    # squeeze=False keeps axarr two-dimensional even for a 1x1 grid, so
    # .ravel() and .size below always work.
    fig, axarr = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 12),
                              squeeze=False)
    axes = axarr.ravel()
    colors = [u'#377EB8', u'#E41A1C', u'#4DAF4A', u'#984EA3', u'#FF7F00',
              u'#A6761D', u'#1B9E77']

    for i in range(task_arr.shape[1]):
        ax = axes[i]
        # Element-wise comparison on a NumPy array: keep the rows whose flag
        # from plurality() equals True for this task.
        selected = task_arr[:, i] == True  # noqa: E712
        c = Counter(task_ans[:, i][selected])
        task_total = sum(c.values())

        pv, pl = [], []
        for k in c:
            pv.append(c[k])
            pl.append(labels[k])
            # Print to screen in LaTeX format
            print("{0:20} & {1:6} & {3:.2f} & {2:.2f}".format(
                labels[k], c[k], c[k] * 1. / task_total,
                c[k] * 1. / len(collated)))
        print("")

        # autopct receives the wedge percentage; convert it back to a count.
        ax.pie(pv, labels=pl, colors=colors,
               autopct=lambda p, total=task_total:
               '{:.0f}'.format(p * total / 100))
        if i == 0:
            title = '{0:} - t{1:02} {2:}'.format(survey_name, i, tasklabels[i])
        else:
            title = 't{0:02} {1:}'.format(i, tasklabels[i])
        ax.set_title(title)
        ax.set_aspect('equal')

    # Remove empty axes from subplots
    for ax in axes[ntasks:]:
        ax.set_axis_off()

    fig.set_tight_layout(True)
    suffix = '_overlap' if overlap else ''
    plt.savefig('{1}/decals/plots/pie_{0}{2}.eps'.format(survey, gzpath, suffix))
    plt.close()

    return None
def morphology_distribution(survey='decals'):
    """Save pie charts of the plurality morphology answers, one per task.

    Reads
    ``<gzpath>/gz_reduction_sandbox/data/decals_unweighted_classifications_00.csv``
    (the file name does not depend on ``survey``), tallies the answers
    returned by ``plurality`` for every task column and writes the figure to
    ``<gzpath>/decals/plots/pie_<survey>.eps``. Wedges are annotated with
    counts.

    Args:
        survey: Survey key used to look up the display name in
            ``survey_dict()``, passed to ``plurality`` and used in the output
            file name.

    Returns:
        None. If the collation file cannot be opened (``IOError``), a message
        is printed and the function returns without plotting.
    """
    try:
        collation_file = "{0}/gz_reduction_sandbox/data/decals_unweighted_classifications_00.csv".format(gzpath)
        collated = pd.read_csv(collation_file)
    except IOError:
        print("Collation file for {0:} does not exist. Aborting.".format(survey))
        return None

    # Vote-fraction columns and task identifiers (tNN)
    fraccols, colnames = [], []
    for c in collated.columns:
        if c.endswith('frac'):
            fraccols.append(c)
        if c.startswith('t') and is_number(c[1:3]):
            colnames.append(c[:3])

    collist = sorted(set(colnames))

    # Grid layout: one panel per task
    ntasks = len(collist)
    ncols = 4 if ntasks > 9 else int(np.sqrt(ntasks))
    nrows = ntasks // ncols + (1 if ntasks % ncols else 0)

    survey_name = survey_dict()[survey]['name']

    def unique_in_order(seq):
        """Return the distinct items of seq, keeping first-seen order."""
        seen = set()
        kept = []
        for item in seq:
            if item not in seen:
                seen.add(item)
                kept.append(item)
        return kept

    # Task and answer labels are sliced out of the column names.
    tasklabels = unique_in_order(
        [re.split("[ax][0-9]", f)[0][11:-1] for f in fraccols])
    labels = [re.split("[ax][0-9]", f)[-1][1:-5] for f in fraccols]

    # Make pie charts of the plurality votes
    task_arr, task_ans = [], []
    for v in np.array(collated[fraccols]):
        e, a = plurality(v, survey)
        task_arr.append(e)
        task_ans.append(a)
    task_arr = np.array(task_arr)
    task_ans = np.array(task_ans)

    # squeeze=False keeps axarr two-dimensional even for a 1x1 grid, so
    # .ravel() below always works.
    fig, axarr = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 12),
                              squeeze=False)
    axes = axarr.ravel()
    colors = [u'#377EB8', u'#E41A1C', u'#4DAF4A', u'#984EA3', u'#FF7F00',
              u'#A6761D', u'#1B9E77']

    for i in range(task_arr.shape[1]):
        ax = axes[i]
        # Element-wise comparison on a NumPy array: keep the rows whose flag
        # from plurality() equals True for this task.
        selected = task_arr[:, i] == True  # noqa: E712
        c = Counter(task_ans[:, i][selected])
        pv = [c[k] for k in c]
        pl = [labels[k] for k in c]

        # autopct receives the wedge percentage; convert it back to a count.
        ax.pie(pv, labels=pl, colors=colors,
               autopct=lambda p, total=sum(pv):
               '{:.0f}'.format(p * total / 100))
        if i == 0:
            title = '{0:} - t{1:02} {2:}'.format(survey_name, i, tasklabels[i])
        else:
            title = 't{0:02} {1:}'.format(i, tasklabels[i])
        ax.set_title(title)
        ax.set_aspect('equal')

    # Remove empty axes from subplots
    for ax in axes[ntasks:]:
        ax.set_axis_off()

    fig.set_tight_layout(True)
    plt.savefig('{1}/decals/plots/pie_{0:}.eps'.format(survey, gzpath))
    plt.close()

    return None
def feature_comparison(savefig=False):
    """Plot GZ2 against DECaLS vote fractions for the matched galaxies.

    Reads ``<decals_path>/fits/decals_gz2_union.fits`` and draws one 2-D
    histogram per matched answer (GZ2 fraction on x, DECaLS fraction on y)
    on a 4x8 grid. A panel only includes the rows for which ``plurality``
    flags the corresponding task as true in both surveys.

    Args:
        savefig: If true, write the figure to
            ``<plot_path>/decals_gz2_feature_comparison.pdf`` and close it;
            otherwise show it with ``plt.show()``.

    Returns:
        None.
    """
    filename = '{0}/fits/decals_gz2_union.fits'.format(decals_path)
    data = fits.getdata(filename, 1)

    # (panel title, GZ2 column, DECaLS column) for each matched answer, in
    # plotting order.
    answer_map = (
        ('smooth',
         "gz2_t01_smooth_or_features_a01_smooth_fraction",
         "decals_t00_smooth_or_features_a0_smooth_frac"),
        ('features/disk',
         "gz2_t01_smooth_or_features_a02_features_or_disk_fraction",
         "decals_t00_smooth_or_features_a1_features_frac"),
        ('star',
         "gz2_t01_smooth_or_features_a03_star_or_artifact_fraction",
         "decals_t00_smooth_or_features_a2_artifact_frac"),
        ('edge-on',
         "gz2_t02_edgeon_a04_yes_fraction",
         "decals_t01_disk_edge_on_a0_yes_frac"),
        ('not edge-on',
         "gz2_t02_edgeon_a05_no_fraction",
         "decals_t01_disk_edge_on_a1_no_frac"),
        ('bar',
         "gz2_t03_bar_a06_bar_fraction",
         "decals_t02_bar_a0_bar_frac"),
        ('no bar',
         "gz2_t03_bar_a07_no_bar_fraction",
         "decals_t02_bar_a1_no_bar_frac"),
        ('spiral',
         "gz2_t04_spiral_a08_spiral_fraction",
         "decals_t03_spiral_a0_spiral_frac"),
        ('no spiral',
         "gz2_t04_spiral_a09_no_spiral_fraction",
         "decals_t03_spiral_a1_no_spiral_frac"),
        ('no bulge',
         "gz2_t05_bulge_prominence_a10_no_bulge_fraction",
         "decals_t04_bulge_prominence_a0_no_bulge_frac"),
        ('medium bulge',
         "gz2_t05_bulge_prominence_a11_just_noticeable_fraction",
         "decals_t04_bulge_prominence_a1_obvious_frac"),
        ('obvious bulge',
         "gz2_t05_bulge_prominence_a12_obvious_fraction",
         "decals_t04_bulge_prominence_a2_dominant_frac"),
        ('completely round',
         "gz2_t07_rounded_a16_completely_round_fraction",
         "decals_t08_rounded_a0_completely_round_frac"),
        ('in between',
         "gz2_t07_rounded_a17_in_between_fraction",
         "decals_t08_rounded_a1_in_between_frac"),
        ('cigar shaped',
         "gz2_t07_rounded_a18_cigar_shaped_fraction",
         "decals_t08_rounded_a2_cigar_shaped_frac"),
        ('ring',
         "gz2_t08_odd_feature_a19_ring_fraction",
         "decals_t10_odd_feature_x1_ring_frac"),
        ('lens/arc',
         "gz2_t08_odd_feature_a20_lens_or_arc_fraction",
         "decals_t10_odd_feature_x2_lens_frac"),
        ('irregular',
         "gz2_t08_odd_feature_a22_irregular_fraction",
         "decals_t10_odd_feature_x4_irregular_frac"),
        ('other',
         "gz2_t08_odd_feature_a23_other_fraction",
         "decals_t10_odd_feature_x5_other_frac"),
        ('dust lane',
         "gz2_t08_odd_feature_a38_dust_lane_fraction",
         "decals_t10_odd_feature_x3_dustlane_frac"),
        ('rounded bulge',
         "gz2_t09_bulge_shape_a25_rounded_fraction",
         "decals_t07_bulge_shape_a0_rounded_frac"),
        ('boxy bulge',
         "gz2_t09_bulge_shape_a26_boxy_fraction",
         "decals_t07_bulge_shape_a1_boxy_frac"),
        ('no bulge',
         "gz2_t09_bulge_shape_a27_no_bulge_fraction",
         "decals_t07_bulge_shape_a2_no_bulge_frac"),
        ('tight arms',
         "gz2_t10_arms_winding_a28_tight_fraction",
         "decals_t05_arms_winding_a0_tight_frac"),
        ('medium arms',
         "gz2_t10_arms_winding_a29_medium_fraction",
         "decals_t05_arms_winding_a1_medium_frac"),
        ('loose arms',
         "gz2_t10_arms_winding_a30_loose_fraction",
         "decals_t05_arms_winding_a2_loose_frac"),
        ('1 arm',
         "gz2_t11_arms_number_a31_1_fraction",
         "decals_t06_arms_number_a0_1_frac"),
        ('2 arms',
         "gz2_t11_arms_number_a32_2_fraction",
         "decals_t06_arms_number_a1_2_frac"),
        ('3 arms',
         "gz2_t11_arms_number_a33_3_fraction",
         "decals_t06_arms_number_a2_3_frac"),
        ('4 arms',
         "gz2_t11_arms_number_a34_4_fraction",
         "decals_t06_arms_number_a3_4_frac"),
        ('5+ arms',
         "gz2_t11_arms_number_a36_more_than_4_fraction",
         "decals_t06_arms_number_a4_more_than_4_frac"),
    )
    matched_cols = [{'title': title, 'gz2': gz2_col, 'decals': decals_col}
                    for title, gz2_col, decals_col in answer_map]

    # Working, but still needs to sort for questions that are ACTUALLY
    # ANSWERED. Lots of pileup at 0,0.

    # Split the table columns into DECaLS fractions and unweighted GZ2
    # fractions.
    decals_fraccols, gz2_fraccols = [], []
    for col in data.columns:
        colname = col.name
        if (len(colname) > 6 and colname.endswith('frac')
                and colname.startswith('decals')):
            decals_fraccols.append(col)
        if (len(colname) > 17 and colname.endswith('fraction')
                and not colname.endswith("weighted_fraction")
                and colname.startswith('gz2')):
            gz2_fraccols.append(col)

    decals_votearr = data.from_columns(decals_fraccols)
    gz2_votearr = data.from_columns(gz2_fraccols)

    # Only the first array returned by plurality() (per-task flags) is used.
    decals_tasks, gz2_tasks = [], []
    for v in decals_votearr:
        flags, _ = plurality(np.array(list(v)), 'decals')
        decals_tasks.append(flags)
    for v in gz2_votearr:
        flags, _ = plurality(np.array(list(v)), 'gz2')
        gz2_tasks.append(flags)

    # Convert once instead of on every pass through the plotting loop.
    decals_flags = np.array(decals_tasks)
    gz2_flags = np.array(gz2_tasks)

    fig, axarr = plt.subplots(num=1, nrows=4, ncols=8, figsize=(16, 10))
    axes = axarr.ravel()

    def plot_features(ax, taskno, indices):
        """Draw the GZ2-vs-DECaLS 2-D histogram for matched answer taskno."""
        plotind = indices.flatten()
        entry = matched_cols[taskno]
        ax.hist2d(data[entry['gz2']][plotind],
                  data[entry['decals']][plotind],
                  bins=(20, 20),
                  range=[[0, 1], [0, 1]],
                  norm=LogNorm(),
                  cmap=cm.viridis)
        # One-to-one reference line
        ax.plot([0, 1], [0, 1], linestyle='--', color='red', lw=2)
        ax.set_title(entry['title'], fontsize=8)
        ax.get_xaxis().set_ticks([])
        ax.get_yaxis().set_ticks([])
        ax.set_xlabel(r'$f_{GZ2}$', fontsize=10)
        ax.set_ylabel(r'$f_{DECaLS}$', fontsize=10)
        ax.set_aspect('equal')

    # answers_per_task[j] consecutive entries of matched_cols belong to the
    # task pair match_tasks[j] = [GZ2 task column, DECaLS task column].
    answers_per_task = [3, 2, 2, 2, 3, 3, 5, 3, 3, 5]
    match_tasks = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [6, 8], [7, 10],
                   [8, 7], [9, 5], [10, 6]]

    n = 0
    for n_answers, (gz2_task, decals_task) in zip(answers_per_task,
                                                  match_tasks):
        # Element-wise comparisons on NumPy arrays.
        inds = ((decals_flags[:, decals_task] == True)  # noqa: E712
                & (gz2_flags[:, gz2_task] == True))  # noqa: E712
        for _ in range(n_answers):
            plot_features(axes[n], n, inds)
            n += 1

    # Drop the grid cells that did not receive a panel.
    for ax in axes[n:]:
        fig.delaxes(ax)

    fig.tight_layout()
    if savefig:
        plt.savefig('{0}/decals_gz2_feature_comparison.pdf'.format(plot_path))
        # The figure number is fixed (num=1); close it so that a later call
        # starts from an empty figure instead of drawing over this one.
        plt.close(fig)
    else:
        plt.show()

    return None