def plot_feat_score_type_comp(offset_aucs=False, results_dir='fig_data_logo', all_spec_types=('avi', 'herp', 'avi-rl')):
    '''
    Compare different methods of aggregating likelihood ratios (at the per feature level)
    into one classification confidence per audio file

    For every (audio feature timescale, score type) pair, the per-species AUCs stored in
    ``classif_scores_sorted_<feat>_<score_type>_<spec_type>.pickle`` are pooled over all
    species types and averaged (NaNs ignored). The matrix of means is drawn on the current
    matplotlib figure and the cell with the highest mean is marked with a star.

    Args:
        offset_aucs: if True, subtract the AUCs stored in
            ``no_audio_classif_scores_<spec_type>.pickle`` from the per-species AUCs
            before averaging
        results_dir: directory holding the pickled result files
        all_spec_types: species type identifiers to pool over (must be re-iterable)

    Returns:
        Tuple ``(best_feat, best_score_type)`` for the cell with the highest mean AUC
    '''

    all_feats = ['raw_audioset_feats_096s', 'raw_audioset_feats_2s', 'raw_audioset_feats_3s',
                 'raw_audioset_feats_4s', 'raw_audioset_feats_5s', 'raw_audioset_feats_6s',
                 'raw_audioset_feats_7s', 'raw_audioset_feats_8s', 'raw_audioset_feats_9s',
                 'raw_audioset_feats_10s', 'raw_audioset_feats_30s', 'raw_audioset_feats_60s',
                 'raw_audioset_feats_300s']
    all_score_types = ['min', 'p10', 'p20', 'p30', 'p40', 'p50', 'p60', 'p70', 'p80', 'p90', 'max', 'mean']

    # Option to offset AUCs by that possible without audio data (just using AGB data of each site).
    # The baseline file only depends on the species type, so load each one once up front
    # rather than once per feature / score type combination
    baseline_aucs_by_type = {}
    if offset_aucs:
        for spec_type in all_spec_types:
            no_audio_f = 'no_audio_classif_scores_{}.pickle'.format(spec_type)
            with open(os.path.join(results_dir, no_audio_f), 'rb') as f:
                _, _, _, baseline_aucs, _, _ = pickle.load(f)
            baseline_aucs_by_type[spec_type] = baseline_aucs

    all_spec_mean_aucs = []
    for feat in all_feats:
        # Loop through each feature time resolution
        st_aucs = []
        for score_type in all_score_types:
            # Loop through different methods for obtaining classification scores
            all_spec_aucs = []
            for spec_type in all_spec_types:
                # Load classification results for this species type
                fname = 'classif_scores_sorted_{}_{}_{}.pickle'.format(feat, score_type, spec_type)
                with open(os.path.join(results_dir, fname), 'rb') as f:
                    _, _, _, all_auc_, _, _, _, _, _, _, _ = pickle.load(f)

                if offset_aucs:
                    all_auc_ = all_auc_ - baseline_aucs_by_type[spec_type]

                all_spec_aucs.extend(all_auc_)

            # Calculate mean AUC (over all pooled species) for given score type
            st_aucs.append(np.nanmean(all_spec_aucs, axis=0))

        all_spec_mean_aucs.append(st_aucs)

    # Transpose matrix of mean AUCs so rows are score types and columns are features
    all_spec_mean_aucs = np.asarray(all_spec_mean_aucs).T

    # Place an asterix over the score type / feature combo that gives the max mean AUC across all species
    max_auc_ix = np.unravel_index(np.argmax(all_spec_mean_aucs), all_spec_mean_aucs.shape)
    plt.scatter(max_auc_ix[1], max_auc_ix[0], marker='*', s=250, edgecolors='k', facecolors='none')

    # Plot matrix and label score types and feature timescales
    plt.matshow(all_spec_mean_aucs, aspect='equal', fignum=0)
    plt.yticks(range(len(all_score_types)), labels=[get_nice_lab(st) for st in all_score_types])
    secs = [get_secs_per_audio_feat(feat) for feat in all_feats]
    plt.gca().xaxis.set_ticks_position('bottom')
    plt.xticks(range(len(secs)), labels=secs, fontsize=20, rotation=70)

    cb = plt.colorbar(ticklocation='left')
    if offset_aucs:
        cb.set_label('\nMean AUC gain (across all species)')
    else:
        cb.set_label('\nMean AUC (across all species)')

    plt.xlabel('Audio feature timescale (s)')
    plt.ylabel('Classification score method')
    plt.tight_layout()

    best_feat = all_feats[max_auc_ix[1]]
    best_score_type = all_score_types[max_auc_ix[0]]
    print('best_feat {}, best_score_type {} (mean AUC = {})'.format(best_feat,best_score_type,all_spec_mean_aucs[max_auc_ix]))

    return best_feat, best_score_type
def do_auc_tscale_plot(lab_specs, lab_all_specs=False, all_spec_types=('herp', 'avi', 'avi-rl'), offset_aucs=False,
                       score_type='p60', results_dir='fig_data_logo', print_all=False):
    '''
    Plot how AUC per species varies with timescale of audio features

    One line is drawn per species on the current matplotlib axes, coloured by species type.
    A permutation test (100 label shuffles per fold, fixed random seed) is run on the test
    scores of the third timescale in the list below; species whose p value exceeds 0.05 are
    drawn dashed and are not used for the legend entry.

    Args:
        lab_specs: collection of species identifiers (lower-case common name with spaces
            replaced by hyphens) to highlight and annotate
        lab_all_specs: if True, annotate every species instead of only those in lab_specs
        all_spec_types: species type identifiers to plot
        offset_aucs: if True, subtract the per-species AUC stored in
            ``no_audio_classif_scores_<spec_type>.pickle`` from each line
        score_type: score aggregation method used to select the result files
        results_dir: directory holding the pickled result files
        print_all: currently unused; kept for backward compatibility
    '''

    # Choose which timescales to plot
    f_exts = ['096', '2', '3', '4', '5', '6', '7', '8', '9', '10', '30', '60', '300']

    # Position in f_exts of the timescale whose AUC is printed and permutation tested
    p_val_ix = 2
    # Number of label permutations used to build each null distribution
    null_perms = 100

    # Seconds per feature for each timescale. Derived directly from f_exts (rather than by
    # parsing the result filenames) so that score types containing '_' cannot break it
    secs = [get_secs_per_audio_feat('raw_audioset_feats_{}s'.format(ext)) for ext in f_exts]

    max_auc = 0
    for spec_type in all_spec_types:
        # Option to plot increase in AUC over non-audio AGB based predictions
        if offset_aucs:
            no_audio_f = 'no_audio_classif_scores_{}.pickle'.format(spec_type)
            with open(os.path.join(results_dir, no_audio_f), 'rb') as f:
                _, _, _, baseline_aucs, _, _ = pickle.load(f)

        # Load classification results from stored files (one file per timescale)
        all_specs = None
        all_auc_ks = []
        all_test_scores_ks = []
        all_test_labs_ks = []
        for ext in f_exts:
            fname = 'classif_scores_sorted_raw_audioset_feats_{}s_{}_{}.pickle'.format(ext, score_type, spec_type)
            with open(os.path.join(results_dir, fname), 'rb') as f:
                all_specs_, _, _, _, all_auc_ks_, _, _, all_test_scores_ks_, all_test_labs_ks_, _, _ = pickle.load(f)

            # The species list of the first file is used for all timescales
            # NOTE(review): this presumes every file stores species in the same order - confirm
            if all_specs is None:
                all_specs = all_specs_
            all_auc_ks.append(all_auc_ks_)
            all_test_scores_ks.append(all_test_scores_ks_)
            all_test_labs_ks.append(all_test_labs_ks_)

        all_auc_ks = np.asarray(all_auc_ks)
        all_test_scores_ks = np.asarray(all_test_scores_ks)
        all_test_labs_ks = np.asarray(all_test_labs_ks)
        print('all_auc_ks shape {}'.format(all_auc_ks.shape))
        print('all_test_scores_ks shape {}'.format(all_test_scores_ks.shape))
        print('all_test_labs_ks shape {}'.format(all_test_labs_ks.shape))

        # Array to store which features were the best performing across all species
        max_secs = []

        # Loop through each species in turn
        done_lab = False
        for s_ix, spec in enumerate(all_specs):
            # Get results for the given species (first axis indexes the timescale)
            spec_test_scores_ks = all_test_scores_ks[:, s_ix]
            spec_test_labs_ks = all_test_labs_ks[:, s_ix]
            spec_auc_ks = all_auc_ks[:, s_ix]

            # Get mean AUC across all K folds for the species (for each feature timescale)
            ys = [np.nanmean(spec_auc_ks[x_ix]) for x_ix in range(len(secs))]

            # If offset_aucs set, offset AUC by that attained by a model based on AGB values only
            if offset_aucs:
                ys = np.asarray(ys) - baseline_aucs[s_ix]

            # Calculate null distributions for AUC values to assign p values for each real AUC result.
            # Re-seeding per species keeps the printed p values reproducible
            null_aucs = []
            np.random.seed(42)
            for test_scores_k, test_labs_k in zip(spec_test_scores_ks.T, spec_test_labs_ks.T):
                if len(np.unique(test_labs_k[p_val_ix])) > 1:
                    labs_shuffled = np.copy(test_labs_k[p_val_ix])
                    k_aucs = []
                    for _ in range(null_perms):
                        np.random.shuffle(labs_shuffled)
                        k_aucs.append(roc_auc_score(labs_shuffled, test_scores_k[p_val_ix]))

                    null_aucs.append(k_aucs)
                else:
                    # If there's only one class (e.g., the species is never present or absent) then AUC is undefined
                    null_aucs.append([np.nan] * null_perms)

            # Calculate p value of true AUC based on a null distribution (mean over folds per permutation)
            null_aucs = np.nanmean(np.asarray(null_aucs), axis=0)
            nulls_higher = null_aucs[null_aucs > ys[p_val_ix]]
            auc_p_val = len(nulls_higher) / len(null_aucs)
            print('{} AUC = {} (p = {})'.format(spec.comm_name, np.round(ys[p_val_ix],2), np.round(auc_p_val,2)))

            c = get_spec_type_col(spec_type)

            # Annotate the chosen species in lab_specs
            spec_str = spec.comm_name.lower().replace(' ', '-')
            if lab_all_specs or spec_str in lab_specs:
                lw = 1 if lab_all_specs else 3
                alpha = 1
                plt.text(secs[-1]-10, ys[-1], spec.comm_name, horizontalalignment='right',
                         verticalalignment='top', fontsize=14, color=c)

                if spec_str in ('sooty-capped-babbler', 'bold-striped-tit-babbler', 'tree-hole-frog', 'rhinoceros-hornbill'):
                    print(ys)
            else:
                alpha = 0.5
                lw = 1

            if np.max(ys) > max_auc:
                max_auc = np.max(ys)

            # Record the timescale at which this species attains its highest AUC
            max_secs.append(secs[np.argmax(ys)])

            # Dashed line for species whose AUC is not significant
            ls = '--' if auc_p_val > 0.05 else '-'

            # Plot a line showing AUC for the given species across different timescales of audio features.
            # Only the first significant species of each type contributes a legend entry
            if done_lab or auc_p_val > 0.05:
                lab = ''
            else:
                lab = get_nice_lab(spec_type)
                done_lab = True
            plt.plot(secs, ys, lw=lw, ls=ls, alpha=alpha, c=c, label=lab)

        # Print how many species of this type peak at each timescale
        bins = secs + [(secs[-1]+1)]
        hist, _ = np.histogram(max_secs, bins)
        print(hist)

    print('Max AUC is {}'.format(max_auc))

    # Set x limits on the plot
    plt.xlim([secs[0], secs[-1]])
    plt.xscale('log')
    plt.gca().xaxis.grid(True, alpha=0.3)

    # Legend for the plot. Matplotlib 3.7 renamed Legend.legendHandles to legend_handles
    leg = plt.legend(loc='lower left')
    leg_handles = leg.legend_handles if hasattr(leg, 'legend_handles') else leg.legendHandles
    for legobj in leg_handles:
        legobj.set_linewidth(2)
        legobj.set_alpha(1)

    # Labelling of x axis: tick at every timescale, but only label a subset to avoid clutter
    secs = np.asarray(secs)
    show_xtlabs = [0, 1, 2, 4, 6, 9, 10, 11, 12]
    xtls = [s if s_ix in show_xtlabs else '' for s_ix, s in enumerate(secs)]
    plt.xticks(secs, labels=xtls)
    plt.xlabel('\nAudio feature timescale (s)')

    # Labelling of y axis
    if offset_aucs:
        plt.ylabel('AUC (gain over naive estimator, {})'.format(get_nice_lab(score_type)))
        plt.gca().axhline(0, lw=3, alpha=0.6, ls='--', c='k')
    else:
        plt.ylabel('AUC ({})'.format(get_nice_lab(score_type)))
def plot_auc_by_site_fig(all_spec_types, score_type='p60', feat='raw_audioset_feats_3s', results_dir='fig_data_logo'):
    '''
    Plot AUC for each species at each site as a scatter plot

    Sites are laid out along the x axis in order of increasing AGB (as returned by each
    site's ``get_agb()``), with one point per species coloured by species type. Also prints
    the Spearman correlation between site AGB and AUC, and the closest / mean pairwise
    distance between sites.

    Args:
        all_spec_types: species type identifiers to include
        score_type: score aggregation method used to select the result files
        feat: audio feature name used to select the result files
        results_dir: directory holding the pickled result files
    '''

    auc_per_site = None
    site_names = None
    all_sites = None
    spec_types = []
    leg_elements = []
    for spec_type in all_spec_types:
        # Load classification results for this species type
        fname = 'classif_scores_sorted_{}_{}_{}.pickle'.format(feat, score_type, spec_type)
        with open(os.path.join(results_dir, fname), 'rb') as f:
            _, _, _, _, auc_ks, _, _, _, _, _, all_k_sites = pickle.load(f)

        # Make sure all site names are in the same order across K folds
        if site_names is None:
            # The first entry of the first file defines the reference site ordering
            all_sites = all_k_sites[0]
            site_names = [s.name for s in all_sites]
            site_agbs = [s.get_agb() for s in all_sites]
            site_sort_ix_agb = np.argsort(site_agbs)
        else:
            for k_sites in all_k_sites:
                snames = [s.name for s in k_sites]
                assert snames == site_names, 'Site ordering differs for species type {}'.format(spec_type)

        # Append AUC results to an nparray storing results for all species types
        auc_per_site_st = np.asarray(auc_ks)
        if auc_per_site is None:
            auc_per_site = auc_per_site_st
        else:
            auc_per_site = np.vstack([auc_per_site, auc_per_site_st])

        # Track which rows correspond to which species type (for colouring points later)
        spec_types.extend([spec_type] * auc_per_site_st.shape[0])

        # Add an element to the legend for this species type
        spec_col = get_spec_type_col(spec_type)
        leg_elements.append(Line2D([0], [0], marker='o', color=spec_col, label=get_nice_lab(spec_type),
                                   markerfacecolor=spec_col, markersize=10, lw=0))

    site_names = np.asarray(site_names)

    # One colour per row (species) of auc_per_site; identical for every site
    col_array = [get_spec_type_col(st) for st in spec_types]

    xs_agb = []
    ys_auc = []
    # site_sort_ix_agb[x_pos] is the index of the site with the x_pos-th lowest AGB, so
    # walking it places each site's AUCs at the same x position as its tick label below.
    # NOTE(review): assumes column j of the per-fold AUCs belongs to site_names[j] - confirm
    for x_pos, site_ix in enumerate(site_sort_ix_agb):
        site_aucs = auc_per_site[:, site_ix]

        # For each site, plot scatters for AUCs of all species coloured by species type
        plt.scatter([x_pos] * len(site_aucs), site_aucs, c=col_array)

        # Save AGB and AUC of this site for checking correlation later
        xs_agb.extend([site_agbs[site_ix]] * len(site_aucs))
        ys_auc.extend(site_aucs)

    xs_agb = np.asarray(xs_agb)
    ys_auc = np.asarray(ys_auc)

    # Check if there's any correlation between AGB and AUC (ignoring undefined AUCs)
    auc_defined = ~np.isnan(ys_auc)
    rho, p = stats.spearmanr(xs_agb[auc_defined], ys_auc[auc_defined])
    print('Spearman corr between AGB and AUC: rho = {}, p = {}'.format(rho, p))

    # Determine pairwise distances between sites
    coords = np.asarray([[site.lat, site.long] for site in all_sites])

    # Using the vincenty distance function to determine pairwise distances
    # NOTE(review): if vincenty comes from geopy it is no longer available in geopy 2.x - confirm
    m_dist = pdist(coords, lambda u, v: vincenty(u, v).kilometers)
    print('Closest distance = {}, mean distance = {}'.format(
        np.min(m_dist), np.mean(m_dist)))

    plt.gca().legend(handles=leg_elements)
    plt.xticks(range(len(site_names)), site_names[site_sort_ix_agb], rotation=35)

    plt.xlabel('Site')
    plt.ylabel('AUC per species')
    plt.tight_layout()
def _chi2_by_group(groups, present):
    ''' Chi^2 statistic of the presence / absence contingency table with one row per unique group '''

    cont_tab = []
    for grp in np.unique(groups):
        in_grp = groups == grp
        cont_tab.append([np.sum(in_grp & present), np.sum(in_grp & ~present)])

    chi2, _, _, _ = stats.chi2_contingency(np.asarray(cont_tab))
    return chi2


def plot_chi2_fig(lab_specs, lab_all_specs, all_spec_types, score_type='p60', feat='raw_audioset_feats_3s', results_dir='fig_data_logo'):
    '''
    Plot figure showing correlation between AUC of species prediction and chi^2 statistics

    For each species a chi^2 statistic is computed on contingency tables of presence /
    absence counts grouped by site, by hour of day, and by site/hour combination. The
    hour-of-day statistic is scattered against AUC on the current matplotlib axes, and
    Spearman correlations and t-tests between species types are printed.

    Args:
        lab_specs: collection of species identifiers (lower-case common name with spaces
            replaced by hyphens) to annotate
        lab_all_specs: if True, annotate every species instead of only those in lab_specs
        all_spec_types: species type identifiers; each must be 'avi', 'avi-rl' or 'herp'
        score_type: score aggregation method used to select the result files
        feat: audio feature name used to select the result files and point count dataset
        results_dir: directory holding the pickled result files

    Raises:
        ValueError: if a species type is not one of 'avi', 'avi-rl' or 'herp'
    '''

    xs_auc = []
    ys_site_chi2 = []
    ys_hr_chi2 = []
    ys_site_hr_chi2 = []
    xs_specs = []
    all_n_occs = []
    xs_spec_types = []

    for spec_type in all_spec_types:
        # Load classification results
        fname = 'classif_scores_sorted_{}_{}_{}.pickle'.format(feat,score_type,spec_type)
        with open(os.path.join(results_dir, fname), 'rb') as f:
            auc_specs, spec_n_occs, _, aucs, _, _, _, _, _, _, _ = pickle.load(f)
        auc_spec_names = np.asarray([s.comm_name for s in auc_specs])

        # Load point count dataset
        _, all_sites, all_taxa, all_pcs = load_pc_dataset('pc_data_parsed_{}'.format(feat))

        if spec_type == 'avi':
            chosen_pcs = get_avi_pcs_no_water_sites(all_pcs, all_sites)
            chosen_specs = get_avi_specs_min_pres(all_taxa, chosen_pcs)
            comm_attr = 'avi_spec_comm'
        elif spec_type == 'avi-rl':
            chosen_pcs = get_avi_pcs_no_water_sites(all_pcs, all_sites)
            chosen_specs = get_red_list_avi_specs_min_pres(all_taxa, chosen_pcs)
            comm_attr = 'avi_spec_comm'
        elif spec_type == 'herp':
            chosen_pcs = get_herp_pcs_no_water_sites(all_pcs, all_sites)
            chosen_specs = get_herp_specs_min_pres(all_taxa, chosen_pcs)
            comm_attr = 'herp_spec_comm'
        else:
            # Previously this fell through and reused data from the previous species type
            raise ValueError('Unknown species type: {}'.format(spec_type))

        # Vectors of point count sites, hours, and site hours (same for every species of this type)
        pc_sites = np.asarray([pc.site.name for pc in chosen_pcs])
        pc_hrs = np.asarray([pc.dt.hour for pc in chosen_pcs])
        pc_site_hrs = np.asarray(['{} {}'.format(pc.site.name,pc.dt.hour) for pc in chosen_pcs])

        for spec in chosen_specs:
            # Compute chi2 statistics for each species in turn

            # Vector with a label for each point count - True if species is present, False if absent
            present = np.asarray([spec in getattr(pc, comm_attr) for pc in chosen_pcs], dtype=bool)

            # Locate the species in the classification results by name
            # NOTE(review): assumes spec_n_occs is stored in the same order as auc_specs - confirm
            auc_ix = np.where(auc_spec_names == spec.comm_name)[0][0]

            xs_auc.append(aucs[auc_ix])
            ys_site_chi2.append(_chi2_by_group(pc_sites, present))
            ys_hr_chi2.append(_chi2_by_group(pc_hrs, present))
            ys_site_hr_chi2.append(_chi2_by_group(pc_site_hrs, present))
            xs_specs.append(spec)
            xs_spec_types.append(spec_type)
            all_n_occs.append(spec_n_occs[auc_ix])

    xs_auc = np.asarray(xs_auc)
    ys_hr_chi2 = np.asarray(ys_hr_chi2)
    ys_site_chi2 = np.asarray(ys_site_chi2)
    ys_site_hr_chi2 = np.asarray(ys_site_hr_chi2)

    print('AUC max: {}, min {}'.format(np.max(xs_auc), np.min(xs_auc)))

    # Calculate spearman correlations between chi^2 statistics and AUCs
    s_rho, s_p = stats.spearmanr(xs_auc,ys_site_chi2)
    print('site {} {}'.format(s_rho,s_p))
    h_rho, h_p = stats.spearmanr(xs_auc,ys_hr_chi2)
    print('hour {} {}'.format(h_rho,h_p))
    sh_rho, sh_p = stats.spearmanr(xs_auc,ys_site_hr_chi2)
    print('site hour {} {}'.format(sh_rho,sh_p))
    noccs_rho, noccs_p = stats.spearmanr(xs_auc,all_n_occs)
    print('all_n_occs {} {}'.format(noccs_rho,noccs_p))

    # The hour-of-day statistic is the one shown on the figure
    plt_ys = ys_hr_chi2
    plt_rho, plt_p = stats.spearmanr(xs_auc,plt_ys)

    avi_chi2s = []
    avi_rl_chi2s = []
    herp_chi2s = []
    avi_aucs = []
    avi_rl_aucs = []
    herp_aucs = []
    for s_ix, s in enumerate(xs_specs):
        # Plot points on scatter for each species in turn
        c = get_spec_type_col(xs_spec_types[s_ix])

        pt_sz = 50
        pt_a = 0.5
        spec_str = s.comm_name.lower().replace(' ','-')

        # Annotate chosen species on scatter plot
        if lab_all_specs or spec_str in lab_specs:
            pt_sz = 50 if lab_all_specs else 90

            # Hack to make species name annotations non-overlapping
            ha = 'left'
            hoffs = 0.008
            voffs = 0
            if xs_auc[s_ix] > 0.82 or 'rough-guardian-frog' in spec_str:
                ha = 'right'
                hoffs = -hoffs

            plt.text(xs_auc[s_ix]+hoffs,plt_ys[s_ix]+voffs,s.comm_name,color=c,verticalalignment='center',horizontalalignment=ha)
            pt_a = 1

        plt.scatter(xs_auc[s_ix],plt_ys[s_ix],c=c,s=pt_sz,alpha=pt_a)

        # Collect values per species type for the t-tests below
        if xs_spec_types[s_ix] == 'avi':
            avi_chi2s.append(plt_ys[s_ix])
            avi_aucs.append(xs_auc[s_ix])
        elif xs_spec_types[s_ix] == 'herp':
            herp_chi2s.append(plt_ys[s_ix])
            herp_aucs.append(xs_auc[s_ix])
        elif xs_spec_types[s_ix] == 'avi-rl':
            avi_rl_chi2s.append(plt_ys[s_ix])
            avi_rl_aucs.append(xs_auc[s_ix])

    # Perform T test between species types to determine if one type is more temporally niched / spatially niched than the other
    t_stat_chi2, t_p_chi2 = stats.ttest_ind(avi_chi2s,herp_chi2s)
    t_stat_auc, t_p_auc = stats.ttest_ind(avi_aucs,herp_aucs)
    t_stat_auc_rl, t_p_auc_rl = stats.ttest_ind(avi_aucs,avi_rl_aucs)
    print('Herp vs avi chi2 t-test: stat = {}, p = {}. Mean avi: {} herp: {}'.format(t_stat_chi2, t_p_chi2, np.mean(avi_chi2s),np.mean(herp_chi2s)))
    print('Herp vs avi auc t-test: stat = {}, p = {}. Mean avi: {} herp: {}'.format(t_stat_auc, t_p_auc, np.mean(avi_aucs),np.mean(herp_aucs)))
    print('Avi vs avi-rl auc t-test: stat = {}, p = {}. Mean avi: {} avi-rl: {}'.format(t_stat_auc_rl, t_p_auc_rl, np.mean(avi_aucs),np.mean(avi_rl_aucs)))

    # Raw string so that the LaTeX backslash is not treated as an (invalid) escape sequence
    plt.ylabel(r'Occurrence/hour $\chi^2$ statistic')
    plt.xlabel('\nAUC ({}s per feature, {})'.format(get_secs_per_audio_feat(feat),get_nice_lab(score_type)))

    if plt_p < 0.001:
        p_txt = 'p < 0.001'
    else:
        p_txt = 'p = {}'.format(round(plt_p,6))

    # Correlation summary anchored near the top-left of the plotted data
    plt.text(sorted(xs_auc)[0], sorted(plt_ys)[-1]-20,
             'Spearman correlation:\n' + r'$\rho$ = {}, {}'.format(round(plt_rho,2), p_txt),
             verticalalignment='top')

    plt.tight_layout()
def plot_noccs_fig(lab_specs, lab_all_specs, all_spec_types, score_type='p60', feat='raw_audioset_feats_3s', results_dir='fig_data_logo'):
    '''
    Plot the relationship between AUC of species occurrence predictions and number of occurrences of each species

    Each species is drawn as one point on the current matplotlib axes, coloured by species
    type, and the Spearman correlation between AUC and occurrence count is written on the plot.

    Args:
        lab_specs: collection of species identifiers (lower-case common name with spaces
            replaced by hyphens) to annotate
        lab_all_specs: if True, annotate every species instead of only those in lab_specs
        all_spec_types: species type identifiers; each must be 'avi', 'avi-rl' or 'herp'
        score_type: score aggregation method used to select the result files
        feat: audio feature name used to select the result files and point count dataset
        results_dir: directory holding the pickled result files

    Raises:
        ValueError: if a species type is not one of 'avi', 'avi-rl' or 'herp'
    '''

    xs_auc = []
    xs_specs = []
    all_n_occs = []
    xs_spec_types = []

    for spec_type in all_spec_types:
        # Load classification results
        fname = 'classif_scores_sorted_{}_{}_{}.pickle'.format(feat, score_type, spec_type)
        with open(os.path.join(results_dir, fname), 'rb') as f:
            auc_specs, spec_n_occs, _, aucs, _, _, _, _, _, _, _ = pickle.load(f)
        auc_spec_names = np.asarray([s.comm_name for s in auc_specs])

        _, all_sites, all_taxa, all_pcs = load_pc_dataset('pc_data_parsed_{}'.format(feat))

        # Load the appropriate point counts and species lists
        if spec_type == 'avi':
            chosen_pcs = get_avi_pcs_no_water_sites(all_pcs, all_sites)
            chosen_specs = get_avi_specs_min_pres(all_taxa, chosen_pcs)
        elif spec_type == 'avi-rl':
            chosen_pcs = get_avi_pcs_no_water_sites(all_pcs, all_sites)
            chosen_specs = get_red_list_avi_specs_min_pres(all_taxa, chosen_pcs)
        elif spec_type == 'herp':
            chosen_pcs = get_herp_pcs_no_water_sites(all_pcs, all_sites)
            chosen_specs = get_herp_specs_min_pres(all_taxa, chosen_pcs)
        else:
            # Previously this fell through and reused the species list of the previous type
            raise ValueError('Unknown species type: {}'.format(spec_type))

        # Loop through each species extracting number of occurrences and species AUCs
        for spec in chosen_specs:
            # Locate the species in the classification results by name and use that one
            # index for both the AUC and the occurrence count
            # NOTE(review): assumes spec_n_occs is stored in the same order as auc_specs - confirm
            auc_ix = np.where(auc_spec_names == spec.comm_name)[0][0]

            xs_auc.append(aucs[auc_ix])
            xs_specs.append(spec)
            xs_spec_types.append(spec_type)
            all_n_occs.append(spec_n_occs[auc_ix])

    xs_auc = np.asarray(xs_auc)

    # Calculate correlation between AUC and number of occurrences per species
    plt_ys = all_n_occs
    plt_rho, plt_p = stats.spearmanr(xs_auc, plt_ys)

    for s_ix, s in enumerate(xs_specs):
        c = get_spec_type_col(xs_spec_types[s_ix])

        pt_sz = 50
        pt_a = 0.3
        spec_str = s.comm_name.lower().replace(' ', '-')

        # Label species on the scatter plot
        if lab_all_specs or spec_str in lab_specs:
            # Smaller markers and text when every species is labelled
            if lab_all_specs:
                pt_sz = 50
                fsz = 7
            else:
                pt_sz = 90
                fsz = 18

            # Hack to make sure species annotations don't overlap
            ha = 'left'
            hoffs = 0.008
            voffs = 0
            if xs_auc[s_ix] > 0.82 or 'rough-guardian-frog' in spec_str:
                ha = 'right'
                hoffs = -hoffs

            plt.text(xs_auc[s_ix] + hoffs, plt_ys[s_ix] + voffs, s.comm_name, color=c,
                     verticalalignment='center', horizontalalignment=ha, fontsize=fsz)
            pt_a = 1

        # Scatter point for given species
        plt.scatter(xs_auc[s_ix], plt_ys[s_ix], c=c, s=pt_sz, alpha=pt_a)

    plt.ylabel('Number of occurrences in point counts')
    plt.xlabel('\nAUC ({}s per feature, {})'.format(
        get_secs_per_audio_feat(feat), get_nice_lab(score_type)))

    if plt_p < 0.001:
        p_txt = 'p < 0.001'
    else:
        p_txt = 'p = {}'.format(round(plt_p, 6))

    # Correlation summary anchored at the top-left of the plotted data
    plt.text(sorted(xs_auc)[0], sorted(plt_ys)[-1],
             'Spearman correlation:\n' + r'$\rho$ = {}, {}'.format(round(plt_rho, 2), p_txt),
             verticalalignment='top')

    plt.tight_layout()