def zip_result_files(blind_files, matrix_files, pair_files):
    """Group blind, matrix and pair file names that belong to the same allele.

    For every entry of ``blind_files`` the allele name is derived with
    ``get_allele_name_from_path`` and then looked up in ``matrix_files`` and
    in ``pair_files`` through ``get_file_by_allele``.  An allele is dropped
    when either lookup returns ``None``.

    :param blind_files: iterable of blind data file names
    :param matrix_files: iterable of scoring matrix file names
    :param pair_files: iterable of file names of scoring matrices with pair
        coefficients
    :return: list of ``(blind_fname, matrix_fname, pairs_fname)`` tuples, in
        the iteration order of ``blind_files``
    """
    # The lookups operate on private list copies of the two candidate inputs.
    matrix_candidates = list(matrix_files)
    pair_candidates = list(pair_files)

    grouped = []
    for blind_path in blind_files:
        allele = get_allele_name_from_path(blind_path)
        # Both lookups are always performed, whatever the first one returns.
        matrix_match = get_file_by_allele(matrix_candidates, allele)
        pair_match = get_file_by_allele(pair_candidates, allele)
        if matrix_match is not None and pair_match is not None:
            grouped.append((blind_path, matrix_match, pair_match))
    return grouped
def zip_matrix_and_pair_data(blind_files, matrix_files, pair_files):
    """
    Build one tuple per allele of the form
    (blind data fname, scoring matrix fname, pair matrix fname).

    The three inputs need not have the same length (not every allele has
    pair coefficients), so the pair files drive the iteration and any allele
    lacking a blind data file or a scoring matrix file is discarded.

    The inputs were documented by the original author as "assumed sorted";
    nothing visible in this function relies on ordering.
    NOTE(review): confirm whether ``get_file_by_allele`` needs sorted input.

    :param blind_files: iterable of blind data file names; copied to a list
        before being searched
    :param matrix_files: iterable of scoring matrix file names; copied to a
        list before being searched
    :param pair_files: iterable of pair matrix file paths using ``/`` as the
        separator; the allele name is the basename minus its last 10
        characters
    :return: list of ``(blind_file, matrix_file, pair_file)`` tuples, in the
        iteration order of ``pair_files``
    """
    matrix_candidates = list(matrix_files)
    blind_candidates = list(blind_files)

    triples = []
    for pair_path in pair_files:
        basename = pair_path.split('/')[-1]
        # Strip the fixed-width 10-character tail to obtain the allele name.
        allele = basename[:-10]
        blind_match = get_file_by_allele(blind_candidates, allele)
        matrix_match = get_file_by_allele(matrix_candidates, allele)
        if blind_match is None or matrix_match is None:
            continue
        triples.append((blind_match, matrix_match, pair_path))
    return triples
def _evaluate_pssm(pssm_path, pair_flag, peptides_path):
    """Load a PSSM, predict on the given peptide file, and score the result.

    :param pssm_path: path handed to the ``PSSM`` constructor
    :param pair_flag: second ``PSSM`` constructor argument (``False`` for the
        plain matrix, ``True`` for the matrix with pair coefficients)
    :param peptides_path: path handed to ``PSSM.load_peptides``
    :return: ``(pssm, performance)`` where ``performance`` is whatever
        ``get_performance`` returns for the measured vs. predicted values
    """
    pssm = PSSM(pssm_path, pair_flag)
    pssm.load_peptides(peptides_path, True)
    pssm.predict()
    performance = get_performance(pssm.measured_values, pssm.predicted_values)
    return pssm, performance


def _save_scatter(x_values, y_values, line_end, axis_limits,
                  x_label, y_label, title, out_path):
    """Save one scatter plot with a black (0, 0)-(line_end, line_end) line.

    The pyplot state is cleared and the figure closed afterwards so that
    consecutive plots do not draw on top of each other.
    """
    plt.scatter(x_values, y_values)
    plt.plot([0, line_end], [0, line_end], 'k-')
    plt.axis(axis_limits)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.savefig(out_path)
    plt.clf()
    plt.cla()
    plt.close()


def create_graphs():
    """Compare matrix-only and matrix-with-pairs predictions and plot them.

    Uses the module-level ``blind_files``, ``matrix_files``, ``pair_files``,
    ``matrix_dir``, ``pairs_dir``, ``allele_data_dir``, ``output_dir``,
    ``version`` and ``top_ten_allele_names``.

    Steps:

    1. Keep only the alleles for which a blind data file, a scoring matrix
       file and a pair matrix file all exist.
    2. For each such allele, evaluate both models on the blind data and
       record the first two entries of each ``get_performance`` result
       (collected as PCC and AUC respectively).
    3. For alleles whose name contains ``A-0101``, ``A-0201``, ``A-0202`` or
       ``A-0301``, save measured-vs-predicted scatter plots for both models.
    4. Pass the pair values of the alleles listed in
       ``top_ten_allele_names`` to ``calc_avg_pair_rms``.
    5. Save the matrix-vs-pair PCC and AUC summary scatter plots.

    All images are written below ``output_dir + 'all_alleles_' + version``.

    :return: None
    """
    # (blind data, scoring matrix, scoring matrix w/ pair coeffs)
    fname_tuples = []
    for blind_fname in blind_files:
        # The allele name is the file name minus its 10-character tail.
        allele_name = blind_fname[:-10]
        matrix_fname = get_file_by_allele(matrix_files, allele_name)
        pairs_fname = get_file_by_allele(pair_files, allele_name)
        if matrix_fname is not None and pairs_fname is not None:
            fname_tuples.append((blind_fname, matrix_fname, pairs_fname))

    plot_dir = output_dir + 'all_alleles_' + version + '/'
    plotted_alleles = ('A-0101', 'A-0201', 'A-0202', 'A-0301')

    matrix_pcc = []
    pair_pcc = []
    matrix_auc = []
    pair_auc = []
    position_pair_lists = []

    for blind_fname, matrix_fname, pairs_fname in fname_tuples:
        allele_name = blind_fname[:-10]
        peptides_path = allele_data_dir + blind_fname

        matrix_pssm, matrix_performance = _evaluate_pssm(
            matrix_dir + matrix_fname, False, peptides_path)
        pair_pssm, pair_performance = _evaluate_pssm(
            pairs_dir + pairs_fname, True, peptides_path)

        matrix_pcc.append(matrix_performance[0])
        pair_pcc.append(pair_performance[0])
        matrix_auc.append(matrix_performance[1])
        pair_auc.append(pair_performance[1])

        if allele_name in top_ten_allele_names:
            position_pair_lists.append(pair_pssm.position_pair_vals)

        # Per-allele plots are only produced for a fixed handful of alleles.
        if any(tag in allele_name for tag in plotted_alleles):
            _save_scatter(
                matrix_pssm.measured_values, matrix_pssm.predicted_values,
                8, (0, 8.0, 0, 8.0),
                'Measured log10(IC50)', 'Predicted log10(IC50)',
                allele_name + ' Matrix',
                plot_dir + allele_name + '_matrix_meas_vs_pred.png')
            _save_scatter(
                pair_pssm.measured_values, pair_pssm.predicted_values,
                8, (0, 8.0, 0, 8.0),
                'Measured log10(IC50)', 'Predicted log10(IC50)',
                allele_name + ' with ' + str(pair_pssm.num_pairs)
                + ' pairs selected',
                plot_dir + allele_name + '_pair_meas_vs_pred.png')

    calc_avg_pair_rms(position_pair_lists)

    # Summary plots: points above the diagonal mean the pair model scored
    # higher than the plain matrix for that allele.
    _save_scatter(
        matrix_pcc, pair_pcc,
        1, (0.5, 1.0, 0.5, 1.0),
        'Pairs(-)', 'Pairs(+)',
        'Matrix vs Pair PCC',
        plot_dir + 'mat_vs_pair_pcc.png')
    _save_scatter(
        matrix_auc, pair_auc,
        1, (0.75, 1.0, 0.75, 1.0),
        'Pairs(-)', 'Pairs(+)',
        'Matrix vs Pair AUC',
        plot_dir + 'mat_vs_pair_auc.png')