def correlation(mat_file_1, mat_file_2):
    """
    Draws the plot

    For every blockade of ``mat_file_1`` two values are computed:

    * x: mean of ``1 - distance.correlation`` against all blockades of
      ``mat_file_1`` (the blockade itself is included in that average);
    * y: mean of ``1 - distance.correlation`` against all blockades of
      ``mat_file_2``.

    The points are drawn as a scatter plot on fixed [-0.6, 0.6] axes with a
    thin dashed grid; thick red dashed lines mark the medians of the x and y
    values. Axis labels are hard-coded to "H3 tail" / "CCL5".

    Both files go through the same preprocessing: ``sp._fractional_blockades``,
    ``sp._filter_by_duration(..., 0.5, 20)``, then each ``eventTrace`` is
    passed through ``sp._trim_flank_noise`` and discretized to 20 points.

    Returns nothing; the figure is displayed with ``plt.show()``.

    NOTE(review): ``distance`` is presumably ``scipy.spatial.distance`` --
    confirm against the file's imports.
    """
    def _discretized_traces(mat_file):
        # Shared preprocessing pipeline for both input files.
        blockades = read_mat(mat_file)
        blockades = sp._fractional_blockades(blockades)
        blockades = sp._filter_by_duration(blockades, 0.5, 20)
        # Build a real list: the traces are iterated repeatedly (nested
        # loops below), which silently breaks with a lazy Python 3 ``map``
        # object because the inner loop would exhaust the outer one.
        return [sp.discretize(sp._trim_flank_noise(b.eventTrace), 20)
                for b in blockades]

    def _mean_similarity(trace, others):
        # Average of (1 - correlation distance) between one trace and a set.
        return np.mean([1 - distance.correlation(trace, other)
                        for other in others])

    traces_1 = _discretized_traces(mat_file_1)
    traces_2 = _discretized_traces(mat_file_2)

    self_corr = [_mean_similarity(trace, traces_1) for trace in traces_1]
    cross_corr = [_mean_similarity(trace, traces_2) for trace in traces_1]
    mean_self = np.median(self_corr)
    mean_cross = np.median(cross_corr)

    matplotlib.rcParams.update({"font.size": 16})

    # ``plt.subplot()`` returns an Axes object (not a Figure).
    axes = plt.subplot()
    axes.spines["right"].set_visible(False)
    axes.spines["top"].set_visible(False)
    axes.get_xaxis().tick_bottom()
    axes.get_yaxis().tick_left()

    bounds = (-0.6, 0.6)
    axes.set_xlim(*bounds)
    axes.set_ylim(*bounds)
    axes.set_xlabel("(H3 tail, H3 tail) correlation")
    axes.set_ylabel("(H3 tail, CCL5) correlation")

    # Thin background grid, one horizontal and one vertical line per tick.
    for tick in [-0.4, -0.2, 0, 0.2, 0.4]:
        axes.plot(bounds, (tick, tick), "--", lw=0.5, color="black")
        axes.plot((tick, tick), bounds, "--", lw=0.5, color="black")

    # Median markers.
    axes.plot(bounds, (mean_cross, mean_cross), "--", lw=1.5, color="red")
    axes.plot((mean_self, mean_self), bounds, "--", lw=1.5, color="red")

    axes.scatter(self_corr, cross_corr, linewidth=0.5, c="dodgerblue",
                 s=30, edgecolor="blue")

    plt.tight_layout()
    plt.show()
def get_bias(blockades_file, model_file, cluster_size):
    """
    Gets AA-specific bias between the empirical and theoretical signals

    Blockades are read from ``blockades_file`` and clustered with
    ``sp.preprocess_blockades`` (``min_dwell=0.5``, ``max_dwell=20``). The
    peptide is taken from the first blockade of the first cluster, and the
    theoretical signal for it is produced by the model loaded from
    ``model_file``.

    For every cluster the consensus is discretized to ``len(peptide)``
    points. The peptide is padded with ``WINDOW - 1`` "-" characters on both
    sides and scanned with a window of ``WINDOW`` characters; windows that
    contain padding are skipped. For each remaining window at position ``i``
    the difference ``discr_signal[i] - model_signal[i]`` is appended once
    for every character of the window.

    Returns a ``defaultdict(list)`` mapping amino-acid characters to the
    collected differences.

    NOTE(review): ``clusters[0]`` fails when preprocessing yields no
    clusters (presumably an IndexError, if ``clusters`` is a list).
    """
    WINDOW = 4

    blockades = read_mat(blockades_file)
    clusters = sp.preprocess_blockades(blockades, cluster_size=cluster_size,
                                       min_dwell=0.5, max_dwell=20)
    peptide = clusters[0].blockades[0].peptide

    blockade_model = load_model(model_file)
    model_signal = blockade_model.peptide_signal(peptide)

    # These depend only on the peptide, so build them once rather than
    # once per cluster.
    flank = "-" * (WINDOW - 1)
    flanked_peptide = flank + peptide + flank
    num_peaks = len(peptide) + WINDOW - 1

    errors = defaultdict(list)
    for cluster in clusters:
        discr_signal = sp.discretize(cluster.consensus, len(peptide))

        # ``range`` instead of ``xrange``: works on Python 2 and Python 3.
        for i in range(num_peaks):
            kmer = flanked_peptide[i:i + WINDOW]
            if "-" in kmer:
                # Window overlaps the padding -- not attributed to any AA.
                continue

            difference = discr_signal[i] - model_signal[i]
            for aa in kmer:
                errors[aa].append(difference)

    return errors
def flip(blockades, model_file):
    """
    Flips blockades

    Each blockade is preprocessed as its own cluster (``cluster_size=1``,
    ``min_dwell=0.0``, ``max_dwell=1000``). The cluster consensus is
    discretized to the peptide length and compared, via
    ``Identifier.signal_protein_distance``, with the peptide (taken from
    ``blockades[0]``) and with the reversed peptide. When the forward
    distance is strictly larger than the reverse one, the ``eventTrace`` of
    the cluster's first blockade is reversed.

    A per-cluster table and a final summary are written to stderr; the
    summary total is ``len(blockades)``, i.e. the size of the input.

    Returns the list of first blockades of every cluster. The
    ``eventTrace`` attribute is reassigned on those objects directly
    (presumably the same objects as in ``blockades`` -- verify against
    ``sp.preprocess_blockades``).
    """
    model = load_model(model_file)
    identifier = Identifier(model)

    peptide = blockades[0].peptide
    singletons = sp.preprocess_blockades(blockades, cluster_size=1,
                                         min_dwell=0.0, max_dwell=1000)

    print("Num\tFwd_dst\tRev_dst\t\tNeeds_flip", file=sys.stderr)
    row_format = "{0}\t{1:5.2f}\t{2:5.2f}\t\t{3}"

    flipped_count = 0
    oriented = []
    for row_no, cluster in enumerate(singletons, 1):
        signal = sp.discretize(cluster.consensus, len(peptide))
        forward = identifier.signal_protein_distance(signal, peptide)
        backward = identifier.signal_protein_distance(signal, peptide[::-1])
        needs_flip = forward > backward

        print(row_format.format(row_no, forward, backward, needs_flip),
              file=sys.stderr)

        blockade = cluster.blockades[0]
        oriented.append(blockade)
        if needs_flip:
            # The reversed peptide fits better: turn the trace around.
            blockade.eventTrace = blockade.eventTrace[::-1]
            flipped_count += 1

    print("Reversed:", flipped_count, "of", len(blockades), file=sys.stderr)
    return oriented
def get_bias(blockades_file, model_file, cluster_size):
    """
    Gets AA-specific bias between the empirical and theoretical signals

    Blockades are read from ``blockades_file`` and clustered with
    ``sp.preprocess_blockades`` (``min_dwell=0.5``, ``max_dwell=20``). The
    peptide is taken from the first blockade of the first cluster, and the
    theoretical signal for it is produced by the model loaded from
    ``model_file``.

    For every cluster the consensus is discretized to ``len(peptide)``
    points. The peptide is padded with ``WINDOW - 1`` "-" characters on both
    sides and scanned with a window of ``WINDOW`` characters; windows that
    contain padding are skipped. For each remaining window at position ``i``
    the difference ``discr_signal[i] - model_signal[i]`` is appended once
    for every character of the window.

    Returns a ``defaultdict(list)`` mapping amino-acid characters to the
    collected differences.

    NOTE(review): ``clusters[0]`` fails when preprocessing yields no
    clusters (presumably an IndexError, if ``clusters`` is a list).
    """
    WINDOW = 4

    blockades = read_mat(blockades_file)
    clusters = sp.preprocess_blockades(blockades, cluster_size=cluster_size,
                                       min_dwell=0.5, max_dwell=20)
    peptide = clusters[0].blockades[0].peptide

    blockade_model = load_model(model_file)
    model_signal = blockade_model.peptide_signal(peptide)

    # These depend only on the peptide, so build them once rather than
    # once per cluster.
    flank = "-" * (WINDOW - 1)
    flanked_peptide = flank + peptide + flank
    num_peaks = len(peptide) + WINDOW - 1

    errors = defaultdict(list)
    for cluster in clusters:
        discr_signal = sp.discretize(cluster.consensus, len(peptide))

        # ``range`` instead of ``xrange``: works on Python 2 and Python 3.
        for i in range(num_peaks):
            kmer = flanked_peptide[i:i + WINDOW]
            if "-" in kmer:
                # Window overlaps the padding -- not attributed to any AA.
                continue

            difference = discr_signal[i] - model_signal[i]
            for aa in kmer:
                errors[aa].append(difference)

    return errors
def _get_peptides_signals(mat_files):
    """
    Collects (peptide, discretized signal) pairs from several mat files

    Every file is read with ``read_mat`` and preprocessed with
    ``sp.preprocess_blockades`` (``cluster_size=1``, ``min_dwell=0.5``,
    ``max_dwell=20``). The peptide of a file is taken from the first
    blockade of its first cluster; each cluster consensus is discretized to
    that peptide's length.

    Returns two parallel lists ``(peptides, signals)`` with one entry per
    cluster, in file order.
    """
    TRAIN_AVG = 1

    all_peptides = []
    all_signals = []
    for mat_path in mat_files:
        clusters = sp.preprocess_blockades(read_mat(mat_path),
                                           cluster_size=TRAIN_AVG,
                                           min_dwell=0.5, max_dwell=20)
        mat_peptide = clusters[0].blockades[0].peptide

        # One peptide label per cluster so both lists stay aligned.
        all_peptides += [mat_peptide] * len(clusters)
        all_signals += [sp.discretize(cluster.consensus, len(mat_peptide))
                        for cluster in clusters]

    return all_peptides, all_signals
def rank_db_proteins(self, signal):
    """
    Rank database proteins with respect to the similarity to a given signal

    The signal is discretized to the length of each database sequence (one
    discretization per distinct length, reused afterwards) and scored with
    ``self.signal_protein_distance``.

    Returns a list of ``(protein_id, distance)`` tuples sorted by ascending
    distance. Requires ``self.database`` to be set (checked with ``assert``).
    """
    assert self.database is not None

    resampled_by_len = {}
    scores = {}
    for prot_id, prot_seq in self.database.items():
        seq_len = len(prot_seq)
        try:
            resampled = resampled_by_len[seq_len]
        except KeyError:
            # First sequence of this length: discretize once and remember it.
            resampled = sp.discretize(signal, seq_len)
            resampled_by_len[seq_len] = resampled

        scores[prot_id] = self.signal_protein_distance(resampled, prot_seq)

    return sorted(scores.items(), key=lambda entry: entry[1])
def correlation(mat_file_1, mat_file_2):
    """
    Draws the plot

    For every blockade of ``mat_file_1`` two values are computed:

    * x: mean of ``1 - distance.correlation`` against all blockades of
      ``mat_file_1`` (the blockade itself is included in that average);
    * y: mean of ``1 - distance.correlation`` against all blockades of
      ``mat_file_2``.

    The points are drawn as a scatter plot on fixed [-0.6, 0.6] axes with a
    thin dashed grid; thick red dashed lines mark the medians of the x and y
    values. Axis labels are hard-coded to "H3 tail" / "CCL5".

    Both files go through the same preprocessing: ``sp._fractional_blockades``,
    ``sp._filter_by_duration(..., 0.5, 20)``, then each ``eventTrace`` is
    passed through ``sp._trim_flank_noise`` and discretized to 20 points.

    Returns nothing; the figure is displayed with ``plt.show()``.

    NOTE(review): ``distance`` is presumably ``scipy.spatial.distance`` --
    confirm against the file's imports.
    """
    def _discretized_traces(mat_file):
        # Shared preprocessing pipeline for both input files.
        blockades = read_mat(mat_file)
        blockades = sp._fractional_blockades(blockades)
        blockades = sp._filter_by_duration(blockades, 0.5, 20)
        # Build a real list: the traces are iterated repeatedly (nested
        # loops below), which silently breaks with a lazy Python 3 ``map``
        # object because the inner loop would exhaust the outer one.
        return [sp.discretize(sp._trim_flank_noise(b.eventTrace), 20)
                for b in blockades]

    def _mean_similarity(trace, others):
        # Average of (1 - correlation distance) between one trace and a set.
        return np.mean([1 - distance.correlation(trace, other)
                        for other in others])

    traces_1 = _discretized_traces(mat_file_1)
    traces_2 = _discretized_traces(mat_file_2)

    self_corr = [_mean_similarity(trace, traces_1) for trace in traces_1]
    cross_corr = [_mean_similarity(trace, traces_2) for trace in traces_1]
    mean_self = np.median(self_corr)
    mean_cross = np.median(cross_corr)

    matplotlib.rcParams.update({"font.size": 16})

    # ``plt.subplot()`` returns an Axes object (not a Figure).
    axes = plt.subplot()
    axes.spines["right"].set_visible(False)
    axes.spines["top"].set_visible(False)
    axes.get_xaxis().tick_bottom()
    axes.get_yaxis().tick_left()

    bounds = (-0.6, 0.6)
    axes.set_xlim(*bounds)
    axes.set_ylim(*bounds)
    axes.set_xlabel("(H3 tail, H3 tail) correlation")
    axes.set_ylabel("(H3 tail, CCL5) correlation")

    # Thin background grid, one horizontal and one vertical line per tick.
    for tick in [-0.4, -0.2, 0, 0.2, 0.4]:
        axes.plot(bounds, (tick, tick), "--", lw=0.5, color="black")
        axes.plot((tick, tick), bounds, "--", lw=0.5, color="black")

    # Median markers.
    axes.plot(bounds, (mean_cross, mean_cross), "--", lw=1.5, color="red")
    axes.plot((mean_self, mean_self), bounds, "--", lw=1.5, color="red")

    axes.scatter(self_corr, cross_corr, linewidth=0.5, c="dodgerblue",
                 s=30, edgecolor="blue")

    plt.tight_layout()
    plt.show()