Пример #1
0
def get_bias(blockades_file, model_file, cluster_size):
    """
    Gets AA-specific bias between the empirical and theoretical signals.

    Blockades are read from ``blockades_file`` and grouped by
    ``sp.preprocess_blockades`` (``cluster_size`` per cluster, dwell bounds
    0.5 and 20). Each cluster consensus is discretized and compared with
    the signal that the model loaded from ``model_file`` gives for the
    peptide.

    Returns a ``defaultdict(list)`` keyed by amino acid character. For
    every window position ``i`` whose k-mer contains no ``"-"`` flank
    character, ``discr_signal[i] - model_signal[i]`` is appended once for
    each character of that k-mer.
    """
    WINDOW = 4

    blockades = read_mat(blockades_file)
    clusters = sp.preprocess_blockades(blockades, cluster_size=cluster_size,
                                       min_dwell=0.5, max_dwell=20)
    # The peptide is taken from the first blockade of the first cluster
    # and reused for all clusters.
    peptide = clusters[0].blockades[0].peptide

    blockade_model = load_model(model_file)
    model_signal = blockade_model.peptide_signal(peptide)

    # Both values depend only on the peptide, so they are built once
    # instead of once per cluster.
    flank = "-" * (WINDOW - 1)
    flanked_peptide = flank + peptide + flank
    num_peaks = len(peptide) + WINDOW - 1

    errors = defaultdict(list)
    for cluster in clusters:
        discr_signal = sp.discretize(cluster.consensus, len(peptide))

        for i in xrange(num_peaks):
            kmer = flanked_peptide[i : i + WINDOW]
            if "-" in kmer:
                # Window overlaps the padding; skip it.
                continue

            # The difference is the same for every residue of the window.
            diff = discr_signal[i] - model_signal[i]
            for aa in kmer:
                errors[aa].append(diff)

    return errors
Пример #2
0
def get_bias(blockades_file, model_file, cluster_size):
    """
    Gets AA-specific bias between the empirical and theoretical signals.

    Steps:
      1. read blockades from ``blockades_file`` and cluster them with
         ``sp.preprocess_blockades`` (``cluster_size``, min dwell 0.5,
         max dwell 20);
      2. compute the model signal for the peptide with the model loaded
         from ``model_file``;
      3. for each cluster, discretize the consensus and record
         ``discr_signal[i] - model_signal[i]`` for every window position
         ``i`` whose k-mer has no ``"-"`` padding.

    Returns a ``defaultdict(list)`` mapping each amino acid character to
    the recorded differences; a difference is stored once per character of
    the k-mer it belongs to.
    """
    WINDOW = 4

    blockades = read_mat(blockades_file)
    clusters = sp.preprocess_blockades(blockades,
                                       cluster_size=cluster_size,
                                       min_dwell=0.5,
                                       max_dwell=20)
    # Only the first blockade of the first cluster is consulted for the
    # peptide sequence.
    peptide = clusters[0].blockades[0].peptide

    blockade_model = load_model(model_file)
    model_signal = blockade_model.peptide_signal(peptide)

    # Loop-invariant: the padded peptide and the number of window
    # positions do not change between clusters.
    padding = "-" * (WINDOW - 1)
    flanked_peptide = padding + peptide + padding
    num_peaks = len(peptide) + WINDOW - 1

    errors = defaultdict(list)
    for cluster in clusters:
        discr_signal = sp.discretize(cluster.consensus, len(peptide))

        for i in xrange(num_peaks):
            kmer = flanked_peptide[i:i + WINDOW]
            if "-" not in kmer:
                # One subtraction per window, shared by all its residues.
                delta = discr_signal[i] - model_signal[i]
                for aa in kmer:
                    errors[aa].append(delta)

    return errors
Пример #3
0
def flip(blockades, model_file):
    """
    Flips blockades whose signal matches the reversed peptide better.

    The blockades are clustered one per cluster (``cluster_size=1``, dwell
    bounds 0.0 and 1000). For each cluster the discretized consensus is
    compared, via ``Identifier.signal_protein_distance``, with the peptide
    and with the reversed peptide. When the forward distance is larger
    than the reverse one, the ``eventTrace`` of the cluster's first
    blockade is reversed in place.

    A per-cluster table and a final summary are printed to stderr.
    Returns the list holding the first blockade of every cluster.
    """
    identifier = Identifier(load_model(model_file))

    # The peptide comes from the first input blockade.
    peptide = blockades[0].peptide
    reversed_peptide = peptide[::-1]
    peptide_length = len(peptide)

    clusters = sp.preprocess_blockades(blockades, cluster_size=1,
                                       min_dwell=0.0, max_dwell=1000)

    print("Num\tFwd_dst\tRev_dst\t\tNeeds_flip", file=sys.stderr)

    row_format = "{0}\t{1:5.2f}\t{2:5.2f}\t\t{3}"
    flipped_count = 0
    result = []
    for index, cluster in enumerate(clusters, 1):
        signal = sp.discretize(cluster.consensus, peptide_length)

        forward = identifier.signal_protein_distance(signal, peptide)
        backward = identifier.signal_protein_distance(signal,
                                                      reversed_peptide)
        needs_flip = forward > backward
        print(row_format.format(index, forward, backward, needs_flip),
              file=sys.stderr)

        blockade = cluster.blockades[0]
        result.append(blockade)
        if needs_flip:
            # Reverse the trace of the stored blockade itself.
            blockade.eventTrace = blockade.eventTrace[::-1]
            flipped_count += 1

    print("Reversed:", flipped_count, "of", len(blockades), file=sys.stderr)
    return result
Пример #4
0
def main():
    """
    Command-line entry point for Nano-Align protein identification.

    Parses the blockades file, the model file and the optional settings
    from the command line, loads the model with ``load_model`` and passes
    everything to ``pvalues_test`` with ``sys.stderr`` as the output
    stream. Returns 0 once ``pvalues_test`` has finished.
    """
    parser = argparse.ArgumentParser(
        description="Nano-Align protein identification",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Positional arguments use their own name as the metavar.
    positionals = (
        ("blockades_file", "path to blockades file (in mat format)"),
        ("model_file", "path to trained model file ('-' for MV model)"),
    )
    for arg_name, arg_help in positionals:
        parser.add_argument(arg_name, metavar=arg_name, help=arg_help)

    parser.add_argument("-c", "--cluster-size", type=int, default=10,
                        dest="cluster_size", help="blockades cluster size")
    parser.add_argument("-d", "--database", default=None, dest="database",
                        metavar="database",
                        help="database file (in FASTA format). "
                             "If not set, random database is generated")
    parser.add_argument("-s", "--single-blockades", dest="single_blockades",
                        action="store_true", default=False,
                        help="print statistics for each blockade in a cluster")
    parser.add_argument("--version", action="version", version=__version__)

    options = parser.parse_args()

    model = load_model(options.model_file)
    pvalues_test(options.blockades_file, options.cluster_size, model,
                 options.database, options.single_blockades, sys.stderr)
    return 0
def full_identify(blockades_file, model_file, db_file):
    """
    Computes pvalues

    For every cluster size ``avg`` from 1 to 20, ``pvalues_test`` is run
    ``avg`` times with its output sent to the null device. The p-values it
    returns are collected per cluster size, the cluster size and the
    median p-value are printed to stderr, and the collected lists are
    finally passed to ``plot_pvalues``.
    """
    blockade_model = load_model(model_file)

    boxes = []
    # A single null sink serves every run and is closed when the loop ends,
    # rather than opening a new handle per call and never closing it.
    with open(os.devnull, "w") as devnull:
        for avg in xrange(1, 21):
            p_values = []
            for _ in xrange(avg):
                # pvalues_test returns a (p-value, rank) pair; only the
                # p-value is used here.
                p_value, _rank = pvalues_test(blockades_file, avg,
                                              blockade_model, db_file,
                                              False, devnull)
                p_values.append(p_value)

            boxes.append(p_values)
            print(avg, np.median(p_values), file=sys.stderr)

    plot_pvalues(boxes)
def full_identify(blockades_file, model_file, db_file):
    """
    Computes pvalues

    Runs ``pvalues_test`` repeatedly for cluster sizes 1 through 20 (a
    cluster size of ``avg`` is evaluated ``avg`` times), with the test's
    own output discarded to the null device. After each cluster size the
    size and the median of its p-values are printed to stderr. All
    per-size p-value lists are then given to ``plot_pvalues``.
    """
    blockade_model = load_model(model_file)

    boxes = []
    # Open the null device once and let the context manager close it;
    # opening it inside the inner loop leaked one handle per call.
    with open(os.devnull, "w") as null_stream:
        for avg in xrange(1, 21):
            p_values = []
            for _ in xrange(avg):
                # Second element of the returned pair (the rank) is unused.
                p_value, _rank = pvalues_test(blockades_file, avg,
                                              blockade_model, db_file,
                                              False, null_stream)
                p_values.append(p_value)

            boxes.append(p_values)
            print(avg, np.median(p_values), file=sys.stderr)

    plot_pvalues(boxes)
Пример #7
0
def main():
    """
    Command-line entry point for Nano-Align protein identification.

    Reads the nanospectra file, the model file and the optional settings
    from the command line, loads the model with ``load_model`` and calls
    ``pvalues_test`` with ``sys.stderr`` as the output stream. Returns 0
    once ``pvalues_test`` has finished.
    """
    parser = argparse.ArgumentParser(
        description="Nano-Align protein identification",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Positional arguments use their own name as the metavar.
    positionals = (
        ("nanospectra_file", "path to nanospectra file (in mat format)"),
        ("model_file", "path to trained model file ('-' for MV model)"),
    )
    for arg_name, arg_help in positionals:
        parser.add_argument(arg_name, metavar=arg_name, help=arg_help)

    parser.add_argument("-c", "--cluster-size", type=int, default=10,
                        dest="cluster_size", help="blockades cluster size")
    parser.add_argument("-d", "--database", default=None, dest="database",
                        metavar="database",
                        help="database file (in FASTA format). "
                             "If not set, random database is generated")
    parser.add_argument("-s", "--single-nanospectra",
                        dest="single_nanospectra",
                        action="store_true", default=False,
                        help="print statistics for each nanospectra "
                             "in a cluster")
    parser.add_argument("--version", action="version", version=__version__)

    options = parser.parse_args()

    model = load_model(options.model_file)
    pvalues_test(options.nanospectra_file, options.cluster_size, model,
                 options.database, options.single_nanospectra, sys.stderr)
    return 0
Пример #8
0
def plot_blockades(blockades_file, model_files,
                   cluster_size, show_text):
    """
    Plots each cluster consensus against the model signals.

    Blockades are read from ``blockades_file`` and clustered with
    ``sp.preprocess_blockades`` (``cluster_size``, dwell bounds 0.5 and
    20). For every cluster one plot is shown with ``plt.show()``:

    * the cluster consensus ("Empirical signal");
    * for every model loaded from ``model_files``, its peptide signal
      linearly interpolated onto the consensus sample positions. The value
      ``1 - distance.correlation(consensus, interpolated)`` is printed to
      stderr for each model.

    When ``show_text`` is true, each amino acid letter of the peptide is
    drawn at the x position returned by ``_get_aa_positions``.
    """
    WINDOW = 4

    blockades = read_mat(blockades_file)
    clusters = sp.preprocess_blockades(blockades, cluster_size=cluster_size,
                                       min_dwell=0.5, max_dwell=20)
    # The peptide comes from the first blockade of the first cluster.
    peptide = clusters[0].blockades[0].peptide

    models = [load_model(path) for path in model_files]

    for cluster in clusters:
        consensus = cluster.consensus
        signal_length = len(consensus)
        x_max = len(peptide) + 1

        x_axis = np.linspace(0, x_max, signal_length)
        matplotlib.rcParams.update({"font.size": 16})
        axes = plt.subplot()

        # Hide the right/top frame lines and keep ticks on the other two.
        for side in ("right", "top"):
            axes.spines[side].set_visible(False)
        axes.get_xaxis().tick_bottom()
        axes.get_yaxis().tick_left()
        axes.set_xlim(0, x_max)
        axes.set_xlabel("Putative AA position")
        axes.set_ylabel("Normalized signal")

        axes.plot(x_axis, consensus, label="Empirical signal", linewidth=1.5)

        for model in models:
            model_signal = model.peptide_signal(peptide)
            num_points = len(model_signal)
            # Spread the model samples over 0..signal_length so they can
            # be resampled at each consensus index.
            model_grid = [i * signal_length / (num_points - 1)
                          for i in xrange(num_points)]

            resample = interp1d(model_grid, model_signal, kind="linear")
            model_interp = resample(xrange(signal_length))

            corr = 1 - distance.correlation(consensus, model_interp)
            print("{0} correlation: {1:5.2f}\t".format(model.name, corr),
                  file=sys.stderr)
            axes.plot(x_axis, model_interp, label=model.name, linewidth=2)

        legend = axes.legend(loc="lower left", frameon=False)
        for legend_line in legend.get_lines():
            legend_line.set_linewidth(2)
        for legend_text in legend.get_texts():
            legend_text.set_fontsize(16)

        if show_text:
            # Amino acid letters are placed 2 units below the mean of the
            # consensus.
            event_mean = np.mean(consensus)
            acids_pos = _get_aa_positions(peptide, WINDOW, x_axis[-1])
            text_y = event_mean - 2
            for i, aa in enumerate(peptide):
                axes.text(acids_pos[i], text_y, aa, fontsize=16)

        plt.show()
Пример #9
0
def plot_blockades(blockades_file, model_files, cluster_size, show_text):
    """
    Shows one plot per cluster comparing its consensus with model signals.

    The blockades in ``blockades_file`` are clustered by
    ``sp.preprocess_blockades`` (``cluster_size``, min dwell 0.5, max
    dwell 20). For each cluster the consensus is drawn as "Empirical
    signal"; every model loaded from ``model_files`` contributes its
    peptide signal, linearly interpolated to one value per consensus
    sample, and ``1 - distance.correlation(...)`` between the two curves
    is printed to stderr. With ``show_text`` set, the peptide's amino acid
    letters are written at the positions given by ``_get_aa_positions``.
    Each plot is displayed with ``plt.show()``.
    """
    WINDOW = 4

    blockades = read_mat(blockades_file)
    clusters = sp.preprocess_blockades(blockades,
                                       cluster_size=cluster_size,
                                       min_dwell=0.5,
                                       max_dwell=20)
    # Peptide sequence is read from the first blockade of the first
    # cluster.
    peptide = clusters[0].blockades[0].peptide

    models = [load_model(path) for path in model_files]

    for cluster in clusters:
        consensus = cluster.consensus
        signal_length = len(consensus)
        right_edge = len(peptide) + 1

        x_axis = np.linspace(0, right_edge, signal_length)
        matplotlib.rcParams.update({"font.size": 16})
        axes = plt.subplot()

        # Only the left and bottom frame lines stay visible.
        for side in ("right", "top"):
            axes.spines[side].set_visible(False)
        axes.get_xaxis().tick_bottom()
        axes.get_yaxis().tick_left()
        axes.set_xlim(0, right_edge)
        axes.set_xlabel("Putative AA position")
        axes.set_ylabel("Normalized signal")

        axes.plot(x_axis,
                  consensus,
                  label="Empirical signal",
                  linewidth=1.5)

        for model in models:
            model_signal = model.peptide_signal(peptide)
            sample_count = len(model_signal)
            # Model samples are laid out over 0..signal_length before
            # being resampled at every consensus index.
            model_grid = [
                i * signal_length / (sample_count - 1)
                for i in xrange(sample_count)
            ]

            resample = interp1d(model_grid, model_signal, kind="linear")
            model_interp = resample(xrange(signal_length))

            corr = 1 - distance.correlation(consensus, model_interp)
            print("{0} correlation: {1:5.2f}\t".format(model.name, corr),
                  file=sys.stderr)
            axes.plot(x_axis, model_interp, label=model.name, linewidth=2)

        legend = axes.legend(loc="lower left", frameon=False)
        for legend_line in legend.get_lines():
            legend_line.set_linewidth(2)
        for legend_text in legend.get_texts():
            legend_text.set_fontsize(16)

        if show_text:
            # Letters sit 2 units below the mean of the consensus.
            event_mean = np.mean(consensus)
            acids_pos = _get_aa_positions(peptide, WINDOW, x_axis[-1])
            text_y = event_mean - 2
            for i, aa in enumerate(peptide):
                axes.text(acids_pos[i], text_y, aa, fontsize=16)

        plt.show()