Exemplo n.º 1
0
def main(args=None):
    """Plot a PCA of the correlation data and optionally dump its components.

    Parses the command line, builds a ``Correlation`` object from
    ``args.corData``, draws the PCA plot into the file named by
    ``args.plotFile`` and, when ``args.outFileNameData`` is given, writes a
    tab-separated table (one row per component: index, the row of ``Wt`` and
    the matching entry of ``s``) to it.

    :param args: argument list forwarded to the parser returned by
        ``parse_arguments()``; ``None`` leaves the choice of arguments to
        that parser.
    """
    args = parse_arguments().parse_args(args)

    corr = Correlation(
        args.corData,
        labels=args.labels,
    )

    # plot_pca() is given the file *name*, so the already-open handle is
    # released first.
    args.plotFile.close()

    corr.plot_pca(args.plotFile.name,
                  plot_title=args.plotTitle,
                  image_format=args.plotFileFormat)

    if args.outFileNameData is not None:
        of = args.outFileNameData
        try:
            # Import the submodule explicitly: a bare ``import matplotlib``
            # does not guarantee that ``matplotlib.mlab`` has been loaded.
            import matplotlib.mlab

            mlab_pca = matplotlib.mlab.PCA(corr.matrix)
            of.write("Component\t{}\tEigenvalue\n".format("\t".join(corr.labels)))
            # range() works on Python 2 and 3; xrange() is Python 2 only.
            for i in range(len(corr.labels)):
                fields = [i + 1]
                fields.extend(mlab_pca.Wt[i, :])
                fields.append(mlab_pca.s[i])
                of.write("\t".join("{}".format(f) for f in fields) + "\n")
        finally:
            # Close the handle even when the PCA or a write fails.
            of.close()
Exemplo n.º 2
0
def main(args=None):
    """Plot a PCA of the correlation data and optionally dump its components.

    Parses the command line, builds a ``Correlation`` object from
    ``args.corData``, copies the ``rowCenter`` option onto it, draws the PCA
    plot into the file named by ``args.plotFile`` and, when
    ``args.outFileNameData`` is given, writes a tab-separated table (one row
    per component: index, the row of ``Wt`` and the matching entry of ``s``).

    :param args: argument list forwarded to the parser returned by
        ``parse_arguments()``; ``None`` leaves the choice of arguments to
        that parser.
    """
    args = parse_arguments().parse_args(args)

    corr = Correlation(args.corData,
                       labels=args.labels,)

    # plot_pca() is given the file *name*, so the already-open handle is
    # released first.
    args.plotFile.close()
    corr.rowCenter = args.rowCenter

    corr.plot_pca(args.plotFile.name,
                  plot_title=args.plotTitle,
                  image_format=args.plotFileFormat)

    if args.outFileNameData is not None:
        of = args.outFileNameData
        try:
            # Import the submodule explicitly: a bare ``import matplotlib``
            # does not guarantee that ``matplotlib.mlab`` has been loaded.
            import matplotlib.mlab

            mlab_pca = matplotlib.mlab.PCA(corr.matrix)
            of.write("Component\t{}\tEigenvalue\n".format("\t".join(corr.labels)))
            for i in range(len(corr.labels)):
                of.write("{}".format(i + 1))
                for v in mlab_pca.Wt[i, :]:
                    of.write("\t{}".format(v))
                of.write("\t{}\n".format(mlab_pca.s[i]))
        finally:
            # Close the handle even when the PCA or a write fails.
            of.close()
Exemplo n.º 3
0
def main(args=None):
    """Render the PCA plot for the correlation data named on the command line.

    :param args: argument list forwarded to the parser returned by
        ``parse_arguments()``.
    """
    options = parse_arguments().parse_args(args)
    correlation = Correlation(options.corData, labels=options.labels)

    # Only the name of the plot file is passed on, so the handle is closed
    # before plotting.
    plot_handle = options.plotFile
    plot_handle.close()

    plot_kwargs = {
        "plot_title": options.plotTitle,
        "image_format": options.plotFileFormat,
    }
    correlation.plot_pca(plot_handle.name, **plot_kwargs)
Exemplo n.º 4
0
def main(args=None):
    """Plot and/or save the correlation of the samples in ``args.corData``.

    Steps, in order:

    1. Parse the arguments and abort unless at least one of ``--plotFile``
       and ``--outFileCorMatrix`` was given.
    2. Build a ``Correlation`` object with the requested method.
    3. For the pearson method, print a note on stderr when outliers are
       detected.
    4. Validate the colour map name, exiting with status 1 if matplotlib
       rejects it.
    5. Draw either the scatter plot or the correlation plot.
    6. Write the correlation matrix, preceded by a comment header line.

    :param args: argument list forwarded to the parser returned by
        ``parse_arguments()``.
    :raises SystemExit: when no output was requested or the colour map name
        is invalid.
    """
    args = parse_arguments().parse_args(args)

    if args.plotFile is None and args.outFileCorMatrix is None:
        sys.exit("At least one of --plotFile and --outFileCorMatrix must be specified!\n")

    corr = Correlation(args.corData,
                       args.corMethod,
                       labels=args.labels,
                       remove_outliers=args.removeOutliers,
                       skip_zeros=args.skipZeros)

    if args.corMethod == 'pearson':
        # Test whether there are outliers and tell the user what happens to
        # them (or how to get rid of them).
        outliers = corr.get_outlier_indices(np.asarray(corr.matrix).flatten())
        if len(outliers) > 0:
            if args.removeOutliers:
                sys.stderr.write("\nOutliers were detected in the data. They "
                                 "will be removed to avoid bias "
                                 "in the pearson correlation.\n")
            else:
                sys.stderr.write("\nOutliers were detected in the data. Consider "
                                 "using the --removeOutliers parameter to avoid a bias "
                                 "in the pearson correlation.\n")

    if args.colorMap:
        try:
            plt.get_cmap(args.colorMap)
        except ValueError as error:
            sys.stderr.write(
                "A problem was found. Message: {}\n".format(error))
            # Signal the failure to the caller: the site builtin exit()
            # would report success (status 0) and is not available when the
            # interpreter runs without the site module.
            sys.exit(1)

    if args.plotFile is not None:
        if args.whatToPlot == 'scatterplot':
            corr.plot_scatter(args.plotFile,
                              plot_title=args.plotTitle,
                              image_format=args.plotFileFormat,
                              xRange=args.xRange,
                              yRange=args.yRange,
                              log1p=args.log1p)
        else:
            corr.plot_correlation(args.plotFile,
                                  vmax=args.zMax,
                                  vmin=args.zMin,
                                  colormap=args.colorMap,
                                  plot_title=args.plotTitle,
                                  image_format=args.plotFileFormat,
                                  plot_numbers=args.plotNumbers,
                                  plotWidth=args.plotWidth,
                                  plotHeight=args.plotHeight)

    if args.outFileCorMatrix:
        # ``with`` closes the file even if saving the matrix raises.
        with open(args.outFileCorMatrix, "w") as o:
            o.write("#plotCorrelation --outFileCorMatrix\n")
            corr.save_corr_matrix(o)
Exemplo n.º 5
0
def main(args=None):
    """Entry point: correlate the samples in ``args.corData`` and report.

    Requires at least one of ``--plotFile`` and ``--outFileCorMatrix``.
    Builds a ``Correlation`` object, prints an outlier note on stderr for the
    pearson method, checks that the colour map name is known to matplotlib,
    then produces the requested plot (scatter plot or correlation plot)
    and/or the correlation matrix file.

    :param args: argument list forwarded to the parser returned by
        ``parse_arguments()``.
    :raises SystemExit: when no output was requested, or (with status 1)
        when the colour map name is rejected.
    """
    args = parse_arguments().parse_args(args)

    if args.plotFile is None and args.outFileCorMatrix is None:
        sys.exit("At least one of --plotFile and --outFileCorMatrix must be specified!\n")

    corr = Correlation(args.corData,
                       args.corMethod,
                       labels=args.labels,
                       remove_outliers=args.removeOutliers,
                       skip_zeros=args.skipZeros)

    if args.corMethod == 'pearson':
        # test if there are outliers and write a message recommending the removal
        if len(corr.get_outlier_indices(np.asarray(corr.matrix).flatten())) > 0:
            if args.removeOutliers:
                sys.stderr.write("\nOutliers were detected in the data. They "
                                 "will be removed to avoid bias "
                                 "in the pearson correlation.\n")

            else:
                sys.stderr.write("\nOutliers were detected in the data. Consider "
                                 "using the --removeOutliers parameter to avoid a bias "
                                 "in the pearson correlation.\n")

    if args.colorMap:
        try:
            plt.get_cmap(args.colorMap)
        except ValueError as error:
            sys.stderr.write(
                "A problem was found. Message: {}\n".format(error))
            # An invalid colour map is an error, so leave with a non-zero
            # status; the site builtin exit() would have reported success.
            sys.exit(1)

    if args.plotFile is not None:
        if args.whatToPlot == 'scatterplot':
            corr.plot_scatter(args.plotFile,
                              plot_title=args.plotTitle,
                              image_format=args.plotFileFormat,
                              xRange=args.xRange,
                              yRange=args.yRange,
                              log1p=args.log1p)
        else:
            corr.plot_correlation(args.plotFile,
                                  vmax=args.zMax,
                                  vmin=args.zMin,
                                  colormap=args.colorMap,
                                  plot_title=args.plotTitle,
                                  image_format=args.plotFileFormat,
                                  plot_numbers=args.plotNumbers,
                                  plotWidth=args.plotWidth,
                                  plotHeight=args.plotHeight)

    if args.outFileCorMatrix:
        # The context manager releases the handle even when
        # save_corr_matrix() fails.
        with open(args.outFileCorMatrix, "w") as o:
            o.write("#plotCorrelation --outFileCorMatrix\n")
            corr.save_corr_matrix(o)
Exemplo n.º 6
0
def main(args=None):
    """Run the PCA plot and optionally write the component table.

    Validates the options (at least one output, ``--ntop`` >= 0, two
    distinct principal components that are both >= 1), configures a
    ``Correlation`` object, calls ``plot_pca`` and, when
    ``args.outFileNameData`` is set, writes one tab-separated row per
    eigenvalue: component number, the matching row of ``Wt`` and the
    eigenvalue.

    :param args: argument list forwarded to the parser returned by
        ``parse_arguments()``.
    :raises SystemExit: when one of the option checks fails.
    """
    args = parse_arguments().parse_args(args)

    if args.plotFile is None and args.outFileNameData is None:
        sys.exit(
            "At least one of --plotFile and --outFileNameData must be specified!\n"
        )

    if args.ntop < 0:
        sys.exit("The value specified for --ntop must be >= 0!\n")

    if args.PCs[0] == args.PCs[1]:
        sys.exit("You must specify different principal components!\n")
    if args.PCs[0] <= 0 or args.PCs[1] <= 0:
        sys.exit("The specified principal components must be at least 1!\n")

    corr = Correlation(
        args.corData,
        labels=args.labels,
    )

    # These options are handed over as attributes rather than constructor
    # arguments.
    corr.rowCenter = args.rowCenter
    corr.transpose = args.transpose
    corr.ntop = args.ntop
    corr.log2 = args.log2

    Wt, eigenvalues = corr.plot_pca(args.plotFile,
                                    PCs=args.PCs,
                                    plot_title=args.plotTitle,
                                    image_format=args.plotFileFormat,
                                    plotWidth=args.plotWidth,
                                    plotHeight=args.plotHeight,
                                    cols=args.colors,
                                    marks=args.markers)

    if args.outFileNameData is not None:
        # ``with`` closes the table file even if a write raises part-way
        # through.
        with open(args.outFileNameData, "w") as of:
            of.write("#plotPCA --outFileNameData\n")
            of.write("Component\t{}\tEigenvalue\n".format("\t".join(corr.labels)))
            for i in range(eigenvalues.shape[0]):
                of.write("{}\t{}\t{}\n".format(
                    i + 1, "\t".join(["{}".format(x) for x in Wt[i, :]]),
                    eigenvalues[i]))
Exemplo n.º 7
0
def main(args=None):
    """Optionally save the correlation matrix, then draw the PCA plot.

    :param args: argument list forwarded to the parser returned by
        ``parse_arguments()``.
    """
    options = parse_arguments().parse_args(args)
    correlation = Correlation(options.corData, labels=options.labels)

    matrix_target = options.outFileCorMatrix
    if matrix_target:
        correlation.save_corr_matrix(matrix_target)

    # Only the name of the plot file is passed on, so the handle is closed
    # before plotting.
    plot_handle = options.plotFile
    plot_handle.close()

    correlation.plot_pca(
        plot_handle.name,
        plot_title=options.plotTitle,
        image_format=options.plotFileFormat,
    )
Exemplo n.º 8
0
def main(args=None):
    """Run the PCA plot and optionally write the component table.

    The options are checked first: at least one of ``--plotFile`` and
    ``--outFileNameData`` must be present, ``--ntop`` must not be negative
    and the two requested principal components must differ and be >= 1.
    A ``Correlation`` object is then configured and ``plot_pca`` is called;
    its two return values are written as a tab-separated table when
    ``args.outFileNameData`` is set.

    :param args: argument list forwarded to the parser returned by
        ``parse_arguments()``.
    :raises SystemExit: when one of the option checks fails.
    """
    args = parse_arguments().parse_args(args)

    if args.plotFile is None and args.outFileNameData is None:
        sys.exit("At least one of --plotFile and --outFileNameData must be specified!\n")

    if args.ntop < 0:
        sys.exit("The value specified for --ntop must be >= 0!\n")

    if args.PCs[0] == args.PCs[1]:
        sys.exit("You must specify different principal components!\n")
    if args.PCs[0] <= 0 or args.PCs[1] <= 0:
        sys.exit("The specified principal components must be at least 1!\n")

    corr = Correlation(args.corData,
                       labels=args.labels,)

    corr.rowCenter = args.rowCenter
    corr.transpose = args.transpose
    corr.ntop = args.ntop
    corr.log2 = args.log2

    Wt, eigenvalues = corr.plot_pca(args.plotFile,
                                    PCs=args.PCs,
                                    plot_title=args.plotTitle,
                                    image_format=args.plotFileFormat,
                                    plotWidth=args.plotWidth,
                                    plotHeight=args.plotHeight,
                                    cols=args.colors,
                                    marks=args.markers)

    if args.outFileNameData is not None:
        # The context manager closes the file even when a write fails.
        with open(args.outFileNameData, "w") as of:
            of.write("#plotPCA --outFileNameData\n")
            of.write("Component\t{}\tEigenvalue\n".format("\t".join(corr.labels)))
            n = eigenvalues.shape[0]
            for i in range(n):
                # One row per component: index, its row of Wt, eigenvalue.
                loadings = "\t".join(["{}".format(x) for x in Wt[i, :]])
                of.write("{}\t{}\t{}\n".format(i + 1, loadings, eigenvalues[i]))
Exemplo n.º 9
0
def get_labels_and_correlation(
        bw_files,
        # chrs_to_skip,
        bin_size=10000,
        method='pearson',
        fileset_name='result',
        blacklist=None,
        labels=bw_labels,
        output_dir=BASE_DIR):
    """Correlate binned bigWig scores and plot them as a heatmap.

    The per-bin scores of ``bw_files`` are stored in
    ``<output_dir><fileset_name>_<method>[_blacklisted].npz``, correlated
    with ``Correlation`` and plotted to a ``.png`` with the same stem. The
    flattened correlation values are also written to ``corrScores.txt`` in
    the current working directory.

    :param bw_files: paths of the bigWig files to compare.
    :param bin_size: bin size passed to ``score_bw.getScorePerBin``.
    :param method: ``'pearson'`` or ``'spearman'``.
    :param fileset_name: stem used for the output file names and the title.
    :param blacklist: optional blacklist file passed to ``getScorePerBin``.
    :param labels: sample labels; derived from the file names when empty.
    :param output_dir: prefix of the output paths. It is concatenated
        verbatim, so it has to end with a path separator.
    :returns: tuple ``(image_path, labels, values)`` where ``values`` is the
        correlation matrix flattened into a plain list.
    :raises AssertionError: when ``method`` is not a supported name.
    """
    assert method in ('pearson', 'spearman'), 'Invalid correlation method'
    # Autogenerate labels from filename if not provided
    if not labels:
        labels = [
            path.split('/')[-1].split('.')[0] for path in bw_files
        ]
    # Generate a name for the unique combination
    test_name = fileset_name + '_' + method
    if blacklist:
        # The trailing space separates this prefix from the method name in
        # the plot title ("Blacklisted Pearson ...").
        blacklist_title = 'Blacklisted '
        test_name += '_blacklisted'
    else:
        blacklist_title = ''
    image_name = test_name + '.png'
    # Bin the bigwig data (bin_size defaults to 10 kb)
    num_reads_per_bin = score_bw.getScorePerBin(
        bw_files,
        bin_size,
        # chrsToSkip=chrs_to_skip,
        blackListFileName=blacklist)
    # Write to npz file
    filename = output_dir + test_name + '.npz'
    with open(filename, "wb") as f:
        np.savez_compressed(f, matrix=num_reads_per_bin, labels=labels)
    # Compute the correlations
    corr = Correlation(filename, method, labels=labels)
    np_array = corr.compute_correlation()

    def _flatten(nested):
        """Return the leaves of an arbitrarily nested list, depth first."""
        flat = []
        for item in nested:
            if isinstance(item, list):
                flat.extend(_flatten(item))
            else:
                flat.append(item)
        return flat

    my_listUnnested = _flatten(np_array.tolist())

    with open("corrScores.txt", "w") as f:
        f.write(str(my_listUnnested))

    plot_title = '{}{} Correlation of {}'.format(blacklist_title,
                                                 method.capitalize(),
                                                 fileset_name)
    # Create a png file of correlation heatmap
    image_path = output_dir + image_name
    corr.plot_correlation(image_path, plot_title=plot_title)

    return image_path, labels, my_listUnnested
Exemplo n.º 10
0
def get_labels_and_correlation(
        bw_files,
        # chrs_to_skip,
        bin_size=10000,
        method='pearson',
        fileset_name='result',
        blacklist=None,
        labels=bw_labels,
        output_dir='/Users/baditya02/Downloads/treatment-data/graphs/test/'
):
    """Correlate binned bigWig scores and plot them as a heatmap.

    The per-bin scores of ``bw_files`` are saved to an ``.npz`` file under
    ``output_dir``, correlated with ``Correlation`` and drawn to a ``.png``
    that shares the same stem
    (``<fileset_name>_<method>[_blacklisted]``).

    :param bw_files: paths of the bigWig files to compare.
    :param bin_size: bin size passed to ``score_bw.getScorePerBin``.
    :param method: ``'pearson'`` or ``'spearman'``.
    :param fileset_name: stem used for the output file names and the title.
    :param blacklist: optional blacklist file passed to ``getScorePerBin``.
    :param labels: sample labels; derived from the file names when empty.
    :param output_dir: prefix of the output paths, concatenated verbatim.
    :returns: tuple ``(image_path, labels, values)`` where ``values`` is the
        correlation matrix flattened into a plain list.
    """
    assert method in ('pearson', 'spearman'), 'Invalid correlation method'

    # Fall back to the bare file names (no directory, no extension).
    if not labels:
        labels = [path.split('/')[-1].split('.')[0] for path in bw_files]

    # Name shared by the .npz and .png outputs.
    is_blacklisted = bool(blacklist)
    run_name = fileset_name + '_' + method
    if is_blacklisted:
        run_name += '_blacklisted'
    title_prefix = 'Blacklisted ' if is_blacklisted else ''

    # Bin the bigwig data and persist it for Correlation to read back.
    bin_scores = score_bw.getScorePerBin(
        bw_files,
        bin_size,
        blackListFileName=blacklist
    )
    npz_path = output_dir + run_name + '.npz'
    with open(npz_path, "wb") as npz_file:
        np.savez_compressed(npz_file, matrix=bin_scores, labels=labels)

    corr = Correlation(npz_path, method, labels=labels)
    nested_scores = corr.compute_correlation().tolist()

    def _leaves(node):
        """Yield the non-list items of a nested list, depth first."""
        for item in node:
            if type(item) is list:
                for leaf in _leaves(item):
                    yield leaf
            else:
                yield item

    flat_scores = list(_leaves(nested_scores))

    # Create a png file of the correlation heatmap.
    heatmap_title = '{}{} Correlation of {}'.format(
        title_prefix, method.capitalize(), fileset_name)
    image_path = output_dir + run_name + '.png'
    corr.plot_correlation(image_path, plot_title=heatmap_title)

    return image_path, labels, flat_scores