Exemplo n.º 1
0
def main():
    """Draw a scatter plot of two columns of a tab-separated file as a PDF.

    Command-line arguments (read from ``sys.argv``):
        1: input file name (tab-separated text)
        2: output PDF file name
        3, 4: 1-based indexes of the two columns to plot
        5: plot title
        6: x-axis label
        7: y-axis label

    Blank lines, lines starting with ``#`` and lines where a selected column
    is missing or not numeric are skipped.  A value of "NA" (any case) is
    kept and stored as NaN.  Problems with the column arguments or with the
    R plotting calls are reported through ``stop_err``.
    """

    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        columns = int( sys.argv[3] ) - 1, int( sys.argv[4] ) - 1
    except ( IndexError, ValueError ):
        # Missing argument or a column index that is not an integer.
        stop_err( "Columns not specified, your query does not contain a column of numerical data." )
    title = sys.argv[5]
    xlab = sys.argv[6]
    ylab = sys.argv[7]

    matrix = []
    # Bookkeeping about rejected lines; the details describe the first one.
    skipped_lines = 0
    first_invalid_line = 0
    invalid_value = ''
    invalid_column = 0
    # The context manager closes the input even if parsing raises.
    with open( in_fname ) as in_file:
        for i, line in enumerate( in_file ):
            valid = True
            line = line.rstrip( '\r\n' )
            if line and not line.startswith( '#' ):
                row = []
                fields = line.split( "\t" )
                for column in columns:
                    try:
                        val = fields[column]
                        if val.lower() == "na":
                            row.append( float( "nan" ) )
                        else:
                            row.append( float( val ) )
                    except ( IndexError, ValueError ):
                        # Column absent on this line, or value not numeric.
                        valid = False
                        skipped_lines += 1
                        if not first_invalid_line:
                            first_invalid_line = i + 1
                            try:
                                invalid_value = fields[column]
                            except IndexError:
                                invalid_value = ''
                            invalid_column = column + 1
                        # One bad column rejects the whole line.
                        break
            else:
                # Blank line or comment line.
                valid = False
                skipped_lines += 1
                if not first_invalid_line:
                    first_invalid_line = i + 1

            if valid:
                matrix.append( row )

    # Every line is either appended to ``matrix`` or counted as skipped, so a
    # non-empty matrix means at least one usable row.  (Comparing the skip
    # count with the last 0-based line index rejected inputs that had exactly
    # one valid row.)
    if matrix:
        try:
            r.pdf( out_fname, 8, 8 )
            try:
                r.plot( array( matrix ), type="p", main=title, xlab=xlab, ylab=ylab, col="blue", pch=19 )
            finally:
                # Close the PDF device even when plotting fails.
                r['dev.off']()
        except Exception as exc:
            stop_err( "%s" % str( exc ) )
Exemplo n.º 2
0
def plotIDR(output_file, input_prefixes):
    '''create IDR plots.

    This code is taken from the R script

    batch-consistency-plot.r

    within the IDR package.

    The helper script ``WrapperIDR.r`` is sourced from the directory of
    this module.  For every prefix the files ``<prefix>-uri.sav`` and
    ``<prefix>-em.sav`` are loaded into the R session (presumably they
    define ``uri.output`` and ``em.output`` -- confirm against the code
    that writes them).

    :param output_file: name of the PDF file to write.
    :param input_prefixes: sequence of filename prefixes, one per
        comparison; its length is used as the number of pairs to plot.
    '''

    dirname = os.path.dirname(__file__)
    R.source(os.path.join(dirname, "WrapperIDR.r"))

    R('''df.txt = 10''')

    R('''uri.list <- list()
         uri.list.match <- list()
         ez.list <- list()
         legend.txt <- c()
         em.output.list <- list()
         uri.output.list <- list()''')

    npair = len(input_prefixes)
    # R lists are 1-based, so count the prefixes from 1.
    for i, input_prefix in enumerate(input_prefixes, 1):

        R.load(input_prefix + "-uri.sav")
        R.load(input_prefix + "-em.sav")

        index = {"i": i}

        R('''uri.output.list[[%(i)i]] <- uri.output;
              em.output.list[[%(i)i]] <- em.output;
              # reverse =T for error rate;''' % index)
        R('''
              ez.list[[%(i)i]] <- get.ez.tt.all(em.output, uri.output.list[[%(i)i]]$data12.enrich$merge1,
                                        uri.output.list[[%(i)i]]$data12.enrich$merge2);'''
          % index)
        R('''
              # URI for all peaks
              uri.list[[%(i)i]] <- uri.output$uri.n;

              # URI for matched peaks
              uri.match <- get.uri.matched(em.output$data.pruned, df=df.txt);
              uri.list.match[[%(i)i]] <- uri.match$uri.n;
         ''' % index)

        legend = "%i = %s" % (i, input_prefix)
        # The legend is spliced into a single-quoted R string literal, so
        # backslashes and single quotes in the prefix must be escaped or
        # the generated R code no longer parses.
        legend = legend.replace("\\", "\\\\").replace("'", "\\'")
        R('''
              legend.txt[%(i)i] <- '%(legend)s';
        ''' % {"i": i, "legend": legend})

    R.pdf(output_file)
    try:
        R('''par(mfcol=c(2,3), mar=c(5,6,4,2)+0.1)''')
        R('''plot.uri.group(uri.list, NULL, file.name=NULL, c(1:%(npair)i), title.txt="all peaks");
             plot.uri.group(uri.list.match, NULL, file.name=NULL, c(1:%(npair)i), title.txt="matched peaks");
             plot.ez.group(ez.list, plot.dir=NULL, file.name=NULL, legend.txt=c(1:%(npair)i), y.lim=c(0, 0.6));
             plot(0, 1, type="n", xlim=c(0,1), ylim=c(0,1), xlab="", ylab="", xaxt="n", yaxt="n"); 
             legend(0, 1, legend.txt, cex=0.6);''' % {"npair": npair})
    finally:
        # Always close the PDF device, even if one of the plots fails.
        R["dev.off"]()
Exemplo n.º 3
0
def plotIDR( output_file, input_prefixes ):
    '''create IDR plots.

    This code is taken from the R script

    batch-consistency-plot.r

    within the IDR package.

    ``WrapperIDR.r`` is sourced from the directory containing this module,
    then ``<prefix>-uri.sav`` and ``<prefix>-em.sav`` are loaded into the R
    session for each prefix (presumably providing ``uri.output`` and
    ``em.output`` -- confirm against the code that saves them).

    :param output_file: name of the PDF file to write.
    :param input_prefixes: sequence of filename prefixes, one per
        comparison; its length is used as the number of pairs to plot.
    '''

    dirname = os.path.dirname(__file__)
    R.source(os.path.join( dirname, "WrapperIDR.r"))

    R('''df.txt = 10''')

    R('''uri.list <- list()
         uri.list.match <- list()
         ez.list <- list()
         legend.txt <- c()
         em.output.list <- list()
         uri.output.list <- list()''')

    npair = len(input_prefixes)
    # Start counting at 1 because the index is used for 1-based R lists.
    for i, input_prefix in enumerate(input_prefixes, 1):

        R.load( input_prefix + "-uri.sav" )
        R.load( input_prefix + "-em.sav" )

        slot = {"i": i}

        R( '''uri.output.list[[%(i)i]] <- uri.output;
              em.output.list[[%(i)i]] <- em.output;
              # reverse =T for error rate;''' % slot)
        R('''
              ez.list[[%(i)i]] <- get.ez.tt.all(em.output, uri.output.list[[%(i)i]]$data12.enrich$merge1,
                                        uri.output.list[[%(i)i]]$data12.enrich$merge2);''' % slot)
        R('''
              # URI for all peaks
              uri.list[[%(i)i]] <- uri.output$uri.n;

              # URI for matched peaks
              uri.match <- get.uri.matched(em.output$data.pruned, df=df.txt);
              uri.list.match[[%(i)i]] <- uri.match$uri.n;
         ''' % slot )

        # Escape backslashes and single quotes: the text ends up inside a
        # single-quoted R string literal and would otherwise break it.
        legend = ( "%i = %s" % (i, input_prefix) ).replace( "\\", "\\\\" ).replace( "'", "\\'" )
        R('''
              legend.txt[%(i)i] <- '%(legend)s';
        ''' % {"i": i, "legend": legend})

    R.pdf( output_file )
    try:
        R('''par(mfcol=c(2,3), mar=c(5,6,4,2)+0.1)''')
        R('''plot.uri.group(uri.list, NULL, file.name=NULL, c(1:%(npair)i), title.txt="all peaks");
             plot.uri.group(uri.list.match, NULL, file.name=NULL, c(1:%(npair)i), title.txt="matched peaks");
             plot.ez.group(ez.list, plot.dir=NULL, file.name=NULL, legend.txt=c(1:%(npair)i), y.lim=c(0, 0.6));
             plot(0, 1, type="n", xlim=c(0,1), ylim=c(0,1), xlab="", ylab="", xaxt="n", yaxt="n"); 
             legend(0, 1, legend.txt, cex=0.6);''' % {"npair": npair})
    finally:
        # Close the PDF device whether or not the plotting calls succeeded.
        R["dev.off"]()
Exemplo n.º 4
0
    def run(self):
        """Write the visualization PDF for this step's cluster.

        Loads the edges via ``load_edges(self.options)``, keeps the rows
        whose ``"cluster"`` column equals ``self.cluster`` and, for every
        (sample, dataset) pair from ``self.options.iter_10xdatasets()`` in
        sorted order, draws the fragment plot followed by one triangle plot
        per selected row.  The output path is
        ``self.outpaths(final=False)["visualization"]``.
        """
        edges = load_edges(self.options)
        cluster = edges.loc[edges["cluster"] == self.cluster]

        from rpy2.robjects import r
        r.pdf(self.outpaths(final=False)["visualization"])

        try:
            for sample, dataset in sorted(self.options.iter_10xdatasets()):
                graphing.plot_frags(cluster, self.options, sample, dataset)
                for _, row in cluster.iterrows():
                    plot_triangles(row, self.options)
        finally:
            # Close the PDF device even if a plotting call raises.
            r["dev.off"]()
Exemplo n.º 5
0
def visualize_frags(outdir, graphs, options):
    """Write one PDF of fragment plots per graph into ``outdir``.

    For graph number ``i`` the file ``fragments.cluster_<i>.pdf`` is
    created.  Each connected component of the graph is reduced to the
    shortest simple path between its first two degree-1 nodes; every second
    node of that path plus the final node is passed to ``plot_frags`` for
    each (sample, dataset) pair from ``options.iter_10xdatasets()`` in
    sorted order.

    :param outdir: output directory, created through
        ``utilities.ensure_dir``.
    :param graphs: iterable of networkx graphs.
    :param options: object providing ``iter_10xdatasets()``; also forwarded
        to ``plot_frags``.
    :raises IndexError: if a component has fewer than two degree-1 nodes.
    """
    from rpy2.robjects import r

    utilities.ensure_dir(outdir)

    for i, graph in enumerate(graphs):
        r.pdf(os.path.join(outdir, "fragments.cluster_{}.pdf".format(i)))

        try:
            for component in networkx.connected_components(graph):
                subgraph = graph.subgraph(component)

                ends = [node for node, degree in subgraph.degree_iter()
                        if degree == 1]
                # shortest_simple_paths yields paths from shortest to
                # longest; only the first is needed, so do not enumerate
                # every simple path just to index the first one.
                path = next(networkx.shortest_simple_paths(
                    subgraph, ends[0], ends[1]))
                # Keep every second node and always include the last one.
                breakends = path[:-1:2] + path[-1:]

                for sample, dataset in sorted(options.iter_10xdatasets()):
                    plot_frags(breakends, options, sample, dataset)
        finally:
            # Close this graph's PDF device even if plotting fails.
            r["dev.off"]()
Exemplo n.º 6
0
    def val(self):
        """Fit spline value curves for both policies, plot and compare them.

        Three PDFs are written next to ``self.out_dir`` (in its parent
        directory): ``value.pdf`` (both fitted curves plus the raw value
        points), ``value_diff.pdf`` (policy_b minus policy_a) and
        ``value_percent_diff.pdf`` (that difference relative to policy_a).
        The fits are then re-evaluated at the 2013 ``OverallRank`` values
        read from ``data/lawData.csv`` and the summed difference is printed
        together with its ratio to the summed policy_a fit.

        Returns:
            The summed difference (policy_b fit minus policy_a fit) over
            the 2013 ranks.
        """
        rank_grid = pd.DataFrame({'OverallRank': np.linspace(1, 194, 1000)})
        curve_a = self.spline_est(self.policy_a['value'], rank_grid)
        curve_b = self.spline_est(self.policy_b['value'], rank_grid)

        out_root = os.path.dirname(self.out_dir)

        def pdf_path(fname):
            # Every figure is written to the parent directory of out_dir.
            return os.path.join(out_root, fname)

        grid_ranks = rank_grid['OverallRank']
        values_a = self.policy_a['value']
        values_b = self.policy_b['value']

        # Figure 1: fitted curves with the underlying value points.
        r.pdf(pdf_path('value.pdf'))
        r.plot(grid_ranks, curve_a, type='l', xlab='Rank_M', ylab='V(Rank)')
        r.lines(grid_ranks, curve_b, col='red')
        r.points(values_a['OverallRank'], values_a['val'], col='black')
        r.points(values_b['OverallRank'], values_b['val'], col='red')
        r.legend('topright',
                 np.array(['No Info', 'Info']),
                 lty=np.array([1, 1]),
                 col=np.array(['black', 'red']))
        r('dev.off()')

        # Figure 2: absolute difference between the two fits.
        gap = np.array(curve_b) - np.array(curve_a)
        r.pdf(pdf_path('value_diff.pdf'))
        r.plot(grid_ranks, gap, type='l', xlab='Rank',
               ylab='V(Rank|info=1) - V(Rank|info=0)')
        r.abline(h=0, lty=2)
        r('dev.off()')

        # Figure 3: difference relative to the policy_a fit.
        relative_gap = gap / np.array(curve_a)
        r.pdf(pdf_path('value_percent_diff.pdf'))
        r.plot(grid_ranks, relative_gap, type='l', xlab='Rank',
               ylab='(V(Rank|info=1) - V(Rank|info=0)) / V(Rank|info=0)')
        r.abline(h=0, lty=2)
        r('dev.off()')

        # Re-evaluate both fits at the ranks observed in 2013.
        csv_path = join(dirname(dirname(__file__)), 'data', 'lawData.csv')
        law_data = pd.read_csv(csv_path)
        ranks_2013 = deepcopy(
            law_data.loc[law_data['year'] == 2013, 'OverallRank'])
        observed = pd.DataFrame({'OverallRank': np.array(ranks_2013)})
        obs_a = self.spline_est(self.policy_a['value'], observed)
        obs_b = self.spline_est(self.policy_b['value'], observed)

        diff = np.sum(np.array(obs_b) - np.array(obs_a))
        pdiff = diff / np.sum(obs_a)
        print("      - Change in Producer Surplus: {0}".format(diff))
        print("      - Percent change in Producer Surplus: {0}".format(pdiff))
        return diff
Exemplo n.º 7
0
    #r.barplot(py2ri(qc_df.num_reads))
#    r.plot(py2ri(qc_df))


if __name__ == "__main__":
    print "Plotting"

    from rpy2.robjects.lib import ggplot2
    from rpy2.robjects import r
    from rpy2.robjects.packages import importr

    scales = importr('scales')

    iris = r('iris')

    r.pdf("/home/yarden/jaen/Musashi/rtest.pdf")

    iris_py = pandas.read_csv("/home/yarden/iris.csv")
    iris_py = iris_py.rename(columns={"Name": "Species"})
    corrs = []
    from scipy.stats import spearmanr
    for species in set(iris_py.Species):
        entries = iris_py[iris_py["Species"] == species]
        c = spearmanr(entries["SepalLength"], entries["SepalWidth"])
        print "c: ", c

    # compute r.cor(x, y) and divide up by Species
    # Assume we get a vector of length Species saying what the
    # correlation is for each Species' Petal Length/Width
    p = ggplot2.ggplot(iris) + \
        ggplot2.geom_point(ggplot2.aes_string(x="Sepal.Length", y="Sepal.Width")) + \