Python DataFrame.from_csvfile示例，rpy2.robjects.vectors.DataFrame.from_csvfile Python示例

示例#1

0

显示文件

文件： runR.py 项目： talonsensei/Bfx_scripts

def createGraphSeries(cohort):
   """Write the correlation and per-HMM plots for ``cohort`` to a PDF.

   Reads ``<cohort>.seq.out`` (tab separated), opens ``<cohort>.stats.pdf``
   as the graphics device, draws one scatter plot per column pair and then,
   for every HMM name, three line plots from ``<cohort>.seq.<hmm>.stats``.
   The graphics device is closed even when reading or plotting fails.
   """
   hm = ['bothR5','bothDX','esR5','esDX','trofileR5','trofileDX','mixedR5DX']
   # (x column, y column) of each scatter plot, in drawing order
   scatter_pairs = [
      ('bothR5','bothDX'),
      ('esR5','esDX'),
      ('trofileR5','trofileDX'),
      ('bothR5','esR5'),
      ('trofileR5','esR5'),
      ('bothDX','esDX'),
      ('trofileDX','esDX'),
      ('bothDX','mixedR5DX'),
   ]
   stat_data = DataFrame.from_csvfile(cohort+'.seq.out', sep = "\t")
   label = cohort.upper()

   # Begin writing graphs to pdf
   grdevices.pdf(file=cohort+".stats.pdf",width=7,height=7)
   try:
      # graph 1 scatter plots
      for x_col, y_col in scatter_pairs:
         scatterPlot(stat_data,x_col,y_col,label+' cohort\n '+x_col+'/'+y_col+' correlation')

      for hmm in hm:
         hmm_scores = DataFrame.from_csvfile(cohort+'.seq.'+hmm+'.stats',sep="\t")
         # graph 3
         plot(hmm_scores.rx2("Cutoff"),hmm_scores.rx2("Accuracy"),type='o',main='Accuracy vs. Cutoff '+label+' Cohort\n'+hmm+'.hmm',xlab='Cutoff',ylab='Accuracy')
         # graph 4
         # NOTE(review): the y axis plots the "TPP" column but is labelled '% False Neg' - confirm the label
         plot(hmm_scores.rx2("FPP"),hmm_scores.rx2("TPP"),type='o',xlim=base.c(0,1),ylim=base.c(0,1),main=label+' Cohort ROC\n'+hmm+'.hmm',xlab='% False Pos',ylab='% False Neg')
         # graph 5
         plot(hmm_scores.rx2("Cutoff"),hmm_scores.rx2("Phi"),type='o',xlim=base.c(50,100),ylim=base.c(0,0.75),main=label+' Cohort Association Coeff\n'+hmm+'.hmm',xlab='Cutoff',ylab='Association Coefficient')
   finally:
      # close pdf file, also when a read or a plot call raised
      grdevices.dev_off()
   return

示例#2

0

显示文件

文件： processPatchesv4_Rpy1.py 项目： talonsensei/Bfx_scripts

def analyzeR():
    """Load the patch data table and locate its 'ic50_val' column.

    Reads the tab-separated file named by ``inputfile`` (a name defined
    outside this function) with a header row.
    """
    mypatchData = DataFrame.from_csvfile(inputfile,header=True, sep = "\t")
    
    # Zero-based position of 'ic50_val' among the column names
    ici = list(mypatchData.colnames).index('ic50_val')
    # NOTE(review): starti is not read anywhere in the visible part of this function
    starti = ici + 1
    # NOTE(review): lasti is the row count but is used below as a column bound - confirm
    lasti = mypatchData.nrow
    # NOTE(review): `[,a:b]` is R-style indexing and is not valid Python syntax;
    # presumably a column range was intended - confirm the range before rewriting
    myData = mypatchData[,ici:lasti]

示例#3

0

显示文件

文件： compute_adj.py 项目： MPBA/renette

    def loadfiles(self):
        """
        Load files into R environment

        The default read options are overridden by any non-None entry with
        the same key in ``self.param``, and the merged options are written
        back to ``self.param``. Each file of ``self.filelist`` is read with
        the matching entry of ``self.seplist`` as separator; the data frame
        is appended to ``self.mylist`` and the file name without directory
        and extension to ``self.listname``. An ``IOError`` raised for a file
        is added to ``self.error`` and the loop moves on to the next file.
        """
        # NOTE(review): rcount and names are not read in the visible part of this method
        rcount = 0
        names = robjects.r['names']

        # Set the default parameter for reading from csv
        param = {'sep': '\t', 'header': True, 'as_is': True,
                 'row.names': ri.NULL}

        # Check the correct parameter and set the default
        for p in param:
            if p in self.param and self.param[p] is not None:
                param[p] = self.param[p]

        self.param.update(param)

        # Read all the files in the R-environment
        for f, s in zip(self.filelist, self.seplist):
            try:
                # The separator comes from seplist, not from param['sep']
                tmpdata = DataFrame.from_csvfile(f,
                                                 sep=str(s),
                                                 header=param['header'],
                                                 as_is=param['as_is'],
                                                 row_names=param['row.names'])
                self.mylist.append(tmpdata)
                fname = os.path.basename(os.path.splitext(f)[0])
                self.listname.append(fname)
                rcount += 1
            except IOError as e:
                self.error += e
示例#4

0

显示文件

文件： adCluster.py 项目： tiagoantao/mega-analysis

def clusterPop(admix, indexes):
    """Reorder the rows of ``admix`` listed in ``indexes`` by hierarchical clustering.

    The selected lines (zero-based line numbers) are copied to a scratch
    file named after the process id, loaded as a space-separated table
    without header, and clustered with R's ``hclust`` on ``dist``. Returns
    the selected line numbers arranged by the ``order`` component of the
    clustering. A single index is returned as is, without touching any file.

    Raises ValueError if the clustering result has no ``order`` component.
    """
    if len(indexes)==1:
        return [indexes[0]]

    wanted = set(indexes)  # constant-time membership test per line
    with open(admix) as f:
        selected = [(fPos, l) for fPos, l in enumerate(f) if fPos in wanted]
    oldOrder = [fPos for fPos, _ in selected]

    subf = "adm.%d" % (os.getpid(),)
    try:
        with open(subf, "w") as w:
            w.writelines(l for _, l in selected)
        df = DataFrame.from_csvfile(subf, sep=" ", header=False)
        d=robjects.r.hclust(robjects.r.dist(df))
    finally:
        # Remove the scratch file even when reading or clustering failed
        if os.path.exists(subf):
            os.remove(subf)

    order = None
    for name, value in d.items():
        if name=="order":
            order = value
            break
    if order is None:
        raise ValueError("hclust result has no 'order' component")

    # R positions are 1-based
    return [oldOrder[pos-1] for pos in order]

示例#5

0

显示文件

文件： rtools.py 项目： louhos/Vaaliraapija

 def hae_ennakkoilmoitukset(tiedosto):
     """Load a comma-separated file with a header row as an R data frame.

     ``tiedosto`` is used as given when that path exists; otherwise it is
     looked up under ``DATADIR``. Raises IOError when neither path exists.
     """
     if not os.path.exists(tiedosto):
         candidate = os.path.join(DATADIR, tiedosto)
         if os.path.exists(candidate):
             tiedosto = candidate
         else:
             raise IOError("Annettua tiedostoa %s ei löydy" % candidate)
     read_options = dict(header=True, sep=',', as_is=True)
     return DataFrame.from_csvfile(tiedosto, **read_options)

示例#6

0

显示文件

文件： adCluster.py 项目： tiagoantao/mega-analysis

def clusterAll(admix, myPop):
    """Order population names by hierarchical clustering of their mean rows.

    Line ``i`` of ``admix`` holds space-separated floats and belongs to
    population ``myPop[i]``. The per-population mean of every column is
    written to a scratch file named after the process id, loaded as an R
    data frame and clustered (``dist`` with method "max", ``hclust`` with
    method "complete"). Returns the sorted population names rearranged by
    the ``order`` component of the clustering.

    Raises ValueError if the clustering result has no ``order`` component.
    """
    with open(admix) as f:
        ls = f.readlines()
    # Strip like the row parsing below so trailing blanks cannot inflate numK
    numK = len(ls[0].rstrip().split(" "))

    pops = {}
    for i, line in enumerate(ls):
        vals = [float(x) for x in line.rstrip().split(" ")]
        cnt, accu = pops.get(myPop[i], (0, [0.0]*numK))
        for i2 in range(numK):
            accu[i2] += vals[i2]
        pops[myPop[i]] = cnt + 1, accu

    popNames = sorted(pops)
    subf = "accu.%d" % (os.getpid(),)
    try:
        with open(subf, "w") as w:
            for popName in popNames:
                cnt, sums = pops[popName]
                w.write(" ".join([str(x/cnt) for x in sums]))
                w.write("\n")
        df = DataFrame.from_csvfile(subf, sep=" ", header=False)
        d=robjects.r.hclust(robjects.r.dist(df, method="max"), method="complete")
    finally:
        # Remove the scratch file even when reading or clustering failed
        if os.path.exists(subf):
            os.remove(subf)

    order = None
    for name, value in d.items():
        if name=="order":
            order = value
            break
    if order is None:
        raise ValueError("hclust result has no 'order' component")

    # R positions are 1-based
    return [popNames[pos-1] for pos in order]

示例#7

0

显示文件

文件： runR4.py 项目： talonsensei/Bfx_scripts

def createGraphSeries(cohort,t,sfiles,ofile):
   """Write the selected plot series for ``cohort`` to ``<cohort>.stats.pdf``.

   ``t`` selects what is drawn: 'out' draws the scatter plots from the
   tab-separated file ``ofile``, 'stats' draws six line plots for every HMM
   name that has an entry in ``sfiles`` (HMM name -> stats file), and
   'both' draws both series. The graphics device is closed even when
   reading or plotting fails.
   """
   hm = ['bothR5','bothDX','esR5','esDX','trofileR5','trofileDX','mixedR5DX']
   # (x column, y column) of each scatter plot, in drawing order
   scatter_pairs = [
      ('bothR5','bothDX'),
      ('esR5','esDX'),
      ('trofileR5','trofileDX'),
      ('bothR5','esR5'),
      ('trofileR5','esR5'),
      ('bothDX','esDX'),
      ('trofileDX','esDX'),
      ('bothDX','mixedR5DX'),
   ]
   label = cohort.upper()

   # Begin writing graphs to pdf
   grdevices.pdf(file=cohort+".stats.pdf",width=7,height=7)
   try:
      if t == 'both' or t =='out':
         stat_data = DataFrame.from_csvfile(ofile, sep = "\t")
         # graph 1 scatter plots
         for x_col, y_col in scatter_pairs:
            scatterPlot(stat_data,x_col,y_col,label+' cohort\n '+x_col+'/'+y_col+' correlation')

      if t == 'both' or t =='stats':
         for hmm in hm:
            if hmm not in sfiles:
               continue
            hmm_scores = DataFrame.from_csvfile(sfiles[hmm],sep="\t")
            # graph 3
            plot(hmm_scores.rx2("Cutoff"),hmm_scores.rx2("Accuracy"),type='o',main='Accuracy vs. Cutoff '+label+' Cohort\n'+hmm+'.hmm',xlab='Cutoff',ylab='Accuracy')
            # graph 4
            # NOTE(review): the y axis plots the "TPP" column but is labelled '% False Neg' - confirm the label
            plot(hmm_scores.rx2("FPP"),hmm_scores.rx2("TPP"),type='o',xlim=base.c(0,1),ylim=base.c(0,1),main=label+' Cohort ROC\n'+hmm+'.hmm',xlab='% False Pos',ylab='% False Neg')
            # graph 5
            plot(hmm_scores.rx2("Cutoff"),hmm_scores.rx2("Phi"),type='o',xlim=base.c(50,100),ylim=base.c(0,0.75),main=label+' Cohort Association Coeff\n'+hmm+'.hmm',xlab='Cutoff',ylab='Association Coefficient')
            # graph 6
            plot(hmm_scores.rx2("Cutoff"),hmm_scores.rx2("Specificity"),type='o',main='Specificity vs. Cutoff '+label+' Cohort\n'+hmm+'.hmm',xlab='Cutoff',ylab='Specificity')
            # graph 7
            plot(hmm_scores.rx2("Cutoff"),hmm_scores.rx2("Sensitivity"),type='o',main='Sensitivity vs. Cutoff '+label+' Cohort\n'+hmm+'.hmm',xlab='Cutoff',ylab='Sensitivity')
            # graph 8
            plot(hmm_scores.rx2("Sensitivity"),hmm_scores.rx2("Specificity"),type='o',main='Sensitivity vs. Specificity '+label+' Cohort\n'+hmm+'.hmm',xlab='Sensitivity',ylab='Specificity')
   finally:
      # close pdf file, also when a read or a plot call raised
      grdevices.dev_off()
   return

示例#8

0

显示文件

文件： compute_stats.py 项目： MPBA/renette

    def loadfiles(self):
        """
        Load files into R environment

        The default read options are overridden by any non-None entry with
        the same key in ``self.param``. Each file of ``self.filelist`` is
        read with the matching entry of ``self.seplist`` as separator and
        converted with R's ``as.matrix``. Diagonal entries of at least 1e-8
        are set to 0, and when that happened the file name is added to
        ``self.e``. The matrix is appended to ``self.mylist`` and the file
        name without directory and extension to ``self.listname``. An
        ``IOError`` or ``RRuntimeError`` raised for a file is added to
        ``self.error`` and the loop moves on to the next file.
        """
        # NOTE(review): rcount, diag and names are not read in the visible part of this method
        rcount = 0
        asmatrix = robjects.r['as.matrix']
        diag = robjects.r['diag']
        names = robjects.r['names']

        ## Set the default parameter for reading from csv
        param = {'header': True, 'as_is': True, 'row.names': ri.RNULLArg}
        ## Check the correct parameter and set the default
        for p in param:
            if p in self.param and self.param[p] is not None:
                param[p] = self.param[p]

        for f, s in zip(self.filelist, self.seplist):
            try:
                dataf = DataFrame.from_csvfile(f,
                                               sep=str(s),
                                               header=param['header'],
                                               as_is=param['as_is'],
                                               row_names=param['row.names'])

                dataf = asmatrix(dataf)

                # Should be the diagonal set to 0?
                # Do it for all the inputs, just to be sure
                # NOTE(review): only positive diagonal values pass this test;
                # negative ones are left untouched - confirm that is intended
                zcount = 0
                for i in range(dataf.ncol):
                    if (dataf.rx(i+1,i+1)[0] - 0.0 >= 1e-8):
                        zcount += 1
                        dataf.rx[i+1,i+1] = 0

                if zcount:
                    self.e += f

                self.mylist.append(dataf)
                fname = os.path.basename(os.path.splitext(f)[0])
                self.listname.append(fname)

                rcount += 1
            except (IOError, RRuntimeError) as e:
                self.error += e

示例#9

0

显示文件

文件： Regression.py 项目： Neil-Liang/FintechHack

def stepwise_regression(data, d_v, i_vs):
    """Fit a linear model in R and reduce it by backward stepwise selection.

    ``data`` is written with ``writelines`` to a temporary file that is then
    loaded as an R data frame. The model formula is ``d_v`` regressed on the
    ``i_vs`` terms joined with '+'; it is printed before fitting. Returns
    ``str()`` of the first element of the object returned by ``stats.step``.
    The temporary file is removed before returning, also on failure.
    """
    import os
    import tempfile

    stats = importr('stats')
    pat = '%s~%s' % (d_v, '+'.join(i_vs))
    print(pat)

    # delete=False because the file is reopened by name after it is closed
    scratch = tempfile.NamedTemporaryFile(delete=False)
    try:
        try:
            scratch.writelines(data)
        finally:
            scratch.close()
        data_from_input = DataFrame.from_csvfile(scratch.name)
        reg = stats.lm(pat, data_from_input)
        st = stats.step(reg, direction = 'backward')
        return str(st[0])
    finally:
        os.remove(scratch.name)

示例#10

0

显示文件

文件： rutils.py 项目： MPBA/renette

def csv2graph(csvfiles, seplist=None, param=None, filepath='.', graph_format='gml'):
    """
    Utility to convert from csv file to igraph format file

    Every file in ``csvfiles`` is read with the separator at the same
    position in ``seplist``, turned into an undirected weighted graph and
    written to ``filepath`` as ``<csv name>.<graph_format>``. ``param`` may
    carry the read options 'header' (default True) and 'row.names' (default
    False). Returns True.

    Raises IOError when ``seplist`` and ``csvfiles`` differ in length; this
    is checked before any R package is loaded.
    """
    seplist = [] if seplist is None else seplist
    param = {} if param is None else param

    if len(seplist) != len(csvfiles):
        raise IOError('Not enought separators')

    igraph = importr('igraph')
    base = importr('base')
    gadj = igraph.graph_adjacency
    wgraph = igraph.write_graph

    for f, sep in zip(csvfiles, seplist):
        myfname = f + ".%s" % graph_format
        tmpdata = DataFrame.from_csvfile(f,
                                         sep=sep,
                                         header=param.get('header', True),
                                         as_is=True,
                                         row_names=param.get('row.names', False))
        # NOTE(review): the original passed an undefined name `reslist` here;
        # assuming the adjacency matrix is the table just read - confirm
        g = gadj(base.as_matrix(tmpdata), mode='undirected', weighted=True)
        wgraph(g, file=os.path.join(filepath,myfname), format=graph_format)

    return True

示例#11

0

显示文件

文件： r.py 项目： jerome-white/sakai-book

 def from_csv(cls, data):
     """Return ``cls`` wrapping the data frame read from the CSV file at ``str(data)``."""
     frame = DataFrame.from_csvfile(str(data))
     return cls(frame)

示例#12

0

显示文件

文件： gui.py 项目： kenziD/A-Wonderful-Life

def show4():
	"""Call open4(), source end.R, then draw a dodged bar chart of project2.csv.

	The chart maps 'day' to x, 'time' to y and fills by 'factor(project)';
	the x axis tick labels are rotated by 45 degrees.
	"""
	open4()
	r.source('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/end.R',encoding="utf-8")
	data = DataFrame.from_csvfile('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/project2.csv')
	# Build the plot one layer at a time, in the original left-to-right order
	pp = ggplot2.ggplot(data)
	pp = pp + ggplot2.aes_string(x='day', y='time',fill = 'factor(project)')
	pp = pp + ggplot2.geom_bar(stat ='identity',position = 'dodge')
	pp = pp + ggplot2.ggtitle("两项目时间对比图")
	pp = pp + ggplot2.labs(x='日期',y='时间 (min)')
	pp = pp + ggplot2.theme(**{'axis.text.x': ggplot2.element_text(angle = 45)})
	pp.plot()

示例#13

0

显示文件

文件： rtools.py 项目： louhos/Vaaliraapija

 def hae_ehdokkaat():
     """Load ``e2011ehd.csv`` from ``DATADIR`` as a tab-separated R data frame with a header row."""
     read_options = {'header': True, 'sep': '\t', 'as_is': True}
     return DataFrame.from_csvfile(os.path.join(DATADIR, 'e2011ehd.csv'),
                                   **read_options)

示例#14

0

显示文件

文件： gsummary.py 项目： stemcellcommons/galaxytools

def main():
    """Write summary statistics of an R expression over columns of a file.

    Command-line arguments: input file, output file and expression. Every
    alphabetic word of the expression must be listed in
    ``S3_METHODS()['Math']`` and every operator run in
    ``S3_METHODS()['Ops']``. References of the form ``c<N>`` select
    tab-separated fields of the input. Rows in which a referenced field is
    missing or cannot be parsed as a float are skipped and counted. The
    kept rows are staged in a temporary file, loaded as an R data frame,
    and the expression is evaluated in R. Sum, mean, standard deviation and
    quantiles of the result are written to the output file.
    """
    # NOTE(review): stop_err presumably terminates the process - confirm
    try:
        datafile = sys.argv[1]
        outfile_name = sys.argv[2]
        expression = sys.argv[3]
    except IndexError:
        stop_err('Usage: python gsummary.py input_file ouput_file expression')

    math_allowed = S3_METHODS()['Math']
    ops_allowed = S3_METHODS()['Ops']

    # Check for invalid expressions
    for word in re.findall(r'[a-zA-Z]+', expression):
        if word and word not in math_allowed:
            stop_err(
                "Invalid expression '%s': term '%s' is not recognized or allowed"
                % (expression, word))
    symbols = set()
    for symbol in re.findall(r'[^a-z0-9\s]+', expression):
        if symbol and symbol not in ops_allowed:
            stop_err(
                "Invalid expression '%s': operator '%s' is not recognized or allowed"
                % (expression, symbol))
        else:
            symbols.add(symbol)
    if len(symbols) == 1 and ',' in symbols:
        # User may have entered a comma-separated list r_data_frame columns
        stop_err(
            "Invalid columns '%s': this tool requires a single column or expression"
            % expression)

    # Find all column references in the expression; the pattern guarantees
    # that the text after 'c' is an integer
    column_names = re.findall(r'c[0-9]+', expression)
    cols = [int(name[1:]) - 1 for name in column_names]

    # Text mode so plain str writes work on Python 2 and 3; the context
    # manager closes (and thereby deletes) the temporary file on every path
    with tempfile.NamedTemporaryFile('w+') as tmp_file:
        # Write the R header row to the temporary file
        hdr_str = "\t".join("c%s" % str(col + 1) for col in cols)
        tmp_file.write("%s\n" % hdr_str)
        skipped_lines = 0
        first_invalid_line = 0
        i = 0
        with open(datafile) as infile:
            for i, line in enumerate(infile):
                line = line.rstrip('\r\n')
                if not line or line.startswith('#'):
                    continue
                fields = line.split('\t')
                try:
                    for col in cols:
                        float(fields[col])
                except (ValueError, IndexError):
                    skipped_lines += 1
                    if not first_invalid_line:
                        first_invalid_line = i + 1
                    continue
                # Write the R data row to the temporary file
                data_str = "\t".join(fields[col] for col in cols)
                tmp_file.write("%s\n" % data_str)
        tmp_file.flush()

        if skipped_lines == i + 1:
            stop_err(
                "Invalid column or column data values invalid for computation.  See tool tips and syntax for data requirements."
            )
        else:
            # summary function and return labels
            summary_func = r(
                "function( x ) { c( sum=sum( as.numeric( x ), na.rm=T ), mean=mean( as.numeric( x ), na.rm=T ), stdev=sd( as.numeric( x ), na.rm=T ), quantile( as.numeric( x ), na.rm=TRUE ) ) }"
            )
            headings = ['sum', 'mean', 'stdev', '0%', '25%', '50%', '75%', '100%']
            headings_str = "\t".join(headings)

            r_data_frame = DataFrame.from_csvfile(tmp_file.name,
                                                  header=True,
                                                  sep="\t")

            with open(outfile_name, 'w') as outfile:
                for col in column_names:
                    r.assign(col, r["$"](r_data_frame, col))
                try:
                    # The expression is evaluated by R; it was restricted to
                    # the allowed words and operators above
                    summary = summary_func(r(expression))
                except RException as s:
                    stop_err("Computation resulted in the following error: %s" %
                             str(s))
                outfile.write("#%s\n" % headings_str)
                print(summary)
                print(summary.r_repr())
                outfile.write(
                    "%s\n" % "\t".join(["%g" % (summary.rx2(k)[0]) for k in headings]))

            if skipped_lines:
                print("Skipped %d invalid lines beginning with line #%d.  See tool tips for data requirements." % (
                    skipped_lines, first_invalid_line))

示例#15

0

显示文件

文件： gsummary.py 项目： Ahsanzia/galaxytools

def main():
    """Write summary statistics of an R expression over columns of a file.

    Command-line arguments: input file, output file and expression. Every
    alphabetic word of the expression must be listed in
    ``S3_METHODS()[ 'Math' ]`` and every operator run in
    ``S3_METHODS()[ 'Ops' ]``. References of the form ``c<N>`` select
    tab-separated fields of the input. Rows in which a referenced field is
    missing or cannot be parsed as a float are skipped and counted. The
    kept rows are staged in a temporary file, loaded as an R data frame,
    and the expression is evaluated in R. Sum, mean, standard deviation and
    quantiles of the result are written to the output file.
    """
    # NOTE(review): stop_err presumably terminates the process - confirm
    try:
        datafile = sys.argv[1]
        outfile_name = sys.argv[2]
        expression = sys.argv[3]
    except IndexError:
        stop_err( 'Usage: python gsummary.py input_file ouput_file expression' )

    math_allowed = S3_METHODS()[ 'Math' ]
    ops_allowed = S3_METHODS()[ 'Ops' ]

    # Check for invalid expressions
    for word in re.findall( r'[a-zA-Z]+', expression ):
        if word and word not in math_allowed:
            stop_err( "Invalid expression '%s': term '%s' is not recognized or allowed" %( expression, word ) )
    symbols = set()
    for symbol in re.findall( r'[^a-z0-9\s]+', expression ):
        if symbol and symbol not in ops_allowed:
            stop_err( "Invalid expression '%s': operator '%s' is not recognized or allowed" % ( expression, symbol ) )
        else:
            symbols.add( symbol )
    if len( symbols ) == 1 and ',' in symbols:
        # User may have entered a comma-separated list r_data_frame columns
        stop_err( "Invalid columns '%s': this tool requires a single column or expression" % expression )

    # Find all column references in the expression; the pattern guarantees
    # that the text after 'c' is an integer
    column_names = re.findall( r'c[0-9]+', expression )
    cols = [ int( name[1:] ) - 1 for name in column_names ]

    # Text mode so plain str writes work on Python 2 and 3; the context
    # manager closes (and thereby deletes) the temporary file on every path
    with tempfile.NamedTemporaryFile( 'w+' ) as tmp_file:
        # Write the R header row to the temporary file
        hdr_str = "\t".join( "c%s" % str( col+1 ) for col in cols )
        tmp_file.write( "%s\n" % hdr_str )
        skipped_lines = 0
        first_invalid_line = 0
        i = 0
        with open( datafile ) as infile:
            for i, line in enumerate( infile ):
                line = line.rstrip( '\r\n' )
                if not line or line.startswith( '#' ):
                    continue
                fields = line.split( '\t' )
                try:
                    for col in cols:
                        float( fields[ col ] )
                except ( ValueError, IndexError ):
                    skipped_lines += 1
                    if not first_invalid_line:
                        first_invalid_line = i + 1
                    continue
                # Write the R data row to the temporary file
                data_str = "\t".join( fields[ col ] for col in cols )
                tmp_file.write( "%s\n" % data_str )
        tmp_file.flush()

        if skipped_lines == i + 1:
            stop_err( "Invalid column or column data values invalid for computation.  See tool tips and syntax for data requirements." )
        else:
            # summary function and return labels
            summary_func = r( "function( x ) { c( sum=sum( as.numeric( x ), na.rm=T ), mean=mean( as.numeric( x ), na.rm=T ), stdev=sd( as.numeric( x ), na.rm=T ), quantile( as.numeric( x ), na.rm=TRUE ) ) }" )
            headings = [ 'sum', 'mean', 'stdev', '0%', '25%', '50%', '75%', '100%' ]
            headings_str = "\t".join( headings )

            r_data_frame = DataFrame.from_csvfile( tmp_file.name, header=True, sep="\t" )

            with open( outfile_name, 'w' ) as outfile:
                for col in column_names:
                    r.assign( col, r[ "$" ]( r_data_frame, col ) )
                try:
                    # The expression is evaluated by R; it was restricted to
                    # the allowed words and operators above
                    summary = summary_func( r( expression ) )
                except RException as s:
                    stop_err( "Computation resulted in the following error: %s" % str( s ) )
                outfile.write( "#%s\n" % headings_str )
                print( summary )
                print( summary.r_repr() )
                outfile.write( "%s\n" % "\t".join( [ "%g" % ( summary.rx2( k )[0] ) for k in headings ] ) )

            if skipped_lines:
                print( "Skipped %d invalid lines beginning with line #%d.  See tool tips for data requirements." % ( skipped_lines, first_invalid_line ) )

示例#16

0

显示文件

文件： correspondence_analysis_rpy2.py 项目： fw1121/galaxy_tools

# Define the R function distEisen in the embedded R session and keep a handle
# to it: it converts 1 - cor(x) into an R "dist" object.
distEisen = robjects.r('''
                       distEisen <- function(x, use = "pairwise.complete.obs") {
                       co.x <- cor(x, use = use)
                       dist.co.x <- 1 - co.x
                       return(as.dist(dist.co.x))
                       }
                       ''')

# Define the R function listToDF: it pads every list element with `fill` up
# to the length of the longest element and returns as.data.frame of the list.
listToDF = robjects.r('''
           listToDF <- function(inputList, fill = NA){
               # Use fill = NULL for regular recycling behavior
               maxLen = max(sapply(inputList, length))
               for(i in seq_along(inputList))
                   inputList[[i]] <- c(inputList[[i]], rep(fill, maxLen -
           length(inputList[[i]])))
               return(as.data.frame(inputList))
           }
           ''')
# Read the tab-separated annotation file with a header row; the first column
# supplies the row names. annotation_classes_input_file is defined outside
# the lines shown here.
annotations = DataFrame.from_csvfile(annotation_classes_input_file,
                                     header=True,
                                     sep='\t',
                                     quote='"',
                                     row_names=1)


# Short alias for the R session, then load the utils and tools R packages.
R = robjects.r
R["library"]("utils")
R["library"]("tools")

示例#17

0

显示文件

文件： gui.py 项目： kenziD/A-Wonderful-Life

def show1():
	"""Call open1(), source head1.r, then draw a bar chart of day1.csv.

	The chart maps 'project' to x and to the fill colour and 'time' to y;
	the x axis tick labels are rotated by 45 degrees.
	"""
	open1()
	r.source('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/head1.r',encoding="utf-8")
	data = DataFrame.from_csvfile('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/day1.csv')
	# Build the plot one layer at a time, in the original left-to-right order
	pp = ggplot2.ggplot(data)
	pp = pp + ggplot2.aes_string(x='project', y='time',fill = 'project')
	pp = pp + ggplot2.geom_bar(stat ='identity')
	pp = pp + ggplot2.ggtitle("今日项目时间分布图")
	pp = pp + ggplot2.labs(x='项目',y='时间 (min)')
	pp = pp + ggplot2.theme(**{'axis.text.x': ggplot2.element_text(angle = 45)})
	pp.plot()

示例#18

0

显示文件

文件： dexcom_charts.py 项目： bewest/iPancreas

    def __init__(self, dex_name):
        """Read the CSV file ``dex_name`` and keep the data frame as ``self.dexcom_data``."""
        frame = DataFrame.from_csvfile(dex_name)
        self.dexcom_data = frame

示例#19

0

显示文件

文件： foundislengthhistogram.py 项目： TheodenS/ISsuite

import rpy2.robjects.pandas2ri
from rpy2.robjects.vectors import DataFrame
import math
import datetime

# Command line: -in_csv and -out options, both without help text.
parser = argparse.ArgumentParser()


parser.add_argument("-in_csv", help="")
parser.add_argument("-out", help="")

args=parser.parse_args()



# Load the comma-separated input file (with header row) as an R data frame.
dataf = DataFrame.from_csvfile(args.in_csv, sep = ",",header=True)

# Get statistics for investigated seqs
# Handles to the R summary functions of the same names.
rmean = robjects.r['mean']
rmed = robjects.r['median']
rmax = robjects.r['max']
rsd = robjects.r['sd']
rsum = robjects.r['sum']

# R's max applied to the 'hitlen' column selection of the data frame.
ma=rmax(dataf.rx('hitlen'))

# Handles to the R conversion functions of the same names.
as_vec = robjects.r['as.vector']
as_num = robjects.r['as.numeric']
as_mat = robjects.r['as.matrix']

#test22=as_vec(dataf.rx('Length'))

示例#20

0

显示文件

文件： test_rpy.py 项目： Neil-Liang/FintechHack

##    print pat
##    return None
#    __file.close()
#    data_from_input = DataFrame.from_csvfile(__file.name)
#    reg = stats.lm(pat, data_from_input)
#    st = stats.step(reg, direction = 'backward')
##    ret = ''
##    for key, value in st.iteritems():
##        ret += key + ',' + str(value) + '\n'
##    return ret
#    return ''

# Script-level run of the backward stepwise regression: fit "inflat" on the
# listed predictors with R's lm, reduce the model with stats.step and print
# the first element of the result.
d_v = "inflat"
i_vs = ["money", "output", "initial", "poprate", "inv", "school"]
data_from_input = DataFrame.from_csvfile("/home/foodfan/money.csv")
stats = importr("stats")
pat = "%s~%s" % (d_v, "+".join(i_vs))
reg = stats.lm(pat, data_from_input)
st = stats.step(reg, direction="backward")
ret = ""
print("------------------------------------")

print(str(st[0]))
# ret is never filled in, so this prints an empty line
print(ret)

示例#21

0

显示文件

文件： make_dna_sbjct_histo.py 项目： TheodenS/ISsuite

from rpy2.robjects.vectors import DataFrame
import math
import datetime

# Command line: -in_csv and -out options, both without help text.
parser = argparse.ArgumentParser()


parser.add_argument("-in_csv", help="")
parser.add_argument("-out", help="")

args=parser.parse_args()

# Use the file given with -in_csv; fall back to the previously hard-coded
# path so that running without the option behaves as before.
infile = args.in_csv or "/Users/security/science/bigoutput.csv"


# Load the comma-separated input file (with header row) as an R data frame.
dataf = DataFrame.from_csvfile(infile, sep = ",",header=True)

# Get statistics for investigated seqs
#rmean = robjects.r['mean']
#rmed = robjects.r['median']
#rmax = robjects.r['max']
#rsd = robjects.r['sd']
#rsum = robjects.r['sum']
#
#ma=rmax(dataf.rx('Length'))
#
#as_vec = robjects.r['as.vector']
#as_num = robjects.r['as.numeric']
#as_mat = robjects.r['as.matrix']
#
#test22=as_vec(dataf.rx('Length'))