def write_html_report(self):
    """
    Write the report as html to ``self.opts.htmlout``.

    The template named by the class attribute ``HTML_REPORT_TEMPLATE`` is
    loaded from ``self.tool_folder`` and rendered with:

    * ``program_name``   -- ``str(self.program_name)``
    * ``timestamp``      -- ``str(timenow())``
    * ``file_info``      -- one ``(name, getFileString(name, outdir))`` pair per
      non-hidden entry of ``self.opts.outdir``, oldest modification time
      first; empty when the directory does not exist
    * ``log_data``       -- contents of ``self.tlogname`` with ``<BR />``
      appended after every newline
    * ``command_string`` -- ``str(self.cl)``
    """
    out_folder = self.opts.outdir
    my_template = Template(
        filename=os.path.join(self.tool_folder, self.__class__.HTML_REPORT_TEMPLATE),
        strict_undefined=True)
    if os.path.exists(os.path.abspath(out_folder)):
        # entries starting with '.' are left out of the report
        files = [os.path.join(out_folder, x) for x in os.listdir(out_folder) if not x.startswith('.')]
        files.sort(key=os.path.getmtime)
        names = [os.path.split(f)[-1] for f in files]
        file_info = [(name, getFileString(name, self.opts.outdir)) for name in names]
    else:
        file_info = []
    # context manager so the log handle is closed instead of leaked
    with open(self.tlogname) as log_file:
        log_data = log_file.read().replace('\n', '\n<BR />')
    template_parameters = {
        'program_name': str(self.program_name),
        'timestamp': str(timenow()),
        'file_info': file_info,
        'log_data': log_data,
        'command_string': str(self.cl),
    }
    # the report file is closed even if rendering raises
    with open(self.opts.htmlout, 'w') as f:
        f.write(my_template.render(**template_parameters))
def write_html_report(self):
    """
    Write the report as html to ``self.opts.htmlout``.

    Loads the template ``HTML_REPORT_TEMPLATE`` (a class attribute) from
    ``self.tool_folder`` and renders it with the program name, a timestamp
    from ``timenow()``, the list of non-hidden files found in
    ``self.opts.outdir`` (sorted by modification time, oldest first, each
    paired with ``getFileString(name, outdir)``), the log file contents
    with ``<BR />`` added after each newline, and the command string.
    If the output folder does not exist the file list is empty.
    """
    out_folder = self.opts.outdir
    my_template = Template(
        filename=os.path.join(self.tool_folder, self.__class__.HTML_REPORT_TEMPLATE),
        strict_undefined=True)
    if os.path.exists(os.path.abspath(out_folder)):
        # hidden (dot) entries are not reported
        files = [
            os.path.join(out_folder, x) for x in os.listdir(out_folder)
            if not x.startswith('.')
        ]
        files.sort(key=os.path.getmtime)
        file_info = []
        for path in files:
            base = os.path.split(path)[-1]
            file_info.append((base, getFileString(base, self.opts.outdir)))
    else:
        file_info = []
    # read the log inside a with-block so the handle is always closed
    with open(self.tlogname) as log_handle:
        log_html = log_handle.read().replace('\n', '\n<BR />')
    template_parameters = {
        'program_name': str(self.program_name),
        'timestamp': str(timenow()),
        'file_info': file_info,
        'log_data': log_html,
        'command_string': str(self.cl),
    }
    # with-block closes the report even when render() raises
    with open(self.opts.htmlout, 'w') as f:
        f.write(my_template.render(**template_parameters))
def makehtml(self):
    """
    Write the report as html to ``self.opts.htmlout``.

    The page is assembled from the module-level ``galhtmlprefix``,
    ``galhtmlattr`` and ``galhtmlpostfix`` templates, a table linking every
    reportable file in ``self.opts.outdir`` (oldest modification time
    first), the contents of the log file ``self.tlogname`` and the command
    line stored in ``self.cl``.
    """
    # context manager so the log handle is closed rather than leaked
    with open(self.tlogname,'r') as logf:
        logdat = logf.readlines()
    res = []
    res.append(galhtmlprefix % progname)
    res.append(galhtmlattr % (progname,timenow()))
    res.append('<b>Your job produced the following outputs - check here for a record of what was done and any unexpected events</b><hr/>')
    try:
        flist = os.listdir(self.opts.outdir)
    except OSError:
        # missing or unreadable output directory: report without a file table
        flist = []
    if len(flist) > 0: # show what's left
        flist = [x for x in flist if not (x.startswith('.') or x == 'None')]
        # sort on (mtime, name) so the oldest output is listed first
        tlist = [(os.path.getmtime(os.path.join(self.opts.outdir,x)),x) for x in flist]
        tlist.sort()
        flist = [x[1] for x in tlist]
        res.append('<div><b>Output files.</b><hr/>\n')
        res.append('<table>\n')
        for f in flist:
            fn = os.path.split(f)[-1]
            fs = getFileString(fn,self.opts.outdir)
            res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fs))
        res.append('</table></div>\n')
    res.append('<b>Log of activity</b><hr/>\n')
    res.append('\n%s' % '<br/>'.join(logdat))
    res.append('<hr/>Note: The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> \n')
    res.append('generated all outputs reported here. These third party tools were')
    res.append('orchestrated by the Galaxy rgEstLibComplexity wrapper and this command line from the Galaxy form:<br/>\n%s' % (self.cl))
    res.append(galhtmlpostfix)
    with open(self.opts.htmlout,'w') as f:
        f.write('\n'.join(res))
def __init__(self, opts=None, cl=[], tidy=True):
    """
    Set up an rgGATKRecal run, then run GATK and write the html report.

    opts -- options object; ``title``, ``outdir`` and ``GATK_CVflags`` are
            read from it here
    cl   -- sequence of command line tokens, joined with spaces and kept in
            ``self.cl`` for the html report (the default list is never
            mutated)
    tidy -- stored as ``self.tidy``

    Side effects: opens the log file ``self.tlogname`` for writing, tries
    to create the ``pdfplots`` directory under ``opts.outdir``, and calls
    ``self.runGATK()`` followed by ``self.writehtml()``.
    """
    self.ourname = 'rgGATKRecal'
    self.opts = opts
    self.tidy = tidy
    self.cl = ' '.join(cl)  # ready for the htmlfile output
    self.delme = []
    # every punctuation or whitespace character in the title becomes '_'
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme, '_' * len(killme))
    self.title = self.opts.title.translate(trantab)
    self.tlogname = os.path.join(
        self.opts.outdir,
        '%s_rg%s_Log.txt' % (self.title, self.ourname))
    self.tlog = open(self.tlogname, 'w')
    self.outtxt = '%s_%s_Out.txt' % (self.title, self.ourname)
    self.GATK_CVFlags = opts.GATK_CVflags
    self.Rscriptpath = whereis('Rscript')
    self.info = '%s on %s at %s' % (self.ourname, self.title, timenow())
    if self.Rscriptpath is None:
        # GATK wants the explicit path to Rscript which comes with R now
        p = os.environ.get('PATH', '')
        # the original formatted an undefined name `program` here, which
        # would raise NameError exactly when the warning was needed
        self.tlog.write('### Cannot find %s on %s\n' % ('Rscript', p))
        self.Rscriptpath = '/share/shared/lx26-amd64/bin/Rscript'
    self.pdfoutdir = os.path.join(self.opts.outdir, 'pdfplots')
    self.preplotprefix = 'rgPreRecal_'
    self.postplotprefix = 'rgPostRecal_'
    try:
        os.makedirs(self.pdfoutdir)
    except OSError:
        self.tlog.write('## unable to create pdf output dir %s' % self.pdfoutdir)
    self.delme.append(self.pdfoutdir)
    self.runGATK()
    self.writehtml()
def __init__(self, opts=None, cl=[], tidy=True):
    """
    Prepare an rgGATKRecal run, execute it and write the html report.

    opts -- options object; ``title``, ``outdir`` and ``GATK_CVflags`` are
            read from it here
    cl   -- sequence of command line tokens; joined with spaces into
            ``self.cl`` for the html report (the default list is never
            mutated)
    tidy -- stored as ``self.tidy``

    Side effects: opens ``self.tlogname`` for writing, tries to create the
    ``pdfplots`` directory below ``opts.outdir``, then calls
    ``self.runGATK()`` and ``self.writehtml()``.
    """
    self.ourname = "rgGATKRecal"
    self.opts = opts
    self.tidy = tidy
    self.cl = " ".join(cl)  # ready for the htmlfile output
    self.delme = []
    # replace every punctuation/whitespace character of the title by '_'
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme, "_" * len(killme))
    self.title = self.opts.title.translate(trantab)
    self.tlogname = os.path.join(self.opts.outdir, "%s_rg%s_Log.txt" % (self.title, self.ourname))
    self.tlog = open(self.tlogname, "w")
    self.outtxt = "%s_%s_Out.txt" % (self.title, self.ourname)
    self.GATK_CVFlags = opts.GATK_CVflags
    self.Rscriptpath = whereis("Rscript")
    self.info = "%s on %s at %s" % (self.ourname, self.title, timenow())
    if self.Rscriptpath is None:
        # GATK wants the explicit path to Rscript which comes with R now
        p = os.environ.get("PATH", "")
        # `program` was never defined in this method; name the tool directly
        # so the warning can actually be written
        self.tlog.write("### Cannot find %s on %s\n" % ("Rscript", p))
        self.Rscriptpath = "/share/shared/lx26-amd64/bin/Rscript"
    self.pdfoutdir = os.path.join(self.opts.outdir, "pdfplots")
    self.preplotprefix = "rgPreRecal_"
    self.postplotprefix = "rgPostRecal_"
    try:
        os.makedirs(self.pdfoutdir)
    except OSError:
        self.tlog.write("## unable to create pdf output dir %s" % self.pdfoutdir)
    self.delme.append(self.pdfoutdir)
    self.runGATK()
    self.writehtml()
def makehtml(self):
    """
    Write the report as html to ``self.opts.htmlout``.

    The page shows a thumbnail (``<name>.jpg``) linking to the pdf named by
    ``self.isPDF``, a table of the reportable files in ``self.opts.outdir``
    (oldest modification time first), the contents of the log file
    ``self.tlogname`` and the command line in ``self.cl``.
    """
    # context manager so the log handle is closed rather than leaked
    with open(self.tlogname, 'r') as logf:
        logdat = logf.readlines()
    res = []
    res.append(galhtmlprefix % progname)
    res.append(galhtmlattr % (progname, timenow()))
    res.append(
        '<b>Your job produced the following outputs - check here for a record of what was done and any unexpected events</b><hr/>'
    )
    imghref = '%s.jpg' % os.path.splitext(self.isPDF)[0]  # removes .pdf
    res.append('<table cellpadding="10"><tr><td>\n')
    res.append(
        '<a href="%s"><img src="%s" alt="%s" hspace="10" align="middle"></a>\n'
        % (self.isPDF, imghref, imghref))
    # close the cell before the row (the original emitted '</tr><td></table>')
    res.append('</td></tr></table>\n')
    try:
        flist = os.listdir(self.opts.outdir)
    except OSError:
        # missing or unreadable output directory: report without a file table
        flist = []
    if len(flist) > 0:
        # we should clean everything up - picard doesn't tell us what it did in cleansam unfortunately
        flist = [
            x for x in flist if not (x.startswith('.') or x == 'None')
        ]
        # sort on (mtime, name) so the oldest output is listed first
        tlist = [(os.path.getmtime(os.path.join(self.opts.outdir, x)), x)
                 for x in flist]
        tlist.sort()
        flist = [x[1] for x in tlist]
        res.append('<div><b>Output files.</b><hr/>\n')
        res.append('<table>\n')
        for f in flist:
            fn = os.path.split(f)[-1]
            fs = getFileString(fn, self.opts.outdir)
            res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn, fs))
        res.append('</table></div>\n')
    res.append('<b>Log of activity</b><hr/>\n')
    res.append('\n%s' % '<br/>'.join(logdat))
    res.append(
        '<hr/>Note: The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> \n'
    )
    res.append(
        'generated all outputs reported here. These third party tools were'
    )
    res.append(
        'orchestrated by the Galaxy rgInsertSize wrapper and this command line from the Galaxy form:<br/>\n%s'
        % (self.cl))
    res.append(galhtmlpostfix)
    with open(self.opts.htmlout, 'w') as f:
        f.write('\n'.join(res))
def writehtml(self):
    """
    Write the report as html to ``self.opts.htmlout``.

    For every pdf found in ``self.opts.outdir`` a jpg thumbnail is requested
    through ``self.run`` (a ``mogrify`` command) and shown in a two-column
    table. Below that come links to all reportable files (oldest
    modification time first), the contents of the log file
    ``self.tlogname`` and the command line in ``self.cl``.
    """
    # context manager so the log handle is closed rather than leaked
    with open(self.tlogname,'r') as logf:
        logdat = logf.readlines()
    res = []
    res.append(galhtmlprefix % progname)
    res.append(galhtmlattr % (progname,timenow()))
    try:
        flist = os.listdir(self.opts.outdir)
    except OSError:
        # missing or unreadable output directory: report without file tables
        flist = []
    if len(flist) > 0: # show what's left
        flist = [x for x in flist if not (x.startswith('.') or x == 'None')]
        pdfs = [x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf']
        # sort on (mtime, name) so the oldest output is listed first
        tlist = [(os.path.getmtime(os.path.join(self.opts.outdir,x)),x) for x in flist]
        tlist.sort()
        flist = [x[1] for x in tlist]
        if len(pdfs) > 0:
            cells = []
            pdfs.sort()
            res.append('<div><table cellpadding="5" cellspacing="10">\n')
            for p in pdfs:
                pfname = os.path.split(p)[-1]
                pfroot = os.path.splitext(pfname)[0]
                imghref = '%s.jpg' % pfroot # thumbnail name from mogrify
                cl = ['mogrify', '-resize x300 -write %s %s' % (imghref,pfname),]
                self.run(cl)
                s = '<a href="%s"><img src="%s" alt="%s" hspace="10" align="middle"></a>' % (pfname,imghref,pfname)
                # '<br/>' replaces the invalid '</br>' of the original
                cells.append('<td>%s<br/>%s</td>' % (pfroot,s))
            ncells = len(cells)
            # two thumbnails per row
            for i in range(0, ncells - 1, 2):
                res.append('<tr>%s%s</tr>\n' % (cells[i],cells[i+1]))
            # An odd count leaves one unpaired cell. The original tested for
            # an even count, which dropped that cell and duplicated the last
            # cell of complete tables.
            if ncells % 2 == 1: # last one
                res.append('<tr colspan="2">%s</tr>\n' % (cells[-1]))
            res.append('</table></div>\n')
        res.append('<div><b>Output files.</b><hr/>\n')
        res.append('<table>\n')
        for f in flist:
            fn = os.path.split(f)[-1]
            fs = getFileString(fn,self.opts.outdir)
            res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fs))
        res.append('</table></div>\n')
    res.append('<b>Your job produced the following log of activity - check here for a record of what was done and any unexpected events</b><hr/>')
    res.append('\n%s' % '<br/>'.join(logdat))
    res.append('<hr/>Note: The freely available <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Main_Page">GATK</a> \n')
    # the original literal was broken across two lines and contained a stray
    # pasted file name inside the word "reported"
    res.append('did all the work reported here. GATK is an independent non-Galaxy community resource, whose third party tools were')
    res.append('orchestrated by the Galaxy rgGATKRecalibrate wrapper and this command line from the Galaxy form:<br/>\n%s' % (self.cl))
    res.append(galhtmlpostfix)
    with open(self.opts.htmlout,'w') as f:
        f.write('\n'.join(res))
def writehtml(self):
    """
    Write the report as html to ``self.opts.htmlout``.

    Each pdf in ``self.opts.outdir`` gets a jpg thumbnail requested through
    ``self.run`` (a ``mogrify`` command); thumbnails are laid out two per
    table row. The page then links all reportable files (oldest
    modification time first) and ends with the contents of the log file
    ``self.tlogname`` and the command line in ``self.cl``.
    """
    # context manager so the log handle is closed rather than leaked
    with open(self.tlogname,'r') as logf:
        logdat = logf.readlines()
    res = []
    res.append(galhtmlprefix % progname)
    res.append(galhtmlattr % (progname,timenow()))
    try:
        flist = os.listdir(self.opts.outdir)
    except OSError:
        # missing or unreadable output directory: report without file tables
        flist = []
    if len(flist) > 0: # show what's left
        flist = [x for x in flist if not (x.startswith('.') or x == 'None')]
        pdfs = [x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf']
        # sort on (mtime, name) so the oldest output is listed first
        tlist = [(os.path.getmtime(os.path.join(self.opts.outdir,x)),x) for x in flist]
        tlist.sort()
        flist = [x[1] for x in tlist]
        if len(pdfs) > 0:
            cells = []
            pdfs.sort()
            res.append('<div><table cellpadding="5" cellspacing="10">\n')
            for p in pdfs:
                pfname = os.path.split(p)[-1]
                pfroot = os.path.splitext(pfname)[0]
                imghref = '%s.jpg' % pfroot # thumbnail name from mogrify
                cl = ['mogrify', '-resize x300 -write %s %s' % (imghref,pfname),]
                self.run(cl)
                s = '<a href="%s"><img src="%s" title="%s" hspace="10" align="middle"></a>' % (pfname,imghref,pfname)
                # '<br/>' replaces the invalid '</br>' of the original
                cells.append('<td>%s<br/>%s</td>' % (pfroot,s))
            ncells = len(cells)
            # emit complete pairs first
            for i in range(0, ncells - 1, 2):
                res.append('<tr>%s%s</tr>\n' % (cells[i],cells[i+1]))
            # Only an odd count leaves a cell over. The original condition
            # (even count) lost that cell and repeated the last cell of
            # tables that were already complete.
            if ncells % 2 == 1: # last one
                res.append('<tr colspan="2">%s</tr>\n' % (cells[-1]))
            res.append('</table></div>\n')
        res.append('<div><b>Output files.</b><hr/>\n')
        res.append('<table>\n')
        for f in flist:
            fn = os.path.split(f)[-1]
            fs = getFileString(fn,self.opts.outdir)
            res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fs))
        res.append('</table></div>\n')
    res.append('<b>Your job produced the following log of activity - check here for a record of what was done and any unexpected events</b><hr/>')
    res.append('\n%s' % '<br/>'.join(logdat))
    res.append('<hr/>Note: The freely available <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Main_Page">GATK</a> \n')
    # the original literal was split over two lines and had a pasted file
    # name in the middle of "reported"
    res.append('did all the work reported here. GATK is an independent non-Galaxy community resource, whose third party tools were')
    res.append('orchestrated by the Galaxy rgGATKRecalibrate wrapper and this command line from the Galaxy form:<br/>\n%s' % (self.cl))
    res.append(galhtmlpostfix)
    with open(self.opts.htmlout,'w') as f:
        f.write('\n'.join(res))
def __init__(self,opts=None,cl=[],tidy=True):
    """
    Record the settings for an rgSortBam run and open its temporary log.

    opts -- options object; ``title`` and ``tmpdir`` are read from it here
    cl   -- sequence of command line tokens, joined with spaces into
            ``self.cl`` for the html report (the default list is never
            mutated)
    tidy -- stored as ``self.tidy``

    The log is a ``tempfile.mkstemp`` file created in ``opts.tmpdir``; its
    path is kept in ``self.tlogname`` and the open handle in ``self.tlog``.
    """
    import os  # local import: only needed to wrap the mkstemp descriptor
    self.ourname = 'rgSortBam'
    self.opts = opts
    self.tidy = tidy
    self.cl = ' '.join(cl) # ready for the htmlfile output
    self.delme = []
    # every punctuation or whitespace character in the title becomes '_'
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme,'_'*len(killme))
    self.title = self.opts.title.translate(trantab)
    fd,self.tlogname = tempfile.mkstemp(dir=self.opts.tmpdir,suffix='rgSortBam.log')
    # Reuse the descriptor mkstemp already opened. The original opened the
    # path a second time and never closed `fd`, leaking one descriptor per
    # instance.
    self.tlog = os.fdopen(fd,'w')
    self.info = '%s on %s at %s' % (self.ourname,self.opts.title,timenow())
def __init__(self,opts=None,cl=[],tidy=False):
    """
    Record the settings for an rgPicardInsertSize run and open its log file.

    opts -- options object; ``title`` and ``outdir`` are read from it here
    cl   -- sequence of command line tokens, kept space-joined in ``self.cl``
    tidy -- stored as ``self.tidy``

    The log file ``rgInsertSizeMetrics.txt`` is opened for writing inside
    ``opts.outdir`` and kept open as ``self.tlog``.
    """
    self.ourname = 'rgPicardInsertSize'
    self.opts = opts
    self.tidy = tidy
    # ready for the htmlfile output
    self.cl = ' '.join(cl)
    self.delme = []
    # map every punctuation or whitespace character of the title to '_'
    unsafe = string.punctuation + string.whitespace
    underscores = '_'*len(unsafe)
    self.title = self.opts.title.translate(string.maketrans(unsafe,underscores))
    logpath = os.path.join(self.opts.outdir,'rgInsertSizeMetrics.txt')
    self.tlogname = logpath
    self.tlog = open(logpath,'w')
    self.isPDF = 'InsertSizeHist.pdf'
    self.info = '%s on %s at %s' % (self.ourname,self.title,timenow())
def writehtml(self):
    """
    Write the report as html to ``self.opts.htmlout``.

    Note complications needed to write pre and post reports - they have to
    be separated since gatk insists on giving them all the same names but
    at least allows a separate output directory...

    Pdf outputs are handed to ``self.writeImages`` together with the html
    lines collected so far; all reportable files in ``self.opts.outdir``
    are then linked (oldest modification time first), followed by the
    contents of the log file ``self.tlogname``.
    """
    # context manager so the log handle is closed rather than leaked
    with open(self.tlogname, 'r') as logf:
        logdat = logf.readlines()
    res = []
    res.append(galhtmlprefix % progname)
    res.append(galhtmlattr % (progname, timenow()))
    res.append(
        '<font size="-2">Note: The freely available <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Main_Page">GATK</a>'
    )
    res.append(
        'did all the calculations arranged here in your Galaxy history')
    try:
        flist = os.listdir(self.opts.outdir)
    except OSError:
        # missing or unreadable output directory: report without file tables
        flist = []
    if len(flist) > 0:  # show what's left after cleanup
        flist = [
            x for x in flist if not (x.startswith('.') or x == 'None')
        ]
        pdfs = [
            x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf'
        ]
        # sort on (mtime, name) so the oldest output is listed first
        tlist = [(os.path.getmtime(os.path.join(self.opts.outdir, x)), x)
                 for x in flist]
        tlist.sort()
        flist = [x[1] for x in tlist]
        if len(pdfs) > 0:
            res = self.writeImages(pdfs, res)
        res.append('<div><b>Output files.</b><hr/>\n')
        res.append('<table>\n')
        for f in flist:
            fn = os.path.split(f)[-1]
            fs = getFileString(fn, self.opts.outdir)
            res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn, fs))
        res.append('</table></div>\n')
    res.append(
        '<b>Your job produced the following log of activity - check here for a record of what was done and any unexpected events</b><hr/>'
    )
    res.append('\n%s' % '<br/>'.join(logdat))
    res.append(galhtmlpostfix)
    with open(self.opts.htmlout, 'w') as f:
        f.write('\n'.join(res))
def __init__(self, opts=None, cl=[], tidy=False):
    """
    Store the run settings of the rgPicardInsertSize wrapper and open its log.

    opts -- options object; ``title`` and ``outdir`` are read from it here
    cl   -- sequence of command line tokens, kept space-joined in ``self.cl``
    tidy -- stored as ``self.tidy``

    ``rgInsertSizeMetrics.txt`` inside ``opts.outdir`` is opened for writing
    and kept open as ``self.tlog``.
    """
    self.ourname = 'rgPicardInsertSize'
    self.opts = opts
    self.tidy = tidy
    # ready for the htmlfile output
    self.cl = ' '.join(cl)
    self.delme = []
    # punctuation and whitespace in the title are all turned into '_'
    bad_chars = string.punctuation + string.whitespace
    table = string.maketrans(bad_chars, '_' * len(bad_chars))
    raw_title = self.opts.title
    self.title = raw_title.translate(table)
    log_path = os.path.join(self.opts.outdir, 'rgInsertSizeMetrics.txt')
    self.tlogname = log_path
    self.tlog = open(log_path, 'w')
    self.isPDF = 'InsertSizeHist.pdf'
    stamp = timenow()
    self.info = '%s on %s at %s' % (self.ourname, self.title, stamp)
def __init__(self,opts=None,cl=[],tidy=True):
    """
    Prepare an rgGATKRecal run, execute it and write the html report.

    opts -- options object; ``title`` and ``outdir`` are read from it here
    cl   -- sequence of command line tokens, kept space-joined in ``self.cl``
    tidy -- stored as ``self.tidy``

    Opens the log file ``self.tlogname`` in ``opts.outdir`` and then calls
    ``self.runGATK()`` followed by ``self.writehtml()``.
    """
    self.ourname = 'rgGATKRecal'
    self.opts = opts
    self.tidy = tidy
    # ready for the htmlfile output
    self.cl = ' '.join(cl)
    self.delme = []
    # map every punctuation or whitespace character of the title to '_'
    unsafe = string.punctuation + string.whitespace
    underscores = '_'*len(unsafe)
    self.title = self.opts.title.translate(string.maketrans(unsafe,underscores))
    logname = '%s_rg%s_Log.txt' % (self.title,self.ourname)
    self.tlogname = os.path.join(self.opts.outdir,logname)
    self.tlog = open(self.tlogname,'w')
    self.outtxt = '%s_%s_Out.txt' % (self.title,self.ourname)
    self.info = '%s on %s at %s' % (self.ourname,self.title,timenow())
    # the whole job runs from the constructor
    self.runGATK()
    self.writehtml()
def __init__(self, opts=None, cl=[], tidy=True):
    """
    Set up an rgGATKRecal run, then run GATK and write the html report.

    opts -- options object; ``title`` and ``outdir`` are read from it here
    cl   -- sequence of command line tokens, kept space-joined in ``self.cl``
    tidy -- stored as ``self.tidy``

    The log file ``self.tlogname`` is opened in ``opts.outdir`` before
    ``self.runGATK()`` and ``self.writehtml()`` are called.
    """
    self.ourname = 'rgGATKRecal'
    self.opts = opts
    self.tidy = tidy
    # ready for the htmlfile output
    self.cl = ' '.join(cl)
    self.delme = []
    # punctuation and whitespace in the title are all turned into '_'
    bad_chars = string.punctuation + string.whitespace
    table = string.maketrans(bad_chars, '_' * len(bad_chars))
    raw_title = self.opts.title
    self.title = raw_title.translate(table)
    log_basename = '%s_rg%s_Log.txt' % (self.title, self.ourname)
    self.tlogname = os.path.join(self.opts.outdir, log_basename)
    self.tlog = open(self.tlogname, 'w')
    self.outtxt = '%s_%s_Out.txt' % (self.title, self.ourname)
    stamp = timenow()
    self.info = '%s on %s at %s' % (self.ourname, self.title, stamp)
    # construction performs the complete job
    self.runGATK()
    self.writehtml()
def __init__(self,opts=None,cl=[],fargs=[],tidy=True):
    """
    Record the settings for an rgGATKCoverDepth run and open its log file.

    opts  -- options object; ``title`` and ``outdir`` are read from it here
    cl    -- sequence of command line tokens, joined with spaces into
             ``self.cl`` for the html report
    fargs -- stored as ``self.fargs``
    tidy  -- stored as ``self.tidy``

    Neither default list is mutated here. The output directory is created
    before the log file inside it is opened.
    """
    self.ourname = 'rgGATKCoverDepth'
    self.fargs = fargs
    self.opts = opts
    self.tidy = tidy
    self.cl = ' '.join(cl) # ready for the htmlfile output
    self.delme = []
    # every punctuation or whitespace character in the title becomes '_'
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme,'_'*len(killme))
    self.title = self.opts.title.translate(trantab)
    # Create the output directory first. The original opened the log inside
    # outdir before calling makedirs, so a missing outdir made open() fail
    # and the makedirs branch could never do useful work.
    made_outdir = False
    try:
        os.makedirs(self.opts.outdir)
        made_outdir = True
    except OSError:
        # typically the directory already exists; any real problem will
        # surface when the log file is opened below
        pass
    self.tlogname = os.path.join(self.opts.outdir,'%s_rg%s_Log.txt' % (self.title,self.ourname))
    self.tlog = open(self.tlogname,'w')
    self.info = '%s on %s at %s' % (self.ourname,self.title,timenow())
    if made_outdir:
        self.tlog.write('# made out dir %s\n' % self.opts.outdir)
def writehtml(self):
    """
    Write the report as html to ``self.opts.htmlout``.

    Note complications needed to write pre and post reports - they have to
    be separated since gatk insists on giving them all the same names but
    at least allows a separate output directory...

    Pdf outputs are passed to ``self.writeImages`` along with the html
    lines built so far; every reportable file in ``self.opts.outdir`` is
    then linked (oldest modification time first), followed by the contents
    of the log file ``self.tlogname``.
    """
    # context manager so the log handle is closed rather than leaked
    with open(self.tlogname, "r") as logf:
        logdat = logf.readlines()
    res = []
    res.append(galhtmlprefix % progname)
    res.append(galhtmlattr % (progname, timenow()))
    res.append(
        '<font size="-2">Note: The freely available <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Main_Page">GATK</a>'
    )
    res.append("did all the calculations arranged here in your Galaxy history")
    try:
        flist = os.listdir(self.opts.outdir)
    except OSError:
        # missing or unreadable output directory: report without file tables
        flist = []
    if len(flist) > 0:  # show what's left after cleanup
        flist = [x for x in flist if not (x.startswith(".") or x == "None")]
        pdfs = [x for x in flist if os.path.splitext(x)[-1].lower() == ".pdf"]
        # sort on (mtime, name) so the oldest output is listed first
        tlist = [(os.path.getmtime(os.path.join(self.opts.outdir, x)), x) for x in flist]
        tlist.sort()
        flist = [x[1] for x in tlist]
        if len(pdfs) > 0:
            res = self.writeImages(pdfs, res)
        res.append("<div><b>Output files.</b><hr/>\n")
        res.append("<table>\n")
        for f in flist:
            fn = os.path.split(f)[-1]
            fs = getFileString(fn, self.opts.outdir)
            res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn, fs))
        res.append("</table></div>\n")
    res.append(
        "<b>Your job produced the following log of activity - check here for a record of what was done and any unexpected events</b><hr/>"
    )
    res.append("\n%s" % "<br/>".join(logdat))
    res.append(galhtmlpostfix)
    with open(self.opts.htmlout, "w") as f:
        f.write("\n".join(res))
def makehtml(self):
    """
    Write the report as html to ``self.opts.htmlout``.

    The page shows a thumbnail (``<name>.jpg``) that links to the pdf named
    by ``self.isPDF``, then a table of the reportable files in
    ``self.opts.outdir`` (oldest modification time first), the contents of
    the log file ``self.tlogname`` and the command line in ``self.cl``.
    """
    # context manager so the log handle is closed rather than leaked
    with open(self.tlogname,'r') as logf:
        logdat = logf.readlines()
    res = []
    res.append(galhtmlprefix % progname)
    res.append(galhtmlattr % (progname,timenow()))
    res.append('<b>Your job produced the following outputs - check here for a record of what was done and any unexpected events</b><hr/>')
    imghref = '%s.jpg' % os.path.splitext(self.isPDF)[0] # removes .pdf
    res.append('<table cellpadding="10"><tr><td>\n')
    res.append('<a href="%s"><img src="%s" alt="%s" hspace="10" align="middle"></a>\n' % (self.isPDF,imghref,imghref))
    # close the cell before the row (the original emitted '</tr><td></table>')
    res.append('</td></tr></table>\n')
    try:
        flist = os.listdir(self.opts.outdir)
    except OSError:
        # missing or unreadable output directory: report without a file table
        flist = []
    if len(flist) > 0: # we should clean everything up - picard doesn't tell us what it did in cleansam unfortunately
        flist = [x for x in flist if not (x.startswith('.') or x == 'None')]
        # sort on (mtime, name) so the oldest output is listed first
        tlist = [(os.path.getmtime(os.path.join(self.opts.outdir,x)),x) for x in flist]
        tlist.sort()
        flist = [x[1] for x in tlist]
        res.append('<div><b>Output files.</b><hr/>\n')
        res.append('<table>\n')
        for f in flist:
            fn = os.path.split(f)[-1]
            fs = getFileString(fn,self.opts.outdir)
            res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fs))
        res.append('</table></div>\n')
    res.append('<b>Log of activity</b><hr/>\n')
    res.append('\n%s' % '<br/>'.join(logdat))
    res.append('<hr/>Note: The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> \n')
    res.append('generated all outputs reported here. These third party tools were')
    res.append('orchestrated by the Galaxy rgInsertSize wrapper and this command line from the Galaxy form:<br/>\n%s' % (self.cl))
    res.append(galhtmlpostfix)
    with open(self.opts.htmlout,'w') as f:
        f.write('\n'.join(res))
# Fragment of the rgCaCo.py driver: runs plink's --model analysis on a binary
# fileset and converts the result into a summary table and a GFF file.
# NOTE(review): `bfname`, `name`, `verbose`, `plinke` and `myversion` are used
# below but not assigned in these lines - presumably set just above
# (sys.argv[1] / sys.argv[2]?) or at module level; confirm.
outfname = sys.argv[3]
logf = sys.argv[4]
logoutdir = sys.argv[5]
gffout = sys.argv[6]
topn = 1000
# best effort: any failure to create the log directory is ignored
try:
    os.makedirs(logoutdir)
except:
    pass
map_file = None
me = sys.argv[0]
amapf = '%s.bim' % bfname # to decode map in xformModel
# NOTE(review): flog is not closed anywhere in the visible lines
flog = file(logf,'w')
logme = []
cdir = os.getcwd()
s = 'Rgenetics %s http://rgenetics.org Galaxy Tools, rgCaCo.py started %s\n' % (myversion,timenow())
print >> sys.stdout, s # so will appear as blurb for file
logme.append(s)
if verbose:
    s = 'rgCaCo.py: bfname=%s, logf=%s, argv = %s\n' % (bfname, logf, sys.argv)
    print >> sys.stdout, s # so will appear as blurb for file
    logme.append(s)
twd = tempfile.mkdtemp(suffix='rgCaCo') # make sure plink doesn't spew log file into the root!
tname = os.path.join(twd,name)
vcl = [plinke,'--noweb','--bfile',bfname,'--out',name,'--model']
# the command is joined into one string and run through the shell inside twd,
# with plink's stdout sent to the log file
p=subprocess.Popen(' '.join(vcl),shell=True,stdout=flog,cwd=twd)
retval = p.wait()
resf = '%s.model' % tname # plink output is here we hope
xformModel(bfname,resf,outfname,name,amapf,flog) # leaves the desired summary file
makeGFF(resf=outfname,outfname=gffout,logf=flog,twd=twd,name='rgGLM_TopTable',description=name,topn=topn)
# the collected start-up messages are written after everything else has run
flog.write('\n'.join(logme))
def clean():
    """
    Filter a plink binary fileset and prune it to LD-independent markers.

    All inputs come from ``sys.argv`` (14 parameters after the script
    name, see the usage text below). Three plink tasks are run through
    ``pruneld``: ``--indep-pairwise`` with the QC thresholds, ``--extract``
    of the resulting ``.prune.in`` list into a new binary fileset, and
    ``--recode`` of that fileset. The plink log is written to
    ``<outfpath>/<title>.log`` and an html page listing every file in
    ``outfpath`` is written to the Galaxy output file.
    """
    # sys.argv[0] is the script and sys.argv[14] (r2) is read below, so 15
    # entries are needed; the original tested `< 14` and could fail with an
    # IndexError instead of printing the usage text
    if len(sys.argv) < 15:
        sys.stdout.write('## %s expected 14 params in sys.argv, got %d - %s\n' % (prog,len(sys.argv) - 1,sys.argv))
        sys.stdout.write("""this script will filter a linkage format ped
        and map file containing genotypes. It takes 14 parameters - the plink --f parameter and"
        a new filename root for the output clean data followed by the mind,geno,hwe,maf, mef and mei"
        documented in the plink docs plus the file to be returned to Galaxy
        Called as:
        <command interpreter="python">
        rgLDIndep.py '$input_file.extra_files_path' '$input_file.metadata.base_name' '$title' '$mind'
        '$geno' '$hwe' '$maf' '$mef' '$mei' '$out_file1' '$out_file1.extra_files_path'
        '$window' '$step' '$r2'
        </command>
        """ + '\n')
        sys.exit(1)
    plog = ['## Rgenetics: http://rgenetics.org Galaxy Tools rgLDIndep.py started %s\n' % timenow()]
    inpath = sys.argv[1]
    inbase = sys.argv[2]
    # every punctuation or whitespace character in the title becomes '_'
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme,'_'*len(killme))
    title = sys.argv[3].translate(trantab)
    mind = sys.argv[4]
    geno = sys.argv[5]
    hwe = sys.argv[6]
    maf = sys.argv[7]
    me1 = sys.argv[8]
    me2 = sys.argv[9]
    outfname = sys.argv[10]
    outfpath = sys.argv[11]
    winsize = sys.argv[12]
    step = sys.argv[13]
    r2 = sys.argv[14]
    outpath = os.path.join(outfpath,title)
    try:
        os.makedirs(outfpath)
    except OSError:
        # usually the directory already exists
        pass
    bfile = os.path.join(inpath,inbase)
    # the html output is opened before plink runs (as in the original) and
    # is closed even if a later step raises
    with open(outfname,'w') as outf:
        outf.write(galhtmlprefix % prog)
        ldin = bfile
        plinktasks = [['--bfile',ldin,'--indep-pairwise %s %s %s' % (winsize,step,r2),'--out',outpath,
                       '--mind',mind,'--geno',geno,'--maf',maf,'--hwe',hwe,'--me',me1,me2,],
                      ['--bfile',ldin,'--extract %s.prune.in --make-bed --out %s' % (outpath,outpath)],
                      ['--bfile',outpath,'--recode --out',outpath]]
        # make map file - don't really need ped but...
        # subset of ld independent markers for eigenstrat and other requirements
        vclbase = [plinke,'--noweb']
        prunelog = pruneld(plinktasks=plinktasks,cd=outfpath,vclbase = vclbase)
        # From the plink docs on --indep-pairwise: this generates the same
        # output files as the first version; the only difference is that a
        # simple pairwise threshold is used. The first two parameters (50 and
        # 5) are the same as above (window size and step); the third parameter
        # represents the r^2 threshold. Note: this represents the pairwise
        # SNP-SNP metric now, not the multiple correlation coefficient; also
        # note, this is based on the genotypic correlation, i.e. it does not
        # involve phasing.
        plog += prunelog
        flog = '%s.log' % outpath
        with open(flog,'w') as flogf:
            flogf.write(''.join(plog))
            flogf.write('\n')
        globme = os.path.join(outfpath,'*')
        flist = glob.glob(globme)
        flist.sort()
        for data in flist:
            fn = os.path.split(data)[-1]
            outf.write('<li><a href="%s">%s</a></li>\n' % (fn,fn))
        outf.write('</ol></div>\n')
        outf.write("</div></body></html>")
def main():
    """
    Command line driver for rgManQQ: make QQ/Manhattan plots and an html index.

    Reads eight parameters from ``sys.argv``: input file, title, html
    output path, output directory, chromosome column, offset column, a
    comma separated list of p value columns and a grey-scale flag ('1' or
    'true' enables it). Columns arrive zero based and are passed to
    ``doManQQ`` one based; 0 for chromosome/offset means "no Manhattan
    plot". Exits with status 1 if parameters are missing or the p value
    columns are not integers.
    """
    u = """<command interpreter="python">
        rgManQQ.py '$input_file' "$name" '$out_html' '$out_html.files_path' '$chrom_col' '$offset_col' '$pval_col'
    </command>
    """
    npar = 8
    # sys.argv[0] is the script itself, so npar parameters need npar + 1
    # entries; the original `< npar` let a 7-parameter call through to an
    # IndexError on sys.argv[8]
    if len(sys.argv) < npar + 1:
        sys.stdout.write('## error - too few command line parameters - wanting %d\n' % npar)
        sys.stdout.write(u + '\n')
        sys.exit(1)
    input_fname = sys.argv[1]
    title = sys.argv[2]
    # file-name safe version of the title
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme,'_'*len(killme))
    ctitle = title.translate(trantab)
    outhtml = sys.argv[3]
    outdir = sys.argv[4]
    try:
        chrom_col = int(sys.argv[5])
    except ValueError:
        chrom_col = -1
    try:
        offset_col = int(sys.argv[6])
    except ValueError:
        offset_col = -1
    try:
        pval_cols = [int(x) for x in sys.argv[7].strip().split(',')]
    except ValueError:
        pval_cols = None
    if chrom_col == -1 or offset_col == -1:
        # either column unusable - do not do manhattan plots
        chrom_col = -1
        offset_col = -1
    grey = 0
    if (sys.argv[8].lower() in ['1','true']):
        grey = 1
    if pval_cols is None:
        sys.stderr.write('## Cannot run rgManQQ - missing pval column\n')
        sys.exit(1)
    # columns are handed on one based
    p = ['%d' % (x + 1) for x in pval_cols]
    rlog,flist = doManQQ(input_fname,chrom_col+1,offset_col+1,','.join(p),title,grey,ctitle,outdir)
    flist.sort()
    html = [galhtmlprefix % progname,]
    html.append('<h1>%s</h1>' % title)
    if len(flist) > 0:
        html.append('<table>\n')
        for row in flist:
            fname,expl = row # RRun returns pairs of filenames fiddled for the log and R script
            n,e = os.path.splitext(fname)
            if e in ['.png','.jpg']:
                # the image links to a pdf of the same root name
                pdf = '%s.pdf' % n
                pdff = os.path.join(outdir,pdf)
                if os.path.exists(pdff):
                    rval = compressPDF(inpdf=pdff)
                    if rval != 0:
                        pdf = '%s(not_compressed)' % pdf
                else:
                    pdf = '%s(not_found)' % pdf
                s= '<tr><td><a href="%s"><img src="%s" title="%s" hspace="10" width="800"></a></td></tr>' \
                 % (pdf,fname,expl)
                html.append(s)
            else:
                html.append('<tr><td><a href="%s">%s</a></td></tr>' % (fname,expl))
        html.append('</table>\n')
    else:
        # the original closed this <h2> with </h1>
        html.append('<h2>### Error - R returned no files - please confirm that parameters are sane</h2>')
    html.append('<h3>R log follows below</h3><hr><pre>\n')
    html += rlog
    html.append('</pre>\n')
    html.append(galhtmlattr % (progname,timenow()))
    html.append(galhtmlpostfix)
    with open(outhtml,'w') as htmlf:
        htmlf.write('\n'.join(html))
        htmlf.write('\n')
def pruneld(plinktasks=[] ,cd='./',vclbase = []):
    """
    Run each plink task in turn and return the collected log lines.

    plinktasks -- list of argument lists; each is appended to ``vclbase``
                  to form one command
    cd         -- working directory for the plink processes
    vclbase    -- leading command tokens shared by every task

    Returns a list of strings: a header, the command line of each task and
    plink's combined stdout/stderr with the 'Pruning SNP' lines removed.
    Neither default list is mutated.

    plink blathers when doing pruning - ignore
    Linkage disequilibrium based SNP pruning

    if a million snps in 3 billion base pairs, have mean 3k spacing
    assume 40-60k of ld in ceu, a window of 120k width is about 40 snps
    so lots more is perhaps less efficient - each window computational
    cost is ON^2 unless the code is smart enough to avoid unecessary
    computation where allele frequencies make it impossible to see
    ld > the r^2 cutoff threshold
    So, do a window and move forward 20?

    from the plink docs at
    http://pngu.mgh.harvard.edu/~purcell/plink/summary.shtml#prune

    Sometimes it is useful to generate a pruned subset of SNPs that are in
    approximate linkage equilibrium with each other. This can be achieved
    via two commands: --indep which prunes based on the variance inflation
    factor (VIF), which recursively removes SNPs within a sliding window;
    second, --indep-pairwise which is similar, except it is based only on
    pairwise genotypic correlation.

    Hint: The output of either of these commands is two lists of SNPs:
    those that are pruned out and those that are not. A separate command
    using the --extract or --exclude option is necessary to actually
    perform the pruning.

    The VIF pruning routine is performed:
        plink --file data --indep 50 5 2
    will create files
        plink.prune.in
        plink.prune.out
    Each is a simple list of SNP IDs; both these files can subsequently be
    specified as the argument for a --extract or --exclude command.

    The parameters for --indep are: window size in SNPs (e.g. 50), the
    number of SNPs to shift the window at each step (e.g. 5), the VIF
    threshold. The VIF is 1/(1-R^2) where R^2 is the multiple correlation
    coefficient for a SNP being regressed on all other SNPs simultaneously.
    That is, this considers the correlations between SNPs but also between
    linear combinations of SNPs. A VIF of 10 is often taken to represent
    near collinearity problems in standard multiple regression analyses
    (i.e. implies R^2 of 0.9). A VIF of 1 would imply that the SNP is
    completely independent of all other SNPs. Practically, values between
    1.5 and 2 should probably be used; particularly in small samples, if
    this threshold is too low and/or the window size is too large, too
    many SNPs may be removed.

    The second procedure is performed:
        plink --file data --indep-pairwise 50 5 0.5
    This generates the same output files as the first version; the only
    difference is that a simple pairwise threshold is used. The first two
    parameters (50 and 5) are the same as above (window size and step); the
    third parameter represents the r^2 threshold. Note: this represents the
    pairwise SNP-SNP metric now, not the multiple correlation coefficient;
    also note, this is based on the genotypic correlation, i.e. it does not
    involve phasing.

    To give a concrete example: the command above that specifies 50 5 0.5
    would a) consider a window of 50 SNPs, b) calculate LD between each
    pair of SNPs in the window, b) remove one of a pair of SNPs if the LD
    is greater than 0.5, c) shift the window 5 SNPs forward and repeat the
    procedure.

    To make a new, pruned file, then use something like (in this example,
    we also convert the standard PED fileset to a binary one):
        plink --file data --extract plink.prune.in --make-bed --out pruneddata
    """
    logres = ['## Rgenetics %s: http://rgenetics.org Galaxy Tools rgLDIndep.py Plink pruneLD runner\n' % myversion,]
    for task in plinktasks: # each is a list
        fplog,plog = tempfile.mkstemp()
        # Wrap the descriptor mkstemp returned. The original reopened the
        # path and never closed `fplog`, leaking one descriptor per task.
        sto = os.fdopen(fplog,'w') # to catch the blather
        vcl = vclbase + task
        s = '## ldindep now executing %s\n' % ' '.join(vcl)
        print(s)
        logres.append(s)
        try:
            # NOTE(review): the tokens are joined and run through the shell,
            # so they must come from trusted input
            proc = subprocess.Popen(' '.join(vcl),shell=True,stdout=sto,stderr=sto,cwd=cd)
            proc.wait()
        finally:
            sto.close()
        try:
            with open(plog,'r') as sto: # read
                lplog = [line for line in sto.readlines() if line.find('Pruning SNP') == -1]
            logres += lplog
            logres.append('\n')
        except IOError:
            logres.append('### %s Strange - no std out from plink when running command line\n%s' % (timenow(),' '.join(vcl)))
        os.unlink(plog) # no longer needed
    return logres
def main():
    """Command line entry point for rgManQQ: run the plots and write the HTML report.

    Positional parameters read from ``sys.argv``:

    1. input file name
    2. title (used as the report heading; a punctuation/whitespace-free
       copy is also passed to ``doManQQ``)
    3. path of the HTML report to write
    4. output directory for the files produced by ``doManQQ``
    5. chromosome column number
    6. offset column number
    7. comma separated p-value column number(s)
    8. optional grey flag - '1' or 'true' (any case) selects grey=1

    Column numbers are incremented by one before being handed to
    ``doManQQ``; values that are not integers become 0.
    NOTE(review): presumably this converts zero-based Galaxy columns to
    one-based R columns - confirm against doManQQ.

    Exits with status 1 when fewer than the seven required parameters
    are supplied.
    """
    u = """<command interpreter="python"> rgManQQ.py '$input_file' "$name" '$out_html' '$out_html.files_path' '$chrom_col' '$offset_col' '$pval_col' </command> """
    sys.stdout.write("## rgManQQ.py. cl= \n%s\n" % " ".join(['"%s"' % x for x in sys.argv]))
    npar = 8  # program name plus the seven required parameters
    if len(sys.argv) < npar:
        sys.stdout.write("## error - too few command line parameters - wanting %d\n" % npar)
        sys.stdout.write("%s\n" % u)
        sys.exit(1)
    input_fname = sys.argv[1]
    title = sys.argv[2]
    # ctitle is the title with every punctuation/whitespace character
    # replaced by an underscore
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme, "_" * len(killme))
    ctitle = title.translate(trantab)
    outhtml = sys.argv[3]
    outdir = sys.argv[4]
    try:
        chrom_col = int(sys.argv[5]) + 1
    except ValueError:
        chrom_col = 0
    try:
        offset_col = int(sys.argv[6]) + 1
    except ValueError:
        offset_col = 0
    try:
        pcols = [int(x) + 1 for x in sys.argv[7].strip().split(",")]
        pval_cols = "c(%s)" % ",".join(map(str, pcols))
    except ValueError:
        pval_cols = "c(0)"
    if chrom_col == 1 or offset_col == 1:
        # was passed as zero - do not do manhattan plots
        chrom_col = 0
        offset_col = 0
    # The grey flag is optional: the length check above only guarantees
    # sys.argv[7], so indexing sys.argv[8] unconditionally would raise
    # IndexError for a call that supplies just the required parameters.
    grey = 0
    if len(sys.argv) > npar and sys.argv[npar].lower() in ("1", "true"):
        grey = 1
    rlog, flist = doManQQ(input_fname, chrom_col, offset_col, pval_cols,
                          title, grey, ctitle, outdir)
    flist.sort()
    html = [galhtmlprefix % progname]
    html.append("<h1>%s</h1>" % title)
    if flist:
        html.append("<table>\n")
        for fname, expl in flist:
            # RRun returns pairs of filenames fiddled for the log and R script
            ext = os.path.splitext(fname)[-1]
            if ext in (".png", ".jpg"):
                html.append(
                    '<tr><td><a href="%s"><img src="%s" alt="%s" hspace="10" width="400">'
                    '<br>(Click to download image %s)</a></td></tr>'
                    % (fname, fname, expl, expl)
                )
            else:
                html.append('<tr><td><a href="%s">%s</a></td></tr>' % (fname, expl))
        html.append("</table>\n")
    else:
        html.append("<h2>### Error - R returned no files - please confirm that parameters are sane</h2>")
    html.append("<h3>R log follows below</h3><hr><pre>\n")
    html += rlog
    html.append("</pre>\n")
    html.append(galhtmlattr % (progname, timenow()))
    html.append(galhtmlpostfix)
    with open(outhtml, "w") as htmlf:
        htmlf.write("\n".join(html))
        htmlf.write("\n")
logf = sys.argv[4] logoutdir = sys.argv[5] gffout = sys.argv[6] topn = 1000 try: os.makedirs(logoutdir) except: pass map_file = None me = sys.argv[0] amapf = '%s.bim' % bfname # to decode map in xformModel flog = file(logf, 'w') logme = [] cdir = os.getcwd() s = 'Rgenetics %s http://rgenetics.org Galaxy Tools, rgCaCo.py started %s\n' % ( myversion, timenow()) print >> sys.stdout, s # so will appear as blurb for file logme.append(s) if verbose: s = 'rgCaCo.py: bfname=%s, logf=%s, argv = %s\n' % (bfname, logf, sys.argv) print >> sys.stdout, s # so will appear as blurb for file logme.append(s) twd = tempfile.mkdtemp( suffix='rgCaCo' ) # make sure plink doesn't spew log file into the root! tname = os.path.join(twd, name) vcl = [plinke, '--noweb', '--bfile', bfname, '--out', name, '--model'] p = subprocess.Popen(' '.join(vcl), shell=True, stdout=flog, cwd=twd) retval = p.wait() resf = '%s.model' % tname # plink output is here we hope
def doIBS(n=100):
    """Parse the Galaxy command line, run ``doIBSpy`` and write the HTML report.

    Galaxy invokes this as:

        <command interpreter="python">
        rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name"
        '$out_file1' '$out_file1.files_path' "$title1" '$n' '$Z'
        </command>

    so ``sys.argv`` holds: 1 input pbed path, 2 basename, 3 output html
    path, 4 output directory, 5 title, 6 number of samples, 7 Z cutoff
    (optional, 2.0 when absent or not a number).

    The ``n`` parameter is kept for interface compatibility only; it is
    always replaced by ``int(sys.argv[6])``.

    Exits with status 1 when fewer than six parameters are supplied.
    """
    u = """<command interpreter="python"> rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name" '$out_file1' '$out_file1.files_path' "$title1" '$n' '$Z' </command> """
    if len(sys.argv) < 7:
        sys.stdout.write('Need pbed inpath, basename, out_htmlname, outpath, title, logpath, nSNP, Zcutoff on command line please\n')
        sys.stdout.write('%s\n' % u)
        sys.exit(1)
    # punctuation and whitespace in the title become underscores so the
    # title can be embedded in a file name
    ts = '%s%s' % (string.punctuation, string.whitespace)
    ptran = string.maketrans(ts, '_' * len(ts))
    inpath = sys.argv[1]
    basename = sys.argv[2]
    outhtml = sys.argv[3]
    newfilepath = sys.argv[4]
    title = sys.argv[5].translate(ptran)
    logfname = 'Log_%s.txt' % title
    # the log lives in the output directory so it is listed with the other files
    logpath = os.path.join(newfilepath, logfname)
    n = int(sys.argv[6])
    try:
        Zcutoff = float(sys.argv[7])
    except (IndexError, ValueError):
        Zcutoff = 2.0
    try:
        os.makedirs(newfilepath)
    except OSError:
        pass  # best effort: the directory usually exists already
    logf = open(logpath, 'w')
    try:
        ped = plinkbinJZ.BPed(inpath)
        # Guard before parse(): checking after calling a method on ped could
        # never trigger. The message used to reference an undefined name.
        if ped is None:
            sys.stderr.write('## doIBSpy problem - cannot open %s or %s - cannot run\n'
                             % (inpath, basename))
            sys.exit(1)
        ped.parse(quick=True)
        newfiles, explanations, repOut = doIBSpy(
            ped=ped, basename=basename, outdir=newfilepath, logf=logf,
            nrsSamples=n, title=title, pdftoo=0, Zcutoff=Zcutoff)
    finally:
        logf.close()  # closed exactly once, also when doIBSpy raises
    with open(logpath, 'r') as logsrc:
        logfs = logsrc.readlines()
    with open(outhtml, 'w') as lf:
        lf.write(galhtmlprefix % PROGNAME)
        # this is a mess. todo clean up - should each datatype have it's own directory? Yes
        # probably. Then titles are universal - but userId libraries are separate.
        s = '<div>Output from %s run at %s<br>\n' % (PROGNAME, timenow())
        lf.write('<h4>%s</h4>\n' % s)
        fixed = ["'%s'" % x for x in sys.argv]  # add quotes just in case
        s = 'If you need to rerun this analysis, the command line was\n<pre>%s</pre>\n</div>' % (
            ' '.join(fixed))
        lf.write(s)
        if newfiles:
            # the first output file is embedded inline as an svg image;
            # skipped when doIBSpy produced no files at all
            s = """ <embed src="%s" type="image/svg+xml" width="%d" height="%d" />""" % (
                newfiles[0], PLOT_WIDTH, PLOT_HEIGHT)
            lf.write(s)
        lf.write('<div><h4>Click the links below to save output files and plots</h4><br><ol>\n')
        for i, newfile in enumerate(newfiles):
            if i == 0:
                lf.write('<li><a href="%s" type="image/svg+xml" >%s</a></li>\n'
                         % (newfile, explanations[i]))
            else:
                lf.write('<li><a href="%s">%s</a></li>\n' % (newfile, explanations[i]))
        # also link anything else found in the output directory
        for fname in os.listdir(newfilepath):
            if fname not in newfiles:
                lf.write('<li><a href="%s">%s</a></li>\n' % (fname, fname))
        lf.write('</ol></div>')
        lf.write('<div>%s</div>' % ('\n'.join(repOut)))  # repOut is a list of tables
        lf.write('<div><hr><h3>Log from this job (also stored in %s)</h3><pre>%s</pre><hr></div>'
                 % (logfname, ''.join(logfs)))
        lf.write('</body></html>\n')
def doIBS(n=100):
    """Parse the Galaxy command line, run ``doIBSpy`` and write the HTML report.

    Galaxy invokes this as:

        <command interpreter="python">
        rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name"
        '$out_file1' '$out_file1.files_path' "$title1" '$n' '$Z'
        </command>

    so ``sys.argv`` holds: 1 input pbed path, 2 basename, 3 output html
    path, 4 output directory, 5 title, 6 number of samples, 7 Z cutoff
    (optional, 2.0 when absent or not a number).

    The ``n`` parameter is kept for interface compatibility only; it is
    always replaced by ``int(sys.argv[6])``.

    Exits with status 1 when fewer than six parameters are supplied.
    """
    u = """<command interpreter="python"> rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name" '$out_file1' '$out_file1.files_path' "$title1" '$n' '$Z' </command> """
    if len(sys.argv) < 7:
        sys.stdout.write('Need pbed inpath, basename, out_htmlname, outpath, title, logpath, nSNP, Zcutoff on command line please\n')
        sys.stdout.write('%s\n' % u)
        sys.exit(1)
    # punctuation and whitespace in the title become underscores so the
    # title can be embedded in a file name
    ts = '%s%s' % (string.punctuation, string.whitespace)
    ptran = string.maketrans(ts, '_' * len(ts))
    inpath = sys.argv[1]
    basename = sys.argv[2]
    outhtml = sys.argv[3]
    newfilepath = sys.argv[4]
    title = sys.argv[5].translate(ptran)
    logfname = 'Log_%s.txt' % title
    # the log lives in the output directory so it is listed with the other files
    logpath = os.path.join(newfilepath, logfname)
    n = int(sys.argv[6])
    try:
        Zcutoff = float(sys.argv[7])
    except (IndexError, ValueError):
        Zcutoff = 2.0
    try:
        os.makedirs(newfilepath)
    except OSError:
        pass  # best effort: the directory usually exists already
    logf = open(logpath, 'w')
    try:
        ped = plinkbinJZ.BPed(inpath)
        # Guard before parse(): checking after calling a method on ped could
        # never trigger. The message used to reference an undefined name.
        if ped is None:
            sys.stderr.write('## doIBSpy problem - cannot open %s or %s - cannot run\n'
                             % (inpath, basename))
            sys.exit(1)
        ped.parse(quick=True)
        newfiles, explanations, repOut = doIBSpy(
            ped=ped, basename=basename, outdir=newfilepath, logf=logf,
            nrsSamples=n, title=title, pdftoo=0, Zcutoff=Zcutoff)
    finally:
        logf.close()  # closed exactly once, also when doIBSpy raises
    with open(logpath, 'r') as logsrc:
        logfs = logsrc.readlines()
    with open(outhtml, 'w') as lf:
        lf.write(galhtmlprefix % PROGNAME)
        # this is a mess. todo clean up - should each datatype have it's own directory? Yes
        # probably. Then titles are universal - but userId libraries are separate.
        s = '<div>Output from %s run at %s<br>\n' % (PROGNAME, timenow())
        lf.write('<h4>%s</h4>\n' % s)
        fixed = ["'%s'" % x for x in sys.argv]  # add quotes just in case
        s = 'If you need to rerun this analysis, the command line was\n<pre>%s</pre>\n</div>' % (
            ' '.join(fixed))
        lf.write(s)
        if newfiles:
            # the first output file is embedded inline as an svg image;
            # skipped when doIBSpy produced no files at all
            s = """ <embed src="%s" type="image/svg+xml" width="%d" height="%d" />""" % (
                newfiles[0], PLOT_WIDTH, PLOT_HEIGHT)
            lf.write(s)
        lf.write('<div><h4>Click the links below to save output files and plots</h4><br><ol>\n')
        for i, newfile in enumerate(newfiles):
            if i == 0:
                lf.write('<li><a href="%s" type="image/svg+xml" >%s</a></li>\n'
                         % (newfile, explanations[i]))
            else:
                lf.write('<li><a href="%s">%s</a></li>\n' % (newfile, explanations[i]))
        # also link anything else found in the output directory
        for fname in os.listdir(newfilepath):
            if fname not in newfiles:
                lf.write('<li><a href="%s">%s</a></li>\n' % (fname, fname))
        lf.write('</ol></div>')
        lf.write('<div>%s</div>' % ('\n'.join(repOut)))  # repOut is a list of tables
        lf.write('<div><hr><h3>Log from this job (also stored in %s)</h3><pre>%s</pre><hr></div>'
                 % (logfname, ''.join(logfs)))
        lf.write('</body></html>\n')
alogf = options.logf # absolute paths od = os.path.split(alogf)[0] try: os.path.makedirs(od) except: pass aoutf = options.outfname # absolute paths od = os.path.split(aoutf)[0] try: os.path.makedirs(od) except: pass vcl = [plinke,'--noweb', '--bfile',options.bfname,'--out',title,'--mind','0.5','--tdt'] logme = [] if verbose: s = 'Rgenetics %s http://rgenetics.org Galaxy Tools rgTDT.py started %s\n' % (myversion,timenow()) print >> sys.stdout,s logme.append(s) s ='rgTDT.py: bfname=%s, logf=%s, argv = %s\n' % (options.bfname,alogf, sys.argv) print >> sys.stdout,s logme.append(s) s = 'rgTDT.py: vcl=%s\n' % (' '.join(vcl)) print >> sys.stdout,s logme.append(s) twd = tempfile.mkdtemp(suffix='rgTDT') # make sure plink doesn't spew log file into the root! tname = os.path.join(twd,title) p=subprocess.Popen(' '.join(vcl),shell=True,cwd=twd) retval = p.wait() shutil.copy('%s.log' % tname,alogf) sto = file(alogf,'a') sto.write('\n'.join(logme))
def main():
    """Command line entry point for rgManQQ: run the plots and write the HTML report.

    Positional parameters read from ``sys.argv``:

    1. input file name
    2. title (used as the report heading; a punctuation/whitespace-free
       copy is also passed to ``doManQQ``)
    3. path of the HTML report to write
    4. output directory for the files produced by ``doManQQ``
    5. chromosome column number
    6. offset column number
    7. comma separated p-value column number(s)
    8. optional grey flag - '1' or 'true' (any case) selects grey=1

    Every column number is incremented by one before being passed to
    ``doManQQ``. If either the chromosome or the offset column is not an
    integer, both are passed as 0.
    NOTE(review): presumably 0 tells doManQQ to skip the manhattan plot -
    confirm against doManQQ.

    Each image row of the report links to a PDF with the same stem as
    the image; existing PDFs are passed through ``compressPDF``.

    Exits with status 1 when fewer than the seven required parameters
    are supplied or when a p-value column is not an integer.
    """
    u = """<command interpreter="python"> rgManQQ.py '$input_file' "$name" '$out_html' '$out_html.files_path' '$chrom_col' '$offset_col' '$pval_col' </command> """
    npar = 8  # program name plus the seven required parameters
    if len(sys.argv) < npar:
        sys.stdout.write('## error - too few command line parameters - wanting %d\n' % npar)
        sys.stdout.write('%s\n' % u)
        sys.exit(1)
    input_fname = sys.argv[1]
    title = sys.argv[2]
    # ctitle is the title with every punctuation/whitespace character
    # replaced by an underscore
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme, '_' * len(killme))
    ctitle = title.translate(trantab)
    outhtml = sys.argv[3]
    outdir = sys.argv[4]
    try:
        chrom_col = int(sys.argv[5])
    except ValueError:
        chrom_col = -1
    try:
        offset_col = int(sys.argv[6])
    except ValueError:
        offset_col = -1
    if chrom_col == -1 or offset_col == -1:
        # one of the two was not a usable column number - drop both so
        # doManQQ receives 0 for each after the +1 shift below
        chrom_col = -1
        offset_col = -1
    # The grey flag is optional: the length check above only guarantees
    # sys.argv[7], so indexing sys.argv[8] unconditionally would raise
    # IndexError for a call that supplies just the required parameters.
    grey = 0
    if len(sys.argv) > npar and sys.argv[npar].lower() in ('1', 'true'):
        grey = 1
    try:
        pcols = ['%d' % (int(x) + 1) for x in sys.argv[7].strip().split(',')]
    except ValueError:
        sys.stderr.write('## Cannot run rgManQQ - missing pval column\n')
        sys.exit(1)
    rlog, flist = doManQQ(input_fname, chrom_col + 1, offset_col + 1,
                          ','.join(pcols), title, grey, ctitle, outdir)
    flist.sort()
    html = [galhtmlprefix % progname]
    html.append('<h1>%s</h1>' % title)
    if flist:
        html.append('<table>\n')
        for fname, expl in flist:
            # RRun returns pairs of filenames fiddled for the log and R script
            stem, ext = os.path.splitext(fname)
            if ext in ('.png', '.jpg'):
                pdf = '%s.pdf' % stem
                pdff = os.path.join(outdir, pdf)
                if os.path.exists(pdff):
                    rval = compressPDF(inpdf=pdff)
                    if rval != 0:
                        # the suffix is a visible marker in the link target
                        pdf = '%s(not_compressed)' % pdf
                else:
                    pdf = '%s(not_found)' % pdf
                html.append(
                    '<tr><td><a href="%s"><img src="%s" title="%s" hspace="10" width="800"></a></td></tr>'
                    % (pdf, fname, expl))
            else:
                html.append('<tr><td><a href="%s">%s</a></td></tr>' % (fname, expl))
        html.append('</table>\n')
    else:
        html.append('<h2>### Error - R returned no files - please confirm that parameters are sane</h2>')
    html.append('<h3>R log follows below</h3><hr><pre>\n')
    html += rlog
    html.append('</pre>\n')
    html.append(galhtmlattr % (progname, timenow()))
    html.append(galhtmlpostfix)
    with open(outhtml, 'w') as htmlf:
        htmlf.write('\n'.join(html))
        htmlf.write('\n')
def runEigen():
    """Run smartpca on a plink binary dataset and write an HTML report.

    Galaxy invokes this as:

        <command interpreter="python">
        rgEigPCA.py "$i.extra_files_path/$i.metadata.base_name" "$title"
        "$out_file1" "$out_file1.files_path" "$k" "$m" "$t" "$s" "$pca"
        </command>

    so ``sys.argv`` holds: 1 input file root (``.bed``/``.bim``/``.fam``
    are appended), 2 title, 3 HTML report path, 4 output directory,
    5-8 the smartpca tuning parameters k, m, t and s, 9 path that
    receives a copy of the eigenvector (``.evec``) output.

    The command line built here has the same shape as

        smartpca.perl -i fakeped_100.eigenstratgeno -a fakeped_100.map
            -b fakeped_100.ind -p fakeped_100 -e fakeped_100.eigenvals
            -l fakeped_100.eigenlog -o fakeped_100.eigenout

    DOCUMENTATION OF smartpca.perl program:
    This program calls the smartpca program (see ../POPGEN/README).
    For this to work, the bin directory containing smartpca MUST be in
    your path. See ./example.perl for a toy example.

    ../bin/smartpca.perl
    -i example.geno : genotype file in EIGENSTRAT format (see ../CONVERTF/README)
    -a example.snp  : snp file (see ../CONVERTF/README)
    -b example.ind  : indiv file (see ../CONVERTF/README)
    -k k            : (Default is 10) number of principal components to output
    -o example.pca  : output file of principal components. Individuals removed
                      as outliers will have all values set to 0.0 in this file.
    -p example.plot : prefix of output plot files of top 2 principal components.
                      (labeling individuals according to labels in indiv file)
    -e example.eval : output file of all eigenvalues
    -l example.log  : output logfile
    -m maxiter      : (Default is 5) maximum number of outlier removal
                      iterations. To turn off outlier removal, set -m 0.
    -t topk         : (Default is 10) number of principal components along
                      which to remove outliers during each outlier removal
                      iteration.
    -s sigma        : (Default is 6.0) number of standard deviations which an
                      individual must exceed, along one of topk top principal
                      components, in order to be removed as an outlier.

    now uses https://www.bx.psu.edu/cgi-bin/trac.cgi/galaxy/changeset/1832
    All files can be viewed however, by making links in the primary (HTML)
    history item like:
    <img src="display_child?parent_id=2&designation=SomeImage?" alt="Some Image"/>
    <a href="display_child?parent_id=2&designation=SomeText?">Some Text</a>

    Exits with status 1 when fewer than nine parameters are supplied.
    """
    # sys.argv[9] is read below, so ten entries are needed (program name
    # plus nine parameters); checking for nine let a short command line
    # through to an IndexError.
    if len(sys.argv) < 10:
        sys.stdout.write('Need an input genotype file root, a title, a temp id and the temp file path for outputs,\n')
        sys.stdout.write(' and the 4 integer tuning parameters k,m,t and s in order. Given that, will run smartpca for eigensoft\n')
        sys.exit(1)
    sys.stdout.write('rgEigPCA.py got %s\n' % (' '.join(sys.argv)))
    skillme = ' %s' % string.punctuation
    trantab = string.maketrans(skillme, '_' * len(skillme))
    progname = os.path.basename(sys.argv[0])
    infile = sys.argv[1]  # now takes precomputed or autoconverted ldreduced dataset
    title = sys.argv[2].translate(trantab)  # must replace all of these for urls containing title
    outfile1 = sys.argv[3]
    newfilepath = sys.argv[4]
    # os.makedirs is the real function; the previous os.mkdirs call raised
    # AttributeError, which a bare except silently discarded.
    for needed in (newfilepath, os.path.split(outfile1)[0]):
        try:
            os.makedirs(needed)
        except OSError:
            pass  # best effort: usually the directory exists already
    eigen_k = sys.argv[5]
    eigen_m = sys.argv[6]
    eigen_t = sys.argv[7]
    eigen_s = sys.argv[8]
    eigpca = sys.argv[9]  # path to new dataset for pca results - for later adjustment
    explanations = ['Samples plotted in first 2 eigenvector space', 'Principle components',
                    'Eigenvalues', 'Smartpca log (contents shown below)']
    rplotname = 'PCAPlot.pdf'
    eigenexts = [rplotname, "pca.xls", "eval.xls"]
    newfiles = ['%s_%s' % (title, x) for x in eigenexts]  # produced by eigenstrat
    eigenouts = list(newfiles)
    eigenlogf = '%s_log.txt' % title
    newfiles.append(eigenlogf)  # so it will also appear in the links
    with open(outfile1, 'w') as lf:
        lf.write(galhtmlprefix % progname)
        smartCL = '%s -i %s.bed -a %s.bim -b %s.fam -o %s -p %s -e %s -l %s -k %s -m %s -t %s -s %s' % \
            (smartpca, infile, infile, infile, eigenouts[1], '%s_eigensoftplot.pdf' % title,
             eigenouts[2], eigenlogf, eigen_k, eigen_m, eigen_t, eigen_s)
        # relative output names in smartCL resolve inside newfilepath
        proc = subprocess.Popen(smartCL, shell=True, cwd=newfilepath)
        proc.wait()
        with open(os.path.join(newfilepath, eigenlogf), 'r') as logsrc:
            elog = logsrc.read()
        # copy the eigenvector output file needed for adjustment to the
        # path supplied as the last parameter
        eeigen = os.path.join(newfilepath, '%s.evec' % eigenouts[1])  # need these for adjusting
        try:
            with open(eeigen, 'r') as evecsrc:
                eigpcaRes = evecsrc.read()
        except IOError:
            eigpcaRes = ''  # no eigenvector file: an empty copy is written
        with open(eigpca, 'w') as evecdest:
            evecdest.write(eigpcaRes)
        makePlot(eigpca=eigpca, pdfname=newfiles[0], title=title, nfp=newfilepath, rexe=rexe)
        s = 'Output from %s run at %s<br/>\n' % (progname, timenow())
        lf.write('<h4>%s</h4>\n' % s)
        lf.write('newfilepath=%s, rexe=%s' % (newfilepath, rexe))
        lf.write('(click on the image below to see a much higher quality PDF version)')
        thumbnail = '%s.png' % newfiles[0]  # foo.pdf.png - who cares?
        if os.path.exists(os.path.join(newfilepath, thumbnail)):
            lf.write('<table border="0" cellpadding="10" cellspacing="10"><tr><td>\n')
            lf.write('<a href="%s"><img src="%s" alt="%s" hspace="10" align="left" /></a></td></tr></table><br/>\n'
                     % (newfiles[0], thumbnail, explanations[0]))
        allfiles = sorted(os.listdir(newfilepath))
        lallfiles = ['<li><a href="%s">%s %s</a></li>\n' % (x, x, getfSize(x, newfilepath))
                     for x in allfiles]  # html list
        lf.write('<div class="document">All Files:<ol>%s</ol></div>' % ''.join(lallfiles))
        lf.write('<div class="document">Log %s contents follow below<p/>' % eigenlogf)
        lf.write('<pre>%s</pre></div>' % elog)  # the eigenlog
        s = 'If you need to rerun this analysis, the command line used was\n%s\n<p/>' % (smartCL)
        lf.write(s)
        lf.write(galhtmlpostfix)  # end galhtmlprefix div