def run( self ):
    """
    Convert fasta headers with FastaUtils.convertFastaHeadersFromChkToChr().

    If self._inData is an existing file, it is converted into self._outFile
    using the map file self._mapFile. Otherwise self._inData is used as a
    glob pattern and each matching file '<name>' is converted into
    '<name>.onChr'. When no file matches the pattern, a warning is written
    on stdout and nothing is converted.

    self.start() is called first and self.end() exactly once at the end.
    """
    self.start()

    if os.path.isfile( self._inData ):
        FastaUtils.convertFastaHeadersFromChkToChr( self._inData, self._mapFile, self._outFile )
    else:
        lInFiles = glob.glob( self._inData )
        if not lInFiles:
            # The placeholder used to be a bare '%', which made the formatting
            # itself raise ValueError instead of printing the warning.
            msg = "WARNING: no file corresponds to pattern '%s'" % ( self._inData )
            sys.stdout.write( "%s\n" % ( msg ) )
            # No self.end() here: the single call at the bottom also covers
            # this case (it used to be called twice when nothing matched).
        for inFile in lInFiles:
            outFile = "%s.onChr" % ( inFile )
            FastaUtils.convertFastaHeadersFromChkToChr( inFile, self._mapFile, outFile )

    self.end()
def run( self ):
    """
    Run the program.

    Steps, in order:
      1. record the headers of the input file (FastaUtils.dbHeaders);
      2. run ChangeSequenceHeaders (step 1, prefix 'seq') writing
         '<input>.shortH' and the link file '<input>.shortHlink';
      3. load '<input>.shortH' in a BioseqDB, call upCase() on it and save it
         back under the same name (through a temporary '<input>.shortHtmp');
      4. launch the program command line with os.system(); if it returns a
         non-zero status, print an error and exit with status 1;
      5. run ChangeSequenceHeaders (step 2) on '<input>.shortH.fa_aln',
         writing '<input>.shortH.fa_aln.initH';
      6. fetch each header recorded at step 1 from that file, call upCase()
         on the sequence and write it to the output file, then remove the
         '.initH' file.

    NOTE(review): steps 1 and 2 of ChangeSequenceHeaders presumably shorten
    and then restore the headers, and '<input>.shortH.fa_aln' is presumably
    produced by the launched program -- confirm against those components.
    """
    self.start()

    # NOTE(review): the getters are assumed to be side-effect free, so their
    # values are read once and reused below.
    inFile = self.getInputFile()
    subVerbosity = self.getVerbosityLevel() - 1
    shortHFile = "%s.shortH" % ( inFile )
    shortHTmpFile = "%s.shortHtmp" % ( inFile )
    linkFile = "%s.shortHlink" % ( inFile )
    alnFile = "%s.shortH.fa_aln" % ( inFile )
    initHFile = "%s.shortH.fa_aln.initH" % ( inFile )

    # headers are kept to write the final output in the input order
    lInitHeaders = FastaUtils.dbHeaders( inFile, subVerbosity )

    csh = ChangeSequenceHeaders()
    csh.setInputFile( inFile )
    csh.setFormat( "fasta" )
    csh.setStep( 1 )
    csh.setPrefix( "seq" )
    csh.setLinkFile( linkFile )
    csh.setOutputFile( shortHFile )
    csh.setVerbosityLevel( subVerbosity )
    csh.run()

    bsDB = BioseqDB( shortHFile )
    bsDB.upCase()
    bsDB.save( shortHTmpFile )
    del bsDB
    os.rename( shortHTmpFile, shortHFile )

    self.setProgramCommandLine()
    cmd = self.getProgramCommandLine()
    if self.getVerbosityLevel() > 0:
        print( "LAUNCH: %s" % ( cmd ) )
        sys.stdout.flush()
    exitStatus = os.system( cmd )
    if exitStatus != 0:
        msg = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
        print( msg )
        sys.exit(1)

    # same ChangeSequenceHeaders instance, now configured for step 2
    csh.setInputFile( alnFile )
    csh.setFormat( "fasta" )
    csh.setStep( 2 )
    csh.setLinkFile( linkFile )
    csh.setOutputFile( initHFile )
    csh.setVerbosityLevel( subVerbosity )
    csh.run()

    absDB = AlignedBioseqDB( initHFile )
    outFileHandler = open( self.getOutputFile(), "w" )
    try:
        for header in lInitHeaders:
            bs = absDB.fetch( header )
            bs.upCase()
            bs.write( outFileHandler )
    finally:
        # close the output file even if a fetch or a write fails
        outFileHandler.close()
    os.remove( initHFile )

    self.end()
def main():
    """
    Command-line entry point: sort sequences by increasing length through
    FastaUtils.sortSequencesByIncreasingLength().

    Options (parsed with getopt):
      -h  print the help and exit with status 0
      -i  name of the input file (compulsory)
      -o  name of the output file
      -v  verbosity level (integer, default 0)

    Returns 0 on success. Exits with status 1 on an invalid option, on a
    missing input file name, or when the sorting function returns a
    non-zero value.
    """
    inFileName = ""
    outFileName = ""
    verbose = 0

    try:
        options, _ = getopt.getopt( sys.argv[1:], "hi:o:v:" )
    except getopt.GetoptError:
        help()
        sys.exit(1)

    for flag, value in options:
        if flag == "-h":
            help()
            sys.exit(0)
        if flag == "-i":
            inFileName = value
        elif flag == "-o":
            outFileName = value
        elif flag == "-v":
            verbose = int( value )

    if inFileName == "":
        print( "*** Error: missing compulsory options" )
        help()
        sys.exit(1)

    # name of the script without its directory, used in the progress messages
    progName = sys.argv[0].split("/")[-1]

    if verbose > 0:
        print( "beginning of %s" % ( progName ) )
        sys.stdout.flush()

    status = FastaUtils.sortSequencesByIncreasingLength( inFileName, outFileName, verbose )
    if status != 0:
        print( "*** Error: sortSequencesByIncreasingLength() returned %i" % ( status ) )
        sys.exit(1)

    if verbose > 0:
        print( "%s finished successfully" % ( progName ) )
        sys.stdout.flush()

    return 0
if sbjFileName != "": if not os.path.exists( sbjFileName ): string = "subject file '%s' doesn't exist" % ( sbjFileName ) logging.error( string ) print "ERROR: %s" % ( string ) sys.exit(1) else: sbjFileName = qryFileName logging.info( "remove redundancy among '%s' (queries) compare to '%s' (subjects)" % ( qryFileName, sbjFileName ) ) #-------------------------------------------------------------------------- # check the input files are not empty, otherwise exit nbSeqQry = FastaUtils.dbSize( qryFileName ) if nbSeqQry == 0: string = "query file is empty" logging.info( string ) print "WARNING: %s" % ( string ) logging.info( "finished" ) sys.exit(0) nbSeqSbj = FastaUtils.dbSize( sbjFileName ) if sbjFileName != qryFileName: nbSeqSbj = FastaUtils.dbSize( sbjFileName ) if nbSeqSbj == 0: string = "subject file is empty" logging.info( string ) print "WARNING: %s" % ( string ) logging.info( "finished" )
elif o == "-v": verbose = int(a) if inData == "" or ( not os.path.isfile( inData ) \ and not os.path.isdir( inData ) ): msg = "ERROR: missing input file or directory (-i or -I)" sys.stderr.write( "%s\n" % msg ) help() sys.exit(1) if outData == "": print "ERROR: missing name of output file or directory (-o or -O)" help() sys.exit(1) if verbose > 0: print "START %s" % ( sys.argv[0].split("/")[-1] ) sys.stdout.flush() FastaUtils.dbShuffle( inData, outData, verbose ) if verbose > 0: print "END %s" % ( sys.argv[0].split("/")[-1] ) sys.stdout.flush() return 0 if __name__ == "__main__": main()
print "prepare MSP file"; sys.stdout.flush() prg = os.environ["REPET_PATH"] + "/bin/align2recon.py" cmd = prg cmd += " -i %s" % ( alignFileName ) cmd += " -o %s_MSP_file" % ( projectName ) cmd += " -v %i" % ( verbose - 1 ) returnStatus = os.system( cmd ) if returnStatus != 0: msg = "ERROR: '%s' returned '%i'" % ( prg, returnStatus ) sys.stderr.write( "%s\n" % ( msg ) ) sys.exit(1) if verbose > 0: print "prepare seq file"; sys.stdout.flush() lSeqNames = FastaUtils.dbHeaders( faFileName, verbose - 1 ) lSeqNames.sort() seqListFile = open( projectName + "_seq_list", "w" ) seqListFile.write( str(len(lSeqNames)) + "\n" ) for seqName in lSeqNames: seqListFile.write( ">%s\n" % ( seqName ) ) seqListFile.close() if verbose > 0: print "Recon is running..."; sys.stdout.flush() prg = "recon.pl" cmd = prg cmd += " %s_seq_list" % ( projectName ) cmd += " %s_MSP_file" % ( projectName ) cmd += " 1"
elif o == "-d": newDir = True elif o == "-s": useSeqHeader = True elif o == "-p": prefix = a elif o == "-v": verbose = int(a) if inFile == "": msg = "ERROR: missing input file (-i)" sys.stderr.write( "%s\n" % ( msg ) ) help() sys.exit(1) if verbose > 0: print "START %s" % ( sys.argv[0].split("/")[-1] ) sys.stdout.flush() FastaUtils.dbSplit( inFile, nbSeqPerBatch, newDir, useSeqHeader, prefix, verbose ) if verbose > 0: print "END %s" % ( sys.argv[0].split("/")[-1] ) sys.stdout.flush() return 0 if __name__ == "__main__": main()
sys.stderr.write( "%s\n" % ( msg ) ) help() sys.exit(1) if clusteringMethod == "": msg = "ERROR: missing clustering method (-c)" sys.stderr.write( "%s\n" % ( msg ) ) help() sys.exit(1) if not os.path.exists( inFileName ): msg = "ERROR: can't find file '%s'" % ( inFileName ) sys.stderr.write( "%s\n" % ( msg ) ) help() sys.exit(1) if verbose > 0: print "START %s" % (sys.argv[0].split("/")[-1]) sys.stdout.flush() FastaUtils.splitSeqPerCluster( inFileName, clusteringMethod, simplifyHeader, createDir, "seqCluster", verbose ) if verbose > 0: print "END %s" % (sys.argv[0].split("/")[-1]) sys.stdout.flush() return 0 if __name__ == "__main__": main()