def run( self ):
     """
     Convert the headers of one fasta file, or of every file matching a pattern.

     If 'self._inData' is an existing file, it is converted into 'self._outFile'.
     Otherwise 'self._inData' is used as a glob pattern and each matching file
     '<f>' is converted into '<f>.onChr'. The conversion itself is delegated to
     FastaUtils.convertFastaHeadersFromChkToChr(), together with 'self._mapFile'.

     A warning is written on stdout when the pattern matches no file.
     'self.start()' and 'self.end()' are each called exactly once.
     """
     self.start()

     if os.path.isfile( self._inData ):
         FastaUtils.convertFastaHeadersFromChkToChr( self._inData, self._mapFile, self._outFile )
     else:
         lInFiles = glob.glob( self._inData )
         if not lInFiles:
             # the conversion specifier must be '%s': a bare '%' raises ValueError
             msg = "WARNING: no file corresponds to pattern '%s'" % ( self._inData )
             sys.stdout.write( "%s\n" % ( msg ) )
             # no self.end() here: the single call below covers this case too
         for inFile in lInFiles:
             outFile = "%s.onChr" % ( inFile )
             FastaUtils.convertFastaHeadersFromChkToChr( inFile, self._mapFile, outFile )

     self.end()
 def run( self ):
     """
     Run the program.

     Steps, in order:
       1. record the headers of the input file (FastaUtils.dbHeaders);
       2. run ChangeSequenceHeaders step 1 (prefix 'seq') to write
          '<input>.shortH' and the link file '<input>.shortHlink';
       3. upper-case the sequences of '<input>.shortH' (via a temporary
          '<input>.shortHtmp' file renamed over it);
       4. launch the command line built by setProgramCommandLine() with
          os.system(); exit with status 1 if it returns non-zero;
       5. run ChangeSequenceHeaders step 2 on '<input>.shortH.fa_aln'
          (presumably written by the launched program) to obtain
          '<input>.shortH.fa_aln.initH';
       6. write the aligned sequences, upper-cased, into the output file
          in the order of the headers recorded at step 1, then remove
          '<input>.shortH.fa_aln.initH'.
     """
     self.start()

     inFile = self.getInputFile()
     subVerbosity = self.getVerbosityLevel() - 1
     shortHFile = "%s.shortH" % ( inFile )
     shortHTmpFile = "%s.shortHtmp" % ( inFile )
     linkFile = "%s.shortHlink" % ( inFile )
     alnFile = "%s.shortH.fa_aln" % ( inFile )
     alnInitHFile = "%s.shortH.fa_aln.initH" % ( inFile )

     # keep the initial headers: they fix the order of the final output
     lInitHeaders = FastaUtils.dbHeaders( inFile, subVerbosity )

     csh = ChangeSequenceHeaders()
     csh.setInputFile( inFile )
     csh.setFormat( "fasta" )
     csh.setStep( 1 )
     csh.setPrefix( "seq" )
     csh.setLinkFile( linkFile )
     csh.setOutputFile( shortHFile )
     csh.setVerbosityLevel( subVerbosity )
     csh.run()

     # upper-case into a temporary file, then replace the '.shortH' file
     bsDB = BioseqDB( shortHFile )
     bsDB.upCase()
     bsDB.save( shortHTmpFile )
     del bsDB
     os.rename( shortHTmpFile, shortHFile )

     self.setProgramCommandLine()
     cmd = self.getProgramCommandLine()
     if self.getVerbosityLevel() > 0:
         print( "LAUNCH: %s" % ( cmd ) )
         sys.stdout.flush()
     exitStatus = os.system( cmd )
     if exitStatus != 0:
         msg = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
         print( msg )
         sys.exit(1)

     # same ChangeSequenceHeaders instance, now used for step 2
     csh.setInputFile( alnFile )
     csh.setFormat( "fasta" )
     csh.setStep( 2 )
     csh.setLinkFile( linkFile )
     csh.setOutputFile( alnInitHFile )
     csh.setVerbosityLevel( subVerbosity )
     csh.run()

     absDB = AlignedBioseqDB( alnInitHFile )
     outFileHandler = open( self.getOutputFile(), "w" )
     try:
         for header in lInitHeaders:
             bs = absDB.fetch( header )
             bs.upCase()
             bs.write( outFileHandler )
     finally:
         # close the output file even if a sequence cannot be fetched or written
         outFileHandler.close()
     os.remove( alnInitHFile )

     self.end()
def main():
    """
    Parse the command line and call FastaUtils.sortSequencesByIncreasingLength().

    Options: -h (print the help and exit with status 0), -i (input file name,
    compulsory), -o (output file name), -v (verbosity, converted to int).
    Exits with status 1 on an unknown option, when -i is missing, or when
    sortSequencesByIncreasingLength() returns a non-zero value.

    @return: 0
    """
    inFileName = ""
    outFileName = ""
    verbose = 0

    try:
        opts, args = getopt.getopt( sys.argv[1:], "hi:o:v:" )
    except getopt.GetoptError:
        help()
        sys.exit(1)

    for option, value in opts:
        if option == "-h":
            help()
            sys.exit(0)
        if option == "-i":
            inFileName = value
        elif option == "-o":
            outFileName = value
        elif option == "-v":
            verbose = int( value )

    if not inFileName:
        print( "*** Error: missing compulsory options" )
        help()
        sys.exit(1)

    # name of the script without its directory, used in the progress messages
    progName = sys.argv[0].split("/")[-1]
    isVerbose = verbose > 0

    if isVerbose:
        print( "beginning of %s" % ( progName ) )
        sys.stdout.flush()

    log = FastaUtils.sortSequencesByIncreasingLength( inFileName, outFileName, verbose )
    if log != 0:
        print( "*** Error: sortSequencesByIncreasingLength() returned %i" % ( log ) )
        sys.exit(1)

    if isVerbose:
        print( "%s finished successfully" % ( progName ) )
        sys.stdout.flush()

    return 0
Пример #4
0
    if sbjFileName != "":
        if not os.path.exists( sbjFileName ):
            string = "subject file '%s' doesn't exist" % ( sbjFileName )
            logging.error( string )
            print "ERROR: %s" % ( string )
            sys.exit(1)
    else:
        sbjFileName = qryFileName

    logging.info( "remove redundancy among '%s' (queries) compare to '%s' (subjects)" % ( qryFileName, sbjFileName ) )

    #--------------------------------------------------------------------------

    # check the input files are not empty, otherwise exit

    nbSeqQry = FastaUtils.dbSize( qryFileName )
    if nbSeqQry == 0:
        string = "query file is empty"
        logging.info( string )
        print "WARNING: %s" % ( string )
        logging.info( "finished" )
        sys.exit(0)

    nbSeqSbj = FastaUtils.dbSize( sbjFileName )
    if sbjFileName != qryFileName:
        nbSeqSbj = FastaUtils.dbSize( sbjFileName )
        if nbSeqSbj == 0:
            string = "subject file is empty"
            logging.info( string )
            print "WARNING: %s" % ( string )
            logging.info( "finished" )
Пример #5
0
        elif o == "-v":
            verbose = int(a)

    if inData == "" or ( not os.path.isfile( inData ) \
                         and not os.path.isdir( inData ) ):
        msg = "ERROR: missing input file or directory (-i or -I)"
        sys.stderr.write( "%s\n" % msg )
        help()
        sys.exit(1)

    if outData == "":
        print "ERROR: missing name of output file or directory (-o or -O)"
        help()
        sys.exit(1)

    if verbose > 0:
        print "START %s" % ( sys.argv[0].split("/")[-1] )
        sys.stdout.flush()
        
    FastaUtils.dbShuffle( inData, outData, verbose )
    
    if verbose > 0:
        print "END %s" % ( sys.argv[0].split("/")[-1] )
        sys.stdout.flush()

    return 0


# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Пример #6
0
        print "prepare MSP file"; sys.stdout.flush()
    prg = os.environ["REPET_PATH"] + "/bin/align2recon.py"
    cmd = prg
    cmd += " -i %s" % ( alignFileName )
    cmd += " -o %s_MSP_file" % ( projectName )
    cmd += " -v %i" % ( verbose - 1 )
    returnStatus = os.system( cmd )
    if returnStatus != 0:
        msg = "ERROR: '%s' returned '%i'" % ( prg, returnStatus )
        sys.stderr.write( "%s\n" % ( msg ) )
        sys.exit(1)


    if verbose > 0:
        print "prepare seq file"; sys.stdout.flush()
    lSeqNames = FastaUtils.dbHeaders( faFileName, verbose - 1 )
    lSeqNames.sort()
    seqListFile = open( projectName + "_seq_list", "w" )
    seqListFile.write( str(len(lSeqNames)) + "\n" )
    for seqName in lSeqNames:
        seqListFile.write( ">%s\n" % ( seqName ) )
    seqListFile.close()


    if verbose > 0:
        print "Recon is running..."; sys.stdout.flush()
    prg = "recon.pl"
    cmd = prg
    cmd += " %s_seq_list" % ( projectName )
    cmd += " %s_MSP_file" % ( projectName )
    cmd += " 1"
Пример #7
0
        elif o == "-d":
            newDir = True
        elif o == "-s":
            useSeqHeader = True
        elif o == "-p":
            prefix = a
        elif o == "-v":
            verbose = int(a)

    if inFile == "":
        msg = "ERROR: missing input file (-i)"
        sys.stderr.write( "%s\n" % ( msg ) )
        help()
        sys.exit(1)

    if verbose > 0:
        print "START %s" % ( sys.argv[0].split("/")[-1] )
        sys.stdout.flush()

    FastaUtils.dbSplit( inFile, nbSeqPerBatch, newDir, useSeqHeader, prefix, verbose )

    if verbose > 0:
        print "END %s" % ( sys.argv[0].split("/")[-1] )
        sys.stdout.flush()

    return 0


# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Пример #8
0
        sys.stderr.write( "%s\n" % ( msg ) )
        help()
        sys.exit(1)
    if clusteringMethod == "":
        msg = "ERROR: missing clustering method (-c)"
        sys.stderr.write( "%s\n" % ( msg ) )
        help()
        sys.exit(1)
    if not os.path.exists( inFileName ):
        msg = "ERROR: can't find file '%s'" % ( inFileName )
        sys.stderr.write( "%s\n" % ( msg ) )
        help()
        sys.exit(1)

    if verbose > 0:
        print "START %s" % (sys.argv[0].split("/")[-1])
        sys.stdout.flush()

    FastaUtils.splitSeqPerCluster( inFileName, clusteringMethod, simplifyHeader,
                                   createDir, "seqCluster", verbose )

    if verbose > 0:
        print "END %s" % (sys.argv[0].split("/")[-1])
        sys.stdout.flush()

    return 0


# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()