# Start from a clean log: remove any log file left over at the requested path.
log_name = args.logfile
if os.path.isfile(log_name):
    os.remove(log_name)

# Initialise logging, then record the command line this run was started with.
lib.setupLogging(log_name)
# Handle on the null device; it is not closed anywhere in this section.
FNULL = open(os.devnull, 'w')
cmd_args = " ".join(sys.argv) + '\n'
lib.log.debug(cmd_args)

# Working directory name for the FASTA files and output files; the PID
# suffix gives each process its own directory name.
TMPDIR = 'phobius_' + str(os.getpid())

# Split the input FASTA into TMPDIR.
# NOTE(review): presumably lib.splitFASTA creates TMPDIR itself -- confirm.
lib.splitFASTA(args.input, TMPDIR)

# Collect the '.fa' entries now in TMPDIR (bare file names, listing order).
proteins = [fname for fname in os.listdir(TMPDIR) if fname.endswith('.fa')]

# Choose the runner: if lib.which() finds phobius.pl, use the local runner
# with the CPU count as the third argument; otherwise use the remote runner
# capped at 29.
if lib.which('phobius.pl'):
    phobius_runner = runPhobiusLocal
    max_jobs = multiprocessing.cpu_count()
else:
    phobius_runner = runPhobiusRemote
    max_jobs = 29  # max is 30 jobs at a time
lib.runMultiProgress(phobius_runner, proteins, max_jobs)

# Accumulator for the collected results.
phobius = []
subprocess.call([ os.path.join(parentdir, 'util', 'phobius-multiproc.py'), '-i', Proteins, '-o', phobius_out, '-l', phobiusLog ]) if 'interproscan' in args.methods or 'all' in args.methods: IPRCombined = os.path.join(outputdir, 'annotate_misc', 'iprscan.xml') #run interpro scan IPROUT = os.path.join(outputdir, 'annotate_misc', 'iprscan') PROTS = os.path.join(outputdir, 'annotate_misc', 'protein_tmp') for i in IPROUT, PROTS: if not os.path.exists(i): os.makedirs(i) #now run interproscan #split input into individual files lib.splitFASTA(Proteins, PROTS) #now iterate over list using pool and up to 25 submissions at a time proteins = [] for file in os.listdir(PROTS): if file.endswith('.fa'): file = os.path.join(PROTS, file) proteins.append(file) num_files = len(glob.glob1(IPROUT, "*.xml")) num_prots = len(proteins) lib.log.info( "Now running InterProScan search remotely using EBI servers on " + '{0:,}'.format(num_prots) + ' proteins') #build in a check before running (in case script gets stopped and needs to restart finished = [] for file in os.listdir(IPROUT):
num_annotations = lib.line_count(signalp_out) lib.log.info('{0:,}'.format(num_annotations) + ' annotations added') else: lib.log.info("SignalP not installed, skipping") if not args.skip_iprscan: if not args.iprscan: #run interpro scan IPROUT = os.path.join(outputdir, 'annotate_misc', 'iprscan') PROTS = os.path.join(outputdir, 'annotate_misc', 'protein_tmp') for i in IPROUT,PROTS: if not os.path.exists(i): os.makedirs(i) #now run interproscan #split input into individual files lib.splitFASTA(Proteins, PROTS) #now iterate over list using pool and up to 25 submissions at a time proteins = [] for file in os.listdir(PROTS): if file.endswith('.fa'): file = os.path.join(PROTS, file) proteins.append(file) num_files = len(glob.glob1(IPROUT,"*.xml")) num_prots = len(proteins) lib.log.info("Now running InterProScan search remotely using EBI servers on " + '{0:,}'.format(num_prots) + ' proteins') while (num_files < num_prots): #build in a check before running (in case script gets stopped and needs to restart finished = [] for file in os.listdir(IPROUT):