def _classify(self, mode, inputFastaFile, outLog=None):
        """
            Classify the extracted rRNA regions of one type with mothur (classify.seqs) and transform the
            mothur prediction file to a tab separated prediction file.

            @param mode: rRNA type to be processed: 16 (16S), 23 (23S) or 5 (5S)
            @param inputFastaFile: input fasta file, only its base name is used to derive the names of the
                extracted regions file and of the prediction file (both located in the working directory)
            @param outLog: file to which the standard output of mothur is written, if None, mothur inherits
                the standard output of this process
            @raise Exception: if the mode is not supported or if mothur returns a non-zero status
        """
        mothur = os.path.join(os.path.normpath(self._config.get('mothurInstallDir')), 'mothur')

        #mode -> (rRNA label used in the file names and in the reference dictionary, prediction file suffix)
        modeToLabel = {16: ('16S_rRNA', '.16P'),
                       23: ('23S_rRNA', '.23P'),
                       5: ('5S_rRNA', '.5P')}
        if mode not in modeToLabel:
            raise Exception('Wrong branch')
        rrnaLabel, predSuffix = modeToLabel[mode]

        inputName = os.path.basename(inputFastaFile)
        extractedRegionsFasta = os.path.join(self._workingDir, inputName + '.' + rrnaLabel + '.fna')
        taxonomyFile = os.path.join(self._refDir, os.path.normpath(self._refDict[(rrnaLabel, 'taxonomyDNA')][0]))
        templateFile = os.path.join(self._refDir, os.path.normpath(self._refDict[(rrnaLabel, 'templateDNA')][0]))
        predFileName = os.path.join(self._workingDir, inputName + predSuffix)

        param = self._config.get('mothurClassifyParamOther')

        #NOTE: the command is run via the shell ("time" prefix), the paths and parameters come from the configuration
        cmd = str('time ' + mothur + ' "#classify.seqs(fasta=' + extractedRegionsFasta + ', template=' + templateFile
                + ', taxonomy=' + taxonomyFile + ', ' + param + ')"')

        if os.name == 'posix':
            #subprocess.STDOUT is valid only as the "stderr" argument, None lets mothur inherit the stdout
            if outLog is not None:
                stdoutLog = open(outLog, 'w')
            else:
                stdoutLog = None
            try:
                mothurProc = subprocess.Popen(cmd, shell=True, bufsize=-1, cwd=self._workingDir, stdout=stdoutLog)
                print('run cmd: %s' % cmd)
                mothurProc.wait()
            finally:
                #close the log also if the command could not be started
                if stdoutLog is not None:
                    stdoutLog.close()
            print('mothur return code: %s' % mothurProc.returncode)
            if mothurProc.returncode != 0:
                raise Exception("Command returned with non-zero %s status: %s" % (mothurProc.returncode, cmd))
        else:
            print('Cannot run mothur since your system is not "posix" but "%s" \n%s' % (os.name, cmd))

        #resolve the mothur output file only now: before mothur has run, the default file does not exist
        #(unless left over from a previous run), so an earlier check would always select the fallback name
        mothurPredFileName = common.getMothurOutputFilePath(extractedRegionsFasta, taxonomyFile)
        if not os.path.isfile(mothurPredFileName):
            mothurPredFileName = common.getMothurOutputFilePath(extractedRegionsFasta, taxonomyFile,
                                                                suffix='.bayesian.taxonomy')

        #transform mothur prediction files to the tab separated files
        self.mothurPredToTabSepPred(mothurPredFileName, predFileName)
# Example #2
0
    def runMarkerGeneAnalysis(self, fastaFileDNA, outLog=None):
        """
            Run hmmer HMM and mothur classify (bayesian), same param as for the 16S analysis.

            The DNA sequences are translated to protein sequences that are searched with the primary and the
            secondary HMM profile of each marker gene (hmmsearch). The DNA regions that correspond to the hits
            of the primary profile are written to a file and classified with mothur (classify.seqs). The
            predictions are stored in one file per marker gene and in one file for all marker genes, both in
            the marker gene working directory.

            @param fastaFileDNA: DNA fasta file to be analysed
            @param outLog: file to which the standard output of hmmsearch and mothur is written, if None,
                both inherit the standard output of this process
            @raise Exception: if hmmsearch or mothur returns a non-zero status
        """
        #read list of marker genes
        mgFiles = forEachLine(self.markerGeneListFile, _MgFiles(self.markerGeneListFileDir))

        #translate DNA to protein sequences
        fastaFileProt = os.path.join(self.markerGeneWorkingDir, str(os.path.basename(fastaFileDNA) + '.PROT'))
        dnaToProt(fastaFileDNA, fastaFileProt)

        #read DNA fasta file (the handle is closed also if the parsing fails)
        try:
            with open(fastaFileDNA, "rU") as handle:
                dnaSeqDict = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))
        except Exception:
            sys.stderr.write(str('Cannot read file: ' + str(fastaFileDNA)))
            raise

        #to output all predictions in one file
        outPredAllFileName = os.path.join(self.markerGeneWorkingDir,
                                           str(os.path.basename(fastaFileDNA) + '_all.mP'))
        outAllBuffer = OutFileBuffer(outPredAllFileName)

        #run HMM search
        mgList = mgFiles.getGeneNameList()

        #subprocess.STDOUT is valid only as the "stderr" argument, None lets the commands inherit the stdout
        if outLog is not None:
            stdoutLog = open(outLog, 'w')
        else:
            stdoutLog = None

        #protein sequence name: <number>_<number>_<"p" or "pr"><reading frame shift 0, 1 or 2>
        framePattern = re.compile(r'[0-9]+_[0-9]+_[pr]+([012])')

        try:
            #for each gene perform the analysis separately
            for geneName in mgList:

                domFileArray = [os.path.join(self.markerGeneWorkingDir, geneName + '_1.dom'),
                                os.path.join(self.markerGeneWorkingDir, geneName + '_2.dom')]
                outFileArray = [os.path.join(self.markerGeneWorkingDir, geneName + '_1.out'),
                                os.path.join(self.markerGeneWorkingDir, geneName + '_2.out')]
                hmmFileArray = [mgFiles.getFilePath(geneName, 'hmmPROTPrim'),
                                mgFiles.getFilePath(geneName, 'hmmPROTSec')]

                #define cmd (None if the HMM profile is not available)
                cmdArray = []
                for domFile, outFile, hmmFile in zip(domFileArray, outFileArray, hmmFileArray):
                    if hmmFile is not None:
                        cmdArray.append(os.path.join(self.hmmerBinDir, 'hmmsearch') + ' --domtblout ' + domFile
                                        + ' -E 0.01' + ' -o ' + outFile + ' ' + hmmFile + ' ' + fastaFileProt)
                    else:
                        cmdArray.append(None)

                #run cmd
                for cmd in cmdArray:
                    if cmd is not None and os.name == 'posix':
                        hmmProc = subprocess.Popen(cmd, shell=True, bufsize=-1, cwd=self.hmmInstallDir,
                                                   stdout=stdoutLog)
                        print('run cmd: %s' % cmd)
                        hmmProc.wait()
                        print('HMM  return code: %s' % hmmProc.returncode)
                        if hmmProc.returncode != 0:
                            raise Exception("Command returned with non-zero %s status: %s" % (hmmProc.returncode, cmd))
                    else:
                        print('Marker genes analysis, doesn`t run (no posix):  %s' % cmd)

                #get regions that match to the HMM profile ()
                entryDictList = []
                for cmd, domFile in zip(cmdArray, domFileArray):
                    if cmd is not None:
                        entryDictList.append(forEachLine(domFile, _MgRegions()).getEntryDict())
                    else:
                        entryDictList.append(None)

                #only the hits of the primary profile are used, the hits of the secondary profile are
                #parsed but not evaluated
                entryDict1 = entryDictList[0]
                if entryDict1 is None:
                    #no primary HMM profile, there are no regions for this gene (it is skipped below)
                    sys.stderr.write('No "hmmPROTPrim" file for marker gene: ' + geneName + ' \n')
                    entryDict1 = {}

                #extract regions found in the protein sequences that were found by the HMM and generate
                #corresponding DNA sequences
                regionDnaFasta = os.path.join(self.markerGeneWorkingDir, str(geneName + '_dna.gff'))
                outFileBuffer = OutFileBuffer(regionDnaFasta)

                for seqName in entryDict1:
                    for entry in entryDict1[seqName]:
                        from1 = entry[0]
                        to1 = entry[1]
                        assert (from1 is not None) and (to1 is not None)

                        #extract regions from the DNA sequences (consider 3 ORF and reverse complements)
                        frameMatch = framePattern.match(seqName)
                        if frameMatch is None:
                            #the region cannot be located without the reading frame, skip this entry
                            sys.stderr.write('Wrong seq name: ' + seqName + ' \n')
                            continue
                        shift = int(frameMatch.group(1))

                        #name of the whole sequence
                        dnaSeqName = re.sub(r'([0-9]+_[0-9]+)_[pr]+[012]', r'\1', seqName)
                        #whole DNA sequence
                        dnaSeq = dnaSeqDict[dnaSeqName].seq

                        #reverse complement (contains "pr")
                        tagRev = 'p'
                        if re.match(r'[0-9]+_[0-9]+_pr[012]', seqName):
                            dnaSeq = dnaSeq.reverse_complement()
                            tagRev = 'pr'
                        tagRev += str(shift)

                        #three nucleotides per amino acid, moved by the reading frame shift
                        #(presumably from1 and to1 are 1-based protein positions - TODO confirm)
                        tagFrom = ((from1 - 1) * 3) + shift
                        tagTo = (to1 * 3) + shift
                        dnaSeq = dnaSeq[tagFrom:tagTo]

                        tag = str(tagFrom) + '_' + str(tagTo) + '_' + tagRev
                        outFileBuffer.writeText('>' + dnaSeqName + '_' + tag + '\n' + str(dnaSeq) + '\n')

                outFileBuffer.close()

                #if no marker gene found
                if outFileBuffer.isEmpty():
                    continue

                #run mothur classify (bayesian? the same as for the 16S analysis)
                templateFile = mgFiles.getFilePath(geneName, 'templateDNA')
                taxonomyFile = mgFiles.getFilePath(geneName, 'taxonomyDNA')
                assert (templateFile is not None) and (taxonomyFile is not None)
                cmd = str('time ' + self.mothur + ' "#classify.seqs(fasta=' + regionDnaFasta + ', template=' + templateFile
                    + ', taxonomy=' + taxonomyFile + ', ' + self.mothurParam + ')"')
                if os.name == 'posix':
                    mothurProc = subprocess.Popen(cmd, shell=True, bufsize=-1, cwd=self.markerGeneWorkingDir,
                                                  stdout=stdoutLog)
                    print('run cmd: %s' % cmd)
                    mothurProc.wait()
                    print('mothur return code: %s' % mothurProc.returncode)
                    if mothurProc.returncode != 0:
                        raise Exception("Command returned with non-zero %s status: %s" % (mothurProc.returncode, cmd))
                else:
                    print('Cannot run mothur since your system is not "posix" but "%s" \n%s' % (os.name, cmd))

                #transform the mothur output to a simple output (name, ncbid, weight)
                mothurPredFileName = common.getMothurOutputFilePath(regionDnaFasta, taxonomyFile)
                if not os.path.isfile(mothurPredFileName):
                    mothurPredFileName = common.getMothurOutputFilePath(regionDnaFasta, taxonomyFile,
                                                                        suffix='.bayesian.taxonomy')
                    if not os.path.isfile(mothurPredFileName):
                        print("Can't open file: %s" % mothurPredFileName)

                outPredFileName = os.path.join(self.markerGeneWorkingDir,
                                               str(os.path.basename(fastaFileDNA) + '_' + geneName + '.mP'))
                outBuffer = OutFileBuffer(outPredFileName, bufferText=True)
                forEachLine(mothurPredFileName, _MothurOutFileParser(outBuffer, geneName))

                if not outAllBuffer.isEmpty():
                    outAllBuffer.writeText('\n')
                outAllBuffer.writeText(outBuffer.getTextBuffer())
        finally:
            #close the log also if one of the commands failed
            if stdoutLog is not None:
                stdoutLog.close()
        outAllBuffer.close()
# Example #3
0
    def runMarkerGeneAnalysis(self, fastaFileDNA, outLog=None):
        """
            Run hmmer HMM and mothur classify (bayesian), same param as for the 16S analysis.

            The DNA sequences are translated to protein sequences that are searched with the primary HMM
            profile of each marker gene (hmmsearch). The DNA regions that correspond to the hits are written
            to a file and classified with mothur (classify.seqs). The predictions are stored in one file per
            marker gene and in one file for all marker genes, both in the marker gene working directory.

            The process exits with status -1 if a failed hmmsearch or mothur command is reported.

            @param fastaFileDNA: DNA fasta file to be analysed
            @param outLog: file that is passed as the standard output to the mothur commands, if None, no
                standard output is passed (hmmsearch is always run without it)
        """
        #read list of marker genes
        mgFiles = forEachLine(self.markerGeneListFile, _MgFiles(self.markerGeneListFileDir))

        #translate DNA to protein sequences
        fastaFileProt = os.path.join(self.markerGeneWorkingDir, str(os.path.basename(fastaFileDNA) + '.PROT'))
        dnaToProt(fastaFileDNA, fastaFileProt)

        #read DNA fasta file (the handle is closed also if the parsing fails)
        try:
            with open(fastaFileDNA, "rU") as handle:
                dnaSeqDict = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))
        except Exception:
            sys.stderr.write(str('Cannot read file: ' + str(fastaFileDNA)))
            raise

        #to output all predictions in one file
        outPredAllFileName = os.path.join(self.markerGeneWorkingDir,
                                           str(os.path.basename(fastaFileDNA) + '_all.mP'))
        outAllBuffer = OutFileBuffer(outPredAllFileName)

        #run HMM search
        mgList = mgFiles.getGeneNameList()

        #subprocess.STDOUT is meant only for the "stderr" argument of a process, without a log file
        #the "stdout" argument is left out (as for the hmmsearch command below)
        if outLog is not None:
            stdoutLog = open(outLog, 'w')
        else:
            stdoutLog = None

        #protein sequence name: <number>_<number>_<"p" or "pr"><reading frame shift 0, 1 or 2>
        framePattern = re.compile(r'[0-9]+_[0-9]+_[pr]+([012])')

        try:
            #for each gene perform the analysis separately
            for geneName in mgList:

                domFile = os.path.join(self.markerGeneWorkingDir, geneName + '_1.dom')
                outFile = os.path.join(self.markerGeneWorkingDir, geneName + '_1.out')
                hmmFile = mgFiles.getFilePath(geneName, 'hmmPROTPrim')

                #define cmd (None if the HMM profile is not available)
                if hmmFile is not None:
                    hmmCmd = (os.path.join(self.hmmerBinDir, 'hmmsearch') + ' --domtblout ' + domFile + ' -E 0.01'
                              + self.processorsHmm + ' -o ' + outFile + ' ' + hmmFile + ' ' + fastaFileProt)
                else:
                    hmmCmd = None

                #run cmd
                if hmmCmd is not None and os.name == 'posix':
                    hmmTask = parallel.TaskCmd(hmmCmd, self.hmmInstallDir)
                    if parallel.reportFailedCmd(parallel.runCmdSerial([hmmTask])) is not None:
                        sys.exit(-1)
                else:
                    print('Marker genes analysis, doesn`t run (no posix):  %s' % hmmCmd)

                #get regions that match to the HMM profile ()
                if hmmCmd is not None:
                    entryDict1 = forEachLine(domFile, _MgRegions()).getEntryDict()
                else:
                    #no primary HMM profile, there are no regions for this gene (it is skipped below)
                    sys.stderr.write('No "hmmPROTPrim" file for marker gene: ' + geneName + ' \n')
                    entryDict1 = {}

                #extract regions found in the protein sequences that were found by the HMM and generate
                #corresponding DNA sequences
                regionDnaFasta = os.path.join(self.markerGeneWorkingDir, str(geneName + '_dna.gff'))
                outFileBuffer = OutFileBuffer(regionDnaFasta)

                for seqName in entryDict1:
                    for entry in entryDict1[seqName]:
                        from1 = entry[0]
                        to1 = entry[1]
                        assert (from1 is not None) and (to1 is not None)

                        #extract regions from the DNA sequences (consider 3 ORF and reverse complements)
                        frameMatch = framePattern.match(seqName)
                        if frameMatch is None:
                            #the region cannot be located without the reading frame, skip this entry
                            sys.stderr.write('Wrong seq name: ' + seqName + ' \n')
                            continue
                        shift = int(frameMatch.group(1))

                        #name of the whole sequence
                        dnaSeqName = re.sub(r'([0-9]+_[0-9]+)_[pr]+[012]', r'\1', seqName)
                        #whole DNA sequence
                        dnaSeq = dnaSeqDict[dnaSeqName].seq

                        #reverse complement (contains "pr")
                        tagRev = 'p'
                        if re.match(r'[0-9]+_[0-9]+_pr[012]', seqName):
                            dnaSeq = dnaSeq.reverse_complement()
                            tagRev = 'pr'
                        tagRev += str(shift)

                        #three nucleotides per amino acid, moved by the reading frame shift
                        #(presumably from1 and to1 are 1-based protein positions - TODO confirm)
                        tagFrom = ((from1 - 1) * 3) + shift
                        tagTo = (to1 * 3) + shift
                        dnaSeq = dnaSeq[tagFrom:tagTo]

                        tag = str(tagFrom) + '_' + str(tagTo) + '_' + tagRev
                        outFileBuffer.writeText('>' + dnaSeqName + '_' + tag + '\n' + str(dnaSeq) + '\n')

                outFileBuffer.close()

                #if no marker gene found
                if outFileBuffer.isEmpty():
                    continue

                #run mothur classify (bayesian? the same as for the 16S analysis)
                templateFile = mgFiles.getFilePath(geneName, 'templateDNA')
                taxonomyFile = mgFiles.getFilePath(geneName, 'taxonomyDNA')
                assert (templateFile is not None) and (taxonomyFile is not None)
                cmd = str(self.mothur + ' "#classify.seqs(fasta=' + regionDnaFasta + ', template=' + templateFile
                    + ', taxonomy=' + taxonomyFile + ', ' + self.mothurParam + ')"')
                if os.name == 'posix':

                    print('Mothur processing: %s' % os.path.basename(templateFile).split('_', 1)[0])

                    cwd = self.markerGeneWorkingDir

                    if stdoutLog is not None:
                        mothurTask = parallel.TaskCmd(cmd, cwd, stdout=stdoutLog)
                    else:
                        mothurTask = parallel.TaskCmd(cmd, cwd)

                    if parallel.reportFailedCmd(parallel.runCmdSerial([mothurTask])) is not None:
                        sys.exit(-1)

                else:
                    print('Cannot run mothur since your system is not "posix" but "%s" \n%s' % (os.name, cmd))

                #transform the mothur output to a simple output (name, ncbid, weight)
                mothurPredFileName = common.getMothurOutputFilePath(regionDnaFasta, taxonomyFile)
                if not os.path.isfile(mothurPredFileName):
                    mothurPredFileName = common.getMothurOutputFilePath(regionDnaFasta, taxonomyFile,
                                                                        suffix='.bayesian.taxonomy')
                    if not os.path.isfile(mothurPredFileName):
                        print("Can't open file: %s" % mothurPredFileName)

                outPredFileName = os.path.join(self.markerGeneWorkingDir,
                                               str(os.path.basename(fastaFileDNA) + '_' + geneName + '.mP'))
                outBuffer = OutFileBuffer(outPredFileName, bufferText=True)
                forEachLine(mothurPredFileName, _MothurOutFileParser(outBuffer, geneName))

                if not outAllBuffer.isEmpty():
                    outAllBuffer.writeText('\n')
                outAllBuffer.writeText(outBuffer.getTextBuffer())
        finally:
            #close the log also if a command failed (sys.exit) or an exception was raised
            if stdoutLog is not None:
                stdoutLog.close()
        outAllBuffer.close()
# Example #4
0
    def _classify(self, mode, inputFastaFile, outLog=None):
        """
            Classify the extracted rRNA regions of one type with mothur (classify.seqs) and transform the
            mothur prediction file to a tab separated prediction file.

            The process exits with status -1 if a failed mothur command is reported.

            @param mode: rRNA type to be processed: 16 (16S), 23 (23S) or 5 (5S)
            @param inputFastaFile: input fasta file, only its base name is used to derive the names of the
                extracted regions file and of the prediction file (both located in the working directory)
            @param outLog: file that is passed as the standard output to the mothur command, if None, no
                standard output is passed
            @raise Exception: if the mode is not supported
        """
        mothur = os.path.join(os.path.normpath(self._config.get('mothurInstallDir')), 'mothur')

        #mode -> (rRNA label used in the file names and in the reference dictionary, prediction file suffix)
        modeToLabel = {16: ('16S_rRNA', '.16P'),
                       23: ('23S_rRNA', '.23P'),
                       5: ('5S_rRNA', '.5P')}
        if mode not in modeToLabel:
            raise Exception('Wrong branch')
        rrnaLabel, predSuffix = modeToLabel[mode]

        inputName = os.path.basename(inputFastaFile)
        extractedRegionsFasta = os.path.join(self._workingDir, inputName + '.' + rrnaLabel + '.fna')
        taxonomyFile = os.path.join(self._refDir, os.path.normpath(self._refDict[(rrnaLabel, 'taxonomyDNA')][0]))
        templateFile = os.path.join(self._refDir, os.path.normpath(self._refDict[(rrnaLabel, 'templateDNA')][0]))
        predFileName = os.path.join(self._workingDir, inputName + predSuffix)

        param = self._config.get('mothurClassifyParamOther')

        cmd = str(mothur + ' "#classify.seqs(fasta=' + extractedRegionsFasta + ', template=' + templateFile
                + ', taxonomy=' + taxonomyFile + ', ' + param + ')"')

        if os.name == 'posix':

            print('Mothur processing: %s' % os.path.basename(templateFile).split('_', 1)[0])

            cwd = self._workingDir

            #subprocess.STDOUT is meant only for the "stderr" argument of a process, without a log file
            #the "stdout" argument is left out
            if outLog is not None:
                stdoutLog = open(outLog, 'w')
            else:
                stdoutLog = None

            try:
                if stdoutLog is not None:
                    mothurTask = parallel.TaskCmd(cmd, cwd, stdout=stdoutLog)
                else:
                    mothurTask = parallel.TaskCmd(cmd, cwd)

                if parallel.reportFailedCmd(parallel.runCmdSerial([mothurTask])) is not None:
                    sys.exit(-1)
            finally:
                #close the log also if the command failed (sys.exit) or an exception was raised
                if stdoutLog is not None:
                    stdoutLog.close()

        else:
            print('Cannot run mothur since your system is not "posix" but "%s" \n%s' % (os.name, cmd))

        #resolve the mothur output file only now: before mothur has run, the default file does not exist
        #(unless left over from a previous run), so an earlier check would always select the fallback name
        mothurPredFileName = common.getMothurOutputFilePath(extractedRegionsFasta, taxonomyFile)
        if not os.path.isfile(mothurPredFileName):
            mothurPredFileName = common.getMothurOutputFilePath(extractedRegionsFasta, taxonomyFile,
                                                                suffix='.bayesian.taxonomy')

        #transform mothur prediction files to the tab separated files
        self.mothurPredToTabSepPred(mothurPredFileName, predFileName)