示例#1
0
 def __init__(self,
              organisms,
              ncpus=1,
              evalue=1e-10,
              recover=False,
              prefix='',
              matrix='BLOSUM80',
              queue=None):
     """Set up the search parameters and the empty result containers.

     Args:
         organisms: iterable of organisms; copied into a list.
             (presumably paths to protein FASTA files -- confirm
             against the callers)
         ncpus: forwarded to ``CommonMultiProcess.__init__``.
         evalue: E-value threshold; coerced to ``float``.
         recover: stored as-is; recovery is not implemented yet.
         prefix: prefix for ortholog names; trailing underscores are
             stripped.
         matrix: name of the scoring matrix, stored as-is.
         queue: forwarded to ``CommonMultiProcess.__init__``.  When
             omitted, a new ``Queue.Queue()`` is created for this
             instance.
     """
     # A ``Queue.Queue()`` default would be evaluated only once, at
     # definition time, and then shared by every instance created
     # without an explicit queue.  Build a fresh one per instance.
     if queue is None:
         queue = Queue.Queue()
     CommonMultiProcess.__init__(self, ncpus, queue)
     # Blast
     self.organisms = list(organisms)
     self.dbs = {}
     self._prot2orgs = {}
     self.out = []
     self.evalue = float(evalue)
     # TODO: implement recovery
     self.recover = recover
     #
     self.results = {}
     self._blast = Blaster()
     self._pangenomeroom = None
     self.prefix = prefix.rstrip('_')
     self.matrix = matrix
     self._already = set()
     # Results
     self.orthologs = {}
     self.core = []
     self.accessory = []
     self.unique = []
示例#2
0
 def __init__(self,
              query,
              target,
              ncpus=1,
              evalue=1e-50,
              buildDB=True,
              bbh=True,
              recover=False,
              queue=None):
     """Set up the search parameters and the empty result containers.

     Args:
         query: stored as-is in ``self.query``.
         target: stored in ``self.target`` when ``buildDB`` is true,
             otherwise stored in ``self.db`` (presumably an already
             built database -- confirm against the callers).
         ncpus: forwarded to ``CommonMultiProcess.__init__`` and also
             stored as an ``int`` in ``self.ncpus``.
         evalue: E-value threshold; coerced to ``float``.
         buildDB: selects whether ``target`` fills ``self.target``
             (true) or ``self.db`` (false); the other one is ``None``.
         bbh: coerced to ``bool`` and stored in ``self.bbh``.
         recover: stored as-is.
         queue: forwarded to ``CommonMultiProcess.__init__``.  When
             omitted, a new ``Queue.Queue()`` is created for this
             instance.
     """
     # A ``Queue.Queue()`` default would be evaluated only once, at
     # definition time, and then shared by every instance created
     # without an explicit queue.  Build a fresh one per instance.
     if queue is None:
         queue = Queue.Queue()
     CommonMultiProcess.__init__(self, ncpus, queue)
     # Blast
     self.query = query
     if buildDB:
         self.target = target
         self.db = None
     else:
         self.target = None
         self.db = target
     self.out = []
     self.evalue = float(evalue)
     self.bbh = bool(bbh)
     self.recover = recover
     self.ncpus = int(ncpus)
     self._kohits = []
     self.results = {}
     self._keggroom = None
     self._blast = Blaster()
示例#3
0
    def serialBBH(self):
        """Build ortholog groups by running BBH searches protein by protein.

        Every protein of every organism that has not been assigned yet
        seeds a new group named ``self.prefix + <running index>``.  One
        ``RunBBH`` task per other organism is queued and the hits that
        come back are added to the group.  If some organisms are still
        missing from the group, each non-seed member of the group is used
        as a query against the missing organisms.

        Returns:
            True when all organisms have been processed, False when a
            BBH task reports a failure, None when ``self.killed`` is set.
        """
        orthindex = 1

        self._maxsubstatus = len(self._prot2orgs)

        for org in self.organisms:
            # Parse everything up front so that the file handle is closed
            # before the (potentially long) searches start.
            with open(org) as handle:
                seqs = list(SeqIO.parse(handle, 'fasta'))

            # Iterate over each protein
            for seq in seqs:
                self._substatus += 1
                self.updateStatus(sub=True)

                # Log some info, might be useful for
                # long running jobs
                logger.debug('Running orthology prediction for protein %d/%d' %
                             (self._substatus, self._maxsubstatus))

                logger.debug('Organism: %s, Protein: %s' % (org, seq.id))

                # Already a member of a previously built group
                if seq.id in self._already:
                    continue

                orthname = self.prefix + str(orthindex)
                orgsincluded = [org]
                self.orthologs[orthname] = [seq.id]
                query = '>%s\n%s\n' % (seq.id, str(seq.seq))
                # Same value for every task spawned from this seed
                short = len(seq) < 30

                self.initiateParallel()

                # One task for each other organism
                for otherorg in self.organisms:
                    if org == otherorg:
                        continue

                    uniqueid = self.getUniqueID()

                    # Multi process
                    obj = RunBBH(query,
                                 seq.id,
                                 self.dbs[org],
                                 self.dbs[otherorg],
                                 otherorg,
                                 self.evalue,
                                 self.matrix,
                                 short=short,
                                 uniqueid=uniqueid,
                                 useDisk=False)
                    self._paralleltasks.put(obj)

                # Poison pill to stop the workers
                self.addPoison()

                status = self._awaitBBHResults(orthname, orgsincluded,
                                               seq.id)
                if status is not True:
                    return status

                self.killParallel()

                if len(orgsincluded) < len(self.organisms):
                    logger.debug('Additional search on missing organisms for' +
                                 ' ortholog %s' % orthname)
                    # The group list may grow while it is being iterated:
                    # hits appended by the searches below are visited too.
                    for otherprotein in self.orthologs[orthname]:
                        if otherprotein == seq.id:
                            continue
                        neworg = self._prot2orgs[otherprotein]
                        if neworg == org:
                            continue

                        searcher = Blaster(useDisk=False)
                        searcher.retrieveFromDB(self.dbs[neworg], otherprotein)
                        query = searcher.retrieved

                        self.initiateParallel()

                        for evenneworg in self.organisms:
                            if evenneworg in orgsincluded:
                                continue

                            uniqueid = self.getUniqueID()

                            # NOTE(review): ``short`` still reflects the
                            # length of the seed protein, not of
                            # ``otherprotein`` -- confirm this is intended.
                            # Multi process
                            obj = RunBBH(query,
                                         otherprotein,
                                         self.dbs[neworg],
                                         self.dbs[evenneworg],
                                         evenneworg,
                                         self.evalue,
                                         self.matrix,
                                         short=short,
                                         uniqueid=uniqueid,
                                         useDisk=False)
                            self._paralleltasks.put(obj)

                        # Poison pill to stop the workers
                        self.addPoison()

                        # NOTE(review): failures are reported with the seed
                        # protein id, although the query is ``otherprotein``.
                        status = self._awaitBBHResults(orthname,
                                                       orgsincluded,
                                                       seq.id)
                        if status is not True:
                            return status

                        self.killParallel()

                orthindex += 1
        return True

    def _awaitBBHResults(self, orthname, orgsincluded, queryid):
        """Collect BBH results until ``self.isTerminated()`` is true.

        Returns True on completion, False when a task reports a failure,
        None when ``self.killed`` is set.
        """
        while True:
            if self.killed:
                logger.debug('Exiting for a kill signal')
                return None

            status = self._drainBBHResults(orthname, orgsincluded, queryid)
            if status is not True:
                return status

            if self.isTerminated():
                break

            self.sleeper.sleep(0.01)

        # Pick up whatever was queued between the last drain and the
        # termination check.
        return self._drainBBHResults(orthname, orgsincluded, queryid)

    def _drainBBHResults(self, orthname, orgsincluded, queryid):
        """Consume the results currently queued and record the new hits.

        Each result is indexed as (hit, target, success flag): a hit that
        is not in ``self._already`` is appended to the ``orthname`` group
        and its target is appended to ``orgsincluded``.

        Returns True when the queue has been emptied, False when a result
        carries a false success flag, None when ``self.killed`` is set.
        """
        while not self._parallelresults.empty():
            if self.killed:
                logger.debug('Exiting for a kill signal')
                return None

            result = self._parallelresults.get()

            if not result[2]:
                logger.error('An error occurred for BBH on query %s' %
                             queryid + ' and target %s' % result[1])
                return False
            if result[0] and result[0] not in self._already:
                self.orthologs[orthname].append(result[0])
                orgsincluded.append(result[1])
                self._already.add(result[0])
        return True