Python FastBucketQueue примеры использования

Язык программирования: Python

Пространство имен/Пакет: activeLearning.tools.bucketPQ

Класс/Тип: FastBucketQueue

Примеров на hotexamples.com: 2

Python FastBucketQueue — найдено 2 примера. Это лучшие примеры кода на Python для activeLearning.tools.bucketPQ.FastBucketQueue, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить их качество.

Основные методы

Показать Скрыть

max(2)

pop(2)

push(2)

update(1)

Пример #1

Показать файл

Файл: approach_submodularWordCoverage_costRatio.py Проект: MartyMcFlyHasNoFarmHiaHiaHo/asractivelearning

    def _mainLoop(self, wordController, batchSizeSec):
        """Lazily-greedily select words by gain/cost ratio until the budget is spent.

        Every eligible word is queued in a ``FastBucketQueue`` keyed by its
        initial ratio.  Each iteration pops the top entry and recomputes its
        gain, ratio and cost against the current coverage state.  The word is
        selected when it was already re-checked since the last selection, or
        when its fresh ratio is still at least the best ratio left in the
        queue; otherwise it is pushed back with the fresh ratio.  After a
        selection the ratios of the direct left/right neighbours in the same
        adapter are refreshed.

        Args:
            wordController: forwarded to ``self._initGains`` to build the
                initial ratios.  NOTE(review): the word counts used for
                logging are read from ``self.wordController`` instead of this
                argument -- confirm both refer to the same object.
            batchSizeSec: budget, must be > 0.  The loop ends once the
                accumulated predicted costs reach it (or the queue runs dry).

        Returns:
            Tuple ``(candidateList, coveredPhonemes, valueCoveredVectors)``:
            the selected ``(adapter, wordIndex)`` pairs in selection order and
            the updated coverage state.
        """
        assert(batchSizeSec > 0.0)

        valueCoveredVectors = self._valueCoveredVectors

        # Work on a copy of the stored phoneme coverage, or start empty.
        if self._coveredPhonemes is not None:
            coveredPhonemes = self._coveredPhonemes.copy()
        else:
            coveredPhonemes = Counter()

        # Prepare data structures
        log("Computing submodular gains")

        candidateList = []
        gainDict = self._initGains(wordController, coveredPhonemes, valueCoveredVectors)
        gainQueue = FastBucketQueue()
        # Words whose ratio was recomputed (and refused) since the last selection.
        checkedCnWordSet = set()
        for (adapter, cnWordIdx), ratio in gainDict.iteritems():
            cnWord = adapter[cnWordIdx]
            if cnWord._marked or cnWord._transcribed or cnWord.ignore:
                continue
            gainQueue.push((adapter, cnWordIdx), ratio)

        i = 0
        accumTime = 0.0                 # accumulated predicted costs (compared to batchSizeSec)
        accumTimeFrameDurations = 0.0   # accumulated raw word durations in seconds

        # Use float division explicitly (plain "/" truncates for ints under
        # Python 2) and guard against an empty corpus instead of trapping
        # into a debugger.  The value is a fraction, like the progress log below.
        numMarked = self.wordController.getNumMarkedWords()
        totalWords = self.wordController.getNumWords()
        markedFraction = float(numMarked) / float(totalWords) if totalWords else 0.0
        log('Number of marked words: {0}, -> {1} percent marked'.format(numMarked, markedFraction))

        reportAfterSamples = 10000
        while len(gainQueue) > 0 and accumTime < batchSizeSec:

            # Take the top element; its stored ratio may be stale, so it is
            # recomputed against the current coverage state.
            (adapter, cnWordIdx), _staleRatio = gainQueue.pop()
            candidateCnWord = adapter[cnWordIdx]
            candidateGain, candidateRatio, costs = self._computeRatioGain(
                adapter, cnWordIdx, coveredPhonemes, valueCoveredVectors)

            # An empty queue means there is no competitor left, so max() is
            # not consulted in that case.
            selected = (candidateCnWord in checkedCnWordSet
                        or len(gainQueue) == 0
                        or candidateRatio >= gainQueue.max())

            if selected:
                candidateList.append((adapter, cnWordIdx))
                adapter.assignWordToSegment(cnWordIdx)
                coveredPhonemes += candidateCnWord._tfIdfDict
                valueCoveredVectors += candidateGain
                # Coverage changed, so earlier re-checks are no longer valid.
                checkedCnWordSet.clear()
                candidateCnWord._marked = True

                accumTimeFrameDurations += candidateCnWord.durationSec

                # Cost-sensitive budget accounting (inaccurate because costs
                # can be lowered later through forming segments).
                accumTime += costs
                candidateCnWord.cost = costs
                candidateCnWord.utility = candidateGain
                candidateCnWord.ratio = candidateRatio
                candidateCnWord._score = i

                # NOTE(review): the result is unused; the call is retained in
                # case getNeighborSegments has side effects -- drop it if it
                # is a pure getter.
                adapter.getNeighborSegments(cnWordIdx)

                # Refresh the ratio of the direct neighbours (left, then right).
                numAdapterWords = adapter.getNumWords()
                for neighPos in (cnWordIdx - 1, cnWordIdx + 1):
                    if neighPos < 0 or neighPos >= numAdapterWords:
                        continue
                    neighWord = adapter[neighPos]
                    if neighWord._marked or neighWord.ignore:
                        continue
                    try:
                        neighGain, neighRatio, neighCosts = self._computeRatioGain(
                            adapter, neighPos, coveredPhonemes, valueCoveredVectors)
                        gainQueue.update((adapter, neighPos), neighRatio)
                        neighWord.cost = neighCosts
                        neighWord.utility = neighGain
                        neighWord.ratio = neighRatio
                    except Exception:
                        # Dump the adapter state for diagnosis, then propagate.
                        warn(("Adapter {0} wordpos {1}".format(adapter.cnId, cnWordIdx)))
                        warn(adapter.visualizeWordsChain())
                        warn(adapter.visualizeSegments())
                        raise
                i += 1

                if i % reportAfterSamples == 0:
                    log('Number of marked words: {0}, -> {1} percent marked'.format(i, float(i) / float(self.wordController.getNumWords()) ))

            else:
                # Refused: re-queue with the fresh ratio and remember that it
                # has been re-checked since the last selection.
                gainQueue.push((adapter, cnWordIdx), candidateRatio)
                checkedCnWordSet.add(candidateCnWord)

            if len(gainQueue) <= 10:
                debug("Priority queue has only {0} elements left".format(len(gainQueue)))

        # accumTime is compared against batchSizeSec above, i.e. it is in
        # seconds; convert it to hours to match the message.
        log("Sampled {0} hours of words "
            "(annotation cost model roughly predicts {1} hours of annotation)"
            .format(accumTimeFrameDurations / 3600.0, accumTime / 3600.0))

        return candidateList, coveredPhonemes, valueCoveredVectors

Пример #2

Показать файл

Файл: approach_submodularWordCoverage.py Проект: MartyMcFlyHasNoFarmHiaHiaHo/asractivelearning

    def _mainLoop(self, wordController, batchSizeSec):
        """Lazily-greedily select words by gain until the duration budget is spent.

        Every eligible word is queued in a ``FastBucketQueue`` keyed by its
        initial gain.  Each iteration pops the top entry and recomputes its
        gain against the current coverage state.  The word is selected when it
        was already re-checked since the last selection, or when its fresh
        gain is still at least the best gain left in the queue; otherwise it
        is pushed back with the fresh gain.

        Args:
            wordController: forwarded to ``self._initGains`` to build the
                initial gains.  NOTE(review): the word counts used for logging
                are read from ``self.wordController`` instead of this
                argument -- confirm both refer to the same object.
            batchSizeSec: budget in seconds, must be > 0.  The loop ends once
                the summed ``durationSec`` of the selected words reaches it
                (or the queue runs dry).

        Returns:
            Tuple ``(candidateList, coveredPhonemes, valueCoveredVectors)``:
            the selected ``(adapter, wordIndex)`` pairs in selection order and
            the updated coverage state.
        """
        assert(batchSizeSec > 0.0)

        valueCoveredVectors = self._valueCoveredVectors

        # Work on a copy of the stored phoneme coverage, or start empty.
        if self._coveredPhonemes is not None:
            coveredPhonemes = self._coveredPhonemes.copy()
        else:
            coveredPhonemes = Counter()

        # Prepare data structures
        log("Computing submodular gains")
        candidateList = []
        gainDict = self._initGains(wordController, coveredPhonemes, valueCoveredVectors)
        gainQueue = FastBucketQueue()
        # Words whose gain was recomputed (and refused) since the last selection.
        checkedCnWordSet = set()
        for (adapter, cnWordIdx), gain in gainDict.iteritems():
            cnWord = adapter[cnWordIdx]
            if cnWord._marked or cnWord._transcribed or cnWord.ignore:
                continue
            gainQueue.push((adapter, cnWordIdx), gain)

        i = 0
        accumTime = 0.0  # summed durationSec of the selected words

        # Use float division explicitly (plain "/" truncates for ints under
        # Python 2) and guard against an empty corpus.  The value is a
        # fraction, like the progress log below.
        numMarked = self.wordController.getNumMarkedWords()
        totalWords = self.wordController.getNumWords()
        markedFraction = float(numMarked) / float(totalWords) if totalWords else 0.0
        log('Number of marked words: {0}, -> {1} percent marked'.format(numMarked, markedFraction))

        reportAfterSamples = 10000
        while len(gainQueue) > 0 and accumTime < batchSizeSec:

            # Take the top element; its stored gain may be stale, so it is
            # recomputed against the current coverage state.
            (adapter, cnWordIdx), _staleGain = gainQueue.pop()
            candidateCnWord = adapter[cnWordIdx]
            candidateGain = self._computeGain(adapter, cnWordIdx, coveredPhonemes, valueCoveredVectors)

            # An empty queue means there is no competitor left, so max() is
            # not consulted in that case.
            selected = (candidateCnWord in checkedCnWordSet
                        or len(gainQueue) == 0
                        or candidateGain >= gainQueue.max())

            if selected:
                candidateList.append((adapter, cnWordIdx))
                adapter.assignWordToSegment(cnWordIdx)
                coveredPhonemes += candidateCnWord._tfIdfDict
                valueCoveredVectors += candidateGain
                # Coverage changed, so earlier re-checks are no longer valid.
                checkedCnWordSet.clear()
                candidateCnWord._marked = True
                accumTime += candidateCnWord.durationSec
                candidateCnWord.utility = candidateGain
                i += 1

                if i % reportAfterSamples == 0:
                    log('Number of marked words: {0}, -> {1} percent marked'.format(i, float(i) / float(self.wordController.getNumWords()) ))

            else:
                # Refused: re-queue with the fresh gain and remember that it
                # has been re-checked since the last selection.
                gainQueue.push((adapter, cnWordIdx), candidateGain)
                checkedCnWordSet.add(candidateCnWord)

            if len(gainQueue) <= 10:
                debug("Priority queue has only {0} elements left".format(len(gainQueue)))

        log("Sampled {0} hours of words".format(accumTime/3600.0))

        return candidateList, coveredPhonemes, valueCoveredVectors