Exemplo n.º 1
0
def trainCGandEM(distInit, accumulate, ps = d.getDefaultParamSpec(), createAccEM = d.getDefaultCreateAcc(), estimateTotAux = d.getDefaultEstimateTotAux(), iterations = 5, length = -50, afterEst = None, verbosity = 0):
    """Re-estimates a distribution using conjugate gradients and EM.

    Each iteration first runs trainCG on the current dist and then feeds the
    result to expectationMaximization. The dist produced by the final
    iteration is returned.

    See the note in the docstring for this module for information on how the
    log likelihood is scaled. This scaling is presumed to have only a small
    impact on the dist returned by this function (via its impact on trainCG).

    Arguments:
    distInit -- dist used as the starting point of the first iteration
    accumulate -- forwarded unchanged to trainCG and expectationMaximization
    ps -- forwarded to trainCG as `ps`
    createAccEM -- forwarded to expectationMaximization as `createAcc`
    estimateTotAux -- forwarded to expectationMaximization
    iterations -- number of CG-then-EM rounds; must be at least 1
    length -- forwarded to trainCG as `length`
    afterEst -- optional callback, called as afterEst(dist = ..., it = ...)
        at the end of every iteration
    verbosity -- at >= 1 progress lines are printed; at >= 2 trainCG is
        additionally wrapped with timed

    NOTE: the default values of ps, createAccEM and estimateTotAux are
    evaluated once, when this function is defined, and are therefore shared
    between calls.
    """
    assert iterations >= 1

    dist = distInit
    for it in range(1, iterations + 1):
        if verbosity >= 1:
            # single-argument print(...) gives the same output under
            # Python 2 and also parses under Python 3
            print('trainCGandEM: starting it = %s of CG and EM' % it)

        dist = (timed(trainCG) if verbosity >= 2 else trainCG)(dist, accumulate, ps = ps, length = length, verbosity = verbosity)

        # only the first of the four values returned by EM is needed here
        dist, _, _, _ = expectationMaximization(dist, accumulate, createAcc = createAccEM, estimateTotAux = estimateTotAux, verbosity = verbosity)

        if afterEst is not None:
            afterEst(dist = dist, it = it)

        if verbosity >= 1:
            print('trainCGandEM: finished it = %s of CG and EM' % it)
            print('trainCGandEM:')

    return dist
Exemplo n.º 2
0
    def addLayers(self, stateInit, questionGroups):
        """Computes all remaining layers of the decision tree.

        Calls self.addLayer repeatedly, starting from stateInit and feeding
        each returned state into the next call. The split info returned for
        every layer is merged into one dict. The loop stops after the first
        layer that returns no split info.

        Arguments:
        stateInit -- state handed to the first self.addLayer call
        questionGroups -- forwarded unchanged to every self.addLayer call

        Returns the dict of split info accumulated over all layers.
        """
        splitInfoDict = dict()
        state = stateInit
        while True:
            addLayer = self.addLayer
            if self.verbosity >= 3:
                # wrap with timed at high verbosity (presumably reports how
                # long the layer took -- confirm against timed)
                addLayer = timed(addLayer)
            state, splitInfoDictMore = addLayer(state, questionGroups)
            splitInfoDict.update(splitInfoDictMore)
            if self.verbosity >= 2:
                # parenthesized form prints the same text under Python 2 and
                # also parses under Python 3
                print('cluster: added %s nodes' % len(splitInfoDictMore))
            if not splitInfoDictMore:
                # an empty layer means nothing more can be added
                break

        return splitInfoDict
Exemplo n.º 3
0
    def addLayers(self, stateInit, questionGroups):
        """Computes all remaining layers of the decision tree.

        Starting from stateInit, self.addLayer is invoked over and over; each
        call receives the state returned by the previous one. Every call also
        returns split info for the layer it added, which is merged into a
        single dict. Iteration ends once a call returns empty split info.

        Arguments:
        stateInit -- state passed to the first self.addLayer call
        questionGroups -- passed through to each self.addLayer call

        Returns the merged split-info dict.
        """
        splitInfoDict = dict()
        state = stateInit
        while True:
            addLayer = self.addLayer
            if self.verbosity >= 3:
                # high verbosity: run the layer through the timed wrapper
                addLayer = timed(addLayer)
            state, splitInfoDictMore = addLayer(state, questionGroups)
            splitInfoDict.update(splitInfoDictMore)
            if self.verbosity >= 2:
                # print(...) with one argument behaves identically under
                # Python 2 and is valid syntax under Python 3
                print('cluster: added %s nodes' % len(splitInfoDictMore))
            if not splitInfoDictMore:
                # the layer just computed was empty, so stop
                break

        return splitInfoDict
Exemplo n.º 4
0
def decisionTreeClusterDepthBased(clusteringSpec, labels, labelledAccChunks,
                                  createAcc):
    """Builds a decision-tree-clustered dist, adding the tree layer by layer.

    The steps are:
    1. estimate a proto-root from the sum of all accumulators,
    2. build a split valuer from clusteringSpec.utilitySpec and a
       DepthBasedClusterer around it,
    3. drop trivial questions, then let the clusterer add layers starting
       from its initial state,
    4. turn the collected split info into a tree with constructTree.

    Arguments:
    clusteringSpec -- supplies verbosity, minCount, estimateTotAux,
        catchEstimationErrors, utilitySpec, goodThresh and questionGroups
    labels -- labels passed to removeTrivialQuestions and to the clusterer's
        getInitialState
    labelledAccChunks -- passed to DepthBasedFirstLevelAccSummer
    createAcc -- passed to both accumulator summers

    Returns the dist produced by constructTree.
    """
    verbosity = clusteringSpec.verbosity
    accSummer1 = DepthBasedFirstLevelAccSummer(labelledAccChunks, createAcc)
    accSummer2 = SecondLevelAccSummer(createAcc)
    minCount = clusteringSpec.minCount
    leafEstimator = LeafEstimator(
        clusteringSpec.estimateTotAux,
        catchEstimationErrors=clusteringSpec.catchEstimationErrors)

    def getProtoRoot():
        return leafEstimator.est(accSummer1.all())

    if verbosity >= 3:
        # at high verbosity run the root estimation through timed
        getProtoRoot = timed(getProtoRoot)
    protoRoot = getProtoRoot()
    splitValuer = clusteringSpec.utilitySpec(protoRoot.dist,
                                             protoRoot.count,
                                             verbosity=verbosity)
    clusterer = DepthBasedClusterer(accSummer1,
                                    accSummer2,
                                    minCount,
                                    leafEstimator,
                                    splitValuer,
                                    clusteringSpec.goodThresh,
                                    verbosity=verbosity)
    if verbosity >= 1:
        print(
            'cluster: decision tree clustering with perLeafPenalty = %s and'
            ' minCount = %s' % (splitValuer.perLeafPenalty, minCount))

    questionGroups = removeTrivialQuestions(labels,
                                            clusteringSpec.questionGroups)
    stateInit = clusterer.getInitialState(labels, protoRoot)
    splitInfoDict = clusterer.addLayers(stateInit, questionGroups)
    dist, (aux, auxRat) = constructTree(splitInfoDict)

    if verbosity >= 1:
        countRoot = protoRoot.count
        # (FIXME : leaf computation relies on specific form of dist)
        # parenthesized to match the other print calls in this function;
        # output is unchanged under Python 2
        print('cluster: %s leaves' % len(dist.dist.distDict))
        print('cluster: aux root = %s (%s) -> aux tree = %s (%s) (%s count)' %
              (protoRoot.aux / countRoot, d.Rat.toString(protoRoot.auxRat),
               aux / countRoot, d.Rat.toString(auxRat), countRoot))
    return dist
Exemplo n.º 5
0
 def subTreeSplitInfoIter(self, stateInit):
     """Lazily walks the sub-tree below stateInit, one split per node.

     States are taken from the end of a work list. The next states of a
     node are pushed in reverse order, so the first state returned by
     self.getNextStates is the next one processed.

     Yields (answerSeq, splitInfo) for every state visited, where answerSeq
     is the third component of the state.
     """
     pending = [stateInit]
     while pending:
         current = pending.pop()
         # only the answer sequence is used below, but the unpacking still
         # requires a state with exactly four components
         _, _, answerSeq, _ = current
         if self.verbosity >= 2:
             self.printNodeInfo(current)
         splitFn = self.computeBestSplitAndStateAdj
         if self.verbosity >= 3:
             # one three-character column per answer given so far
             columns = ''.join('|  ' if answer != 0 else '   '
                               for answer in answerSeq)
             splitFn = timed(
                 splitFn,
                 msg='cluster:%schoose and perform split took' %
                 ('    ' + columns))
         splitInfo, adjusted = splitFn(current)
         # the work list is extended before the result is handed out
         pending.extend(reversed(self.getNextStates(adjusted, splitInfo)))
         yield answerSeq, splitInfo
Exemplo n.º 6
0
 def subTreeSplitInfoIter(self, stateInit):
     """Generates the splits of the sub-tree rooted at stateInit.

     A stack of states is processed until it is empty. For each state the
     best split is computed, the resulting next states are pushed (reversed,
     so the first of them is popped first), and the pair
     (answerSeq, splitInfo) is yielded.
     """
     stack = [stateInit]
     while stack:
         node = stack.pop()
         # a state is a 4-tuple; only its answer sequence is read here
         (_labels, _questionGroups, answerSeq, _protoNoSplit) = node
         if self.verbosity >= 2:
             self.printNodeInfo(node)
         if self.verbosity < 3:
             chooseSplit = self.computeBestSplitAndStateAdj
         else:
             # prefix for the timing message: '|  ' for a non-zero answer,
             # blanks for a zero answer
             markers = ['|  ' if answer != 0 else '   '
                        for answer in answerSeq]
             prefix = '    ' + ''.join(markers)
             chooseSplit = timed(
                 self.computeBestSplitAndStateAdj,
                 msg = 'cluster:%schoose and perform split took' % prefix
             )
         splitInfo, nodeAdj = chooseSplit(node)
         children = self.getNextStates(nodeAdj, splitInfo)
         stack.extend(reversed(children))
         yield answerSeq, splitInfo
Exemplo n.º 7
0
def decisionTreeClusterDepthBased(clusteringSpec, labels, labelledAccChunks,
                                  createAcc):
    """Performs depth-based decision tree clustering and returns the dist.

    A proto-root is estimated from the sum of all accumulators and used to
    build the split valuer (via clusteringSpec.utilitySpec). A
    DepthBasedClusterer then adds layers, starting from its initial state
    and using the question groups left after removeTrivialQuestions. The
    resulting split info is converted to a tree by constructTree.

    Arguments:
    clusteringSpec -- provides verbosity, minCount, estimateTotAux,
        catchEstimationErrors, utilitySpec, goodThresh and questionGroups
    labels -- given to removeTrivialQuestions and clusterer.getInitialState
    labelledAccChunks -- given to DepthBasedFirstLevelAccSummer
    createAcc -- given to both accumulator summers

    Returns the dist component of constructTree's result.
    """
    verbosity = clusteringSpec.verbosity
    accSummer1 = DepthBasedFirstLevelAccSummer(labelledAccChunks, createAcc)
    accSummer2 = SecondLevelAccSummer(createAcc)
    minCount = clusteringSpec.minCount
    leafEstimator = LeafEstimator(
        clusteringSpec.estimateTotAux,
        catchEstimationErrors = clusteringSpec.catchEstimationErrors
    )
    def getProtoRoot():
        return leafEstimator.est(accSummer1.all())
    if verbosity >= 3:
        # wrap the root estimation with timed at high verbosity
        getProtoRoot = timed(getProtoRoot)
    protoRoot = getProtoRoot()
    splitValuer = clusteringSpec.utilitySpec(protoRoot.dist, protoRoot.count,
                                             verbosity = verbosity)
    clusterer = DepthBasedClusterer(accSummer1, accSummer2, minCount,
                                    leafEstimator, splitValuer,
                                    clusteringSpec.goodThresh,
                                    verbosity = verbosity)
    if verbosity >= 1:
        print ('cluster: decision tree clustering with perLeafPenalty = %s and'
               ' minCount = %s' %
               (splitValuer.perLeafPenalty, minCount))

    questionGroups = removeTrivialQuestions(labels,
                                            clusteringSpec.questionGroups)
    stateInit = clusterer.getInitialState(labels, protoRoot)
    splitInfoDict = clusterer.addLayers(stateInit, questionGroups)
    dist, (aux, auxRat) = constructTree(splitInfoDict)

    if verbosity >= 1:
        countRoot = protoRoot.count
        # (FIXME : leaf computation relies on specific form of dist)
        # parenthesized like the neighbouring prints so the whole function
        # also parses under Python 3; Python 2 output is unchanged
        print ('cluster: %s leaves' % len(dist.dist.distDict))
        print ('cluster: aux root = %s (%s) -> aux tree = %s (%s) (%s count)' %
               (protoRoot.aux / countRoot, d.Rat.toString(protoRoot.auxRat),
                aux / countRoot, d.Rat.toString(auxRat),
                countRoot))
    return dist
Exemplo n.º 8
0
def trainCGandEM(distInit,
                 accumulate,
                 ps=d.getDefaultParamSpec(),
                 createAccEM=d.getDefaultCreateAcc(),
                 estimateTotAux=d.getDefaultEstimateTotAux(),
                 iterations=5,
                 length=-50,
                 afterEst=None,
                 verbosity=0):
    """Re-estimates a distribution using conjugate gradients and EM.

    Runs `iterations` rounds. Each round applies trainCG to the current dist
    and then applies expectationMaximization to the result. The dist from the
    last round is returned.

    See the note in the docstring for this module for information on how the
    log likelihood is scaled. This scaling is presumed to have only a small
    impact on the dist returned by this function (via its impact on trainCG).

    Arguments:
    distInit -- starting dist
    accumulate -- passed unchanged to trainCG and expectationMaximization
    ps -- passed to trainCG
    createAccEM -- passed to expectationMaximization as `createAcc`
    estimateTotAux -- passed to expectationMaximization
    iterations -- number of rounds; asserted to be >= 1
    length -- passed to trainCG
    afterEst -- if not None, called as afterEst(dist=..., it=...) after
        each round
    verbosity -- >= 1 prints progress lines; >= 2 also wraps trainCG with
        timed

    NOTE: the defaults for ps, createAccEM and estimateTotAux are computed
    once at function definition time and shared between calls.
    """
    assert iterations >= 1

    dist = distInit
    for it in range(1, iterations + 1):
        if verbosity >= 1:
            # formatted single-argument print: same text as before under
            # Python 2, and valid syntax under Python 3
            print('trainCGandEM: starting it = %s of CG and EM' % it)

        dist = (timed(trainCG) if verbosity >= 2 else trainCG)(
            dist, accumulate, ps=ps, length=length, verbosity=verbosity)

        # EM returns four values; only the re-estimated dist is kept
        dist, _, _, _ = expectationMaximization(dist,
                                                accumulate,
                                                createAcc=createAccEM,
                                                estimateTotAux=estimateTotAux,
                                                verbosity=verbosity)

        if afterEst is not None:
            afterEst(dist=dist, it=it)

        if verbosity >= 1:
            print('trainCGandEM: finished it = %s of CG and EM' % it)
            print('trainCGandEM:')

    return dist
Exemplo n.º 9
0
    def synthComplete(self, dist, uttIds, method, synthOutDir, exptTag, afterSynth=None, verbosity=1):
        """Synthesizes every utterance in uttIds and runs waveform generation.

        For each utterance id, self.synth(dist, uttId, method) is called and
        its output written through a feat.AcousticSeqIo rooted at synthOutDir.
        Once all utterances are written, feat.doHtsDemoWaveformGeneration is
        called for the basenames "<uttId>.<exptTag>", with its log file at
        "<synthOutDir>/<exptTag>.log".

        Arguments:
        dist -- passed to self.synth
        uttIds -- utterance ids; iterated twice, so it must be re-iterable
        method -- passed to self.synth
        synthOutDir -- directory used for the written files and the log file
        exptTag -- tag used in the per-stream names, basenames and log name
        afterSynth -- optional callback, called as
            afterSynth(synthOutput=..., uttId=..., exptTag=...) before the
            output of an utterance is written
        verbosity -- at >= 1 a progress line is printed and waveform
            generation is wrapped with timed
        """
        synthAcousticSeqIo = feat.AcousticSeqIo(
            synthOutDir,
            [vsio.VecSeqIo(stream.order) for stream in self.streams],
            ["%s.%s" % (exptTag, stream.name) for stream in self.streams],
            [stream.encoder for stream in self.streams],
        )

        if verbosity >= 1:
            # one formatted argument: same text as the former multi-argument
            # print statement, and valid syntax under Python 3 as well
            print("synth: synthesizing to %s with tag %s" % (synthOutDir, exptTag))

        for uttId in uttIds:
            synthOutput = self.synth(dist, uttId, method)
            if afterSynth is not None:
                afterSynth(synthOutput=synthOutput, uttId=uttId, exptTag=exptTag)
            synthAcousticSeqIo.writeFiles(uttId, synthOutput)

        (timed(feat.doHtsDemoWaveformGeneration) if verbosity >= 1 else feat.doHtsDemoWaveformGeneration)(
            self.scriptsDir,
            synthOutDir,
            basenames=[uttId + "." + exptTag for uttId in uttIds],
            logFile=os.path.join(synthOutDir, exptTag + ".log"),
        )