def main(self, conceptMap, symbolMap, symbolDt, dcpts, dpmfs, symbolName, outDir):
        """Derive and save the symbol models for every history length.

        The concept and symbol lexicons are loaded with ``LexMap().read``; the
        decision tree, the ``jointProbC1C2C3C4`` DCPT and the
        ``<symbolName>GivenC1C2C3C4`` DPMF are loaded through ``gmtk``.  From
        these the method saves, under ``outDir`` and prefixed by
        ``symbolName``: ``GivenC1C2C3C4``, ``GivenC1C2C3``, ``GivenC1C2``,
        ``GivenC1``, ``Unigram`` and ``Zerogram``.

        NOTE(review): ``marginalize(dims)`` presumably keeps the listed
        dimensions and ``conditionalize(dims)`` presumably conditions on them,
        with dimension 0 being the symbol -- confirm against the probability
        table implementation.

        Args:
            conceptMap: argument handed to ``LexMap().read`` for the concepts.
            symbolMap: argument handed to ``LexMap().read`` for the symbols.
            symbolDt: argument handed to ``gmtk.readDt``.
            dcpts: source handed to ``gmtk.readDcpt``.
            dpmfs: source handed to ``gmtk.readDpmf``.
            symbolName: prefix used in every saved/loaded table name.
            outDir: destination handed to every ``gmtk.save*`` call.
        """
        concepts = LexMap().read(conceptMap)
        symbols = LexMap().read(symbolMap)

        sinkSymbol = int(symbols["_sink_"])
        sinkConcept = int(concepts["_SINK_"])

        nConcepts = len(concepts)
        nSymbols = len(symbols)

        # Both combined tables are built on the same decision tree.
        tree = gmtk.readDt(symbolDt)
        historyJoint = gmtk.combineDtDcpt1(tree, gmtk.readDcpt(dcpts, "jointProbC1C2C3C4"))
        fullConditional = gmtk.combineDtDcpt2(
            tree, gmtk.readDpmf(dpmfs, "%sGivenC1C2C3C4" % symbolName)
        )

        # The longest-history model is saved as loaded; the joint built from
        # it is the starting point for all shorter histories.
        joint = fullConditional.multiple([1, 2, 3, 4], historyJoint)
        gmtk.saveDpmfsProbs(
            outDir,
            "%sGivenC1C2C3C4" % symbolName,
            len(fullConditional.vectSubList([1, 2, 3, 4])),
            nSymbols,
            fullConditional,
        )

        # (dimensions passed to marginalize, dimensions passed to
        # conditionalize / vectSubList, output name format)
        dpmfLevels = (
            ((0, 1, 2, 3), (1, 2, 3), "%sGivenC1C2C3"),
            ((0, 1, 2), (1, 2), "%sGivenC1C2"),
        )
        for keptDims, historyDims, nameFormat in dpmfLevels:
            joint = joint.marginalize(list(keptDims))
            conditional = joint.conditionalize(list(historyDims))
            gmtk.saveDpmfsProbs(
                outDir,
                nameFormat % symbolName,
                len(conditional.vectSubList(list(historyDims))),
                nSymbols,
                conditional,
            )

        joint = joint.marginalize([0, 1])
        symbolGivenConcept = joint.conditionalize([1])

        # When conditioning on _SINK_, generating the _sink_ word must be
        # allowed; otherwise the _SINK_ concept would show up in the stack.
        # TODO: Turn it into validator
        symbolGivenConcept.setValue([sinkSymbol, sinkConcept], 1)

        gmtk.saveDcptBigram(
            outDir, "%sGivenC1" % symbolName, nSymbols, nConcepts, symbolGivenConcept
        )

        unigram = joint.marginalize([0])

        # Only the _unseen_ word should be decodable at this level, so the
        # probability of generating _empty_ is zeroed and the table is then
        # renormalised.
        unigram.setValue([int(symbols["_empty_"])], 0)
        unigram = unigram.normJoint()

        gmtk.saveDcptUnigram(outDir, "%sUnigram" % symbolName, nSymbols, unigram)

        gmtk.saveDcptUnseen(outDir, "%sZerogram" % symbolName, nSymbols, symbols)
示例#2
0
# Save concept1GivenC2C3 as DPMFs.
# NOTE(review): jointProbC1C2C3, dirOut, conceptCard, conceptMap and verbose
# are defined earlier in the script, outside this excerpt.
concept1GivenC2C3 = jointProbC1C2C3.conditionalize([1, 2])

gmtk.saveDpmfsProbs(
    dirOut,
    "concept1GivenC2C3",
    len(concept1GivenC2C3.vectSubList([1, 2])),
    conceptCard,
    concept1GivenC2C3,
)

##############################################################################
# Bigram level, saved as a CPT.
jointProbC1C2 = jointProbC1C2C3.marginalize([0, 1])
concept1GivenC2 = jointProbC1C2.conditionalize([1])

# When conditioning on _SINK_, generating the _SINK_ concept must be allowed;
# otherwise the _SINK_ concept would show up in the stack.  The same override
# is applied when conditioning on _DUMMY_.
for _conditioningConcept in ("_SINK_", "_DUMMY_"):
    concept1GivenC2.setValue(
        [int(conceptMap["_SINK_"]), int(conceptMap[_conditioningConcept])], 1
    )

gmtk.saveDcptBigram(
    dirOut,
    "concept1GivenC2",
    conceptCard,
    conceptCard,
    concept1GivenC2,
)

##############################################################################
# Unigram level, saved as a CPT.
concept1Unigram = jointProbC1C2.marginalize([0])
gmtk.saveDcptUnigram(dirOut, "concept1Unigram", conceptCard, concept1Unigram)

################################################################################
### Zerogram output is currently disabled:
##gmtk.saveDcptZerogram(dirOut, "concept1Zerogram", conceptCard)

if verbose:
    print("-------------------------------------------------", "Finish", sep="\n")
    
示例#3
0
# Trigram level: push conditioned on C1 and C2, saved as DPMFs.
# NOTE(review): jointProbPC1C2C3, penalty, pushCard, dirOut, conceptMap,
# conceptCard and verbose are defined earlier in the script, outside this
# excerpt.
jointProbPC1C2 = jointProbPC1C2C3.marginalize([0, 1, 2])
pushGivenC1C2 = jointProbPC1C2.conditionalize([1, 2]).insertPenalty(
    0, penalty, pushCard
)

gmtk.saveDpmfsProbs(
    dirOut,
    "pushGivenC1C2",
    len(pushGivenC1C2.vectSubList([1, 2])),
    pushCard,
    pushGivenC1C2,
    -penalty,
)

##############################################################################
# Bigram level, saved as a CPT.
jointProbPC1 = jointProbPC1C2.marginalize([0, 1])
pushGivenC1 = jointProbPC1.conditionalize([1]).insertPenalty(
    0, penalty, pushCard
)

# Original author's note: when conditioning on _SINK_, deleting the whole
# stack has to be enabled.
# NOTE(review): the operation value 4 is hard-coded; its meaning is not
# visible in this excerpt -- confirm.
pushGivenC1.setValue([4, int(conceptMap["_SINK_"])], 1)

gmtk.saveDcptBigram(dirOut, "pushGivenC1", pushCard, conceptCard, pushGivenC1)

##############################################################################
# Unigram level, saved as a CPT.
pushUnigram = jointProbPC1.marginalize([0]).insertPenalty(0, penalty, pushCard)

gmtk.saveDcptUnigram(dirOut, "pushUnigram", pushCard, pushUnigram)

if verbose:
    print("-------------------------------------------------", "Finish", sep="\n")
    def main(self, conceptMap, symbolMap, symbolDt, dcpts, dpmfs, symbolName,
             outDir):
        """Save the symbol model at each history length, 4 concepts to none.

        Loads both lexicons via ``LexMap().read`` and the decision tree, DCPT
        and DPMF inputs via ``gmtk``, then writes the ``GivenC1C2C3C4``,
        ``GivenC1C2C3``, ``GivenC1C2``, ``GivenC1``, ``Unigram`` and
        ``Zerogram`` tables (each name prefixed with ``symbolName``) through
        the ``gmtk.save*`` helpers, all targeting ``outDir``.

        NOTE(review): the index lists passed to ``marginalize`` /
        ``conditionalize`` are presumably the dimensions kept / conditioned
        on, with index 0 standing for the symbol -- confirm.
        """
        conceptLex = LexMap().read(conceptMap)
        symbolLex = LexMap().read(symbolMap)

        symbolSinkId = int(symbolLex['_sink_'])
        conceptSinkId = int(conceptLex['_SINK_'])

        conceptCount = len(conceptLex)
        symbolCount = len(symbolLex)

        # One decision tree backs both the history joint and the symbol DPMF.
        decisionTree = gmtk.readDt(symbolDt)
        historyDcpt = gmtk.readDcpt(dcpts, "jointProbC1C2C3C4")
        historyProb = gmtk.combineDtDcpt1(decisionTree, historyDcpt)

        symbolDpmf = gmtk.readDpmf(dpmfs, "%sGivenC1C2C3C4" % symbolName)
        givenC1234 = gmtk.combineDtDcpt2(decisionTree, symbolDpmf)

        # Four-concept history: the conditional is saved as loaded.
        jointC1234 = givenC1234.multiple([1, 2, 3, 4], historyProb)
        rowsC1234 = len(givenC1234.vectSubList([1, 2, 3, 4]))
        gmtk.saveDpmfsProbs(outDir, "%sGivenC1C2C3C4" % symbolName, rowsC1234,
                            symbolCount, givenC1234)

        # Three-concept history.
        jointC123 = jointC1234.marginalize([0, 1, 2, 3])
        givenC123 = jointC123.conditionalize([1, 2, 3])
        rowsC123 = len(givenC123.vectSubList([1, 2, 3]))
        gmtk.saveDpmfsProbs(outDir, "%sGivenC1C2C3" % symbolName, rowsC123,
                            symbolCount, givenC123)

        # Two-concept history.
        jointC12 = jointC123.marginalize([0, 1, 2])
        givenC12 = jointC12.conditionalize([1, 2])
        rowsC12 = len(givenC12.vectSubList([1, 2]))
        gmtk.saveDpmfsProbs(outDir, "%sGivenC1C2" % symbolName, rowsC12,
                            symbolCount, givenC12)

        # One-concept history, stored as a bigram CPT.
        jointC1 = jointC12.marginalize([0, 1])
        givenC1 = jointC1.conditionalize([1])

        # Conditioning on _SINK_ must allow the _sink_ word to be generated,
        # otherwise the _SINK_ concept would appear in the stack.
        # TODO: Turn it into validator
        givenC1.setValue([symbolSinkId, conceptSinkId], 1)

        gmtk.saveDcptBigram(outDir, "%sGivenC1" % symbolName, symbolCount,
                            conceptCount, givenC1)

        unigramProb = jointC1.marginalize([0])

        # Only the _unseen_ word should be decodable here: zero out _empty_
        # and renormalise so the probabilities sum to one again.
        unigramProb.setValue([int(symbolLex["_empty_"])], 0)
        unigramProb = unigramProb.normJoint()

        gmtk.saveDcptUnigram(outDir, "%sUnigram" % symbolName, symbolCount,
                             unigramProb)

        gmtk.saveDcptUnseen(outDir, "%sZerogram" % symbolName, symbolCount,
                            symbolLex)
##############################################################################
# Trigram level: pop conditioned on C1 and C2, saved as DPMFs.
# NOTE(review): jointProbPC1C2C3, penalty, popCard, dirOut, conceptMap,
# conceptCard and verbose are defined earlier in the script, outside this
# excerpt.
jointProbPC1C2 = jointProbPC1C2C3.marginalize([0, 1, 2])
popGivenC1C2 = jointProbPC1C2.conditionalize([1, 2]).insertPenalty(
    0, penalty, popCard
)

gmtk.saveDpmfsProbs(
    dirOut,
    "popGivenC1C2",
    len(popGivenC1C2.vectSubList([1, 2])),
    popCard,
    popGivenC1C2,
    -penalty,
)

##############################################################################
# Bigram level, saved as a CPT.
jointProbPC1 = jointProbPC1C2.marginalize([0, 1])
popGivenC1 = jointProbPC1.conditionalize([1]).insertPenalty(0, penalty, popCard)

# Original author's note: when conditioning on _SINK_, deleting the whole
# stack has to be enabled.
# NOTE(review): the pop value 4 is hard-coded; presumably it is the value
# that empties the stack -- confirm.
popGivenC1.setValue([4, int(conceptMap["_SINK_"])], 1)

gmtk.saveDcptBigram(dirOut, "popGivenC1", popCard, conceptCard, popGivenC1)

##############################################################################
# Unigram level, saved as a CPT.
popUnigram = jointProbPC1.marginalize([0]).insertPenalty(0, penalty, popCard)

gmtk.saveDcptUnigram(dirOut, "popUnigram", popCard, popUnigram)

if verbose:
    print("-------------------------------------------------", "Finish", sep="\n")
    
示例#6
0
# Pop model for the two-concept history (C1, C2), written as DPMFs.
# NOTE(review): this excerpt relies on jointProbPC1C2C3, penalty, popCard,
# dirOut, conceptMap, conceptCard and verbose being set up earlier in the
# script.
jointProbPC1C2 = jointProbPC1C2C3.marginalize([0, 1, 2])
popGivenC1C2 = (
    jointProbPC1C2
    .conditionalize([1, 2])
    .insertPenalty(0, penalty, popCard)
)

gmtk.saveDpmfsProbs(
    dirOut,
    "popGivenC1C2",
    len(popGivenC1C2.vectSubList([1, 2])),
    popCard,
    popGivenC1C2,
    -penalty,
)

##############################################################################
# Pop model for the one-concept history, written as a bigram CPT.
jointProbPC1 = jointProbPC1C2.marginalize([0, 1])
popGivenC1 = (
    jointProbPC1
    .conditionalize([1])
    .insertPenalty(0, penalty, popCard)
)

# With _SINK_ as the conditioning concept, deleting the whole stack must be
# possible (original author's note).
# NOTE(review): 4 is a hard-coded pop value; presumably "pop everything" --
# confirm.
popGivenC1.setValue([4, int(conceptMap["_SINK_"])], 1)

gmtk.saveDcptBigram(
    dirOut,
    "popGivenC1",
    popCard,
    conceptCard,
    popGivenC1,
)

##############################################################################
# Pop model without history, written as a unigram CPT.
popUnigram = (
    jointProbPC1
    .marginalize([0])
    .insertPenalty(0, penalty, popCard)
)

gmtk.saveDcptUnigram(dirOut, "popUnigram", popCard, popUnigram)

if verbose:
    print("-------------------------------------------------", "Finish", sep="\n")
##############################################################################
# Push model for the two-concept history (C1, C2), written as DPMFs.
# NOTE(review): this excerpt relies on jointProbPC1C2C3, penalty, pushCard,
# dirOut, conceptMap, conceptCard and verbose being set up earlier in the
# script.
jointProbPC1C2 = jointProbPC1C2C3.marginalize([0, 1, 2])
pushGivenC1C2 = (
    jointProbPC1C2
    .conditionalize([1, 2])
    .insertPenalty(0, penalty, pushCard)
)

gmtk.saveDpmfsProbs(
    dirOut,
    "pushGivenC1C2",
    len(pushGivenC1C2.vectSubList([1, 2])),
    pushCard,
    pushGivenC1C2,
    -penalty,
)

##############################################################################
# Push model for the one-concept history, written as a bigram CPT.
jointProbPC1 = jointProbPC1C2.marginalize([0, 1])
pushGivenC1 = (
    jointProbPC1
    .conditionalize([1])
    .insertPenalty(0, penalty, pushCard)
)

# With _SINK_ as the conditioning concept, deleting the whole stack must be
# possible (original author's note).
# NOTE(review): 4 is a hard-coded operation value whose meaning is not
# visible in this excerpt -- confirm.
pushGivenC1.setValue([4, int(conceptMap["_SINK_"])], 1)

gmtk.saveDcptBigram(
    dirOut,
    "pushGivenC1",
    pushCard,
    conceptCard,
    pushGivenC1,
)

##############################################################################
# Push model without history, written as a unigram CPT.
pushUnigram = (
    jointProbPC1
    .marginalize([0])
    .insertPenalty(0, penalty, pushCard)
)

gmtk.saveDcptUnigram(dirOut, "pushUnigram", pushCard, pushUnigram)

if verbose:
    print("-------------------------------------------------", "Finish", sep="\n")
## Disabled DPMF output, kept for reference:
##gmtk.saveDpmfsProbs(dirOut, "concept1GivenC2C3C4", len(concept1GivenC2C3C4.vectSubList([1, 2, 3])), conceptCard, concept1GivenC2C3C4)
##gmtk.saveDpmfsProbs(dirOut, "concept1GivenC2C3C4X", len(concept1GivenC2C3C4X.vectSubList([1, 2, 3])), conceptCard, concept1GivenC2C3C4X)

##############################################################################
# Bigram level, saved as a CPT.
# NOTE(review): jointProbC2C3C4, dirOut, conceptCard, conceptMap and verbose
# are defined earlier in the script, outside this excerpt.
jointProbC2C3 = jointProbC2C3C4.marginalize([0, 1])
concept2GivenC3 = jointProbC2C3.conditionalize([1])

# When conditioning on _SINK_, generating the _SINK_ concept must be allowed;
# otherwise the _SINK_ concept would show up in the stack.  The same override
# is applied when conditioning on _DUMMY_.
for _conditioningConcept in ("_SINK_", "_DUMMY_"):
    concept2GivenC3.setValue(
        [int(conceptMap["_SINK_"]), int(conceptMap[_conditioningConcept])], 1
    )

gmtk.saveDcptBigram(
    dirOut,
    "concept2GivenC3",
    conceptCard,
    conceptCard,
    concept2GivenC3,
)

##############################################################################
# Unigram level, saved as a CPT.
concept2Unigram = jointProbC2C3.marginalize([0])
gmtk.saveDcptUnigram(dirOut, "concept2Unigram", conceptCard, concept2Unigram)

################################################################################
### Zerogram output is currently disabled:
##gmtk.saveDcptZerogram(dirOut, "concept1Zerogram", conceptCard)

if verbose:
    print("-------------------------------------------------", "Finish", sep="\n")
    
示例#9
0
##############################################################################
# Concept-2 model conditioned on C3, written as a bigram CPT.
# NOTE(review): this excerpt relies on jointProbC2C3C4, dirOut, conceptCard,
# conceptMap and verbose being set up earlier in the script.
jointProbC2C3 = jointProbC2C3C4.marginalize([0, 1])
concept2GivenC3 = jointProbC2C3.conditionalize([1])

# Conditioning on _SINK_ must allow the _SINK_ concept to be generated,
# otherwise the _SINK_ concept would appear in the stack.
_sinkGivenSink = [int(conceptMap["_SINK_"]), int(conceptMap["_SINK_"])]
concept2GivenC3.setValue(_sinkGivenSink, 1)
# Same override when conditioning on _DUMMY_.
_sinkGivenDummy = [int(conceptMap["_SINK_"]), int(conceptMap["_DUMMY_"])]
concept2GivenC3.setValue(_sinkGivenDummy, 1)

gmtk.saveDcptBigram(
    dirOut,
    "concept2GivenC3",
    conceptCard,
    conceptCard,
    concept2GivenC3,
)

##############################################################################
# Concept-2 model without history, written as a unigram CPT.
concept2Unigram = jointProbC2C3.marginalize([0])
gmtk.saveDcptUnigram(
    dirOut,
    "concept2Unigram",
    conceptCard,
    concept2Unigram,
)

################################################################################
### Zerogram output is currently disabled:
##gmtk.saveDcptZerogram(dirOut, "concept1Zerogram", conceptCard)

if verbose:
    print("-------------------------------------------------", "Finish", sep="\n")