def main(self, conceptMap, symbolMap, symbolDt, dcpts, dpmfs, symbolName, outDir):
    """Derive the back-off tables of a symbol given the concept stack and save them.

    The lexicons are loaded with ``LexMap().read``; the decision tree, the
    ``jointProbC1C2C3C4`` table and the ``<symbolName>GivenC1C2C3C4`` table
    are loaded through ``gmtk``.  Their product is then reduced step by step
    with ``marginalize``/``conditionalize`` and each conditional table is
    written to ``outDir``:

    * ``<symbolName>GivenC1C2C3C4``, ``<symbolName>GivenC1C2C3`` and
      ``<symbolName>GivenC1C2`` with ``gmtk.saveDpmfsProbs``,
    * ``<symbolName>GivenC1`` with ``gmtk.saveDcptBigram``,
    * ``<symbolName>Unigram`` with ``gmtk.saveDcptUnigram``,
    * ``<symbolName>Zerogram`` with ``gmtk.saveDcptUnseen``.

    Args:
        conceptMap: Handed to ``LexMap().read`` to load the concept lexicon
            (presumably a file name -- confirm against ``LexMap``).
        symbolMap: Handed to ``LexMap().read`` to load the symbol lexicon.
        symbolDt: Argument of ``gmtk.readDt``.
        dcpts: Argument of ``gmtk.readDcpt``.
        dpmfs: Argument of ``gmtk.readDpmf``.
        symbolName: Prefix of every table name that is read or saved.
        outDir: Destination passed to every ``gmtk.save*`` call.
    """
    concept_lex = LexMap().read(conceptMap)
    symbol_lex = LexMap().read(symbolMap)

    # Both sink ids are looked up before any table is read.
    sink_symbol = int(symbol_lex["_sink_"])
    sink_concept = int(concept_lex["_SINK_"])
    concept_card = len(concept_lex)
    symbol_card = len(symbol_lex)

    # The same decision tree is combined with the joint table and with the
    # highest-order conditional table.
    tree = gmtk.readDt(symbolDt)
    concept_joint = gmtk.combineDtDcpt1(tree, gmtk.readDcpt(dcpts, "jointProbC1C2C3C4"))
    top_name = "%sGivenC1C2C3C4" % symbolName
    top_conditional = gmtk.combineDtDcpt2(tree, gmtk.readDpmf(dpmfs, top_name))

    joint = top_conditional.multiple([1, 2, 3, 4], concept_joint)
    gmtk.saveDpmfsProbs(
        outDir,
        top_name,
        len(top_conditional.vectSubList([1, 2, 3, 4])),
        symbol_card,
        top_conditional,
    )

    # Middle back-off orders: shrink the joint by one context position, turn
    # it into a conditional table and save it.  A fresh list is built for
    # every call, exactly as the list literals in a straight-line version.
    for name_format, context in (
        ("%sGivenC1C2C3", (1, 2, 3)),
        ("%sGivenC1C2", (1, 2)),
    ):
        joint = joint.marginalize([0] + list(context))
        conditional = joint.conditionalize(list(context))
        gmtk.saveDpmfsProbs(
            outDir,
            name_format % symbolName,
            len(conditional.vectSubList(list(context))),
            symbol_card,
            conditional,
        )

    symbol_c1_joint = joint.marginalize([0, 1])
    symbol_given_c1 = symbol_c1_joint.conditionalize([1])
    # When conditioning on the _SINK_ concept only the _sink_ word may be
    # generated; otherwise the _SINK_ concept would show up in the stack.
    # TODO: Turn it into validator
    symbol_given_c1.setValue([sink_symbol, sink_concept], 1)
    gmtk.saveDcptBigram(
        outDir, "%sGivenC1" % symbolName, symbol_card, concept_card, symbol_given_c1
    )

    unigram = symbol_c1_joint.marginalize([0])
    # Only the _unseen_ word has to be decodable here, so the probability of
    # generating _empty_ is set to zero ...
    unigram.setValue([int(symbol_lex["_empty_"])], 0)
    # ... and the table is normalized so that the probabilities sum to one.
    unigram = unigram.normJoint()
    gmtk.saveDcptUnigram(outDir, "%sUnigram" % symbolName, symbol_card, unigram)
    gmtk.saveDcptUnseen(outDir, "%sZerogram" % symbolName, symbol_card, symbol_lex)
# NOTE(review): tail fragment -- the enclosing definition's header is outside this view and the
# original line breaks/indentation were lost, so statements and comments share one line.
# Visible steps: jointProbC1C2C3.conditionalize([1, 2]) is saved as "concept1GivenC2C3" with
# gmtk.saveDpmfsProbs; the joint is reduced with marginalize([0, 1]), conditionalize([1]) is
# applied, the entries [_SINK_, _SINK_] and [_SINK_, _DUMMY_] are set to 1 and the table is
# saved as "concept1GivenC2" with gmtk.saveDcptBigram; marginalize([0]) yields the table saved
# as "concept1Unigram". The zerogram save is commented out. A "Finish" banner is printed when
# `verbose` is set (whether both prints are guarded cannot be told from here -- confirm).
concept1GivenC2C3 = jointProbC1C2C3.conditionalize([1, 2]) gmtk.saveDpmfsProbs(dirOut, "concept1GivenC2C3", len(concept1GivenC2C3.vectSubList([1, 2])), conceptCard, concept1GivenC2C3) ############################################################################## # save bigrams as CPT jointProbC1C2 = jointProbC1C2C3.marginalize([0, 1]) concept1GivenC2 = jointProbC1C2.conditionalize([1]) # in case of conditioning by _SINK_ I have to enable to generate _SINK_ concept only # otherwise I would see _SINK_ concept in the stack concept1GivenC2.setValue([int(conceptMap["_SINK_"]), int(conceptMap["_SINK_"])], 1) # do the same for _DUMMY_ concept1GivenC2.setValue([int(conceptMap["_SINK_"]), int(conceptMap["_DUMMY_"])], 1) gmtk.saveDcptBigram(dirOut, "concept1GivenC2", conceptCard, conceptCard, concept1GivenC2) ############################################################################## # save unigrams as CPT concept1Unigram = jointProbC1C2.marginalize([0]) gmtk.saveDcptUnigram(dirOut, "concept1Unigram", conceptCard, concept1Unigram) ################################################################################ ### save zerograms as CPT ##gmtk.saveDcptZerogram(dirOut, "concept1Zerogram", conceptCard) if verbose: print("-------------------------------------------------") print("Finish")
# NOTE(review): tail fragment -- the enclosing definition's header is outside this view and the
# original line breaks/indentation were lost, so statements and comments share one line.
# Visible steps: jointProbPC1C2C3 is reduced with marginalize([0, 1, 2]); the conditionalize([1, 2])
# result gets insertPenalty(0, penalty, pushCard) and is saved as "pushGivenC1C2" with
# gmtk.saveDpmfsProbs (extra argument -penalty). The same pattern with marginalize([0, 1]) /
# conditionalize([1]) gives "pushGivenC1" (saved with gmtk.saveDcptBigram) after the entry
# [4, _SINK_] is set to 1, and marginalize([0]) gives "pushUnigram" (gmtk.saveDcptUnigram).
# NOTE(review): the inline comment speaks of deleting the whole stack although this is the push
# table, and the index 4 is hard-coded -- confirm both against the pop variant and pushCard.
# save trigrams jointProbPC1C2 = jointProbPC1C2C3.marginalize([0, 1, 2]) pushGivenC1C2 = jointProbPC1C2.conditionalize([1, 2]) pushGivenC1C2 = pushGivenC1C2.insertPenalty(0, penalty, pushCard) gmtk.saveDpmfsProbs(dirOut, "pushGivenC1C2", len(pushGivenC1C2.vectSubList([1, 2])), pushCard, pushGivenC1C2, -penalty) ############################################################################## # save bigrams as CPT jointProbPC1 = jointProbPC1C2.marginalize([0, 1]) pushGivenC1 = jointProbPC1.conditionalize([1]) pushGivenC1 = pushGivenC1.insertPenalty(0, penalty, pushCard) # in case of conditioning by _SINK_, I have to enable to delete all stack pushGivenC1.setValue([4, int(conceptMap["_SINK_"])], 1) gmtk.saveDcptBigram(dirOut, "pushGivenC1", pushCard, conceptCard, pushGivenC1) ############################################################################## # save unigrams as CPT pushUnigram = jointProbPC1.marginalize([0]) pushUnigram = pushUnigram.insertPenalty(0, penalty, pushCard) gmtk.saveDcptUnigram(dirOut, "pushUnigram", pushCard, pushUnigram) if verbose: print("-------------------------------------------------") print("Finish")
def main(self, conceptMap, symbolMap, symbolDt, dcpts, dpmfs, symbolName, outDir):
    """Derive the back-off tables of a symbol given the concept stack and save them.

    The lexicons are loaded with ``LexMap().read``; the decision tree, the
    ``jointProbC1C2C3C4`` table and the ``<symbolName>GivenC1C2C3C4`` table
    are loaded through ``gmtk``.  Their product is then reduced step by step
    with ``marginalize``/``conditionalize`` and each conditional table is
    written to ``outDir``:

    * ``<symbolName>GivenC1C2C3C4``, ``<symbolName>GivenC1C2C3`` and
      ``<symbolName>GivenC1C2`` with ``gmtk.saveDpmfsProbs``,
    * ``<symbolName>GivenC1`` with ``gmtk.saveDcptBigram``,
    * ``<symbolName>Unigram`` with ``gmtk.saveDcptUnigram``,
    * ``<symbolName>Zerogram`` with ``gmtk.saveDcptUnseen``.

    Args:
        conceptMap: Handed to ``LexMap().read`` to load the concept lexicon
            (presumably a file name -- confirm against ``LexMap``).
        symbolMap: Handed to ``LexMap().read`` to load the symbol lexicon.
        symbolDt: Argument of ``gmtk.readDt``.
        dcpts: Argument of ``gmtk.readDcpt``.
        dpmfs: Argument of ``gmtk.readDpmf``.
        symbolName: Prefix of every table name that is read or saved.
        outDir: Destination passed to every ``gmtk.save*`` call.
    """
    concept_lex = LexMap().read(conceptMap)
    symbol_lex = LexMap().read(symbolMap)

    # Both sink ids are looked up before any table is read.
    sink_symbol = int(symbol_lex['_sink_'])
    sink_concept = int(concept_lex['_SINK_'])
    concept_card = len(concept_lex)
    symbol_card = len(symbol_lex)

    # The same decision tree is combined with the joint table and with the
    # highest-order conditional table.
    tree = gmtk.readDt(symbolDt)
    concept_joint = gmtk.combineDtDcpt1(tree, gmtk.readDcpt(dcpts, "jointProbC1C2C3C4"))
    top_name = "%sGivenC1C2C3C4" % symbolName
    top_conditional = gmtk.combineDtDcpt2(tree, gmtk.readDpmf(dpmfs, top_name))

    joint = top_conditional.multiple([1, 2, 3, 4], concept_joint)
    gmtk.saveDpmfsProbs(
        outDir,
        top_name,
        len(top_conditional.vectSubList([1, 2, 3, 4])),
        symbol_card,
        top_conditional,
    )

    # Middle back-off orders: shrink the joint by one context position, turn
    # it into a conditional table and save it.  A fresh list is built for
    # every call, exactly as the list literals in a straight-line version.
    for name_format, context in (
        ("%sGivenC1C2C3", (1, 2, 3)),
        ("%sGivenC1C2", (1, 2)),
    ):
        joint = joint.marginalize([0] + list(context))
        conditional = joint.conditionalize(list(context))
        gmtk.saveDpmfsProbs(
            outDir,
            name_format % symbolName,
            len(conditional.vectSubList(list(context))),
            symbol_card,
            conditional,
        )

    symbol_c1_joint = joint.marginalize([0, 1])
    symbol_given_c1 = symbol_c1_joint.conditionalize([1])
    # When conditioning on the _SINK_ concept only the _sink_ word may be
    # generated; otherwise the _SINK_ concept would show up in the stack.
    # TODO: Turn it into validator
    symbol_given_c1.setValue([sink_symbol, sink_concept], 1)
    gmtk.saveDcptBigram(
        outDir, "%sGivenC1" % symbolName, symbol_card, concept_card, symbol_given_c1
    )

    unigram = symbol_c1_joint.marginalize([0])
    # Only the _unseen_ word has to be decodable here, so the probability of
    # generating _empty_ is set to zero ...
    unigram.setValue([int(symbol_lex["_empty_"])], 0)
    # ... and the table is normalized so that the probabilities sum to one.
    unigram = unigram.normJoint()
    gmtk.saveDcptUnigram(outDir, "%sUnigram" % symbolName, symbol_card, unigram)
    gmtk.saveDcptUnseen(outDir, "%sZerogram" % symbolName, symbol_card, symbol_lex)
# NOTE(review): tail fragment -- the enclosing definition's header is outside this view and the
# original line breaks/indentation were lost, so statements and comments share one line.
# Visible steps: jointProbPC1C2C3 is reduced with marginalize([0, 1, 2]); the conditionalize([1, 2])
# result gets insertPenalty(0, penalty, popCard) and is saved as "popGivenC1C2" with
# gmtk.saveDpmfsProbs (extra argument -penalty). The same pattern with marginalize([0, 1]) /
# conditionalize([1]) gives "popGivenC1" (saved with gmtk.saveDcptBigram) after the entry
# [4, _SINK_] is set to 1, and marginalize([0]) gives "popUnigram" (gmtk.saveDcptUnigram).
# NOTE(review): the index 4 is hard-coded; per the inline comment it should be the value that
# empties the stack -- confirm against popCard.
############################################################################## # save trigrams jointProbPC1C2 = jointProbPC1C2C3.marginalize([0, 1, 2]) popGivenC1C2 = jointProbPC1C2.conditionalize([1, 2]) popGivenC1C2 = popGivenC1C2.insertPenalty(0, penalty, popCard) gmtk.saveDpmfsProbs(dirOut, "popGivenC1C2", len(popGivenC1C2.vectSubList([1, 2])), popCard, popGivenC1C2, -penalty) ############################################################################## # save bigrams as CPT jointProbPC1 = jointProbPC1C2.marginalize([0, 1]) popGivenC1 = jointProbPC1.conditionalize([1]) popGivenC1 = popGivenC1.insertPenalty(0, penalty, popCard) # in case of conditioning by _SINK_, I have to enable to delete all stack popGivenC1.setValue([4, int(conceptMap["_SINK_"])], 1) gmtk.saveDcptBigram(dirOut, "popGivenC1", popCard, conceptCard, popGivenC1) ############################################################################## # save unigrams as CPT popUnigram = jointProbPC1.marginalize([0]) popUnigram = popUnigram.insertPenalty(0, penalty, popCard) gmtk.saveDcptUnigram(dirOut, "popUnigram", popCard, popUnigram) if verbose: print("-------------------------------------------------") print("Finish")
# NOTE(review): tail fragment -- the enclosing definition's header is outside this view and the
# original line breaks/indentation were lost, so statements and comments share one line.
# Visible steps: jointProbPC1C2C3 is reduced with marginalize([0, 1, 2]); the conditionalize([1, 2])
# result gets insertPenalty(0, penalty, popCard) and is saved as "popGivenC1C2" with
# gmtk.saveDpmfsProbs (extra argument -penalty). The same pattern with marginalize([0, 1]) /
# conditionalize([1]) gives "popGivenC1" (saved with gmtk.saveDcptBigram) after the entry
# [4, _SINK_] is set to 1, and marginalize([0]) gives "popUnigram" (gmtk.saveDcptUnigram).
# NOTE(review): the index 4 is hard-coded; per the inline comment it should be the value that
# empties the stack -- confirm against popCard.
# save trigrams jointProbPC1C2 = jointProbPC1C2C3.marginalize([0, 1, 2]) popGivenC1C2 = jointProbPC1C2.conditionalize([1, 2]) popGivenC1C2 = popGivenC1C2.insertPenalty(0, penalty, popCard) gmtk.saveDpmfsProbs(dirOut, "popGivenC1C2", len(popGivenC1C2.vectSubList([1, 2])), popCard, popGivenC1C2, -penalty) ############################################################################## # save bigrams as CPT jointProbPC1 = jointProbPC1C2.marginalize([0, 1]) popGivenC1 = jointProbPC1.conditionalize([1]) popGivenC1 = popGivenC1.insertPenalty(0, penalty, popCard) # in case of conditioning by _SINK_, I have to enable to delete all stack popGivenC1.setValue([4, int(conceptMap["_SINK_"])], 1) gmtk.saveDcptBigram(dirOut, "popGivenC1", popCard, conceptCard, popGivenC1) ############################################################################## # save unigrams as CPT popUnigram = jointProbPC1.marginalize([0]) popUnigram = popUnigram.insertPenalty(0, penalty, popCard) gmtk.saveDcptUnigram(dirOut, "popUnigram", popCard, popUnigram) if verbose: print("-------------------------------------------------") print("Finish")
# NOTE(review): tail fragment -- the enclosing definition's header is outside this view and the
# original line breaks/indentation were lost, so statements and comments share one line.
# Visible steps: jointProbPC1C2C3 is reduced with marginalize([0, 1, 2]); the conditionalize([1, 2])
# result gets insertPenalty(0, penalty, pushCard) and is saved as "pushGivenC1C2" with
# gmtk.saveDpmfsProbs (extra argument -penalty). The same pattern with marginalize([0, 1]) /
# conditionalize([1]) gives "pushGivenC1" (saved with gmtk.saveDcptBigram) after the entry
# [4, _SINK_] is set to 1, and marginalize([0]) gives "pushUnigram" (gmtk.saveDcptUnigram).
# NOTE(review): the inline comment speaks of deleting the whole stack although this is the push
# table, and the index 4 is hard-coded -- confirm both against the pop variant and pushCard.
############################################################################## # save trigrams jointProbPC1C2 = jointProbPC1C2C3.marginalize([0, 1, 2]) pushGivenC1C2 = jointProbPC1C2.conditionalize([1, 2]) pushGivenC1C2 = pushGivenC1C2.insertPenalty(0, penalty, pushCard) gmtk.saveDpmfsProbs(dirOut, "pushGivenC1C2", len(pushGivenC1C2.vectSubList([1, 2])), pushCard, pushGivenC1C2, -penalty) ############################################################################## # save bigrams as CPT jointProbPC1 = jointProbPC1C2.marginalize([0, 1]) pushGivenC1 = jointProbPC1.conditionalize([1]) pushGivenC1 = pushGivenC1.insertPenalty(0, penalty, pushCard) # in case of conditioning by _SINK_, I have to enable to delete all stack pushGivenC1.setValue([4, int(conceptMap["_SINK_"])], 1) gmtk.saveDcptBigram(dirOut, "pushGivenC1", pushCard, conceptCard, pushGivenC1) ############################################################################## # save unigrams as CPT pushUnigram = jointProbPC1.marginalize([0]) pushUnigram = pushUnigram.insertPenalty(0, penalty, pushCard) gmtk.saveDcptUnigram(dirOut, "pushUnigram", pushCard, pushUnigram) if verbose: print("-------------------------------------------------") print("Finish")
# NOTE(review): tail fragment -- the enclosing definition's header is outside this view and the
# original line breaks/indentation were lost, so statements and comments share one line.
# Visible steps: the two "concept1GivenC2C3C4" saves are commented out; jointProbC2C3C4 is
# reduced with marginalize([0, 1]), conditionalize([1]) is applied, the entries
# [_SINK_, _SINK_] and [_SINK_, _DUMMY_] are set to 1 and the table is saved as
# "concept2GivenC3" with gmtk.saveDcptBigram; marginalize([0]) yields the table saved as
# "concept2Unigram" with gmtk.saveDcptUnigram.
# NOTE(review): the commented-out zerogram call still names "concept1Zerogram" -- adjust the
# name if it is ever re-enabled for the concept2 tables.
## ##gmtk.saveDpmfsProbs(dirOut, "concept1GivenC2C3C4", len(concept1GivenC2C3C4.vectSubList([1, 2, 3])), conceptCard, concept1GivenC2C3C4) ##gmtk.saveDpmfsProbs(dirOut, "concept1GivenC2C3C4X", len(concept1GivenC2C3C4X.vectSubList([1, 2, 3])), conceptCard, concept1GivenC2C3C4X) ############################################################################## # save bigrams as CPT jointProbC2C3 = jointProbC2C3C4.marginalize([0, 1]) concept2GivenC3 = jointProbC2C3.conditionalize([1]) # in case of conditioning by _SINK_ I have to enable to generate _SINK_ concept only # otherwise I would see _SINK_ concept in the stack concept2GivenC3.setValue([int(conceptMap["_SINK_"]), int(conceptMap["_SINK_"])], 1) # do the same for _DUMMY_ concept2GivenC3.setValue([int(conceptMap["_SINK_"]), int(conceptMap["_DUMMY_"])], 1) gmtk.saveDcptBigram(dirOut, "concept2GivenC3", conceptCard, conceptCard, concept2GivenC3) ############################################################################## # save unigrams as CPT concept2Unigram = jointProbC2C3.marginalize([0]) gmtk.saveDcptUnigram(dirOut, "concept2Unigram", conceptCard, concept2Unigram) ################################################################################ ### save zerograms as CPT ##gmtk.saveDcptZerogram(dirOut, "concept1Zerogram", conceptCard) if verbose: print("-------------------------------------------------") print("Finish")
# NOTE(review): tail fragment -- the enclosing definition's header is outside this view and the
# original line breaks/indentation were lost, so statements and comments share one line.
# Visible steps: jointProbC2C3C4 is reduced with marginalize([0, 1]), conditionalize([1]) is
# applied, the entries [_SINK_, _SINK_] and [_SINK_, _DUMMY_] are set to 1 and the table is
# saved as "concept2GivenC3" with gmtk.saveDcptBigram; marginalize([0]) yields the table saved
# as "concept2Unigram" with gmtk.saveDcptUnigram.
# NOTE(review): the commented-out zerogram call still names "concept1Zerogram" -- adjust the
# name if it is ever re-enabled for the concept2 tables.
############################################################################## # save bigrams as CPT jointProbC2C3 = jointProbC2C3C4.marginalize([0, 1]) concept2GivenC3 = jointProbC2C3.conditionalize([1]) # in case of conditioning by _SINK_ I have to enable to generate _SINK_ concept only # otherwise I would see _SINK_ concept in the stack concept2GivenC3.setValue( [int(conceptMap["_SINK_"]), int(conceptMap["_SINK_"])], 1) # do the same for _DUMMY_ concept2GivenC3.setValue( [int(conceptMap["_SINK_"]), int(conceptMap["_DUMMY_"])], 1) gmtk.saveDcptBigram(dirOut, "concept2GivenC3", conceptCard, conceptCard, concept2GivenC3) ############################################################################## # save unigrams as CPT concept2Unigram = jointProbC2C3.marginalize([0]) gmtk.saveDcptUnigram(dirOut, "concept2Unigram", conceptCard, concept2Unigram) ################################################################################ ### save zerograms as CPT ##gmtk.saveDcptZerogram(dirOut, "concept1Zerogram", conceptCard) if verbose: print("-------------------------------------------------") print("Finish")