# Script section: build the concept2GivenC3C4 decision tree, collection,
# sparse PMFs and dense PMFs from the hidden-observation input files.
#
# NOTE(review): the banner below announces concept1GivenC2C3C4, while every
# file written by this section is named concept2GivenC3C4 -- confirm which
# one is intended before changing the message.
print("-------------------------------------------------")
print("Generating concept1GivenC2C3C4.* files.")

# Load both lexicon maps; the cardinalities are simply the map sizes.
conceptMap = LexMap().read(conceptFileName)
conceptCard = len(conceptMap)
wordMap = LexMap().read(wordFileName)
wordCard = len(wordMap)

# Every input file contributes its (C2, C3, C4) data to the shared dict.
dt3 = {}
for fileName in list:
    if verbose:
        print("Processing file: " + fileName)
    HiddenObservation().read(fileName).collectConceptsC2C3C4(dt3)

##############################################################################
# Write the outputs; all of them share one base name.
# NOTE(review): the original comment here said "save fourgrams", but the
# distribution is C2 given (C3, C4) and saveDt is called with 2 -- confirm.
modelName = "concept2GivenC3C4"
numberOfSpmfs = gmtk.saveDt(dirOut, modelName, dt3, 2)
gmtk.saveCollection(dirOut, modelName, numberOfSpmfs)
gmtk.saveSpmfs(dirOut, modelName, numberOfSpmfs, conceptCard)
# The generic gmtk.saveDpmfs writer was disabled in the original in favour of
# the concept-specific writer below.
gmtk.saveDpmfsConcept(dirOut, modelName, numberOfSpmfs, conceptCard, type="c2")

# NOTE(review): nesting reconstructed from a whitespace-collapsed line; the
# "Finish" message is assumed to be guarded by `verbose` as well.
if verbose:
    print("-------------------------------------------------")
    print("Finish")
# Script section: collect pop statistics for three history lengths and write
# the corresponding GMTK files. dt4 and dt3 are created above this chunk.
dt2 = {}

# Each file is parsed once; the same observation object feeds all three
# collectors (4-, 3- and 2-concept histories).
for fileName in list:
    if verbose:
        print("Processing file: " + fileName)
    ho = HiddenObservation().read(fileName)
    ho.collectConceptsC1C2C3C4(dt4)
    ho.collectConceptsC1C2C3(dt3)
    ho.collectConceptsC1C2(dt2)

##############################################################################
# Pentagrams: pop given C1..C4. saveDt is called without an explicit order
# argument here, unlike the shorter histories below.
modelName = "popGivenC1C2C3C4"
numberOfSpmfs = gmtk.saveDt(dirOut, modelName, dt4)
gmtk.saveCollection(dirOut, modelName, numberOfSpmfs)
gmtk.saveSpmfs(dirOut, modelName, numberOfSpmfs, popCard)
gmtk.saveDpmfsPopPush(dirOut, modelName, numberOfSpmfs, popCard)

##############################################################################
# Quatrograms: pop given C1..C3.
modelName = "popGivenC1C2C3"
numberOfSpmfs = gmtk.saveDt(dirOut, modelName, dt3, 3)
gmtk.saveCollection(dirOut, modelName, numberOfSpmfs)
gmtk.saveSpmfs(dirOut, modelName, numberOfSpmfs, popCard)
# No dense PMFs are written for this order: they are created later, during
# smoothing.

##############################################################################
# Trigrams: pop given C1, C2.
modelName = "popGivenC1C2"
numberOfSpmfs = gmtk.saveDt(dirOut, modelName, dt2, 2)
gmtk.saveCollection(dirOut, modelName, numberOfSpmfs)
for w in range(symCard): index = [ w ] + hist if table.getSafeValue(index) >= EPSILON: new_value = table.getSafeValue(index)/sum table.setValue(index, new_value) return table def storeResults(self, outDir, symName, (dt4, dt3, dt2), word_C, constFile=None, constAppend=True): wordCard = len(self.symMap) # Save 5-grams NAME = '%sGivenC1C2C3C4' % symName numberOfSpmfs = gmtk.saveDt(outDir, NAME, dt4, 4) gmtk.saveCollection(outDir, NAME, numberOfSpmfs) gmtk.saveSpmfs(outDir, NAME, numberOfSpmfs, wordCard) number = len(word_C.vectSubList([1, 2, 3, 4])) gmtk.saveDpmfsProbs(outDir, NAME, number, wordCard, word_C) if constFile: if constAppend: const_fw = file(constFile, 'a') else: const_fw = file(writeConst, 'w') try: const_fw.write("\n% the cardinality should be CONCEPT_CARD^DEPTH_OF_STACK, but I know that the stack values are sparse\n") const_fw.write("#define JOINT_C1C2C3C4_CARD %d\n" % numberOfSpmfs) finally: const_fw.close()
return table def storeResults(self, outDir, symName, (dt4, dt3, dt2), word_C, constFile=None, constAppend=True): wordCard = len(self.symMap) # Save 5-grams NAME = '%sGivenC1C2C3C4' % symName numberOfSpmfs = gmtk.saveDt(outDir, NAME, dt4, 4) gmtk.saveCollection(outDir, NAME, numberOfSpmfs) gmtk.saveSpmfs(outDir, NAME, numberOfSpmfs, wordCard) number = len(word_C.vectSubList([1, 2, 3, 4])) gmtk.saveDpmfsProbs(outDir, NAME, number, wordCard, word_C) if constFile: if constAppend: const_fw = file(constFile, 'a') else: const_fw = file(writeConst, 'w') try: const_fw.write( "\n% the cardinality should be CONCEPT_CARD^DEPTH_OF_STACK, but I know that the stack values are sparse\n" ) const_fw.write("#define JOINT_C1C2C3C4_CARD %d\n" % numberOfSpmfs)
# Script section: collect push statistics for three history lengths and write
# the corresponding GMTK files. dt4 is created above this chunk.
dt3 = {}
dt2 = {}

for fileName in list:
    if verbose:
        print("Processing file: " + fileName)
    # Parse each file once and feed the same observation to all three
    # collectors, instead of re-reading the file for every collector.
    # NOTE(review): assumes the collect* methods do not consume or mutate the
    # observation -- confirm against HiddenObservation.
    ho = HiddenObservation().read(fileName)
    ho.collectConceptsC1C2C3C4(dt4)
    ho.collectConceptsC1C2C3(dt3)
    ho.collectConceptsC1C2(dt2)

##############################################################################
# save pentagrams
numberOfSpmfs = gmtk.saveDt(dirOut, "pushGivenC1C2C3C4", dt4)
gmtk.saveCollection(dirOut, "pushGivenC1C2C3C4", numberOfSpmfs)
gmtk.saveSpmfs(dirOut, "pushGivenC1C2C3C4", numberOfSpmfs, pushCard)

# The vector handed to saveDpmfsPopPush depends on maxPush: a third non-zero
# entry (42) is present only when maxPush is not 1.
# NOTE(review): the meaning of the literals 44/43/42 is not visible in this
# chunk -- confirm against gmtk.saveDpmfsPopPush.
if maxPush == 1:
    v = [44, 43, 0, 0, 0]
else:
    v = [44, 43, 42, 0, 0]

gmtk.saveDpmfsPopPush(dirOut, "pushGivenC1C2C3C4", numberOfSpmfs, pushCard, v)

##############################################################################
# save quatrograms
numberOfSpmfs = gmtk.saveDt(dirOut, "pushGivenC1C2C3", dt3, 3)
gmtk.saveCollection(dirOut, "pushGivenC1C2C3", numberOfSpmfs)
gmtk.saveSpmfs(dirOut, "pushGivenC1C2C3", numberOfSpmfs, pushCard)
# do not generate dpmfs because it will be created during smoothing