def _parseLine(line, proppr=True): #returns mode, x, positive y's where x and ys are symbols if not line.strip() or line[0] == '#': return None, None, None parts = line.strip().split("\t") if not proppr: assert len(parts) >= 2, 'bad line: %r parts %r' % (line, parts) return declare.asMode(parts[0] + "/io"), parts[1], parts[2:] else: regex = re.compile('(\w+)\((\w+),(\w+)\)') mx = regex.search(parts[0]) if not mx: return None, None, None else: mode = declare.asMode(mx.group(1) + "/io") x = mx.group(2) pos = [] for ans in parts[1:]: label = ans[0] my = regex.search(ans[1:]) assert my, 'problem at line ' + line assert my.group( 1 ) == mode.functor, 'mismatched modes %s %s at line %s' % ( my.group(1), mode, line) assert my.group(2) == x, 'mismatched x\'s at line ' + line if label == '+': pos.append(my.group(3)) return mode, x, pos
def ensureCompiled(self, mode, inputs=None):
    """Compile a tensorlog function to the target language, caching the result.

    ``mode`` may be a string (converted with ``declare.asMode``) or a
    ``declare.ModeDeclaration``.  The first time a mode is seen, a new
    Workspace is created, stored in ``self._wsDict`` under that mode and made
    the current ``self.ws``; the program is compiled for the mode and handed
    to ``self._doCompile`` together with ``inputs``, which can be used to
    specify the input placeholders for the inference and loss functions.
    Modes already present in ``self._wsDict`` are not recompiled.

    Returns the canonical mode that keys the compiled workspace.
    """
    if isinstance(mode, str):
        mode = declare.asMode(mode)
    assert isinstance(mode, declare.ModeDeclaration), 'invalid mode %r' % mode

    if mode in self._wsDict:
        return mode

    workspace = Workspace(self)
    self.ws = workspace
    self._wsDict[mode] = workspace

    startTime = time.time()

    def status(msg):
        # Log progress with elapsed time and the current memory usage.
        elapsed = time.time() - startTime
        logging.info('%s time %.3f sec mem %.3f Gb' % (msg, elapsed, util.memusage()))

    status('calling compile')
    fun = self.prog.compile(mode)
    self.ws.tensorlogFun = fun
    status('tensorlog compilation complete')
    self._doCompile(fun, mode, inputs)
    status('tensorlog->tensorflow compilation complete')
    return mode
def setup(optdict, settings):
    """Prepare the program in ``optdict`` and build the learner for a run.

    ``optdict['prog']`` gets its rule weights and ``maxDepth`` set in place.
    ``settings`` must provide 'maxDepth', 'epochs', 'para', 'rate' and
    'batch'.

    Returns a ``plearn.ParallelFixedRateGDLearner`` with an L2 regularizer.
    """
    # Shortcut to the program held in the option dictionary.
    prog = optdict['prog']

    # The weight vector is sparse - just the constants in the unary
    # predicate 'rule'.
    ruleWeights = prog.db.vector(declare.asMode("rule(i)"))
    prog.setRuleWeights(ruleWeights)

    # Maximum recursion depth.
    prog.maxDepth = settings['maxDepth']

    # A non-default learner.  Alternatives previously tried here:
    # learn.FixedRateGDLearner, learn.FixedRateSGDLearner and
    # plearn.ParallelAdaGradLearner (tracing via funs.conf.trace = True).
    learnerOptions = dict(
        epochs=settings['epochs'],
        parallel=settings['para'],
        rate=settings['rate'],
        miniBatchSize=settings['batch'],
        regularizer=learn.L2Regularizer())
    return plearn.ParallelFixedRateGDLearner(prog, **learnerOptions)
def _listFunction(self, modeSpec):
    """Print the pretty-printed function compiled for ``modeSpec``.

    The function is looked up in ``self.prog.function`` under ``(mode, 0)``;
    the mode is compiled first when that entry is missing.
    """
    mode = declare.asMode(modeSpec)
    cacheKey = (mode, 0)
    if cacheKey not in self.prog.function:
        self.prog.compile(mode)
    listing = self.prog.function[cacheKey].pprint()
    print("\n".join(listing))
def setRuleWeights(self, weights=None, epsilon=1.0, ruleIdPred=None):
    """Set the db predicate 'weighted/1' as a parameter and initialize it.

    'weighted/1' is the default parameter used to weight rule-id features,
    e.g., "r" in p(X,Y):-... {r}.

    Args:
        weights: vector to initialize 'weighted/1' with.  When omitted, the
            sum of the one-hot vectors of all rule ids is used, clipped to
            [0, 1].
        epsilon: scale factor applied to the vector before it is stored.
        ruleIdPred: name of a unary db relation that holds all the rule
            ids; when given, its vector is used and ``weights`` is ignored.

    Does nothing when the program has no rule ids.  Raises AssertionError
    when ``ruleIdPred`` is not a unary predicate in the db, or when the db
    has declared types and no ``ruleIdPred`` is given.
    """
    if len(self.ruleIds) == 0:
        return

    if ruleIdPred is not None:
        # TODO check this stuff and add type inference!
        assert (ruleIdPred, 1) in self.db.matEncoding, 'there is no unary predicate called %s' % ruleIdPred
        self.db.markAsParameter("weighted", 1)
        ruleIdVector = self.db.vector(declare.asMode('%s(o)' % ruleIdPred))
        self.db.setParameter("weighted", 1, ruleIdVector * epsilon)
        return

    assert self.db.isTypeless(), 'cannot setRuleWeights for db with declared types unless ruleIdPred is given'
    self.db.markAsParameter("weighted", 1)
    # Identity test: '== None' would invoke the vector's own __eq__, which is
    # elementwise for array types.
    if weights is None:
        weights = self.db.onehot(self.ruleIds[0])
        for rid in self.ruleIds[1:]:
            weights = weights + self.db.onehot(rid)
        # Cap each entry at 1 so a rule id that occurs twice is not doubled.
        weights = mutil.mapData(lambda d: np.clip(d, 0.0, 1.0), weights)
    self.db.setParameter("weighted", 1, weights * epsilon)
def setExptParams():
    """Build the parameters for the wordnet experiment from the command line.

    Usage: [targetPredicate] [epochs].  The target predicate defaults to
    'hypernym' and the number of epochs to 30; the predicate 'ALL' selects
    no single target mode.

    Returns a pair ``(params, epochs)`` where ``params`` is a dictionary
    holding the program, train/test data, target mode, learner and the
    paths used to save examples and predictions.
    """
    argv = sys.argv
    pred = argv[1] if len(argv) > 1 else 'hypernym'
    epochs = int(argv[2]) if len(argv) > 2 else 30

    # Use comline.parseCommandLine to set up the program, etc.
    commandLine = [
        '--logging', 'warn',
        '--db', 'inputs/wnet.db|inputs/wnet.cfacts',
        '--prog', 'inputs/wnet-learned.ppr', '--proppr',
        '--train', 'inputs/wnet-train.dset|inputs/wnet-train.exam',
        '--test', 'inputs/wnet-test.dset|inputs/wnet-valid.exam']
    optdict, _ = comline.parseCommandLine(commandLine)

    prog = optdict['prog']
    # The weight vector is sparse - just the constants in the unary
    # predicate 'rule'.
    prog.setRuleWeights(prog.db.vector(declare.asMode("rule(i)")))

    if pred == 'ALL':
        targetMode = None
    else:
        targetMode = 'i_%s/io' % pred

    learner = plearn.ParallelFixedRateGDLearner(
        prog, epochs=epochs, parallel=40, regularizer=learn.L2Regularizer())

    params = {
        'prog': prog,
        'trainData': optdict['trainData'],
        'testData': optdict['testData'],
        'targetMode': targetMode,
        'savedTestPredictions': 'tmp-cache/%s-test.solutions.txt' % pred,
        'savedTrainExamples': 'tmp-cache/wnet-train.examples',
        'savedTestExamples': 'tmp-cache/wnet-test.examples',
        'learner': learner,
    }
    return params, epochs
def possibleOps(self, subExpr, typeName=None):
    """List the expressions reachable from ``subExpr`` by one primitive op.

    Every binary relation in the db whose domain is ``typeName`` contributes
    ``subExpr`` times its matrix, and every binary relation whose range is
    ``typeName`` contributes ``subExpr`` times the transposed matrix.

    If the db has declared types, the result is a list of (expr, type)
    pairs, where type is the name of the type of the resulting expression.
    If the db is typeless, the result is a list of bare expressions.  When
    ``typeName`` is None it defaults to ``matrixdb.THING``, which is only
    allowed for a typeless db (AssertionError otherwise).
    """
    # TODO add multiple-input and zero-input operations
    if typeName is None:
        typeName = matrixdb.THING
        assert self.db.isTypeless(), 'if database has types declared, you must specify the type of the input to possibleOps'

    result = []
    for functor, arity in self.db.matEncoding:
        if arity != 2:
            continue
        mode = declare.asMode("%s(i,o)" % functor)
        schema = self.db.schema
        # (transpose?, side that must match typeName, side giving the result type)
        directions = (
            (False, schema.getDomain, schema.getRange),
            (True, schema.getRange, schema.getDomain),
        )
        for transposed, inputSide, outputSide in directions:
            if inputSide(functor, arity) != typeName:
                continue
            op = self._vecMatMulExpr(
                subExpr, self._matrix(mode, transpose=transposed))
            if self.db.isTypeless():
                result.append(op)
            else:
                result.append((op, outputSide(functor, arity)))
    return result
def trainable_db_variables(self, mode, for_optimization=False):
    """Return expressions for the db predicates marked as trainable parameters.

    With ``for_optimization=True`` the underlying variables that are
    optimized are returned (``self.xc.getParamVariables``); otherwise the
    expressions that correspond most closely to the parameters are returned
    (``self.xc.getParamHandles``).  E.g., if a weight vector V is
    reparameterized as V=softplus(V0) in the proof-count expression, then
    ``for_optimization=True`` gives V0 and ``for_optimization=False`` gives V.
    """
    if for_optimization:
        fetch = self.xc.getParamVariables
    else:
        fetch = self.xc.getParamHandles
    return fetch(declare.asMode(mode))
def __init__(self, initProgram, targetPred, trainData, gradient=False):
    """Evaluate the program on the training inputs of one mode and cache the results.

    Args:
        initProgram: the program to inspect; stored as ``self.prog``.
        targetPred: mode specification of the predicate to debug; required.
        trainData: dataset that must contain examples for that mode.
        gradient: when true, the cross-entropy gradient is also computed
            and stored in ``self.grad``; otherwise ``self.grad`` is None.

    Raises AssertionError when ``targetPred`` is empty or ``trainData`` has
    no examples for the mode.
    """
    self.rendered = False
    self.sortByValue = conf.sortByValue
    self.prog = initProgram
    self.trainData = trainData
    self.targetPred = targetPred

    # Evaluate the function so the outputs are cached.
    assert self.targetPred, 'must specify targetPred'
    self.mode = declare.asMode(self.targetPred)
    assert self.trainData.hasMode(self.mode), "No mode '%s' in trainData" % self.mode
    self.X = self.trainData.getX(self.mode)
    self.Y = self.trainData.getY(self.mode)
    self.fun = self.prog.getPredictFunction(self.mode)
    self.pad = opfunutil.Scratchpad()
    self.P = self.fun.eval(self.prog.db, [self.X], self.pad)

    # Find the symbols that correspond to the inputs: the first key of each
    # per-row dictionary.  list(...) keeps this working where keys() is a view.
    dd = self.prog.db.matrixAsSymbolDict(self.X)
    self.xSymbols = [list(d.keys())[0] for d in dd.values()]

    # Evaluate the gradient so that it is cached too.
    if gradient:
        learner = learn.OnePredFixedRateGDLearner(
            self.prog, tracer=learn.Tracer.silent)
        self.grad = learner.crossEntropyGrad(
            self.mode, self.X, self.Y, pad=self.pad)
    else:
        self.grad = None
def runMain(saveInPropprFormat=True): params = expt.setExptParams() prog = params['prog'] tlog = simple.Compiler(db=prog.db, prog=prog, autoset_db_params=False) train_data = tlog.load_small_dataset('inputs/train.examples') test_data = tlog.load_small_dataset('inputs/test.examples') mode = 'samebib/io' TX, TY = train_data[mode] UX, UY = test_data[mode] loss = tlog.loss(mode) optimizer = tf.train.AdagradOptimizer(0.1) train_step = optimizer.minimize(loss) train_fd = { tlog.input_placeholder_name(mode): TX, tlog.target_output_placeholder_name(mode): TY } test_fd = { tlog.input_placeholder_name(mode): UX, tlog.target_output_placeholder_name(mode): UY } t0 = time.time() session = tf.Session() session.run(tf.global_variables_initializer()) epochs = 30 for i in range(epochs): # progress print 'epoch', i + 1, 'of', epochs session.run(train_step, feed_dict=train_fd) print 'learning time', time.time() - t0, 'sec' inference = tlog.inference(mode) predicted_y = session.run(inference, feed_dict=test_fd) actual_y = tlog.target_output_placeholder(mode) correct_predictions = tf.equal(tf.argmax(actual_y, 1), tf.argmax(predicted_y, 1)) accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32)) if saveInPropprFormat: # save test results in ProPPR format from tensorlog import declare from tensorlog import dataset from tensorlog import expt as tlog_expt m = declare.asMode(mode) native_test_data = dataset.Dataset({m: tlog.xc._unwrapOutput(UX)}, {m: tlog.xc._unwrapOutput(UY)}) savedTestExamples = 'tmp-cache/cora-test.examples' savedTestPredictions = 'tmp-cache/cora-test.solutions.txt' native_test_data.saveProPPRExamples(savedTestExamples, tlog.db) tlog_expt.Expt.predictionAsProPPRSolutions( savedTestPredictions, 'samebib', tlog.db, tlog.xc._unwrapOutput(UX), tlog.xc._unwrapOutput(predicted_y)) print 'ready for commands like: proppr eval %s %s --metric auc --defaultNeg' % ( savedTestExamples, savedTestPredictions) acc = session.run(accuracy, feed_dict=test_fd) print 'test acc', 
acc return acc
def possibleModes(rule):
    """Yield each mode of the rule's head that has exactly one output.

    For a head of arity n this yields n modes: the k-th has 'o' in argument
    position k and 'i' everywhere else.
    """
    head = rule.lhs
    functor = head.functor
    arity = head.arity
    for outputPos in range(arity):
        ioPattern = 'i' * outputPos + 'o' + 'i' * (arity - outputPos - 1)
        yield declare.asMode("%s/%s" % (functor, ioPattern))
def debug(self, modeSpec, sym):
    """Open the debugger for ``modeSpec`` on the single input symbol ``sym``.

    A one-example dataset is built from the one-hot vector of ``sym`` and an
    all-zeros target, and the debugger main loop is run without gradients.
    Logs a warning and returns when the debugger is unavailable.  Raises
    AssertionError for a db with declared types.
    """
    if not DEBUGGER_AVAILABLE:
        # logging.warn is a deprecated alias of logging.warning.
        logging.warning('debugger is not available in this environment')
        return
    mode = declare.asMode(modeSpec)
    assert self.db.isTypeless(), 'cannot debug a db with declared types'
    X = self.db.onehot(sym)
    dset = dataset.Dataset({mode: X}, {mode: self.db.zeros()})
    debug.Debugger(self.prog, mode, dset, gradient=False).mainloop()
def debugDset(self, modeSpec, test=False):
    """Open the debugger for ``modeSpec`` on the train (or test) dataset.

    Args:
        modeSpec: mode specification whose examples are extracted.
        test: use ``self.testData`` instead of ``self.trainData``.

    Logs a warning and returns when the debugger is unavailable; prints a
    message when the selected dataset was not loaded.  Raises AssertionError
    for a db with declared types.
    """
    if not DEBUGGER_AVAILABLE:
        # logging.warn is a deprecated alias of logging.warning.
        logging.warning('debugger is not available in this environment')
        return
    assert self.db.isTypeless(), 'cannot debug a db with declared types'
    fullDataset = self.testData if test else self.trainData
    if fullDataset is None:
        print('train/test dataset is not specified on command line?')
        return
    mode = declare.asMode(modeSpec)
    dset = fullDataset.extractMode(mode)
    debug.Debugger(self.prog, mode, dset, gradient=True).mainloop()
def define(self, mode, outputFun, outputTypeFun=None):
    """Define the function associated with a mode.

    ``outputFun`` is a function f(x) that takes a subexpression defining the
    input and returns an expression defining the output.  ``outputTypeFun``,
    if given, is stored alongside it to supply the type of the output.  The
    mode is also appended to the list of modes defined for its
    (functor, arity) pair.
    """
    m = declare.asMode(mode)
    self.outputFun[m] = outputFun
    self.outputTypeFun[m] = outputTypeFun
    functorArity = (m.functor, m.arity)
    self.definedFunctorArity.setdefault(functorArity, []).append(m)
def eval(self, modeSpec, sym, inputType=None, outputType=None):
    """Evaluate the function for ``modeSpec`` on the input symbol ``sym``.

    ``inputType`` and ``outputType`` default to the compiled function's
    first input type and its output type.  When ``self.numTopEcho`` is set,
    that many highest-scoring outputs are printed as rank, score and symbol
    separated by tabs.

    Returns the result as a dictionary from symbol to score.
    """
    mode = declare.asMode(modeSpec)
    fun = self.prog.getFunction(mode)
    outputType = outputType or fun.outputType
    inputType = inputType or fun.inputTypes[0]
    tmp = self.prog.evalSymbols(mode, [sym], typeName=inputType)
    result = self.prog.db.rowAsSymbolDict(tmp, typeName=outputType)
    if self.numTopEcho:
        # Order by score first, highest first.
        ranked = sorted(((val, key) for key, val in result.items()), reverse=True)
        shown = min(len(ranked), self.numTopEcho)
        for rank in range(shown):
            score, symbol = ranked[rank]
            print('%d\t%g\t%s' % (rank + 1, score, symbol))
    return result
def runMain(): (ti, X) = setExptParams() start0 = time.time() for modeString in [ "t_stress/io", "t_influences/io", "t_cancer_spont/io", "t_cancer_smoke/io" ]: print 'eval', modeString, start = time.time() ti.prog.eval(declare.asMode(modeString), [X]) print 'time', time.time() - start, 'sec' tot = time.time() - start0 print 'total time', tot, 'sec' return tot
def runTF(tlog): dset = tlog.load_small_dataset('inputs/fb15k-valid.examples') session = tf.Session() session.run(tf.global_variables_initializer()) t0 = time.time() k = 0 for mode in dset: if tlog.prog.findPredDef(declare.asMode(mode)): (X, Y) = dset[mode] f = tlog.inference(mode) session.run(f, feed_dict={tlog.input_placeholder_name(mode): X}) k += X.shape[0] t1 = time.time() qps = k / (t1 - t0) print 'tlog executes on', k, 'inputs at', qps, 'qps' return qps
def fbQueries(prog, db): queries = [] ignored = 0 for line in open("inputs/fb15k-valid.examples"): k1 = line.find("(") k2 = line.find(",") pred = line[:k1] x = line[k1 + 1:k2] mode = declare.asMode("%s/io" % pred) if prog.findPredDef(mode): vx = db.onehot(x) queries.append((mode, vx)) else: ignored += 1 print len(queries), "queries loaded", "ignored", ignored return queries
def deserialize(dir):
    """Recover a saved dataset.

    Loads the "xDict" and "yDict" files found in directory ``dir`` and
    returns a Dataset built from them.  Serialization converts modes to
    strings, so keys are converted back with ``declare.asMode`` and the
    matrices are converted to csr.  Keys starting with '__' are dropped
    (presumably the metadata entries that loadmat adds).
    """
    logging.info('deserializing dataset file ' + dir)
    xDict = {}
    yDict = {}
    SIO.loadmat(os.path.join(dir, "xDict"), xDict)
    SIO.loadmat(os.path.join(dir, "yDict"), yDict)
    for d in (xDict, yDict):
        # Iterate over a snapshot: the loop deletes and inserts keys in d,
        # which is not allowed while iterating a live dictionary view.
        for stringKey, mat in list(d.items()):
            del d[stringKey]
            if not stringKey.startswith('__'):
                d[declare.asMode(stringKey)] = SS.csr_matrix(mat)
    dset = Dataset(xDict, yDict)
    logging.info('deserialized dataset has %d modes and %d non-zeros' %
                 (len(dset.modesToLearn()), dset.size()))
    return dset
def testTCToyIgnoringTypes(self):
    """Run the toy text-categorization experiment with type declarations ignored.

    For both the dense and the sparse cross-compiler, the experiment is run
    on the 'predict/io' mode and the compiled model is checked on the
    document 'pb', with 191 expected for every entry.
    """
    matrixdb.conf.ignore_types = True
    dataDir = testtensorlog.TEST_DATA_DIR
    commandLine = [
        "--db", os.path.join(dataDir, "textcattoy3.cfacts"),
        "--prog", os.path.join(dataDir, "textcat3.ppr"),
        "--trainData", os.path.join(dataDir, "toytrain.exam"),
        "--testData", os.path.join(dataDir, "toytest.exam"),
        "--proppr"]
    optdict, args = comline.parseCommandLine(commandLine)
    compilerClasses = (
        tensorflowxcomp.DenseMatDenseMsgCrossCompiler,
        tensorflowxcomp.SparseMatDenseMsgCrossCompiler,
    )
    for compilerClass in compilerClasses:
        xc = compilerClass(optdict['prog'])
        xc.runExpt(
            prog=optdict['prog'],
            trainData=optdict['trainData'],
            testData=optdict['testData'],
            targetMode=declare.asMode("predict/io"))
        pbDoc = xc.db.onehot('pb')
        expected = collections.defaultdict(lambda: 191)
        self.checkXC(xc, 'predict/io', pbDoc, expected)
        close_cross_compiler(xc)
def _matrix(self, matMode, transpose=False): """ Wraps a call to db.matrix() """ # cache an expression for the un-transposed version of the matrix assert matMode.arity == 2 key = (matMode.getFunctor(), 2) canonicalMode = declare.asMode("%s(i,o)" % matMode.getFunctor()) if not key in self._handleExpr: assert ( matMode.functor, 2 ) in self.db.matEncoding, 'DB does not contain a value for %s' % str( matMode) variable_name = "M__" + matMode.getFunctor() val = self._wrapDBMatrix(self.db.matrix(canonicalMode, False)) self._insertHandleExpr(key, variable_name, val) if self.db.transposeNeeded(matMode, transpose): return self._transposeMatrixExpr(self._handleExpr[key]) else: return self._handleExpr[key]
def loadMatrix(db, functorToLearn, functorInDB):
    """Convert a DB matrix containing pairs x,f(x) to training data for a learner.

    One example is produced for every stored entry of the matrix for
    ``functorInDB``: the X row is a one-hot representation of the entry's
    row index x, and the Y row is row x of the matrix scaled to sum to 1.
    A row with several stored entries therefore contributes one example
    per entry.

    Returns a Dataset keyed by the mode given by ``functorToLearn``.
    Raises AssertionError for a db with declared types.
    """
    assert db.isTypeless(), 'cannot run loadMatrix on database with defined types'
    functorToLearn = declare.asMode(functorToLearn)
    entries = db.matEncoding[(functorInDB, 2)].tocoo()
    n = db.dim()
    xrows = []
    yrows = []
    for x in entries.row:
        onehot = SS.csr_matrix(([1.0], ([0], [x])), shape=(1, n))
        xrows.append(onehot)
        rx = entries.getrow(x)
        yrows.append(rx * (1.0 / rx.sum()))
    xData = {functorToLearn: mutil.stack(xrows)}
    yData = {functorToLearn: mutil.stack(yrows)}
    return Dataset(xData, yData)
def testTCToyTypes(self):
    """Run the toy text-categorization experiment with declared types honoured.

    For both the dense and the sparse cross-compiler this runs the
    experiment on 'predict/io' and then checks that a db variable is
    trainable exactly when it is a db parameter, the compiled model on the
    document 'pb', the shape of the pprint output, and a few symbol and
    type lookups of the cross-compiler API.
    """
    matrixdb.conf.ignore_types = False
    dataDir = testtensorlog.TEST_DATA_DIR
    commandLine = [
        "--db", os.path.join(dataDir, "textcattoy3.cfacts"),
        "--prog", os.path.join(dataDir, "textcat3.ppr"),
        "--trainData", os.path.join(dataDir, "toytrain.exam"),
        "--testData", os.path.join(dataDir, "toytest.exam"),
        "--proppr"]
    optdict, args = comline.parseCommandLine(commandLine)
    compilerClasses = (
        tensorflowxcomp.DenseMatDenseMsgCrossCompiler,
        tensorflowxcomp.SparseMatDenseMsgCrossCompiler,
    )
    for compilerClass in compilerClasses:
        xc = compilerClass(optdict['prog'])
        xc.runExpt(
            prog=optdict['prog'],
            trainData=optdict['trainData'],
            testData=optdict['testData'],
            targetMode=declare.asMode("predict/io"))

        # check trainability
        for (functor, arity) in xc.db.matEncoding:
            v = xc.parameterFromDBToVariable(functor, arity)
            if v is None:
                continue
            vIsTrainable = v in tf.trainable_variables()
            vIsParameter = (functor, arity) in xc.db.paramSet
            self.assertEqual(vIsTrainable, vIsParameter)

        pbDoc = xc.db.onehot('pb', 'doc')
        expectedSizes = {'negPair': 115, 'posPair': 115, 'hasWord': 59, 'weighted': 115, 'label': 5}
        self.checkXC(xc, 'predict/io', pbDoc, expectedSizes)

        # some checks on the output of pprint
        lines = xc.pprint('predict/io')
        self.assertTrue(lines[0].find("SoftMaxFunction") >= 0)
        self.assertTrue(lines[1].find("SumFunction") >= 0)
        self.assertEqual(len(lines), 16)

        # some checks on misc xcomp API
        self.assertEqual(xc.inferenceOutputType('predict/io'), 'label')
        pbId = xc.asSymbolId('pb', typeName='doc')
        pbSym = xc.asSymbol(pbId, typeName='doc')
        self.assertEqual(pbSym, 'pb')
        unknownId = xc.asSymbolId('this does not appear in the data', typeName='doc')
        self.assertEqual(unknownId, -1)
        close_cross_compiler(xc)
def _moveFeaturesToRHS(self, rule0):
    """Return a copy of ``rule0`` with its feature annotation moved into the body.

    Two annotation formats are handled:

    * constant feature ``{f}``: the goals ``ASSIGN(F, f)`` and
      ``weighted(F)`` are appended (F is ``f`` upper-cased) and ``f`` is
      recorded in ``self.ruleIds``.  A rule without features only triggers
      a warning.
    * feature generator ``{foo(F):-...}``: the generator's goals (except
      arity-0 ``true`` goals) and the goal ``foo(F)`` are appended.  When a
      db is attached, ``foo/1`` is marked as a parameter and, for a typeless
      db, the modes of the binary goals that bind F are recorded in
      ``self.paramDomains['foo']``.

    Raises AssertionError for unsupported feature forms.
    """
    rule = parser.Rule(rule0.lhs, rule0.rhs)

    if not rule0.findall:
        # parsed format is {f1,f2,...} but we only support {f1}
        if rule0.features is None:
            # logging.warn is a deprecated alias of logging.warning.
            logging.warning('this rule has no features: %s' % str(rule))
            return rule
        assert len(rule0.features) == 1, 'multiple constant features not supported'
        assert rule0.features[0].arity == 0, '{foo(A,...)} not allowed, use {foo(A,...):true}'
        constFeature = rule0.features[0].functor
        constAsVar = constFeature.upper()
        rule.rhs.append(parser.Goal(bpcompiler.ASSIGN, [constAsVar, constFeature]))
        rule.rhs.append(parser.Goal('weighted', [constAsVar]))
        # record the rule name, ie the constant feature
        self.ruleIds.append(constFeature)
        return rule

    # format is {foo(F):-...}
    assert len(rule0.features) == 1, 'feature generators of the form {a,b: ... } not supported'
    featureLHS = rule0.features[0]
    assert featureLHS.arity == 1, 'non-constant features must be of the form {foo(X):-...}'
    outputVar = featureLHS.args[0]
    paramName = featureLHS.functor
    for goal in rule0.findall:
        if goal.arity != 0 and goal.functor != 'true':
            rule.rhs.append(goal)
    rule.rhs.append(parser.Goal(paramName, [outputVar]))

    if self.db:
        # record the feature predicate 'foo' as a parameter
        self.db.markAsParameter(paramName, 1)
        if self.db.isTypeless():
            # record the domain of the predicate that will be used as a
            # feature in parameters
            for goal in rule0.findall:
                if outputVar in goal.args:
                    k = goal.args.index(outputVar)
                    if goal.arity == 2:
                        # The feature variable is on the output side of the mode.
                        ioPattern = "%s/io" if k == 0 else "%s/oi"
                        paramMode = declare.asMode(ioPattern % goal.functor)
                        self.paramDomains[paramName].append(paramMode)
    return rule
def check_learning_with_udp(self,ruleStrings,plugins): db = matrixdb.MatrixDB.loadFile(os.path.join(testtensorlog.TEST_DATA_DIR,"textcattoy3.cfacts")) rules = testtensorlog.rules_from_strings(ruleStrings) prog = program.ProPPRProgram(rules=rules,db=db,plugins=plugins) prog.setAllWeights() mode = declare.asMode("predict/io") prog.compile(mode) fun = prog.function[(mode,0)] print "\n".join(fun.pprint()) tlog = simple.Compiler(db=db, prog=prog) trainData = tlog.load_dataset(os.path.join(testtensorlog.TEST_DATA_DIR,"toytrain.exam")) testData = tlog.load_dataset(os.path.join(testtensorlog.TEST_DATA_DIR,"toytest.exam")) mode = trainData.keys()[0] TX,TY = trainData[mode] UX,UY = testData[mode] inference = tlog.inference(mode) trueY = tf.placeholder(tf.float32, shape=UY.shape, name='tensorlog/trueY') correct = tf.equal(tf.argmax(trueY,1), tf.argmax(inference,1)) accuracy = tf.reduce_mean(tf.cast(correct, tf.float32)) test_batch_fd = {tlog.input_placeholder_name(mode):UX, trueY.name:UY} loss = tlog.loss(mode) optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1) train_step = optimizer.minimize(loss) train_batch_fd = {tlog.input_placeholder_name(mode):TX, tlog.target_output_placeholder_name(mode):TY} session = tf.Session() session.run(tf.global_variables_initializer()) acc0 = session.run(accuracy, feed_dict=test_batch_fd) print 'initial accuracy',acc0 self.assertTrue(acc0<0.6) for i in range(10): print 'epoch',i+1 session.run(train_step, feed_dict=train_batch_fd) acc1 = session.run(accuracy, feed_dict=test_batch_fd) print 'final accuracy',acc1 self.assertTrue(acc1>=0.9) session.close()
def minibatches(self, dataset_obj, batch_size=100, shuffle_first=True):
    """Yield pairs (mode,(X,Y)) where X and Y form one training minibatch.

    ``dataset_obj`` is either a dictionary mapping mode strings to (X, Y)
    pairs, whose matrices are unwrapped into a ``dataset.Dataset`` first, or
    a ``dataset.Dataset`` that is used directly.  Each yielded mode is a
    string and each batch is wrapped with ``self.xc.wrapInput``.

    Raises AssertionError for any other kind of ``dataset_obj``.
    """
    if isinstance(dataset_obj, dict):
        x_dict = {}
        y_dict = {}
        for mode_str, (x, y) in dataset_obj.items():
            mode = declare.asMode(mode_str)
            x_dict[mode] = self.xc.unwrapInput(x)
            y_dict[mode] = self.xc.unwrapInput(y)
        dset = dataset.Dataset(x_dict, y_dict)
    elif isinstance(dataset_obj, dataset.Dataset):
        dset = dataset_obj
    else:
        assert False, 'illegal dataset object %r' % dataset_obj
        return

    batches = dset.minibatchIterator(
        batchSize=batch_size, shuffleFirst=shuffle_first)
    for mode, bx, by in batches:
        yield str(mode), (self.xc.wrapInput(bx), self.xc.wrapInput(by))
self.populateTree(fun.children(), child)


def mainloop(self):
    """Render the debugger window if needed, then run its event loop."""
    if not self.rendered:
        self.render()
    self.root.mainloop()


if __name__ == "__main__":

    def usage():
        """Print the command-line synopsis."""
        print('debug.py [usual tensorlog options] mode [inputs]')

    optdict, args = comline.parseCommandLine(sys.argv[1:])
    # Prefer the training data; fall back to the test data.
    dset = optdict.get('trainData') or optdict.get('testData')
    if dset is None and len(args) < 2:
        usage()
        print('debug on what input? specify --trainData or give a function input')
    elif len(args) < 1:
        usage()
    elif dset and len(args) > 2:
        print('using --trainData not the function input given')
    elif dset:
        mode = declare.asMode(args[0])
        Debugger(optdict['prog'], mode, dset, gradient=True).mainloop()
    else:
        # No dataset: debug on the single input symbol given as args[1].
        mode = declare.asMode(args[0])
        db = optdict['prog'].db
        assert db.isTypeless(), 'cannot debug a database with declared types'
        X = db.onehot(args[1])
        dset = dataset.Dataset({mode: X}, {mode: db.zeros()})
        Debugger(optdict['prog'], mode, dset, gradient=False).mainloop()
def _run(self,
         prog=None,
         trainData=None,
         testData=None,
         targetMode=None,
         savedTestPredictions=None,
         savedTestExamples=None,
         savedTrainExamples=None,
         savedModel=None,
         learner=None):
    """Run an experiment.

    The stages are
    - if targetMode is specified, extract just the examples from that mode
      from trainData and testData
    - evaluate the untrained program on the train and test data and print
      results
    - train on the trainData
    - if savedModel is given, write the learned database, including the
      trained parameters, to that directory.
    - if savedTestPredictions is given, write the test-data predictions in
      ProPPR format
    - if savedTestExamples (savedTrainExamples) is given, save the
      training/test examples in ProPPR format

    testData may be None, in which case all test-data stages are skipped.
    learner defaults to learn.FixedRateGDLearner(prog).

    Returns (testAcc, testXent) for the trained program on the test data,
    or (None, None) when there is no test data.
    """
    if targetMode:
        targetMode = declare.asMode(targetMode)
        trainData = trainData.extractMode(targetMode)
        # testData is optional everywhere else in this function.
        if testData is not None:
            testData = testData.extractMode(targetMode)

    if not learner:
        learner = learn.FixedRateGDLearner(prog)

    TP0 = Expt.timeAction('running untrained theory on train data',
                          lambda: learner.datasetPredict(trainData))
    Expt.printStats('untrained theory', 'train', trainData, TP0)
    if testData is not None:
        UP0 = Expt.timeAction('running untrained theory on test data',
                              lambda: learner.datasetPredict(testData))
        Expt.printStats('untrained theory', 'test', testData, UP0)

    Expt.timeAction('training %s' % type(learner).__name__,
                    lambda: learner.train(trainData))

    TP1 = Expt.timeAction('running trained theory on train data',
                          lambda: learner.datasetPredict(trainData))
    if testData is not None:
        UP1 = Expt.timeAction('running trained theory on test data',
                              lambda: learner.datasetPredict(testData))

    Expt.printStats('..trained theory', 'train', trainData, TP1)
    if testData is not None:
        testAcc, testXent = Expt.printStats('..trained theory', 'test',
                                            testData, UP1)
    else:
        testAcc, testXent = None, None

    if savedModel:
        Expt.timeAction('saving trained model',
                        lambda: prog.db.serialize(savedModel))

    if savedTestPredictions and testData:
        #todo move this logic to a dataset subroutine
        open(savedTestPredictions, "w").close()  # wipe file first

        def doit():
            # qid is passed to each call and advanced by its return value.
            qid = 0
            for mode in testData.modesToLearn():
                qid += Expt.predictionAsProPPRSolutions(
                    savedTestPredictions, mode.functor, prog.db,
                    UP1.getX(mode), UP1.getY(mode), True, qid)

        Expt.timeAction('saving test predictions', doit)

    if savedTestExamples and testData:
        Expt.timeAction(
            'saving test examples',
            lambda: testData.saveProPPRExamples(savedTestExamples, prog.db))

    if savedTrainExamples:
        Expt.timeAction(
            'saving train examples',
            lambda: trainData.saveProPPRExamples(savedTrainExamples, prog.db))

    if savedTestPredictions and savedTestExamples and testData:
        print('ready for commands like: proppr eval %s %s --metric auc --defaultNeg'
              % (savedTestExamples, savedTestPredictions))

    return testAcc, testXent
def input_placeholder_name(self, mode):
    """For tensorflow, return the name of the placeholder associated with
    the input to the function for ``mode``.

    Raises AssertionError when the target is not 'tensorflow'.
    """
    assert self.target == 'tensorflow'
    modeDecl = declare.asMode(mode)
    return self.xc.getInputName(modeDecl)
def target_output_placeholder(self, mode):
    """For tensorflow, return the placeholder associated with the target
    output of the function for ``mode``.

    Raises AssertionError when the target is not 'tensorflow'.
    """
    assert self.target == 'tensorflow'
    modeDecl = declare.asMode(mode)
    return self.xc.getTargetOutputPlaceholder(modeDecl)