Example #1
 def testBatch(self):
   tlog = simple.Compiler(
       db=os.path.join(testtensorlog.TEST_DATA_DIR,"textcattoy3.cfacts"),
       prog=os.path.join(testtensorlog.TEST_DATA_DIR,"textcat3.ppr"))
   trainData = tlog.load_dataset(os.path.join(testtensorlog.TEST_DATA_DIR,"toytrain.exam"))
   testData = tlog.load_dataset(os.path.join(testtensorlog.TEST_DATA_DIR,"toytest.exam"))
   mode = trainData.keys()[0]
   TX,TY = trainData[mode]
   UX,UY = testData[mode]
   inference = tlog.inference(mode)
   trueY = tf.placeholder(tf.float32, shape=UY.shape, name='tensorlog/trueY')
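   # accuracy = fraction of examples where the top-scoring predicted
   # label (argmax over the inference output) matches the true label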
   correct = tf.equal(tf.argmax(trueY,1), tf.argmax(inference,1))
   accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
   test_batch_fd = {tlog.input_placeholder_name(mode):UX, trueY.name:UY}
   loss = tlog.loss(mode)
   optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
   train_step = optimizer.minimize(loss)
   train_batch_fd = {tlog.input_placeholder_name(mode):TX, tlog.target_output_placeholder_name(mode):TY}
   session = tf.Session()
   session.run(tf.global_variables_initializer())
   acc0 = session.run(accuracy, feed_dict=test_batch_fd)
   print 'initial accuracy',acc0
   self.assertTrue(acc0<0.6)
   for i in range(10):
     print 'epoch',i+1
     session.run(train_step, feed_dict=train_batch_fd)
   acc1 = session.run(accuracy, feed_dict=test_batch_fd)
   print 'final accuracy',acc1
   self.assertTrue(acc1>=0.9)
   # test a round-trip serialization
   # saves the db
   cacheDir = tempfile.mkdtemp()
   db_file = os.path.join(cacheDir,'simple.db')
   tlog.set_all_db_params_to_learned_values(session)
   tlog.serialize_db(db_file)
   # load everything into a new graph and don't reset the learned params
   new_graph = tf.Graph()
   with new_graph.as_default():
     tlog2 = simple.Compiler(
         db=db_file,
         prog=os.path.join(testtensorlog.TEST_DATA_DIR,"textcat3.ppr"),
         autoset_db_params=False)
     # reconstruct the accuracy measure
     inference2 = tlog2.inference(mode)
     trueY2 = tf.placeholder(tf.float32, shape=UY.shape, name='tensorlog/trueY2')
     correct2 = tf.equal(tf.argmax(trueY2,1), tf.argmax(inference2,1))
     accuracy2 = tf.reduce_mean(tf.cast(correct2, tf.float32))
     # eval accuracy in a new session
     session2 = tf.Session()
     session2.run(tf.global_variables_initializer())
     test_batch_fd2 = {tlog2.input_placeholder_name(mode):UX, trueY2.name:UY}
     acc3 = session2.run(accuracy2, feed_dict=test_batch_fd2)
     print 'accuracy after round-trip serialization',acc3
     self.assertTrue(acc3>=0.9)
   session.close()
Example #2
def runMain():
    (db, prog, modeSet, queries) = expt.setExptParams()
    tlog = simple.Compiler(db=db, prog=prog, autoset_db_params=False)
    fps1 = expt.compileAll(db, prog, modeSet, queries)
    fps2 = tfCompileAll(tlog, modeSet, queries)  # expect <= 2.5 fps
    qps = runTF(tlog)  # expect less than 23 qps
    return fps2, qps
Example #3
def runMain():
    (ti, sparseX) = expt.setExptParams()
    X = sparseX.todense()

    # compile all the functions we'll need before we set up the session
    tlog = simple.Compiler(db=ti.db, prog=ti.prog, autoset_db_params=False)
    for modeString in [
            "t_stress/io", "t_influences/io", "t_cancer_spont/io",
            "t_cancer_smoke/io"
    ]:
        _ = tlog.inference(modeString)

    session = tf.Session()
    session.run(tf.global_variables_initializer())
    start0 = time.time()
    for modeString in [
            "t_stress/io", "t_influences/io", "t_cancer_spont/io",
            "t_cancer_smoke/io"
    ]:
        session.run(tf.global_variables_initializer())
        print 'eval', modeString,
        fd = {tlog.input_placeholder_name(modeString): X}
        session.run(tlog.inference(modeString), feed_dict=fd)
        print 'time', time.time() - start0, 'sec'
    tot = time.time() - start0
    print 'total time', tot, 'sec'
    return tot
Example #4
  def testMToyMatParam(self):
    tlog = simple.Compiler(
        db=os.path.join(testtensorlog.TEST_DATA_DIR,"matchtoy.cfacts"),
        prog=os.path.join(testtensorlog.TEST_DATA_DIR,"matchtoy.ppr"))
    trainData = tlog.load_dataset(os.path.join(testtensorlog.TEST_DATA_DIR,"matchtoy-train.exam"))
    tlog.db.markAsParameter('dabbrev',2)
    factDict = tlog.db.matrixAsPredicateFacts('dabbrev',2,tlog.db.matEncoding[('dabbrev',2)])
    print 'before learning',len(factDict),'dabbrevs'
    self.assertTrue(len(factDict)==5)
    for f in sorted(factDict.keys()):
      print '>',str(f),factDict[f]

    # expt pipeline
    mode = trainData.keys()[0]
    TX,TY = trainData[mode]
    inference = tlog.inference(mode)
    trueY = tf.placeholder(tf.float32, shape=TY.shape, name='tensorlog/trueY')
    loss = tlog.loss(mode)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
    train_step = optimizer.minimize(loss)
    train_batch_fd = {tlog.input_placeholder_name(mode):TX, tlog.target_output_placeholder_name(mode):TY}
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    for i in range(5):
      print 'epoch',i+1
      session.run(train_step, feed_dict=train_batch_fd)
    tlog.set_all_db_params_to_learned_values(session)
Example #5
 def testTCToyTypes(self):
   matrixdb.conf.ignore_types = False
   tlog = simple.Compiler(
       db=os.path.join(testtensorlog.TEST_DATA_DIR,"textcattoy3.cfacts"),
       prog=os.path.join(testtensorlog.TEST_DATA_DIR,"textcat3.ppr"))
   trainData = tlog.load_small_dataset(os.path.join(testtensorlog.TEST_DATA_DIR,"toytrain.exam"))
   mode = trainData.keys()[0]
   docs,labels = trainData[mode]
   xc = tlog.get_cross_compiler()
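   # possibleOps enumerates the operations applicable to an expression of
   # the given type; with typing enabled, each entry is an
   # (expression, resultType) pair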
   ops = xc.possibleOps(docs,'doc')
   print 'doc ops',ops
   self.assertTrue(len(ops)==1)
   (words,wordType) = ops[0]
   self.assertTrue(wordType=='word')
   ops = xc.possibleOps(words,'word')
   self.assertTrue(len(ops)==3)
   pairs = None
   for (expr,exprType) in ops:
     if exprType=='labelWordPair':
       pairs = expr
       break
   self.assertTrue(pairs is not None)
   ops = xc.possibleOps(pairs,'labelWordPair')
   self.assertTrue(len(ops)==2)
   for (expr,exprType) in ops:
     self.assertTrue(exprType=='word')
   close_cross_compiler(xc)
Example #6
def runMain(num=250):
  params = expt.setExptParams(num)
  prog = params['prog']
  tlog = simple.Compiler(db=prog.db, prog=prog, autoset_db_params=False)
  train_data = tlog.load_big_dataset('inputs/train-%d.exam' % num)
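  # load_big_dataset returns an object that enumerates minibatches (used
  # with tlog.minibatches below) rather than a single in-memory X,Y pair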
  mode = params['targetMode']

  loss = tlog.loss(mode)
  optimizer = tf.train.AdagradOptimizer(0.1)
  train_step = optimizer.minimize(loss)

  session = tf.Session()
  session.run(tf.global_variables_initializer())
  t0 = time.time()
  epochs = 10
  for i in range(epochs):
      b = 0
      for (_,(TX,TY)) in tlog.minibatches(train_data,batch_size=125):
          print 'epoch',i+1,'of',epochs,'minibatch',b+1
          train_fd = {tlog.input_placeholder_name(mode):TX, tlog.target_output_placeholder_name(mode):TY}
          session.run(train_step, feed_dict=train_fd)
          b += 1
  print 'learning time',time.time()-t0,'sec'

  predicted_y = tlog.inference(mode)
  actual_y = tlog.target_output_placeholder(mode)
  correct_predictions = tf.equal(tf.argmax(actual_y,1), tf.argmax(predicted_y,1))
  accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))

  test_data = tlog.load_small_dataset('inputs/test-%d.exam' % num)
  UX,UY = test_data[mode]
  test_fd = {tlog.input_placeholder_name(mode):UX, tlog.target_output_placeholder_name(mode):UY}
  acc = session.run(accuracy, feed_dict=test_fd)
  print 'test acc',acc
  return acc  # expect about 27.2% accuracy
Example #7
def setup_tlog(maxD,factFile,trainFile,testFile):
  tlog = simple.Compiler(db=factFile,prog="grid.ppr")
  tlog.prog.db.markAsParameter('edge',2)
  tlog.prog.maxDepth = maxD
  trainData = tlog.load_small_dataset(trainFile)
  testData = tlog.load_small_dataset(testFile)
  return (tlog,trainData,testData)
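# Hypothetical usage (file names here are illustrative, not from the source):
#   tlog, trainData, testData = setup_tlog(16, 'grid.cfacts', 'train.exam', 'test.exam')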
Example #8
def runMain(saveInPropprFormat=True):
    params = expt.setExptParams()
    prog = params['prog']
    tlog = simple.Compiler(db=prog.db, prog=prog, autoset_db_params=False)
    train_data = tlog.load_small_dataset('inputs/train.examples')
    test_data = tlog.load_small_dataset('inputs/test.examples')

    mode = 'samebib/io'
    TX, TY = train_data[mode]
    UX, UY = test_data[mode]
    loss = tlog.loss(mode)
    optimizer = tf.train.AdagradOptimizer(0.1)
    train_step = optimizer.minimize(loss)
    train_fd = {
        tlog.input_placeholder_name(mode): TX,
        tlog.target_output_placeholder_name(mode): TY
    }
    test_fd = {
        tlog.input_placeholder_name(mode): UX,
        tlog.target_output_placeholder_name(mode): UY
    }

    t0 = time.time()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    epochs = 30
    for i in range(epochs):
        # progress
        print 'epoch', i + 1, 'of', epochs
        session.run(train_step, feed_dict=train_fd)
    print 'learning time', time.time() - t0, 'sec'

    inference = tlog.inference(mode)
    predicted_y = session.run(inference, feed_dict=test_fd)
    actual_y = tlog.target_output_placeholder(mode)
    correct_predictions = tf.equal(tf.argmax(actual_y, 1),
                                   tf.argmax(predicted_y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))

    if saveInPropprFormat:
        # save test results in ProPPR format
        from tensorlog import declare
        from tensorlog import dataset
        from tensorlog import expt as tlog_expt
        m = declare.asMode(mode)
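        # _unwrapOutput appears to convert the compiler's TF-side
        # matrices back to native scipy format (an assumption based on
        # its usage below)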
        native_test_data = dataset.Dataset({m: tlog.xc._unwrapOutput(UX)},
                                           {m: tlog.xc._unwrapOutput(UY)})
        savedTestExamples = 'tmp-cache/cora-test.examples'
        savedTestPredictions = 'tmp-cache/cora-test.solutions.txt'
        native_test_data.saveProPPRExamples(savedTestExamples, tlog.db)
        tlog_expt.Expt.predictionAsProPPRSolutions(
            savedTestPredictions, 'samebib', tlog.db,
            tlog.xc._unwrapOutput(UX), tlog.xc._unwrapOutput(predicted_y))
        print 'ready for commands like: proppr eval %s %s --metric auc --defaultNeg' % (
            savedTestExamples, savedTestPredictions)

    acc = session.run(accuracy, feed_dict=test_fd)
    print 'test acc', acc
    return acc
Example #9
 def testBuilder2(self):
   b = simple.Builder()
   predict,assign,weighted,hasWord,posPair,negPair = b.predicates("predict assign weighted hasWord posPair negPair")
   X,Pos,Neg,F,W = b.variables("X Pos Neg F W")
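   # in the Builder DSL, '<=' separates a rule's head from its body,
   # '&' conjoins goals, and '//' attaches a ProPPR-style feature
   # expression to the rule (here, one weighted(F) feature per binding
   # of F found by the goals after '|')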
   b += predict(X,Pos) <= assign(Pos,'pos','label') // (weighted(F) | hasWord(X,W) & posPair(W,F))
   b += predict(X,Neg) <= assign(Neg,'neg','label') // (weighted(F) | hasWord(X,W) & negPair(W,F))
   dbSpec = os.path.join(testtensorlog.TEST_DATA_DIR,"textcattoy3.cfacts")
   self.runTextCatLearner(simple.Compiler(db=dbSpec,prog=b.rules))
Example #10
 def testTCToyIgnoringTypes(self):
   matrixdb.conf.ignore_types = True
   tlog = simple.Compiler(
       db=os.path.join(testtensorlog.TEST_DATA_DIR,"textcattoy3.cfacts"),
       prog=os.path.join(testtensorlog.TEST_DATA_DIR,"textcat3.ppr"))
   trainData = tlog.load_small_dataset(os.path.join(testtensorlog.TEST_DATA_DIR,"toytrain.exam"))
   mode = trainData.keys()[0]
   docs,labels = trainData[mode]
   xc = tlog.get_cross_compiler()
   ops = xc.possibleOps(docs)
   binary_predicates = [functor for (functor,arity) in tlog.db.matEncoding if arity==2]
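   # with types ignored, each binary predicate can be applied in either
   # argument direction, so possibleOps should yield two ops per predicate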
   self.assertTrue(len(ops) == len(binary_predicates)*2)
   for x in ops:
     # ops should just be tensors
     self.assertFalse(isinstance(x,tuple))
   close_cross_compiler(xc)
Example #11
  def testBuilder3(self):
    b = simple.Builder()
    predict,assign,weighted,hasWord,posPair,negPair,label = b.predicates("predict assign weighted hasWord posPair negPair label")
    doc_t,label_t,word_t,labelWordPair_t = b.types("doc_t label_t word_t labelWordPair_t")

    b.schema += predict(doc_t,label_t)
    b.schema += hasWord(doc_t,word_t)
    b.schema += posPair(word_t,labelWordPair_t)
    b.schema += negPair(word_t,labelWordPair_t)
    b.schema += label(label_t)

    X,Pos,Neg,F,W = b.variables("X Pos Neg F W")
    b.rules += predict(X,Pos) <= assign(Pos,'pos','label') // (weighted(F) | hasWord(X,W) & posPair(W,F))
    b.rules += predict(X,Neg) <= assign(Neg,'neg','label') // (weighted(F) | hasWord(X,W) & negPair(W,F))

    # use the untyped version of the facts to make sure the schema works
    b.db = os.path.join(testtensorlog.TEST_DATA_DIR,"textcattoy.cfacts")

    self.runTextCatLearner(simple.Compiler(db=b.db, prog=b.rules))
Example #12
  def check_learning_with_udp(self,ruleStrings,plugins):
    db = matrixdb.MatrixDB.loadFile(os.path.join(testtensorlog.TEST_DATA_DIR,"textcattoy3.cfacts"))
    rules = testtensorlog.rules_from_strings(ruleStrings)
    prog = program.ProPPRProgram(rules=rules,db=db,plugins=plugins)
    prog.setAllWeights()
    mode = declare.asMode("predict/io")
    prog.compile(mode)
    fun = prog.function[(mode,0)]
    print "\n".join(fun.pprint())
    tlog = simple.Compiler(db=db, prog=prog)

    trainData = tlog.load_dataset(os.path.join(testtensorlog.TEST_DATA_DIR,"toytrain.exam"))
    testData = tlog.load_dataset(os.path.join(testtensorlog.TEST_DATA_DIR,"toytest.exam"))
    mode = trainData.keys()[0]
    TX,TY = trainData[mode]
    UX,UY = testData[mode]
    inference = tlog.inference(mode)
    trueY = tf.placeholder(tf.float32, shape=UY.shape, name='tensorlog/trueY')
    correct = tf.equal(tf.argmax(trueY,1), tf.argmax(inference,1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    test_batch_fd = {tlog.input_placeholder_name(mode):UX, trueY.name:UY}
    loss = tlog.loss(mode)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
    train_step = optimizer.minimize(loss)
    train_batch_fd = {tlog.input_placeholder_name(mode):TX, tlog.target_output_placeholder_name(mode):TY}
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    acc0 = session.run(accuracy, feed_dict=test_batch_fd)
    print 'initial accuracy',acc0
    self.assertTrue(acc0<0.6)
    for i in range(10):
      print 'epoch',i+1
      session.run(train_step, feed_dict=train_batch_fd)
    acc1 = session.run(accuracy, feed_dict=test_batch_fd)
    print 'final accuracy',acc1
    self.assertTrue(acc1>=0.9)
    session.close()
Example #13
def runMain():
  # generate the data for a 16-by-16 grid
  (factFile,trainFile,testFile) = expt.genInputs(16)

  # generate the rules - for transitive closure
  b = simple.Builder()
  path,edge = b.predicates("path,edge")
  X,Y,Z = b.variables("X,Y,Z")
  b.rules += path(X,Y) <= edge(X,Y)
  b.rules += path(X,Y) <= edge(X,Z) & path(Z,Y)

  # construct a Compiler object
  tlog = simple.Compiler(db=factFile,prog=b.rules)

  # configure the database so that edge weights are a parameter
  tlog.prog.db.markAsParameter('edge',2)
  # configure the program so that maximum recursive depth is 16
  tlog.prog.maxDepth = 16

  # compile the rules, plus a query mode, into the inference function
  # we want to optimize - queries of the form {Y:path(x,Y)} where x is
  # a given starting point in the grid (an input) and Y is an output
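  # (in a mode string like 'path/io', 'i' marks an argument used as an
  # input and 'o' one produced as an output)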
  mode = 'path/io'
  predicted_y = tlog.inference(mode)

  # when we ask for an inference function, Tensorlog also compiles a
  # loss function.  Ask for the placeholder used to hold the desired
  # output when we're computing loss, and use that to define an
  # accuracy metric for testing
  actual_y = tlog.target_output_placeholder(mode)
  correct_predictions = tf.equal(tf.argmax(actual_y,1), tf.argmax(predicted_y,1))
  accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))

  # now get the loss used in learning from the compiler and set up
  # a learner for it
  unregularized_loss = tlog.loss(mode)
  optimizer = tf.train.AdagradOptimizer(1.0)
  train_step = optimizer.minimize(unregularized_loss)

  # set up the session
  session = tf.Session()
  session.run(tf.global_variables_initializer())

  # load the training and test data
  trainData = tlog.load_small_dataset(trainFile)
  testData = tlog.load_small_dataset(testFile)

  # run the optimizer for 20 epochs
  (tx,ty) = trainData[mode]
  train_fd = {tlog.input_placeholder_name(mode):tx, tlog.target_output_placeholder_name(mode):ty}
  for i in range(20):
    session.run(train_step, feed_dict=train_fd)
    print 'epoch',i+1,'train loss and accuracy',session.run([unregularized_loss,accuracy], feed_dict=train_fd)

  # test performance
  (ux,uy) = testData[mode]
  test_fd = {tlog.input_placeholder_name(mode):ux, tlog.target_output_placeholder_name(mode):uy}
  acc = session.run(accuracy, feed_dict=test_fd)

  print 'test acc',acc
  return acc
Example #14
def run_main():
    logging.basicConfig(level=logging.DEBUG)

    t0 = time.time()

    # configure the experiment, generate the rules, and initialize the
    # Tensorlog compiler.  If we're training, then also initialize the
    # weight vectors to some sort of default values.
    c = configure_from_command_line(sys.argv[1:])
    b = generate_rules()
    # databases can be stored in two formats: the .db format or the
    # .cfacts format.  A .cfacts file is basically a tab-separated-value
    # file, where the first column is a relation name, the other columns
    # are arguments to that relation, and the final column is a weight
    # (if it's a number). The .cfacts file can also include typing
    # information, in lines like '# :- mentions_entity(question_t,entity_t)'.
    # .cfacts files must be sorted
    # by relation type.  The .db format is a binary format which is more
    # compact and faster to load.  The syntax "foo.db|foo.cfacts" for a
    # database tells Tensorlog to load a cached .db version of the
    # .cfacts file if it exists (and is more recent than the .cfacts
    # file) and otherwise to load the .cfacts file and create a cached
    # version in the .db file.
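    # For illustration only (values are made up, not from this
    # experiment), a .cfacts file might contain tab-separated lines like:
    #   # :- mentions_entity(question_t,entity_t)
    #   mentions_entity   q42   barack_obama   1.0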
    dbspec = '/tmp/train-%d.db|inputs/train-%d.cfacts' % (
        c.num, c.num) if c.action == 'train' else c.model
    tlog = simple.Compiler(db=dbspec,
                           prog=b.rules,
                           autoset_db_params=(c.action == 'train'))

    # set up the optimizer
    mode = 'answer/io'
    unregularized_loss = tlog.loss(mode)
    optimizer = tf.train.AdagradOptimizer(c.rate)
    train_step = optimizer.minimize(unregularized_loss)

    # define the measure we'll use to report quality of a learned model
    predicted_y = tlog.inference(mode)  # inference is the
    # proof-counting semantics
    # followed by a softmax
    actual_y = tlog.target_output_placeholder(mode)
    correct_predictions = tf.equal(tf.argmax(actual_y, 1),
                                   tf.argmax(predicted_y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))

    # initialize the tensorflow session
    session = tf.Session()
    session.run(tf.global_variables_initializer())

    t1 = time.time()
    print 'compilation and session initialization', (t1 - t0), 'sec'

    if c.action == 'test':

        # a small_dataset is just a dictionary mapping function names (ie
        # modes), like "answer/io", to pairs X,Y, where X is an input and
        # Y a desired output.  If the action is to 'test' a learned model
        # then load in the test data and find that x,y pair.
        test_data = tlog.load_small_dataset('inputs/test-%d.exam' % c.num)
        _, (x, y) = test_data.items()[0]
        # ... then compute error rate and print it
        test_batch_fd = {
            tlog.input_placeholder_name(mode): x,
            tlog.target_output_placeholder(mode): y
        }
        print 'test error', 100 * (
            1.0 - session.run(accuracy, feed_dict=test_batch_fd)), '%'

    else:
        assert c.action == 'train'

        # load_big_dataset returns an object which can enumerate
        # mini-batches.  The object holds all the input and output vectors
        # for tensorlog as sparse vectors (ie, it doesn't stream thru them
        # from disk). This is still important from a memory-usage point of
        # view, however, because we cannot encode these x,y pairs as
        # sparse in tensorflow, since tensorflow doesn't support sparse
        # matrix-sparse matrix products, only dense matrix-sparse matrix
        # products.  So the big dataset object will convert each
        # minibatch to a dense format on-the-fly before training on it.
        train_data = tlog.load_big_dataset('inputs/train-%d.exam' % c.num)

        t2 = time.time()
        print 'data loading', (t2 - t1), 'sec'

        # finally, run the learner for a fixed number of epochs
        for i in range(c.epochs):
            print 'starting epoch', i + 1, 'of', c.epochs, '...'
            b = 0
            for _, (x, y) in tlog.minibatches(train_data,
                                              batch_size=c.batch_size):
                train_batch_fd = {
                    tlog.input_placeholder_name(mode): x,
                    tlog.target_output_placeholder_name(mode): y
                }
                session.run(train_step, feed_dict=train_batch_fd)
                print 'finished minibatch', b + 1, 'epoch', i + 1, 'cumulative training time', (
                    time.time() - t2), 'sec'
                b += 1
        t3 = time.time()
        print 'learning', (t3 - t2), 'sec'

        # We have now learned values for all the parameters. This command
        # copies those learned values back into the knowledge
        # graph/database maintained by Tensorlog.
        tlog.set_all_db_params_to_learned_values(session)

        # Finally, write the whole knowledge graph, including the learned
        # parameters, out to disk in a compact format, which can be read
        # back in when we use the 'test' action
        tlog.serialize_db('learned-model.db')
        print 'wrote learned model to learned-model.db'
Example #15
 def testMinibatch(self):
   tlog = simple.Compiler(
       db=os.path.join(testtensorlog.TEST_DATA_DIR,"textcattoy3.cfacts"),
       prog=os.path.join(testtensorlog.TEST_DATA_DIR,"textcat3.ppr"))
   self.runTextCatLearner(tlog)
Example #16
    mainOptlist, mainArgs = getopt.getopt(sys.argv[1:], 'x', mainArgspec)
    mainOptdict = dict(mainOptlist)

    c = OptHolder()
    c.kb_version = mainOptdict.get('--kb_version', 'typed-small')
    c.epochs = int(mainOptdict.get('--epochs', '10'))
    c.num_train = int(mainOptdict.get('--num_train', '100'))
    c.num_test = int(mainOptdict.get('--num_test', '200'))
    c.prog_file = mainOptdict.get('--prog_file', 'dialog.ppr')
    for (var_name, value) in c.__dict__.items():
        command_line_opt = '--%s' % var_name
        print '# config:', var_name, '=', value, 'from', command_line_opt, mainOptdict.get(
            command_line_opt)

    # create the simple compiler and load the data
    tlog = simple.Compiler(db='idb-%s.cfacts' % c.kb_version, prog=c.prog_file)
    train_data = tlog.load_dataset('train-%d-corpus.exam' % c.num_train)
    test_data = tlog.load_dataset('test-%d-corpus.exam' % c.num_test)

    # check the data is as expected
    mode = 'answer/io'
    assert len(train_data.keys()) == 1 and mode in train_data
    assert len(test_data.keys()) == 1 and mode in test_data
    TX, TY = train_data[mode]
    UX, UY = test_data[mode]

    # for evaluating performance
    inference = tlog.inference(mode)
    trueY = tf.placeholder(tf.float32, shape=UY.shape, name='tensorlog/trueY')
    prediction_is_correct = tf.equal(tf.argmax(trueY, 1),
                                     tf.argmax(inference, 1))