Пример #1
0
 def _parseLine(line, proppr=True):
     """Parse one line of an examples file into (mode, x, positives).

     mode is the mode declaration "<pred>/io" for the query predicate, x
     is the input symbol and positives is the list of positive output
     symbols.

     With proppr=False the tab-separated fields are
     pred, x, y1, y2, ... and every y is returned as a positive.
     With proppr=True the fields are pred(x,y), followed by labeled
     answers such as +pred(x,y1) or -pred(x,y2); only answers whose
     label is '+' are returned.

     Returns (None, None, None) for blank lines, for lines whose first
     character is '#', and (in ProPPR format) for lines whose first
     field does not contain a pred(x,y) term.

     Raises AssertionError on malformed lines: fewer than two fields in
     the non-ProPPR format, or an answer that cannot be parsed or that
     disagrees with the query's predicate or input.
     """
     if not line.strip() or line[0] == '#':
         return None, None, None
     parts = line.strip().split("\t")
     if not proppr:
         assert len(parts) >= 2, 'bad line: %r parts %r' % (line, parts)
         return declare.asMode(parts[0] + "/io"), parts[1], parts[2:]

     # raw string so the backslashes reach the regex engine untouched
     regex = re.compile(r'(\w+)\((\w+),(\w+)\)')
     mx = regex.search(parts[0])
     if not mx:
         return None, None, None

     mode = declare.asMode(mx.group(1) + "/io")
     x = mx.group(2)
     pos = []
     for ans in parts[1:]:
         # the first character is the +/- label, the rest is the answer term
         label = ans[0]
         my = regex.search(ans[1:])
         assert my, 'problem at line ' + line
         assert my.group(
             1
         ) == mode.functor, 'mismatched modes %s %s at line %s' % (
             my.group(1), mode, line)
         assert my.group(2) == x, 'mismatched x\'s at line ' + line
         if label == '+':
             pos.append(my.group(3))
     return mode, x, pos
Пример #2
0
    def ensureCompiled(self, mode, inputs=None):
        """Compile the tensorlog function for a mode, caching the result.

        mode may be a string or a declare.ModeDeclaration; a string is
        converted with declare.asMode.  The first time a mode is seen, a
        new Workspace is created, stored in self._wsDict under the mode
        and made the current self.ws; the function compiled by self.prog
        is then passed to self._doCompile together with inputs.  A mode
        that is already in self._wsDict is not recompiled.

        inputs is forwarded to self._doCompile; according to the original
        documentation it can specify the input placeholders for the
        inference and loss functions.

        Returns the mode as a declare.ModeDeclaration, i.e. the key under
        which the compiled workspace is cached.
        """
        if isinstance(mode, str):
            mode = declare.asMode(mode)
        assert isinstance(mode,
                          declare.ModeDeclaration), 'invalid mode %r' % mode

        if mode in self._wsDict:
            # already compiled: nothing to do
            return mode

        workspace = Workspace(self)
        self.ws = workspace
        self._wsDict[mode] = workspace
        t0 = time.time()

        def report(stage):
            # log elapsed wall-clock time and memory use for each stage
            elapsed = time.time() - t0
            logging.info('%s time %.3f sec mem %.3f Gb' %
                         (stage, elapsed, util.memusage()))

        report('calling compile')
        compiledFun = self.prog.compile(mode)
        self.ws.tensorlogFun = compiledFun
        report('tensorlog compilation complete')
        self._doCompile(compiledFun, mode, inputs)
        report('tensorlog->tensorflow compilation complete')
        return mode
Пример #3
0
def setup(optdict, settings):
    """Prepare the program in optdict for learning and build its learner.

    Sets the program's rule weights from the db vector for mode
    "rule(i)", sets its maximum recursion depth from
    settings['maxDepth'], and returns a
    plearn.ParallelFixedRateGDLearner configured from settings
    ('epochs', 'para', 'rate', 'batch') with an L2 regularizer.
    """
    program = optdict['prog']

    # the weight vector is sparse - just the constants in the unary
    # predicate 'rule'
    ruleWeights = program.db.vector(declare.asMode("rule(i)"))
    program.setRuleWeights(ruleWeights)

    # maximum recursion depth
    program.maxDepth = settings['maxDepth']

    # for verbose tracing, enable: funs.conf.trace = True

    # Non-default learner overriding epochs, parallelism, rate, batch
    # size and regularizer.  Alternatives that were tried here:
    # learn.FixedRateGDLearner, learn.FixedRateSGDLearner and
    # plearn.ParallelAdaGradLearner.
    learnerOptions = dict(
        epochs=settings['epochs'],
        parallel=settings['para'],
        rate=settings['rate'],
        miniBatchSize=settings['batch'],
        regularizer=learn.L2Regularizer())
    return plearn.ParallelFixedRateGDLearner(program, **learnerOptions)
Пример #4
0
 def _listFunction(self, modeSpec):
     """Print the pretty-printed function for modeSpec.

     The function is looked up in self.prog.function under the key
     (mode, 0); if it is not there yet, the mode is compiled first.
     """
     mode = declare.asMode(modeSpec)
     cacheKey = (mode, 0)
     alreadyCompiled = cacheKey in self.prog.function
     if not alreadyCompiled:
         self.prog.compile(mode)
     listing = self.prog.function[cacheKey].pprint()
     print("\n".join(listing))
Пример #5
0
 def setRuleWeights(self,weights=None,epsilon=1.0,ruleIdPred=None):
     """Set the db predicate 'weighted/1' as a parameter, and initialize it.

     'weighted/1' is the default parameter used to weight rule-id
     features, e.g., "r" in p(X,Y):-... {r}.

     weights: vector to initialize the parameter with.  If None, a
       vector with a 1.0 for each recorded rule id is built.
     epsilon: factor the initial vector is multiplied by.
     ruleIdPred: optional name of a unary db relation that holds all
       the rule ids.  When given, the parameter is initialized from
       that relation and 'weights' is ignored.

     Does nothing if no rule ids have been recorded.  Without
     ruleIdPred the db must be typeless (AssertionError otherwise).
     """
     if len(self.ruleIds)==0:
         # no rule-id features, so there is nothing to weight
         return
     if ruleIdPred is not None:
         # TODO check this stuff and add type inference!
         assert (ruleIdPred,1) in self.db.matEncoding,'there is no unary predicate called %s' % ruleIdPred
         self.db.markAsParameter("weighted",1)
         self.db.setParameter("weighted",1,self.db.vector(declare.asMode('%s(o)' % ruleIdPred)) * epsilon)
         return
     assert self.db.isTypeless(), 'cannot setRuleWeights for db with declared types unless ruleIdPred is given'
     self.db.markAsParameter("weighted",1)
     # identity test: '==' on a matrix-like 'weights' is not a plain
     # boolean comparison
     if weights is None:
         weights = self.db.onehot(self.ruleIds[0])
         for rid in self.ruleIds[1:]:
             weights = weights + self.db.onehot(rid)
         # a rule id recorded more than once must still get weight 1.0
         weights = mutil.mapData(lambda d:np.clip(d,0.0,1.0), weights)
     self.db.setParameter("weighted",1,weights*epsilon)
Пример #6
0
def setExptParams():
  """Build the configuration for the wordnet experiment.

  Command line usage: [targetPredicate] [epochs].  The target predicate
  defaults to 'hypernym' and the number of epochs to 30; the special
  predicate 'ALL' means that no single target mode is selected.

  Returns a pair (params, epochs), where params is a dict with the
  keys 'prog', 'trainData', 'testData', 'targetMode',
  'savedTestPredictions', 'savedTrainExamples', 'savedTestExamples'
  and 'learner'.
  """
  cmdArgs = sys.argv[1:]
  pred = cmdArgs[0] if cmdArgs else 'hypernym'
  epochs = int(cmdArgs[1]) if len(cmdArgs) >= 2 else 30

  # comline.parseCommandLine loads the program, database and datasets
  options = [
      '--logging', 'warn',
      '--db', 'inputs/wnet.db|inputs/wnet.cfacts',
      '--prog','inputs/wnet-learned.ppr', '--proppr',
      '--train','inputs/wnet-train.dset|inputs/wnet-train.exam',
      '--test', 'inputs/wnet-test.dset|inputs/wnet-valid.exam']
  optdict, _ = comline.parseCommandLine(options)

  prog = optdict['prog']
  # the weight vector is sparse - just the constants in the unary predicate rule
  ruleWeights = prog.db.vector(declare.asMode("rule(i)"))
  prog.setRuleWeights(ruleWeights)

  if pred != 'ALL':
    targetMode = 'i_%s/io' % pred
  else:
    targetMode = None

  learner = plearn.ParallelFixedRateGDLearner(
      prog,epochs=epochs,parallel=40,regularizer=learn.L2Regularizer())

  params = {
      'prog':prog,
      'trainData':optdict['trainData'],
      'testData':optdict['testData'],
      'targetMode':targetMode,
      'savedTestPredictions':'tmp-cache/%s-test.solutions.txt' % pred,
      'savedTrainExamples':'tmp-cache/wnet-train.examples',
      'savedTestExamples':'tmp-cache/wnet-test.examples',
      'learner':learner,
  }
  return params, epochs
Пример #7
0
    def possibleOps(self, subExpr, typeName=None):
        """List the primitive one-step operations applicable to subExpr.

        For every binary db predicate, subExpr can be multiplied by the
        predicate's matrix when the predicate's domain equals typeName,
        and by the transposed matrix when its range equals typeName.

        If the database is typeless, the result is a list of
        expressions.  Otherwise it is a list of (expr, type) pairs,
        where type is the name of the type of the resulting expression.

        If typeName is None the database must be typeless
        (AssertionError otherwise) and matrixdb.THING is used as the
        type.
        """
        # TODO add multiple-input and zero-input operations
        if typeName is None:
            typeName = matrixdb.THING
            assert self.db.isTypeless(
            ), 'if database has types declared, you must specify the type of the input to possibleOps'
        schema = self.db.schema
        # (transpose?, type consumed by the op, type produced by the op)
        directions = (
            (False, 'getDomain', 'getRange'),
            (True, 'getRange', 'getDomain'),
        )
        ops = []
        for (functor, arity) in self.db.matEncoding:
            if arity != 2:
                continue
            mode = declare.asMode("%s(i,o)" % functor)
            for transposed, inputTypeGetter, outputTypeGetter in directions:
                if getattr(schema, inputTypeGetter)(functor, arity) != typeName:
                    continue
                op = self._vecMatMulExpr(
                    subExpr, self._matrix(mode, transpose=transposed))
                if self.db.isTypeless():
                    ops.append(op)
                else:
                    outputType = getattr(schema, outputTypeGetter)(functor, arity)
                    ops.append((op, outputType))
        return ops
Пример #8
0
    def trainable_db_variables(self, mode, for_optimization=False):
        """Return expressions for the db predicates marked as parameters.

        With for_optimization=True the result of
        self.xc.getParamVariables is returned, i.e. the underlying
        variables that are optimized; otherwise the result of
        self.xc.getParamHandles, i.e. expressions computing the values
        that correspond most closely to the parameters.

        Eg, if a weight vector V is reparameterized by passing it
        through a softplus, so V=softplus(V0) is used in the
        proof_count expression, then for_optimization==True will return
        V0, and for_optimization==False will return V.
        """
        if for_optimization:
            lookup = self.xc.getParamVariables
        else:
            lookup = self.xc.getParamHandles
        return lookup(declare.asMode(mode))
Пример #9
0
    def __init__(self, initProgram, targetPred, trainData, gradient=False):
        """Evaluate the target function on the training data and cache the results.

        initProgram: the program whose predict function is evaluated.
        targetPred: mode specification of the predicate to inspect;
          must be non-empty and present in trainData.
        trainData: dataset providing the X and Y matrices for the mode.
        gradient: if true, also compute the cross-entropy gradient and
          keep it in self.grad; otherwise self.grad is None.

        Raises AssertionError if targetPred is empty or trainData has
        no examples for its mode.
        """
        self.rendered = False
        self.sortByValue = conf.sortByValue
        self.prog = initProgram
        self.trainData = trainData
        self.targetPred = targetPred

        #evaluate the function so the outputs are cached
        assert self.targetPred, 'must specify targetPred'
        self.mode = declare.asMode(self.targetPred)
        assert self.trainData.hasMode(
            self.mode), "No mode '%s' in trainData" % self.mode
        self.X = self.trainData.getX(self.mode)
        self.Y = self.trainData.getY(self.mode)
        self.fun = self.prog.getPredictFunction(self.mode)
        self.pad = opfunutil.Scratchpad()
        self.P = self.fun.eval(self.prog.db, [self.X], self.pad)
        # find the symbols that correspond to the inputs; list() keeps
        # the indexing valid where keys() returns a view, not a list
        dd = self.prog.db.matrixAsSymbolDict(self.X)
        self.xSymbols = [list(d.keys())[0] for d in dd.values()]

        # evaluate the gradient so that's cached
        if gradient:
            learner = learn.OnePredFixedRateGDLearner(
                self.prog, tracer=learn.Tracer.silent)
            self.grad = learner.crossEntropyGrad(self.mode,
                                                 self.X,
                                                 self.Y,
                                                 pad=self.pad)
        else:
            self.grad = None
Пример #10
0
def runMain(saveInPropprFormat=True):
    """Train and evaluate the 'samebib/io' predicate with tensorflow.

    Loads the train and test examples, runs 30 Adagrad steps on the
    full training set, and measures accuracy on the test set.

    saveInPropprFormat: if true, also write the test examples and the
      test predictions under tmp-cache/ in ProPPR format.

    Returns the test accuracy.
    """
    params = expt.setExptParams()
    prog = params['prog']
    tlog = simple.Compiler(db=prog.db, prog=prog, autoset_db_params=False)
    train_data = tlog.load_small_dataset('inputs/train.examples')
    test_data = tlog.load_small_dataset('inputs/test.examples')

    mode = 'samebib/io'
    TX, TY = train_data[mode]
    UX, UY = test_data[mode]
    loss = tlog.loss(mode)
    optimizer = tf.train.AdagradOptimizer(0.1)
    train_step = optimizer.minimize(loss)
    train_fd = {
        tlog.input_placeholder_name(mode): TX,
        tlog.target_output_placeholder_name(mode): TY
    }
    test_fd = {
        tlog.input_placeholder_name(mode): UX,
        tlog.target_output_placeholder_name(mode): UY
    }

    t0 = time.time()
    session = tf.Session()
    # close the session even if training or evaluation fails
    try:
        session.run(tf.global_variables_initializer())
        epochs = 30
        for i in range(epochs):
            # progress
            print('epoch %s of %s' % (i + 1, epochs))
            session.run(train_step, feed_dict=train_fd)
        print('learning time %s sec' % (time.time() - t0))

        inference = tlog.inference(mode)
        predicted_y = session.run(inference, feed_dict=test_fd)
        actual_y = tlog.target_output_placeholder(mode)
        correct_predictions = tf.equal(tf.argmax(actual_y, 1),
                                       tf.argmax(predicted_y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))

        if saveInPropprFormat:
            # save test results in ProPPR format
            from tensorlog import declare
            from tensorlog import dataset
            from tensorlog import expt as tlog_expt
            m = declare.asMode(mode)
            native_test_data = dataset.Dataset({m: tlog.xc._unwrapOutput(UX)},
                                               {m: tlog.xc._unwrapOutput(UY)})
            savedTestExamples = 'tmp-cache/cora-test.examples'
            savedTestPredictions = 'tmp-cache/cora-test.solutions.txt'
            native_test_data.saveProPPRExamples(savedTestExamples, tlog.db)
            tlog_expt.Expt.predictionAsProPPRSolutions(
                savedTestPredictions, 'samebib', tlog.db,
                tlog.xc._unwrapOutput(UX), tlog.xc._unwrapOutput(predicted_y))
            print('ready for commands like: proppr eval %s %s --metric auc --defaultNeg' % (
                savedTestExamples, savedTestPredictions))

        acc = session.run(accuracy, feed_dict=test_fd)
    finally:
        session.close()
    print('test acc %s' % (acc,))
    return acc
Пример #11
0
 def possibleModes(rule):
     """Yield each mode of the rule's head predicate that has exactly one output.

     For a head with functor f and arity a, the k-th mode yielded is
     "f/<pattern>", where the pattern has 'o' at position k and 'i'
     everywhere else.
     """
     functor = rule.lhs.functor
     arity = rule.lhs.arity
     for outputPos in range(arity):
         pattern = "".join(
             'o' if pos == outputPos else 'i' for pos in range(arity))
         yield declare.asMode("%s/%s" % (functor, pattern))
Пример #12
0
 def debug(self, modeSpec, sym):
     """Open the debugger on the function for modeSpec applied to one symbol.

     The input is the one-hot vector for sym and the target output is
     the db's zero vector; no gradient is computed.  Only logs a
     warning and returns if the debugger is not available.  The db
     must be typeless (AssertionError otherwise).
     """
     if not DEBUGGER_AVAILABLE:
         # logging.warn is a deprecated alias of logging.warning
         logging.warning('debugger is not available in this environment')
         return
     mode = declare.asMode(modeSpec)
     assert self.db.isTypeless(), 'cannot debug a db with declared types'
     X = self.db.onehot(sym)
     dset = dataset.Dataset({mode: X}, {mode: self.db.zeros()})
     debug.Debugger(self.prog, mode, dset, gradient=False).mainloop()
Пример #13
0
 def debugDset(self, modeSpec, test=False):
     """Open the debugger, with gradients, on the examples for modeSpec.

     Uses self.testData if test is true, else self.trainData.  Only
     logs a warning and returns if the debugger is not available, and
     only prints a message if the selected dataset is None.  The db
     must be typeless (AssertionError otherwise).
     """
     if not DEBUGGER_AVAILABLE:
         # logging.warn is a deprecated alias of logging.warning
         logging.warning('debugger is not available in this environment')
         return
     assert self.db.isTypeless(), 'cannot debug a db with declared types'
     fullDataset = self.testData if test else self.trainData
     if fullDataset is None:
         print('train/test dataset is not specified on command line?')
         return
     mode = declare.asMode(modeSpec)
     dset = fullDataset.extractMode(mode)
     debug.Debugger(self.prog, mode, dset, gradient=True).mainloop()
Пример #14
0
 def define(self,mode,outputFun,outputTypeFun=None):
   """Register the function that implements a mode.

   outputFun is a function f(x) that takes a subexpression defining
   the input and returns an expression defining the output.
   outputTypeFun, if given, is stored alongside it as the function
   giving the type of the output.  The mode is also added to the list
   of modes defined for its (functor, arity) pair.
   """
   m = declare.asMode(mode)
   self.outputFun[m] = outputFun
   self.outputTypeFun[m] = outputTypeFun
   # create the per-predicate list on first use, then extend it
   modesForPredicate = self.definedFunctorArity.setdefault((m.functor,m.arity), [])
   modesForPredicate.append(m)
Пример #15
0
 def eval(self, modeSpec, sym, inputType=None, outputType=None):
     """Evaluate the function for modeSpec on one input symbol.

     inputType and outputType default to the first input type and the
     output type of the function for the mode.

     Returns the result as a dict from symbols to scores.  If
     self.numTopEcho is set, the top-scoring self.numTopEcho entries
     are also printed, one per line, as rank, score and symbol
     separated by tabs.
     """
     mode = declare.asMode(modeSpec)
     fun = self.prog.getFunction(mode)
     outputType = outputType or fun.outputType
     inputType = inputType or fun.inputTypes[0]
     tmp = self.prog.evalSymbols(mode, [sym], typeName=inputType)
     result = self.prog.db.rowAsSymbolDict(tmp, typeName=outputType)
     if (self.numTopEcho):
         # (score, symbol) pairs in descending order; a generator
         # replaces the tuple-parameter lambda, which is Python 2 only
         top = sorted(((val, key) for key, val in result.items()),
                      reverse=True)
         for rank in range(min(len(top), self.numTopEcho)):
             print('%d\t%g\t%s' % (rank + 1, top[rank][0], top[rank][1]))
     return result
Пример #16
0
def runMain():

    (ti, X) = setExptParams()
    start0 = time.time()
    for modeString in [
            "t_stress/io", "t_influences/io", "t_cancer_spont/io",
            "t_cancer_smoke/io"
    ]:
        print 'eval', modeString,
        start = time.time()
        ti.prog.eval(declare.asMode(modeString), [X])
        print 'time', time.time() - start, 'sec'
    tot = time.time() - start0
    print 'total time', tot, 'sec'
    return tot
Пример #17
0
def runTF(tlog):
    """Measure tensorflow inference throughput on the fb15k validation examples.

    Runs inference once for every mode in the dataset that has a
    predicate definition in tlog.prog, counting the rows of each input
    matrix as queries.  Prints and returns the number of queries per
    second.
    """
    dset = tlog.load_small_dataset('inputs/fb15k-valid.examples')
    session = tf.Session()
    # close the session even if inference fails
    try:
        session.run(tf.global_variables_initializer())
        t0 = time.time()
        k = 0
        for mode in dset:
            if tlog.prog.findPredDef(declare.asMode(mode)):
                # only the inputs are needed; the targets are ignored
                X, _ = dset[mode]
                f = tlog.inference(mode)
                session.run(f, feed_dict={tlog.input_placeholder_name(mode): X})
                k += X.shape[0]
        t1 = time.time()
    finally:
        session.close()
    qps = k / (t1 - t0)
    print('tlog executes on %s inputs at %s qps' % (k, qps))
    return qps
Пример #18
0
def fbQueries(prog, db, examplesFile="inputs/fb15k-valid.examples"):
    """Load the queries of an examples file as (mode, one-hot input) pairs.

    Each line is expected to start with pred(x,...): the text before
    the first "(" is the predicate and the text between the first "("
    and the first "," is the input symbol.  Lines whose predicate has
    no definition in prog are counted as ignored.

    prog: program used to check that a predicate is defined.
    db: database used to build the one-hot vector for the input.
    examplesFile: path of the file to read.

    Prints the number of queries loaded and ignored, and returns the
    list of (mode, vector) pairs.
    """
    queries = []
    ignored = 0
    # the with-block closes the file even if a line fails to parse
    with open(examplesFile) as examples:
        for line in examples:
            k1 = line.find("(")
            k2 = line.find(",")
            pred = line[:k1]
            x = line[k1 + 1:k2]
            mode = declare.asMode("%s/io" % pred)
            if prog.findPredDef(mode):
                vx = db.onehot(x)
                queries.append((mode, vx))
            else:
                ignored += 1
    print('%s queries loaded ignored %s' % (len(queries), ignored))
    return queries
Пример #19
0
 def deserialize(dir):
     """Recover a saved dataset.

     Loads the files "xDict" and "yDict" from the directory dir with
     SIO.loadmat.  Serialization converts modes to strings, so every
     key is converted back with declare.asMode and every matrix to
     csr format; keys starting with '__' are dropped.

     Returns the resulting Dataset.
     """
     logging.info('deserializing dataset file ' + dir)
     xDict = {}
     yDict = {}
     SIO.loadmat(os.path.join(dir, "xDict"), xDict)
     SIO.loadmat(os.path.join(dir, "yDict"), yDict)
     for d in (xDict, yDict):
         # iterate over a snapshot: entries are deleted and added in
         # the loop, which is not allowed while iterating a live view
         for stringKey, mat in list(d.items()):
             del d[stringKey]
             if not stringKey.startswith('__'):
                 d[declare.asMode(stringKey)] = SS.csr_matrix(mat)
     dset = Dataset(xDict, yDict)
     logging.info('deserialized dataset has %d modes and %d non-zeros' %
                  (len(dset.modesToLearn()), dset.size()))
     return dset
Пример #20
0
 def testTCToyIgnoringTypes(self):
   """Run the toy text-categorization experiment with db types ignored.

   For both the dense and the sparse cross-compiler, runs the
   experiment for mode predict/io and checks the compiled function on
   the one-hot vector for 'pb'.
   """
   matrixdb.conf.ignore_types = True
   dataDir = testtensorlog.TEST_DATA_DIR
   commandLine = [
       "--db", os.path.join(dataDir,"textcattoy3.cfacts"),
       "--prog", os.path.join(dataDir,"textcat3.ppr"),
       "--trainData", os.path.join(dataDir,"toytrain.exam"),
       "--testData", os.path.join(dataDir,"toytest.exam"),
       "--proppr"]
   optdict, _ = comline.parseCommandLine(commandLine)
   compilerClasses = (
       tensorflowxcomp.DenseMatDenseMsgCrossCompiler,
       tensorflowxcomp.SparseMatDenseMsgCrossCompiler,
   )
   for compilerClass in compilerClasses:
     xc = compilerClass(optdict['prog'])
     xc.runExpt(
         prog=optdict['prog'],
         trainData=optdict['trainData'],
         testData=optdict['testData'],
         targetMode=declare.asMode("predict/io"))
     pbDoc = xc.db.onehot('pb')
     # with types ignored, every expected size defaults to 191
     expectedSizes = collections.defaultdict(lambda:191)
     self.checkXC(xc,'predict/io',pbDoc,expectedSizes)
     close_cross_compiler(xc)
Пример #21
0
 def _matrix(self, matMode, transpose=False):
     """Return an expression for the db matrix of a binary predicate.

     Wraps a call to db.matrix().  The expression for the
     un-transposed matrix is cached in self._handleExpr under the key
     (functor, 2); it is created from the db on first use.  The result
     is the cached expression, or its transpose if
     self.db.transposeNeeded(matMode, transpose) says so.

     Raises AssertionError if matMode is not binary, or if the db has
     no matrix for the predicate.
     """
     assert matMode.arity == 2
     functor = matMode.getFunctor()
     key = (functor, 2)
     canonicalMode = declare.asMode("%s(i,o)" % functor)
     if key not in self._handleExpr:
         assert (
             matMode.functor, 2
         ) in self.db.matEncoding, 'DB does not contain a value for %s' % str(
             matMode)
         variableName = "M__" + functor
         wrapped = self._wrapDBMatrix(self.db.matrix(canonicalMode, False))
         self._insertHandleExpr(key, variableName, wrapped)
     needsTranspose = self.db.transposeNeeded(matMode, transpose)
     expr = self._handleExpr[key]
     return self._transposeMatrixExpr(expr) if needsTranspose else expr
Пример #22
0
 def loadMatrix(db, functorToLearn, functorInDB):
     """Convert a DB matrix containing pairs x,f(x) to training data.

     One example is produced per non-zero entry of the matrix for
     functorInDB: the X row is the one-hot vector of the entry's row
     index x, and the Y row is row x of the matrix scaled so that it
     sums to one.  A row with several non-zero entries therefore
     contributes one example per entry.

     Returns a Dataset with a single mode, functorToLearn.  The db
     must be typeless (AssertionError otherwise).
     """
     assert db.isTypeless(
     ), 'cannot run loadMatrix on database with defined types'
     mode = declare.asMode(functorToLearn)
     coo = db.matEncoding[(functorInDB, 2)].tocoo()
     dim = db.dim()
     xrows, yrows = [], []
     for rowIndex in coo.row:
         onehot = SS.csr_matrix(([1.0], ([0], [rowIndex])), shape=(1, dim))
         xrows.append(onehot)
         rowVec = coo.getrow(rowIndex)
         yrows.append(rowVec * (1.0 / rowVec.sum()))
     return Dataset({mode: mutil.stack(xrows)},
                    {mode: mutil.stack(yrows)})
Пример #23
0
  def testTCToyTypes(self):
    """Run the toy text-categorization experiment with db types enabled.

    For both the dense and the sparse cross-compiler: runs the
    experiment for mode predict/io, checks that a db variable is
    trainable exactly when its predicate is a parameter, checks the
    compiled function on the one-hot vector for the document 'pb',
    and exercises pprint and the symbol-id API.
    """
    matrixdb.conf.ignore_types = False
    dataDir = testtensorlog.TEST_DATA_DIR
    commandLine = [
        "--db", os.path.join(dataDir,"textcattoy3.cfacts"),
        "--prog", os.path.join(dataDir,"textcat3.ppr"),
        "--trainData", os.path.join(dataDir,"toytrain.exam"),
        "--testData", os.path.join(dataDir,"toytest.exam"),
        "--proppr"]
    optdict, _ = comline.parseCommandLine(commandLine)
    compilerClasses = (
        tensorflowxcomp.DenseMatDenseMsgCrossCompiler,
        tensorflowxcomp.SparseMatDenseMsgCrossCompiler,
    )
    for compilerClass in compilerClasses:
      xc = compilerClass(optdict['prog'])
      xc.runExpt(
          prog=optdict['prog'],
          trainData=optdict['trainData'],
          testData=optdict['testData'],
          targetMode=declare.asMode("predict/io"))

      # check trainability
      for (functor,arity) in xc.db.matEncoding:
        v = xc.parameterFromDBToVariable(functor,arity)
        if v is None:
          continue
        vIsTrainable = (v in tf.trainable_variables())
        vIsParameter = ((functor,arity) in xc.db.paramSet)
        self.assertEqual(vIsTrainable,vIsParameter)

      pbDoc = xc.db.onehot('pb','doc')
      self.checkXC(xc,'predict/io',pbDoc,{'negPair':115,'posPair':115,'hasWord':59,'weighted':115,'label':5})

      # some checks on the output of pprint
      lines = xc.pprint('predict/io')
      self.assertTrue("SoftMaxFunction" in lines[0])
      self.assertTrue("SumFunction" in lines[1])
      self.assertEqual(len(lines), 16)

      # some checks on misc xcomp API
      self.assertEqual(xc.inferenceOutputType('predict/io'),'label')
      pbId = xc.asSymbolId('pb',typeName='doc')
      self.assertEqual(xc.asSymbol(pbId,typeName='doc'),'pb')
      self.assertEqual(xc.asSymbolId('this does not appear in the data',typeName='doc'), -1)
      close_cross_compiler(xc)
Пример #24
0
 def _moveFeaturesToRHS(self,rule0):
     """Express the feature annotation of rule0 as right-hand-side goals.

     Builds a new parser.Rule from rule0's lhs and rhs and appends
     goals to its rhs:

     - constant feature {f}: an ASSIGN goal binding a variable (the
       upper-cased feature name) to f, followed by weighted(<var>).
       The feature name is recorded in self.ruleIds.
     - feature generator {foo(F):-...}: the generator goals, except
       those with arity 0 or functor 'true', followed by foo(F).  If a
       db is present, foo/1 is marked as a parameter and, for a
       typeless db, the mode of every binary generator goal that
       mentions F is recorded in self.paramDomains['foo'].

     A rule without any features only triggers a warning.  Returns
     the new rule.

     NOTE(review): if parser.Rule keeps the rhs list by reference, the
     appends below also modify rule0.rhs - confirm.
     """
     rule = parser.Rule(rule0.lhs, rule0.rhs)
     if not rule0.findall:
         #parsed format is {f1,f2,...} but we only support {f1}
         if rule0.features is None:
             logging.warning('this rule has no features: %s' % str(rule))
         else:
             assert len(rule0.features)==1,'multiple constant features not supported'
             assert rule0.features[0].arity==0, '{foo(A,...)} not allowed, use {foo(A,...):true}'
             constFeature = rule0.features[0].functor
             constAsVar = constFeature.upper()
             rule.rhs.append( parser.Goal(bpcompiler.ASSIGN, [constAsVar,constFeature]) )
             rule.rhs.append( parser.Goal('weighted',[constAsVar]) )
             # record the rule name, ie the constant feature
             self.ruleIds.append(constFeature)
     else:
         #format is {foo(F):-...}
         assert len(rule0.features)==1,'feature generators of the form {a,b: ... } not supported'
         featureLHS = rule0.features[0]
         assert featureLHS.arity==1, 'non-constant features must be of the form {foo(X):-...}'
         outputVar = featureLHS.args[0]
         paramName = featureLHS.functor
         for goal in rule0.findall:
             if goal.arity!=0 and goal.functor!='true':
                 rule.rhs.append(goal)
         rule.rhs.append( parser.Goal(paramName,[outputVar]) )
         # everything below needs a db; without one there is nothing
         # to record (previously isTypeless() was called on a missing db)
         if self.db:
             # record the feature predicate 'foo' as a parameter
             self.db.markAsParameter(paramName,1)
             if self.db.isTypeless():
                 # record the domain of the predicate that will be used as a feature in parameters
                 for goal in rule0.findall:
                     if outputVar in goal.args:
                         k = goal.args.index(outputVar)
                         if goal.arity==2:
                             paramMode = declare.asMode("%s/io" % goal.functor) if k==0 else declare.asMode("%s/oi" % goal.functor)
                             self.paramDomains[paramName].append(paramMode)
     return rule
Пример #25
0
  def check_learning_with_udp(self,ruleStrings,plugins):
    """Check that a program using user-defined plugins can be trained.

    Builds a ProPPR program from ruleStrings and plugins over the toy
    text-categorization db, trains it with 10 gradient-descent steps
    on the toy training set, and asserts that test accuracy is below
    0.6 before training and at least 0.9 afterwards.
    """
    db = matrixdb.MatrixDB.loadFile(os.path.join(testtensorlog.TEST_DATA_DIR,"textcattoy3.cfacts"))
    rules = testtensorlog.rules_from_strings(ruleStrings)
    prog = program.ProPPRProgram(rules=rules,db=db,plugins=plugins)
    prog.setAllWeights()
    mode = declare.asMode("predict/io")
    prog.compile(mode)
    fun = prog.function[(mode,0)]
    print("\n".join(fun.pprint()))
    tlog = simple.Compiler(db=db, prog=prog)

    trainData = tlog.load_dataset(os.path.join(testtensorlog.TEST_DATA_DIR,"toytrain.exam"))
    testData = tlog.load_dataset(os.path.join(testtensorlog.TEST_DATA_DIR,"toytest.exam"))
    # list() keeps the indexing valid where keys() returns a view
    mode = list(trainData.keys())[0]
    TX,TY = trainData[mode]
    UX,UY = testData[mode]
    inference = tlog.inference(mode)
    trueY = tf.placeholder(tf.float32, shape=UY.shape, name='tensorlog/trueY')
    correct = tf.equal(tf.argmax(trueY,1), tf.argmax(inference,1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    test_batch_fd = {tlog.input_placeholder_name(mode):UX, trueY.name:UY}
    loss = tlog.loss(mode)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
    train_step = optimizer.minimize(loss)
    train_batch_fd = {tlog.input_placeholder_name(mode):TX, tlog.target_output_placeholder_name(mode):TY}
    session = tf.Session()
    # close the session even when one of the assertions fails
    try:
      session.run(tf.global_variables_initializer())
      acc0 = session.run(accuracy, feed_dict=test_batch_fd)
      print('initial accuracy %s' % (acc0,))
      self.assertTrue(acc0<0.6)
      for i in range(10):
        print('epoch %s' % (i+1,))
        session.run(train_step, feed_dict=train_batch_fd)
      acc1 = session.run(accuracy, feed_dict=test_batch_fd)
      print('final accuracy %s' % (acc1,))
      self.assertTrue(acc1>=0.9)
    finally:
      session.close()
Пример #26
0
 def minibatches(self, dataset_obj, batch_size=100, shuffle_first=True):
     """Yield a series of pairs (mode,(X,Y)) of training minibatches.

     X and Y are a minibatch suitable for training the function
     designated by mode, which is given as a string.

     dataset_obj: either a dict from mode strings to (X, Y) pairs, as
       returned by load_small_dataset, or a dataset.Dataset.
     batch_size: passed to the dataset's minibatchIterator as batchSize.
     shuffle_first: passed to minibatchIterator as shuffleFirst.

     Raises AssertionError (on first iteration) for any other kind of
     dataset_obj.
     """
     if isinstance(dataset_obj, dict):
         x_dict = {}
         y_dict = {}
         for mode_str, (x, y) in dataset_obj.items():
             mode = declare.asMode(mode_str)
             x_dict[mode] = self.xc.unwrapInput(x)
             y_dict[mode] = self.xc.unwrapInput(y)
         # build the Dataset once, after the loop, so that it also
         # exists when the dict is empty
         dset = dataset.Dataset(x_dict, y_dict)
     elif isinstance(dataset_obj, dataset.Dataset):
         dset = dataset_obj
     else:
         # raised explicitly so the check survives python -O; the
         # one-element tuple keeps %r safe for tuple arguments
         raise AssertionError('illegal dataset object %r' % (dataset_obj,))
     for mode, bx, by in dset.minibatchIterator(
             batchSize=batch_size, shuffleFirst=shuffle_first):
         yield str(mode), (self.xc.wrapInput(bx), self.xc.wrapInput(by))
Пример #27
0
            self.populateTree(fun.children(), child)

    def mainloop(self):
        """Render the debugger if that has not happened yet, then call self.root.mainloop()."""
        needsRender = not self.rendered
        if needsRender:
            self.render()
        self.root.mainloop()

if __name__ == "__main__":
    # Command line entry point: debug.py [usual tensorlog options] mode [inputs]
    # With --trainData/--testData the debugger runs on that dataset with
    # gradients; otherwise it runs on the single input symbol given
    # after the mode.

    def usage():
        print('debug.py [usual tensorlog options] mode [inputs]')

    optdict, args = comline.parseCommandLine(sys.argv[1:])
    dset = optdict.get('trainData') or optdict.get('testData')
    if dset is None and len(args) < 2:
        usage()
        print('debug on what input? specify --trainData or give a function input')
    elif len(args) < 1:
        usage()
    elif dset and len(args) > 2:
        # NOTE(review): this branch only prints and never starts the
        # debugger - confirm that is intended
        print('using --trainData not the function input given')
    elif dset:
        mode = declare.asMode(args[0])
        Debugger(optdict['prog'], mode, dset, gradient=True).mainloop()
    else:
        mode = declare.asMode(args[0])
        # the database lives on the program; there is no bare 'db' name here
        assert optdict['prog'].db.isTypeless(), 'cannot debug a database with declared types'
        X = optdict['prog'].db.onehot(args[1])
        dset = dataset.Dataset({mode: X}, {mode: optdict['prog'].db.zeros()})
        Debugger(optdict['prog'], mode, dset, gradient=False).mainloop()
Пример #28
0
    def _run(self,
             prog=None,
             trainData=None,
             testData=None,
             targetMode=None,
             savedTestPredictions=None,
             savedTestExamples=None,
             savedTrainExamples=None,
             savedModel=None,
             learner=None):
        """ Run an experiment.

        The stages are
        - if targetMode is specified, extract just the examples from that mode from trainData and testData
        - evaluate the untrained program on the train and test data and print results
        - train on the trainData
        - if savedModel is given, write the learned database, including the trained parameters,
          to that directory.
        - if savedTestPredictions is given, write the test-data predictions in ProPPR format
        - if savedTestExamples (savedTrainExamples) is given, save the training/test examples in ProPPR format

        testData is optional: every test-data stage is skipped when it is None.
        learner defaults to learn.FixedRateGDLearner(prog).

        Returns (testAcc, testXent) as reported by Expt.printStats for the
        trained program on the test data, or (None, None) without test data.
        """

        if targetMode:
            targetMode = declare.asMode(targetMode)
            trainData = trainData.extractMode(targetMode)
            # testData is optional here, as in the rest of this method
            if testData is not None:
                testData = testData.extractMode(targetMode)

        if not learner: learner = learn.FixedRateGDLearner(prog)

        TP0 = Expt.timeAction('running untrained theory on train data',
                              lambda: learner.datasetPredict(trainData))
        Expt.printStats('untrained theory', 'train', trainData, TP0)
        if testData is not None:
            UP0 = Expt.timeAction('running untrained theory on test data',
                                  lambda: learner.datasetPredict(testData))
            Expt.printStats('untrained theory', 'test', testData, UP0)

        Expt.timeAction('training %s' % type(learner).__name__,
                        lambda: learner.train(trainData))

        TP1 = Expt.timeAction('running trained theory on train data',
                              lambda: learner.datasetPredict(trainData))
        if testData is not None:
            UP1 = Expt.timeAction('running trained theory on test data',
                                  lambda: learner.datasetPredict(testData))

        Expt.printStats('..trained theory', 'train', trainData, TP1)
        if testData is not None:
            testAcc, testXent = Expt.printStats('..trained theory', 'test',
                                                testData, UP1)
        else:
            testAcc, testXent = None, None

        if savedModel:
            Expt.timeAction('saving trained model',
                            lambda: prog.db.serialize(savedModel))

        if savedTestPredictions and testData:
            #todo move this logic to a dataset subroutine
            open(savedTestPredictions, "w").close()  # wipe file first

            def doit():
                # qid is advanced by the value each call returns
                qid = 0
                for mode in testData.modesToLearn():
                    qid += Expt.predictionAsProPPRSolutions(
                        savedTestPredictions, mode.functor, prog.db,
                        UP1.getX(mode), UP1.getY(mode), True, qid)

            Expt.timeAction('saving test predictions', doit)

        if savedTestExamples and testData:
            Expt.timeAction(
                'saving test examples', lambda: testData.saveProPPRExamples(
                    savedTestExamples, prog.db))

        if savedTrainExamples:
            Expt.timeAction(
                'saving train examples', lambda: trainData.saveProPPRExamples(
                    savedTrainExamples, prog.db))

        if savedTestPredictions and savedTestExamples and testData:
            print('ready for commands like: proppr eval %s %s --metric auc --defaultNeg'
                  % (savedTestExamples,savedTestPredictions))

        return testAcc, testXent
Пример #29
0
 def input_placeholder_name(self, mode):
     """For tensorflow, return the name of the placeholder associated with
     the input to the function for mode.

     Raises AssertionError if the target is not 'tensorflow'.
     """
     assert self.target == 'tensorflow'
     lookup = self.xc.getInputName
     return lookup(declare.asMode(mode))
Пример #30
0
 def target_output_placeholder(self, mode):
     """For tensorflow, return the placeholder associated with the target
     output of the function for mode.

     Raises AssertionError if the target is not 'tensorflow'.
     """
     assert self.target == 'tensorflow'
     lookup = self.xc.getTargetOutputPlaceholder
     return lookup(declare.asMode(mode))