Пример #1
0
def dnn_concat(data,
               configfile,
               layers=[16, 8],
               activation='tanhlecun',
               initrange=1e-3,
               bn=True,
               keep_prob=.95,
               concat_size=24,
               uembed=32,
               iembed=32,
               learnrate=.00001,
               verbose=True,
               epochs=10,
               maxbadcount=20,
               mb=2000,
               eval_rate=500):
    """Build the graph described by ``configfile``, train it, and return the model.

    The graph is instantiated from ``data.dev.features``; the architecture
    arguments (``layers``, ``activation``, ``initrange``, ``bn``,
    ``keep_prob``, ``concat_size``, ``uembed``, ``iembed``) are handed to the
    config as variable bindings under the same names.

    The training objective is the summed squared error between the graph
    output and a 'ratings' target placeholder. The evaluation tensor is the
    square root of that summed squared error divided by
    ``data.dev.num_examples``.

    :param data: object exposing ``train`` and ``dev`` splits
    :param configfile: graph config passed to ``config.AntGraph``
    :param learnrate, verbose, epochs, maxbadcount, mb: forwarded to
        ``generic_model.Model``
    :param eval_rate: forwarded to ``Model.train`` as ``eval_schedule``
    :return: the ``generic_model.Model`` after ``train`` has been called
    """
    bindings = dict(layers=layers,
                    activation=activation,
                    initrange=initrange,
                    bn=bn,
                    keep_prob=keep_prob,
                    concat_size=concat_size,
                    uembed=uembed,
                    iembed=iembed)

    with tf.name_scope('ant_graph'):
        graph = config.AntGraph(configfile,
                                data=data.dev.features,
                                marker='-',
                                graph_name='dnn_concat',
                                variable_bindings=bindings)

    predicted = graph.tensor_out
    target = tf.placeholder("float", [None, None], name='Target')
    # Register the target so the trainer feeds it alongside the inputs.
    graph.placeholderdict['ratings'] = target

    with tf.name_scope('objective'):
        squared_error = tf.square(target - predicted)
        objective = tf.reduce_sum(squared_error)

    with tf.name_scope('dev_rmse'):
        dev_squared_error = tf.reduce_sum(tf.square(predicted - target))
        dev_rmse = tf.sqrt(tf.div(dev_squared_error, data.dev.num_examples))

    with tf.name_scope('training'):
        model_settings = dict(mb=mb,
                              learnrate=learnrate,
                              verbose=verbose,
                              maxbadcount=maxbadcount,
                              epochs=epochs,
                              evaluate=dev_rmse,
                              predictions=predicted,
                              model_name='res_concat',
                              make_histograms=False,
                              save=False,
                              tensorboard=False)
        model = generic_model.Model(objective, graph.placeholderdict,
                                    **model_settings)
        model.train(data.train, dev=data.dev, eval_schedule=eval_rate)

    return model
Пример #2
0
def mf(data,
       configfile,
       lamb=0.001,
       kfactors=20,
       learnrate=0.01,
       verbose=True,
       epochs=1000,
       maxbadcount=20,
       mb=500,
       initrange=1,
       eval_rate=500,
       random_seed=None,
       develop=False,
       train_dev_eval_factor=3):
    """Build a 'basic_mf' graph from ``configfile``, train it, return the model.

    The objective is the summed squared error against a 'ratings' target
    placeholder plus ``lamb`` times the summed squares of the graph tensors
    named 'huser', 'hitem', 'ubias' and 'ibias'. RMSE is used as both the dev
    and the train evaluation tensor; MAE is handed to the model through
    ``save_tensors``.

    :param data: object exposing ``train`` and ``dev`` splits
    :param configfile: graph config passed to ``config.AntGraph``
    :param lamb: weight of the squared-norm penalty terms
    :param kfactors, initrange: variable bindings for the config
    :param eval_rate: forwarded to ``Model.train`` as ``eval_schedule``
    :param train_dev_eval_factor: forwarded unchanged to ``Model.train``
    :return: the ``generic_model.Model`` after ``train`` has been called
    """
    # Everything, including the return, stays inside the 'ant_graph' scope so
    # that op names match the original layout.
    with tf.name_scope('ant_graph'):
        bindings = {'kfactors': kfactors, 'initrange': initrange}
        graph = config.AntGraph(configfile,
                                data=data.dev.features,
                                marker='-',
                                graph_name='basic_mf',
                                develop=develop,
                                variable_bindings=bindings)
        predicted = graph.tensor_out
        target = tf.placeholder("float", [None, None], name='Target')
        graph.placeholderdict['ratings'] = target

        def penalty(tensor_name):
            # lamb-weighted sum of squares of one named graph tensor.
            return lamb * tf.reduce_sum(
                tf.square(graph.tensordict[tensor_name]))

        with tf.name_scope('objective'):
            objective = tf.reduce_sum(tf.square(target - predicted))
        objective = objective + (penalty('huser') + penalty('hitem') +
                                 penalty('ubias') + penalty('ibias'))

        with tf.name_scope('dev_rmse'):
            rmse = node_ops.rmse(target, predicted)
        mae = node_ops.mae(target, predicted)

        model = generic_model.Model(objective,
                                    graph.placeholderdict,
                                    mb=mb,
                                    learnrate=learnrate,
                                    verbose=verbose,
                                    maxbadcount=maxbadcount,
                                    epochs=epochs,
                                    evaluate=rmse,
                                    train_evaluate=rmse,
                                    predictions=predicted,
                                    model_name='mf',
                                    random_seed=random_seed,
                                    save_tensors={'mae': mae})
        model.train(data.train,
                    dev=data.dev,
                    eval_schedule=eval_rate,
                    train_dev_eval_factor=train_dev_eval_factor)

        return model
Пример #3
0
def tensorfactor(data,
                 configfile,
                 lamb=.001,
                 kfactors=1000,
                 learnrate=0.01,
                 verbose=True,
                 epochs=1000,
                 maxbadcount=20,
                 mb=500,
                 initrange=1,
                 eval_rate=500,
                 random_seed=None):
    """Build a factorization graph from ``configfile``, train it, return the model.

    The objective is the plain summed squared error against a 'ratings'
    target placeholder. ``lamb`` is not added to the objective here; it is
    only passed to the config as a variable binding.

    :param data: object exposing ``train`` and ``dev`` splits
    :param configfile: graph config passed to ``config.AntGraph``
    :param lamb, kfactors, initrange: variable bindings for the config
    :param learnrate, verbose, epochs, maxbadcount, mb, random_seed:
        forwarded to ``generic_model.Model``
    :param eval_rate: forwarded to ``Model.train`` as ``eval_schedule``
    :return: the ``generic_model.Model`` after ``train`` has been called
    """
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=data.dev.features,
                              marker='-',
                              graph_name='basic_mf',
                              variable_bindings={
                                  'kfactors': kfactors,
                                  'initrange': initrange,
                                  'lamb': lamb
                              })
        y = ant.tensor_out
        y_ = tf.placeholder("float", [None, None], name='Target')
        # Register the target so the trainer feeds it alongside the inputs.
        ant.placeholderdict['ratings'] = y_
        with tf.name_scope('objective'):
            objective = tf.reduce_sum(tf.square(y_ - y))
        with tf.name_scope('dev_rmse'):
            dev_rmse = node_ops.rmse(y_, y)
        model = generic_model.Model(objective,
                                    ant.placeholderdict,
                                    mb=mb,
                                    learnrate=learnrate,
                                    verbose=verbose,
                                    maxbadcount=maxbadcount,
                                    epochs=epochs,
                                    evaluate=dev_rmse,
                                    predictions=y,
                                    model_name='mf',
                                    random_seed=random_seed)
        # Honor the caller's eval_rate; this used to be pinned to 200, which
        # silently ignored the parameter.
        model.train(data.train, dev=data.dev, eval_schedule=eval_rate)

        return model
Пример #4
0
def dssm(data,
         configfile,
         layers=[10, 10, 10],
         bn=True,
         keep_prob=.95,
         act='tanhlecun',
         initrange=1,
         kfactors=10,
         lamb=.1,
         mb=500,
         learnrate=0.0001,
         verbose=True,
         maxbadcount=10,
         epochs=100,
         model_name='dssm',
         random_seed=500,
         eval_rate=500):
    """Build a multi-output graph from ``configfile``, train it, return the model.

    User and item side features are merged and supplied both to the graph
    config (together with the dev features) and to training as
    ``supplement``. The objective sums the squared error of every graph
    output against a 'ratings' target placeholder, plus ``lamb`` times the
    summed squares of the graph tensors 'huser', 'hitem', 'ubias' and
    'ibias'. Only the first output is used for dev RMSE and as the model's
    prediction tensor.

    :param data: object exposing ``train``, ``dev``, ``user`` and ``item``
    :param configfile: graph config passed to ``config.AntGraph``
    :param layers, bn, keep_prob, act: accepted for interface compatibility;
        this function does not reference them
    :param initrange, kfactors: variable bindings for the config
    :param lamb: weight of the squared-norm penalty terms
    :param mb, learnrate, verbose, maxbadcount, epochs, model_name,
        random_seed: forwarded to ``generic_model.Model``
    :param eval_rate: forwarded to ``Model.train`` as ``eval_schedule``
    :return: the ``generic_model.Model`` after ``train`` has been called
    """
    datadict = data.user.features.copy()
    datadict.update(data.item.features)

    configdatadict = data.dev.features.copy()
    configdatadict.update(datadict)
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=configdatadict,
                              marker='-',
                              graph_name='basic_mf',
                              variable_bindings={
                                  'initrange': initrange,
                                  'kfactors': kfactors
                              })

    # A config may yield one tensor or a list of them. Normalize to a list so
    # the single-output case no longer fails with an undefined `objective`
    # or gets sliced by the `[0]` indexing below.
    outputs = ant.tensor_out
    if isinstance(outputs, (list, tuple)):
        outputs = list(outputs)
    else:
        outputs = [outputs]

    y_ = tf.placeholder("float", [None, None], name='Target')
    ant.placeholderdict['ratings'] = y_
    with tf.name_scope('objective'):
        objective = tf.reduce_sum(tf.square(y_ - outputs[0]))
        for extra_output in outputs[1:]:
            objective += tf.reduce_sum(tf.square(y_ - extra_output))
        objective += (
            lamb * tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['ubias'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['ibias'])))
    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(outputs[0], y_)

    model = generic_model.Model(objective,
                                ant.placeholderdict,
                                mb=mb,
                                learnrate=learnrate,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=dev_rmse,
                                predictions=outputs[0],
                                # Was hard-coded to 'dssm', ignoring the
                                # parameter; the default is unchanged.
                                model_name=model_name,
                                random_seed=random_seed)

    model.train(data.train,
                dev=data.dev,
                supplement=datadict,
                eval_schedule=eval_rate)
    return model
Пример #5
0
def tree(data,
         configfile,
         lamb=0.001,
         kfactors=20,
         learnrate=0.0001,
         verbose=True,
         maxbadcount=20,
         mb=500,
         initrange=0.00001,
         epochs=10,
         random_seed=None,
         eval_rate=500,
         keep_prob=0.95,
         act='tanh'):
    """Build a 'tree' graph from ``configfile``, train it, return the model.

    User and item side features are merged and supplied both to the graph
    config (together with the dev features) and to training as
    ``supplement``. The objective is the summed squared error against a
    'ratings' target placeholder plus ``lamb`` times the summed squares of
    the graph tensors 'huser', 'hitem', 'ubias' and 'ibias'. The evaluation
    tensor is the square root of the summed squared error divided by
    ``data.dev.num_examples``.

    :param data: object exposing ``train``, ``dev``, ``user`` and ``item``
    :param configfile: graph config passed to ``config.AntGraph``
    :param lamb: weight of the squared-norm penalty terms
    :param kfactors, initrange, keep_prob, act: variable bindings for the
        config
    :param learnrate, verbose, maxbadcount, mb, epochs, random_seed:
        forwarded to ``generic_model.Model``
    :param eval_rate: forwarded to ``Model.train`` as ``eval_schedule``
    :return: the ``generic_model.Model`` after ``train`` has been called
    """
    datadict = data.user.features.copy()
    datadict.update(data.item.features)

    configdatadict = data.dev.features.copy()
    configdatadict.update(datadict)

    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=configdatadict,
                              marker='-',
                              variable_bindings={
                                  'kfactors': kfactors,
                                  'initrange': initrange,
                                  'keep_prob': keep_prob,
                                  'act': act
                              },
                              graph_name='tree')

    y = ant.tensor_out
    y_ = tf.placeholder("float", [None, None], name='Target')
    # Put the new placeholder in the graph for training.
    ant.placeholderdict['ratings'] = y_
    with tf.name_scope('objective'):
        objective = (tf.reduce_sum(tf.square(y_ - y)) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['ubias'])) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['ibias'])))
    with tf.name_scope('dev_rmse'):
        dev_rmse = tf.sqrt(
            tf.div(tf.reduce_sum(tf.square(y - y_)), data.dev.num_examples))

    with tf.name_scope('training'):
        model = generic_model.Model(objective,
                                    ant.placeholderdict,
                                    mb=mb,
                                    learnrate=learnrate,
                                    verbose=verbose,
                                    maxbadcount=maxbadcount,
                                    epochs=epochs,
                                    evaluate=dev_rmse,
                                    predictions=y,
                                    model_name='tree',
                                    # Previously accepted but never forwarded,
                                    # so a caller's seed had no effect.
                                    random_seed=random_seed)
        model.train(data.train,
                    dev=data.dev,
                    supplement=datadict,
                    eval_schedule=eval_rate)

    return model
Пример #6
0
# Script fragment: preprocess side features, build the 'tree.config' graph and
# define the objective/evaluation tensors. `data` must already be loaded by
# code outside this fragment.

# Rescale the user 'age' and item 'year' features in place: each one goes
# through loader.center first and loader.maxnormalize second.
data.user.features['age'] = loader.center(data.user.features['age'], axis=None)
data.item.features['year'] = loader.center(data.item.features['year'],
                                           axis=None)
data.user.features['age'] = loader.maxnormalize(data.user.features['age'])
data.item.features['year'] = loader.maxnormalize(data.item.features['year'])

# datadict: user side features with the item side features merged on top.
# configdatadict: dev features plus those side features; this is what the
# graph config is instantiated against.
datadict = data.user.features.copy()
datadict.update(data.item.features)
configdatadict = data.dev.features.copy()
configdatadict.update(datadict)

# Build the graph from 'tree.config' under the 'mfgraph' variable scope, with
# fixed values for the config's 'kfactors' and 'initrange' bindings.
with tf.variable_scope('mfgraph'):
    ant = config.AntGraph('tree.config',
                          data=configdatadict,
                          marker='-',
                          variable_bindings={
                              'kfactors': 100,
                              'initrange': 0.0001
                          },
                          develop=False)

# y is the graph output; y_ is the target placeholder it is trained against.
y = ant.tensor_out
y_ = tf.placeholder("float", [None, None], name='Target')
ant.placeholderdict[
    'ratings'] = y_  # put the new placeholder in the graph for training
# Training objective: summed squared error. Evaluation: node_ops.rmse.
objective = tf.reduce_sum(tf.square(y_ - y))
dev_rmse = node_ops.rmse(y, y_)

model = generic_model.Model(objective,
                            ant.placeholderdict,
                            mb=500,
                            learnrate=0.00001,
Пример #7
0
def mf(data,
       configfile,
       lamb=0.001,
       kfactors=20,
       learnrate=0.01,
       verbose=True,
       epochs=1000,
       maxbadcount=20,
       mb=500,
       initrange=1,
       eval_rate=500,
       random_seed=None,
       develop=False):
    """Load a dataset, build a 'basic_mf' graph, train it, and return the model.

    Unlike the other model builders, ``data`` is the argument handed to
    ``loader.read_data_sets`` (presumably a dataset path -- TODO confirm);
    the name is then rebound to the loaded data object. Train and dev
    'ratings' labels are passed through ``loader.center`` before use.

    The objective is the summed squared error between
    ``node_ops.x_dot_y(ant.tensor_out)`` and a 'ratings' target placeholder,
    plus ``lamb`` times the summed squares of the graph tensors 'huser',
    'hitem', 'ubias' and 'ibias'.

    NOTE(review): the block between the ``utility_matrix`` assignment and
    ``huserplus`` builds placeholders, variables and lookups that are never
    connected to ``y``, the objective, or the placeholder dict passed to the
    model. It looks like an unfinished experiment, but it still adds
    variables to the graph.

    :param data: argument for ``loader.read_data_sets`` (see above)
    :param configfile: graph config passed to ``config.AntGraph``
    :param lamb: penalty weight; also passed to the config as a binding
    :param kfactors, initrange: variable bindings for the config; also used
        to size and scale the locally created variables
    :param learnrate, verbose, epochs, maxbadcount, mb, random_seed:
        forwarded to ``generic_model.Model``
    :param eval_rate: forwarded to ``Model.train`` as ``eval_schedule``
    :param develop: forwarded to ``config.AntGraph``
    :return: the ``generic_model.Model`` after ``train`` has been called
    """

    # Rebinds `data` from the caller's argument to the loaded data object.
    data = loader.read_data_sets(data,
                                 hashlist=['item', 'user', 'ratings'],
                                 folders=['dev', 'train', 'item'])
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=data.dev.features,
                              marker='-',
                              graph_name='basic_mf',
                              develop=develop,
                              variable_bindings={
                                  'kfactors': kfactors,
                                  'initrange': initrange,
                                  'lamb': lamb
                              })
        # Debug output of the raw graph output(s).
        print(ant.tensor_out)
        y = node_ops.x_dot_y(ant.tensor_out)
        y_ = tf.placeholder("float", [None, None], name='Target')

        # NOTE(review): `utility_matrix` is not defined in this function; it
        # must exist at module scope or this line raises NameError.
        data.item.features['util'] = utility_matrix

        # --- experimental block: nothing below feeds into `y` or the
        # objective (see the docstring note) ---
        xuser = tf.placeholder(tf.int32, [None])
        xitem = tf.placeholder(tf.int32, [None])

        xutil = tf.placeholder(tf.float32, [None, None])

        # One row per user/item id and `kfactors` columns, scaled by initrange.
        wuser = initrange * tf.Variable(
            tf.truncated_normal([data.dev.features['user'].dim, kfactors]))
        witem = initrange * tf.Variable(
            tf.truncated_normal([data.dev.features['item'].dim, kfactors]))
        wplus = initrange * tf.Variable(
            tf.truncated_normal([data.dev.features['item'].dim, kfactors]))

        # One bias entry per user/item id.
        ubias = initrange * tf.Variable(
            tf.truncated_normal([data.dev.features['user'].dim]))
        ibias = initrange * tf.Variable(
            tf.truncated_normal([data.dev.features['item'].dim]))

        i_bias = tf.nn.embedding_lookup(ibias, xitem)
        u_bias = tf.nn.embedding_lookup(ubias, xuser)

        huser = tf.nn.embedding_lookup(wuser, xuser)
        hitem = tf.nn.embedding_lookup(witem, xitem)
        # Rows of the utility placeholder selected by user id.
        hplus = tf.nn.embedding_lookup(xutil, xuser)

        # (hplus . wplus) scaled by the reciprocal square root of each row sum
        # of hplus.
        plus = tf.mul(
            tf.matmul(hplus, wplus, a_is_sparse=True),
            tf.rsqrt(tf.reduce_sum(hplus, reduction_indices=1,
                                   keep_dims=True)))
        # The `+ plus` term is commented out, so this is just `huser`.
        huserplus = huser  #+ plus
        # --- end experimental block ---

        ant.placeholderdict['ratings'] = y_
        with tf.name_scope('objective'):
            objective = (tf.reduce_sum(tf.square(y_ - y)))
        # The penalty uses the config graph's tensors, not the local
        # huser/hitem/ubias/ibias created above.
        objective += (
            lamb * tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['ubias'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['ibias'])))
        with tf.name_scope('dev_rmse'):
            dev_rmse = node_ops.rmse(y_, y)
        model = generic_model.Model(objective,
                                    ant.placeholderdict,
                                    mb=mb,
                                    learnrate=learnrate,
                                    verbose=verbose,
                                    maxbadcount=maxbadcount,
                                    epochs=epochs,
                                    evaluate=dev_rmse,
                                    predictions=y,
                                    model_name='mf',
                                    random_seed=random_seed)
        model.train(data.train, dev=data.dev, eval_schedule=eval_rate)

        return model
Пример #8
0
def dsadd(data,
          configfile,
          initrange=0.1,
          kfactors=20,
          lamb=.01,
          mb=500,
          learnrate=0.003,
          verbose=True,
          maxbadcount=10,
          epochs=100,
          model_name='dssm',
          random_seed=500,
          eval_rate=500):
    """Train a learned weighted sum of the graph's outputs and return the model.

    User and item side features are merged and supplied both to the graph
    config (together with the dev features) and to training as
    ``supplement``. Each graph output is multiplied by its own trainable
    scalar and the products are summed into one prediction. The objective is
    the summed squared error of that prediction against a 'ratings' target
    placeholder, plus ``lamb`` times the summed squares of 'huser', 'hitem',
    'ubias', 'ibias' and of the concatenated raw outputs.

    Changes from the previous revision:

    * The training hyper-parameters (``mb``, ``learnrate``, ``verbose``,
      ``maxbadcount``, ``epochs``, ``model_name``, ``random_seed``) are now
      forwarded to ``generic_model.Model``; they used to be ignored in favour
      of hard-coded literals. All of those literals equalled the signature
      defaults except the learning rate, which was pinned to 1e-6. Pass
      ``learnrate=0.000001`` to reproduce the old behaviour.
    * A config that yields a single tensor instead of a list no longer fails
      with an undefined ``prediction``.

    :param data: object exposing ``train``, ``dev``, ``user`` and ``item``
    :param configfile: graph config passed to ``config.AntGraph``
    :param initrange, kfactors: variable bindings for the config
    :param lamb: weight of the squared-norm penalty terms
    :param eval_rate: forwarded to ``Model.train`` as ``eval_schedule``
    :return: the ``generic_model.Model`` after ``train`` has been called
    """
    datadict = data.user.features.copy()
    datadict.update(data.item.features)

    configdatadict = data.dev.features.copy()
    configdatadict.update(datadict)
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=configdatadict,
                              marker='-',
                              graph_name='basic_mf',
                              variable_bindings={
                                  'initrange': initrange,
                                  'kfactors': kfactors
                              })

    # Normalize the graph output to a list so one code path handles both the
    # single-output and the multi-output configs.
    outputs = ant.tensor_out
    if isinstance(outputs, (list, tuple)):
        outputs = list(outputs)
    else:
        outputs = [outputs]

    y_ = tf.placeholder("float", [None, None], name='Target')
    ant.placeholderdict['ratings'] = y_
    with tf.name_scope('objective'):
        # One trainable mixing weight per graph output.
        scalars = tf.Variable(
            0.001 * tf.truncated_normal([len(outputs), 1]))
        prediction = tf.mul(outputs[0], tf.slice(scalars, [0, 0], [1, 1]))
        for i, output in enumerate(outputs[1:], start=1):
            with tf.variable_scope('predictor%d' % i):
                prediction = prediction + tf.mul(
                    output, tf.slice(scalars, [i, 0], [1, 1]))
        # Kept separate from `prediction` so the blended prediction tensor is
        # not overwritten by its own error.
        squared_error = tf.square(y_ - prediction)

        objective = (
            tf.reduce_sum(squared_error) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['ubias'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['ibias'])) +
            lamb * tf.reduce_sum(tf.square(tf.concat(1, outputs))))
    with tf.name_scope('dev_rmse'):
        dev_rmse = tf.sqrt(
            tf.div(tf.reduce_sum(squared_error), data.dev.num_examples))

    # NOTE(review): the model's prediction tensor is still the first raw
    # output, while dev_rmse is computed from the blended `prediction`.
    # Confirm whether `predictions` should be the blended tensor instead.
    model = generic_model.Model(objective,
                                ant.placeholderdict,
                                mb=mb,
                                learnrate=learnrate,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=dev_rmse,
                                predictions=outputs[0],
                                model_name=model_name,
                                random_seed=random_seed)

    model.train(data.train,
                dev=data.dev,
                supplement=datadict,
                eval_schedule=eval_rate)
    return model
Пример #9
0
# Script fragment: fetch the ml100k archive, load it, build the 'mf2.config'
# graph and define the objective/evaluation tensors.

# Fetch and unpack the dataset archive into the current directory.
loader.maybe_download('ml100k.tar.gz', '.',
                      'http://sw.cs.wwu.edu/~tuora/aarontuor/ml100k.tar.gz')
loader.untar('ml100k.tar.gz')
# Load only the 'dev' and 'train' folders.
data = loader.read_data_sets('ml100k',
                             folders=['dev', 'train'],
                             hashlist=['item', 'user', 'ratings'])
data.show()
# Pass both splits' 'ratings' labels through loader.center before training.
data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])

# Build the graph from 'mf2.config' under the 'mfgraph' variable scope, with
# fixed values for the config's 'kfactors', 'initrange' and 'l2' bindings.
with tf.variable_scope('mfgraph'):
    ant = config.AntGraph('mf2.config',
                          data=data.dev.features,
                          marker='-',
                          variable_bindings={
                              'kfactors': 100,
                              'initrange': 0.001,
                              'l2': 0.1
                          })

# y is the graph output; y_ is the target placeholder it is trained against.
y = ant.tensor_out
y_ = tf.placeholder("float", [None, None], name='Target')
ant.placeholderdict[
    'ratings'] = y_  # put the new placeholder in the graph for training
# Training objective: summed squared error. Evaluation tensors: RMSE and MAE
# from node_ops.
objective = tf.reduce_sum(tf.square(y_ - y))
dev_rmse = node_ops.rmse(y, y_)
dev_mae = node_ops.mae(y, y_)

model = generic_model.Model(objective,
                            ant.placeholderdict,
                            mb=500,
Пример #10
0
# [-nlayers NUM_HIDDEN_LAYERS]

def deep(data, configfile,
        epochs=10,
        learnrate=0.001
        layers=[10, 10, 10],
        act='tanh',
        opt='grad',
        mb=500,
        type='r')


    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=data.dev.features,
                              marker='-',
                              graph_name='deep',
                              variable_bindings = {'layers': layers,
                                                   'act': act})
    y = ant.tensor_out
    y_ = tf.placeholder("float", [None, None], name='Target')
    ant.placeholderdict['targets'] = y_
    if type == 'r':
        objective = node_ops.mse(y, y_)
    if type == 'c':
        objective = node_ops.accuracy(y, y_)

    with tf.name_scope('objective'):
        objective = (tf.reduce_sum(tf.square(y_ - y)) +
                     lamb*tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
                     lamb*tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
                     lamb*tf.reduce_sum(tf.square(ant.tensordict['ubias'])) +
Пример #11
0
def tree(data,
         configfile,
         lamb=0.001,
         kfactors=50,
         learnrate=0.00001,
         verbose=True,
         maxbadcount=20,
         mb=500,
         initrange=0.00001,
         epochs=10,
         random_seed=None,
         eval_rate=500,
         keep_prob=0.95,
         act='tanh'):
    """Train a weighted blend of two graph outputs plus biases; return the model.

    User and item side features are merged and supplied both to the graph
    config (together with the dev features) and to training as
    ``supplement``. The prediction is
    ``alpha * out[0] + beta * out[1] + u_bias + i_bias``: ``alpha`` and
    ``beta`` are weights created with ``node_ops.weights``, and the biases
    are looked up from variables created here, indexed by the graph's 'user'
    and 'item' placeholders. The config must therefore yield at least two
    outputs and define 'huser', 'hitem', 'huser2' and 'hitem2' tensors.

    The objective is the summed squared error against a 'ratings' target
    placeholder plus ``lamb`` times the summed squares of those four tensors
    and of the looked-up biases.

    :param data: object exposing ``train``, ``dev``, ``user`` and ``item``
    :param configfile: graph config passed to ``config.AntGraph``
    :param lamb: weight of the squared-norm penalty terms
    :param kfactors, initrange, keep_prob, act: variable bindings for the
        config; ``initrange`` also scales the bias variables
    :param learnrate, verbose, maxbadcount, mb, epochs, random_seed:
        forwarded to ``generic_model.Model``
    :param eval_rate: forwarded to ``Model.train`` as ``eval_schedule``
    :return: the ``generic_model.Model`` after ``train`` has been called
    """
    datadict = data.user.features.copy()
    datadict.update(data.item.features)

    configdatadict = data.dev.features.copy()
    configdatadict.update(datadict)

    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=configdatadict,
                              marker='-',
                              variable_bindings={
                                  'kfactors': kfactors,
                                  'initrange': initrange,
                                  'keep_prob': keep_prob,
                                  'act': act
                              },
                              graph_name='tree')

    # Mixing weights for the two graph outputs.
    alpha = node_ops.weights('tnorm', [1, 1], l2=1.0)
    beta = node_ops.weights('tnorm', [1, 1], l2=1.0)
    # Debug output of the raw graph output(s).
    print(ant.tensor_out)

    # One bias entry per user/item id, scaled by initrange.
    ubias = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['user'].dim]))
    ibias = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].dim]))

    i_bias = tf.nn.embedding_lookup(ibias, ant.placeholderdict['item'])
    u_bias = tf.nn.embedding_lookup(ubias, ant.placeholderdict['user'])
    y = alpha * ant.tensor_out[0] + beta * ant.tensor_out[1] + u_bias + i_bias
    y_ = tf.placeholder("float", [None, None], name='Target')
    # Put the new placeholder in the graph for training.
    ant.placeholderdict['ratings'] = y_
    with tf.name_scope('objective'):
        objective = (
            tf.reduce_sum(tf.square(y_ - y)) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['huser2'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem2'])) +
            lamb * tf.reduce_sum(tf.square(u_bias)) +
            lamb * tf.reduce_sum(tf.square(i_bias)))
    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(y_, y)

    with tf.name_scope('training'):
        model = generic_model.Model(objective,
                                    ant.placeholderdict,
                                    mb=mb,
                                    learnrate=learnrate,
                                    verbose=verbose,
                                    maxbadcount=maxbadcount,
                                    epochs=epochs,
                                    evaluate=dev_rmse,
                                    predictions=y,
                                    model_name='tree',
                                    train_evaluate=dev_rmse,
                                    # Previously accepted but never forwarded,
                                    # so a caller's seed had no effect.
                                    random_seed=random_seed)
        model.train(data.train,
                    dev=data.dev,
                    supplement=datadict,
                    eval_schedule=eval_rate,
                    train_dev_eval_factor=5)
    return model