Example #1
def svd(train, test, learning_rate=0.0005, reg=0.02, dim=50, batch_size=1000):
    samples_per_batch = len(train) // batch_size

    iter_train = dataio.ShuffleIterator([train["user"],
                                         train["item"],
                                         train["rate"]],
                                        batch_size=batch_size)

    iter_test = dataio.OneEpochIterator([test["user"],
                                         test["item"],
                                         test["rate"]],
                                        batch_size=-1)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])

    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=dim,
                                           device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch, learning_rate=learning_rate, reg=reg, device=DEVICE)

    pid = int(os.getpid())

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        min_test_err = 9999
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer], feed_dict={user_batch: users,
                                                                   item_batch: items,
                                                                   rate_batch: rates})
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                            item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                min_test_err = min(test_err, min_test_err)
                print("{:5d} {:3d} {:f} {:f} {:f} {:f}(s)".format(pid, i // samples_per_batch, train_err, test_err, min_test_err,
                                                       end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
                sys.stdout.flush()
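Most of the examples on this page call two small helpers, clip and make_scalar_summary, that none of the snippets define. A minimal sketch of what they might look like, assuming the 1.0-5.0 MovieLens rating scale and TensorFlow 1.x summary protobufs (both the bounds and the proto-based implementation are assumptions, not the snippet authors' exact code):

import numpy as np
from tensorflow.core.framework import summary_pb2

def clip(x):
    # Assumed rating bounds: the 1-5 MovieLens scale used by most snippets here.
    return np.clip(x, 1.0, 5.0)

def make_scalar_summary(name, val):
    # Build a scalar Summary proto that summary_writer.add_summary() accepts.
    return summary_pb2.Summary(value=[summary_pb2.Summary.Value(tag=name, simple_value=val)])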
Example #2
def svd(train, test):
    samples_per_batch = len(train) // BATCH_SIZE

    iter_train = dataio.ShuffleIterator([train["user"],
                                         train["item"],
                                         train["rate"]],
                                        batch_size=BATCH_SIZE)

    iter_test = dataio.OneEpochIterator([test["user"],
                                         test["item"],
                                         test["rate"]],
                                        batch_size=-1)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])

    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM,
                                           device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch, learning_rate=0.001, reg=0.05, device=DEVICE)

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer], feed_dict={user_batch: users,
                                                                   item_batch: items,
                                                                   rate_batch: rates})
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                            item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch, train_err, test_err,
                                                       end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
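None of the snippets show ops.inference_svd. A plausible minimal sketch of the biased matrix-factorization graph the call sites above imply, written for TensorFlow 1.x; the variable names, initializers, and the global-bias term are assumptions rather than the authors' exact code (the "svd_inference"/"svd_regularizer" op names match the nodes some snippets extract later):

import tensorflow as tf

def inference_svd(user_batch, item_batch, user_num, item_num, dim=5, device="/cpu:0"):
    # Bias terms and latent-factor embeddings live on the CPU so that
    # embedding_lookup works regardless of the compute device.
    with tf.device("/cpu:0"):
        bias_global = tf.get_variable("bias_global", shape=[])
        w_bias_user = tf.get_variable("embd_bias_user", shape=[user_num])
        w_bias_item = tf.get_variable("embd_bias_item", shape=[item_num])
        bias_user = tf.nn.embedding_lookup(w_bias_user, user_batch, name="bias_user")
        bias_item = tf.nn.embedding_lookup(w_bias_item, item_batch, name="bias_item")
        w_user = tf.get_variable("embd_user", shape=[user_num, dim],
                                 initializer=tf.truncated_normal_initializer(stddev=0.02))
        w_item = tf.get_variable("embd_item", shape=[item_num, dim],
                                 initializer=tf.truncated_normal_initializer(stddev=0.02))
        embd_user = tf.nn.embedding_lookup(w_user, user_batch, name="embedding_user")
        embd_item = tf.nn.embedding_lookup(w_item, item_batch, name="embedding_item")
    with tf.device(device):
        # Prediction: factor dot product plus global, user, and item biases.
        infer = tf.reduce_sum(tf.multiply(embd_user, embd_item), 1)
        infer = tf.add(infer, bias_global)
        infer = tf.add(infer, bias_user)
        infer = tf.add(infer, bias_item, name="svd_inference")
        # L2 penalty on the embeddings; optimization() scales it by `reg`.
        regularizer = tf.add(tf.nn.l2_loss(embd_user), tf.nn.l2_loss(embd_item),
                             name="svd_regularizer")
    return infer, regularizer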
Example #3
def svd(train, test):
    samples_per_batch = len(train) // BATCH_SIZE

    iter_train = dataio.ShuffleIterator([train["user"],
                                         train["item"],
                                         train["rate"]],
                                        batch_size=BATCH_SIZE)

    iter_test = dataio.OneEpochIterator([test["user"],
                                         test["item"],
                                         test["rate"]],
                                        batch_size=-1)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])

    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM,
                                           device=DEVICE)
    _, train_op = ops.optimiaztion(infer, regularizer, rate_batch, learning_rate=0.15, reg=0.05, device=DEVICE)

    init_op = tf.initialize_all_variables()
    with tf.Session() as sess:
        sess.run(init_op)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer], feed_dict={user_batch: users,
                                                                   item_batch: items,
                                                                   rate_batch: rates})
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                            item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch, train_err, np.sqrt(np.mean(test_err2)),
                                                       end - start))
                start = end

        output_graph_def = tf.python.client.graph_util.extract_sub_graph(sess.graph.as_graph_def(),
                                                                         ["svd_inference", "svd_regularizer"])
        tf.train.SummaryWriter(logdir="/tmp/svd", graph_def=output_graph_def)
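The companion ops.optimization (spelled optimiaztion in some snippets) is not shown either. A minimal sketch consistent with the call sites above, assuming a squared-error cost plus the scaled regularizer and a plain gradient-descent optimizer (the optimizer choice is an assumption):

def optimization(infer, regularizer, rate_batch, learning_rate=0.001, reg=0.05, device="/cpu:0"):
    global_step = tf.train.get_global_step()
    with tf.device(device):
        # Squared-error data term plus the L2 penalty returned by inference_svd.
        cost_l2 = tf.nn.l2_loss(tf.subtract(infer, rate_batch))
        penalty = tf.constant(reg, dtype=tf.float32, shape=[], name="l2")
        cost = tf.add(cost_l2, tf.multiply(regularizer, penalty))
        train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(
            cost, global_step=global_step)
    return cost, train_op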
Example #4
def svd(train, test):
    samples_per_batch = len(train) // BATCH_SIZE
    print(test.head(10))
    iter_train = dataio.ShuffleIterator(
        [train["user"], train["days_since_prior_order"], train["basket_size"]],
        batch_size=BATCH_SIZE)

    iter_test = dataio.OneEpochIterator(
        [test["user"], test["days_since_prior_order"], test["basket_size"]],
        batch_size=-1)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    days_since_prior_order_batch = tf.placeholder(
        tf.int32, shape=[None], name="id_days_since_prior_order")
    basket_size_batch = tf.placeholder(tf.float32, shape=[None])

    infer, regularizer = ops.inference_svd(user_batch,
                                           days_since_prior_order_batch,
                                           user_num=USER_NUM,
                                           item_num=ITEM_NUM,
                                           dim=DIM,
                                           device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer,
                                   regularizer,
                                   basket_size_batch,
                                   learning_rate=0.001,
                                   reg=0.05,
                                   device=DEVICE)

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log",
                                               graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error",
                                   "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        min_train_err = 100
        predList = []
        actList = []
        finalPred = []
        finalAct = []
        finalpr = []
        finalac = []
        for i in range(EPOCH_MAX * samples_per_batch):

            users, days_since_prior_orders, basket_sizes = next(iter_train)
            _, pred_batch = sess.run(
                [train_op, infer],
                feed_dict={
                    user_batch: users,
                    days_since_prior_order_batch: days_since_prior_orders,
                    basket_size_batch: basket_sizes
                })
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - basket_sizes, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, days_since_prior_orders, basket_sizes in iter_test:
                    pred_batch = sess.run(infer,
                                          feed_dict={
                                              user_batch:
                                              users,
                                              days_since_prior_order_batch:
                                              days_since_prior_orders
                                          })
                    #pred_batch = clip(pred_batch)
                    test_err2 = np.append(
                        test_err2, np.power(pred_batch - basket_sizes, 2))

                    pr = pred_batch
                    ac = basket_sizes
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                       train_err, test_err,
                                                       end - start))
                train_err_summary = make_scalar_summary(
                    "training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end

                if train_err < min_train_err:
                    min_train_err = train_err
                    finalpr = pr
                    finalac = ac

        return finalpr, finalac

Example #5
LEARNING_RATE = 5 * 1e-3
# LEARNING_RATE = 0.1
EPOCH_MAX = 100
LAMBDA_REG = 0.1
LOG_STEP = 101


user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
rate_batch = tf.placeholder(tf.float32, shape=[None])
wins_batch = tf.placeholder(tf.float32, shape=[None], name="nb_wins")
fails_batch = tf.placeholder(tf.float32, shape=[None], name="nb_fails")

infer, logits, logits_cdf, logits_pdf, regularizer, user_bias, user_features, item_bias, item_features, thresholds = ops.inference_svd(user_batch, item_batch, wins_batch, fails_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
global_step = tf.train.get_or_create_global_step()
# Attention: only var_list = embd_user, bias_user
cost, auc, update_op, train_op = ops.optimization(infer, logits, logits_cdf, logits_pdf, regularizer, rate_batch, learning_rate=LEARNING_RATE, reg=LAMBDA_REG, device=DEVICE, var_list=[user_bias, user_features])

df_train, _, df_test = dataio.get_data()

saver = tf.train.Saver()

with tf.Session() as sess:
    saver.restore(sess, os.path.join(BASE_DIR, "fm.ckpt"))

    all_user_features = sess.run(user_features, feed_dict={user_batch: range(USER_NUM)})
    all_user_features_norms = np.diag(all_user_features.dot(all_user_features.T))
    all_user_bias = sess.run(user_bias, feed_dict={user_batch: range(USER_NUM)})
    # print('all_features', all_user_features.min(), 'to', all_user_features.max())
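A small note on the norm computation above: np.diag(all_user_features.dot(all_user_features.T)) materializes the full USER_NUM x USER_NUM Gram matrix only to read its diagonal. If USER_NUM is large, the same per-user squared norms can be computed directly, for example:

all_user_features_norms = np.einsum("ij,ij->i", all_user_features, all_user_features)
# equivalently: (all_user_features ** 2).sum(axis=1)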
Example #6
def svd(train, test, length, moviefile, trainFl=False):
    print("Movies file length:")
    print(len(moviefile))
    samples_per_batch = len(train) // BATCH_SIZE

    iter_train = dataio.ShuffleIterator(
        [train["user"], train["item"], train["rate"]], batch_size=BATCH_SIZE)

    iter_test = dataio.OneEpochIterator(
        [test["user"], test["item"], test["rate"]], batch_size=-1)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])

    infer, regularizer = ops.inference_svd(user_batch,
                                           item_batch,
                                           user_num=USER_NUM,
                                           item_num=ITEM_NUM,
                                           dim=DIM,
                                           device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer,
                                   regularizer,
                                   rate_batch,
                                   learning_rate=0.001,
                                   reg=0.05,
                                   device=DEVICE)
    #zeros= tf.Variable(tf.zeros([1]),name="zeros")

    init_op = tf.global_variables_initializer()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="./tmp/svd/log",
                                               graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error",
                                   "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()

        if trainFl == True:
            for i in range(EPOCH_MAX * samples_per_batch):
                users, items, rates = next(iter_train)
                _, pred_batch = sess.run([train_op, infer],
                                         feed_dict={
                                             user_batch: users,
                                             item_batch: items,
                                             rate_batch: rates
                                         })
                pred_batch = clip(pred_batch)
                errors.append(np.power(pred_batch - rates, 2))
                if i % samples_per_batch == 0:
                    train_err = np.sqrt(np.mean(errors))
                    test_err2 = np.array([])
                    for users, items, rates in iter_test:
                        pred_batch = sess.run(infer,
                                              feed_dict={
                                                  user_batch: users,
                                                  item_batch: items
                                              })
                        pred_batch = clip(pred_batch)
                        test_err2 = np.append(test_err2,
                                              np.power(pred_batch - rates, 2))
                    end = time.time()
                    test_err = np.sqrt(np.mean(test_err2))
                    print("{:3d} {:f} {:f} {:f}(s)".format(
                        i // samples_per_batch, train_err, test_err,
                        end - start))
                    train_err_summary = make_scalar_summary(
                        "training_error", train_err)
                    test_err_summary = make_scalar_summary(
                        "test_error", test_err)
                    summary_writer.add_summary(train_err_summary, i)
                    summary_writer.add_summary(test_err_summary, i)
                    start = end

            #meta_graph_def = tf.train.export_meta_graph(filename='/tmp/tfrecomm.meta')
            save_path = saver.save(sess, "./tmp/")
        else:
            saver.restore(sess, "./tmp/")

        # print("Model saved in file: %s" % save_path)
        # sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # # Bind the socket to the port
        # server_address = ('0.0.0.0', 81)
        # print >>sys.stderr, 'starting up on %s port %s' % server_address
        # sock.bind(server_address)
        # sock.listen(1)

        movies = list(range(len(moviefile)))
        # print (movies)
        users = [1]
        pred_batch = sess.run(infer,
                              feed_dict={
                                  user_batch: users,
                                  item_batch: movies
                              })

        moviesrecomm = list(zip(movies, pred_batch))
        smovies = sorted(moviesrecomm, key=lambda x: x[1], reverse=True)

        print(
            " Top Movies ------------------------------------------------------------"
        )

        topmovies = smovies[0:10]
        print(topmovies)

        # give number between 1 - 5000
        data = 3
        del users[:]
        users.append(int(data))
        print(users)
        pred_batch = sess.run(infer,
                              feed_dict={
                                  user_batch: users,
                                  item_batch: movies
                              })
        moviesrecomm = list(zip(movies, pred_batch))
        smovies = sorted(moviesrecomm, key=lambda x: x[1], reverse=True)
        topmovies = smovies[0:10]
        print(topmovies)
        for item in topmovies:
            itopmovie = item[0]
            recommendedmovie = moviefile["title"][itopmovie]
            recommendedtags = moviefile["tags"][itopmovie]
            #print >>sys.stderr, 'sending data back to the client'
            # connection.sendall(recommendedmovie+":"+recommendedtags+"\n")
            #print >>sys.stderr, 'Sent data'
        return

        while True:
            # Wait for a connection
            print('waiting for a connection', file=sys.stderr)
            connection, client_address = sock.accept()
            try:
                print('connection from', client_address, file=sys.stderr)
                # Receive the data in small chunks and retransmit it
                while True:
                    data = connection.recv(16)
                    print('received "%s"' % data, file=sys.stderr)
                    if data:
                        del users[:]
                        try:
                            user = int(data)
                        except ValueError:
                            break
                        users.append(user)
                        print(users)
                        pred_batch = sess.run(infer,
                                              feed_dict={
                                                  user_batch: users,
                                                  item_batch: movies
                                              })
                        moviesrecomm = list(zip(movies, pred_batch))
                        smovies = sorted(moviesrecomm,
                                         key=lambda x: x[1],
                                         reverse=True)
                        topmovies = smovies[0:10]
                        print(topmovies)
                        for item in topmovies:
                            itopmovie = item[0]
                            recommendedmovie = moviefile["title"][itopmovie]
                            recommendedtags = moviefile["tags"][itopmovie]
                            #print >>sys.stderr, 'sending data back to the client'
                            connection.sendall((recommendedmovie + ":" +
                                                recommendedtags + "\n").encode())
                            #print >>sys.stderr, 'Sent data'
                    else:
                        print('no more data from', client_address, file=sys.stderr)
                        break
            finally:
                connection.close()
Example #7
def svd(train, test):
    samples_per_batch = len(train) // batch_size
    iter_train = data.ShuffleIterator(
        [train["user"], train["item"], train["rate"]], batch_size=batch_size)

    iter_test = data.OneEpochIterator(
        [test["user"], test["item"], test["rate"]], batch_size=-1)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])

    infer, regularizer = ops.inference_svd(user_batch,
                                           item_batch,
                                           user_num=user_num,
                                           item_num=item_num,
                                           dim=dim,
                                           device=device)
    _, train_op = ops.optimization(infer,
                                   regularizer,
                                   rate_batch,
                                   learning_rate=0.001,
                                   reg=0.05,
                                   device=device)

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log",
                                               graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error",
                                   "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(epoch_max * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={
                                         user_batch: users,
                                         item_batch: items,
                                         rate_batch: rates
                                     })

            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer,
                                          feed_dict={
                                              user_batch: users,
                                              item_batch: items
                                          })
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2,
                                          np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                       train_err, test_err,
                                                       end - start))
                train_err_summary = make_scalar_summary(
                    "training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
Example #8
def svd(train, test, total):
    samples_per_batch = len(train) // BATCH_SIZE

    iter_train = dataio.ShuffleIterator(
        [train["user"], train["item"], train["rate"]], batch_size=BATCH_SIZE)

    iter_test = dataio.OneEpochIterator(
        [test["user"], test["item"], test["rate"]], batch_size=-1)

    iter_totaltest = dataio.OneEpochIterator(
        [total["user"], total["item"], total["rate"]], batch_size=-1)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])

    infer, regularizer = ops.inference_svd(user_batch,
                                           item_batch,
                                           user_num=USER_NUM,
                                           item_num=ITEM_NUM,
                                           dim=DIM,
                                           device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer,
                                   regularizer,
                                   rate_batch,
                                   learning_rate=0.001,
                                   reg=0.05,
                                   device=DEVICE)

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log",
                                               graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error",
                                   "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={
                                         user_batch: users,
                                         item_batch: items,
                                         rate_batch: rates
                                     })
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer,
                                          feed_dict={
                                              user_batch: users,
                                              item_batch: items
                                          })
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2,
                                          np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                       train_err, test_err,
                                                       end - start))
                train_err_summary = make_scalar_summary(
                    "training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
            if i == EPOCH_MAX * samples_per_batch - 1:
                for users, items, rates in iter_totaltest:
                    pred_total = sess.run(infer,
                                          feed_dict={
                                              user_batch: users,
                                              item_batch: items
                                          })
                    #print(users)
                    #print(items)
                    pred_total = clip(pred_total)
                    print(pred_total.shape)
                    print(pred_total)
                    with open('result.txt', 'a') as result_file:
                        for j in pred_total:
                            result_file.write(str(j) + "\n")
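Every example iterates over dataio.ShuffleIterator and dataio.OneEpochIterator objects, which are also not shown. A minimal sketch of the behavior the snippets rely on: the shuffle iterator samples random batches forever, while the one-epoch iterator makes one ordered pass (batch_size=-1 meaning everything in a single batch) and can be looped over again each epoch. The internals below are an assumption, not the authors' dataio module:

import numpy as np

class ShuffleIterator(object):
    """Endless iterator yielding random batches from a list of column arrays."""
    def __init__(self, inputs, batch_size=10):
        self.inputs = np.transpose(np.vstack([np.asarray(col) for col in inputs]))
        self.batch_size = batch_size
        self.num_cols = len(inputs)
        self.len = len(self.inputs)

    def __iter__(self):
        return self

    def __next__(self):
        ids = np.random.randint(0, self.len, (self.batch_size,))
        out = self.inputs[ids, :]
        return [out[:, i] for i in range(self.num_cols)]


class OneEpochIterator(ShuffleIterator):
    """Single ordered pass; batch_size=-1 yields all rows as one batch."""
    def __init__(self, inputs, batch_size=10):
        super(OneEpochIterator, self).__init__(inputs, batch_size=batch_size)
        if batch_size > 0:
            n_groups = int(np.ceil(self.len / float(batch_size)))
            self.idx_group = np.array_split(np.arange(self.len), n_groups)
        else:
            self.idx_group = [np.arange(self.len)]
        self.group_id = 0

    def __next__(self):
        if self.group_id >= len(self.idx_group):
            # Reset so the same iterator can be looped over again next epoch.
            self.group_id = 0
            raise StopIteration
        out = self.inputs[self.idx_group[self.group_id], :]
        self.group_id += 1
        return [out[:, i] for i in range(self.num_cols)]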
Example #9
iter_train = dataio.ShuffleIterator([df_train["user"],
                                     df_train["item"],
                                     df_train["rate"]],
                                    batch_size=BATCH_SIZE)

iter_test = dataio.OneEpochIterator([df_test["user"],
                                     df_test["item"],
                                    df_test["rate"]],
                                    batch_size=-1)

user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
rate_batch = tf.placeholder(tf.float32, shape=[None])

infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
global_step = tf.contrib.framework.get_or_create_global_step()
_, train_op = ops.optimization(infer, regularizer, rate_batch, learning_rate=0.001, reg=0.05, device=DEVICE)






def svd(train, test,length,moviefile, trainFl=False):
    init_op = tf.global_variables_initializer()
    saver=tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init_op)
        if trainFl == True: 
            summary_writer = tf.summary.FileWriter(logdir="./tmp/svd/log", graph=sess.graph)
Example #10
def svd(train, test):
    samples_per_batch = len(train) // BATCH_SIZE

    iter_train = dataio.ShuffleIterator(
        [train["user"], train["item"], train["rate"]], batch_size=BATCH_SIZE)

    iter_test = dataio.OneEpochIterator(
        [test["user"], test["item"], test["rate"]], batch_size=-1)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])

    infer, regularizer = ops.inference_svd(user_batch,
                                           item_batch,
                                           user_num=USER_NUM,
                                           item_num=ITEM_NUM,
                                           dim=DIM,
                                           device=DEVICE)
    _, train_op = ops.optimiaztion(infer,
                                   regularizer,
                                   rate_batch,
                                   learning_rate=0.15,
                                   reg=0.05,
                                   device=DEVICE)

    init_op = tf.initialize_all_variables()
    with tf.Session() as sess:
        sess.run(init_op)
        print("{} {} {} {}".format("epoch", "train_error", "val_error",
                                   "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={
                                         user_batch: users,
                                         item_batch: items,
                                         rate_batch: rates
                                     })
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer,
                                          feed_dict={
                                              user_batch: users,
                                              item_batch: items
                                          })
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2,
                                          np.power(pred_batch - rates, 2))
                end = time.time()
                print("{:3d} {:f} {:f} {:f}(s)".format(
                    i // samples_per_batch, train_err,
                    np.sqrt(np.mean(test_err2)), end - start))
                start = end

        output_graph_def = tf.python.framework.graph_util.extract_sub_graph(
            sess.graph.as_graph_def(), ["svd_inference", "svd_regularizer"])
        tf.train.SummaryWriter(logdir="/tmp/svd", graph_def=output_graph_def)
Example #11
def svd(X_train, X_test, feedback_u, DIM, LAMBDA):
    'Main SVD code'

    # learning rate
    learning = LR

    # finding the number of batches in train data
    samples_per_batch = len(X_train) // BATCH_SIZE

    # initialize early-stopping parameters
    min_err = 100  # store minimum error
    counter = 0  # count number of times validation error was above minimum

    # build iterator objects for train and validation sets
    iter_train = dataio.ShuffleIterator(
        [X_train["user"], X_train["item"], X_train["rate"]],
        batch_size=BATCH_SIZE)

    iter_val = dataio.OneEpochIterator(
        [X_test["user"], X_test["item"], X_test["rate"]],
        batch_size=BATCH_SIZE)
    '''iter_test = dataio.OneEpochIterator([test["user"],
                                             test["item"],
                                             test["rate"]],
                                            batch_size=BATCH_SIZE)'''

    # start tensorflow with an empty graph (needed when calling the svd function multiple times, e.g. k-fold validation)
    with tf.Graph().as_default():

        # Define tensor placeholders (tensor objects that you feed into tensor functions)
        user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
        item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
        rate_batch = tf.placeholder(tf.float32, shape=[None])
        feedback_batch = tf.placeholder(tf.float32, shape=[None, ITEM_NUM])
        feedback_mat = tf.placeholder(tf.float32, shape=[USER_NUM, ITEM_NUM])

        infer, regularizer = ops.inference_svd(user_batch,
                                               item_batch,
                                               feedback_batch,
                                               user_num=USER_NUM,
                                               item_num=ITEM_NUM,
                                               dim=DIM,
                                               device=DEVICE)
        _, train_op = ops.optimiaztion(infer,
                                       regularizer,
                                       rate_batch,
                                       learning_rate=LR,
                                       reg=LAMBDA,
                                       device=DEVICE)

        full_ratings = ops.get_pred(feedback_mat, ITEM_NUM, USER_NUM, DIM,
                                    DEVICE)

        # Initialize all variables function
        init_op = tf.initialize_all_variables()

        # Start the tensorflow session
        with tf.Session() as sess:

            # initialize variables
            sess.run(init_op)

            print("{} {} {} {}".format("epoch", "train_error", "val_error",
                                       "elapsed_time"))
            errors = deque(maxlen=samples_per_batch)

            # Time each epoch
            start = time.time()

            # Iterate through epochs
            for i in range(EPOCH_MAX * samples_per_batch):

                # Generate batch data
                users, items, rates = next(iter_train)
                feedback = feedback_u[users.astype('int'), :]

                # Run the training functions
                _, pred_batch = sess.run(
                    [train_op, infer],
                    feed_dict={
                        user_batch: users,
                        item_batch: items,
                        rate_batch: rates,
                        feedback_batch: feedback
                    })
                pred_batch = clip(pred_batch)
                errors.append(np.power(pred_batch - rates, 2))

                # Do prediction on the validation set
                if i % samples_per_batch == 0:  #end of epoch
                    train_err = np.sqrt(np.mean(errors))  #train rmse
                    test_err2 = np.array([])  # test rmse

                    # predict validation set using iterator
                    for users, items, rates in iter_val:
                        feedback = feedback_u[users.astype('int'), :]
                        pred_batch = sess.run(infer,
                                              feed_dict={
                                                  user_batch: users,
                                                  item_batch: items,
                                                  feedback_batch: feedback
                                              })
                        pred_batch = clip(pred_batch)
                        test_err2 = np.append(test_err2,
                                              np.power(pred_batch - rates, 2))
                    end = time.time()  # end timer

                    # Validation error
                    RMSE_val = np.sqrt(np.mean(test_err2))
                    print("{:3d} {:f} {:f} {:f}(s)".format(
                        i // samples_per_batch, train_err, RMSE_val,
                        end - start))

                    start = end  #reset clock

                    # Early-stopping check: update the minimum-error variable if needed; if the error
                    # has not improved for 100 consecutive epochs, stop training
                    if min_err > RMSE_val:
                        min_err = RMSE_val
                        counter = 0
                        print('Min error updated')
                    else:
                        counter += 1

                    if counter >= 100:
                        break

            # Output log information
            output_graph_def = graph_util.extract_sub_graph(
                sess.graph.as_graph_def(),
                ["svd_inference", "svd_regularizer"])
            tf.train.SummaryWriter(logdir="/tmp/svd",
                                   graph_def=output_graph_def)
            ratings_mat = sess.run(full_ratings,
                                   feed_dict={feedback_mat: feedback_u})

    return min_err, clip(ratings_mat)
Example #12
def svd(train, test):

    # Number of batches that fit in the training set
    samples_per_batch = len(train) // BATCH_SIZE

    # Turn the column data into rows, then shuffle them randomly
    iter_train = dataio.ShuffleIterator([train["user"],
                                         train["item"],
                                         train["rate"]],
                                        batch_size=BATCH_SIZE)
    print(iter_train)

    iter_test = dataio.OneEpochIterator([test["user"],
                                         test["item"],
                                         test["rate"]],
                                        batch_size=-1)
    print(iter_test)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])

    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM,
                                           device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch, learning_rate=0.001, reg=0.05, device=DEVICE)

    # Initialize the variables
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)

        # Set up the summary log directory; it has to match the path TensorBoard reads from
        summary_writer = tf.summary.FileWriter(logdir="/log", graph=sess.graph)

        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer], feed_dict={user_batch: users,
                                                                   item_batch: items,
                                                                   rate_batch: rates})
            pred_batch = clip(pred_batch)
            # np.power: square the difference (squared error)
            errors.append(np.power(pred_batch - rates, 2))
            
            # Report progress
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                            item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch, train_err, test_err,
                                                       end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
Example #13
def svd(train, test):
    nb_batches = len(train) // BATCH_SIZE

    iter_train = dataio.ShuffleIterator([
        train["user"], train["item"], train["outcome"], train["wins"],
        train["fails"]
    ],
                                        batch_size=BATCH_SIZE)

    iter_test = dataio.OneEpochIterator([
        test["user"], test["item"], test["outcome"], test["wins"],
        test["fails"]
    ],
                                        batch_size=-1)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    wins_batch = tf.placeholder(tf.float32, shape=[None], name="nb_wins")
    fails_batch = tf.placeholder(tf.float32, shape=[None], name="nb_fails")

    # infer, logits, logits_cdf, logits_pdf, regularizer, user_bias, user_features, item_bias, item_features, thresholds = ops.inference_svd(user_batch, item_batch, wins_batch, fails_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    infer, logits, regularizer, user_bias, user_features, item_bias, item_features = ops.inference_svd(
        user_batch,
        item_batch,
        wins_batch,
        fails_batch,
        user_num=USER_NUM,
        item_num=ITEM_NUM,
        dim=DIM,
        device=DEVICE)
    global_step = tf.train.get_or_create_global_step()
    #cost_l2, train_op = ops.optimization(infer, regularizer, rate_batch, learning_rate=LEARNING_RATE, reg=LAMBDA_REG, device=DEVICE)
    cost_nll, train_op = ops.optimization(infer,
                                          logits,
                                          regularizer,
                                          rate_batch,
                                          learning_rate=LEARNING_RATE,
                                          reg=LAMBDA_REG,
                                          device=DEVICE)
    #cost, train_op = ops.optimization(infer, logits, logits_cdf, logits_pdf, regularizer, rate_batch, learning_rate=LEARNING_RATE, reg=LAMBDA_REG, device=DEVICE)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log",
                                               graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error",
                                   "elapsed_time"))
        train_se = deque(maxlen=nb_batches)
        train_nll = deque(maxlen=nb_batches)
        train_cost = deque(maxlen=nb_batches)
        train_acc = deque(maxlen=nb_batches)
        train_obo = deque(maxlen=nb_batches)
        train_auc = deque(maxlen=nb_batches)
        start = time.time()
        for i in range(EPOCH_MAX * nb_batches):
            train_users, train_items, train_rates, train_wins, train_fails = next(
                iter_train)
            batch_size = len(train_rates)

            _, train_logits, train_infer = sess.run(
                [train_op, logits, infer],
                feed_dict={
                    user_batch: train_users,
                    item_batch: train_items,
                    rate_batch: train_rates,
                    wins_batch: train_wins,
                    fails_batch: train_fails
                })
            #print('values', train_infer[42], train_logits[42], train_logits_cdf[42], ops.sigmoid(train_logits[42]), ops.sigmoid(train_logits_cdf[42]))

            # print(train_logits_cdf[42])
            # print(train_logits_pdf[42])
            # print(train_rates[42])

            if DISCRETE:
                if NB_CLASSES > 2:
                    cost_batch = sess.run(cost,
                                          feed_dict={
                                              rate_batch: train_rates,
                                              item_batch: train_items,
                                              user_batch: train_users,
                                              logits_cdf: train_logits_cdf
                                          })
                    # print(train_users[42])
                    # print(train_items[42])
                    # print(train_logits_pdf[42])
                    # print(train_logits_cdf[42])
                    # print('thr', all_thresholds)
                    # print('infer', train_infer[42])
                    train_cost.append(cost_batch)
                    train_acc.append(train_infer == train_rates)
                    train_obo.append(abs(train_infer - train_rates) <= 1)
                    train_se.append(np.power(train_infer - train_rates, 2))
                else:
                    nll_batch = sess.run(cost_nll,
                                         feed_dict={
                                             rate_batch: train_rates,
                                             logits: train_logits
                                         })
                    proba_batch = ops.sigmoid(train_logits)
                    train_acc.append(np.round(proba_batch) == train_rates)
                    train_auc.append(roc_auc_score(train_rates, proba_batch))
                    train_nll.append(nll_batch)
            else:
                l2_batch = sess.run(cost_l2,
                                    feed_dict={
                                        rate_batch: train_rates,
                                        infer: train_infer
                                    })
                #print('est-ce', np.sum(np.power(train_rates - train_pred_batch, 2)))
                #print('que = ', l2_batch)
                #train_se.append(np.power(l2_batch, 2))
                train_se.append(np.power(train_rates - train_infer, 2))

            if i % nb_batches == 0:
                # Compute test error
                train_rmse = np.sqrt(np.mean(train_se))
                train_macc = np.mean(train_acc)
                train_mobo = np.mean(train_obo)
                train_mauc = np.mean(train_auc)
                train_mnll = np.mean(train_nll) / BATCH_SIZE
                train_mcost = np.mean(train_cost)
                test_se = []
                test_acc = []
                test_obo = []
                test_auc = 0
                test_nll = []
                test_cost = []
                for test_users, test_items, test_rates, test_wins, test_fails in iter_test:
                    test_logits, test_infer = sess.run(
                        [logits, infer],
                        feed_dict={
                            user_batch: test_users,
                            item_batch: test_items,
                            wins_batch: test_wins,
                            fails_batch: test_fails
                        })
                    test_size = len(test_rates)

                    # print(test_logits_cdf[42], test_logits_pdf[42])
                    # print(test_infer[42], test_rates[42])

                    if DISCRETE:
                        if NB_CLASSES > 2:
                            cost_batch = sess.run(cost,
                                                  feed_dict={
                                                      rate_batch: test_rates,
                                                      item_batch: test_items,
                                                      user_batch: test_users
                                                  })
                            #print(cost_batch)
                            test_cost.append(cost_batch)
                            test_acc.append(test_infer == test_rates)
                            test_obo.append(abs(test_infer - test_rates) <= 1)
                            test_se.append(np.power(test_infer - test_rates,
                                                    2))
                        else:
                            #train_cost.append(cost_batch)
                            nll_batch = sess.run(cost_nll,
                                                 feed_dict={
                                                     rate_batch: test_rates,
                                                     logits: test_logits
                                                 })
                            proba_batch = ops.sigmoid(test_logits)
                            test_acc.append(
                                np.round(proba_batch) == test_rates)
                            test_auc = roc_auc_score(test_rates, proba_batch)
                            # print(proba_batch[:5], test_rates[:5], test_auc)
                            test_nll.append(nll_batch)
                    else:
                        l2_batch = sess.run(cost_l2,
                                            feed_dict={
                                                rate_batch: test_rates,
                                                infer: test_infer
                                            })
                        test_se.append(np.power(test_rates - test_infer, 2))

                end = time.time()
                test_rmse = np.sqrt(np.mean(test_se))
                test_macc = np.mean(test_acc)
                test_mobo = np.mean(test_obo)
                test_mnll = np.mean(test_nll) / len(test)
                test_mcost = np.mean(test_cost)
                if DISCRETE:
                    if NB_CLASSES > 2:
                        print(
                            "{:3d} TRAIN(size={:d}/{:d}, macc={:f}, mobo={:f}, rmse={:f}, mcost={:f}) TEST(size={:d}, macc={:f}, mobo={:f}, rmse={:f}, mcost={:f}) {:f}(s)"
                            .format(i // nb_batches, len(train_users),
                                    len(train), train_macc,
                                    train_mobo, train_rmse, train_mcost,
                                    len(test), test_macc, test_mobo, test_rmse,
                                    test_mcost, end - start))
                    else:
                        print(
                            "{:3d} TRAIN(size={:d}/{:d}, macc={:f}, mauc={:f}, mnll={:f}) TEST(size={:d}, macc={:f}, auc={:f}, mnll={:f}) {:f}(s)"
                            .format(
                                i // nb_batches,
                                len(train_users),
                                len(train),
                                #train_rmse, # rmse={:f}
                                train_macc,
                                train_mauc,
                                train_mnll,
                                len(test),
                                #test_rmse, # rmse={:f}
                                test_macc,
                                test_auc,
                                test_mnll,
                                end - start))
                else:
                    print(
                        "{:3d} TRAIN(size={:d}/{:d}, rmse={:f}) TEST(size={:d}, rmse={:f}) {:f}(s)"
                        .format(
                            i // nb_batches,
                            len(train_users),
                            len(train),
                            train_rmse,  # rmse={:f} 
                            #train_macc, train_mauc, train_mnll,
                            len(test),
                            test_rmse,  # rmse={:f} 
                            #test_macc, test_mauc, test_mnll,
                            end - start))
                train_err_summary = make_scalar_summary(
                    "training_error", train_rmse)
                test_err_summary = make_scalar_summary("test_error", test_rmse)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
        # print('thr', all_thresholds)

        # Save model
        print(os.path.join(BASE_DIR, 'fm.ckpt'))
        saver.save(sess, os.path.join(BASE_DIR, 'fm.ckpt'))
Example #14
def svd_with_pipe(samples_per_batch):
    trainfilequeue = tf.train.string_input_producer(
        ["/tmp/movielens/ml-1m/ratings.dat"], num_epochs=None, shuffle=False)
    testfilequeue = tf.train.string_input_producer(
        ["/tmp/movielens/ml-1m/ratings.dat"], num_epochs=None, shuffle=False)
    reader = tf.TextLineReader()
    user_batch, item_batch, rate_batch = shuffleInputPipeline(
        trainfilequeue, reader, BATCH_SIZE, 10)
    testuser_batch, testitem_batch, testrate_batch = shuffleInputPipeline(
        testfilequeue, reader, BATCH_SIZE, 10)

    infer, regularizer = ops.inference_svd(user_batch,
                                           item_batch,
                                           user_num=USER_NUM,
                                           item_num=ITEM_NUM,
                                           dim=DIM,
                                           device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer,
                                   regularizer,
                                   rate_batch,
                                   learning_rate=0.001,
                                   reg=0.05,
                                   device=DEVICE)

    init_op = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    testusers, testitems, testrates = sess.run(
        [testuser_batch, testitem_batch, testrate_batch])
    errors = deque(maxlen=samples_per_batch)
    print("{} {} {} {}".format("epoch", "train_error", "val_error",
                               "elapsed_time"))
    try:
        for i in range(EPOCH_MAX * samples_per_batch):
            start = time.time()
            users, items, rates = sess.run(
                [user_batch, item_batch, rate_batch])
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={
                                         user_batch: users,
                                         item_batch: items,
                                         rate_batch: rates
                                     })
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])

                pred_batch = sess.run(infer,
                                      feed_dict={
                                          user_batch: testusers,
                                          item_batch: testitems,
                                      })
                pred_batch = clip(pred_batch)
                test_err2 = np.append(test_err2,
                                      np.power(pred_batch - testrates, 2))
                end = time.time()
                print("{:3d} {:f} {:f} {:f}(s)".format(
                    i // samples_per_batch, train_err,
                    np.sqrt(np.mean(test_err2)), end - start))
                start = end

    except tf.errors.OutOfRangeError:
        print('Done Training')
    finally:
        coord.request_stop()
    coord.join(threads)
    sess.close()