Example #1
def objective(trial):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("#device: ", device)

    if args.tfboard:
        from torch.utils.tensorboard import SummaryWriter

        tf_dir = os.path.join(args.output, "tfboard/")
        if not os.path.isdir(tf_dir):
            os.makedirs(tf_dir)
        tblogger = SummaryWriter(tf_dir)
    else:
        tblogger = None

    # Load Dataset
    dataloaders_dict = get_dataloader(args.input, args.batchsize)

    # Model Definition
    hidden_size = int(
        trial.suggest_discrete_uniform("hidden_size", 128, 512, 128))

    net = CustomDensenet(num_classes=args.n_cls, hidden_size=hidden_size)
    net.to(device)

    for param in net.parameters():
        param.requires_grad = True  # Finetuning: train all layers

    optimizer = get_optimizer(trial, net)
    criterion = nn.CrossEntropyLoss()

    flooding_level = float(
        trial.suggest_discrete_uniform("flooding_level", 0.00, 0.20, 0.02))

    trainer = SelfTrainer(
        model=net,
        dataloaders_dict=dataloaders_dict,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
        output=args.output,
        tblogger=tblogger,
        flooding=flooding_level,
    )

    ES = EarlyStopping(patience=15, verbose=1)

    best = float("inf")  # lowest test error rate seen so far
    for epoch in range(args.epoch):
        print(f"Epoch {epoch}")
        for phase in ["train", "test"]:
            if (epoch == 0) and (phase == "train"):
                continue
            if phase == "train":
                loss, acc = trainer.train(phase, epoch)
            elif phase == "test":
                loss, acc, error_rate = trainer.eval(phase, epoch)

        if error_rate < best:
            best = error_rate
            best_ep = epoch + 1

        if ES.validate(loss):
            print("end loop")
            break

    return best
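
This objective follows Optuna's define-by-run API: each trial samples hidden_size and flooding_level (the latter presumably the flooding regularizer of Ishida et al. (2020), which keeps the training loss near a level b via |loss - b| + b), fine-tunes the network, and returns the lowest test error rate for the study to minimize. A minimal driver sketch, assuming this module already defines objective and parses args; the trial count is a placeholder:

import optuna

study = optuna.create_study(direction="minimize")  # objective returns an error rate
study.optimize(objective, n_trials=50)             # n_trials is a placeholder
print("Best params: ", study.best_params)
print("Best error rate: ", study.best_value)
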
Example #2
class STAMP:
    def __init__(self, sess, k, configs, tr_x, tr_y, val_x, val_y, te_x, te_y,
                 num_items, init_way, logger):
        self.sess = sess
        self.configs = configs
        self.tr_x = tr_x
        self.tr_y = tr_y
        self.val_x = val_x
        self.val_y = val_y
        self.te_x = te_x
        self.te_y = te_y
        self.num_items = num_items
        self.logger = logger

        self.rnn_hidden_size = configs.rnn_hidden_size
        self.batch_size = configs.batch_size
        self.num_layers = configs.num_layers

        # Initialize the optimizer
        self.optimizer_type = configs.optimizer_type
        self.weight_decay = configs.weight_decay
        self.momentum = configs.momentum
        self.lr = configs.lr
        self.eps = configs.eps

        self.clip_grad = configs.clip_grad
        self.clip_grad_threshold = configs.clip_grad_threshold
        self.lr_decay_step = configs.lr_decay_step
        self.lr_decay = configs.lr_decay
        self.lr_decay_rate = configs.lr_decay_rate
        self.drop_prob_ho = configs.drop_prob_ho
        self.drop_prob_input = configs.drop_prob_input
        self.drop_prob_recurrent = configs.drop_prob_recurrent

        # etc
        self.k = k
        self.time_sort = configs.time_sort
        self.loss_type = configs.loss_type
        self.n_epochs = configs.n_epochs
        self.is_shuffle = configs.is_shuffle
        self.embedding_size = configs.embedding_size
        self.num_topics = configs.num_topics
        self.early_stop = EarlyStopping(configs.max_patience)

        # batch_iterator
        self.tr_sess_idx = np.arange(len(self.tr_y))
        self.val_sess_idx = np.arange(len(self.val_y))
        self.te_sess_idx = np.arange(len(self.te_y))

        # record best epoch
        self.max_val_recall = [0 for _ in range(len(self.k))]
        self.max_te_recall = [0 for _ in range(len(self.k))]
        self.best_epoch = 0

        # maxlen is left as None so that each batch is padded to its own
        # longest session instead of a global maximum length
        self.maxlen = None
        self.embed_init, self.weight_init, self.bias_init, self.gate_bias_init, self.kern_init = init_way

    def run(self):
        self.prepare_model()
        tf.global_variables_initializer().run()
        print("End of model prepare")
        for epoch in range(self.n_epochs):
            start_time = time.time()
            tr_pred_loss = self.train_model()
            val_pred_loss, val_recall_list, val_mrr_list = self.pred_evaluation(
                mode="valid")
            te_pred_loss, te_recall_list, te_mrr_list = self.pred_evaluation(
                mode="test")

            self.best_epoch, best_check = write_log(
                self.logger, epoch, tr_pred_loss, val_pred_loss, te_pred_loss,
                self.k, val_recall_list, val_mrr_list, te_recall_list,
                te_mrr_list, self.max_val_recall, self.max_te_recall,
                self.best_epoch, start_time)
            if self.early_stop.validate(val_recall_list[3]):
                self.logger.info("Training process is stopped early")
                break

    def prepare_model(self):
        self.rnn_x1 = tf.placeholder(tf.int32, [None, self.maxlen],
                                     name='input1')
        self.rnn_x2 = tf.placeholder(tf.int32, [None, 1], name='input2')
        self.rnn_y = tf.placeholder(tf.int64, [None, self.num_items],
                                    name='output')
        self.mask_x1 = tf.placeholder(tf.float32, [None, self.maxlen],
                                      name='mask_x1')  # batch_size * maxlen
        self.mask_x2 = tf.placeholder(tf.float32, [None, 1], name='mask_x2')
        self.keep_prob_input = tf.placeholder(tf.float32,
                                              name='keep_prob_input')
        self.keep_prob_ho = tf.placeholder(tf.float32, name='keep_prob_ho')
        self.batch_var_length = tf.placeholder(tf.float32,
                                               name="variable_length")

        Wemb = tf.get_variable('Wemb', [self.num_items, self.embedding_size],
                               initializer=self.embed_init)
        w0 = tf.get_variable('w0', [self.embedding_size, 1],
                             initializer=self.weight_init)
        w1 = tf.get_variable('w1', [self.embedding_size, self.embedding_size],
                             initializer=self.weight_init)
        w2 = tf.get_variable('w2', [self.embedding_size, self.embedding_size],
                             initializer=self.weight_init)
        w3 = tf.get_variable('w3', [self.embedding_size, self.embedding_size],
                             initializer=self.weight_init)
        ba = tf.get_variable('ba', [self.embedding_size],
                             initializer=self.bias_init)

        # hs and ht below are computed for every loss type, so their
        # projection parameters are defined for all branches, not only
        # "Trilinear"
        ws = tf.get_variable('ws',
                             [self.embedding_size, self.embedding_size],
                             initializer=self.weight_init)
        bs = tf.get_variable('bs', [self.embedding_size],
                             initializer=self.bias_init)
        wt = tf.get_variable('wt',
                             [self.embedding_size, self.embedding_size],
                             initializer=self.weight_init)
        bt = tf.get_variable('bt', [self.embedding_size],
                             initializer=self.bias_init)

        if self.loss_type == 'EMB':
            bili = tf.get_variable(
                'bili', [self.embedding_size, 2 * self.rnn_hidden_size],
                initializer=self.weight_init)
        elif self.loss_type == "TOP1":
            W_top1 = tf.get_variable(
                'W_top1', [2 * self.rnn_hidden_size, self.num_items],
                initializer=self.weight_init)
            b_top1 = tf.get_variable('b_top1', [1, self.num_items],
                                     initializer=self.bias_init)
        elif self.loss_type == "TOP1_variant":
            bili = tf.get_variable(
                'bili', [self.embedding_size, 2 * self.rnn_hidden_size],
                initializer=self.weight_init)
            W_top1 = tf.get_variable(
                'W_top1', [2 * self.rnn_hidden_size, self.num_items],
                initializer=self.weight_init)
            b_top1 = tf.get_variable('b_top1', [1, self.num_items],
                                     initializer=self.bias_init)

        emb_x1 = tf.nn.embedding_lookup(
            Wemb, self.rnn_x1)  # xi (batch_size * maxlen * num_hidden)
        emb_x2 = tf.squeeze(tf.nn.embedding_lookup(Wemb, self.rnn_x2),
                            axis=1)  # xt (batch_size * num_hidden)
        tiled_mask = tf.tile(tf.expand_dims(self.mask_x1, 2),
                             [1, 1, self.rnn_hidden_size
                              ])  # mask tiled to (batch_size * maxlen * num_hidden)
        ms = tf.reduce_sum(tf.multiply(emb_x1, tiled_mask),
                           axis=1)  # batch_size * num_hidden
        tiled_var_length = tf.tile(
            tf.reshape(self.batch_var_length, [-1, 1]),
            [1, self.rnn_hidden_size])  # (batch_size * num_hidden)
        ms = tf.reshape(tf.div(ms, tiled_var_length),
                        [-1, self.rnn_hidden_size])  # batch_size * num_hidden

        outputs1 = tf.transpose(emb_x1,
                                perm=[1, 0,
                                      2])  # maxlen * batch_size * num_hidden
        unnormalized_alpha = tf.map_fn(
            lambda x: compute_alpha_STAMP(x, emb_x2, ms, w0, w1, w2, w3, ba),
            outputs1)  # maxlen * batch_size
        unnormalized_alpha = tf.multiply(tf.transpose(unnormalized_alpha),
                                         self.mask_x1)  # batch_size * maxlen
        self.unnormalized_alpha = unnormalized_alpha
        # this implementation uses the masked, unnormalized scores directly as
        # attention weights; a softmax-normalized variant is kept for reference
        alpha = unnormalized_alpha  # batch_size * maxlen
        #alpha = tf.nn.softmax(unnormalized_alpha + 100000000. * (self.mask_x1 - 1), dim=1)  # batch_size * max_len
        self.alpha = alpha
        tiled_alpha = tf.tile(
            tf.expand_dims(alpha, axis=2),
            [1, 1, self.rnn_hidden_size])  # batch_size * maxlen * hidden_size
        self.tiled_alpha = tiled_alpha
        ma = tf.reduce_sum(tf.multiply(emb_x1, tiled_alpha),
                           axis=1)  # batch * hidden
        hs = tf.nn.tanh(tf.matmul(ma, ws) + bs)  # batch * hidden
        ht = tf.nn.tanh(tf.matmul(emb_x2, wt) + bt)  # batch * hidden

        if self.loss_type == 'EMB':
            proj = tf.concat([hs, ht], 1)
            proj = tf.nn.dropout(proj, self.keep_prob_ho)
            ytem = tf.matmul(Wemb, bili)
            pred = tf.matmul(proj, tf.transpose(ytem))
            self.pred = tf.nn.softmax(pred)
            self.cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred,
                                                           labels=self.rnn_y))
        elif self.loss_type == "Trilinear":
            pred = tf.nn.sigmoid(
                tf.matmul(tf.multiply(ht, hs),
                          tf.transpose(Wemb)))  # batch * n_item
            self.pred = tf.nn.softmax(pred)
            self.cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred,
                                                           labels=self.rnn_y))
        elif self.loss_type == "TOP1":
            proj = tf.concat([hs, ht], 1)
            proj = tf.nn.dropout(proj, self.keep_prob_ho)
            pred = tf.matmul(proj, W_top1) + b_top1
            self.pred = tf.nn.tanh(pred)
            self.cost = loss_fn(self.rnn_y, self.pred, self.loss_type)
        elif self.loss_type == "TOP1_variant":
            pred = tf.nn.sigmoid(
                tf.matmul(tf.multiply(ht, hs),
                          tf.transpose(Wemb)))  # batch * n_item
            self.pred = tf.nn.tanh(pred)
            self.cost = loss_fn(self.rnn_y, self.pred, self.loss_type)

        self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.cost)

    def train_model(self):
        if self.configs.is_shuffle:
            self.tr_sess_idx = np.random.permutation(self.tr_sess_idx)
        batch_loss_list = []
        num_batch = math.ceil(
            np.float32(len(self.tr_sess_idx)) / self.batch_size)
        for batch_itr in range(int(num_batch)):
            start_itr = self.batch_size * batch_itr
            end_itr = np.minimum(self.batch_size * (batch_itr + 1),
                                 len(self.tr_sess_idx))
            temp_batch_x = self.tr_x[self.tr_sess_idx[start_itr:end_itr]]
            temp_batch_y = self.tr_y[self.tr_sess_idx[start_itr:end_itr]]
            batch_x1, batch_x2, batch_y, mask_x1, mask_x2, labels, lengths = convert_batch_data_stamp(
                temp_batch_x, temp_batch_y, self.num_items, maxlen=self.maxlen)

            temp_keep_prob_ho = 1.0 - self.drop_prob_ho
            temp_keep_prob_input = 1.0 - self.drop_prob_input
            feed_dict = {
                self.rnn_x1: batch_x1,
                self.rnn_x2: batch_x2,
                self.rnn_y: batch_y,
                self.mask_x1: mask_x1,
                self.mask_x2: mask_x2,
                self.keep_prob_input: temp_keep_prob_input,
                self.keep_prob_ho: temp_keep_prob_ho,
                self.batch_var_length: lengths
            }
            _, pred_loss_, preds2 = self.sess.run(
                [self.optimizer, self.cost, self.pred], feed_dict=feed_dict)
            batch_loss_list.append(pred_loss_)

        return np.mean(batch_loss_list)

    def pred_evaluation(self, mode):
        if mode == "valid":
            sess_idx = self.val_sess_idx
            df_x = self.val_x
            df_y = self.val_y
        elif mode == "test":
            sess_idx = self.te_sess_idx
            df_x = self.te_x
            df_y = self.te_y

        batch_loss_list = []
        recalls = []
        mrrs = []
        evaluation_point_count = []
        for itr in range(len(self.k)):
            recalls.append(0)
            mrrs.append(0)
            evaluation_point_count.append(0)
        num_batch = math.ceil(np.float32(len(sess_idx)) / self.batch_size)

        for batch_itr in range(int(num_batch)):
            start_itr = self.batch_size * batch_itr
            end_itr = np.minimum(self.batch_size * (batch_itr + 1),
                                 len(sess_idx))
            temp_batch_x = df_x[sess_idx[start_itr:end_itr]]
            temp_batch_y = df_y[sess_idx[start_itr:end_itr]]
            batch_x1, batch_x2, batch_y, mask_x1, mask_x2, labels, lengths \
                = convert_batch_data_stamp(temp_batch_x,temp_batch_y,self.num_items,maxlen=self.maxlen)

            feed_dict = {
                self.rnn_x1: batch_x1,
                self.rnn_x2: batch_x2,
                self.rnn_y: batch_y,
                self.mask_x1: mask_x1,
                self.mask_x2: mask_x2,
                self.keep_prob_input: 1.0,
                self.keep_prob_ho: 1.0,
                self.batch_var_length: lengths
            }
            preds, pred_loss_ = self.sess.run([self.pred, self.cost],
                                              feed_dict=feed_dict)

            batch_loss_list.append(pred_loss_)

            recalls, mrrs, evaluation_point_count = evaluation(
                labels, preds, recalls, mrrs, evaluation_point_count, self.k)

        recall_list = []
        mrr_list = []
        for itr in range(len(self.k)):
            recall = np.asarray(recalls[itr],
                                dtype=np.float32) / evaluation_point_count[itr]
            mrr = np.asarray(mrrs[itr],
                             dtype=np.float32) / evaluation_point_count[itr]
            if self.max_val_recall[itr] < recall and mode == "valid":
                self.max_val_recall[itr] = recall
            if self.max_te_recall[itr] < recall and mode == "test":
                self.max_te_recall[itr] = recall
            recall_list.append(recall)
            mrr_list.append(mrr)

        return np.mean(batch_loss_list), recall_list, mrr_list
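
A sketch of how a TF1 class like this is typically driven, assuming configs, the tr_/val_/te_ arrays, num_items, init_way, and logger all come from the surrounding training script (every name below is a placeholder). Note that early stopping reads val_recall_list[3], so k needs at least four cutoffs:

import tensorflow as tf

with tf.Session() as sess:  # run() calls .run() on the variable initializer,
                            # so the session must be the default one
    model = STAMP(sess, k=[1, 5, 10, 20], configs=configs,
                  tr_x=tr_x, tr_y=tr_y, val_x=val_x, val_y=val_y,
                  te_x=te_x, te_y=te_y, num_items=num_items,
                  init_way=init_way, logger=logger)
    model.run()  # builds the graph, initializes variables, trains, evaluates
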
Example #3
class NARM:
    def __init__(self, sess, k, configs, tr_x, tr_y, val_x, val_y, te_x, te_y,
                 num_items, init_way, logger):
        self.sess = sess
        self.configs = configs
        self.tr_x = tr_x
        self.tr_y = tr_y
        self.val_x = val_x
        self.val_y = val_y
        self.te_x = te_x
        self.te_y = te_y
        self.num_items = num_items
        self.logger = logger

        self.rnn_hidden_size = configs.rnn_hidden_size
        self.batch_size = configs.batch_size
        self.num_layers = configs.num_layers

        # Initialize the optimizer
        self.optimizer_type = configs.optimizer_type
        self.weight_decay = configs.weight_decay
        self.momentum = configs.momentum
        self.lr = configs.lr
        self.eps = configs.eps

        self.clip_grad = configs.clip_grad
        self.clip_grad_threshold = configs.clip_grad_threshold
        self.lr_decay_step = configs.lr_decay_step
        self.lr_decay = configs.lr_decay
        self.lr_decay_rate = configs.lr_decay_rate
        self.drop_prob_ho = configs.drop_prob_ho
        self.drop_prob_input = configs.drop_prob_input
        self.drop_prob_recurrent = configs.drop_prob_recurrent

        # etc
        self.k = k
        self.time_sort = configs.time_sort
        self.loss_type = configs.loss_type
        self.n_epochs = configs.n_epochs
        self.is_shuffle = configs.is_shuffle
        self.embedding_size = configs.embedding_size
        self.num_topics = configs.num_topics
        self.early_stop = EarlyStopping(configs.max_patience)

        # batch_iterator
        self.tr_sess_idx = np.arange(len(self.tr_y))
        self.val_sess_idx = np.arange(len(self.val_y))
        self.te_sess_idx = np.arange(len(self.te_y))

        # record best epoch
        self.max_val_recall = [0 for _ in range(len(self.k))]
        self.max_te_recall = [0 for _ in range(len(self.k))]
        self.best_epoch = 0

        # maxlen is left as None so that each batch is padded to its own
        # longest session instead of a global maximum length
        self.maxlen = None
        self.embed_init, self.weight_init, self.bias_init, self.gate_bias_init, self.kern_init = init_way

    def run(self):
        self.prepare_model()
        tf.global_variables_initializer().run()
        print("End of model prepare")
        for epoch in range(self.n_epochs):
            start_time = time.time()
            tr_pred_loss = self.train_model()
            val_pred_loss, val_recall_list, val_mrr_list = self.pred_evaluation(
                mode="valid")
            te_pred_loss, te_recall_list, te_mrr_list = self.pred_evaluation(
                mode="test")

            self.best_epoch, best_check = write_log(
                self.logger, epoch, tr_pred_loss, val_pred_loss, te_pred_loss,
                self.k, val_recall_list, val_mrr_list, te_recall_list,
                te_mrr_list, self.max_val_recall, self.max_te_recall,
                self.best_epoch, start_time)
            if self.early_stop.validate(val_recall_list[3]):
                self.logger.info("Training process is stopped early")
                break

    def prepare_model(self):
        self.rnn_x = tf.placeholder(tf.int32, [None, None], name='input')
        self.rnn_y = tf.placeholder(tf.int64, [None, self.num_items],
                                    name='output')
        self.mask = tf.placeholder(tf.float32, [None, None], name='mask')
        self.keep_prob_input = tf.placeholder(tf.float32,
                                              name='keep_prob_input')
        self.keep_prob_ho = tf.placeholder(tf.float32, name='keep_prob_ho')
        self.batch_var_length = tf.placeholder(tf.int32,
                                               name="variable_length")

        Wemb = tf.get_variable('Wemb', [self.num_items, self.embedding_size],
                               initializer=self.embed_init)
        W_encoder = tf.get_variable(
            'W_encoder', [self.rnn_hidden_size, self.rnn_hidden_size],
            initializer=self.weight_init)
        W_decoder = tf.get_variable(
            'W_decoder', [self.rnn_hidden_size, self.rnn_hidden_size],
            initializer=self.weight_init)
        Bi_vector = tf.get_variable('Bi_vector', [1, self.rnn_hidden_size],
                                    initializer=self.weight_init)
        if self.loss_type == 'EMB':
            bili = tf.get_variable(
                'bili', [self.embedding_size, 2 * self.rnn_hidden_size],
                initializer=self.weight_init)
        elif self.loss_type == "Trilinear":
            ws = tf.get_variable('ws',
                                 [self.embedding_size, self.embedding_size],
                                 initializer=self.weight_init)
            bs = tf.get_variable('bs', [self.embedding_size],
                                 initializer=self.bias_init)
            wt = tf.get_variable('wt',
                                 [self.embedding_size, self.embedding_size],
                                 initializer=self.weight_init)
            bt = tf.get_variable('bt', [self.embedding_size],
                                 initializer=self.bias_init)
        elif self.loss_type == "TOP1":
            W_top1 = tf.get_variable(
                'W_top1', [2 * self.rnn_hidden_size, self.num_items],
                initializer=self.weight_init)
            b_top1 = tf.get_variable('b_top1', [1, self.num_items],
                                     initializer=self.bias_init)
        elif self.loss_type == "TOP1_variant":
            bili = tf.get_variable(
                'bili', [self.embedding_size, 2 * self.rnn_hidden_size],
                initializer=self.weight_init)
            W_top1 = tf.get_variable(
                'W_top1', [2 * self.rnn_hidden_size, self.num_items],
                initializer=self.weight_init)
            b_top1 = tf.get_variable('b_top1', [1, self.num_items],
                                     initializer=self.bias_init)

        emb = tf.nn.embedding_lookup(Wemb, self.rnn_x)
        emb = tf.nn.dropout(emb, self.keep_prob_input)

        custom_cell = tf.contrib.rnn.GRUCell(num_units=self.rnn_hidden_size)
        outputs, states = tf.nn.dynamic_rnn(
            custom_cell,
            emb,
            sequence_length=self.batch_var_length,
            dtype=tf.float32)

        self.outputs = outputs
        self.last_hidden = states  # batch_size x rnn_hidden_size
        outputs = tf.transpose(
            outputs, perm=[1, 0, 2])  # maxlen x batch_size x rnn_hidden_size

        squares = tf.map_fn(lambda x: compute_alpha(
            x, self.last_hidden, W_encoder, W_decoder, Bi_vector),
                            outputs)  # maxlen x batch_size
        weight = tf.nn.softmax(tf.transpose(squares) + 100000000. *
                               (self.mask - 1),
                               axis=1)  # batch_size * max_len
        attention_proj = tf.reduce_sum(outputs *
                                       tf.transpose(weight)[:, :, None],
                                       axis=0)

        # ytem (below): num_items x (2 * rnn_hidden_size)
        if self.loss_type == 'EMB':
            proj = tf.concat([attention_proj, states], 1)
            proj = tf.nn.dropout(proj, self.keep_prob_ho)
            ytem = tf.matmul(Wemb, bili)
            pred = tf.matmul(proj, tf.transpose(ytem))
            self.pred = tf.nn.softmax(pred)
            self.cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred,
                                                           labels=self.rnn_y))
        elif self.loss_type == "Trilinear":
            hs = tf.nn.tanh(tf.matmul(attention_proj, ws) +
                            bs)  # batch * hidden
            ht = tf.nn.tanh(tf.matmul(states, wt) + bt)  # batch * hidden
            pred = tf.nn.sigmoid(
                tf.matmul(tf.multiply(ht, hs),
                          tf.transpose(Wemb)))  # batch * n_item
            self.pred = tf.nn.softmax(pred)
            self.cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred,
                                                           labels=self.rnn_y))
        elif self.loss_type == "TOP1":
            proj = tf.concat([attention_proj, states], 1)
            proj = tf.nn.dropout(proj, self.keep_prob_ho)
            pred = tf.matmul(proj, W_top1) + b_top1
            self.pred = tf.nn.tanh(pred)
            self.cost = loss_fn(self.rnn_y, self.pred, self.loss_type)
        elif self.loss_type == "TOP1_variant":
            proj = tf.concat([attention_proj, states], 1)
            proj = tf.nn.dropout(proj, self.keep_prob_ho)
            ytem = tf.matmul(Wemb, bili)
            pred = tf.matmul(proj, tf.transpose(ytem))
            self.pred = tf.nn.tanh(pred)
            self.cost = loss_fn(self.rnn_y, self.pred, self.loss_type)

        self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.cost)

    def train_model(self):
        if self.configs.is_shuffle:
            self.tr_sess_idx = np.random.permutation(self.tr_sess_idx)
        batch_loss_list = []
        num_batch = math.ceil(
            np.float32(len(self.tr_sess_idx)) / self.batch_size)
        for batch_itr in range(int(num_batch)):
            start_itr = self.batch_size * batch_itr
            end_itr = np.minimum(self.batch_size * (batch_itr + 1),
                                 len(self.tr_sess_idx))
            temp_batch_x = self.tr_x[self.tr_sess_idx[start_itr:end_itr]]
            temp_batch_y = self.tr_y[self.tr_sess_idx[start_itr:end_itr]]
            batch_x, batch_y, mask, labels, lengths = convert_batch_data(
                temp_batch_x, temp_batch_y, self.num_items, maxlen=None)
            temp_keep_prob_ho = 1.0 - self.drop_prob_ho
            temp_keep_prob_input = 1.0 - self.drop_prob_input
            feed_dict = {
                self.rnn_x: batch_x,
                self.rnn_y: batch_y,
                self.mask: mask,
                self.keep_prob_input: temp_keep_prob_input,
                self.keep_prob_ho: temp_keep_prob_ho,
                self.batch_var_length: lengths
            }
            _, pred_loss_, preds2 = self.sess.run(
                [self.optimizer, self.cost, self.pred], feed_dict=feed_dict)
            batch_loss_list.append(pred_loss_)

        return np.mean(batch_loss_list)

    def pred_evaluation(self, mode):
        if mode == "valid":
            sess_idx = self.val_sess_idx
            df_x = self.val_x
            df_y = self.val_y
        elif mode == "test":
            sess_idx = self.te_sess_idx
            df_x = self.te_x
            df_y = self.te_y

        batch_loss_list = []
        recalls = []
        mrrs = []
        evaluation_point_count = []
        for itr in range(len(self.k)):
            recalls.append(0)
            mrrs.append(0)
            evaluation_point_count.append(0)
        num_batch = math.ceil(np.float32(len(sess_idx)) / self.batch_size)
        for batch_itr in range(int(num_batch)):
            start_itr = self.batch_size * batch_itr
            end_itr = np.minimum(self.batch_size * (batch_itr + 1),
                                 len(sess_idx))
            temp_batch_x = df_x[sess_idx[start_itr:end_itr]]
            temp_batch_y = df_y[sess_idx[start_itr:end_itr]]
            batch_x, batch_y, mask, labels, lengths = convert_batch_data(
                temp_batch_x, temp_batch_y, self.num_items, maxlen=None)
            feed_dict = {
                self.rnn_x: batch_x,
                self.rnn_y: batch_y,
                self.mask: mask,
                self.keep_prob_input: 1.0,
                self.keep_prob_ho: 1.0,
                self.batch_var_length: lengths
            }
            preds, pred_loss_ = self.sess.run([self.pred, self.cost],
                                              feed_dict=feed_dict)
            batch_loss_list.append(pred_loss_)

            recalls, mrrs, evaluation_point_count = evaluation(
                labels, preds, recalls, mrrs, evaluation_point_count, self.k)

        recall_list = []
        mrr_list = []
        for itr in range(len(self.k)):
            recall = np.asarray(recalls[itr],
                                dtype=np.float32) / evaluation_point_count[itr]
            mrr = np.asarray(mrrs[itr],
                             dtype=np.float32) / evaluation_point_count[itr]
            if self.max_val_recall[itr] < recall and mode == "valid":
                self.max_val_recall[itr] = recall
            if self.max_te_recall[itr] < recall and mode == "test":
                self.max_te_recall[itr] = recall
            recall_list.append(recall)
            mrr_list.append(mrr)

        return np.mean(batch_loss_list), recall_list, mrr_list
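
A recurring trick in these attention blocks: before the softmax, the logits get 100000000. * (mask - 1) added, which leaves real positions (mask = 1) unchanged and drives padded positions (mask = 0) toward -1e8, effectively -inf, so padding receives near-zero attention weight. A tiny NumPy illustration of the same arithmetic:

import numpy as np

scores = np.array([[2.0, 1.0, 3.0]])
mask = np.array([[1.0, 1.0, 0.0]])  # last position is padding

masked = scores + 1e8 * (mask - 1)  # padded logit drops to ~ -1e8
shifted = masked - masked.max(axis=1, keepdims=True)  # numerically stable softmax
weights = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
print(weights)  # ~[[0.731, 0.269, 0.0]] -- padding gets zero weight
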
Example #4
class HCRNN:
    def __init__(self, sess, k, configs, tr_x, tr_y, val_x, val_y, te_x, te_y,
                 num_items, init_way, logger):
        self.sess = sess
        self.configs = configs
        self.tr_x = tr_x
        self.tr_y = tr_y
        self.val_x = val_x
        self.val_y = val_y
        self.te_x = te_x
        self.te_y = te_y
        self.num_items = num_items
        self.logger = logger

        self.rnn_hidden_size = configs.rnn_hidden_size
        self.batch_size = configs.batch_size
        self.num_layers = configs.num_layers

        # Initialize the optimizer
        self.optimizer_type = configs.optimizer_type
        self.weight_decay = configs.weight_decay
        self.momentum = configs.momentum
        self.lr = configs.lr
        self.eps = configs.eps

        self.clip_grad = configs.clip_grad
        self.clip_grad_threshold = configs.clip_grad_threshold
        self.lr_decay_step = configs.lr_decay_step
        self.lr_decay = configs.lr_decay
        self.lr_decay_rate = configs.lr_decay_rate
        self.drop_prob_ho = configs.drop_prob_ho
        self.drop_prob_input = configs.drop_prob_input
        self.drop_prob_recurrent = configs.drop_prob_recurrent
        self.reg_lambda = configs.reg_lambda
        self.att_type = configs.att_type
        # etc
        self.k = k
        self.time_sort = configs.time_sort
        self.loss_type = configs.loss_type
        self.n_epochs = configs.n_epochs
        self.is_shuffle = configs.is_shuffle
        self.embedding_size = configs.embedding_size
        self.num_topics = configs.num_topics
        self.early_stop = EarlyStopping(configs.max_patience)

        # batch_iterator
        self.tr_sess_idx = np.arange(len(self.tr_y))
        self.val_sess_idx = np.arange(len(self.val_y))
        self.te_sess_idx = np.arange(len(self.te_y))

        # record best epoch
        self.max_val_recall = [0 for _ in range(len(self.k))]
        self.max_te_recall = [0 for _ in range(len(self.k))]
        self.best_epoch = 0
        self.two_phase_learning = self.configs.two_phase_learning
        # maxlen is left as None so that each batch is padded to its own
        # longest session instead of a global maximum length
        self.maxlen = None
        self.embed_init, self.weight_init, self.bias_init, self.gate_bias_init, self.kern_init = init_way

        self.prepare_model()
        tf.global_variables_initializer().run()
        self.saver = tf.train.Saver(tf.trainable_variables())
        print("End of model prepare")

    def run(self):
        for epoch in range(self.n_epochs):
            start_time = time.time()
            tr_pred_loss = self.train_model()
            val_pred_loss, val_recall_list, val_mrr_list = self.pred_evaluation(mode="valid")
            te_pred_loss, te_recall_list, te_mrr_list = self.pred_evaluation(mode="test")

            self.best_epoch, best_check = write_log(self.logger, epoch, tr_pred_loss, val_pred_loss, te_pred_loss, self.k, val_recall_list, val_mrr_list,
                      te_recall_list, te_mrr_list, self.max_val_recall, self.max_te_recall, self.best_epoch,start_time)
            if self.early_stop.validate(val_recall_list[3]):
                self.logger.info("Training process is stopped early")
                break

    def prepare_model(self):
        self.rnn_x = tf.placeholder(tf.int32, [None, self.maxlen], name='input')
        self.rnn_y = tf.placeholder(tf.int32, [None, self.num_items], name='output')
        self.topic_x = tf.placeholder(tf.float32,[None,self.num_items], name='topic_x')
        self.mask = tf.placeholder(tf.float32, [None, None], name='mask')
        self.keep_prob_input = tf.placeholder(tf.float32, name='keep_prob_input')
        self.keep_prob_ho = tf.placeholder(tf.float32, name='keep_prob_ho')
        self.batch_var_length = tf.placeholder(tf.int32, name="variable_length")
        self.is_training = tf.placeholder_with_default(True, shape=())
        real_batch_size = tf.shape(self.rnn_x)[0]
        real_maxlen = tf.shape(self.rnn_x)[1]
        with tf.variable_scope("HCRNN"):
            Wemb = tf.get_variable('Wemb', [self.num_items, self.embedding_size], initializer=self.embed_init)
            self.W_thetatv = tf.get_variable('W_thetatv', (self.num_topics, self.embedding_size), tf.float32,
                                        initializer=self.weight_init)
            if self.att_type == "normal_att":
                W_encoder = tf.get_variable('W_encoder', [self.rnn_hidden_size, self.rnn_hidden_size], initializer=self.weight_init)
                W_decoder = tf.get_variable('W_decoder', [self.rnn_hidden_size, self.rnn_hidden_size], initializer=self.weight_init)
                Bi_vector = tf.get_variable('Bi_vector', [1, self.rnn_hidden_size], initializer=self.weight_init)
                bili = tf.get_variable('bili', [self.embedding_size, 2 * self.rnn_hidden_size], initializer=self.weight_init)
            elif self.att_type == "bi_att":
                W_g1 = tf.get_variable('W_g1', [self.rnn_hidden_size, self.embedding_size], initializer=self.weight_init)
                W_g2 = tf.get_variable('W_g2', [self.rnn_hidden_size, self.embedding_size], initializer=self.weight_init)
                W_l1 = tf.get_variable('W_l1', [self.rnn_hidden_size, self.rnn_hidden_size], initializer=self.weight_init)
                W_l2 = tf.get_variable('W_l2', [self.rnn_hidden_size, self.rnn_hidden_size], initializer=self.weight_init)
                Bi_l_vector = tf.get_variable('Bi_l_vector', [1, self.rnn_hidden_size], initializer=self.weight_init)
                Bi_g_vector = tf.get_variable('Bi_g_vector', [1, self.rnn_hidden_size], initializer=self.weight_init)
                bili = tf.get_variable('bili', [self.embedding_size, 3 * self.rnn_hidden_size],
                                       initializer=self.weight_init)

        ############## Topic Model #########################
        emb_rnn_x = tf.nn.embedding_lookup(Wemb, self.rnn_x)
        emb_topic_x = tf.matmul(self.topic_x, Wemb)
        emb_rnn_x = tf.nn.dropout(emb_rnn_x, self.keep_prob_input) # batch_size * maxlen * hidden
        emb_topic_x = tf.nn.dropout(emb_topic_x, self.keep_prob_input) # batch_size * hidden
        self.theta, mu_theta, std_theta = NSTOPIC(emb_topic_x, self.num_topics, self.embedding_size, self.weight_init, self.bias_init, self.is_training)

        if self.configs.model_name == "HCRNN_v1":
            custom_cell = HCRNN_cell_v1(self.rnn_hidden_size, self.embedding_size,self.num_topics,self.theta,self.W_thetatv,self.weight_init, self.bias_init,
                                            self.gate_bias_init)
        elif self.configs.model_name == "HCRNN_v2":
            custom_cell = HCRNN_cell_v2(self.rnn_hidden_size, self.embedding_size,self.num_topics,self.theta,self.W_thetatv,self.weight_init, self.bias_init,
                                            self.gate_bias_init)
        elif self.configs.model_name == "HCRNN_v3":
            custom_cell = HCRNN_cell_v3(self.rnn_hidden_size, self.embedding_size,self.num_topics,self.theta,self.W_thetatv,self.weight_init, self.bias_init,
                                            self.gate_bias_init)
        outputs, states = tf.nn.dynamic_rnn(cell=custom_cell, inputs=emb_rnn_x, sequence_length=self.batch_var_length,dtype=tf.float32)
        self.all_hidden = outputs[0]
        self.all_state = outputs[1]
        self.reset = outputs[2]

        self.last_hidden = states[0]  # batch_size x rnn_hidden_size
        self.last_state = states[1]  # batch_size x rnn_hidden_size

        self.all_hidden = tf.transpose(self.all_hidden, perm=[1, 0, 2])  # maxlen x batch_size x rnn_hidden_size
        self.all_state = tf.transpose(self.all_state, perm=[1, 0, 2])  # maxlen x batch_size x rnn_hidden_size

        if self.att_type == "normal_att":
            squares = tf.map_fn(lambda x: compute_alpha(x, self.last_hidden, W_encoder, W_decoder, Bi_vector),
                                self.all_hidden)  # maxlen x batch_size
            self.local_weight = tf.nn.softmax(tf.transpose(squares) + 100000000. * (self.mask - 1),
                                        axis=1)  # batch_size * max_len
            attention_proj = tf.reduce_sum(self.all_hidden * tf.transpose(self.local_weight)[:, :, None], axis=0)

        elif self.att_type == "bi_att":
            global_squares = tf.map_fn(lambda x: compute_global_alpha_norm(x, self.last_state, W_g1, W_g2), self.all_state)
            self.global_weight = tf.nn.softmax(tf.transpose(global_squares) + 100000000. * (self.mask - 1),
                                          axis=1)  # batch_size * max_len
            global_attention_proj = tf.reduce_sum(self.all_hidden * tf.transpose(self.global_weight)[:, :, None], axis=0)
            local_squares = tf.map_fn(lambda x: compute_alpha(x, self.last_hidden, W_l1, W_l2, Bi_l_vector), self.all_hidden)
            self.local_weight = tf.nn.softmax(tf.transpose(local_squares) + 100000000. * (self.mask - 1),
                                         axis=1)  # batch_size * max_len
            local_attention_proj = tf.reduce_sum(self.all_hidden * tf.transpose(self.local_weight)[:, :, None], axis=0)
            attention_proj = tf.concat([global_attention_proj, local_attention_proj], 1)
        # ytem (below): num_items x ((2 or 3) * rnn_hidden_size), depending on att_type
        if self.loss_type == 'EMB':
            proj = tf.concat([attention_proj, self.last_hidden], 1)
            proj = tf.nn.dropout(proj, self.keep_prob_ho)
            ytem = tf.matmul(Wemb, bili)
            pred = tf.matmul(proj, tf.transpose(ytem))
            self.pred = tf.nn.softmax(pred)
            self.pred_cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=self.rnn_y))

        self.reg_cost = tf.reduce_mean(tf.reshape(kl_normal_reg_loss(mu_theta, std_theta), [-1, 1]))
        self.cost = self.pred_cost + self.reg_lambda * self.reg_cost

        optimizer = tf.train.AdamOptimizer(self.lr)
        fullvars = tf.trainable_variables()
        topic_vars = variable_parser(fullvars, 'NSTOPIC')
        rnn_vars = variable_parser(fullvars, 'HCRNN')
        topic_grads = tf.gradients(self.cost, topic_vars)
        rnn_grads = tf.gradients(self.cost, rnn_vars)
        if self.two_phase_learning:
            self.optimizer_rnn = optimizer.apply_gradients(zip(rnn_grads, rnn_vars))
            self.optimizer_topic = optimizer.apply_gradients(zip(topic_grads, topic_vars))
        else:
            self.optimizer_total = optimizer.minimize(self.cost)


    def train_model(self):
        if self.configs.is_shuffle:
            self.tr_sess_idx = np.random.permutation(self.tr_sess_idx)
        batch_loss_list = []
        num_batch = math.ceil(np.float32(len(self.tr_sess_idx)) / self.batch_size)
        for batch_itr in range(int(num_batch)):
            start_itr = self.batch_size * batch_itr
            end_itr = np.minimum(self.batch_size * (batch_itr+1),len(self.tr_sess_idx))
            temp_batch_x = self.tr_x[self.tr_sess_idx[start_itr:end_itr]]
            temp_batch_y = self.tr_y[self.tr_sess_idx[start_itr:end_itr]]
            batch_x,batch_topic_x,batch_y,mask,labels,lengths = convert_batch_data_HCRNN(temp_batch_x, temp_batch_y, self.num_items,maxlen=self.maxlen)
            temp_keep_prob_ho = 1.0 - self.drop_prob_ho
            temp_keep_prob_input = 1.0 - self.drop_prob_input
            feed_dict = {self.rnn_x: batch_x, self.rnn_y: batch_y,self.topic_x:batch_topic_x, self.mask: mask,
                         self.keep_prob_input: temp_keep_prob_input, self.keep_prob_ho: temp_keep_prob_ho,
                         self.batch_var_length: lengths}
            if self.two_phase_learning:
                _, pred_loss_ = self.sess.run([self.optimizer_topic, self.cost], feed_dict=feed_dict)
                _, pred_loss_ = self.sess.run([self.optimizer_rnn, self.cost], feed_dict=feed_dict)
            else:
                _, pred_loss_ = self.sess.run([self.optimizer_total, self.cost], feed_dict=feed_dict)
            batch_loss_list.append(pred_loss_)

        return np.mean(batch_loss_list)

    def pred_evaluation(self, mode):
        if mode == "valid":
            sess_idx = self.val_sess_idx
            df_x = self.val_x
            df_y = self.val_y
        elif mode == "test":
            sess_idx = self.te_sess_idx
            df_x = self.te_x
            df_y = self.te_y

        batch_loss_list = []
        recalls = []
        mrrs = []
        evaluation_point_count = []
        for itr in range(len(self.k)):
            recalls.append(0)
            mrrs.append(0)
            evaluation_point_count.append(0)
        num_batch = math.ceil(np.float32(len(sess_idx)) / self.batch_size)
        for batch_itr in range(int(num_batch)):
            start_itr = self.batch_size * batch_itr
            end_itr = np.minimum(self.batch_size * (batch_itr+1),len(sess_idx))
            temp_batch_x = df_x[sess_idx[start_itr:end_itr]]
            temp_batch_y = df_y[sess_idx[start_itr:end_itr]]
            batch_x,batch_topic_x,batch_y,mask,labels,lengths = convert_batch_data_HCRNN(temp_batch_x, temp_batch_y, self.num_items,maxlen=self.maxlen)
            feed_dict = {self.rnn_x: batch_x,self.rnn_y: batch_y,self.topic_x:batch_topic_x, self.mask: mask,
                         self.keep_prob_input: 1.0, self.keep_prob_ho: 1.0,
                         self.batch_var_length: lengths, self.is_training: False}
            preds,pred_loss_ = self.sess.run([self.pred,self.cost],feed_dict=feed_dict)
            batch_loss_list.append(pred_loss_)

            recalls,mrrs,evaluation_point_count = evaluation(labels, preds, recalls, mrrs, evaluation_point_count, self.k)

        recall_list = []
        mrr_list = []
        for itr in range(len(self.k)):
            recall = np.asarray(recalls[itr], dtype=np.float32) / evaluation_point_count[itr]
            mrr = np.asarray(mrrs[itr], dtype=np.float32) / evaluation_point_count[itr]
            if self.max_val_recall[itr] < recall and mode == "valid":
                self.max_val_recall[itr] = recall
            if self.max_te_recall[itr] < recall and mode == "test":
                self.max_te_recall[itr] = recall
            recall_list.append(recall)
            mrr_list.append(mrr)

        return np.mean(batch_loss_list),recall_list, mrr_list
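
All four examples lean on an EarlyStopping helper whose validate() returns True once the monitored metric has stopped improving for a fixed number of epochs (configs.max_patience here, patience=15 in Example #1). The helper itself is not shown in these snippets; a minimal sketch of the contract for a higher-is-better metric such as recall (Example #1 monitors a loss instead, where the comparison flips):

class EarlyStopping:
    """Signal a stop after `patience` consecutive non-improving epochs."""

    def __init__(self, patience, verbose=0):
        self.patience = patience
        self.verbose = verbose
        self.best = None
        self.bad_epochs = 0

    def validate(self, value):
        if self.best is None or value > self.best:  # improved: reset the counter
            self.best = value
            self.bad_epochs = 0
        else:
            self.bad_epochs += 1
            if self.verbose:
                print("EarlyStopping: %d/%d bad epochs" % (self.bad_epochs, self.patience))
        return self.bad_epochs >= self.patience  # True -> caller breaks the loop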