def mgpu_train(*xs):
    gpu_ops = []
    gpu_grads = []
    xs = (tf.split(x, n_gpu, 0) for x in xs)
    for i, xs in enumerate(zip(*xs)):
        do_reuse = True if i > 0 else None
        with tf.device(assign_to_gpu(i, "/gpu:0")), tf.variable_scope(tf.get_variable_scope(), reuse=do_reuse):
            clf_logits, clf_losses, lm_losses = model(*xs, train=True, reuse=do_reuse)
            if lm_coef > 0:
                train_loss = tf.reduce_mean(clf_losses) + lm_coef*tf.reduce_mean(lm_losses)
            else:
                train_loss = tf.reduce_mean(clf_losses)
            params = find_trainable_variables("model")
            grads = tf.gradients(train_loss, params)
            grads = list(zip(grads, params))
            gpu_grads.append(grads)
            gpu_ops.append([clf_logits, clf_losses, lm_losses])
    ops = [tf.concat(op, 0) for op in zip(*gpu_ops)]
    grads = average_grads(gpu_grads)
    grads = [g for g, p in grads]
    train = opt_fns[opt](params, grads, lr, partial(lr_schedules[lr_schedule], warmup=lr_warmup), n_updates_total, l2=l2, max_grad_norm=max_grad_norm, vector_l2=vector_l2, b1=b1, b2=b2, e=e)
    return [train] + ops
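For reference, a minimal sketch of how this function is typically driven (mirroring the later examples in this listing); n_ctx, n_batch_train and the globals read by mgpu_train (n_gpu, lm_coef, opt_fns, lr_schedules, ...) are assumed to be defined:

# Minimal usage sketch; the globals consumed by mgpu_train are assumed to exist.
X_train = tf.placeholder(tf.int32, [n_batch_train, 1, n_ctx, 2])
M_train = tf.placeholder(tf.float32, [n_batch_train, 1, n_ctx])
Y_train = tf.placeholder(tf.int32, [n_batch_train])

train_op, logits, clf_losses, lm_losses = mgpu_train(X_train, M_train, Y_train)
clf_loss = tf.reduce_mean(clf_losses)

sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
sess.run(tf.global_variables_initializer())
# cost, _ = sess.run([clf_loss, train_op], {X_train: xmb, M_train: mmb, Y_train: ymb})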
Example #2
    def load_checkpoint(self, sess, path=None):
        if path is None:
            save_dir = os.path.join(self.params.save_dir, self.params.desc,
                                    'best_params.jl')
        else:
            save_dir = path
        t_vars = utils.find_trainable_variables('model')
        # This should be fine since I'm loading my own saved weights
        sess.run([
            p.assign(ip)
            for p, ip in zip(t_vars, joblib.load(save_dir))
        ])
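The checkpoint read here is simply the list of trainable-variable values dumped with joblib; a minimal sketch of the matching save side (it mirrors the module-level save() that appears later in this listing):

    # Sketch of the matching save side; `utils` and `joblib` are the same modules used above.
    def save_checkpoint(self, sess, path):
        ps = sess.run(utils.find_trainable_variables('model'))
        joblib.dump(ps, path)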
Example #3
def build_model(args, scope):
    nh = args.max_clause
    nw = args.max_var
    nc = 2
    nact = nc * nw
    ob_shape = (None, nh, nw, nc * args.nstack)
    X = tf.placeholder(tf.float32, ob_shape)
    Y = tf.placeholder(tf.float32, (None, nact))
    Z = tf.placeholder(tf.float32, (None,))

    p, v = model3(X, nact, scope)
    params = find_trainable_variables(scope)
    with tf.name_scope("loss"):
        cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=p))
        value_loss = tf.losses.mean_squared_error(labels = Z, predictions = v)
        lossL2 = tf.add_n([ tf.nn.l2_loss(vv) for vv in params ])
        loss = cross_entropy + value_loss + args.l2_coeff * lossL2

    return X, Y, Z, p, v, params, loss
Example #4
def build_graph(sess):
    X = tf.placeholder(tf.int32, [None, N_CTX, 2])
    M = tf.placeholder(tf.float32, [None, N_CTX])
    lm_logits, lm_losses = model(X, M, train=False, reuse=False)
    params = find_trainable_variables('model')
    sess.run(tf.global_variables_initializer())
    shapes = json.load(open('model/params_shapes.json'))
    offsets = np.cumsum([np.prod(shape) for shape in shapes])
    init_params = [np.load('model/params_{}.npy'.format(n)) for n in range(10)]
    init_params = np.split(np.concatenate(init_params, 0), offsets)[:-1]
    init_params = [
        param.reshape(shape) for param, shape in zip(init_params, shapes)
    ]
    init_params[0] = init_params[0][:N_CTX]
    init_params[0] = np.concatenate([init_params[1], init_params[0]], 0)
    del init_params[1]
    n_transfer = 1 + N_TRANSFER * 12
    sess.run([
        p.assign(ip)
        for p, ip in zip(params[:n_transfer], init_params[:n_transfer])
    ])
    return X, M, lm_logits, lm_losses
Example #5
    def __init__(self,
                 policy,
                 ob_space,
                 ac_space,
                 lr,
                 max_grad_norm,
                 units_per_hlayer,
                 activ_fcn,
                 log_interval,
                 logdir,
                 nenvs,
                 batch_size,
                 ent_coef,
                 vf_coef,
                 keep_model,
                 meta=False):
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)
        self.logger.info('Set up A2C learning agent')
        self.num_steps_trained = 0
        self.log_interval = log_interval

        sess = make_session()
        nact = ac_space.n
        nbatch = nenvs * batch_size

        self.global_step = tf.get_variable('global_step', [],
                                           tf.int32,
                                           tf.constant_initializer(
                                               0, tf.int32),
                                           trainable=False)

        eval_model = policy(sess,
                            ob_space,
                            ac_space,
                            1,
                            1,
                            units_per_hlayer,
                            reuse=False,
                            activ_fcn=activ_fcn)
        step_model = policy(sess,
                            ob_space,
                            ac_space,
                            nenvs,
                            1,
                            units_per_hlayer,
                            reuse=tf.AUTO_REUSE,
                            activ_fcn=activ_fcn)
        train_model = policy(sess,
                             ob_space,
                             ac_space,
                             nenvs,
                             batch_size,
                             units_per_hlayer,
                             reuse=True,
                             activ_fcn=activ_fcn)

        # -- Loss computation --
        A = tf.placeholder(tf.int32, [None])
        ADV = tf.placeholder(tf.float32, [None])
        R = tf.placeholder(tf.float32, [None])

        def get_loss(model, placeholder_dict):
            a = placeholder_dict["A"]
            adv = placeholder_dict["ADV"]
            r = placeholder_dict["R"]
            # Compute cross entropy loss between estimated distribution of action and 'true' distribution of actions
            chosen_action_log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=model.pi_logit, labels=a)
            pg_loss = tf.reduce_mean(adv * chosen_action_log_probs)  # minimize
            vf_loss = tf.reduce_mean(mse(tf.squeeze(model.vf), r))  # minimize
            entropy = -tf.reduce_mean(cat_entropy(model.pi_logit))  # maximize
            return pg_loss, entropy, vf_loss, model.vf, chosen_action_log_probs, None, None

        self.input_plchld = {'A': A, 'ADV': ADV, 'R': R}
        pg_loss, entropy, vf_loss, _, chosen_action_log_probs, _, _ = get_loss(
            train_model, self.input_plchld)
        loss = pg_loss + entropy * ent_coef + vf_loss * vf_coef
        vf = tf.squeeze(train_model.vf)

        params = find_trainable_variables("model")
        trainer = tf.train.AdamOptimizer(learning_rate=lr)
        # trainer = tf.train.RMSPropOptimizer(learning_rate=LR, decay=alpha, epsilon=epsilon)
        gradients = trainer.compute_gradients(loss)
        grads, variables = zip(*gradients)
        if max_grad_norm is not None:
            grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
        grads = list(zip(grads, params))
        _train = [
            trainer.apply_gradients(grads),
            self.global_step.assign_add(nbatch)
        ]

        if log_interval > 0:
            for g, v in gradients:
                if g is not None:
                    tf.summary.histogram("%s-grad" % v.name.replace(':', '_'),
                                         g)
            for p in params:
                if p is not None:
                    tf.summary.histogram("train/%s" % p.name.replace(':', '_'),
                                         p.value())
            tf.summary.scalar("train/pg_loss", pg_loss)
            tf.summary.scalar("train/vf_loss", vf_loss)
            tf.summary.scalar("train/entropy", entropy)
            tf.summary.histogram("others/ADV", ADV)
            tf.summary.histogram("others/neglocpac", chosen_action_log_probs)
            tf.summary.histogram("others/vf", vf)
            self.summary_step = tf.summary.merge_all()

        # Adding these to collection so we can restore them again
        tf.add_to_collection('inputs', eval_model.X)
        tf.add_to_collection('pi', eval_model.pi)
        tf.add_to_collection('pi_logit', eval_model.pi_logit)
        tf.add_to_collection('val', eval_model.vf)
        tf.add_to_collection('step', eval_model.ac)
        if eval_model.initial_state is not None:
            add_to_collection_rnn_state('state_in', eval_model.rnn_state_in)
            add_to_collection_rnn_state('state_out', eval_model.rnn_state_out)

        tf.global_variables_initializer().run(session=sess)

        def train(obs, states, rewards, actions, values):
            advs = rewards - values  # Estimate for A = Q(s,a) - V(s)
            # for step in range(len(obs)):
            #     cur_lr = lr.value()
            td_map = {
                train_model.X: obs,
                A: actions,
                ADV: advs,
                R: rewards
            }  #, LR:cur_lr}
            if states is not None:
                td_map[train_model.rnn_state_in] = states
                # td_map[train_model.M] = masks
            policy_loss, value_loss, policy_entropy, test, ap, global_step = sess.run(
                [
                    pg_loss, vf_loss, entropy, _train, train_model.pi,
                    self.global_step
                ], td_map)
            # TF summary logging
            if log_interval > 0 and (self.num_steps_trained % self.log_interval
                                     == 0):
                self.logger.info(
                    'Save summary of network weights, grads and losses.')
                summary_str = sess.run(self.summary_step, td_map)
                self.summary_writer.add_summary(
                    tf.Summary.FromString(summary_str), global_step)

            self.num_steps_trained += 1

            return policy_loss, value_loss, policy_entropy, ap

        saver = tf.train.Saver(max_to_keep=keep_model)

        def save(f_name):
            # test_run(20)
            gs = sess.run(self.global_step)
            self.logger.info(
                'Save network parameters of model at global step %s' % gs)
            saver.save(sess, os.path.join(logdir, f_name), global_step=gs)

        def load(load_path):
            saver.restore(sess, load_path)

        def test_run(env, n_eps, n_pipes):
            self.logger.info('Evaluating current agent')
            ep_return = []
            ep_length = []
            for i in range(0, n_eps):
                obs = env.reset()
                obs = normalize_obs(obs)
                done = False
                if eval_model.initial_state is not None:
                    if len(eval_model.initial_state) > 1:
                        rnn_s_in = (np.zeros(
                            eval_model.initial_state[0].shape),
                                    np.zeros(eval_model.initial_state[1].shape)
                                    )  # init lstm cell vector
                    else:
                        rnn_s_in = np.zeros(eval_model.initial_state.shape
                                            )  # init gru cell vector
                total_return = 0
                total_length = 0

                while not done and (total_return < n_pipes):
                    # self.logger.info(total_return)
                    if eval_model.initial_state is not None:
                        pi, pi_log, act, rnn_s_out = sess.run(
                            [
                                eval_model.pi, eval_model.pi_logit,
                                eval_model.ac, eval_model.rnn_state_out
                            ],
                            feed_dict={
                                eval_model.X: [obs],
                                eval_model.rnn_state_in: rnn_s_in
                            })
                    else:
                        pi, pi_log, act = sess.run([
                            eval_model.pi, eval_model.pi_logit, eval_model.ac
                        ],
                                                   feed_dict={
                                                       eval_model.X: [obs]
                                                   })
                    ac = np.argmax(pi_log)
                    obs, reward, done, _ = env.step(ac)
                    obs = normalize_obs(obs)
                    total_length += 1
                    total_return += reward
                    if eval_model.initial_state is not None:
                        rnn_s_in = rnn_s_out
                self.logger.info('Episode %s: %s, %s' %
                                 (i, total_return, total_length))
                ep_length.append(total_length)
                ep_return.append(total_return)
            return ep_return

        self.get_loss = get_loss
        self.trainer = trainer
        self.train_vars = params
        self.train = train
        self.train_model = train_model
        self.eval_model = eval_model
        self.step_model = step_model
        self.step = step_model.step
        self.value = step_model.value
        self.initial_state = step_model.initial_state
        self.save = save
        self.load = load
        self.test_run = test_run

        # Set the summary writer to write to the given logdir if logging is enabled
        if log_interval > 0:
            self.summary_writer = tf.summary.FileWriter(
                logdir, graph_def=sess.graph_def)
        else:
            self.summary_writer = None

        self.sess = sess
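To make the loss terms above concrete, a small NumPy sketch of the same quantities on a toy batch (illustrative numbers only; it mirrors pg_loss, vf_loss and the advantage estimate described in the comments above):

import numpy as np

# Toy batch: 2 actions, 3 samples.
logits = np.array([[2.0, 0.5], [0.1, 1.2], [0.3, 0.3]])
actions = np.array([0, 1, 0])
rewards = np.array([1.0, 0.0, 0.5])    # discounted returns R
values = np.array([0.6, 0.2, 0.4])     # critic estimates V(s)

advs = rewards - values                # estimate of A(s,a) = Q(s,a) - V(s)
probs = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
neglogpac = -np.log(probs[np.arange(3), actions])  # sparse softmax cross entropy
pg_loss = np.mean(advs * neglogpac)    # same form as pg_loss above
vf_loss = np.mean((values - rewards) ** 2)  # same form as vf_loss (up to the mse helper's constant)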
Example #6
    def __init__(self, policy, config):

        sess = tf.get_default_session()

        # CREATE THE PLACEHOLDERS
        actions_ = tf.placeholder(tf.int32, [None], name="actions_")
        advantages_ = tf.placeholder(tf.float32, [None], name="advantages_")
        rewards_ = tf.placeholder(tf.float32, [None], name="rewards_")
        lr_ = tf.placeholder(tf.float32, name="learning_rate_")
        # Keep track of old actor
        oldneglopac_ = tf.placeholder(tf.float32, [None], name="oldneglopac_")

        # Keep track of old critic
        oldvpred_ = tf.placeholder(tf.float32, [None], name="oldvpred_")

        # Cliprange
        cliprange_ = tf.placeholder(tf.float32, [])

        # CREATE OUR TWO MODELS
        # Step_model that is used for sampling
        step_model = policy(sess, config, reuse=False)

        # Test model for testing our agent
        #test_model = policy(sess, action_space, 1, 1, reuse=False)

        # Train model for training
        train_model = policy(sess, config, reuse=True)

        # CALCULATE THE LOSS
        # Total loss = Policy gradient loss - entropy * entropy coefficient + Value coefficient * value loss

        # Clip the value
        # Get the value predicted
        value_prediction = train_model.vf

        # Clip the value = Oldvalue + clip(value - oldvalue, min = - cliprange, max = cliprange)
        value_prediction_clipped = oldvpred_ + tf.clip_by_value(
            train_model.vf - oldvpred_, -cliprange_, cliprange_)

        # Unclipped value
        value_loss_unclipped = tf.square(value_prediction - rewards_)

        # Clipped value
        value_loss_clipped = tf.square(value_prediction_clipped - rewards_)

        # Value loss = 0.5 * mean(max(unclipped, clipped))
        vf_loss = 0.5 * tf.reduce_mean(
            tf.maximum(value_loss_unclipped, value_loss_clipped))

        # Clip the policy
        # Output -log(pi) (new -log(pi))
        neglogpac = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=train_model.pi, labels=actions_)

        # We want the ratio (pi_new / pi_old), but neglogpac gives us -log(policy).
        # Transform it: e^(-log old - (-log new)) == e^(log new - log old) == new / old
        # (the exponential cancels the log). Wish we could use LaTeX in comments.
        ratio = tf.exp(oldneglopac_ - neglogpac)  # ratio = pi new / pi old

        # Remember we are doing gradient ascent: we want to MAXIMIZE the objective J,
        # which is the same as minimizing Loss = -J, so we negate the product:
        # -(pi_new / pi_old) * Advantages
        pg_loss_unclipped = -advantages_ * ratio

        # Clipped ratio: clip(ratio, 1 - cliprange, 1 + cliprange)
        pg_loss_clipped = -advantages_ * tf.clip_by_value(
            ratio, 1.0 - cliprange_, 1.0 + cliprange_)

        # Final PG loss
        # Why maximum? Both terms are negated objectives, so taking the min of the
        # objectives (the pessimistic bound) is the same as taking the max of the losses.
        pg_loss = tf.reduce_mean(tf.maximum(pg_loss_unclipped,
                                            pg_loss_clipped))

        # Calculate the entropy
        # Entropy is used to improve exploration by limiting the premature convergence to suboptimal policy.
        entropy = tf.reduce_mean(train_model.pd.entropy())

        # Total loss (remember L = -J: minimizing L is the same as maximizing J)
        loss = pg_loss - entropy * config.ent_coef + vf_loss * config.vf_coef

        # UPDATE THE PARAMETERS USING LOSS
        # 1. Get the model parameters
        params = ut.find_trainable_variables("model")

        # 2. Calculate the gradients
        grads = tf.gradients(loss, params)
        if config.max_grad_norm is not None:
            # Clip the gradients (normalize)
            grads, grad_norm = tf.clip_by_global_norm(grads,
                                                      config.max_grad_norm)
        grads = list(zip(grads, params))
        # zip aggregate each gradient with parameters associated
        # For instance zip(ABCD, xyza) => Ax, By, Cz, Da

        # 3. Build our trainer
        trainer = tf.train.RMSPropOptimizer(learning_rate=lr_, epsilon=1e-5)

        # 4. Backpropagation
        _train = trainer.apply_gradients(grads)

        # Train function
        def train(ask_book_env, bid_book_env, inv_env, funds_env, actions,\
            returns, values, neglogpacs, lr, cliprange):

            # Here we calculate advantage A(s,a) = R + yV(s') - V(s)
            # Returns = R + yV(s')
            advantages = returns - values

            # Normalize the advantages (taken from aborghi implementation)
            advantages = (advantages - advantages.mean()) / (advantages.std() +
                                                             1e-8)

            # We create the feed dictionary
            td_map = {
                train_model.input_ask_book: ask_book_env,
                train_model.input_bid_book: bid_book_env,
                train_model.input_inventory: inv_env,
                train_model.input_funds: funds_env,
                actions_: actions,
                advantages_: advantages,
                rewards_: returns,
                lr_: lr,
                cliprange_: cliprange,
                oldneglopac_: neglogpacs,
                oldvpred_: values
            }

            policy_loss, value_loss, policy_entropy, _ = sess.run(
                [pg_loss, vf_loss, entropy, _train], td_map)

            return policy_loss, value_loss, policy_entropy

        def save(save_path):
            """
            Save the model
            """
            saver = tf.train.Saver()
            saver.save(sess, save_path)

        def load(load_path):
            """
            Load the model
            """
            saver = tf.train.Saver()
            print('Loading ' + load_path)
            saver.restore(sess, load_path)

        self.train = train
        self.train_model = train_model
        self.step_model = step_model
        self.step = step_model.step
        self.value = step_model.value
        # self.initial_state = step_model.initial_state
        self.save = save
        self.load = load
        tf.global_variables_initializer().run(session=sess)
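A small NumPy sketch of the ratio and clipping arithmetic described in the comments above (toy numbers, no TensorFlow):

import numpy as np

cliprange = 0.2
old_neglogpac = np.array([0.9, 1.2, 0.3])  # -log pi_old(a|s)
neglogpac = np.array([0.7, 1.5, 0.3])      # -log pi_new(a|s)
advantages = np.array([1.0, -0.5, 0.2])

ratio = np.exp(old_neglogpac - neglogpac)  # = pi_new / pi_old
pg_unclipped = -advantages * ratio
pg_clipped = -advantages * np.clip(ratio, 1.0 - cliprange, 1.0 + cliprange)
# max of the negated objectives == min of the objectives (PPO's pessimistic bound)
pg_loss = np.mean(np.maximum(pg_unclipped, pg_clipped))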
def save(path):  # save the values of the trainable variables (we, h0, h1, ...)
    ps = sess.run(utils.find_trainable_variables('model'))
    joblib.dump(ps, utils.make_path(path))
        logits = result[3]  # shape: [?, 2]  *Note: 2 logits for classifying the input as right or wrong
        clf_loss = result[4]  # shape: [?]   * label - predicted_logit
        lm_loss = result[5]  # shape: [?]
        loss = clf_loss
    elif params.head_type == "lm":
        lm_loss = result[3]
        loss = lm_loss
    else:
        raise ValueError("Not a valid head_type!")
    config = tf.ConfigProto()  # Tensorflow properties (ask Fabian)
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    sess = tf.Session(config=config)
    t_vars = utils.find_trainable_variables('model')  # trainable variables of the model, for gradient descent

    # --- load pretrained parameter -----------------------------------------------------------------------------------
    print("\nLoading pretrained parameter ...")
    # Initialize global variables
    transformer_decoder.init_and_load_parameter_from_file(sess=sess,
                                                          path="model/")

    # --- add evaluation nodes to tensorflow graph --------------------------------------------------------------------
    # Just add the node, not actually perform eval. Eval is performed in iter_apply,iter_predict
    # perform training but this time turn off dropout???
    # eval_mgpu_result: returns the losses but not grads since only evaluating
    eval_mgpu_result = transformer_decoder.mgpu_predict(
        X_train, M_train, Y_train)
    """
Example #9
    def train(self):
        global_step = tf.train.get_or_create_global_step()
        X_train = tf.placeholder(tf.int32, [self.n_batch_train, 2, n_ctx, 2])
        M_train = tf.placeholder(tf.float32, [self.n_batch_train, 2, n_ctx])
        X = tf.placeholder(tf.int32, [None, 2, n_ctx, 2])
        M = tf.placeholder(tf.float32, [None, 2, n_ctx])

        Y_train = tf.placeholder(tf.int32, [self.n_batch_train])
        Y = tf.placeholder(tf.int32, [None])

        #self.train, self.logits, self.clf_losses, self.lm_losses = self.mgpu_train(self.X_train, self.M_train, self.Y_train)

        xs = [X_train, M_train, Y_train]
        gpu_ops = []
        gpu_grads = []
        xs = (tf.split(x, n_gpu, 0) for x in xs)
        optimizer = tf.train.AdamOptimizer(learning_rate=lr,
                                           beta1=b1,
                                           beta2=b2,
                                           epsilon=e)
        for i, xs in enumerate(zip(*xs)):
            do_reuse = True if i > 0 else None
            with tf.device(assign_to_gpu(i, "/gpu:0")), tf.variable_scope(
                    tf.get_variable_scope(), reuse=do_reuse):
                logits, clf_losses, lm_losses = self.model(*xs,
                                                           train=True,
                                                           reuse=do_reuse)
                if lm_coef > 0:
                    train_loss = tf.reduce_mean(
                        clf_losses) + lm_coef * tf.reduce_mean(lm_losses)
                else:
                    train_loss = tf.reduce_mean(clf_losses)
                raw_grads_and_vars = optimizer.compute_gradients(train_loss)
                grads_and_vars = [(tf.clip_by_global_norm([gv[0]],
                                                          max_grad_norm)[0][0],
                                   gv[1]) for gv in raw_grads_and_vars]
                gpu_grads.append(grads_and_vars)
                gpu_ops.append([logits, clf_losses, lm_losses])
        ops = [tf.concat(op, 0) for op in zip(*gpu_ops)]
        logits, clf_losses, lm_losses = ops
        grads = average_grads(gpu_grads)

        train_op = optimizer.apply_gradients(grads, global_step=global_step)
        clf_loss = tf.reduce_mean(clf_losses)
        saver = tf.train.Saver(max_to_keep=5)
        self.params = find_trainable_variables('model_lm')
        if pre_load:
            restore_op = [
                p.assign(ip) for p, ip in zip(
                    self.params, joblib.load(lm_dir + '/model_lm.params'))
            ]
        self.eval_mgpu_logits, self.eval_mgpu_clf_losses, self.eval_mgpu_lm_losses = self.mgpu_predict(
            X_train, M_train, Y_train)
        self.eval_logits, self.eval_clf_losses, self.eval_lm_losses = self.model(
            X, M, Y, train=False, reuse=True)
        self.eval_clf_loss = tf.reduce_mean(self.eval_clf_losses)
        self.eval_mgpu_clf_loss = tf.reduce_mean(self.eval_mgpu_clf_losses)

        summary_op = tf.get_collection(tf.GraphKeys.SUMMARIES)

        def trva_split(data, index):
            return [data[i] for i in index]

        x1, x2, y = encode_dataset(self.text_encoder, atec(data_dir))

        valid_index = np.load('data/valid_index.npy')
        if data_dir == 'data/para.tsv':
            valid_index = np.concatenate([
                valid_index, valid_index + len(y) // 4,
                valid_index + len(y) // 2, valid_index + 3 * len(y) // 4
            ])
        valid_index = valid_index.tolist()
        train_index = list(set(valid_index) ^ set(range(len(y))))
        trX1, trX2, trY = trva_split(x1, train_index), trva_split(
            x2, train_index), trva_split(y, train_index)
        vaX1, vaX2, vaY = trva_split(x1, valid_index), trva_split(
            x2, valid_index), trva_split(y, valid_index)
        trX, trM = self.transform_roc(trX1, trX2)
        vaX, vaM = self.transform_roc(vaX1, vaX2)

        n_train = len(trY)
        n_valid = len(vaY)
        self.n_updates_total = (n_train // self.n_batch_train) * n_iter

        def log():
            def iter_apply(Xs, Ms, Ys):
                fns = [
                    lambda x: np.concatenate(x, 0), lambda x: float(np.sum(x))
                ]
                results = []
                for xmb, mmb, ymb in iter_data((Xs, Ms, Ys),
                                               n_batch=self.n_batch_train,
                                               truncate=False,
                                               verbose=True):
                    n = len(xmb)
                    if n == self.n_batch_train:
                        res = sess.run(
                            [self.eval_mgpu_logits, self.eval_mgpu_clf_loss], {
                                X_train: xmb,
                                M_train: mmb,
                                Y_train: ymb
                            })
                    else:
                        res = sess.run([self.eval_logits, self.eval_clf_loss],
                                       {
                                           X: xmb,
                                           M: mmb,
                                           Y: ymb
                                       })
                    res = [r * n for r in res]
                    results.append(res)
                results = zip(*results)
                return [fn(res) for res, fn in zip(results, fns)]

            # global best_score
            tr_logits, tr_cost = iter_apply(trX[:n_valid], trM[:n_valid],
                                            trY[:n_valid])
            va_logits, va_cost = iter_apply(vaX, vaM, vaY)
            tr_cost = tr_cost / len(trY[:n_valid])
            va_cost = va_cost / n_valid
            tr_f1 = f1_score(trY[:n_valid], np.argmax(tr_logits, 1)) * 100.
            va_f1 = f1_score(vaY, np.argmax(va_logits, 1)) * 100.
            tf.logging.info(
                '%d %d %.3f %.3f %.2f %.2f' %
                (n_epochs, n_updates, tr_cost, va_cost, tr_f1, va_f1))

        scaffold = tf.train.Scaffold(saver=saver)
        log_hook = tf.train.LoggingTensorHook(
            {
                'step': global_step,
                'train_loss': clf_loss
            }, every_n_iter=100)
        summary_hook = tf.train.SummarySaverHook(save_steps=100,
                                                 output_dir=save_dir,
                                                 summary_op=summary_op)
        hooks = [summary_hook, log_hook]
        tf_config = tf.ConfigProto(allow_soft_placement=True)
        tf_config.gpu_options.allow_growth = True

        n_epochs = 0

        with tf.train.MonitoredTrainingSession(hooks=hooks,
                                               save_checkpoint_secs=600,
                                               checkpoint_dir=save_dir,
                                               scaffold=scaffold,
                                               config=tf_config) as sess:
            if pre_load:
                sess.run(restore_op)

            for i in range(n_iter):
                for xmb, mmb, ymb in iter_data(
                    (shuffle(trX, trM, trY, random_state=np.random)),
                        n_batch=self.n_batch_train,
                        truncate=True,
                        verbose=True):
                    cost, _, n_updates = sess.run(
                        [clf_loss, train_op, global_step], {
                            X_train: xmb,
                            M_train: mmb,
                            Y_train: ymb
                        })
                    if n_updates % 100 == 0:
                        log()
                n_epochs += 1
                log()
Example #10
    train_flag = tf.placeholder_with_default(True, shape=())
    print(data_iterator.max_word)
    dp = {
        'max_word': data_iterator.max_word,
        'n_vocab': data_iterator.n_vocab,
        'n_special': 3,
        'clf_token': data_iterator.encoder['_classify_']
    }
    logits, clf_losses, lm_losses = model(X, M, Y, train_flag, data_params=dp)

    #lr, global_step = decay_learning_rate(6.25e-5)
    optimizer = tf.train.AdamOptimizer(6.25e-5)
    train_op = optimizer.minimize(clf_losses +
                                  0.5 * lm_losses)  #, global_step=global_step)

    params = find_trainable_variables('transformer')
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    sess.run(tf.global_variables_initializer())

    shapes = json.load(open('./pretrain/params_shapes.json'))
    offsets = np.cumsum([np.prod(shape) for shape in shapes])
    init_params = [
        np.load('./pretrain/params_{}.npy'.format(n)) for n in range(10)
    ]
    init_params = np.split(np.concatenate(init_params, 0), offsets)[:-1]
    init_params = [
        param.reshape(shape) for param, shape in zip(init_params, shapes)
    ]
    init_params[0] = init_params[0][:max_word]
    init_params[0] = np.concatenate([
        init_params[1],
Example #11
    def init_and_load_parameter_from_file(self, sess, path):
        tvars = utils.find_trainable_variables('model')
        # load the list of parameter shapes from the json file,
        # e.g. [[512, 768], [40478, 768], [1, 768, 2304], ...]
        with open(os.path.join(path, 'params_shapes.json')) as f:
            shapes = json.load(f)
        """ 
        - np.cumsum:
             a = np.array([[1,2,3], [4,5,6]])
             np.cumsum(a) = array([ 1,  3,  6, 10, 15, 21])
        - load all the np params to a list
        - concatenate the params on axis 0, split according to offsets list and remove last sub-array
            np.split:
            If an index exceeds the dimension of the array along axis, an empty sub-array is returned correspondingly.
            x = np.arange(5.0, 13.0)
            np.split(x, [3, 5, 6, 10])      # split x based on the indices in the list
            >>> [array([ 5.,  6.,  7.]),    # first three: [0:3]
                array([ 8.,  9.]),          # next two: [3:5]
                array([ 10.]),              # next one: [5:6]
                array([ 11.,  12.]),        # next four: [6:10], but only two left in the array so only assigns them
                array([], dtype=float64)]   # last ones: [10:], but nothing left so returns empty array
        - reshape each split (concatenated) param into the corresponding shape
        - give embeddings to the special params ( _classify_, _delimiter_, ...)
        - concat the dictionary embeddings, special embeddings and the learned / pre-trained  input sequence embeddings
        """
        offsets = np.cumsum([np.prod(shape) for shape in shapes])
        init_params = [
            np.load(os.path.join(path, 'params_{}.npy'.format(n)))
            for n in range(10)
        ]
        init_params = np.split(np.concatenate(init_params, 0), offsets)[:-1]
        init_params = [
            param.reshape(shape) for param, shape in zip(init_params, shapes)
        ]
        embeddings_special = (
            np.random.randn(self.params.n_special, self.params.n_embd) *
            0.02).astype(np.float32)
        init_embeddings = np.concatenate([
            init_params[1], embeddings_special,
            init_params[0][:self.params.n_ctx]
        ])
        init_params[0] = init_embeddings
        del init_params[1]  # delete the vocab / dictionary embeddings
        if self.params.n_transfer == -1:
            self.params.n_transfer = 0
        else:
            # 1 (for we: i.e. the dictionary, special, input emb) +
            # 144 (for the other 12 layers which happen to have 12 trainable variables each :D)
            self.params.n_transfer = 1 + self.params.n_transfer * 12
        sess.run(tf.global_variables_initializer())
        """
        Perform transfer learning: set the first n_transfer variables (see how init_params and tvars looks like)
        tvars contains:
            index 0: weight embeddings (model/we:0) [40558, 768]: concat of dictionary, special and input seq embedds.,
            index 1: (model/h0/attn/c_attn/w:0) [1, 768, 2304]: weight for the self attn for similarity calc in layer 0,
            index2: (model/h0/attn/c_attn/b:0) [2304]: bias for the self attn in layer 0,
            index3: (model/h0/attn/c_proj/w:0) [2304]: weight for the final attn output after softmax of all similarity 
                                                        output in layer 0,
            ... (c_proj/b:0, layer_norm1, mlp, layer_norm2), ... and so on for the remaining 11 layers

        (The clf weight and bias are not assigned, since no learning has been done for them yet; they keep their initialization.)
        """
        if self.use_encoder is False:
            sess.run([
                p.assign(ip)
                for p, ip in zip(tvars[:self.params.n_transfer],
                                 init_params[:self.params.n_transfer])
            ])
        else:  # load only word embeddings
            # for x in range(len(tvars)):
            # if tvars[x].name == 'model/we:0':
            # sess.run([p.assign(ip) for p, ip in zip(tvars[x], init_params[0])])
            sess.run(
                [p.assign(ip) for p, ip in zip(tvars[:1], init_params[:1])])
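A tiny NumPy sketch of the flatten/split/reshape round trip that the docstring above walks through, plus the transfer-variable count (toy shapes, not the real 768-dimensional parameters):

import numpy as np

shapes = [[2, 3], [4], [1, 2, 2]]                  # stand-in for params_shapes.json
offsets = np.cumsum([np.prod(s) for s in shapes])  # [6, 10, 14]
flat = np.arange(offsets[-1], dtype=np.float32)    # stand-in for the concatenated params_*.npy
parts = np.split(flat, offsets)[:-1]               # drop the trailing empty array
params = [p.reshape(s) for p, s in zip(parts, shapes)]
assert [list(p.shape) for p in params] == shapes

# Transfer count used above: the embedding matrix plus 12 variables per transformer layer.
n_layers_to_transfer = 12
n_transfer = 1 + n_layers_to_transfer * 12         # = 145 variables to assign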
Example #12
    def __init__(self,
                 policy,
                 ob_space,
                 ac_space,
                 nenvs,
                 nsteps,
                 ent_coef=0.01,
                 vf_coef=0.5,
                 mf_coef=0.5,
                 max_grad_norm=0.5,
                 lr=7e-4,
                 alpha=0.99,
                 epsilon=1e-5,
                 total_timesteps=int(80e6),
                 lrschedule='linear'):

        sess = tf_util.make_session()
        nact = ac_space.n
        nbatch = nenvs * nsteps

        A = tf.placeholder(tf.int32, [nbatch])
        ADV = tf.placeholder(tf.float32, [nbatch])
        ADV_MOMENT = tf.placeholder(tf.float32, [nbatch])
        R = tf.placeholder(tf.float32, [nbatch])
        R2 = tf.placeholder(tf.float32, [nbatch])
        LR = tf.placeholder(tf.float32, [])
        ENT_COEF = tf.placeholder(tf.float32, [])

        step_model = policy(sess, ob_space, ac_space, nenvs, 1, reuse=False)
        train_model = policy(sess,
                             ob_space,
                             ac_space,
                             nenvs * nsteps,
                             nsteps,
                             reuse=True)

        neglogpac = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=train_model.pi, labels=A)
        pg_loss = tf.reduce_mean((ADV) * neglogpac)
        vf_loss = tf.reduce_mean(mse(tf.squeeze(train_model.vf), R))
        mf_loss = tf.reduce_mean(mse(tf.squeeze(train_model.mf), R2))
        entropy = tf.reduce_mean(cat_entropy(train_model.pi))
        ent_coef = Scheduler(v=ent_coef,
                             nvalues=total_timesteps / 10,
                             schedule='step')
        mf_coef = 0.01
        loss = pg_loss - entropy * ENT_COEF + vf_loss * vf_coef + mf_loss * mf_coef
        # loss = pg_loss + vf_loss * vf_coef + mf_loss * mf_coef
        # loss = pg_loss - entropy*ent_coef + vf_loss * vf_coef

        params = find_trainable_variables("model")
        grads = tf.gradients(loss, params)
        if max_grad_norm is not None:
            grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
        grads = list(zip(grads, params))
        trainer = tf.train.RMSPropOptimizer(learning_rate=LR,
                                            decay=alpha,
                                            epsilon=epsilon)
        _train = trainer.apply_gradients(grads)

        lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule)

        def train(obs, states, rewards, rewards_square, masks, actions, values,
                  moments):
            values_random = np.random.normal(
                loc=values, scale=np.sqrt(np.maximum(moments - values**2, 0)))
            # values_random = values - np.sqrt(np.maximum(moments - values ** 2,0))
            advs = rewards - values_random
            # advs = (1 - 2 * rewards) * rewards - values  + 2 * values * values
            advs_moment = rewards_square - moments
            # advs = (1 + 2 * rewards) * (rewards)
            # advs_moment = rewards_square
            for step in range(len(obs)):
                cur_lr = lr.value()
                cur_ent_coef = ent_coef.value()
            td_map = {
                train_model.X: obs,
                A: actions,
                ADV: advs,
                ADV_MOMENT: advs_moment,
                R: rewards,
                R2: rewards_square,
                LR: cur_lr,
                ENT_COEF: cur_ent_coef
            }
            if states is not None:
                td_map[train_model.S] = states
                td_map[train_model.M] = masks
            policy_loss, value_loss, moment_loss, policy_entropy, _ = sess.run(
                [pg_loss, vf_loss, mf_loss, entropy, _train], td_map)
            return policy_loss, value_loss, moment_loss, policy_entropy

        def save(save_path):
            ps = sess.run(params)
            make_path(osp.dirname(save_path))
            joblib.dump(ps, save_path)

        def load(load_path):
            loaded_params = joblib.load(load_path)
            restores = []
            for p, loaded_p in zip(params, loaded_params):
                restores.append(p.assign(loaded_p))
            ps = sess.run(restores)

        self.train = train
        self.train_model = train_model
        self.step_model = step_model
        self.step = step_model.step
        self.value = step_model.value
        self.initial_state = step_model.initial_state
        self.save = save
        self.load = load
        tf.global_variables_initializer().run(session=sess)
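The values_random draw in train() above treats the second value head as an estimate of E[R^2], so sqrt(max(E[R^2] - (E[R])^2, 0)) is a standard deviation. A small sketch of that identity (toy numbers):

import numpy as np

returns = np.array([1.0, 0.0, 2.0, 1.0])  # sampled discounted returns R
mean_r = returns.mean()                   # what the value head vf estimates: E[R]
second_moment = (returns ** 2).mean()     # what the moment head mf estimates: E[R^2]
variance = second_moment - mean_r ** 2    # Var[R] = E[R^2] - (E[R])^2
std = np.sqrt(max(variance, 0.0))         # the scale used for values_random above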
Example #13
    n_train = len(trY)
    n_valid = len(vaY)
    n_batch_train = n_batch*n_gpu
    n_updates_total = (n_train//n_batch_train)*n_iter

    X_train = tf.placeholder(tf.int32, [n_batch_train, 1, n_ctx, 2])
    M_train = tf.placeholder(tf.float32, [n_batch_train, 1, n_ctx])
    X = tf.placeholder(tf.int32, [None, 1, n_ctx, 2])
    M = tf.placeholder(tf.float32, [None, 1, n_ctx])

    Y_train = tf.placeholder(tf.int32, [n_batch_train])
    Y = tf.placeholder(tf.int32, [None])

    train, logits, clf_losses, lm_losses = mgpu_train(X_train, M_train, Y_train)
    clf_loss = tf.reduce_mean(clf_losses)
    params = find_trainable_variables('model')
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    sess.run(tf.global_variables_initializer())

    shapes = json.load(open('model/params_shapes.json'))
    offsets = np.cumsum([np.prod(shape) for shape in shapes])
    
    init_params = [np.load('model/params_{}.npy'.format(n)) for n in range(10)]
    init_params = np.split(np.concatenate(init_params, 0), offsets)[:-1]
    init_params = [param.reshape(shape) for param, shape in zip(init_params, shapes)]
    init_params[0] = init_params[0][:n_ctx]
    init_params[0] = np.concatenate([init_params[1], (np.random.randn(n_special, n_embd)*0.02).astype(np.float32), init_params[0]], 0)
    del init_params[1]

    if n_transfer == -1:
        n_transfer = 0
Example #14
    def __init__(self,
                 policy,
                 ob_space,
                 ac_space,
                 nenvs,
                 nsteps,
                 ent_coef=0.01,
                 vf_coef=0.5,
                 max_grad_norm=0.5,
                 lr=7e-4,
                 alpha=0.99,
                 epsilon=1e-5,
                 total_timesteps=int(80e6),
                 lrschedule='linear',
                 summary_dir=None):

        sess = tf_util.make_session()
        nbatch = nenvs * nsteps

        A = tf.placeholder(tf.int32, [nbatch])
        ADV = tf.placeholder(tf.float32, [nbatch])
        R = tf.placeholder(tf.float32, [nbatch])
        LR = tf.placeholder(tf.float32, [])

        step_model = policy(sess, ob_space, ac_space, nenvs, 1, reuse=False)
        train_model = policy(sess,
                             ob_space,
                             ac_space,
                             nenvs * nsteps,
                             nsteps,
                             reuse=True)

        neglogpac = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=train_model.pi, labels=A)
        pg_loss = tf.reduce_mean(ADV * neglogpac)
        vf_loss = tf.reduce_mean(mse(tf.squeeze(train_model.vf), R))
        entropy = tf.reduce_mean(cat_entropy(train_model.pi))
        loss = pg_loss - entropy * ent_coef + vf_loss * vf_coef

        params = find_trainable_variables("model")
        grads = tf.gradients(loss, params)
        if max_grad_norm is not None:
            grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
        grads = list(zip(grads, params))
        trainer = tf.train.RMSPropOptimizer(learning_rate=LR,
                                            decay=alpha,
                                            epsilon=epsilon)
        _train = trainer.apply_gradients(grads)

        lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule)

        # storing summaries
        episode_reward = tf.placeholder("float")
        tf.summary.scalar("policy_loss", pg_loss)
        tf.summary.scalar("entropy", entropy)
        tf.summary.scalar("value_loss", vf_loss)
        tf.summary.scalar("episode_reward", episode_reward)
        summary_op = tf.summary.merge_all()

        def train(obs, states, mean_reward, rewards, masks, actions, values):
            advs = rewards - values
            for step in range(len(obs)):
                cur_lr = lr.value()
            td_map = {
                train_model.X: obs,
                A: actions,
                ADV: advs,
                R: rewards,
                LR: cur_lr,
                episode_reward: mean_reward
            }
            if states is not None:
                td_map[train_model.S] = states
                td_map[train_model.M] = masks
            policy_loss, value_loss, policy_entropy, summary, _ = sess.run(
                [pg_loss, vf_loss, entropy, summary_op, _train], td_map)
            return policy_loss, value_loss, policy_entropy, summary

        def save(save_path):
            ps = sess.run(params)
            make_path(osp.dirname(save_path))
            joblib.dump(ps, save_path)

        def load(load_path):
            loaded_params = joblib.load(load_path)
            restores = []
            for p, loaded_p in zip(params, loaded_params):
                restores.append(p.assign(loaded_p))
            sess.run(restores)

        self.train = train
        self.train_model = train_model
        self.step_model = step_model
        self.step = step_model.step
        self.value = step_model.value
        self.initial_state = step_model.initial_state
        self.save = save
        self.load = load
        tf.global_variables_initializer().run(session=sess)
        self.train_writer = tf.summary.FileWriter(summary_dir, sess.graph)
Example #15
def fever_app(caller):


    global db, tokenizer, text_encoder, encoder, X_train, M_train, X, M, Y_train, Y, params, sess, n_batch_train, db_file, \
        drqa_index, max_page, max_sent, encoder_path, bpe_path, n_ctx, n_batch, model_file
    global n_vocab, n_special, n_y, max_len, clf_token, eval_lm_losses, eval_clf_losses, eval_mgpu_clf_losses, \
        eval_logits, eval_mgpu_logits

    LogHelper.setup()
    logger = LogHelper.get_logger("papelo")

    logger.info("Load config")
    config = json.load(open(os.getenv("CONFIG_FILE","configs/config-docker.json")))
    globals().update(config)
    print(globals())

    logger.info("Set Seeds")
    random.seed(42)
    np.random.seed(42)
    tf.set_random_seed(42)

    logger.info("Load FEVER DB")
    db = FeverDocDB(db_file)
    retrieval = TopNDocsTopNSents(db, max_page, max_sent, True, False, drqa_index)

    logger.info("Init word tokenizer")
    tokenizer = SimpleWordSplitter()

    # Prepare text encoder
    logger.info("Load BPE Text Encoder")
    text_encoder = TextEncoder(encoder_path, bpe_path)
    encoder = text_encoder.encoder
    n_vocab = len(text_encoder.encoder)

    n_y = 3
    encoder['_start_'] = len(encoder)
    encoder['_delimiter_'] = len(encoder)
    encoder['_classify_'] = len(encoder)
    clf_token = encoder['_classify_']
    n_special = 3
    max_len = n_ctx // 2 - 2

    n_batch_train = n_batch

    logger.info("Create TF Placeholders")
    X_train = tf.placeholder(tf.int32, [n_batch, 1, n_ctx, 2])
    M_train = tf.placeholder(tf.float32, [n_batch, 1, n_ctx])
    X = tf.placeholder(tf.int32, [None, 1, n_ctx, 2])
    M = tf.placeholder(tf.float32, [None, 1, n_ctx])

    Y_train = tf.placeholder(tf.int32, [n_batch])
    Y = tf.placeholder(tf.int32, [None])

    logger.info("Model Setup")
    eval_logits, eval_clf_losses, eval_lm_losses = model(X, M, Y, train=False, reuse=None)
    eval_mgpu_logits, eval_mgpu_clf_losses, eval_mgpu_lm_losses = mgpu_predict(X_train, M_train, Y_train)

    logger.info("Create TF Session")
    params = find_trainable_variables('model')

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=float(os.getenv("TF_GPU_MEMORY_FRACTION","0.5")))
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options))
    sess.run(tf.global_variables_initializer())
    sess.run([p.assign(ip) for p, ip in zip(params, joblib.load(model_file))])

    logger.info("Ready")

    def predict(instances):
        predictions = []

        for instance in tqdm(instances):
            sents = retrieval.get_sentences_for_claim(instance["claim"])
            found_evidence = resolve_evidence(sents)
            instance["tokenized_claim"] = " ".join(map(lambda x: x.text, tokenizer.split_words(instance["claim"])))

            sub_instances = make_instances(instance, found_evidence)
            sub_predictions = predict_sub_instances(text_encoder, sub_instances)

            refute_evidence =  [i for i, x in enumerate(sub_predictions) if x == 2]
            support_evidence = [i for i, x in enumerate(sub_predictions) if x == 0]

            if len(support_evidence):
                predicted_label = "SUPPORTS"
                predicted_evidence = [[found_evidence[i]["title"], found_evidence[i]["line_number"]] for i in support_evidence]
            elif len(refute_evidence):
                predicted_label = "REFUTES"
                predicted_evidence = [[found_evidence[i]["title"], found_evidence[i]["line_number"]] for i in refute_evidence]
            else:
                predicted_label = "NOT ENOUGH INFO"
                predicted_evidence = []

            predictions.append({"predicted_label":predicted_label,
                                "predicted_evidence": predicted_evidence})

        return predictions

    return caller(predict)
Example #16
File: model.py  Project: richardbaihe/atec
    def train(self):
        global_step = tf.train.get_or_create_global_step()
        X_train = tf.placeholder(tf.int32, [self.n_batch_train, 2, n_ctx, 2])
        M_train = tf.placeholder(tf.float32, [self.n_batch_train, 2, n_ctx])
        X = tf.placeholder(tf.int32, [None, 2, n_ctx, 2])
        M = tf.placeholder(tf.float32, [None, 2, n_ctx])

        Y_train = tf.placeholder(tf.int32, [self.n_batch_train])
        Y = tf.placeholder(tf.int32, [None])

        #self.train, self.logits, self.clf_losses, self.lm_losses = self.mgpu_train(self.X_train, self.M_train, self.Y_train)

        xs = [X_train, M_train, Y_train]
        gpu_ops = []
        gpu_grads = []
        xs = (tf.split(x, n_gpu, 0) for x in xs)
        optimizer = tf.train.AdamOptimizer(learning_rate=lr,
                                           beta1=b1,
                                           beta2=b2,
                                           epsilon=e)
        for i, xs in enumerate(zip(*xs)):
            do_reuse = True if i > 0 else None
            with tf.device(assign_to_gpu(i, "/gpu:0")), tf.variable_scope(
                    tf.get_variable_scope(), reuse=do_reuse):
                logits, clf_losses, lm_losses = self.model(*xs,
                                                           train=True,
                                                           reuse=do_reuse)
                if lm_coef > 0:
                    train_loss = tf.reduce_mean(
                        clf_losses) + lm_coef * tf.reduce_mean(lm_losses)
                else:
                    train_loss = tf.reduce_mean(clf_losses)
                raw_grads_and_vars = optimizer.compute_gradients(train_loss)
                grads_and_vars = [(tf.clip_by_global_norm([gv[0]],
                                                          max_grad_norm)[0][0],
                                   gv[1]) for gv in raw_grads_and_vars]
                gpu_grads.append(grads_and_vars)
                gpu_ops.append([logits, clf_losses, lm_losses])
        ops = [tf.concat(op, 0) for op in zip(*gpu_ops)]
        logits, clf_losses, lm_losses = ops
        grads = average_grads(gpu_grads)

        train_op = optimizer.apply_gradients(grads, global_step=global_step)
        clf_loss = tf.reduce_mean(clf_losses)
        saver = tf.train.Saver(max_to_keep=5)
        self.params = find_trainable_variables('model_lm')

        self.eval_mgpu_logits, self.eval_mgpu_clf_losses, self.eval_mgpu_lm_losses = self.mgpu_predict(
            X_train, M_train, Y_train)
        self.eval_logits, self.eval_clf_losses, self.eval_lm_losses = self.model(
            X, M, Y, train=False, reuse=True)
        self.eval_clf_loss = tf.reduce_mean(self.eval_clf_losses)
        self.eval_mgpu_clf_loss = tf.reduce_mean(self.eval_mgpu_clf_losses)

        summary_op = tf.get_collection(tf.GraphKeys.SUMMARIES)

        def trva_split(data, index):
            return [data[i] for i in index]

        x1, x2, y = encode_dataset(self.text_encoder, atec(data_dir))

        valid_index = np.load('data/valid_index.npy')
        if data_dir == 'data/para.tsv':
            valid_index = np.concatenate([
                valid_index, valid_index + len(y) // 4,
                valid_index + len(y) // 2, valid_index + 3 * len(y) // 4
            ])
        valid_index = valid_index.tolist()
        train_index = list(set(valid_index) ^ set(range(len(y))))
        trX1, trX2, trY = trva_split(x1, train_index), trva_split(
            x2, train_index), trva_split(y, train_index)
        vaX1, vaX2, vaY = trva_split(x1, valid_index), trva_split(
            x2, valid_index), trva_split(y, valid_index)
        trX, trM = self.transform_roc(trX1, trX2)
        vaX, vaM = self.transform_roc(vaX1, vaX2)

        n_train = len(trY)
        n_valid = len(vaY)
        self.n_updates_total = (n_train // self.n_batch_train) * n_iter
        self.build_graph()
        if pre_load:
            shapes = json.load(open('model/params_shapes.json'))
            offsets = np.cumsum([np.prod(shape) for shape in shapes])
            init_params = [
                np.load('model/params_{}.npy'.format(n)) for n in range(10)
            ]
            init_params = np.split(np.concatenate(init_params, 0),
                                   offsets)[:-1]
            init_params = [
                param.reshape(shape)
                for param, shape in zip(init_params, shapes)
            ]
            init_params[0] = init_params[0][:n_ctx]
            init_params[0] = np.concatenate([
                init_params[1],
                (np.random.randn(self.n_special, n_embd) * 0.02).astype(
                    np.float32), init_params[0]
            ], 0)
            del init_params[1]

            if self.n_transfer == -1:
                self.n_transfer = 0
            else:
                self.n_transfer = 1 + self.n_transfer * 12
            self.sess.run([
                p.assign(ip) for p, ip in zip(self.params[:self.n_transfer],
                                              init_params[:self.n_transfer])
            ])
        if not new_model:
            print('loading old model')
            self.load()
            print('load success')
        n_updates = 0
        n_epochs = 0
        self.save(os.path.join(save_dir, desc, 'best_params.jl'))
        self.best_score = 0

        def log():
            def iter_apply(Xs, Ms, Ys):
                fns = [
                    lambda x: np.concatenate(x, 0), lambda x: float(np.sum(x))
                ]
                results = []
                for xmb, mmb, ymb in iter_data((Xs, Ms, Ys),
                                               n_batch=self.n_batch_train,
                                               truncate=False,
                                               verbose=True):
                    n = len(xmb)
                    if n == self.n_batch_train:
                        res = sess.run(
                            [self.eval_mgpu_logits, self.eval_mgpu_clf_loss], {
                                X_train: xmb,
                                M_train: mmb,
                                Y_train: ymb
                            })
                    else:
                        res = sess.run([self.eval_logits, self.eval_clf_loss],
                                       {
                                           X: xmb,
                                           M: mmb,
                                           Y: ymb
                                       })
                    res = [r * n for r in res]
                    results.append(res)
                results = zip(*results)
                return [fn(res) for res, fn in zip(results, fns)]

            # global best_score
            tr_logits, tr_cost = iter_apply(trX[:n_valid], trM[:n_valid],
                                            trY[:n_valid])
            va_logits, va_cost = iter_apply(vaX, vaM, vaY)
            tr_cost = tr_cost / len(trY[:n_valid])
            va_cost = va_cost / n_valid
            tr_f1 = f1_score(trY[:n_valid], np.argmax(tr_logits, 1)) * 100.
            va_f1 = f1_score(vaY, np.argmax(va_logits, 1)) * 100.
            self.logger.log(n_epochs=n_epochs,
                            n_updates=n_updates,
                            tr_cost=tr_cost,
                            va_cost=va_cost,
                            tr_f1=tr_f1,
                            va_f1=va_f1)
            print('%d %d %.3f %.3f %.2f %.2f' %
                  (n_epochs, n_updates, tr_cost, va_cost, tr_f1, va_f1))
            score = va_f1
            if score > self.best_score:
                self.best_score = score
                self.save(os.path.join(save_dir, desc, 'best_params.jl'))

        for i in range(n_iter):
            for xmb, mmb, ymb in iter_data(
                (shuffle(trX, trM, trY, random_state=np.random)),
                    n_batch=self.n_batch_train,
                    truncate=True,
                    verbose=True):
                cost, _ = self.sess.run([self.clf_loss, self.train], {
                    self.X_train: xmb,
                    self.M_train: mmb,
                    self.Y_train: ymb
                })
                n_updates += 1
                if n_updates % 1000 == 0:
                    log()
            n_epochs += 1
            log()
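A minimal, framework-free sketch (toy numbers, not from the project) of the size-weighted aggregation used in iter_apply above: each batch's mean result is scaled back to a sum by its batch size, and dividing by the dataset size at the end gives the exact dataset mean even when the final batch is smaller.

def weighted_mean(batch_means, batch_sizes):
    # scale each per-batch mean back to a sum, then divide by the total count
    total = sum(m * n for m, n in zip(batch_means, batch_sizes))
    return total / sum(batch_sizes)

# three batches: two full batches of 32 and an uneven tail batch of 11
print(weighted_mean([0.52, 0.48, 0.61], [32, 32, 11]))  # ≈ 0.516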
Example #17
0
File: model.py Project: richardbaihe/atec
    def ccc_train(self):
        # Resolve hostnames and ports of other nodes
        host, hosts = client(bootstrap_host, bootstrap_port)

        # Create a cluster and identify the job name and task of this node
        cluster = tf.train.ClusterSpec({
            'ps': hosts[:num_ps],
            'worker': hosts[num_ps:]
        })

        task = hosts.index(host)
        job_name = ('ps', 'worker')[task >= num_ps]
        task = cluster.job_tasks(job_name).index(host)
        tf_config = tf.ConfigProto(allow_soft_placement=True)
        tf_config.gpu_options.allow_growth = True
        server = tf.train.Server(cluster,
                                 job_name=job_name,
                                 task_index=task,
                                 config=tf_config)

        if job_name == 'ps':
            # create a shared queue on the parameter server which is visible on /job:ps/task:%d
            with tf.device('/job:ps/task:%d' % task):
                queue = tf.FIFOQueue(cluster.num_tasks('worker'),
                                     tf.int32,
                                     shared_name='done_queue%d' % task)

            # wait for the queue to be filled
            with tf.Session(server.target) as sess:
                for i in range(cluster.num_tasks('worker')):
                    sess.run(queue.dequeue())
                    print('ps:%d received "done" from worker:%d' % (task, i))
                print('ps:%d quitting' % task)

        elif job_name == 'worker':
            with tf.device(
                    tf.train.replica_device_setter(
                        worker_device='/job:worker/task:%d' % task,
                        cluster=cluster)):
                global_step = tf.train.get_or_create_global_step()

                sentences = self.batched_data(
                    tfrecord_filename,
                    self.single_example_parser,
                    self.n_batch_train,
                    padded_shapes=tf.Dimension(n_ctx),
                    num_epochs=n_iter)
                sentences = tf.cast(sentences, tf.int32)
                max_len = tf.shape(sentences)[1]  #sentences.get_shape()[1]
                xmb = tf.reshape(sentences,
                                 [self.n_batch_train, 1, max_len, 1])
                M_train = tf.cast(
                    tf.reshape(tf.sign(xmb), [self.n_batch_train, 1, max_len]),
                    tf.float32)
                positions = tf.reshape(tf.range(
                    self.n_vocab + self.n_special,
                    self.n_vocab + self.n_special + max_len),
                                       shape=[1, 1, max_len, 1])
                #tf.constant(np.arange(self.n_vocab + self.n_special, self.n_vocab + self.n_special + max_len),shape=[1, 1, max_len, 1])
                positions = tf.tile(positions, [self.n_batch_train, 1, 1, 1])
                X_train = tf.concat([xmb, positions], axis=3)

                optimizer = tf.train.AdamOptimizer(learning_rate=lr,
                                                   beta1=b1,
                                                   beta2=b2,
                                                   epsilon=e)
                gpu_grads = []
                gpu_loss = []
                gpu_ppl = []
                xs = [X_train, M_train]
                xs = (tf.split(x, n_gpu, 0) for x in xs)
                for i, xs in enumerate(zip(*xs)):
                    do_reuse = True if i > 0 else None
                    with tf.device(assign_to_gpu(i)), tf.variable_scope(
                            tf.get_variable_scope(), reuse=do_reuse):
                        lm_losses = self.model(*xs, train=True, num_ps=num_ps)
                        train_ppl_single = tf.reduce_mean(math.e**lm_losses)
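                        # Note: this averages e**loss per token; corpus-level perplexity
                        # is more commonly exp(mean loss), which (by Jensen's inequality)
                        # is never larger than this quantity.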
                        train_loss_single = tf.reduce_mean(lm_losses)
                        gpu_loss.append(train_loss_single)
                        gpu_ppl.append(train_ppl_single)
                        optimizer = tf.train.AdamOptimizer(learning_rate=lr,
                                                           beta1=b1,
                                                           beta2=b2,
                                                           epsilon=e)
                        raw_grads_and_vars = optimizer.compute_gradients(
                            train_loss_single)
                        grads_and_vars = [
                            (tf.clip_by_global_norm([gv[0]],
                                                    max_grad_norm)[0][0],
                             gv[1]) for gv in raw_grads_and_vars
                        ]
                        gpu_grads.append(grads_and_vars)

                train_ppl = tf.reduce_mean(gpu_ppl)
                train_loss = tf.reduce_mean(gpu_loss)
                grads = average_grads(gpu_grads)

                train_op = optimizer.apply_gradients(grads,
                                                     global_step=global_step)

                saver = tf.train.Saver(max_to_keep=5)

                X = tf.placeholder(tf.int32, [None, 1, n_ctx, 2])
                M = tf.placeholder(tf.float32, [None, 1, n_ctx])
                valid_lm_losses = self.model(X, M, train=False, reuse=True)
                valid_ppl = tf.reduce_mean(math.e**valid_lm_losses)
                valid_loss = tf.reduce_mean(valid_lm_losses)

                self.params = find_trainable_variables('model_lm')
                tf.summary.scalar('train_loss', train_loss)
                #tf.summary.scalar('valid_loss', valid_loss)
                tf.summary.scalar('train_ppl', train_ppl)
                #tf.summary.scalar('valid_ppl', valid_ppl)
                summary_op = tf.summary.merge_all()

            done_ops = []
            # create a shared queue on the worker which is visible on /job:ps/task:%d
            for i in range(cluster.num_tasks('ps')):
                with tf.device('/job:ps/task:%d' % i):
                    with tf.name_scope('done_queue'):
                        done_queue = tf.FIFOQueue(cluster.num_tasks('worker'),
                                                  tf.int32,
                                                  shared_name='done_queue' +
                                                  str(i))
                        done_ops.append(done_queue.enqueue(task))
            scaffold = tf.train.Scaffold(saver=saver)
            summary_hook = tf.train.SummarySaverHook(save_steps=1000,
                                                     output_dir=save_dir,
                                                     summary_op=summary_op)
            hooks = [
                summary_hook,  # tf.train.CheckpointSaverHook(save_secs=600, checkpoint_dir=save_dir, saver=saver),
                tf.train.StopAtStepHook(last_step=1000000),
                tf.train.LoggingTensorHook(
                    {
                        'step': global_step,
                        'train_loss': train_loss,
                        'ppl': train_ppl
                    },
                    every_n_iter=100),
                tf.train.FinalOpsHook([done_ops])
            ]
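            # FinalOpsHook runs done_ops when the MonitoredTrainingSession closes,
            # enqueueing into every ps task's done_queue so the ps processes
            # (blocked on dequeue above) can exit cleanly.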
            valid_data = pre_train_valid(valid_dir)
            vaX1 = encode_dataset(self.text_encoder, pre_train(valid_data))[0]
            vaX, vaM = self.transform_roc(vaX1)
            with tf.train.MonitoredTrainingSession(master=server.target,
                                                   is_chief=(task == 0),
                                                   hooks=hooks,
                                                   save_checkpoint_secs=600,
                                                   checkpoint_dir=save_dir,
                                                   scaffold=scaffold) as sess:
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)
                try:
                    while not coord.should_stop():

                        ppl, loss, _, step = sess.run([
                            train_ppl, train_loss, train_op, global_step
                        ])  #,options=run_options, run_metadata=run_metadata)
                        if step % steps_to_validate == 0:
                            va_cost = []
                            va_ppl = []
                            for xm, mm in iter_data((vaX, vaM),
                                                    n_batch=self.n_batch_train,
                                                    truncate=False,
                                                    verbose=True):

                                ps = sess.run(self.params)
                                joblib.dump(ps,
                                            save_dir + 'model_lm.params',
                                            protocol=2)
                                res, ppl = sess.run([valid_loss, valid_ppl], {
                                    X: xm,
                                    M: mm
                                })
                                va_cost.append(np.sum(res))
                                va_ppl.append(np.sum(ppl))

                            va_cost = np.average(va_cost)
                            va_ppl = np.average(va_ppl)
                            tf.logging.info(
                                '=========n_steps:\t%d valid_cost:\t%.3f valid ppl:\t%.3f=========='
                                % (step, va_cost, va_ppl))

                except tf.errors.OutOfRangeError:
                    print('Epochs Complete!')
                finally:
                    coord.request_stop()
                coord.join(threads)
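A small NumPy sketch (hypothetical vocabulary and batch sizes; shapes simplified, the code above keeps an extra singleton axis) of how the worker branch derives its inputs: the mask is the sign of the token ids, so padding zeros are masked out, and the position ids start at n_vocab + n_special because the position embeddings are appended after the token and special-token rows.

import numpy as np

n_vocab, n_special = 10, 3            # hypothetical sizes
tokens = np.array([[5, 8, 2, 0, 0],   # 0 marks padding
                   [7, 1, 4, 9, 0]])
n_batch, max_len = tokens.shape

mask = np.sign(tokens).astype(np.float32)              # 1 for real tokens, 0 for pads
positions = np.arange(n_vocab + n_special,
                      n_vocab + n_special + max_len)    # position ids offset past the vocab
positions = np.tile(positions[None, :], [n_batch, 1])
x = np.stack([tokens, positions], axis=-1)              # last dim: [token id, position id]

print(mask)        # [[1. 1. 1. 0. 0.] [1. 1. 1. 1. 0.]]
print(x.shape)     # (2, 5, 2)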
Example #18
0
    def __init__(self,
                 policy,
                 ob_space,
                 ac_space,
                 nenvs,
                 nsteps,
                 nstack,
                 num_procs,
                 ent_coef=0.01,
                 vf_coef=0.5,
                 max_grad_norm=0.5,
                 lr=7e-4,
                 alpha=0.99,
                 epsilon=1e-5,
                 total_timesteps=int(80e6),
                 lrschedule='linear',
                 optimizer='adam'):
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=num_procs,
                                inter_op_parallelism_threads=num_procs)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        nbatch = nenvs * nsteps

        A = tf.placeholder(tf.int32, [nbatch])
        ADV = tf.placeholder(tf.float32, [nbatch])
        R = tf.placeholder(tf.float32, [nbatch])
        LR = tf.placeholder(tf.float32, [])

        train_model = policy(sess,
                             ob_space,
                             ac_space,
                             nenvs,
                             nsteps,
                             nstack,
                             reuse=True)
        step_model = train_model

        neglogpac = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=train_model.pi, labels=A)
        pg_loss = tf.reduce_mean(ADV * neglogpac)
        vf_loss = tf.reduce_mean(mse(tf.squeeze(train_model.vf), R))
        entropy = tf.reduce_mean(cat_entropy(train_model.pi))
        loss = pg_loss + vf_loss * vf_coef - entropy * ent_coef

        params = find_trainable_variables("model")
        grads = tf.gradients(loss, params)
        if max_grad_norm is not None:
            grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
        grads = list(zip(grads, params))
        if optimizer == 'adam':
            trainer = tf.train.AdamOptimizer()
        else:
            trainer = tf.train.RMSPropOptimizer(learning_rate=LR,
                                                decay=alpha,
                                                epsilon=epsilon)
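        # Note: the Adam branch above ignores the LR placeholder and the schedule
        # created below (it runs with Adam's default learning rate); only the
        # RMSProp branch consumes the scheduled LR.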

        _train = trainer.apply_gradients(grads)

        lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule)

        def train(obs, states, rewards, masks, actions, values):
            advs = rewards - values
            for step in range(len(obs)):
                cur_lr = lr.value()
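            # lr.value() advances the schedule on every call, so the learning
            # rate is stepped once per transition in the batch before this update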
            td_map = {
                train_model.X: obs,
                A: actions,
                ADV: advs,
                R: rewards,
                LR: cur_lr
            }
            if states != []:
                td_map[train_model.S] = states
                td_map[train_model.M] = masks
            total_loss, policy_loss, value_loss, policy_entropy, _ = sess.run(
                [loss, pg_loss, vf_loss, entropy, _train], td_map)
            return total_loss, policy_loss, value_loss, policy_entropy

        def save(save_path):
            ps = sess.run(params)
            make_path(save_path)
            joblib.dump(ps, save_path)

        def load(load_path):
            loaded_params = joblib.load(load_path)
            restores = []
            for p, loaded_p in zip(params, loaded_params):
                restores.append(p.assign(loaded_p))
            ps = sess.run(restores)

        self.train = train
        self.train_model = train_model
        self.step_model = step_model
        self.step = step_model.step
        self.value = step_model.value
        self.initial_state = step_model.initial_state
        self.save = save
        self.load = load
        tf.global_variables_initializer().run(session=sess)
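A compact NumPy sketch (hypothetical logits, actions and returns) of the A2C objective assembled above: the negative log-probability of each taken action is weighted by its advantage, a squared error ties the value head to the returns, and an entropy bonus (scaled by ent_coef and subtracted) discourages the policy from collapsing.

import numpy as np

def softmax(z):
    z = z - z.max(axis=1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=1, keepdims=True)

logits = np.array([[2.0, 0.5, -1.0],
                   [0.1, 0.2, 0.3]])
actions = np.array([0, 2])
values = np.array([1.0, 0.4])       # critic predictions
rewards = np.array([1.5, 0.0])      # discounted returns
advs = rewards - values

probs = softmax(logits)
neglogp = -np.log(probs[np.arange(len(actions)), actions])
pg_loss = np.mean(advs * neglogp)                      # policy-gradient loss
vf_loss = np.mean((values - rewards) ** 2)             # value-function loss
entropy = np.mean(-(probs * np.log(probs)).sum(axis=1))

ent_coef, vf_coef = 0.01, 0.5
loss = pg_loss + vf_coef * vf_loss - ent_coef * entropy
print(loss)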
Example #19
0
    def mgpu_train(self, *xs):
        gpu_ops = []
        gpu_grads = []
        tvars = None
        # split each input tensor into self.params.n_gpu shards along the batch
        # dimension (e.g. 4 or 2 GPUs, or 1 on a single-GPU machine)
        xs = (tf.split(x, self.params.n_gpu, 0) for x in xs)
        for i, xs in enumerate(zip(*xs)):
            do_reuse = True if i > 0 else None
            """
            reuse: variable foo/gpu:X can be shared in a reusing scope, else gives error
            logits: the result from the last layer, loss: the difference between this result and label
            model(): - assign each input to the model and build train graph
                     - clf_logits: [?, 2], clf_loss: [?] 
                        where ?: shape of current batch input;  
                        logits is [,2] because we are classifying btwn two diff input seqns
            for train: these results operation are also used to perform gradient descent and update in gpu (unlike in 
                        mgpu_predict where they are just used to only calc the themselves in the gpu)
            tf.gradients(): apply gradient diff. calc (Jacobian) to the trainable variables
            grads = list(): zips the gradient descent values and the variables to which they are to be applied on
            gpu_ops.append: appends the logit and loss outputs from each gpu if clf
            """
            with tf.device(utils.assign_to_gpu(
                    i, "/gpu:0")), tf.variable_scope(tf.get_variable_scope(),
                                                     reuse=do_reuse):
                clf_logits, lm_logits, clf_losses, lm_losses = self.model(
                    *xs, train=True, reuse=do_reuse)
                if self.params.head_type == "clf":
                    if self.params.lm_coef > 0:  # calculate and apply a joint loss if clf task also includes lm
                        train_loss = tf.reduce_mean(
                            clf_losses
                        ) + self.params.lm_coef * tf.reduce_mean(lm_losses)
                        tf.summary.scalar('Multi-task Clf-Lm Loss average',
                                          train_loss)
                    else:
                        train_loss = tf.reduce_mean(clf_losses)
                        tf.summary.scalar('Clf Loss average', train_loss)
                elif self.params.head_type == "lm":
                    train_loss = tf.reduce_mean(lm_losses)
                    tf.summary.scalar('Lm Loss average', train_loss)
                else:
                    raise ValueError(
                        "{} is not a valid parameter for head_type!".format(
                            self.params.head_type))
                tvars = utils.find_trainable_variables("model")
                grads = tf.gradients(train_loss, tvars)
                grads = list(zip(grads, tvars))
                gpu_grads.append(
                    grads)  # collect the (gradient, variable) pairs from this gpu
                if self.params.head_type == "clf":
                    gpu_ops.append([clf_logits, clf_losses, lm_losses])
                elif self.params.head_type == "lm":
                    gpu_ops.append([
                        lm_losses
                    ])  # appends just the loss outputs from each gpu if lm
                else:
                    raise ValueError(
                        "{} is not a valid parameter for head_type!".format(
                            self.params.head_type))

        # concatenate the per-GPU outputs (logits/losses) along the batch dimension
        ops = [tf.concat(op, 0) for op in zip(*gpu_ops)]
        # average_grads returns [(gradient averaged over GPUs, corresponding variable), ...]
        grads = utils.average_grads(gpu_grads)
        """
        Gradient operations (only in train, not in predict)
        Accumulate gradient and perform update after a certain treshold. False for rocstories
        The threshold condition is defined in the train-loop section in __main__ in train.py
        
        zero_ops: operation to assign 0s into a non-trainable tf.Variable of shape tvars
        accum_ops: operation to store the average of the grads from each gpu into a non-trainable tf.Variable of shape tvars
        
        else loop: returns only the gradients, not the variables
        """
        if self.params.gradient_accumulation:
            tvars = utils.find_trainable_variables("model")
            accum_tvars = [
                tf.Variable(tf.zeros_like(tv.initialized_value()),
                            trainable=False) for tv in tvars
            ]
            zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_tvars]
            accum_ops = [
                accum_tvars[i].assign_add(grad[0])
                for i, grad in enumerate(grads)
            ]
            grads = accum_tvars
        else:
            zero_ops = None
            accum_ops = None
            grads = [g for g, p in grads]

        # Perform optimization (for rocstories, self.params.opt is 'adam')
        # partial(LR_SCHEDULES[...], warmup=...) binds the warmup fraction to the chosen
        # learning-rate schedule, which the optimizer uses to scale lr over training
        train = OPT_FNS[self.params.opt](
            tvars,
            grads,
            self.params.lr,
            partial(LR_SCHEDULES[self.params.lr_schedule],
                    warmup=self.params.lr_warmup),
            self.params.n_updates_total,
            l2=self.params.l2,
            max_grad_norm=self.params.max_grad_norm,
            vector_l2=self.params.vector_l2,
            b1=self.params.b1,
            b2=self.params.b2,
            e=self.params.e)

        # Tensorboard
        self.merged = tf.summary.merge_all()
        self.writer = tf.summary.FileWriter(self.logdir,
                                            tf.Session().graph)  # sess.graph
        return [train, accum_ops, zero_ops] + ops
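A framework-free sketch (hypothetical shapes and gradients) of the gradient-accumulation pattern described in the docstring above: buffers shaped like the trainable variables are zeroed, each micro-batch's gradients are added into them, and a single optimizer step is applied only after the desired number of micro-batches.

import numpy as np

params = [np.ones(3), np.ones((2, 2))]
accum  = [np.zeros_like(p) for p in params]   # analogue of accum_tvars

def zero_ops():
    for a in accum:
        a[...] = 0.0

def accum_ops(grads):
    for a, g in zip(accum, grads):
        a += g

def apply_step(lr=0.1):
    for p, a in zip(params, accum):
        p -= lr * a

zero_ops()
for micro_batch_grads in ([np.full(3, 0.2), np.full((2, 2), 0.1)],
                          [np.full(3, 0.4), np.full((2, 2), 0.3)]):
    accum_ops(micro_batch_grads)
apply_step()       # one update for two micro-batches
print(params[0])   # 1 - 0.1 * (0.2 + 0.4) = 0.94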
Example #20
0
    def prepare_loss(self):
        self.X_input_train_shape = (None, self.img_height, self.img_width,
                                    self.num_classes * self.num_stack)
        self.X_input_step_shape = (None, self.img_height, self.img_width,
                                   self.num_classes * self.num_stack)

        self.actions = tf.placeholder(tf.int32, [None])  # actions
        self.advantage = tf.placeholder(tf.float32,
                                        [None])  # advantage function
        self.reward = tf.placeholder(tf.float32, [None])  # reward
        self.learning_rate = tf.placeholder(tf.float32, [])  # learning rate
        self.is_training = tf.placeholder(tf.bool)  # is_training

        # The model structure
        self.actor_network = self.policy(self.sess,
                                         self.X_input_step_shape,
                                         self.num_actions,
                                         reuse=False,
                                         is_training=False)

        self.critic_network = self.policy(self.sess,
                                          self.X_input_train_shape,
                                          self.num_actions,
                                          reuse=True,
                                          is_training=self.is_training)

        with tf.variable_scope('train_output'):
            negative_log_prob_action = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.critic_network.policy_logits, labels=self.actions)
            self.policy_gradient_loss = tf.reduce_mean(
                self.advantage * negative_log_prob_action)
            self.value_function_loss = tf.reduce_mean(
                mse(tf.squeeze(self.critic_network.value_function),
                    self.reward))
            self.entropy = tf.reduce_mean(
                openai_entropy(self.critic_network.policy_logits))
            self.loss = self.policy_gradient_loss - self.entropy * self.entropy_coeff + self.value_function_loss * self.vf_coeff

            # Gradient Clipping
            params = find_trainable_variables("policy")
            grads = tf.gradients(self.loss, params)
            if self.max_grad_norm is not None:
                grads, grad_norm = tf.clip_by_global_norm(
                    grads, self.max_grad_norm)
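            # note: grad_norm is only defined when max_grad_norm is not None; the
            # 'train/gradnorm' summary below assumes clipping is enabled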
            # Apply Gradients
            grads = list(zip(grads, params))
            optimizer = tf.train.RMSPropOptimizer(
                learning_rate=self.learning_rate,
                decay=self.alpha,
                epsilon=self.epsilon)
            self.optimize = optimizer.apply_gradients(grads)

            # monitor training
            summaries = []
            summaries.append(
                tf.summary.scalar('loss/policy_gradient_loss',
                                  self.policy_gradient_loss))
            summaries.append(
                tf.summary.scalar('loss/value_function_loss',
                                  self.value_function_loss))
            summaries.append(tf.summary.scalar('loss/entropy', self.entropy))
            summaries.append(tf.summary.scalar('loss/total_loss', self.loss))
            summaries.append(tf.summary.scalar('train/gradnorm', grad_norm))
            self.summary = tf.summary.merge(summaries)
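A short NumPy sketch (hypothetical gradient values) of the global-norm clipping used in prepare_loss above: the norm is computed over all gradient tensors jointly, and every tensor is rescaled by the same factor when that norm exceeds max_grad_norm, so the direction of the update is preserved.

import numpy as np

def clip_by_global_norm(grads, max_norm):
    # joint L2 norm over all gradient tensors
    global_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    scale = min(1.0, max_norm / (global_norm + 1e-12))
    return [g * scale for g in grads], global_norm

grads = [np.array([3.0, 4.0]), np.array([[1.0, 2.0]])]
clipped, norm = clip_by_global_norm(grads, max_norm=0.5)
print(norm)        # sqrt(9 + 16 + 1 + 4) = sqrt(30) ≈ 5.48
print(clipped[0])  # every tensor scaled by 0.5 / 5.48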