Example #1
    def _construct_graph(self, n_updates_total, target_dim=None, train=True, pre_trained_weights=None):
        gpu_grads = []
        self.summaries = []

        # store whether or not graph was previously compiled with dropout
        self.train = train
        self.target_dim = target_dim
        self._define_placeholders()

        aggregator = defaultdict(list)
        train_loss_tower = 0
        gpus = get_available_gpus(self.config)
        n_splits = max(len(gpus), 1)
        for i, (X, M, Y) in enumerate(soft_split(self.X, self.M, self.Y, n_splits=n_splits)):
            do_reuse = True if i > 0 else tf.AUTO_REUSE

            if gpus:
                device = tf.device(assign_to_gpu(gpus[i], params_device=gpus[0]))
            else:
                device = tf.device('cpu')

            scope = tf.variable_scope(tf.get_variable_scope(), reuse=do_reuse)

            with device, scope:
                featurizer_state = featurizer(
                    X,
                    config=self.config,
                    encoder=self.encoder,
                    dropout_placeholder=self.do_dropout,
                    train=train,
                    reuse=do_reuse
                )
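soft_split is not defined in any of these examples. A minimal sketch consistent with how it is called here, assuming it slices each batch tensor into n_splits roughly equal chunks along the batch axis (name and behaviour inferred from the call site, not taken from the library):

    import tensorflow as tf

    def soft_split(*tensors, n_splits=1):
        # Hypothetical sketch: yield n_splits tuples of per-tower slices
        # along the batch axis. Slices are ceil(batch / n_splits) wide,
        # so the last one may be shorter.
        batch_size = tf.shape(tensors[0])[0]
        split_size = tf.cast(
            tf.ceil(tf.cast(batch_size, tf.float32) / n_splits), tf.int32)
        for i in range(n_splits):
            yield tuple(t[i * split_size:(i + 1) * split_size]
                        for t in tensors)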
Example #2
    def _infer_prep(self, *X, max_length=None):
        max_length = max_length or self.config.max_length
        infer_x, infer_mask = self._text_to_ids(*X, max_length=max_length)
        n_batch_train = self.config.batch_size * max(
            len(get_available_gpus(self.config)), 1)
        self._build_model(n_updates_total=0,
                          target_dim=self.target_dim,
                          train=False)
        yield from iter_data(infer_x,
                             infer_mask,
                             n_batch=n_batch_train,
                             verbose=self.config.verbose)
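iter_data is likewise external to these snippets. Assuming it simply yields aligned minibatches from several arrays, optionally wrapped in a tqdm progress bar, a sketch could look like this:

    import tqdm

    def iter_data(*arrays, n_batch, verbose=False, tqdm_desc=None):
        # Hypothetical sketch: walk all arrays in lockstep and yield one
        # tuple of aligned minibatches per step.
        n_examples = len(arrays[0])
        steps = range(0, n_examples, n_batch)
        if verbose:
            steps = tqdm.tqdm(steps, desc=tqdm_desc)
        for start in steps:
            yield tuple(a[start:start + n_batch] for a in arrays)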
Example #3
    def _initialize_session(self):
        gpus = get_available_gpus(self.config)
        os.environ['CUDA_VISIBLE_DEVICES'] = ",".join([str(gpu) for gpu in gpus])
        conf = tf.ConfigProto(allow_soft_placement=True)
        self.sess = tf.Session(config=conf)
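get_available_gpus is used in every example but never defined. A plausible sketch, assuming the config may carry an explicit device list (the visible_gpus attribute is an assumption) and falling back to a TensorFlow device_lib query:

    from tensorflow.python.client import device_lib

    def get_available_gpus(config):
        # Hypothetical sketch: prefer an explicit list on the config if
        # present, otherwise ask TensorFlow which GPUs it can see.
        if getattr(config, 'visible_gpus', None) is not None:
            return config.visible_gpus
        local_devices = device_lib.list_local_devices()
        return [d.name.split(':')[-1]
                for d in local_devices
                if d.device_type == 'GPU']

Note that CUDA reads CUDA_VISIBLE_DEVICES when it first initializes, so the assignment in _initialize_session only takes effect if it runs before TensorFlow touches the GPUs.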
Example #4
    def _training_loop(self, arr_encoded, Y, batch_size=None):
        batch_size = batch_size or self.config.batch_size
        self.label_encoder = self._get_target_encoder()
        n_batch_train = batch_size * max(len(get_available_gpus(self.config)), 1)
        train_x, train_mask = arr_encoded.token_ids, arr_encoded.mask
        n_examples = train_x.shape[0]
        n_updates_total = (n_examples // n_batch_train) * self.config.n_epochs

        if Y is not None:
            Y = self.label_encoder.fit_transform(Y)
            target_dim = self.label_encoder.target_dim
        else:
            # only the language model will be trained; mock a fake target
            Y = [[None]] * n_examples
            target_dim = None

        self._build_model(n_updates_total=n_updates_total, target_dim=target_dim)

        dataset = (train_x, train_mask, Y)

        x_tr, x_va, m_tr, m_va, y_tr, y_va = train_test_split(*dataset, test_size=self.config.val_size,
                                                              random_state=self.config.seed)
        dataset = (x_tr, m_tr, y_tr)
        val_dataset = (x_va, m_va, y_va)

        self.is_trained = True
        avg_train_loss = 0
        avg_val_loss = 0
        global_step = 0
        best_val_loss = float("inf")
        val_window = [float("inf")] * self.config.val_window_size
        for i in range(self.config.n_epochs):
            for xmb, mmb, ymb in iter_data(*dataset, n_batch=n_batch_train, verbose=self.config.verbose):
                global_step += 1
                if global_step % self.config.val_interval == 0:
                    tqdm.tqdm.write("Train loss: {}, Val loss: {}".format(avg_train_loss, avg_val_loss))

                    outputs = self._eval(
                        self.summaries,
                        feed_dict={
                            self.X: xmb,
                            self.M: mmb,
                            self.Y: ymb,
                            self.do_dropout: DROPOUT_OFF
                        }
                    )

                    if self.train_writer is not None:
                        self.train_writer.add_summary(outputs.get(self.summaries), global_step)

                    sum_val_loss = 0
                    for xval, mval, yval in iter_data(*val_dataset, n_batch=n_batch_train, verbose=self.config.verbose, tqdm_desc="Validation"):
                        outputs = self._eval(
                            self.clf_loss,
                            self.summaries,
                            feed_dict={
                                self.X: xval,
                                self.M: mval,
                                self.Y: yval,
                                self.do_dropout: DROPOUT_OFF
                            }
                        )

                        if self.valid_writer is not None:
                            self.valid_writer.add_summary(outputs.get(self.summaries), global_step)
                        val_cost = outputs.get(self.clf_loss, 0)
                        sum_val_loss += val_cost
                        avg_val_loss = (
                                avg_val_loss * self.config.rolling_avg_decay
                                + val_cost * (1 - self.config.rolling_avg_decay)
                        )
                    val_window.append(sum_val_loss)
                    val_window.pop(0)

                    if np.mean(val_window) <= best_val_loss:
                        best_val_loss = np.mean(val_window)
                        if self.config.save_best_model:
                            self.save(self.config.autosave_path)

                outputs = self._eval(
                    self.clf_loss,
                    self.train_op,
                    feed_dict={
                        self.X: xmb,
                        self.M: mmb,
                        self.Y: ymb,
                        self.do_dropout: DROPOUT_ON
                    }
                )

                cost = outputs.get(self.clf_loss, 0)
                avg_train_loss = avg_train_loss * self.config.rolling_avg_decay + cost * (
                        1 - self.config.rolling_avg_decay)

        return self
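The running losses in this loop are exponentially weighted moving averages. Factored out for clarity, this is a restatement of the update used above, not library code:

    def rolling_average(avg, value, decay):
        # avg_t = decay * avg_{t-1} + (1 - decay) * value
        return avg * decay + value * (1 - decay)

With decay close to 1 the displayed loss changes slowly; the same update is applied to both the training and validation losses.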
Example #5
    def _construct_graph(self, n_updates_total, target_dim=None, train=True):
        gpu_grads = []
        self.summaries = []

        # store whether or not graph was previously compiled with dropout
        self.train = train
        self.target_dim = target_dim
        self._define_placeholders()

        aggregator = defaultdict(list)
        train_loss_tower = 0
        gpus = get_available_gpus(self.config)
        n_splits = max(len(gpus), 1)
        for i, (X, M, Y) in enumerate(
                soft_split(self.X, self.M, self.Y, n_splits=n_splits)):
            do_reuse = True if i > 0 else tf.AUTO_REUSE

            if gpus:
                device = tf.device(
                    assign_to_gpu(gpus[i], params_device=gpus[0]))
            else:
                device = tf.device('cpu')

            scope = tf.variable_scope(tf.get_variable_scope(), reuse=do_reuse)

            with device, scope:
                featurizer_state = featurizer(
                    X,
                    config=self.config,
                    encoder=self.encoder,
                    dropout_placeholder=self.do_dropout,
                    train=train,
                    reuse=do_reuse)
                language_model_state = language_model(
                    X=X,
                    M=M,
                    config=self.config,
                    embed_weights=featurizer_state['embed_weights'],
                    hidden=featurizer_state['sequence_features'],
                    reuse=do_reuse)

                lm_loss_coef = self.config.lm_loss_coef
                if target_dim is None:
                    lm_loss_coef = 1.0

                train_loss = lm_loss_coef * tf.reduce_mean(
                    language_model_state['losses'])

                aggregator['features'].append(featurizer_state['features'])
                aggregator['lm_losses'].append(language_model_state['losses'])

                lm_logits = language_model_state["logits"]
                aggregator["lm_model"].append(
                    sample_with_temperature(lm_logits, self.config.lm_temp))

                if target_dim is not None:
                    target_model_state = self._target_model(
                        featurizer_state=featurizer_state,
                        targets=Y,
                        n_outputs=target_dim,
                        train=train,
                        reuse=do_reuse,
                        max_length=self.config.max_length)
                    train_loss += (1 - lm_loss_coef) * tf.reduce_mean(
                        target_model_state['losses'])
                    train_loss_tower += train_loss

                    params = find_trainable_variables("model")
                    grads = tf.gradients(train_loss, params)
                    grads = list(zip(grads, params))
                    gpu_grads.append(grads)
                    aggregator['logits'].append(target_model_state['logits'])
                    aggregator['clf_losses'].append(
                        target_model_state['losses'])

        self.lm_predict_op = tf.concat(aggregator["lm_model"], 0)
        self.features = tf.concat(aggregator['features'], axis=0)
        self.lm_losses = tf.concat(aggregator['lm_losses'], axis=0)

        if target_dim is not None:
            self.logits = tf.concat(aggregator['logits'], axis=0)
            self.clf_losses = concat_or_stack(aggregator['clf_losses'])

            self.predict_op, self.predict_proba_op = self._predict_ops(
                self.logits, **target_model_state.get("predict_params", {}))
            self._compile_train_op(params=params,
                                   grads=gpu_grads,
                                   n_updates_total=n_updates_total)
            self.clf_loss = tf.reduce_mean(self.clf_losses)
            self.lm_loss = tf.reduce_mean(self.lm_losses)
            self.summaries.append(
                tf.summary.scalar('TargetModelLoss', self.clf_loss))
            self.summaries.append(
                tf.summary.scalar('LanguageModelLoss', self.lm_loss))
            self.summaries.append(
                tf.summary.scalar('TotalLoss', train_loss_tower / n_splits))
            self.summaries = tf.summary.merge(self.summaries)
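assign_to_gpu is not shown either. It appears to follow the common multi-tower TF 1.x pattern of pinning variables to one parameter device while each tower computes on its own GPU; a sketch in that style (the real implementation may differ):

    import tensorflow as tf

    def assign_to_gpu(gpu, params_device):
        # Hypothetical sketch: return a device function for tf.device that
        # places variable ops on the shared parameter GPU and every other
        # op on this tower's own GPU.
        def _assign(op):
            node_def = op if isinstance(op, tf.NodeDef) else op.node_def
            if node_def.op in ('Variable', 'VariableV2', 'VarHandleOp'):
                return '/gpu:{}'.format(params_device)
            return '/gpu:{}'.format(gpu)
        return _assign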