Example #1
    def _construct_graph(self, n_updates_total, target_dim=None, train=True, pre_trained_weights=None):
        gpu_grads = []
        self.summaries = []

        # store whether or not graph was previously compiled with dropout
        self.train = train
        self.target_dim = target_dim
        self._define_placeholders()


        aggregator = defaultdict(list)
        train_loss_tower = 0
        gpus = get_available_gpus(self.config)
        n_splits = max(len(gpus), 1)
        for i, (X, M, Y) in enumerate(soft_split(self.X, self.M, self.Y, n_splits=n_splits)):
            do_reuse = True if i > 0 else tf.AUTO_REUSE

            if gpus:
                device = tf.device(assign_to_gpu(gpus[i], params_device=gpus[0]))
            else:
                device = tf.device('cpu')

            scope = tf.variable_scope(tf.get_variable_scope(), reuse=do_reuse)

            with device, scope:
                featurizer_state = featurizer(
                    X,
                    config=self.config,
                    encoder=self.encoder,
                    dropout_placeholder=self.do_dropout,
                    train=train,
                    reuse=do_reuse
                )
Example #2
    def _construct_graph(self, n_updates_total, target_dim=None, train=True, pre_trained_weights=None):
        gpu_grads = []
        self.summaries = []

        # store whether or not graph was previously compiled with dropout
        self.train = train
        self._define_placeholders(target_dim=target_dim)

        aggregator = defaultdict(list)
        train_loss_tower = 0
        gpus = self.config.visible_gpus
        n_splits = max(len(gpus), 1)

        # multi-GPU setup: using the CPU as the parameter server is most efficient
        # unless the system has direct GPU-to-GPU connections;
        # with a single GPU there is no need for a separate parameter-server device
        params_device = 'cpu' if len(gpus) != 1 else gpus[0]
        for i, (X, M, Y) in enumerate(soft_split(self.X, self.M, self.Y, n_splits=n_splits)):
            do_reuse = True if i > 0 else tf.AUTO_REUSE

            if gpus:
                device = tf.device(assign_to_gpu(gpus[i], params_device=params_device))
            else:
                device = tf.device('cpu')

            scope = tf.variable_scope(tf.get_variable_scope(), reuse=do_reuse)

            with device, scope:
                featurizer_state = featurizer(
                    X,
                    config=self.config,
                    encoder=self.encoder,
                    dropout_placeholder=self.do_dropout,
                    train=train,
                    reuse=do_reuse
                )
                language_model_state = language_model(
                    X=X,
                    M=M,
                    config=self.config,
                    embed_weights=featurizer_state['embed_weights'],
                    hidden=featurizer_state['sequence_features'],
                    reuse=do_reuse
                )

                lm_loss_coef = self.config.lm_loss_coef
                if target_dim is None:
                    lm_loss_coef = 1.0

                train_loss = lm_loss_coef * tf.reduce_mean(language_model_state['losses'])

                aggregator['features'].append(featurizer_state['features'])
                aggregator['lm_losses'].append(language_model_state['losses'])

                lm_logits = language_model_state["logits"]
                aggregator["lm_model"].append(sample_with_temperature(lm_logits, self.config.lm_temp))

                if target_dim is not None:
                    with tf.variable_scope('model/target'):
                        target_model_state = self._target_model(
                            featurizer_state=featurizer_state,
                            targets=Y,
                            n_outputs=target_dim,
                            train=train,
                            reuse=do_reuse,
                            max_length=self.config.max_length
                        )
                    train_loss += (1 - lm_loss_coef) * tf.reduce_mean(target_model_state['losses'])
                    train_loss_tower += train_loss

                    aggregator['logits'].append(target_model_state['logits'])
                    aggregator['target_losses'].append(target_model_state['losses'])

                params = find_trainable_variables("model")
                grads = tf.gradients(train_loss, params)
                grads = list(zip(grads, params))
                gpu_grads.append(grads)

        with tf.device(params_device):
            self.lm_predict_op = tf.concat(aggregator["lm_model"], 0)
            self.features = tf.concat(aggregator['features'], axis=0)
            self.lm_losses = tf.concat(aggregator['lm_losses'], axis=0)

            if train:
                self._compile_train_op(
                    params=params,
                    grads=gpu_grads,
                    n_updates_total=n_updates_total,
                    initial_params=pre_trained_weights
                )

            if target_dim is not None:
                self.logits = tf.concat(aggregator['logits'], axis=0)
                self.target_losses = concat_or_stack(aggregator['target_losses'])

                self.predict_op = self._predict_op(
                    self.logits, **target_model_state.get("predict_params", {})
                )
                self.predict_proba_op = self._predict_proba_op(
                    self.logits, **target_model_state.get("predict_params", {})
                )
                self.target_loss = tf.reduce_mean(self.target_losses)
                self.lm_loss = tf.reduce_mean(self.lm_losses)
                self.summaries.append(tf.summary.scalar('TargetModelLoss', self.target_loss))
                self.summaries.append(tf.summary.scalar('LanguageModelLoss', self.lm_loss))
                self.summaries.append(tf.summary.scalar('TotalLoss', train_loss_tower / n_splits))
            
            self.summaries = tf.summary.merge(self.summaries) if self.summaries else self.noop
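
The comment in Example #2 about using the CPU as a parameter server is carried out by the assign_to_gpu helper, whose body is not shown in these snippets. Below is a minimal sketch, under the assumption that assign_to_gpu returns a TF1-style device function that pins variables to the parameter-server device and leaves every other op on the tower's GPU; the name assign_to_gpu_sketch and the exact op-type check are illustrative, not the library's implementation.

import tensorflow.compat.v1 as tf

def assign_to_gpu_sketch(gpu, params_device='/cpu:0'):
    # Device function for tf.device(): variables live on the parameter-server
    # device, all remaining ops run on this tower's GPU.
    def _device_fn(op):
        if op.type in ('Variable', 'VariableV2', 'VarHandleOp'):
            return params_device
        return gpu
    return _device_fn

# usage, mirroring the tower loop above:
# with tf.device(assign_to_gpu_sketch('/gpu:1', params_device='/cpu:0')):
#     ...
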
Example #3
    def _construct_graph(self, n_updates_total, target_dim=None, train=True):
        gpu_grads = []
        self.summaries = []

        # store whether or not graph was previously compiled with dropout
        self.train = train
        self._define_placeholders(target_dim=target_dim)

        aggregator = defaultdict(list)
        train_loss_tower = 0
        gpus = self.config.visible_gpus
        n_splits = max(len(gpus), 1)

        # multi-GPU setup: using the CPU as the parameter server is most efficient
        # unless the system has direct GPU-to-GPU connections;
        # with a single GPU there is no need for a separate parameter-server device
        params_device = 'cpu' if len(gpus) != 1 else gpus[0]

        # decide on the language model loss coefficient;
        # if the language model loss does not contribute to the overall loss,
        # leave the language model computation out of the graph entirely
        lm_loss_coef = self.config.lm_loss_coef
        if target_dim is None:
            lm_loss_coef = 1.0
        compile_lm = (train and lm_loss_coef > 0) or self.require_lm

        for i, (X, M, Y) in enumerate(soft_split(self.X, self.M, self.Y, n_splits=n_splits)):
            do_reuse = True if i > 0 else tf.AUTO_REUSE

            if gpus:
                device = tf.device(assign_to_gpu(gpus[i], params_device=params_device))
            else:
                device = tf.device('cpu')

            scope = tf.variable_scope(tf.get_variable_scope(), reuse=do_reuse)

            with device, scope:
                featurizer_state = featurizer(
                    X,
                    config=self.config,
                    encoder=self.encoder,
                    dropout_placeholder=self.do_dropout,
                    train=train,
                    reuse=do_reuse
                )

                if compile_lm:
                    language_model_state = language_model(
                        X=X,
                        M=M,
                        config=self.config,
                        embed_weights=featurizer_state['embed_weights'],
                        hidden=featurizer_state['sequence_features'],
                        reuse=do_reuse
                    )

                    train_loss = lm_loss_coef * tf.reduce_mean(language_model_state['losses'])
                    aggregator['lm_losses'].append(language_model_state['losses'])
                    lm_logits = language_model_state["logits"]
                    aggregator["lm_model"].append(sample_with_temperature(lm_logits, self.config.lm_temp))
                else:
                    train_loss = 0

                aggregator['features'].append(featurizer_state['features'])

                if target_dim is not None:
                    with tf.variable_scope('model/target'):
                        target_model_state = self._target_model(
                            featurizer_state=featurizer_state,
                            targets=Y,
                            n_outputs=target_dim,
                            train=train,
                            reuse=do_reuse,
                            max_length=self.config.max_length
                        )
                    train_loss += (1 - lm_loss_coef) * tf.reduce_mean(target_model_state['losses'])
                    train_loss_tower += train_loss

                    aggregator['logits'].append(target_model_state['logits'])
                    aggregator['target_losses'].append(target_model_state['losses'])

                params = find_trainable_variables("model")
                grads = tf.gradients(train_loss, params)
                grads = list(zip(grads, params))
                gpu_grads.append(grads)

        with tf.device(params_device):
            self.features = tf.concat(aggregator['features'], axis=0)

            if compile_lm:
                self.lm_predict_op = tf.concat(aggregator["lm_model"], 0)
                self.lm_losses = tf.concat(aggregator['lm_losses'], axis=0)
                self.lm_loss = tf.reduce_mean(self.lm_losses)
                self.summaries.append(tf.summary.scalar('LanguageModelLoss', self.lm_loss))

            if train:
                self._compile_train_op(
                    params=params,
                    grads=gpu_grads,
                    n_updates_total=n_updates_total
                )

            if target_dim is not None:
                self.logits = tf.concat(aggregator['logits'], axis=0)
                self.target_losses = concat_or_stack(aggregator['target_losses'])

                self.predict_op = self._predict_op(
                    self.logits, **target_model_state.get("predict_params", {})
                )
                self.predict_proba_op = self._predict_proba_op(
                    self.logits, **target_model_state.get("predict_params", {})
                )
                self.target_loss = tf.reduce_mean(self.target_losses)

                self.summaries.append(tf.summary.scalar('TargetModelLoss', self.target_loss))
                self.summaries.append(tf.summary.scalar('TotalLoss', train_loss_tower / n_splits))

            self.summaries = tf.summary.merge(self.summaries) if self.summaries else self.noop
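
In every example the per-tower (gradient, variable) pairs accumulated in gpu_grads are handed to self._compile_train_op, whose implementation is not part of these snippets. The sketch below only illustrates the usual tower-averaging step such a method performs for dense gradients in TF1 graph mode; average_tower_grads and the AdamOptimizer line in the usage comment are assumptions made for illustration.

import tensorflow.compat.v1 as tf

def average_tower_grads(tower_grads):
    # tower_grads: one list of (grad, var) pairs per GPU, all in the same variable order
    averaged = []
    for grads_and_var in zip(*tower_grads):
        var = grads_and_var[0][1]
        grads = [g for g, _ in grads_and_var if g is not None]
        if not grads:
            averaged.append((None, var))
            continue
        # element-wise mean of this variable's gradient across towers
        averaged.append((tf.reduce_mean(tf.stack(grads), axis=0), var))
    return averaged

# usage sketch:
# avg_grads = average_tower_grads(gpu_grads)
# train_op = tf.train.AdamOptimizer(learning_rate).apply_gradients(avg_grads)
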
Example #4
    def _construct_graph(self, n_updates_total, target_dim=None, train=True):
        gpu_grads = []
        self.summaries = []

        # store whether or not graph was previously compiled with dropout
        self.train = train
        self.target_dim = target_dim
        self._define_placeholders()

        aggregator = defaultdict(list)
        train_loss_tower = 0
        gpus = get_available_gpus(self.config)
        n_splits = max(len(gpus), 1)
        for i, (X, M, Y) in enumerate(
                soft_split(self.X, self.M, self.Y, n_splits=n_splits)):
            do_reuse = True if i > 0 else tf.AUTO_REUSE

            if gpus:
                device = tf.device(
                    assign_to_gpu(gpus[i], params_device=gpus[0]))
            else:
                device = tf.device('cpu')

            scope = tf.variable_scope(tf.get_variable_scope(), reuse=do_reuse)

            with device, scope:
                featurizer_state = featurizer(
                    X,
                    config=self.config,
                    encoder=self.encoder,
                    dropout_placeholder=self.do_dropout,
                    train=train,
                    reuse=do_reuse)
                language_model_state = language_model(
                    X=X,
                    M=M,
                    config=self.config,
                    embed_weights=featurizer_state['embed_weights'],
                    hidden=featurizer_state['sequence_features'],
                    reuse=do_reuse)

                lm_loss_coef = self.config.lm_loss_coef
                if target_dim is None:
                    lm_loss_coef = 1.0

                train_loss = lm_loss_coef * tf.reduce_mean(
                    language_model_state['losses'])

                aggregator['features'].append(featurizer_state['features'])
                aggregator['lm_losses'].append(language_model_state['losses'])

                lm_logits = language_model_state["logits"]
                aggregator["lm_model"].append(
                    sample_with_temperature(lm_logits, self.config.lm_temp))

                if target_dim is not None:
                    target_model_state = self._target_model(
                        featurizer_state=featurizer_state,
                        targets=Y,
                        n_outputs=target_dim,
                        train=train,
                        reuse=do_reuse,
                        max_length=self.config.max_length)
                    train_loss += (1 - lm_loss_coef) * tf.reduce_mean(
                        target_model_state['losses'])
                    train_loss_tower += train_loss

                    params = find_trainable_variables("model")
                    grads = tf.gradients(train_loss, params)
                    grads = list(zip(grads, params))
                    gpu_grads.append(grads)
                    aggregator['logits'].append(target_model_state['logits'])
                    aggregator['clf_losses'].append(
                        target_model_state['losses'])

        self.lm_predict_op = tf.concat(aggregator["lm_model"], 0)
        self.features = tf.concat(aggregator['features'], axis=0)
        self.lm_losses = tf.concat(aggregator['lm_losses'], axis=0)

        if target_dim is not None:
            self.logits = tf.concat(aggregator['logits'], axis=0)
            self.clf_losses = concat_or_stack(aggregator['clf_losses'])

            self.predict_op, self.predict_proba_op = self._predict_ops(
                self.logits, **target_model_state.get("predict_params", {}))
            self._compile_train_op(params=params,
                                   grads=gpu_grads,
                                   n_updates_total=n_updates_total)
            self.clf_loss = tf.reduce_mean(self.clf_losses)
            self.lm_loss = tf.reduce_mean(self.lm_losses)
            self.summaries.append(
                tf.summary.scalar('TargetModelLoss', self.clf_loss))
            self.summaries.append(
                tf.summary.scalar('LanguageModelLoss', self.lm_loss))
            self.summaries.append(
                tf.summary.scalar('TotalLoss', train_loss_tower / n_splits))
            self.summaries = tf.summary.merge(self.summaries)