def __init__(self, trainpath, testpath, log_path='data', n_layers=150):
        self.train_path = trainpath
        self.test_path = testpath
        self.n_layers = n_layers
        self.data_folder = log_path
        self.callbacks_list = []
        # Define the basic TensorBoard callback.
        logdir = os.path.join(
            config.LOG_PATH,
            "logs/image/" + datetime.now().strftime("%Y%m%d-%H%M%S"))

        file_writer_cm = create_file_writer(logdir + '/cm')
        tensorboard_callback = TensorBoard(log_dir=logdir)

        checkpoint_path = config.BASE_MODEL + "_training/cp.ckpt"
        cp_callback = ModelCheckpoint(filepath=os.path.join(
            config.MODEL_PATH, checkpoint_path),
                                      save_weights_only=True,
                                      monitor='val_loss',
                                      verbose=1,
                                      save_best_only=True)

        early_stopper = EarlyStopping(monitor='val_loss',
                                      min_delta=0.0001,
                                      patience=6,
                                      verbose=1,
                                      mode='auto')

        self.callbacks_list = [cp_callback, tensorboard_callback]
        self.transform()
        self.fit()
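The `file_writer_cm` writer above is created but not used in this excerpt; in the usual Keras/TensorBoard pattern it backs a `LambdaCallback` that logs a confusion-matrix image each epoch. A minimal sketch of that pattern, where `plot_confusion_matrix_image()` is a hypothetical helper rather than part of the snippet:

import tensorflow as tf

def log_confusion_matrix(epoch, logs):
    # Hypothetical helper: returns a [1, height, width, channels] image tensor
    # of the confusion matrix computed on held-out data.
    cm_image = plot_confusion_matrix_image()
    with file_writer_cm.as_default():
        tf.summary.image("Confusion Matrix", cm_image, step=epoch)

cm_callback = tf.keras.callbacks.LambdaCallback(on_epoch_end=log_confusion_matrix)
# cm_callback could then be appended to self.callbacks_list alongside the others.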
Example #2
 def host_call_fn(**kwargs):
   writer = contrib_summary.create_file_writer(summary_dir, max_queue=1000)
   always_record = contrib_summary.always_record_summaries()
   with writer.as_default(), always_record:
     for name, scalar in kwargs.items():
       contrib_summary.scalar(name, tf.reduce_mean(input_tensor=scalar))
     return contrib_summary.all_summary_ops()
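`host_call_fn` above follows the TPUEstimator host-call pattern, where summary ops run on the host CPU rather than on the TPU. A hedged sketch of how such a function is typically attached in a model_fn (the `loss`, `mode`, and `train_op` names are illustrative, not from the snippet):

# host_call tensors typically need an outer (batch) dimension, hence the reshape.
host_call = (host_call_fn, {"loss": tf.reshape(loss, [1])})
return tf.contrib.tpu.TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op,
                                       host_call=host_call)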
Example #3
def main(env_name="CartPole-v0",
         n_steps=1000000,
         random_actions=False,
         verbose=False,
         visualise=False,
         reward_style=None,
         testing=False):
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    test = "-test" if testing else ""
    log_dir = 'logs/dqn/' + current_time + test
    summary_writer = tf_summary.create_file_writer(log_dir)

    env_ = gym.make(env_name)
    e = Epsilon(0.1, 0.9, 0.99)
    agent = Q_Learn(env_,
                    e,
                    obs_precision=1,
                    tf_writer=summary_writer,
                    random_actions=random_actions,
                    verbose=verbose,
                    visualise=visualise,
                    reward_style=reward_style)
    for i in range(n_steps):
        agent.step()

    env_.env.close()
Example #4
    def __init__(self,
                 base_dir,
                 create_agent_fn,
                 create_environment_fn=unity_lib.create_otc_environment,
                 checkpoint_file_prefix='ckpt',
                 logging_file_prefix='log',
                 log_every_n=1,
                 num_iterations=200,
                 training_steps=2500,
                 evaluation_steps=1250,
                 max_steps_per_episode=2700):
        """Initialize the Runner object in charge of running a full experiment.

    Args:
      base_dir: str, the base directory to host all required sub-directories.
      create_agent_fn: A function that takes as args an
        environment, and returns an agent.
      create_environment_fn: A function which receives a problem name and
        creates a Gym environment for that problem (e.g. an Atari 2600 game).
      checkpoint_file_prefix: str, the prefix to use for checkpoint files.
      logging_file_prefix: str, prefix to use for the log files.
      log_every_n: int, the frequency for writing logs.
      num_iterations: int, the iteration number threshold (must be greater than
        start_iteration).
      training_steps: int, the number of training steps to perform.
      evaluation_steps: int, the number of evaluation steps to perform.
      max_steps_per_episode: int, maximum number of steps after which an episode
        terminates.

    This constructor will take the following actions:
    - Initialize an environment.
    - Initialize a logger.
    - Initialize an agent.
    - Reload from the latest checkpoint, if available, and initialize the
      Checkpointer object.
    """
        assert base_dir is not None
        self._logging_file_prefix = logging_file_prefix
        self._log_every_n = log_every_n
        self._num_iterations = num_iterations
        self._training_steps = training_steps
        self._evaluation_steps = evaluation_steps
        self._max_steps_per_episode = max_steps_per_episode
        self._base_dir = base_dir
        self._create_directories()
        self._summary_writer = tf_summary.create_file_writer(self._base_dir)
        self._summary_writer.as_default()
        self.experiment_data = {}

        self._environment = create_environment_fn()
        config = tf.compat.v1.ConfigProto(allow_soft_placement=True)
        # Allocate only subset of the GPU memory as needed which allows for running
        # multiple agents/workers on the same GPU.
        # config.gpu_options.allow_growth = True
        self._agent = create_agent_fn(self._environment,
                                      self._base_dir,
                                      summary_writer=self._summary_writer)
        # self._summary_writer.add_graph(graph=tf.get_default_graph())

        self._initialize_checkpointer_and_maybe_resume(checkpoint_file_prefix)
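Note that the call above passes the environment, `self._base_dir`, and a `summary_writer` keyword to `create_agent_fn`, which is slightly more than the docstring describes. A factory matching that call could look like this (the agent class is purely illustrative):

def create_agent_fn(environment, base_dir, summary_writer=None):
    # Illustrative: any agent that accepts the environment and an optional
    # summary writer satisfies the constructor call above.
    return MyAgent(environment, summary_writer=summary_writer)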
Example #5
 def _get_SummaryWriter(self):
     if not self.args.debug and not self.args.do_test:
         ensure_dir(os.path.join('./summary/', self.experiment_name))
         self.summarywriter = summary.create_file_writer(
             logdir='./summary/{}/{}/train'.format(
                 self.experiment_name,
                 time.strftime("%m%d-%H-%M-%S", time.localtime(
                     time.time()))))
Example #6
def prepare_log(name):
    log_dir = f"./logs/{name}"
    os.makedirs(log_dir, exist_ok=True)
    tensorboard = TensorBoard(log_dir=log_dir)
    tensorboard.set_model(discriminator)

    writer = create_file_writer(log_dir)

    return (log_dir, writer)
Example #7
 def __init__(self, logdir="./tensorboard_logs/", run_id=None):
     """
     :param logdir: dir where TensorBoard events will be written
     :param run_id: name for the log id; otherwise the current datetime is used
     """
     from tensorflow import summary
     self.summary = summary
     run_id = datetime.now().isoformat()[:-7].replace("T", " ").replace(
         ":", "_") if run_id is None else run_id
     self._path = path.join(logdir, run_id)
     self.writer = summary.create_file_writer(self._path)
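Only the constructor is shown; a logging method this class would plausibly expose (the method name is an assumption) simply writes through the stored writer:

def log_scalar(self, tag, value, step):
    # Assumed companion method: record one scalar under this run's directory.
    with self.writer.as_default():
        self.summary.scalar(tag, value, step=step)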
Example #8
def log_test_results(cfg, model, test_generator, test_metrics, log_dir):
    '''
    Visualize performance of a trained model on the test set. Optionally save the model.
    :param cfg: Project config
    :param model: A trained Keras model
    :param test_generator: A Keras generator for the test set
    :param test_metrics: Dict of test set performance metrics
    :param log_dir: Path to write TensorBoard logs
    '''

    # Visualization of test results
    test_predictions = model.predict(test_generator, verbose=0)
    test_labels = test_generator.labels
    plt = plot_roc(test_labels,
                   test_predictions,
                   list(test_generator.class_indices.keys()),
                   dir_path=cfg['PATHS']['IMAGES'])
    roc_img = plot_to_tensor()
    plt = plot_confusion_matrix(test_labels,
                                test_predictions,
                                list(test_generator.class_indices.keys()),
                                dir_path=cfg['PATHS']['IMAGES'])
    cm_img = plot_to_tensor()

    # Log test set results and plots in TensorBoard
    writer = tf_summary.create_file_writer(logdir=log_dir)

    # Create table of test set metrics
    test_summary_str = [['**Metric**', '**Value**']]
    for metric in test_metrics:
        metric_values = test_metrics[metric]
        test_summary_str.append([metric, str(metric_values)])

    # Create table of model and train hyperparameters used in this experiment
    hparam_summary_str = [['**Variable**', '**Value**']]
    for key in cfg['TRAIN']:
        hparam_summary_str.append([key, str(cfg['TRAIN'][key])])
    for key in cfg['NN'][cfg['TRAIN']['MODEL_DEF'].upper()]:
        hparam_summary_str.append(
            [key, str(cfg['NN'][cfg['TRAIN']['MODEL_DEF'].upper()][key])])

    # Write to TensorBoard logs
    with writer.as_default():
        tf_summary.text(name='Test set metrics',
                        data=tf.convert_to_tensor(test_summary_str),
                        step=0)
        tf_summary.text(name='Run hyperparameters',
                        data=tf.convert_to_tensor(hparam_summary_str),
                        step=0)
        tf_summary.image(name='ROC Curve (Test Set)', data=roc_img, step=0)
        tf_summary.image(name='Confusion Matrix (Test Set)',
                         data=cm_img,
                         step=0)
    return
Example #9
def test_filewriter():

    train_log_dir = 'test_log_dir'
    train_summary_writer = summary.create_file_writer(train_log_dir)

    name = "loss" + str(random.randint(1, 10))

    with train_summary_writer.as_default():
        #name, tensor, collections = None, family = None):
        for i in range(10):
            loss = F.l1_loss(torch.rand(1), torch.rand(1))
            tf.summary.scalar(name, loss.item(), step=i)
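Summary writers buffer events, so a short script like this test may want an explicit flush before exiting; the run can then be inspected with `tensorboard --logdir test_log_dir`:

    # Not part of the original test; forces buffered events to disk.
    train_summary_writer.flush()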
Example #10
    def __init__(self, model, train_log_dir, test_log_dir, manager):
        self._model = model

        self._loss_fn = tf.nn.sparse_softmax_cross_entropy_with_logits
        self._manager = manager

        self._train_loss = Mean(name='train_loss')
        self._test_loss = Mean(name='test_loss')

        self._train_acc = SparseCategoricalAccuracy(name='train_acc')
        self._test_acc = SparseCategoricalAccuracy(name='test_acc')

        self._train_loss.reset_states()
        self._test_loss.reset_states()

        self._train_acc.reset_states()
        self._test_acc.reset_states()

        os.makedirs(train_log_dir, exist_ok=True)
        os.makedirs(test_log_dir, exist_ok=True)

        self._train_summary_writer = create_file_writer(train_log_dir)
        self._test_summary_writer = create_file_writer(test_log_dir)
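The two writers and the Keras metric objects above are typically consumed together at the end of each epoch; a minimal sketch (the method name and tags are assumptions):

    def _log_epoch(self, epoch):
        with self._train_summary_writer.as_default():
            tf.summary.scalar('loss', self._train_loss.result(), step=epoch)
            tf.summary.scalar('accuracy', self._train_acc.result(), step=epoch)
        with self._test_summary_writer.as_default():
            tf.summary.scalar('loss', self._test_loss.result(), step=epoch)
            tf.summary.scalar('accuracy', self._test_acc.result(), step=epoch)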
Example #11
def log_test_results(cfg, model, test_generator, test_metrics, log_dir):
    '''
    Visualize performance of a trained model on the test set. Optionally save the model.
    :param cfg: Project config
    :param model: A trained Keras model
    :param test_generator: A Keras generator for the test set
    :param test_metrics: Dict of test set performance metrics
    :param log_dir: Path to write TensorBoard logs
    '''

    # Visualization of test results
    test_predictions = model.predict_generator(test_generator, verbose=0)
    test_labels = test_generator.labels
    covid_idx = test_generator.class_indices['COVID-19']
    plt = plot_roc("Test set", test_labels, test_predictions, class_id=covid_idx)
    roc_img = plot_to_tensor()
    plt = plot_confusion_matrix(test_labels, test_predictions, class_id=covid_idx)
    cm_img = plot_to_tensor()

    # Log test set results and plots in TensorBoard
    writer = tf_summary.create_file_writer(logdir=log_dir)

    # Create table of test set metrics
    test_summary_str = [['**Metric**','**Value**']]
    thresholds = cfg['TRAIN']['THRESHOLDS']  # Load classification thresholds
    for metric in test_metrics:
        if metric in ['precision', 'recall'] and isinstance(test_metrics[metric], list):
            metric_values = dict(zip(thresholds, test_metrics[metric]))
        else:
            metric_values = test_metrics[metric]
        test_summary_str.append([metric, str(metric_values)])

    # Create table of model and train config values
    hparam_summary_str = [['**Variable**', '**Value**']]
    for key in cfg['TRAIN']:
        hparam_summary_str.append([key, str(cfg['TRAIN'][key])])
    if cfg['TRAIN']['CLASS_MODE'] == 'binary':
        for key in cfg['NN']['DCNN_BINARY']:
            hparam_summary_str.append([key, str(cfg['NN']['DCNN_BINARY'][key])])
    else:
        # NOTE: both branches currently read the DCNN_BINARY section; the
        # multiclass case presumably should read its own config section.
        for key in cfg['NN']['DCNN_BINARY']:
            hparam_summary_str.append([key, str(cfg['NN']['DCNN_BINARY'][key])])

    # Write to TensorBoard logs
    with writer.as_default():
        tf_summary.text(name='Test set metrics', data=tf.convert_to_tensor(test_summary_str), step=0)
        tf_summary.text(name='Run hyperparameters', data=tf.convert_to_tensor(hparam_summary_str), step=0)
        tf_summary.image(name='ROC Curve (Test Set)', data=roc_img, step=0)
        tf_summary.image(name='Confusion Matrix (Test Set)', data=cm_img, step=0)
    return
Example #12
 def __init__(self, run_name, save_every, base_dir="experiments"):
     super().__init__()
     self.save_counter = 0
     self.least_loss = -1
     os.makedirs(base_dir, exist_ok=True)
     self.base_dir = os.path.join(base_dir, run_name)
     os.makedirs(self.base_dir, exist_ok=True)
     os.makedirs(os.path.join(self.base_dir, "models"), exist_ok=True)
     os.makedirs(os.path.join(self.base_dir, "logs"), exist_ok=True)
     self.summary_writer = create_file_writer(
         os.path.join(self.base_dir, "logs"))
     self.summary_writer.set_as_default()
     self.iters_since_last_model_save = save_every + 1
     self.save_every = save_every
Example #13
    def start_model_manager_training(self, epoch_start=0, *, logdir, hparam, other_loggers=[], **runtime_options):

        assert hasattr(self.model_manager,"save_hparams")
        self.model_manager.save_hparams(hparam=hparam, logdir=logdir)

        with summary.create_file_writer(logdir).as_default():
            hp.hparams(hparam)
            # TODO:
            # For now the call to the training function of LoggingExperimentManager is quite superfluous.
            # However the idea is to have LoggingExperimentManager specify one of the user-facing API with detailed
            # specifications on requirements.
            return super(TBExperimentManager, self).\
                start_model_manager_training(logdir=logdir,
                                             hparam=hparam,
                                             epoch_start=epoch_start,
                                             logger_functions=[summary.scalar]+other_loggers,
                                             **runtime_options)
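`hp` above is presumably `tensorboard.plugins.hparams.api`; `hp.hparams(...)` records the hyperparameter set for this run so TensorBoard's HParams dashboard can group runs by configuration. A minimal standalone equivalent of the logging step (values are illustrative):

from tensorboard.plugins.hparams import api as hp
from tensorflow import summary

with summary.create_file_writer(logdir).as_default():
    hp.hparams({'learning_rate': 1e-3, 'batch_size': 32})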
Example #14
    def prepare_dirs(self):
        self.log_dir = f"./logs/{self.name}"
        if not os.path.exists(self.log_dir):
            os.makedirs(self.log_dir)
            tensorboard = TensorBoard(log_dir=self.log_dir)
            tensorboard.set_model(self.discriminator)

        self.writer = create_file_writer(self.log_dir)

        checkpoint_dir = f'.\\checkpoints\\{self.name}'
        self.checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt')
        self.checkpoint = tf.train.Checkpoint(generator=self.generator,
                                              discriminator=self.discriminator)
        manager = tf.train.CheckpointManager(self.checkpoint, checkpoint_dir,
                                             max_to_keep=5)
        if manager.latest_checkpoint:
            self.checkpoint.restore(manager.latest_checkpoint)
            print(f"Restored from {manager.latest_checkpoint}")
Example #15
    def set_callbacks(self, checkpoints=True, tensorboard=True):
        """ Set any model callbacks here """

        if checkpoints:
            if not os.path.exists('checkpoints'):
                os.mkdir('checkpoints')

            checkpoint = ModelCheckpoint(filepath='checkpoints/' +
                                         self.filename(),
                                         monitor='val_accuracy',
                                         verbose=1,
                                         save_best_only=True,
                                         mode='max')
            self.callbacks.append(checkpoint)

        if tensorboard:
            log_dir = os.path.join(self.model_log_dir, self.filename()[:-3])
            self.file_writer = create_file_writer(log_dir + '/metrics')
            self.file_writer.set_as_default()
            tensorboard_callback = TensorBoard(
                log_dir=log_dir,
                write_graph=True,
                write_images=True,
                histogram_freq=0,
                profile_batch=0,
            )
            self.callbacks.append(tensorboard_callback)

        lr_schedule = None
        config = self.lr_schedule_config
        if config:
            if config.get('lr_schedule') == 'polynomial':
                lr_schedule = PolynomialDecay(maxEpochs=self.epochs,
                                              initAlpha=self.lr,
                                              power=config.get('lr_power'))
            elif config.get('lr_schedule') == 'linear':
                lr_schedule = PolynomialDecay(maxEpochs=self.epochs,
                                              initAlpha=self.lr,
                                              power=1)

        if lr_schedule:
            lr_callback = LearningRateScheduler(lr_schedule)
            self.callbacks.append(lr_callback)
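`PolynomialDecay` here takes `maxEpochs`, `initAlpha`, and `power`, so it appears to be a custom per-epoch schedule (as seen in common Keras tutorials) rather than `tf.keras.optimizers.schedules.PolynomialDecay`. A sketch of such a class, compatible with `LearningRateScheduler`:

class PolynomialDecay:
    # Assumed implementation: decay the initial learning rate polynomially
    # over maxEpochs; power=1 reduces to a linear decay.
    def __init__(self, maxEpochs=100, initAlpha=0.01, power=1.0):
        self.maxEpochs = maxEpochs
        self.initAlpha = initAlpha
        self.power = power

    def __call__(self, epoch):
        decay = (1 - (epoch / float(self.maxEpochs))) ** self.power
        return float(self.initAlpha * decay)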
Example #16
def _write_aggregate_summaries(model_dir, global_step, eval_tag,
                               aggregates_dict):
    """Writes text metrics as summaries."""

    eval_dir = os.path.join(model_dir, eval_tag)
    summary_writer = contrib_summary.create_file_writer(eval_dir)
    with summary_writer.as_default(), \
        contrib_summary.always_record_summaries():
        for k, v in sorted(aggregates_dict[_ROUGE_METRIC].items()):
            contrib_summary.scalar("text_eval/%s-R" % k,
                                   v.mid.recall,
                                   step=global_step)
            contrib_summary.scalar("text_eval/%s-P" % k,
                                   v.mid.precision,
                                   step=global_step)
            contrib_summary.scalar("text_eval/%s-F" % k,
                                   v.mid.fmeasure,
                                   step=global_step)
        for k, v in sorted(aggregates_dict[_BLEU_METRIC].items()):
            contrib_summary.scalar("text_eval/%s" % k,
                                   v.mid.bleu,
                                   step=global_step)
        for k, v in sorted(aggregates_dict[_REPETITION_METRIC].items()):
            contrib_summary.scalar("text_eval/%s-T" % k,
                                   v.mid.target_ratio,
                                   step=global_step)
            contrib_summary.scalar("text_eval/%s-P" % k,
                                   v.mid.prediction_ratio,
                                   step=global_step)
        for k, v in sorted(aggregates_dict[_LENGTH_METRIC].items()):
            contrib_summary.scalar("text_eval/%s-T" % k,
                                   v.mid.target_length,
                                   step=global_step)
            contrib_summary.scalar("text_eval/%s-P" % k,
                                   v.mid.prediction_length,
                                   step=global_step)
            contrib_summary.scalar("text_eval/%s-R" % k,
                                   v.mid.relative_length,
                                   step=global_step)
Example #17
def boss(env, nb_AP, nb_Users, action_queues, matrix_queues, logger_folder,
         max_epsiode_steps):
    step = 0
    writer = summary.create_file_writer('logs/' + logger_folder + '/boss')
    writer.set_as_default()
    summary.experimental.set_step(step)

    while True:
        step += max_epsiode_steps
        W = np.zeros((nb_AP, nb_Users)).astype('float32')
        for i in range(nb_AP):
            W[i:] = action_queues[i].get()

        W = W / np.linalg.norm(W, axis=1).reshape(W.shape[0], 1)
        for q in matrix_queues:
            q.put(W)

        env.set_W(W)
        r = np.sum(np.log2(1 + env.sinr()))
        summary.scalar(name='Episode/Reward', data=r, step=step)
        print(
            "********* \nReward {0:5.6f} Step {1: 6} norm {2: 4.5f}\n************"
            .format(np.sum(np.log2(1 + env.sinr())), step, np.linalg.norm(W)))
Пример #18
0
 def _get_active_writer(self):
     if self.mode not in self._writers:
         self._writers[self.mode] = create_file_writer(
             os.path.join(self._log_dir, self.mode.value))
     return self._writers[self.mode]
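`self.mode` is used both as a dictionary key and, via `.value`, as a subdirectory name, so it is presumably an Enum along these lines (illustrative only):

import enum

class Mode(enum.Enum):
    TRAIN = "train"
    EVAL = "eval"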
Example #19
    def eval_metrics_host_call_fn(policy_output,
                                  value_output,
                                  pi_tensor,
                                  value_tensor,
                                  policy_cost,
                                  value_cost,
                                  l2_cost,
                                  combined_cost,
                                  step,
                                  est_mode=tf.estimator.ModeKeys.TRAIN):
        policy_entropy = -tf.reduce_mean(
            tf.reduce_sum(policy_output * tf.compat.v1.log(policy_output),
                          axis=1))
        # pi_tensor is one_hot when generated from sgfs (for supervised learning)
        # and soft-max when using self-play records. argmax normalizes the two.
        policy_target_top_1 = tf.argmax(pi_tensor, axis=1)

        policy_output_in_top1 = tf.compat.v1.to_float(
            tf.compat.v1.nn.in_top_k(policy_output, policy_target_top_1, k=1))
        policy_output_in_top3 = tf.compat.v1.to_float(
            tf.compat.v1.nn.in_top_k(policy_output, policy_target_top_1, k=3))

        policy_top_1_confidence = tf.reduce_max(policy_output, axis=1)
        policy_target_top_1_confidence = tf.boolean_mask(
            policy_output,
            tf.one_hot(policy_target_top_1,
                       tf.shape(policy_output)[1]))

        value_cost_normalized = value_cost / params['value_cost_weight']
        avg_value_observed = tf.reduce_mean(value_tensor)

        with tf.compat.v1.variable_scope('metrics'):
            metric_ops = {
                'policy_cost':
                tf.compat.v1.metrics.mean(policy_cost),
                'value_cost':
                tf.compat.v1.metrics.mean(value_cost),
                'value_cost_normalized':
                tf.compat.v1.metrics.mean(value_cost_normalized),
                'l2_cost':
                tf.compat.v1.metrics.mean(l2_cost),
                'policy_entropy':
                tf.compat.v1.metrics.mean(policy_entropy),
                'combined_cost':
                tf.compat.v1.metrics.mean(combined_cost),
                'avg_value_observed':
                tf.compat.v1.metrics.mean(avg_value_observed),
                'policy_accuracy_top_1':
                tf.compat.v1.metrics.mean(policy_output_in_top1),
                'policy_accuracy_top_3':
                tf.compat.v1.metrics.mean(policy_output_in_top3),
                'policy_top_1_confidence':
                tf.compat.v1.metrics.mean(policy_top_1_confidence),
                'policy_target_top_1_confidence':
                tf.compat.v1.metrics.mean(policy_target_top_1_confidence),
                'value_confidence':
                tf.compat.v1.metrics.mean(tf.abs(value_output)),
            }

        if est_mode == tf.estimator.ModeKeys.EVAL:
            return metric_ops

        # NOTE: global_step is rounded to a multiple of FLAGS.summary_steps.
        eval_step = tf.reduce_min(step)

        # Create summary ops so that they show up in SUMMARIES collection
        # That way, they get logged automatically during training
        summary_writer = contrib_summary.create_file_writer(FLAGS.work_dir)
        #with summary_writer.as_default(), \
        #        contrib_summary.record_summaries_every_n_global_steps(
        #            params['summary_steps'], eval_step):
        #    for metric_name, metric_op in metric_ops.items():
        #        contrib_summary.scalar(
        #            metric_name, metric_op[1], step=eval_step)

        # Reset metrics occasionally so that they are mean of recent batches.
        reset_op = tf.compat.v1.variables_initializer(
            tf.compat.v1.local_variables('metrics'))
        cond_reset_op = tf.cond(
            tf.equal(eval_step % params['summary_steps'],
                     tf.compat.v1.to_int64(1)), lambda: reset_op,
            lambda: tf.no_op())

        #return contrib_summary.all_summary_ops() + [cond_reset_op]
        return [cond_reset_op]
Example #20
 def __init__(self, **kwargs):
     super().__init__(**kwargs)
     self.step = 1
     self.writer = summary.create_file_writer(self.log_dir)
Example #21
    def train(self,
              lr: float = 1e-5,
              num_epochs: int = 3,
              eval_every: int = None,
              best_valid_loss=float("Inf")):
        """

        :param lr:
        :param num_epochs:
        :param eval_every:
        :param best_valid_loss:
        :return:
        """
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
        # instantiate tensorboard writer
        pathstr = str(Path(self.TRAIN_LOG_DIR /
                           f"lr={lr}-epochs={num_epochs}"))
        self.writer = summary.create_file_writer(pathstr)

        # initialize running values
        if eval_every is None:
            eval_every = len(self.train_iter) // 2
        running_loss = 0.0
        valid_running_loss = 0.0
        global_step = 0
        train_loss_list = []
        valid_loss_list = []
        global_steps_list = []

        # training loop
        self.model.train()
        for epoch in range(num_epochs):
            for (label, text), _ in self.train_iter:
                label = label.type(torch.LongTensor)
                label = label.to(self.device)
                text = text.type(torch.LongTensor)
                text = text.to(self.device)
                output = self.model(text, label)
                loss, _ = output

                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                # update running values
                running_loss += loss.item()
                global_step += 1

                # evaluation step
                if global_step % eval_every == 0:
                    self.model.eval()
                    with torch.no_grad():

                        # validation loop
                        for (label, text), _ in self.valid_iter:
                            label = label.type(torch.LongTensor)
                            label = label.to(self.device)
                            text = text.type(torch.LongTensor)
                            text = text.to(self.device)
                            output = self.model(text, label)
                            loss, _ = output

                            valid_running_loss += loss.item()
                            curr_val_loss = loss.item()

                    # evaluation
                    average_train_loss = running_loss / eval_every
                    average_valid_loss = valid_running_loss / len(
                        self.valid_iter)
                    train_loss_list.append(average_train_loss)
                    valid_loss_list.append(average_valid_loss)
                    global_steps_list.append(global_step)

                    # resetting running values
                    curr_train_loss = loss.item()
                    running_loss = 0.0
                    valid_running_loss = 0.0
                    self.model.train()

                    # print progress
                    print(
                        "Epoch [{}/{}], Step [{}/{}], Train Loss: {:.4f}, Valid Loss: {:.4f}"
                        .format(epoch + 1, num_epochs, global_step,
                                num_epochs * len(self.train_iter),
                                average_train_loss, average_valid_loss))

                    # write to tensorboard logs
                    with self.writer.as_default():
                        tf.summary.scalar('train loss',
                                          curr_train_loss,
                                          step=global_step)
                    with self.writer.as_default():
                        tf.summary.scalar('validation loss',
                                          curr_val_loss,
                                          step=global_step)

                    # checkpoint
                    if best_valid_loss > average_valid_loss:
                        best_valid_loss = average_valid_loss
                        #print(self.OUTPUT_DIR / 'foo.pt')
                        #print(best_valid_loss)
                        self.save_checkpoint(self.OUTPUT_DIR / 'model.pt',
                                             best_valid_loss)
                        self.save_metrics(self.OUTPUT_DIR / 'metrics.pt',
                                          train_loss_list, valid_loss_list,
                                          global_steps_list)

        self.save_metrics(self.OUTPUT_DIR / 'metrics.pt', train_loss_list,
                          valid_loss_list, global_steps_list)
        print("Finished Training!")
Example #22
    def train_fit(data_set):
        """
        function: start the model training process
        :return: Model, Log
        """
        summary_writer = summary.create_file_writer(log_dir)

        data_params = {
            'batch_size': BATCH_SIZE,
            'shuffle': SHUFFLE,
            'num_workers': NUM_WORKS
        }

        devices = 'cuda' if cuda.is_available() else 'cpu'
        device(devices)

        optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.99)

        train_set = CustomDataset(data_set, token, TEXT_LEN)
        train_set_pt = DataLoader(train_set, **data_params)
        model = BertClass()
        ckpt = train.Checkpoint(transformer=model.trainable_variables,
                                optimizer=optimizer)

        ckpt_manager = train.CheckpointManager(ckpt,
                                               checkpoint_path,
                                               max_to_keep=MAX_TO_KEEP)

        def train_step(model_, id_, mk_, type_ids_, optimizer_, target_):
            with GradientTape() as tp:
                y_pred = model_(id_, mk_, type_ids_)
                loss_value = loss_fn(target=target_, output=y_pred)
                # y_pred = [round(y_p) for y_p in y_pred]
                acc = accuracy_fn(target_, y_pred)
            gradient = tp.gradient(loss_value, model.trainable_variables)

            optimizer_.apply_gradients(zip(gradient,
                                           model.trainable_variables))

            return loss_value, np.array(acc).mean(), y_pred

        for epoch in range(1, EPOCHS + 1):
            for _, batch_data in enumerate(train_set_pt):
                ids = convert_to_tensor(batch_data['ids'].detach().numpy())
                mask = convert_to_tensor(batch_data['mask'].detach().numpy())
                token_type_ids = convert_to_tensor(
                    batch_data['token_type_ids'].detach().numpy())
                targets = convert_to_tensor(
                    batch_data['targets'].detach().numpy())
                loss, accuracy, pred = train_step(model_=model,
                                                  id_=ids,
                                                  mk_=mask,
                                                  type_ids_=token_type_ids,
                                                  optimizer_=optimizer,
                                                  target_=targets)

                if _ % 20 == 0 and _ > 0:
                    # Write the loss and accuracy to the log file
                    # The log is saved once every ten batches of training data
                    print("epoch: {}, fit step: {}, loss: {}, accuracy: {}".
                          format(epoch, _, loss, accuracy))
                    print("epoch is {}, predict: {}".format(epoch, pred))

            if epoch % 2 == 0:
                # Save the model once every two training epochs
                ckpt_manager.save(check_interval=True)

                with summary_writer.as_default():
                    summary.scalar(name="loss_value_step:{}".format(epoch),
                                   data=loss,
                                   step=epoch)

                with summary_writer.as_default():
                    summary.scalar(name='accuracy_value_step:{}'.format(epoch),
                                   data=accuracy,
                                   step=epoch)
Example #23
from cpprb import ReplayBuffer, PrioritizedReplayBuffer

gamma = 0.99
batch_size = 1024

N_iteration = int(1e+5)
target_update_freq = 1000
eval_freq = 100

egreedy = 0.1

# Log
dir_name = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
logdir = os.path.join("logs", dir_name)
writer = create_file_writer(logdir + "/metrics")
writer.set_as_default()

# Env
env = gym.make('CartPole-v1')
eval_env = gym.make('CartPole-v1')

# For CartPole: input 4, output 2
model = Sequential([
    Dense(64, activation='relu', input_shape=(env.observation_space.shape)),
    Dense(64, activation='relu'),
    Dense(env.action_space.n)
])
target_model = clone_model(model)

# Loss Function
Example #24
	def run(_run):
		# Load configs, if parameters are unspecified, fill in a default
		config = _run.config		

		run = config.get('fit_params') 
		model_params = config.get('model_params')   
		data_params = config.get('data_params')
		batch_size = data_params.get('batch_size')
		augmentations = data_params.get('augmentations')
		buffer_size = data_params.get('buffer_size') # the buffer sizes for shuffling
		use_sampling = data_params.get('use_sampling')
		class_target_prob = 1 / model_params.get('num_classes')
		print("[!] list of parameter configurations")
		pprint(config)
		
		
		# Load data and define generators ------------------------------------------
		print("[!] loading datasets \n")
		x_train,  x_val, x_test, probs = load_data()
		
		# get a rough estimate: there are 100 files per TFRecord
		# except for one TFRecord per item, so this estimate might not be 100% correct
		num_training = len(x_train) * 100
		
		# TF parsing functions
		print("[!] Creating dataset iterators \n")
		# Load the dataset iterators
		
		train_dataset = create_training_dataset(x_train, batch_size, buffer_size, augmentations,
										  use_sampling, probs, class_target_prob,
										  **model_params)
		
		val_dataset = validate(x_val, batch_size, **model_params)
		test_dataset = validate(x_test, batch_size, **model_params)		
		
		
		# we need the actual labels from the TFRecords, but they take INCREDIBLY long to parse
		# parse through them once, and create a csv file with a list of all the labels
		# note: the tf parsing requires that there is no randomness (shuffling) in the validation/test labels

		if not os.path.exists('../datasets/data/valid/val_labels.csv'):
			print(os.path.exists('../datasets/data/valid/val_labels.csv'))
			print("[!] creating validation label file in ../datasets/data/valid/val_labels.csv")
			create_label_csv(val_dataset,'../datasets/data/valid/val_labels.csv')
		else:
			print("[!] validation labels csv exist")
			
		if not os.path.exists('../datasets/data/test/test_labels.csv'):
			print("[!] creating test label file in ../datasets/data/test/test_labels.csv")
			create_label_csv(test_dataset,'../datasets/data/test/test_labels.csv')
		else:
			print("[!] test labels csv exist")

		# load the file with validation labels
		# getting labels from a TFRecords with lots of other data is horribly slow...
		print("[!] Loading validation labels for callbacks")
		val_labels = pd.read_csv('../datasets/data/valid/val_labels.csv')
		val_labels = np.squeeze(val_labels.to_numpy())
		
		# Model definitions --------------------------------------------------------
		print("[!] compiling model and adding callbacks \n")
		# function for building the model
		model_func = model_dict[run.get('model')]

		# invoke the user function
		model = model_func(**model_params)
		model.summary()
		# compile the model with catcrossentropy: one hot encoded labels!!
		model.compile(optimizer= tf.keras.optimizers.Adam(run.get('lr')),
						loss= 'categorical_crossentropy',
						metrics=['accuracy'])
		
		# Model callbacks ----------------------------------------------------------
		
		# ReduceLRonPlateau
		if run.get('reduce_lr_on_plateau'):
			reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, min_lr=10e-7, verbose=1)
		else:
			reduce_lr = Callback()

		# Model checkpoints
		now = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
		aug_string = 'aug' if augmentations==True else 'noaug'
		modelcheckpoint_name= lambda x: "checkpoints/model-{}-{}-{}-{}-{}.hdf5".format(run.get('model'), 
																					x, 
																					aug_string, 
																					'ch_' + str(len(model_params.get('channels'))), 
																					now)

		modelcheckpoint = ModelCheckpoint(modelcheckpoint_name('best_loss'), 
									monitor = 'val_loss', 
									verbose=1, 
									save_best_only=True, 
									save_weights_only=True)
		
		# Model early stopping
		earlystopping = EarlyStopping(monitor='val_loss', patience=10)


		# tensorboard and metric callbacks

		log_dir = "logs/fit/{}-{}-{}-{}".format(run.get('model'), aug_string, 'ch_' + str(len(model_params.get('channels'))), now)

		file_writer = tfsum.create_file_writer(log_dir)
		tensorboard_cb = tf.keras.callbacks.TensorBoard(log_dir=log_dir, 
														histogram_freq=1, 
														profile_batch=0)

		f1_metric = Metrics(val_dataset, 
				            val_labels, 
				            save_best=True, 
							save_name= modelcheckpoint_name('best_f1'), 
							writer=file_writer)
		
		# Model Training and evaluation --------------------------------------------
		print("[!] fitting model \n")
		
		model.fit(
			train_dataset.repeat(), 
			epochs=run.get('epochs'), 
			steps_per_epoch= int(num_training / batch_size),
			validation_data=val_dataset, 
			validation_steps = None,
			shuffle=True,
			verbose= 1,
			callbacks = [tensorboard_cb, f1_metric, LogMetrics(), modelcheckpoint, earlystopping, reduce_lr, MemoryCallback()]
		)

		print("[!] done running, terminating program")
Example #25
 def __init__(self, log_dir):
     """Initialize summary writer."""
     self.writer = tf_summary.create_file_writer(log_dir)
Example #26
        # update weights
        self.weights -= learning_rate * gradient / np.sqrt(self.grad_magnitude)

        return loss / self.batch_size


if __name__ == "__main__":

    time_string = datetime.now().strftime("%Y%m%d-%H%M%S")

    # base_dir = tempfile.TemporaryDirectory().name
    base_dir = "/Users/bert/Desktop"
    log_dir = '{}/logs/'.format(base_dir)
    print("Storing logs in {}".format(log_dir))
    writer = summary.create_file_writer(log_dir + time_string)

    step = 0
    start_time = time.time()

    # Use this next line to load bot weights from disk
    #engine_white = LearningEngine(None, None, sys.stderr)
    # Use this next line to re-initialize bot
    engine_white = LearningEngine(None,
                                  None,
                                  sys.stderr,
                                  weights=None,
                                  weight_file=None)

    board = chess.Board()
Example #27
if USE_FLOW is None:
    prefix = "true"
elif USE_FLOW == True:
    prefix = "flow"
else:
    prefix = "mse"

if len(sys.argv) > 1:
    prefix = sys.argv[1] + prefix

prefix += "w" + str(WIDTH) + "h" + str(HEIGHT)

log_dir = "./tfboard/" + prefix + str(time.time())

tf_summary_writer = tf_summary.create_file_writer(log_dir)

train_samples = gen_dataset(NUM_SAMPLES,
                            WIDTH,
                            HEIGHT,
                            min_val=MIN_VAL,
                            max_val=MAX_VAL)
print("generated dataset!")

if NORMALIZE_FEATURES:
    trainX, trainY = get_training_features(train_samples, MIN_VAL, MAX_VAL,
                                           MAX_COST)
else:
    trainX, trainY = get_training_features(train_samples, None, None, None)

if USE_FLOW is None:
Example #28
                                                           '').split(',')))
    for item in args.cnn_kernels
]

assert args.evaluate_dev_steps % args.print_loss_steps == 0

train_summary_writer, test_summary_writer = None, None
if args.use_tensorboard:
    import tensorflow as tf
    from tensorflow import summary

    # !rm -rf logs
    current_time = str(datetime.datetime.now().timestamp())
    train_log_dir = '../logs/tensorboard/train/' + current_time
    test_log_dir = '../logs/tensorboard/test/' + current_time
    train_summary_writer = summary.create_file_writer(train_log_dir)
    test_summary_writer = summary.create_file_writer(test_log_dir)

    print(f"log file: {current_time}")

torch.cuda.is_available()


def evaluate_on_dev(model, corpus_dev_reader, dictionary_word,
                    dictionary_char):
    loss_values = []
    batch_generator_dev = corpus_dev_reader.batchify(dictionary_word,
                                                     args.batch_size,
                                                     args.seq_len)

    with torch.no_grad():
        target = sample['target'].long().to(device)

        pred = model(input)
        pred_loss = criterion(pred, target)

        top3_val, top3_idx = torch.topk(pred, 3)

        num_correct = torch.sum(top3_idx == target.view(-1, 1))

    return pred_loss.item(), num_correct.item()


"""### Prepare the Tensorboard"""

train_log_dir = './runs/train'
train_summary_writer = summary.create_file_writer(train_log_dir)
val_log_dir = './runs/validate'
val_summary_writer = summary.create_file_writer(val_log_dir)

# Commented out IPython magic to ensure Python compatibility.
# %tensorboard --logdir runs

"""### Run Training"""

max_epoch = 200
save_stride = 10
tmp_path = './checkpoint.pth'
max_accu = -1
for epoch in tqdm(range(max_epoch)):
    ### Train Phase
Example #30
testImage = list(glob(join(relativeData, 'Test', '*')))
shuffle(testImage)
testImage = array(testImage).reshape((len(testImage), 1))

testLabels, totalTime, times = predict(currentPath, testImage, bestModel,
                                       classNames)


def plotImages(figure):
    buf = BytesIO()
    plt.savefig(buf, format='png')
    plt.close(figure)
    buf.seek(0)
    return expand_dims(_image.decode_png(buf.getvalue(), channels=4), 0)


fileWriter = create_file_writer(logDir)
fileWriter.set_as_default()
with fileWriter.as_default():
    image('Test:\nTotal time: ' + str(totalTime) + '\nTime per image: ' +
          str(sum(times) / len(times)),
          plotImages(
              imageGrid(testImage,
                        testLabels,
                        classNames=classNames,
                        perRow=4,
                        imageDimensions=(32, 32),
                        nImages=16)),
          step=0)