Пример #1
0
def train_multidec(args):
    """Train the multimodal DEC model on top of pretrained image/text SDAE encoders.

    Loads the full multimodal dataset, restores both single-modal encoders
    from their SDAE checkpoints, fits the joint MultiDEC model, and mirrors
    all hyperparameters to a Hyperdash experiment.
    """
    print("Training multidec")
    device = torch.device(args.gpu)

    print("Loading dataset...")
    full_dataset = load_multi_csv_data(args, CONFIG)
    print("Loading dataset completed")

    def build_encoder(modality):
        # Both encoders share the same topology as the pretraining SDAE;
        # only the checkpoint file differs per modality.
        encoder = MDEC_encoder(input_dim=args.input_dim,
                               z_dim=args.latent_dim,
                               n_clusters=args.n_clusters,
                               encodeLayer=[500, 500, 2000],
                               activation="relu",
                               dropout=0)
        checkpoint = os.path.join(CONFIG.CHECKPOINT_PATH,
                                  f"{modality}_sdae_{args.latent_dim}") + ".pt"
        encoder.load_model(checkpoint)
        return encoder

    mdec = MultiDEC(device=device,
                    image_encoder=build_encoder("image"),
                    text_encoder=build_encoder("text"),
                    n_clusters=args.n_clusters)
    exp = Experiment(f"MDEC {args.latent_dim}_{args.n_clusters}", capture_io=True)
    print(mdec)

    for key, value in vars(args).items():
        exp.param(key, value)
    try:
        mdec.fit(full_dataset,
                 lr=args.lr,
                 batch_size=args.batch_size,
                 num_epochs=args.epochs,
                 save_path=CONFIG.CHECKPOINT_PATH)
        print("Finish!!!")
    finally:
        # Always close the Hyperdash experiment, even if training fails.
        exp.end()
Пример #2
0
def pretrain_ddec(args):
    """Pretrain the DualNet model on paired image/text data.

    Restores the pickled word embedding and the word-index mapping,
    builds the train/test datasets, optionally resumes from an existing
    checkpoint, and runs DualNet.fit under a Hyperdash experiment.
    """
    print("Pretraining...")

    print("Loading dataset...")
    embedding_path = os.path.join(args.text_embedding_dir, 'word_embedding.p')
    with open(embedding_path, "rb") as f:
        embedding_model = cPickle.load(f)
    word_idx_path = os.path.join(args.text_embedding_dir, 'word_idx.json')
    with open(word_idx_path, "r", encoding='utf-8') as f:
        word_idx = json.load(f)
    # word_idx[1] is the index structure expected by the loader.
    train_dataset, test_dataset = load_pretrain_data(args.image_dir, word_idx[1], args, CONFIG)
    print("Loading dataset completed")

    # Same checkpoint file is used for resuming and for saving.
    checkpoint_path = "/4TBSSD/CHECKPOINT/pretrain_" + str(args.z_dim) + "_0.pt"

    dualnet = DualNet(pretrained_embedding=embedding_model,
                      text_features=args.text_features,
                      z_dim=args.z_dim,
                      n_classes=args.n_classes)
    if args.resume:
        print("loading model...")
        dualnet.load_model(checkpoint_path)
    exp = Experiment("Dualnet_pretrain_" + str(args.z_dim), capture_io=True)
    print(dualnet)

    for key, value in vars(args).items():
        exp.param(key, value)
    try:
        dualnet.fit(train_dataset, test_dataset, args=args,
                    save_path=checkpoint_path)
        print("Finish!!!")
    finally:
        # Close the experiment no matter how training terminates.
        exp.end()
Пример #3
0
    def objective(self, params):
        """
        objective function to optimize

        :param params: hyperparamters for optimizer
        :return: negated maximum validation accuracy (minimized by the tuner)
        :rtype: float
        """
        # Resolve the dataset / model / optimizer from their registries.
        dataset = Datasets.get(self.dataset_name)
        model = Models.get(self.model_name, dataset=dataset)
        optimizer = Optimizers.get(self.optimizer_name, params=params)

        # Configure the Hyperdash experiment and record every setting.
        hd_exp = HyperdashExperiment(
            f'{self.dataset_name}',
            api_key_getter=lambda: self.config['hyperdash']['api_key'])
        settings = {
            'dataset_name': self.dataset_name,
            'model_name': self.model_name,
            'optimizer_name': self.optimizer_name,
            **params,
        }
        for name, value in settings.items():
            hd_exp.param(name, value)

        # Callbacks: live metric streaming, early stopping, NaN guard.
        callbacks = [
            Hyperdash(['accuracy', 'loss', 'val_accuracy', 'val_loss'],
                      hd_exp),
            EarlyStopping('val_accuracy',
                          patience=10,
                          min_delta=0.01,
                          verbose=1),
            TerminateOnNaN()
        ]

        # Only the training split is needed; validation is carved out below.
        (x_train, y_train), *_ = dataset.get_batch()

        model.compile(loss=self.loss,
                      optimizer=optimizer,
                      metrics=['accuracy'])
        history = model.fit(x_train,
                            y_train,
                            batch_size=self.batch_size,
                            epochs=self.epochs,
                            callbacks=callbacks,
                            validation_split=0.2,
                            verbose=2)

        hd_exp.end()

        # Negate so that a minimizing tuner maximizes validation accuracy.
        val_accuracy = np.array(history.history['val_accuracy'])
        return -val_accuracy.max()
Пример #4
0
def train_reconstruction_all(args):
    """Train a stacked denoising autoencoder on one modality's features.

    Reads the input CSV, builds train/validation loaders, optionally
    resumes from a checkpoint (otherwise layer-wise pretrains first),
    then fine-tunes the full SDAE, logging everything to Hyperdash.
    """
    device = torch.device(args.gpu)

    input_csv = os.path.join(CONFIG.CSV_PATH, args.prefix + "_" + args.target_csv)
    df_input_data = pd.read_csv(input_csv, index_col=0, encoding='utf-8-sig')
    exp = Experiment(args.target_modal + " SDAE " + str(args.latent_dim),
                     capture_io=True)
    try:
        for key, value in vars(args).items():
            exp.param(key, value)
        print("Loading dataset...")

        train_dataset, val_dataset = load_autoencoder_data(
            df_input_data, CONFIG)
        print("Loading dataset completed")
        train_loader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=args.shuffle)
        val_loader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                shuffle=False)

        # One checkpoint file identifies prefix, modality, dataset and
        # latent size; it is shared by resume and save.
        checkpoint_path = os.path.join(
            CONFIG.CHECKPOINT_PATH,
            args.prefix + "_" + args.target_modal + "_" +
            args.target_dataset + "_sdae_" + str(args.latent_dim) + "_all.pt")

        sdae = StackedDAE(input_dim=args.input_dim,
                          z_dim=args.latent_dim,
                          binary=False,
                          encodeLayer=[500, 500, 2000],
                          decodeLayer=[2000, 500, 500],
                          activation="relu",
                          dropout=args.dropout,
                          device=device)
        if args.resume:
            print("resume from checkpoint")
            sdae.load_model(checkpoint_path)
        else:
            # Greedy layer-wise pretraining before end-to-end fine-tuning.
            sdae.pretrain(train_loader,
                          val_loader,
                          lr=args.lr,
                          batch_size=args.batch_size,
                          num_epochs=args.pretrain_epochs,
                          corrupt=0.2,
                          loss_type="mse")
        sdae.fit(train_loader,
                 val_loader,
                 lr=args.lr,
                 num_epochs=args.epochs,
                 corrupt=0.2,
                 loss_type="mse",
                 save_path=checkpoint_path)
    finally:
        exp.end()
def train_bayes(params):
    """
    Wrapper around train function to serve as objective function for Gaussian
    optimization in scikit-optimize routine gp_minimize.

    Arguments:
    ----------
        params: list, shape=[nb_layers + 2,]
        List of search space dimensions. Entries have to be tuples
        (lower_bound, upper_bound) for Reals or Integers.

    Returns:
    --------
        Best validation error achieved by the training run.

    """
    # Create the Hyperdash experiment for this evaluation.
    hd_exp = Experiment(project_name)

    # Decode the search-space point: layer sizes are fixed here; only the
    # learning rate (log scale) and mini-batch size (power of two) vary.
    layer_sizes = [4096] * 4
    learning_rate = hd_exp.param('learning rate', 10**params[0])
    mini_batch_size = hd_exp.param('mini batch size', int(2**params[1]))
    pkeep = hd_exp.param('dropout prob', 1)
    hyper_params = [layer_sizes, learning_rate, mini_batch_size, pkeep]
    hyper_param_str = make_hyper_param_str(hyper_params)

    # Run the actual training and time it.
    logger.info('Start training for ' + hyper_param_str)
    tic = time.time()
    log_df, best_error = train(train_tuple, validation_tuple, hyper_params,
                               nb_epochs, random_seed, hd_exp, project_dir)
    elapsed_time = time.time() - tic
    logger.info('Finished training in {} s.'.format(elapsed_time))

    # Persist the per-epoch training log for later inspection.
    logger.info('Writing pandas DF log to disk.')
    log_df.to_csv(project_dir + '/' + hyper_param_str + '/data_df.csv')

    # Finish Hyperdash Experiment
    hd_exp.end()

    return best_error
class HyperdashCallback(Callback):
    """Keras callback that streams training metrics to a Hyperdash experiment.

    Reports per-epoch ``loss`` / ``val_loss`` (clipped at 0.5 for display)
    and a ``progress`` metric derived from the epoch-over-epoch loss drop
    (clipped at 0.1).
    """

    exp = None  # hyperdash Experiment, created when training starts
    last = 1    # previous epoch's loss, baseline for the progress metric

    def on_train_begin(self, logs=None):
        # Open a fresh experiment and reset the progress baseline so a
        # reused callback instance does not carry state between runs.
        self.exp = Experiment("Deep Weather")
        self.last = 1

    def on_train_end(self, logs=None):
        self.exp.end()

    def on_epoch_end(self, epoch, logs=None):
        # Keras may invoke callbacks with logs=None; the old code then
        # crashed on the membership test.
        logs = logs or {}
        if 'loss' in logs:
            self.exp.metric("progress", min(0.1, self.last - logs["loss"]))
            self.last = logs["loss"]
            self.exp.metric("loss", min(0.5, logs["loss"]))
            # val_loss is only present when validation data is configured;
            # unconditional indexing raised KeyError otherwise.
            if "val_loss" in logs:
                self.exp.metric("val_loss", min(0.5, logs["val_loss"]))
Пример #7
0
    def test_experiment_keras_callback(self):
        """Drive the Keras callback through two epochs and verify that the
        server received exactly the expected metric messages, in order,
        and that nothing was written to stdout that looks like an error."""
        with patch("sys.stdout", new=StringIO()) as faked_out:
            exp = Experiment("MNIST")
            keras_cb = exp.callbacks.keras
            keras_cb.on_epoch_end(0, {"val_acc": 1, "val_loss": 2})
            # Sleep 1 second due to client sampling
            time.sleep(1)
            keras_cb.on_epoch_end(1, {"val_acc": 3, "val_loss": 4})
            exp.end()

        # Metric payloads are the ones carrying a "name" field.
        metrics_messages = [
            msg["payload"] for msg in server_sdk_messages
            if "name" in msg["payload"]
        ]
        expect_metrics = [
            {"is_internal": False, "name": name, "value": value}
            for name, value in (
                ("val_acc", 1),
                ("val_loss", 2),
                ("val_acc", 3),
                ("val_loss", 4),
            )
        ]
        assert len(expect_metrics) == len(metrics_messages)
        for message, expected in zip(metrics_messages, expect_metrics):
            assert message["is_internal"] == expected["is_internal"]
            assert message["name"] == expected["name"]
            assert message["value"] == expected["value"]

        captured_out = faked_out.getvalue()
        assert "error" not in captured_out
Пример #8
0
class BaseTrainer(_BaseTrainer):
    """ Base trainer to make pytorch training be easier.

    Args:
        data-augmentation (bool): Crop randomly and add random noise for data augmentation.
        epoch (int): Number of epochs to train.
        opt (str): Optimization method.
        gpu (bool): Use GPU.
        seed (str): Random seed to train.
        train (str): Path to training image-pose list file.
        val (str): Path to validation image-pose list file.
        batchsize (int): Learning minibatch size.
        out (str): Output directory.
        resume (str): Initialize the trainer from given file.
            The file name is 'epoch-{epoch number}.iter'.
        resume_model (str): Load model definition file to use for resuming training
            (it\'s necessary when you resume a training).
            The file name is 'epoch-{epoch number}.model'.
        resume_opt (str): Load optimization states from this file
            (it\'s necessary when you resume a training).
            The file name is 'epoch-{epoch number}.state'.
        hyperdash (str): Name of the Hyperdash experiment, or None to disable logging.
    """

    def __init__(self, **kwargs):
        self.data_augmentation = kwargs['data_augmentation']
        self.epoch = kwargs['epoch']
        # gpu is passed as an int device id; negative means CPU.
        self.gpu = (kwargs['gpu'] >= 0)
        self.opt = kwargs['opt']
        self.seed = kwargs['seed']
        self.train = kwargs['train']
        self.val = kwargs['val']
        self.batchsize = kwargs['batchsize']
        self.out = kwargs['out']
        self.resume = kwargs['resume']
        self.resume_model = kwargs['resume_model']
        self.resume_opt = kwargs['resume_opt']
        self.hyperdash = kwargs['hyperdash']
        if self.hyperdash:
            self.experiment = Experiment(self.hyperdash)
            for key, val in kwargs.items():
                self.experiment.param(key, val)
        # validate arguments.
        self._validate_arguments()
        # 0 is a sentinel meaning "no best loss recorded yet".
        self.lowest_loss = 0
        self.device = torch.device('cuda' if kwargs['gpu'] >= 0 else 'cpu')
        self.dataloader = torch.utils.data.DataLoader

    def _validate_arguments(self):
        """Fail fast on unsupported or inconsistent settings."""
        if self.seed is not None and self.data_augmentation:
            raise NotSupportedError('It is not supported to fix random seed for data augmentation.')
        if self.gpu and not torch.cuda.is_available():
            raise GPUNotFoundError('GPU is not found.')
        if self.opt not in ('MomentumSGD', 'Adam'):
            raise UnknownOptimizationMethodError(
                '{0} is unknown optimization method.'.format(self.opt))
        if self.resume is not None:
            # Resuming needs all three files: trainer state, model, optimizer.
            for path in (self.resume, self.resume_model, self.resume_opt):
                if not os.path.isfile(path):
                    raise FileNotFoundError('{0} is not found.'.format(path))

    # TODO: make it acceptable multiple optimizer, or define out of this trainer.
    def _get_optimizer(self, model, **kwargs):
        """Build the optimizer named by ``self.opt`` for ``model``'s parameters."""
        if self.opt == 'MomentumSGD':
            optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
        elif self.opt == "Adam":
            optimizer = optim.Adam(model.parameters())
        else:
            try:
                # Previously the model parameters were not passed here and a
                # nonexistent exception type was caught, leaving `optimizer`
                # unbound; getattr raises AttributeError for unknown names.
                optimizer = getattr(optim, self.opt)(model.parameters(), **kwargs)
            except AttributeError:
                print("This optim is not available. See https://pytorch.org/docs/stable/optim.html")
                raise
        return optimizer

    def forward(self, batch, model, criterion, isTest=False):
        """Run one forward pass and return the loss tensor.

        Args:
            batch: (data, target) pair of tensors.
            model: network to evaluate.
            criterion: loss function.
            isTest: disable gradient tracking for evaluation batches.
                (Callers already passed this flag; the old signature
                rejected it with a TypeError.)
        """
        data, target = map(lambda d: d.to(self.device), batch)
        if isTest:
            with torch.no_grad():
                output = model(data)
                loss = criterion(output, target)
        else:
            output = model(data)
            loss = criterion(output, target)
        return loss

    def _train(self, model, optimizer, criterion, train_iter, logger, start_time, log_interval=10):
        """Train for one epoch; return the mean loss over the epoch."""
        model.train()
        loss_sum = 0.0
        for iteration, batch in enumerate(tqdm(train_iter, desc='this epoch'), 1):
            optimizer.zero_grad()
            loss = self.forward(batch, model, criterion, isTest=False)
            # Accumulate a plain float so each batch's autograd graph is
            # released (the old `loss_sum += loss` kept them all alive).
            loss_sum += loss.item()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 500)
            optimizer.step()
            if self.hyperdash:
                # Log the exact loss; the old int() cast truncated it.
                self.experiment.metric("loss", loss.item(), log=False)
            if iteration % log_interval == 0:
                # loss.data[0] fails on 0-dim tensors in modern PyTorch.
                log = 'elapsed_time: {0}, loss: {1}'.format(time.time() - start_time, loss.item())
                logger.write(log)
        return loss_sum / len(train_iter)

    def _test(self, model, test_iter, criterion, logger, start_time):
        """Evaluate on the validation set; return the mean loss (float)."""
        model.eval()
        test_loss = 0
        for batch in test_iter:
            loss = self.forward(batch, model, criterion, isTest=True)
            print('Test loss: {}'.format(loss.data))
            test_loss += loss.item()
        test_loss /= len(test_iter)
        log = 'elapsed_time: {0}, validation/loss: {1}'.format(time.time() - start_time, test_loss)
        if self.hyperdash:
            # test_loss is already a Python float here; the previous
            # int(test_loss.cpu()...) call raised AttributeError.
            self.experiment.metric('test_loss', test_loss)
        logger.write(log)
        return test_loss

    def _checkpoint(self, epoch, model, optimizer, logger):
        """Save trainer/model/optimizer state as epoch-{n}.{iter,model,state}."""
        filename = os.path.join(self.out, 'epoch-{0}'.format(epoch + 1))
        torch.save({'epoch': epoch + 1, 'logger': logger.state_dict()}, filename + '.iter')
        torch.save(model.state_dict(), filename + '.model')
        torch.save(optimizer.state_dict(), filename + '.state')

    def _best_checkpoint(self, epoch, model, optimizer, logger):
        """Save the current best model under the fixed name 'best_model'."""
        filename = os.path.join(self.out, 'best_model')
        torch.save({'epoch': epoch + 1, 'logger': logger.state_dict()}, filename + '.iter')
        torch.save(model.state_dict(), filename + '.model')
        torch.save(optimizer.state_dict(), filename + '.state')

    def fit(self, model, train_data, val_data, criterion):
        """ Execute training """
        # set random seed.
        if self.seed is not None:
            random.seed(self.seed)
            torch.manual_seed(self.seed)
            if self.gpu:
                torch.cuda.manual_seed(self.seed)
        # initialize model to train.
        if self.resume_model:
            model.load_state_dict(torch.load(self.resume_model))
        # prepare gpu.
        if self.gpu:
            model.cuda()
        # load the datasets.
        train_iter = self.dataloader(train_data, batch_size=self.batchsize, shuffle=True)
        val_iter = self.dataloader(val_data, batch_size=3, shuffle=False)
        # set up an optimizer.
        optimizer = self._get_optimizer(model)
        if self.resume_opt:
            optimizer.load_state_dict(torch.load(self.resume_opt))
        # set intervals.
        val_interval = 3
        # Checkpoint roughly 10 times per run, at least once per epoch for
        # short runs (the old `self.epoch / 10` was a float and could be 0).
        resume_interval = max(1, self.epoch // 10)
        log_interval = 10
        # set logger and start epoch.
        logger = TrainLogger(self.out)
        start_epoch = 0
        if self.resume:
            resume = torch.load(self.resume)
            start_epoch = resume['epoch']
            logger.load_state_dict(resume['logger'])
        # start training.
        start_time = time.time()
        loss = 0
        for epoch in trange(start_epoch, self.epoch, initial=start_epoch, total=self.epoch, desc='     total'):
            # The old call passed log_interval/logger/start_time in the
            # wrong positions relative to _train's signature.
            self._train(model, optimizer, criterion, train_iter, logger,
                        start_time, log_interval=log_interval)
            if (epoch) % val_interval == 0:
                loss = self._test(model, val_iter, criterion, logger, start_time)
                if self.lowest_loss == 0 or self.lowest_loss > loss:
                    logger.write('Best model updated. loss: {} => {}'.format(self.lowest_loss, loss))
                    self._best_checkpoint(epoch, model, optimizer, logger)
                    self.lowest_loss = loss
            if (epoch + 1) % resume_interval == 0:
                self._checkpoint(epoch, model, optimizer, logger)

        if self.hyperdash:
            self.experiment.end()

    @staticmethod
    def get_args():
        """Parse command-line arguments for the trainer."""
        # arg definition
        parser = argparse.ArgumentParser(
            description='Training pose net for comparison \
            between chainer and pytorch about implementing DeepPose.')
        parser.add_argument(
            '--data-augmentation', '-a', action='store_true', help='Crop randomly and add random noise for data augmentation.')
        parser.add_argument(
            '--epoch', '-e', type=int, default=100, help='Number of epochs to train.')
        parser.add_argument(
            '--opt', '-o', type=str, default='Adam',
            choices=['MomentumSGD', 'Adam'], help='Optimization method.')
        parser.add_argument(
            '--gpu', '-g', type=int, default=0, help='GPU ID (negative value indicates CPU).')
        parser.add_argument(
            '--seed', '-s', type=int, help='Random seed to train.')
        parser.add_argument(
            '--train', type=str, default='data/train', help='Path to training image-pose list file.')
        parser.add_argument(
            '--val', type=str, default='data/test', help='Path to validation image-pose list file.')
        parser.add_argument(
            '--batchsize', type=int, default=32, help='Learning minibatch size.')
        parser.add_argument(
            '--out', default='result', help='Output directory')
        parser.add_argument(
            '--resume', default=None,
            help='Initialize the trainer from given file. \
            The file name is "epoch-{epoch number}.iter".')
        parser.add_argument(
            '--resume-model', type=str, default=None,
            help='Load model definition file to use for resuming training \
            (it\'s necessary when you resume a training). \
            The file name is "epoch-{epoch number}.mode"')
        parser.add_argument(
            '--resume-opt', type=str, default=None,
            help='Load optimization states from this file \
            (it\'s necessary when you resume a training). \
            The file name is "epoch-{epoch number}.state"')
        parser.add_argument(
            '--hyperdash', type=str, default=None,
            help='If you use hyperdash logging, enter here the name of experiment. Before using, you have to login to hyperdash with "hyperdash login --github". The default is None that means no logging with hyperdash')
        args = parser.parse_args()
        return args
def train_multidec(args):
    """Train image, text and fused multi-modal classifiers with 5-fold CV.

    For each fold: pretrain single-modal image and text classifiers, then
    either (a) train a WeightCalculator and combine all three into a
    MultiClassifier, or (b) combine the two single-modal classifiers with
    the fixed mixing weight ``args.fixed_weight``. Per-fold and averaged
    scores are printed; hyperparameters are mirrored to Hyperdash.
    """
    print("Training started")
    device = torch.device(args.gpu)
    # One feature CSV per modality, indexed by post short-code.
    df_image_data = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.image_csv),
                                index_col=0,
                                encoding='utf-8-sig')
    df_text_data = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.text_csv),
                               index_col=0,
                               encoding='utf-8-sig')

    df_label = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.label_csv),
                           index_col=0,
                           encoding='utf-8-sig')
    df_weight = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.weight_csv),
                            index_col=0,
                            encoding='utf-8-sig')
    short_code_array = np.array(df_label.index)
    label_array = np.array(df_label['category'])
    # Assumes 'category' holds 0-based integer class labels, so the class
    # count is max label + 1 — TODO confirm against the label CSV.
    n_classes = np.max(label_array) + 1

    exp = Experiment("multi_classifier", capture_io=True)
    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        # NOTE(review): random_state has no effect on KFold unless
        # shuffle=True is also passed (newer scikit-learn raises a
        # ValueError here) — confirm the intended split behavior.
        kf = KFold(n_splits=5, random_state=42)
        image_score_list = []
        text_score_list = []
        multi_score_list = []
        kf_count = 0
        for train_index, val_index in kf.split(short_code_array):
            print("Current fold: ", kf_count)
            short_code_train = short_code_array[train_index]
            short_code_val = short_code_array[val_index]
            label_train = label_array[train_index]
            label_val = label_array[val_index]
            df_train = pd.DataFrame(data=label_train,
                                    index=short_code_train,
                                    columns=df_label.columns)
            df_val = pd.DataFrame(data=label_val,
                                  index=short_code_val,
                                  columns=df_label.columns)
            print("Loading dataset...")
            train_dataset, val_dataset = load_multi_csv_data(
                df_image_data, df_text_data, df_weight, df_train, df_val,
                CONFIG)
            print("\nLoading dataset completed")

            if args.fixed_weight is None:
                # Branch (a): learn the modality mixing weight.
                image_classifier = SingleClassifier(device=device,
                                                    input_dim=args.input_dim,
                                                    filter_num=64,
                                                    n_classes=n_classes)
                text_classifier = SingleClassifier(device=device,
                                                   input_dim=args.input_dim,
                                                   filter_num=64,
                                                   n_classes=n_classes)
                print("pretraining image classifier...")
                # input_modal=1 selects the image stream of the dataset.
                image_classifier.fit(
                    train_dataset,
                    val_dataset,
                    input_modal=1,
                    lr=args.lr,
                    num_epochs=args.pretrain_epochs,
                    save_path=os.path.join(CONFIG.CHECKPOINT_PATH,
                                           "image_classifier") + ".pt")
                # fit() checkpoints the best epoch; reload it before reuse.
                image_classifier.load_model(
                    os.path.join(CONFIG.CHECKPOINT_PATH, "image_classifier") +
                    ".pt")
                print("pretraining text classifier...")
                # input_modal=2 selects the text stream of the dataset.
                text_classifier.fit(
                    train_dataset,
                    val_dataset,
                    input_modal=2,
                    lr=args.lr,
                    num_epochs=args.pretrain_epochs,
                    save_path=os.path.join(CONFIG.CHECKPOINT_PATH,
                                           "text_classifier") + ".pt")
                text_classifier.load_model(
                    os.path.join(CONFIG.CHECKPOINT_PATH, "text_classifier") +
                    ".pt")
                print("pretraining weight classifier...")
                # The weight calculator sees both modalities concatenated,
                # hence the doubled input dimension.
                weight_calculator = WeightCalculator(device=device,
                                                     input_dim=args.input_dim *
                                                     2,
                                                     n_classes=n_classes)
                weight_calculator.fit(
                    train_dataset,
                    val_dataset,
                    lr=args.lr,
                    num_epochs=args.pretrain_epochs,
                    save_path=os.path.join(CONFIG.CHECKPOINT_PATH,
                                           "weight_calculator") + ".pt")
                weight_calculator.load_model(
                    os.path.join(CONFIG.CHECKPOINT_PATH, "weight_calculator") +
                    ".pt")
                multi_classifier = MultiClassifier(
                    device=device,
                    image_classifier=image_classifier,
                    text_classifier=text_classifier,
                    weight_calculator=weight_calculator)
                print(multi_classifier)
                print("training multi classifier...")
                multi_classifier.fit(
                    train_dataset,
                    val_dataset,
                    lr=args.lr,
                    batch_size=args.batch_size,
                    num_epochs=args.epochs,
                    save_path=os.path.join(CONFIG.CHECKPOINT_PATH,
                                           "multi_classifier") + ".pt")
            else:
                # Branch (b): same pretraining, but the fusion uses the
                # user-supplied fixed weight (no WeightCalculator), and
                # checkpoints are suffixed with that weight.
                image_classifier = SingleClassifier(device=device,
                                                    input_dim=args.input_dim,
                                                    filter_num=64,
                                                    n_classes=n_classes)
                text_classifier = SingleClassifier(device=device,
                                                   input_dim=args.input_dim,
                                                   filter_num=64,
                                                   n_classes=n_classes)
                print("pretraining image classifier...")
                image_classifier.fit(
                    train_dataset,
                    val_dataset,
                    input_modal=1,
                    lr=args.lr,
                    num_epochs=args.pretrain_epochs,
                    save_path=os.path.join(
                        CONFIG.CHECKPOINT_PATH, "image_classifier_fw_" +
                        str(args.fixed_weight)) + ".pt")
                image_classifier.load_model(
                    os.path.join(
                        CONFIG.CHECKPOINT_PATH, "image_classifier_fw_" +
                        str(args.fixed_weight)) + ".pt")
                print("pretraining text classifier...")
                text_classifier.fit(
                    train_dataset,
                    val_dataset,
                    input_modal=2,
                    lr=args.lr,
                    num_epochs=args.pretrain_epochs,
                    save_path=os.path.join(
                        CONFIG.CHECKPOINT_PATH, "text_classifier_fw_" +
                        str(args.fixed_weight)) + ".pt")
                text_classifier.load_model(
                    os.path.join(
                        CONFIG.CHECKPOINT_PATH, "text_classifier_fw_" +
                        str(args.fixed_weight)) + ".pt")
                multi_classifier = MultiClassifier(
                    device=device,
                    image_classifier=image_classifier,
                    text_classifier=text_classifier,
                    fixed_weight=args.fixed_weight)
                print(multi_classifier)
                print("training multi classifier with fixed weight...")
                multi_classifier.fit(
                    train_dataset,
                    val_dataset,
                    lr=args.lr,
                    batch_size=args.batch_size,
                    num_epochs=args.epochs,
                    save_path=os.path.join(
                        CONFIG.CHECKPOINT_PATH, "multi_classifier_fw_" +
                        str(args.fixed_weight)) + ".pt")

            print("Finish!!!")
            # `.score` is presumably each classifier's best validation
            # score recorded during fit() — verify against the class.
            print(
                "#current fold best image score: %.6f, text score: %.6f multi score: %.6f"
                % (image_classifier.score, text_classifier.score,
                   multi_classifier.score))
            image_score_list.append(image_classifier.score)
            text_score_list.append(text_classifier.score)
            multi_score_list.append(multi_classifier.score)
            kf_count = kf_count + 1

        # Cross-validation summary over the 5 folds.
        print(
            "#average image score: %.6f, text score: %.6f multi score: %.6f" %
            (np.mean(image_score_list), np.mean(text_score_list),
             np.mean(multi_score_list)))

    finally:
        exp.end()
Пример #10
0
def train_multidec(args):
    """Train and evaluate an LSTM text classifier over cross-validation folds.

    Loads a pretrained word-embedding matrix, an optional dictionary list for
    dictionary-embedding features, post texts, and labels; then, for each fold
    in ``[args.start_fold, args.fold)``, fits a ``TextModel`` on that fold's
    train split and evaluates on its test split.  Per-fold accuracy, NMI and
    F1 are collected and their averages printed at the end.

    NOTE(review): despite the name, this function trains a text LSTM (the
    banner below says "test lstm"), not a multimodal DEC model.

    Args:
        args: parsed CLI namespace; uses gpu, target_dataset, label_csv,
            start_fold, fold, sampled_n, use_de, batch_size, dropout, lr,
            epochs.
    """
    print("Training test lstm")
    device = torch.device(args.gpu)

    # Pretrained word-embedding matrix (pickled, index-aligned with word_idx).
    with open(
            os.path.join(CONFIG.DATASET_PATH, args.target_dataset,
                         'word_embedding.p'), "rb") as f:
        embedding_model = cPickle.load(f)
    # Word lists used to build the per-cluster dictionary embedding (use_de).
    with open(
            os.path.join(CONFIG.DATASET_PATH, args.target_dataset,
                         'dictionary_list.p'), 'rb') as f:
        dictionary_list = cPickle.load(f)
    # word_idx[0] is presumably idx->word, word_idx[1] word->idx — only
    # word_idx[1] is used below; confirm against the dataset-building code.
    with open(os.path.join(CONFIG.DATASET_PATH, args.target_dataset,
                           'word_idx.json'),
              "r",
              encoding='utf-8') as f:
        word_idx = json.load(f)
    # Raw post texts, indexed by post id (no header row in the CSV).
    df_text_data = pd.read_csv(os.path.join(CONFIG.DATASET_PATH,
                                            args.target_dataset, 'posts.csv'),
                               index_col=0,
                               header=None,
                               encoding='utf-8-sig')
    print(df_text_data[:5])
    df_label = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.label_csv),
                           index_col=0,
                           encoding='utf-8-sig')
    # Keep only the posts that have a label.
    df_text_data = df_text_data.loc[df_label.index]
    label_array = np.array(df_label['category'])
    # Assumes labels are 0-based consecutive integers, so max+1 = #classes.
    n_clusters = np.max(label_array) + 1

    exp = Experiment("Text lstm", capture_io=True)

    # Record all CLI arguments on the hyperdash experiment.
    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        acc_list = []
        nmi_list = []
        f_1_list = []
        kf_count = 0
        for fold_idx in range(args.start_fold, args.fold):
            print("Current fold: ", kf_count)
            # Per-fold train/test label splits live in pre-generated CSVs.
            df_train = pd.read_csv(os.path.join(
                CONFIG.CSV_PATH,
                "train_" + str(fold_idx) + "_" + args.label_csv),
                                   index_col=0,
                                   encoding='utf-8-sig')
            # Optionally sub-sample the training split (fixed seed for
            # reproducibility across runs).
            if args.sampled_n is not None:
                df_train = df_train.sample(n=args.sampled_n, random_state=42)
            df_test = pd.read_csv(os.path.join(
                CONFIG.CSV_PATH,
                "test_" + str(fold_idx) + "_" + args.label_csv),
                                  index_col=0,
                                  encoding='utf-8-sig')
            # Frozen (non-trainable) embedding layer from the pretrained matrix.
            embedding = nn.Embedding.from_pretrained(
                torch.FloatTensor(embedding_model))
            if args.use_de:
                print("make dictionary embedding")
                # Dictionary embedding adds one feature per cluster, widening
                # the LSTM input accordingly.
                dictionary_embedding = make_de(df_text_data, df_train,
                                               dictionary_list, n_clusters)
                input_size = int(embedding.embedding_dim + n_clusters)
                print("Loading dataset...")
                train_dataset, test_dataset = load_text_data(
                    df_text_data,
                    df_train,
                    df_test,
                    CONFIG,
                    word2idx=word_idx[1],
                    n_clusters=n_clusters,
                    de=dictionary_embedding)
                print("\nLoading dataset completed")
            else:
                input_size = int(embedding.embedding_dim)
                print("Loading dataset...")
                train_dataset, test_dataset = load_text_data(
                    df_text_data,
                    df_train,
                    df_test,
                    CONFIG,
                    word2idx=word_idx[1],
                    n_clusters=n_clusters)
                print("\nLoading dataset completed")
            text_encoder = LSTMClassifier(device=device,
                                          batch_size=args.batch_size,
                                          input_size=input_size,
                                          output_size=n_clusters,
                                          hidden_size=[128, 256, 512],
                                          embedding=embedding,
                                          dropout=args.dropout)
            text_model = TextModel(device=device, text_encoder=text_encoder)
            # save_path=None: per-fold models are evaluated but not persisted.
            text_model.fit(train_dataset,
                           lr=args.lr,
                           batch_size=args.batch_size,
                           num_epochs=args.epochs,
                           save_path=None,
                           use_de=args.use_de)
            text_model.predict(test_dataset,
                               batch_size=args.batch_size,
                               use_de=args.use_de)
            # predict() is expected to populate acc/nmi/f_1 on the model.
            acc_list.append(text_model.acc)
            nmi_list.append(text_model.nmi)
            f_1_list.append(text_model.f_1)
            kf_count = kf_count + 1
        print("#Average acc: %.4f, Average nmi: %.4f, Average f_1: %.4f" %
              (np.mean(acc_list), np.mean(nmi_list), np.mean(f_1_list)))

    finally:
        # Always close the hyperdash experiment, even on error/interrupt.
        exp.end()
Пример #11
0
def main():
    """Train and evaluate a CNN on CIFAR10 with checkpointing and logging.

    Creates (or reuses) a run directory under ``log/``, writes args and
    per-epoch results to CSV, logs to TensorBoard and optionally hyperdash,
    and checkpoints the model/optimizer after every epoch.  Passing
    ``--checkpoint_dir_name`` resumes training from that run's checkpoint.
    """
    start_time = time()
    args = get_args()
    # Reuse the existing directory when resuming; otherwise create a fresh
    # timestamped run directory.
    if args.checkpoint_dir_name:
        dir_name = args.checkpoint_dir_name
    else:
        dir_name = datetime.datetime.now().strftime('%y%m%d%H%M%S')
    path_to_dir = Path(__file__).resolve().parents[1]
    path_to_dir = os.path.join(path_to_dir, *['log', dir_name])
    os.makedirs(path_to_dir, exist_ok=True)
    # tensorboard
    path_to_tensorboard = os.path.join(path_to_dir, 'tensorboard')
    os.makedirs(path_to_tensorboard, exist_ok=True)
    writer = SummaryWriter(path_to_tensorboard)
    # model saving
    os.makedirs(os.path.join(path_to_dir, 'model'), exist_ok=True)
    path_to_model = os.path.join(path_to_dir, *['model', 'model.tar'])
    # csv
    os.makedirs(os.path.join(path_to_dir, 'csv'), exist_ok=True)
    path_to_results_csv = os.path.join(path_to_dir, *['csv', 'results.csv'])
    path_to_args_csv = os.path.join(path_to_dir, *['csv', 'args.csv'])
    # Persist the run arguments once, only for fresh (non-resumed) runs.
    if not args.checkpoint_dir_name:
        with open(path_to_args_csv, 'a') as f:
            args_dict = vars(args)
            param_writer = csv.DictWriter(f, list(args_dict.keys()))
            param_writer.writeheader()
            param_writer.writerow(args_dict)

    # logging using hyperdash
    if not args.no_hyperdash:
        from hyperdash import Experiment
        exp = Experiment('Classification task on CIFAR10 dataset with CNN')
        # Register every CLI arg as a hyperdash param (exec keeps args in
        # sync with whatever exp.param returns).
        for key in vars(args).keys():
            exec("args.%s = exp.param('%s', args.%s)" % (key, key, key))
    else:
        exp = None

    path_to_dataset = os.path.join(
        Path(__file__).resolve().parents[2], 'datasets')
    os.makedirs(path_to_dataset, exist_ok=True)
    train_loader, eval_loader, classes = get_loader(
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        path_to_dataset=path_to_dataset)

    # show some of the training images, for fun.
    dataiter = iter(train_loader)
    # BUGFIX: iterators lost the .next() method in Python 3 (and modern
    # DataLoader iterators never had it) — use the next() builtin.
    images, labels = next(dataiter)
    img_grid = torchvision.utils.make_grid(images)
    matplotlib_imshow(img_grid)
    writer.add_image('four_CIFAR10_images', img_grid)

    # define a network, loss function and optimizer
    model = CNN()
    writer.add_graph(model, images)
    model = torch.nn.DataParallel(model)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)
    start_epoch = 0
    # resume training
    if args.checkpoint_dir_name:
        print('\nLoading the model...')
        checkpoint = torch.load(path_to_model)
        # BUGFIX: model.state_dict(...) only *returns* parameters and never
        # restored the checkpoint; load_state_dict() actually loads weights.
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
    summary(model, input_size=(3, 32, 32))
    model.to(args.device)

    # train the network
    print('\n--------------------')
    print('Start training and evaluating the CNN')
    for epoch in range(start_epoch, args.n_epoch):
        start_time_per_epoch = time()
        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, args.device, writer, epoch,
                                      classes)
        eval_loss, eval_acc = eval(eval_loader, model, criterion, args.device)
        elapsed_time_per_epoch = time() - start_time_per_epoch
        result_dict = {
            'epoch': epoch,
            'train_loss': train_loss,
            'eval_loss': eval_loss,
            'train_acc': train_acc,
            'eval_acc': eval_acc,
            'elapsed time': elapsed_time_per_epoch
        }
        with open(path_to_results_csv, 'a') as f:
            result_writer = csv.DictWriter(f, list(result_dict.keys()))
            # Header only on the very first epoch of a fresh run.
            if epoch == 0: result_writer.writeheader()
            result_writer.writerow(result_dict)
        # checkpoint
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, path_to_model)
        if exp:
            exp.metric('train loss', train_loss)
            exp.metric('eval loss', eval_loss)
            exp.metric('train acc', train_acc)
            exp.metric('eval acc', eval_acc)
        else:
            print(result_dict)

        writer.add_scalar('loss/train_loss', train_loss,
                          epoch * len(train_loader))
        writer.add_scalar('loss/eval_loss', eval_loss,
                          epoch * len(eval_loader))
        writer.add_scalar('acc/train_acc', train_acc,
                          epoch * len(train_loader))
        writer.add_scalar('acc/eval_acc', eval_acc, epoch * len(eval_loader))

    elapsed_time = time() - start_time
    print('\nFinished Training, elapsed time ===> %f' % elapsed_time)
    if exp:
        exp.end()
    writer.close()
Пример #12
0
def main():
    """Train an image classifier whose model class is loaded dynamically.

    Resolves ``args.model_name`` from the Python file ``args.model_file``,
    trains it with SGD on a dataset built from ``args.train_json``, logs to a
    file logger and a hyperdash Experiment, and checkpoints whenever the
    validation accuracy improves.
    """

    args = parse_args()

    # set random seed
    #logger.info('> set random seed {}'.format(args.seed))
    random.seed(args.seed)
    np.random.seed(args.seed)

    # Set up Devices
    #logger.info('> set gpu device {}'.format(args.gpus))
    num_cuda_devices = utils.set_devices(args.gpus)

    # Load model
    #logger.info('> load model {}'.format(args.model_name))
    # Turn the file path into a dotted module path for import_module.
    # NOTE(review): str.replace removes *every* occurrence of the extension
    # substring, not just a trailing one — confirm model paths can never
    # contain it elsewhere (e.g. "models.py/net.py").
    ext = os.path.splitext(args.model_file)[1]
    model_path = '.'.join(os.path.split(args.model_file)).replace(ext, '')
    model = import_module(model_path)
    model = getattr(model, args.model_name)(args.output_class)
    if num_cuda_devices > 0:
        model = torch.nn.DataParallel(model)
        model.cuda()

    logger.info('> set optimizer')
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.initial_lr,
                          momentum=args.lr_momentum)

    # Create result dir
    result_dir = create_result_dir(args.model_name)

    # Mirror log output into a file inside the result directory.
    fh_handler = logging.FileHandler(os.path.join(result_dir, "log"))
    fh_handler.setFormatter(
        logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    logger.addHandler(fh_handler)

    # Snapshot the model file and all scripts for reproducibility.
    shutil.copy(args.model_file,
                os.path.join(result_dir, os.path.basename(args.model_file)))
    script_file_list = glob.glob('./*.py') + glob.glob('./*.sh')
    for file_name in script_file_list:
        shutil.copy(file_name,
                    os.path.join(result_dir, os.path.basename(file_name)))
    with open(os.path.join(result_dir, 'args'), 'w') as fp:
        fp.write(json.dumps(vars(args)))
    print(json.dumps(vars(args), sort_keys=True, indent=4))

    # Create Dataset
    logger.info('> Creating DataSet')
    train_transform = partial(transforms.transform_f,
                              random_angle=args.random_angle,
                              expand_ratio=args.expand_ratio,
                              crop_size=args.crop_size,
                              train=True)
    train = getdataset.getCcoreDataset(args.train_json, train_transform,
                                       args.train_mode)

    # NOTE(review): the validation transform uses train=True and the val
    # dataset is built from args.train_json — validation appears to run on
    # (augmented) training data; confirm whether a separate val_json and
    # train=False were intended.
    val_transform = partial(transforms.transform_f,
                            random_angle=args.random_angle,
                            expand_ratio=args.expand_ratio,
                            crop_size=args.crop_size,
                            train=True)
    val = getdataset.getCcoreDataset(args.train_json, val_transform,
                                     args.train_mode)

    # Create DataLoader
    logger.info('> create dataloader')
    train_loader = torch.utils.data.DataLoader(train,
                                               batch_size=args.batchsize,
                                               shuffle=True,
                                               num_workers=4)
    val_loader = torch.utils.data.DataLoader(val,
                                             batch_size=args.batchsize,
                                             shuffle=False,
                                             num_workers=4)

    # Training
    logger.info('> run training')
    best_prec = 0

    # Create Hyperdash Experiment
    logger.info('> Create Hyperdash Experiment {}'.format(
        args.experiment_name))
    exp = Experiment(args.experiment_name,
                     api_key_getter=utils.get_api_key_from_env)

    for epoch in tqdm(range(args.training_epoch)):

        training_result = training(train_loader, model, criterion, optimizer)
        val_result = validate(val_loader, model, criterion)

        result_str = 'epoch : {} / {}\
        main/loss : {:.3f}\
        main/acc : {:.3f}\
        val/loss : {:.3f}\
        val/acc : {:.3f}'.format(epoch, args.training_epoch,
                                 training_result['loss'],
                                 training_result['acc'], val_result['loss'],
                                 val_result['acc'])
        logger.info(result_str)
        exp.log(result_str)

        prec1 = val_result['acc']

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec
        best_prec = max(prec1, best_prec)
        if is_best:
            save_checkpoint(
                state={
                    'epoch': epoch + 1,
                    #'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec': best_prec,
                    'optimizer': optimizer.state_dict(),
                },
                is_best=is_best,
                result_dir=result_dir)

        exp.metric('main/loss', training_result['loss'])
        exp.metric('val/loss', val_result['loss'])

    logger.info('> end training')
    exp.end()
Пример #13
0
def train_multidec(args):
    """Train a MultiDEC model on paired image/text features.

    Reads image- and text-feature CSVs plus category labels, makes an 80/20
    train/validation split, restores pretrained per-modality SDAE encoders
    from checkpoints, and fits the joint MultiDEC clustering model while
    logging the run through a hyperdash Experiment.
    """
    print("Training multidec")
    device = torch.device(args.gpu)

    # Per-modality feature tables, indexed by post short-code.
    image_df = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.image_csv),
                           index_col=0,
                           encoding='utf-8-sig')
    text_df = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.text_csv),
                          index_col=0,
                          encoding='utf-8-sig')

    label_df = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.label_csv),
                           index_col=0,
                           encoding='utf-8-sig')
    codes = np.array(label_df.index)
    labels = np.array(label_df['category'])
    # Labels are 0-based consecutive integers, so max+1 gives the class count.
    n_clusters = np.max(labels) + 1

    # Hold out 20% of the labeled short-codes for validation (fixed seed).
    train_codes, val_codes, train_labels, val_labels = train_test_split(
        codes, labels, test_size=0.2, random_state=42)
    df_train = pd.DataFrame(data=train_labels,
                            index=train_codes,
                            columns=label_df.columns)
    df_val = pd.DataFrame(data=val_labels,
                          index=val_codes,
                          columns=label_df.columns)
    print("Loading dataset...")
    train_dataset, val_dataset = load_multi_csv_data(image_df, text_df,
                                                     df_train, df_val, CONFIG)
    print("Loading dataset completed")

    def build_encoder(prefix):
        # Restore a pretrained stacked-denoising-autoencoder encoder for one
        # modality ("image" or "text") from its checkpoint file.
        enc = MDEC_encoder(input_dim=args.input_dim,
                           z_dim=args.latent_dim,
                           n_clusters=n_clusters,
                           encodeLayer=[500, 500, 2000],
                           activation="relu",
                           dropout=0)
        enc.load_model(
            os.path.join(CONFIG.CHECKPOINT_PATH,
                         prefix + "_sdae_" + str(args.latent_dim)) + ".pt")
        return enc

    mdec = MultiDEC(device=device,
                    image_encoder=build_encoder("image"),
                    text_encoder=build_encoder("text"),
                    n_clusters=n_clusters)
    exp = Experiment("MDEC " + str(args.latent_dim), capture_io=True)
    print(mdec)

    # Record all CLI arguments on the experiment.
    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        mdec.fit(train_dataset,
                 val_dataset,
                 lr=args.lr,
                 batch_size=args.batch_size,
                 num_epochs=args.epochs,
                 save_path=CONFIG.CHECKPOINT_PATH)
        print("Finish!!!")

    finally:
        # Always close the hyperdash experiment, even on error/interrupt.
        exp.end()
# Load the labeled validation split (train_tuple is loaded earlier in the
# script with the same feature/label columns).
validation_tuple = load_labeled_csv(validation_filename, feature_cols,
                                    label_cols)

# Normalize training and validation data by training statistics
# (validation must use the *training* mean/std to avoid leakage).
train_mean = np.mean(train_tuple.features, axis=0)
train_std = np.std(train_tuple.features, axis=0)

# NOTE(review): in-place normalization; a constant feature column would make
# train_std zero here and divide by zero — confirm inputs rule that out.
train_tuple.features -= train_mean
train_tuple.features /= train_std

validation_tuple.features -= train_mean
validation_tuple.features /= train_std

logger.info('Finished importing and normalization of input data.')

# ------------------------ Training --------------------------------------- #

# Track this run on hyperdash under the hyper-parameter string.
hd_exp = Experiment(hyper_param_str)

# Run backpropagation training.
df, best_error = train(train_tuple, validation_tuple, hyper_params, nb_epochs,
                       random_seed, hd_exp, deep_cal_dir + '/code/')

logger.info('Writing log dataframe to csv on disk.')
df.to_csv(hyper_param_str + '/log_file.csv')

# Finish Hyperdash experiment.
hd_exp.end()

logger.info("PROGRAM END.")
Пример #15
0
def run_pusher3dof(args, sim=True, vanilla=False):
    """Train or test a DDPG agent on the (normalized) Pusher3Dof gym env.

    In ``train`` mode, optionally logs parameters/metrics to hyperdash when
    the library is installed; in ``test`` mode, evaluates a saved agent.

    Args:
        args: parsed CLI namespace (env, mode, seed, torques t0-t2, etc.).
        sim: True for the simulated env (uses args torques and coloring);
            False for the real-robot setup (unit torques, uncolored).
        vanilla: skip the custom env re-initialization when True.

    Raises:
        RuntimeError: if ``args.mode`` is neither 'train' nor 'test'.
    """
    try:
        from hyperdash import Experiment

        hyperdash_support = True
    # BUGFIX: narrowed from a bare `except:` — only a failed import should
    # disable hyperdash logging (a bare except also swallowed SystemExit etc.).
    except ImportError:
        hyperdash_support = False

    env = NormalizedEnv(gym.make(args.env))

    torques = [1.0] * 3  # if real
    colored = False

    if sim:
        torques = [args.t0, args.t1, args.t2]
        colored = True

    if not vanilla:
        env.env._init(
            torques=torques,
            colored=colored
        )

    # Seed numpy and the env for reproducibility (only when a seed is given).
    if args.seed > 0:
        np.random.seed(args.seed)
        env.seed(args.seed)

    nb_states = env.observation_space.shape[0]
    nb_actions = env.action_space.shape[0]

    agent = DDPG(nb_states, nb_actions, args)
    evaluate = Evaluator(
        args.validate_episodes,
        args.validate_steps,
        args.output,
        max_episode_length=args.max_episode_length
    )

    exp = None

    if args.mode == 'train':
        if hyperdash_support:
            prefix = "real"
            if sim: prefix = "sim"

            exp = Experiment("s2r-pusher3dof-ddpg-{}".format(prefix))
            import socket

            exp.param("host", socket.gethostname())
            exp.param("type", prefix)  # sim or real
            exp.param("vanilla", vanilla)  # vanilla or not
            exp.param("torques", torques)
            exp.param("folder", args.output)

            for arg in ["env", "max_episode_length", "train_iter", "seed", "resume"]:
                arg_val = getattr(args, arg)
                exp.param(arg, arg_val)

        train(args, args.train_iter, agent, env, evaluate,
              args.validate_steps, args.output,
              max_episode_length=args.max_episode_length, debug=args.debug, exp=exp)

        # when done
        # BUGFIX: exp is None when hyperdash isn't installed — the
        # unconditional exp.end() raised AttributeError after training.
        if exp is not None:
            exp.end()

    elif args.mode == 'test':
        test(args.validate_episodes, agent, env, evaluate, args.resume,
             visualize=args.vis, debug=args.debug, load_best=args.best)

    else:
        raise RuntimeError('undefined mode {}'.format(args.mode))
Пример #16
0
 def test_job():
     """Job that raises before reaching exp.end().

     Used to verify that hyperdash captures and reports an uncaught
     exception (``expected_exception``) raised inside a monitored job.
     """
     exp = Experiment("Exception experiment")
     time.sleep(0.1)
     raise Exception(expected_exception)
     # Unreachable by design: the test exercises the SDK's cleanup path when
     # a job dies before explicitly ending the experiment.
     exp.end()
Пример #17
0
def train_reconstruction(args):
    """Train a convolution/deconvolution text autoencoder.

    Loads a pretrained word embedding, builds a CNN encoder and deconvolution
    decoder over fixed-length sentences, and trains with NLL loss under a
    cyclical learning-rate schedule, periodically printing sample
    reconstructions and evaluating ROUGE on the validation set.  Supports
    resuming from a checkpoint via ``args.resume``.

    Args:
        args: parsed CLI namespace; uses gpu, target_dataset, batch_size,
            shuffle, filter_shape, filter_size, latent_size, tau, resume,
            weight_decay, half_cycle_interval, lr, lr_factor, epochs,
            log_interval.
    """
    device = torch.device(args.gpu)
    print("Loading embedding model...")
    # Pretrained word-embedding matrix (pickled) and the word<->index maps.
    with open(
            os.path.join(CONFIG.DATASET_PATH, args.target_dataset,
                         'word_embedding.p'), "rb") as f:
        embedding_model = cPickle.load(f)
    # word_idx[0]: idx->word (used for decoding samples); word_idx[1]:
    # word->idx (used to encode the dataset).
    with open(os.path.join(CONFIG.DATASET_PATH, args.target_dataset,
                           'word_idx.json'),
              "r",
              encoding='utf-8') as f:
        word_idx = json.load(f)
    print("Loading embedding model completed")
    print("Loading dataset...")
    train_dataset, val_dataset = load_text_data(args,
                                                CONFIG,
                                                word2idx=word_idx[1])
    print("Loading dataset completed")
    train_loader, val_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=args.shuffle),\
             DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)

    # Sequence lengths after each stride-2 convolution layer; t3 is the
    # temporal size entering the latent layer.
    # t1 = max_sentence_len + 2 * (args.filter_shape - 1)
    t1 = CONFIG.MAX_SENTENCE_LEN
    t2 = int(math.floor(
        (t1 - args.filter_shape) / 2) + 1)  # "2" means stride size
    t3 = int(math.floor((t2 - args.filter_shape) / 2) + 1)
    args.t3 = t3
    # Frozen embedding layer shared by encoder and decoder.
    embedding = nn.Embedding.from_pretrained(
        torch.FloatTensor(embedding_model))
    text_encoder = text_model.ConvolutionEncoder(embedding, t3,
                                                 args.filter_size,
                                                 args.filter_shape,
                                                 args.latent_size)
    text_decoder = text_model.DeconvolutionDecoder(embedding, args.tau, t3,
                                                   args.filter_size,
                                                   args.filter_shape,
                                                   args.latent_size, device)
    # Optionally restore encoder/decoder weights and the epoch counter.
    if args.resume:
        print("Restart from checkpoint")
        checkpoint = torch.load(os.path.join(CONFIG.CHECKPOINT_PATH,
                                             args.resume),
                                map_location=lambda storage, loc: storage)
        start_epoch = checkpoint['epoch']
        text_encoder.load_state_dict(checkpoint['text_encoder'])
        text_decoder.load_state_dict(checkpoint['text_decoder'])
    else:
        print("Start from initial")
        start_epoch = 0

    text_autoencoder = text_model.TextAutoencoder(text_encoder, text_decoder)
    criterion = nn.NLLLoss().to(device)
    text_autoencoder.to(device)

    # Base lr is 1.0 because the cyclical schedule below supplies the actual
    # lr as a multiplicative LambdaLR factor.
    optimizer = AdamW(text_autoencoder.parameters(),
                      lr=1.,
                      weight_decay=args.weight_decay,
                      amsgrad=True)
    step_size = args.half_cycle_interval * len(train_loader)
    clr = cyclical_lr(step_size,
                      min_lr=args.lr,
                      max_lr=args.lr * args.lr_factor)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [clr])
    # Optimizer/scheduler state must be restored after they are constructed.
    if args.resume:
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
    exp = Experiment("Text autoencoder " + str(args.latent_size),
                     capture_io=False)

    # Record all CLI arguments on the hyperdash experiment.
    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        text_autoencoder.train()

        for epoch in range(start_epoch, args.epochs):
            print("Epoch: {}".format(epoch))
            for steps, batch in enumerate(train_loader):
                torch.cuda.empty_cache()
                feature = Variable(batch).to(device)
                optimizer.zero_grad()
                prob = text_autoencoder(feature)
                # prob: (batch, seq, vocab) log-probs; NLLLoss wants the
                # class dimension second, hence the transpose.
                loss = criterion(prob.transpose(1, 2), feature)
                loss.backward()
                optimizer.step()
                scheduler.step()

                # Periodically print one input/reconstruction pair.
                if (steps * args.batch_size) % args.log_interval == 0:
                    input_data = feature[0]
                    single_data = prob[0]
                    _, predict_index = torch.max(single_data, 1)
                    input_sentence = util.transform_idx2word(
                        input_data.detach().cpu().numpy(),
                        idx2word=word_idx[0])
                    predict_sentence = util.transform_idx2word(
                        predict_index.detach().cpu().numpy(),
                        idx2word=word_idx[0])
                    print("Epoch: {} at {} lr: {}".format(
                        epoch, str(datetime.datetime.now()),
                        str(scheduler.get_lr())))
                    print("Steps: {}".format(steps))
                    print("Loss: {}".format(loss.detach().item()))
                    print("Input Sentence:")
                    print(input_sentence)
                    print("Output Sentence:")
                    print(predict_sentence)
                    del input_data, single_data, _, predict_index
                # Free per-batch tensors eagerly to reduce GPU memory pressure.
                del feature, prob, loss

            exp.log("\nEpoch: {} at {} lr: {}".format(
                epoch, str(datetime.datetime.now()), str(scheduler.get_lr())))
            _avg_loss, _rouge_1, _rouge_2 = eval_reconstruction_with_rouge(
                text_autoencoder, word_idx[0], criterion, val_loader, device)
            exp.log("\nEvaluation - loss: {}  Rouge1: {} Rouge2: {}".format(
                _avg_loss, _rouge_1, _rouge_2))

            # Checkpoint after every epoch (epoch+1 so resume continues from
            # the next epoch).
            util.save_models(
                {
                    'epoch': epoch + 1,
                    'text_encoder': text_encoder.state_dict(),
                    'text_decoder': text_decoder.state_dict(),
                    'avg_loss': _avg_loss,
                    'Rouge1:': _rouge_1,
                    'Rouge2': _rouge_2,
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict()
                }, CONFIG.CHECKPOINT_PATH,
                "text_autoencoder_" + str(args.latent_size))

        print("Finish!!!")

    finally:
        # Always close the hyperdash experiment, even on error/interrupt.
        exp.end()
Пример #18
0
    def test_experiment_handles_numpy_numbers(self):
        """The SDK must accept numpy scalar types as metric/param values.

        Sends one metric and one param per numpy scalar type through an
        Experiment, then checks that the messages received by the fake
        server match exactly (order and content).
        """
        nums_to_test = [
            ("int_", np.int_()),
            ("intc", np.intc()),
            ("intp", np.intp()),
            ("int8", np.int8()),
            ("int16", np.int16()),
            ("int32", np.int32()),
            ("int64", np.int64()),
            ("uint8", np.uint8()),
            ("uint16", np.uint16()),
            ("uint32", np.uint32()),
            ("uint64", np.uint64()),
            ("float16", np.float16()),
            ("float32", np.float32()),
            ("float64", np.float64()),
        ]
        # Make sure the SDK doesn't choke and JSON serialization works
        exp = Experiment("MNIST")
        for name, num in nums_to_test:
            exp.metric("test_metric_{}".format(name), num)
            exp.param("test_param_{}".format(name), num)
        exp.end()

        # Test params match what is expected
        params_messages = [
            msg["payload"] for msg in server_sdk_messages
            if "params" in msg["payload"]
        ]

        # One params message per param call, each carrying a single key.
        # (The original built these with a loop that assigned "is_internal"
        # twice; a comprehension expresses the same payload directly.)
        expected_params = [{
            "params": {
                "test_param_{}".format(name): num
            },
            "is_internal": False,
        } for name, num in nums_to_test]

        assert len(expected_params) == len(params_messages)
        for i, message in enumerate(params_messages):
            print(message)
            print(expected_params[i])
            assert message == expected_params[i]

        # Test metrics match what is expected
        metrics_messages = [
            msg["payload"] for msg in server_sdk_messages
            if "name" in msg["payload"]
        ]

        expected_metrics = [{
            "name": "test_metric_{}".format(name),
            "value": num,
            "is_internal": False,
        } for name, num in nums_to_test]

        assert len(expected_metrics) == len(metrics_messages)
        for i, message in enumerate(metrics_messages):
            assert message == expected_metrics[i]
Пример #19
0
    def test_experiment(self):
        """End-to-end check of the Experiment API against the fake server.

        Runs a small job (log, param, two iterations with a metric), then
        verifies the param/metric messages the server received, the API
        headers, stdout cleanliness, and the persisted on-disk log file.
        """
        # Run a test job via the Experiment API
        # Make sure log file is where is supposed to be
        # look at decorator
        # verify run start/stop is sent
        with patch("sys.stdout", new=StringIO()) as faked_out:
            exp = Experiment("MNIST")
            exp.log("test print")
            exp.param("batch size", 32)
            for i in exp.iter(2):
                time.sleep(1)
                exp.metric("accuracy", i * 0.2)
            time.sleep(0.1)
            exp.end()

        # Test params match what is expected
        params_messages = []
        for msg in server_sdk_messages:
            payload = msg["payload"]
            if "params" in payload:
                params_messages.append(payload)

        # The user param plus the internal param emitted by exp.iter().
        expect_params = [
            {
                "params": {
                    "batch size": 32,
                },
                "is_internal": False,
            },
            {
                "params": {
                    "hd_iter_0_epochs": 2,
                },
                "is_internal": True,
            },
        ]
        assert len(expect_params) == len(params_messages)
        for i, message in enumerate(params_messages):
            assert message == expect_params[i]

        # Test metrics match what is expected
        metrics_messages = []
        for msg in server_sdk_messages:
            payload = msg["payload"]
            if "name" in payload:
                metrics_messages.append(payload)

        # exp.iter() emits an internal progress metric before each user metric.
        expect_metrics = [
            {
                "is_internal": True,
                "name": "hd_iter_0",
                "value": 0
            },
            {
                "is_internal": False,
                "name": "accuracy",
                "value": 0
            },
            {
                "is_internal": True,
                "name": "hd_iter_0",
                "value": 1
            },
            {
                "is_internal": False,
                "name": "accuracy",
                "value": 0.2
            },
        ]
        assert len(expect_metrics) == len(metrics_messages)
        for i, message in enumerate(metrics_messages):
            assert message == expect_metrics[i]

        # The SDK must not print any error to the captured stdout.
        captured_out = faked_out.getvalue()
        assert "error" not in captured_out

        # Make sure correct API name / version headers are sent
        assert server_sdk_headers[0][API_KEY_NAME] == API_NAME_EXPERIMENT
        assert server_sdk_headers[0][
            VERSION_KEY_NAME] == get_hyperdash_version()

        # Make sure logs were persisted
        expect_logs = [
            "{ batch size: 32 }",
            "test print",
            "| Iteration 0 of 1 |",
            "| accuracy:   0.000000 |",
        ]

        # Pick the most recently written log file for this job name.
        log_dir = get_hyperdash_logs_home_path_for_job("MNIST")
        latest_log_file = max([
            os.path.join(log_dir, filename) for filename in os.listdir(log_dir)
        ],
                              key=os.path.getmtime)
        with open(latest_log_file, "r") as log_file:
            data = log_file.read()
            for log in expect_logs:
                assert_in(log, data)
        # Clean up so repeated runs don't accumulate log files.
        os.remove(latest_log_file)
Пример #20
0
def train_multidec(args):
    """Run k-fold semi-supervised UniDEC training on a single modality.

    Loads the input-feature CSV and the label CSV, derives the cluster
    count from the labels, then for each fold restores a pre-trained SDAE
    encoder checkpoint, fits a UniDEC model, and finally prints the mean
    accuracy, NMI and F1 over all folds. All CLI parameters and captured
    output are logged to a Hyperdash Experiment, which is always closed
    in the ``finally`` block, even if training fails.
    """
    print("Training unidec")
    device = torch.device(args.gpu)
    df_input_data = pd.read_csv(os.path.join(
        CONFIG.CSV_PATH, args.prefix_csv + "_" + args.input_csv),
                                index_col=0,
                                encoding='utf-8-sig')

    df_label = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.label_csv),
                           index_col=0,
                           encoding='utf-8-sig')
    label_array = np.array(df_label['category'])
    # Labels are assumed to be 0-based consecutive category ids, so the
    # number of clusters is max(label) + 1.
    n_clusters = np.max(label_array) + 1

    exp = Experiment(args.prefix_csv + "_" + args.target_modal + "_UDEC",
                     capture_io=True)

    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        acc_list = []
        nmi_list = []
        f_1_list = []
        # FIX: the original kept a redundant manual counter (kf_count) that
        # always equaled fold_idx; fold_idx is used directly instead.
        for fold_idx in range(args.fold):
            print("Current fold: ", fold_idx)
            df_train = pd.read_csv(os.path.join(
                CONFIG.CSV_PATH,
                "train_" + str(fold_idx) + "_category_label.csv"),
                                   index_col=0,
                                   encoding='utf-8-sig')
            df_test = pd.read_csv(os.path.join(
                CONFIG.CSV_PATH,
                "test_" + str(fold_idx) + "_category_label.csv"),
                                  index_col=0,
                                  encoding='utf-8-sig')
            print("Loading dataset...")
            full_dataset, train_dataset, val_dataset = load_semi_supervised_uni_csv_data(
                df_input_data, df_train, df_test, CONFIG)
            print("\nLoading dataset completed")

            encoder = UDEC_encoder(input_dim=args.input_dim,
                                   z_dim=args.latent_dim,
                                   n_clusters=n_clusters,
                                   encodeLayer=[500, 500, 2000],
                                   activation="relu",
                                   dropout=0)
            # Restore the per-fold pre-trained SDAE encoder weights.
            encoder.load_model(
                os.path.join(
                    CONFIG.CHECKPOINT_PATH, args.prefix_model + "_" +
                    args.target_modal + "_sdae_" + str(fold_idx)) + ".pt")
            udec = UniDEC(device=device,
                          encoder=encoder,
                          use_prior=args.use_prior,
                          n_clusters=n_clusters)
            udec.fit_predict(
                full_dataset,
                train_dataset,
                val_dataset,
                lr=args.lr,
                batch_size=args.batch_size,
                num_epochs=args.epochs,
                save_path=os.path.join(
                    CONFIG.CHECKPOINT_PATH, args.prefix_csv + "_" +
                    args.target_modal + "_udec_" + str(fold_idx)) + ".pt",
                tol=args.tol,
                kappa=args.kappa)
            # fit_predict stores its final metrics on the model instance.
            acc_list.append(udec.acc)
            nmi_list.append(udec.nmi)
            f_1_list.append(udec.f_1)
        print("#Average acc: %.4f, Average nmi: %.4f, Average f_1: %.4f" %
              (np.mean(acc_list), np.mean(nmi_list), np.mean(f_1_list)))

    finally:
        exp.end()
Пример #21
0
def train_multidec(args):
    """Estimate per-fold modality weights on top of a trained ODEC model.

    For every fold, restores the image/text SDAE encoders and the saved
    MultiDEC (ODEC) checkpoint, freezes it via ``eval()``, then fits a
    WeightCalc model on top of it and accumulates accuracy / NMI / F1,
    printing the averages at the end. Parameters and captured output are
    logged to a Hyperdash Experiment that is closed on exit.
    """
    print("Training weight calc")
    device = torch.device(args.gpu)

    image_csv_path = os.path.join(
        CONFIG.CSV_PATH, args.prefix_csv + "_pca_normalized_image_encoded_" +
        args.target_dataset + ".csv")
    text_csv_path = os.path.join(
        CONFIG.CSV_PATH,
        args.prefix_csv + "_text_doc2vec_" + args.target_dataset + ".csv")
    df_image_data = pd.read_csv(image_csv_path,
                                index_col=0,
                                encoding='utf-8-sig')
    df_text_data = pd.read_csv(text_csv_path,
                               index_col=0,
                               encoding='utf-8-sig')

    df_label = pd.read_csv(os.path.join(CONFIG.CSV_PATH, args.label_csv),
                           index_col=0,
                           encoding='utf-8-sig')
    # Cluster count is derived from the 0-based category labels.
    n_clusters = np.max(np.array(df_label['category'])) + 1

    exp = Experiment(args.prefix_csv + "_ODEC", capture_io=True)

    for key, val in vars(args).items():
        exp.param(key, val)
    try:
        acc_scores, nmi_scores, f1_scores = [], [], []
        for fold_idx in range(args.start_fold, args.fold):
            print("Current fold: ", fold_idx)
            train_csv = os.path.join(
                CONFIG.CSV_PATH, "train_" + str(fold_idx) + "_" +
                args.target_dataset + "_label.csv")
            df_train = pd.read_csv(train_csv,
                                   index_col=0,
                                   encoding='utf-8-sig')
            if args.sampled_n is not None:
                # Optionally sub-sample the labeled training rows
                # (fixed seed keeps folds comparable across runs).
                df_train = df_train.sample(n=args.sampled_n, random_state=42)
            test_csv = os.path.join(
                CONFIG.CSV_PATH, "test_" + str(fold_idx) + "_" +
                args.target_dataset + "_label.csv")
            df_test = pd.read_csv(test_csv,
                                  index_col=0,
                                  encoding='utf-8-sig')
            print("Loading dataset...")
            full_dataset, train_dataset, val_dataset = load_semi_supervised_csv_data(
                df_image_data, df_text_data, df_train, df_test, CONFIG)
            print("\nLoading dataset completed")

            def restore_encoder(modal):
                # Both modality encoders share the same SDAE topology; only
                # the checkpoint path differs.
                enc = MDEC_encoder(input_dim=args.input_dim,
                                   z_dim=args.latent_dim,
                                   n_clusters=n_clusters,
                                   encodeLayer=[500, 500, 2000],
                                   activation="relu",
                                   dropout=0)
                enc.load_model(
                    os.path.join(
                        CONFIG.CHECKPOINT_PATH, args.prefix_model + "_" +
                        modal + "_" + args.target_dataset + "_sdae_" +
                        str(args.latent_dim) + '_' + str(fold_idx)) + ".pt")
                return enc

            mdec = MultiDEC(device=device,
                            image_encoder=restore_encoder("image"),
                            text_encoder=restore_encoder("text"),
                            ours=args.ours,
                            use_prior=args.use_prior,
                            fl=args.fl,
                            n_clusters=n_clusters)

            mdec.load_model(
                os.path.join(
                    CONFIG.CHECKPOINT_PATH, args.prefix_csv + "_odec_" +
                    str(args.latent_dim) + '_' + str(fold_idx)) + ".pt")
            mdec.to(device)
            # The MultiDEC model stays frozen while weights are estimated.
            mdec.eval()

            wcalc = WeightCalc(device=device,
                               ours=args.ours,
                               use_prior=args.use_prior,
                               input_dim=args.input_dim,
                               n_clusters=n_clusters)
            wcalc_ckpt = os.path.join(
                CONFIG.CHECKPOINT_PATH, args.prefix_csv + "_wcalc_" +
                str(args.latent_dim) + '_' + str(fold_idx)) + ".pt"
            wcalc.fit_predict(mdec,
                              full_dataset,
                              train_dataset,
                              val_dataset,
                              args,
                              CONFIG,
                              lr=args.lr,
                              batch_size=args.batch_size,
                              num_epochs=args.epochs,
                              save_path=wcalc_ckpt,
                              tol=args.tol,
                              kappa=args.kappa)
            acc_scores.append(wcalc.acc)
            nmi_scores.append(wcalc.nmi)
            f1_scores.append(wcalc.f_1)
        print("#Average acc: %.4f, Average nmi: %.4f, Average f_1: %.4f" %
              (np.mean(acc_scores), np.mean(nmi_scores), np.mean(f1_scores)))

    finally:
        exp.end()
Пример #22
0
def train_reconstruction(train_loader, test_loader, encoder, decoder, args):
    """Train an encoder/decoder pair to reconstruct input sequences.

    Per step: forward, cross-entropy reconstruction loss, backward, and a
    Hyperdash "Loss" metric normalized by sentence length. Per epoch: a
    visdom line plot of mean train loss, optional evaluation, learning-rate
    decay, and model checkpointing. The Hyperdash experiment is closed in
    the ``finally`` block even if training fails.

    NOTE(review): every 20 epochs the vocabulary and the whole encoder are
    saved to hard-coded /home/avshalom/... paths — confirm these are
    intentional for this environment.
    """
    exp = Experiment("Reconstruction Training")
    #vis = Visualizations()
    vis = visdom.Visdom(port=8098)
    try:
        lr = args.lr
        encoder_opt = torch.optim.Adam(encoder.parameters(), lr=lr)
        decoder_opt = torch.optim.Adam(decoder.parameters(), lr=lr)

        encoder.train()
        decoder.train()
        steps = 0
        # all_losses accumulates per-epoch mean losses; it is not read
        # anywhere in this function (kept for parity with the original).
        all_losses = []
        for epoch in range(1, args.epochs + 1):
            epoch_losses = []
            print("=======Epoch========")
            print(epoch)
            for batch in train_loader:
                feature = batch  # Variable
                if args.use_cuda:
                    # Moving the models inside the loop is redundant after
                    # the first batch but harmless (idempotent .cuda()).
                    encoder.cuda()
                    decoder.cuda()
                    feature = feature.cuda()

                encoder_opt.zero_grad()
                decoder_opt.zero_grad()

                h = encoder(feature)
                prob = decoder(h)
                reconstruction_loss = compute_cross_entropy(prob, feature)
                reconstruction_loss.backward()
                encoder_opt.step()
                decoder_opt.step()

                print("Epoch: {}".format(epoch))
                print("Steps: {}".format(steps))
                # Loss is reported normalized by sentence length so runs
                # with different sequence lengths are comparable.
                print("Loss: {}".format(reconstruction_loss.item() /
                                        args.sentence_len))
                exp.metric("Loss",
                           reconstruction_loss.item() / args.sentence_len)

                epoch_losses.append(reconstruction_loss.item())

                # check reconstructed sentence
                if steps % args.log_interval == 0:
                    print("Test!!")
                    # Decode the first sample of the batch: argmax over the
                    # vocabulary dimension gives the predicted token ids.
                    input_data = feature[0]
                    single_data = prob[0]
                    _, predict_index = torch.max(single_data, 1)
                    input_sentence = transform_id2word(
                        input_data.data,
                        train_loader.dataset.index2word,
                        lang="en")
                    predict_sentence = transform_id2word(
                        predict_index.data,
                        train_loader.dataset.index2word,
                        lang="en")
                    print("Input Sentence:")
                    print(input_sentence)
                    print("Output Sentence:")
                    print(predict_sentence)

                steps += 1

            # Visualization data

            epoch_loss = sum(epoch_losses) / float(len(epoch_losses))
            all_losses.append(epoch_loss)
            if epoch == 1:
                # The first epoch creates the visdom window; later epochs
                # append to it via `win`, so `win` is always bound before
                # the `else` branch or eval_reconstruction can use it.
                # vis.plot_loss(np.mean(epoch_losses), steps)
                win = vis.line(X=np.array((epoch, )),
                               Y=np.array((epoch_loss, )),
                               name="train_loss",
                               opts=dict(xlabel='Epoch',
                                         ylabel='Loss',
                                         title='Train and Eval Loss'))
            else:
                vis.line(X=np.array((epoch, )),
                         Y=np.array((epoch_loss, )),
                         name="train_loss",
                         update="append",
                         win=win)
            #epoch_losses.clear()

            if epoch % args.test_interval == 0:
                eval_reconstruction(encoder, decoder, test_loader, args, vis,
                                    win, epoch)

            if epoch % args.lr_decay_interval == 0:
                # decrease learning rate
                # NOTE(review): rebuilding the Adam optimizers discards
                # their moment estimates — presumably intentional; confirm.
                lr = lr / 1.05
                encoder_opt = torch.optim.Adam(encoder.parameters(), lr=lr)
                decoder_opt = torch.optim.Adam(decoder.parameters(), lr=lr)
                encoder.train()
                decoder.train()

            if epoch % args.save_interval == 0:
                save_models(encoder, args.save_dir, "encoder", steps)
                save_models(decoder, args.save_dir, "decoder", steps)

            if epoch % 20 == 0:
                # finalization
                # save vocabulary
                #with open("word2index", "wb") as w2i, open("index2word", "wb") as i2w:
                #    pickle.dump(train_loader.dataset.word2index, w2i)
                #    pickle.dump(train_loader.dataset.index2word, i2w)
                torch.save(train_loader.dataset.index2word,
                           "/home/avshalom/ext/ae_cnn_code/index2word.pt")
                torch.save(train_loader.dataset.word2index,
                           "/home/avshalom/ext/ae_cnn_code/word2index.pt")

                # save models
                #save_models(encoder, args.save_dir, "encoder", "final")
                #save_models(decoder, args.save_dir, "decoder", "final")
                torch.save(
                    encoder,
                    "/home/avshalom/ext/ae_cnn_code/encoder_lsize_%s_epoch_%s.pt"
                    % (args.latent_size, epoch))

        print("Finish!!!")
    finally:
        exp.end()
Пример #23
0
def demo(args=None):
    """Run a short, self-contained Hyperdash demo experiment.

    Looks for an API key in the environment (preferred) or in a
    hyperdash.json config file; without a key it prints setup
    instructions and returns. Otherwise it echoes the demo program's
    source, then actually runs it: logs three parameters and per-epoch
    Accuracy/Loss metrics with a 1-second pause per epoch.
    """
    from_file = get_api_key_from_file()
    from_env = get_api_key_from_env()
    # The environment variable takes precedence over the config file.
    api_key = from_env or from_file

    if not api_key:
        print("""
            `hyperdash demo` requires a Hyperdash API key. Try setting your API key in the
            HYPERDASH_API_KEY environment variable, or in a hyperdash.json file in the local
            directory or your user's home directory with the following format:

            {
                "api_key": "<YOUR_API_KEY>"
            }
        """)
        return

    print("""
Running the following program:

    from hyperdash import Experiment
    exp = Experiment("Dogs vs. Cats")

    # Parameters
    estimators = exp.param("Estimators", 500)
    epochs = exp.param("Epochs", 5)
    batch = exp.param("Batch Size", 64)

    for epoch in xrange(1, epochs + 1):
        accuracy = 1. - 1./epoch
        loss = float(epochs - epoch)/epochs
        print("Training model (epoch {})".format(epoch))
        time.sleep(1)

        # Metrics
        exp.metric("Accuracy", accuracy)
        exp.metric("Loss", loss)

    exp.end()
    """)
    from hyperdash import Experiment
    exp = Experiment("Dogs vs. Cats")

    # Parameters
    estimators = exp.param("Estimators", 500)
    epochs = exp.param("Epochs", 5)
    batch = exp.param("Batch Size", 64)

    # BUG FIX: the original used Python 2's xrange(), which raises
    # NameError on Python 3; range() is the equivalent here.
    for epoch in range(epochs):
        print("Training model (epoch {})".format(epoch))

        accuracy = 1. - 1. / (epoch + 1)
        loss = float(epochs - epoch) / (epochs + 1)

        # Metrics
        exp.metric("Accuracy", accuracy)
        exp.metric("Loss", loss)

        time.sleep(1)

    exp.end()
Пример #24
0
def train(train_list,
          test_list,
          lr,
          epoch,
          batchsize,
          insize,
          outsize,
          save_interval=10,
          weight_decay=5e-4,
          lr_step=10,
          model_name='resnet34',
          loss_name='focal_loss',
          metric_name='arc_margin',
          optim_name='adam',
          num_workers=4,
          print_freq=1e+6,
          debug=False):
    """Train a margin-based face-recognition model.

    Builds train/test loaders from the given list files, instantiates the
    backbone (`model_name`), metric head (`metric_name`), loss
    (`loss_name`) and optimizer (`optim_name`), then trains for `epoch`
    epochs on CUDA with DataParallel. Each epoch is logged to a CSV
    history file and to a Hyperdash experiment; checkpoints are written
    every `save_interval` epochs and on the final epoch. The training
    config is dumped to train_config.json inside a per-run logs/ folder.

    Raises:
        ValueError: if `model_name` is not one of the supported backbones.
        AssertionError: if `optim_name` is not 'sgd' or 'adam'.
    """
    device = torch.device("cuda")

    train_dataset = Dataset(train_list,
                            mode='train',
                            insize=insize,
                            debug=debug)
    trainloader = torch.utils.data.DataLoader(train_dataset,
                                              batch_size=batchsize,
                                              shuffle=True,
                                              num_workers=num_workers)
    test_dataset = Dataset(test_list, mode='test', insize=insize, debug=debug)
    testloader = torch.utils.data.DataLoader(test_dataset,
                                             batch_size=batchsize,
                                             shuffle=False,
                                             num_workers=num_workers)
    class_num = train_dataset.get_classnum()

    print('{} train iters per epoch:'.format(len(trainloader)))
    print('{} test iters per epoch:'.format(len(testloader)))

    if loss_name == 'focal_loss':
        criterion = FocalLoss(gamma=2)
    else:
        criterion = torch.nn.CrossEntropyLoss()

    if model_name == 'resnet18':
        model = resnet_face18(insize, outsize)
    elif model_name == 'resnet34':
        model = resnet34(insize, outsize)
    elif model_name == 'resnet50':
        model = resnet50(insize, outsize)
    elif model_name == 'resnet101':
        model = resnet101(insize, outsize)
    elif model_name == 'resnet152':
        model = resnet152(insize, outsize)
    elif model_name == 'shuffle':
        model = ShuffleFaceNet(outsize)
    elif model_name == 'simplev1':
        model = CNNv1(insize, outsize, activation='relu', kernel_pattern='v1')
    else:
        raise ValueError('Invalid model name: {}'.format(model_name))

    if metric_name == 'add_margin':
        metric_fc = AddMarginProduct(outsize, class_num, s=30, m=0.35)
    elif metric_name == 'arc_margin':
        metric_fc = ArcMarginProduct(outsize,
                                     class_num,
                                     s=30,
                                     m=0.5,
                                     easy_margin=False)
    elif metric_name == 'sphere':
        metric_fc = SphereProduct(outsize, class_num, m=4)
    else:
        # Plain softmax classification head as fallback.
        metric_fc = nn.Linear(outsize, class_num)

    # view_model(model, opt.input_shape)
    print(model)
    model.to(device)
    model = DataParallel(model)
    metric_fc.to(device)
    metric_fc = DataParallel(metric_fc)

    assert optim_name in ['sgd', 'adam']
    if optim_name == 'sgd':
        optimizer = torch.optim.SGD([{
            'params': model.parameters()
        }, {
            'params': metric_fc.parameters()
        }],
                                    lr=lr,
                                    weight_decay=weight_decay)
    elif optim_name == 'adam':
        optimizer = torch.optim.Adam([{
            'params': model.parameters()
        }, {
            'params': metric_fc.parameters()
        }],
                                     lr=lr,
                                     weight_decay=weight_decay)
    # NOTE(review): the scheduler is created but scheduler.step() is
    # commented out below, so the LR never actually decays — confirm
    # whether that is intentional.
    scheduler = StepLR(optimizer, step_size=lr_step, gamma=0.1)

    start = time.time()
    training_id = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    hyperdash_exp = Experiment(training_id)
    checkpoints_dir = os.path.join('logs', training_id)
    if not os.path.exists(checkpoints_dir):
        os.makedirs(checkpoints_dir)
    logging_path = os.path.join(checkpoints_dir, 'history.csv')

    # Record the full run configuration, both to Hyperdash and to disk.
    config = {}
    config['train_list'] = train_list
    config['test_list'] = test_list
    config['lr'] = lr
    config['epoch'] = epoch
    config['batchsize'] = batchsize
    config['insize'] = insize
    config['outsize'] = outsize
    config['save_interval'] = save_interval
    config['weight_decay'] = weight_decay
    config['lr_step'] = lr_step
    config['model_name'] = model_name
    config['loss_name'] = loss_name
    config['metric_name'] = metric_name
    config['optim_name'] = optim_name
    config['num_workers'] = num_workers
    config['debug'] = debug
    for k, v in config.items():
        hyperdash_exp.param(k, v, log=False)
    with open(os.path.join(checkpoints_dir, 'train_config.json'), 'w') as f:
        json.dump(config, f, indent=4)

    with open(logging_path, 'w') as f:
        f.write('epoch,time_elapsed,train_loss,train_acc,test_loss,test_acc\n')

    prev_time = datetime.datetime.now()
    for i in range(epoch):
        model.train()
        for ii, data in enumerate(tqdm(trainloader, disable=True)):
            data_input, label = data
            data_input = data_input.to(device)
            label = label.to(device).long()
            feature = model(data_input)
            output = metric_fc(feature, label)
            loss = criterion(output, label)
            pred_classes = np.argmax(output.data.cpu().numpy(), axis=1)
            acc = np.mean(
                (pred_classes == label.data.cpu().numpy()).astype(int))
            optimizer.zero_grad()
            loss.backward()

            #import pdb; pdb.set_trace()
            optimizer.step()
            #scheduler.step()

            iters = i * len(trainloader) + ii

            if iters % print_freq == 0 or debug:
                speed = print_freq / (time.time() - start)
                time_str = time.asctime(time.localtime(time.time()))
                print('{} train epoch {} iter {} {} iters/s loss {} acc {}'.
                      format(time_str, i, ii, speed, loss.item(), acc))

                start = time.time()

        model.eval()
        # NOTE(review): only the metrics of the LAST test batch survive the
        # loop below (test_loss/test_acc are overwritten each iteration),
        # and an empty testloader would leave them undefined — confirm.
        for ii, data in enumerate(tqdm(testloader, disable=True)):
            data_input, label = data
            data_input = data_input.to(device)
            label = label.to(device).long()
            feature = model(data_input)
            output = metric_fc(feature, label)
            test_loss = criterion(output, label)
            output = np.argmax(output.data.cpu().numpy(), axis=1)
            test_acc = np.mean(
                (output == label.data.cpu().numpy()).astype(int))
            #test_acc = np.mean((torch.argmax(output, dim=1) == label).type(torch.int32))

        # BUG FIX: the original condition was `i == epoch`, which is never
        # true since i ranges over 0..epoch-1, so the final epoch was only
        # saved when it happened to hit save_interval. Save the last epoch
        # explicitly.
        if i % save_interval == 0 or i == epoch - 1:
            save_model(model.module, checkpoints_dir, model_name, i)
            save_model(metric_fc.module, checkpoints_dir, metric_name, i)

        new_time = datetime.datetime.now()
        with open(logging_path, 'a') as f:
            f.write('{},{},{},{},{},{}\n'.format(
                i, (new_time - prev_time).total_seconds(), loss.item(), acc,
                test_loss.item(), test_acc))
        prev_time = datetime.datetime.now()

        hyperdash_exp.metric('train_loss', loss.item(), log=False)
        hyperdash_exp.metric('train_acc', acc, log=False)
        hyperdash_exp.metric('test_loss', test_loss.item(), log=False)
        hyperdash_exp.metric('test_acc', test_acc, log=False)

    hyperdash_exp.end()
    print('Finished {}'.format(training_id))
Пример #25
0
        if TRAIN:
            optimizer.step()

        loss.detach_()
        net.hidden[0].detach_()
        net.hidden[1].detach_()

        printEpisodeLoss(epoch_idx, episode_idx, loss_episode, diff_episode,
                         len(x))

        loss_epoch += loss_epi
        diff_epoch += diff_episode

    printEpochLoss(epoch_idx, episode_idx, loss_epoch, diff_epoch)
    if TRAIN:
        saveModel(state=net.state_dict(),
                  epoch=epoch_idx,
                  episode_idx=episode_idx,
                  loss_epoch=loss_epoch,
                  diff_epoch=diff_epoch,
                  is_best=(loss_epoch < min(loss_history)))
        loss_history.append(loss_epoch)
    else:
        print(old_model_string)
        break

# Cleanup and mark that the experiment successfully completed
if hyperdash_support:
    exp.end()
Пример #26
0
def train_multidec_transductive(args):
    """Transductive semi-supervised MultiDEC training on the whole dataset.

    Loads image/text feature CSVs and labels, restores the full-dataset
    ("_all") SDAE encoder checkpoints for both modalities, then fits a
    MultiDEC model transductively — via the SSL-DEC variant when
    ``args.ssldec`` is set — and prints the final accuracy / NMI / F1.
    Parameters and captured output go to a Hyperdash Experiment that is
    always closed in the ``finally`` block.
    """
    print("Training multidec")
    device = torch.device(args.gpu)

    def read_csv(name):
        # All input CSVs share the same layout: index in column 0,
        # utf-8-sig encoded.
        return pd.read_csv(os.path.join(CONFIG.CSV_PATH, name),
                           index_col=0,
                           encoding='utf-8-sig')

    df_image_data = read_csv(args.prefix_csv + "_" + args.image_csv)
    df_text_data = read_csv(args.prefix_csv + "_" + args.text_csv)
    df_label = read_csv(args.label_csv)
    # Cluster count follows from the 0-based category labels.
    n_clusters = np.max(np.array(df_label['category'])) + 1

    exp = Experiment(args.prefix_csv + "_MDEC", capture_io=True)

    for key, val in vars(args).items():
        exp.param(key, val)
    try:

        df_train = read_csv(args.trans_csv)
        print("Loading dataset...")
        full_dataset, train_dataset = load_transductive_semi_supervised_csv_data(
            df_image_data, df_text_data, df_label, df_train, CONFIG)
        print("\nLoading dataset completed")

        def restore_encoder(modal):
            # Both modality encoders share the same SDAE topology; only
            # the checkpoint file differs.
            enc = MDEC_encoder(input_dim=args.input_dim,
                               z_dim=args.latent_dim,
                               n_clusters=n_clusters,
                               encodeLayer=[500, 500, 2000],
                               activation="relu",
                               dropout=0)
            enc.load_model(
                os.path.join(
                    CONFIG.CHECKPOINT_PATH,
                    args.prefix_model + "_" + modal + "_" +
                    args.target_dataset + "_sdae_" + str(args.latent_dim) +
                    "_all.pt"))
            return enc

        mdec = MultiDEC(device=device,
                        image_encoder=restore_encoder("image"),
                        text_encoder=restore_encoder("text"),
                        ours=args.ours,
                        use_prior=args.use_prior,
                        n_clusters=n_clusters)

        ckpt_path = os.path.join(
            CONFIG.CHECKPOINT_PATH,
            args.prefix_csv + "_mdec_" + str(args.latent_dim) + "_all.pt")
        # Both training entry points share a signature; pick one and call it.
        fit = (mdec.fit_predict_transductive_ssldec
               if args.ssldec else mdec.fit_predict_transductive)
        fit(full_dataset,
            train_dataset,
            args,
            CONFIG,
            lr=args.lr,
            batch_size=args.batch_size,
            num_epochs=args.epochs,
            save_path=ckpt_path,
            tol=args.tol,
            kappa=args.kappa)
        print("#Average acc: %.4f, Average nmi: %.4f, Average f_1: %.4f" %
              (mdec.acc, mdec.nmi, mdec.f_1))

    finally:
        exp.end()
Пример #27
0
def train_reconstruction(train_loader, test_loader, encoder, decoder, args):
    exp = Experiment("Reconstruction Training")
    try:
        lr = args.lr
        encoder_opt = torch.optim.Adam(encoder.parameters(), lr=lr)
        decoder_opt = torch.optim.Adam(decoder.parameters(), lr=lr)

        encoder.train()
        decoder.train()
        steps = 0
        for epoch in range(1, args.epochs+1):
            print("=======Epoch========")
            print(epoch)
            for batch in train_loader:
                feature = Variable(batch)
                if args.use_cuda:
                    encoder.cuda()
                    decoder.cuda()
                    feature = feature.cuda()

                encoder_opt.zero_grad()
                decoder_opt.zero_grad()

                h = encoder(feature)
                prob = decoder(h)
                reconstruction_loss = compute_cross_entropy(prob, feature)
                reconstruction_loss.backward()
                encoder_opt.step()
                decoder_opt.step()

                steps += 1
                print("Epoch: {}".format(epoch))
                print("Steps: {}".format(steps))
                print("Loss: {}".format(reconstruction_loss.data[0] / args.sentence_len))
                exp.metric("Loss", reconstruction_loss.data[0] / args.sentence_len)
                # check reconstructed sentence
                if steps % args.log_interval == 0:
                    print("Test!!")
                    input_data = feature[0]
                    single_data = prob[0]
                    _, predict_index = torch.max(single_data, 1)
                    input_sentence = util.transform_id2word(input_data.data, train_loader.dataset.index2word, lang="en")
                    predict_sentence = util.transform_id2word(predict_index.data, train_loader.dataset.index2word, lang="en")
                    print("Input Sentence:")
                    print(input_sentence)
                    print("Output Sentence:")
                    print(predict_sentence)

            if steps % args.test_interval == 0:
                eval_reconstruction(encoder, decoder, test_loader, args)


            if epoch % args.lr_decay_interval == 0:
                # decrease learning rate
                lr = lr / 5
                encoder_opt = torch.optim.Adam(encoder.parameters(), lr=lr)
                decoder_opt = torch.optim.Adam(decoder.parameters(), lr=lr)
                encoder.train()
                decoder.train()

            if epoch % args.save_interval == 0:
                util.save_models(encoder, args.save_dir, "encoder", steps)
                util.save_models(decoder, args.save_dir, "decoder", steps)

        # finalization
        # save vocabulary
        with open("word2index", "wb") as w2i, open("index2word", "wb") as i2w:
            pickle.dump(train_loader.dataset.word2index, w2i)
            pickle.dump(train_loader.dataset.index2word, i2w)

        # save models
        util.save_models(encoder, args.save_dir, "encoder", "final")
        util.save_models(decoder, args.save_dir, "decoder", "final")

        print("Finish!!!")
    finally:
        exp.end()
Пример #28
0
def train_reconstruction(args):
    """Train a convolutional autoencoder over image-sequence embeddings.

    Builds a ConvolutionEncoder/DeconvolutionDecoder pair, optionally resumes
    from a checkpoint, and trains with AdamW under a cyclical learning-rate
    schedule, logging to a hyperdash Experiment and checkpointing every epoch.

    :param args: parsed CLI namespace; uses gpu, batch_size, shuffle,
        embedding_dim, resume, weight_decay, lr, lr_factor,
        half_cycle_interval, latent_size, epochs and log_interval.
    """
    device = torch.device(args.gpu)
    print("Loading dataset...")
    train_dataset, val_dataset = load_imgseq_data(args, CONFIG)
    print("Loading dataset completed")
    train_loader, val_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=args.shuffle),\
             DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)

    # Sequence length after each conv layer: L_out = floor((L_in - k) / s) + 1
    # with kernel size k = filter_shape = 3 and stride s = 1.
    t1 = CONFIG.MAX_SEQUENCE_LEN
    t2 = int(math.floor((t1 - 3) / 1) + 1)
    t3 = int(math.floor((t2 - 3) / 1) + 1)
    imgseq_encoder = imgseq_model.ConvolutionEncoder(
        embedding_dim=args.embedding_dim,
        t3=t3,
        filter_size=300,
        filter_shape=3,
        latent_size=1000)
    imgseq_decoder = imgseq_model.DeconvolutionDecoder(
        embedding_dim=args.embedding_dim,
        t3=t3,
        filter_size=300,
        filter_shape=3,
        latent_size=1000)
    if args.resume:
        print("Restart from checkpoint")
        checkpoint = torch.load(os.path.join(CONFIG.CHECKPOINT_PATH,
                                             args.resume),
                                map_location=lambda storage, loc: storage)
        start_epoch = checkpoint['epoch']
        imgseq_encoder.load_state_dict(checkpoint['imgseq_encoder'])
        imgseq_decoder.load_state_dict(checkpoint['imgseq_decoder'])
    else:
        print("Start from initial")
        start_epoch = 0

    imgseq_autoencoder = imgseq_model.ImgseqAutoEncoder(
        imgseq_encoder, imgseq_decoder)
    criterion = nn.MSELoss().to(device)
    imgseq_autoencoder.to(device)

    # Base lr is 1.0 on purpose: LambdaLR multiplies it by clr(step), which
    # already cycles between args.lr and args.lr * args.lr_factor.
    optimizer = AdamW(imgseq_autoencoder.parameters(),
                      lr=1.,
                      weight_decay=args.weight_decay,
                      amsgrad=True)
    step_size = args.half_cycle_interval * len(train_loader)
    clr = cyclical_lr(step_size,
                      min_lr=args.lr,
                      max_lr=args.lr * args.lr_factor)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [clr])

    if args.resume:
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])

    exp = Experiment("Image-sequence autoencoder " + str(args.latent_size),
                     capture_io=False)

    for arg, value in vars(args).items():
        exp.param(arg, value)
    try:
        imgseq_autoencoder.train()

        for epoch in range(start_epoch, args.epochs):
            print("Epoch: {}".format(epoch))
            for steps, batch in enumerate(train_loader):
                torch.cuda.empty_cache()
                feature = Variable(batch).to(device)
                optimizer.zero_grad()
                feature_hat = imgseq_autoencoder(feature)
                loss = criterion(feature_hat, feature)
                loss.backward()
                optimizer.step()
                scheduler.step()

                if (steps * args.batch_size) % args.log_interval == 0:
                    print("Epoch: {} at {} lr: {}".format(
                        epoch, str(datetime.datetime.now()),
                        str(scheduler.get_lr())))
                    print("Steps: {}".format(steps))
                    print("Loss: {}".format(loss.detach().item()))
                # Release batch tensors eagerly to lower peak GPU memory.
                del feature, feature_hat, loss

            exp.log("\nEpoch: {} at {} lr: {}".format(
                epoch, str(datetime.datetime.now()), str(scheduler.get_lr())))
            _avg_loss = eval_reconstruction(imgseq_autoencoder, criterion,
                                            val_loader, device)
            exp.log("\nEvaluation - loss: {}".format(_avg_loss))

            # Checkpoint every epoch so training can resume via args.resume.
            util.save_models(
                {
                    'epoch': epoch + 1,
                    'imgseq_encoder': imgseq_encoder.state_dict(),
                    'imgseq_decoder': imgseq_decoder.state_dict(),
                    'avg_loss': _avg_loss,
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict()
                }, CONFIG.CHECKPOINT_PATH,
                "imgseq_autoencoder_" + str(args.latent_size))

        print("Finish!!!")

    finally:
        exp.end()
Пример #29
0
class Experiment:
    """One repeated-trial training experiment.

    Wires together a dataset, a model, and an optimizer configured with
    previously tuned hyperparameters (loaded from ``../params/``), prepares a
    fresh per-trial output directory, and registers Hyperdash / TensorBoard /
    CSV logging callbacks.  Call :meth:`begin` to train, evaluate, and persist
    the results.
    """

    @logger.read
    def __init__(self, dataset_name, model_name, optimizer_name, trial_num):
        """
        :param dataset_name: name of the dataset
        :type dataset_name: str
        :param model_name: name of the model
        :type model_name: str
        :param optimizer_name: name of the optimizer
        :type optimizer_name: str
        :param trial_num: current number of repeated trials
        :type trial_num: int
        """
        # get optimized hyperparameters (result of a prior tuning run)
        with open(
                f'../params/{dataset_name}_{model_name}_{optimizer_name}/result.json'
        ) as f:
            params = json.load(f)

        # get instances from the project registries
        self.dataset = Datasets.get(dataset_name)
        self.model = Models.get(model_name, dataset=self.dataset)
        self.optimizer = Optimizers.get(optimizer_name, params=params)

        # get config (NOTE: paths are relative — assumes a fixed working dir)
        with open('./config.json') as f:
            config = json.load(f)

        # get constants: fixed training settings for this dataset/model pair
        c = config['constants'][dataset_name][model_name]
        self.loss = c['loss']
        self.batch_size = c['batch_size']
        self.epochs = c['epochs']

        # configure and initialize directory
        # NOTE: any previous output for this trial is deleted here.
        d = self.main_dir = f'../data/{dataset_name}_{model_name}_{optimizer_name}/trial{trial_num}'
        if os.path.exists(d):
            shutil.rmtree(d)
        os.makedirs(d)

        # configure hyperdash experiment and record every (hyper)parameter
        self.hd_exp = HyperdashExperiment(
            f'{dataset_name}',
            api_key_getter=lambda: config['hyperdash']['api_key'])
        self.hd_exp.param('dataset_name', dataset_name)
        self.hd_exp.param('model_name', model_name)
        self.hd_exp.param('optimizer_name', optimizer_name)
        self.hd_exp.param('trial_num', trial_num)

        for k, v in params.items():
            self.hd_exp.param(k, v)

        # set callbacks (metrics streaming, tensorboard, timing, csv history)
        self.callbacks = [
            Hyperdash(['accuracy', 'loss', 'val_accuracy', 'val_loss'],
                      self.hd_exp),
            TensorBoard(log_dir=f'{self.main_dir}/tensorboard'),
            TimeLogger(filename=f'{self.main_dir}/time.csv'),
            CSVLogger(filename=f'{self.main_dir}/result.csv', append=True)
        ]

    @logger.write
    def begin(self):
        """Train the model, evaluate on the test split, and save the scores.

        Fits with a 0.2 validation split and the registered callbacks, writes
        test loss/accuracy to ``<main_dir>/test.json``, then closes the
        Hyperdash experiment.
        """
        # get data
        (x_train, y_train), (x_test, y_test) = self.dataset.get_batch()

        # start learning
        self.model.compile(loss=self.loss,
                           optimizer=self.optimizer,
                           metrics=['accuracy'])
        self.model.fit(x_train,
                       y_train,
                       batch_size=self.batch_size,
                       epochs=self.epochs,
                       callbacks=self.callbacks,
                       validation_split=0.2,
                       verbose=2)

        # save final scores
        score = self.model.evaluate(x_test, y_test, verbose=1)
        with open(f'{self.main_dir}/test.json', 'w') as f:
            json.dump({
                'test loss': score[0],
                'test accuracy': score[1]
            },
                      f,
                      indent=4)

        # stop hyperdash experiment
        self.hd_exp.end()
Пример #30
0
    def run(self):
        """Build, train, evaluate, and persist the FaceID siamese network.

        Pipeline: a SqueezeNet-style backbone over 200x200 4-channel
        (presumably RGB + depth — TODO confirm) inputs, an L2-normalized
        128-d embedding head shared by two siamese branches merged via
        euclidean distance, trained with a contrastive loss on pairs from
        the RGB-D face database.  Checkpoints, the final model, and its
        architecture/weights go under RGB-D_Face_database/snapshot/.
        """
        #
        #	hyperdash
        #
        exp = Experiment("faceid")
        hd_callback = Hyperdash(exp=exp)

        # print(self.create_couple("RGB-D_Face_database/faceid_train/"))
        # print(self.create_couple_rgbd("RGB-D_Face_database/faceid_val/"))
        # NOTE(review): return value discarded — presumably called only for
        # its side effects (dataset warm-up/caching); confirm it is needed.
        self.create_couple_rgbd("RGB-D_Face_database/faceid_val/")
        # print(self.create_wrong("RGB-D_Face_database/faceid_train/"))
        # print(self.create_wrong_rgbd("RGB-D_Face_database/faceid_val/")[0].shape)

        # quit()

        #
        # create network
        # SqueezeNet-style backbone (conv + fire modules) over 4-channel input
        #
        img_input = Input(shape=(200, 200, 4))

        x = Convolution2D(64, (5, 5), strides=(2, 2),
                          padding='valid')(img_input)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x)

        x = self.fire(x, squeeze=16, expand=16)
        x = self.fire(x, squeeze=16, expand=16)
        x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x)

        x = self.fire(x, squeeze=32, expand=32)
        x = self.fire(x, squeeze=32, expand=32)
        x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x)

        x = self.fire(x, squeeze=48, expand=48)
        x = self.fire(x, squeeze=48, expand=48)
        x = self.fire(x, squeeze=64, expand=64)
        x = self.fire(x, squeeze=64, expand=64)
        x = Dropout(0.2)(x)

        x = Convolution2D(512, (1, 1), padding='same')(x)
        out = Activation('relu')(x)

        modelsqueeze = Model(img_input, out)
        print("\nmodel squeeze summary")
        modelsqueeze.summary()
        plot_model(modelsqueeze, show_shapes=True, to_file='model_squeeze.png')

        # Embedding head: flatten backbone features into a unit-norm 128-d
        # vector (L2-normalized so euclidean distance is a similarity metric).
        im_in = Input(shape=(200, 200, 4))
        x1 = modelsqueeze(im_in)
        x1 = Flatten()(x1)
        x1 = Dense(512, activation="relu")(x1)
        x1 = Dropout(0.2)(x1)
        feat_x = Dense(128, activation="linear")(x1)
        feat_x = Lambda(lambda x: K.l2_normalize(x, axis=1))(feat_x)

        model_top = Model(inputs=[im_in], outputs=feat_x)
        print("\nmodel top summary")
        model_top.summary()
        plot_model(model_top, show_shapes=True, to_file='model_top.png')

        # Siamese pair: the embedding head is shared between both branches,
        # merged by euclidean distance.
        im_in1 = Input(shape=(200, 200, 4))
        im_in2 = Input(shape=(200, 200, 4))
        feat_x1 = model_top(im_in1)
        feat_x2 = model_top(im_in2)
        lambda_merge = Lambda(self.euclidean_distance)([feat_x1, feat_x2])

        model_final = Model(inputs=[im_in1, im_in2], outputs=lambda_merge)
        print("\nmodel final summary")
        model_final.summary()
        plot_model(model_final, show_shapes=True, to_file='model_final.png')

        # NOTE(review): sgd is created but never used — adam is the optimizer.
        adam = Adam(lr=0.001)
        sgd = SGD(lr=0.001, momentum=0.9)

        # Compiled with loss only — no metrics are registered.
        model_final.compile(optimizer=adam, loss=self.contrastive_loss)

        #
        # plot model
        #
        # print("write model summary png...")
        # plot_model(model_final, show_shapes=True, to_file='model.png')
        # print("write model summary png...done")

        #
        # generator
        #
        gen = self.generator(16)
        val_gen = self.val_generator(4)

        #
        # checkpoint
        # save the model after each epoch
        # 	file_name = str(datetime.datetime.now()).split(' ')[0] + '_{epoch:02d}.hdf5'
        # 	filepath = os.path.join(save_dir, file_name)
        #
        """
		keras.callbacks.ModelCheckpoint(
			filepath,
			monitor='val_loss',
			verbose=0,
			save_best_only=False,
			save_weights_only=False,
			mode='auto',
			period=1)
		"""
        drive_dir = 'RGB-D_Face_database/snapshot/'
        base_file_name = 'model'
        checkpointer = keras.callbacks.ModelCheckpoint(
            # filepath=drive_dir+base_file_name+'.{epoch:02d}-loss{loss:.2f}-acc{acc:.2f}-vloss{val_loss:.2f}-vacc{val_acc:.2f}.hdf5',
            filepath=drive_dir + base_file_name +
            '.epoch{epoch:03d}-loss{loss:.4f}-val_loss{val_loss:.4f}.hdf5',
            # filepath=drive_dir+base_file_name+'.{epoch:02d}-{val_loss:.2f}.hdf5',
            verbose=1,
            save_best_only=True,
            # monitor='val_acc',
            monitor='val_loss',
            mode='auto')

        #
        # ProgressbarLogger
        # NOTE(review): pbarl is built but not in fit_generator's callbacks.
        #
        pbarl = keras.callbacks.ProgbarLogger(count_mode='samples')

        #
        # CSV Logger
        # save each epoch's results to a csv file (on Google Drive the file
        # is not updated until training ends, so it is written locally)
        #
        """
		keras.callbacks.CSVLogger(
			filename,
			separator=',',
			append=False)
		"""
        csv_logger = keras.callbacks.CSVLogger('./xxx.log')

        #
        # reduce LR on plateau
        # lower the learning rate when the monitored metric stops improving
        #
        """
		keras.callbacks.ReduceLROnPlateau(
			monitor='val_loss',
			factor=0.1,
			patience=10,
			verbose=0,
			mode='auto',
			epsilon=0.0001,
			cooldown=0,
			min_lr=0)
		"""
        # NOTE(review): min_lr equals the initial Adam lr (0.001), so this
        # callback can never actually lower the rate — confirm intent.
        reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                      factor=0.2,
                                                      patience=5,
                                                      min_lr=0.001)

        #
        # early stopping
        #
        """
		keras.callbacks.EarlyStopping(
			monitor='val_loss',
			min_delta=0,
			patience=0,
			verbose=0,
			mode='auto')
		"""
        early_stop = keras.callbacks.EarlyStopping(monitor='val_loss',
                                                   verbose=1,
                                                   patience=100)

        #
        # tensor board
        #
        """
		keras.callbacks.TensorBoard(
			log_dir='./logs',
			histogram_freq=0,
			batch_size=32,
			write_graph=True,
			write_grads=False,
			write_images=False,
			embeddings_freq=0,
			embeddings_layer_names=None,
			embeddings_metadata=None)
		$tensorboard --logdir=/full_path_to_your_logs
		"""
        # tensorboard = keras.callbacks.TensorBoard(log_dir="RGB-D_Face_database/log", histogram_freq=1)
        # old_session = KTF.get_session()
        # new_session = tf.Session('')
        # KTF.set_session(new_session)

        #
        # generator
        #
        """
		outputs = model_final.fit_generator(
			generator, 
			steps_per_epoch=None, 
			epochs=1, 
			verbose=1, 
			callbacks=None, 
			validation_data=None, 
			validation_steps=None, 
			class_weight=None, 
			max_queue_size=10, 
			workers=1, 
			use_multiprocessing=False, 
			shuffle=True, 
			initial_epoch=0)
		"""
        # steps_per_epoch=30,
        # epochs=50,
        # callbacks=[checkpointer, csv_logger, reduce_lr, early_stop, tensorboard, hd_callback],
        # validation_steps=20
        # fit_generator(
        #	self,
        #	generator,
        #	steps_per_epoch=None,
        #	epochs=1,
        #	verbose=1,
        #	callbacks=None,
        #	validation_data=None,
        #	validation_steps=None,
        #	class_weight=None,
        #	max_queue_size=10,
        #	workers=1,
        #	use_multiprocessing=False,
        #	shuffle=True,
        #	initial_epoch=0)
        outputs = model_final.fit_generator(
            gen,
            steps_per_epoch=10,  # 30
            epochs=1,  # 50
            verbose=1,
            # callbacks=[checkpointer],
            # callbacks=[checkpointer, hd_callback],
            callbacks=[
                checkpointer, csv_logger, early_stop, reduce_lr, hd_callback
            ],
            # callbacks=[checkpointer, pbarl, csv_logger, early_stop, reduce_lr, hd_callback],
            # callbacks=[checkpointer, csv_logger, early_stop, reduce_lr, tensorboard, hd_callback],
            # pickle_safe=True,
            validation_data=val_gen,
            validation_steps=20,
            # workers=8,
            use_multiprocessing=True)  # 20

        #
        # model save
        #
        print('saving model_final...')
        model_final.save("RGB-D_Face_database/snapshot/model_final.h5")
        print('saving model_final...done')

        #
        # model test
        #
        """
		"""
        cop = self.create_couple("RGB-D_Face_database/faceid_val/")
        # NOTE(review): the model was compiled without metrics, so evaluate()
        # may return a scalar loss rather than a list — indexing score[0] /
        # score[1] below looks wrong; verify against the Keras version used.
        score = model_final.evaluate([
            cop[0].reshape((1, 200, 200, 4)), cop[1].reshape((1, 200, 200, 4))
        ], np.array([0.]))
        print('Test score(couple):', score[0])
        print('Test accuracy(couple):', score[1])

        cop = self.create_wrong_rgbd("RGB-D_Face_database/faceid_val/")
        # NOTE(review): predict() returns distances, not (loss, accuracy);
        # score[1] likely raises IndexError for a single pair — verify.
        score = model_final.predict([
            cop[0].reshape((1, 200, 200, 4)), cop[1].reshape((1, 200, 200, 4))
        ])
        print('Test score(wrong_rgbd):', score[0])
        print('Test accuracy(wrong_rgbd):', score[1])

        #
        # save model (architecture,json)
        #
        print('save the architecture of a model...')
        json_string = model_final.to_json()
        open(drive_dir + base_file_name + 'model.json', 'w').write(json_string)
        # open(os.path.join(drive_dir+base_file_name,'model.json'), 'w').write(json_string)
        print('save the architecture of a model...done')

        print('save weights...')
        # NOTE(review): to_yaml() was removed in newer Keras/TF releases —
        # this pins the code to an older version.
        yaml_string = model_final.to_yaml()
        open(drive_dir + base_file_name + 'model.yaml', 'w').write(yaml_string)
        # open(os.path.join(drive_dir+base_file_name,'model.yaml'), 'w').write(yaml_string)
        model_final.save_weights(drive_dir + base_file_name +
                                 'model_weights.hdf5')
        # model_final.save_weights(os.path.join(drive_dir+base_file_name,'model_weights.hdf5'))
        print('save weights...done')

        # debug: confirm the saved .h5 round-trips through load_model
        # (compile=False skips restoring the custom loss/merge functions)
        print('debug: load_model...')
        del model_final
        model_final = keras.models.load_model(
            "RGB-D_Face_database/snapshot/model_final.h5",
            # custom_objects={
            # 'euclidean_distance': euclidean_distance,
            # 'contrastive_loss': contrastive_loss,
            # 'l2_normalize': K.l2_normalize
            # },
            compile=False)
        print('debug: load_model...done')

        #
        # tensorboard
        #
        # KTF.set_session(old_session)
        # print('tensorboard done')

        #
        # hyperdash
        #
        print('hyperdash done')
        exp.end()