Example #1
def training_main(args):
    # process input file

    print('step 1: validating file name')
    print(args.train_file)
    input_file = utl.validate_file(args.train_file)

    print('step 2: splitting training and testing data sets')
    user_map, item_map, tr_sparse, test_sparse, item_ID_mapping_dd = utl.split_train_and_test(
        args, input_file)

    # train model
    print('step 3: training the model')
    output_row, output_col = model.train_model(tr_sparse, args)

    # save trained model to job directory
    print('step 4: saving the model')
    utl.save_model(args, user_map, item_map, output_row, output_col,
                   item_ID_mapping_dd)

    # log results
    print('step 5: get results')
    train_rmse = model.get_rmse(output_row, output_col, tr_sparse)
    test_rmse = model.get_rmse(output_row, output_col, test_sparse)

    if args.hyperparam_tune:
        # write test_rmse metric for hyperparam tuning
        utl.write_hptuning_metric(args, test_rmse)

    tf.logging.info('train RMSE = %.2f' % train_rmse)
    tf.logging.info('test RMSE = %.2f' % test_rmse)
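The `utl.save_model` helper called in step 4 is not part of this excerpt; a minimal sketch, assuming
the factors are numpy arrays and `args.job_dir` names the output directory (both assumptions), is:

import os
import pickle

import numpy as np


def save_model(args, user_map, item_map, row_factor, col_factor, item_id_mapping):
    """Persist the trained factors and the ID mappings under args.job_dir."""
    model_dir = os.path.join(args.job_dir, 'model')
    os.makedirs(model_dir, exist_ok=True)
    # latent factors learned by the matrix-factorization step
    np.save(os.path.join(model_dir, 'row.npy'), row_factor)
    np.save(os.path.join(model_dir, 'col.npy'), col_factor)
    # mappings needed to translate raw user/item IDs at serving time
    with open(os.path.join(model_dir, 'mappings.pkl'), 'wb') as f:
        pickle.dump({'user_map': user_map, 'item_map': item_map,
                     'item_id_mapping': item_id_mapping}, f)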
Example #2
def train_model(model, criterion, optimizer, scheduler, train_loader, train_dataset, test_loader, config):
    # Train the Model
    num_epochs = config['num_epochs']
    batch_size = config['batchsize']
    model_type = config['model_type']
    top_test_acc = 0
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            if torch.cuda.is_available():
                images = images.cuda()
                labels = labels.cuda()
            # Variable wrappers are no-ops in PyTorch >= 0.4; kept here for compatibility with older versions
            images = Variable(images)
            labels = Variable(labels)
            # Forward + Backward + Optimize
            outputs = model(images)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        test_accuracy = test_model(test_loader)
        scheduler.step()
#        import ipdb as pdb; pdb.set_trace()
        print('[{0}] Test Accuracy of the model on the 10000 test images: {1} , lr:{2}, loss:{3}'
              .format(epoch, test_accuracy, get_lr(optimizer), loss.item()))
        if test_accuracy > top_test_acc:
            # checkpoint whenever the test accuracy improves
            utility.save_model(config=config, model=model)
            top_test_acc = test_accuracy
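The helpers used above are not shown; a plausible minimal version of `get_lr` and of a
config-driven `save_model` (both are assumptions, not the project's actual utility module) is:

import torch


def get_lr(optimizer):
    # current learning rate of the first parameter group
    return optimizer.param_groups[0]['lr']


def save_model(config, model):
    # save only the weights; the network is rebuilt from `config` when loading
    path = '{}_{}.pth'.format(config.get('model_type', 'model'),
                              config.get('experiment_name', 'run'))
    torch.save(model.state_dict(), path)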
def train_gan(discriminator: nn.Module, generator: nn.Module, name, train_loader, val_loader=None, epochs=50, num_imgs=9, Optimizer_fn=optim.Adam, loss_type=0, k=1):
    os.makedirs(os.path.join(CKPT_PATH, name), exist_ok=True)
    # create (or truncate) the loss log for this run
    with open(os.path.join(CKPT_PATH, name, "loss_log.txt"), "w") as loss_log:
        pass
    latent_sample = torch.randn(size=(num_imgs, generator.latent_size,), device=device)
    latent_sample[0] = 0
    losses = dict(
        train_d_real_losses=[],
        train_d_fake_losses=[],
        train_g_losses=[],
        val_d_real_losses=[],
    )
    d_optimizer, d_scheduler = get_optimizer_scheduler(Optimizer_fn, discriminator.parameters())
    g_optimizer, g_scheduler = get_optimizer_scheduler(Optimizer_fn, generator.parameters())

    for epoch in tqdm.trange(epochs):  # loop over the dataset multiple times
        train_d_real_loss, train_d_fake_loss, train_g_loss = train_epoch(discriminator, generator, train_loader, device, d_optimizer, d_scheduler, g_optimizer, g_scheduler, loss_type, k)
        val_d_real_loss = val_epoch(discriminator, generator, val_loader, device, loss_type, 50)

        losses["train_d_real_losses"].append(train_d_real_loss)
        losses["train_d_fake_losses"].append(train_d_fake_loss)
        losses["train_g_losses"].append(train_g_loss)
        losses["val_d_real_losses"].append(val_d_real_loss)
        loss_msg = f"epoch {epoch}: train_d_real_loss={train_d_real_loss}, train_d_fake_loss={train_d_fake_loss}, train_g_loss={train_g_loss}, val_d_real_loss={val_d_real_loss}"
        logging.info(loss_msg)
        with open(os.path.join(CKPT_PATH, name, "loss_log.txt"), "a") as loss_log:
            loss_log.write(loss_msg + "\n")  # newline so epochs do not run together in the log
        plot_losses(losses, name)
        generator.eval().to(device)
        save_image(generator(latent_sample).detach().to("cpu"), "generated_images", name, epoch)   
        save_model(discriminator, generator, name)
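`get_optimizer_scheduler` is referenced above but not defined in this excerpt; a plausible sketch,
assuming it pairs the chosen optimizer with a simple step-decay scheduler (the learning rate and
decay values below are illustrative, not taken from the project), is:

from torch import optim


def get_optimizer_scheduler(optimizer_fn, parameters, lr=2e-4, step_size=10, gamma=0.5):
    # optimizer_fn is e.g. optim.Adam; returns the optimizer together with its scheduler
    optimizer = optimizer_fn(parameters, lr=lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
    return optimizer, scheduler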
Example #4
def api_call_tfidf():

    document_title_content_save_path = os.path.join(STORAGE, 'apicall.txt')
    test_document_title_content_save_path = os.path.join(
        STORAGE, 'test_apicall.txt')

    tfidf_save_path = os.path.join(STORAGE,
                                   'apicall_tfidf/webpage.tfidf.model')

    rawy, raw_documents = load_documents(document_title_content_save_path)
    labels, docs, filesid = load_test_documents(
        test_document_title_content_save_path)

    documents = raw_documents + docs

    print(len(documents))  # total number of documents (training + test)

    #   model = TfidfVectorizer(min_df = 4,decode_error ='ignore',stop_words='english',ngram_range=(1, 1),max_features=50000)
    model = TfidfVectorizer(decode_error='ignore',
                            stop_words='english',
                            ngram_range=(2, 3),
                            max_features=5000)  #apicall
    #     model = TfidfVectorizer(decode_error ='ignore',stop_words='english',ngram_range=(1, 1),max_features=1500)

    x = model.fit_transform(documents)

    save_model(model, tfidf_save_path)

    display_scores(model, x, 'apicall_tfidf')
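For scikit-learn estimators such as the TfidfVectorizer above, `save_model` is usually a thin wrapper
around joblib; a minimal sketch (the project's actual helper is not shown) could be:

import os

import joblib


def save_model(model, path):
    # make sure the target directory exists, then serialize the fitted estimator
    os.makedirs(os.path.dirname(path), exist_ok=True)
    joblib.dump(model, path)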
Example #5
def main():
    print("Reading in the training data")
    train = utility.load_data("training", "finalinput")
    truth = np.ravel(np.array(train['votes_useful_log']))
    del train['votes_useful_log']

    print("Extracting features and training review text model")
    classifier = get_pipeline()
    classifier.fit(train.values[:,1:], np.array(truth))

    print("Saving the classifier")
    # `revision` is assumed to be defined at module scope; it tags the saved model file
    utility.save_model(classifier, "fullsgd_model_rev{}".format(revision))
def select_best_model():
    min_mae, best_model_type, best_regressor = float('inf'), '', None
    for model_class in [ModelLinearRegression, ModelNeuralNetwork, ModelRandomForest, ModelXGBoost, ModelAdaBoost]:
        model_type = model_class.__name__[5:]
        print('\nBuilding {}...'.format(model_type))
        builder = model_class(sub_dataframe_for_modeling, features)
        mae, regressor = builder.process_modeling()
        if model_type != 'NeuralNetwork':
            utility.save_model(athletes_name, activity, model_type, regressor)
            if mae < min_mae: min_mae, best_model_type, best_regressor = mae, model_type, regressor
    print("\n***Best model for activity '{}' is {} with mean absolute error: {}***"
          .format(activity, best_model_type, min_mae))
    if best_regressor is not None:
        best_model_dict[activity] = best_model_type
Example #7
def main():
    revision = 4

    print("Reading in the training data")
    train = utility.load_data("training", "rtext")
    inds = random.sample(range(len(train)), 100000)
    mtrain = train.iloc[inds]  # .ix has been removed from pandas; inds are positional indices

    print("Extracting features and training review text model")
    classifier = get_pipeline()
    classifier.fit(list(mtrain['rtext_bcat']), 
                   list(mtrain['votes_useful_log']))

    print("Saving the classifier")
    utility.save_model(classifier, "train_rtext_rev{}".format(revision))
Example #8
def train(self, model):
    # prepare dataset
    dataset = PerQuestionDataset(self.args, 'train', self.word2id, self.rela2id)
    if self.args.dataset.lower() == 'wq' or self.args.dataset.lower() == 'wq_train1test2':
        train_dataset, valid_dataset = random_split(dataset, 0.9, 0.1)
    else:
        train_dataset = dataset
        valid_dataset = PerQuestionDataset(self.args, 'valid', self.word2id, self.rela2id)
    datas = DataLoader(dataset=train_dataset, batch_size=1, shuffle=True, num_workers=18,
            pin_memory=False, collate_fn=quick_collate)
    self._set_optimizer(model)
    earlystop_counter, min_valid_metric = 0, 100
    # training
    for epoch in range(0, self.args.epoch_num):
        model = model.train().cuda()
        total_loss, total_acc = 0.0, 0.0
        loss_count, acc_count = 0, 0
        total_rc_acc, total_td_acc = 0.0, 0.0
        rc_count, td_count = 0, 0
        for trained_num, data in enumerate(datas):
            if self.args.framework == 'baseline':
                # baseline is equivalent to single-step relation choosing
                index, ques, tuples = data
                self.optimizer.zero_grad(); model.zero_grad()
                loss, acc, score = self._single_step_rela_choose(model, ques, tuples)
                if loss != 0:
                    loss.backward(); self.optimizer.step()
                total_loss += (loss.data if loss != 0 else 0); loss_count += 1
                total_acc += acc; acc_count += 1
                print(f'\r{self.args.framework}_{self.args.model}({self.args.dynamic}) {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} Epoch {epoch} {trained_num}/{len(datas)} Loss:{total_loss/loss_count:.5f} Acc:{total_acc/acc_count:.4f}', end='')
            else:
                model, loss, acc, score, label, rc_acc, td_acc = self._execute_UHop(model, data, 'train')
                total_loss += loss[0].data; loss_count += loss[1]
                total_acc += acc; acc_count += 1
                total_rc_acc += rc_acc[0]; rc_count += rc_acc[1]
                total_td_acc += td_acc[0]; td_count += td_acc[1]
                print(f'\r{self.args.framework}_{self.args.model}({self.args.dynamic}) {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} Epoch {epoch} {trained_num}/{len(datas)} Loss:{total_loss/loss_count:.5f} Acc:{total_acc/acc_count:.4f} RC_Acc:{total_rc_acc/rc_count:.2f} TD_Acc:{total_td_acc/td_count:.2f}', end='')
        # validation to decide whether to early-stop
        valid_loss, valid_acc, valid_score, _ = self.evaluate(model, 'valid', valid_dataset)
        if valid_loss < min_valid_metric:
            min_valid_metric = valid_loss
            earlystop_counter = 0
            save_model(model, self.args.path)
        else:
            earlystop_counter += 1
        if earlystop_counter > self.args.earlystop_tolerance:
            break
    return model
def train(self):
    logging.debug(f"Start training of {self.name}")
    self._decoder = self._decoder_class().to(device).train()
    self._encoder = self._encoder_class().to(device).train()
    train_autoencoder(self.encoder,
                      self.decoder,
                      self.train_loader,
                      device,
                      self.name,
                      self.encoder.latent_size,
                      epochs=self.epochs,
                      Optimizer=self.Optimizer,
                      normal_loss_factor=self.normal_loss,
                      val_loader=self.val_loader)
    save_model(self.encoder, self.decoder, self.name)
    self._trained = True
Example #10
def model_build_main(storage, datasetpath, featureheaders, targetheaders):

    name, clf, modelinfo = model_build(datasetpath, featureheaders,
                                       targetheaders)

    summarypath = os.path.join(storage, 'model/lightgbm.model.esimate')
    modelsavepath = os.path.join(storage, 'model/lightgbm.model')
    modelinfosavepath = os.path.join(storage, 'model/lightgbm.modelinfo')

    txt = json.dumps(modelinfo, indent=4)

    write_to_file(modelinfosavepath, txt.encode('utf-8'), mode='wb+')

    save_model(clf, modelsavepath)

    print('model summary:')

    print('save model summary->', summarypath)
    write_to_file(summarypath, txt.encode('utf-8'), mode='wb+')
Example #11
def main():

    word_vectors = {}

    with open(GOOGLE_ENGLISH_WORD_PATH) as f:
        lines = f.readlines()
        for line in lines:
            line = line.strip('\n')
            if line:
                word = line
                print(line)
                word_vectors[word] = None

    model = gensim.models.KeyedVectors.load_word2vec_format(
        GOOGLE_WORD2VEC_MODEL, binary=True)

    for word in word_vectors:
        try:
            v = model[word]  # KeyedVectors index directly; .wv is only needed on full Word2Vec models
            word_vectors[word] = v
        except KeyError:
            # word not in the pretrained vocabulary; leave its vector as None
            pass

    save_model(word_vectors, GOOGLE_WORD_FEATURE)
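Here `save_model` only needs to persist a plain {word: vector} dict, so a pickle-based sketch (an
assumption; the real helper is not shown) is enough:

import pickle


def save_model(obj, path):
    # works for the word-vector dict built above and any other picklable object
    with open(path, 'wb') as f:
        pickle.dump(obj, f)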
Example #12
def api_call_tfidf_1():

    document_title_content_save_path = os.path.join(STORAGE, 'apicall.txt')
    test_document_title_content_save_path = os.path.join(
        STORAGE, 'test_apicall.txt')

    tfidf_save_path = os.path.join(STORAGE,
                                   'apicall_tfidf_1/webpage.tfidf.model')

    rawy, raw_documents = load_documents(document_title_content_save_path)
    labels, docs, filesid = load_test_documents(
        test_document_title_content_save_path)

    documents = raw_documents + docs

    model = TfidfVectorizer(decode_error='ignore',
                            stop_words='english',
                            ngram_range=(1, 1))

    x = model.fit_transform(documents)

    save_model(model, tfidf_save_path)

    display_scores(model, x, 'apicall_tfidf_1')
def run_main(config):
    train_loss_total_avg = 0.0
    train_transform = transforms.Compose([
        CenterCrop2D((200, 200)),
        ElasticTransform(alpha_range=(28.0, 30.0),
                         sigma_range=(3.5, 4.0),
                         p=0.3),
        RandomAffine(degrees=4.6, scale=(0.98, 1.02), translate=(0.03, 0.03)),
        RandomTensorChannelShift((-0.10, 0.10)),
        ToTensor(),
        NormalizeInstance(),
    ])

    val_transform = transforms.Compose([
        CenterCrop2D((200, 200)),
        ToTensor(),
        NormalizeInstance(),
    ])

    #    import ipdb as pdb; pdb.set_trace()

    # Here we assume that the SC GM Challenge data is inside the folder
    # "data" and it was previously resampled.
    gmdataset_train = SCGMChallenge2DTrain(root_dir="data",
                                           subj_ids=range(1, 9),
                                           transform=train_transform,
                                           slice_filter_fn=SliceFilter())

    # Here we assume that the SC GM Challenge data is inside the folder
    # "data" (matching root_dir below) and it was previously resampled.
    gmdataset_val = SCGMChallenge2DTrain(root_dir="data",
                                         subj_ids=range(9, 11),
                                         transform=val_transform)

    train_loader = DataLoader(gmdataset_train,
                              batch_size=16,
                              shuffle=True,
                              pin_memory=True,
                              collate_fn=mt_collate,
                              num_workers=1)

    val_loader = DataLoader(gmdataset_val,
                            batch_size=16,
                            shuffle=True,
                            pin_memory=True,
                            collate_fn=mt_collate,
                            num_workers=1)

    # import ipdb as pdb; pdb.set_trace()

    utility.create_log_file(config)
    utility.log_info(
        config, "{0}\nStarting experiment {1}\n{0}\n".format(
            50 * "=", utility.get_experiment_name(config)))
    model = Unet(drop_rate=0.4, bn_momentum=0.1, config=config)
    # print(model)
    #summary(model, (3, 224, 224))

    # import ipdb as pdb; pdb.set_trace()
    if config['operation_mode'].lower() in ("retrain", "inference"):
        print("Using a trained model...")
        model.load_state_dict(torch.load(config['trained_model']))
    elif config["operation_mode"].lower() == "visualize":
        print("Visualizing weights...")
        if cuda:
            model.load_state_dict(torch.load(config['trained_model']))
        else:
            model.load_state_dict(
                torch.load(config['trained_model'], map_location='cpu'))
        v.visualize_model(model, config)
        return

    # import ipdb as pdb; pdb.set_trace()
    if cuda:
        model.cuda()

    num_epochs = config["num_epochs"]
    initial_lr = config["lr"]

    optimizer = optim.Adam(model.parameters(), lr=initial_lr)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, num_epochs)

    betas = torch.linspace(3.0, 8.0, num_epochs)
    best_dice = 0
    # import ipdb as pdb; pdb.set_trace()
    writer = SummaryWriter(log_dir=utility.get_experiment_dir(config))
    for epoch in tqdm(range(1, num_epochs + 1)):
        start_time = time.time()

        if not (config['operation_mode'].lower() == "inference"):
            scheduler.step()

            lr = scheduler.get_lr()[0]
            model.beta = betas[epoch - 1]  # for ternary net, set beta
            writer.add_scalar('learning_rate', lr, epoch)

            model.train()
            train_loss_total = 0.0
            num_steps = 0
            for i, batch in enumerate(train_loader):
                input_samples, gt_samples = batch["input"], batch["gt"]
                if cuda:
                    var_input = input_samples.cuda()
                    var_gt = gt_samples.cuda()
                else:
                    var_input = input_samples
                    var_gt = gt_samples
                preds = model(var_input)

                loss = dice_loss(preds, var_gt)
                # if epoch == 1 and i == len(train_loader) - 1:
                #     import ipdb as pdb; pdb.set_trace()
                # if epoch == 4 and i == len(train_loader) - 1:
                #     import ipdb as pdb; pdb.set_trace()
                train_loss_total += loss.item()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                num_steps += 1

                if epoch % 5 == 0:
                    grid_img = vutils.make_grid(input_samples,
                                                normalize=True,
                                                scale_each=True)
                    writer.add_image('Input', grid_img, epoch)

                    grid_img = vutils.make_grid(preds.data.cpu(),
                                                normalize=True,
                                                scale_each=True)
                    writer.add_image('Predictions', grid_img, epoch)

                    grid_img = vutils.make_grid(gt_samples,
                                                normalize=True,
                                                scale_each=True)
                    writer.add_image('Ground Truth', grid_img, epoch)

        if not (config['operation_mode'].lower() == "inference"):
            train_loss_total_avg = train_loss_total / num_steps

        # import ipdb as pdb; pdb.set_trace()
        model.eval()
        val_loss_total = 0.0
        num_steps = 0

        metric_fns = [
            dice_score, hausdorff_score, precision_score, recall_score,
            specificity_score, intersection_over_union, accuracy_score
        ]

        metric_mgr = MetricManager(metric_fns)

        for i, batch in enumerate(val_loader):
            # import ipdb as pdb; pdb.set_trace()
            input_samples, gt_samples = batch["input"], batch["gt"]

            with torch.no_grad():
                if cuda:
                    var_input = input_samples.cuda()
                    var_gt = gt_samples.cuda()
                else:
                    var_input = input_samples
                    var_gt = gt_samples

                preds = model(var_input)
                loss = dice_loss(preds, var_gt)
                val_loss_total += loss.item()

            # Metrics computation
            gt_npy = gt_samples.numpy().astype(np.uint8)
            gt_npy = gt_npy.squeeze(axis=1)

            preds = preds.data.cpu().numpy()
            # if np.isnan(preds).any():
            #     import ipdb as pdb; pdb.set_trace()
            preds = threshold_predictions(preds)
            preds = preds.astype(np.uint8)
            preds = preds.squeeze(axis=1)

            metric_mgr(preds, gt_npy)

            num_steps += 1
        metrics_dict = metric_mgr.get_results()
        metric_mgr.reset()

        writer.add_scalars('metrics', metrics_dict, epoch)

        val_loss_total_avg = val_loss_total / num_steps

        if not (config['operation_mode'].lower() == "inference"):
            writer.add_scalars('losses', {'train_loss': train_loss_total_avg},
                               epoch)
            writer.add_scalars('losses', {
                'val_loss': val_loss_total_avg,
                'train_loss': train_loss_total_avg
            }, epoch)

        end_time = time.time()
        total_time = end_time - start_time
        log_str = "Epoch {} took {:.2f} seconds dice_score={}.".format(
            epoch, total_time, metrics_dict["dice_score"])
        utility.log_info(config, log_str)
        tqdm.write(log_str)
        if metrics_dict["dice_score"] > best_dice:
            best_dice = metrics_dict["dice_score"]
            utility.save_model(model=model, config=config)
    if not (config['operation_mode'].lower() == "inference"):
        utility.save_model(model=model, config=config)
Example #14
# Next, five fully connected layers
model.add(Dense(1164, activation='relu'))

model.add(Dropout(keep_prob))  # note: Keras Dropout takes the fraction of units to drop, not to keep
model.add(Dense(100, activation='relu'))

model.add(Dense(50, activation='relu'))

model.add(Dense(10, activation='relu'))

model.add(Dense(1))

model.summary()

#model.compile(optimizer=Adam(learning_rate), loss="mse",metrics=['accuracy'] )
model.compile(optimizer=Adam(learning_rate), loss="mse")

# create two generators for training and validation
train_data_gen = utility.generate_train_batch()
validation_data_gen = utility.generate_val_batch()

# fit_generator with samples_per_epoch / nb_epoch / nb_val_samples is the Keras 1 API;
# current Keras uses model.fit(..., steps_per_epoch=..., epochs=..., validation_steps=...)
history = model.fit_generator(train_data_gen,
                              samples_per_epoch=num_train_images,
                              nb_epoch=number_of_epochs,
                              validation_data=validation_data_gen,
                              nb_val_samples=num_val_images,
                              verbose=1)

# finally save our model and weights
utility.save_model(model)
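For a Keras model like this one, `utility.save_model` typically stores the architecture and weights
separately; a sketch under that assumption (file names are illustrative) is:

def save_model(model, json_path='model.json', weights_path='model.h5'):
    # architecture as JSON, weights in HDF5 format
    with open(json_path, 'w') as f:
        f.write(model.to_json())
    model.save_weights(weights_path)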
Example #15
def run_main(config):
    dataset_base_path = "./data/"
    target_path = natsorted(glob(dataset_base_path + '/mask/*.png'))
    image_paths = natsorted(glob(dataset_base_path + '/img/*.png'))
    target_val_path = natsorted(glob(dataset_base_path + '/val_mask/*.png'))
    image_val_path = natsorted(glob(dataset_base_path + '/val_img/*.png'))

    nih_dataset_train = EMdataset(image_paths=image_paths,
                                  target_paths=target_path)
    nih_dataset_val = EMdataset(image_paths=image_val_path,
                                target_paths=target_val_path)

    #import ipdb as pdb; pdb.set_trace()
    train_loader = DataLoader(nih_dataset_train,
                              batch_size=16,
                              shuffle=True,
                              num_workers=1)
    val_loader = DataLoader(nih_dataset_val,
                            batch_size=16,
                            shuffle=True,
                            num_workers=1)
    model = m.Unet(drop_rate=0.4, bn_momentum=0.1, config=config)
    if config['operation_mode'].lower() in ("retrain", "inference"):
        print("Using a trained model...")
        model.load_state_dict(torch.load(config['trained_model']))
    elif config["operation_mode"].lower() == "visualize":
        print("Using a trained model...")
        if cuda:
            model.load_state_dict(torch.load(config['trained_model']))
        else:
            model.load_state_dict(
                torch.load(config['trained_model'], map_location='cpu'))
        v.visualize_model(model, config)
        return

    # import ipdb as pdb; pdb.set_trace()
    if cuda:
        model.cuda()
        print('gpu_activate')

    num_epochs = config["num_epochs"]
    initial_lr = config["lr"]
    experiment_path = config["log_output_dir"] + config['experiment_name']
    output_image_dir = experiment_path + "/figs/"

    betas = torch.linspace(3.0, 8.0, num_epochs)

    # criterion  = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=initial_lr)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, num_epochs)

    # import ipdb as pdb; pdb.set_trace()
    writer = SummaryWriter(log_dir=utility.get_experiment_dir(config))
    best_score = 0
    for epoch in tqdm(range(1, num_epochs + 1)):
        start_time = time.time()

        scheduler.step()

        lr = scheduler.get_lr()[0]
        model.beta = betas[epoch - 1]  # for ternary net, set beta
        writer.add_scalar('learning_rate', lr, epoch)

        model.train()
        train_loss_total = 0.0
        num_steps = 0
        capture = True
        for i, batch in enumerate(train_loader):
            input_samples, gt_samples = batch[0], batch[1]

            if cuda:
                var_input = input_samples.cuda()
                var_gt = gt_samples.cuda()
            else:
                var_input = input_samples
                var_gt = gt_samples
            preds = model(var_input)
            loss = dice_loss(preds, var_gt)
            # import ipdb as pdb; pdb.set_trace()
            var_gt = var_gt.float()
            train_loss_total += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            num_steps += 1
            if epoch % 1 == 0 and capture:
                capture = False
                input_samples, gt_samples = get_samples(
                    image_val_path, target_val_path, 4)
                if cuda:
                    input_samples = input_samples.cuda()
                preds = model(input_samples)
                input_samples = input_samples.data.cpu().numpy()
                preds = preds.data.cpu().numpy()
                # import ipdb as pdb; pdb.set_trace()
                save_image(input_samples[0][0], gt_samples[0][0], preds[0][0],
                           epoch, 0, output_image_dir)

        train_loss_total_avg = train_loss_total / num_steps

        # import ipdb as pdb; pdb.set_trace()
        model.eval()
        val_loss_total = 0.0
        num_steps = 0

        metric_fns = [
            dice_score, hausdorff_score, precision_score, recall_score,
            specificity_score, intersection_over_union, accuracy_score
        ]

        metric_mgr = MetricManager(metric_fns)

        for i, batch in enumerate(val_loader):
            input_samples, gt_samples = batch[0], batch[1]

            with torch.no_grad():
                if cuda:
                    var_input = input_samples.cuda()
                    var_gt = gt_samples.cuda(non_blocking=True)  # `async` is a reserved word in Python 3.7+
                else:
                    var_input = input_samples
                    var_gt = gt_samples

                preds = model(var_input)
                loss = dice_loss(preds, var_gt)
                # loss = criterion(preds, var_gt)
                # loss = weighted_bce_loss(preds, var_gt, 0.5, 2.5)
                val_loss_total += loss.item()

            gt_npy = gt_samples.data.cpu().numpy()  #.astype(np.uint8)
            gt_npy = gt_npy.squeeze(axis=1)

            preds = preds.data.cpu().numpy()
            preds = threshold_predictions(preds)
            # preds = preds.astype(np.uint8)
            preds = preds.squeeze(axis=1)

            metric_mgr(preds, gt_npy)

            num_steps += 1

        metrics_dict = metric_mgr.get_results()
        metric_mgr.reset()

        writer.add_scalars('metrics', metrics_dict, epoch)

        val_loss_total_avg = val_loss_total / num_steps

        writer.add_scalars('losses', {
            'val_loss': val_loss_total_avg,
            'train_loss': train_loss_total_avg
        }, epoch)

        end_time = time.time()
        total_time = end_time - start_time
        msg = "Epoch {} took {:.2f} seconds dice_score={}. precision={} iou={} loss_train={} val_loss={}".format(
            epoch, total_time, metrics_dict["dice_score"],
            metrics_dict["precision_score"],
            metrics_dict["intersection_over_union"], train_loss_total_avg,
            val_loss_total_avg)
        utility.log_info(config, msg)
        tqdm.write(msg)
        writer.add_scalars('losses', {'train_loss': train_loss_total_avg},
                           epoch)

        if metrics_dict["dice_score"] > best_score:
            best_score = metrics_dict["dice_score"]
            utility.save_model(model=model, config=config)

    if not (config['operation_mode'].lower() == "inference"):
        utility.save_model(model=model, config=config)
    classification_report_df.to_csv(config.generic_path +
                                    "validation_classification_report.csv")

    graph["train_epoch_loss_list"].append(train_epoch_loss)
    graph['train_epoch_accu_list'].append(train_epoch_accu)
    graph['valid_epoch_loss_list'].append(valid_epoch_loss)
    graph['valid_epoch_accu_list'].append(valid_epoch_accu)

    validation_f1_score_macro = f1_score(y_valid_actual,
                                         y_valid_predicted,
                                         average="macro")
    print("validation_f1_score_macro: {}".format(validation_f1_score_macro))
    graph['validation_f1_score_macro_list'].append(validation_f1_score_macro)

    if valid_epoch_accu > best_validation_accuracy:

        # Creating check point
        utility.save_model(EPOCH=epoch,
                           model=model,
                           optimizer=optimizer,
                           LOSS=train_epoch_loss,
                           ACCURACY=train_epoch_accu,
                           PATH=config.checkpoint_path)

        best_validation_accuracy = valid_epoch_accu
        graph["best_validation_accuracy"] = best_validation_accuracy

    print("graph: {}".format(graph))
    utility.save_graph(graph_data=graph, path=config.generic_path)
    bot.telegram_bot_sendtext(graph)
Example #17
        test_accuracy = test_model(test_loader)
        scheduler.step()
        # import ipdb as pdb; pdb.set_trace()
        print(
            '[{0}] Test Accuracy of the model on the 10000 test images: {1} , lr:{2}, loss:{3}'
            .format(epoch, test_accuracy, get_lr(optimizer), loss.item()))
        # print('Test Accuracy of the model on the 10000 test images: {0}'.format(test_accuracy))


if __name__ == '__main__':
    args = utility.parse_args()
    model_type = args['modelype']
    config_file = args['configfile']
    config = config.Configuration(model_type, config_file)
    print(config.get_config_str())
    config = config.config_dict
    model, criterion, optimizer, scheduler = build_model(config)
    # import ipdb as pdb; pdb.set_trace()
    if torch.cuda.is_available():
        model = model.cuda()
    train_loader, test_loader, train_dataset, test_dataset = utility.load_dataset(
        config)
    if config['operation_mode'] == "inference":
        model_inference(test_loader, config)
    else:
        train_model(model, criterion, optimizer, scheduler, train_loader,
                    train_dataset, test_loader, config)
    # test_model(test_loader)
    # Save the Trained Model
    utility.save_model(config=config, model=model)
    train_loss = 0
    step = 0
    for images, labels in loader['trainloader']:
        step += 1
        model.train()
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()

        yhat = model(images)
        loss = criterion(yhat, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

        if step % 30 == 0:
            val_loss, val_acc = validation(model, criterion,
                                           loader['valloader'], device)
            print('Step: {}; Validation Loss: {}; Validation Accuracy: {}'.
                  format(step, val_loss, val_acc))

    print('Epoch: {}/{}; Training Loss: {}'.format(
        epoch + 1, epochs, train_loss / len(loader['trainloader'])))

# Test Accuracy:
test_loss, test_acc = validation(model, criterion, loader['testloader'],
                                 device)

print('Test Loss: {}; Test Accuracy: {}'.format(test_loss, test_acc))

save_model(model, categories_to_names, args.save_dir)
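The call `save_model(model, categories_to_names, args.save_dir)` above suggests a checkpoint that
bundles the weights with the class mapping; a minimal sketch under that assumption is:

import os

import torch


def save_model(model, categories_to_names, save_dir):
    checkpoint = {
        'state_dict': model.state_dict(),
        'categories_to_names': categories_to_names,  # needed to decode predictions later
    }
    torch.save(checkpoint, os.path.join(save_dir, 'checkpoint.pth'))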
def train_stacked_ae(encoder: nn.Module,
                     decoder: nn.Module,
                     train_loader,
                     device,
                     name,
                     latent_size=4,
                     epochs=50,
                     num_imgs=9,
                     Optimizer=optim.Adam):
    train_losses = []
    bunch = get_bunch(train_loader)
    latent_sample = torch.randn(size=(
        num_imgs,
        latent_size,
    ), device=device)
    latent_sample[0] = 0
    original_images = next(iter(train_loader))[0].to(device)
    original_images = original_images[:min(num_imgs,
                                           original_images.size()[0])]
    save_img(
        get_grid(original_images.to("cpu")),
        os.path.join(CKPT_PATH, name, "compressed_images", "original.png"))
    criterion = nn.MSELoss()
    scheduler = optimizer = Optimizer(
        list(encoder.parameters()) + list(decoder.parameters()))
    # if the Optimizer factory returned a scheduler that wraps an optimizer,
    # unwrap down to the innermost optimizer; otherwise there is no scheduler
    while True:
        try:
            optimizer = optimizer.optimizer
        except AttributeError:
            break
    if scheduler == optimizer:
        scheduler = False
    for epoch in tqdm.trange(epochs):  # loop over the dataset multiple times
        torch.cuda.empty_cache()
        decoder = decoder.train().to(device)
        encoder = encoder.train().to(device)
        running_loss = 0.0
        print(epoch)
        for i, data in tqdm.tqdm(enumerate(train_loader, 0)):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs = inputs.to(device=device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = decoder(encoder(inputs, epoch), epoch)
            loss = criterion(outputs, inputs)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
        if scheduler:
            scheduler.step()
        train_losses.append(running_loss / (i + 1))  # average loss per batch this epoch
        logging.info(f"epoch {epoch}: loss={running_loss / (i + 1)}")
        decoder = decoder.eval()
        save_image(
            decoder(encoder(original_images, epoch), epoch).detach().to("cpu"),
            "compressed_images", name, epoch)
        save_image(
            decoder(latent_sample).detach().to("cpu"), "generated_images",
            name, epoch)
        torch.cuda.empty_cache()
        mean, cov = latent_space_pca(encoder, bunch)
        save_image(
            decoder(
                normal_to_pc(latent_sample, mean.to(device),
                             cov.to(device))).detach().to("cpu"),
            "pca_gen_images", name, epoch)
        del mean, cov
        torch.cuda.empty_cache()
        save_labeled_pca_gen_images(encoder, decoder, latent_sample, bunch,
                                    name, epoch)
        save_model(encoder, decoder, name)
    return train_losses
Example #20
# Train the model on the training data and evaluate it on the validation data. Save the model and the
# resulting plots in the output path.
training_losses, training_accuracies, validation_losses, validation_accuracies = [], [], [], []
for epoch in range(1, input_arguments.epochs + 1):
    training_results = utility.run_epoch(fasttext, training_iterator,
                                         loss_function, optimizer)
    training_losses.append(training_results[0])
    training_accuracies.append(training_results[1])

    validation_results = utility.run_epoch(fasttext, validation_iterator,
                                           loss_function)
    validation_losses.append(validation_results[0])
    validation_accuracies.append(validation_results[1])

    print(
        "Epoch: {} | Training Loss: {:.4f} | Training Accuracy: {:.2f} | Validation Loss: {:.4f} |"
        " Validation Accuracy: {:.2f}".format(epoch, training_losses[-1],
                                              training_accuracies[-1],
                                              validation_losses[-1],
                                              validation_accuracies[-1]))
utility.save_training_plots(input_arguments.output_path, training_losses,
                            training_accuracies, validation_losses,
                            validation_accuracies)
utility.save_model(fasttext, input_arguments.output_path)

# Evaluate the model on the test data and save the results.
test_loss, test_accuracy = utility.run_epoch(fasttext, test_iterator,
                                             loss_function)
utility.save_test_results(input_arguments.output_path, test_loss,
                          test_accuracy)
        sys.exit()
    if number_of_epochs <= 0 or number_of_epochs > 10000:
        print("The number of epochs value should be between 1 and 10000")
        sys.exit()


check_arguments()
print("The training will be processed using", device)

# data processing
datasets, loaders = utility.data_processing(data_dir)
# building model, criterion, and optimizer
model = utility.model_build(arch, hidden_size, output)
criterion, optimizer = utility.crit_optim(model, lr)
# training the model
model = utility.train_model(loaders["train"], loaders["valid"], model,
                            criterion, optimizer, device, number_of_epochs, 40)
# print accuracy
__, test_accuracy = utility.loss_accuracy(loaders["test"], model, criterion,
                                          device)
print("The model accuracy on the test set is: {:.2f}%".format(test_accuracy *
                                                              100))
# save the model
model_name = utility.save_model(model,
                                datasets["train"],
                                optimizer,
                                arch,
                                output,
                                hidden_size,
                                model_dir="/")
print("The model was saved in:", model_name)
Example #22
def run_main(config):
    train_transform = transforms.Compose([
        CenterCrop2D((200, 200)),
        ElasticTransform(alpha_range=(28.0, 30.0),
                         sigma_range=(3.5, 4.0),
                         p=0.3),
        RandomAffine(degrees=4.6, scale=(0.98, 1.02), translate=(0.03, 0.03)),
        RandomTensorChannelShift((-0.10, 0.10)),
        ToTensor(),
        NormalizeInstance(),
    ])

    val_transform = transforms.Compose([
        CenterCrop2D((200, 200)),
        ToTensor(),
        NormalizeInstance(),
    ])
    # import ipdb as pdb; pdb.set_trace()
    dataset_base_path = "/export/tmp/hemmat/datasets/em_challenge/"
    target_path = natsort.natsorted(glob.glob(dataset_base_path +
                                              'mask/*.PNG'))
    image_paths = natsort.natsorted(glob.glob(dataset_base_path +
                                              'data/*.PNG'))
    target_val_path = natsort.natsorted(
        glob.glob(dataset_base_path + 'val_mask/*.PNG'))
    image_val_path = natsort.natsorted(
        glob.glob(dataset_base_path + 'val_img/*.PNG'))

    gmdataset_train = EMdataset(image_paths=image_paths,
                                target_paths=target_path)
    gmdataset_val = EMdataset(image_paths=image_val_path,
                              target_paths=target_val_path)
    train_loader = DataLoader(gmdataset_train,
                              batch_size=5,
                              shuffle=True,
                              num_workers=1)
    val_loader = DataLoader(gmdataset_val,
                            batch_size=4,
                            shuffle=True,
                            num_workers=1)

    utility.create_log_file(config)
    utility.log_info(
        config, "{0}\nStarting experiment {1}\n{0}\n".format(
            50 * "=", utility.get_experiment_name(config)))
    # import ipdb as pdb; pdb.set_trace()
    model = m.Unet(drop_rate=0.4, bn_momentum=0.1, config=config)
    if config['operation_mode'].lower() in ("retrain", "inference"):
        print("Using a trained model...")
        model.load_state_dict(torch.load(config['trained_model']))
    elif config["operation_mode"].lower() == "visualize":
        print("Using a trained model...")
        if cuda:
            model.load_state_dict(torch.load(config['trained_model']))
        else:
            model.load_state_dict(
                torch.load(config['trained_model'], map_location='cpu'))
        mv.visualize_model(model, config)
        return

    # import ipdb as pdb; pdb.set_trace()
    if cuda:
        model.cuda()

    num_epochs = config["num_epochs"]
    initial_lr = config["lr"]
    experiment_path = config["log_output_dir"] + config['experiment_name']
    output_image_dir = experiment_path + "/figs/"

    betas = torch.linspace(3.0, 8.0, num_epochs)
    optimizer = optim.Adam(model.parameters(), lr=initial_lr)
    # scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, num_epochs)
    lr_milestones = range(0, int(num_epochs), int(int(num_epochs) / 5))
    lr_milestones = lr_milestones[1:]
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=lr_milestones,
                                               gamma=0.1)

    # import ipdb as pdb; pdb.set_trace()
    writer = SummaryWriter(log_dir=utility.get_experiment_dir(config))
    best_dice = 0
    for epoch in tqdm(range(1, num_epochs + 1)):
        start_time = time.time()

        scheduler.step()

        lr = scheduler.get_lr()[0]
        model.beta = betas[epoch - 1]  # for ternary net, set beta
        writer.add_scalar('learning_rate', lr, epoch)

        model.train()
        train_loss_total = 0.0
        num_steps = 0
        capture = True
        for i, batch in enumerate(train_loader):
            #import ipdb as pdb; pdb.set_trace()
            input_samples, gt_samples, idx = batch[0], batch[1], batch[2]

            if cuda:
                var_input = input_samples.cuda()
                var_gt = gt_samples.cuda(non_blocking=True)  # `async` is a reserved word in Python 3.7+
                var_gt = var_gt.float()
            else:
                var_input = input_samples
                var_gt = gt_samples
                var_gt = var_gt.float()
            preds = model(var_input)

            # import ipdb as pdb; pdb.set_trace()
            loss = calc_loss(preds, var_gt)
            train_loss_total += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            num_steps += 1
            if epoch % 5 == 0 and capture:
                capture = False
                input_samples, gt_samples = get_samples(
                    image_val_path, target_val_path, 4)
                if cuda:
                    input_samples = input_samples.cuda()
                preds = model(input_samples)
                input_samples = input_samples.data.cpu().numpy()
                preds = preds.data.cpu().numpy()
                # import ipdb as pdb; pdb.set_trace()
                save_image(input_samples[0][0], gt_samples[0][0], preds[0][0],
                           epoch, 0, output_image_dir)

        train_loss_total_avg = train_loss_total / num_steps

        # import ipdb as pdb; pdb.set_trace()
        model.eval()  # switch to evaluation mode for the validation pass
        val_loss_total = 0.0
        num_steps = 0

        metric_fns = [
            dice_score, hausdorff_score, precision_score, recall_score,
            specificity_score, intersection_over_union, accuracy_score,
            rand_index_score
        ]

        metric_mgr = MetricManager(metric_fns)

        for i, batch in enumerate(val_loader):
            #            input_samples, gt_samples = batch[0], batch[1]
            input_samples, gt_samples, idx = batch[0], batch[1], batch[2]
            with torch.no_grad():
                if cuda:
                    var_input = input_samples.cuda()
                    var_gt = gt_samples.cuda(non_blocking=True)  # `async` is a reserved word in Python 3.7+
                    var_gt = var_gt.float()
                else:
                    var_input = input_samples
                    var_gt = gt_samples
                    var_gt = var_gt.float()
                # import ipdb as pdb; pdb.set_trace()
                preds = model(var_input)
                loss = dice_loss(preds, var_gt)
                val_loss_total += loss.item()
            # Metrics computation
            gt_npy = gt_samples.numpy().astype(np.uint8)
            gt_npy = gt_npy.squeeze(axis=1)

            preds = preds.data.cpu().numpy()
            preds = threshold_predictions(preds)
            preds = preds.astype(np.uint8)
            preds = preds.squeeze(axis=1)
            metric_mgr(preds, gt_npy)
            #save_image(input_samples[0][0], preds[0], gt_samples, epoch, idx[0])
            # save_pred(model, image_val_path, epoch, output_image_dir)
            num_steps += 1

        metrics_dict = metric_mgr.get_results()
        metric_mgr.reset()

        writer.add_scalars('metrics', metrics_dict, epoch)

        # import ipdb as pdb; pdb.set_trace()
        val_loss_total_avg = val_loss_total / num_steps

        writer.add_scalars('losses', {
            'val_loss': val_loss_total_avg,
            'train_loss': train_loss_total_avg
        }, epoch)

        end_time = time.time()
        total_time = end_time - start_time
        # import ipdb as pdb; pdb.set_trace()
        log_str = "Epoch {} took {:.2f} seconds train_loss={}   dice_score={}   rand_index_score={}  lr={}.".format(
            epoch, total_time, train_loss_total_avg,
            metrics_dict["dice_score"], metrics_dict["rand_index_score"],
            get_lr(optimizer))
        utility.log_info(config, log_str)
        tqdm.write(log_str)

        writer.add_scalars('losses', {'train_loss': train_loss_total_avg},
                           epoch)
        if metrics_dict["dice_score"] > best_dice:
            best_dice = metrics_dict["dice_score"]
            utility.save_model(model=model, config=config)
    if not (config['operation_mode'].lower() == "inference"):
        utility.save_model(model=model, config=config)
def train_vae(encoder: nn.Module,
              decoder: nn.Module,
              train_loader,
              device,
              name,
              epochs=50,
              num_imgs=9,
              Optimizer=optim.Adam,
              loss_type=0,
              val_loader=None):
    os.makedirs(os.path.join(CKPT_PATH, name), exist_ok=True)
    with open(os.path.join(CKPT_PATH, name, "loss_log.txt"), "w") as loss_log:
        pass
    latent_sample = torch.randn(size=(
        num_imgs,
        encoder.latent_size,
    ),
                                device=device)
    latent_sample[0] = 0
    original_images = next(iter(train_loader))[0].to(device)
    original_images = original_images[:min(num_imgs,
                                           original_images.size()[0])]
    save_img(
        get_grid(original_images.to("cpu")),
        os.path.join(CKPT_PATH, name, "compressed_images", "original.png"))
    save_img(
        get_grid(original_images.to("cpu")),
        os.path.join(CKPT_PATH, name, "compressed_sampled_images",
                     "original.png"))
    criterion = nn.MSELoss()
    scheduler = optimizer = Optimizer(
        list(encoder.parameters()) + list(decoder.parameters()))
    log_scale = nn.Parameter(torch.Tensor([0.0])).to(device)
    train_losses = []
    train_kls = []
    val_losses = []
    val_kls = []
    # unwrap a scheduler-wrapped optimizer down to the innermost optimizer, as in train_stacked_ae
    while True:
        try:
            optimizer = optimizer.optimizer
        except AttributeError:
            break
    if scheduler == optimizer:
        scheduler = False

    for epoch in tqdm.trange(epochs):  # loop over the dataset multiple times
        train_loss, train_kl = train_epoch(decoder, device, encoder,
                                           train_loader, loss_type, criterion,
                                           log_scale, optimizer, scheduler)
        val_loss, val_kl = val_epoch(decoder, device, encoder, val_loader,
                                     loss_type, criterion, log_scale)

        train_losses.append(train_loss)
        train_kls.append(train_kl)
        val_losses.append(val_loss)
        val_kls.append(val_kl)
        loss_msg = f"epoch {epoch}: train_loss={train_loss}, train_kl={train_kl}, val_loss={val_loss}, val_kl={val_kl}"
        logging.info(loss_msg)
        with open(os.path.join(CKPT_PATH, name, "loss_log.txt"),
                  "a") as loss_log:
            loss_log.write(loss_msg)
        plot_loss(train_losses, train_kls, val_losses, val_kls, name)
        make_images(decoder, device, encoder, original_images, name, epoch,
                    latent_sample)
        save_model(encoder, decoder, name)
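Several of the autoencoder/GAN examples above call `save_model(encoder, decoder, name)` (or the
discriminator/generator equivalent); a sketch consistent with the CKPT_PATH layout used in those
functions, assuming plain state_dict checkpoints, is:

import os

import torch


def save_model(encoder, decoder, name):
    ckpt_dir = os.path.join(CKPT_PATH, name)  # CKPT_PATH is assumed to be defined as in the examples above
    os.makedirs(ckpt_dir, exist_ok=True)
    torch.save(encoder.state_dict(), os.path.join(ckpt_dir, 'encoder.pth'))
    torch.save(decoder.state_dict(), os.path.join(ckpt_dir, 'decoder.pth'))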