Example #1
    def evaluation(self, global_step):
        eval_input_fn = self.input_fn_builder(features=self.dev_features,
                                              seq_length=self.max_seq_length,
                                              is_training=False,
                                              drop_remainder=False)

        predictions = self.estimator.predict(eval_input_fn,
                                             yield_single_examples=False)
        res = np.concatenate([a["prob"] for a in predictions], axis=0)

        metrics = PRF(np.array(self.dev_label), res.argmax(axis=-1))

        print('\nGlobal step:', global_step)
        MAP, AvgRec, MRR = eval_reranker(self.dev_cid, self.dev_label, res[:, 0])

        metrics['MAP'] = MAP
        metrics['AvgRec'] = AvgRec
        metrics['MRR'] = MRR

        metrics['global_step'] = global_step

        print_metrics(metrics, 'dev', save_dir=self._log_save_path)

        return MAP * 100, MRR
Example #2
    def print_info(self, iter_time, name, loss):
        # Log the named loss and the run configuration every `print_freq` iterations.
        if np.mod(iter_time, self.flags.print_freq) == 0:
            ord_output = collections.OrderedDict([(name, loss), ('dataset', self.flags.dataset),
                                                  ('discriminator', self.flags.discriminator),
                                                  ('train_interval', np.float32(self.flags.train_interval)),
                                                  ('gpu_index', self.flags.gpu_index)])
            utils.print_metrics(iter_time, ord_output)
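
This example, like many others on this page (e.g. Examples 6, 9-11, 13, 16, 17, 22, 23, 28, 32), calls utils.print_metrics(iter_time, ord_output) with an iteration counter and a collections.OrderedDict. The helper itself never appears on this page; a minimal sketch consistent with those call sites (a hypothetical reconstruction, not the actual utils module) could be:

import collections

def print_metrics(iter_time, metrics):
    # Hypothetical reconstruction: print every (name, value) pair on one
    # line, prefixed with the iteration counter seen in the call sites.
    items = ', '.join('{}: {}'.format(k, v) for k, v in metrics.items())
    print('[iter {:>8}] {}'.format(iter_time, items))

# usage matching the call sites above
print_metrics(100, collections.OrderedDict([('d_loss', 0.31), ('g_loss', 1.27)]))
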
Example #3
def test(args, model, data_path, fold, gpu, dicts, data_loader):
    filename = data_path.replace('train', fold)
    device = torch.device('cuda:{}'.format(args.gpu)) if args.gpu != -1 else torch.device('cpu')
    print('file for evaluation: %s' % filename)
    num_labels = len(dicts['ind2c'])
    y, yhat, yhat_raw, hids, losses = [], [], [], [], []

    model.eval()
    data_iter = iter(data_loader)
    num_iter = len(data_loader)

    for i in range(num_iter):
        with torch.no_grad():
            inputs_id, labels, text_inputs, inputs_mask = next(data_iter)
            inputs_id, labels = inputs_id.to(device), labels.to(device)
            output, loss = model(inputs_id, labels, None)
            output = torch.sigmoid(output)
            output = output.data.cpu().numpy()
            losses.append(loss.item())
            target_data = labels.data.cpu().numpy()
            yhat_raw.append(output)
            output = np.round(output)
            y.append(target_data)
            yhat.append(output)

    y = np.concatenate(y, axis=0)
    yhat = np.concatenate(yhat, axis=0)
    yhat_raw = np.concatenate(yhat_raw, axis=0)

    k = 5 if num_labels == 50 else [8, 15]
    metrics = all_metrics(yhat, y, k=k, yhat_raw=yhat_raw)
    print_metrics(metrics)
    metrics['loss_%s' % fold] = np.mean(losses)
    return metrics
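
Examples 3 and 7 use a different print_metrics variant that takes the metrics dictionary returned by all_metrics. A plausible sketch of such a dict-printing variant (hypothetical, inferred only from these call sites) is:

def print_metrics(metrics):
    # Hypothetical dict-based variant: print each metric name with its
    # value, formatting floats to four decimal places.
    for name in sorted(metrics):
        value = metrics[name]
        if isinstance(value, float):
            print('{}: {:.4f}'.format(name, value))
        else:
            print('{}: {}'.format(name, value))
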
Example #4
def train(model, train_data_loader, val_data_loader, loss_fn, optimizer,
          n_epochs, model_name):
    best_auprc = -1
    for epoch_i in range(1, n_epochs + 1):

        start = time.time()
        model.train()
        ## Training
        train_loss, train_metrics = run_batch(model, optimizer,
                                              train_data_loader, epoch_i,
                                              "train", loss_fn)

        model.eval()
        with paddle.no_grad():

            ## Validation
            if val_data_loader:
                val_loss, val_metrics = run_batch(model, optimizer,
                                                  val_data_loader, epoch_i,
                                                  "val", loss_fn)
                if best_auprc < val_metrics[1]:
                    current_state = get_model_params_state(
                        model, args, epoch_i, *val_metrics)
                    paddle.save(current_state, f"{model_name}.pdparams")
                    best_auprc = val_metrics[1]

        if train_data_loader:
            print(f"\n#### Epoch {epoch_i} time {time.time() - start:.4f}s")
            print_metrics(train_loss, 0, 0)

        if val_data_loader:
            print(f"#### Validation epoch {epoch_i}")
            print_metrics(val_loss, *val_metrics)
Example #5
def main():
    path = '../data/accidents'
    data = pd.read_csv(f'{path}/accident_data_clean_balanced.csv', header=0)

    # Feature columns
    cat_cols = ['roadway_type', 'intersection', 'light_condition', 'atmospheric_conditions',
                'manner_of_collision', 'body_type', 'vehicle_conditions', 'part_of_day']
    binary_cols = ['land_use_urban', 'national_highway_system', 'previous_dwi_convictions',
                   'previous_speeding_convictions', 'speeding_related', 'driver_vision_obscured', 'is_weekend',
                   'multiple_vehicles', 'nonmotorist_involved', 'multiple_motorists', 'drunk_driver_involved']
    numeric_cols = ['vehicle_year', 'speed_limit']

    data[cat_cols] = data[cat_cols].apply(lambda x: x.astype('category'))

    labels = data['multiple_fatalities']
    features = data[cat_cols + binary_cols + numeric_cols]

    # features = pd.get_dummies(features, columns=cat_cols, drop_first=True)
    # features.rename(columns={'manner_of_collision_Not Collision with Motor Vehicle in Transport (Not Necessarily in Transport for\n2005-2009)': 'manner_of_collision_Not Collision with Motor Vehicle in Transport'},
    #                 inplace=True)
    feature_names = features.columns

    oe = OrdinalEncoder()
    features = oe.fit_transform(features)

    scaler = StandardScaler()
    features = scaler.fit_transform(features)

    X_train, X_test, y_train, y_test = train_test_split(features, labels,
                                                        test_size=0.2, random_state=2020)
    print('Class Balance')
    print(y_test.value_counts())
    print()

    models = {
        'Random Forest': (RandomForestClassifier(n_estimators=100,
                                                 min_samples_leaf=5,
                                                 random_state=2020),
                          'rf'),
        'Logistic Regression': (LogisticRegressionCV(cv=5, scoring='f1',
                                                     max_iter=1000,
                                                     random_state=2020),
                                'lr')
    }

    for name, (model, suffix) in models.items():
        print(name)
        print('-' * 20)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        y_probs = model.predict_proba(X_test)[:, 1]

        utils.print_metrics(y_test, y_pred)
        utils.roc_curve(y_test, y_probs, name, suffix)
        utils.feature_importance(model, feature_names, name, suffix)
        utils.permutation_importances(model, X_test, y_test, feature_names, name, suffix)
        utils.permutation_importances(model, X_train, y_train, feature_names, name, suffix, dataset='train')
        print('#' * 50)
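
Examples 5, 19, and 30 call utils.print_metrics(y_test, y_pred) on label vectors rather than on a metrics dict. Under that assumption, the helper could be a thin wrapper over scikit-learn's scoring functions; a sketch (hypothetical, not the repository's actual utils module):

from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, confusion_matrix)

def print_metrics(y_true, y_pred):
    # Hypothetical wrapper: report the standard binary-classification
    # scores used elsewhere in this script (F1 is also the scoring
    # metric of the GridSearchCV / LogisticRegressionCV calls above).
    print('Accuracy:  {:.4f}'.format(accuracy_score(y_true, y_pred)))
    print('Precision: {:.4f}'.format(precision_score(y_true, y_pred)))
    print('Recall:    {:.4f}'.format(recall_score(y_true, y_pred)))
    print('F1:        {:.4f}'.format(f1_score(y_true, y_pred)))
    print('Confusion matrix:\n{}'.format(confusion_matrix(y_true, y_pred)))
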
Example #6
    def print_info_integrated(self, loss, iter_time):
        if np.mod(iter_time, self.flags.print_freq) == 0:
            ord_output = collections.OrderedDict([('tar_iters', self.flags.iters),
                                                  ('G_gen_loss', loss[0]), ('Dy_dis_loss', loss[1]),
                                                  ('F_gen_loss', loss[2]), ('Dx_dis_loss', loss[3]),
                                                  ('gpu_index', self.flags.gpu_index)])

            utils.print_metrics(iter_time, ord_output)
Example #7
def test(args, model, data_path, fold, gpu, dicts, data_loader):

    filename = data_path.replace('train', fold)
    print('file for evaluation: %s' % filename)
    num_labels = len(dicts['ind2c'])

    y, yhat, yhat_raw, hids, losses = [], [], [], [], []

    model.eval()

    # loader
    data_iter = iter(data_loader)
    num_iter = len(data_loader)
    for i in range(num_iter):
        with torch.no_grad():

            if args.model.find("bert") != -1:
                inputs_id, segments, masks, labels = next(data_iter)

                inputs_id, segments, masks, labels = torch.LongTensor(inputs_id), torch.LongTensor(segments), \
                                                     torch.LongTensor(masks), torch.FloatTensor(labels)

                if gpu >= 0:
                    inputs_id, segments, masks, labels = inputs_id.cuda(
                        gpu), segments.cuda(gpu), masks.cuda(gpu), labels.cuda(gpu)

                output, loss = model(inputs_id, segments, masks, labels)
            else:

                inputs_id, labels, text_inputs = next(data_iter)

                inputs_id, labels = torch.LongTensor(inputs_id), torch.FloatTensor(labels)

                if gpu >= 0:
                    inputs_id, labels, text_inputs = inputs_id.cuda(gpu), labels.cuda(gpu), text_inputs.cuda(gpu)

                output, loss = model(inputs_id, labels, text_inputs)

            output = torch.sigmoid(output)
            output = output.data.cpu().numpy()

            losses.append(loss.item())
            target_data = labels.data.cpu().numpy()

            yhat_raw.append(output)
            output = np.round(output)
            y.append(target_data)
            yhat.append(output)

    y = np.concatenate(y, axis=0)
    yhat = np.concatenate(yhat, axis=0)
    yhat_raw = np.concatenate(yhat_raw, axis=0)

    k = 5 if num_labels == 50 else [8, 15]
    metrics = all_metrics(yhat, y, k=k, yhat_raw=yhat_raw)
    print_metrics(metrics)
    metrics['loss_%s' % fold] = np.mean(losses)
    return metrics
Example #8
def main(config_path='./configs/config.yaml'):
    config = load_config(config_path)

    init_experiment(config)
    set_random_seed(config.seed)

    train_dataset = getattr(data, config.train.dataset.type)(
        config.data_root, **vars(config.train.dataset.params))
    train_loader = getattr(data, config.train.loader.type)(
        train_dataset, **vars(config.train.loader.params))

    val_dataset = getattr(data, config.val.dataset.type)(
        config.data_root, **vars(config.val.dataset.params))
    val_loader = getattr(data, config.val.loader.type)(val_dataset, **vars(
        config.val.loader.params))

    device = torch.device(config.device)
    model = getattr(models,
                    config.model.type)(**vars(config.model.params)).to(device)
    optimizer = getattr(optims, config.optim.type)(model.parameters(),
                                                   **vars(config.optim.params))
    scheduler = None
    loss_f = getattr(losses, config.loss.type)(**vars(config.loss.params))

    early_stopping = EarlyStopping(save=config.model.save,
                                   path=config.model.save_path,
                                   **vars(config.stopper.params))

    train_writer = SummaryWriter(log_dir=os.path.join(config.tb_dir, 'train'))
    val_writer = SummaryWriter(log_dir=os.path.join(config.tb_dir, 'val'))

    for epoch in range(1, config.epochs + 1):
        print(f'Epoch {epoch}')
        train_metrics = train(model, optimizer, train_loader, loss_f, device)
        print_metrics('Train', train_metrics)
        write_metrics(epoch, train_metrics, train_writer)

        val_metrics = val(model, val_loader, loss_f, device)
        print_metrics('Val', val_metrics)
        write_metrics(epoch, val_metrics, val_writer)

        early_stopping(val_metrics['avg_weighted_loss'],
                       model)  # will save the best model to disk
        if early_stopping.early_stop:
            print(f'Early stopping after {epoch} epochs.')
            break

        if scheduler:
            scheduler.step()

    train_writer.close()
    val_writer.close()

    if config.model.save:
        torch.save(
            model.state_dict(),
            config.model.save_path.replace('checkpoint', 'last_checkpoint'))
Example #9
    def print_info(self, loss, iter_time):
        if np.mod(iter_time, self.flags.print_freq) == 0:
            ord_output = collections.OrderedDict([('cur_iter', iter_time), ('tar_iters', self.flags.iters),
                                                  ('batch_size', self.flags.batch_size),
                                                  ('d_loss', loss[0]), ('g_loss', loss[1]),
                                                  ('dataset', self.flags.dataset),
                                                  ('gpu_index', self.flags.gpu_index)])

            utils.print_metrics(iter_time, ord_output)
Example #10
    def print_info(self, loss):
        if np.mod(self.iter_time, self.flags.print_freq) == 0:
            ord_output = collections.OrderedDict([
                ('G_loss', loss[0]), ('Dy_loss', loss[1]), ('F_loss', loss[2]),
                ('Dx_loss', loss[3]), ('dataset', self.dataset.name),
                ('gpu_index', self.flags.gpu_index)
            ])

            utils.print_metrics(self.iter_time, ord_output)
Example #11
    def measure(self, generated, vessels, masks, num_data, iter_time, phase,
                total_time):
        # masking
        vessels_in_mask, generated_in_mask = utils.pixel_values_in_mask(
            vessels, generated, masks)

        # averaging processing time
        avg_pt = (total_time / num_data) * 1000  # average processing time (ms)

        # evaluate Area Under the Curve of ROC and Precision-Recall
        auc_roc = utils.AUC_ROC(vessels_in_mask, generated_in_mask)
        auc_pr = utils.AUC_PR(vessels_in_mask, generated_in_mask)

        # binarize to calculate the Dice coefficient
        binarys_in_mask = utils.threshold_by_otsu(generated, masks)
        dice_coeff = utils.dice_coefficient_in_train(vessels_in_mask,
                                                     binarys_in_mask)
        acc, sensitivity, specificity = utils.misc_measures(
            vessels_in_mask, binarys_in_mask)
        score = auc_pr + auc_roc + dice_coeff + acc + sensitivity + specificity

        # # auc_sum for saving best model in training
        # auc_sum = auc_roc + auc_pr
        # if self.flags.stage == 2:
        #     #auc_sum = auc_roc + auc_pr
        #     auc_sum = auc_roc + auc_pr
        # else:
        #     auc_sum = auc_roc + auc_pr

        auc_sum = dice_coeff + acc + auc_pr

        # print information
        ord_output = collections.OrderedDict([('auc_pr', auc_pr),
                                              ('auc_roc', auc_roc),
                                              ('dice_coeff', dice_coeff),
                                              ('acc', acc),
                                              ('sensitivity', sensitivity),
                                              ('specificity', specificity),
                                              ('score', score),
                                              ('auc_sum', auc_sum),
                                              ('best_auc_sum',
                                               self.best_auc_sum),
                                              ('avg_pt', avg_pt)])
        utils.print_metrics(iter_time, ord_output)

        # write in tensorboard when in train mode only
        if phase == 'train':
            self.model.measure_assign(auc_pr, auc_roc, dice_coeff, acc,
                                      sensitivity, specificity, score,
                                      iter_time)
        elif phase == 'test':
            # write in npy format for evaluation
            utils.save_obj(vessels_in_mask, generated_in_mask,
                           os.path.join(self.auc_out_dir, "auc_roc.npy"),
                           os.path.join(self.auc_out_dir, "auc_pr.npy"))

        return auc_sum
Example #12
def main():
    csv_files = glob.glob(player_dir + "/*.csv")
    abt = []
    ing = []
    n_files = len(csv_files)
    for i, filename in enumerate(csv_files):
        print("elaborating file {} of {}".format(i + 1, n_files))
        ph = pd.read_csv(filename)  # get player history
        ph["timestamp"] = pd.to_numeric(ph["timestamp"], downcast='integer')
        ph.set_index(['timestamp'], inplace=True)
        n_weeks = 52
        for index in range(1, n_weeks+1):
            # consider only the last ABT week
            if only_last_abt_week and index < n_weeks and ph.at[index, 'status'] == 1 and ph.at[index + 1, 'status'] == 1:
                ph.at[index, 'status'] = 0
            if ph.at[index, 'status'] != 2 and index > history-1:  # not churned and has an history
                row = []
                for j in range(history):
                    ev = ph.at[index-j, 'evolution']
                    la = ph.at[index-j, 'lvl_avg']
                    th = ph.at[index-j, 'time_hours']
                    ca = ph.at[index-j, 'current_absence']
                    pr = ph.at[index-j, 'week_present_ratio']
                    row += [ev, la, th, ca, pr]
                if ph.at[index, 'status'] == 0:  # active
                    ing.append(row)
                else:  # about to churn
                    abt.append(row)

    print("ABT sequences: {}".format(len(abt)))
    print("ING sequences: {}".format(len(ing)))
    abt_labels = np.array([1 for i in range(len(abt))])
    ing_labels = np.array([0 for i in range(len(ing))])[:len(abt)]

    abt = np.array(abt)
    ing = np.array(ing)[:len(abt)]

    X, y = shuffle(np.concatenate((abt, ing)), np.concatenate((abt_labels, ing_labels)))

    # scale the data
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # prepare training set and test set
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # use svm
    clf = SVC()
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_pred, y_test)
    print("accuracy on test set: {:.3f}".format(acc))
    utils.print_metrics(y_pred, y_test)

    """
Example #13
    def print_info(self, loss, iter_batch, iter_epoch, tar_batch):
        if np.mod(iter_batch, self.flags.print_freq) == 0:
            ord_output = collections.OrderedDict([
                ('cur_batch', iter_batch), ('tar_batch', tar_batch),
                ('cur_epoch', iter_epoch), ('tar_epochs', self.flags.epochs),
                ('batch_size', self.flags.batch_size), ('total_loss', loss[0]),
                ('gpu_index', self.flags.gpu_index)
            ])

            utils.print_metrics(iter_batch, ord_output)
Example #14
def main():
    csv_path = 'data/all_test_clean.csv'
    tweets, targets, labels = load_csv(csv_path)
    print('--- LOADED CSV ---')
    model = load_bert()
    print('--- LOADED MODEL ---')
    preds = predict(model, tweets, targets)
    save_npy(preds, 'ada_bert', 'preds/')
    print('--- SAVED PREDS ---')
    print_metrics(preds, labels, 'ada_bert')
Example #15
def train_ddi_vae(dim_z, hidden_layers_px, hidden_layers_qz, save_path):
    #############################
    ''' Experiment Parameters '''
    #############################

    num_batches = 100       #Number of minibatches in a single epoch, num_examples % self.num_batches == 0
    # dim_z = 50              #Dimensionality of latent variable (z)
    epochs = 3001           #Number of epochs through the full dataset
    learning_rate = 3e-3    #Learning rate of ADAM
    l2_loss = 1e-6          #L2 Regularisation weight
    seed = 31415            #Seed for RNG

    #Neural Networks parameterising p(x|z), q(z|x)
    # hidden_layers_px = [ 600, 1000, 800, 500 ]
    # hidden_layers_qz = [ 600, 1000, 800, 500 ]

    ####################
    ''' Load Dataset '''
    ####################

    # mnist_path = 'mnist/mnist_28.pkl.gz'
    # #Uses anglpy module from original paper (linked at top) to load the dataset
    # train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy(mnist_path, binarize_y=True)
    #
    # x_train, y_train = train_x.T, train_y.T
    # x_valid, y_valid = valid_x.T, valid_y.T
    # x_test, y_test = test_x.T, test_y.T

    x_train, y_train, x_valid, y_valid, x_test, y_test = load_dataset("/home/cdy/ykq/vae/ddi/train_dataset")
    utils.print_metrics(['x_train', x_train.shape[0], x_train.shape[1]],
                        ['y_train', y_train.shape[0], y_train.shape[1]],
                        ['x_valid', x_valid.shape[0], x_valid.shape[1]],
                        ['y_valid', y_valid.shape[0], y_valid.shape[1]],
                        ['x_test', x_test.shape[0], x_test.shape[1]],
                        ['y_test', y_test.shape[0], y_test.shape[1]],
                        )

    dim_x = x_train.shape[1]
    dim_y = y_train.shape[1]

    ######################################
    ''' Train Variational Auto-Encoder '''
    ######################################

    VAE = DdiVariationalAutoencoder(dim_x=dim_x,
                                    dim_z=dim_z,
                                    hidden_layers_px=hidden_layers_px,
                                    hidden_layers_qz=hidden_layers_qz,
                                    l2_loss=l2_loss)

    # draw_img uses pylab and seaborn to draw images of original vs. reconstruction
    # every n iterations (set to 0 to disable)

    VAE.train(x=x_train, x_valid=x_valid, epochs=epochs, num_batches=num_batches, save_path=save_path,
              learning_rate=learning_rate, seed=seed, stop_iter=30, print_every=10, draw_img=0)
Example #16
    def print_info(self, loss, iter_time, epoch_time):
        if np.mod(iter_time, self.flags.print_freq) == 0:
            ord_output = collections.OrderedDict([
                ('cur_epoch', epoch_time), ('tar_Epoch', self.flags.epoch),
                ('batch_size', self.flags.batch_size), ('G_loss', loss[0]),
                ('Dy_loss', loss[1]), ('F_loss', loss[2]),
                ('Dx_loss', loss[3]), ('dataset', self.flags.dataset),
                ('gpu_index', self.flags.gpu_index)
            ])

            utils.print_metrics(iter_time, ord_output)
Example #17
    def print_info(self, loss, iter_time):
        if np.mod(iter_time, self.flags.print_freq) == 0:
            ord_output = collections.OrderedDict([
                ('cur_iter', iter_time), ('tar_iter', self.num_iters),
                ('batch_size', self.flags.batch_size),
                ('content_loss', loss[0]), ('style_loss', loss[1]),
                ('tv_loss', loss[2]), ('total_loss', loss[3]),
                ('gpu_index', self.flags.gpu_index)
            ])

            utils.print_metrics(iter_time, ord_output)
Example #18
def main(seg1_fname, seg2_fname,
         calc_rand_score=True,
         calc_rand_error=True,
         calc_variation_score=True,
         calc_variation_information=True,
         relabel2d=False,
         foreground_restricted=True,
         split_0_segment=True,
         other=None):
    '''
    Script functionality, computes the overlap matrix,
    computes any specified metrics,
    and prints the results nicely
    '''

    print("Loading Data...")
    seg1 = io_utils.import_file(seg1_fname)
    seg2 = io_utils.import_file(seg2_fname)

    prep = utils.parse_fns(utils.prep_fns,
                           [relabel2d, foreground_restricted])
    seg1, seg2 = utils.run_preprocessing(seg1, seg2, prep)

    om = utils.calc_overlap_matrix(seg1, seg2, split_0_segment)

    # Calculating each desired metric
    metrics = utils.parse_fns(utils.metric_fns,
                              [calc_rand_score,
                               calc_rand_error,
                               calc_variation_score,
                               calc_variation_information])

    results = {}
    for (name, metric_fn) in metrics:
        if relabel2d:
            full_name = "2D {}".format(name)
        else:
            full_name = name

        (f, m, s) = metric_fn(om, full_name, other)
        results["{} Full".format(name)] = f
        results["{} Merge".format(name)] = m
        results["{} Split".format(name)] = s

    print("")
    utils.print_metrics(results)
Example #19
def main():
    path = '../data/accidents'
    data = pd.read_csv(f'{path}/accident_data_clean_balanced.csv', header=0)

    cat_cols = [
        'month', 'roadway_type', 'intersection', 'light_condition',
        'atmospheric_conditions', 'manner_of_collision', 'body_type',
        'vehicle_conditions', 'part_of_day'
    ]
    binary_cols = [
        'land_use_urban', 'national_highway_system',
        'previous_dwi_convictions', 'previous_speeding_convictions',
        'speeding_related', 'driver_vision_obscured', 'is_weekend',
        'multiple_vehicles', 'nonmotorist_involved', 'multiple_motorists',
        'drunk_driver_involved'
    ]
    numeric_cols = ['vehicle_year', 'speed_limit']

    data[cat_cols] = data[cat_cols].apply(lambda x: x.astype('category'))

    labels = data['multiple_fatalities']
    features = data[cat_cols + binary_cols + numeric_cols]
    feature_names = features.columns

    # oe = OrdinalEncoder()
    # features = oe.fit_transform(features)

    features = pd.get_dummies(features, columns=cat_cols)

    # scaler = StandardScaler()
    # features = scaler.fit_transform(features)

    X_train, X_test, y_train, y_test = train_test_split(features,
                                                        labels,
                                                        test_size=0.2,
                                                        random_state=2020)
    print('Class Balance')
    print(y_test.value_counts())
    print()

    model = GridSearchCV(estimator=KNeighborsClassifier(),
                         param_grid={'n_neighbors': range(1, 20, 2)},
                         cv=5,
                         scoring='f1')
    model.fit(X_train, y_train)
    print(model.best_params_)
    print()

    y_pred = model.predict(X_test)
    y_probs = model.predict_proba(X_test)[:, 1]

    utils.print_metrics(y_test, y_pred)
    utils.roc_curve(y_test, y_probs, 'KNN', 'knn')
Example #20
def print_results(plot=False):
    from utils import print_error_hist, calculate_metrics, print_metrics, plot_conf_matrix
    import pandas as pd
    global y_test
    global y_pred
    global y_train

    exp_var, mse, mae, r2, error_percentage, recall, precision = calculate_metrics(y_test, y_pred, y_train, plot)
    if plot:
        print_metrics(exp_var, mse, mae, r2, error_percentage, recall, precision)
    # return the results so the caller can average them over runs
    return exp_var, mse, mae, r2, error_percentage, recall, precision
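
Example 20 passes seven positional regression/classification scores to print_metrics. A matching sketch (again an assumption based purely on this call site) would just label and print each value:

def print_metrics(exp_var, mse, mae, r2, error_percentage, recall, precision):
    # Hypothetical positional variant matching Example 20's call.
    for name, value in [('explained variance', exp_var), ('MSE', mse),
                        ('MAE', mae), ('R^2', r2),
                        ('error %', error_percentage),
                        ('recall', recall), ('precision', precision)]:
        print('{}: {}'.format(name, value))
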
Example #21
    def one_test(self, batch_dataset, config):
        test_data = [
            batch
            for batch in batch_dataset.batch_test_data(2 * config.batch_size)
        ]
        steps = batch_dataset.test_steps_num
        cID = batch_dataset.test_cID
        self.is_train = False
        self.cweight = [1., 1., 1.]
        test_metrics, _ = self.evaluate(test_data, steps, 'test', cID)
        print_metrics(test_metrics,
                      'test',
                      categories_num=self.args.categories_num)
Example #22
    def print_info(self, loss, iter_time, num_try):
        if np.mod(iter_time, self.flags.print_freq) == 0:
            ord_output = collections.OrderedDict([
                ('num_try', num_try), ('tar_try', self.flags.num_try),
                ('cur_iter', iter_time), ('tar_iters', self.flags.iters),
                ('batch_size', self.flags.sample_batch),
                ('context_loss', np.mean(loss[0])),
                ('prior_loss', np.mean(loss[1])),
                ('total_loss', np.mean(loss[2])),
                ('mask_type', self.flags.mask_type),
                ('gpu_index', self.flags.gpu_index)
            ])

            utils.print_metrics(iter_time, ord_output)
Example #23
    def print_info(self, loss, iter_time):
        if np.mod(iter_time, self.flags.print_freq) == 0:
            ord_output = collections.OrderedDict([
                ('cur_iter', iter_time), ('tar_iters', self.flags.iters),
                ('batch_size', self.flags.batch_size), ('G_loss', loss[0]),
                ('G_gen_loss', loss[1]), ('G_cond_loss', loss[2]),
                ('G_cycle_loss', loss[3]), ('Dy_loss', loss[4]),
                ('F_loss', loss[5]), ('F_gen_loss', loss[6]),
                ('F_cond_loss', loss[7]), ('F_cycle_loss', loss[3]),
                ('Dx_loss', loss[8]), ('dataset', self.flags.dataset),
                ('gpu_index', self.flags.gpu_index)
            ])

            utils.print_metrics(iter_time, ord_output)
Example #24
    def k_stratified(self):
        ''' Classify the data, splitting the training and test sets into folds that preserve the percentage of samples for each class '''
        sss = StratifiedShuffleSplit(self.__label_list,
                                     n_iter=1,
                                     train_size=0.7,
                                     test_size=0.3)
        for train_index, test_index in sss:
            print(len(train_index), len(test_index))
            instance_train, instance_test = self.__data_vectorized[
                train_index], self.__data_vectorized[test_index]
            label_train, label_test = self.__label_list[
                train_index], self.__label_list[test_index]
            predicted = self.__classifier.fit(
                instance_train, label_train).predict(instance_test)
            print_metrics(label_test, predicted)
Example #25
    def predict_labels(self, x_test, y_test):

        test_vars = tf.get_collection(bookkeeper.GraphKeys.TEST_VARIABLES)
        tf.variables_initializer(test_vars).run()

        x_test_mu = x_test[:, :self.dim_x]
        x_test_lsgms = x_test[:, self.dim_x:2 * self.dim_x]

        accuracy, cross_entropy, precision, recall = self.session.run(
            [self.eval_accuracy, self.eval_cross_entropy, self.eval_precision, self.eval_recall],
            feed_dict={self.x_labelled_mu: x_test_mu,
                       self.x_labelled_lsgms: x_test_lsgms,
                       self.y_lab: y_test})

        utils.print_metrics('X', ['Test', 'accuracy', accuracy],
                            ['Test', 'cross-entropy', cross_entropy],
                            ['Test', 'precision', precision],
                            ['Test', 'recall', recall])
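
Examples 25, 27, 33, and 34 use yet another shape: a leading tag (an epoch number or a string such as 'X') followed by [phase, metric_name, value] triples. A varargs sketch consistent with those particular calls (again a hypothetical reconstruction; Example 15 omits the leading tag, so that repository's variant must differ):

def print_metrics(tag, *triples):
    # Hypothetical varargs variant: `tag` is an epoch number or label,
    # each following argument a [phase, name, value] triple.
    print('----- {} -----'.format(tag))
    for phase, name, value in triples:
        print('{} {}: {}'.format(phase, name, value))

# e.g. print_metrics(5, ['Training', 'cost', 0.42],
#                       ['Validation', 'accuracy', 0.91])
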
Example #26
def main(
    seg1_fname,
    seg2_fname,
    calc_rand_score=True,
    calc_rand_error=True,
    calc_variation_score=True,
    calc_variation_information=True,
    relabel2d=False,
    foreground_restricted=True,
    split_0_segment=True,
    other=None,
):
    """
	Script functionality, computes the overlap matrix,
    computes any specified metrics,
	and prints the results nicely
	"""

    print("Loading Data...")
    seg1 = io_utils.import_file(seg1_fname)
    seg2 = io_utils.import_file(seg2_fname)

    prep = utils.parse_fns(utils.prep_fns, [relabel2d, foreground_restricted])
    seg1, seg2 = utils.run_preprocessing(seg1, seg2, prep)

    om = utils.calc_overlap_matrix(seg1, seg2, split_0_segment)

    # Calculating each desired metric
    metrics = utils.parse_fns(
        utils.metric_fns, [calc_rand_score, calc_rand_error, calc_variation_score, calc_variation_information]
    )

    results = {}
    for (name, metric_fn) in metrics:
        if relabel2d:
            full_name = "2D {}".format(name)
        else:
            full_name = name

        (f, m, s) = metric_fn(om, full_name, other)
        results["{} Full".format(name)] = f
        results["{} Merge".format(name)] = m
        results["{} Split".format(name)] = s

    print("")
    utils.print_metrics(results)
Example #27
	def predict_labels( self, x_test, y_test ):

		test_vars = tf.get_collection(bookkeeper.GraphKeys.TEST_VARIABLES)
		tf.initialize_variables(test_vars).run()

		x_test_mu = x_test[:,:self.dim_x]
		x_test_lsgms = x_test[:,self.dim_x:2*self.dim_x]

		accuracy, cross_entropy, precision, recall = \
			self.session.run( [self.eval_accuracy, self.eval_cross_entropy, self.eval_precision, self.eval_recall],
				feed_dict = {self.x_labelled_mu: x_test_mu, self.x_labelled_lsgms: x_test_lsgms, self.y_lab: y_test} )

		utils.print_metrics(	'X',
								['Test', 'accuracy', accuracy],
								['Test', 'cross-entropy', cross_entropy],
								['Test', 'precision', precision],
								['Test', 'recall', recall] )
Example #28
    def measure(self, generated, vessels, masks, num_data, iter_time, phase,
                total_time):
        vessels_in_mask, generated_in_mask = utils.pixel_values_in_mask(
            vessels, generated, masks)
        avg_pt = (total_time / num_data) * 1000  # average processing time (ms)

        # evaluation
        auc_roc = utils.AUC_ROC(vessels_in_mask, generated_in_mask)
        auc_pr = utils.AUC_PR(vessels_in_mask, generated_in_mask)

        binarys_in_mask = utils.threshold_by_otsu(generated, masks)
        dice_coeff = utils.dice_coefficient_in_train(vessels_in_mask,
                                                     binarys_in_mask)
        acc, sensitivity, specificity = utils.misc_measures(
            vessels_in_mask, binarys_in_mask)
        score = auc_pr + auc_roc + dice_coeff + acc + sensitivity + specificity

        # print information
        ord_output = collections.OrderedDict([('auc_pr', auc_pr),
                                              ('auc_roc', auc_roc),
                                              ('dice_coeff', dice_coeff),
                                              ('acc', acc),
                                              ('sensitivity', sensitivity),
                                              ('specificity', specificity),
                                              ('score', score),
                                              ('best_dice_coeff',
                                               self.best_dice_coeff),
                                              ('avg_pt', avg_pt)])

        utils.print_metrics(iter_time, ord_output)

        # write in tensorboard
        if phase == 'train':
            self.model.measure_assign(auc_pr, auc_roc, dice_coeff, acc,
                                      sensitivity, specificity, score,
                                      iter_time)

        if phase == 'test':
            # write in npy format for evaluation
            utils.save_obj(vessels_in_mask, generated_in_mask,
                           os.path.join(self.auc_out_dir, "auc_roc.npy"),
                           os.path.join(self.auc_out_dir, "auc_pr.npy"))

        return dice_coeff
Example #29
    def one_train(self, epochs, batch_size, train_data, train_label, dev_data,
                  dev_label):
        self.compile_model()
        for e in range(epochs):
            history = self.model.fit(train_data,
                                     train_label,
                                     batch_size=batch_size,
                                     verbose=1,
                                     validation_data=(dev_data, dev_label))
            dev_out = self.model.predict(dev_data,
                                         batch_size=2 * batch_size,
                                         verbose=1)
            metrics = PRF(dev_label,
                          (dev_out > 0.5).astype('int32').reshape([-1]))
            metrics['epoch'] = e + 1
            metrics['val_loss'] = history.history['val_loss']
            print_metrics(metrics,
                          metrics_type=self.__class__.__name__ +
                          self.args.selfname,
                          save_dir=self.args.log_dir)
Example #30
def main():
    path = '../data/persons'
    data = pd.read_csv(f'{path}/person_data_clean.csv', header=0)

    cat_cols = ['person_type', 'trafficway_type', 'manner_of_collision', 'body_type', 'seating_position',
                'ejection', 'safety_equipment_use']
    binary_cols = ['sex', 'land_use_urban', 'rollover', 'air_bag_deployed']
    numeric_cols = ['age']

    data[cat_cols] = data[cat_cols].apply(lambda x: x.astype('category'))

    labels = data['fatality']
    features = data[cat_cols + binary_cols + numeric_cols]
    feature_names = features.columns

    oe = OrdinalEncoder()
    features = oe.fit_transform(features)

    # features = pd.get_dummies(features, columns=cat_cols)

    scaler = StandardScaler()
    features = scaler.fit_transform(features)

    X_train, X_test, y_train, y_test = train_test_split(features, labels,
                                                        test_size=0.2, random_state=2020)
    print('Class Balance')
    print(y_test.value_counts())
    print()

    model = GridSearchCV(estimator=KNeighborsClassifier(),
                         param_grid={'n_neighbors': range(1, 20, 2)},
                         cv=5, scoring='f1')
    model.fit(X_train, y_train)
    print(model.best_params_)
    print()

    y_pred = model.predict(X_test)
    y_probs = model.predict_proba(X_test)[:, 1]

    utils.print_metrics(y_test, y_pred)
    utils.roc_curve(y_test, y_probs, 'KNN', 'knn')
Example #31
def test_seg_model(model, args):
    # prepare dataset
    test_dset = ChromosomeDataset(os.path.join(args.data_dir + args.simu_type, "test_imgs"),
                                  transform=transforms.Compose([transforms.ToTensor()]))
    test_dataloader = DataLoader(test_dset, batch_size=args.batch_size, shuffle=False, num_workers=0)

    model.eval()   # Set model to evaluate mode
    metrics = defaultdict(float)
    epoch_samples = 0
    for inputs, labels in test_dataloader:
        inputs = inputs.cuda()
        labels = labels.cuda()

        # forward pass (gradients are not tracked at test time)
        with torch.no_grad():
            outputs = model(inputs)
            loss = calc_loss(outputs, labels, metrics)
        # statistics
        epoch_samples += inputs.size(0)
    print_metrics(metrics, epoch_samples, "test")
Example #32
    def print_info(self, loss, iter_time, is_sup=True):
        if np.mod(iter_time, self.flags.print_freq) == 0:
            if is_sup:
                ord_output = collections.OrderedDict([('tar_iters', self.flags.iters),
                                                      ('G_loss_sup', loss[0]), ('G_gen_loss_sup', loss[1]),
                                                      ('G_cond_loss', loss[2]), ('G_gdl_loss', loss[3]),
                                                      ('G_perceptual_loss', loss[4]), ('G_ssim_loss', loss[5]),
                                                      ('G_cycle_loss_sup', loss[6]), ('Dy_loss_sup', loss[7]),
                                                      ('F_loss_sup', loss[8]), ('F_gen_loss_sup', loss[9]),
                                                      ('F_cond_loss', loss[10]), ('F_gdl_loss', loss[11]),
                                                      ('F_perceptual_loss', loss[12]), ('F_ssim_loss', loss[13]),
                                                      ('F_cycle_loss_sup', loss[6]), ('Dx_loss_sup', loss[14]),
                                                      ('gpu_index', self.flags.gpu_index)])
            else:
                ord_output = collections.OrderedDict([('tar_iters', self.flags.iters),
                                                      ('G_loss_unsup', loss[0]), ('G_gen_loss_unsup', loss[1]),
                                                      ('G_cycle_loss_unsup', loss[2]), ('Dy_loss_unsup', loss[3]),
                                                      ('F_loss_unsup', loss[4]), ('F_gen_loss_unsup', loss[5]),
                                                      ('F_cycle_loss_unsup', loss[2]), ('Dx_loss_unsup', loss[6]),
                                                      ('gpu_index', self.flags.gpu_index)])

            utils.print_metrics(iter_time, ord_output)
Example #33
	def train(self, x_labelled, y, x_unlabelled,
	          epochs,
	          x_valid, y_valid,
	          print_every=1,
	          learning_rate=3e-4,
	          beta1=0.9,
	          beta2=0.999,
	          seed=31415,
	          stop_iter=100,
	          save_path=None,
	          load_path=None):


		''' Session and Summary '''
		if save_path is None: 
			self.save_path = 'checkpoints/model_GC_{}-{}-{}_{}.cpkt'.format(
				self.num_lab,learning_rate,self.batch_size,time.time())
		else:
			self.save_path = save_path

		np.random.seed(seed)
		tf.set_random_seed(seed)

		with self.G.as_default():

			self.optimiser = tf.train.AdamOptimizer( learning_rate = learning_rate, beta1 = beta1, beta2 = beta2 )
			self.train_op = self.optimiser.minimize( self.cost )
			init = tf.initialize_all_variables()
			self._test_vars = None
			
		
		_data_labelled = np.hstack( [x_labelled, y] )
		_data_unlabelled = x_unlabelled
		x_valid_mu, x_valid_lsgms = x_valid[ :, :self.dim_x ], x_valid[ :, self.dim_x:2*self.dim_x ]

		with self.session as sess:

			sess.run(init)
			if load_path == 'default': self.saver.restore( sess, self.save_path )
			elif load_path is not None: self.saver.restore( sess, load_path )	

			best_eval_accuracy = 0.
			stop_counter = 0

			for epoch in range(epochs):

				''' Shuffle Data '''
				np.random.shuffle( _data_labelled )
				np.random.shuffle( _data_unlabelled )

				''' Training '''
				
				for x_l_mu, x_l_lsgms, y, x_u_mu, x_u_lsgms in utils.feed_numpy_semisupervised(	
					self.num_lab_batch, self.num_ulab_batch, 
					_data_labelled[:,:2*self.dim_x], _data_labelled[:,2*self.dim_x:],_data_unlabelled ):

					training_result = sess.run( [self.train_op, self.cost],
											feed_dict = {	self.x_labelled_mu:			x_l_mu, 	
															self.x_labelled_lsgms: 		x_l_lsgms,
															self.y_lab: 				y,
															self.x_unlabelled_mu: 		x_u_mu,
															self.x_unlabelled_lsgms: 	x_u_lsgms } )

					training_cost = training_result[1]

				''' Evaluation '''

				stop_counter += 1

				if epoch % print_every == 0:

					test_vars = tf.get_collection(bookkeeper.GraphKeys.TEST_VARIABLES)
					if test_vars:
						if test_vars != self._test_vars:
							self._test_vars = list(test_vars)
							self._test_var_init_op = tf.initialize_variables(test_vars)
						self._test_var_init_op.run()


					eval_accuracy, eval_cross_entropy = \
						sess.run( [self.eval_accuracy, self.eval_cross_entropy],
									feed_dict = { 	self.x_labelled_mu: 	x_valid_mu,
													self.x_labelled_lsgms:	x_valid_lsgms,
													self.y_lab:				y_valid } )

					if eval_accuracy > best_eval_accuracy:

						best_eval_accuracy = eval_accuracy
						self.saver.save( sess, self.save_path )
						stop_counter = 0

					utils.print_metrics( 	epoch+1,
											['Training', 'cost', training_cost],
											['Validation', 'accuracy', eval_accuracy],
											['Validation', 'cross-entropy', eval_cross_entropy] )

				if stop_counter >= stop_iter:
					print('Stopping GC training')
					print('No change in validation accuracy for {} iterations'.format(stop_iter))
					print('Best validation accuracy: {}'.format(best_eval_accuracy))
					print('Model saved in {}'.format(self.save_path))
					break
Example #34
	def train(self, x, x_valid,
	          epochs, num_batches,
	          print_every=1,
	          learning_rate=3e-4,
	          beta1=0.9,
	          beta2=0.999,
	          seed=31415,
	          stop_iter=100,
	          save_path=None,
	          load_path=None,
	          draw_img=1):

		self.num_examples = x.shape[0]
		self.num_batches = num_batches

		assert self.num_examples % self.num_batches == 0, '#Examples % #Batches != 0'

		self.batch_size = self.num_examples // self.num_batches

		''' Session and Summary '''
		if save_path is None: 
			self.save_path = 'checkpoints/model_VAE_{}-{}_{}.cpkt'.format(learning_rate,self.batch_size,time.time())
		else:
			self.save_path = save_path

		np.random.seed(seed)
		tf.set_random_seed(seed)

		with self.G.as_default():

			self.optimiser = tf.train.AdamOptimizer( learning_rate = learning_rate, beta1 = beta1, beta2 = beta2 )
			self.train_op = self.optimiser.minimize( self.cost )
			init = tf.initialize_all_variables()
			self._test_vars = None

		with self.session as sess:

			sess.run(init)
			if load_path == 'default': self.saver.restore( sess, self.save_path )
			elif load_path is not None: self.saver.restore( sess, load_path )	

			training_cost = 0.
			best_eval_log_lik = - np.inf
			stop_counter = 0

			for epoch in range(epochs):

				''' Shuffle Data '''
				np.random.shuffle( x )

				''' Training '''
				
				for x_batch in utils.feed_numpy( self.batch_size, x ):

					training_result = sess.run( [self.train_op, self.cost],
											feed_dict = { self.x: x_batch } )

					training_cost = training_result[1]

				''' Evaluation '''

				stop_counter += 1

				if epoch % print_every == 0:

					test_vars = tf.get_collection(bookkeeper.GraphKeys.TEST_VARIABLES)
					if test_vars:
						if test_vars != self._test_vars:
							self._test_vars = list(test_vars)
							self._test_var_init_op = tf.initialize_variables(test_vars)
						self._test_var_init_op.run()


					eval_log_lik, x_recon_eval = \
						sess.run( [self.eval_log_lik, self.x_recon_eval],
									feed_dict = { self.x: x_valid } )

					if eval_log_lik > best_eval_log_lik:

						best_eval_log_lik = eval_log_lik
						self.saver.save( sess, self.save_path )
						stop_counter = 0

					utils.print_metrics( 	epoch+1,
											['Training', 'cost', training_cost],
											['Validation', 'log-likelihood', eval_log_lik] )

					if draw_img > 0 and epoch % draw_img == 0:

						import matplotlib
						matplotlib.use('Agg')
						import pylab
						import seaborn as sns

						five_random = np.random.randint(0, x_valid.shape[0], size=5)  # randint is 0-based; the deprecated random_integers could index out of range
						x_sample = x_valid[five_random]
						x_recon_sample = x_recon_eval[five_random]

						sns.set_style('white')
						f, axes = pylab.subplots(5, 2, figsize=(8,12))
						for i,row in enumerate(axes):

							row[0].imshow(x_sample[i].reshape(28, 28), vmin=0, vmax=1)
							im = row[1].imshow(x_recon_sample[i].reshape(28, 28), vmin=0, vmax=1, 
								cmap=sns.light_palette((1.0, 0.4980, 0.0549), input="rgb", as_cmap=True))

							pylab.setp([a.get_xticklabels() for a in row], visible=False)
							pylab.setp([a.get_yticklabels() for a in row], visible=False)
	
						f.subplots_adjust(left=0.0, right=0.9, bottom=0.0, top=1.0)
						cbar_ax = f.add_axes([0.9, 0.1, 0.04, 0.8])
						f.colorbar(im, cax=cbar_ax, use_gridspec=True)
		
						pylab.tight_layout()
						pylab.savefig('img/recon-'+str(epoch)+'.png', format='png')
						pylab.clf()
						pylab.close('all')

				if stop_counter >= stop_iter:
					print('Stopping VAE training')
					print('No change in validation log-likelihood for {} iterations'.format(stop_iter))
					print('Best validation log-likelihood: {}'.format(best_eval_log_lik))
					print('Model saved in {}'.format(self.save_path))
					break