Example #1
def main():
    dataset_train = read_dataset(
        "https://raw.githubusercontent.com/dataminerdbm/test_data_scientist/main/treino.csv"
    )
    dataset_test = read_dataset(
        "https://raw.githubusercontent.com/dataminerdbm/test_data_scientist/main/teste.csv"
    )

    dataset_train.replace(to_replace=[None], value=np.nan, inplace=True)
    dataset_test.replace(to_replace=[None], value=np.nan, inplace=True)

    raw_dataset_values_train = dataset_train.drop(columns=['inadimplente'])

    transformed_values_train = input_data(raw_dataset_values_train)
    transformed_values_test = input_data(dataset_test)

    # The same data scaling must be used for training and testing
    # https://datascience.stackexchange.com/questions/27615/should-we-apply-normalization-to-test-data-as-well
    scaler = StandardScaler()
    standardized_values_train = scaler.fit_transform(transformed_values_train)
    standardized_values_test = scaler.transform(transformed_values_test)

    standardized_values_train = pd.DataFrame(
        standardized_values_train, columns=raw_dataset_values_train.keys())
    standardized_values_test = pd.DataFrame(standardized_values_test,
                                            columns=dataset_test.keys())

    train_x = standardized_values_train
    train_y = dataset_train.inadimplente

    test_x = standardized_values_test

    undersample = RandomUnderSampler(sampling_strategy='majority')

    model = RandomForestClassifier()

    X_under, y_under = undersample.fit_resample(train_x, train_y)

    model.fit(X_under, y_under)

    filename = 'test_data_scientist_dataminer/modelo-adaboost.joblib'
    dump(model, filename)

    loaded_model = load(filename)

    predictions = model.predict(test_x)

    dataset_test_raw_df = read_dataset(
        "https://raw.githubusercontent.com/dataminerdbm/test_data_scientist/main/teste.csv"
    )
    dataset_test_raw_df['inadimplente'] = predictions
    dataset_test_raw_df.to_csv("test_data_scientist_dataminer/teste.csv",
                               index=False)
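
As a side note, a minimal, self-contained sketch of the joblib dump/load round trip the example above relies on (the example reloads the model into loaded_model but still predicts with the in-memory model, and the file name says "adaboost" although the estimator is a RandomForestClassifier); the toy data below is synthetic, not the treino.csv schema:

import numpy as np
from joblib import dump, load
from sklearn.ensemble import RandomForestClassifier

X = np.random.rand(100, 4)             # toy features
y = np.random.randint(0, 2, size=100)  # toy binary labels

clf = RandomForestClassifier().fit(X, y)
dump(clf, "model.joblib")              # persist the fitted estimator
reloaded = load("model.joblib")        # reload it later (or in another process)
print(reloaded.predict(X[:5]))         # the reloaded estimator predicts as before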
Example #2
def retrieve_images(self):
    images, labels, _ = util.read_dataset(self.root)
    img_ph_list, img_bg_list = util.crop_out(images, labels)
    ph_dict = [{'mat': np.rollaxis(x, 2, 0), 'val': 1} for x in img_ph_list]
    bg_dict = [{'mat': np.rollaxis(x, 2, 0), 'val': 0} for x in img_bg_list]
    mix_dict = ph_dict + bg_dict
    shuffle(mix_dict)
    return mix_dict
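
A hedged sketch of how the shuffled list of {'mat': channel-first crop, 'val': 0/1 label} dicts returned by retrieve_images might be consumed; it assumes (which the snippet above does not guarantee) that all crops share the same shape:

import numpy as np

def to_arrays(mix_dict):
    # Stack the channel-first crops and their labels into batch arrays.
    X = np.stack([d['mat'] for d in mix_dict])   # shape (N, C, H, W)
    y = np.array([d['val'] for d in mix_dict])   # shape (N,)
    return X, y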
Example #3
parser.add_argument('--dims', type=int, help="dims", default=5)
parser.add_argument('--epochs', type=int, help="epochs", default=10)
args, unparsed = parser.parse_known_args()
# ##################################################
EPOCHES = args.epochs
DIM_NUM = args.dims

learning_rate = 0.0002
BATCH_SIZE = 5
DIS_STEP = 5

# real data
# ######################################## difference from mpc
prefix = '../playground/datasets/'
file_name_prefix = prefix + str(DIM_NUM) + "D/" + str(DIM_NUM) + "d"
real_X = read_dataset(file_name_prefix + "_attr_plain.csv")
real_Y = read_dataset(file_name_prefix + "_label_plain.csv")
# ######################################## difference from mpc
real_X = np.array(real_X)
real_Y = np.array(real_Y)

X = tf.placeholder(tf.float32, real_X.shape)
Y = tf.placeholder(tf.float32, real_Y.shape)
print(X)
print(Y)

# initialize W & b
W = tf.Variable(tf.zeros([DIM_NUM, 1]))
b = tf.Variable(tf.zeros([1]))
print(W)
print(b)
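
The placeholders and zero-initialized parameters above set up a plain logistic regression. One plausible continuation of the graph (an assumption here, although Example #13 below contains the matching optimizer line) is:

logits = tf.matmul(X, W) + b   # linear scores, shape (N, 1)
pred_Y = tf.sigmoid(logits)    # predicted probabilities
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)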
Example #4
learning_rate = 0.0002
BATCH_SIZE = 5
DIS_STEP = 5

# real data (for test, use an option to distinguish)
# ######################################## difference from tf
my_party_id = args.party_id

if my_party_id == 2:
    my_party_id = 0

prefix = '../playground/datasets/'
file_name_prefix = prefix + str(DIM_NUM) + "D/" + str(DIM_NUM) + "d"
file_name_suffix = "share_" + str(my_party_id) + ".csv"
real_X = read_dataset(file_name_prefix + "_attr_" + file_name_suffix)
real_Y = read_dataset(file_name_prefix + "_label_" + file_name_suffix)
# ######################################## difference from tf

X = tf.Variable(real_X)
Y = tf.Variable(real_Y)
print(X)
print(Y)

# initialize W & b
W = tf.Variable(tf.zeros([DIM_NUM, 1], dtype=tf.float64))
b = tf.Variable(tf.zeros([1], dtype=tf.float64))
print(W)
print(b)

# predict
Example #5
def main():
    global args, best_er1
    args = parser.parse_args()

    # Check if CUDA is enabled
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    for tgt_idx, tgt in enumerate(dataset_targets[args.dataset]):
        print("Training a model for {}".format(tgt))

        # Load data
        root = args.dataset_path if args.dataset_path else dataset_paths[
            args.dataset]
        task_type = args.dataset_type if args.dataset_type else dataset_types[
            args.dataset]
        if args.resume:
            resume_dir = args.resume.format(dataset=args.dataset,
                                            model=args.model,
                                            layers=args.layers,
                                            feature=tgt)
        #end if
        Model_Class = model_dict[args.model]

        print("Preparing dataset")
        node_features, edge_features, target_features, task_type, train_loader, valid_loader, test_loader = read_dataset(
            args.dataset, root, args.batch_size, args.prefetch)

        # Define model and optimizer

        print('\tCreate model')
        hidden_state_size = args.hidden
        model = Model_Class(node_features=node_features,
                            edge_features=edge_features,
                            target_features=1,
                            hidden_features=hidden_state_size,
                            num_layers=args.layers,
                            dropout=0.5,
                            type=task_type,
                            s2s_processing_steps=args.s2s)
        print("#Parameters: {param_count}".format(
            param_count=count_params(model)))

        print('Optimizer')
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)

        criterion, evaluation, metric_name, metric_compare, metric_best = get_metric_by_task_type(
            task_type, target_features)

        print('Logger')
        logger = Logger(
            args.log_path.format(dataset=args.dataset,
                                 model=args.model,
                                 layers=args.layers,
                                 feature=tgt))

        lr_step = (args.lr - args.lr * args.lr_decay) / (
            args.epochs * args.schedule[1] - args.epochs * args.schedule[0])

        # get the best checkpoint if available without training
        if args.resume:
            checkpoint_dir = resume_dir
            best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
            if not os.path.isdir(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            if os.path.isfile(best_model_file):
                print("=> loading best model '{}'".format(best_model_file))
                checkpoint = torch.load(best_model_file)
                args.start_epoch = checkpoint['epoch']
                best_acc1 = checkpoint['best_er1']
                model.load_state_dict(checkpoint['state_dict'])
                if args.cuda:
                    model.cuda()
                optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded best model '{}' (epoch {})".format(
                    best_model_file, checkpoint['epoch']))
            else:
                print("=> no best model found at '{}'".format(best_model_file))

        print('Check cuda')
        if args.cuda:
            print('\t* Cuda')
            model = model.cuda()
            criterion = criterion.cuda()

        # Epoch for loop
        for epoch in range(0, args.epochs):
            try:
                if epoch > args.epochs * args.schedule[
                        0] and epoch < args.epochs * args.schedule[1]:
                    args.lr -= lr_step
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = args.lr
                #end if

                # train for one epoch
                train(train_loader,
                      model,
                      criterion,
                      optimizer,
                      epoch,
                      evaluation,
                      logger,
                      target_range=(tgt_idx, ),
                      tgt_name=tgt,
                      metric_name=metric_name,
                      cuda=args.cuda,
                      log_interval=args.log_interval)

                # evaluate on test set
                er1 = validate(valid_loader,
                               model,
                               criterion,
                               evaluation,
                               logger,
                               target_range=(tgt_idx, ),
                               tgt_name=tgt,
                               metric_name=metric_name,
                               cuda=args.cuda,
                               log_interval=args.log_interval)

                is_best = metric_compare(er1, best_er1)
                best_er1 = metric_best(er1, best_er1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'best_er1': best_er1,
                        'optimizer': optimizer.state_dict(),
                    },
                    is_best=is_best,
                    directory=resume_dir)

                # Logger step
                logger.log_value('learning_rate', args.lr).step()
            except KeyboardInterrupt:
                break
            #end try
        #end for

        # get the best checkpoint and test it with test set
        if args.resume:
            checkpoint_dir = resume_dir
            best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
            if not os.path.isdir(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            if os.path.isfile(best_model_file):
                print("=> loading best model '{}'".format(best_model_file))
                checkpoint = torch.load(best_model_file)
                args.start_epoch = checkpoint['epoch']
                best_acc1 = checkpoint['best_er1']
                model.load_state_dict(checkpoint['state_dict'])
                if args.cuda:
                    model.cuda()
                optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded best model '{}' (epoch {})".format(
                    best_model_file, checkpoint['epoch']))
            else:
                print("=> no best model found at '{}'".format(best_model_file))
            #end if
        #end if

        # (For testing)
        validate(test_loader,
                 model,
                 criterion,
                 evaluation,
                 target_range=(tgt_idx, ),
                 tgt_name=tgt,
                 metric_name=metric_name,
                 cuda=args.cuda,
                 log_interval=args.log_interval)
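
train, validate, Logger, and save_checkpoint are helpers from the surrounding project and are not shown here. As one concrete piece, this is a hedged sketch (an assumption, not the repository's code) of a save_checkpoint that matches the call signature and the 'model_best.pth' file name used above:

import os
import shutil
import torch

def save_checkpoint(state, is_best, directory):
    # Always write the latest state; copy it aside when it is the best so far.
    os.makedirs(directory, exist_ok=True)
    latest = os.path.join(directory, 'checkpoint.pth')
    torch.save(state, latest)
    if is_best:
        shutil.copyfile(latest, os.path.join(directory, 'model_best.pth'))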
Example #6
import initializer
import os.path
import os
import sys
import cv2
from matplotlib import pyplot as plt
import numpy as np
import util
import math
import csv

# Training set
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
dataset_path = BASE_DIR + '/dataset/' + 'Video Data-03-08-2015/extended_dataset.csv'
dataset = util.read_dataset(dataset_path,
                            244,
                            split_teain_test=True,
                            split_ratio=0.7)

train_set = dataset['train_set']
test_set = dataset['test_set']
blurry_set = dataset['blury_set']
cropped_set = dataset['cropped_set']
number_of_samples = len(train_set['X'])

log_file = BASE_DIR + '/logs/classifier_7.0.csv'
f = open(log_file, 'wt')
writer = csv.writer(f)
writer.writerow(('Epoch', 'Iteration', 'Phase', 'Accuracy', 'Loss'))
# For test purpose
# print train_set['X'][0].shape
# plt.imshow(train_set['X'][0], interpolation = 'bicubic')
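
util.read_dataset is project-specific; the split_ratio=0.7 argument suggests a 70/30 train/test split. A hedged sketch of that kind of split on NumPy arrays (an assumption, not util's actual implementation; split_train_test below is a hypothetical helper):

import numpy as np

def split_train_test(X, y, split_ratio=0.7, seed=0):
    # Shuffle the indices once, then cut them at the requested ratio.
    rng = np.random.RandomState(seed)
    idx = rng.permutation(len(X))
    cut = int(split_ratio * len(X))
    return {'train_set': {'X': X[idx[:cut]], 'Y': y[idx[:cut]]},
            'test_set': {'X': X[idx[cut:]], 'Y': y[idx[cut:]]}}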
Example #7
init = tf.global_variables_initializer()
# def read_dataset(file_name = None):
#     if file_name is None:
#         print("Error! No file name!")
#         return
#     res_data = []
#     with open(file_name, 'r') as f:
#         cr = csv.reader(f)
#         for each_r in cr:
#             curr_r = [np.array([v], dtype=np.float_)[0] for v in each_r]
#             res_data.append(curr_r)
#             #print(each_r)
#     return res_data

file_name_prefix = str("../datasets/") + str(DIM_NUM) + "D/" + str(DIM_NUM) + "d"
plain_attr_ds = read_dataset(file_name_prefix + "_attr_plain.csv")
plain_label_ds = read_dataset(file_name_prefix + "_label_plain.csv")

save_file_prefix = "./comp_log/native"

with tf.Session() as native_sess:
    native_sess.run(init)
    xW, xb = native_sess.run([W, b])
    print("init weight:{} \nbias:{}".format(xW, xb))

    total_batch = int(len(plain_attr_ds) / BATCH_SIZE)
    start_t = time.time()
    for epoch in range(EPOCHES):
        avg_loss = 0.0
        #print("batch:", total_batch)
        curr_loss = None
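
The commented-out read_dataset above shows what the reader does: parse a numeric CSV into rows of floats. A cleaned-up, standalone version with essentially the same behaviour (plain Python floats instead of NumPy scalars) could look like this:

import csv

def read_dataset(file_name):
    # Parse a numeric CSV into a list of rows of floats.
    rows = []
    with open(file_name, 'r') as f:
        for record in csv.reader(f):
            rows.append([float(v) for v in record])
    return rows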
Example #8
def main():
    dataset = read_dataset(
        "https://raw.githubusercontent.com/dataminerdbm/test_data_scientist/main/treino.csv"
    )

    dataset.replace(to_replace=[None], value=np.nan, inplace=True)

    raw_dataset_values = dataset.drop(columns=['inadimplente'])

    transformed_values = input_data(raw_dataset_values)

    standardized_values = rescale_data(transformed_values, raw_dataset_values)

    # calc_corr_fig(standardized_values)

    x = standardized_values
    # Drop the remaining correlated features, keeping only one of them
    x_without_corr_feat = standardized_values.drop(columns=[
        'vezes_passou_de_30_59_dias', 'numero_de_vezes_que_passou_60_89_dias'
    ])
    y = dataset.inadimplente

    SEED = 7707
    np.random.seed(SEED)
    # Stratify the split because the dataset is imbalanced
    train_x, test_x, train_y, test_y = train_test_split(x,
                                                        y,
                                                        test_size=0.3,
                                                        stratify=y)

    train_x_without_corr_feat, test_x_without_corr_feat, train_y_without_corr_feat, test_y_without_corr_feat = train_test_split(
        x_without_corr_feat, y, test_size=0.3, stratify=y)

    undersample = RandomUnderSampler(sampling_strategy='majority')

    X_without_corr_feat_under, y_without_corr_feat_under = undersample.fit_resample(
        x_without_corr_feat, y)
    x_under, y_under = undersample.fit_resample(x, y)
    train_x_under, train_y_under = undersample.fit_resample(train_x, train_y)
    train_x_without_corr_feat_under, train_y_without_corr_feat_under = undersample.fit_resample(
        train_x_without_corr_feat, train_y_without_corr_feat)

    #tsne_scatterplot(x_without_corr_feat, y)

    # The classifiers under evaluation were chosen based on the nature of the dataset:
    # numeric features, high dimensionality with many instances, and a problem that is not linearly separable
    models = [
        DummyClassifier(),
        KNeighborsClassifier(),
        DecisionTreeClassifier(),
        GaussianNB(),
        AdaBoostClassifier(n_estimators=100),
        RandomForestClassifier(),
        BaggingClassifier(base_estimator=GaussianNB(), n_estimators=100)
    ]
    k_size = 5

    # Add randomness to the fold groups (to avoid repetition). A more suitable approach for imbalanced datasets
    # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GroupKFold.html#sklearn.model_selection.GroupKFold
    x_under['idade_r'] = x_under.idade + np.random.randint(-2, 3, size=14662)
    x_under.idade_r = x_under.idade + abs(x_under.idade.min()) + 1

    print("Validando modelos com todas as características")
    validate_models_cv(x_under, y_under, x_under.idade_r, models, k_size)
    validate_models_holdout(train_x_under, train_y_under, test_x, test_y,
                            models, k_size)

    print("Validando modelos sem as características correlacionadas")
    validate_models_cv(X_without_corr_feat_under, y_without_corr_feat_under,
                       x_under.idade_r, models, k_size)
    validate_models_holdout(train_x_without_corr_feat_under,
                            train_y_without_corr_feat_under,
                            test_x_without_corr_feat, test_y_without_corr_feat,
                            models, k_size)
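
validate_models_cv and validate_models_holdout are project helpers that are not shown here. Given the GroupKFold reference above, a hedged sketch of what the cross-validation variant might look like (an assumption, not the original implementation):

from sklearn.model_selection import GroupKFold, cross_val_score

def validate_models_cv(x, y, groups, models, k_size):
    for model in models:
        scores = cross_val_score(model, x, y, groups=groups,
                                 cv=GroupKFold(n_splits=k_size))
        print(type(model).__name__, "mean score:", scores.mean())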
Example #9
    def train():
        print("Start training.")
        mkdir(args.output_result_path)
        writer_process = open(os.path.join(args.output_result_path,
                                           'process.txt'),
                              mode='w')
        writer_process.writelines("Start training.")
        trainset = read_dataset(args, args.train_path, label_columns, vocab)
        random.shuffle(trainset)

        best_josn['train_num'] = len(trainset)
        input_ids = torch.LongTensor([example[0] for example in trainset])
        label_ids = torch.LongTensor([example[1] for example in trainset])
        length_ids = torch.LongTensor([example[2] for example in trainset])

        print("Batch size: ", args.batch_size)
        print("The number of training instances:", best_josn['train_num'])

        start_time = time.time()
        best_josn['Time'] = get_time_dif(start_time)
        print("Time usage:", best_josn['Time'])

        param_optimizer = list(model.named_parameters())
        nll_criterion = nn.NLLLoss()
        if args.attention_layer == 'm_pol_untrain_a':
            optimizer_grouped_parameters = [{
                'params': [
                    p for n, p in param_optimizer
                    if ('query_embedding.weight' not in n)
                ],
                'weight_decay_rate':
                0.01
            }]
        else:
            optimizer_grouped_parameters = [{
                'params': [p for n, p in param_optimizer],
                'weight_decay_rate':
                0.01
            }]
        optimizer = optim.SGD(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              momentum=args.momentum)
        for epoch in range(1, args.epochs_num + 1):
            model.train()
            for i, (input_ids_batch, label_ids_batch,
                    length_ids_batch) in enumerate(
                        batch_loader(args.batch_size, input_ids, label_ids,
                                     length_ids)):
                model.zero_grad()
                input_ids_batch = input_ids_batch.cuda()
                label_ids_batch = label_ids_batch.cuda()
                length_ids_batch = length_ids_batch.cuda()

                if args.attention_layer == 'att':
                    predicted_ids_batch, _ = model(input_ids_batch,
                                                   length_ids_batch,
                                                   elmo_embedding)
                else:
                    predicted_ids_batch, _, orthogonal_loss = model(
                        input_ids_batch, length_ids_batch, elmo_embedding)
                    best_josn['Total_orthogonal_loss'] += orthogonal_loss
                batch_loss = nll_criterion(predicted_ids_batch,
                                           label_ids_batch)
                best_josn['Total_batch_loss'] += batch_loss
                if args.attention_layer != 'm_pre_orl_pun_a' and args.attention_layer != 'mpoa':
                    optimizer.zero_grad()
                    batch_loss.backward()
                    optimizer.step()
                else:
                    optimizer.zero_grad()
                    (0.1 * orthogonal_loss).backward(retain_graph=True)
                    (0.9 * batch_loss).backward()
                    optimizer.step()
                best_josn['Time'] = get_time_dif(start_time)
                if (i + 1) % args.report_steps == 0:
                    if args.attention_layer == 'att':
                        print(
                            "Epoch id: {}, Training steps: {}, Avg batch loss: {:.4f}, Time: {}"
                            .format(
                                epoch, i + 1, best_josn['Total_batch_loss'] /
                                args.report_steps, best_josn['Time']))
                        writer_process.writelines(
                            "Epoch id: {}, Training steps: {}, Avg batch loss: {:.4f}, Time: {}"
                            .format(
                                epoch, i + 1, best_josn['Total_batch_loss'] /
                                args.report_steps, best_josn['Time']))
                    else:
                        print(
                            "Epoch id: {}, Training steps: {}, Avg batch loss: {:.4f}, Avg orthogonal loss: {:.4f}, Time: {}"
                            .format(
                                epoch, i + 1, best_josn['Total_batch_loss'] /
                                args.report_steps,
                                best_josn['Total_orthogonal_loss'] /
                                args.report_steps, best_josn['Time']))
                        writer_process.writelines(
                            "Epoch id: {}, Training steps: {}, Avg batch loss: {:.4f}, Avg orthogonal loss: {:.4f}, Time: {}"
                            .format(
                                epoch, i + 1, best_josn['Total_batch_loss'] /
                                args.report_steps,
                                best_josn['Total_orthogonal_loss'] /
                                args.report_steps, best_josn['Time']))
                    best_josn['Total_batch_loss'] = 0
                    best_josn['Total_orthogonal_loss'] = 0
            # Load the validation set
            evaluate(args, False)
            best_josn['Time'] = get_time_dif(start_time)
            if best_josn['F_macro'] > best_josn['Best_F_macro'] + 0.001:
                best_josn['Best_F_macro'] = best_josn['F_macro']
                best_josn['Last_up_epoch'] = epoch
                torch.save(model,
                           os.path.join(args.output_result_path, 'result.pkl'))
                print("Deving Acc: {:.4f}, F_macro: {:.4f}, Time: {} *".format(
                    best_josn['ACC'], best_josn['F_macro'], best_josn['Time']))
                writer_process.writelines(
                    "Deving Acc: {:.4f}, F_macro: {:.4f}, Time: {} *".format(
                        best_josn['ACC'], best_josn['F_macro'],
                        best_josn['Time']))
            elif epoch - best_josn['Last_up_epoch'] == args.require_improvement:
                print("No optimization for a long time, auto-stopping...")
                writer_process.writelines(
                    "No optimization for a long time, auto-stopping...")
                break
            else:
                print("Deving Acc: {:.4f}, F_macro: {:.4f}, Time: {} ".format(
                    best_josn['ACC'], best_josn['F_macro'], best_josn['Time']))
                writer_process.writelines(
                    "Deving Acc: {:.4f}, F_macro: {:.4f}, Time: {} ".format(
                        best_josn['ACC'], best_josn['F_macro'],
                        best_josn['Time']))
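
get_time_dif is not defined in this snippet; a common implementation, given here as a hedged sketch (an assumption), returns the elapsed wall-clock time rounded to whole seconds:

import time
from datetime import timedelta

def get_time_dif(start_time):
    # Elapsed time since start_time as a timedelta with whole seconds.
    return timedelta(seconds=int(round(time.time() - start_time)))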
Example #10
    def evaluate(args, is_test):
        model.eval()
        if is_test:
            print("Start testing.")
            dataset = read_dataset(args, args.test_path, label_columns, vocab)
            best_josn['test_num'] = len(dataset)
            writer_result = open(os.path.join(args.output_result_path,
                                              'result.txt'),
                                 encoding='utf-8',
                                 mode='w')
            writer_summary_result = open(os.path.join(args.summary_result_path,
                                                      'summary_result.txt'),
                                         mode='a')
        else:
            dataset = read_dataset(args, args.dev_path, label_columns, vocab)
            best_josn['dev_num'] = len(dataset)
            random.shuffle(dataset)
        input_ids = torch.LongTensor([example[0] for example in dataset])
        label_ids = torch.LongTensor([example[1] for example in dataset])
        length_ids = torch.LongTensor([example[2] for example in dataset])
        input = [example[3] for example in dataset]

        if is_test:
            batch_size = 1
        else:
            batch_size = args.batch_size

        for i, (input_ids_batch, label_ids_batch,
                length_ids_batch) in enumerate(
                    batch_loader(batch_size, input_ids, label_ids,
                                 length_ids)):
            model.zero_grad()
            input_ids_batch = input_ids_batch.cuda()
            label_ids_batch = label_ids_batch.cuda()
            length_ids_batch = length_ids_batch.cuda()

            if args.attention_layer == 'att':
                predicted, weight = model(input_ids_batch, length_ids_batch,
                                          elmo_embedding)
            else:
                predicted, weight, _ = model(input_ids_batch, length_ids_batch,
                                             elmo_embedding)
            best_josn['Weights'] += weight.squeeze(
                dim=1).cpu().detach().numpy().tolist()
            _, predicted_labels = torch.max(predicted.data, 1)
            best_josn['Predict'] += predicted_labels.cpu().numpy().tolist()
            best_josn['Label'] += label_ids_batch.data.cpu().numpy().tolist()

        if is_test:
            details_result = metrics.classification_report(
                best_josn['Label'], best_josn['Predict'])
            best_josn['P_macro'], best_josn['R_macro'], best_josn[
                'F_macro'], _ = metrics.precision_recall_fscore_support(
                    best_josn['Label'], best_josn['Predict'], average="macro")
            # Note: metrics.classification is a private scikit-learn module that was
            # removed in newer versions; metrics.accuracy_score is the public equivalent.
            best_josn['ACC'] = metrics.classification.accuracy_score(
                best_josn['Label'], best_josn['Predict'])
            saveSenResult(input, best_josn['Label'], best_josn['Predict'],
                          args, best_josn['Weights'])
            writer_result.writelines(details_result)
            print(
                "Testing Acc: {:.4f}, F_macro: {:.4f}, P_macro: {:.4f}, R_macro: {:.4f}"
                .format(best_josn['ACC'], best_josn['F_macro'],
                        best_josn['P_macro'], best_josn['R_macro']))
            writer_result.writelines(
                "Testing Acc: {:.4f}, F_macro: {:.4f}, P_macro: {:.4f}, R_macro: {:.4f}"
                .format(best_josn['ACC'], best_josn['F_macro'],
                        best_josn['P_macro'], best_josn['R_macro']))
            writer_summary_result.writelines('Save path: ' + args.output_result_path + '\n')
            writer_summary_result.writelines(
                "Testing Acc: {:.4f}, F_macro: {:.4f}, P_macro: {:.4f}, R_macro: {:.4f}\n\n"
                .format(best_josn['ACC'], best_josn['F_macro'],
                        best_josn['P_macro'], best_josn['R_macro']))
            writer_summary_result.writelines(details_result)
        else:
            best_josn['P_macro'], best_josn['R_macro'], best_josn[
                'F_macro'], _ = metrics.precision_recall_fscore_support(
                    best_josn['Label'], best_josn['Predict'], average="macro")
            best_josn['ACC'] = metrics.classification.accuracy_score(
                best_josn['Label'], best_josn['Predict'])
Example #11
        stride = 25
        device = torch.device("cpu")

    # load model
    from cnn_simple import Net
    # only load param
    # model = Net()
    # model.load_state_dict(torch.load('trained.pth'))
    # or load the entire model
    path = sys.argv[1]
    model = torch.load('trained.pth')
    model.eval()
    acc = 0

    # load the image
    images, labels, names = util.read_dataset(path)
    for k, mat in enumerate(images):
        # mat = cv2.imread(img)
        img_wid = mat.shape[1]
        img_hgt = mat.shape[0]

        # run a window through the image to find the highest-scoring region
        # can improve using YOLO with multi scale/coarse level window,
        # and evaluate IOT
        windows = []
        for j in range(0, img_hgt - 2 * l, stride):
            for i in range(0, img_wid - 2 * l, stride):
                window = mat[j:j + 2 * l, i:i + 2 * l]
                window = np.rollaxis(window, 2, 0)
                windows.append(window)
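
        # A hedged sketch (an assumption, not the original source) of how the
        # loop might continue: score every window with the loaded CNN in one
        # batch and keep the location with the highest positive-class score.
        batch = torch.from_numpy(np.stack(windows)).float()
        with torch.no_grad():
            scores = model(batch)  # assumed output shape: (num_windows, 2)
        best_idx = int(scores[:, 1].argmax())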
Example #12
def main():
    params = load_params()
    m = get_model(conv_units=params['model']['conv_units'])
    m.summary()

    training_images, training_labels, testing_images, testing_labels = read_dataset(
        DATASET_FILE)

    assert training_images.shape[0] + testing_images.shape[0] == 70000
    assert training_labels.shape[0] + testing_labels.shape[0] == 70000

    training_images = normalize(training_images)
    testing_images = normalize(testing_images)

    training_labels = tf.keras.utils.to_categorical(training_labels,
                                                    num_classes=10,
                                                    dtype="float32")
    testing_labels = tf.keras.utils.to_categorical(testing_labels,
                                                   num_classes=10,
                                                   dtype="float32")

    # We use the test set as validation for simplicity
    x_train = training_images
    x_valid = testing_images
    y_train = training_labels
    y_valid = testing_labels

    history = m.fit(
        x_train,
        y_train,
        batch_size=BATCH_SIZE,
        epochs=params["train"]["epochs"],
        verbose=1,
        validation_data=(x_valid, y_valid),
        callbacks=[DvcLiveCallback(model_file=f"{OUTPUT_DIR}/model.h5")],
    )

    metrics_dict = m.evaluate(
        testing_images,
        testing_labels,
        batch_size=BATCH_SIZE,
        return_dict=True,
    )

    with open(METRICS_FILE, "w") as f:
        f.write(json.dumps(metrics_dict))

    misclassified = {}

    # predictions for the confusion matrix
    y_prob = m.predict(x_valid)
    y_pred = y_prob.argmax(axis=-1)
    os.makedirs("plots")
    with open("plots/confusion.csv", "w") as f:
        f.write("actual,predicted\n")
        sx = y_valid.shape[0]
        for i in range(sx):
            actual = y_valid[i].argmax()
            predicted = y_pred[i]
            f.write(f"{actual},{predicted}\n")
            misclassified[(actual, predicted)] = x_valid[i]

    # find misclassified examples and generate a confusion table image
    confusion_out = create_image_matrix(misclassified)
    imageio.imwrite("plots/confusion.png", confusion_out)
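
normalize and create_image_matrix are helpers defined elsewhere in that repository. A minimal sketch of what normalize plausibly does for 8-bit image data (an assumption, not the repository's code):

import numpy as np

def normalize(images):
    # Scale 8-bit pixel values into [0, 1] floats for the network.
    return np.asarray(images, dtype="float32") / 255.0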
Example #13
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

init = tf.global_variables_initializer()

file_name_prefix = str("../datasets/") + str(DIM_NUM) + "D/" + str(DIM_NUM) + "d"

#parser = argparse.ArgumentParser(description="MPC Logistic Regression with SCE loss demo")
#parser.add_argument('--party_id', type=int, help="Party ID")
#args = parser.parse_args()
my_party_id = args.party_id

if my_party_id == 2:
    my_party_id = 0

file_name_suffix = "share_" + str(my_party_id) + ".csv"
shared_attr_ds = read_dataset(file_name_prefix + "_attr_" + file_name_suffix)
shared_label_ds = read_dataset(file_name_prefix + "_label_" + file_name_suffix)

# print(shared_attr_ds)
# print(shared_label_ds)

if args.party_id == 0:
    csvprefix = "./comp_log/mpc"

with tf.Session() as mpc_sess:
    mpc_sess.run(init)
    total_batch = int(len(shared_attr_ds) / BATCH_SIZE)
    start_t = time.time()
    for epoch in range(EPOCHES):
        avg_loss = 0.0
        #print("batch:", total_batch)