Example #1
def main(seq_file, ss_file, ss8_file, acc_file, acc20_file, src_bio_file,
         output_file):
    print("load data...")
    data = gen_data(seq_file, ss_file, ss8_file, acc_file, acc20_file,
                    src_bio_file)

    x_test_seq, x_test_bi, x_test_tri, x_test_acc, x_test_acc20, x_test_ss, x_test_ss8, x_test_bio = \
        data['test']['seq'], data['test']['bigram'], data['test']['trigram'], \
        data['test']['acc'], data['test']['acc20'], data['test']['ss'], data['test']['ss8'], \
        data['test']['src_bio']

    print("make data...")

    x_test = make_data(x_test_seq, x_test_bi, x_test_tri, x_test_acc,
                       x_test_acc20, x_test_ss, x_test_ss8, x_test_bio)

    filepath = './result/model/EPSOL.hdf5'

    best_model = utils.load_model(filepath)

    [pred_test,
     pred_prob_test] = get_classification_prediction(best_model, x_test)
    save_classification_prediction(pred_test, pred_prob_test, output_file)
    print("-----------------------------------------------")
    print("EPSOL prediction finished!")
    print("-----------------------------------------------")
Example #2
def main():

    print("load data...")
    data = load_data()

    x_train_seq, x_train_bi, x_train_tri, x_train_acc, x_train_acc20, x_train_ss, x_train_ss8, x_train_bio, y_train = \
        data['train']['seq'], data['train']['bigram'], data['train']['trigram'], \
        data['train']['acc'], data['train']['acc20'], data['train']['ss'], data['train']['ss8'], \
        data['train']['src_bio'], data['train']['label']

    x_dev_seq, x_dev_bi, x_dev_tri, x_dev_acc, x_dev_acc20, x_dev_ss, x_dev_ss8, x_dev_bio, y_dev = \
        data['dev']['seq'], data['dev']['bigram'], data['dev']['trigram'], \
        data['dev']['acc'], data['dev']['acc20'], data['dev']['ss'], data['dev']['ss8'], \
        data['dev']['src_bio'], data['dev']['label']

    x_test_seq, x_test_bi, x_test_tri, x_test_acc, x_test_acc20, x_test_ss, x_test_ss8, x_test_bio, y_test = \
        data['test']['seq'], data['test']['bigram'], data['test']['trigram'], \
        data['test']['acc'], data['test']['acc20'], data['test']['ss'], data['test']['ss8'], \
        data['test']['src_bio'], data['test']['label']

    print("make data...")
    x_train, y_oh_train = make_data(
        x_train_seq, x_train_bi, x_train_tri, x_train_acc, x_train_acc20, x_train_ss, x_train_ss8, x_train_bio, y_train)
    x_dev, y_oh_dev = make_data(
        x_dev_seq, x_dev_bi, x_dev_tri, x_dev_acc, x_dev_acc20, x_dev_ss, x_dev_ss8, x_dev_bio, y_dev)
    x_test, y_oh_test = make_data(
        x_test_seq, x_test_bi, x_test_tri, x_test_acc, x_test_acc20, x_test_ss, x_test_ss8, x_test_bio, y_test)

    model = Models.EPSOL().get_model()
    model.compile(loss='binary_crossentropy',
                  optimizer=utils.get_adam_optim(), metrics=['accuracy'])
    model.summary()

    model.fit(x_train, y_oh_train, batch_size=64,
              epochs=10,
              validation_data=(x_dev, y_oh_dev),
              callbacks=get_callbacks())

    # model_name is assumed to be defined at module level; the checkpoint
    # callback from get_callbacks() saves the best weights to this path
    filepath = './result/model/' + model_name + '.hdf5'
    # model.save(filepath)
    # print("save model ok!")

    best_model = utils.load_model(filepath)

    [pred_test, pred_prob_test] = get_classification_prediction(best_model, x_test)
    save_classification_prediction(pred_test, pred_prob_test)
    print("save result ok!")
Example #3
def check():
    m = load_model()
    fname = os.path.join(csv_folder, csv_name_drop_unk)
    df24 = pd.read_csv(fname, comment='#')

    kepids = set(df24['kepid'].values)

    count, total = 1, len(kepids)
    diff_count = 0
    processed = 0
    with open('diff_kepid.txt', 'w') as f:
        with open('unk_kepid.txt', 'w') as f_unk:
            for kepid in kepids:
                res = test_kepid(m, kepid, dr24=True)

                sub_df = df24[df24['kepid'] == int(kepid)]
                for plnt, prob in res.items():
                    cls = sub_df[sub_df['tce_plnt_num'] == int(
                        plnt)]['av_training_set'].values[0]

                    processed += 1
                    if (cls == 'PC' and prob < 0.5) \
                            or (cls != 'PC' and prob > 0.5):
                        # prediction disagrees with the catalogue label
                        diff_count += 1
                        print(f'diff rate: {diff_count / processed:.3f}')
                        print(f'{kepid}-{plnt} prob: {prob}', file=f)

                print(f'{count}/{total}')
                count += 1
Example #4
# initialize the Flask app instance
app = Flask(__name__)


@app.route("/")
def index():
    model.summary()
    return render_template("index.html")


@app.route('/predict_message', methods=["POST"])
def predict_message():
    if request.method == "POST":
        message = request.form["message"]
        try:
            prediction = predict(message, model, pipeline)
            return render_template("index.html", prediction=prediction[0][0])
        except IndexError as e:
            logging.critical(e)
    return render_template("index.html")


if __name__ == '__main__':
    model = load_model(
        "./data/models_data/model_conv_drop_false_1_5_new_data.json",
        "./data/models_data/model_weights_conv_drop_false_1_5_new_data.h5")
    pipeline = create_pipeline(
        key_word_path="data/pickled/key_word_map_new_data_1.pkl")
    app.run(debug=True, port=4000, host="0.0.0.0")
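With the server running, the /predict_message endpoint can be exercised from Python (a usage sketch; host and port follow the app.run call above):

import requests

resp = requests.post(
    'http://localhost:4000/predict_message',
    data={'message': 'example text to classify'},
)
print(resp.status_code)  # 200; the body is index.html rendered with the prediction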
Example #5
def main(opt):
    train_data, valid_data = get_train_valid_split_data_names(opt.img_folder, opt.ano_folder, valid_size=1/8)

    # Load the data
    print("load data")
    train_dataset = Phase1Dataset(train_data, load_size=(640, 640), augment=True, limit=opt.limit)
    print("train data length : %d" % (len(train_dataset)))
    valid_dataset = Phase1Dataset(valid_data, load_size=(640, 640), augment=False, limit=opt.limit)
    print("valid data length : %d" % (len(valid_dataset)))
    # Create the DataLoaders
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
        pin_memory=True,
        drop_last=True
    )

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=opt.num_workers,
        pin_memory=True,
        drop_last=True
    )

    # GPU setup (PyTorch requires the device to be chosen explicitly)
    device = torch.device('cuda' if opt.gpus > 0 else 'cpu')

    # Build the model
    heads = {'hm': 1}
    model = get_pose_net(18, heads, 256).to(device)

    # Define the optimizer
    if opt.optimizer == "SGD":
        optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr)  # momentum, dampening, weight decay, nesterov omitted
    elif opt.optimizer == "Adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    elif opt.optimizer == "RAdam":
        optimizer = optim.RAdam(model.parameters(), lr=opt.lr)

    start_epoch = 0
    # Resume from a checkpoint if one was given; the optimizer must already
    # exist so that load_model can restore its state
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(
            model, opt.load_model, optimizer)

    # Define the loss function
    criterion = HMLoss()
    # Define the learning-rate schedule
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1, eta_min=0.00001)

    best_validation_loss = 1e10
    # Create the output folder
    os.makedirs(os.path.join(opt.save_dir, opt.task, 'visualized'), exist_ok=True)

    # Training loop. TODO: evaluate on the test data and save the model at the end of every epoch
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        print("learning rate : %f" % scheduler.get_last_lr()[0])
        train(train_loader, model, optimizer, criterion, device, opt.num_epochs, epoch)
        if opt.optimizer == "SGD":
            scheduler.step()

        # Save the latest model
        save_model(os.path.join(opt.save_dir, opt.task, 'model_last.pth'),
                   epoch, model, optimizer, scheduler)

        # Evaluate on the validation data
        validation_loss, accumulate_datas = valid(valid_loader, model, criterion, device)
        # Save the model whenever the best score improves
        if validation_loss < best_validation_loss:
            best_validation_loss = validation_loss
            save_model(os.path.join(opt.save_dir, opt.task, 'model_best.pth'),
                       epoch, model, optimizer, scheduler)
            print("saved best model")
            visualization(os.path.join(opt.save_dir, opt.task, 'visualized'),
                          accumulate_datas)
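save_model and load_model here are project helpers rather than torch built-ins. A minimal sketch of the pair, assuming the checkpoint layout implied by the call sites above, might be:

import torch

def save_model(path, epoch, model, optimizer, scheduler):
    # Persist everything needed to resume training from this epoch
    torch.save({
        'epoch': epoch,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict(),
    }, path)

def load_model(model, path, optimizer=None):
    checkpoint = torch.load(path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    if optimizer is not None and 'optimizer' in checkpoint:
        optimizer.load_state_dict(checkpoint['optimizer'])
    return model, optimizer, checkpoint.get('epoch', 0)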
Example #6
def compare(threshold=0.5):
    global _same, _total, _all_diff, _all_fp, _all_fn, _wrong_local_view_kepids

    fname = path.join(path.dirname(__file__), 'robo.csv')
    df = pd.read_csv(fname)

    kepids_and_plnt = df[['kepid', 'tce_plnt_num', 'pred_class']]

    m = load_model()

    seen = {}
    _same = 0
    _total = 0
    fp, fn = 0, 0
    count = 1

    _wrong_local_view_kepids = []
    _all_diff = []
    _all_fp = []
    _all_fn = []

    for (kepid, plnt_num, pred_class) in __read_df(kepids_and_plnt):
        try:
            if kepid not in seen:
                res = test_kepid(m, kepid)
                seen[kepid] = res

            prob_of_pc = seen[kepid][plnt_num]
            class_of_pc = '1' if float(prob_of_pc) > threshold else '0'

            if str(pred_class) == class_of_pc:
                _same += 1
            else:
                print(f"diff: {kepid}-{plnt_num}")
                _all_diff.append(f'{kepid}-{plnt_num}')
                if str(pred_class) == '1':
                    fn += 1
                    _all_fn.append(f'{kepid}-{plnt_num} ({prob_of_pc})')

                if str(pred_class) == '0':
                    fp += 1
                    _all_fp.append(f'{kepid}-{plnt_num} ({prob_of_pc})')

            _total += 1
            print(
                f"{count}/{len(kepids_and_plnt)},  agreement: {_same / _total * 100:.3f}%"
            )
            count += 1
        except Exception as e:
            print(e)
            _wrong_local_view_kepids.append(kepid)

    _write_output()
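__read_df is not shown in this example; judging from how its tuples are consumed above, a plausible minimal version is:

def __read_df(df):
    # Yield one (kepid, tce_plnt_num, pred_class) tuple per row
    for row in df.itertuples(index=False):
        yield row.kepid, row.tce_plnt_num, row.pred_class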
Example #7
def test(opt):
    # set device to cpu/gpu
    if opt.use_gpu:
        device = torch.device("cuda", opt.gpu_id)
    else:
        device = torch.device("cpu")

    transform_test = transforms.Compose([
        transforms.ToTensor(),
    ])

    # get CIFAR10/CIFAR100 test set
    if opt.dataset == "CIFAR10":
        test_set = CIFAR10(root="./data",
                           train=False,
                           download=True,
                           transform=transform_test)
    else:
        test_set = CIFAR100(root="./data",
                            train=False,
                            download=True,
                            transform=transform_test)
    num_classes = np.unique(test_set.targets).shape[0]

    # get test dataloader
    test_loader = DataLoader(test_set,
                             batch_size=opt.batch_size,
                             num_workers=opt.num_workers,
                             shuffle=False)

    print(
        "Dataset -- {}, Metric -- {}, Train Mode -- {}, Test Mode -- {}, Blackbox -- {}, Backbone -- {}"
        .format(opt.dataset, opt.metric, opt.train_mode, opt.test_mode,
                opt.test_bb, opt.backbone))
    print("Test iteration batch size: {}".format(opt.batch_size))
    print("Test iterations per epoch: {}".format(len(test_loader)))

    model = load_model(opt.dataset, opt.metric, opt.train_mode, opt.backbone,
                       opt.s, opt.m)
    model.to(device)
    if opt.use_gpu:
        model = DataParallel(model).to(device)

    # load black box model for black box attacks
    if opt.test_bb:
        # Test Black box attacks for different metrics
        if opt.bb_metric != "softmax":
            attack_model = load_model_underscore(
                opt.dataset, opt.bb_metric, "clean", opt.backbone, opt.s,
                opt.m)  # the other black-box models were trained in 'at' mode
            attack_model.to(device)
            if opt.use_gpu:
                attack_model = DataParallel(attack_model).to(device)
        else:
            model_bb = load_model(opt.dataset, "bb", "", opt.backbone, opt.s,
                                  opt.m)
            model_bb.to(device)
            if opt.use_gpu:
                model_bb = DataParallel(model_bb).to(device)
            attack_model = model_bb
    else:
        attack_model = model

    # get prediction results for model
    y_true, y_pred = [], []
    for ii, data in enumerate(test_loader):
        # load data batch to device
        images, labels = data
        images = images.to(device)
        labels = labels.to(device).long()
        # Start from the ground-truth labels; any restart that fools the
        # model overwrites the corresponding entry below
        predictions = labels.cpu().numpy()

        # random restarts for pgd attack
        for restart_cnt in range(opt.test_restarts):
            #print("Batch {}/{} -- Restart {}/{}\t\t\t\t".format(ii+1,len(test_loader), restart_cnt+1, opt.test_restarts))

            # perform adversarial attack update to images
            if opt.test_mode == "fgsm":
                adv_images = fgsm(attack_model, images, labels, 8. / 255)
            elif opt.test_mode == "bim":
                adv_images = bim(attack_model, images, labels, 8. / 255,
                                 2. / 255, 7)
            elif opt.test_mode == "pgd_7":
                adv_images = pgd(attack_model, images, labels, 8. / 255,
                                 2. / 255, 7)
            elif opt.test_mode == "pgd_20":
                adv_images = pgd(attack_model, images, labels, 8. / 255,
                                 2. / 255, 20)
            elif opt.test_mode == "mim":
                adv_images = mim(attack_model, images, labels, 8. / 255,
                                 2. / 255, 0.9, 40)
            else:
                adv_images = images

            # get feature embedding from resnet and prediction
            _, predictions_i = model(adv_images, labels)

            # accumulate test results
            predictions_i = torch.argmax(predictions_i, 1).cpu().numpy()
            labels_i = labels.cpu().numpy()
            predictions[np.where(
                predictions_i != labels_i)] = predictions_i[np.where(
                    predictions_i != labels_i)]

        y_true.append(labels.cpu().numpy())
        y_pred.append(predictions)

    y_true, y_pred = np.concatenate(y_true), np.concatenate(y_pred)
    print(classification_report(y_true, y_pred))
    print("Accuracy: {}".format(accuracy_score(y_true, y_pred)))
    return y_true, y_pred
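fgsm, bim, pgd and mim come from the repository's attack utilities. For orientation, a minimal FGSM step (a sketch, not the repository's implementation; it assumes the model returns an (embedding, logits) pair as in the loop above) looks like:

import torch
import torch.nn.functional as F

def fgsm(model, images, labels, eps):
    images = images.clone().detach().requires_grad_(True)
    _, logits = model(images, labels)
    loss = F.cross_entropy(logits, labels)
    loss.backward()
    # One signed-gradient step, clipped back to the valid pixel range
    adv = images + eps * images.grad.sign()
    return torch.clamp(adv, 0.0, 1.0).detach()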
Example #8
def __init__(self):
    loaded = load_best_model_exp(FLAGS.model)
    self.__dict__.update(loaded)
    self.best_model = load_model(self.best_model_dir, self.best_model_params)
    self.data_loader = create_data_loader()
    self.data_loader.load_data()
Example #9
def main():

    print("load data...")
    data = load_data()

    x_train_seq, x_train_bi, x_train_tri, x_train_acc, x_train_acc20, x_train_ss, x_train_ss8, x_train_bio, y_train = \
        data['train']['seq'], data['train']['bigram'], data['train']['trigram'], \
        data['train']['acc'], data['train']['acc20'], data['train']['ss'], data['train']['ss8'], \
        data['train']['src_bio'], data['train']['label']

    x_dev_seq, x_dev_bi, x_dev_tri, x_dev_acc, x_dev_acc20, x_dev_ss, x_dev_ss8, x_dev_bio, y_dev = \
        data['dev']['seq'], data['dev']['bigram'], data['dev']['trigram'], \
        data['dev']['acc'], data['dev']['acc20'], data['dev']['ss'], data['dev']['ss8'], \
        data['dev']['src_bio'], data['dev']['label']

    x_seq_full = np.concatenate((x_train_seq, x_dev_seq), axis=0)
    x_bi_full = np.concatenate((x_train_bi, x_dev_bi), axis=0)
    x_tri_full = np.concatenate((x_train_tri, x_dev_tri), axis=0)
    x_acc_full = np.concatenate((x_train_acc, x_dev_acc), axis=0)
    x_acc20_full = np.concatenate((x_train_acc20, x_dev_acc20), axis=0)
    x_ss_full = np.concatenate((x_train_ss, x_dev_ss), axis=0)
    x_ss8_full = np.concatenate((x_train_ss8, x_dev_ss8), axis=0)
    x_bio_full = np.concatenate((x_train_bio, x_dev_bio), axis=0)
    y_full = np.concatenate((y_train, y_dev), axis=0)

    print("make data...")
    x_full, y_oh_full = make_data(x_seq_full, x_bi_full, x_tri_full,
                                  x_acc_full, x_acc20_full, x_ss_full,
                                  x_ss8_full, x_bio_full, y_full)

    kf = KFold(n_splits=10, shuffle=True, random_state=2021)
    count = 1
    acc_list = []
    mcc_list = []
    for train_index, test_index in kf.split(y_oh_full):
        print('Starting CV Iteration: ', str(count))
        global model_name
        model_name = "cv_fold_" + str(count)
        print(model_name)

        x_train, y_train, x_test, y_test = make_fold(x_full, y_oh_full,
                                                     train_index, test_index)

        model = Models.EPSOL().get_model()
        model.compile(loss='binary_crossentropy',
                      optimizer=utils.get_adam_optim(),
                      metrics=['accuracy'])
        # print(model.summary())

        model.fit(x_train,
                  y_train,
                  batch_size=64,
                  epochs=10,
                  validation_split=0.1,
                  callbacks=get_callbacks())

        filepath = './result/model/' + model_name + '.hdf5'

        best_model = utils.load_model(filepath)

        [pred_test, pred_prob_test, acc, mcc,
         label] = get_classification_prediction(best_model, x_test, y_test)
        print("-------------------------------------------------------")
        print("ACC of fold-{} cross-validation: {}".format(str(count), acc))
        print("MCC of fold-{} cross-validation: {}".format(str(count), mcc))
        acc_list.append(acc)
        mcc_list.append(mcc)
        save_classification_prediction(pred_test, pred_prob_test, label)
        print("save result ok!")
        count = count + 1

    mean_acc = (1.0 * sum(acc_list)) / len(acc_list)
    mean_mcc = (1.0 * sum(mcc_list)) / len(mcc_list)
    print("-------------------------------------------------------")
    print("Mean ACC of 10 fold cross-validation: {}".format(mean_acc))
    print("Mean MCC of 10 fold cross-validation: {}".format(mean_mcc))
Example #10
def load_best_model(self):
    loaded = load_best_model_exp(self.train_eval_model.model_name)
    self.__dict__.update(loaded)
    self.best_model = load_model(self.best_model_dir, self.best_model_params)
    print("Loaded experiment with best model: {model} for data set: {data_set}".format(
        model=FLAGS.model, data_set=FLAGS.data_set))
Example #11
    def run(self):
        futures = []
        if FLAGS.plot:
            progress = FloatProgress(min=0, max=1)
            display(progress)
        else:
            printProgressBar(0, self.num_samples,
                             prefix='Progress experiment {model}/{data_set}:'.format(
                                 model=FLAGS.model, data_set=FLAGS.data_set),
                             suffix='Complete', length=50)

        done = 0.0
        with (SameProcessExecutor() if self.num_workers <= 0 else concurrent.futures.ProcessPoolExecutor(
                self.num_workers)) as executor:
            for i in range(self.num_samples):
                inserted = False
                while not inserted:
                    if len(futures) < self.num_workers or self.num_workers <= 0:
                        x = self.optimizer.ask()  # x is a list of n_points points
                        objective_fun = self.train_eval_model.create_train_eval(i)
                        args_named = self.to_named_params(x)
                        futures.append(
                            WorkItem(i, x, args_named, executor.submit(objective_fun, args=None, **args_named)))
                        inserted = True

                    for wi in list(futures):
                        try:
                            model_dir, train_eval, validation_eval, test_eval = wi.future.result(0)
                            self.train_eval_task_finished(futures, wi, model_dir, train_eval, validation_eval,
                                                          test_eval)
                            done += 1
                            if FLAGS.plot:
                                progress.value = done / self.num_samples
                            else:
                                printProgressBar(done, self.num_samples,
                                                 prefix='Progress experiment {model}/{data_set}:'.format(
                                                     model=FLAGS.model, data_set=FLAGS.data_set),
                                                 suffix='Complete', length=50)
                        except concurrent.futures.TimeoutError:
                            pass

                    if len(futures) != 0 and len(futures) == self.num_workers:
                        time.sleep(1)

        for wi in list(futures):
            model_dir, train_eval, validation_eval, test_eval = wi.future.result()
            self.train_eval_task_finished(futures, wi, model_dir, train_eval, validation_eval, test_eval)
            done += 1
            if FLAGS.plot:
                progress.value = done / self.num_samples
            else:
                printProgressBar(done, self.num_samples,
                                 prefix='Progress experiment {model}/{data_set}:'.format(
                                     model=FLAGS.model, data_set=FLAGS.data_set),
                                 suffix='Complete', length=50)

        self.best_model = load_model(self.best_model_dir, self.best_model_params)

        predict_train, predict_valid, predict_test = invoke_in_process_pool(self.num_workers,
                                                                            Callable(predict_estimator, self.best_model,
                                                                                     self.train_eval_model.data_loader.train_x,
                                                                                     self.train_eval_model.data_loader.train_y),
                                                                            Callable(predict_estimator, self.best_model,
                                                                                     self.train_eval_model.data_loader.validation_x,
                                                                                     self.train_eval_model.data_loader.validation_y),
                                                                            Callable(predict_estimator, self.best_model,
                                                                                     self.train_eval_model.data_loader.test_x,
                                                                                     self.train_eval_model.data_loader.test_y)
                                                                            )

        self.best_model_train_ll = predict_train["log_likelihood"]
        self.best_model_valid_ll = predict_valid["log_likelihood"]
        self.best_model_test_ll = predict_test["log_likelihood"]

        self.save()
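SameProcessExecutor is the fallback used when num_workers <= 0. A minimal stand-in (a sketch, assuming only the submit and context-manager usage seen above) could be:

import concurrent.futures

class SameProcessExecutor(concurrent.futures.Executor):
    # Runs each submitted callable immediately in the current process and
    # returns an already-completed Future, so result(0) never times out
    def submit(self, fn, *args, **kwargs):
        future = concurrent.futures.Future()
        try:
            future.set_result(fn(*args, **kwargs))
        except BaseException as exc:
            future.set_exception(exc)
        return future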