def test_dataset():
    qm9 = QM9Dataset('data/adjacency_matrix_train.pkl')

    np.random.seed(0)
    sample = qm9[0]

    assert sample.length == 19
    assert sample.targets == np.array(['N'])
    assert np.array_equal(sample.target_mask, np.array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
    assert sample.adj.max()==1

    dl = DataLoader(qm9, batch_size=10)
    a = next(iter(dl))

    qm9 = QM9Dataset('data/adjacency_matrix_train.pkl', bond_order=True)

    np.random.seed(0)
    sample = qm9[0]

    assert sample.length == 19
    assert sample.targets == np.array(['N'])
    assert np.array_equal(sample.target_mask, np.array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
    assert sample.adj.max()==2
    dl = DataLoader(qm9, batch_size=10)
    a = next(iter(dl))
    def preprocess(self, query):

        ## input checks
        if isinstance(query, dict):
            query = pd.DataFrame(query)
        elif isinstance(query, pd.DataFrame):
            pass
        else:
            raise Exception(f"ERROR - FAIL:(model_evaluation) - invalid input. {type(query)} was given")

        X = query.drop(['y'], axis=1)
        y = query[['y']]
        query_train = DataLoader().feature_pipeline(self.numerical, self.categorical).fit(X).transform(X) # get from model trainer log
   
        for n_samples in range(10, len(X), 20):

            subset_indices = X.sample(n=n_samples, replace=True)
            subset_query = DataLoader().feature_pipeline(self.numerical, self.categorical).fit(subset_indices).transform(subset_indices)
            if subset_query.shape[1] != query_train.shape[1]:
                continue
            else:
                break

        print(f'n_samples : {n_samples}')
        print(f'n_features : {subset_query.shape[1]}')

        return subset_query, subset_indices, y
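# A minimal, hypothetical pandas sketch (not part of the original pipeline) of why the
# retry loop above is needed: a one-hot feature pipeline fitted on a small random sample
# can miss categorical levels and therefore produce fewer columns than the pipeline fitted
# on the full data. Reindexing against the full column set is one way to avoid the retries.
import pandas as pd

full = pd.DataFrame({'color': ['red', 'green', 'blue', 'red']})
full_encoded = pd.get_dummies(full)                      # 3 one-hot columns

subset = full.sample(n=2, replace=True, random_state=0)
subset_encoded = pd.get_dummies(subset)                  # may have fewer columns

# align the subset to the full column set, filling missing levels with 0
aligned = subset_encoded.reindex(columns=full_encoded.columns, fill_value=0)
assert aligned.shape[1] == full_encoded.shape[1]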
def main_experiment(train_data, valid_data):
    """
    Question 8:
    """
    trainloader = DataLoader(train_data, batch_size=32)
    devloader = DataLoader(valid_data, batch_size=len(valid_data))

    mlp = MLPClassifier(constants.Circles.INPUT_DIM, constants.Circles.N_CLASSES, 10, 0.05, 50)
    mlp.train(trainloader, devloader, log=os.path.join(constants.Circles.RESULTS_DIR, 'circles_log.txt'))
Example #4
    def __init__(self, args):
        self.args = args
        train = DataLoader(self.args.trainpath)
        dev = DataLoader(self.args.devpath)

        self.train_words, self.train_poss, self.train_chunks, self.train_labels = train.get_all_train_tokens()
        self.train_max_sentence_len, self.train_max_word_len = train.get_required_max_len()
        self.dev_words, self.dev_poss, self.dev_chunks, self.dev_labels = dev.get_all_train_tokens()
        self.dev_max_sentence_len, self.dev_max_word_len = dev.get_required_max_len()

        vocabulary = Vocabulary(self.train_words)
        self.vocab = vocabulary.get_word_vocab()
        self.char_vocab = vocabulary.get_char_vocab()

        self.train_vect = Vectorizer(self.train_max_sentence_len,
                                     self.train_max_word_len, self.vocab,
                                     self.char_vocab, self.train_words)
        self.dev_vect = Vectorizer(self.train_max_sentence_len,
                                   self.train_max_word_len, self.vocab,
                                   self.char_vocab, self.dev_words)

        self.poss_vect = LabelEncoderModel(self.train_poss,
                                           self.train_max_sentence_len)
        self.chunks_vect = LabelEncoderModel(self.train_chunks,
                                             self.train_max_sentence_len)
        self.labels_vect = LabelEncoderModel(self.train_labels,
                                             self.train_max_sentence_len)

        # something seems wrong here
        self.pos_emb_weights = self.poss_vect.get_emb_weights()
        self.chunk_emb_weights = self.chunks_vect.get_emb_weights()
        self.word_emb_weights, self.word_emb_dimensions = PretrainedEmbedder(
            self.vocab, self.args.pretrained_path).pretrained_embedder()
        self.model = ModelTraining(
            self.args.dropout,
            self.args.lr,
            len(set(sum(self.train_labels, []))),
            len(self.vocab),
            len(self.char_vocab),
            self.train_max_word_len,
            len(set(sum(self.train_poss, []))),
            len(set(sum(self.train_chunks, []))),
            word_emb_dimensions=self.word_emb_dimensions,
            word_emb_weights=self.word_emb_weights,
            pos_emb_weights=self.pos_emb_weights,
            chunk_emb_weights=self.chunk_emb_weights).model_build()
Example #5
def train(epochs=10, batch_size=50, lr=1e-3):
    dataloader = DataLoader('/data/strokes.npy')
    Model = model.Model()
    optimizer = torch.optim.RMSprop(Model.parameters(), lr=lr)

    for epoch in range(epochs):
        x, y = dataloader.generate_batch()
        x = torch.from_numpy(np.array(x))
        y = torch.from_numpy(np.array(y))

        y1 = y[:, :, 0]
        y2 = y[:, :, 1]
        y3 = y[:, :, 2]

        hidden = autograd.Variable(torch.randn(1, x.size(0), 121))

        e, pi, mu1, mu2, sigma1, sigma2, corr, hidden = Model(x, hidden)

        loss_val = loss(e, pi, mu1, mu2, sigma1, sigma2, corr, y1, y2, y3)

        optimizer.zero_grad()
        loss_val.backward()
        optimizer.step()

    torch.save(Model.state_dict(), 'unconditional.pt')
    def test_data_validation(self):

        ## schema checks
        try:
            validate = DataLoader().validate_schema(self.test_dataset)
            if validate is None:
                LOG.info("PASS: Test data validation passed.")
        except Exception:
            raise Exception(
                "ERROR - FAIL:(model_evaluation) - invalid input schema.")

        ## input checks
        if isinstance(self.test_dataset, dict):
            self.test_dataset = pd.DataFrame(self.test_dataset)
        elif isinstance(self.test_dataset, pd.DataFrame):
            pass
        else:
            raise Exception(
                f"ERROR - FAIL:(model_evaluation) - invalid input. {self.test_dataset} was given"
            )

        ## features check
        test_features = sorted(self.test_dataset.columns.drop(['y']).tolist())
        data_features = sorted(self.dataset.columns.drop(['y']).tolist())
        if test_features != data_features:
            print(f"test features: {','.join(test_features)}")
            raise Exception(
                "ERROR - FAIL:(model_evaluation) - invalid features present")
def main(argv):
    log.info('Beginning prediction')
    funcs = pd.read_pickle(
        os.path.join(FLAGS.resources,
                     '{}.pkl'.format(FLAGS.function)))['functions'].values
    funcs = GODAG.initialize_idmap(funcs, FLAGS.function)

    log.info('GO DAG initialized. Updated function list-{}'.format(len(funcs)))
    FeatureExtractor.load(FLAGS.resources)
    log.info('Loaded amino acid and ngram mapping data')

    data = DataLoader(filename=FLAGS.inputfile)
    if FLAGS.evaluate:
        test_dataiter = DataIterator(batchsize=FLAGS.batchsize,
                                     size=FLAGS.testsize,
                                     dataloader=data,
                                     functype=FLAGS.function,
                                     featuretype='ngrams')
        predict_evaluate(test_dataiter, 0.2, FLAGS.modelsdir)
    else:
        test_dataiter = DataIterator(batchsize=FLAGS.batchsize,
                                     size=FLAGS.testsize,
                                     dataloader=data,
                                     functype=FLAGS.function,
                                     featuretype='ngrams',
                                     test=True)
        predict(test_dataiter, 0.2, FLAGS.modelsdir, funcs)
Example #8
def run_nn_dmi(args):
    set_global_seeds(args['seed'])
    dataset = DataLoader(args['dataset'], args)
    X_train, X_test, X_val, y_train, y_test, y_val = dataset.prepare_train_test_val(
        args)
    mlp = MLP(
        feature_dim=X_train.shape[-1],
        hidsizes=args['hidsize'],
        dropout=args['dropout'],
        outputs=2,
    )
    classifier = DMIClassifier(
        model=mlp,
        learning_rate=args['lr'],
    )
    results = classifier.fit(
        X_train,
        y_train,
        X_test,
        y_test,
        batchsize=args['batchsize'],
        episodes=args['episodes'],
        logger=logger if args['seeds'] == 1 else None,
    )
    return results
Example #9
def run_pam(args):
    set_global_seeds(args['seed'])
    dataset = DataLoader(args['dataset'])
    X_train, X_test, X_val, y_train, y_test, y_val = dataset.prepare_train_test_val(
        args)
    model = Perceptron(feature_dim=X_train.shape[-1], margin=args['margin'])
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)
Example #10
def run_c_svm(args):
    set_global_seeds(args['seed'])
    dataset = DataLoader(args['dataset'], args)
    X_train, X_test, X_val, y_train, y_test, y_val = dataset.prepare_train_test_val(
        args)
    model = SVC(gamma='auto', class_weight={0: 1., 1: args['C1']})
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)
def decision_boundaries(train_data, valid_data):
    """
    Question 5: Train the neural network using gradient descent on the two circles dataset.
    Plot the decision regions for several different values of the hyperparameters
    (weight decay, number of hidden units, early stopping) so as to illustrate their
    effect on the capacity of the model.
    """

    # raw data is only used to plot the decision boundary
    raw_data = np.loadtxt(open(constants.Circles.DATA_PATH, 'r'))
    X = raw_data[:, :2]
    y = raw_data[:, -1]

    # hyperparameters
    HIDDEN_DIM_SET = [8, 14]
    NUM_EPOCH_SET = [30]
    LEARNING_RATE_SET = [0.05]
    L1_WEIGH_DECAY = [0, 0.005]
    L2_WEIGH_DECAY = [0, 0.005]

    trainloader = DataLoader(train_data, batch_size=32)
    devloader = DataLoader(valid_data, batch_size=len(valid_data))

    i = 0
    for h in HIDDEN_DIM_SET:
        for lr in LEARNING_RATE_SET:
            for l1 in L1_WEIGH_DECAY:
                for l2 in L2_WEIGH_DECAY:
                    for n_epoch in NUM_EPOCH_SET:

                        print('\nhidden_dim: {}, lr: {}, l1: {}, l2: {}'.format(h, lr, l1, l2))
                        mlp = MLPClassifier(constants.Circles.INPUT_DIM, constants.Circles.N_CLASSES, h, lr, n_epoch, l1, l2, l1, l2)
                        mlp.train(trainloader, devloader)

                        figure_name = 'decision_boundaries_{}.png'.format(i)

                        visualize.plot_decision(
                            X, y,
                            path=os.path.join(constants.Circles.FIGURES_DIR, figure_name),
                            model=mlp,
                            param=[h, lr, n_epoch, l1, l2, l1, l2]
                        )
                        i += 1
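# A hedged sketch (the MLPClassifier internals are not shown here) of how the l1/l2
# arguments above usually affect training: weight decay adds a penalty gradient to each
# update, shrinking the weights and thereby limiting the effective capacity of the model:
#   w <- w - lr * (dL/dw + l1 * sign(w) + 2 * l2 * w)
import numpy as np

def regularized_step(w, grad, lr, l1=0.0, l2=0.0):
    """One gradient step with L1 and L2 weight decay (illustrative only)."""
    return w - lr * (grad + l1 * np.sign(w) + 2 * l2 * w)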
Example #12
    def load_data(self, subset=False):
        """Loads and Preprocess data """

        LOG.info(f'loading {self.config.data.path} dataset .....')

        self.dataset = DataLoader().load_data(self.config.data)

        LOG.info("..... validating all data")

        try:
            validate = DataLoader().validate_schema(self.dataset)
            if validate is None:
                LOG.info("PASS: data validation passed.")
        except Exception:
            LOG.critical("FAIL: data validation failed.")
            raise Exception(
                "CRITICAL - FAIL:(dataloader) - invalid data schema")
            # sys.exit(100) # exit if using log and no raise exception

        # self.X, self.y = DataLoader().split_feature_target(self.dataset, self.target)
        # self.X_train, self.X_test, self.y_train ,self.y_test = DataLoader().preprocess_data(self.X, self.y, self.test_size, self.random_state)

        self.train_dataset, self.test_dataset = DataLoader().preprocess_data(
            self.dataset, self.test_size, self.random_state)

        train_shape = DataLoader().feature_pipeline(self.numerical, self.categorical) \
            .fit(self.train_dataset).transform(self.train_dataset)

        # subset the data to enable faster unittests
        if subset:
            subset_query = np.empty(shape=(1, 1), dtype=object)
            while subset_query.shape[1] != train_shape.shape[1]:
                subset_indices = self.train_dataset.sample(
                    frac=self.subset_n_frac, replace=True)
                subset_query = DataLoader().feature_pipeline(
                    self.numerical, self.categorical).fit(
                        subset_indices).transform(subset_indices)
                self.train_dataset = subset_indices

        self.X_train= DataLoader().feature_pipeline(self.numerical, self.categorical) \
            .fit(self.train_dataset).transform(self.train_dataset)
        self.y_train = DataLoader().target_pipeline(self.target).fit(self.train_dataset[self.target]) \
            .transform(self.train_dataset[self.target])

        self.X_test= DataLoader().feature_pipeline(self.numerical, self.categorical).fit(self.test_dataset) \
            .transform(self.test_dataset)
        self.y_test = DataLoader().target_pipeline(self.target).fit(self.test_dataset[self.target]) \
            .transform(self.test_dataset[self.target])
Example #13
 def data_loader(self,
                 batch_size=10,
                 num_workers=4,
                 shuffle=False,
                 pin_memory=False):
     return DataLoader(self,
                       batch_size=batch_size,
                       shuffle=shuffle,
                       num_workers=num_workers,
                       pin_memory=pin_memory)
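# A self-contained sketch of the pattern above, assuming the enclosing class subclasses
# torch.utils.data.Dataset: the method simply wraps the dataset instance in a PyTorch
# DataLoader. Class and attribute names below are illustrative, not from the original code.
import torch
from torch.utils.data import Dataset, DataLoader

class ToyDataset(Dataset):
    def __init__(self, n=100):
        self.x = torch.randn(n, 3)

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx]

    def data_loader(self, batch_size=10, num_workers=0, shuffle=False, pin_memory=False):
        return DataLoader(self,
                          batch_size=batch_size,
                          shuffle=shuffle,
                          num_workers=num_workers,
                          pin_memory=pin_memory)

batch = next(iter(ToyDataset().data_loader(batch_size=4)))  # tensor of shape (4, 3)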
Example #14
def find_best_c1(args):
    set_global_seeds(args['seed'])
    dataset = DataLoader(args['dataset'], args)
    X_train, X_test, X_val, y_train, y_test, y_val = dataset.prepare_train_test_val(
        args)
    results = []
    for c1 in CLASS_WEIGHTS:
        model = SVC(gamma='auto', class_weight={0: 1., 1: c1})
        model.fit(X_train, y_train)
        results.append(model.score(X_val, y_val))
    return results
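# A hedged, self-contained illustration (not from this repository) of what the class_weight
# sweep in run_c_svm / find_best_c1 does: class_weight={0: 1., 1: c1} up-weights the
# positive class in the SVM objective, trading overall accuracy for recall on imbalanced data.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

X, y = make_classification(n_samples=500, weights=[0.9, 0.1], random_state=0)
X_tr, X_val, y_tr, y_val = train_test_split(X, y, random_state=0)

for c1 in (1.0, 5.0, 20.0):
    clf = SVC(gamma='auto', class_weight={0: 1.0, 1: c1}).fit(X_tr, y_tr)
    print(c1, clf.score(X_val, y_val))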
Example #15
def test_transformer_forward_cuda():
    qm9 = QM9Dataset('data/adjacency_matrix_train.pkl')

    np.random.seed(0)
    torch.manual_seed(0)

    dl = DataLoader(qm9, batch_size=1)
    sample = next(iter(dl))
    transformer = TransformerModel().cuda()
    sample.cuda()
    out = transformer(sample)

    assert torch.equal(out['prediction'][sample.target_mask], torch.tensor([0]).cuda())
Example #16
def find_best_margin(args):
    """ return `best_margin / 0.1` """
    set_global_seeds(args['seed'])
    dataset = DataLoader(args['dataset'])
    X_train, X_test, X_val, y_train, y_test, y_val = dataset.prepare_train_test_val(
        args)

    results = []
    for margin in MARGINS:
        model = Perceptron(feature_dim=X_train.shape[-1], margin=margin)
        model.fit(X_train, y_train)
        results.append(model.score(X_val, y_val))
    return results
Example #17
def deploy(path):
    assert os.path.exists(path), f'{path} not found : ('
    dataset = 'YOUR_DATASET_NAME'

    img_size = 256
    test_transform = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    testA = ImageFolder(os.path.join('dataset', dataset, 'testA'), test_transform)
    with fluid.dygraph.guard(): 
        testA_loader = DataLoader(testA, batch_size=1, shuffle=False)
        real_A, _ = next(iter(testA_loader))
        in_np = real_A.numpy()

    # load model
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    program, feed_vars, fetch_vars = fluid.io.load_inference_model(path, exe)

    # inference
    fetch, = exe.run(program, feed={feed_vars[0]: in_np}, fetch_list=fetch_vars)
    def img_postprocess(img):
        assert isinstance(img, np.ndarray), type(img)
        img = img * 0.5 + 0.5
        img = img.squeeze(0).transpose((1, 2, 0))
        # BGR to RGB
        img = img[:, :, ::-1]
        return img
    in_img = img_postprocess(in_np)
    out_img = img_postprocess(fetch)
    plt.subplot(121)
    plt.title('real A')
    plt.imshow(in_img)
    plt.subplot(122)
    plt.title('A to B')
    plt.imshow(out_img)
    plt.show()
Example #18
def test_transformer_forward_cpu():
    qm9 = QM9Dataset('data/adjacency_matrix_train.pkl', epsilon_greedy=0.5)

    np.random.seed(0)
    torch.manual_seed(0)

    dl = DataLoader(qm9, batch_size=2)
    sample = next(iter(dl))
    transformer = TransformerModel()
    out = transformer(sample)

    assert torch.equal(out['prediction'][sample.target_mask], torch.tensor([0, 4, 0, 4, 0, 0, 0]))

    criterion = CrossEntropyLoss()
    targets = sample.targets_num

    assert torch.equal(targets, torch.tensor([[2, 1, 3, 2, 1, 1], [1, 0, 0, 0, 0, 0]]))
    targets = targets[targets != 0]
    targets -= 1
    assert torch.equal(targets, torch.tensor([1, 0, 2, 1, 0, 0, 0]))
    loss = criterion(out['out'][sample.target_mask], targets)

    assert torch.equal(loss, cross_entropy(out['out'][sample.target_mask], targets, reduction='none').mean())
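# A hedged sketch (not taken from the original repository) of the target handling the test
# above exercises: targets use 0 as padding, the prediction head has no padding class, so
# padded entries are dropped and the remaining labels are shifted down by one before
# computing the cross-entropy.
import torch
from torch.nn import CrossEntropyLoss

logits = torch.randn(7, 5)                          # 7 masked positions, 5 atom classes
raw_targets = torch.tensor([2, 1, 3, 2, 1, 1, 1])   # 1-based labels; 0 would mean padding

targets = raw_targets[raw_targets != 0] - 1         # drop padding, make labels 0-based
loss = CrossEntropyLoss()(logits, targets)          # mean cross-entropy over the 7 positions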
def finite_difference_check(dataset, batch_size):
    """
    Computes the gradients for a single example, and
    checks that the gradient is correct using the finite
    difference method.

    Answers to questions 1, 2, and 4.
    """
    dataloader = DataLoader(dataset, batch_size)

    inputs, targets = next(dataloader)

    mlp = MLPClassifier(constants.Circles.INPUT_DIM, constants.Circles.N_CLASSES)
    gradHats, grads, param_names = mlp.finite_difference_check(inputs, targets)

    figure_name = 'finite_difference_check_batch_size_{}.png'.format(batch_size)

    visualize.plot_gradient(
        gradHats, grads,
        param_names,
        legend=['finite differences approx.', 'backpropagation'],
        path=os.path.join(constants.Circles.FIGURES_DIR, figure_name)
    )
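# A minimal numerical sketch, under the assumption that finite_difference_check compares
# backprop gradients against a central-difference approximation
#   dL/dw_i ≈ (L(w + eps*e_i) - L(w - eps*e_i)) / (2*eps)
# Names below (numerical_gradient, f, w) are illustrative only.
import numpy as np

def numerical_gradient(f, w, eps=1e-5):
    """Central finite-difference approximation of the gradient of scalar f at w."""
    grad = np.zeros_like(w, dtype=float)
    for i in range(w.size):
        w_plus, w_minus = w.copy().astype(float), w.copy().astype(float)
        w_plus.flat[i] += eps
        w_minus.flat[i] -= eps
        grad.flat[i] = (f(w_plus) - f(w_minus)) / (2 * eps)
    return grad

# usage: np.allclose(numerical_gradient(loss_fn, weights), backprop_grad, atol=1e-4)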
Example #20
    def __init__(self, isInjector=True):
        self.isInjector = isInjector
        # Input shape
        cube_shape = config['cube_shape']
        self.img_rows = config['cube_shape'][1]
        self.img_cols = config['cube_shape'][2]
        self.img_depth = config['cube_shape'][0]
        self.channels = 1
        self.num_classes = 5
        self.img_shape = (self.img_rows, self.img_cols, self.img_depth,
                          self.channels)

        # Configure data loader
        if self.isInjector:
            self.dataset_path = config['unhealthy_samples']
            self.modelpath = config['modelpath_inject']
        else:
            self.dataset_path = config['healthy_samples']
            self.modelpath = config['modelpath_remove']

        self.dataloader = DataLoader(dataset_path=self.dataset_path,
                                     normdata_path=self.modelpath,
                                     img_res=(self.img_rows, self.img_cols,
                                              self.img_depth))

        # Calculate output shape of D (PatchGAN)
        patch = int(self.img_rows / 2**4)
        self.disc_patch = (patch, patch, patch, 1)

        # Number of filters in the first layer of G and D
        self.gf = 100
        self.df = 100

        optimizer = Adam(0.0002, 0.5)
        optimizer_G = Adam(0.000001, 0.5)

        # Build and compile the discriminator
        self.discriminator = self.build_discriminator()
        self.discriminator.summary()
        self.discriminator.compile(loss='mse',
                                   optimizer=optimizer_G,
                                   metrics=['accuracy'])

        # -------------------------
        # Construct Computational
        #   Graph of Generator
        # -------------------------

        # Build the generator
        self.generator = self.build_generator()
        self.generator.summary()

        # Input images and their conditioning images
        img_A = Input(shape=self.img_shape)
        img_B = Input(shape=self.img_shape)

        # By conditioning on B generate a fake version of A
        fake_A = self.generator([img_B])

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # Discriminators determines validity of translated images / condition pairs
        valid = self.discriminator([fake_A, img_B])

        self.combined = Model(inputs=[img_A, img_B], outputs=[valid, fake_A])
        self.combined.compile(loss=['mse', 'mae'],
                              loss_weights=[1, 100],
                              optimizer=optimizer)
Example #21
def trainloop(args_dict, model, suff_name='', model_val=None, epoch_start=0):

    ## DataLoaders
    dataloader = DataLoader(args_dict)
    N_train, N_val, N_test = dataloader.get_dataset_size()
    train_gen = dataloader.generator('train', args_dict.bs)
    val_gen = dataloader.generator('val', args_dict.bs)

    if args_dict.es_metric == 'loss':

        model_name = os.path.join(
            args_dict.data_folder, 'models', args_dict.model_name + suff_name +
            '_weights.{epoch:02d}-{val_loss:.2f}.h5')

        ep = EarlyStopping(monitor='val_loss',
                           patience=args_dict.pat,
                           verbose=0,
                           mode='auto')

        mc = ModelCheckpoint(model_name,
                             monitor='val_loss',
                             verbose=0,
                             save_best_only=True,
                             save_weights_only=True,
                             mode='auto')

        tb = TensorBoard(log_dir='./logs_tb')

        # reset states after each batch (because the model is stateful)
        rs = ResetStatesCallback()

        model.fit_generator(train_gen,
                            nb_epoch=args_dict.nepochs,
                            samples_per_epoch=N_train,
                            validation_data=val_gen,
                            nb_val_samples=N_val,
                            callbacks=[mc, ep, rs],
                            verbose=1,
                            nb_worker=args_dict.workers,
                            pickle_safe=False)

    else:  # models saved based on other metrics - manual train loop

        # validation generator in test mode to output image names
        val_gen_test = dataloader.generator('val',
                                            args_dict.bs,
                                            train_flag=False)

        # load vocab to convert captions to words and compute cider
        data = json.load(
            open(
                os.path.join(args_dict.data_folder, 'data',
                             args_dict.json_file), 'r'))
        vocab_src = data['ix_to_word']
        inv_vocab = {}
        for idx in vocab_src.keys():
            inv_vocab[int(idx)] = vocab_src[idx]
        vocab = {v: k for k, v in inv_vocab.items()}

        # init waiting param and best metric values
        wait = 0
        best_metric = -np.inf

        for e in range(args_dict.nepochs):
            print("Epoch %d/%d" %
                  (e + 1 + epoch_start, args_dict.nepochs + epoch_start))
            prog = Progbar(target=N_train)

            samples = 0
            for x, y, sw in train_gen:  # do one epoch
                loss = model.train_on_batch(x=x, y=y, sample_weight=sw)
                model.reset_states()
                samples += args_dict.bs
                if samples >= N_train:
                    break
                prog.update(current=samples, values=[('loss', loss)])

            # forward val images to get loss
            samples = 0
            val_losses = []
            for x, y, sw in val_gen:
                val_losses.append(model.test_on_batch(x, y, sw))
                model.reset_states()
                samples += args_dict.bs
                if samples > N_val:
                    break
            # forward val images to get captions and compute metric
            # this can either be done with true prev words or gen prev words:
            # args_dict.es_prev_words set to 'gt' or 'gen'
            if args_dict.es_prev_words == 'gt':
                results_file = gencaps(args_dict, cnn, lang_model,
                                       val_gen_test, inv_vocab, N_val)
            else:
                aux_model = os.path.join(args_dict.data_folder, 'tmp',
                                         args_dict.model_name + '_aux.h5')
                model.save_weights(aux_model, overwrite=True)
                model_val.load_weights(aux_model)
                results_file = gencaps(args_dict, model_val, val_gen_test,
                                       inv_vocab, N_val)

            # get merged ground truth file to eval caps

            ann_file = './utils/captions_merged.json'

            # score captions and return requested metric
            metric = get_metric(args_dict, results_file, ann_file)
            prog.update(current=N_train,
                        values=[('loss', loss),
                                ('val_loss', np.mean(val_losses)),
                                (args_dict.es_metric, metric)])

            # decide if we save checkpoint and/or stop training
            if metric > best_metric:
                best_metric = metric
                wait = 0
                model_name = os.path.join(
                    args_dict.data_folder, 'models',
                    args_dict.model_name + suff_name + '_weights_e' + str(e) +
                    '_' + args_dict.es_metric + "%0.2f" % metric + '.h5')
                model.save_weights(model_name)
            else:
                wait += 1

            if wait > args_dict.pat:
                break

    args_dict.mode = 'train'

    return model, model_name
Example #22
def main(argv):
    funcs = pd.read_pickle(os.path.join(FLAGS.resources, '{}.pkl'.format(FLAGS.function)))['functions'].values
    funcs = GODAG.initialize_idmap(funcs, FLAGS.function)

    log.info('GO DAG initialized. Updated function list-{}'.format(len(funcs)))
    FeatureExtractor.load(FLAGS.resources)
    log.info('Loaded amino acid and ngram mapping data')

    data = DataLoader(filename=FLAGS.inputfile)
    modelsavename = 'savedmodels_{}_{}'.format(__processor__, int(time.time()))
    if FLAGS.predict != '':
        modelsavename = FLAGS.predict
        bestthres = 0.1
        log.info('no training')
        valid_dataiter = DataIterator(batchsize=FLAGS.batchsize, size=FLAGS.validationsize,
                                      dataloader=data, functype=FLAGS.function, featuretype='onehot')

        train_iter = DataIterator(batchsize=FLAGS.batchsize, size=FLAGS.trainsize,
                                  seqlen=FLAGS.maxseqlen, dataloader=data,
                                  numfiles=np.floor((FLAGS.trainsize * FLAGS.batchsize) / 250000),
                                  functype=FLAGS.function, featuretype='onehot')
        next(valid_dataiter)
        next(train_iter)
    else:
        with tf.Session() as sess:
            valid_dataiter = DataIterator(batchsize=FLAGS.batchsize, size=FLAGS.validationsize,
                                          dataloader=data, functype=FLAGS.function, featuretype='onehot')

            train_iter = DataIterator(batchsize=FLAGS.batchsize, size=FLAGS.trainsize,
                                      seqlen=FLAGS.maxseqlen, dataloader=data,
                                      numfiles=np.floor((FLAGS.trainsize * FLAGS.batchsize) / 250000),
                                      functype=FLAGS.function, featuretype='onehot')

            encoder = CHARCNNEncoder(vocab_size=len(FeatureExtractor.aminoacidmap) + 1,
                                     inputsize=train_iter.expectedshape).build()
            log.info('built encoder')
            decoder = HierarchicalGODecoder(funcs, encoder.outputs, FLAGS.function).build(GODAG)
            log.info('built decoder')
            init = tf.global_variables_initializer()
            init.run(session=sess)
            chkpt = tf.train.Saver(max_to_keep=4)
            train_writer = tf.summary.FileWriter(FLAGS.outputdir + '/train',
                                              sess.graph)

            test_writer = tf.summary.FileWriter(FLAGS.outputdir + '/test')
            step = 0
            maxwait = 1
            wait = 0
            bestf1 = -1
            metagraphFlag = True
            log.info('starting epochs')
            for epoch in range(FLAGS.num_epochs):
                for x, y in train_iter:
                    if x.shape[0] != y.shape[0]:
                        raise Exception('invalid, x-{}, y-{}'.format(str(x.shape), str(y.shape)))

                    _, loss, summary = sess.run([decoder.train, decoder.loss, decoder.summary],
                                                 feed_dict={decoder.ys_: y, encoder.xs_: x,
                                                            decoder.threshold: [0.2]})
                    train_writer.add_summary(summary, step)
                    log.info('step-{}, loss-{}'.format(step, round(loss, 2)))
                    step += 1

                if True:
                    log.info('beginning validation')
                    prec, recall, f1 = validate(valid_dataiter, sess, encoder, decoder, test_writer)
                    thres = np.argmax(np.round(f1, 2))
                    log.info('epoch: {} \n precision: {}, recall: {}, f1: {}'.format(epoch,
                                                                                     np.round(prec, 2)[thres],
                                                                                     np.round(recall, 2)[thres],
                                                                                     np.round(f1, 2)[thres]))
                    log.info('precision mat {}'.format(str(np.round(prec, 2))))
                    log.info('recall mat {}'.format(str(np.round(recall, 2))))
                    log.info('f1 mat {}'.format(str(np.round(f1, 2))))

                    log.info('selected threshold is {}'.format(thres/10 + 0.1))
                    if f1[thres] > (bestf1 + 1e-3):
                        bestf1 = f1[thres]
                        bestthres = THRESHOLD_RANGE[thres]
                        wait = 0
                        chkpt.save(sess, os.path.join(FLAGS.outputdir, modelsavename,
                                                        'model_{}_{}'.format(FLAGS.function, step)),
                                    global_step=step, write_meta_graph=metagraphFlag)
                        metagraphFlag = False
                    else:
                        wait += 1
                        if wait > maxwait:
                            log.info('f1 didnt improve for last {} validation steps, so stopping'.format(maxwait))
                            break

                    step += 1

                train_iter.reset()

    log.info('testing model')
    test_dataiter = DataIterator(batchsize=FLAGS.batchsize, size=FLAGS.testsize,
                                 dataloader=data, functype=FLAGS.function, featuretype='onehot')
    prec, recall, f1 = predict_evaluate(test_dataiter, [bestthres], os.path.join(FLAGS.outputdir, modelsavename))
    log.info('test results')
    log.info('precision: {}, recall: {}, F1: {}'.format(round(prec, 2), round(recall, 2), round(f1, 2)))
    data.close()
Example #23
    train_data = Dataset(
        path=constants.TRAIN_PATH,
        input_features=constants.INPUT_FEATURES,
        output_features=constants.OUTPUT_FEATURES,
        header=0,
        transform=lambda X: [x / 255 for x in X]
    )

    valid_data = Dataset(
        path=constants.VALID_PATH,
        input_features=constants.INPUT_FEATURES,
        output_features=constants.OUTPUT_FEATURES,
        header=0,
        transform=lambda X: [x / 255 for x in X]
    )

    trainloader = DataLoader(train_data, batch_size=constants.BATCH_SIZE)
    devloader = DataLoader(valid_data, batch_size=1000)

    mlp = MLPClassifier(
        input_size=constants.INPUT_DIM,
        hidden_size=constants.HIDDEN_DIM,
        output_size=constants.N_CLASSES,
        learning_rate=constants.LEARNING_RATE,
        num_epochs=constants.NUM_EPOCHS
    )

    loss_storage, acc_storage = mlp.train(
        trainloader,
        devloader,
        log=os.path.join(constants.RESULTS_DIR, 'mnist_log.txt')
    )
Example #24
from utils.bert import get_config, BertModel, BertForEmoji, set_learned_params
from torch import optim, nn
import torch
from utils.dataloader import DataLoader
from utils.train import train_model

train_dl, val_dl, TEXT, dataloaders_dict = DataLoader(max_length=256,
                                                      batch_size=32)
# Load the model-configuration JSON file as an object variable
config = get_config(file_path="./weights/bert_config.json")

# Build the base BERT model
net_bert = BertModel(config)

# Set the pretrained parameters on the BERT model
net_bert = set_learned_params(net_bert,
                              weights_path="./weights/pytorch_model.bin")

net = BertForEmoji(net_bert)

# Set the network to training mode
net.train()

# Compute gradients only for the last BertLayer module and the added classification adapter

for name, param in net.named_parameters():
    param.requires_grad = False

for name, param in net.bert.encoder.layer[-1].named_parameters():
    param.requires_grad = True
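# Hedged continuation (not part of the original script): with everything except the last
# encoder layer left frozen above, the optimizer is typically built over the parameters
# that still require gradients; the learning rate here is illustrative. The classifier head
# added by BertForEmoji would normally be unfrozen the same way, but its attribute name is
# not shown in this snippet.
optimizer = optim.Adam([p for p in net.parameters() if p.requires_grad], lr=5e-5)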
def main(argv):
    goids = GODAG.initialize_idmap(None, None)

    labelembedding = load_labelembedding(os.path.join(FLAGS.resources, 'goEmbeddings.txt'), goids)
    assert labelembedding.shape[0] == len(goids), 'label embeddings and known go ids differ'

    ## Add a row of zeros to refer to NOGO or STOPGO
    labelembedding = np.vstack([np.zeros(labelembedding.shape[1]), labelembedding]).astype(np.float32)
    labelembeddingsize = labelembedding.shape[1]

    # shift all goids by 1, to allow STOPGO
    GODAG.idmap = {key: (val + 1) for key, val in GODAG.idmap.items()}
    log.info('min go index - {}'.format(min(list(GODAG.idmap.values()))))
    GODAG.idmap['STOPGO'] = 0
    GODAG.GOIDS.insert(0, 'STOPGO')
    log.info('first from main-{}, from goids-{},  from idmap-{}, by reversemap-{}'.format(goids[0], GODAG.GOIDS[1], GODAG.id2node(1), GODAG.get_id(goids[0])))

    FeatureExtractor.load(FLAGS.resources)
    log.info('Loaded amino acid and ngram mapping data')

    data = DataLoader(filename=FLAGS.inputfile)
    modelsavename = FLAGS.predict
    if FLAGS.predict == "":
        modelsavename = 'savedmodels_{}'.format(int(time.time()))
        with tf.Session() as sess:
            # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
            valid_dataiter = DataIterator(batchsize=FLAGS.batchsize, size=FLAGS.validationsize,
                                          dataloader=data, functype=FLAGS.function, featuretype='onehot',
                                          onlyLeafNodes=True, numfuncs=FLAGS.maxnumfuncs)


            train_iter = DataIterator(batchsize=FLAGS.batchsize, size=FLAGS.trainsize,
                                      seqlen=FLAGS.maxseqlen, dataloader=data,
                                      numfiles=np.floor((FLAGS.trainsize * FLAGS.batchsize) / 250000),
                                      functype=FLAGS.function, featuretype='onehot', onlyLeafNodes=True, numfuncs=FLAGS.maxnumfuncs)

            #encoder = CNNEncoder(vocab_size=len(FeatureExtractor.ngrammap) + 1, inputsize=train_iter.expectedshape).build()

            encoder = MultiCharCNN(vocab_size=len(FeatureExtractor.aminoacidmap) + 1,
                                   inputsize=train_iter.expectedshape, with_dilation=False, charfilter=32,
                                   poolsize=80, poolstride=48).build()

            log.info('built encoder')
            decoder = GORNNDecoder(encoder.outputs, labelembedding, numfuncs=FLAGS.maxnumfuncs,
                                   trainlabelEmbedding=FLAGS.trainlabel, distancefunc=FLAGS.distancefunc, godag=GODAG).build()
            log.info('built decoder')

            init = tf.global_variables_initializer()
            init.run(session=sess)
            chkpt = tf.train.Saver(max_to_keep=4)
            train_writer = tf.summary.FileWriter(FLAGS.outputdir + '/train',
                                              sess.graph)

            test_writer = tf.summary.FileWriter(FLAGS.outputdir + '/test')
            step = 0
            maxwait = 2
            wait = 0
            bestf1 = 0
            bestthres = 0
            metagraphFlag = True
            log.info('starting epochs')
            log.info('params - trainsize-{}, validsie-{}, rootfunc-{}, batchsize-{}'.format(FLAGS.trainsize, FLAGS.validationsize,
                                                                                            FLAGS.function, FLAGS.batchsize))
            for epoch in range(FLAGS.num_epochs):
                for x, y in train_iter:
                    if x.shape[0] != y.shape[0]:
                        raise Exception('invalid, x-{}, y-{}'.format(str(x.shape), str(y.shape)))

                    negatives = get_negatives(y, 10)
                    _, loss, summary = sess.run([decoder.train, decoder.loss, decoder.summary],
                                                 feed_dict={decoder.ys_: y[:, :FLAGS.maxnumfuncs], encoder.xs_: x,
                                                    decoder.negsamples: negatives, decoder.istraining: [True]})
                    train_writer.add_summary(summary, step)
                    log.info('step-{}, loss-{}'.format(step, round(loss, 2)))
                    step += 1

                log.info('beginning validation')
                prec, recall, f1 = validate(valid_dataiter, sess, encoder, decoder, test_writer)
                log.info('epoch: {} \n precision: {}, recall: {}, f1: {}'.format(epoch,
                                                                                 np.round(prec, 2),
                                                                                 np.round(recall, 2),
                                                                                 np.round(f1, 2)))
                if np.round(f1,2) >= (bestf1):
                    bestf1 = np.round(f1,2)
                    wait = 0
                    log.info('saving meta graph')
                    #ipdb.set_trace()
                    chkpt.save(sess, os.path.join(FLAGS.outputdir, modelsavename,
                                                    'model_{}_{}'.format(FLAGS.function, step)),
                                global_step=step, write_meta_graph=metagraphFlag)
                    metagraphFlag = True
                else:
                    wait += 1
                    if wait > maxwait:
                        log.info('f1 didnt improve for last {} validation steps, so stopping'.format(maxwait))
                        break

                train_iter.reset()
                prec, recall, f1 = validate(train_iter, sess, encoder, decoder, None)
                log.info('training error,epoch-{}, precision: {}, recall: {}, f1: {}'.format(epoch,
                                                                                             np.round(prec, 2),
                                                                                             np.round(recall, 2),
                                                                                             np.round(f1, 2)))


                train_iter.reset()

    log.info('testing model')
    test_dataiter = DataIterator(batchsize=FLAGS.batchsize, size=FLAGS.testsize,
                                 dataloader=data, functype=FLAGS.function, featuretype='onehot',
                                 onlyLeafNodes=True, numfuncs=FLAGS.maxnumfuncs)
    prec, recall, f1 = predict_evaluate(test_dataiter, os.path.join(FLAGS.outputdir, modelsavename))
    log.info('test results')
    log.info('precision: {}, recall: {}, F1: {}'.format(round(prec, 2), round(recall, 2), round(f1, 2)))
    data.close()
Example #26
def main(argv):
    funcs = pd.read_pickle(
        os.path.join(FLAGS.resources,
                     '{}.pkl'.format(FLAGS.function)))['functions'].values
    funcs = GODAG.initialize_idmap(funcs, FLAGS.function)

    log.info('GO DAG initialized. Updated function list-{}'.format(len(funcs)))
    FeatureExtractor.load(FLAGS.resources)
    log.info('Loaded amino acid and ngram mapping data')
    pretrained = None
    featuretype = 'onehot'
    if FLAGS.pretrained != '':
        log.info('loading pretrained embedding')
        pretrained, ngrammap = load_pretrained_embedding(FLAGS.pretrained)
        FeatureExtractor.ngrammap = ngrammap
        featuretype = 'ngrams'

    with tf.Session() as sess:
        data = DataLoader(filename=FLAGS.inputfile)
        log.info('initializing validation data')
        valid_dataiter = DataIterator(batchsize=FLAGS.batchsize,
                                      size=FLAGS.validationsize,
                                      dataloader=data,
                                      functype=FLAGS.function,
                                      featuretype='ngrams',
                                      numfuncs=len(funcs),
                                      all_labels=False,
                                      autoreset=True)

        log.info('initializing train data')
        train_iter = DataIterator(batchsize=FLAGS.batchsize,
                                  size=FLAGS.trainsize,
                                  seqlen=FLAGS.maxseqlen,
                                  dataloader=data,
                                  numfiles=4,
                                  numfuncs=len(funcs),
                                  functype=FLAGS.function,
                                  featuretype='ngrams',
                                  all_labels=False,
                                  autoreset=True)

        vocabsize = ((len(FeatureExtractor.ngrammap) +
                      1) if featuretype == 'ngrams' else
                     (len(FeatureExtractor.aminoacidmap) + 1))

        model = KerasDeepGO(funcs,
                            FLAGS.function,
                            GODAG,
                            train_iter.expectedshape,
                            vocabsize,
                            pretrained_embedding=pretrained).build()
        log.info('built encoder')
        log.info('built decoder')
        keras.backend.set_session(sess)
        log.info('starting epochs')

        model_path = FLAGS.outputdir + 'models/model_seq_' + FLAGS.function + '.h5'
        checkpointer = keras.callbacks.ModelCheckpoint(filepath=model_path,
                                                       verbose=1,
                                                       save_best_only=True,
                                                       save_weights_only=True)
        earlystopper = keras.callbacks.EarlyStopping(monitor='val_loss',
                                                     patience=10,
                                                     verbose=1)

        model_jsonpath = FLAGS.outputdir + 'models/model_{}.json'.format(
            FLAGS.function)
        f = open(model_jsonpath, 'w')
        f.write(model.to_json())
        f.close()

        model.fit_generator(train_iter,
                            steps_per_epoch=FLAGS.trainsize,
                            epochs=5,
                            validation_data=valid_dataiter,
                            validation_steps=FLAGS.validationsize,
                            max_queue_size=128,
                            callbacks=[checkpointer, earlystopper])

        valid_dataiter.close()
        train_iter.close()

    log.info('initializing test data')
    test_dataiter = DataIterator(batchsize=FLAGS.batchsize,
                                 size=FLAGS.testsize,
                                 seqlen=FLAGS.maxseqlen,
                                 dataloader=data,
                                 numfiles=4,
                                 numfuncs=len(funcs),
                                 functype=FLAGS.function,
                                 featuretype='ngrams',
                                 all_labels=True)

    prec, recall, f1 = predict_evaluate(test_dataiter, model_jsonpath,
                                        model_path)
    log.info('testing error, prec-{}, recall-{}, f1-{}'.format(
        np.round(prec, 3), np.round(recall, 3), np.round(f1, 3)))
    data.close()
Example #27
    default='Transformer')
parser.add_argument('--gamma', default=1, type=float)
parser.add_argument('--bond_order', default=False, type=bool)
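# Note: argparse's type=bool converts any non-empty string to True (bool('False') is True),
# so passing "--bond_order False" on the command line would still enable bond_order.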
parser.add_argument('--dataset', default='zinc', choices=['qm9', 'zinc'])
args = parser.parse_args()

train_file = f'data/{args.dataset}/adjacency_matrix_train_scaffold.pkl' if args.scaffold else f'data/{args.dataset}/adjacency_matrix_train.pkl'
validation_file = f'data/{args.dataset}/adjacency_matrix_validation_scaffold.pkl' if args.scaffold else f'data/{args.dataset}/adjacency_matrix_validation.pkl'

training = QM9Dataset(data=train_file,
                      num_masks=args.num_masks,
                      epsilon_greedy=args.epsilon_greedy,
                      num_fake=args.num_fake,
                      bond_order=args.bond_order)

train_dl = DataLoader(training, batch_size=args.batch_size)

# Create multiple validation dataloaders, one per corruption level (number of masked atoms)
val_dls = []
if args.num_fake == 0:
    for masks in range(1, 6):

        val_set = QM9Dataset(data=validation_file,
                             num_masks=masks,
                             bond_order=args.bond_order)
        val_dl = DataLoader(val_set, batch_size=args.batch_size)
        val_dls.append(val_dl)

if args.num_masks == 0:
    for fakes in range(1, 6):
Example #28
def main(argv):
    goids = GODAG.initialize_idmap(None, None)
    # GO_MAT = GODAG.get_fullmat(goids)
    # log.info('GO Matrix shape - {}'.format(GO_MAT.shape))
    # GO_MAT = np.vstack([np.zeros(GO_MAT.shape[1]), GO_MAT])
    labelembedding = load_labelembedding(os.path.join(FLAGS.data, 'goEmbeddings.txt'), goids)
    assert labelembedding.shape[0] == len(goids) + 1, 'label embeddings and known go ids differ'
    labelembeddingsize = labelembedding.shape[1]
    FeatureExtractor.load(FLAGS.data)
    log.info('Loaded amino acid and ngram mapping data')

    data = DataLoader()
    modelsavename = 'savedmodels_{}'.format(int(time.time()))
    with tf.Session() as sess:
        # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        valid_dataiter = DataIterator(batchsize=FLAGS.batchsize, size=FLAGS.validationsize,
                                      dataloader=data, functype=FLAGS.function, featuretype='ngrams',
                                      onlyLeafNodes=True, limit=FLAGS.maxnumfuncs)


        train_iter = DataIterator(batchsize=FLAGS.batchsize, size=FLAGS.trainsize,
                                  seqlen=FLAGS.maxseqlen, dataloader=data,
                                  numfiles=np.floor((FLAGS.trainsize * FLAGS.batchsize) / 250000),
                                  functype=FLAGS.function, featuretype='ngrams', onlyLeafNodes=True, limit=FLAGS.maxnumfuncs)

        encoder = CNNEncoder(vocab_size=len(FeatureExtractor.ngrammap) + 1, inputsize=train_iter.expectedshape).build()
        log.info('built encoder')
        decoder = GORNNDecoder(encoder.outputs, labelembedding, numfuncs=FLAGS.maxnumfuncs).build()
        log.info('built decoder')
        init = tf.global_variables_initializer()
        init.run(session=sess)
        chkpt = tf.train.Saver(max_to_keep=4)
        train_writer = tf.summary.FileWriter(FLAGS.outputdir + '/train',
                                          sess.graph)

        test_writer = tf.summary.FileWriter(FLAGS.outputdir + '/test')
        step = 0
        maxwait = 1
        wait = 0
        bestf1 = 0
        bestthres = 0
        metagraphFlag = True
        log.info('starting epochs')
        log.info('params - trainsize-{}, validsie-{}, rootfunc-{}, batchsize-{}'.format(FLAGS.trainsize, FLAGS.validationsize,
                                                                                        FLAGS.function, FLAGS.batchsize))
        for epoch in range(FLAGS.num_epochs):
            for x, y in train_iter:
                if x.shape[0] != y.shape[0]:
                    raise Exception('invalid, x-{}, y-{}'.format(str(x.shape), str(y.shape)))

                negatives = get_negatives(y, 10)
                _, loss, summary = sess.run([decoder.train, decoder.loss, decoder.summary],
                                            feed_dict={decoder.ys_: y, encoder.xs_: x,
                                                decoder.negsamples: negatives})
                train_writer.add_summary(summary, step)
                log.info('step-{}, loss-{}'.format(step, round(loss, 2)))
                step += 1

            log.info('beginning validation')
            prec, recall, f1 = validate(valid_dataiter, sess, encoder, decoder, test_writer)
            log.info('epoch: {} \n precision: {}, recall: {}, f1: {}'.format(epoch,
                                                                             np.round(prec, 2),
                                                                             np.round(recall, 2),
                                                                             np.round(f1, 2)))
            if f1 > (bestf1 + 1e-3):
                bestf1 = f1
                wait = 0
                chkpt.save(sess, os.path.join(FLAGS.outputdir, modelsavename,
                                                'model_{}_{}'.format(FLAGS.function, step)),
                            global_step=step, write_meta_graph=metagraphFlag)
                metagraphFlag = False
            else:
                wait += 1
                if wait > maxwait:
                    log.info('f1 didnt improve for last {} validation steps, so stopping'.format(maxwait))
                    break

            train_iter.reset()

    log.info('testing model')
    test_dataiter = DataIterator(batchsize=FLAGS.batchsize, size=FLAGS.testsize,
                                 dataloader=data, functype=FLAGS.function, featuretype='ngrams',
                                 onlyLeafNodes=True, limit=FLAGS.maxnumfuncs)
    prec, recall, f1 = predict_evaluate(test_dataiter, [bestthres], os.path.join(FLAGS.outputdir, modelsavename))
    log.info('test results')
    log.info('precision: {}, recall: {}, F1: {}'.format(round(prec, 2), round(recall, 2), round(f1, 2)))
    data.close()
Example #29
from utils.dataloader import DataLoader
import torch
from model import model_utils
from optimizer.optimizer import NoamOpt
from train.trainer import Trainer

hidden_size = 256
num_encoder = 6
num_decoder = 6
n_head = 8
pf_dim = 1024
drop_out = 0.5

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = 'cpu'

dataloader = DataLoader(device)
train_iterator, valid_iterator, test_iterator = dataloader.load_data(64)
model = model_utils.create_model(dataloader.src_vocab_size(), dataloader.trg_vocab_size(), hidden_size, num_encoder, num_decoder, n_head, pf_dim,
                                 drop_out, dataloader.get_pad_idx(), device)

print(model_utils.count_parameters(model))
model_utils.init(model)
optimizer = NoamOpt(hidden_size , 1, 2000, torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

trainer = Trainer(train_iterator, valid_iterator, model, optimizer, dataloader.get_pad_idx(), device)
trainer.train(5)
# for i, batch in enumerate(train_iterator):
#     src = batch.src.permute(1, 0).to(device)
#     trg = batch.trg.permute(1, 0).to(device)
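# A hedged sketch (the NoamOpt implementation itself is not shown here) of the standard
# "Noam" schedule from "Attention Is All You Need", which the arguments above
# (model size 256, factor 1, warmup 2000) suggest:
#   lr(step) = factor * d_model**-0.5 * min(step**-0.5, step * warmup**-1.5)
def noam_lr(step, d_model=256, factor=1, warmup=2000):
    step = max(step, 1)                      # avoid division by zero at step 0
    return factor * d_model ** -0.5 * min(step ** -0.5, step * warmup ** -1.5)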
Example #30
                               targets_global, predictions_global)
    return metric_per_length, lengths, metric_global


device = torch.device('cuda') if torch.cuda.is_available() else torch.device(
    'cpu')
batch_size = 248
val_iters_mask = []

num_corrupted = [1, 2, 3, 4, 5, 20]

for masks in num_corrupted:

    test_set = QM9Dataset(data='data/adjacency_matrix_test.pkl',
                          num_masks=masks)
    test_dl = DataLoader(test_set, batch_size=batch_size)
    val_iters_mask.append(test_dl)

val_iters_fake = []

for fakes in num_corrupted:

    test_set = QM9Dataset(data='data/adjacency_matrix_test.pkl',
                          num_fake=fakes)
    test_dl = DataLoader(test_set, batch_size=batch_size)
    val_iters_fake.append(test_dl)

model_names = [
    "Transformer_num_masks=1_num_fake=0_num_same=0_num_layers=4_num_heads=3_embedding_dim=64_dropout=0.0_lr=0.001_edge_encoding=1_epsilon_greedy=0.2.pt",
    "BagOfWords_num_masks=1_num_fake=0_num_same=0_num_layers=4_embedding_dim=64_lr=0.0005_epsilon_greedy=0.2_bow_type=1.pt",
    "BagOfWords_num_masks=1_num_fake=0_num_same=0_num_layers=4_embedding_dim=64_lr=0.0005_epsilon_greedy=0.2_bow_type=2.pt",