Example No. 1
def train(parameters):
    model_folder = setup_log(parameters, 'train')

    set_seed(parameters['seed'])

    ###################################
    # Data Loading
    ###################################
    print('Loading training data ...')
    train_loader = DataLoader(parameters['train_data'], parameters)
    train_loader(embeds=parameters['embeds'])
    train_data = DocRelationDataset(train_loader, 'train', parameters, train_loader).__call__()

    print('\nLoading testing data ...')
    test_loader = DataLoader(parameters['test_data'], parameters)
    test_loader()
    test_data = DocRelationDataset(test_loader, 'test', parameters, train_loader).__call__()

    ###################################
    # Training
    ###################################
    trainer = Trainer(train_loader, parameters, {'train': train_data, 'test': test_data}, model_folder)
    trainer.run()

    if parameters['plot']:
        plot_learning_curve(trainer, model_folder)

    if parameters['save_model']:
        save_model(model_folder, trainer, train_loader)
Example No. 2
def train(parameters):
    model_folder = setup_log(parameters, 'train')

    set_seed(0)

    ###################################
    # Data Loading
    ###################################
    print('\nLoading training data ...')
    train_loader = DataLoader(parameters['train_data'], parameters)
    train_loader(embeds=parameters['embeds'])
    train_data = RelationDataset(train_loader, 'train',
                                 parameters['unk_w_prob'],
                                 train_loader).__call__()

    print('\nLoading testing data ...')
    test_loader = DataLoader(parameters['test_data'], parameters)
    test_loader()
    test_data = RelationDataset(test_loader, 'test', parameters['unk_w_prob'],
                                train_loader).__call__()

    ###################################
    # TRAINING
    ###################################
    trainer = Trainer({
        'train': train_data,
        'test': test_data
    }, parameters, train_loader, model_folder)
    trainer.run()

    trainer.eval_epoch(final=True, save_predictions=True)
    if parameters['plot']:
        plot_learning_curve(trainer, model_folder)
Example No. 3
    def test(self):
        test_loader = DataLoader(
            os.path.join(self.config['global']['folders']['datasets'],
                         self.config['global']['files']['datasets']['test']))
        self.model.evaluate_generator(
            generator=test_loader.flow(batch=self.batch),
            val_samples=test_loader.size)
Example No. 4
def run():
    '''
    Main method of the package.
    '''
    # ------------- LOAD DATA -------------- #
    loader = DataLoader()
    training_set, test_set = loader.leave_one_out(test_index=0)

    # --------------- TRAINING ---------------- #
    trainlandmarks = training_set[1]
    # train a Feature Detection system
    featuredetector = FDTraining()
    # fully automatic:
    featuredetector.search_region = featuredetector.scan_region(trainlandmarks,
                                                                diff=55,
                                                                searchStep=20)
    # semi-automatic:
    # featuredetector.search_region = ((880, 1125), (1350, 1670), 20)
    print('---Search space set to', featuredetector.search_region)
    print('Done.')

    # build and train an Active Shape Model
    asm = ASMTraining(training_set, k=3, levels=3)

    # --------------- TESTING ----------------- #
    testimage, testlandmarks = test_set
    # remove some noise from the test image
    testimage = remove_noise(testimage)

    # perform feature matching to find init regions
    print('---Searching for matches...')
    matches = featuredetector.match(testimage)
    print('Done.')

    # or perform manual initialisation (click on center)
    matches = [featuredetector._ellipse(plot.set_clicked_center(testimage))]

    for i in range(len(matches)):
        # search and fit image
        new_fit = asm.activeshape.multiresolution_search(testimage,
                                                         matches[i],
                                                         t=10,
                                                         max_level=2,
                                                         max_iter=10,
                                                         n=0.2)
        # Find the target that the new fit represents in order
        # to compute the error. This is done by taking the smallest
        # MSE of all targets.
        mse = np.zeros((testlandmarks.shape[0], 1))
        for j in range(mse.shape[0]):
            mse[j] = mean_squared_error(testlandmarks[j], new_fit)
        best_fit_index = np.argmin(mse)
        # implement maximally tolerable error
        if int(mse[best_fit_index]) < MSE_THRESHOLD:
            print('MSE:', mse[best_fit_index])
            plot.render_shape_to_image(testimage,
                                       testlandmarks[best_fit_index],
                                       color=(0, 0, 0))
        else:
            print('Bad fit. Needs to restart.')
Example No. 5
def load(ql, f_name="./res/leerpaden_app.xlsx", id_="simone"):
    print("Loading data")
    loader = DataLoader(f_name=f_name, s_name="Blad1")
    data, transfer_data = loader.load(quick_loading=ql)
    log_data = None
    if id_ not in ["test"]:
        log_data = loader.load_log()
    if loader.quick_loaded is False:
        print("Organizing data")
        # data["DateTime"] = loader.combine_date_time(data["SubmitDate"],
        #                                             data["Time"])

        if id_ in [
                "kb_all", "kb_all_attempts_curve", "kb_smoothed_curves", "jm"
        ]:
            data = data[[
                'DateTime', 'UserId', 'ExerciseId', 'LOID', 'Correct',
                'AbilityAfterAnswer', 'Effort', 'Lesson', 'LessonProgress'
            ]]
        else:
            data = data[[
                'DateTime', 'UserId', 'ExerciseId', 'LOID', 'Correct',
                'AbilityAfterAnswer'
            ]]
        print("Preprocessing data")
        if id_ not in ["kb", "kb_all"]:
            if "LessonProgress" in data.columns:
                unfiltered = loader.sort_data_by(
                    data, ["DateTime", "LessonProgress"])
            else:
                unfiltered = loader.sort_data_by(data, "DateTime")
        else:
            unfiltered = data
        transfer_data = loader.first_attempts_only(
            ['UserId', 'ExerciseId', 'LOID'], df=transfer_data, copy_df=False)
        data = loader.filter(filters, df=unfiltered)
        # print(data.head())
        if id_ in [
                "karlijn_en_babette",
                "kb",
                "kb_all",
                "test",
                "jm",
        ]:
            data = PhaseFinder().find_gynzy_phases(data, id_)
        elif id_ in [
                "kb_all_attempts_curve",
                "kb_smoothed_curves",
        ]:
            data = PhaseFinder().find_gynzy_phases_with_lesson_info(data, id_)
        else:
            data = PhaseFinder().find_phases(data)
            data = correct(data)
        loader.quick_save(transfer_data, f_name="quicktransfer.pkl")
        loader.quick_save(data)
    first_att_data = loader.first_attempts_only(
        ['UserId', 'ExerciseId', 'LOID'], df=data)
    # print(data.loc[data.UserId == 59491].tail(40).values)
    return data, first_att_data, transfer_data, log_data
Example No. 6
    def train(self):
        train_loader = DataLoader(
            os.path.join(self.config['global']['folders']['datasets'],
                         self.config['global']['files']['datasets']['train']))

        validation_loader = DataLoader(os.path.join(
            self.config['global']['folders']['datasets'],
            self.config['global']['files']['datasets']['validation']),
                                       random=False)

        h = self.model.fit_generator(train_loader.flow(self.batch),
                                     samples_per_epoch=self.samples,
                                     nb_epoch=self.epochs,
                                     validation_data=validation_loader.flow(
                                         self.batch),
                                     nb_val_samples=validation_loader.size)

        self.dump(h.history)
Example No. 7
    def __init__(self):
        self.img_rows = 128
        self.img_cols = 128
        self.channels = 3
        self.n_features = 128
        self.n_classes = 31
        self.img_shape = (self.img_rows, self.img_cols, self.channels)

        self.data_loader = DataLoader(img_res=(self.img_rows, self.img_cols),
                                      n_classes=self.n_classes)

        optimizer = Adam(0.0002, 0.5)

        self.D_R = build_discriminator(self.img_shape)
        self.D_F = build_feature_discriminator(self.n_features)
        self.D_R.compile(loss='binary_crossentropy',
                         optimizer=optimizer,
                         metrics=['accuracy'])
        self.D_F.compile(loss='binary_crossentropy',
                         optimizer=optimizer,
                         metrics=['accuracy'])

        self.Refiner = build_refiner(self.img_shape, self.channels)
        self.Feature = build_encoder(self.img_shape, self.n_features)
        self.Classifier = build_classifier(self.n_features, self.n_classes)

        self.D_R.trainable = False
        self.D_F.trainable = False

        self.Classifier.compile(loss='categorical_crossentropy',
                                optimizer=optimizer,
                                metrics=['accuracy'])
        self.Classifier.trainable = False

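        # GAN_1: refiner followed by the image discriminator D_R (adversarial realism objective)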
        self.GAN_1 = Sequential()
        self.GAN_1.add(self.Refiner)
        self.GAN_1.add(self.D_R)
        self.GAN_1.compile(loss='binary_crossentropy',
                           optimizer=optimizer,
                           metrics=['accuracy'])

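        # GAN_2: refiner -> feature encoder -> feature discriminator D_F (adversarial objective in feature space)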
        self.GAN_2 = Sequential()
        self.GAN_2.add(self.Refiner)
        self.GAN_2.add(self.Feature)
        self.GAN_2.add(self.D_F)
        self.GAN_2.compile(loss='binary_crossentropy',
                           optimizer=optimizer,
                           metrics=['accuracy'])

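        # GAN_3: refiner -> feature encoder -> classifier (refined images should remain classifiable)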
        self.GAN_3 = Sequential()
        self.GAN_3.add(self.Refiner)
        self.GAN_3.add(self.Feature)
        self.GAN_3.add(self.Classifier)
        self.GAN_3.compile(loss='categorical_crossentropy',
                           optimizer=optimizer,
                           metrics=['accuracy'])
Example No. 8
def weak_label(train_primitive_matrix):
	# Load data
	dl = DataLoader()
	# train_primitive_matrix, val_primitive_matrix, test_primitive_matrix, \
	# train_ground, val_ground, test_ground, mode, frameNums = dl.load_data(mode = 'auto', numFramesToLoad = 1000)
	_, val_primitive_matrix, _, \
	_, val_ground, _, mode, frameNums = dl.load_data(mode = 'auto', numFramesToLoad = 1000, need_split = False)
	# Pass into reef
	return_matrix = reef_label(train_primitive_matrix, val_primitive_matrix, val_ground, None)
	return(return_matrix)
Example No. 9
    def __init__(self):
        config = ConfigLoader()
        self.parameters = config.load_config()
        self.model_folder = setup_log(self.parameters, 'train')

        set_seed(0)

        ###################################
        # Data Loading
        ###################################
        print('\nLoading training data ...')
        self.train_loader = DataLoader(self.parameters['train_data'], self.parameters)
        self.train_loader(embeds=self.parameters['embeds'])
        self.train_data = RelationDataset(self.train_loader, 'train', self.parameters['unk_w_prob'], self.train_loader).__call__()

        print('\nLoading testing data ...')
        test_loader = DataLoader(self.parameters['test_data'], self.parameters)
        test_loader()
        self.test_data = RelationDataset(test_loader, 'test', self.parameters['unk_w_prob'], self.train_loader).__call__()
Example No. 10
def main(params):
    # Arguments passed down from the parser
    download_data_path = params['input_data_path']
    data_basepath = params['output_data_path']
    logs_path = params['logs_path']
    plots_path = params['plots_path']
    contour_type = params['contour_type']
    toggle_plot = params['toggle_plot']
    mini_batch_size = params['mini_batch_size']

    # Set up logging
    _setup_logging(logs_path)

    # Meat of the python program
    logging.info(
        'Started running preprocessor for the following parameters: {}'.format(
            params))
    reader = DataReader(download_data_path=download_data_path,
                        data_basepath=data_basepath,
                        logs_path=logs_path,
                        plots_path=plots_path,
                        contour_type=contour_type,
                        save_plot=toggle_plot)
    images, masks, metadata = reader.load_samples(reader.sample_tuples)
    loader = DataLoader(output_dir=data_basepath,
                        images=images,
                        masks=masks,
                        metadata=metadata,
                        mini_batch_size=mini_batch_size)
    minibatches = loader.random_mini_batches()

    # If user enabled the toggle_plot to evaluate the reader and loader modules
    if toggle_plot:
        # Check out the overall view of all samples (dicoms, masks) with no shuffle and no partitioning
        logging.debug(
            'Plotting the overall view of all (dicom, mask) samples...')
        reader.plot_samples(images, masks, metadata,
                            'data-reader_no-shuffle_batchset.jpg')

        # Check out first minibatch to see whether it matches the ones in 'data-reader_no-shuffle_batchset.jpg' with same label
        logging.debug(
            'Extracting and plotting the first minibatch to validate DataLoader against the previous plot from DataReader...'
        )
        for i, minibatch in enumerate(minibatches):
            if i > 1:
                break
            minibatch_image, minibatch_mask, minibatch_metadata = minibatch

        # minibatch_image (8,256,256), minibatch_mask (8,256,256), minibatch_metadata (8,)
        reader.plot_samples(minibatch_image, minibatch_mask,
                            minibatch_metadata,
                            'data-loader_shuffled_batchset.jpg')
        logging.info('Finished running preprocessor...')
Example No. 11
def test(parameters):
    model_folder = setup_log(parameters, 'test')

    print('\nLoading mappings ...')
    train_loader = load_mappings(model_folder)
    
    print('\nLoading testing data ...')
    test_loader = DataLoader(parameters['test_data'], parameters)
    test_loader()    
    test_data = DocRelationDataset(test_loader, 'test', parameters, train_loader).__call__() 

    m = Trainer(train_loader, parameters, {'train': [], 'test': test_data}, model_folder)
    trainer = load_model(model_folder, m)
    trainer.eval_epoch(final=True, save_predictions=True)
Example No. 12
def run(sequences, base_path):
    kinect_nodes = ['KINECTNODE1', 'KINECTNODE2', 'KINECTNODE3', 'KINECTNODE4',
                    'KINECTNODE5', 'KINECTNODE6', 'KINECTNODE7', 'KINECTNODE8',
                    'KINECTNODE9', 'KINECTNODE10']
    edges = [
        (0, 1), (0, 2), (1, 15), (15, 16), (0, 3), (3, 4), (4, 5),
        (2, 6), (6, 7), (7, 8), (1, 17), (17, 18), (0, 9), (9, 10),
        (10, 11), (2, 12), (12, 13), (13, 14)
    ]
    failed_count = 0
    total_maps = 0
    for sequence in sequences:
        loader = DataLoader(base_path, sequence)
        mi, ma = loader.min_max()
        ma = mi+25  # TODO : temporary
        for idx in trange(mi, ma-len(kinect_nodes), len(kinect_nodes)):

            shuffle(kinect_nodes)  # TODO : turn back on

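            # build target maps for all Kinect nodes in parallel, one create_tmap task per node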
            with ProcessPoolExecutor() as executor:
                results = [executor.submit(create_tmap, loader, edges, i, node, idx)
                           for i, node in enumerate(kinect_nodes)]
                for f in as_completed(results):
                    total_maps += 1
                    try:
                        tmap, jmap, d_im = f.result()
                        # 217088*8*(1+19+18)*10
                        # Accumulate enough maps for about 10GB, then save compressed
                        if tmap.size*tmap.itemsize > 10000000000:
                            # Save the map with savez_compressed
                            # Clear accumulation map
                            pass
                        # size*itemsize*(d, j, e)*numkin*(maxidx)
                        # print(f'size: {tmap.size}, items: {tmap.dtype} ({tmap.itemsize} bytes)')
                    except ValueError:
                        failed_count += 1
                        continue

            # for i, node in enumerate(kinect_nodes):
            #     d_im, bodies, camera = loader.frame(idx+i, node)
            #     total_maps += 1
            #     try:
            #         tmap, _ = target_map(bodies, edges, camera, d_im.shape)
            #     except ValueError:
            #         # Failed to get TMAP
            #         failed_count += 1
            #         continue

            # print(kinect_nodes)
    print(f'FAILED: {failed_count}, TOTAL MAPS: {total_maps}')
Example No. 13
    def __init__(self,
                 filename=None,
                 page_duration=1.,
                 nchannels=None,
                 **kwargs):
        if 'position' not in kwargs:
            kwargs['position'] = (400, 300)
        if 'size' not in kwargs:
            kwargs['size'] = (800, 600)
        super(RawDataView, self).__init__(**kwargs)

        self.loader = DataLoader(filename,
                                 page_duration=page_duration,
                                 nchannels=nchannels)

        self.signals = SignalsVisual(self.loader.data)
Example No. 14
def load_data(data_loc):
    """
    Load the data from an external excel resource.

    Parameters
    ----------
    data_loc: str
        Path to the data.

    Returns
    -------
    pd.DataFrame
        Data frame containing the data with additional pre and post phases
        added.
    """
    # load raw data
    loader = DataLoader(f_name=data_loc, s_name="Blad1")
    loaded_data, _ = loader.load(quick_loading=True)

    # Select columns
    if 'phase' in loaded_data.columns:
        loaded_data = loaded_data[[
            'DateTime', 'UserId', 'ExerciseId', 'LOID', 'Correct',
            'AbilityAfterAnswer', 'Effort', 'Lesson', 'LessonProgress', 'phase'
        ]]
    else:
        loaded_data = loaded_data[[
            'DateTime', 'UserId', 'ExerciseId', 'LOID', 'Correct',
            'AbilityAfterAnswer', 'Effort', 'Lesson', 'LessonProgress'
        ]]

    # Sort data
    loaded_data = loader.sort_data_by(loaded_data,
                                      ["DateTime", "LessonProgress"])

    # Filter unneeded
    loaded_data = loader.filter(filters, df=loaded_data)
    if not loader.quick_loaded:
        loaded_data = PhaseFinder().find_gynzy_phases_with_lesson_info(
            loaded_data, "")
        loader.quick_save(loaded_data)
    return loaded_data
Example No. 15
def test(parameters):
    print('*** Testing Model ***')
    model_folder = setup_log(parameters, 'test')

    print('Loading mappings ...')
    with open(os.path.join(model_folder, 'mappings.pkl'), 'rb') as f:
        loader = pkl.load(f)

    print('Loading testing data ...')
    test_loader = DataLoader(parameters['test_data'], parameters)
    test_loader.__call__()
    test_data = RelationDataset(test_loader, 'test', parameters['unk_w_prob'],
                                loader).__call__()

    m = Trainer({
        'train': [],
        'test': test_data
    }, parameters, loader, model_folder)
    trainer = load_model(model_folder, m)
    trainer.eval_epoch(final=True, save_predictions=True)
Example No. 16
def main():
    torch.manual_seed(42)

    parser = argparse.ArgumentParser()
    parser.add_argument('path', type=os.path.abspath)
    parser.add_argument('--dataset', default="div2k", type=str)
    parser.add_argument('--transform', default=None, type=str)
    parser.add_argument('--gpu', default=0, type=int)

    args = parser.parse_args()

    torch.cuda.set_device(args.gpu)

    validation = DataLoader(os.path.join("data", args.dataset, "val"),
                            shuffle=False,
                            num_workers=0)
    model = SteganoGAN.load(path=args.path)
    metrics = {field: list() for field in METRIC_FIELDS}
    model._validate(validation, metrics, transform=args.transform)
    metrics = {k: torch.tensor(v).mean().item() for k, v in metrics.items()}
    print(metrics)
Example No. 17
def Train():
    global loader, training_set, test_set, trainlandmarks, pca
    # ------------- LOAD DATA -------------- #
    loader = DataLoader()
    training_set, test_set = loader.leave_one_out(test_index=0)

    # --------------- TRAINING ---------------- #
    trainlandmarks = training_set[1]

    # build and train an Active Shape Model
    asm = ASMTraining(training_set, k=3, levels=3)
    pca = asm.activeshape.pdmodel

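    # count how many PCA modes are needed to explain 98% of the shape variance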
    t = 0
    for i in range(len(pca.eigenvalues)):
        if sum(pca.eigenvalues[:i]) / sum(pca.eigenvalues) < 0.98:
            t = t + 1
        else:
            break

    print("Constructed model with {0} modes of variation".format(t))
Example No. 18
def main(args):
    # Load input file, prepare training and validation sets
    data_loader = DataLoader(args.input, args.pre_emb, args.dim_word,
                             args.batch_size, args.lowercase, args.zeros)

    # Save vocabularies
    with open(os.path.join(args.output, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump(data_loader.word_to_id, f)
    with open(os.path.join(args.output, 'char_vocab.pkl'), 'wb') as f:
        cPickle.dump(data_loader.char_to_id, f)
    with open(os.path.join(args.output, 'tag_vocab.pkl'), 'wb') as f:
        cPickle.dump(data_loader.tag_to_id, f)
    # Save parameters
    with open(os.path.join(args.output, 'args.json'), 'wb') as f:
        cPickle.dump(args, f)

    # Build model
    model = Model(args, data_loader)

    best_score = 0
    niter_without_improvement = 0
    for epoch in range(args.nepochs):
        print("Epoch {:} out of {:}".format(epoch + 1, args.nepochs))
        data_loader.reset_pointer()
        score = model.run_epoch(epoch)
        args.learning_rate *= args.decay_rate
        # early stopping and saving best parameters
        if score >= best_score:
            niter_without_improvement = 0
            model.save_session(args.output)
            best_score = score
            print("New best score: {}".format(score))
        else:
            niter_without_improvement += 1
            if niter_without_improvement >= args.early_stopping:
                print("Early stopping {} epochs without improvement".format(
                    niter_without_improvement))
                break
Example No. 19
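# Fragment: assumes `import theano`, `import theano.tensor as T`, `import lasagne`,
# warp-ctc's Theano bindings imported as `ctc`, and a Lasagne network
# (`l_in`, `l_out`, `network_output`) plus the symbolic variables
# `input_lens`, `output`, `output_lens` and the loader hyper-parameters
# (`mbsz`, `min_len`, `max_len`, `num_classes`) defined earlier.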
cost = T.mean(ctc.cpu_ctc_th(network_output, input_lens, output, output_lens))
grads = T.grad(cost, wrt=network_output)
all_params = lasagne.layers.get_all_params(l_out)
updates = lasagne.updates.adam(cost, all_params, 0.001)

train = theano.function([l_in.input_var, input_lens, output, output_lens],
                        cost,
                        updates=updates)
predict = theano.function([l_in.input_var], network_output)
get_grad = theano.function([l_in.input_var, input_lens, output, output_lens],
                           grads)

from loader import DataLoader

data_loader = DataLoader(mbsz=mbsz,
                         min_len=min_len,
                         max_len=max_len,
                         num_classes=num_classes)

i = 1
while True:
    i += 1
    print(i)
    sample = data_loader.sample()
    cost = train(*sample)
    out = predict(sample[0])
    print(cost)
    print("input", sample[0][0].argmax(1))
    print("prediction", out[:, 0].argmax(1))
    print("expected", sample[2][:sample[3][0]])
    if i == 10000:
        grads = get_grad(*sample)
Example No. 20
    # 1-1) training model
    if args.train > 1:
        # read training data
        print("Reading training data...")
        seqs, labels = read_all(args.dir)
        for i in range(len(seqs)):
            seqs[i] = torch.tensor(seqs[i]).unsqueeze(0).float()
        le = LabelEncoder()
        le = le.fit(labels)
        labels_en = le.transform(labels)
        print("-->Complete reading training data")
        print("-->num of training data:", len(labels))
        print(labels[0])
        print(seqs[0])
        print(seqs[0].shape)
        train_loader = DataLoader(length=1024, batch_size=64, n_batches=1000)
        train_loader(labels_en, seqs, labels_en)
        print(len(train_loader))

        # train model
        print("\nTraining model...")
        model = Discriminator(1024, len(labels)).float().to(device)
        optimizer = optim.Adam(model.parameters(), lr=args.rate)

        for epoch in range(args.epoch):
            train(model, device, train_loader, optimizer, epoch + 1)
            #val_loss = test(model, device, test_loader)
            print("")

    # 1-2) using trained model
    if args.train < 1:
Example No. 21
import tensorflow as tf
from model import Model
from loader import DataLoader
from flags import *
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

FLAGS = tf.app.flags.FLAGS

print('<Creating flags>')
create_flags()

print(f'<Loading data - {FLAGS.dataset}>')
dl = DataLoader(FLAGS.dataset)
dl.load()

print('<Defining model>')
tf.reset_default_graph()
model = Model()

print('<Testing model>')
print(f'Using IBP: {FLAGS.use_ibp}')
print(f'Test epsilon: {FLAGS.test_eps}')
model.test(dl, -1, use_ibp=FLAGS.use_ibp, test_eps=FLAGS.test_eps)
Example No. 22
def main(args):
    #initialize dataset class
    ldr = DataLoader(mode=0,
                     seed=args.seed,
                     path=args.dataset,
                     drp_percent=args.drp_impt)
    data_loader = torch.utils.data.DataLoader(ldr,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              drop_last=False)
    num_neurons = int(ldr.train[0].shape[0])

    #Initialize normalizing flow model neural network and its optimizer
    flow = util.init_flow_model(num_neurons, args.num_nf_layers, InterpRealNVP,
                                ldr.train[0].shape[0], args)
    nf_optimizer = torch.optim.Adam(
        [p for p in flow.parameters() if p.requires_grad == True], lr=args.lr)

    #Initialize latent space neural network and its optimizer
    num_hidden_neurons = [
        int(ldr.train[0].shape[0]),
        int(ldr.train[0].shape[0]),
        int(ldr.train[0].shape[0]),
        int(ldr.train[0].shape[0]),
        int(ldr.train[0].shape[0])
    ]
    nn_model = LatentToLatentApprox(int(ldr.train[0].shape[0]),
                                    num_hidden_neurons).float()
    if args.use_cuda:
        nn_model.cuda()
    nn_optimizer = torch.optim.Adam(
        [p for p in nn_model.parameters() if p.requires_grad == True],
        lr=args.lr)

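    # imputed values are re-estimated every `reset_scheduler` epochs; the interval doubles after each reset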
    reset_scheduler = 2

    if args.dataset == 'news':
        print("\n****************************************")
        print("Starting OnlineNewsPopularity experiment\n")
    elif args.dataset == 'mnist':
        print("\n*********************************")
        print("Starting MNIST dropout experiment\n")
    else:
        print("Invalid dataset error")
        sys.exit()

    #Train and test MCFlow
    for epoch in range(args.n_epochs):
        util.endtoend_train(flow, nn_model, nf_optimizer, nn_optimizer,
                            data_loader, args)  #Train the MCFlow model

        with torch.no_grad():
            ldr.mode = 1  #Use testing data
            te_mse, _ = util.endtoend_test(flow, nn_model, data_loader,
                                           args)  #Test MCFlow model
            ldr.mode = 0  #Use training data
            print("Epoch", epoch, " Test RMSE", te_mse**.5)

        if (epoch + 1) % reset_scheduler == 0:
            #Reset unknown values in the dataset using predicted estimates
            if args.dataset == 'mnist':
                ldr.reset_img_imputed_values(nn_model, flow, args.seed, args)
            else:
                ldr.reset_imputed_values(nn_model, flow, args.seed, args)
            flow = util.init_flow_model(
                num_neurons, args.num_nf_layers, InterpRealNVP,
                ldr.train[0].shape[0],
                args)  #Initialize brand new flow model to train on new dataset
            nf_optimizer = torch.optim.Adam(
                [p for p in flow.parameters() if p.requires_grad == True],
                lr=args.lr)
            reset_scheduler = reset_scheduler * 2
Example No. 23
    vocab_X = data_npz['vocab_X'].item()
    vocab_Y = data_npz['vocab_Y'].item()

    model_path = model_dir / f'model_{args.epoch:03d}.pth'
    model = EncoderDecoder(**args_params)
    model.load_state_dict(torch.load(model_path.as_posix()))
    print(f'loaded model from {model_path}', file=sys.stderr)

    test_X = []
    test_max_length = 0
    for sentence in load_data('../data/chap3/test.en'):
        test_X.append(sentence_to_ids(vocab_X, sentence))
        test_max_length = max(test_max_length, len(test_X[-1]))

    test_dataloader = DataLoader(test_X, test_X, 1, shuffle=False)

    pred_Y = []
    for batch in test_dataloader:
        batch_X, _, lengths_X = batch
        pred = model(batch_X, lengths_X, max_length=lengths_X[0])
        pred = pred.max(dim=-1)[1].view(-1).data.cpu().numpy().tolist()
        if word2id['<EOS>'] in pred:
            pred = pred[:pred.index(word2id['<EOS>'])]
        pred_y = [vocab_Y.id2word[_id] for _id in pred]
        pred_Y.append(pred_y)

    with open('./submission.csv', 'w') as f:
        writer = csv.writer(f, delimiter=' ', lineterminator='\n')
        writer.writerows(pred_Y)
Example No. 24
handlers = [
    logging.FileHandler(os.path.join(opt.save_dir, 'output.log'), mode='w'),
    logging.StreamHandler()
]
logging.basicConfig(handlers=handlers, level=logging.INFO, format='')
logger = logging.getLogger()

NOISE_DIM = 100
NF = opt.nf
N_EMB = opt.nemb

if __name__ == '__main__':
    L = DataLoader(data_dir='data/',
                   n_emb=N_EMB,
                   method=opt.method,
                   batch_size=opt.batch,
                   shuffle=True,
                   validation_split=0.0)
    model, trainer = None, None
    if opt.method == 'cgan':
        model = CGAN
        trainer = CGANTrainer
    elif opt.method == 'acgan':
        model = ACGAN
        trainer = ACGANTrainer
    elif opt.method == 'wcgan':
        model = WCGAN
        trainer = WCGANTrainer

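    # build the conditional generator/discriminator pair for the selected GAN variant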
    G = model.Generator(noise_dim=NOISE_DIM, condition_dim=N_EMB, nf=NF)
    D = model.Discriminator(noise_dim=NOISE_DIM, condition_dim=N_EMB, nf=NF)
Example No. 25
emb_file = './dataset/' + args.dataset + '/embedding.npy'
emb_matrix = np.load(emb_file)
assert emb_matrix.shape[0] == len(token_vocab['i2w'])
assert emb_matrix.shape[1] == args.emb_dim

args.token_vocab_size = len(token_vocab['i2w'])
args.post_vocab_size = len(dicts['post'])
args.pos_vocab_size = len(dicts['pos'])

dicts['token'] = token_vocab['w2i']

# load training set and test set
print("Loading data from {} with batch size {}...".format(
    args.dataset, args.batch_size))
train_batch = DataLoader('./dataset/' + args.dataset + '/train.json',
                         args.batch_size, args, dicts)
test_batch = DataLoader('./dataset/' + args.dataset + '/test.json',
                        args.batch_size, args, dicts)

# create the folder for saving the best models and log file
model_save_dir = args.save_dir
helper.ensure_dir(model_save_dir, verbose=True)
file_logger = helper.FileLogger(
    model_save_dir + '/' + args.log,
    header="# epoch\ttrain_loss\ttest_loss\ttrain_acc\ttest_acc\ttest_f1")

# create the model
trainer = GCNTrainer(args, emb_matrix=emb_matrix)

# start training
train_acc_history, train_loss_history, test_loss_history, f1_score_history = [], [], [], [0.]
Example No. 26
        'Differences in MFCC_{0} distribution\nbetween clips of {1}'.format(
            coefficient, dataset[category][clip].category), ax3)
    plot_single_feature_aggregate(
        aggregate,
        'Aggregate MFCC_{0} distribution\n(bag-of-frames across all clips\nof {1})'
        .format(coefficient, dataset[category][clip].category), ax4)
    plt.show()


def view_clip_overview(categories=5, clips_shown=1):
    f, axes = plt.subplots(categories,
                           clips_shown,
                           figsize=(clips_shown * 2, categories * 2),
                           sharex=True,
                           sharey=True)
    f.subplots_adjust(hspace=0.35)

    for c in range(0, categories):
        for i in range(0, clips_shown):
            plot_clip_overview(clips_10[c][i], axes[c])
    plt.show()


loader = DataLoader('/home/runge/projects/sound_detector/TRAIN-10',
                    "audio_clips_segmentation.tfrecords", 512, 1, 1, 2)
clips_10 = loader.load_dataset('/home/runge/projects/sound_detector/TRAIN-10')

# plot_single_clip(clips_10[1][0])
# generate_feature_summary(clips_10, 1, 0, 1)
#view_clip_overview(10,1)
save_clip_overview(10, 1)
Example No. 27
from filtering import get_wids, create_dictionary

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--tag", default="exp")
    parser.add_argument("--input")
    parser.add_argument("--output")

    args = parser.parse_args()
    logger = get_logger(tag=args.tag)

    if (not args.input) or (not args.output):
        print(f"Usage: python {__file__} <input_data_file> <output_dir>")
        exit(1)

    loader = DataLoader(args.input)
    msgs, labels, label_dict = loader.get_data()

    dictionary, bow_dictionary = create_dictionary(msgs)
    seq_title, bow_title, label_title = get_wids(msgs, dictionary,
                                                 bow_dictionary, labels,
                                                 logger)

    seq_title_train, seq_title_test, \
        bow_title_train, bow_title_test, \
        label_train, label_test = train_test_split(
            seq_title,
            bow_title,
            label_title,
            shuffle=True,
            test_size=0.2,
Example No. 28
print("Loading vocab...")
token_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_tok.vocab')  # token
post_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_post.vocab')  # position
pos_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_pos.vocab')  # POS
dep_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_dep.vocab')  # deprel
pol_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_pol.vocab')  # polarity
vocab = (token_vocab, post_vocab, pos_vocab, dep_vocab, pol_vocab)
print(
    "token_vocab: {}, post_vocab: {}, pos_vocab: {}, dep_vocab: {}, pol_vocab: {}"
    .format(len(token_vocab), len(post_vocab), len(pos_vocab), len(dep_vocab),
            len(pol_vocab)))

print("Loading data from {} with batch size {}...".format(
    args.data_dir, args.batch_size))
test_batch = DataLoader(args.data_dir + '/test.json', args.batch_size, args,
                        vocab)
unpacked = helper.unpack_raw_data(test_batch.raw_data, args.batch_size)

print("Evaluating...")
predictions, labels = [], []
test_loss, test_acc, test_step = 0., 0., 0
for i, batch in enumerate(test_batch):
    loss, acc, pred, label, _, _ = loaded_model.predict(batch)
    test_loss += loss
    test_acc += acc
    predictions += pred
    labels += label
    test_step += 1
f1_score = metrics.f1_score(labels, predictions, average='macro')

print("test_loss: {}, test_acc: {}, f1_score: {}".format( \
Example No. 29
    def __init__(self):
        self.img_rows = 128
        self.img_cols = 128
        self.channels = 3
        self.img_shape = (self.img_rows, self.img_cols, self.channels)

        self.dataset_name = 'chokepoint'
        self.data_loader = DataLoader(dataset_name=self.dataset_name,
                                      img_res=(self.img_rows, self.img_cols))

        patch = int(self.img_rows / 2**4)
        self.disc_patch = (patch, patch, 1)

        self.gf = 32
        self.df = 64

        self.lambda_c = 10.0                  # weight of the cycle-consistency loss
        self.lambda_id = 0.1 * self.lambda_c  # weight of the identity loss

        optimizer = Adam(0.0002, 0.5)

        self.d_sim = self.build_discriminator()
        self.d_target = self.build_discriminator()
        self.d_sim.compile(loss='mse',
                           optimizer=optimizer,
                           metrics=['accuracy'])
        self.d_target.compile(loss='mse',
                              optimizer=optimizer,
                              metrics=['accuracy'])


        self.g_R1 = self.build_refiner()
        self.g_R2 = self.build_refiner()

        img_sim = Input(shape=self.img_shape)
        img_target = Input(shape=self.img_shape)

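        # translate each domain to the other: g_R1 maps sim -> target, g_R2 maps target -> sim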
        refined_target = self.g_R1(img_sim)
        refined_sim = self.g_R2(img_target)

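        # map the translations back to their source domain for the cycle-consistency terms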
        rec_sim = self.g_R2(refined_target)
        rec_target = self.g_R1(refined_sim)

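        # identity mappings: each refiner applied to images already in its output domain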
        img_sim_id = self.g_R2(img_sim)
        img_target_id = self.g_R1(img_target)


        self.d_sim.trainable = False
        self.d_target.trainable = False


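        # discriminator scores on the refined images provide the adversarial ('mse', LSGAN-style) losses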
        valid_sim = self.d_sim(refined_sim)
        valid_target = self.d_target(refined_target)


        self.combined = Model(inputs=[img_sim, img_target],
                              outputs=[ valid_sim, valid_target,
                                        rec_sim, rec_target,
                                        img_sim_id, img_target_id ])
        self.combined.compile(loss=['mse', 'mse',
                                    'mae', 'mae',
                                    'mae', 'mae'],
                            loss_weights=[  1, 1,
                                            self.lambda_c, self.lambda_c,
                                            self.lambda_id, self.lambda_id ],
                            optimizer=optimizer)
Example No. 30
def main(args):
    if args.gpu is not None:
        print('Using GPU %d' % args.gpu)
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    else:
        print('CPU mode')

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(227),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])

    val_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(227),
        #transforms.RandomResizedCrop(227),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    # DataLoader initialize
    train_data = DataLoader(args.pascal_path,
                            'trainval',
                            transform=train_transform)
    t_trainloader = torch.utils.data.DataLoader(dataset=train_data,
                                                batch_size=args.batch,
                                                shuffle=True,
                                                num_workers=CORES,
                                                pin_memory=True)
    print('[DATA] Target Train loader done!')
    val_data = DataLoader(args.pascal_path,
                          'test',
                          transform=val_transform,
                          random_crops=args.crops)
    t_testloader = torch.utils.data.DataLoader(dataset=val_data,
                                               batch_size=args.batch,
                                               shuffle=False,
                                               num_workers=CORES,
                                               pin_memory=True)
    print('[DATA] Target Test loader done!')

    if not args.test:
        s_trainset = torchvision.datasets.ImageFolder(
            args.imgnet_path,
            transform=transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomResizedCrop(227),
                transforms.ToTensor(), normalize
            ]))
        s_trainloader = torch.utils.data.DataLoader(dataset=s_trainset,
                                                    batch_size=5 * args.batch,
                                                    shuffle=False,
                                                    num_workers=CORES,
                                                    pin_memory=True)
        print('[DATA] Source Train loader done!')

    N = len(train_data.names)
    iter_per_epoch = N / args.batch

    model = Network(num_classes=21)
    g_model = Network(num_classes=21)
    d_model = disnet()

    if args.gpu is not None:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        print('[MODEL] CUDA DEVICE : {}'.format(device))

        model.to(device)
        g_model.to(device)
        d_model.to(device)

    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       model.parameters()),
                                lr=args.lr,
                                momentum=0.9,
                                weight_decay=0.0001)
    g_optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                         g_model.parameters()),
                                  lr=args.lr,
                                  momentum=0.9,
                                  weight_decay=0.0001)
    d_optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                         d_model.parameters()),
                                  lr=args.lr,
                                  momentum=0.9,
                                  weight_decay=0.0001)

    if args.model is not None:
        checkpoint = torch.load(args.model)
        model.load(checkpoint['model'], True)
        g_model.load(checkpoint['g_model'], True)
        d_model.load_state_dict(checkpoint['d_model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        g_optimizer.load_state_dict(checkpoint['g_optimizer'])
        d_optimizer.load_state_dict(checkpoint['d_optimizer'])

    ############## TRAINING ###############
    print('Start training: lr %f, batch size %d' % (args.lr, args.batch))
    print('Checkpoint: ' + args.checkpoint)

    # Train the Model
    steps = args.iter_start
    best_mAP = 0.0
    best_path = './{}/model-{}_pretrained-{}_lr-0pt001_lmd_s-{}_acc-{}.pth'.format(
        args.checkpoint, 'alexnet', 'False', args.lmd_s, '{}')

    if args.test:
        args.epochs = 1

    for epoch in range(int(iter_per_epoch * args.iter_start), args.epochs):
        if not args.test:
            adjust_learning_rate(optimizer,
                                 epoch,
                                 init_lr=args.lr,
                                 step=100,
                                 decay=0.1)
            adjust_learning_rate(g_optimizer,
                                 epoch,
                                 init_lr=args.lr / 2,
                                 step=100,
                                 decay=0.1)
            adjust_learning_rate(d_optimizer,
                                 epoch,
                                 init_lr=args.lr / 1.5,
                                 step=100,
                                 decay=0.1)

            done = train(epoch, model, g_model, d_model, optimizer,
                         g_optimizer, d_optimizer, t_trainloader,
                         s_trainloader, args.lmd_s, device)

        best_mAP = test(epoch, model, g_model, d_model, optimizer, g_optimizer,
                        d_optimizer, t_testloader, best_mAP, best_path, device)