Example #1
def tile_image(full_size_path, tile_size=400):
    p = IntProgress(description='Tiling', max=1)
    display(p)
    root_dir = os.path.dirname(full_size_path)
    name, ext = os.path.splitext(os.path.basename(full_size_path))
    tile_dir = os.path.join(root_dir, '{0}-{1}x{1}'.format(name, tile_size))
    if not os.path.exists(tile_dir):
        os.mkdir(tile_dir)
    tpl = os.path.join(tile_dir, '{name}-{i:02}-{j:02}{ext}')
    full_size = skimage.io.imread(full_size_path)
    X, Y = full_size.shape[:2]
    total_tiles = (X // tile_size) * (Y // tile_size)
    p.max = total_tiles
    print("Creating %i tiles in %s" % (total_tiles, tile_dir))

    for i in range(X // tile_size):
        for j in range(Y // tile_size):
            tile = full_size[
                i * tile_size: (i+1) * tile_size,
                j * tile_size: (j+1) * tile_size,
                :
            ]
            fname = tpl.format(**locals())
            p.value += 1
            skimage.io.imsave(fname, tile)
    p.value = p.max
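This snippet assumes the standard notebook setup. A minimal sketch of the imports it relies on and a hypothetical call (the image path is a placeholder):

# Assumed imports for the tiling example above (not shown in the snippet).
import os
import skimage.io
from ipywidgets import IntProgress
from IPython.display import display

# Hypothetical call: split a large image into 400x400 tiles next to the source file.
tile_image('data/large-scan.png', tile_size=400)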
Example #2
def from_geodataframe_to_map(portal: Portal, gdf: GeoDataFrame, data_name: str, map_title: str, layer_name: str):
    progress = IntProgress()
    progress.max = 100
    progress.value = 0
    progress.description = 'Uploading file:'
    display(progress)

    def refresh_progress(read, total):
        progress.value = read

    data_id = portal.upload_dataframe_as_json(data_name, gdf, callback=refresh_progress)
    layer = portal.prepare_geojson_layer(data_id, layer_name)
    map_id = portal.create_map([layer], 3857, map_title)
    mr = portal.get_map(map_id)
    pm = PortalThumbnail(mr)
    display(pm)
    return map_id
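Example #2 sizes the bar to 100 and lets the portal's upload callback move it. A portal-independent sketch of the same read/total callback pattern (the chunked loop is illustrative, not part of the original API):

from ipywidgets import IntProgress
from IPython.display import display

def upload_with_progress(total_bytes, chunk=1024):
    # Bar sized to the payload; the callback advances it as bytes are reported.
    bar = IntProgress(max=total_bytes, description='Uploading:')
    display(bar)

    def refresh_progress(read, total):
        bar.value = read

    # Stand-in for the real upload call: report progress chunk by chunk.
    read = 0
    while read < total_bytes:
        read = min(read + chunk, total_bytes)
        refresh_progress(read, total_bytes)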
Example #3
def progress_bar(dirs, path, extension, data):
    progress = IntProgress()
    progress.max = len(dirs)
    progress.description = '(Init)'
    display(progress)
    for mydir in dirs:
        os.chdir(path + mydir)
        all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
        for file in all_filenames:
            df_temp = pd.read_csv(file, encoding='utf8')
            try:
                data = pd.merge(data, df_temp, how='outer', on='SEQN')
            except Exception:
                print(path + mydir + '/' + file)
        progress.value += 1
        progress.description = mydir
    progress.description = '(Done)'
    return data  # return the merged frame so the caller can keep it
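A hypothetical invocation of the function above, mirroring how Example #10 below drives the same logic (the base path and seed file are placeholders):

# Assumed setup; progress_bar itself relies on glob, os, pandas, ipywidgets and display.
import os
import pandas as pd

base = "/path/to/Data/2015-2016/"
dirs = [d for d in os.listdir(base) if os.path.isdir(os.path.join(base, d))]
seed = pd.read_csv(os.path.join(base, "Demographics/DEMO_I.csv"), encoding='utf8')
merged = progress_bar(dirs, base, 'csv', seed)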
Example #4
def download_original():
    p = IntProgress(max=1, description="Downloading")
    display(p)
    if os.path.exists(full_size_file):
        print("Already have %s" % full_size_file)
        p.value = p.max
    else:
        r = requests.get(url, stream=True)
        # Content-Length comes back as a string; coerce to int (fallback is a rough guess).
        content_length = int(r.headers.get('content-length', 1e8))
        print("Downloading %s" % url)
        p.max = content_length
        r.raise_for_status()

        with open(full_size_file, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8096):
                p.value += len(chunk)
                f.write(chunk)
    p.value = p.max
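The download snippet reads url and full_size_file from the enclosing scope. A sketch of the names it expects (values are placeholders):

# Assumed module-level names for download_original() above.
import os
import requests
from ipywidgets import IntProgress
from IPython.display import display

url = 'https://example.com/large-image.tif'   # hypothetical source
full_size_file = os.path.basename(url)        # saved into the working directory

download_original()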
Example #5
def train(FLAG):
    print("Reading dataset...")
    # load data
    Xtrain, Ytrain = read_images(TRAIN_DIR), read_masks(TRAIN_DIR, onehot=True)
    Xtest, Ytest = read_images(VAL_DIR), read_masks(VAL_DIR, onehot=True)
    track = [
        "hw3-train-validation/validation/0008",
        "hw3-train-validation/validation/0097",
        "hw3-train-validation/validation/0107"
    ]
    Xtrack, Ytrack = read_list(track)

    vgg16 = VGG16(classes=7, shape=(256, 256, 3))
    vgg16.build(vgg16_npy_path=FLAG.init_from,
                mode=FLAG.mode,
                keep_prob=FLAG.keep_prob)

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    checkpoint_path = os.path.join(FLAG.save_dir, 'model.ckpt')

    def initialize_uninitialized(sess):
        global_vars = tf.global_variables()
        is_not_initialized = sess.run(
            [tf.is_variable_initialized(var) for var in global_vars])
        not_initialized_vars = [
            v for (v, f) in zip(global_vars, is_not_initialized) if not f
        ]
        if len(not_initialized_vars):
            sess.run(tf.variables_initializer(not_initialized_vars))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # hyper parameters
        batch_size = 32
        epoch = 500
        early_stop_patience = 50
        min_delta = 0.0001
        opt_type = 'adam'

        # recorder
        epoch_counter = 0

        # optimizer
        global_step = tf.Variable(0, trainable=False)

        # Passing global_step to minimize() will increment it at each step.
        if opt_type == 'sgd':
            start_learning_rate = FLAG.lr
            half_cycle = 2000
            learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                       global_step,
                                                       half_cycle,
                                                       0.5,
                                                       staircase=True)
            opt = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                             momentum=0.9,
                                             use_nesterov=True)
        else:
            start_learning_rate = FLAG.lr
            half_cycle = 2000
            learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                       global_step,
                                                       half_cycle,
                                                       0.5,
                                                       staircase=True)
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

        obj = vgg16.loss
        train_op = opt.minimize(obj, global_step=global_step)

        # progress bar
        ptrain = IntProgress()
        pval = IntProgress()
        display(ptrain)
        display(pval)
        ptrain.max = int(Xtrain.shape[0] / batch_size)
        pval.max = int(Xtest.shape[0] / batch_size)

        # re-initialize
        initialize_uninitialized(sess)

        # reset due to adding a new task
        patience_counter = 0
        current_best_val_loss = float('inf')

        # optimize until early stopping or the epoch limit is reached
        while (patience_counter < early_stop_patience
               and epoch_counter < epoch):

            # start training
            stime = time.time()
            bar_train = Bar(
                'Training',
                max=int(Xtrain.shape[0] / batch_size),
                suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
            bar_val = Bar(
                'Validation',
                max=int(Xtest.shape[0] / batch_size),
                suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')

            train_loss, train_accu = 0.0, 0.0
            for i in range(int(Xtrain.shape[0] / batch_size)):
                st = i * batch_size
                ed = (i + 1) * batch_size
                loss, accu, _ = sess.run(
                    [obj, vgg16.accuracy, train_op],
                    feed_dict={
                        vgg16.x: Xtrain[st:ed, :],
                        vgg16.y: Ytrain[st:ed, :],
                        vgg16.is_train: True
                    })
                train_loss += loss
                train_accu += accu
                ptrain.value += 1
                ptrain.description = "Training %s/%s" % (ptrain.value,
                                                         ptrain.max)
            train_loss = train_loss / ptrain.value
            train_accu = train_accu / ptrain.value

            # validation
            val_loss = 0
            val_accu = 0
            for i in range(int(Xtest.shape[0] / batch_size)):
                st = i * batch_size
                ed = (i + 1) * batch_size
                loss, accu = sess.run(
                    [obj, vgg16.accuracy],
                    feed_dict={
                        vgg16.x: Xtest[st:ed, :],
                        vgg16.y: Ytest[st:ed, :],
                        vgg16.is_train: False
                    })
                val_loss += loss
                val_accu += accu
                pval.value += 1
                pval.description = "Testing %s/%s" % (pval.value, pval.value)
            val_loss = val_loss / pval.value
            val_accu = val_accu / pval.value

            # plot
            if epoch_counter % 10 == 0:
                Xplot = sess.run(vgg16.pred,
                                 feed_dict={
                                     vgg16.x: Xtrack[:, :],
                                     vgg16.y: Ytrack[:, :],
                                     vgg16.is_train: False
                                 })

                for i, fname in enumerate(track):
                    saveimg = skimage.transform.resize(Xplot[i],
                                                       output_shape=(512, 512),
                                                       order=0,
                                                       preserve_range=True,
                                                       clip=False)
                    saveimg = label2rgb(saveimg)
                    imageio.imwrite(
                        os.path.join(
                            FLAG.save_dir,
                            os.path.basename(fname) + "_pred_" +
                            str(epoch_counter) + ".png"), saveimg)
                    print(
                        os.path.join(
                            FLAG.save_dir,
                            os.path.basename(fname) + "_pred_" +
                            str(epoch_counter) + ".png"))

            # early stopping check
            if (current_best_val_loss - val_loss) > min_delta:
                current_best_val_loss = val_loss
                patience_counter = 0
                saver.save(sess, checkpoint_path, global_step=epoch_counter)
                print("save in %s" % checkpoint_path)
            else:
                patience_counter += 1

            # shuffle Xtrain and Ytrain in the next epoch
            idx = np.random.permutation(Xtrain.shape[0])
            Xtrain, Ytrain = Xtrain[idx, :, :, :], Ytrain[idx, :]

            # epoch end
            epoch_counter += 1

            ptrain.value = 0
            pval.value = 0
            bar_train.finish()
            bar_val.finish()

            print(
                "Epoch %s (%s), %s sec >> train loss: %.4f, train accu: %.4f, val loss: %.4f, val accu: %.4f"
                % (epoch_counter, patience_counter,
                   round(time.time() - stime,
                         2), train_loss, train_accu, val_loss, val_accu))
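Examples #5 through #9 share the same bookkeeping: one IntProgress per phase, sized to the number of batches, incremented per batch and reset at the end of every epoch. A stripped-down sketch of just that pattern (the counts are arbitrary):

from ipywidgets import IntProgress
from IPython.display import display

n_train_batches, n_val_batches, n_epochs = 100, 20, 3

ptrain = IntProgress(max=n_train_batches)
pval = IntProgress(max=n_val_batches)
display(ptrain)
display(pval)

for epoch in range(n_epochs):
    for _ in range(n_train_batches):   # one sess.run(...) per batch in the originals
        ptrain.value += 1
        ptrain.description = "Training %s/%s" % (ptrain.value, ptrain.max)
    for _ in range(n_val_batches):
        pval.value += 1
        pval.description = "Testing %s/%s" % (pval.value, pval.max)
    ptrain.value = 0                   # reset both bars for the next epoch
    pval.value = 0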
Example #6
def train(FLAG):
    print("Reading dataset...")
    if FLAG.dataset == 'CIFAR-10':
        train_data = CIFAR10(train=True)
        test_data = CIFAR10(train=False)
        vgg16 = VGG16(classes=10)
    elif FLAG.dataset == 'CIFAR-100':
        train_data = CIFAR100(train=True)
        test_data = CIFAR100(train=False)
        vgg16 = VGG16(classes=100)
    else:
        raise ValueError("dataset should be either CIFAR-10 or CIFAR-100.")
    print("Build VGG16 models for %s..." % FLAG.dataset)

    Xtrain, Ytrain = train_data.train_data, train_data.train_labels
    Xtest, Ytest = test_data.test_data, test_data.test_labels

    vgg16.build(vgg16_npy_path=FLAG.init_from,
                prof_type=FLAG.prof_type,
                conv_pre_training=True,
                fc_pre_training=False)
    vgg16.sparsity_train(l1_gamma=FLAG.lambda_s,
                         l1_gamma_diff=FLAG.lambda_m,
                         decay=FLAG.decay,
                         keep_prob=FLAG.keep_prob)

    # define tasks
    tasks = ['var_dp']
    print(tasks)

    # initial task
    cur_task = tasks[0]
    obj = vgg16.loss_dict[tasks[0]]

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=len(tasks))

    checkpoint_path = os.path.join(FLAG.save_dir, 'model.ckpt')
    tvars_trainable = tf.trainable_variables()

    #for rm in vgg16.gamma_var:
    #    tvars_trainable.remove(rm)
    #    print('%s is not trainable.'% rm)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # hyper parameters
        batch_size = 64
        epoch = 500
        early_stop_patience = 50
        min_delta = 0.0001
        opt_type = 'adam'

        # recorder
        epoch_counter = 0

        # optimizer
        global_step = tf.Variable(0, trainable=False)

        # Passing global_step to minimize() will increment it at each step.
        if opt_type == 'sgd':
            start_learning_rate = 1e-4  # adam # 4e-3 #sgd
            half_cycle = 20000
            learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                       global_step,
                                                       half_cycle,
                                                       0.5,
                                                       staircase=True)
            opt = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                             momentum=0.9,
                                             use_nesterov=True)
        else:
            start_learning_rate = 1e-4  # adam # 4e-3 #sgd
            half_cycle = 10000
            learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                       global_step,
                                                       half_cycle,
                                                       0.5,
                                                       staircase=True)
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

        train_op = opt.minimize(obj,
                                global_step=global_step,
                                var_list=tvars_trainable)

        # progress bar
        ptrain = IntProgress()
        pval = IntProgress()
        display(ptrain)
        display(pval)
        ptrain.max = int(Xtrain.shape[0] / batch_size)
        pval.max = int(Xtest.shape[0] / batch_size)

        spareness = vgg16.spareness(thresh=0.05)
        print("initial spareness: %s" % sess.run(spareness))

        # re-initialize
        initialize_uninitialized(sess)

        # reset due to adding a new task
        patience_counter = 0
        current_best_val_accu = 0

        # optimize until early stopping or the epoch limit is reached
        while (patience_counter < early_stop_patience
               and epoch_counter < epoch):

            def load_batches():
                for i in range(int(Xtrain.shape[0] / batch_size)):
                    st = i * batch_size
                    ed = (i + 1) * batch_size
                    batch = ia.Batch(images=Xtrain[st:ed, :, :, :],
                                     data=Ytrain[st:ed, :])
                    yield batch

            batch_loader = ia.BatchLoader(load_batches)
            bg_augmenter = ia.BackgroundAugmenter(batch_loader=batch_loader,
                                                  augseq=transform,
                                                  nb_workers=4)

            # start training
            stime = time.time()
            bar_train = Bar(
                'Training',
                max=int(Xtrain.shape[0] / batch_size),
                suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
            bar_val = Bar(
                'Validation',
                max=int(Xtest.shape[0] / batch_size),
                suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
            train_loss, train_accu = 0.0, 0.0
            while True:
                batch = bg_augmenter.get_batch()
                if batch is None:
                    print("Finished epoch.")
                    break
                x_images_aug = batch.images_aug
                y_images = batch.data
                loss, accu, _ = sess.run(
                    [obj, vgg16.accu_dict[cur_task], train_op],
                    feed_dict={
                        vgg16.x: x_images_aug,
                        vgg16.y: y_images,
                        vgg16.is_train: True
                    })
                bar_train.next()
                train_loss += loss
                train_accu += accu
                ptrain.value += 1
                ptrain.description = "Training %s/%s" % (ptrain.value,
                                                         ptrain.max)
            train_loss = train_loss / ptrain.value
            train_accu = train_accu / ptrain.value
            batch_loader.terminate()
            bg_augmenter.terminate()

            # # training an epoch
            # for i in range(int(Xtrain.shape[0]/batch_size)):
            #     st = i*batch_size
            #     ed = (i+1)*batch_size

            #     augX = transform.augment_images(Xtrain[st:ed,:,:,:])

            #     sess.run([train_op], feed_dict={vgg16.x: augX,
            #                                     vgg16.y: Ytrain[st:ed,:],
            #                                     vgg16.is_train: False})
            #     ptrain.value +=1
            #     ptrain.description = "Training %s/%s" % (i, ptrain.max)
            #     bar_train.next()

            # validation
            val_loss = 0
            val_accu = 0
            for i in range(int(Xtest.shape[0] / 200)):
                st = i * 200
                ed = (i + 1) * 200
                loss, accu = sess.run(
                    [obj, vgg16.accu_dict[cur_task]],
                    feed_dict={
                        vgg16.x: Xtest[st:ed, :],
                        vgg16.y: Ytest[st:ed, :],
                        vgg16.is_train: False
                    })
                val_loss += loss
                val_accu += accu
                pval.value += 1
                pval.description = "Testing %s/%s" % (pval.value, pval.value)
            val_loss = val_loss / pval.value
            val_accu = val_accu / pval.value

            print("\nspareness: %s" % sess.run(spareness))
            # early stopping check
            if (val_accu - current_best_val_accu) > min_delta:
                current_best_val_accu = val_accu
                patience_counter = 0

                para_dict = sess.run(vgg16.para_dict)
                np.save(os.path.join(FLAG.save_dir, "para_dict.npy"),
                        para_dict)
                print("save in %s" %
                      os.path.join(FLAG.save_dir, "para_dict.npy"))
            else:
                patience_counter += 1

            # shuffle Xtrain and Ytrain in the next epoch
            idx = np.random.permutation(Xtrain.shape[0])
            Xtrain, Ytrain = Xtrain[idx, :, :, :], Ytrain[idx, :]

            # epoch end
            # writer.add_summary(epoch_summary, epoch_counter)
            epoch_counter += 1

            ptrain.value = 0
            pval.value = 0
            bar_train.finish()
            bar_val.finish()

            print(
                "Epoch %s (%s), %s sec >> train loss: %.4f, train accu: %.4f, val loss: %.4f, val accu at %s: %.4f"
                % (epoch_counter, patience_counter,
                   round(time.time() - stime, 2), train_loss, train_accu,
                   val_loss, cur_task, val_accu))
        saver.save(sess, checkpoint_path, global_step=epoch_counter)

        sp, rcut = gammaSparsifyVGG16(para_dict, thresh=0.02)
        np.save(os.path.join(FLAG.save_dir, "sparse_dict.npy"), sp)
        print("sparsify %s in %s" % (np.round(
            1 - rcut, 3), os.path.join(FLAG.save_dir, "sparse_dict.npy")))

        #writer.close()
        arr_spareness.append(1 - rcut)
        np.save(os.path.join(FLAG.save_dir, "sprocess.npy"), arr_spareness)
    FLAG.optimizer = opt_type
    FLAG.lr = start_learning_rate
    FLAG.batch_size = batch_size
    FLAG.epoch_end = epoch_counter
    FLAG.val_accu = current_best_val_accu

    header = ''
    row = ''
    for key in sorted(vars(FLAG)):
        if header == '':
            header = key
            row = str(getattr(FLAG, key))
        else:
            header += "," + key
            row += "," + str(getattr(FLAG, key))
    row += "\n"
    header += "\n"
    if os.path.exists("/home/cmchang/new_CP_CNN/model.csv"):
        with open("/home/cmchang/new_CP_CNN/model.csv", "a") as myfile:
            myfile.write(row)
    else:
        with open("/home/cmchang/new_CP_CNN/model.csv", "w") as myfile:
            myfile.write(header)
            myfile.write(row)
Example #7
def vgg16_train(model, train, test, init_from, save_dir, batch_size=64, epoch=300, early_stop_patience=25):
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    
    checkpoint_path = os.path.join(save_dir, 'model.ckpt')

    with tf.Session() as sess:
        print(tf.trainable_variables())
        
        # hyper parameters
        learning_rate = 5e-4  # adam
        min_delta = 0.0001

        # recorder
        epoch_counter = 0
        loss_history = []
        val_loss_history = []

        # optimizer
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = opt.minimize(model.loss)
        
        # saver 
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
        
        sess.run(tf.global_variables_initializer())

        # progress bar
        ptrain = IntProgress()
        pval = IntProgress()
        display(ptrain)
        display(pval)
        ptrain.max = int(train.images.shape[0]/batch_size)
        pval.max = int(test.images.shape[0]/batch_size)

        # reset due to adding a new task
        patience_counter = 0
        current_best_val_loss = 100000 # a large number
        

        # train start
        while(patience_counter < early_stop_patience):
            stime = time.time()
            bar_train = Bar('Training', max=int(train.images.shape[0]/batch_size), suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
            bar_val =  Bar('Validation', max=int(test.images.shape[0]/batch_size), suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
            
            # training an epoch
            train_loss = 0
            for i in range(int(train.images.shape[0]/batch_size)):
                st = i*batch_size
                ed = (i+1)*batch_size
                
                _, loss = sess.run([train_op, model.loss],
                                   feed_dict={model.x: train.images[st:ed,:],
                                              model.y: train.labels[st:ed,:],
                                              model.w: train.weights[st:ed,:]
                                             })
                train_loss += loss
                ptrain.value +=1
                ptrain.description = "Training %s/%s" % (i, ptrain.max)
                bar_train.next()
            
            train_loss /= ptrain.max
            
            val_loss = 0

            for i in range(int(test.images.shape[0]/batch_size)):
                st = i*batch_size
                ed = (i+1)*batch_size
                
                loss = sess.run(model.loss,
                                   feed_dict={model.x: test.images[st:ed,:],
                                              model.y: test.labels[st:ed,:],
                                              model.w: np.expand_dims(np.repeat(1.0,batch_size),axis=1)
                                             })
                val_loss += loss
                pval.value +=1
                pval.description = "Training %s/%s" % (i, pval.max)
                bar_val.next()
                
            val_loss /= pval.max
            
            if (current_best_val_loss - val_loss) > min_delta:
                current_best_val_loss = val_loss
                patience_counter = 0
                saver.save(sess, checkpoint_path, global_step=epoch_counter)
                print("reset early stopping and save model into %s at epoch %s" % (checkpoint_path,epoch_counter))
            else:
                patience_counter += 1

            # shuffle Xtrain and Ytrain in the next epoch
            train.shuffle()
            
            loss_history.append(train_loss)
            val_loss_history.append(val_loss)

            ptrain.value = 0
            pval.value = 0
            bar_train.finish()
            bar_val.finish()
            print("Epoch %s (%s), %s sec >> train-loss: %.4f, val-loss: %.4f" % (epoch_counter, patience_counter, round(time.time()-stime,2), train_loss, val_loss))
            
            # epoch end
            epoch_counter += 1
            if epoch_counter >= epoch:
                break
        res = pd.DataFrame({"epoch":range(0,len(loss_history)), "loss":loss_history, "val_loss":val_loss_history})
        res.to_csv(os.path.join(save_dir,"history.csv"), index=False)
        print("end training")
Example #8
def train(FLAG):
    print("Reading dataset...")
    # load data
    Xtrain, df_train = read_dataset(TRAIN_CSV, TRAIN_DIR)
    Xtest, df_test = read_dataset(TEST_CSV, TEST_DIR)

    vae = VAE()
    vae.build(lambda_KL=FLAG.lambda_KL,
              n_dim=FLAG.n_dim,
              batch_size=FLAG.batch_size,
              shape=Xtrain.shape[1:])

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    checkpoint_path = os.path.join(FLAG.save_dir, 'model.ckpt')

    def initialize_uninitialized(sess):
        global_vars = tf.global_variables()
        is_not_initialized = sess.run(
            [tf.is_variable_initialized(var) for var in global_vars])
        not_initialized_vars = [
            v for (v, f) in zip(global_vars, is_not_initialized) if not f
        ]
        if len(not_initialized_vars):
            sess.run(tf.variables_initializer(not_initialized_vars))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # hyper parameters
        batch_size = FLAG.batch_size
        epoch = 500
        early_stop_patience = 50
        min_delta = 0.0001
        opt_type = 'adam'

        # recorder
        epoch_counter = 0

        # optimizer
        global_step = tf.Variable(0, trainable=False)

        # Passing global_step to minimize() will increment it at each step.
        if opt_type == 'sgd':
            start_learning_rate = FLAG.lr
            half_cycle = 2000
            learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                       global_step,
                                                       half_cycle,
                                                       0.5,
                                                       staircase=True)
            opt = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                             momentum=0.9,
                                             use_nesterov=True)
        else:
            start_learning_rate = FLAG.lr
            half_cycle = 2000
            learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                       global_step,
                                                       half_cycle,
                                                       0.5,
                                                       staircase=True)
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

        obj = vae.train_op
        train_op = opt.minimize(obj, global_step=global_step)

        # progress bar
        ptrain = IntProgress()
        pval = IntProgress()
        display(ptrain)
        display(pval)
        ptrain.max = int(Xtrain.shape[0] / batch_size)
        pval.max = int(Xtest.shape[0] / batch_size)

        # re-initialize
        initialize_uninitialized(sess)

        # reset due to adding a new task
        patience_counter = 0
        current_best_val_loss = float('inf')

        # optimize until early stopping or the epoch limit is reached
        while (patience_counter < early_stop_patience
               and epoch_counter < epoch):

            # start training
            stime = time.time()
            bar_train = Bar(
                'Training',
                max=int(Xtrain.shape[0] / batch_size),
                suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
            bar_val = Bar(
                'Validation',
                max=int(Xtest.shape[0] / batch_size),
                suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')

            train_loss = 0.0
            train_reconstruction_loss = 0.0
            train_kl_loss = 0.0
            for i in range(int(Xtrain.shape[0] / batch_size)):
                st = i * batch_size
                ed = (i + 1) * batch_size
                loss, reconstruction_loss, kl_loss, _ = sess.run(
                    [
                        obj, vae.loss['reconstruction'], vae.loss['KL_loss'],
                        train_op
                    ],
                    feed_dict={
                        vae.x: Xtrain[st:ed, :],
                        vae.y: Xtrain[st:ed, :],
                        vae.is_train: True
                    })
                print(loss)
                print(reconstruction_loss)
                print(kl_loss)
                train_loss += loss
                train_reconstruction_loss += reconstruction_loss
                train_kl_loss += kl_loss
                ptrain.value += 1
                ptrain.description = "Training %s/%s" % (ptrain.value,
                                                         ptrain.max)

                output = sess.run(
                    [vae.output],
                    feed_dict={
                        vae.x: Xtrain[0:64, :],
                        vae.y: Xtrain[0:64, :],
                        vae.is_train: False
                    })

                print("=== train data ====")
                print(output)
                #print((Xtrain[0,:]-128.0)/128.0)
            train_loss = train_loss / ptrain.value
            train_reconstruction_loss = train_reconstruction_loss / ptrain.value
            train_kl_loss = train_kl_loss / ptrain.value

            # validation
            val_loss = 0
            val_reconstruction_loss = 0.0
            val_kl_loss = 0.0
            for i in range(int(Xtest.shape[0] / batch_size)):
                st = i * batch_size
                ed = (i + 1) * batch_size
                loss, reconstruction_loss, kl_loss = sess.run(
                    [obj, vae.loss['reconstruction'], vae.loss['KL_loss']],
                    feed_dict={
                        vae.x: Xtest[st:ed, :],
                        vae.y: Xtest[st:ed, :],
                        vae.is_train: False
                    })
                val_loss += loss
                val_reconstruction_loss += reconstruction_loss
                val_kl_loss += kl_loss
                pval.value += 1
                pval.description = "Testing %s/%s" % (pval.value, pval.value)
            val_loss = val_loss / pval.value
            val_reconstruction_loss = val_reconstruction_loss / pval.value
            val_kl_loss = val_kl_loss / pval.value

            # plot
            # if epoch_counter%10 == 0:
            #     Xplot = sess.run(vae.output,
            #             feed_dict={vae.x: Xtest[:,:],
            #                         vae.y: Xtest[:,:],
            #                         vae.is_train: False})
            #     for i, fname in enumerate(track):
            #         imageio.imwrite(os.path.join(FLAG.save_dir,os.path.basename(fname)+"_pred_"+str(epoch_counter)+".png"), saveimg)
            #         print(os.path.join(FLAG.save_dir,os.path.basename(fname)+"_pred_"+str(epoch_counter)+".png"))

            # early stopping check
            if (current_best_val_loss - val_loss) > min_delta:
                current_best_val_loss = val_loss
                patience_counter = 0
                saver.save(sess, checkpoint_path, global_step=epoch_counter)
                print("save in %s" % checkpoint_path)
            else:
                patience_counter += 1

            # shuffle Xtrain and Ytrain in the next epoch
            idx = np.random.permutation(Xtrain.shape[0])
            Xtrain = Xtrain[idx, :, :, :]

            # epoch end
            epoch_counter += 1

            ptrain.value = 0
            pval.value = 0
            bar_train.finish()
            bar_val.finish()

            print(
                "Epoch %s (%s), %s sec >> train loss: %.4f, train recon loss: %.4f, train kl loss: %.4f, val loss: %.4f, val recon loss: %.4f, val kl loss: %.4f"
                % (epoch_counter, patience_counter,
                   round(time.time() - stime, 2), train_loss,
                   train_reconstruction_loss, train_kl_loss, val_loss,
                   val_reconstruction_loss, val_kl_loss))

        # para_dict = sess.run(vgg16.para_dict)
        # np.save(os.path.join(FLAG.save_dir, "para_dict.npy"), para_dict)
        # print("save in %s" % os.path.join(FLAG.save_dir, "para_dict.npy"))

        FLAG.optimizer = opt_type
        FLAG.lr = start_learning_rate
        FLAG.batch_size = batch_size
        FLAG.epoch_end = epoch_counter
        FLAG.val_loss = current_best_val_loss

        header = ''
        row = ''
        for key in sorted(vars(FLAG)):
            if header == '':
                header = key
                row = str(getattr(FLAG, key))
            else:
                header += "," + key
                row += "," + str(getattr(FLAG, key))
        row += "\n"
        if os.path.exists("/home/cmchang/DLCV2018SPRING/hw4/model.csv"):
            with open("/home/cmchang/DLCV2018SPRING/hw4/model.csv",
                      "a") as myfile:
                myfile.write(row)
        else:
            with open("/home/cmchang/DLCV2018SPRING/hw4/model.csv",
                      "w") as myfile:
                myfile.write(header)
                myfile.write(row)
Example #9
def train(FLAG):
    print("Reading dataset...")
    if FLAG.dataset == 'CIFAR-10':
        train_data = CIFAR10(train=True)
        test_data = CIFAR10(train=False)
    elif FLAG.dataset == 'CIFAR-100':
        train_data = CIFAR100(train=True)
        test_data = CIFAR100(train=False)
    else:
        raise ValueError("dataset should be either CIFAR-10 or CIFAR-100.")

    Xtrain, Ytrain = train_data.train_data, train_data.train_labels
    Xtest, Ytest = test_data.test_data, test_data.test_labels

    print("Build VGG16 models...")
    vgg16 = VGG16(FLAG.init_from, prof_type=FLAG.prof_type)

    # build model using  dp
    dp = [(i + 1) * 0.05 for i in range(1, 20)]
    vgg16.build(dp=dp)

    # define tasks
    tasks = ['100', '50']
    print(tasks)

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=len(tasks))

    checkpoint_path = os.path.join(FLAG.save_dir, 'model.ckpt')

    tvars_trainable = tf.trainable_variables()
    for rm in vgg16.gamma_var:
        tvars_trainable.remove(rm)
        print('%s is not trainable.' % rm)

    # useful function
    def initialize_uninitialized(sess):
        global_vars = tf.global_variables()
        is_not_initialized = sess.run(
            [tf.is_variable_initialized(var) for var in global_vars])
        not_initialized_vars = [
            v for (v, f) in zip(global_vars, is_not_initialized) if not f
        ]
        if len(not_initialized_vars):
            sess.run(tf.variables_initializer(not_initialized_vars))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # hyper parameters
        learning_rate = 2e-4
        batch_size = 32
        alpha = 0.5
        early_stop_patience = 4
        min_delta = 0.0001

        # optimizer
        # opt = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
        # recorder
        epoch_counter = 0

        # tensorboard writer
        writer = tf.summary.FileWriter(FLAG.log_dir, sess.graph)

        # progress bar
        ptrain = IntProgress()
        pval = IntProgress()
        display(ptrain)
        display(pval)
        ptrain.max = int(Xtrain.shape[0] / batch_size)
        pval.max = int(Xtest.shape[0] / batch_size)

        # initial task
        obj = vgg16.loss_dict[tasks[0]]

        while tasks:

            # acquire a new task
            cur_task = tasks[0]
            tasks = tasks[1:]
            new_obj = vgg16.loss_dict[cur_task]

            # just finished a task
            if epoch_counter > 0:
                # save models
                saver.save(sess, checkpoint_path, global_step=epoch_counter)

                # task-wise loss aggregation
                # obj = tf.add(tf.multiply(1-alpha,obj), tf.multiply(alpha,new_obj))
                obj = tf.add(obj, new_obj)
            # optimizer
            train_op = opt.minimize(obj, var_list=tvars_trainable)

            # re-initialize
            initialize_uninitialized(sess)

            # reset due to adding a new task
            patience_counter = 0
            current_best_val_loss = 100000  # a large number

            # optimize until early stopping kicks in for the current task
            while (patience_counter < early_stop_patience):
                stime = time.time()
                bar_train = Bar(
                    'Training',
                    max=int(Xtrain.shape[0] / batch_size),
                    suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
                bar_val = Bar(
                    'Validation',
                    max=int(Xtest.shape[0] / batch_size),
                    suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')

                # training an epoch
                for i in range(int(Xtrain.shape[0] / batch_size)):
                    st = i * batch_size
                    ed = (i + 1) * batch_size
                    sess.run([train_op],
                             feed_dict={
                                 vgg16.x: Xtrain[st:ed, :, :, :],
                                 vgg16.y: Ytrain[st:ed, :]
                             })
                    ptrain.value += 1
                    ptrain.description = "Training %s/%s" % (i, ptrain.max)
                    bar_train.next()

                # validation
                val_loss = 0
                val_accu = 0
                for i in range(int(Xtest.shape[0] / 200)):
                    st = i * 200
                    ed = (i + 1) * 200
                    loss, accu, epoch_summary = sess.run(
                        [obj, vgg16.accu_dict[cur_task], vgg16.summary_op],
                        feed_dict={
                            vgg16.x: Xtest[st:ed, :],
                            vgg16.y: Ytest[st:ed, :]
                        })
                    val_loss += loss
                    val_accu += accu
                    pval.value += 1
                    pval.description = "Testing %s/%s" % (i, pval.value)
                val_loss = val_loss / pval.value
                val_accu = val_accu / pval.value

                # early stopping check
                if (current_best_val_loss - val_loss) > min_delta:
                    current_best_val_loss = val_loss
                    patience_counter = 0
                else:
                    patience_counter += 1

                # shuffle Xtrain and Ytrain in the next epoch
                idx = np.random.permutation(Xtrain.shape[0])
                Xtrain, Ytrain = Xtrain[idx, :, :, :], Ytrain[idx, :]

                # epoch end
                writer.add_summary(epoch_summary, epoch_counter)
                epoch_counter += 1

                ptrain.value = 0
                pval.value = 0
                bar_train.finish()
                bar_val.finish()

                print(
                    "Epoch %s (%s), %s sec >> obj loss: %.4f, task at %s: %.4f"
                    % (epoch_counter, patience_counter,
                       round(time.time() - stime,
                             2), val_loss, cur_task, val_accu))
        saver.save(sess, checkpoint_path, global_step=epoch_counter)

        writer.close()
Example #10
    progress_bar(mydirs, mypath, extension, dff)


# 2015-2016
mypath = "/Users/Tim/Desktop/scor_test/Data/2015-2016/"
mydirs = [f for f in listdir(mypath) if isdir(join(mypath, f))]
mydirs.remove("Demographics")
mydirs.remove("Dietary")
df9 = pd.read_csv(
    "/Users/Tim/Desktop/scor_test/Data/2015-2016/Demographics/DEMO_I.csv",
    encoding='utf8')
extension = 'csv'

# Initialize a progress bar
progress = IntProgress()
progress.max = len(mydirs)
progress.description = '(Init)'
display(progress)
for mydir in mydirs:
    os.chdir(mypath + mydir)
    all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
    for file in all_filenames:
        df_temp = pd.read_csv(file, encoding='utf8')
        try:
            df9 = pd.merge(df9, df_temp, how='outer', on='SEQN')
        except Exception:
            print(mypath + mydir + '/' + file)
    progress.value += 1
    progress.description = mydir
progress.description = '(Done)'