Example No. 1
def split_dataset(dataset, train_percent=None):
    ''' Splits the dataset into train and test '''

    if not train_percent or int(train_percent) > 100:
        print("Train percent Invalid, using default")
        train_percent = 80

    # Shuffle / randomize the indices
    shuffled_indices = list(range(dataset.num_records))
    np.random.shuffle(shuffled_indices)

    # How many training records do we need?
    num_train_records = int(train_percent) * dataset.num_records // 100

    # Init train and test 
    train_text, train_labels = [], []
    test_text, test_labels = [], []

    for index in shuffled_indices:
        if index < num_train_records:
            train_labels.append(dataset.labels[index])
            train_text.append(dataset.text[index])
        else:
            test_labels.append(dataset.labels[index])
            test_text.append(dataset.text[index])
    
    train_dataset = DataSet(None, train_text, train_labels, dataset.isVectorized)
    test_dataset  = DataSet(None, test_text, test_labels, dataset.isVectorized)

    return train_dataset, test_dataset
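A minimal usage sketch for the splitter above. The DataSet class below is only a hypothetical stand-in that exposes the attributes the function touches (num_records, text, labels, isVectorized); the real class is not shown in this example.

import numpy as np

class DataSet:
    # hypothetical stand-in for the DataSet class used by split_dataset
    def __init__(self, name, text, labels, isVectorized=False):
        self.name = name
        self.text = text
        self.labels = labels
        self.isVectorized = isVectorized
        self.num_records = len(text)

full = DataSet(None, ["good", "bad", "fine", "ok"], [1, 0, 1, 1])
train, test = split_dataset(full, train_percent=75)
print(train.num_records, test.num_records)  # 3 1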
Example No. 2
 def get_dataset(self, is_train=True):
     ld = LoadedData()
     if is_train:
         ld.load_data()
         ld.label_normalize()
     else:
         # load source speaker data
         ld.load_data(filename=config.src_data_dir,
                      test_set_size=config.src_test_size,
                      vali_set_size=config.src_vali_size)
     # sort the data
     # ld.sort_data()
     ld.print_info()
     dataset = DataSet(ld)
     self.ds = dataset
     self.loaded_data = ld
     self.train_dataset_iter = dataset.train_iterator
     self.vali_dataset_iter = dataset.vali_iterator
     self.test_dataset_iter = dataset.test_iterator
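     # one feedable iterator: the string handle fed at run time selects the train/vali/test iterator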
     self.dataset_iter = tf.data.Iterator.from_string_handle(
         self.dataset_handle, dataset.train_set.output_types,
         dataset.train_set.output_shapes)
     with tf.name_scope('batch_data'):
         self.batch_features, \
         self.batch_labels, \
         self.batch_lengths, \
         self.batch_uttids = self.dataset_iter.get_next()
Example No. 3
    def load(self, dataPath, numTrain, numValid, numTest):
        """Load the data."""
        print("Loading data from " + dataPath + "...")

        data = np.genfromtxt(dataPath, delimiter=",", dtype="uint8")

        # The last numTest instances ALWAYS comprise the test set.
        train, test = data[:numTrain + numValid], data[numTrain + numValid:]
        shuffle(train)

        train, valid = train[:numTrain], train[numTrain:]

        self.trainingSet = DataSet(train)
        self.validationSet = DataSet(valid)
        self.testSet = DataSet(test)

        print("Data loaded.")
def split_dataset(dataset, ratio=None):
    size = dataset.size
    if ratio is None:
        ratio = _choose_optimal_train_ratio(size)

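    # mark train_size entries True, then shuffle so a random subset of rows lands in the training split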
    mask = np.zeros(size, dtype=np.bool_)
    train_size = int(size * ratio)
    mask[:train_size] = True
    np.random.shuffle(mask)

    train_x = dataset.x[mask, :]
    train_y = dataset.y[mask]

    mask = np.invert(mask)
    test_x = dataset.x[mask, :]
    test_y = dataset.y[mask]

    return DataSet(train_x, train_y), DataSet(test_x, test_y)
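A quick sketch of how the mask-based splitter behaves. The DataSet below is a hypothetical container carrying only x, y and size, and ratio is passed explicitly so the unshown _choose_optimal_train_ratio helper is not needed.

import numpy as np

class DataSet:
    # hypothetical container matching the attributes split_dataset reads and returns
    def __init__(self, x, y):
        self.x, self.y = x, y
        self.size = x.shape[0]

data = DataSet(np.arange(20).reshape(10, 2), np.arange(10))
train, test = split_dataset(data, ratio=0.7)
print(train.x.shape, test.x.shape)  # (7, 2) (3, 2)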
 def get_data_set(self, name, directory='/data_sets'):
     data_set = self.get_saved_data_set(name)[0]
     if not data_set:
         if not self.reset:
             print('Could not find dataset. Creating new one')
             self.captains_log.error(
                 'Could not find saved dataset. Creating new one')
         data = DataSet(name, self.save, directory)
         for message in data.set_up():
             self._log_message(message[0], message[1])
         if self.save:
             self.save_data_set(data)
         return data
     else:
         if self.reset:
             data = DataSet(name, self.save, directory)
             for message in data.set_up():
                 self._log_message(message[0], message[1])
             if self.save:
                 self.save_data_set(data)
             return data
         return data_set
Example No. 6
def create_data_set(key_items, value_items, name=""):
    data_items = []
    for key_counter in range(0, len(key_items)):
        key = key_items[key_counter]
        if key_counter < len(value_items):
            value = value_items[key_counter]
        else:
            value = None
        data_items.append(DataItem(key, value))

    ds = DataSet(name)
    ds.add_data_items(data_items)
    return ds
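A brief usage sketch for create_data_set, assuming the DataItem and DataSet classes it relies on are importable from the example's module. When there are more keys than values, the trailing keys are paired with None:

keys = ["height", "width", "depth"]
values = [10, 20]  # one value short, so "depth" is stored with value None
ds = create_data_set(keys, values, name="box_measurements")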
Example No. 7
def main():
    from data_set import DataSet
    from collections import namedtuple
    hps = {
        'encode_step': 5,  # number of historical time steps
        'train_data_num': 100000,  # number of training samples
        }
    
    hps = namedtuple("HParams", hps.keys())(**hps)
    
    data_set = DataSet(hps)
    obs = Observations(0, 0, 0, 0)
    print(obs.values(data_set.history_data, hps.encode_step).shape)
    return
def to_dataset(df, k, target_column, with_bias):
    df = df[1:].reset_index(drop=True)
    df = df.drop(['date'], axis=1)
    target = df[target_column]

    n, cols = df.shape
    windows_num = n - k  # effective window size, including the label, is k + 1

    x = np.empty([windows_num, k * cols + int(with_bias)])
    y = np.empty([windows_num])

    for i in xrange(windows_num):
        window = df[i:i + k]
        row = window.as_matrix().reshape((-1, ))
        if with_bias:
            row = np.insert(row, 0, 1)
        x[i] = row
        y[i] = target[i + k]

    debug('data set: x=%s y=%s' % (x.shape, y.shape))
    return DataSet(x, y)
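A small shape check for the windowing above, assuming the snippet's own imports (DataSet, debug) are available and that it runs in the Python 2 / older-pandas environment it targets (xrange, DataFrame.as_matrix):

import pandas as pd

df = pd.DataFrame({
    'date': pd.date_range('2020-01-01', periods=6),
    'price': [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
    'volume': [10, 11, 12, 13, 14, 15],
})
ds = to_dataset(df, k=2, target_column='price', with_bias=True)
# 5 usable rows after dropping the first, so 5 - 2 = 3 windows,
# each flattened to 2 steps * 2 columns + 1 bias = 5 features:
# the x passed to DataSet has shape (3, 5) and y has shape (3,)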
Example No. 9
def main():
    hps = get_hps()
    data_set = DataSet(hps)
    env = Env(hps, data_set)
    model = Model(hps, env.observations_dim, env.actions_dim)
    obs = env.reset()
    data_set.add_data(obs, 0, 0)

    data_size = hps.train_data_num
    for i in range(data_size):
        print('\r{}/{}'.format(i, data_size), end='')
        obs, reward, _ = env.step(obs, Actions([0.3, 0.3, 0.4]))
        data_set.add_data(obs, 0, 0)

    n = hps.train_iter
    for i in range(n):
        print('\n\n{}/{}'.format(i, n))
        model.price_train(1, data_set)
        if i % hps.eval_interval == 0:
            print('-'*50)
            model.price_test(1, data_set)
            print('-'*50)
        
    return
Example No. 10
SIC_PATH = 'sic_day_GFDL-CM3_historical*'
SIT_PATH = 'sit_day_GFDL-CM3_historical*'
TAS_PATH = 'tas_3hr_GFDL-CM3_historical_r1i1p1_*.nc'
CLT_PATH = 'tcdc.eatm.gauss.19[89]*.nc'

BEGIN_DATE = datetime(1979, 1, 1, 0, 0, 0)
NUM_YEARS = 20

DELTA_T = 150

if __name__ == '__main__':
    print('Creating DataSet')
    data_set = DataSet(
        sic_path=SIC_PATH,
        sit_path=SIT_PATH,
        tas_path=TAS_PATH,
        clt_path=CLT_PATH,
        sic_scale=.01,
        clt_scale=.01,
    )
    print('Getting Albedos')
    albedos = Albedos()
    year = dateutil.relativedelta.relativedelta(years=1)
    rad_start_dates = [BEGIN_DATE + year * n for n in range(NUM_YEARS)]
    forcings = []
    for rad_start_date in rad_start_dates:
        forcing = get_radiative_forcing(
            start_date=rad_start_date,
            delta_t=DELTA_T,
            data_set=data_set,
            albedos=albedos,
        )
Example No. 11
                              self.input_x: xs,
                              self.label_y: labels,
                              self.diag_x: diags,
                              self.keep_prob: 0.4
                          })

    def predict(self, dataset):
        return self.sess.run(self.predict_y_label,
                             feed_dict={
                                 self.input_x: dataset.xs,
                                 self.diag_x: dataset.diags,
                                 self.label_y: dataset.labels,
                                 self.keep_prob: 0.4
                             })


if __name__ == "__main__":
    accuracies, precisions, recalls, F1s = [], [], [], []
    for i in range(10):
        print("start with dataset: ", i)
        net = NaiveNet()
        test_set = DataSet(i, prefix="test")
        net.train(1000, DataSet(i), test_set)
        accuracies.append(net.best_accu)
        precisions.append(net.best_precision)
        recalls.append(net.best_recall)
        F1s.append(net.best_f1)
    print("Final Average Accuracy: ", np.average(accuracies))
    print("Final Average Precisions: ", np.average(precisions))
    print("Final Average Recalls: ", np.average(recalls))
    print("Final Average F1s: ", np.average(F1s))
Example No. 12
def aggregate_demo(args):
    """
    python3 aggregate_demo.py pong --range-start=0 --range-end=5
    """

    if args.demo_memory_folder is not None:
        demo_memory_folder = args.demo_memory_folder
    else:
        demo_memory_folder = "{}_demo_samples".format(args.env)

    game_state = game.GameState(game=args.env)
    D = DataSet(args.resized_height, args.resized_width, RandomState(),
                args.replay_memory, args.phi_len, game_state.n_actions)

    data_file = '{}-dqn.pkl'.format(args.env)
    img_file = '{}-dqn-images.h5'.format(args.env)
    for index in range(args.range_start, args.range_end):
        print("Demonstration sample #{num:03d}".format(num=index + 1))
        try:
            data = pickle.load(
                open(
                    demo_memory_folder + '/{0:03d}/'.format(index + 1) +
                    data_file, 'rb'))
        except:
            print("Check demo folder if it exist!")
            return
        actions = data['D.actions']
        rewards = data['D.rewards']
        terminal = data['D.terminal']

        imgs = get_compressed_images(demo_memory_folder +
                                     '/{0:03d}/'.format(index + 1) + img_file +
                                     '.gz')
        print("\tMemory size: {}".format(data['D.size']))
        for mem_index in range(data['D.size']):
            D.add_sample(imgs[mem_index], actions[mem_index],
                         rewards[mem_index], terminal[mem_index])
        # h5file.close()
        print("\tTotal Memory size: {}".format(D.size))

    D.resize()
    D.create_validation_set(percent=args.validation_set_percent)

    data = {
        'D.width': D.width,
        'D.height': D.height,
        'D.max_steps': D.max_steps,
        'D.phi_length': D.phi_length,
        'D.num_actions': D.num_actions,
        'D.actions': D.actions,
        'D.rewards': D.rewards,
        'D.terminal': D.terminal,
        'D.bottom': D.bottom,
        'D.top': D.top,
        'D.size': D.size,
        'D.validation_set_markers': D.validation_set_markers,
        'D.validation_indices': D.validation_indices,
        'epsilon': args.init_epsilon,
        't': 0
    }
    images = D.imgs

    pickle.dump(
        data, open(demo_memory_folder + '/' + args.env + '-dqn-all.pkl', 'wb'),
        pickle.HIGHEST_PROTOCOL)
    print("Saving and compressing replay memory...")
    save_compressed_images(
        demo_memory_folder + '/' + args.env + '-dqn-images-all.h5', images)
    print("Saved and compressed replay memory")
Example No. 13
 def to_dataset(self, data):
     ds = DataSet(data, self.header)
     ds = ds.split(self.friend_index)
     return ds.X, ds.Y
Example No. 14
def get_demo(args):
    """
    Human:
    python3 run_experiment.py pong --demo-time-limit=5 --collect-demo --demo-type=0 --file-num=1

    Random:
    python3 run_experiment.py pong --demo-time-limit=5 --collect-demo --demo-type=1 --file-num=1

    Model:
    python3 run_experiment.py pong --demo-time-limit=5 --collect-demo --demo-type=2 --file-num=1
    python3 run_experiment.py pong --demo-time-limit=5 --collect-demo --demo-type=2 --model-folder=pong_networks_rms_1 --file-num=1
    """
    if args.demo_type == 2:
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
        import tensorflow as tf
        from dqn_net import DqnNet
    from collect_demo import CollectDemonstration

    if args.folder is not None:
        folder = '{}_{}'.format(args.env, args.folder)
    else:
        folder = '{}_demo_samples'.format(args.env)
        if args.demo_type == 1:
            folder = '{}_demo_samples_random'.format(args.env)
        elif args.demo_type == 2:
            folder = '{}_demo_samples_model'.format(args.env)

    game_state = game.GameState(
        human_demo=True if args.demo_type==0 else False,
        frame_skip=1, game=args.env)
    if False: # Deterministic
        rng = RandomState(123456)
    else:
        rng = RandomState()
    D = DataSet(
        args.resized_height, args.resized_width,
        rng, (args.demo_time_limit * 5000),
        args.phi_len, game_state.n_actions)

    model_net = None
    if args.demo_type == 2: # From model
        if args.model_folder is not None:
            model_folder = args.model_folder
        else:
            model_folder = '{}_networks_{}'.format(args.env, args.optimizer.lower())
        sess = tf.Session()
        with tf.device('/cpu:0'):
            model_net = DqnNet(
                sess, args.resized_height, args.resized_width, args.phi_len,
                game_state.n_actions, args.env, gamma=args.gamma, copy_interval=args.c_freq,
                optimizer=args.optimizer, learning_rate=args.lr,
                epsilon=args.epsilon, decay=args.decay, momentum=args.momentum,
                verbose=args.verbose, path=None, folder=None,
                slow=args.use_slow, tau=args.tau)
            model_net.load(folder=model_folder)

    collect_demo = CollectDemonstration(
        game_state, args.resized_height, args.resized_width, args.phi_len,
        args.env, D, terminate_loss_of_life=args.terminate_life_loss,
        folder=folder, sample_num=args.file_num
    )
    collect_demo.run(
        minutes_limit=args.demo_time_limit,
        demo_type=args.demo_type,
        model_net=model_net)
Example No. 15
 def setUpClass(self):
     self.dataset = DataSet()
     self.dataset.load(100)
Example No. 16
	network=DeepQLearner(config.STATE_SIZE,
                        config.ACTION_SIZE,
                        config.PHI_LENGTH,
                        config.BATCH_SIZE,
                        config.DISCOUNT,
                        config.RHO,
                        config.MOMENTUM,
                        config.LEARNING_RATE,
                        config.RMS_EPSILON,
                        config.RNG,
                        config.UPDATE_RULE,
                        config.BATCH_ACCUMULATOR,
                        config.FREEZE_INTERVAL)
    # Initialize DataSet
	dataSet=DataSet(config.STATE_SIZE,
                    config.REPLAY_MEMORY_SIZE,
                    config.PHI_LENGTH,
                    config.RNG)

	eC=environmentControl(config.PATH_ROBOT, 
    					  config.PATH_GOAL,
    					  config.PATH_LAUNCHFILE)
	#eC.spawn(config.ROBOT_NAME)
	eC.spawnGoal()
	eC.setRandomModelState(config.ROBOT_NAME)
	eC.setRandomModelState('goal')
	#eC.pause()

	dP=dataProcessor(eC, 
					 config.ROBOT_NAME,
					 config.UPDATES_PER_STEP,
					 config.PHI_LENGTH,
        probability = float(E**epsilon) / float(1 + (E**epsilon))

        for i in range(run_times):
            result.append(self.do_randomize(dataset, probability))

        return result

    def get_qD(self, D):
        count = 0
        for i in D.records:
            if i[4] > 13 and i[14] == '<=50K':
                count += 1
        return float(count) / float(len(D.records))

    def compute_accuacy(self, D, N=100, e=0.5, beta=0.05):
        p = (1 + math.exp(e)) / (math.exp(e) - 1)
        q = 1 / (1 + math.exp(e))
        alpha = p * math.sqrt(math.log(2 / beta) / (2 * N))

        qD = self.get_qD(D)
        data_list = self.do_randomized_mechenism(D, N, e)
        errors = [(p * (d - q)) - qD for d in data_list]

        return errors, alpha


D0 = DataSet()
D0.create_from_csv('./adult.csv')
rr = RandomizedResponse()
errors, alpha = rr.compute_accuacy(D0)
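A quick numeric sanity check of the accuracy bound used in compute_accuacy above: with the default e=0.5, beta=0.05 and N=100, the error radius alpha comes out to roughly 0.55.

import math

e, beta, N = 0.5, 0.05, 100
p = (1 + math.exp(e)) / (math.exp(e) - 1)            # ~4.08
alpha = p * math.sqrt(math.log(2 / beta) / (2 * N))  # ~0.55
print(alpha)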
Example No. 18
common = {
    'dev': './data/atis.pkl.dev',
    'test': './data/atis.pkl.test',
    'slot': './data/atis.pkl.slot',
}

if __name__ == '__main__':

    config = config_plain
    # experiments = experiments[5:6]

    if not os.path.exists('./out'):
        os.mkdir('./out')

    # for vocab size
    DataSet('./data/atis.pkl.slot', './data/atis.pkl.train')
    DataSet('./data/atis.pos.slot', './data/atis.pos.train')

    slot = common['slot']
    validation_set = DataSet(slot, common['dev'])
    test_set = DataSet(slot, common['test'])

    print('# Experiments (%d)' % len(experiments))
    print('# validation_set (%d)' % validation_set.size())
    print('# test_set (%d)' % test_set.size())

    pos_model = None
    if 'pos_model' in config:
        pos_set = DataSet('./data/atis.pos.slot', './data/atis.pos.train')
        print('# Pre-training')
        print('# POS training set (%d)' % pos_set.size())
Example No. 19
def update_model(super_params, url, id, flag, model_name, start_index):
    ckpt = tf.train.get_checkpoint_state('./' + model_name + '/')
    out_length = 0
    if ckpt:
        tf.train.import_meta_graph(ckpt.model_checkpoint_path +'.meta')
        graph = tf.get_default_graph()
        out_length = graph.get_tensor_by_name("fc3/out:0").shape[1]
    # Reset the default graph before each training run
    tf.reset_default_graph()
    log = []
    y_ = tf.placeholder(tf.float32, shape=[None, super_params['out_length']], name="y_")
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    out = define(super_params['out_length'], keep_prob)
    images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
    phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
    # Compute accuracy on the training data
    correct_prediction = tf.equal(tf.argmax(out, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

    loss_temp = tf.losses.softmax_cross_entropy(onehot_labels=y_, logits=out)

    # Compute the mean loss
    cross_entropy_loss = tf.reduce_mean(loss_temp, name='cross_entropy_loss')
    # Backpropagation step to update the parameters
    train_step = tf.train.AdamOptimizer(learning_rate=0.001,
                                        beta1=0.9,
                                        beta2=0.999,
                                        epsilon=1e-08).minimize(cross_entropy_loss)

    saver = tf.train.Saver(max_to_keep=2)
    train_set = DataSet(super_params['train_set_path'],
                        1,
                        (super_params['input_width'], super_params['input_height'], 3),
                        super_params['batch_size'])
    test_set = DataSet(super_params['test_set_path'],
                       1,
                       (super_params['input_width'], super_params['input_height'], 3),
                       super_params['batch_size'])
    tf.add_to_collection("predict", out)
    with tf.Session() as sess:
        writer = tf.summary.FileWriter('logs/2', sess.graph)  # write training logs to the logs folder
        train_accuracy_scalar = tf.summary.scalar('train_accuracy', accuracy)
        train_loss_scalar = tf.summary.scalar('train_loss', cross_entropy_loss)
        ckpt = tf.train.get_checkpoint_state('./' + model_name + '/')
        sess.run(tf.global_variables_initializer())
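        # if the checkpoint's output size differs from the requested out_length, restore everything
        # except the final fc3 layer so the head can be retrained with the new number of classes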
        if out_length != super_params['out_length']:
            loader = tf.train.Saver(
                var_list=[var for var in tf.trainable_variables() if not var.name.startswith("fc3")],
                max_to_keep=2)
        else:
            loader = tf.train.Saver(var_list=[var for var in tf.trainable_variables()],
                                    max_to_keep=2)
        print(ckpt)
        if ckpt:
            if os.path.exists(ckpt.model_checkpoint_path + '.meta'):
                print("restore")
                loader.restore(sess, ckpt.model_checkpoint_path)
                print('restored')

        for epoch in range(super_params['epoch']):
            train_set.reset()
            test_set.reset()
            step = 0
            train_loss = 0
            train_accuracy = 0
            while train_set.is_end():
                input_x, input_y, _ = train_set.next_bath()
                input_y = input_y.astype(int)
                input_y = np.eye(super_params['out_length'])[input_y]
                feed_dict = {images_placeholder: input_x,
                             y_: input_y,
                             keep_prob: super_params['keep_prob'],
                             phase_train_placeholder: False}
                train_accuracy = accuracy.eval(feed_dict=feed_dict)
                train_loss = cross_entropy_loss.eval(feed_dict=feed_dict)
                train_step.run(feed_dict=feed_dict)
                step_info = "epoch:{} step:{} loss: {:.5f} train_accuracy:{:.5f}".format(epoch,
                                                                                         step,
                                                                                         train_loss,
                                                                                         train_accuracy)
                step += 1
                print(step_info)
                log.append(step_info)
                if flag:
                    status_handler.handleTrainStep(url, id, step_info)
                accuracy_scalar, loss_scalar = sess.run([train_accuracy_scalar, train_loss_scalar],
                                                    feed_dict=feed_dict)
                writer.add_summary(accuracy_scalar, epoch)
                writer.add_summary(loss_scalar, epoch)

            if epoch % 5 == 0:
                total_accuracy = 0
                total_loss = 0
                test_step = 0
                while test_set.is_end():
                    test_x, test_y, _ = test_set.next_bath()
                    test_y = test_y.astype(int)
                    test_y = np.eye(super_params['out_length'])[test_y]
                    feed_dict = {images_placeholder: test_x,
                                 y_: test_y,
                                 keep_prob: super_params['keep_prob'],
                                 phase_train_placeholder: False}

                    test_accuracy = accuracy.eval(feed_dict=feed_dict)
                    test_loss = cross_entropy_loss.eval(feed_dict=feed_dict)
                    total_accuracy += test_accuracy
                    total_loss += test_loss
                    test_step += 1
                test_info = "TEST: epoch:{} loss: {:.5f} test_accuracy:{:.5f}".format(epoch,
                                                                                      total_loss / test_step,
                                                                                      total_accuracy / test_step)
                log.append(test_info)
                print(test_info)
                saver.save(sess, './' + model_name + '/my-model', global_step=epoch)
                if ((total_loss / test_step) < 0.001) & ((total_accuracy / test_step) > 0.99):
                    break
        saver.save(sess, './' + model_name + '/my-model', global_step=epoch)
        write_log(log, './' + model_name + '/log.txt')
    return log
Example No. 20
    # if TEST_LIM > 0:
    #     X_test = X_test[0:TEST_LIM]
    #     y_test = y_test[0:TEST_LIM]

    print('X_train.shape: %s' % str(X_train.shape))
    print('y_train.shape: %s' % str(y_train.shape))
    print('X_val.shape: %s' % str(X_val.shape))
    print('y_val.shape: %s' % str(y_val.shape))
    # print('X_test.shape: %s' % str(X_test.shape))
    # print('y_test.shape: %s' % str(y_test.shape))

    assert X_train.shape[0] == y_train.shape[0]
    assert X_val.shape[0] == y_val.shape[0]
    # assert X_test.shape[0] == y_test.shape[0]

    data_train_ = DataSet(X=X_train, y=y_train, batch_size=BATCH_SIZE_INT)
    data_val_ = DataSet(X=X_val, y=y_val, batch_size=BATCH_SIZE_INT)
    # data_test_ = DataSet(X=X_test, y=y_test, batch_size=BATCH_SIZE_INT)

    if TRAIN:
        history, best_train_acc, best_val_acc = \
            vgg.train(data_train=data_train_, data_val=data_val_,
                  save_path=SAVE_PATH,
                  weights_path=WEIGHTS_PATH,
                  restore_path=RESTORE_PATH,
                  save_summaries_every=SAVE_SUMMARIES_EVERY,
                  display_every=DISPLAY_EVERY,
                  display=DISPLAY,
                  nb_to_display=NB_TO_DISPLAY,
                  nb_epochs=NB_EPOCHS,
                  save_best_only=SAVE_BEST_ONLY)
    axarr[0].legend([alpha_line_lp], ['alpha = {:.6f}'.format(laplace_alpha)])
    axarr[0].plot(laplace_errors, 'go')

    axarr[1].set_title('Randomized Response')
    axarr[1].axhline(0, color='g')
    alpha_line_rr = axarr[1].axhline(rr_alpha, color='r')
    axarr[1].axhline(-rr_alpha, color='r')
    axarr[1].set_xlabel('Nth run')
    axarr[1].set_ylabel('Error')
    axarr[1].legend([alpha_line_rr], ['alpha = ' + '{:.6f}'.format(rr_alpha)])
    axarr[1].plot(rr_errors, 'go')

    plt.title('BETA = ' + str(beta) + ', Epsilon = ' + str(e) + ', N = ' + str(N))

    plt.show()


D0 = DataSet()
D0.create_from_csv('adult.csv')
D1 = DataSet()
D1.copy_from_dataset(D0)
D1.records.pop()  # eliminate one element

laplace = Laplace()
D0_histagram_data, qD0 = laplace.do_mechanism(D0, 1000)
D1_histagram_data, qD1 = laplace.do_mechanism(D1, 1000)

# the parameters below are all Laplace mechanism parameters
draw_privacy_loss(D0_histagram_data, D1_histagram_data, 1000, e=0.5)
draw_accuracy(D0_histagram_data, qD0)
Example No. 22
if __name__ == "__main__":
    if len(sys.argv) != 6:
        print("\nUsage: python3 program_name data_path class_col_name test_size prune_size runs")
        print("\ntest_size and prune_size are in range (0, 1)")
        print("\ndata_set is divided into temp_set and test_set")
        print("temp_set is then divided into training_set and prune_set\n")
        sys.exit()

    data_path = sys.argv[1]
    class_name = sys.argv[2]
    test_size = float(sys.argv[3])
    prune_size = float(sys.argv[4])
    runs = int(sys.argv[5])

    data = DataSet(data_path, class_name)
    class_range = len(id3.get_attr_values(data.data_set)[class_name])
    results_path = f'./benchmarks/t{sys.argv[3]}_p{sys.argv[4]}.csv'

    print("\ndata set: " + data_path)
    print("set size:          " + str(len(data.data_set.index)))
    print("------------------ ")
    print("training set size: " + str(len(data.train_set.index)))
    print("prune set size:    " + str(len(data.prune_set.index)))
    print("test set size:     " + str(len(data.test_set.index)))

    with open(results_path, 'w') as file:
        for i in range(runs):
            data.resplit_dataset(test_size=test_size, prune_size=prune_size)
            id3_tree = id3.build_id3(data.train_set, data.data_set)
            c45_tree = id3.build_c45(data.prune_set, data.data_set, id3_tree)
Example No. 23
    # Set up axes
    ax.set_xticklabels([''] + input_sentence.split(' ') + ['<EOS>'],
                       rotation=90)
    ax.set_yticklabels([''] + output_words)

    # Show label at every tick
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    plt.show()


if __name__ == "__main__":

    # train()
    data_set = DataSet(opt.filename, opt.max_len, opt.min_count, device)

    model = build_model(data_set.english_vocab, data_set.french_vocab)
    # evaluate()

    # optim
    optimizer = build_optimizer(model)

    # loss function
    criterion = nn.NLLLoss(ignore_index=PAD_id, reduction='elementwise_mean')

    # Loading checkpoint
    checkpoint = None
    if opt.checkpoint:
        checkpoint = load_checkpoint(opt.checkpoint)
        model.load_state_dict(checkpoint['state_dict'])
Example No. 24
def main(path):
    settings = Settings(path)
    # mask seg set net los vis
    if 'mask' in settings.stages:
        # need v,h raw and frames
        square = 'square' in settings.flags
        max_seg = settings.max_seg
        print(square)
        print(max_seg)
        v, h = data_io.read_from_file(settings.files['raw'], 'raw')
        mask = segment.vert_horiz_seg(v[:, settings.frames, :],
                                      h[:, settings.frames, :],
                                      square=square,
                                      max_seg=max_seg)
        data_io.save_to(mask, settings.files['mask'], 'mask')

    if 'seg' in settings.stages:
        mask = data_io.read_from_file(settings.files['mask'], 'mask')
        v, h = data_io.read_from_file(settings.files['raw'], 'raw')
        seg_v = segment.divide_data_to_segments(mask, v[:, settings.frames, :])
        seg_h = segment.divide_data_to_segments(mask, h[:, settings.frames, :])
        data_io.save_to([seg_v, seg_h], settings.files['seg'], 'seg')

    if 'set' in settings.stages:
        [seg_v, seg_h] = data_io.read_from_file(settings.files['seg'], 'seg')
        cv = 'cv' in settings.flags
        normalize = 'norm' in settings.flags
        sizes = settings.sizes
        data_sets = create_data_set.get_data(seg_v,
                                             seg_h,
                                             n_new_train=sizes['train'],
                                             normalize=normalize)
        data_io.save_to(data_sets, settings.files['set'], 'set')

    if 'net' in settings.stages:
        cv = 'cv' in settings.flags
        zero_all = 'zero_all' in settings.flags
        value_type = 'acc' if 'acc' in settings.flags else 'loss'
        data_sets = data_io.read_from_file(settings.files['set'], 'set')
        mask = data_io.read_from_file(settings.files['mask'], 'mask')

        tx, ty, vx, vy = data_sets
        D_in = vx.shape[1]

        ty = ty.astype(np.float64)
        vy = vy.astype(np.float64)

        n_data_sets = len(tx)
        n_frames = len(settings.frames)
        mask_nubmers = np.unique(mask)
        n_seg = len(mask_nubmers) - 1 if mask_nubmers[0] == 0 else len(
            mask_nubmers)

        frames_loss_maps = np.zeros([n_data_sets, n_frames])
        seg_loss_maps = np.zeros([n_data_sets, n_seg])
        all_train_losses = []
        all_test_losses = []
        all_acc = []
        for idx, (one_tx, one_ty, one_vx,
                  one_vy) in enumerate(zip(tx, ty, vx, vy)):
            one_train = DataSet(torch.from_numpy(one_tx),
                                torch.from_numpy(one_ty))
            one_test = DataSet(torch.from_numpy(one_vx.reshape([1, -1])),
                               torch.from_numpy(one_vy.reshape([
                                   1,
                               ])))
            mean_t, std_t = one_train.calc_mean_std()
            one_train = one_train.normalize(mean_t, std_t)
            one_test = one_test.normalize(mean_t, std_t)
            print(idx)
            net = dense_net.get_model(D_in)
            training_parameters = run_nn.get_train_params(net)

            net, train_losses, valid_losses, valid_accuracies = run_nn.train(
                net, [one_train, one_test], training_parameters)
            all_acc.append(valid_accuracies[-1])
            if valid_losses[-1] > 0.6:
                print('\n{}\n'.format(idx))
            all_train_losses.append(train_losses)
            all_test_losses.append(valid_losses)
            frames_loss_maps[idx, :] = np.asarray(
                run_nn.run_with_missing_parts(net,
                                              mask,
                                              one_test,
                                              False,
                                              len(settings.frames),
                                              part_type='frames',
                                              zero_all=zero_all,
                                              value_type=value_type))
            seg_loss_maps[idx, :] = run_nn.run_with_missing_parts(
                net,
                mask,
                one_test,
                False,
                len(settings.frames),
                part_type='segments',
                zero_all=zero_all,
                value_type=value_type)

        print('acc: {}'.format(np.mean(np.asarray(all_acc))))
        frame_loss = np.mean(frames_loss_maps, axis=0)
        seg_loss = segment.recreate_image(mask, np.mean(seg_loss_maps, axis=0))
        data_io.save_to(frame_loss, settings.files['vis_frame'], 'vis')
        data_io.save_to(seg_loss, settings.files['vis_seg'], 'vis')
        visualize_res.plot_losses(all_train_losses, all_test_losses, [],
                                  n_data_sets)

    if 'show_vis' in settings.stages:
        zero_all = 'zero_all' in settings.flags
        value_type = 'acc' if 'acc' in settings.flags else 'loss'
        zero_all_str = 'Present' if zero_all else 'Missing'
        value_type_str = 'Accuracy' if value_type == 'acc' else 'Loss'
        title_seg = 'Average {} per {} Segment'.format(value_type_str,
                                                       zero_all_str)
        title_frame = 'Average {} per {} Frame'.format(value_type_str,
                                                       zero_all_str)

        # images = data_io.read_from_file(settings.files['vis_both'], 'vis')
        # visualize_res.plot_spatial(images, settings.frame_groups_string, n_frames=len(images))

        loss_map = data_io.read_from_file(settings.files['vis_frame'], 'vis')
        visualize_res.plot_temporal(
            loss_map, [x + 1 for x in settings.frames],
            title=title_frame,
            ylabel=value_type
        )  # counting starts from 0, so the relevant frames are +1

        image = data_io.read_from_file(settings.files['vis_seg'], 'vis')
        visualize_res.plot_spatial(image, title=title_seg)
def main(_):
  # Import data
  ###mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

  print("starting to load data...")
  x2 = pickle.load(open( "T1_GD_all_x_no_normalization.p", "rb" ))
  print("x2 loaded.")
  y2 = pickle.load(open( "T1_GD_all_y_no_normalization.p", "rb" ))
  print("y2 loaded.")
  validate_x2 = pickle.load(open( "T1_GD_validation_x_no_normalization_aggregated_.p", "rb" ))
  print("validate_x2 loaded.")
  validate_y2 = pickle.load(open( "T1_GD_validation_y_no_normalization_aggregated.p", "rb" ))
  print("validate_y2 loaded.")
  validate_x2_nonaggregated = pickle.load(open( "T1_GD_all__validation_x_no_normalization.p", "rb" ))
  print("validate_x2 loaded.")
  validate_y2_nonaggregated = pickle.load(open( "T1_GD_all__validation_y_no_normalization.p", "rb" ))
  print("validate_y2 loaded.")

  number_epochs = sys.argv[0]
  kernal_size = sys.argv[2]


  data_set_all = DataSet(x2,y2, fake_data=False)
  validation_set_all = DataSet(validate_x2_nonaggregated, validate_y2_nonaggregated, fake_data=False)


  # Create the convolutional model
  x = tf.placeholder(tf.float32, [None, 65536])

  # Define loss and optimizer
  y_ = tf.placeholder(tf.float32, [None, 3])

  # Build the graph for the deep net
  # with tf.device('/gpu:2'):
  y_conv, keep_prob, saver = deepnn(x)
  print(keep_prob)


  #plt.imshow(mnist.test.images[0].reshape(28,28))
  #print(type(mnist.test.images))
  #print(mnist.test.images.shape)
  #plt.show()
  cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
  train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
  correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
  accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))




  #grads = new_optimizer.compute_gradients(cross_entropy)
  data_points = []
  avg_loss = 0
  total_loss = 0
  avg_validation_loss = 0
  total_validation_loss = 0
  batch_size = int(sys.argv[1])  # next_batch expects an integer batch size
  batches_completed = 0
  validation_batches_completed = 0
  config = tf.ConfigProto(allow_soft_placement=True)
  config.gpu_options.allow_growth = True
  output_file = open("T1_GD_validation_loss_file_no_normalization_{}_epochs_{}_kernalsize_{}_batchsize.txt".format(number_epochs, kernal_size, batch_size), "w+")
  
  with tf.Session(config = config) as sess:
    
    sess.run(tf.global_variables_initializer())


    # sess.graph.finalize()

    for i in range(5000):
      batch_x, batch_y = data_set_all.next_batch(batch_size)
      for slice_index, batch_slice in enumerate(batch_x):
        batch_slice = numpy.reshape(batch_slice, (256, 256))
        batch_slice = random_alteration(batch_slice)
        # write the augmented slice back; reassigning the loop variable alone would discard it
        batch_x[slice_index] = numpy.reshape(batch_slice, 65536)


      batches_completed += 1
      loss = sess.run(cross_entropy, feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})
      total_loss += loss
      new_avg_loss = total_loss/batches_completed

      if(new_avg_loss>avg_loss and batches_completed != 1):
        avg_loss = new_avg_loss
      # break

      avg_loss = new_avg_loss

      data_points.append(loss)

      if i % 1000 == 0:
        validation_batch_x, validation_batch_y = validation_set_all.next_batch(batch_size)
        validation_batches_completed+=1
        train_accuracy = accuracy.eval(feed_dict={x: validation_batch_x, y_: validation_batch_y, keep_prob: 1.0})
        validation_loss = cross_entropy.eval(feed_dict={x: validation_batch_x, y_: validation_batch_y, keep_prob: 1.0})
        total_validation_loss += validation_loss
        new_avg_validation_loss = total_validation_loss/validation_batches_completed

        if(new_avg_validation_loss>avg_validation_loss and batches_completed!=1):
          avg_validation_loss = new_avg_validation_loss


        avg_validation_loss = new_avg_validation_loss

        output_file.write("Validation loss at i = %d is %g\n" % (i, avg_validation_loss))


        total_times = 0.0
        total_accuracy = 0.0
        prediction=tf.argmax(y_conv,1)
        probabilities=tf.nn.softmax(y_conv)
        probs_array = []
        condensed_y = []

        for j in range(len(validate_x2)):
          #print(test_x2[i])
          #print(test_y2[i])
          temp3 = accuracy.eval(feed_dict={x: validate_x2[j], y_: validate_y2[j], keep_prob: 1.0})
          print('test accuracy %g' % temp3)
          total_accuracy = total_accuracy + temp3
          total_times = total_times+1
          temp4 = prediction.eval(feed_dict={x: validate_x2[j], keep_prob: 1.0}, session=sess)
          print("predictions", temp4)
          probability = probabilities.eval(feed_dict={x: validate_x2[j], keep_prob: 1.0}, session=sess)
          print(probability)
          if j==0:
            probs_array = probability.mean(axis=0)
            condensed_y = validate_y2[j].mean(axis=0)
            continue
          probs_array = numpy.vstack([probs_array, probability.mean(axis=0)])
          condensed_y = numpy.vstack([condensed_y, validate_y2[j].mean(axis=0)])


        fpr = dict()
        tpr = dict()
        roc_auc = dict()

        for j in range(3):
          fpr[j], tpr[j], _ = roc_curve(condensed_y[:, j], probs_array[:, j])
          roc_auc[j] = auc(fpr[j], tpr[j])

          # Compute micro-average ROC curve and ROC area
        fpr["micro"], tpr["micro"], _ = roc_curve(condensed_y.ravel(), probs_array.ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

        output_file.write("ROCs at i = %d are "%i)
        for j in range(3):
          plt.plot(fpr[j], tpr[j], label='ROC curve of class {0} (area = {1:0.2f})'.format(j, roc_auc[j]))
          output_file.write(str(roc_auc[j])+", ")

        output_file.write("\n")
        output_file.flush()
        print('step %d, training accuracy %g' % (i, train_accuracy))
        name = 'T1_GD_testing_with_intermediateROC_no_normalization_epoch_' + str(i)
        save_path = saver.save(sess, name)

      train_step.run(feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})


    #testing  
    print(avg_loss)
    output_file.close()
    save_path = saver.save(sess, 'T1_GD_testing_with_intermediateROC_no_normalization_final')
Example No. 26
def main(_):
    # Import data
    ###mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    print("starting to load data...")
    x2 = pickle.load(open("all_x_l2normalization.p", "rb"))
    print("x2 loaded.")
    y2 = pickle.load(open("all_y_l2normalization.p", "rb"))
    print("y2 loaded.")
    validate_x2 = pickle.load(open("all__validation_x_l2normalization.p",
                                   "rb"))
    print("validate_x2 loaded.")
    validate_y2 = pickle.load(open("all__validation_y_l2normalization.p",
                                   "rb"))
    print("validate_y2 loaded.")

    data_set_all = DataSet(x2, y2, fake_data=False)
    validation_set_all = DataSet(validate_x2, validate_y2, fake_data=False)

    # Create the convolutional model
    x = tf.placeholder(tf.float32, [None, 65536])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 3])

    # Build the graph for the deep net
    y_conv, keep_prob, saver = deepnn(x)
    print(keep_prob)

    #plt.imshow(mnist.test.images[0].reshape(28,28))
    #print(type(mnist.test.images))
    #print(mnist.test.images.shape)
    #plt.show()
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    #grads = new_optimizer.compute_gradients(cross_entropy)
    data_points = []
    avg_loss = 0
    total_loss = 0
    avg_validation_loss = 0
    total_validation_loss = 0
    batch_size = 10
    batches_completed = 0
    validation_batches_completed = 0
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    output_file = open("validation_loss_file_l2normalization.txt", "w+")

    with tf.Session(config=config) as sess:

        sess.run(tf.global_variables_initializer())

        # sess.graph.finalize()

        for i in range(1000000):
            batch_x, batch_y = data_set_all.next_batch(batch_size)
            for slice_index, batch_slice in enumerate(batch_x):
                batch_slice = numpy.reshape(batch_slice, (256, 256))
                batch_slice = random_alteration(batch_slice)
                # write the augmented slice back; reassigning the loop variable alone would discard it
                batch_x[slice_index] = numpy.reshape(batch_slice, 65536)

            batches_completed += 1
            loss = sess.run(cross_entropy,
                            feed_dict={
                                x: batch_x,
                                y_: batch_y,
                                keep_prob: 0.5
                            })
            total_loss += loss
            new_avg_loss = total_loss / batches_completed

            if (new_avg_loss > avg_loss and batches_completed != 1):
                avg_loss = new_avg_loss
            # break

            avg_loss = new_avg_loss

            data_points.append(loss)

            if i % 10000 == 0:
                validation_batch_x, validation_batch_y = validation_set_all.next_batch(
                    batch_size)
                validation_batches_completed += 1
                train_accuracy = accuracy.eval(feed_dict={
                    x: validation_batch_x,
                    y_: validation_batch_y,
                    keep_prob: 1.0
                })
                validation_loss = cross_entropy.eval(feed_dict={
                    x: validation_batch_x,
                    y_: validation_batch_y,
                    keep_prob: 1.0
                })
                total_validation_loss += validation_loss
                new_avg_validation_loss = total_validation_loss / validation_batches_completed

                if (new_avg_validation_loss > avg_validation_loss
                        and batches_completed != 1):
                    avg_validation_loss = new_avg_validation_loss

                avg_validation_loss = new_avg_validation_loss

                output_file.write("Validation loss at i = %d is %g\n" %
                                  (i, avg_validation_loss))
                output_file.flush()
                print('step %d, training accuracy %g' % (i, train_accuracy))
                name = 'my-model_testing_l2normalization_epoch_' + str(i)
                save_path = saver.save(sess, name)
            train_step.run(feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})

        #testing
        print(avg_loss)
        output_file.close()
        save_path = saver.save(sess, 'my-model_testing_l2normalization_final')
            self.classifier, minibatch_index, inputs, outputs, learning_rate)


from data_set import DataSet

if __name__ == '__main__':
    argparser = argparse.ArgumentParser(
        description='Demonstrate Multilayer Perceptron')
    argparser.add_argument(
        '--training-epochs',
        dest='epochs',
        type=int,
        default='1000',
        help='number of epochs to run the training (default: 1000)')

    dataset = DataSet()
    dataset.load()
    trainer = MultilayerPerceptronTrainer(
        dataset, n_epochs=argparser.parse_args().epochs)
    trainer.initialize()
    state = trainer.start_training(patience=10000,
                                   patience_increase=2,
                                   improvement_threshold=0.995)
    start_time = time.clock()
    while (trainer.continue_training(state)):
        print('epoch %d, validation error %f%%' %
              (state.epoch, state.epoch_losses[-1][0] * 100.0))
    end_time = time.clock()
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    print(('Optimization complete. Best validation score of %f%% '
Example No. 28
def main():
    sys.setrecursionlimit(2000)

    config = Configuration()

    with open(config.DATA_FOLDER + '/config.txt', 'r') as f:
        configFile = f.read().split(',')

    print('Parameters', configFile)
    config.EPSILON_START = float(configFile[0])
    config.LOAD_NET_NUMBER = int(float(configFile[1]))

    agentTF = AgentTF(config.STATE_SIZE, config.PHI_LENGTH, config.ACTION_SIZE,
                      config.HIDDEN_LAYERS, config.BATCH_SIZE, config.TAU,
                      config.GAMMA)

    if config.LOAD_NET_NUMBER > 0:
        dataSet = loadDataSet(config.DATA_FOLDER, config.LOAD_NET_NUMBER)
        agentTF.restore_model(config.DATA_FOLDER)
        countTotalSteps = config.LOAD_NET_NUMBER
    else:
        # Initialize DataSet
        dataSet = DataSet(config.STATE_SIZE, config.REPLAY_MEMORY_SIZE,
                          config.PHI_LENGTH, config.RNG)
        countTotalSteps = 0

        openLearningFile(config.DATA_FOLDER)

    eC = environmentControl(config.PATH_ROBOT, config.PATH_GOAL,
                            config.PATH_LAUNCHFILE)
    eC.spawn(config.ROBOT_NAME)
    eC.spawnGoal()
    eC.setRandomModelState(config.ROBOT_NAME)
    #eC.pause()

    dP = dataProcessor(eC, config.ROBOT_NAME, config.PHI_LENGTH,
                       config.STATE_SIZE, config.NUM_SENSOR_VAL,
                       config.SENSOR_RANGE_MAX, config.SENSOR_RANGE_MIN,
                       config.VEL, config.VEL_CURVE, config.UPDATE_TIME,
                       config.SPEED_UP)

    lastState = np.zeros((1, config.STATE_SIZE))
    lastReward = 0
    lastAction = 0

    countSteps = 0
    batchCount = 0
    lossAverages = np.empty([0])
    epochCount = 0

    epsilon = max(config.EPSILON_START, config.EPSILON_MIN)
    epsilonRate = config.EPSILON_DECAY

    quit = False

    try:
        for i in range(4):
            action = np.random.randint(config.ACTION_SIZE)
            dP.action(action)

            state, reward = dP.getStateReward()
            dataSet.addSample(lastState, action, reward, state, dP.isGoal)
            countTotalSteps += 1
            countSteps += 1
            lastState = state
        if config.EPSILON_START < -0:
            quit = True
        while not quit:
            if countTotalSteps % 1000 == 0:
                updateLearningFile(config.DATA_FOLDER, lossAverages,
                                   countTotalSteps)
                lossAverages = np.empty([0])
                print(countTotalSteps)

            phi = dataSet.phi(lastState)
            action = agentTF.getAction(phi, epsilon)
            #action=userAction()
            eC.unpause()
            dP.action(action)
            state, reward = dP.getStateReward()
            eC.pause()

            if dP.isGoal:
                print('The goal was reached in ', countSteps, ' steps')
                countSteps = 1
                eC.setRandomModelState(config.ROBOT_NAME)
                eC.setRandomModelState('goal')
                dP.isGoal = False

            if dP.flipped:
                eC.setRandomModelState(config.ROBOT_NAME)
                dP.flipped = False

            # After NUM_STEPS the chance is over
            if countSteps % config.NUM_STEPS == 0:
                countSteps = 1
                reward -= 1
                eC.setRandomModelState(config.ROBOT_NAME)
                eC.setRandomModelState('goal')
                print('Your chance is over! Try again ...')

            #print(reward)

            dataSet.addSample(lastState, action, reward, state, dP.isGoal)

            # Training
            if countTotalSteps > config.REPLAY_START_SIZE and countTotalSteps % 5 == 0:
                batchStates, batchActions, batchRewards, batchNextStates, batchTerminals= \
                          dataSet.randomBatch(config.BATCH_SIZE)
                loss = agentTF.train(batchStates, batchActions, batchRewards,
                                     batchNextStates, batchTerminals)
                #print('Loss', loss)
                # count How many trainings had been done
                batchCount += 1
                # add loss to lossAverages
                lossAverages = np.append(lossAverages, loss)

            #Update Epsilon save dataSet, network
            if countTotalSteps % config.SIZE_EPOCH == 0:
                # Number of Epochs
                epochCount += 1

                # Update Epsilon
                if (epsilon - epsilonRate) < config.EPSILON_MIN - 0.01:
                    quit = True
                epsilon = max(epsilon - epsilonRate, config.EPSILON_MIN)
                print('Epsilon updated to: ', epsilon)

                agentTF.save_model(countTotalSteps, config.DATA_FOLDER)
                saveDataSet(config.DATA_FOLDER, countTotalSteps, dataSet)
            lastState = state
            countTotalSteps += 1
            countSteps += 1

    except rospy.exceptions.ROSException:
        agentTF.save_model(countTotalSteps, config.DATA_FOLDER)
        saveDataSet(config.DATA_FOLDER, countTotalSteps, dataSet)
        agentTF.close()
        eC.close()

        with open(config.DATA_FOLDER + '/config.txt', 'w') as f:
            out = "{},{}".format(epsilon, countTotalSteps)
            f.write(out)
Example No. 29
    def run(cls, dev, test, labeled_slot, labeled_train, unlabeled_slot,
            unlabeled_train, steps, gpu_memory):
        training_set = DataSet(labeled_slot, labeled_train)
        validation_set = DataSet(labeled_slot, dev)
        test_set = DataSet(labeled_slot, test)
        unlabeled_set = DataSet(unlabeled_slot, unlabeled_train)

        print('# training_set (%d)' % training_set.size())
        print('# validation_set (%d)' % validation_set.size())
        print('# test_set (%d)' % test_set.size())
        print('# unlabeled_set (%d)' % unlabeled_set.size())

        classifier = tf.contrib.learn.Estimator(
            model_fn=SlotFilling.rnn_model_fn,
            params={
                'num_slot': training_set.num_classes(),
                'num_pos': unlabeled_set.num_classes(),
                'drop_out': DROP_OUT,
                'embedding_dimension': EMBEDDING_DIMENSION,
                'vocab_size': DataSet.vocab_size(),
                'unlabeled': unlabeled_set.size() > 0
            },
            config=tf.contrib.learn.RunConfig(
                gpu_memory_fraction=gpu_memory,
                save_checkpoints_secs=30,
            ),
            model_dir='./model')

        validation_metrics = {
            "accuracy":
            tf.contrib.learn.MetricSpec(
                metric_fn=tf.contrib.metrics.streaming_accuracy,
                prediction_key='predictions',
                weight_key='labeled_mask')
        }

        monitor = tf.contrib.learn.monitors.ValidationMonitor(
            input_fn=lambda: SlotFilling.input_fn(
                validation_set, unlabeled_set, validation_set.size(), 1),
            eval_steps=1,
            every_n_steps=50,
            metrics=validation_metrics,
            early_stopping_metric="loss",
            early_stopping_metric_minimize=True,
            early_stopping_rounds=300)

        classifier.fit(input_fn=lambda: SlotFilling.input_fn(
            training_set, unlabeled_set, training_set.size(), 500),
                       monitors=[monitor],
                       steps=steps)

        predictions = classifier.predict(input_fn=lambda: SlotFilling.input_fn(
            test_set, unlabeled_set, test_set.size(), 1))

        slot_correct = 0
        slot_no_match = 0
        slot_mismatch = 0
        slot_over_match = 0

        for i, p in enumerate(predictions):
            target = test_set.labels()[i][:test_set.lengths()[i]]
            prediction = list(p['predictions'])[:test_set.lengths()[i]]
            for expected, actual in zip(target, prediction):
                actual = int(actual)
                if expected == actual:
                    slot_correct += 1
                elif test_set.get_slot(actual) == 'o':
                    slot_no_match += 1
                elif test_set.get_slot(expected) == 'o':
                    slot_over_match += 1
                else:
                    slot_mismatch += 1

        return {
            'accuracy': slot_correct / sum(test_set.lengths()),
            'correct': slot_correct,
            'no_match': slot_no_match,
            'mismatch': slot_mismatch,
            'over_match': slot_over_match,
        }
Example No. 30
 def load_data(self, imu_file_name: str, att_file_name: str):
     '''read pixhawk log file'''
     self._data_set = DataSet(imu_file_name, att_file_name)
     self._data_set.load_imu_data()