Example #1
def run_em(data, labels, iterations, clusters, method):
    for c in range(len(clusters)):
        for i in range(iterations):
            em = GaussianMixture(n_components=clusters[c],
                                 n_init=13,
                                 covariance_type="full").fit(data)
            guess = em.predict(data)

            acc = metrics.accuracy_score(labels, guess)
            homo = metrics.homogeneity_score(
                labels, guess)  # compare the true labels to those em predicted
            comp = metrics.completeness_score(labels, guess)
            vm = metrics.v_measure_score(labels, guess)
            arand = metrics.adjusted_rand_score(labels, guess)
            mi = metrics.adjusted_mutual_info_score(
                labels, guess, average_method="arithmetic")
            if c > 1:
                ch = metrics.calinski_harabaz_score(data, guess)
            else:
                ch = 0

            printf(" %6s %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6d em\n",
                   method, acc, homo, comp, vm, arand, mi, ch, clusters[c])

        if iterations > 1:
            printf("\n")
Example #2
 def __load_data_set(self):
     tools.printf("Loading training data...")
     self.train_data_gen = data_roller.StatefulRollerDataGen(self.cfg, config.dataset_path, self.train_sequences,
                                                             frames=self.train_frames_range)
     tools.printf("Loading validation data...")
     self.val_data_gen = data_roller.StatefulRollerDataGen(self.cfg, config.dataset_path, [self.val_sequence],
                                                           frames=self.val_frames_range)
Example #3
def print_xlc_data(tsize, depth_base, accuracy, vaccuracy, cv):
    # find the index of the best cross-validation score
    max_cv = 0.0
    midx = 0
    for i in range(len(cv)):
        if cv[i] > max_cv:
            max_cv = cv[i]
            midx = i

    printf("#T-pct  Depth  T-acc  V-acc  V-err\n")
    printf("%6d %6d %6.3f %6.3f %6.3f\n", tsize * 100, depth_base + midx,
           accuracy[midx], vaccuracy[midx], 100 - vaccuracy[midx])
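# The scan above simply finds the index of the best cross-validation score; an equivalent
# NumPy version (a sketch, assuming cv is array-like) for comparison:
import numpy as np

def best_cv_index(cv):
    # return the index of the best cross-validation score and the score itself
    midx = int(np.argmax(cv))
    return midx, float(cv[midx])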
Example #4
    def __init__(self, cfg, base_dir, seq, frames=None):
        pickles_dir = config.lidar_pickles_path
        seq_data = pykitti.odometry(base_dir, seq)
        self.num_frames = len(seq_data.poses)
        self.data = np.zeros([
            self.num_frames, cfg.input_channels, cfg.input_height,
            cfg.input_width
        ],
                             dtype=np.float16)

        with open(os.path.join(pickles_dir, seq + "_range.pik"),
                  "rb") as opfile:
            i = 0
            while True:
                try:
                    cur_image = pickle.load(opfile)
                    self.data[i, 0, :, :] = cur_image
                except EOFError:
                    break
                i += 1
                if i % 1000 == 0:
                    tools.printf("Loading lidar range seq. %s %.1f%% " %
                                 (seq, (i / self.num_frames) * 100))
            assert (i == self.num_frames)

        with open(os.path.join(pickles_dir, seq + "_intensity.pik"),
                  "rb") as opfile:
            i = 0
            while True:
                try:
                    cur_image = pickle.load(opfile)
                    self.data[i, 1, :, :] = cur_image
                    self.data[i, 1, :, :] = np.divide(self.data[i, 1, :, :],
                                                      255.0,
                                                      dtype=np.float16)
                    self.data[i, 1, :, :] = np.subtract(self.data[i, 1, :, :],
                                                        0.5,
                                                        dtype=np.float16)
                except EOFError:
                    break
                i += 1
                if i % 1000 == 0:
                    tools.printf("Loading lidar intensity seq. %s %.1f%% " %
                                 (seq, (i / self.num_frames) * 100))
            assert (i == self.num_frames)

        # select the range of frames
        if frames:
            self.data = self.data[frames]
            self.num_frames = self.data.shape[0]
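# Both loops above read back-to-back pickle records from a single file until EOFError is
# raised. The same pattern as a stand-alone generator (a sketch, not part of the original
# class):
import pickle

def iter_pickled(path):
    # yield successive objects from a file containing concatenated pickle dumps
    with open(path, "rb") as f:
        while True:
            try:
                yield pickle.load(f)
            except EOFError:
                return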
Example #5
def usage():
    printf(
        "usage: dimann.py [-c components] [-f] [-i iterations] [-r learn-rate] [-s stats] [-S solver]  train-path test-path\n"
    )
    printf(
        "\t use -f if the classification in the data is in col 0 (first), else it is assumed to be in the nth col\n"
    )
    printf(
        "\t solvers must be one of: all, adam, lbfgs or sgd. Defualt is all.\n"
    )
    printf("\t learn rate must be one of: constant, adaptive, invscaling\n")
Example #6
    def __init_tf_savers(self):
        self.tf_saver_checkpoint = tf.train.Saver(max_to_keep=2)
        self.tf_saver_best = tf.train.Saver(max_to_keep=2)

        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

        if self.cfg.dont_restore_init:
            regex = re.compile("initializer_layer")
            var_list = list(filter(lambda a: not regex.match(a.name),
                                   var_list))
        if self.cfg.dont_restore_fc:
            regex = re.compile("fc_layer")
            var_list = list(filter(lambda a: not regex.match(a.name),
                                   var_list))

        tools.printf("Variables to load from checkpoint...")
        for var in var_list:
            tools.printf("    " + var.name)

        self.tf_saver_restore = tf.train.Saver(var_list=var_list)
        self.best_val_path = os.path.join(self.results_dir_path, "best_val")
        self.model_epoch_path = self.results_dir_path
Example #7
    def __init__(self, config, base_dir, sequences, frames=None):
        self.truncated_seq_sizes = []
        self.end_of_sequence_indices = []
        self.curr_batch_idx = 0
        self.unused_batch_indices = []
        self.cfg = config

        if not frames:
            frames = [None] * len(sequences)

        total_num_examples = 0

        for i_seq, seq in enumerate(sequences):
            seq_data = pykitti.odometry(base_dir, seq, frames=frames[i_seq])
            num_frames = len(seq_data.poses)

            # leftover frames that don't fill a full window of timesteps are discarded
            num_examples = (num_frames - 1) // self.cfg.timesteps
            self.truncated_seq_sizes.append(num_examples * self.cfg.timesteps + 1)
            total_num_examples += num_examples

        # leftover examples that don't fill a full batch are discarded
        self.total_batch_count = total_num_examples // self.cfg.batch_size
        # +1 adjusts for the extra image in the last time step
        total_timesteps = self.total_batch_count * (self.cfg.timesteps + 1)

        # since some examples will be discarded, readjust the truncated_seq_sizes
        deleted_frames = (total_num_examples - self.total_batch_count * self.cfg.batch_size) * self.cfg.timesteps
        for i in range(len(self.truncated_seq_sizes) - 1, -1, -1):
            if self.truncated_seq_sizes[i] > deleted_frames:
                self.truncated_seq_sizes[i] -= deleted_frames
                break
            else:
                # subtract this sequence's frames before zeroing them out, otherwise
                # deleted_frames would never decrease
                deleted_frames -= self.truncated_seq_sizes[i]
                self.truncated_seq_sizes[i] = 0

        # for storing all training frames
        self.input_frames = np.zeros(
            [total_timesteps, self.cfg.batch_size, self.cfg.input_channels, self.cfg.input_height,
             self.cfg.input_width],
            dtype=np.uint8)
        poses_wrt_g = np.zeros([total_timesteps, self.cfg.batch_size, 4, 4], dtype=np.float32)  # ground truth poses

        num_image_loaded = 0
        for i_seq, seq in enumerate(sequences):
            seq_data = pykitti.odometry(base_dir, seq, frames=frames[i_seq])
            length = self.truncated_seq_sizes[i_seq]

            i = -1
            j = -1
            for i_img in range(length):

                if i_img % 100 == 0:
                    tools.printf("Loading sequence %s %.1f%% " % (seq, (i_img / length) * 100))

                i = num_image_loaded % total_timesteps
                j = num_image_loaded // total_timesteps

                # swap axis to channels first
                img = seq_data.get_cam2(i_img)
                img = img.resize((self.cfg.input_width, self.cfg.input_height))
                img = np.array(img)
                if img.shape[2] == 3:  # convert to bgr if colored image
                    img = img[..., [2, 1, 0]]
                img = np.reshape(img, [img.shape[0], img.shape[1], self.cfg.input_channels])
                img = np.moveaxis(np.array(img), 2, 0)
                pose = seq_data.poses[i_img]

                self.input_frames[i, j] = img
                poses_wrt_g[i, j] = pose
                num_image_loaded += 1

                # insert the extra image at the end of the batch, note the number of
                # frames per batch of batch size 1 is timesteps + 1
                if i_img != 0 and i_img != length - 1 and i_img % self.cfg.timesteps == 0:
                    i = num_image_loaded % total_timesteps
                    j = num_image_loaded // total_timesteps
                    self.input_frames[i, j] = img
                    poses_wrt_g[i, j] = pose

                    num_image_loaded += 1

            # If a batch contains the last frame of a sequence, the lstm state must be
            # reset before the following frame in the next batch
            self.end_of_sequence_indices.append((i + 1, j,))

        # make sure all of the examples are fully loaded, just to detect bugs
        assert (num_image_loaded == total_timesteps * self.cfg.batch_size)

        # now convert all the ground truth from 4x4 to xyz + quat, this is after the SE3 layer
        self.se3_ground_truth = np.zeros([total_timesteps, self.cfg.batch_size, 7], dtype=np.float32)
        for i in range(0, self.se3_ground_truth.shape[0]):
            for j in range(0, self.se3_ground_truth.shape[1]):
                translation = transformations.translation_from_matrix(poses_wrt_g[i, j])
                quat = transformations.quaternion_from_matrix(poses_wrt_g[i, j])
                self.se3_ground_truth[i, j] = np.concatenate([translation, quat])

        # extract the relative transformation between frames after the fully connected layer
        self.fc_ground_truth = np.zeros([total_timesteps, self.cfg.batch_size, 6], dtype=np.float32)
        # going through rows, then columns
        for i in range(0, self.fc_ground_truth.shape[0]):
            for j in range(0, self.fc_ground_truth.shape[1]):

                # always identity at the beginning of the sequence
                if i % (self.cfg.timesteps + 1) == 0:
                    m = transformations.identity_matrix()
                else:
                    m = np.dot(np.linalg.inv(poses_wrt_g[i - 1, j]), poses_wrt_g[i, j])  # double check

                translation = transformations.translation_from_matrix(m)
                ypr = transformations.euler_from_matrix(m, axes="rzyx")
                assert (np.all(np.abs(ypr) < np.pi))
                self.fc_ground_truth[i, j] = np.concatenate([translation, ypr])  # double check

        tools.printf("All data loaded, batch_size=%d, timesteps=%d, num_batches=%d" % (
            self.cfg.batch_size, self.cfg.timesteps, self.total_batch_count))

        self.next_epoch()
        gc.collect()  # force garbage collection
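# The fc ground truth above is the relative transform between consecutive poses,
# m = inv(T_prev) . T_curr, expressed as translation plus z-y-x Euler angles. A small
# helper sketching just that step, using the same transformations module as the code above:
import numpy as np
import transformations

def relative_xyz_ypr(pose_prev, pose_curr):
    # both poses are 4x4 homogeneous matrices; returns [x, y, z, yaw, pitch, roll]
    m = np.dot(np.linalg.inv(pose_prev), pose_curr)
    translation = transformations.translation_from_matrix(m)
    ypr = transformations.euler_from_matrix(m, axes="rzyx")
    return np.concatenate([translation, ypr])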
Example #8
    def __build_model_and_summary(self):
        # split the tensors
        with tf.variable_scope("tower_split"), tf.device("/cpu:0"):
            # split tensors, one slice per GPU tower
            ts_inputs = tf.split(self.t_inputs, self.num_gpu, 1)
            ts_lstm_initial_state = tf.split(self.t_lstm_initial_state, self.num_gpu, 2)
            ts_initial_poses = tf.split(self.t_initial_poses, self.num_gpu, 0)
            ts_imu_data = tf.split(self.t_imu_data, self.num_gpu, 1)
            ts_ekf_initial_state = tf.split(self.t_ekf_initial_state, self.num_gpu, 0)
            ts_ekf_initial_covar = tf.split(self.t_ekf_initial_covariance, self.num_gpu, 0)
            ts_se3_labels = tf.split(self.t_se3_labels, self.num_gpu, 1)
            ts_fc_labels = tf.split(self.t_fc_labels, self.num_gpu, 1)

            # list to store results
            ts_ekf_states = []
            ts_ekf_covar_states = []
            ts_lstm_states = []
            losses_keys = ["se3_loss", "se3_xyz_loss", "se3_quat_loss",
                           "fc_loss", "fc_xyz_loss", "fc_ypr_loss", "x_loss", "y_loss", "z_loss",
                           "total_loss"]
            ts_losses_dict = {k: [] for k in losses_keys}

        for i in range(0, self.num_gpu):
            device_setter = tf.train.replica_device_setter(ps_tasks=1,
                                                           ps_device='/job:localhost/replica:0/task:0/device:CPU:0',
                                                           worker_device='/job:localhost/replica:0/task:0/device:GPU:%d' % i)

            with tf.name_scope("tower_%d" % i), tf.device(device_setter):
                tools.printf("Building model...")

                fc_outputs, fc_covar, se3_outputs, lstm_states, ekf_states, ekf_covar_states = \
                    model.build_seq_model(self.cfg, ts_inputs[i], ts_lstm_initial_state[i], ts_initial_poses[i],
                                          ts_imu_data[i], ts_ekf_initial_state[i], ts_ekf_initial_covar[i],
                                          self.t_is_training, get_activations=True,
                                          use_initializer=self.t_use_initializer,
                                          use_ekf=self.cfg.use_ekf)

                # this returns lstm states as a tuple, we need to stack them
                lstm_states = tf.stack(lstm_states, 0)
                ts_lstm_states.append(lstm_states)
                ts_ekf_states.append(ekf_states)
                ts_ekf_covar_states.append(ekf_covar_states)

                with tf.variable_scope("loss"):
                    se3_loss, se3_xyz_loss, se3_quat_loss \
                        = losses.se3_losses(se3_outputs, ts_se3_labels[i], self.cfg.k_se3)
                    fc_loss, fc_xyz_loss, fc_ypr_loss, x_loss, y_loss, z_loss \
                        = losses.fc_losses(fc_outputs, fc_covar, ts_fc_labels[i], self.cfg.k_fc)
                    total_loss = (1 - self.t_alpha) * se3_loss + self.t_alpha * fc_loss

                    for k, v in ts_losses_dict.items():
                        v.append(locals()[k])

                tf.get_variable_scope().reuse_variables()

        with tf.variable_scope("tower_join"), tf.device("/cpu:0"):
            # join the lstm states
            self.t_lstm_states = tf.concat(ts_lstm_states, 2)
            for k, v in ts_losses_dict.items():
                ts_losses_dict[k] = tf.reduce_mean(v)

            self.t_ekf_states = tf.concat(ts_ekf_states, 0)
            self.t_ekf_covar_states = tf.concat(ts_ekf_covar_states, 0)

            self.t_total_loss = ts_losses_dict["total_loss"]
            self.t_se3_loss = ts_losses_dict["se3_loss"]

        tools.printf("Building optimizer...")
        with tf.variable_scope("optimizer", reuse=tf.AUTO_REUSE):
            if self.cfg.use_init and self.cfg.only_train_init:
                train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "initializer_layer")
            elif self.cfg.train_noise_covariance and self.cfg.static_nn:
                train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "imu_noise_params")
            else:
                train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

            self.op_trainer = tf.train.AdamOptimizer(learning_rate=self.t_lr). \
                minimize(self.t_total_loss, colocate_gradients_with_ops=True, var_list=train_vars)

        # tensorboard summaries
        tools.printf("Building tensorboard summaries...")
        with tf.device("/cpu:0"):
            self.t_sequence_id = tf.placeholder(dtype=tf.uint8, shape=[])
            self.t_epoch = tf.placeholder(dtype=tf.int32, shape=[])

        tf.summary.scalar("total_loss", ts_losses_dict["total_loss"])
        tf.summary.scalar("fc_loss", ts_losses_dict["fc_loss"])
        tf.summary.scalar("se3_loss", ts_losses_dict["se3_loss"])
        tf.summary.scalar("fc_xyz_loss", ts_losses_dict["fc_xyz_loss"])
        tf.summary.scalar("fc_ypr_loss", ts_losses_dict["fc_ypr_loss"])
        tf.summary.scalar("se3_xyz_loss", ts_losses_dict["se3_xyz_loss"])
        tf.summary.scalar("se3_quat_loss", ts_losses_dict["se3_quat_loss"])
        tf.summary.scalar("x_loss", ts_losses_dict["x_loss"])
        tf.summary.scalar("y_loss", ts_losses_dict["y_loss"])
        tf.summary.scalar("z_loss", ts_losses_dict["z_loss"])
        tf.summary.scalar("alpha", self.t_alpha)
        tf.summary.scalar("lr", self.t_lr)
        tf.summary.scalar("sequence_id", self.t_sequence_id)
        tf.summary.scalar("epoch", self.t_epoch)
        self.op_train_merged_summary = tf.summary.merge_all()

        activations = tf.get_collection(tf.GraphKeys.ACTIVATIONS)
        initial_layer = tf.summary.image("1st layer activations", tf.expand_dims(activations[0][:, 0, :, :], -1))
        final_layer = tf.summary.image("Last layer activations", tf.expand_dims(activations[1][:, 0, :, :], -1))
        self.op_train_image_summary = tf.summary.merge([initial_layer, final_layer])

        val_loss_sum = tf.summary.scalar("val_total_loss", ts_losses_dict["total_loss"])
        val_fc_sum = tf.summary.scalar("val_fc_losses", ts_losses_dict["fc_loss"])
        val_se3_sum = tf.summary.scalar("val_se3_losses", ts_losses_dict["se3_loss"])
        val_fc_xyz_loss = tf.summary.scalar("val_fc_xyz_loss", ts_losses_dict["fc_xyz_loss"])
        val_fc_ypr_loss = tf.summary.scalar("val_fc_ypr_loss", ts_losses_dict["fc_ypr_loss"])
        val_se3_xyz_loss = tf.summary.scalar("val_se3_xyz_loss", ts_losses_dict["se3_xyz_loss"])
        val_se3_quat_loss = tf.summary.scalar("val_se3_quat_loss", ts_losses_dict["se3_quat_loss"])
        val_x_sum = tf.summary.scalar("val_x_loss", ts_losses_dict["x_loss"])
        val_y_sum = tf.summary.scalar("val_y_loss", ts_losses_dict["y_loss"])
        val_z_sum = tf.summary.scalar("val_z_loss", ts_losses_dict["z_loss"])
        self.op_val_merged_summary = tf.summary.merge(
                [val_loss_sum, val_fc_sum, val_se3_sum, val_fc_xyz_loss, val_fc_ypr_loss, val_se3_xyz_loss,
                 val_se3_quat_loss, val_x_sum, val_y_sum, val_z_sum])
Example #9
    def __run_train(self):
        sess_config = tf.ConfigProto(allow_soft_placement=True)
        if self.cfg.debug:
            self.tf_session = tf_debug.LocalCLIDebugWrapperSession(tf.Session(config=sess_config))
        else:
            self.tf_session = tf.Session(config=sess_config)

        self.tf_session.run(tf.global_variables_initializer())
        if self.restore_file:
            tools.printf("Restoring model weights from %s..." % self.restore_file)
            self.tf_saver_restore.restore(self.tf_session, self.restore_file)

        else:
            if self.cfg.use_init and self.cfg.only_train_init:
                raise ValueError("Set to only train initializer, but restore file was not provided!?!?!")
            tools.printf("Initializing variables...")

        # initialize tensorboard writer
        self.tf_tb_writer = tf.summary.FileWriter(os.path.join(self.results_dir_path, 'graph_viz'))
        self.tf_tb_writer.add_graph(tf.get_default_graph())
        self.tf_tb_writer.flush()

        # initialize lstm and ekf states
        curr_lstm_states = np.zeros([2, self.cfg.lstm_layers, self.cfg.batch_size, self.cfg.lstm_size],
                                    dtype=np.float32)
        curr_ekf_state = np.zeros([self.cfg.batch_size, 17], dtype=np.float32)
        curr_ekf_cov_state = 0.01 * np.repeat(np.expand_dims(np.identity(17, dtype=np.float32), axis=0),
                                              repeats=self.cfg.batch_size, axis=0)

        lstm_states_dic = {}
        ekf_states_dic = {}
        ekf_cov_states_dic = {}
        for seq in self.train_sequences:
            lstm_states_dic[seq] = np.zeros(
                    [self.train_data_gen.batch_counts[seq], 2, self.cfg.lstm_layers, self.cfg.batch_size,
                     self.cfg.lstm_size], dtype=np.float32)
            ekf_states_dic[seq] = np.zeros([self.train_data_gen.batch_counts[seq], self.cfg.batch_size, 17],
                                           dtype=np.float32)
            ekf_cov_states_dic[seq] = np.repeat(np.expand_dims(curr_ekf_cov_state, axis=0),
                                                self.train_data_gen.batch_counts[seq], axis=0)

        _train_image_summary = None
        total_batches = self.train_data_gen.total_batches()
        best_val_loss = 9999999999
        i_epoch = 0

        for i_epoch in range(self.start_epoch, self.cfg.num_epochs):
            tools.printf("Training Epoch: %d ..." % i_epoch)
            start_time = time.time()

            alpha_set = Train.__set_from_schedule(self.cfg.alpha_schedule, i_epoch)
            lr_set = Train.__set_from_schedule(self.cfg.lr_schedule, i_epoch)
            tools.printf("alpha set to %f" % alpha_set)
            tools.printf("learning rate set to %f" % lr_set)

            while self.train_data_gen.has_next_batch():
                j_batch = self.train_data_gen.curr_batch()

                # get inputs
                batch_id, curr_seq, batch_data, fc_ground_truth, se3_ground_truth, imu_measurements = self.train_data_gen.next_batch()
                data_roller.get_init_lstm_state(lstm_states_dic, curr_lstm_states, curr_seq, batch_id,
                                                self.cfg.bidir_aug)

                data_roller.get_init_ekf_states(ekf_states_dic, ekf_cov_states_dic, curr_ekf_state, curr_ekf_cov_state,
                                                curr_seq, batch_id, self.cfg.bidir_aug)

                # shift se3 ground truth to be relative to the first pose
                init_poses = se3_ground_truth[0, :, :]

                nrnd = np.random.rand(1)
                use_init_train = False
                if self.cfg.use_init and (j_batch == 0 or nrnd < self.cfg.init_prob):
                    use_init_train = True

                # Run training session
                _, _curr_lstm_states, _curr_ekf_states, _curr_ekf_covar, _train_summary, _train_image_summary, _total_losses = \
                    self.tf_session.run(
                            [self.op_trainer, self.t_lstm_states, self.t_ekf_states, self.t_ekf_covar_states,
                             self.op_train_merged_summary, self.op_train_image_summary,
                             self.t_total_loss],
                            feed_dict={
                                self.t_inputs: batch_data,
                                self.t_se3_labels: se3_ground_truth[1:, :, :],
                                self.t_fc_labels: fc_ground_truth,
                                self.t_lstm_initial_state: curr_lstm_states,
                                self.t_initial_poses: init_poses,
                                self.t_lr: lr_set,
                                self.t_alpha: alpha_set,
                                self.t_is_training: True,
                                self.t_use_initializer: use_init_train,
                                self.t_sequence_id: int(curr_seq),
                                self.t_epoch: i_epoch,
                                self.t_ekf_initial_state: curr_ekf_state,
                                self.t_ekf_initial_covariance: curr_ekf_cov_state,
                                self.t_imu_data: imu_measurements
                            },
                            options=self.tf_run_options,
                            run_metadata=self.tf_run_metadata)

                data_roller.update_lstm_state(lstm_states_dic, _curr_lstm_states, curr_seq, batch_id)
                data_roller.update_ekf_state(ekf_states_dic, ekf_cov_states_dic, _curr_ekf_states, _curr_ekf_covar,
                                             curr_seq, batch_id)

                if self.tensorboard_meta:
                    self.tf_tb_writer.add_run_metadata(self.tf_run_metadata,
                                                       'epochid=%d_batchid=%d' % (i_epoch, j_batch))
                self.tf_tb_writer.add_summary(_train_summary, i_epoch * total_batches + j_batch)

                # print stats
                tools.printf("batch %d/%d: Loss:%.7f" % (j_batch + 1, total_batches, _total_losses))

            self.tf_tb_writer.add_summary(_train_image_summary, (i_epoch + 1) * total_batches)

            tools.printf("Evaluating validation loss...")
            curr_val_loss = self.__run_val_loss(i_epoch, alpha_set)

            # check for best results
            if curr_val_loss < best_val_loss:
                tools.printf("Saving best result...")
                best_val_loss = curr_val_loss
                self.tf_saver_best.save(self.tf_session, os.path.join(self.results_dir_path,
                                                                      "best_val", "model_best_val_checkpoint"),
                                        global_step=i_epoch)
                tools.printf("Best val loss, model saved.")
            if i_epoch % 5 == 0:
                tools.printf("Saving checkpoint...")
                self.tf_saver_checkpoint.save(self.tf_session,
                                              os.path.join(self.results_dir_path, "model_epoch_checkpoint"),
                                              global_step=i_epoch)
                tools.printf("Checkpoint saved")

            self.tf_tb_writer.flush()
            tools.printf("ave_val_loss(se3): %f, time: %f\n" % (curr_val_loss, time.time() - start_time))

            self.train_data_gen.next_epoch()

        tools.printf("Final save...")
        self.tf_saver_checkpoint.save(self.tf_session,
                                      os.path.join(self.results_dir_path, "model_epoch_checkpoint"),
                                      global_step=i_epoch)
        tools.printf("Saved results to %s" % self.results_dir_path)

        self.tf_session.close()
#     80:  0.000002,
#     100: 0.000001
# }
start_epoch = 0
# alpha_schedule = {0: 0.99,  # epoch: alpha
#                   20: 0.9,
#                   40: 0.5,
#                   60: 0.1,
#                   80: 0.025}
alpha_schedule = {0: 0.5}
alpha_set = 0.5

tensorboard_meta = False

# =================== MODEL + LOSSES + Optimizer ========================
tools.printf("Building losses...")
with tf.device("/cpu:0"):
    alpha = tf.placeholder(tf.float32, name="alpha",
                           shape=[])  # between 0 and 1, larger favors fc loss

with tf.device(
        tf.train.replica_device_setter(
            ps_tasks=1,
            ps_device='/job:localhost/replica:0/task:0/device:GPU:0',
            worker_device='/job:localhost/replica:0/task:0/device:GPU:0')):
    inputs, lstm_initial_state, initial_poses, is_training, fc_outputs, se3_outputs, lstm_states = model.build_seq_model(
        cfg, True)
    se3_labels, fc_labels = simple_model.model_labels(cfg)

    with tf.variable_scope("Losses"):
        se3_losses, se3_xyz_losses, se3_quat_losses = losses.se3_losses(
Example #11
    "-f": ("classfirst", bool_flag, False),
    "-g": ("info_gain", bool_flag, True),  # turn off which sets gini
    "-i": ("iterations", int_flag, 10),
    "-l": ("learn_curve", bool_flag,
           True),  # generate a learning curve; -l turns off
    "--mspl": ("msplit", int_flag, 10),
    "--mleaf": ("mleaf", int_flag, 10),
    "-p": ("prune", bool_flag, True),  # turn on prune
    "-s": ("rstate", int_flag, 100),
    "-t": ("tsize", val_flag, 30)
}
opts = {}  # map where option values or defaults come back
pparms = parse_args(flags, opts, "training-file [testing-file]")

if pparms is None or len(pparms) < 1:
    printf("missing filename on command line\n")
    sys.exit(1)

if opts["info_gain"]:
    method = "entropy"  # sklearn constant for info gain
else:
    method = "gini"

filen = pparms[0]  # raw data file mandatory first parameter
gen_lc = opts["learn_curve"]
pr_ave = opts["pr_ave"]
tsize = opts[
    "tsize"] / 100  # size of the raw data to resrve for validation (pct)
rstate = opts["rstate"]  # random state value
max_depth = opts["depth"]
mspl = opts["msplit"]  # minimum split
Example #12
           50),  # max number of estimators (starting with est - iterations)
    "-E": ("est_start", int_flag,
           0),  # number of estimators to start with; 0 == beginning of pool
    "-f": ("classfirst", bool_flag, False),
    "-g": ("info_gain", bool_flag, True),  # turn off which sets gini
    "-l": ("learn_curve", bool_flag, True),  # -l turns lc generation off 
    "-p": ("prune", bool_flag, True),  # turn on prune
    "-r": ("learn_rate", val_flag, 1.0),  # set the learning rate
    "-s": ("rstate", int_flag, 100),
    "-t": ("tsize", val_flag, 30)
}
opts = {}  # map where option values or defaults come back
pparms = parse_args(flags, opts, "training-file [testing-file]")

if pparms is None or len(pparms) < 1:
    printf("missing filename on command line\n")
    sys.exit(1)

if opts["info_gain"]:
    method = "entropy"  # sklearn constant for info gain
else:
    method = "gini"

filen = pparms[0]  # raw data file mandatory first parameter
gen_lc = opts["learn_curve"]
pr_ave = opts["pr_ave"]
tsize = opts[
    "tsize"] / 100  # size of the raw data to resrve for validation (pct)
rstate = opts["rstate"]  # random state value
depth = opts["depth"]
lrate = opts["learn_rate"]
cfg_si = config_class()

# Manipulate the configurations for evaluation
cfg.timesteps = 1
cfg.sequence_stride = 1
cfg.batch_size = 1
cfg.bidir_aug = False
cfg.use_init = False

cfg_si.timesteps = 1
cfg_si.sequence_stride = 1
cfg_si.batch_size = 1
cfg_si.bidir_aug = False
# cfg_si.use_init = whatever the original setting was

tools.printf("Building eval model....")
inputs, lstm_initial_state, initial_poses, imu_data, ekf_initial_state, ekf_initial_covariance, _, _, dt \
    = model.seq_model_inputs(cfg)
fc_outputs, fc_covar, se3_outputs, lstm_states, ekf_out_states, ekf_out_covar, _, _, _ = \
    model.build_seq_model(cfg, inputs, lstm_initial_state, initial_poses, imu_data, ekf_initial_state,
                          ekf_initial_covariance,
                          dt,
                          tf.constant(False, dtype=tf.bool),  # is training
                          False,  # get_activation
                          tf.constant(False, dtype=tf.bool),  # use initializer
                          cfg.use_ekf)  # use ekf

if cfg_si.use_init:
    tools.printf("Building eval model for initial LSTM states...")
    inputs_si, _, initial_poses_si, imu_data_si, _, _, _, _ = model.seq_model_inputs(cfg_si)
    _, _, _, _, _, _, feed_lstm_initial_states, feed_ekf_inital_states, feed_initial_covariance = \
Example #14
def usage():
    printf(
        "usage: dimred.py [-c components] [-f] [-i iterations] [-k k-value] train-path test-path\n"
    )
import numpy as np
import os

dir_name = "trajectory_results"
kitti_seqs = ["00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10"]
#kitti_seqs = ["01"]

# if kitti_seq in ["11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21"]:
#     save_ground_truth = False
# else:
#     save_ground_truth = True

save_ground_truth = True
cfg = config.SeqEvalLidarConfig

tools.printf("Building eval model....")
inputs, lstm_initial_state, initial_poses, \
is_training, fc_outputs, se3_outputs, lstm_states = model.build_seq_model(cfg)

for kitti_seq in kitti_seqs:
    tools.printf("Loading training data...")
    train_data_gen = data.StatefulRollerDataGen(cfg, config.dataset_path,
                                                [kitti_seq])

    results_dir_path = os.path.join(config.save_path, dir_name)
    if not os.path.exists(results_dir_path):
        os.makedirs(results_dir_path)

    # ==== Read Model Checkpoints =====
    restore_model_file = "/home/cs4li/Dev/end_to_end_visual_odometry/results/train_seq_20180418-16-37-02/best_val/model_best_val_checkpoint-143"
Example #16
import data
import config
import tools

cfg = config.PairTrainConfigs

tools.printf("Loading training data...")
train_data_gen = data.StatefulDataGen(cfg, "/home/cs4li/Dev/KITTI/dataset/",
                                      ["00", "01", "02", "03", "04", "05"])
# train_data_gen = data.StatefulDataGen(cfg, "/home/cs4li/Dev/KITTI/dataset/", ["01"], frames=[range(0, 100)])
tools.printf("Loading validation data...")
val_data_gen = data.StatefulDataGen(cfg,
                                    "/home/cs4li/Dev/KITTI/dataset/", ["10"],
                                    frames=[None])

import os
import model
import losses
import tensorflow as tf
import numpy as np
import time
import tools

# =================== MODEL + LOSSES + Optimizer ========================
inputs, is_training, fc_outputs = model.build_pair_training_model()
_, fc_labels = model.model_labels(cfg)

tools.printf("Building losses...")
with tf.device("/gpu:0"):
    with tf.variable_scope("Losses"):
        fc_losses = losses.pair_train_fc_losses(fc_outputs, fc_labels, cfg.k)
Example #17
    "-i":
    ("iterations", int_flag,
     50),  # max value for iterations (values selected from the step pool)
    "-l":
    ("learn_curve", bool_flag, True),  # output should be learning curve info
    "-r": ("learn_rate", str_flag,
           "constant"),  # must be one of constant, adaptive, invscaling
    "-s": ("rstate", int_flag, 100),
    "-S": ("solver", str_flag, "adam"),  # solver (must be adam, lbfgs, or sgd)
    "-t": ("tsize", val_flag, 30)
}
opts = {}  # map where option values or defaults come back
pparms = parse_args(flags, opts, "training-file [testing-file]")

if pparms is None or len(pparms) < 1:
    printf("missing filename on command line\n")
    sys.exit(1)

filen = pparms[0]  # raw data file mandatory first parameter
gen_lc = opts["learn_curve"]
pr_ave = opts["pr_ave"]
tsize = opts[
    "tsize"] / 100  # size of the raw data to resrve for validation (pct)
max_iter = opts["iterations"]
rstate = opts["rstate"]  # random state value

if len(
        pparms
) > 1:  # a second filename assumed to be a separate test data set for validation
    vfilen = pparms[1]
    have_val_data = True
se3_outputs = model.se3_layer(rel_disp, initial_poses)

data_gen = data_roller.StatefulRollerDataGen(cfg,
                                             config.dataset_path, [kitti_seq],
                                             frames=frames)

results_dir_path = os.path.join(config.save_path, "ekf_debug")
if not os.path.exists(results_dir_path):
    os.makedirs(results_dir_path)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    total_batches = data_gen.total_batches()
    tools.printf("Start evaluation loop...")

    prediction = np.zeros([total_batches + 1, 7])
    ground_truths = np.zeros([total_batches + 1, 7])
    ekf_states = np.zeros([total_batches + 1, 17])
    fc_ground_truths = np.zeros([total_batches + 1, 6])
    imu_measurements = np.zeros([total_batches + 1, 6])
    init_pose = np.expand_dims(data_gen.get_sequence_initial_pose(kitti_seq),
                               axis=0)
    prediction[0, :] = init_pose
    ground_truths[0, :] = init_pose
    fc_ground_truths[0, :] = np.zeros([6])
    imu_measurements[0, :] = np.zeros([6])

    curr_ekf_state = np.zeros([cfg.batch_size, 17], dtype=np.float32)
    ekf_states[0, :] = curr_ekf_state
    def __init__(self, cfg, base_dir, sequences, frames=None):
        self.cfg = cfg
        self.sequences = sequences
        self.curr_batch_sequence = 0
        self.current_batch = 0

        if self.cfg.bidir_aug and (self.cfg.batch_size % 2 != 0):
            raise ValueError("Batch size must be even")

        if self.cfg.data_type != "cam" and self.cfg.data_type != "lidar":
            raise ValueError("lidar or camera for data type!")

        frames_data_type = np.uint8
        if self.cfg.data_type == "lidar":
            frames_data_type = np.float16

        if not frames:
            frames = [None] * len(sequences)

        self.input_frames = {}
        self.poses = {}

        # Mirror in y-z plane
        self.H = np.identity(3, dtype=np.float32)
        self.H[1][1] = -1.0

        self.se3_ground_truth = {}
        self.se3_mirror_ground_truth = {}
        self.fc_ground_truth = {}
        self.fc_reverse_ground_truth = {}
        self.fc_mirror_ground_truth = {}
        self.fc_reverse_mirror_ground_truth = {}

        self.imu_measurements = {}
        self.imu_measurements_mirror = {}
        self.imu_measurements_reverse = {}
        self.imu_measurements_reverse_mirror = {}

        # This keeps track of the number of batches in each sequence
        self.batch_counts = {}
        # this holds the batch ids for each sequence. It is randomized and the order determines the order in
        # which the batches are used
        self.batch_order = {}
        # this keeps track of the starting offsets in the input frames for batch id 0 in each sequence
        self.batch_offsets = {}
        # contains batch_cnt entries; each sequence is represented by its index repeated
        # once for every batch in that sequence
        self.sequence_ordering = None
        # per-sequence counters that track how many batches from each sequence
        # have already been used in the current epoch
        self.sequence_batch = {}

        self.batch_cnt = 0
        self.total_frames = 0

        for i_seq, seq in enumerate(sequences):
            seq_loader = DataLoader(self.cfg, base_dir, seq, frames=frames[i_seq])
            num_frames = seq_loader.get_num_frames()

            self.input_frames[seq] = np.zeros(
                    [num_frames, self.cfg.input_channels, self.cfg.input_height, self.cfg.input_width],
                    dtype=frames_data_type)
            self.poses[seq] = seq_loader.get_poses_in_corresponding_frame()
            self.se3_ground_truth[seq] = np.zeros([num_frames, 7], dtype=np.float32)
            self.se3_mirror_ground_truth[seq] = np.zeros([num_frames, 7], dtype=np.float32)
            self.fc_ground_truth[seq] = np.zeros([num_frames - 1, 6], dtype=np.float32)
            self.fc_reverse_ground_truth[seq] = np.zeros([num_frames - 1, 6], dtype=np.float32)
            self.fc_mirror_ground_truth[seq] = np.zeros([num_frames - 1, 6], dtype=np.float32)
            self.fc_reverse_mirror_ground_truth[seq] = np.zeros([num_frames - 1, 6], dtype=np.float32)

            self.imu_measurements[seq] = np.zeros([num_frames - 1, 6], dtype=np.float32)
            self.imu_measurements_mirror[seq] = np.zeros([num_frames - 1, 6], dtype=np.float32)
            self.imu_measurements_reverse[seq] = np.zeros([num_frames - 1, 6], dtype=np.float32)
            self.imu_measurements_reverse_mirror[seq] = np.zeros([num_frames - 1, 6], dtype=np.float32)

            for i_img in range(num_frames):
                if i_img % 100 == 0:
                    tools.printf("Loading sequence %s %.1f%% " % (seq, (i_img / num_frames) * 100))

                img = seq_loader.get_img(i_img)
                self.input_frames[seq][i_img, :] = img

                # now convert all the ground truth from 4x4 to xyz + quat, this is after the SE3 layer
                translation = transformations.translation_from_matrix(self.poses[seq][i_img])
                quat = transformations.quaternion_from_matrix(self.poses[seq][i_img])

                mirror_pose = np.identity(4, dtype=np.float32)
                mirror_pose[0:3, 0:3] = np.dot(self.H, np.dot(self.poses[seq][i_img][0:3, 0:3], self.H))
                mirror_pose[0:3, 3] = np.dot(self.H, translation)

                mirror_quat = transformations.quaternion_from_matrix(mirror_pose[0:3, 0:3])
                self.se3_ground_truth[seq][i_img] = np.concatenate([translation, quat])
                self.se3_mirror_ground_truth[seq][i_img] = np.concatenate([mirror_pose[0:3, 3], mirror_quat])

                # relative transformation labels
                if i_img + 1 < num_frames:
                    mirror_pose_next = np.identity(4, dtype=np.float32)
                    mirror_pose_next[0:3, 0:3] = np.dot(self.H, np.dot(self.poses[seq][i_img + 1][0:3, 0:3], self.H))
                    trans_next = transformations.translation_from_matrix(self.poses[seq][i_img + 1])
                    mirror_pose_next[0:3, 3] = np.dot(self.H, trans_next)

                    m_forward = np.dot(np.linalg.inv(self.poses[seq][i_img]), self.poses[seq][i_img + 1])
                    m_forward_mirror = np.dot(np.linalg.inv(mirror_pose), mirror_pose_next)
                    m_reverse = np.dot(np.linalg.inv(self.poses[seq][i_img + 1]), self.poses[seq][i_img])
                    m_reverse_mirror = np.dot(np.linalg.inv(mirror_pose_next), mirror_pose)

                    trans_forward = transformations.translation_from_matrix(m_forward)
                    ypr_forward = transformations.euler_from_matrix(m_forward, axes="rzyx")
                    trans_forward_mirror = transformations.translation_from_matrix(m_forward_mirror)
                    ypr_forward_mirror = transformations.euler_from_matrix(m_forward_mirror, axes="rzyx")
                    trans_reverse = transformations.translation_from_matrix(m_reverse)
                    ypr_reverse = transformations.euler_from_matrix(m_reverse, axes="rzyx")
                    trans_reverse_mirror = transformations.translation_from_matrix(m_reverse_mirror)
                    ypr_reverse_mirror = transformations.euler_from_matrix(m_reverse_mirror, axes="rzyx")

                    self.fc_ground_truth[seq][i_img] = np.concatenate([trans_forward, ypr_forward])
                    self.fc_mirror_ground_truth[seq][i_img] = np.concatenate([trans_forward_mirror, ypr_forward_mirror])
                    self.fc_reverse_ground_truth[seq][i_img] = np.concatenate([trans_reverse, ypr_reverse])
                    self.fc_reverse_mirror_ground_truth[seq][i_img] = np.concatenate(
                            [trans_reverse_mirror, ypr_reverse_mirror])

                    get_imu = seq_loader.get_averaged_imu_in_corresponding_frame
                    self.imu_measurements[seq][i_img] = get_imu(i_img, mirror=False, reverse=False)
                    self.imu_measurements_mirror[seq][i_img] = get_imu(i_img, mirror=True, reverse=False)
                    self.imu_measurements_reverse[seq][i_img] = get_imu(i_img, mirror=False, reverse=True)
                    self.imu_measurements_reverse_mirror[seq][i_img] = get_imu(i_img, mirror=True, reverse=True)

            # How many examples the sequence contains, were it to be processed using a batch size of 1
            sequence_examples = np.ceil((num_frames - self.cfg.timesteps) / self.cfg.sequence_stride).astype(
                    np.int32)
            # how many batches in the sequence, cutting off any extra
            if self.cfg.bidir_aug:
                self.batch_counts[seq] = np.floor(2 * sequence_examples / self.cfg.batch_size).astype(np.int32)
            else:
                self.batch_counts[seq] = np.floor(sequence_examples / self.cfg.batch_size).astype(np.int32)

            self.batch_cnt += self.batch_counts[seq].astype(np.int32)
            self.batch_order[seq] = np.arange(0, self.batch_counts[seq], dtype=np.uint32)
            self.batch_offsets[seq] = np.zeros([self.cfg.batch_size], dtype=np.uint32)
            self.sequence_batch[seq] = 0

            self.total_frames += num_frames

        tools.printf("All data loaded, batch_size=%d, timesteps=%d, num_batches=%d" % (
            self.cfg.batch_size, self.cfg.timesteps, self.batch_cnt))

        gc.collect()
        self.sequence_ordering = np.zeros([self.batch_cnt], dtype=np.uint16)
        self.set_batch_offsets()
        self.next_epoch(False)
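# A quick numeric check of the batch arithmetic above, with made-up numbers: for
# num_frames=1101, timesteps=10, sequence_stride=1, batch_size=4 and bidir_aug=True,
# sequence_examples = ceil((1101 - 10) / 1) = 1091 and
# batch_counts = floor(2 * 1091 / 4) = 545 batches for that sequence.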
Example #20
import pandas as pd
from sklearn import neighbors
from sklearn.model_selection import cross_val_score, train_test_split
import matplotlib.pyplot as plt
from tools import printf  # print with separation
'''
All files need to be converted from the R .RData format first. This can be done by opening
an R console and issuing the following commands:
load("/home/sam/Documents/BDS/APM/AppliedPredictiveModeling/data/segmentationOriginal.RData")  (path to the .RData file)
write.csv(segmentationOriginal, file = "/home/sam/Documents/BDS/APM/segmentation_original.csv")
'''

if __name__ == '__main__':

    twoClassData = pd.read_csv('data/twoClassData.csv')
    twoClassData.columns = ['ID', 'PredictorA', 'PredictorB', 'Classes']

    printf('Columns of twoClassData\n\n', twoClassData.columns)
    printf('First five rows of twoClassData\n\n', twoClassData.head())

    predictors = twoClassData[['PredictorA', 'PredictorB']]
    classes = twoClassData.Classes

    # Split arrays or matrices into random train and test subsets. Test size 20%
    predictors_train, predictors_test, classes_train, classes_test = train_test_split(
        predictors, classes, test_size=0.2, random_state=42)
    plt.figure(figsize=(10, 6))
    plt.plot(twoClassData.PredictorA[twoClassData.Classes == 'Class1'],
             twoClassData.PredictorB[twoClassData.Classes == 'Class1'],
             '^r',
             label='$Class$ $1$',
             alpha=0.6)
    plt.plot(twoClassData.PredictorA[twoClassData.Classes == 'Class2'],
Example #21
        "components", int_flag, 5
    ),  # number of features (parameters) to reduce to (components isn't descriptive)
    "-f": ("classfirst", bool_flag, False),
    "-i": ("max-iterations", int_flag, 600),
    "-r": ("learn_rate", str_flag,
           "constant"),  # must be one of constant, adaptive, invscaling
    "-s": ("stats", bool_flag, True),  # number of clusters to divide into
    "-S":
    ("solver", str_flag, "all"),  # solver (must be all, adam, lbfgs, or sgd)
    "-t": ("trials", int_flag, "1"),  # number of trials on fitting
}
opts = {}  # map where option values or defaults come back
pparms = parse_args(flags, opts, "training-file [testing-file]")

if pparms is None or len(pparms) < 1:
    printf("missing filename on command line (training data)\n")
    sys.exit(1)

train_fn = pparms[0]  # file names; training validation (test)
#test_fn = pparms[1];

max_iters = opts["max-iterations"]
comps = opts["components"]  # number to reduce features/parameters down to
show_stats = opts["stats"]

if opts["solver"] == "all":  # configure list of solvers based on command line or defaul (all)
    solvers = ["adam", "lbfgs", "sgd"]  # list of solvers that we'll run later
else:
    solvers = [opts["solver"]]

np.random.seed(17)
Example #22
import pandas as pd
import seaborn as sns
from scipy import stats
from sklearn.decomposition import PCA
from tools import printf  # print with separation
'''
All files need to be converted from the R .RData format first. This can be done by opening
an R console and issuing the following commands:
load("/home/sam/Documents/BDS/APM/AppliedPredictiveModeling/data/segmentationOriginal.RData")  (path to the .RData file)
write.csv(segmentationOriginal, file = "/home/sam/Documents/BDS/APM/segmentation_original.csv")
'''

seg_org = pd.read_csv('data/segmentation_original.csv')

# Use the ID column as the index
seg_org = seg_org.set_index('ID')

# Compute per-column skew over the rows where Case == 'Train'
train_skews = seg_org[seg_org.Case == 'Train'].skew()
printf('Skew of first five columns\n\n', train_skews.head())

train = seg_org[seg_org.Case == 'Train']

# Reduce skew for AreaCh1 column by boxcox transformation
AreaCh1_boxcox = stats.boxcox(train.AreaCh1)

printf('Descriptive statistics for AreaCh1\n\n', train.AreaCh1.describe())

# Do principal component analysis for n components
n_components = 10
pca = PCA(n_components)

# Run singular value decomposition
pca.fit(seg_org.select_dtypes(include=['float64', 'int']))
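# After the fit, the proportion of variance captured by each of the n_components is
# available on the fitted estimator; a short follow-up using the same printf helper:
printf('Explained variance ratios\n\n', pca.explained_variance_ratio_)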
Example #23
def print_configs(cfg):
    for attr in dir(cfg):
        if not callable(getattr(cfg, attr)) and not attr.startswith("__"):
            tools.printf("%s: %s" % (attr, getattr(cfg, attr)))
Example #24
    "-N": ("normalise", bool_flag,
           False),  # this seems to hurt despite what stack abuse says!
    "-f": ("classfirst", bool_flag, False),
    "-i": (
        "iterations", int_flag, 0
    ),  # if set > 0, then we subtract from max-neighbours and use as starting point
    "-l": ("learn_curve", bool_flag, True),  # -l turns off
    "-n": ("max_neighbours", int_flag, 10),
    "-s": ("rstate", int_flag, 100),
    "-t": ("tsize", int_flag, 30)
}
opts = {}  # map where option values or defaults come back
pparms = parse_args(flags, opts, "training-file [testing-file]")

if pparms is None or len(pparms) < 1:
    printf("missing filename on command line\n")
    sys.exit(1)

filen = pparms[0]
tsize = opts[
    "tsize"] / 100  # size of the raw data to resrve for validation (pct)
rstate = opts["rstate"]  # random state value
iterations = opts["iterations"]  # random state value
gen_lc = opts["learn_curve"]
pr_ave = opts["pr_ave"]  # precision/recall average method
max_neighbours = opts["max_neighbours"]
if len(
        pparms
) > 1:  # a second filename assumed to be a separate test data set for validation
    vfilen = pparms[1]
    have_val_data = True
Example #25
# if you programme in go, then you recognise the beauty here :)
flags = {  # define possible flags and the default: map key, type, default value
    "-c": (
        "components", int_flag, 5
    ),  # number of features (parameters) to reduce to (components isn't descriptive)
    "-f": ("classfirst", bool_flag, False),
    "-i": ("iterations", int_flag, 10),
    "-k": ("k-clusters", int_flag,
           0),  # override cvalues with a single k-cluster setting
    "-s": ("stats", bool_flag, True),  # number of clusters to divide into
}
opts = {}  # map where option values or defaults come back
pparms = parse_args(flags, opts, "training-file [testing-file]")

if pparms is None or len(pparms) < 2:
    printf("missing filenames on command line (training testing)\n")
    sys.exit(1)

train_fn = pparms[0]  # file names; training validation (test)
test_fn = pparms[1]

clusters = opts["k-clusters"]
comps = opts["components"]  # number to reduce features/parameters down to
show_stats = opts["stats"]

np.random.seed(17)
# -----------------------------------------------------------------------------------------
train_data = pd.read_csv(train_fn, sep=',')  # suck in datasets
test_data = pd.read_csv(test_fn, sep=',')
train_n, train_p = train_data.shape  # number of training instances and parameters
test_n, test_p = test_data.shape
Example #26
# -- parse command line and convert to convenience variables -----------------------------------------------
# if you programme in go, then you recognise the beauty here :) 
flags = {                              # define possible flags and the default: map key, type, default value
    "-f": ("classfirst", bool_flag, False),
    "-i": ("max-iterations", int_flag, 600),
    "-k": ("k-clusters", int_flag, 5),          # number of clusters to generate and add as features
    "-r": ("learn_rate", str_flag, "constant"), # must be one of constant, adaptive, invscaling
    "-s": ("stats", bool_flag, True),          # number of clusters to divide into
    "-S": ("solver", str_flag, "all"),         # solver (must be all, adam, lbfgs, or sgd)
    "-t": ("trials", int_flag, "1"),         # number of trials on fitting
}
opts = { }                                    # map where option values or defaults come back
pparms = parse_args( flags, opts, "training-file [testing-file]" )

if pparms is None  or  len( pparms ) < 1 :
    printf( "missing filenames on command line (training testing)\n" )
    sys.exit( 1 )

train_fn = pparms[0]              # file names; training validation (test)

max_iters = opts["max-iterations"]
kclusters = opts["k-clusters"]          # number to reduce features/parameters down to
show_stats = opts["stats"]

if opts["solver"] == "all" :                    # configure list of solvers based on command line or defaul (all)
    solvers = [ "adam", "lbfgs", "sgd" ]        # list of solvers that we'll run later
else :
    solvers = [ opts["solver"] ]

np.random.seed( 17 )
# -----------------------------------------------------------------------------------------
Example #27
def km_header(nfeatures):
    printf("\n")
    printf("# features kept: %d\n", nfeatures)
    printf("#%6s %6s %6s %6s %6s %6s %6s %10s %s\n", "Mthod", "ACC", "H**O",
           "COMPL", "VM", "ARAND", "MI", "CH-scr", "N-clus")
Example #28
def print_header( ) :
    printf( "# %5s  %6s  %6s  %6s  %5s %5s %5s\n", "Mthd", "Acc", "Prcsn", "Recall", "Elap", "Kclust", "Solvr" )
Example #29
# -- parse command line and convert to convenience variables -----------------------------------------------
flags = {  # define possible flags and the default: map key, type, default value
    "-a": ("pr_ave", str_flag, "binary"),
    "-N": ("normalise", bool_flag, True),
    "-f": ("classfirst", bool_flag, False),
    "-k": ("kernel_type", str_flag, "linear"),
    "-l": ("learn_curve", bool_flag,
           True),  # generate learning curve unless -l given
    "-s": ("rstate", int_flag, 100),
    "-t": ("tsize", int_flag, 30)
}
opts = {}  # map where option values or defaults come back
pparms = parse_args(flags, opts, "training-file [testing-file]")

if pparms is None or len(pparms) < 1:
    printf("missing filename on command line\n")
    sys.exit(1)

kernel_type = opts["kernel_type"]
if kernel_type not in valid_ktypes:
    printf("[FAIL] %s is not a valid kernel type for -k parameter\n",
           kernel_type)
    printf("\tvalid types are: linear, polynomial, or RBF\n")
    sys.exit(1)

filen = pparms[0]  # raw data file mandatory first parameter
pr_ave = opts["pr_ave"]
gen_lc = opts["learn_curve"]
tsize = opts[
    "tsize"] / 100  # size of the raw data to resrve for validation (pct)
rstate = opts["rstate"]  # random state value
Example #30
def print_stats( rmethod, acc, elapsed, kclusters, precsn, recall, method ) :
    printf( "  %5s %6.2f%% %6.2f%% %6.2f%% %5ds %5d %5s\n", rmethod, acc*100, precsn*100, recall*100, elapsed, kclusters, method )