def eval_loss_cpu_thread(tuple_shape):
    global EVAL_LOSS_CPU_IN_QUEUE
    global EVAL_LOSS_GPU_IN_QUEUE
    current_epoch = EPOCH
    meta = load_csv(
        os.path.join(SHUFFLED_ROOT,
                     '{}_{:03d}.csv'.format(OTHER_REF_SET, current_epoch)))
    xy = get_xy(meta)
    ref_tree = KDTree(xy)
    yaw = np.array(meta['yaw'], dtype=float)
    while True:
        t = time()
        original_indices = EVAL_LOSS_CPU_IN_QUEUE.get()
        if EPOCH != current_epoch:
            current_epoch = EPOCH
            meta = load_csv(
                os.path.join(
                    SHUFFLED_ROOT,
                    '{}_{:03d}.csv'.format(OTHER_REF_SET, current_epoch)))
            xy = get_xy(meta)
            ref_tree = KDTree(xy)
            yaw = np.array(meta['yaw'], dtype=float)
        distances, image_info, used_indices = get_tuple(
            original_indices, tuple_shape, False, meta, xy, yaw, ref_tree)
        if len(image_info) == TUPLES_PER_BATCH * sum(tuple_shape):
            images = load_images(image_info)
            EVAL_LOSS_GPU_IN_QUEUE.put((distances, images), block=True)
        EVAL_LOSS_CPU_IN_QUEUE.task_done()
        print('Loaded eval loss tuples in {}s.'.format(time() - t))
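# Note: get_xy() is used throughout this module but not defined in this section.
# A minimal sketch of the assumed behaviour (hypothetical, not necessarily the
# original implementation): it reads the 'easting'/'northing' columns from the
# meta dict returned by load_csv() and returns an N x 2 float array. The column
# order (easting, northing) is an assumption.
def get_xy(meta):
    """Return an N x 2 array of (easting, northing) coordinates as floats."""
    return np.array([(float(e), float(n))
                     for e, n in zip(meta['easting'], meta['northing'])])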
def sample_anchors(shuffled_root, cluster_root, out_root, s, mode, r, epoch):
    train_meta = load_csv(
        os.path.join(shuffled_root, '{}_{}_{:03d}.csv'.format(s, mode, epoch)))
    train_xy = get_xy(train_meta)
    out_file = os.path.join(out_root, '{}_{}_{}_{:03d}.csv'.format(s, mode, r, epoch))
    if not os.path.exists(out_file):
        ref_meta = load_csv(
            os.path.join(cluster_root, '{}_{}_{}.csv'.format(s, mode, r)))
        ref_xy = get_xy(ref_meta)

        # Sample reference images (random image within r/2 of each reference location)
        ref_tree = KDTree(train_xy)
        ref_neighbors = ref_tree.query_radius(ref_xy, r=1, return_distance=False)
        anchors = [np.random.choice(potential_anchors)
                   for potential_anchors in ref_neighbors]
        np.random.shuffle(anchors)
        anchor_indices = {'idx': anchors}
        save_csv(anchor_indices, out_file)
    else:
        anchor_indices = load_csv(out_file)
    anchor_xy = np.array([train_xy[int(i), :] for i in anchor_indices['idx']])
    out_img = os.path.join(out_root, '{}_{}_{}_{}.png'.format(s, mode, r, epoch))
    plt.clf()
    f, (ax1) = plt.subplots(1, 1, sharey=False)
    f.set_figheight(50)
    f.set_figwidth(50)
    ax1.scatter(anchor_xy[:, 0], anchor_xy[:, 1], c=np.arange(len(anchor_xy)))
    plt.savefig(out_img)
def get_l_based_fixed_localization_reference(in_root, out_root, s, r):
    out_txt = os.path.join(out_root, '{}_ref_l_{}.txt'.format(s, int(r)))
    out_csv = os.path.join(out_root, '{}_ref_l_{}.csv'.format(s, int(r)))
    if not os.path.exists(out_csv):
        meta = load_csv(os.path.join(
            in_root, '{}_ref.csv'.format(s)))  # Not using query locations for this
        l = np.array(meta['l'], dtype=float).reshape(-1, 1)
        ll = np.arange(math.floor(l[-1]), step=r).reshape(-1, 1)
        l_tree = KDTree(l)
        i_l = l_tree.query(ll, return_distance=False, k=1)
        i_l = np.squeeze(i_l)
        save_txt('\n'.join(['{}'.format(i) for i in i_l]), out_txt)
        selected_meta = dict()
        for key in meta.keys():
            selected_meta[key] = [meta[key][i] for i in i_l]
        save_csv(selected_meta, out_csv)
    else:
        selected_meta = load_csv(out_csv)
    out_folder = os.path.join(out_root, '{}_ref_l_{}'.format(s, int(r)))
    if not os.path.exists(out_folder):
        os.makedirs(out_folder)
    for i, (d, f, t) in tqdm(
            enumerate(zip(selected_meta['date'], selected_meta['folder'],
                          selected_meta['t']))):
        f = int(f)
        img = load_img(img_path((d, f, t)))
        save_img(img, os.path.join(
            out_folder, '{:04d}_{}_{:02d}_{}.png'.format(i, d, f, t)))
def evaluate_localization(global_step, ref_set_name, query_set_name, mode,
                          out_name, tuple_shape, writer, epoch):
    # Get ref features
    ref_meta = load_csv(
        os.path.join(LOC_REF_ROOT, '{}_{}.csv'.format(ref_set_name, EVAL_REF_R)))
    num_ref = len(ref_meta['t'])
    padding = np.zeros(TUPLES_PER_BATCH * sum(tuple_shape) -
                       (num_ref % (TUPLES_PER_BATCH * sum(tuple_shape))),
                       dtype=int)
    ref_image_info = [(ref_meta['date'][i], ref_meta['folder'][i], ref_meta['t'][i])
                      for i in np.concatenate((np.arange(num_ref), padding))]
    ref_features = extract_features(ref_image_info, tuple_shape)
    ref_features = np.array(ref_features[0:num_ref])
    ref_image_info = ref_image_info[0:num_ref]
    ref_xy = get_xy(ref_meta)

    # Get query features
    query_meta = load_csv(
        os.path.join(SHUFFLED_ROOT, '{}_{:03d}.csv'.format(query_set_name, epoch)))
    test_number = (global_step // EVAL_STEP)
    query_indices = np.arange(test_number * NUM_EVAL_QUERIES,
                              (test_number + 1) * NUM_EVAL_QUERIES) \
        % len(query_meta['t'])
    padding = np.zeros(TUPLES_PER_BATCH * sum(tuple_shape) -
                       (NUM_EVAL_QUERIES % (TUPLES_PER_BATCH * sum(tuple_shape))),
                       dtype=int)
    query_image_info = [(query_meta['date'][i], query_meta['folder'][i], query_meta['t'][i])
                        for i in np.concatenate((query_indices, padding))]
    query_features = np.array(extract_features(
        query_image_info, tuple_shape))[:len(query_indices), :]
    query_xy = np.array(
        [xy for i, xy in enumerate(get_xy(query_meta)) if i in query_indices])

    ref_feature_tree = KDTree(ref_features)
    nearest_latent_dists, nearest_latent_indices = ref_feature_tree.query(
        query_features, k=5)
    ref_xy_tree = KDTree(ref_xy)
    nearest_d_dist, nearest_d_indices = ref_xy_tree.query(query_xy, k=1)

    # CPU part of evaluation is done asynchronously
    worker = Thread(target=evaluate_localization_thread,
                    args=(global_step, mode, nearest_d_dist, nearest_d_indices,
                          nearest_latent_indices, out_name, query_image_info,
                          query_xy, ref_image_info, ref_xy, writer))
    worker.setDaemon(True)
    worker.start()
    return
def create_reference(s):
    date = getattr(sys.modules[__name__], '{}_ref_date'.format(s))
    out_file = os.path.join(out_root, '{}_{}_geodesic.csv'.format(s, date))
    if not os.path.exists(out_file):
        data = load_csv(os.path.join(in_root, 'clean_{}.csv'.format(s)))
        ref_data = dict()
        for key in data.keys():
            ref_data[key] = [
                e for e, d in zip(data[key], data['date']) if d == date
            ]
        ref_xy = [(float(x), float(y))
                  for x, y in zip(ref_data['easting'], ref_data['northing'])]
        ref_d = [0] + [
            math.sqrt((p[0] - q[0])**2 + (p[1] - q[1])**2)
            for p, q in zip(ref_xy[1:], ref_xy[:-1])
        ]
        ref_l = [sum(ref_d[:i]) for i in range(1, len(ref_data['date']) + 1)]
        vmin = min(ref_l)
        vmax = max(ref_l)
        ref_data['l'] = ref_l
        ref_yaw = np.array(ref_data['yaw'], dtype=float)
        plot_results(ref_xy, ref_yaw, ref_l, date, ref_data, s, vmin, vmax)
        save_csv(ref_data, out_file)
def get_tags(tag_root):
    tags = dict()
    all_tags = []
    for date in os.listdir(tag_root):
        tags[date] = load_csv(os.path.join(tag_root, date, 'tags.csv'))
        all_tags = list(set(all_tags + tags[date]))
    return tags, all_tags
def merge_dates(in_root, ins_root, out_root):
    # Find all dates with INS data (not all images have INS, but all INS should have images)
    all_dates = sorted(
        os.listdir(ins_root))  # Sort to make sure we always get the same order
    first = True
    all_info = dict()
    for date in all_dates:
        split_file = os.path.join(in_root, '{}.csv'.format(date))
        if not os.path.exists(split_file):
            print('Missing {}.'.format(split_file))
            continue
        date_info = load_csv(split_file)

        # Add date column
        num_entries = len(date_info['easting'])
        rep_date = [date] * num_entries
        date_info['date'] = rep_date

        if first:
            all_info = date_info
            first = False
        else:
            for key in all_info.keys():
                all_info[key] = all_info[key] + date_info[key]
    out_file = os.path.join(out_root, 'merged.csv')
    save_csv(all_info, out_file)
def get_eval_loss(global_step, test_writer, epoch):
    meta = load_csv(
        os.path.join(SHUFFLED_ROOT, '{}_{:03d}.csv'.format(OTHER_REF_SET, epoch)))
    test_number = (global_step // EVAL_STEP)
    actual_num_eval_queries = (NUM_EVAL_QUERIES // TUPLES_PER_BATCH) * TUPLES_PER_BATCH
    test_indices = np.arange(
        test_number * actual_num_eval_queries,
        (test_number + 1) * actual_num_eval_queries) % len(meta['t'])
    batched_indices = np.reshape(test_indices, (-1, TUPLES_PER_BATCH))

    # Start queues
    for index_batch in batched_indices:
        EVAL_LOSS_CPU_IN_QUEUE.put(index_batch)

    # Wait for completion & order output
    EVAL_LOSS_CPU_IN_QUEUE.join()
    EVAL_LOSS_GPU_IN_QUEUE.join()
    eval_losses = list(EVAL_LOSS_GPU_OUT_QUEUE.queue)
    EVAL_LOSS_GPU_OUT_QUEUE.queue.clear()
    if len(eval_losses) > 0:
        summary = tf.Summary()
        loss = np.mean(eval_losses)
        summary.value.add(tag='loss', simple_value=loss)
        log('Other region loss: {}'.format(loss))
        test_writer.add_summary(summary, global_step)
    else:
        log('Evaluated but got no valid losses.')
def downsize_images(task_id, max_side, img_root, ins_root, tar_root,
                    out_img_root, out_root, cams):
    # Find all dates with INS data (not all images have INS, but all INS should have images)
    all_dates = sorted(
        os.listdir(ins_root))  # Sort to make sure we always get the same order
    date = all_dates[int(task_id) - 1]
    print(date)
    out_file = os.path.join(out_root, 'img_info_{}'.format(max_side),
                            '{}.csv'.format(date))
    if os.path.exists(out_file):
        print('Output already exists.')
        return
    imgs = load_csv(os.path.join(img_root, date, 'stereo.timestamps'),
                    has_header=False, delimiter=' ', keys=['t', 'folder'])
    cam = oxford_camera.CameraModel(cams, '/stereo/centre/')
    exposures = [0] * len(imgs['t'])
    max_folder = max(np.array(imgs['folder'], dtype=int))
    if date == '2015-09-02-10-37-32':
        max_folder = 4  # Folders 5 and 6 are missing from the website
    imgs['t'] = [t for f, t in zip(imgs['folder'], imgs['t']) if int(f) <= max_folder]
    imgs['folder'] = [f for f in imgs['folder'] if int(f) <= max_folder]
    for folder in range(1, max_folder + 1):
        filename = os.path.join(tar_root,
                                '{}_stereo_centre_{:02d}.tar'.format(date, folder))
        print(filename)
        if not os.path.exists(filename):
            print("MISSING!!")
            save_txt(txt=filename, mode='a',
                     out_file=os.path.join(out_root, 'missing.txt'))
            continue  # Skip missing archives instead of failing on tarfile.open
        with tarfile.open(filename) as archive:
            print(archive)
            for entry in archive.getmembers():
                img_name = os.path.basename(entry.name)
                if '.png' not in img_name:
                    continue
                ts = img_name.split('.')[0]
                img_path = entry.name
                with archive.extractfile(archive.getmember(img_path)) as file:
                    timer = time.time()
                    index = imgs['t'].index(ts)  # Assuming that timestamps are not ordered
                    try:
                        img = oxford_image.load_image(file, cam)  # One file has an unloadable image...
                        img = resize_img(img, max_side)
                        exposures[index] = sum(np.array(img).flatten())
                        out_img_folder = os.path.join(
                            out_img_root,
                            '{}_stereo_centre_{:02d}'.format(date, folder))
                        if not os.path.exists(out_img_folder):
                            os.makedirs(out_img_folder)
                        out_img_path = os.path.join(out_img_folder, img_name)
                        save_img(img, out_img_path)
                        print('Processed {} in {}s.'.format(ts, time.time() - timer))
                    except Exception:
                        del exposures[index]
                        del imgs['t'][index]
                        del imgs['folder'][index]
    imgs['exposure'] = exposures
    save_csv(imgs, out_file)
def get_splits(task_id, grids, in_root, ins_root, out_root):
    # Find all dates with INS data (not all images have INS, but all INS should have images)
    all_dates = sorted(
        os.listdir(ins_root))  # Sort to make sure we always get the same order
    date = all_dates[int(task_id) - 1]
    print(date)
    out_file = os.path.join(out_root, '{}.csv'.format(date))
    if os.path.exists(out_file):
        print('Already calculated {}.'.format(out_file))
        return
    xy_file = os.path.join(in_root, '{}.csv'.format(date))
    if not os.path.exists(xy_file):
        print('Missing {}.'.format(xy_file))
        return
    xy = load_csv(xy_file)
    X = [0 if math.isnan(float(e)) else int(float(e) - 619500.0) for e in xy['easting']]
    Y = [0 if math.isnan(float(n)) else int(5736480.0 - float(n)) for n in xy['northing']]
    out_img_grid = os.path.join(out_root, '{}_grid.png'.format(date))
    draw_grid(X, Y, out_img_grid)
    out_img_scatter = os.path.join(out_root, '{}_scatter.png'.format(date))
    plt.clf()
    plt.scatter(np.array(xy['easting'], dtype=float),
                np.array(xy['northing'], dtype=float),
                c=np.array(xy['yaw'], dtype=float))
    plt.savefig(out_img_scatter)
    for grid_name in grids.keys():
        grid = cv2.imread(grids[grid_name])
        grid = np.asarray(grid, dtype=np.uint8)  # Fix for failing img loading
        in_fold = list()
        for x, y in zip(X, Y):
            if x < 0 or y < 0 or x >= grid.shape[1] or y >= grid.shape[0]:
                in_fold.append(0)
            elif grid[y, x, 0] > 0:  # All color channels are the same
                in_fold.append(1)
            else:
                in_fold.append(0)
        xy[grid_name] = in_fold
    max_assigned = [a1 + a2 + a3 for a1, a2, a3
                    in zip(xy['train'], xy['test'], xy['val'])]
    assert max(max_assigned) <= 1, 'Please increase in_fold grid threshold.'
    for grid_name in grids.keys():
        X_g = [x for x, in_fold in zip(X, xy[grid_name]) if in_fold == 1]
        Y_g = [y for y, in_fold in zip(Y, xy[grid_name]) if in_fold == 1]
        print('Found {} imgs in {} for {}.'.format(len(X_g), grid_name, date))
        out_img_file = os.path.join(out_root, '{}_{}.png'.format(date, grid_name))
        draw_grid(X_g, Y_g, out_img_file)
    save_csv(xy, out_file)
def train_cpu_thread(tuple_shape):
    global TRAIN_CPU_IN_QUEUE
    global TRAIN_GPU_IN_QUEUE
    global USED_IMAGE_LOCK
    global USED_IMAGES
    current_epoch = EPOCH
    meta = load_csv(
        os.path.join(SHUFFLED_ROOT,
                     '{}_{:03d}.csv'.format(LOCAL_REF_SET, current_epoch)))
    xy = get_xy(meta)
    ref_tree = KDTree(xy)
    yaw = np.array(meta['yaw'], dtype=float)
    while True:
        t = time()
        original_indices = TRAIN_CPU_IN_QUEUE.get()
        if EPOCH != current_epoch:
            current_epoch = EPOCH
            meta = load_csv(
                os.path.join(
                    SHUFFLED_ROOT,
                    '{}_{:03d}.csv'.format(LOCAL_REF_SET, current_epoch)))
            xy = get_xy(meta)
            ref_tree = KDTree(xy)
            yaw = np.array(meta['yaw'], dtype=float)
        distances, image_info, used_indices = get_tuple(
            original_indices, tuple_shape, True, meta, xy, yaw, ref_tree)
        if len(image_info) == TUPLES_PER_BATCH * sum(tuple_shape):
            images = load_images(image_info)
            TRAIN_GPU_IN_QUEUE.put((distances, images), block=True)
            with USED_IMAGE_LOCK:
                USED_IMAGES.update(used_indices)
        else:
            log('Faulty training batch... ')
            log(image_info)
        TRAIN_CPU_IN_QUEUE.task_done()
        print('Loaded train tuples in {}s.'.format(time() - t))
def get_greedy_fixed_localization_reference(in_root, out_root, s, r):
    out_file = os.path.join(out_root, '{}_greedy_{}_ref.txt'.format(s, r))
    if not os.path.exists(out_file):
        meta = load_csv(os.path.join(
            in_root, '{}_ref.csv'.format(s)))  # Not using query locations for this
        xy = np.array([(e, n) for e, n in zip(meta['northing'], meta['easting'])],
                      dtype=float)
        ref_ids = greedy(xy, 1)
        print(len(ref_ids))
        save_txt('\n'.join(['{}'.format(i) for i in ref_ids]), out_file)
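# Note: greedy() is not defined in this section. A minimal sketch of the assumed
# behaviour (hypothetical, not necessarily the original implementation): greedily
# keep every point that is at least min_dist away from all previously kept points,
# and return the indices of the kept points.
def greedy(xy, min_dist):
    """Greedy spatial subsampling: indices of points spaced >= min_dist apart."""
    kept = []
    for i, p in enumerate(xy):
        if all(np.linalg.norm(p - xy[j]) >= min_dist for j in kept):
            kept.append(i)
    return kept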
def infer():
    with tf.Graph().as_default() as graph:
        print("In Graph")
        ops, tuple_shape = build_inference_model()
        sess = restore_weights()

        # For better gpu utilization, loading processes and gpu inference are done in separate threads.
        # Start CPU threads
        num_loader_threads = 6
        for i in range(num_loader_threads):
            worker = Thread(target=cpu_thread)
            worker.setDaemon(True)
            worker.start()

        # Start GPU threads
        worker = Thread(target=gpu_thread, args=(sess, ops))
        worker.setDaemon(True)
        worker.start()

        csv_file = os.path.join(CSV_ROOT, '{}.csv'.format(SET))
        meta = load_csv(csv_file)
        num = len(meta['path'])

        # Clean list
        padding = [0 for i in range(IMAGES_PER_PASS - (num % IMAGES_PER_PASS))]
        image_info = [(meta['path'][i])
                      for i in np.concatenate((np.arange(num), np.array(padding)))]
        padded_num = len(image_info)
        batched_indices = np.reshape(np.arange(padded_num),
                                     (-1, TUPLES_PER_BATCH * sum(tuple_shape)))
        batched_image_info = np.reshape(
            image_info, (-1, TUPLES_PER_BATCH * sum(tuple_shape)))
        for batch_indices, batch_image_info in zip(batched_indices, batched_image_info):
            CPU_IN_QUEUE.put((batch_indices, batch_image_info))

        # Wait for completion & order output
        CPU_IN_QUEUE.join()
        GPU_IN_QUEUE.join()
        feature_pairs = list(GPU_OUT_QUEUE.queue)
        GPU_OUT_QUEUE.queue.clear()
        features = [[]] * padded_num
        for pair in feature_pairs:
            for i, f in zip(pair[0], pair[1]):
                features[i] = f
        features = features[:num]
        save_pickle(
            features, os.path.join(OUT_ROOT, '{}_{}.pickle'.format(SET, OUT_NAME)))
def cluster(in_root, out_root, s, mode, r):
    out_file = os.path.join(out_root, '{}_{}_{}.pickle'.format(s, mode, r))
    meta_file = os.path.join(in_root, '{}_{}_000.csv'.format(s, mode))
    meta = load_csv(meta_file)
    if not os.path.exists(out_file):
        date = getattr(sys.modules[__name__], '{}_ref_date'.format(s))
        temp_meta = dict()
        for key in meta.keys():
            temp_meta[key] = [
                e for e, d in zip(meta[key], meta['date']) if d in date
            ]
        t_idx = np.argsort(temp_meta['t'])
        date_meta = dict()
        for key in meta.keys():
            date_meta[key] = [temp_meta[key][i] for i in t_idx]
        print(len(date_meta['t']))
        xy = get_xy(date_meta)
        ref_xy = [xy[0, :]]
        ref_idx = [0]
        for i in tqdm(range(len(date_meta['t']))):
            if sum((xy[i, :] - ref_xy[-1])**2) > r**2:
                ref_xy.append(xy[i, :])
                ref_idx.append(i)
        ref_xy = np.array(ref_xy)
        save_pickle([ref_xy, date_meta, ref_idx], out_file)
    else:
        ref_xy, date_meta, ref_idx = load_pickle(out_file)
    print('{}: {}'.format(s, len(ref_idx)))
    out_img = os.path.join(out_root, '{}_{}_{}.png'.format(s, mode, r))
    plt.clf()
    f, (ax1) = plt.subplots(1, 1, sharey=False)
    f.set_figheight(50)
    f.set_figwidth(50)
    ax1.scatter(ref_xy[:, 0], ref_xy[:, 1], c=np.arange(len(ref_xy)))
    plt.savefig(out_img)
    out_meta = dict()
    for key in meta.keys():
        out_meta[key] = [date_meta[key][i] for i in ref_idx]
    out_file = os.path.join(out_root, '{}_{}_{}.csv'.format(s, mode, r))
    save_csv(out_meta, out_file)
def shuffle(in_root, out_root, s, mode, num_epochs):
    meta = load_csv(os.path.join(in_root, '{}_{}.csv'.format(
        s, mode)))  # Not using query locations for this
    for e in range(num_epochs):
        out_file = os.path.join(out_root, '{}_{}_{:03d}.csv'.format(s, mode, e))
        if os.path.exists(out_file):
            print('{} exists. Not recalculating.'.format(out_file))
        else:
            print('Shuffling {}.'.format(out_file))
            shuffled_indices = np.random.permutation(len(meta['t']))
            shuffled_meta = dict()
            for key in meta.keys():
                shuffled_meta[key] = [meta[key][i] for i in shuffled_indices]
            save_csv(shuffled_meta, out_file)
def set_aside_queries(in_root, folds, query_dates):
    num_per_fold = dict()
    for fold in folds:
        clean_file = os.path.join(in_root, '{}.csv'.format(fold))
        data = load_csv(clean_file)
        query_out = clean_file.replace(fold, '{}_query'.format(fold))
        ref_out = clean_file.replace(fold, '{}_ref'.format(fold))
        query_data = dict()
        ref_data = dict()
        for key in data.keys():
            query_data[key] = [el for el, date in zip(data[key], data['date'])
                               if date in query_dates]
            ref_data[key] = [el for el, date in zip(data[key], data['date'])
                             if date not in query_dates]
        num_per_fold['{}_query'.format(fold)] = len(query_data['t'])
        num_per_fold['{}_ref'.format(fold)] = len(ref_data['t'])
        save_csv(query_data, query_out)
        save_csv(ref_data, ref_out)
    save_csv(num_per_fold, os.path.join(in_root, 'num_per_fold.csv'))
def merge_parametrized(in_root, folds, cols_to_keep, out_root):
    files = os.listdir(in_root)
    meta_info = dict()
    full_data = dict()
    for c in cols_to_keep:
        full_data[c] = []
    for fold in folds:
        data = dict()
        date_count = dict()
        for c in cols_to_keep:
            data[c] = []
        fold_files = [f for f in files if f.split('_')[0] == fold]
        for file in fold_files:
            if '.csv' in file:
                date_data = load_csv(os.path.join(in_root, file))
                if len(date_data['t']) < 100:
                    # Dates with very few images indicate bad l alignment or bad INS estimates
                    continue
                for c in cols_to_keep:
                    data[c].extend(date_data[c])
                    full_data[c].extend(date_data[c])
                date_count[file.split('_')[1]] = len(date_data['t'])
        out_file = os.path.join(out_root, '{}.csv'.format(fold))
        save_csv(data, out_file)
        meta_info[fold] = len(data['t'])
        save_csv(date_count,
                 os.path.join(out_root, '{}_date_count.csv'.format(fold)))
    out_file = os.path.join(out_root, 'full.csv')
    save_csv(full_data, out_file)
    meta_info['full'] = len(full_data['t'])
    save_csv(meta_info, os.path.join(out_root, 'meta.csv'))
def train_one_epoch(sess, epoch, writers, saver, part_saver, tuple_shape):
    global GLOBAL_STEP
    global GLOBAL_STEP_LOCK
    global CACHED_FEATURE_LOCK
    global CACHED_FEATURE_INDICES
    global CACHED_FEATURES
    global CACHED_FEATURE_TREE
    global USED_IMAGE_LOCK
    global USED_IMAGES
    global REF_FEATURE_LOCK
    global REF_FEATURES
    global TRAIN_XY
    global TRAIN_XY_LOCK
    train_meta = load_csv(
        os.path.join(SHUFFLED_ROOT, '{}_{:03d}.csv'.format(LOCAL_REF_SET, epoch)))
    train_xy = get_xy(train_meta)
    with TRAIN_XY_LOCK:
        TRAIN_XY = train_xy
    anchor_indices = np.array(load_csv(
        os.path.join(
            ANCHOR_ROOT,
            '{}_{}_{:03d}.csv'.format(LOCAL_REF_SET, TRAIN_REF_R, epoch)))['idx'],
                              dtype=int)
    mining_count = 0
    for step in np.arange(len(anchor_indices), step=TUPLES_PER_BATCH):
        print(step)
        if step % EVAL_STEP == 0:
            TRAIN_CPU_IN_QUEUE.join()
            TRAIN_GPU_IN_QUEUE.join()
            log('EVALUATING')
            with GLOBAL_STEP_LOCK:
                global_step = GLOBAL_STEP  # Some steps produce invalid tuples, and are therefore skipped
            save_path = saver.save(sess, os.path.join(OUT_DIR, "checkpoint"),
                                   global_step=global_step)
            out_name = '{:02d}_{}'.format(epoch, os.path.basename(save_path))

            # Get loss for other region
            log('Calculating test loss.')
            get_eval_loss(global_step, writers['other'], epoch)

            # Test localization on other region
            evaluate_localization(global_step, OTHER_REF_SET, OTHER_QUERY_SET,
                                  'other', out_name, tuple_shape,
                                  writers['other'], epoch)

            # Evaluate localization on training region
            evaluate_localization(global_step, LOCAL_REF_SET, LOCAL_QUERY_SET,
                                  'local', out_name, tuple_shape,
                                  writers['local'], epoch)

        if step % MINING_STEP == 0:
            TRAIN_CPU_IN_QUEUE.join()
            TRAIN_GPU_IN_QUEUE.join()
            log('Caching features for hard negative mining.')
            mining_indices = np.arange(
                mining_count * MINING_CACHE_SIZE,
                (mining_count + 1) * MINING_CACHE_SIZE) % len(train_meta['t'])
            anchors_to_mine = np.array(anchor_indices[step:np.min(
                [step + MINING_STEP, len(anchor_indices)])])
            mining_indices = np.concatenate([mining_indices, anchors_to_mine])
            num_to_mine = len(mining_indices)
            padding = np.zeros(TUPLES_PER_BATCH * sum(tuple_shape) -
                               (num_to_mine % (TUPLES_PER_BATCH * sum(tuple_shape))),
                               dtype=int)
            image_info = [(train_meta['date'][i], train_meta['folder'][i], train_meta['t'][i])
                          for i in np.concatenate((mining_indices, padding))]
            with CACHED_FEATURE_LOCK:
                CACHED_FEATURES = np.array(
                    extract_features(image_info, tuple_shape)[:num_to_mine])
                CACHED_FEATURE_INDICES = mining_indices
                CACHED_FEATURE_TREE = KDTree(CACHED_FEATURES)
            mining_count = mining_count + 1

        if step % SAVE_STEP == 0:
            TRAIN_CPU_IN_QUEUE.join()
            TRAIN_GPU_IN_QUEUE.join()
            with GLOBAL_STEP_LOCK:
                global_step = GLOBAL_STEP  # Some steps produce invalid tuples, and are therefore skipped
            log('Saving model.')
            part_saver.save(sess, os.path.join(OUT_DIR, "part-checkpoint"),
                            global_step=global_step)

        # Train one step:
        TRAIN_CPU_IN_QUEUE.put(anchor_indices[step:step + TUPLES_PER_BATCH])

    # Finish training at end of epoch
    TRAIN_CPU_IN_QUEUE.join()
    TRAIN_GPU_IN_QUEUE.join()
mkdir(out_root)

# Oxford
place = 'oxford'


def img_path(info):
    date = info[0]
    folder = info[1]
    t = info[2]
    return os.path.join('datasets/oxford_512',
                        '{}_stereo_centre_{:02d}'.format(date, int(folder)),
                        '{}.png'.format(t))


# Preselected reference
preselected_ref = os.path.join(fs_root(),
                               'data/learnlarge/shuffled/train_ref_000.csv')
p_meta = load_csv(preselected_ref)
p_meta['path'] = [img_path((d, f, t))
                  for d, f, t in zip(p_meta['date'], p_meta['folder'], p_meta['t'])]
idxs_to_keep = np.linspace(0, len(p_meta['path']), num=N_SAMPLES,
                           endpoint=False, dtype=int)
for key in p_meta.keys():
    p_meta[key] = [p_meta[key][i] for i in idxs_to_keep]
save_csv(p_meta, os.path.join(out_root, '{}_pca.csv'.format(place)))

# Cold
place = 'cold'


def parse_cold_folder(path, pattern):
    all_files = get_recursive_file_list(path, pattern)
    all_files, TXYA = parse_file_list(all_files)
def get_grad_cam():
    with tf.Graph().as_default() as graph:
        print("In Graph")
        ops, tuple_shape = build_inference_model()
        sess = restore_weights()
        print('\n'.join([n.name for n in tf.all_variables()]))

        # For better gpu utilization, loading processes and gpu inference are done in separate threads.
        # Start CPU threads
        num_loader_threads = 3
        for i in range(num_loader_threads):
            worker = Thread(target=cpu_thread)
            worker.setDaemon(True)
            worker.start()

        worker = Thread(target=save_thread)
        worker.setDaemon(True)
        worker.start()

        # Start GPU threads
        worker = Thread(target=gpu_thread, args=(sess, ops))
        worker.setDaemon(True)
        worker.start()

        ref_meta = load_csv(REF_CSV)
        query_meta = load_csv(QUERY_CSV)
        ref_xy = get_xy(ref_meta)
        query_xy = get_xy(query_meta)
        [top_i, top_g_dists, top_f_dists, gt_i, gt_g_dist, ref_idx] = load_pickle(TOP_N_PICKLE)
        top_n = np.array(top_i)
        num = len(query_meta['path'])

        # Fewer queries for speed
        last_xy = query_xy[0, :]
        selected = [0]
        if QUERY_CSV.startswith('pittsburgh'):
            selected = np.linspace(0, num, 500, dtype=int)
        else:
            if 'freiburg' in QUERY_CSV:
                r = 0.5
            else:
                r = 2
            for i in range(num):
                if sum((query_xy[i, :] - last_xy)**2) > r**2:
                    last_xy = query_xy[i, :]
                    selected.append(i)
            selected = np.array(selected, dtype=int)
        xy_dists = pairwise_distances(query_xy, ref_xy, metric='euclidean')

        # Clean list
        image_info = [(query_meta['path'][i], ref_meta['path'][top_n[i, 0]])
                      for i in selected]
        image_dist = [(np.linalg.norm(query_xy[i] - ref_xy[top_n[i, 0]]))
                      for i in selected]
        batched_indices = np.reshape(selected, (-1, TUPLES_PER_BATCH))
        batched_image_info = np.reshape(image_info, (-1, TUPLES_PER_BATCH * 2))
        batched_distances = np.reshape(image_dist, (-1, TUPLES_PER_BATCH))
        for batch_indices, batch_image_info, batched_distance in zip(
                batched_indices, batched_image_info, batched_distances):
            CPU_IN_QUEUE.put(
                (batch_indices, batch_image_info, batched_distance))

        # Wait for completion & order output
        CPU_IN_QUEUE.join()
        GPU_IN_QUEUE.join()
        GPU_OUT_QUEUE.join()
def get_top_n():
    # Check if complete:
    ld_checkpoints = get_checkpoints('obm')
    ld_cp_names = []
    for cp in ld_checkpoints:
        cp_name = cp.split('/')[-2]
        cp_name = ''.join(os.path.basename(cp_name).split('.'))  # Removing '.'
        cp_name += '_e{}'.format(cp[-1])
        ld_cp_names.append(cp_name)
    if any([x in QUERY_LV_PICKLE for x in ld_cp_names]):
        L = [0.0, 0.3, 1.0, 5.0]
        D = [64, 128, 256, 512, 1024, 2048, 4096]
    else:
        L = [0.0]
        D = [256]
    complete = True
    for l in L:
        for d in D:
            out_folder = os.path.join(OUT_ROOT, 'l{}_dim{}'.format(l, d))
            name = ''.join(os.path.basename(QUERY_LV_PICKLE).split('.')[:-1])
            out_pickle = os.path.join(out_folder, '{}.pickle'.format(name))
            if not os.path.exists(out_pickle):
                complete = False
                break
        if not complete:
            break
    if complete:
        print('Skipping complete {}'.format(QUERY_LV_PICKLE))
        return

    ref_meta = load_csv(REF_CSV)
    query_meta = load_csv(QUERY_CSV)
    full_ref_xy = get_xy(ref_meta)
    full_query_xy = get_xy(query_meta)
    num_q = full_query_xy.shape[0]
    pca_f = np.array(load_pickle(PCA_LV_PICKLE))
    full_ref_f = np.array(load_pickle(REF_LV_PICKLE))
    full_query_f = np.array(load_pickle(QUERY_LV_PICKLE))
    full_xy_dists = pairwise_distances(full_query_xy, full_ref_xy, metric='euclidean')
    for d in D:
        print(d)
        pca = PCA(whiten=True, n_components=d)
        pca = pca.fit(pca_f)
        pca_ref_f = pca.transform(full_ref_f)
        pca_query_f = pca.transform(full_query_f)
        for l in L:
            print(l)
            out_folder = os.path.join(OUT_ROOT, 'l{}_dim{}'.format(l, d))
            mkdir(out_folder)
            name = ''.join(os.path.basename(QUERY_LV_PICKLE).split('.')[:-1])
            out_pickle = os.path.join(out_folder, '{}.pickle'.format(name))
            if os.path.exists(out_pickle):
                print('{} already exists. Skipping.'.format(out_pickle))
                continue
            ref_idx = [0]
            for i in range(len(full_ref_xy)):
                if sum((full_ref_xy[i, :] - full_ref_xy[ref_idx[-1], :])**2) >= l**2:
                    ref_idx.append(i)
            if len(ref_idx) < N:
                continue
            ref_f = np.array([pca_ref_f[i, :] for i in ref_idx])
            xy_dists = np.array([full_xy_dists[:, i] for i in ref_idx]).transpose()
            print('Building tree')
            ref_tree = KDTree(ref_f)
            print('Retrieving')
            top_f_dists, top_i = np.array(
                ref_tree.query(pca_query_f, k=N, return_distance=True,
                               sort_results=True))
            top_f_dists = np.array(top_f_dists)
            top_i = np.array(top_i, dtype=int)
            top_g_dists = [[xy_dists[q, r] for r in top_i[q, :]]
                           for q in range(num_q)]
            gt_i = np.argmin(xy_dists, axis=1)
            gt_g_dist = np.min(xy_dists, axis=1)

            # Translate to original indices
            top_i = [[ref_idx[r] for r in top_i[q, :]] for q in range(num_q)]
            gt_i = [ref_idx[r] for r in gt_i]
            save_pickle(
                [top_i, top_g_dists, top_f_dists, gt_i, gt_g_dist, ref_idx],
                out_pickle)
def get_top_n():
    ref_meta = load_csv(REF_CSV)
    query_meta = load_csv(QUERY_CSV)
    full_ref_xy = get_xy(ref_meta)
    full_query_xy = get_xy(query_meta)
    num_q = full_query_xy.shape[0]
    pca_f = np.array(load_pickle(PCA_LV_PICKLE))
    full_ref_f = np.array(load_pickle(REF_LV_PICKLE))
    full_query_f = np.array(load_pickle(QUERY_LV_PICKLE))
    full_xy_dists = pairwise_distances(full_query_xy, full_ref_xy, metric='euclidean')
    for d in DIMS:
        print(d)
        pca = PCA(whiten=True, n_components=d)
        pca = pca.fit(pca_f)
        pca_ref_f = pca.transform(full_ref_f)
        pca_query_f = pca.transform(full_query_f)
        for l in L:
            print(l)
            out_folder = os.path.join(OUT_ROOT, 'l{}_dim{}'.format(l, d))
            mkdir(out_folder)
            name = ''.join(os.path.basename(QUERY_LV_PICKLE).split('.')[:-1])
            out_pickle = os.path.join(out_folder, '{}.pickle'.format(name))
            if os.path.exists(out_pickle):
                print('{} already exists. Skipping.'.format(out_pickle))
                continue
            ref_idx = [0]
            for i in range(len(full_ref_xy)):
                if sum((full_ref_xy[i, :] - full_ref_xy[ref_idx[-1], :])**2) >= l**2:
                    ref_idx.append(i)
            if len(ref_idx) < N:
                continue
            ref_f = np.array([pca_ref_f[i, :] for i in ref_idx])
            xy_dists = np.array([full_xy_dists[:, i] for i in ref_idx]).transpose()
            print('Building tree')
            ref_tree = KDTree(ref_f)
            print('Retrieving')
            top_f_dists, top_i = np.array(
                ref_tree.query(pca_query_f, k=N, return_distance=True,
                               sort_results=True))
            top_f_dists = np.array(top_f_dists)
            top_i = np.array(top_i, dtype=int)
            top_g_dists = [[xy_dists[q, r] for r in top_i[q, :]]
                           for q in range(num_q)]
            gt_i = np.argmin(xy_dists, axis=1)
            gt_g_dist = np.min(xy_dists, axis=1)

            # Translate to original indices
            top_i = [[ref_idx[r] for r in top_i[q, :]] for q in range(num_q)]
            gt_i = [ref_idx[r] for r in gt_i]
            save_pickle(
                [top_i, top_g_dists, top_f_dists, gt_i, gt_g_dist, ref_idx],
                out_pickle)
def interpolate_xy(task_id, in_root, ins_root, out_root):
    # Find all dates with INS data (not all images have INS, but all INS should have images)
    all_dates = sorted(
        os.listdir(ins_root))  # Sort to make sure we always get the same order
    date = all_dates[int(task_id) - 1]
    out_file = os.path.join(out_root, '{}.csv'.format(date))
    if os.path.exists(out_file):
        # print('Already calculated {}.'.format(out_file))
        return
    imgs_file = os.path.join(in_root, '{}.csv'.format(date))
    if not os.path.exists(imgs_file):
        print('Missing {}: {}.'.format(task_id, imgs_file))
        return
    imgs = load_csv(imgs_file)
    ins = load_csv(os.path.join(ins_root, date, 'gps', 'ins.csv'))
    ins_ts = np.array(ins['timestamp'], dtype=int).reshape(
        (-1, 1))  # num_samples x num_features
    img_ts = np.array(imgs['t'], dtype=int).reshape((-1, 1))
    northing = np.array(ins['northing'], dtype=float)
    easting = np.array(ins['easting'], dtype=float)
    yaw = np.array(ins['yaw'], dtype=float)  # Yaw range: 0-2pi
    status = ins['ins_status']

    # INS measures are roughly 3 times more frequent than images
    mean_td_img = np.mean(
        [img_ts[i, 0] - img_ts[i - 1, 0] for i in range(1, img_ts.shape[0])])
    mean_td_ins = np.mean(
        [ins_ts[i, 0] - ins_ts[i - 1, 0] for i in range(1, ins_ts.shape[0])])
    print('Found {} times more ins measures than images.'.format(mean_td_img / mean_td_ins))
    print('The mean time between ins measures is {}.'.format(mean_td_ins))
    print('The mean time between img measures is {}.'.format(mean_td_img))

    ins_ts_tree = KDTree(ins_ts)
    d_closest, i_closest = ins_ts_tree.query(img_ts, 2)
    img_northing = [
        lin_ip(northing[i_c[0]], northing[i_c[1]], d_c[0], d_c[1])
        for d_c, i_c in zip(d_closest, i_closest)
    ]
    img_easting = [
        lin_ip(easting[i_c[0]], easting[i_c[1]], d_c[0], d_c[1])
        for d_c, i_c in zip(d_closest, i_closest)
    ]
    img_yaw = [
        lin_ip(yaw[i_c[0]], yaw[i_c[1]], d_c[0], d_c[1]) % (2 * pi)
        for d_c, i_c in zip(d_closest, i_closest)
    ]  # Yaw range: 0-2pi

    # Remove interpolations of unclean ins states
    ins_good = [0] * len(img_easting)
    for j, i_c in enumerate(i_closest):
        if status[i_c[0]] == 'INS_SOLUTION_GOOD' and status[
                i_c[1]] == 'INS_SOLUTION_GOOD':
            ins_good[j] = 1
    imgs['northing'] = img_northing
    imgs['easting'] = img_easting
    imgs['ins_good'] = ins_good
    imgs['yaw'] = img_yaw
    ic1 = [i_c[0] for i_c in i_closest]
    ic2 = [i_c[1] for i_c in i_closest]
    tn1 = [ins_ts[i, 0] for i in ic1]
    tn2 = [ins_ts[i, 0] for i in ic2]
    imgs['ic1'] = ic1  # Index of closest ins point
    imgs['ic2'] = ic2
    imgs['tn1'] = tn1  # Timestamp of closest ins point
    imgs['tn2'] = tn2
    save_csv(imgs, out_file)
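# Note: lin_ip() is used for the pose interpolation above and in parametrize()
# below, but is not defined in this section. A minimal sketch, assuming it does
# an inverse-distance-weighted linear interpolation between the two nearest
# samples (hypothetical, not necessarily the original implementation):
def lin_ip(v1, v2, d1, d2):
    """Interpolate v1 and v2 given their distances d1 and d2 to the target."""
    if d1 + d2 == 0:
        return v1
    return (v1 * d2 + v2 * d1) / (d1 + d2)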
def img_path(info):
    date = info[0]
    folder = info[1]
    t = info[2]
    return os.path.join('datasets/oxford',
                        '{}_stereo_centre_{:02d}'.format(date, int(folder)),
                        '{}.png'.format(t))


# Preselected reference
preselected_ref = os.path.join(
    fs_root(), 'data/learnlarge/clean_merged_parametrized/test.csv')
ref_date = '2014-12-02-15-30-08'
p_meta = load_csv(preselected_ref)
for key in p_meta.keys():
    p_meta[key] = [
        e for e, d in zip(p_meta[key], p_meta['date']) if d == ref_date
    ]
p_meta['path'] = [
    img_path((d, f, t))
    for d, f, t in zip(p_meta['date'], p_meta['folder'], p_meta['t'])
]
ref_xy = get_xy(p_meta)
save_csv(p_meta, os.path.join(list_out_root, '{}_ref.csv'.format(place)))
ax = axs[0, 2]
ax.plot(ref_xy[:, 0], ref_xy[:, 1],
        label='{} overcast reference images'.format(len(ref_xy)),
def clean(in_root, out_root, folds, cols_to_keep):
    merged_file = os.path.join(in_root, 'merged.csv')
    meta_file = os.path.join(out_root, 'meta.csv')
    meta_info = dict()
    merged = load_csv(merged_file)

    # Original number of imgs
    meta_info['total_imgs'] = len(merged['exposure'])

    # Valid ins
    valid_ins = np.array(merged['ins_good'], dtype=int)
    meta_info['valid_ins'] = sum(valid_ins)

    # Valid location on grid
    valid_grid = np.array(merged['full'], dtype=int)
    meta_info['valid_grid'] = sum(valid_grid)

    # Analyse and clean exposure
    # Visual inspection shows that images below 50'000'000 are very dark and above 110'000'000 very light
    exposures = np.array(merged['exposure'], dtype=float)
    low_exposure = np.percentile(exposures, 1)
    high_exposure = np.percentile(exposures, 99)
    print('Lo: {} \nHi: {}'.format(low_exposure, high_exposure))
    plt.clf()
    plt.hist(exposures, bins=10000, histtype='step')
    plt.xticks(rotation=90)
    plt.savefig(os.path.join(out_root, 'exposures.pdf'))
    valid_exposure = [
        1 if low_exposure < e < high_exposure else 0 for e in exposures
    ]
    meta_info['valid_exposures'] = sum(valid_exposure)

    # Manual cleaning
    valid_date = [1 if d not in bad_dates else 0 for d in merged['date']]
    meta_info['valid_date'] = sum(valid_date)

    # Get fully valid
    fully_valid = np.array(valid_exposure) * np.array(valid_grid) * np.array(
        valid_ins) * np.array(valid_date)
    meta_info['fully_valid'] = sum(fully_valid)

    # Save for different folds
    for fold in folds:
        fold_valid = np.array(fully_valid) * np.array(merged[fold], dtype=int)
        meta_info['valid_{}'.format(fold)] = sum(fold_valid)
        out_data = dict()
        for col in cols_to_keep:
            out_col = [e for e, v in zip(merged[col], fold_valid) if v == 1]
            out_data[col] = out_col
        clean_file = os.path.join(out_root, 'clean_{}.csv'.format(fold))
        save_csv(out_data, clean_file)

        # Plot fold exposure:
        fold_exposure = [e for e, v in zip(exposures, fold_valid) if v == 1]
        plt.clf()
        plt.hist(fold_exposure, bins=10000, histtype='step')
        plt.xticks(rotation=90)
        plt.savefig(os.path.join(out_root, 'exposures_{}.pdf'.format(fold)))
    save_csv(meta_info, meta_file)
    dict_to_bar(meta_info, os.path.join(out_root, 'meta_info.pdf'))
if not os.path.exists(out_root):
    os.makedirs(out_root)
if not os.path.exists(log_root):
    os.makedirs(log_root)

settings = list()

# Get settings:
sets = ['train', 'test', 'val']
for s in sets:
    dates = sorted(
        list(
            set(
                load_csv(os.path.join(in_root,
                                      'clean_{}.csv'.format(s)))['date'])))
    for date in dates:
        if not (s == 'val' and date in ['2014-05-14-13-59-05',
                                        '2014-05-14-13-53-47']):  # Wrong direction
            settings.append((s, date))

if task_id == -1:
    for s in sets:
        create_reference(s)
    create_array_job(len(settings), log_root)
else:
    setting = settings[task_id - 1]
    parametrize(s=setting[0], date=setting[1])
def clean_parametrization(in_root, folds, cols_to_keep, out_root):
    full_data = dict()
    full_ref_data = dict()
    full_query_data = dict()
    for key in cols_to_keep:
        full_data[key] = []
        full_ref_data[key] = []
        full_query_data[key] = []
    meta = dict()
    for s in folds:
        ref_data = load_csv(os.path.join(in_root, '{}_ref.csv'.format(s)))
        query_data = load_csv(os.path.join(
            in_root, '{}_query.csv'.format(s)))  # Not used to detect ref outliers
        for key in ['l', 'northing', 'easting']:
            ref_data[key] = np.array(ref_data[key], dtype=float)
            query_data[key] = np.array(query_data[key], dtype=float)
        l_max = max(ref_data['l'])
        num_bins = math.ceil(l_max)
        ref_member_path = os.path.join(out_root,
                                       '{}_ref_bin_raw_members.pickle'.format(s))
        if not os.path.exists(ref_member_path):
            bin_members = [[i for i in range(len(ref_data['t']))
                            if math.floor(ref_data['l'][i]) == j]
                           for j in tqdm(range(num_bins))]
            save_pickle(bin_members, ref_member_path)
        else:
            bin_members = load_pickle(ref_member_path)
        ref_bin_xy_path = os.path.join(out_root,
                                       '{}_ref_bin_raw_xy.pickle'.format(s))
        if not os.path.exists(ref_bin_xy_path):
            ref_bin_xy = [
                np.median(np.array([[ref_data['easting'][i], ref_data['northing'][i]]
                                    for i in bin_members[j]]), axis=0)
                if len(bin_members[j]) else np.array([-1, -1])
                for j in tqdm(range(num_bins))]
            save_pickle(ref_bin_xy, ref_bin_xy_path)
        else:
            ref_bin_xy = load_pickle(ref_bin_xy_path)
        meta['{}_ref'.format(s)], clean_ref_data = find_and_remove_errors(
            'ref', out_root, ref_bin_xy, ref_data, s)

        # Cleaning query files to allow for more efficient testing, should not influence performance
        # (other than possibly excluding faulty gps/ins 'ground truth', which we don't want anyways)
        meta['{}_query'.format(s)], clean_query_data = find_and_remove_errors(
            'query', out_root, ref_bin_xy, query_data, s)
        fold_clean_data = dict()
        for key in clean_ref_data.keys():
            fold_clean_data[key] = []
            fold_clean_data[key].extend(clean_ref_data[key])
            fold_clean_data[key].extend(clean_query_data[key])
            full_data[key].extend(clean_ref_data[key])
            full_data[key].extend(clean_query_data[key])
            full_ref_data[key].extend(clean_ref_data[key])
            full_query_data[key].extend(clean_query_data[key])
        save_csv(fold_clean_data, os.path.join(out_root, '{}.csv'.format(s)))
    save_csv(full_data, os.path.join(out_root, 'full.csv'))
    save_csv(full_ref_data, os.path.join(out_root, 'full_ref.csv'))
    save_csv(full_query_data, os.path.join(out_root, 'full_query.csv'))
    save_csv(meta, os.path.join(out_root, 'meta.csv'))
def parametrize(s, date):
    ref_date = getattr(sys.modules[__name__], '{}_ref_date'.format(s))
    ref_file = os.path.join(out_root, '{}_{}_geodesic.csv'.format(s, ref_date))
    data = load_csv(os.path.join(in_root, 'clean_{}.csv'.format(s)))
    ref_data = load_csv(ref_file)
    ref_xy = [(float(x), float(y))
              for x, y in zip(ref_data['easting'], ref_data['northing'])]
    ref_l = np.array(ref_data['l'], dtype=float)
    ref_yaw = np.array(ref_data['yaw'], dtype=float)
    ref_tree = KDTree(np.array(ref_xy))
    vmin = min(ref_l)
    vmax = max(ref_l)
    date_data = dict()
    for key in data.keys():
        date_data[key] = [
            e for e, d in zip(data[key], data['date']) if d == date
        ]
    date_xy = [(float(x), float(y))
               for x, y in zip(date_data['easting'], date_data['northing'])]
    date_d = [0] + [
        math.sqrt((p[0] - q[0])**2 + (p[1] - q[1])**2)
        for p, q in zip(date_xy[1:], date_xy[:-1])
    ]
    date_l = [sum(date_d[:i]) for i in range(1, len(date_d) + 1)]
    date_yaw = np.array(date_data['yaw'], dtype=float)
    matched_l = np.zeros(len(date_yaw))
    matchable = []
    r = 20
    if s == 'val':
        r = 100
    date_ni, date_nd = ref_tree.query_radius(np.array(date_xy), r=r,
                                             return_distance=True,
                                             sort_results=True)
    current_l = 0
    latest_valid = 0
    for j, (yaw, ni, nd) in enumerate(zip(date_yaw, date_ni, date_nd)):
        if len(ni) < 2:
            continue
        angle_neighbors = [
            i for i in range(len(ni))
            if abs(yaw - ref_yaw[ni[i]]) % (2 * math.pi) < math.pi / 3
        ]
        ni = [ni[i] for i in angle_neighbors]
        nd = [nd[i] for i in angle_neighbors]
        if len(ni) < 2:
            continue
        potential_l = np.array([ref_l[i] for i in ni])
        if j == 0:
            threshold = 40
            if s == 'val':
                threshold = 5
            km = KMeans(n_clusters=2, random_state=0).fit(potential_l.reshape(-1, 1))
            if abs(km.cluster_centers_[0] - km.cluster_centers_[1]) > threshold:
                closest_center = km.predict(
                    np.array(current_l).reshape(-1, 1))[0]
                assignments = km.labels_
                l_neighbors = [
                    i for i, a in zip(range(len(ni)), assignments)
                    if a == closest_center
                ]
            else:
                l_neighbors = range(len(ni))
        else:
            l_neighbors = [
                i for i, l in enumerate(potential_l)
                if abs(current_l - date_l[latest_valid] + date_l[j] - l) < 500
            ]
        ni = [ni[i] for i in l_neighbors]
        nd = [nd[i] for i in l_neighbors]
        if len(ni) < 2:
            continue
        interp_l = lin_ip(ref_l[ni[0]], ref_l[ni[1]], nd[0], nd[1])
        current_l = interp_l
        latest_valid = j
        matched_l[j] = interp_l
        print(interp_l)
        matchable.append(j)
    if len(matchable) > 0:
        date_data['l'] = matched_l
        for key in ref_data.keys():
            date_data[key] = [date_data[key][i] for i in matchable]
        plot_results(date_xy, date_yaw, date_l, date, date_data, s, vmin, vmax)
        out_file = os.path.join(out_root, '{}_{}_geodesic.csv'.format(s, date))
        save_csv(date_data, out_file)
def plot_statistics(in_root, out_root, folds, tag_root):
    date_tags, all_tags = get_tags(tag_root)
    for fold in folds:
        print('Plotting {} statistics.'.format(fold))
        clean_file = os.path.join(in_root, '{}.csv'.format(fold))
        data = load_csv(clean_file)

        # Images per date
        images_per_date = Counter(data['date'])
        save_csv(images_per_date,
                 os.path.join(out_root, 'images_per_date_{}.csv'.format(fold)))
        dict_to_bar(images_per_date,
                    os.path.join(out_root, 'images_per_date_{}.pdf'.format(fold)))

        # Images/dates per tag, month and hour
        images_per_tag = dict.fromkeys(all_tags, 0)
        images_per_month = dict.fromkeys(range(1, 13), 0)
        images_per_hour = dict.fromkeys(range(0, 24), 0)
        dates_per_tag = dict.fromkeys(all_tags, 0)
        dates_per_month = dict.fromkeys(range(1, 13), 0)
        dates_per_hour = dict.fromkeys(range(0, 24), 0)
        for date in images_per_date.keys():
            month = int(date[5:7])
            hour = int(date[11:13])
            images_per_month[month] = images_per_date[date] + images_per_month[month]
            images_per_hour[hour] = images_per_date[date] + images_per_hour[hour]
            dates_per_month[month] = 1 + dates_per_month[month]
            dates_per_hour[hour] = 1 + dates_per_hour[hour]
            for tag in date_tags[date]:
                images_per_tag[tag] = images_per_date[date] + images_per_tag[tag]
                dates_per_tag[tag] = 1 + dates_per_tag[tag]
        save_csv(images_per_tag,
                 os.path.join(out_root, 'images_per_tag_{}.csv'.format(fold)))
        dict_to_bar(images_per_tag,
                    os.path.join(out_root, 'images_per_tag_{}.pdf'.format(fold)))
        save_csv(images_per_month,
                 os.path.join(out_root, 'images_per_month_{}.csv'.format(fold)))
        dict_to_bar(images_per_month,
                    os.path.join(out_root, 'images_per_month_{}.pdf'.format(fold)))
        new_months = OrderedDict()
        months = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
                  'August', 'September', 'October', 'November', 'December']
        for i in range(12):
            new_months[months[i]] = images_per_month[i + 1]
        save_csv(new_months,
                 os.path.join(out_root, 'images_per_month_pretty_{}.csv'.format(fold)))
        dict_to_bar(new_months,
                    os.path.join(out_root, 'images_per_month_pretty_{}.pdf'.format(fold)))
        save_csv(images_per_hour,
                 os.path.join(out_root, 'images_per_hour_{}.csv'.format(fold)))
        dict_to_bar(images_per_hour,
                    os.path.join(out_root, 'images_per_hour_{}.pdf'.format(fold)))
        new_hours = OrderedDict()
        for i in range(6, 22):
            new_hours['{:02d}:00'.format(i)] = images_per_hour[i]
        save_csv(new_hours,
                 os.path.join(out_root, 'images_per_pretty_hour_{}.csv'.format(fold)))
        dict_to_bar(new_hours,
                    os.path.join(out_root, 'images_per_pretty_hour_{}.pdf'.format(fold)))
        save_csv(dates_per_tag,
                 os.path.join(out_root, 'dates_per_tag_{}.csv'.format(fold)))
        dict_to_bar(dates_per_tag,
                    os.path.join(out_root, 'dates_per_tag_{}.pdf'.format(fold)))
        save_csv(dates_per_month,
                 os.path.join(out_root, 'dates_per_month_{}.csv'.format(fold)))
        dict_to_bar(dates_per_month,
                    os.path.join(out_root, 'dates_per_month_{}.pdf'.format(fold)))
        save_csv(dates_per_hour,
                 os.path.join(out_root, 'dates_per_hour_{}.csv'.format(fold)))
        dict_to_bar(dates_per_hour,
                    os.path.join(out_root, 'dates_per_hour_{}.pdf'.format(fold)))
def get_top_n():
    name = os.path.basename(QUERY_LV_PICKLE).split('.')[0]
    print(name)
    sampling = 1
    out_png_1 = os.path.join(
        OUT_ROOT, '{}_top{}_t{}_path_{}_s{}.pdf'.format(name, N, T, PERPLEXITY, sampling))
    out_png_1c = os.path.join(
        OUT_ROOT, '{}_top{}_t{}_ct_{}_s{}.pdf'.format(name, N, T, PERPLEXITY, sampling))
    out_pickle = os.path.join(
        OUT_ROOT, '{}_top{}_t{}_{}_s{}.pickle'.format(name, N, T, PERPLEXITY, sampling))
    if os.path.exists(out_pickle):
        print('{} already exists. Skipping.'.format(out_pickle))
        return
    pca_f = np.array(load_pickle(PCA_LV_PICKLE))
    pca = PCA(whiten=True, n_components=256)
    pca = pca.fit(pca_f)
    query_meta = load_csv(QUERY_CSV)
    query_xy = get_xy(query_meta)[::sampling]
    l_query_f = np.array(load_pickle(QUERY_LV_PICKLE))
    l_query_f = l_query_f[::sampling, :]
    query_f = pca.transform(l_query_f)
    Y = TSNE(n_components=2, perplexity=PERPLEXITY).fit_transform(query_f)
    Y[:, 0] = (Y[:, 0] - min(Y[:, 0])) / (max(Y[:, 0]) - min(Y[:, 0]))
    Y[:, 1] = (Y[:, 1] - min(Y[:, 1])) / (max(Y[:, 1]) - min(Y[:, 1]))
    plt.clf()
    plt.figure(figsize=(3, 3))
    x = [p[0] for p in query_xy]
    y = [p[1] for p in query_xy]
    x_max = np.max(x)
    x_min = np.min(x)
    y_max = np.max(y)
    y_min = np.min(y)
    x_span = float(x_max - x_min)
    y_span = float(y_max - y_min)
    query_color = [(0, float(p[1] - y_min) / y_span, float(p[0] - x_min) / x_span)
                   for p in query_xy]
    s1 = plt.scatter(x, y, c=query_color, s=2)
    s1.set_rasterized(True)
    plt.savefig(out_png_1, bbox_inches='tight', pad_inches=0)
    plt.clf()
    plt.figure(figsize=(3, 3))
    s2 = plt.scatter(Y[:, 0], Y[:, 1], c=query_color, s=2)
    s2.set_rasterized(True)
    plt.savefig(out_png_1c, bbox_inches='tight', pad_inches=0)