Example #1
def infer():
    with tf.Graph().as_default() as graph:
        print("In Graph")

        ops, tuple_shape = build_inference_model()
        sess = restore_weights()

        # For better GPU utilization, CPU-side image loading and GPU inference
        # run in separate threads.
        # Start CPU loader threads
        num_loader_threads = 6
        for i in range(num_loader_threads):
            worker = Thread(target=cpu_thread)
            worker.daemon = True
            worker.start()

        # Start GPU thread
        worker = Thread(target=gpu_thread, args=(sess, ops))
        worker.daemon = True
        worker.start()

        csv_file = os.path.join(CSV_ROOT, '{}.csv'.format(SET))
        meta = load_csv(csv_file)
        num = len(meta['path'])

        # Pad the path list so its length is a multiple of IMAGES_PER_PASS
        # ((-num) % IMAGES_PER_PASS is 0 when num already divides evenly)
        padding = [0 for i in range((-num) % IMAGES_PER_PASS)]
        image_info = [meta['path'][i]
                      for i in np.concatenate((np.arange(num),
                                               np.array(padding, dtype=int)))]
        padded_num = len(image_info)

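        # Group indices and image paths into rows of TUPLES_PER_BATCH * sum(tuple_shape)
        # images, i.e. one row per queued inference pass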
        batched_indices = np.reshape(np.arange(padded_num),
                                     (-1, TUPLES_PER_BATCH * sum(tuple_shape)))
        batched_image_info = np.reshape(
            image_info, (-1, TUPLES_PER_BATCH * sum(tuple_shape)))

        for batch_indices, batch_image_info in zip(batched_indices,
                                                   batched_image_info):
            CPU_IN_QUEUE.put((batch_indices, batch_image_info))

        # Wait for completion & order output
        CPU_IN_QUEUE.join()
        GPU_IN_QUEUE.join()
        feature_pairs = list(GPU_OUT_QUEUE.queue)
        GPU_OUT_QUEUE.queue.clear()
        # One slot per padded image; note [[]] * n would alias a single list
        features = [None] * padded_num
        for pair in feature_pairs:
            for i, f in zip(pair[0], pair[1]):
                features[i] = f
        features = features[:num]
        save_pickle(
            features,
            os.path.join(OUT_ROOT, '{}_{}.pickle'.format(SET, OUT_NAME)))
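The queues and worker functions that infer relies on are defined elsewhere in the source module. Below is a minimal sketch of the assumed producer/consumer setup; load_images and the ops dict keys ('images', 'features') are invented for illustration, while the queue and thread names come from the example itself.

import numpy as np
from queue import Queue

CPU_IN_QUEUE = Queue()
GPU_IN_QUEUE = Queue()
GPU_OUT_QUEUE = Queue()

def load_images(image_info):
    # Hypothetical stand-in: decode and stack the images listed in image_info.
    return np.stack([np.zeros((224, 224, 3), np.float32) for _ in image_info])

def cpu_thread():
    # Producer: load and preprocess one batch of images on the CPU.
    while True:
        indices, image_info = CPU_IN_QUEUE.get()
        GPU_IN_QUEUE.put((indices, load_images(image_info)))
        CPU_IN_QUEUE.task_done()

def gpu_thread(sess, ops):
    # Consumer: run inference on each prepared batch, keeping the indices
    # so the output can be re-ordered after the queues join.
    while True:
        indices, images = GPU_IN_QUEUE.get()
        features = sess.run(ops['features'], feed_dict={ops['images']: images})
        GPU_OUT_QUEUE.put((indices, features))
        GPU_IN_QUEUE.task_done()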
Example #2
def cluster(in_root, out_root, s, mode, r):
    out_file = os.path.join(out_root, '{}_{}_{}.pickle'.format(s, mode, r))

    meta_file = os.path.join(in_root, '{}_{}_000.csv'.format(s, mode))
    meta = load_csv(meta_file)

    if not os.path.exists(out_file):

        date = getattr(sys.modules[__name__], '{}_ref_date'.format(s))

        temp_meta = dict()
        for key in meta.keys():
            temp_meta[key] = [
                e for e, d in zip(meta[key], meta['date']) if d in date
            ]

        t_idx = np.argsort(temp_meta['t'])
        date_meta = dict()
        for key in meta.keys():
            date_meta[key] = [temp_meta[key][i] for i in t_idx]

        print(len(date_meta['t']))
        xy = get_xy(date_meta)

        ref_xy = [xy[0, :]]
        ref_idx = [0]
        for i in tqdm(range(len(date_meta['t']))):
            if sum((xy[i, :] - ref_xy[-1])**2) > r**2:
                ref_xy.append(xy[i, :])
                ref_idx.append(i)

        ref_xy = np.array(ref_xy)
        save_pickle([ref_xy, date_meta, ref_idx], out_file)
    else:
        ref_xy, date_meta, ref_idx = load_pickle(out_file)

    print('{}: {}'.format(s, len(ref_idx)))

    out_img = os.path.join(out_root, '{}_{}_{}.png'.format(s, mode, r))
    plt.clf()
    f, ax1 = plt.subplots(1, 1, sharey=False)
    f.set_figheight(50)
    f.set_figwidth(50)
    ax1.scatter(ref_xy[:, 0], ref_xy[:, 1], c=np.arange(len(ref_xy)))
    plt.savefig(out_img)

    out_meta = dict()
    for key in meta.keys():
        out_meta[key] = [date_meta[key][i] for i in ref_idx]

    out_file = os.path.join(out_root, '{}_{}_{}.csv'.format(s, mode, r))
    save_csv(out_meta, out_file)
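The selection loop above keeps a sample only when it lies strictly more than r from the last kept reference, greedily enforcing a minimum spacing along the trajectory. A self-contained sketch of that idea on toy data:

import numpy as np

def greedy_subsample(xy, r):
    # Keep a point only if it is farther than r from the last kept point.
    keep = [0]
    for i in range(1, len(xy)):
        if np.sum((xy[i] - xy[keep[-1]])**2) > r**2:
            keep.append(i)
    return keep

xy = np.cumsum(np.random.rand(100, 2), axis=0)  # toy trajectory
print(greedy_subsample(xy, r=2.0))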
Example #3
def clean_parametrization(in_root, folds, cols_to_keep, out_root):
    full_data = dict()
    full_ref_data = dict()
    full_query_data = dict()

    for key in cols_to_keep:
        full_data[key] = []
        full_ref_data[key] = []
        full_query_data[key] = []

    meta = dict()
    for s in folds:
        ref_data = load_csv(os.path.join(in_root, '{}_ref.csv'.format(s)))
        query_data = load_csv(os.path.join(in_root, '{}_query.csv'.format(s)))  # Not used to detect ref outliers

        for key in ['l', 'northing', 'easting']:
            ref_data[key] = np.array(ref_data[key], dtype=float)
            query_data[key] = np.array(query_data[key], dtype=float)

        l_max = max(ref_data['l'])
        num_bins = math.ceil(l_max)

        ref_member_path = os.path.join(out_root, '{}_ref_bin_raw_members.pickle'.format(s))
        if not os.path.exists(ref_member_path):
            bin_members = [[i for i in range(len(ref_data['t'])) if math.floor(ref_data['l'][i]) == j] for j in
                           tqdm(range(num_bins))]
            save_pickle(bin_members, ref_member_path)
        else:
            bin_members = load_pickle(ref_member_path)

        ref_bin_xy_path = os.path.join(out_root, '{}_ref_bin_raw_xy.pickle'.format(s))
        if not os.path.exists(ref_bin_xy_path):
            ref_bin_xy = [
                np.median(np.array([[ref_data['easting'][i], ref_data['northing'][i]] for i in bin_members[j]]),
                          axis=0) if len(
                    bin_members[j]) else np.array([-1, -1]) for j
                in tqdm(range(num_bins))]
            save_pickle(ref_bin_xy, ref_bin_xy_path)
        else:
            ref_bin_xy = load_pickle(ref_bin_xy_path)

        meta['{}_ref'.format(s)], clean_ref_data = find_and_remove_errors('ref', out_root, ref_bin_xy, ref_data, s)

        # Clean the query files too, for more efficient testing; this should not
        # influence performance (other than excluding faulty GPS/INS 'ground
        # truth', which we don't want anyway)
        meta['{}_query'.format(s)], clean_query_data = find_and_remove_errors('query', out_root, ref_bin_xy, query_data,
                                                                              s)

        fold_clean_data = dict()
        for key in clean_ref_data.keys():
            fold_clean_data[key] = []

            fold_clean_data[key].extend(clean_ref_data[key])
            fold_clean_data[key].extend(clean_query_data[key])

            full_data[key].extend(clean_ref_data[key])
            full_data[key].extend(clean_query_data[key])

            # Ref rows accumulate into full_ref_data, query rows into full_query_data
            full_ref_data[key].extend(clean_ref_data[key])
            full_query_data[key].extend(clean_query_data[key])

        save_csv(fold_clean_data, os.path.join(out_root, '{}.csv'.format(s)))

    save_csv(full_data, os.path.join(out_root, 'full.csv'))
    save_csv(full_ref_data, os.path.join(out_root, 'full_ref.csv'))
    save_csv(full_query_data, os.path.join(out_root, 'full_query.csv'))

    save_csv(meta, os.path.join(out_root, 'meta.csv'))
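These scripts pass tables around as dicts of equal-length columns through load_csv and save_csv. The project's own helpers are not shown; here is a minimal sketch consistent with how meta[key] is indexed above (the real implementations may differ):

import csv

def load_csv(path):
    # Read a CSV into a dict mapping column name -> list of string values.
    with open(path, newline='') as f:
        reader = csv.DictReader(f)
        data = {name: [] for name in reader.fieldnames}
        for row in reader:
            for name, value in row.items():
                data[name].append(value)
    return data

def save_csv(data, path):
    # Write a dict of equal-length columns as a CSV with a header row.
    names = list(data.keys())
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(names)
        writer.writerows(zip(*(data[name] for name in names)))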
Example #4
def get_top_n():
    # Check whether all expected outputs already exist
    ld_checkpoints = get_checkpoints('obm')

    ld_cp_names = []
    for cp in ld_checkpoints:
        cp_name = cp.split('/')[-2]
        cp_name = ''.join(os.path.basename(cp_name).split('.'))  # Removing '.'
        cp_name += '_e{}'.format(cp[-1])
        ld_cp_names.append(cp_name)

    if any([x in QUERY_LV_PICKLE for x in ld_cp_names]):
        L = [0.0, 0.3, 1.0, 5.0]
        D = [64, 128, 256, 512, 1024, 2048, 4096]
    else:
        L = [0.0]
        D = [256]

    complete = True
    for l in L:
        for d in D:

            out_folder = os.path.join(OUT_ROOT, 'l{}_dim{}'.format(l, d))
            name = ''.join(os.path.basename(QUERY_LV_PICKLE).split('.')[:-1])
            out_pickle = os.path.join(out_folder, '{}.pickle'.format(name))

            if not os.path.exists(out_pickle):
                complete = False
                break
        if not complete:
            break

    if complete:
        print('Skipping complete {}'.format(QUERY_LV_PICKLE))
        return

    ref_meta = load_csv(REF_CSV)
    query_meta = load_csv(QUERY_CSV)
    full_ref_xy = get_xy(ref_meta)
    full_query_xy = get_xy(query_meta)
    num_q = full_query_xy.shape[0]

    pca_f = np.array(load_pickle(PCA_LV_PICKLE))
    full_ref_f = np.array(load_pickle(REF_LV_PICKLE))
    full_query_f = np.array(load_pickle(QUERY_LV_PICKLE))

    full_xy_dists = pairwise_distances(full_query_xy,
                                       full_ref_xy,
                                       metric='euclidean')

    for d in D:

        print(d)
        pca = PCA(whiten=True, n_components=d)
        pca = pca.fit(pca_f)
        pca_ref_f = pca.transform(full_ref_f)
        pca_query_f = pca.transform(full_query_f)

        for l in L:
            print(l)

            out_folder = os.path.join(OUT_ROOT, 'l{}_dim{}'.format(l, d))
            mkdir(out_folder)
            name = ''.join(os.path.basename(QUERY_LV_PICKLE).split('.')[:-1])
            out_pickle = os.path.join(out_folder, '{}.pickle'.format(name))

            if os.path.exists(out_pickle):
                print('{} already exists. Skipping.'.format(out_pickle))
                continue

            # Start at 1 so index 0 is not re-added when l == 0
            ref_idx = [0]
            for i in range(1, len(full_ref_xy)):
                if sum((full_ref_xy[i, :] - full_ref_xy[ref_idx[-1], :])**2) >= l**2:
                    ref_idx.append(i)

            if len(ref_idx) < N:
                continue

            ref_f = pca_ref_f[ref_idx, :]
            xy_dists = full_xy_dists[:, ref_idx]

            print('Building tree')
            ref_tree = KDTree(ref_f)

            print('Retrieving')
            # KDTree.query returns (distances, indices) as arrays already
            top_f_dists, top_i = ref_tree.query(pca_query_f,
                                                k=N,
                                                return_distance=True,
                                                sort_results=True)

            top_g_dists = [[xy_dists[q, r] for r in top_i[q, :]]
                           for q in range(num_q)]

            gt_i = np.argmin(xy_dists, axis=1)
            gt_g_dist = np.min(xy_dists, axis=1)

            # Translate to original indices
            top_i = [[ref_idx[r] for r in top_i[q, :]] for q in range(num_q)]
            gt_i = [ref_idx[r] for r in gt_i]

            save_pickle(
                [top_i, top_g_dists, top_f_dists, gt_i, gt_g_dist, ref_idx],
                out_pickle)
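For reference, sklearn.neighbors.KDTree.query returns a (distances, indices) pair of arrays when return_distance=True, which is exactly the unpacking used above. A toy check with random descriptors:

import numpy as np
from sklearn.neighbors import KDTree

ref = np.random.rand(1000, 8)   # stand-in reference descriptors
queries = np.random.rand(5, 8)  # stand-in query descriptors
tree = KDTree(ref)
dists, idx = tree.query(queries, k=3, return_distance=True, sort_results=True)
print(dists.shape, idx.shape)   # (5, 3) (5, 3)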
Example #5
def get_top_n():
    ref_meta = load_csv(REF_CSV)
    query_meta = load_csv(QUERY_CSV)
    full_ref_xy = get_xy(ref_meta)
    full_query_xy = get_xy(query_meta)
    num_q = full_query_xy.shape[0]

    pca_f = np.array(load_pickle(PCA_LV_PICKLE))
    full_ref_f = np.array(load_pickle(REF_LV_PICKLE))
    full_query_f = np.array(load_pickle(QUERY_LV_PICKLE))

    full_xy_dists = pairwise_distances(full_query_xy,
                                       full_ref_xy,
                                       metric='euclidean')

    for d in DIMS:

        print(d)
        pca = PCA(whiten=True, n_components=d)
        pca = pca.fit(pca_f)
        pca_ref_f = pca.transform(full_ref_f)
        pca_query_f = pca.transform(full_query_f)

        for l in L:
            print(l)

            out_folder = os.path.join(OUT_ROOT, 'l{}_dim{}'.format(l, d))
            mkdir(out_folder)
            name = ''.join(os.path.basename(QUERY_LV_PICKLE).split('.')[:-1])
            out_pickle = os.path.join(out_folder, '{}.pickle'.format(name))

            if os.path.exists(out_pickle):
                print('{} already exists. Skipping.'.format(out_pickle))
                continue

            # Start at 1 so index 0 is not re-added when l == 0
            ref_idx = [0]
            for i in range(1, len(full_ref_xy)):
                if sum((full_ref_xy[i, :] - full_ref_xy[ref_idx[-1], :])**2) >= l**2:
                    ref_idx.append(i)

            if len(ref_idx) < N:
                continue

            ref_f = pca_ref_f[ref_idx, :]
            xy_dists = full_xy_dists[:, ref_idx]

            print('Building tree')
            ref_tree = KDTree(ref_f)

            print('Retrieving')
            # KDTree.query returns (distances, indices) as arrays already
            top_f_dists, top_i = ref_tree.query(pca_query_f,
                                                k=N,
                                                return_distance=True,
                                                sort_results=True)

            top_g_dists = [[xy_dists[q, r] for r in top_i[q, :]]
                           for q in range(num_q)]

            gt_i = np.argmin(xy_dists, axis=1)
            gt_g_dist = np.min(xy_dists, axis=1)

            # Translate to original indices
            top_i = [[ref_idx[r] for r in top_i[q, :]] for q in range(num_q)]
            gt_i = [ref_idx[r] for r in gt_i]

            save_pickle(
                [top_i, top_g_dists, top_f_dists, gt_i, gt_g_dist, ref_idx],
                out_pickle)
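Both get_top_n variants fit a whitened PCA on a held-out feature set and project the reference and query descriptors with it before building the tree. A toy version with random arrays standing in for the pickled features:

import numpy as np
from sklearn.decomposition import PCA

fit_f = np.random.rand(500, 64)   # stand-in for the PCA-fitting features
ref_f = np.random.rand(200, 64)
query_f = np.random.rand(50, 64)

pca = PCA(n_components=16, whiten=True).fit(fit_f)
ref_low = pca.transform(ref_f)    # (200, 16); whitening gives unit variance per component
query_low = pca.transform(query_f)
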
if not os.path.exists(out_file):
    image_info, features, xy = load_pickle(lv_file)
    tuple_info = load_pickle(tuple_file)
    xy = np.array(xy)

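    # Collect squared feature-space and squared ground-plane distances
    # for each positive pair (restricting to j < i avoids double counting)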
    f_dists = []
    e_dists = []
    for i in tqdm(range(len(xy))):
        for j in tuple_info['positives'][i]:
            if j < i:
                f_dist = np.sum((features[i] - features[j])**2)
                f_dists.append(f_dist)
                e_dist = np.sum((xy[i, :] - xy[j, :])**2)
                e_dists.append(e_dist)

    save_pickle([e_dists, f_dists], out_file)

else:
    e_dists, f_dists = load_pickle(out_file)

full_info = dict()
full_info['f_mean'] = np.mean(f_dists)
full_info['e_mean'] = np.mean(e_dists)
full_info['f_med'] = np.median(f_dists)
full_info['e_med'] = np.median(e_dists)
full_info['f_max'] = np.max(f_dists)
full_info['e_max'] = np.max(e_dists)
save_csv(full_info, out_file_meta)

plt.clf()
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)