def siftgeo_read(filename):
    """Read descriptors and per-descriptor metadata from a siftgeo file.

    Returns a pair ``(v, meta)`` of numpy arrays. With ``n`` the count
    reported by ``yael.bvecs_new_from_siftgeo``, ``v`` is reshaped to
    ``(n, d)`` and ``meta`` to ``(n, 9)``. When ``n`` is 0, both results
    are placeholder arrays built from ``[[]]`` (shape ``(1, 0)``), with
    dtypes uint8 and float32 respectively.

    Raises IOError when the reader reports a negative count.
    """
    # The reader returns its results through output pointers, so hand it
    # one-element pointer arrays and a two-slot int vector to fill in.
    desc_ptrs = yael.BytePtrArray(1)
    meta_ptrs = yael.FloatPtrArray(1)
    dims = yael.ivec(2)  # slot 0: descriptor dim, slot 1: metadata dim

    n = yael.bvecs_new_from_siftgeo(
        filename, dims, desc_ptrs.cast(), dims.plus(1), meta_ptrs.cast())

    if n < 0:
        raise IOError("cannot read " + filename)

    if n == 0:
        # Both placeholders are 2-D arrays with a single empty row.
        empty_desc = numpy.array([[]], dtype=numpy.uint8)
        empty_meta = numpy.array([[]], dtype=numpy.float32)
        return empty_desc, empty_meta

    # Wrap the raw buffers in yael vector objects (presumably this hands
    # ownership of the memory to Python -- confirm against yael).
    desc_buf = yael.bvec.acquirepointer(desc_ptrs[0])
    meta_buf = yael.fvec.acquirepointer(meta_ptrs[0])

    d, d_meta = dims[0], dims[1]
    assert d_meta == 9

    v = yael.bvec_to_numpy(desc_buf, n * d).reshape((n, d))
    meta = yael.fvec_to_numpy(meta_buf, n * d_meta).reshape((n, d_meta))
    return v, meta
def load_features(filename, file_format, total_nuse, dimension, lsh,
                  index_folder, offset=0, run_index='n'):
    """Load feature vectors from a file in chunks, optionally indexing them.

    The range ``[offset, offset + total_nuse)`` is walked in steps of at
    most 10,000,000 entries; each step is read with
    ``yutils.load_vectors_fmt`` and converted to a flat numpy array.

    Args:
        filename: path handed to ``yutils.load_vectors_fmt``.
        file_format: ``'fvecs'`` or ``'bvecs'``.
        total_nuse: total number of entries to request.
        dimension: number of components per vector.
        lsh: index object forwarded to ``index`` / ``save_index``.
        index_folder: when not None (and indexing is on), ``save_index``
            is called after every chunk with this folder.
        offset: first entry to request (default 0).
        run_index: ``'y'`` feeds every chunk to ``index`` instead of
            accumulating it; any other value accumulates the chunks.

    Returns:
        When ``run_index != 'y'``: one array of shape
        ``(loaded_count, dimension)`` holding all chunks in order.
        None when ``run_index == 'y'`` or when no chunk was loaded.

    Raises:
        ValueError: if ``file_format`` is not ``'fvecs'`` or ``'bvecs'``.
    """
    chunk_size = 10000000  # upper bound on entries requested per read
    end = total_nuse + offset
    parts = []
    actual_total_nuse = 0

    for feature_idx_begin in range(offset, end, chunk_size):
        print("loading from " + str(feature_idx_begin))
        # Last chunk may be shorter than chunk_size.
        nuse = min(chunk_size, end - feature_idx_begin)
        (feature_vecs, actual_nuse) = yutils.load_vectors_fmt(
            filename, file_format, dimension, nuse, feature_idx_begin,
            verbose=True)
        count = int(actual_nuse)

        if file_format == 'fvecs':
            part = yael.fvec_to_numpy(feature_vecs, count * dimension)
        elif file_format == 'bvecs':
            part = yael.bvec_to_numpy(feature_vecs, count * dimension)
        else:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError("unsupported file_format: " + repr(file_format))

        if run_index != 'y':
            parts.append(part.reshape((count, dimension)))
        else:
            # for CUDA-based batch indexing, skip the reshaping
            index(lsh, part, actual_total_nuse)
            del part
            if index_folder is not None:
                save_index(lsh, index_folder, feature_idx_begin)

        actual_total_nuse += count

    if not parts:
        # Indexing mode, or an empty range: nothing was accumulated.
        return None

    # Concatenate once at the end. Comparing an ndarray with `!= None`
    # inside the loop is element-wise and cannot be used as a condition,
    # and repeated pairwise concatenation re-copies earlier chunks.
    if len(parts) == 1:
        np_feature_vecs = parts[0]
    else:
        np_feature_vecs = numpy.concatenate(parts)
    print(np_feature_vecs.shape)
    return np_feature_vecs
def load_features(filename, file_format, total_nuse, dimension, offset=0):
    """Load feature vectors from a file in chunks into one numpy array.

    The range ``[offset, offset + total_nuse)`` is walked in steps of at
    most 10,000,000 entries; each step is read with
    ``yutils.load_vectors_fmt``, converted to numpy and reshaped to
    ``(count, dimension)``.

    Args:
        filename: path handed to ``yutils.load_vectors_fmt``.
        file_format: ``'fvecs'`` or ``'bvecs'``.
        total_nuse: total number of entries to request.
        dimension: number of components per vector.
        offset: first entry to request (default 0).

    Returns:
        An array of shape ``(loaded_count, dimension)`` holding all chunks
        in order, or None when the requested range is empty.

    Raises:
        ValueError: if ``file_format`` is not ``'fvecs'`` or ``'bvecs'``.
    """
    chunk_size = 10000000  # upper bound on entries requested per read
    end = total_nuse + offset
    parts = []
    actual_total_nuse = 0

    for feature_idx_begin in range(offset, end, chunk_size):
        print("loading from " + str(feature_idx_begin))
        # Last chunk may be shorter than chunk_size.
        nuse = min(chunk_size, end - feature_idx_begin)
        (feature_vecs, actual_nuse) = yutils.load_vectors_fmt(
            filename, file_format, dimension, nuse, feature_idx_begin,
            verbose=True)
        count = int(actual_nuse)

        if file_format == 'fvecs':
            part = yael.fvec_to_numpy(feature_vecs, count * dimension)
        elif file_format == 'bvecs':
            part = yael.bvec_to_numpy(feature_vecs, count * dimension)
        else:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError("unsupported file_format: " + repr(file_format))

        parts.append(part.reshape((count, dimension)))
        actual_total_nuse += count

    if not parts:
        # Empty range: nothing to report or return.
        return None

    # Concatenate once at the end. Comparing an ndarray with `!= None`
    # inside the loop is element-wise and cannot be used as a condition,
    # and repeated pairwise concatenation re-copies earlier chunks.
    if len(parts) == 1:
        np_feature_vecs = parts[0]
    else:
        np_feature_vecs = numpy.concatenate(parts)
    print(np_feature_vecs.shape)
    return np_feature_vecs
def load_features(filename, file_format, total_nuse, dimension, lsh,
                  index_folder, offset=0, run_index='n'):
    """Load feature vectors from a file in chunks, optionally indexing them.

    The range ``[offset, offset + total_nuse)`` is walked in steps of at
    most 10,000,000 entries; each step is read with
    ``yutils.load_vectors_fmt`` and converted to a flat numpy array.

    Args:
        filename: path handed to ``yutils.load_vectors_fmt``.
        file_format: ``'fvecs'`` or ``'bvecs'``.
        total_nuse: total number of entries to request.
        dimension: number of components per vector.
        lsh: index object forwarded to ``index`` / ``save_index``.
        index_folder: when not None (and indexing is on), ``save_index``
            is called after every chunk with this folder.
        offset: first entry to request (default 0).
        run_index: ``'y'`` feeds every chunk to ``index`` instead of
            accumulating it; any other value accumulates the chunks.

    Returns:
        When ``run_index != 'y'``: one array of shape
        ``(loaded_count, dimension)`` holding all chunks in order.
        None when ``run_index == 'y'`` or when no chunk was loaded.

    Raises:
        ValueError: if ``file_format`` is not ``'fvecs'`` or ``'bvecs'``.
    """
    chunk_size = 10000000  # upper bound on entries requested per read
    range_end = total_nuse + offset
    loaded_chunks = []
    actual_total_nuse = 0

    for feature_idx_begin in range(offset, range_end, chunk_size):
        print("loading from " + str(feature_idx_begin))
        # Last chunk may be shorter than chunk_size.
        nuse = min(chunk_size, range_end - feature_idx_begin)
        (feature_vecs, actual_nuse) = yutils.load_vectors_fmt(
            filename, file_format, dimension, nuse, feature_idx_begin,
            verbose=True)
        n_loaded = int(actual_nuse)

        if file_format == 'fvecs':
            flat = yael.fvec_to_numpy(feature_vecs, n_loaded * dimension)
        elif file_format == 'bvecs':
            flat = yael.bvec_to_numpy(feature_vecs, n_loaded * dimension)
        else:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError("unsupported file_format: " + repr(file_format))

        if run_index != 'y':
            loaded_chunks.append(flat.reshape((n_loaded, dimension)))
        else:
            # for CUDA-based batch indexing, skip the reshaping
            index(lsh, flat, actual_total_nuse)
            del flat
            if index_folder is not None:
                save_index(lsh, index_folder, feature_idx_begin)

        actual_total_nuse += n_loaded

    if not loaded_chunks:
        # Indexing mode, or an empty range: nothing was accumulated.
        return None

    # Concatenate once at the end. Comparing an ndarray with `!= None`
    # inside the loop is element-wise and cannot be used as a condition,
    # and repeated pairwise concatenation re-copies earlier chunks.
    if len(loaded_chunks) == 1:
        np_feature_vecs = loaded_chunks[0]
    else:
        np_feature_vecs = numpy.concatenate(loaded_chunks)
    print(np_feature_vecs.shape)
    return np_feature_vecs