Example #1
def get_indexes_da(source_folder, target_folder, nTrain, transferImages):
    get_logger().info("Loading indexes")
    allowed = ['backpack.hdf5', 'headphones.hdf5', 'monitor.hdf5', 'bike.hdf5', 'keyboard.hdf5', 'mouse.hdf5', 'projector.hdf5', 'calculator.hdf5', 'laptop.hdf5', 'mug.hdf5']
    train = []
    test = []
    MAX_TEST = 100
    for categ in allowed:
        #support_filename = join(".", basename(filename))
        s_filename = join(source_folder, categ)
        t_filename = join(target_folder, categ)
        get_logger().info("Loading " + s_filename)
        shfile = HDF5File(s_filename, 'r')
        thfile = HDF5File(t_filename, 'r')
        siid = shfile["image_index"][:]
        tiid = thfile["image_index"][:]
        np.random.shuffle(siid)
        np.random.shuffle(tiid)
        trainIdx = siid[0:nTrain]
        trainTIdx = tiid[0:transferImages]
        testIdx  = tiid[transferImages:transferImages+MAX_TEST]
        train.append((ClassIndexes(s_filename, trainIdx), ClassIndexes(t_filename, trainTIdx))) #data is actually loaded only when needed
        test.append([ClassIndexes(t_filename, testIdx)])
        shfile.close()
        thfile.close()
    return Data(train, test)
Example #2
 def load_model(self, ubm_file):
     """Loads the projector (UBM) from a file."""
     hdf5file = HDF5File(ubm_file, "r")
     logger.debug("Loading model from file '%s'", ubm_file)
     # Read the UBM
     self.ubm = GMMMachine.from_hdf5(hdf5file)
     self.ubm.variance_thresholds = self.variance_threshold
Example #3
 def from_hdf5(cls, hdf5):
     """Creates a new GMMStats object from an `HDF5File` object."""
     if isinstance(hdf5, str):
         hdf5 = HDF5File(hdf5, "r")
     try:
         version_major, version_minor = hdf5.attrs["file_version"].split(".")
         logger.debug(
             f"Reading a GMMStats HDF5 file of version {version_major}.{version_minor}"
         )
     except (KeyError, RuntimeError):
         version_major, version_minor = 0, 0
     if int(version_major) >= 1:
         if hdf5.attrs["writer_class"] != str(cls):
             logger.warning(f"{hdf5.attrs['writer_class']} is not {cls}.")
         self = cls(
             n_gaussians=hdf5["n_gaussians"][()],
             n_features=hdf5["n_features"][()],
         )
         self.log_likelihood = hdf5["log_likelihood"][()]
         self.t = hdf5["T"][()]
         self.n = hdf5["n"][...]
         self.sum_px = hdf5["sumPx"][...]
         self.sum_pxx = hdf5["sumPxx"][...]
     else:  # Legacy file version
         logger.info("Loading a legacy HDF5 stats file.")
         self = cls(
             n_gaussians=int(hdf5["n_gaussians"][()]),
             n_features=int(hdf5["n_inputs"][()]),
         )
         self.log_likelihood = hdf5["log_liklihood"][()]
         self.t = int(hdf5["T"][()])
         self.n = np.reshape(hdf5["n"], (self.n_gaussians,))
         self.sum_px = np.reshape(hdf5["sumPx"], (self.shape))
         self.sum_pxx = np.reshape(hdf5["sumPxx"], (self.shape))
     return self
Example #4
def get_support(filename, size):
    hfile = HDF5File(filename, 'r')
    ds = hfile['support']
    patches = ds[:min(size, ds.shape[0]), :]

    hfile.close()
    return patches.astype('float')
Example #5
def test_gmm_ML_1():
    """Trains a GMMMachine with ML_GMMTrainer"""
    ar = load_array(
        resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5"))
    gmm_ref = GMMMachine.from_hdf5(
        HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r"))

    for transform in (to_numpy, to_dask_array):
        ar = transform(ar)

        gmm = loadGMM()

        # test rng handling
        gmm.convergence_threshold = 0.001
        gmm.update_means = True
        gmm.update_variances = True
        gmm.update_weights = True
        gmm.random_state = np.random.RandomState(seed=12345)
        gmm = gmm.fit(ar)

        gmm = loadGMM()
        gmm.convergence_threshold = 0.001
        gmm.update_means = True
        gmm.update_variances = True
        gmm.update_weights = True
        # Generate reference
        # gmm.save(HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "w"))

        gmm = gmm.fit(ar)

        assert_gmm_equal(gmm, gmm_ref)
Example #6
def test_GMMMachine_stats():
    """Tests a GMMMachine (statistics)"""

    arrayset = load_array(
        resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5"))
    gmm = GMMMachine(n_gaussians=2)
    gmm.weights = np.array([0.5, 0.5], "float64")
    gmm.means = np.array([[3, 70], [4, 72]], "float64")
    gmm.variances = np.array([[1, 10], [2, 5]], "float64")
    gmm.variance_thresholds = np.array([[0, 0], [0, 0]], "float64")

    stats = gmm_module.e_step(
        arrayset,
        gmm,
    )

    stats_ref = GMMStats(n_gaussians=2, n_features=2)
    stats_ref.load(
        HDF5File(resource_filename("bob.learn.em", "data/stats.hdf5"), "r"))

    np.testing.assert_equal(stats.t, stats_ref.t)
    np.testing.assert_almost_equal(stats.n, stats_ref.n, decimal=10)
    # np.testing.assert_equal(stats.sum_px, stats_ref.sum_px)
    # Note AA: precision error above
    np.testing.assert_almost_equal(stats.sum_px, stats_ref.sum_px, decimal=10)
    np.testing.assert_almost_equal(stats.sum_pxx,
                                   stats_ref.sum_pxx,
                                   decimal=10)
Example #7
def write_out_file(infile, outfile, tree=None):
    f = uproot.open(infile)["fancy_tree;1"]
    #f = root_open(infile)
    #T = f[tree]

    names = f.keys()
    cells = list(filter(lambda x: x.startswith(b'cell'), names))
    

    assert len(cells) == sum(map(np.prod, LAYER_SPECS)) + OVERFLOW_BINS
    
    for df in uproot.pandas.iterate(infile, "fancy_tree;1", branches = cells):
        X = df

    for df in uproot.pandas.iterate(infile, "fancy_tree;1", branches = b'TotalEnergy'):
        E = df

    X = X.values
    E = E.values.ravel()
    #X = tree.pd.DataFrame(tree2array(T, branches=cells)).values
    #E = tree.pd.DataFrame(tree2array(T, branches=['TotalEnergy'])).values.ravel()
    print(X.shape)

    with HDF5File(outfile, 'w') as h5:
        for layer, (sh, (l, u)) in enumerate(zip(LAYER_SPECS, LAYER_DIV)):
            h5['layer_{}'.format(layer)] = X[:, l:u].reshape((-1, ) + sh)
            print(u)

        h5['overflow'] = X[:, -OVERFLOW_BINS:]
        h5['energy'] = E.reshape(-1, 1)
Example #8
def select_random_support(train_dir, support_dir, num_train_images,
                          support_size, position_influence):
    log = get_logger()

    train_files = [
        f for f in glob(join(train_dir, '*'))
        if splitext(f.lower())[1] == '.hdf5'
    ]

    try:
        os.makedirs(support_dir)
    except:
        pass

    for target_file in train_files:
        log.info('Extracting random support from "%s"...',
                 basename(target_file))
        #(patches, _)= get_standardized_patches(target_file, num_train_images, position_influence)
        (patches, _) = get_patches(target_file, num_train_images,
                                   position_influence)
        rand_ix = random.sample(range(patches.shape[0]),
                                min(patches.shape[0], support_size))
        patches = patches[np.array(rand_ix), :]

        fh = HDF5File(join(support_dir, basename(target_file)), 'w')
        ds = fh.create_dataset('support', patches.shape, dtype='float')
        ds[:] = patches
        ds.attrs['cursor'] = patches.shape[0]

        fh.close()
Example #9
    def save_model(self, ubm_file):
        """Saves the projector (UBM) to file."""
        # Saves the UBM to file
        logger.debug("Saving model to file '%s'", ubm_file)

        hdf5 = (ubm_file if isinstance(ubm_file, HDF5File) else HDF5File(
            ubm_file, "w"))
        self.ubm.save(hdf5)
Example #10
 def from_hdf5(cls, hdf5, ubm=None):
     """Creates a new GMMMachine object from an `HDF5File` object."""
     if isinstance(hdf5, str):
         hdf5 = HDF5File(hdf5, "r")
     try:
         version_major, version_minor = hdf5.attrs["file_version"].split(".")
         logger.debug(
             f"Reading a GMMMachine HDF5 file of version {version_major}.{version_minor}"
         )
     except (KeyError, RuntimeError):
         version_major, version_minor = 0, 0
     if int(version_major) >= 1:
         if hdf5.attrs["writer_class"] != str(cls):
             logger.warning(f"{hdf5.attrs['writer_class']} is not {cls}.")
         if hdf5["trainer"] == "map" and ubm is None:
             raise ValueError(
                 "The UBM is needed when loading a MAP machine."
             )
         self = cls(
             n_gaussians=hdf5["n_gaussians"][()],
             trainer=hdf5["trainer"][()],
             ubm=ubm,
             convergence_threshold=1e-5,
             max_fitting_steps=hdf5["max_fitting_steps"][()],
             weights=hdf5["weights"][...],
             k_means_trainer=None,
             update_means=hdf5["update_means"][()],
             update_variances=hdf5["update_variances"][()],
             update_weights=hdf5["update_weights"][()],
         )
         gaussians_group = hdf5["gaussians"]
         self.means = gaussians_group["means"][...]
         self.variances = gaussians_group["variances"][...]
         self.variance_thresholds = gaussians_group["variance_thresholds"][
             ...
         ]
     else:  # Legacy file version
         logger.info("Loading a legacy HDF5 machine file.")
         n_gaussians = hdf5["m_n_gaussians"][()]
         g_means = []
         g_variances = []
         g_variance_thresholds = []
         for i in range(n_gaussians):
             gaussian_group = hdf5[f"m_gaussians{i}"]
             g_means.append(gaussian_group["m_mean"][...])
             g_variances.append(gaussian_group["m_variance"][...])
             g_variance_thresholds.append(
                 gaussian_group["m_variance_thresholds"][...]
             )
         weights = np.reshape(hdf5["m_weights"], (n_gaussians,))
         self = cls(n_gaussians=n_gaussians, ubm=ubm, weights=weights)
         self.means = np.array(g_means).reshape(n_gaussians, -1)
         self.variances = np.array(g_variances).reshape(n_gaussians, -1)
         self.variance_thresholds = np.array(g_variance_thresholds).reshape(
             n_gaussians, -1
         )
     return self
Example #11
	def save(self, filepath):
		with HDF5File(filepath, "w") as hdf5:
			hdf5.create_dataset("size", data=self.size)
			hdf5.create_dataset("depth", data=self.depth)
			hdf5.create_dataset("levels", data=self.levels)
			hdf5.create_dataset("first_channels", data=self.first_channels)
			hdf5.create_dataset("last_channels", data=self.last_channels)
			hdf5.create_dataset("categories", data=self.categories)
			HDF5Serializer(hdf5.create_group("weights")).save(self)
Example #12
def test_gmm_MAP_1():
    # Train a GMMMachine with MAP_GMMTrainer
    ar = load_array(
        resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5"))

    # test with rng
    gmmprior = GMMMachine.from_hdf5(
        HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r"))
    gmm = GMMMachine.from_hdf5(
        HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r"),
        ubm=gmmprior,
    )
    gmm.update_means = True
    gmm.update_variances = False
    gmm.update_weights = False
    rng = np.random.RandomState(seed=12345)
    gmm.random_state = rng
    gmm = gmm.fit(ar)

    gmmprior = GMMMachine.from_hdf5(
        HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r"))
    gmm = GMMMachine.from_hdf5(
        HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r"),
        ubm=gmmprior,
    )
    gmm.update_means = True
    gmm.update_variances = False
    gmm.update_weights = False

    # Generate reference
    # gmm.save(HDF5File(resource_filename("bob.learn.em", "data/gmm_MAP.hdf5"), "w"))

    gmm_ref = GMMMachine.from_hdf5(
        HDF5File(resource_filename("bob.learn.em", "data/gmm_MAP.hdf5"), "r"))

    for transform in (to_numpy, to_dask_array):
        ar = transform(ar)
        gmm = gmm.fit(ar)

        np.testing.assert_almost_equal(gmm.means, gmm_ref.means, decimal=3)
        np.testing.assert_almost_equal(gmm.variances,
                                       gmm_ref.variances,
                                       decimal=3)
        np.testing.assert_almost_equal(gmm.weights, gmm_ref.weights, decimal=3)
Example #13
 def load_states(self, filepath):
     with HDF5File(filepath, "r") as hdf5:
         self.averaged_path_length = float(hdf5["averaged_path_length"][()])
         self.augumentation_probability = float(
             hdf5["augumentation_probability"][()])
         HDF5Deserializer(hdf5["generator"]).load(self.generator)
         HDF5Deserializer(hdf5["averaged_generator"]).load(
             self.averaged_generator)
         HDF5Deserializer(hdf5["discriminator"]).load(self.discriminator)
         for key, optimizer in dict(self.optimizers).items():
             HDF5Deserializer(hdf5["optimizers"][key]).load(optimizer)
Example #14
	def load(filepath):
		with HDF5File(filepath, "r") as hdf5:
			size = int(hdf5["size"][()])
			depth = int(hdf5["depth"][()])
			levels = int(hdf5["levels"][()])
			first_channels = int(hdf5["first_channels"][()])
			last_channels = int(hdf5["last_channels"][()])
			categories = int(hdf5["categories"][()])
			generator = Generator(size, depth, levels, first_channels, last_channels, categories)
			HDF5Deserializer(hdf5["weights"]).load(generator)
		return generator
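Examples #11 and #14 form a pair: `save` writes the network hyperparameters plus a `weights` group, and `load` rebuilds a `Generator` and deserializes the weights into it. A minimal roundtrip sketch, assuming a hypothetical trained `generator` instance (in the original class `load` is presumably a `@staticmethod`) and an illustrative output path:

# Sketch only: "generator" is a hypothetical trained instance of the Generator class above.
generator.save("generator.hdf5")             # Example #11: hyperparameters + "weights" group
restored = Generator.load("generator.hdf5")  # Example #14: rebuild the model and load its weights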
Example #15
 def save(self, hdf5):
     """Saves the current statistsics in an `HDF5File` object."""
     if isinstance(hdf5, str):
         hdf5 = HDF5File(hdf5, "w")
     hdf5.attrs["file_version"] = "1.0"
     hdf5.attrs["writer_class"] = str(self.__class__)
     hdf5["n_gaussians"] = self.n_gaussians
     hdf5["n_features"] = self.n_features
     hdf5["log_likelihood"] = float(self.log_likelihood)
     hdf5["T"] = int(self.t)
     hdf5["n"] = np.array(self.n)
     hdf5["sumPx"] = np.array(self.sum_px)
     hdf5["sumPxx"] = np.array(self.sum_pxx)
Example #16
def doSnapshots():
    """
    Read the snapshots and plot the corresponding variables.
    """

    # get all the filenames
    filenames = glob("simple_orbits_*.hdf5")
    N = len(filenames)
    filenames.sort()

    # generate the output arrays
    E = np.zeros((N, makeIC.num_part))
    t = np.zeros(N)
    p = np.zeros((N, 3))
    v = np.zeros((N, 3))

    for i, f in enumerate(filenames):
        # get the data from the file
        f = HDF5File(f, "r")
        ids = f["PartType1/ParticleIDs"][:]
        sort = np.argsort(ids)
        ids = ids[sort]
        pos = f["PartType1/Coordinates"][sort, :]
        pos -= center
        vel = f["PartType1/Velocities"][sort, :]

        t[i] = f["Header"].attrs["Time"]

        r = np.sum(pos**2, axis=1)**0.5
        v2 = np.sum(vel**2, axis=1)
        E[i, :] = 0.5 * v2 - G * M / r

        # Get the pos / vel of the required particle
        ind = ids == id_focus
        p[i, :] = pos[ind, :]
        v[i, :] = vel[ind, :]

    # Compute the solution
    y0 = np.zeros(4)
    y0[:2] = p[0, :2]
    y0[2:] = v[0, :2]

    # compute the plotting variables
    plt.figure(fig_1.number)
    plotRelative(t, E, ".", label="Snapshot")

    plt.figure(fig_2.number)
    plt.plot(p[:, 0], p[:, 1], "-", label="Snapshot", lw=1.)

    plt.figure(fig_3.number)
    plt.plot(v[:, 0], v[:, 1], "-", label="Snapshot", lw=1.)
Example #17
def create_hdf5_dataset(output_filename, patches, positions):
    log = get_logger()
    log.debug('Saving extracted descriptors to %s', output_filename)
    hfile = HDF5File(output_filename, 'w', compression='gzip', fillvalue=0.0)
    hpatches = hfile.create_dataset('patches',
                                    patches.shape,
                                    dtype="float32",
                                    chunks=True)
    hpositions = hfile.create_dataset('positions',
                                      positions.shape,
                                      dtype="uint16",
                                      chunks=True)
    hpatches[:] = patches
    hpositions[:] = positions
    hfile.close()
Example #18
 def save_states(self, filepath):
     with HDF5File(filepath, "w") as hdf5:
         hdf5.create_dataset("averaged_path_length",
                             data=self.averaged_path_length)
         hdf5.create_dataset("augumentation_probability",
                             data=self.augumentation_probability)
         HDF5Serializer(hdf5.create_group("generator")).save(self.generator)
         HDF5Serializer(hdf5.create_group("averaged_generator")).save(
             self.averaged_generator)
         HDF5Serializer(hdf5.create_group("discriminator")).save(
             self.discriminator)
         optimizer_group = hdf5.create_group("optimizers")
         for key, optimizer in dict(self.optimizers).items():
             HDF5Serializer(
                 optimizer_group.create_group(key)).save(optimizer)
Example #19
def get_num_patches(filename, num_images):
    hfile = HDF5File(filename, 'r')
    total_num_patches = hfile[PATCH_TYPE].attrs['cursor']
    dim = hfile[PATCH_TYPE].shape[1]

    if num_images == 0:
        num_images = hfile['image_index'].shape[0]

    image_index = hfile['image_index'][:min(num_images, hfile['image_index'].
                                            shape[0])]

    # Getting patches only from desired number of images
    num_patches = min(image_index[-1, 1], total_num_patches)

    hfile.close()
    return (num_patches, dim)
Example #20
    def __init__(self, raw_files, comm=None, blocksize=2**16*20*2*4):
        """
        Initialize a lofar observation, tracking/joining the two polarizations.
        We also parse the corresponding HDF5 files to initialize:
        nchan, samplerate, fwidth
        """
        # read the HDF5 file and get useful data
        h0 = HDF5File(raw_files[0].replace('.raw', '.h5'), 'r')
        saps = sorted([i for i in h0.keys() if 'SUB_ARRAY_POINTING' in i])
        s0 = h0[saps[0]]
        time0 = Time(s0.attrs['EXPTIME_START_UTC'].replace('Z',''),
                     scale='utc')

        beams = sorted([i for i in s0.keys() if 'BEAM' in i])
        b0 = s0[beams[0]]
        frequencies = (b0['COORDINATES']['COORDINATE_1']
                       .attrs['AXIS_VALUES_WORLD'] * u.Hz).to(u.MHz)
        fbottom = frequencies[0]

        stokes = sorted([i for i in b0.keys()
                         if 'STOKES' in i and 'i2f' not in i])
        st0 = b0[stokes[0]]
        dtype = _lofar_dtypes[st0.attrs['DATATYPE']]

        nchan = len(frequencies)  # = st0.attrs['NOF_SUBBANDS']

        # can also get from np.diff(frequencies.diff).mean()
        fwidth = (b0.attrs['SUBBAND_WIDTH'] *
                  u.__dict__[b0.attrs['CHANNEL_WIDTH_UNIT']]).to(u.MHz)

        samplerate = (b0.attrs['SAMPLING_RATE'] *
                      u.__dict__[b0.attrs['SAMPLING_RATE_UNIT']]).to(u.MHz)
        h0.close()

        self.time0 = time0
        self.samplerate = samplerate
        self.fwidth = fwidth
        self.frequencies = frequencies
        self.fedge = fbottom
        self.fedge_at_top = False
        self.dtsample = (1./self.fwidth).to(u.s)

        super(LOFARdata, self).__init__(raw_files, blocksize, dtype, nchan,
                                        comm=comm)
        # update some of the hdu data
        self['PRIMARY'].header['DATE-OBS'] = self.time0.isot
        self[0].header.update('TBIN', (1./samplerate).to('s').value)
Example #21
def write_out_file(infile, outfile, tree=None):
    f = root_open(infile)
    T = f[tree]

    cells = list(filter(lambda x: x.startswith('cell'), T.branchnames))

    assert len(cells) == sum(map(np.prod, LAYER_SPECS)) + OVERFLOW_BINS

    X = pd.DataFrame(tree2array(T, branches=cells)).values
    E = pd.DataFrame(tree2array(T, branches=['TotalEnergy'])).values.ravel()

    with HDF5File(outfile, 'w') as h5:
        for layer, (sh, (l, u)) in enumerate(zip(LAYER_SPECS, LAYER_DIV)):
            h5['layer_{}'.format(layer)] = X[:, l:u].reshape((-1, ) + sh)

        h5['overflow'] = X[:, -OVERFLOW_BINS:]
        h5['energy'] = E.reshape(-1, 1)
Example #22
 def save(self, hdf5):
     """Saves the current statistics in an `HDF5File` object."""
     if isinstance(hdf5, str):
         hdf5 = HDF5File(hdf5, "w")
     hdf5.attrs["file_version"] = "1.0"
     hdf5.attrs["writer_class"] = str(self.__class__)
     hdf5["n_gaussians"] = self.n_gaussians
     hdf5["trainer"] = self.trainer
     hdf5["convergence_threshold"] = self.convergence_threshold
     hdf5["max_fitting_steps"] = self.max_fitting_steps
     hdf5["weights"] = self.weights
     hdf5["update_means"] = self.update_means
     hdf5["update_variances"] = self.update_variances
     hdf5["update_weights"] = self.update_weights
     gaussians_group = hdf5.create_group("gaussians")
     gaussians_group["means"] = self.means
     gaussians_group["variances"] = self.variances
     gaussians_group["variance_thresholds"] = self.variance_thresholds
Example #23
def imread_mat(filename, name=None):
    """
    Read an 'image' from a MATLAB .MAT file. The file can be any version. Files
    that are v7.3 require the h5py module. If no name is given, the first
    variable is taken.
    """
    try:
        # Try general first (doesn't work for v7.3+ files)
        # SciPy has this built in
        # Supports loading just the given variable name
        # Otherwise have to load all variables and skip special keys starting with "__" to find the variable to load
        # Loaded matrices are already arrays
        from scipy.io import loadmat
        if name is None:
            try:
                # Try to get first variable name without loading entire file (only supported in SciPy 0.12+)
                from scipy.io import whosmat
                keys = whosmat(filename)
                if len(keys) == 0: raise KeyError()
                name = keys[0][0]
            except:
                pass
        x = loadmat(filename, variable_names=name)
        if name is None:
            name = '__'  # we need to find first
            for name in x.keys():
                if name[:2] != '__': break
            if name[:2] == '__': raise KeyError()  # no variables
        return x[name]  # can raise key error
    except NotImplementedError:
        # Try v7.3 file which is an HDF5 file
        # We have to use h5py for this (or PyTables...)
        # Always loads entire metadata (not just specific variable) but none of the data
        # Data needs to be actually loaded (indexing with [()]) and transposed (.T)
        from h5py import File as HDF5File  # TODO: if import error try using PyTables
        with HDF5File(
                filename, 'r'
        ) as x:  # IOError if it doesn't exist or is the wrong format
            if name is None:
                try:
                    name = next(iter(x.keys()))
                except StopIteration:
                    raise KeyError()  # no variables
            return x[name][()].T  # can raise key error
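A short usage sketch for the function above, under the docstring's own assumptions (SciPy handles pre-v7.3 files, h5py handles v7.3+); the file and variable names are purely illustrative:

# Sketch: "scan.mat" and "img" are hypothetical; with no name, the first variable is returned.
arr = imread_mat("scan.mat")           # first non-"__" variable in the file
img = imread_mat("scan.mat", "img")    # or a specific variable by name
print(img.shape)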
Example #24
    def __init__(self,
                 output_name,
                 output_dir,
                 num_files,
                 patches,
                 feature_type,
                 patch_dim=128,
                 patch_type='uint8',
                 pos_type='uint16'):
        self.log = get_logger()

        output_subdir = output_dir
        try:
            makedirs(output_subdir)
        except:
            pass

        output_filename = join(output_subdir, basename(output_name))
        self.log.debug('Saving extracted descriptors to %s', output_filename)

        self.mode = 'creating'
        dt = special_dtype(vlen=bytes)
        patches += 10  #for safety
        self.hfile = HDF5File(output_filename,
                              'w',
                              compression='gzip',
                              fillvalue=0.0)
        self.patches = self.hfile.create_dataset(
            'patches', (num_files * patches, patch_dim),
            dtype=patch_type,
            chunks=True)
        self.positions = self.hfile.create_dataset('positions',
                                                   (num_files * patches, 2),
                                                   dtype=pos_type,
                                                   chunks=True)
        self.image_index = self.hfile.create_dataset(
            'image_index', (num_files, 2),
            dtype='uint64')  # Start, End positions of an image
        self.keys = self.hfile.create_dataset('keys', (num_files, ), dtype=dt)
        self.key_set = set()
        self.patches.attrs['cursor'] = 0
        self.patches.attrs['feature_type'] = feature_type

        self.output_filename = output_filename
Example #25
def get_patches(filename, num_images, position_influence=0):
    hfile = HDF5File(filename, 'r')
    total_num_patches = hfile[PATCH_TYPE].attrs['cursor']

    if num_images == 0:
        num_images = hfile['image_index'].shape[0]

    image_index = hfile['image_index'][:min(num_images, hfile['image_index'].
                                            shape[0])]

    # Getting patches only from desired number of images
    num_patches = min(image_index[-1, 1], total_num_patches)
    patches = hfile[PATCH_TYPE][:num_patches, :]

    # patches = patches.astype(float)
    # norms = (patches**2).sum(axis=1)**0.5
    # patches /= norms.max()

    #patches = patches.astype(float)
    #patches /= patches.max()

    feature_type = hfile[PATCH_TYPE].attrs.get('feature_type', None)

    # if feature_type == 'DECAF':
    #     norms = (patches**2).sum(axis=1)**0.5
    #     patches /= norms.max()

    if position_influence > 0:
        pos = hfile['positions'][:num_patches, :]

        pos = pos.astype(float)
        max_x = pos[:, 0].max()
        max_y = pos[:, 1].max()

        if max_x > 0:
            pos[:, 0] /= max_x
        if max_y > 0:
            pos[:, 1] /= max_y

        patches = np.hstack([patches, position_influence * pos])

    hfile.close()
    return (patches, image_index)
Example #26
 def load(self):
     get_logger().info("Loading patches for " + self.file_name)
     hfile = HDF5File(self.file_name, 'r')
     patches = hfile[self.patch_name]
     feature_dim = patches.shape[1]
     indexes = self.indexes
     num_patches = (indexes[:, 1] - indexes[:, 0]).sum()
     self.patches = np.empty([num_patches, feature_dim])
     self.new_index = np.empty([indexes.shape[0], 2])
     patch_start = n_image = 0
     for iid in indexes:
         n_patches = iid[1] - iid[0]
         self.patches[patch_start:patch_start +
                      n_patches, :] = patches[iid[0]:iid[1], :]
         self.new_index[n_image] = [patch_start, patch_start + n_patches]
         patch_start += n_patches
         n_image += 1
     hfile.close()
     get_logger().info("Loaded " + str(num_patches) + " patches")
Example #27
def getIndexes(patch_folder, nTrain, nTest, position_influence):
    files = sorted(glob(join(patch_folder, '*.hdf5')), key=basename)
    train = []
    test = []
    for (classNumber, filename) in enumerate(files):
        #support_filename = join(".", basename(filename))
        hfile = HDF5File(filename, 'r')
        iid = hfile["image_index"][:]
        nImages = iid.shape[0]
        assert nImages >= (nTrain + nTest), "Not enough images!"
        np.random.shuffle(iid)
        trainIdx = iid[0:nTrain]
        testIdx = iid[nTrain:nTrain + nTest]
        trainData = ClassPatches(filename, trainIdx, PATCH_TYPE)
        testData = ClassPatches(filename, testIdx, PATCH_TYPE)
        test.append(testData)
        train.append(
            trainData)  #train data is actually loaded only when needed
        hfile.close()
    Data = namedtuple("Data", "Train Test")
    return Data(train, test)
Example #28
def make_split(output_name, output_subdir, _patches, _positions, _image_indexes):
    log = get_logger()
    #import pdb; pdb.set_trace()
    try:
        makedirs(output_subdir)
    except:
        pass
    output_filename = join(output_subdir, basename(output_name))
    log.debug('Saving extracted descriptors to %s', output_filename)


    hfile = HDF5File(output_filename, 'w', compression='gzip', fillvalue=0.0)
    patches = hfile.create_dataset('patches', _patches.shape, dtype="float32", chunks=True)
    positions = hfile.create_dataset('positions', _positions.shape, dtype="uint16", chunks=True)
    image_index = hfile.create_dataset('image_index', _image_indexes.shape, dtype='uint64') # Start, End positions of an image
    patches[:] = _patches
    positions[:] = _positions
    image_index[:] = _image_indexes
    patches.attrs['cursor'] = 0
    patches.attrs['feature_type'] = "CAFFE"
    hfile.close()
Example #29
def load_split_whole_image_only(input_folder, nTrain, nTest):
    logger = get_logger()
    files = sorted(glob(join(input_folder, '*.hdf5')), key=basename)
    nClasses = len(files)
    logger.info("Loading " + str(nClasses) + " classes")
    train_patches = np.empty([nClasses * nTrain, patchOptions.size
                              ])  # nClasses*nSamples x nFeatures
    test_patches = np.empty([nClasses * nTest, patchOptions.size])
    train_labels = np.empty([nClasses * nTrain])
    test_labels = np.empty([nClasses * nTest])
    start = time.perf_counter()  # time.clock() was removed in Python 3.8
    train_patch_count = test_patch_count = 0
    for (classNumber, filename) in enumerate(files):
        hfile = HDF5File(filename, 'r')
        iid = hfile["image_index"][:]
        nImages = iid.shape[0]
        assert nImages >= (nTrain + nTest), "Not enough images!"
        np.random.shuffle(iid)
        trainIdx = iid[0:nTrain]
        testIdx = iid[nTrain:nTrain + nTest]
        patches = hfile[patchOptions.patch_name]
        for iid in trainIdx:
            train_patches[train_patch_count] = patches[iid[0]]
            train_patch_count += 1
        train_labels[classNumber * nTrain:(classNumber + 1) *
                     nTrain] = classNumber * np.ones(nTrain)
        for iid in testIdx:
            test_patches[test_patch_count] = patches[iid[0]]
            test_patch_count += 1
        test_labels[classNumber * nTest:(classNumber + 1) *
                    nTest] = classNumber * np.ones(nTest)
        logger.info("Patch count: " + str(train_patch_count) +
                    " training and " + str(test_patch_count) +
                    " test patches for class " + filename)
        hfile.close()
    end = time.perf_counter()
    logger.info("It took " + str((end - start)) + " seconds")
    LoadedData = namedtuple(
        "LoadedData", "train_patches train_labels test_patches test_labels")
    return LoadedData(train_patches, train_labels, test_patches, test_labels)
Example #30
def load_patches(class_data):
    hfile = HDF5File(class_data.filename, 'r')
    patches = hfile[patchOptions.patch_name][:]
    positions = hfile[patchOptions.position_name][:]
    feature_dim = patchOptions.patch_dim
    indexes = class_data.index
    num_patches = (indexes[:, 1] - indexes[:, 0]).sum()
    loaded_patches = np.empty([num_patches, feature_dim])
    loaded_positions = np.empty([num_patches, 2])
    loaded_idx = np.empty([class_data.index.shape[0], 2])
    tags = np.zeros([num_patches, 1])
    patch_start = n_image = 0
    #import pdb; pdb.set_trace()
    for n, iid in enumerate(indexes):
        n_patches = iid[1] - iid[0]
        loaded_patches[patch_start:patch_start + n_patches, :] = patches[iid[0]:iid[1], :]
        loaded_positions[patch_start:patch_start + n_patches, :] = positions[iid[0]:iid[1], :]
        loaded_idx[n, :] = np.array([patch_start, patch_start + n_patches])
        tags[patch_start] = 1
        patch_start += n_patches
        n_image += 1
    hfile.close()
    return ClassData(loaded_patches, loaded_positions, tags, num_patches, loaded_idx)