	def _make_latent_means(self):
		"""
		Write latent means for the syllables in self.spec_dirs.

		Returns
		-------
		latent_means : numpy.ndarray
			Latent means of shape (max_num_syllables, z_dim)

		Note
		----
		* Duplicated code with ``_write_projection``?
		"""
		self._check_for_dirs(['projection_dirs', 'spec_dirs', 'model_filename'], \
				'latent_means')
		# First, see how many syllables are in each file.
		temp = get_hdf5s_from_dir(self.spec_dirs[0])
		assert len(temp) > 0, "Found no specs in " + self.spec_dirs[0]
		hdf5_file = temp[0]
		with h5py.File(hdf5_file, 'r') as f:
			self.sylls_per_file = len(f['specs'])
		spf = self.sylls_per_file
		# Load the model, making sure to get z_dim correct.
		z_dim = torch.load(self.model_filename)['z_dim']
		model = VAE(z_dim=z_dim)
		model.load_state(self.model_filename)
		# For each directory...
		all_latent = []
		for i in range(len(self.spec_dirs)):
			spec_dir, proj_dir = self.spec_dirs[i], self.projection_dirs[i]
			# Make the projection directory if it doesn't exist.
			if proj_dir != '' and not os.path.exists(proj_dir):
				os.makedirs(proj_dir)
			# Make a DataLoader for the syllables.
			partition = get_syllable_partition([spec_dir], 1, shuffle=False)
			try:
				loader = get_syllable_data_loaders(partition, \
						shuffle=(False, False))['train']
				# Get the latent means from the model.
				latent_means = model.get_latent(loader)
				all_latent.append(latent_means)
				# Write them to the corresponding projection directory.
				hdf5s = get_hdf5s_from_dir(spec_dir)
				assert len(latent_means) // len(hdf5s) == spf, \
						"Inconsistent number of syllables per file (" + \
						str(len(latent_means) // len(hdf5s)) + ") in directory " + \
						spec_dir + ". Expected " + str(spf) + "."
				for j in range(len(hdf5s)):
					filename = os.path.join(proj_dir, os.path.split(hdf5s[j])[-1])
					data = latent_means[j*spf:(j+1)*spf]
					with h5py.File(filename, 'a') as f:
						f.create_dataset('latent_means', data=data)
			except AssertionError: # No specs in this directory.
				pass
		return np.concatenate(all_latent)
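	# Hedged usage sketch: this helper is normally reached through the data
	# container's public `request` interface (referenced elsewhere in this
	# class) rather than called directly. The constructor arguments and
	# checkpoint filename below are assumptions for illustration, not a
	# verbatim API reference.
	#
	#   dc = DataContainer(spec_dirs=[...], projection_dirs=[...], \
	#           model_filename='checkpoint_100.tar')
	#   latent = dc.request('latent_means')  # shape: (num_syllables, z_dim)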
	def _check_for_fields(self):
		"""Check to see which fields are saved."""
		fields = {}
		# If self.spec_dirs is registered, assume everything is there.
		if self.spec_dirs is not None:
			for field in SPEC_FIELDS:
				fields[field] = 1
		# Same for self.audio_dirs.
		if self.audio_dirs is not None:
			fields['audio'] = 1
		# Same for self.segment_dirs.
		if self.segment_dirs is not None:
			fields['segments'] = 1
			fields['segment_audio'] = 1
		# If self.projection_dirs is registered, see what we have.
		# If it's in one file, assume it's in all of them.
		if self.projection_dirs is not None:
			if os.path.exists(self.projection_dirs[0]):
				hdf5s = get_hdf5s_from_dir(self.projection_dirs[0])
				if len(hdf5s) > 0:
					hdf5 = hdf5s[0]
					if os.path.exists(hdf5):
						with h5py.File(hdf5, 'r') as f:
							for key in f.keys():
								if key in ALL_FIELDS:
									fields[key] = 1
									self.sylls_per_file = len(f[key])
		return fields
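	# Illustrative return value (assumed field names): the result is a flat
	# field -> 1 mapping used as a set of available fields, e.g. roughly
	#
	#   {'specs': 1, 'onsets': 1, ..., 'audio': 1, 'latent_means': 1}
	#
	# with spec, audio, and projection directories all registered.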
	def _write_projection(self, key, data):
		"""Write the given projection to self.projection_dirs."""
		sylls_per_file = self.sylls_per_file
		# For each directory...
		k = 0
		for i in range(len(self.projection_dirs)):
			spec_dir, proj_dir = self.spec_dirs[i], self.projection_dirs[i]
			hdf5s = get_hdf5s_from_dir(spec_dir)
			for j in range(len(hdf5s)):
				filename = os.path.join(proj_dir, os.path.split(hdf5s[j])[-1])
				to_write = data[k:k+sylls_per_file]
				with h5py.File(filename, 'a') as f:
					f.create_dataset(key, data=to_write)
				k += sylls_per_file
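	# Read-back sketch for the layout written above: each projection hdf5
	# mirrors the spec file of the same basename and holds `sylls_per_file`
	# rows of the written dataset. The filename and key below are
	# illustrative, not guaranteed names.
	#
	#   with h5py.File(os.path.join(proj_dir, 'syllables_000.hdf5'), 'r') as f:
	#       arr = f['latent_means'][:]  # shape: (sylls_per_file, z_dim)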
	def _read_field(self, field):
		"""
		Read a field from the saved hdf5 files.

		Parameters
		----------
		field : str
			Field name to read from file.
		"""
		if field in AUDIO_FIELDS:
			raise NotImplementedError
		elif field == 'segments':
			return self._read_segments()
		elif field == 'segment_audio':
			return self._read_segment_audio()
		elif field in PROJECTION_FIELDS:
			load_dirs = self.projection_dirs
		elif field in SPEC_FIELDS:
			load_dirs = self.spec_dirs
		elif field in MUPET_FIELDS:
			load_dirs = self.projection_dirs
		elif field in DEEPSQUEAK_FIELDS:
			load_dirs = self.projection_dirs
		elif field in SAP_FIELDS:
			load_dirs = self.projection_dirs
		else:
			raise Exception("Can't read field: " + field + \
					"\nThis should have been caught in self.request!")
		to_return = []
		for i in range(len(self.spec_dirs)):
			spec_dir, load_dir = self.spec_dirs[i], load_dirs[i]
			hdf5s = get_hdf5s_from_dir(spec_dir)
			for j, hdf5 in enumerate(hdf5s):
				filename = os.path.join(load_dir, os.path.split(hdf5)[-1])
				with h5py.File(filename, 'r') as f:
					assert field in f, "Can't find field '" + field + \
							"' in file '" + filename + "'!"
					if field == 'audio_filenames':
						data = np.array([k.decode('UTF-8') for k in f[field]])
						to_return.append(data)
					else:
						to_return.append(np.array(f[field]))
		return np.concatenate(to_return)
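	# Note on the 'audio_filenames' branch above: h5py returns byte strings
	# for string datasets, hence the explicit UTF-8 decode. A self-contained
	# sketch of the same round trip (hypothetical file and contents):
	#
	#   with h5py.File('example.hdf5', 'r') as f:
	#       raw = f['audio_filenames'][0]   # e.g. b'/data/mouse1/run_01.wav'
	#       path = raw.decode('UTF-8')      # '/data/mouse1/run_01.wav'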
	def _make_feature_field(self, field, kind):
		"""
		Read a feature from a text file and put it in an hdf5 file.

		Read from self.feature_dirs and write to self.projection_dirs. This
		gets a bit tricky because we need to match up the syllables in the
		text file with the ones in the hdf5 file.

		Parameters
		----------
		field : str
			Name of the feature to read.
		kind : str, 'mupet', 'deepsqueak', or 'sap'
			Which feature file format to expect.

		TO DO: cleaner error handling
		"""
		self._check_for_dirs( \
				['spec_dirs', 'feature_dirs', 'projection_dirs'], field)
		# Find which column the field is stored in.
		if kind == 'mupet':
			file_fields = MUPET_FIELDS
			onset_col = MUPET_ONSET_COL
		elif kind == 'deepsqueak':
			file_fields = DEEPSQUEAK_FIELDS
			onset_col = DEEPSQUEAK_ONSET_COL
		elif kind == 'sap':
			file_fields = SAP_FIELDS
			onset_col = SAP_ONSET_COL
		else:
			raise NotImplementedError("Unknown feature file kind: " + str(kind))
		field_col = file_fields.index(field)
		to_return = []
		# Run through each directory.
		for i in range(len(self.spec_dirs)):
			spec_dir = self.spec_dirs[i]
			feature_dir = self.feature_dirs[i]
			proj_dir = self.projection_dirs[i]
			hdf5s = get_hdf5s_from_dir(spec_dir)
			current_fn, k = None, None
			for hdf5 in hdf5s:
				# Get the filenames and onsets from self.spec_dirs.
				with h5py.File(hdf5, 'r') as f:
					audio_filenames = np.array(f['audio_filenames'])
					spec_onsets = np.array(f['onsets'])
				# if kind == 'sap': # SAP writes onsets in milliseconds.
				# 	spec_onsets /= 1e3
				feature_arr = np.zeros(len(spec_onsets))
				# Loop through each syllable.
				for j in range(len(spec_onsets)):
					audio_fn, spec_onset = audio_filenames[j], spec_onsets[j]
					audio_fn = audio_fn.decode('UTF-8')
					# Update the feature file, if needed.
					if audio_fn != current_fn:
						current_fn = audio_fn
						feature_fn = os.path.split(audio_fn)[-1][:-4]
						if kind == 'deepsqueak': # DeepSqueak appends '_Stats'
							feature_fn += '_Stats'  # when exporting features.
						feature_fn += '.csv'
						feature_fn = os.path.join(feature_dir, feature_fn)
						# Read the onsets and features.
						feature_onsets, features = \
								_read_columns(feature_fn, [onset_col, field_col])
						if kind == 'sap': # SAP writes onsets in milliseconds.
							feature_onsets /= 1e3
						k = 0
					# Look for the corresponding onset in the feature file.
					while spec_onset > feature_onsets[k] + 0.01:
						k += 1
						assert k < len(feature_onsets)
					if abs(spec_onset - feature_onsets[k]) > 0.01:
						print("Mismatch between spec_dirs and feature_dirs!")
						print("hdf5 file:", hdf5)
						print("\tindex:", j)
						print("audio filename:", audio_fn)
						print("feature filename:", feature_fn)
						print("Didn't find spec_onset", spec_onset)
						print("in feature onsets of min:", \
								np.min(feature_onsets), "max:", \
								np.max(feature_onsets))
						print("field:", field)
						print("kind:", kind)
						quit()
					# And add it to the feature array.
					feature_arr[j] = features[k]
				# Write the fields to self.projection_dirs.
				write_fn = os.path.join(proj_dir, os.path.split(hdf5)[-1])
				with h5py.File(write_fn, 'a') as f:
					f.create_dataset(field, data=feature_arr)
				to_return.append(feature_arr)
		self.fields[field] = 1
		return np.concatenate(to_return)
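	# `_read_columns` is referenced above but not defined in this section. A
	# rough sketch under assumptions (comma-delimited text with one header
	# row; the real helper may parse these files differently):
	#
	#   def _read_columns(filename, columns):
	#       """Return the requested columns as 1-d float arrays."""
	#       data = np.loadtxt(filename, delimiter=',', skiprows=1, \
	#               usecols=columns, unpack=True)
	#       return data  # unpacks as: onsets, features = _read_columns(...)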