Exemplo n.º 1
0
def store_std_dev_of_noises(src_file: H5File) -> None:
    """Saves the standard deviation of each channel's noise as a file-level attribute

    The means are read from the file-level '<chan>_noise_mean' attributes, and the samples are
    the 'noise' attributes of the 'perp', 'par', and 'ref' items under every path produced by
    'rawnav.pump_group_paths'.

    Args:
        src_file (H5File): The file containing the experimental data

    Note:
        The attribute is called '<chan>_noise_std_dev' where '<chan>' is 'perp', 'par', or 'ref'.
        The sum of squared deviations is divided by (number of pump groups - 1), so a meaningful
        value needs at least two pump groups.
    """
    channels = ('perp', 'par', 'ref')
    means = {chan: src_file.attrs[chan + '_noise_mean'] for chan in channels}
    squared_dev_totals = {chan: 0 for chan in channels}
    n_groups = 0
    for group_path in rawnav.pump_group_paths(src_file):
        # Read all three noise values before accumulating any of them
        noises = {chan: src_file[group_path + '/' + chan].attrs['noise'] for chan in channels}
        for chan in channels:
            squared_dev_totals[chan] += (noises[chan] - means[chan])**2
        n_groups += 1
    for chan in channels:
        variance = squared_dev_totals[chan] / (n_groups - 1)
        src_file.attrs[chan + '_noise_std_dev'] = np.sqrt(variance)
    return
Exemplo n.º 2
0
def store_noise_means(src_file: H5File) -> None:
    """Saves the average noise of each channel as a file-level attribute

    The 'noise' attributes of the 'perp', 'par', and 'ref' items under every path produced by
    'rawnav.pump_group_paths' are summed and divided by the number of paths.

    Args:
        src_file (H5File): The file containing the experimental data

    Note:
        The attribute is called '<chan>_noise_mean' where '<chan>' is 'perp', 'par', or 'ref'
    """
    channels = ('perp', 'par', 'ref')
    totals = {chan: 0 for chan in channels}
    n_groups = 0
    for group_path in rawnav.pump_group_paths(src_file):
        for chan in channels:
            totals[chan] += src_file[group_path + '/' + chan].attrs['noise']
        n_groups += 1
    for chan in channels:
        src_file.attrs[chan + '_noise_mean'] = totals[chan] / n_groups
    return
Exemplo n.º 3
0
def make_top_level_wavelengths_group(organized_file: H5File) -> None:
    """Adds a top level 'wavelengths' group that indexes the round data by wavelength.

    Every subgroup found at rounds/roundX/WWWWW (e.g. pump, nopump) is linked into the new
    hierarchy with the round and wavelength levels swapped:
    File
        wavelengths
            WWWWW
                roundX
                    pump
                    nopump
    The entries are created by assigning the existing groups to new paths, i.e. they are
    links to the same data rather than copies, so no extra storage is needed.

    Args:
        organized_file (H5File): A file that has already had its data reorganized into rounds
    """
    logger.info(f"Assembling a top level 'wavelengths' group in {organized_file.filename}")
    organized_file.create_group('wavelengths')
    by_wavelength = organized_file['wavelengths']
    # One empty group per known wavelength before any rounds are linked in
    for wavelength in wavelength_set(organized_file):
        by_wavelength.create_group(wavelength)
    by_round = organized_file['rounds']
    for round_name in subgroups(by_round):
        for wavelength in subgroups(by_round[round_name]):
            by_wavelength[wavelength].create_group(round_name)
            source_prefix = f'{round_name}/{wavelength}'
            target_prefix = f'{wavelength}/{round_name}'
            for child in subgroups(by_round[source_prefix]):
                by_wavelength[f'{target_prefix}/{child}'] = by_round[f'{source_prefix}/{child}']
    logger.info("Done creating 'wavelengths' group")
    return
Exemplo n.º 4
0
def test_all_pump_group_paths_are_found(organized_faulty_data: H5File):
    """Checks that 'nav.pump_group_paths' yields exactly the paths in the file ending in 'pump'

    The expected set is built by visiting every item in the file and keeping the names that
    end with 'pump' (which also covers 'nopump'), so faulty groups must not be reported.
    """
    found = set(nav.pump_group_paths(organized_faulty_data))
    visited = []
    # list.append returns None, so the visit continues through every item
    organized_faulty_data.visit(visited.append)
    expected = {'/' + name for name in visited if name.endswith('pump')}
    assert found == expected
Exemplo n.º 5
0
def test_all_signal_dataset_paths_are_found(organized_faulty_data: H5File):
    """Checks that 'nav.all_signal_dataset_paths' yields every non-faulty signal path

    The expected set holds each visited name that contains 'perp', 'par', or 'ref' and does
    not contain 'faulty', prefixed with '/'.
    """
    found = set(nav.all_signal_dataset_paths(organized_faulty_data))
    visited = []
    # list.append returns None, so the visit continues through every item
    organized_faulty_data.visit(visited.append)
    signal_names = ('perp', 'par', 'ref')
    expected = set()
    for name in visited:
        if 'faulty' in name:
            continue
        if any(sig in name for sig in signal_names):
            expected.add('/' + name)
    assert found == expected
Exemplo n.º 6
0
def test_recursive_copy(organized_clean_data: H5File, starts_empty: H5File):
    """Ensures that the copy operation copies everything

    After 'reorg.recursive_copy' runs, the set of item names visited in the destination file
    must equal the set of item names visited in the source file.
    """
    reorg.recursive_copy(organized_clean_data, starts_empty)
    src_items = []
    organized_clean_data.visit(src_items.append)
    dest_items = []
    starts_empty.visit(dest_items.append)
    # Compare as sets: only the names present matter, not the order they were visited in.
    # The destination set must be built from the destination's own items, otherwise the
    # assertion would hold regardless of what was copied.
    assert set(src_items) == set(dest_items)
Exemplo n.º 7
0
def test_all_dataset_paths_below_are_collected(organized_faulty_data: H5File):
    """Checks that 'nav.dataset_paths_below' returns every dataset path under 'rounds'

    The expected set holds each visited name that contains 'time', 'perp', 'par', or 'ref',
    exactly as reported by the visit (no leading '/').
    """
    found = set(nav.dataset_paths_below(organized_faulty_data, 'rounds'))
    visited = []
    # list.append returns None, so the visit continues through every item
    organized_faulty_data.visit(visited.append)
    dataset_names = ('time', 'perp', 'par', 'ref')
    expected = {name for name in visited if any(ds in name for ds in dataset_names)}
    assert found == expected
Exemplo n.º 8
0
def test_all_wavelength_groups_under_rounds_are_found(organized_faulty_data: H5File):
    """Checks that 'nav.wavelengths_under_rounds_paths' yields every wavelength group path

    The expected set holds each visited name that ends with one of the wavelength names
    '76487', '76715', or '76940', prefixed with '/'.
    """
    found = set(nav.wavelengths_under_rounds_paths(organized_faulty_data))
    visited = []
    # list.append returns None, so the visit continues through every item
    organized_faulty_data.visit(visited.append)
    wavelength_suffixes = ('76487', '76715', '76940')
    expected = {'/' + name for name in visited if name.endswith(wavelength_suffixes)}
    assert found == expected
Exemplo n.º 9
0
def copy_rounds_structure_for_delta_a(old_file: H5File,
                                      new_file: H5File) -> None:
    """Recreates the rounds/roundX/WWWWW group hierarchy of one file inside another

    Only groups are created; no datasets are copied. The new file is intended to hold dA
    data that is stored at a later time.

    Args:
        old_file (H5File): The file whose structure will be copied
        new_file (H5File): An empty file that will have a group structure copied into it
    """
    source_rounds = old_file['rounds']
    for round_name in subgroups(source_rounds):
        round_group = source_rounds[round_name]
        for wavelength_name in subgroups(round_group):
            # require_group is given the full path down to the wavelength level
            new_file.require_group(f'rounds/{round_name}/{wavelength_name}')
    return
Exemplo n.º 10
0
def create_rounds_from_spectra(old_file: H5File, new_file: H5File) -> None:
    """Creates 'rounds/roundNNN' in the new file for each 'spectrumN' subgroup of the old file

    The number captured from the spectrum name is left-padded with zeros to at least three
    characters. Subgroups whose names do not contain 'spectrum' followed by digits are skipped.

    Args:
        old_file (H5File): The original file with the experimental data
        new_file (H5File): The new, empty file in which the data will be organized
    """
    spectrum_pattern = re.compile(r'spectrum(\d+)')
    new_file.create_group('rounds')
    for group_name in subgroups(old_file):
        found = spectrum_pattern.search(group_name)
        if found is not None:
            padded_number = found.group(1).rjust(3, '0')
            new_file['rounds'].create_group('round' + padded_number)
    return
Exemplo n.º 11
0
def copy_datasets(old_file: H5File, new_file: H5File) -> None:
    """Copies the datasets of each spectrum in the old file into the matching round of the new file

    Spectra (every top level group except 'experiment_parameters') and rounds are each sorted
    and then matched up in order. Within a spectrum the dataset names are grouped per
    wavelength by 'pair_dataset_names'. A wavelength with two datasets has each one stored in
    the group path chosen by 'round_path_for_old_dataset'; a wavelength with a single dataset
    is stored under '/rounds/<round>/<wavelength>/faulty1'. The actual transfer of the data is
    delegated to 'split_and_store_old_dataset'.

    Args:
        old_file (H5File): The original file with the experimental data
        new_file (H5File): The new file in which the data will be organized
    """
    round_names = sorted(subgroups(new_file['rounds']))
    spectrum_names = [name for name in subgroups(old_file) if name != 'experiment_parameters']
    spectrum_map = make_spectrum_map(spectrum_names)
    ordered_keys = sorted(spectrum_map.keys())
    for spec_key, rnd in zip(ordered_keys, round_names):
        real_spec_name = spectrum_map[spec_key]
        grouped_names = sorted(pair_dataset_names(datasets(old_file[real_spec_name])))
        for wav, *dset_names in grouped_names:
            if len(dset_names) >= 2:
                first_old_path = f'/{real_spec_name}/{dset_names[0]}'
                second_old_path = f'/{real_spec_name}/{dset_names[1]}'
                first_dset = old_file[first_old_path]
                second_dset = old_file[second_old_path]
                first_new_path, second_new_path = round_path_for_old_dataset(
                    first_dset, second_dset, rnd, wav)
                # Both destination groups are created before either dataset is stored
                new_file.create_group(first_new_path)
                new_file.create_group(second_new_path)
                first_group = new_file[first_new_path]
                second_group = new_file[second_new_path]
                logger.debug(f'Copying dataset {first_old_path} to {first_new_path}')
                split_and_store_old_dataset(first_group, first_dset)
                logger.debug(f'Copying dataset {second_old_path} to {second_new_path}')
                split_and_store_old_dataset(second_group, second_dset)
            else:
                # A lone dataset has no partner, so it is filed under a 'faulty1' group
                lone_old_path = f'/{real_spec_name}/{dset_names[0]}'
                lone_dset = old_file[lone_old_path]
                faulty_path = f'/rounds/{rnd}/{wav}/faulty1'
                new_file.create_group(faulty_path)
                logger.debug(f'Copying dataset {lone_old_path} to {faulty_path}')
                split_and_store_old_dataset(new_file[faulty_path], lone_dset)
    return
Exemplo n.º 12
0
def make_delta_a_wavelength_groups(src_file: H5File) -> None:
    """Creates a top level 'wavelengths' group that divides the data by wavelength rather
    than round.

    The 'time', 'perp', and 'par' items found at rounds/roundX/WWWWW are linked into the new
    hierarchy with the round and wavelength levels swapped:
    File
        wavelengths
            WWWWW
                roundX
                    time
                    perp
                    par
    The entries are created by assigning the existing objects to new paths, i.e. they are
    links to the same data rather than copies, so no extra storage is needed.

    Note:
        Not covered by tests yet

    Args:
        src_file (H5File): A file that has already had its data reorganized into rounds
    """
    src_file.create_group('wavelengths')
    wav_root = src_file['wavelengths']
    # One empty group per known wavelength before any rounds are linked in
    for wav_name in wavelength_set(src_file):
        wav_root.create_group(wav_name)
    rounds_root = src_file['rounds']
    for rnd_name in subgroups(rounds_root):
        for wav_name in subgroups(rounds_root[rnd_name]):
            wav_root[wav_name].create_group(rnd_name)
            for dset_name in ('time', 'perp', 'par'):
                old_path = f'{rnd_name}/{wav_name}/{dset_name}'
                new_path = f'{wav_name}/{rnd_name}/{dset_name}'
                wav_root[new_path] = rounds_root[old_path]
    return