Example #1
    def build_empty_result_dataset(self,
                                   h5target=None,
                                   chunks=True,
                                   chunk_cache_mem_size=None):
        """
        Generates a :class:`~snomtools.data.datasets.DataSet` of shape :attr:`~OBEfit_Copol.resultshape`
        to write the OBE fit results into.
        The axes will be the ones of the input data without the delay axis.
        Empty DataArrays (containing zeroes) are initialized for the OBE fit parameters.

        :param h5target: The HDF5 target to write to.
            If `None` is given (the default), then a DataSet in numpy mode (in-memory) is generated.
        :type h5target: str *or* :class:`h5py.Group` *or* None

        :param chunks: The chunk size to use for the HDF5 data. Ignored in numpy mode (see above).
            If `True` is given (the default), the chunk size is automatically chosen as usual.
            If `False` is given, no chunking and compression of the data will be done (not recommended).
        :type chunks: bool *or* tuple of int

        :param chunk_cache_mem_size: Explicitly set chunk cache memory size (in bytes) for HDF5 File.
            This can be used to optimize I/O performance according to iteration over the data.
            Defaults are set in :mod:`snomtools.data.h5tools`.
        :type chunk_cache_mem_size: int

        :return: The empty DataSet to write result parameters to.
        :rtype: :class:`~snomtools.data.datasets.DataSet`
        """
        axlist = self.data.axes[:]
        axlist.pop(self.fitaxis_ID)
        if h5target:
            dflist = []
            for l in self.result_datalabels + self.result_accuracylabels:
                dataspace = ds.Data_Handler_H5(
                    unit=self.result_params[l]['unit'],
                    shape=self.resultshape,
                    chunks=chunks)
                dflist.append(
                    ds.DataArray(dataspace,
                                 label=l,
                                 plotlabel=self.result_params[l]['plotlabel'],
                                 h5target=dataspace.h5target,
                                 chunks=chunks))
            return ds.DataSet("OBE fit results",
                              dflist,
                              axlist,
                              h5target=h5target,
                              chunk_cache_mem_size=chunk_cache_mem_size)
        else:
            dflist = [
                ds.DataArray(np.zeros(self.resultshape),
                             unit=self.result_params[l]['unit'],
                             label=l,
                             plotlabel=self.result_params[l]['plotlabel'])
                for l in self.result_datalabels + self.result_accuracylabels
            ]
            return ds.DataSet("OBE fit results", dflist, axlist)
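A minimal usage sketch: `fit` is a hypothetical name for an already configured OBEfit_Copol instance (with `result_datalabels`, `result_params` etc. set up); the output file name is also hypothetical.

# Sketch; `fit` stands for a configured OBEfit_Copol instance:
result = fit.build_empty_result_dataset(h5target="obe_results.hdf5")
for label in fit.result_datalabels:
    print(result.get_datafield(label).shape)  # each is of shape fit.resultshape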
Example #2
File: FFT.py  Project: hartelt/snomtools
    def response_data(self, n_freqs=5000):
        """
        Calculates the frequency response of the defined filter functions and returns them as a 1D DataSet,
        containing the frequency axis and a DataArray of complex filter amplitudes per frequency component.
        The DataArrays are written in filter order and labeled `filter response omegaN` for a component N.

        :param n_freqs: Number of frequency steps to calculate for.
        :type n_freqs: int

        :return: The DataSet containing the frequency responses.
        :rtype: :class:`~snomtools.data.datasets.DataSet`
        """
        responses = []
        frequencies = None
        for b in self.butters:
            freqs, response = b.response(n_freqs)
            if frequencies is None:
                frequencies = freqs
            else:
                assert np.allclose(
                    freqs,
                    frequencies), "Butters giving inconsistent frequencies."
            responses.append(response)
        das = [
            ds.DataArray(responses[i],
                         label="filter response omega{0}".format(i))
            for i in range(len(self.butters))
        ]
        data = ds.DataSet("Frequency Filter Response Functions", das,
                          [ds.Axis(frequencies, label='frequency')])
        return data
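A short consumption sketch; `fft_filter` is a hypothetical name for an instance of the surrounding filter class that provides `response_data()` as above.

# Sketch; `fft_filter` stands for an instance providing response_data():
response = fft_filter.response_data(n_freqs=1000)
omega0 = response.get_datafield('filter response omega0')
frequencies = response.get_axis('frequency')
print(len(frequencies))  # number of frequency steps calculated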
Example #3
def hist_asc(source, T_start=None, T_bin=1, tif_probe=None):
    """
	Reads a DLD energy channel histogram, saved in a file with the extension ".hist.asc".

	:param str source: The path of the source file.

	:param int T_start: The start channel of the chosen time binning. By default, the first channel containing counts
		is taken.

	:param int T_bin: The bin width (in time channels) of the chosen time binning.

	:param str tif_probe: A tif that was saved at the same time (or with the same settings) as the histogram to read,
		typically when executing "save all" in Terra. This deactivates *T_start* and *T_bin* and reads the binning from
		the tags in the tiff file instead.

	:return: The imported data.
	:rtype: snomtools.data.datasets.DataSet
	"""
    filepath = os.path.abspath(source)
    filebase = os.path.basename(filepath)

    if tif_probe is not None:
        # Read tif probe file:
        infile = tiff.tifffile.TiffFile(tif_probe)

        # Read time binning metadata from tags:
        roi_and_bin_id = "41010"  # as defined by Christian Schneider #define TIFFTAG_ROI_AND_BIN 41010
        tag = tiff.search_tag(infile, roi_and_bin_id)
        # roi_and_bin_list = tag.value
        T_start, St, T_bin = int(tag.value[2]), int(tag.value[5]), int(
            tag.value[8])
        infile.close()

    # Read the "HistoXplusY" column from the .asc file to an array:
    count_data = numpy.loadtxt(filepath, dtype=int, skiprows=1, usecols=2)
    # Trim the trailing zeroes:
    count_data = numpy.trim_zeros(count_data, 'b')

    # If no start channel is given, guess it by taking the first non-zero entry, taking the binning into account.
    if not tif_probe and T_start is None:
        start_index = numpy.nonzero(count_data)[0][0]
        T_start = start_index * T_bin

    # Trim the leading zeroes:
    count_data = numpy.trim_zeros(count_data)

    # Initialize Channel axis and Count DataArray
    taxis = ds.Axis([T_start + i * T_bin for i in range(count_data.shape[0])],
                    label='channel',
                    plotlabel='Time Channel')
    dataarray = ds.DataArray(count_data,
                             unit='count',
                             label='counts',
                             plotlabel='Counts')

    # Return DataSet:
    return ds.DataSet(label=filebase, datafields=[dataarray], axes=[taxis])
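Usage is a single call; the file name below is hypothetical.

# Hypothetical file; read the histogram with an explicit binning of 2:
histdata = hist_asc("measurement.hist.asc", T_bin=2)
print(len(histdata.get_axis('channel')))  # number of populated time channels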
Example #4
	def bin(self, h5target=None):
		"""Bins the data and returns a new DataSet labeled "<label> binned", optionally backed by an HDF5 target."""
		newaxis = self.bin_axis()
		if h5target is not None:
			# Presumably, h5target=True lets bin_data buffer the binned data in its own (temporary) HDF5 handler:
			newda = self.bin_data(h5target=True)
		else:
			newda = self.bin_data(h5target=None)

		newds = ds.DataSet(self.data.label + " binned", (newda,), newaxis,
						   self.data.plotconf, h5target=h5target)
		return newds
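For context, a call sketch mirroring the test code in Example #10 below; `some_dataset` is a placeholder for an existing DataSet.

# Sketch following the test code in Example #10:
b = Binning(some_dataset, binAxisID=('y', 'x'), binFactor=(2, 8))
binned = b.bin(h5target="binning_outdata.hdf5")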
Example #5
    def rotate_data(self, h5target=None):
        """
        Rotates the full DataSet.

        :return: The rotated DataSet.
        """
        dataarrays_rotated = [self.dataarray_rotated(d) for d in self.data_original.dlabels]
        axes_rotated = []
        for i, ax in enumerate(self.data_original.axes):
            if i not in self.rot_plane:
                axes_rotated.append(ax)
            else:
                if self.axes_mode == 'keep':
                    axes_rotated.append(ax)
                else:
                    # TODO: Implement other axes handling modes.
                    raise NotImplementedError
        self.data_rotated = ds.DataSet('rotated ' + self.data_original.label,
                                       dataarrays_rotated, axes_rotated,
                                       h5target=h5target)
        return self.data_rotated
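A hedged usage sketch; `rot` stands for a configured instance of the surrounding rotation class with `data_original`, `rot_plane` and `axes_mode` already set.

# Sketch; `rot` is a configured rotation-helper instance:
rotated = rot.rotate_data(h5target="rotated.hdf5")
assert rotated is rot.data_rotated  # the result is also cached on the instance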
Example #6
def read_jpeg(filepath):
    """
	Reads a generic jpeg file, interpreting the two image dimensions as x and y.
	Reads only greyscale; if a color (RGB or RGBA) image is given, it is converted to greyscale.

	:param filepath: String: The (absolute or relative) path of the input file.

	:return: The dataset instance generated from the image file.
	"""
    # Translate input path to absolute path:
    filepath = os.path.abspath(filepath)
    filebase = os.path.basename(filepath)

    # Read jpeg file to a numpy array. Axes will be (y, x), see below:
    indata = imageio.imread(filepath, as_gray=True)

    # Initialize data for dataset:
    dataarray = ds.DataArray(indata,
                             unit='dimensionless',
                             label='brightness',
                             plotlabel='Brightness')

    # Careful about orientation! This is like a matrix:
    # rows go first and are numbered in vertical direction -> Y
    # columns go last and are numbered in horizontal direction -> X
    yaxis = ds.Axis(np.arange(0, indata.shape[0]),
                    unit='pixel',
                    label='y',
                    plotlabel='y')
    xaxis = ds.Axis(np.arange(0, indata.shape[1]),
                    unit='pixel',
                    label='x',
                    plotlabel='x')

    # Return dataset:
    return ds.DataSet(label=filebase,
                      datafields=[dataarray],
                      axes=[yaxis, xaxis])
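Usage (the image path is hypothetical); note the axis order resulting from the matrix convention explained above.

# Hypothetical path; the first axis is y (rows), the second is x (columns):
img = read_jpeg("sample.jpg")
print(img.get_axis(0).get_label(), img.get_axis(1).get_label())  # 'y' 'x'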
Example #7
File: pes.py  Project: hartelt/snomtools
    def extract_data(data, data_id=0, axis_id=None, label="fermiedge"):
        """
        Extracts the energies and intensities out of a dataset. To do so, it takes the energy axis of the input data
        and projects the datafield onto that axis by summing over all the other axes.

        :param data: Dataset containing the spectral data.

        :param data_id: Identifier of the DataField to use.

        :param axis_id: Optional: Identifier of the energy axis to use. If not given, the first axis that corresponds
            to an energy (eV) in its physical dimension is taken.

        :param label: string: label for the produced DataSet

        :return: 1D-DataSet with projected Intensity Data and Energy Axis.
        """
        assert isinstance(data, ds.DataSet) or isinstance(data, ds.ROI), \
            "ERROR: No dataset or ROI instance given to fermi edge data extraction."
        if axis_id is None:
            energy_axis = data.get_axis_by_dimension("eV")
        else:
            energy_axis = data.get_axis(axis_id)
        count_data = data.get_datafield(data_id)
        energy_axis_index = data.get_axis_index(energy_axis.get_label())
        count_data_projected = count_data.project_nd(energy_axis_index,
                                                     ignorenan=True)
        count_data_projected = ds.DataArray(count_data_projected,
                                            label='intensity')
        # Normalize by scaling to 1:
        count_data_projected_norm = count_data_projected / count_data_projected.max()
        count_data_projected_norm.set_label("intensity_normalized")
        # Initialize the DataSet containing only the projected fermi edge data:
        return ds.DataSet(label,
                          [count_data_projected_norm, count_data_projected],
                          [energy_axis])
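A call sketch; `spectra` is a hypothetical DataSet with an energy axis in eV, and `extract_data` presumably lives on the FermiEdge class shown in Example #11 (same file, pes.py).

# Sketch; `spectra` is a hypothetical DataSet with an eV axis:
edge_data = FermiEdge.extract_data(spectra, data_id='counts')
normalized = edge_data.get_datafield('intensity_normalized')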
Example #8
def project_1d(data,
               axis_id=0,
               data_id=None,
               outlabel=None,
               normalization=None):
    """
	Projects the data onto one axis by summing the values over all the other axes.

	:param data: The DataSet or ROI to project.

	:param axis_id: An identifier of the axis to project onto.

	:param data_id: Optional: An identifier of the dataarray to take data from. If not given, all DataArrays of the
		Set are projected.

	:param outlabel: String, optional: A label to assign to the projected DataSet. Default: Label of the original
		DataSet.

	:param normalization: Method for a normalization to apply to the data. Valid options:
		* None, "None" (default): No normalization.
		* "maximum", "max": divide every value by the maximum value in the set
		* "mean": divide every value by the average value in the set
		* "minimum", "min": divide every value by the minimum value in the set
		* "absolute maximum", "absmax": divide every value by the maximum absolute value in the set
		* "absolute minimum", "absmin": divide every value by the minimum absolute value in the set
		* "size": divide every value by the number of pixels that have been summed in the projection (ROI size)

	:return: A dataset instance with the projected data.
	"""
    assert isinstance(data, datasets.DataSet) or isinstance(data, datasets.ROI), \
     "No dataset or ROI instance given to projection function."

    if outlabel is None:
        outlabel = data.label

    ax_index = data.get_axis_index(axis_id)
    ax = data.get_axis(ax_index)

    sumlist = list(range(data.dimensions))
    sumlist.remove(ax_index)
    sumtup = tuple(sumlist)

    dfields = []
    if data_id is not None:  # Explicit None check, so a valid identifier like index 0 is not mistaken for "not given".
        dlabels = [data_id]
    else:
        dlabels = data.dlabels

    for label in dlabels:
        df = data.get_datafield(label)
        sumdat = df.sum(sumtup)
        if normalization:
            pl = "normalized projected " + df.get_plotlabel()
            if normalization == "None":
                normdat = sumdat
                pl = "projected " + df.get_plotlabel()
            elif normalization in ["maximum", "max"]:
                normdat = sumdat / sumdat.max()
            elif normalization in ["minimum", "min"]:
                normdat = sumdat / sumdat.min()
            elif normalization in ["mean"]:
                normdat = sumdat / sumdat.mean()
            elif normalization in ["absolute maximum", "absmax"]:
                normdat = sumdat / abs(sumdat).max()
            elif normalization in ["absolute minimum", "absmin"]:
                normdat = sumdat / abs(sumdat).min()
            elif normalization in ["size"]:
                number_of_pixels = 1
                for ax_id in sumtup:
                    number_of_pixels *= len(data.get_axis(ax_id))
                normdat = sumdat / number_of_pixels
            else:
                try:
                    normdat = sumdat / normalization
                except TypeError:
                    warnings.warn(
                        "Normalization mode not valid. Returning unnormalized data."
                    )
                    normdat = sumdat
        else:
            normdat = sumdat
            pl = "projected " + df.get_plotlabel()
        outfield = datasets.DataArray(normdat,
                                      label=df.get_label(),
                                      plotlabel=pl)
        dfields.append(outfield)

    return datasets.DataSet(outlabel, dfields, [ax])
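Usage sketch (variable and axis names hypothetical): projecting onto an energy axis with maximum normalization.

# Sketch; `mydata` is a hypothetical DataSet with an axis labeled 'energy':
spectrum = project_1d(mydata, axis_id='energy', normalization='max')
print(spectrum.dlabels)  # one projected DataField per input DataField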
Example #9
def timelog_folder(folderpath,
                   timeunit='s',
                   timeunitlabel=None,
                   timeformat=None,
                   prefix="",
                   postfix="",
                   h5target=True):
    """
	# TODO: UPDATE THIS FROM GENERIC COPIED DOCSTRING!
	:param folderpath: The (relative or absolute) path of the folders containing the powerlaw measurement series.

	:return: The dataset containing the images stacked along a time axis.
	"""
    if timeunitlabel is None:
        timeunitlabel = timeunit

    # Translate input path to absolute path:
    folderpath = os.path.abspath(folderpath)

    # Inspect the given folder for the image files:
    timefiles = {}
    for filename in filter(is_jpeg, os.listdir(folderpath)):
        # Strip extension, prefix, postfix:
        timestring = os.path.splitext(filename)[0]
        # str.lstrip/rstrip would strip character *sets*, not literal affixes, so cut them off explicitly:
        if prefix and timestring.startswith(prefix):
            timestring = timestring[len(prefix):]
        if postfix and timestring.endswith(postfix):
            timestring = timestring[:-len(postfix)]

        if timeformat:  # If format is given, parse accordingly:
            timestring = timestring.strip()
            imgtime = datetime.datetime.strptime(timestring, timeformat)
        else:  # Else try to parse as best as guessable:
            imgtime = dparser.parse(filename, fuzzy=True)
        timefiles[imgtime] = filename

    # Build time axis:
    axlist = []
    starttime = min(timefiles.keys())
    for imgtime in iter(sorted(timefiles.keys())):
        axlist.append((imgtime - starttime).total_seconds())
    times = u.to_ureg(axlist, 'second').to(timeunit)
    pl = 'Time / ' + timeunitlabel  # Plot label for time axis.
    timeaxis = ds.Axis(times, label='time', plotlabel=pl)

    # ----------------------Create dataset------------------------
    # Test data size:
    sample_data = read_jpeg(
        os.path.join(folderpath, timefiles[list(timefiles.keys())[0]]))
    axlist = [timeaxis] + sample_data.axes
    newshape = timeaxis.shape + sample_data.shape
    # Build the data-structure that the loaded data gets filled into
    if h5target:
        chunks = True
        compression = 'gzip'
        compression_opts = 4

        # Probe HDF5 initialization to optimize buffer size:
        if chunks is True:  # Default is auto chunk alignment, so we need to probe.
            chunk_size = probe_chunksize(shape=newshape,
                                         compression=compression,
                                         compression_opts=compression_opts)
        else:
            chunk_size = chunks
        use_cache_size = buffer_needed(newshape, (0, ),
                                       chunk_size,
                                       dtype=np.uint8)

        # Initialize full DataSet with zeroes:
        dataspace = ds.Data_Handler_H5(
            unit=sample_data.get_datafield(0).get_unit(),
            shape=newshape,
            chunks=chunks,
            compression=compression,
            compression_opts=compression_opts,
            chunk_cache_mem_size=use_cache_size,
            dtype=np.uint8)
        dataarray = ds.DataArray(
            dataspace,
            label=sample_data.get_datafield(0).get_label(),
            plotlabel=sample_data.get_datafield(0).get_plotlabel(),
            h5target=dataspace.h5target,
            chunks=chunks,
            compression=compression,
            compression_opts=compression_opts,
            chunk_cache_mem_size=use_cache_size)
        dataset = ds.DataSet("Powerlaw " + folderpath, [dataarray],
                             axlist,
                             h5target=h5target,
                             chunk_cache_mem_size=use_cache_size)
    else:
        # In-memory data processing without h5 files.
        dataspace = u.to_ureg(np.zeros(newshape, dtype=np.uint8),
                              sample_data.datafields[0].get_unit())
        dataarray = ds.DataArray(
            dataspace,
            label=sample_data.get_datafield(0).get_label(),
            plotlabel=sample_data.get_datafield(0).get_plotlabel(),
            h5target=None)
        dataset = ds.DataSet("Time Log " + folderpath, [dataarray],
                             axlist,
                             h5target=h5target)
    dataarray = dataset.get_datafield(0)

    # ----------------------Fill dataset------------------------
    # Fill in data from imported tiffs:
    slicebase = tuple([np.s_[:] for j in range(len(sample_data.shape))])

    if verbose:
        import time
        print("Reading Time Series Folder of shape: ", dataset.shape)
        if h5target:
            print("... generating chunks of shape: ",
                  dataset.get_datafield(0).data.ds_data.chunks)
            print("... using cache size {0:d} MB".format(use_cache_size //
                                                         1024**2))
        else:
            print("... in memory")
        start_time = time.time()
    for i, imgtime in zip(list(range(len(timefiles))),
                          iter(sorted(timefiles.keys()))):
        islice = (i, ) + slicebase
        # Import jpeg:
        idata = read_jpeg(os.path.join(folderpath, timefiles[imgtime]))

        # Check data consistency:
        assert idata.shape == sample_data.shape, "Trying to combine scan data with different shape."
        for ax1, ax2 in zip(idata.axes, sample_data.axes):
            assert ax1.units == ax2.units, "Trying to combine scan data with different axis dimensionality."
        assert idata.get_datafield(0).units == sample_data.get_datafield(0).units, \
         "Trying to combine scan data with different data dimensionality."

        # Write data:
        dataarray[islice] = idata.get_datafield(0).data
        if verbose:
            tpf = ((time.time() - start_time) / float(i + 1))
            etr = tpf * (dataset.shape[0] - (i + 1))  # remaining files, not off by two
            print(
                "image {0:d} / {1:d}, Time/File {2:.2f}s ETR: {3:.1f}s".format(
                    i + 1, dataset.shape[0], tpf, etr))

    return dataset
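A hedged call sketch; the folder and file name pattern are hypothetical.

# Hypothetical folder of files like "img_2017-12-07_13-37-00.jpg":
logdata = timelog_folder("timelog_run01",
                         timeformat="%Y-%m-%d_%H-%M-%S",
                         prefix="img_",
                         h5target="timelog.hdf5")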
Example #10
			newda = self.bin_data(h5target=None)

		newds = ds.DataSet(self.data.label + " binned", (newda,), newaxis,
						   self.data.plotconf, h5target=h5target)
		return newds


if __name__ == '__main__':  # Just for testing:
	print("Testing...")
	test_fakedata = True  # Create and test on a fake dataset that is easy to inspect:
	if test_fakedata:
		print("Building fake data...")
		fakearray = np.stack([np.arange(50) for i in range(25)] + [np.arange(50) + 100 for i in range(25)])
		fakedata = ds.DataArray(fakearray, h5target=True, chunks=(5, 5))
		fakeds = ds.DataSet("test", [ds.DataArray(fakedata)],
							[ds.Axis(np.arange(50), label="y"), ds.Axis(np.arange(50), label="x")],
							h5target=True)
		fakeds.saveh5("binning_testdata.hdf5")
		print("Test binning on fake data...")
		b = Binning(fakeds, binAxisID=('y', 'x'), binFactor=(2, 8))
		binnedds = b.bin(h5target="binning_outdata.hdf5")
		binnedds.saveh5()

	test_realdata = False  # Testing real data from NFC Session on Ben's PC:
	if test_realdata:
		path = 'E:\\NFC15\\20171207 ZnO+aSiH\\01 DLD PSI -3 to 150 fs step size 400as\\Maximamap\\Driftcorrected\\summed_runs'
		data_dir = path + '\\projected.hdf5'
		# data_dir = path + '\\summed_data.hdf5'
		h5target = path + '\\binned_data.hdf5'
		data = ds.DataSet.from_h5file(data_dir, h5target=h5target)
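For completeness, a sketch of re-opening the binned result in a later session, mirroring the `from_h5file` call above.

# Sketch: load the binned data written by the test above:
binnedds = ds.DataSet.from_h5file("binning_outdata.hdf5")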
Example #11
File: pes.py  Project: hartelt/snomtools
            for guesselement, guessunit in zip(guess, unitslist):
                guesslist.append(u.to_ureg(guesselement, guessunit).magnitude)
            guess = tuple(guesslist)
        return curve_fit(fermi_edge, energies.magnitude, intensities.magnitude,
                         guess)


if __name__ == "__main__":
    # Generate some test data:
    E_f, d_E, c, d = 30, 1, 100, 1
    f = FermiEdge.from_coeffs((E_f, d_E, c, d))
    energies = u.to_ureg(np.linspace(25, 35, 1000), 'eV')
    intensities = u.to_ureg(
        f.fermi_edge(energies).magnitude + np.random.randn(1000) * 5, 'count')
    testdata = ds.DataSet("testdata",
                          (ds.DataArray(intensities, label="counts"), ),
                          (ds.Axis(energies, label="E"), ))
    testroi = ds.ROI(testdata, {'E': [u.to_ureg(29.8, 'eV'), None]})

    # Test the single modules:
    guess = FermiEdge.guess_parameters(energies, intensities)
    result = FermiEdge.fit_fermi_edge(energies, intensities, guess)
    print("result: {0}".format(result[0]))
    f = FermiEdge.from_xy(energies, intensities, guess)

    # Test the full thing:
    f = FermiEdge(testroi)
    print("result: {0}".format([f.E_f, f.dE, f.c, f.d]))

    from matplotlib import pyplot as plt
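The fitted model itself is not shown in this fragment. A minimal sketch of a Fermi edge in the standard Fermi-Dirac form, reusing the parameter names (E_f, d_E, c, d) from above, is an assumption, not necessarily snomtools' exact implementation:

import numpy as np

def fermi_edge_sketch(E, E_f, d_E, c, d):
    # Assumed standard form: amplitude c, offset d, edge position E_f, width d_E
    # (not necessarily snomtools' exact model):
    return c / (np.exp((E - E_f) / d_E) + 1.0) + d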
Example #12
    def resultACdata(self, h5target=True, write_to_indata=False):
        """
        Calculates the autocorrelation (AC) traces corresponding to the OBE fit results and writes them into a DataSet.

        :param h5target: The HDF5 target for the output DataSet. Ignored if *write_to_indata* is set.

        :param write_to_indata: If True, the fit ACs are written into a new DataField "obefit" of the input data
            instead of a separate DataSet.

        :return: The DataSet containing the fit ACs.
        """
        # Prepare DataSet to write to:
        if write_to_indata:
            if self.data.h5target:
                dh = ds.Data_Handler_H5(unit=self.countunit,
                                        shape=self.data.shape)
                self.data.add_datafield(dh,
                                        label="obefit",
                                        plotlabel="OBE fit")
            else:
                self.data.add_datafield(np.zeros(self.data.shape),
                                        self.countunit,
                                        label="obefit",
                                        plotlabel="OBE fit")
            outdata = self.data
            outdf = self.data.get_datafield('obefit')
        else:
            if h5target:
                outdf = ds.DataArray(ds.Data_Handler_H5(unit=self.countunit,
                                                        shape=self.data.shape),
                                     label="obefit",
                                     plotlabel="OBE fit")
            else:
                outdf = ds.DataArray(np.zeros(self.data.shape),
                                     self.countunit,
                                     label="obefit",
                                     plotlabel="OBE fit")
            outdata = ds.DataSet("OBE fit", [outdf],
                                 self.data.axes,
                                 h5target=h5target)
            outdf = outdata.get_datafield('obefit')

        # Set global variables for copy-pasted methods:
        # TODO: Use proper class methods that don't need these ugly global variables.
        # gpuOBE_stepsize is the stepsize with which the actual interferometric Autocorrelation (IAC) is calculated.
        # For non phase-resolved evaluation, the omega_0 component is extracted afterwards with a lowpass filter.
        # This means you should NOT set this to large values to save calculation time!
        # Use something well below optical cycle (e.g. 0.2fs) to have good IAC!
        global gpuOBE_stepsize
        gpuOBE_stepsize = self.cuda_IAC_stepsize.magnitude
        global gpuOBE_laserBlau
        gpuOBE_laserBlau = self.laser_lambda.magnitude
        global gpuOBE_LaserBlauFWHM
        gpuOBE_LaserBlauFWHM = self.laser_AC_FWHM.magnitude
        # gpuOBE_normparameter is used in TauACCopol to switch on normalizing the curve before scaling and offset
        # are applied. It must be True to fit including Amplitude and Offset, as done below!
        global gpuOBE_normparameter
        gpuOBE_normparameter = True
        global gpuOBE_Phaseresolution
        gpuOBE_Phaseresolution = False

        if verbose:
            print("Writing {0} ACs for OBE fit with cuda...".format(
                np.prod(self.resultshape)))
            print("Start: ", datetime.datetime.now().isoformat())
            start_time = time.time()
            print_counter = 0
            print_interval = 1

        for ac_slice in np.ndindex(
                self.resultshape):  # Simple iteration for now.
            # Build source data slice:
            out_slice = self.source_slice_from_target_slice(ac_slice)
            # Calculate result AC and write down:
            outdf[out_slice] = self.resultAC(ac_slice)[1]

            if verbose:
                print_counter += 1
                if print_counter % print_interval == 0:
                    tpf = ((time.time() - start_time) / float(print_counter))
                    etr = tpf * (np.prod(self.resultshape) - print_counter)
                    print("AC {0:d} / {1:d}, Time/AC {3:.4f}s ETR: {2:.1f}s".
                          format(print_counter, np.prod(self.resultshape), etr,
                                 tpf))

        return outdata
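Finally, a usage sketch; `obefit` stands for a fitted instance of the surrounding class.

# Sketch; `obefit` is a fitted OBE-fit instance as used above:
ac_data = obefit.resultACdata(h5target="obefit_acs.hdf5")
# Or attach the fitted ACs directly to the input data as DataField "obefit":
indata_with_fit = obefit.resultACdata(write_to_indata=True)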