Example #1
    def records(self):
        """
        The Borealis data in a dictionary of records, according to the
        site file format.

        Raises
        ------
        BorealisRestructureError
            Raised if an error occurs while restructuring from array to site style.
        """
        if self.format.is_restructureable():
            try:
                records = self.format._array_to_site(self.arrays)
                BorealisUtilities.check_records(
                    self.filename, records,
                    self.format.site_single_element_types(),
                    self.format.site_array_dtypes())
            except Exception as err:
                raise borealis_exceptions.BorealisRestructureError(
                    'Arrays for {}: Error restructuring {} from array to site'
                    ' style: {}'.format(self.filename, self.format.__name__,
                                        err)) from err
        else:
            raise borealis_exceptions.BorealisRestructureError(
                'Arrays for {}: File format {} not recognized as '
                'restructureable from site to array style or vice versa.'
                ''.format(self.filename, self.format.__name__))

        return records
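
The property wraps any failure from the conversion or validation in a BorealisRestructureError and chains the original exception with "from err", so the underlying cause stays inspectable. Below is a minimal, self-contained sketch of that wrap-and-chain pattern; RestructureError and convert() are hypothetical stand-ins for BorealisRestructureError and _array_to_site(), not pyDARNio's actual API:

class RestructureError(Exception):
    pass

def convert(arrays):
    raise ValueError("dimension mismatch")  # simulated conversion failure

try:
    try:
        records = convert({})
    except Exception as err:
        raise RestructureError(
            'Error restructuring from array to site style: {}'.format(err)
        ) from err
except RestructureError as exc:
    print(exc)                  # wrapped, human-readable message
    print(repr(exc.__cause__))  # original ValueError preserved via "from err"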
Example #2
    def arrays(self):
        """
        The Borealis data in a dictionary of arrays, according to the
        restructured array file format.

        Raises
        ------
        BorealisRestructureError
            Raised if an error occurs while restructuring from site to array style.
        """

        if self.format.is_restructureable():
            try:
                arrays = self.format._site_to_array(self.records)
                BorealisUtilities.check_arrays(
                    self.filename, arrays,
                    self.format.array_single_element_types(),
                    self.format.array_array_dtypes(),
                    self.format.unshared_fields())
            except Exception as err:
                raise borealis_exceptions.BorealisRestructureError(
                    'Records for {}: Error restructuring {} from site to array'
                    ' style: {}'
                    ''.format(self.filename, self.format.__name__, err)) \
                    from err
        else:
            raise borealis_exceptions.BorealisRestructureError(
                'Records for {}: File format {} not recognized as '
                'restructureable from site to array style'
                ''.format(self.filename, self.format.__name__))

        return arrays
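
In the full source, records and arrays are exposed as read-only properties that perform the conversion on access. Here is a small sketch of that lazy-conversion idea under that assumption; SiteData and its trivial field-stacking conversion are invented for illustration and stand in for the real conversion shown in Example #4:

class SiteData:
    def __init__(self, records):
        self._records = records

    @property
    def arrays(self):
        # convert the site-style records to array style on every access;
        # here the "conversion" is simply stacking each field into a list
        fields = next(iter(self._records.values())).keys()
        return {field: [rec[field] for rec in self._records.values()]
                for field in fields}

data = SiteData({"1558583991500": {"freq": 10500},
                 "1558583992500": {"freq": 10500}})
print(data.arrays)  # {'freq': [10500, 10500]}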
Example #3
    def _array_to_site(cls, data_dict: dict) -> OrderedDict:
        """
        Base function for converting array Borealis data to
        site format.

        Parameters
        ----------
        data_dict: dict
            dictionary of array restructured Borealis data

        Returns
        -------
        new_data_dict
            An OrderedDict of timestamped records as if loaded from
            the original site file.

        See Also
        --------
        is_restructureable
        flatten_site_arrays
        shared_fields
        site_specific_fields_generate
        unshared_fields_dims_site

        Notes
        -----
        The results will differ based on the format class, as many of the
        class methods used inside this method should be specific
        to the format and updated in the child class. However, this is the
        process required for any restructuring, so this method itself should
        not be updated by the child class.
        """

        if not cls.is_restructureable():
            raise borealis_exceptions.BorealisRestructureError(
                'File format {} not recognized as '
                'restructureable from site to array style or vice versa.'
                ''.format(cls.__name__))

        timestamp_dict = OrderedDict()
        for record_num, seq_timestamp in \
                enumerate(data_dict["sqn_timestamps"]):
            # format dictionary key in the same way it is done
            # in datawrite on site
            seq_datetime = datetime.utcfromtimestamp(seq_timestamp[0])
            epoch = datetime.utcfromtimestamp(0)
            key = str(int((seq_datetime - epoch).total_seconds() * 1000))

            timestamp_dict[key] = dict()
            # populate shared fields in each record
            for field in cls.shared_fields():
                timestamp_dict[key][field] = data_dict[field]

            # populate site specific fields using given functions
            # that take both the arrays data and the record number
            for field in cls.site_specific_fields():
                generate_field = cls.site_specific_fields_generate()[field]
                timestamp_dict[key][field] = generate_field(data_dict,
                                                            record_num)

            for field in cls.unshared_fields():
                if field in cls.single_element_types():
                    datatype = cls.single_element_types()[field]
                    # field is not an array, single element per record.
                    # unshared_fields_dims_site should give an empty list.
                    timestamp_dict[key][field] = datatype(
                        data_dict[field][record_num])
                else:  # field in array_dtypes
                    datatype = cls.array_dtypes()[field]
                    # need to get the dims correct, not always equal to the max
                    site_dims = [
                        dimension_function(data_dict, record_num)
                        for dimension_function in
                        cls.unshared_fields_dims_site()[field]
                    ]
                    index_slice = [slice(0, i) for i in site_dims]
                    index_slice.insert(0, record_num)
                    index_slice = tuple(index_slice)
                    timestamp_dict[key][field] = data_dict[field][index_slice]

        timestamp_dict = cls.flatten_site_arrays(timestamp_dict)

        return timestamp_dict
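
The record keys above mirror how datawrite names records on site: the first sequence timestamp of each record converted to integer milliseconds since the Unix epoch. A standalone sketch of just that key construction, using a made-up timestamp:

from datetime import datetime

seq_timestamp = 1558583991.5  # hypothetical first sqn_timestamp of a record
seq_datetime = datetime.utcfromtimestamp(seq_timestamp)
epoch = datetime.utcfromtimestamp(0)
key = str(int((seq_datetime - epoch).total_seconds() * 1000))
print(key)                    # '1558583991500'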
Example #4
    def _site_to_array(cls, data_dict: OrderedDict) -> dict:
        """
        Base function for converting site Borealis data to
        restructured array format.

        Parameters
        ----------
        data_dict: OrderedDict
            a dict of timestamped records loaded from an hdf5 Borealis site
            file

        Returns
        -------
        new_data_dict
            A dictionary containing the data from data_dict
            reformatted to be stored entirely in array style, or as
            one entry if the field does not change between records.
            This means that for fields that change between records,
            the first dimension in the array will equal num_records
            (these are called unshared_fields). For fields common to all
            records, there will only be the one value that applies (these
            are known as shared_fields).

        See Also
        --------
        is_restructureable
        reshape_site_arrays
        shared_fields
        array_specific_fields_generate
        unshared_fields_dims_array

        Notes
        -----
        The results will differ based on the format class, as many of the
        class methods used inside this method should be specific
        to the format and updated in the child class. However, this is the
        process required for any restructuring, so this method itself should
        not be updated by the child class.
        """

        if not cls.is_restructureable():
            raise borealis_exceptions.BorealisRestructureError(
                'File format {} not recognized as '
                'restructureable from site to array style or vice versa.'
                ''.format(cls.__name__))

        new_data_dict = dict()
        num_records = len(data_dict)

        # some fields are linear in site style and need to be reshaped.
        data_dict = cls.reshape_site_arrays(data_dict)

        # write shared fields to dictionary
        first_key = list(data_dict.keys())[0]
        for field in cls.shared_fields():
            new_data_dict[field] = data_dict[first_key][field]

        # write array specific fields using the given functions.
        for field in cls.array_specific_fields():
            new_data_dict[field] = cls.array_specific_fields_generate()[field](
                data_dict)

        # write the unshared fields, initializing empty arrays to start.
        temp_array_dict = dict()

        # get array dims of the unshared fields arrays
        field_dimensions = {}
        for field in cls.unshared_fields():
            dims = [
                dimension_function(data_dict) for dimension_function in
                cls.unshared_fields_dims_array()[field]
            ]
            field_dimensions[field] = dims

        # all fields to become arrays
        for field, dims in field_dimensions.items():
            array_dims = [num_records] + dims
            array_dims = tuple(array_dims)

            if field in cls.single_element_types():
                datatype = cls.single_element_types()[field]
            else:  # field in array_dtypes
                datatype = cls.array_dtypes()[field]
            empty_array = np.empty(array_dims, dtype=datatype)
            # initialize all values to NaN; some indices may not be filled
            # due to dimensions that are max values (num sequences, etc. can
            # change between records)
            empty_array[:] = np.nan
            temp_array_dict[field] = empty_array

        # iterate through the records, filling the unshared fields
        for rec_idx, k in enumerate(data_dict.keys()):
            for field in cls.unshared_fields():  # all unshared fields
                empty_array = temp_array_dict[field]
                if isinstance(data_dict[first_key][field], np.ndarray):
                    # only fill to this record's actual length; trailing NaNs
                    # remain for dims padded to their maximum value
                    data_buffer = data_dict[k][field]
                    buffer_shape = data_buffer.shape
                    index_slice = [slice(0, i) for i in buffer_shape]
                    # insert record index at start of array's slice list
                    index_slice.insert(0, rec_idx)
                    index_slice = tuple(index_slice)
                    # place data buffer in the correct place
                    empty_array[index_slice] = data_buffer
                else:  # not an array, num_records is the only dimension
                    empty_array[rec_idx] = data_dict[k][field]

        new_data_dict.update(temp_array_dict)

        return new_data_dict
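
Finally, a standalone sketch of the padding technique used for the unshared fields above: each record's array is written into a NaN-initialized buffer through a per-record slice, so records shorter than the allocated maximum keep NaN padding. The record names, shapes, and values are invented for illustration:

import numpy as np

records = {
    "rec0": np.array([1.0, 2.0, 3.0]),
    "rec1": np.array([4.0, 5.0]),             # a shorter record
}
max_len = max(value.shape[0] for value in records.values())

padded = np.empty((len(records), max_len), dtype=np.float64)
padded[:] = np.nan                            # unfilled positions stay NaN

for rec_idx, value in enumerate(records.values()):
    index_slice = [slice(0, i) for i in value.shape]
    index_slice.insert(0, rec_idx)            # record index leads the slices
    padded[tuple(index_slice)] = value

print(padded)
# [[ 1.  2.  3.]
#  [ 4.  5. nan]]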