def test_normalization_of_string_arrays_netcdf3(self):
    """Round-trip character-array string variables through a NETCDF3 file
    and check that ``normalize_array`` reconstructs the original strings.

    NETCDF3_CLASSIC has no variable-length string type, so strings must be
    stored as 'S1' character arrays: 1D for a single string, 2D for an
    array of strings.
    """
    sample = 'boodsfasfasdfm'
    width = len(sample)

    with nc4.Dataset(self.fp, 'w', format="NETCDF3_CLASSIC") as ncd:
        ncd.createDimension('n', width)

        # A single string stored as a 1D character array
        ncd.createVariable('single_S', 'S1', ('n',))
        for name, var in ncd.variables.items():
            if name.startswith('single_'):
                var[:] = nc4.stringtoarr(sample, width)

        # An array of strings stored as a 2D character array
        ncd.createVariable('many_S', 'S1', ('n', 'n',))
        for name, var in ncd.variables.items():
            if name.startswith('many_'):
                var[:, :] = np.tile(
                    nc4.stringtoarr(sample, width), width
                ).reshape(var.shape)

    with nc4.Dataset(self.fp) as ncd:
        assert normalize_array(ncd.variables['single_S']) == sample
        assert np.all(normalize_array(ncd.variables['many_S']) == [sample] * width)
def is_mine(cls, dsg, strict=False):
    """Return True when *dsg* matches the ragged timeseries-profile layout.

    Checks the featureType, the presence of all four coordinate axes, a
    sample-count ("row size") variable, a profile id variable, and — when
    the station id variable is an array — a station index variable.

    When ``strict`` is True the first failing check is re-raised instead
    of returning False.
    """
    try:
        assert dsg.featureType.lower() == 'timeseriesprofile'

        # Every coordinate axis must have at least one candidate variable
        for axes in (dsg.t_axes(), dsg.x_axes(), dsg.y_axes(), dsg.z_axes()):
            assert len(axes) >= 1

        # Exactly one count variable pointing at an existing sample dimension
        count_vars = dsg.filter_by_attrs(
            sample_dimension=lambda x: x is not None
        )
        assert len(count_vars) == 1
        assert count_vars[0].sample_dimension in dsg.dimensions  # Sample dimension

        # A profile id variable must exist (only its presence matters here)
        _ = dsg.filter_by_attrs(
            cf_role='profile_id'
        )[0]

        station_var = dsg.filter_by_attrs(
            cf_role='timeseries_id'
        )[0]
        station_data = normalize_array(station_var)
        if not isinstance(station_data, str) and len(station_data.shape) > 0:
            # Multiple stations require exactly one instance index variable
            index_vars = dsg.filter_by_attrs(
                instance_dimension=lambda x: x is not None
            )
            assert len(index_vars) == 1
            assert index_vars[0].instance_dimension in dsg.dimensions  # Station dimension
    except BaseException:
        if strict is True:
            raise
        return False
    return True
def is_mine(cls, dsg, strict=False):
    """Return True when *dsg* matches the ragged timeseries-profile layout.

    Parameters
    ----------
    dsg : dataset-like object exposing ``featureType``, axis lookups and
        ``filter_by_attrs`` (see sibling ``is_mine`` implementations).
    strict : bool, optional
        When True, re-raise the first failing check instead of returning
        False. Defaults to False so existing callers are unaffected.
    """
    try:
        assert dsg.featureType.lower() == 'timeseriesprofile'
        assert len(dsg.t_axes()) >= 1
        assert len(dsg.x_axes()) >= 1
        assert len(dsg.y_axes()) >= 1
        assert len(dsg.z_axes()) >= 1
        o_index_vars = dsg.filter_by_attrs(
            sample_dimension=lambda x: x is not None)
        assert len(o_index_vars) == 1
        assert o_index_vars[0].sample_dimension in dsg.dimensions  # Sample dimension

        svar = dsg.filter_by_attrs(cf_role='timeseries_id')[0]
        sdata = normalize_array(svar)
        # BUG FIX: normalize_array may return a plain str (single station),
        # which has no .shape — guard it before checking dimensionality.
        if not isinstance(sdata, str) and len(sdata.shape) > 0:
            r_index_vars = dsg.filter_by_attrs(
                instance_dimension=lambda x: x is not None)
            assert len(r_index_vars) == 1
            assert r_index_vars[0].instance_dimension in dsg.dimensions  # Station dimension
    except BaseException:
        # BUG FIX: previously only AssertionError was caught, so a dataset
        # with no featureType attribute (AttributeError) or no id variable
        # (IndexError) crashed instead of simply not matching.
        if strict is True:
            raise
        return False
    return True
def test_normalization_of_string_arrays_netcdf4(self):
    """Round-trip every NETCDF4 string storage flavor through a file and
    check that ``normalize_array`` reconstructs the original strings.

    Covers vlen str, numpy unicode scalars, '<U1' and 'S1' character
    arrays, both as single values and as arrays of strings.
    """
    thestr = 'bosadfsdfkljskfusdiofu987987987om'
    with nc4.Dataset(self.fp, 'w', format="NETCDF4") as ncd:
        dimsize = len(thestr)
        ncd.createDimension('n', dimsize)

        # Single str (no dimension)
        ncd.createVariable('single_str', str)
        # BUG FIX: np.unicode_ was removed in NumPy 2.0; np.str_ is the
        # same type under its supported name.
        ncd.createVariable('single_unicode_', np.str_)
        ncd.createVariable('single_U', '<U1')
        ncd.createVariable('single_S', 'S1', ('n',))
        for k, v in ncd.variables.items():
            if k.startswith('single_'):
                if v.dimensions:
                    v[:] = nc4.stringtoarr(thestr, dimsize)
                else:
                    v[0] = thestr

        # Array of str
        ncd.createVariable('many_str', str, ('n',))
        ncd.createVariable('many_unicode_', np.str_, ('n',))
        ncd.createVariable('many_U', '<U1', ('n',))
        ncd.createVariable('many_S', 'S1', ('n', 'n',))
        for k, v in ncd.variables.items():
            if k.startswith('many_'):
                if len(v.dimensions) > 1:
                    v[:, :] = np.tile(nc4.stringtoarr(thestr, dimsize), dimsize)
                else:
                    v[:] = np.tile(thestr, dimsize)

    with nc4.Dataset(self.fp) as ncd:
        assert normalize_array(ncd.variables['single_str']) == thestr
        assert normalize_array(ncd.variables['single_unicode_']) == thestr
        assert normalize_array(ncd.variables['single_U']) == thestr
        assert normalize_array(ncd.variables['single_S']) == thestr
        assert np.all(normalize_array(ncd.variables['many_str']) == [thestr] * len(thestr))
        assert np.all(normalize_array(ncd.variables['many_unicode_']) == [thestr] * len(thestr))
        assert np.all(normalize_array(ncd.variables['many_U']) == [thestr] * len(thestr))
        assert np.all(normalize_array(ncd.variables['many_S']) == [thestr] * len(thestr))
def is_mine(cls, dsg, strict=False):
    """Return True when *dsg* matches the incomplete multidimensional
    trajectory layout (single trajectory, or trajectory x obs arrays).

    Parameters
    ----------
    dsg : dataset-like object exposing ``featureType``, axis lookups and
        ``filter_by_attrs``.
    strict : bool, optional
        When True, re-raise the first failing check instead of returning
        False (added for consistency with the other ``is_mine``
        implementations; defaults to False so callers are unaffected).
    """
    try:
        tvars = dsg.filter_by_attrs(cf_role='trajectory_id')
        assert len(tvars) == 1
        assert dsg.featureType.lower() == 'trajectory'
        assert len(dsg.t_axes()) == 1
        assert len(dsg.x_axes()) == 1
        assert len(dsg.y_axes()) == 1
        assert len(dsg.z_axes()) == 1

        # Allow for string variables
        tvar = tvars[0]
        # 0 = single
        # 1 = array of strings/ints/bytes/etc
        # 2 = array of character arrays
        assert 0 <= len(tvar.dimensions) <= 2

        ts = normalize_array(tvar)
        is_single = ts.size == 1

        t = dsg.t_axes()[0]
        x = dsg.x_axes()[0]
        y = dsg.y_axes()[0]
        z = dsg.z_axes()[0]
        assert t.dimensions == x.dimensions == y.dimensions == z.dimensions
        assert t.size == x.size == y.size == z.size

        if is_single:
            assert len(t.dimensions) == 1
            time_dim = dsg.dimensions[t.dimensions[0]]
            for dv in dsg.data_vars():
                assert len(dv.dimensions) == 1
                assert time_dim.name in dv.dimensions
                assert dv.size == time_dim.size
        else:
            # This `time` being two dimensional is unique to
            # IncompleteMultidimensionalTrajectory
            assert len(t.dimensions) == 2
            t_dim = dsg.dimensions[t.dimensions[0]]
            o_dim = dsg.dimensions[t.dimensions[1]]
            for dv in dsg.data_vars():
                assert dv.size == t.size
                assert len(dv.dimensions) == 2
                assert t_dim.name in dv.dimensions
                assert o_dim.name in dv.dimensions
                assert dv.size == t_dim.size * o_dim.size
    except BaseException:
        if strict is True:
            raise
        return False
    return True
def is_mine(cls, dsg, strict=False):
    """Return True when *dsg* matches the incomplete multidimensional
    profile layout (single profile, or profile x z arrays).

    Parameters
    ----------
    dsg : dataset-like object exposing ``featureType``, axis lookups and
        ``filter_by_attrs``.
    strict : bool, optional
        When True, re-raise the first failing check instead of returning
        False (added for consistency with the other ``is_mine``
        implementations; defaults to False so callers are unaffected).
    """
    try:
        pvars = dsg.filter_by_attrs(cf_role='profile_id')
        assert len(pvars) == 1
        assert dsg.featureType.lower() == 'profile'
        assert len(dsg.t_axes()) == 1
        assert len(dsg.x_axes()) == 1
        assert len(dsg.y_axes()) == 1
        assert len(dsg.z_axes()) == 1

        # Allow for string variables
        pvar = pvars[0]
        # 0 = single
        # 1 = array of strings/ints/bytes/etc
        # 2 = array of character arrays
        assert 0 <= len(pvar.dimensions) <= 2

        ps = normalize_array(pvar)
        is_single = ps.size == 1

        t = dsg.t_axes()[0]
        x = dsg.x_axes()[0]
        y = dsg.y_axes()[0]
        z = dsg.z_axes()[0]
        assert len(z.dimensions) == 1
        z_dim = dsg.dimensions[z.dimensions[0]]

        if is_single:
            assert t.size == 1
            assert x.size == 1
            assert y.size == 1
            for dv in dsg.data_vars():
                assert len(dv.dimensions) == 1
                assert z_dim.name in dv.dimensions
                assert dv.size == z_dim.size
        else:
            assert t.size == pvar.size
            assert x.size == pvar.size
            assert y.size == pvar.size
            p_dim = dsg.dimensions[pvar.dimensions[0]]
            for dv in dsg.data_vars():
                assert len(dv.dimensions) == 2
                assert z_dim.name in dv.dimensions
                assert p_dim.name in dv.dimensions
                assert dv.size == z_dim.size * p_dim.size
    except BaseException:
        if strict is True:
            raise
        return False
    return True
def to_dataframe(self, clean_cols=True, clean_rows=True):
    """Flatten a contiguous ragged trajectory-profile file into a pandas
    DataFrame with one row per sample (obs).

    Parameters
    ----------
    clean_cols : bool
        Drop columns that contain no data at all.
    clean_rows : bool
        Drop rows where every data variable is masked.

    Returns
    -------
    pandas.DataFrame with columns t, x, y, z, trajectory, profile,
    distance plus one column per data/ancillary variable.
    """
    # The index variable (trajectory_index) is identified by having an
    # attribute with name of instance_dimension whose value is the instance
    # dimension name (trajectory in this example). The index variable must
    # have the profile dimension as its sole dimension, and must be type
    # integer. Each value in the index variable is the zero-based trajectory
    # index that the profile belongs to i.e. profile p belongs to trajectory
    # i=trajectory_index(p), as in section H.2.5.
    r_index_var = self.filter_by_attrs(instance_dimension=lambda x: x is not None)[0]
    p_dim = self.dimensions[r_index_var.dimensions[0]]       # Profile dimension
    r_dim = self.dimensions[r_index_var.instance_dimension]  # Trajectory dimension

    # The count variable (row_size) contains the number of elements for
    # each profile, which must be written contiguously. The count variable
    # is identified by having an attribute with name sample_dimension whose
    # value is the sample dimension (obs in this example) being counted. It
    # must have the profile dimension as its sole dimension, and must be
    # type integer
    o_index_var = self.filter_by_attrs(sample_dimension=lambda x: x is not None)[0]
    o_dim = self.dimensions[o_index_var.sample_dimension]  # Sample dimension

    try:
        rvar = self.filter_by_attrs(cf_role='trajectory_id')[0]
        traj_indexes = normalize_array(rvar)
        assert traj_indexes.size == r_dim.size
    except BaseException:
        logger.warning('Could not pull trajectory values from a variable with "cf_role=trajectory_id", using a computed range.')
        traj_indexes = np.arange(r_dim.size)
    try:
        pvar = self.filter_by_attrs(cf_role='profile_id')[0]
        profile_indexes = normalize_array(pvar)
        assert profile_indexes.size == p_dim.size
    except BaseException:
        logger.warning('Could not pull profile values from a variable with "cf_role=profile_id", using a computed range.')
        profile_indexes = np.arange(p_dim.size)  # Profile dimension

    # When more than one candidate axis variable exists, pick the one
    # dimensioned by the profile dimension (sample dimension for Z) whose
    # 'axis' attribute matches.
    tvars = self.t_axes()
    if len(tvars) > 1:
        tvar = [
            v for v in self.t_axes()
            if v.dimensions == (p_dim.name,) and getattr(v, 'axis', '').lower() == 't'
        ][0]
    else:
        tvar = tvars[0]

    xvars = self.x_axes()
    if len(xvars) > 1:
        xvar = [
            v for v in self.x_axes()
            if v.dimensions == (p_dim.name,) and getattr(v, 'axis', '').lower() == 'x'
        ][0]
    else:
        xvar = xvars[0]

    yvars = self.y_axes()
    if len(yvars) > 1:
        yvar = [
            v for v in self.y_axes()
            if v.dimensions == (p_dim.name,) and getattr(v, 'axis', '').lower() == 'y'
        ][0]
    else:
        yvar = yvars[0]

    zvars = self.z_axes()
    if len(zvars) > 1:
        zvar = [
            v for v in self.z_axes()
            if v.dimensions == (o_dim.name,) and getattr(v, 'axis', '').lower() == 'z'
        ][0]
    else:
        zvar = zvars[0]

    # Expand each per-profile value out to the sample (obs) dimension
    p = np.ma.masked_all(o_dim.size, dtype=profile_indexes.dtype)
    r = np.ma.masked_all(o_dim.size, dtype=traj_indexes.dtype)
    t = np.ma.masked_all(o_dim.size, dtype=tvar.dtype)
    x = np.ma.masked_all(o_dim.size, dtype=xvar.dtype)
    y = np.ma.masked_all(o_dim.size, dtype=yvar.dtype)
    si = 0
    for i in np.arange(profile_indexes.size):
        ei = si + o_index_var[i]
        p[si:ei] = profile_indexes[i]
        r[si:ei] = traj_indexes[r_index_var[i]]
        t[si:ei] = tvar[i]
        x[si:ei] = xvar[i]
        y[si:ei] = yvar[i]
        si = ei

    t_mask = False
    tfill = get_fill_value(tvar)
    if tfill is not None:
        t_mask = np.copy(np.ma.getmaskarray(t))
        t[t_mask] = 1
    t = np.ma.MaskedArray(
        nc4.num2date(t, tvar.units, getattr(tvar, 'calendar', 'standard'))
    )
    # Patch the time variable back to its original mask, since num2date
    # breaks any missing/fill values
    t[t_mask] = np.ma.masked

    # X and Y
    x = generic_masked(x, minv=-180, maxv=180).round(5)
    y = generic_masked(y, minv=-90, maxv=90).round(5)

    # Distance
    d = np.ma.zeros(o_dim.size, dtype=np.float64)
    d[1:] = great_distance(start_latitude=y[0:-1], end_latitude=y[1:], start_longitude=x[0:-1], end_longitude=x[1:])['distance']
    d = generic_masked(np.cumsum(d), minv=0).round(2)

    # Sample dimension
    z = generic_masked(zvar[:].flatten(), attrs=self.vatts(zvar.name)).round(5)

    df_data = {
        't': t,
        'x': x,
        'y': y,
        'z': z,
        'trajectory': r,
        'profile': p,
        'distance': d
    }

    building_index_to_drop = np.ones(o_dim.size, dtype=bool)
    extract_vars = list(set(self.data_vars() + self.ancillary_vars()))
    for dvar in extract_vars:
        # Profile dimensions
        if dvar.dimensions == (p_dim.name,):
            vdata = np.ma.masked_all(o_dim.size, dtype=dvar.dtype)
            si = 0
            for j in np.arange(profile_indexes.size):
                ei = si + o_index_var[j]
                vdata[si:ei] = dvar[j]
                si = ei
        # Sample dimensions
        elif dvar.dimensions == (o_dim.name,):
            vdata = generic_masked(dvar[:].flatten(), attrs=self.vatts(dvar.name)).round(3)
        else:
            logger.warning("Skipping variable {}... it didn't seem like a data variable".format(dvar))
            # BUG FIX: without this `continue` the loop fell through and
            # reused the previous iteration's vdata (or raised
            # UnboundLocalError on the first iteration).
            continue
        building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True)  # noqa
        df_data[dvar.name] = vdata

    df = pd.DataFrame(df_data)

    # Drop all data columns with no data
    if clean_cols:
        df = df.dropna(axis=1, how='all')

    # Drop all data rows with no data variable data
    if clean_rows:
        df = df.iloc[~building_index_to_drop]

    return df
def is_mine(cls, dsg, strict=False):
    """Return True when *dsg* matches the incomplete multidimensional
    profile layout; with ``strict`` the first failing check is re-raised.
    """
    try:
        id_vars = dsg.filter_by_attrs(cf_role='profile_id')
        assert len(id_vars) == 1
        assert dsg.featureType.lower() == 'profile'
        for axes in (dsg.t_axes(), dsg.x_axes(), dsg.y_axes(), dsg.z_axes()):
            assert len(axes) >= 1

        # Allow for string variables:
        # 0 dims = single value, 1 dim = array of strings/ints/bytes,
        # 2 dims = array of character arrays
        id_var = id_vars[0]
        assert 0 <= len(id_var.dimensions) <= 2

        t = dsg.t_axes()[0]
        x = dsg.x_axes()[0]
        y = dsg.y_axes()[0]
        z = dsg.z_axes()[0]
        assert len(z.dimensions) == 1
        z_dim = dsg.dimensions[z.dimensions[0]]

        normalized = normalize_array(id_var)
        if id_var.ndim == 0:
            single = True
        elif id_var.ndim == 2:
            single = False
        else:
            # Non-dimensioned string variable, or a 1D string-typed array,
            # both identify a single profile
            single = (
                isinstance(normalized, six.string_types) or
                (id_var.ndim == 1 and hasattr(normalized, 'dtype') and normalized.dtype.kind in ['U', 'S'])
            )

        if single:
            assert t.size == 1
            assert x.size == 1
            assert y.size == 1
            for dv in dsg.data_vars():
                assert len(dv.dimensions) == 1
                assert z_dim.name in dv.dimensions
                assert dv.size == z_dim.size
        else:
            assert t.size == id_var.size
            assert x.size == id_var.size
            assert y.size == id_var.size
            p_dim = dsg.dimensions[id_var.dimensions[0]]
            for dv in dsg.data_vars():
                # dimensioned by profile or profile, z
                assert len(dv.dimensions) in [1, 2]
                assert z_dim.name in dv.dimensions or p_dim.name in dv.dimensions
                assert dv.size in [z_dim.size, p_dim.size, z_dim.size * p_dim.size]
    except BaseException:
        if strict is True:
            raise
        return False
    return True
def to_dataframe(self, clean_cols=True, clean_rows=True):
    """Flatten an incomplete multidimensional profile file into a pandas
    DataFrame with one row per (profile, z) pair.

    Parameters
    ----------
    clean_cols : bool
        Drop columns that contain no data at all.
    clean_rows : bool
        Drop rows where every data variable is masked.
    """
    zvar = self.z_axes()[0]
    zs = len(self.dimensions[zvar.dimensions[0]])

    # Profiles
    pvar = self.filter_by_attrs(cf_role='profile_id')[0]
    try:
        p = normalize_array(pvar)
    except ValueError:
        # BUG FIX: np.integer is an abstract scalar type and is not a
        # valid dtype argument on modern NumPy; the builtin int maps to
        # the default integer dtype.
        p = np.asarray(list(range(len(pvar))), dtype=int)
    ps = p.size
    p = p.repeat(zs)
    logger.debug(['profile data size: ', p.size])

    # Z
    z = generic_masked(zvar[:], attrs=self.vatts(zvar.name)).round(5)
    try:
        z = np.tile(z, ps)
    except ValueError:
        # Already two dimensional (per-profile z values)
        z = z.flatten()
    logger.debug(['z data size: ', z.size])

    # T
    tvar = self.t_axes()[0]
    t = nc4.num2date(tvar[:], tvar.units, getattr(tvar, 'calendar', 'standard'))
    if isinstance(t, datetime):
        # Size one
        t = np.array([t.isoformat()], dtype='datetime64')
    t = t.repeat(zs)
    logger.debug(['time data size: ', t.size])

    # X
    xvar = self.x_axes()[0]
    x = generic_masked(xvar[:].repeat(zs), attrs=self.vatts(xvar.name)).round(5)
    logger.debug(['x data size: ', x.size])

    # Y
    yvar = self.y_axes()[0]
    y = generic_masked(yvar[:].repeat(zs), attrs=self.vatts(yvar.name)).round(5)
    logger.debug(['y data size: ', y.size])

    # Distance (cumulative great-circle distance between successive points)
    d = np.ma.zeros(y.size, dtype=np.float64)
    d[1:] = great_distance(start_latitude=y[0:-1], end_latitude=y[1:], start_longitude=x[0:-1], end_longitude=x[1:])['distance']
    d = generic_masked(np.cumsum(d), minv=0).round(2)
    logger.debug(['distance data size: ', d.size])

    df_data = {'t': t, 'x': x, 'y': y, 'z': z, 'profile': p, 'distance': d}

    building_index_to_drop = np.ones(t.size, dtype=bool)
    extract_vars = list(set(self.data_vars() + self.ancillary_vars()))
    for dvar in extract_vars:
        vdata = np.ma.fix_invalid(
            np.ma.MaskedArray(dvar[:].round(3).flatten()))
        # A row is droppable only if every data variable is masked there
        building_index_to_drop = (building_index_to_drop == True) & (
            vdata.mask == True)  # noqa
        df_data[dvar.name] = vdata

    df = pd.DataFrame(df_data)

    # Drop all data columns with no data
    if clean_cols:
        df = df.dropna(axis=1, how='all')

    # Drop all data rows with no data variable data
    if clean_rows:
        df = df.iloc[~building_index_to_drop]

    return df
def is_mine(cls, dsg, strict=False):
    """Return True when *dsg* matches the incomplete multidimensional
    trajectory layout; with ``strict`` the first failing check is
    re-raised.
    """
    try:
        id_vars = dsg.filter_by_attrs(cf_role='trajectory_id')
        assert len(id_vars) == 1
        assert dsg.featureType.lower() == 'trajectory'
        for axes in (dsg.t_axes(), dsg.x_axes(), dsg.y_axes(), dsg.z_axes()):
            assert len(axes) >= 1

        # Allow for string variables:
        # 0 dims = single value, 1 dim = array of strings/ints/bytes,
        # 2 dims = array of character arrays
        id_var = id_vars[0]
        assert 0 <= len(id_var.dimensions) <= 2

        normalized = normalize_array(id_var)
        if id_var.ndim == 0:
            single = True
        elif id_var.ndim == 2:
            single = False
        else:
            # Non-dimensioned string variable, or a 1D string-typed array,
            # both identify a single trajectory
            single = (
                isinstance(normalized, six.string_types) or
                (id_var.ndim == 1 and hasattr(normalized, 'dtype') and normalized.dtype.kind in ['U', 'S'])
            )

        t = dsg.t_axes()[0]
        x = dsg.x_axes()[0]
        y = dsg.y_axes()[0]
        z = dsg.z_axes()[0]
        assert t.dimensions == x.dimensions == y.dimensions == z.dimensions
        assert t.size == x.size == y.size == z.size

        if single:
            assert len(t.dimensions) == 1
            t_dim = dsg.dimensions[t.dimensions[0]]
            for dv in dsg.data_vars():
                assert len(dv.dimensions) == 1
                assert t_dim.name in dv.dimensions
                assert dv.size == t_dim.size
        else:
            # A two dimensional `time` is unique to
            # IncompleteMultidimensionalTrajectory
            assert len(t.dimensions) == 2
            t_dim = dsg.dimensions[t.dimensions[0]]
            o_dim = dsg.dimensions[t.dimensions[1]]
            for dv in dsg.data_vars():
                assert dv.size == t.size
                assert len(dv.dimensions) == 2
                assert t_dim.name in dv.dimensions
                assert o_dim.name in dv.dimensions
                assert dv.size == t_dim.size * o_dim.size
    except BaseException:
        if strict is True:
            raise
        return False
    return True
def to_dataframe(self, clean_cols=True, clean_rows=True):
    """Flatten an incomplete multidimensional trajectory file into a
    pandas DataFrame with one row per observation.

    Parameters
    ----------
    clean_cols : bool
        Drop columns that contain no data at all.
    clean_rows : bool
        Drop rows where every data variable is masked.
    """
    # Z
    zvar = self.z_axes()[0]
    z = np.ma.fix_invalid(np.ma.MaskedArray(zvar[:]))
    z = z.flatten().round(5)
    logger.debug(['z data size: ', z.size])

    # T
    tvar = self.t_axes()[0]
    t = np.ma.MaskedArray(nc4.num2date(tvar[:], tvar.units, getattr(tvar, 'calendar', 'standard'))).flatten()
    # Patch the time variable back to its original mask, since num2date
    # breaks any missing/fill values
    if hasattr(tvar[0], 'mask'):
        t.mask = tvar[:].mask
    logger.debug(['time data size: ', t.size])

    # X
    xvar = self.x_axes()[0]
    x = np.ma.fix_invalid(np.ma.MaskedArray(xvar[:])).flatten().round(5)
    logger.debug(['x data size: ', x.size])

    # Y
    yvar = self.y_axes()[0]
    y = np.ma.fix_invalid(np.ma.MaskedArray(yvar[:])).flatten().round(5)
    logger.debug(['y data size: ', y.size])

    # Trajectories
    pvar = self.filter_by_attrs(cf_role='trajectory_id')[0]
    try:
        p = normalize_array(pvar)
    except BaseException:
        logger.exception('Could not pull trajectory values from the variable, using indexes.')
        # BUG FIX: np.integer is an abstract scalar type and is not a
        # valid dtype argument on modern NumPy; the builtin int maps to
        # the default integer dtype.
        p = np.asarray(list(range(len(pvar))), dtype=int)

    # The Dimension that the trajectory id variable doesn't have is what
    # the trajectory data needs to be repeated by
    dim_diff = self.dimensions[list(set(tvar.dimensions).difference(set(pvar.dimensions)))[0]]
    if dim_diff:
        p = p.repeat(dim_diff.size)
    logger.debug(['trajectory data size: ', p.size])

    # Distance (cumulative great-circle distance between successive points)
    d = np.append([0], great_distance(start_latitude=y[0:-1], end_latitude=y[1:], start_longitude=x[0:-1], end_longitude=x[1:])['distance'])
    d = np.ma.fix_invalid(np.ma.MaskedArray(np.cumsum(d)).astype(np.float64).round(2))
    logger.debug(['distance data size: ', d.size])

    df_data = {
        't': t,
        'x': x,
        'y': y,
        'z': z,
        'trajectory': p,
        'distance': d
    }

    building_index_to_drop = np.ones(t.size, dtype=bool)
    extract_vars = list(set(self.data_vars() + self.ancillary_vars()))
    for dvar in extract_vars:
        vdata = np.ma.fix_invalid(np.ma.MaskedArray(dvar[:].round(3).flatten()))
        # A row is droppable only if every data variable is masked there
        building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True)  # noqa
        df_data[dvar.name] = vdata

    df = pd.DataFrame(df_data)

    # Drop all data columns with no data
    if clean_cols:
        df = df.dropna(axis=1, how='all')

    # Drop all data rows with no data variable data
    if clean_rows:
        df = df.iloc[~building_index_to_drop]

    return df
def to_dataframe(self, clean_cols=False, clean_rows=False):
    """Flatten an orthogonal multidimensional timeseries file into a
    pandas DataFrame with one row per (station, time) pair.

    clean_cols: drop columns that contain no data at all.
    clean_rows: currently a no-op — the row-dropping logic below is
    commented out (see NOTE near the bottom).
    """
    # Don't pass around the attributes store them in the class
    # T
    tvar = self.t_axes()[0]
    t = nc4.num2date(tvar[:], tvar.units, getattr(tvar, 'calendar', 'standard'))
    if isinstance(t, datetime):
        # Size one: num2date returned a scalar datetime, wrap it back
        # into a one-element array
        t = np.array([t.isoformat()], dtype='datetime64')
    logger.debug(['time data size: ', t.size])

    svar = self.filter_by_attrs(cf_role='timeseries_id')[0]
    # Stations
    # TODO: Make sure there is a test for a file with multiple time variables
    try:
        s = normalize_array(svar)
    except ValueError:
        # Fall back to positional indexes when the id variable can't be
        # normalized.
        # NOTE(review): np.integer is an abstract type — as a dtype this is
        # deprecated/removed on newer NumPy; confirm against the NumPy
        # version this project pins.
        s = np.asarray(list(range(len(svar))), dtype=np.integer)
    # Repeat each station id across every timestep
    s = np.repeat(s, t.size)
    logger.debug(['station data size: ', s.size])

    # X
    xvar = self.x_axes()[0]
    x = generic_masked(xvar[:].repeat(t.size), attrs=self.vatts(xvar.name)).round(5)
    logger.debug(['x data size: ', x.size])

    # Y
    yvar = self.y_axes()[0]
    y = generic_masked(yvar[:].repeat(t.size), attrs=self.vatts(yvar.name)).round(5)
    logger.debug(['y data size: ', y.size])

    # Z
    zvar = self.z_axes()[0]
    z = generic_masked(zvar[:].repeat(t.size), attrs=self.vatts(zvar.name))
    logger.debug(['z data size: ', z.size])

    # now repeat t per station
    # figure out if this is a single-station file
    # do this by checking the dimensions of the Z var
    if zvar.ndim == 1:
        t = np.repeat(t, len(svar))

    df_data = {
        't': t,
        'x': x,
        'y': y,
        'z': z,
        'station': s,
    }

    # NOTE(review): row-drop bookkeeping is disabled; kept for reference.
    #building_index_to_drop = np.ones(t.size, dtype=bool)

    # Everything that is not a coordinate/id variable is a data variable
    extract_vars = copy(self.variables)
    del extract_vars[svar.name]
    del extract_vars[xvar.name]
    del extract_vars[yvar.name]
    del extract_vars[zvar.name]
    del extract_vars[tvar.name]

    for i, (dnam, dvar) in enumerate(extract_vars.items()):
        # Skip variables that can't map 1:1 onto the (station, time) rows
        if dvar[:].flatten().size > t.size:
            logger.warning("Variable {} is not the correct size, skipping.".format(dnam))
            continue

        vdata = generic_masked(dvar[:].flatten(), attrs=self.vatts(dnam))
        if vdata.size == 1:
            # Collapse a size-one array to a scalar so pandas broadcasts it
            vdata = vdata[0]
        #building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True)  # noqa
        try:
            # Variables with CF time units ("<units> since <epoch>") are
            # decoded to datetimes
            if re.match(r'.* since .*', dvar.units):
                vdata = nc4.num2date(vdata[:], dvar.units, getattr(dvar, 'calendar', 'standard'))
        except AttributeError:
            # Variable has no units attribute — leave values as-is
            pass
        df_data[dnam] = vdata
        #logger.info('{} - {}'.format(dnam, vdata.shape))

    # Build column-by-column so scalar values broadcast to the index length
    df = pd.DataFrame()
    for k, v in df_data.items():
        df[k] = v

    # Drop all data columns with no data
    if clean_cols:
        df = df.dropna(axis=1, how='all')

    # Drop all data rows with no data variable data
    # NOTE(review): intentionally disabled along with the bookkeeping above;
    # clean_rows currently has no effect.
    #if clean_rows:
    #    df = df.iloc[~building_index_to_drop]

    return df