Example #1
    def read(self, datasets_to_read=None, vars_to_retrieve=None, **kwargs):
        """Read observations

        Iterate over all datasets in :attr:`datasets_to_read`, call
        :func:`read_dataset` for each and append the result to the data object
        
        Example
        -------
        >>> import pyaerocom.io.readungridded as pio
        >>> from pyaerocom import const
        >>> obj = pio.ReadUngridded(dataset_to_read=const.AERONET_SUN_V3L15_AOD_ALL_POINTS_NAME)
        >>> obj.read()
        >>> print(obj)
        >>> print(obj.metadata[0.]['latitude'])
        """
        if datasets_to_read is not None:
            self.datasets_to_read = datasets_to_read
        if vars_to_retrieve is not None:
            self.vars_to_retrieve = vars_to_retrieve

        data = UngriddedData()
        for ds in self.datasets_to_read:
            self.logger.info('Reading {} data'.format(ds))
            data.append(self.read_dataset(ds, vars_to_retrieve, **kwargs))
            self.logger.info('Successfully imported {} data'.format(ds))
        self.data = data
        return data
Example #2
    def read(self,
             datasets_to_read=None,
             vars_to_retrieve=None,
             only_cached=False,
             **kwargs):
        """Read observations

        Iterate over all datasets in :attr:`datasets_to_read`, call
        :func:`read_dataset` for each and append the result to the data object

        Parameters
        ----------
        datasets_to_read : str or list
            data ID or list of all datasets to be imported
        vars_to_retrieve : str or list
            variable or list of variables to be imported
        only_cached : bool
            if True, no data is reloaded from the database; only data that is
            available as cached objects is loaded (not generally recommended,
            but useful when working offline without access to the database)

        Example
        -------
        >>> import pyaerocom.io.readungridded as pio
        >>> from pyaerocom import const
        >>> obj = pio.ReadUngridded(dataset_to_read=const.AERONET_SUN_V3L15_AOD_ALL_POINTS_NAME)
        >>> obj.read()
        >>> print(obj)
        >>> print(obj.metadata[0.]['latitude'])

        """
        if datasets_to_read is not None:
            self.datasets_to_read = datasets_to_read
        if vars_to_retrieve is not None:
            self.vars_to_retrieve = vars_to_retrieve

        data = UngriddedData()
        for ds in self.datasets_to_read:
            read_vars = self._get_vars_to_retrieve(ds)
            self.logger.info('Reading {} data, variables: {}'.format(
                ds, read_vars))
            if ds in self.post_compute:
                data.append(
                    self.read_dataset_post(ds,
                                           read_vars,
                                           only_cached=only_cached,
                                           **kwargs))
            else:
                data.append(
                    self.read_dataset(ds,
                                      read_vars,
                                      only_cached=only_cached,
                                      **kwargs))

            self.logger.info('Successfully imported {} data'.format(ds))
        return data
Example #3
    def read(self, vars_to_retrieve=None, files=None, first_file=None,
             last_file=None, file_pattern=None, common_meta=None):
        """Method that reads list of files as instance of :class:`UngriddedData`

        Parameters
        ----------
        vars_to_retrieve : :obj:`list` or similar, optional
            list containing variable IDs that are supposed to be read. If None,
            all variables in :attr:`PROVIDES_VARIABLES` are loaded
        files : :obj:`list`, optional
            list of files to be read. If None, then the file list is used that
            is returned on :func:`get_file_list`.
        first_file : :obj:`int`, optional
            index of first file in file list to read. If None, the very first
            file in the list is used. Note: is ignored if input parameter
            `file_pattern` is specified.
        last_file : :obj:`int`, optional
            index of last file in list to read. If None, the very last file
            in the list is used. Note: is ignored if input parameter
            `file_pattern` is specified.
        file_pattern : str, optional
            string pattern for file search (cf :func:`get_file_list`)
        common_meta : dict, optional
            dictionary that contains additional metadata shared for this
            network (assigned to each metadata block of the
            :class:`UngriddedData` object that is returned)

        Returns
        -------
        UngriddedData
            data object
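
        Example
        -------
        Illustrative sketch only; the reader class, variable and metadata key
        below are placeholders and require access to a local AeroCom obs
        database:

        >>> reader = ReadAeronetSunV3()  # hypothetical concrete reader
        >>> data = reader.read(vars_to_retrieve='od550aer')
        >>> print(data.metadata[0.0]['station_name'])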
        """
        if common_meta is None:
            common_meta = {}
        if vars_to_retrieve is None:
            vars_to_retrieve = self.DEFAULT_VARS
        elif isinstance(vars_to_retrieve, str):
            vars_to_retrieve = [vars_to_retrieve]
        vars_to_retrieve = varlist_aerocom(vars_to_retrieve)
        if files is None:
            if len(self.files) == 0:
                self.get_file_list(pattern=file_pattern)
            files = self.files

        if file_pattern is None:
            if first_file is None:
                first_file = 0
            if last_file is None:
                last_file = len(files)

            files = files[first_file:last_file]

        self.read_failed = []

        data_obj = UngriddedData()
        meta_key = 0.0
        idx = 0

        #assign metadata object
        metadata = data_obj.metadata
        meta_idx = data_obj.meta_idx

        num_vars = len(vars_to_retrieve)
        num_files = len(files)
        print_log.info('Reading AERONET data')
        for i in tqdm(range(num_files)):

            _file = files[i]
            station_data = self.read_file(_file,
                                          vars_to_retrieve=vars_to_retrieve)
            # Fill the metadata dict
            # the location in the data set is time step dependent!
            # use the lat location here since we have to choose one location
            # in the time series plot
            meta = od()
            meta['var_info'] = od()
            meta.update(station_data.get_meta())
            #metadata[meta_key].update(station_data.get_station_coords())
            meta['data_id'] = self.data_id
            meta['ts_type'] = self.TS_TYPE
            #meta['variables'] = vars_to_retrieve
            if 'instrument_name' in station_data and station_data['instrument_name'] is not None:
                instr = station_data['instrument_name']
            else:
                instr = self.INSTRUMENT_NAME
            meta['instrument_name'] = instr
            meta['data_revision'] = self.data_revision
            meta['filename'] = _file

            meta.update(**common_meta)
            # this is a list with indices of this station for each variable
            # not sure yet, if we really need that or if it speeds up things
            meta_idx[meta_key] = od()

            num_times = len(station_data['dtime'])

            #access array containing time stamps
            # TODO: check using index instead (even though not a problem here
            # since all Aerocom data files are of type timeseries)
            times = np.float64(station_data['dtime'])

            totnum = num_times * num_vars

            #check if size of data object needs to be extended
            if (idx + totnum) >= data_obj._ROWNO:
                #if totnum < data_obj._CHUNKSIZE, then the latter is used
                data_obj.add_chunk(totnum)

            for var_idx, var in enumerate(vars_to_retrieve):
                values = station_data[var]
                start = idx + var_idx * num_times
                stop = start + num_times

                #write common meta info for this station (data lon, lat and
                #altitude are set to station locations)
                data_obj._data[start:stop,
                               data_obj._LATINDEX] = station_data['latitude']
                data_obj._data[start:stop,
                               data_obj._LONINDEX] = station_data['longitude']
                data_obj._data[start:stop,
                               data_obj._ALTITUDEINDEX] = station_data['altitude']
                data_obj._data[start:stop,
                               data_obj._METADATAKEYINDEX] = meta_key

                # write data to data object
                data_obj._data[start:stop, data_obj._TIMEINDEX] = times
                data_obj._data[start:stop, data_obj._DATAINDEX] = values
                data_obj._data[start:stop, data_obj._VARINDEX] = var_idx

                meta_idx[meta_key][var] = np.arange(start, stop)

                if var in station_data['var_info']:
                    if 'units' in station_data['var_info'][var]:
                        u = station_data['var_info'][var]['units']
                    elif 'unit' in station_data['var_info'][var]:
                        from pyaerocom.exceptions import MetaDataError
                        raise MetaDataError('Metadata attr unit is deprecated, '
                                            'please use units')
                    else:
                        u = self.DEFAULT_UNIT
                elif var in self.UNITS:
                    u = self.UNITS[var]
                else:
                    u = self.DEFAULT_UNIT
                meta['var_info'][var] = od(units=u)
                if not var in data_obj.var_idx:
                    data_obj.var_idx[var] = var_idx

            idx += totnum
            metadata[meta_key] = meta
            meta_key = meta_key + 1.

        # shorten data_obj._data to the right number of points
        data_obj._data = data_obj._data[:idx]
        #data_obj.data_revision[self.data_id] = self.data_revision
        self.data = data_obj
        return data_obj
Example #4
    def read(self, vars_to_retrieve=None, files=None, first_file=None,
             last_file=None, file_pattern=None, list_coda_paths=False,
             local_temp_dir=None):
        """Method that reads list of files as instance of :class:`UngriddedData`

        Parameters
        ----------
        vars_to_retrieve : :obj:`list` or similar, optional
            list containing variable IDs that are supposed to be read. If None,
            all variables in :attr:`PROVIDES_VARIABLES` are loaded
        files : :obj:`list`, optional
            list of files to be read. If None, then the file list is used that
            is returned on :func:`get_file_list`.
        first_file : :obj:`int`, optional
            index of first file in file list to read. If None, the very first
            file in the list is used. Note: is ignored if input parameter
            `file_pattern` is specified.
        last_file : :obj:`int`, optional
            index of last file in list to read. If None, the very last file
            in the list is used. Note: is ignored if input parameter
            `file_pattern` is specified.
        file_pattern : str, optional
            string pattern for file search (cf :func:`get_file_list`)
        list_coda_paths : bool
            if True, the coda field names of the first file are printed and no
            data is read
        local_temp_dir : str, optional
            directory used for temporary extraction of archive files. If None,
            :attr:`LOCAL_TMP_DIR` is used.

        Returns
        -------
        UngriddedData
            data object

        Example
        -------
        >>> import pyaerocom as pya
        >>> obj = pya.io.read_aeolus_l2a_data.ReadL2Data()
        >>> testfiles = []
        >>> testfiles.append('/lustre/storeB/project/fou/kl/admaeolus/data.rev.2A02/download/2018-12/01/AE_OPER_ALD_U_N_2A_20181201T033526026_005423993_001590_0001.TGZ')
        >>> data=obj.read(files=testfiles)
        >>> data=obj.read(files=testfiles, vars_to_retrieve='ec355aer')

        """

        import pathlib
        import tarfile
        import os
        import coda

        if local_temp_dir is None:
            local_temp_dir = self.LOCAL_TMP_DIR

        if vars_to_retrieve is None:
            vars_to_retrieve = self.DEFAULT_VARS
        elif isinstance(vars_to_retrieve, str):
            vars_to_retrieve = [vars_to_retrieve]

        if files is None:
            if len(self.files) == 0:
                self.get_file_list(pattern=file_pattern)
            files = self.files

        if file_pattern is None:
            if first_file is None:
                first_file = 0
            if last_file is None:
                last_file = len(files)

            files = files[first_file:last_file]

        self.read_failed = []
        temp_files = {}

        data_obj = UngriddedData(num_points=self._COLNO, chunksize=self._CHUNKSIZE)
        meta_key = 0.0
        idx = 0

        # check if the supplied file is a supported archive file (tar in this case)
        # and extract the files with supported suffixes to const._cachedir
        non_archive_files = []
        for idx, _file in enumerate(sorted(files)):
            # temp = 'reading file: {}'.format(_file)

            self.logger.info('file: {}'.format(_file))
            suffix = pathlib.Path(_file).suffix
            if suffix in self.SUPPORTED_ARCHIVE_SUFFIXES:
                temp = 'opening archive file; using {} as temp dir.'.format(local_temp_dir)
                self.logger.info(temp)
                # untar archive files first
                tarhandle = tarfile.open(_file)
                files_in_tar = tarhandle.getnames()
                for file_in_tar in files_in_tar:
                    if pathlib.Path(file_in_tar).suffix in self.SUPPORTED_SUFFIXES:
                        # extract file to tmp path
                        member = tarhandle.getmember(file_in_tar)
                        temp = 'extracting file {}...'.format(member.name)
                        self.logger.info(temp)
                        tarhandle.extract(member, path=local_temp_dir, set_attrs=False)
                        extract_file = os.path.join(local_temp_dir, member.name)
                        non_archive_files.append(extract_file)
                        temp_files[extract_file] = True
                tarhandle.close()
            else:
                non_archive_files.append(_file)

        for idx, _file in enumerate(sorted(non_archive_files)):
            # list coda data paths in the 1st file in case the user asked for that
            if idx == 0 and list_coda_paths:
                coda_handle = coda.open(_file)
                root_field_names = coda.get_field_names(coda_handle)
                for field in root_field_names:
                    print(field)
                coda.close(coda_handle)
                data_obj = None
                return data_obj

            file_data = self.read_file(_file, vars_to_retrieve=vars_to_retrieve,
                                       loglevel=logging.INFO, return_as='numpy')
            self.logger.info('{} points read'.format(file_data.shape[0]))
            # the metadata dict is left empty for L2 data
            # the location in the data set is time step dependent!
            if idx == 0:
                data_obj._data = file_data

            else:
                data_obj._data = np.append(data_obj._data, file_data, axis=0)

            data_obj._idx = data_obj._data.shape[0] + 1
            file_data = None
            # remove file if it was temporary one
            if _file in temp_files:
                os.remove(_file)
            #     pass
            # tmp_obj = UngriddedData()
            # tmp_obj._data = file_data
            # tmp_obj._idx = data_obj._data.shape[0] + 1
            # data_obj.append(tmp_obj)

        self.logger.info('size of data object: {}'.format(data_obj._idx - 1))
        return data_obj
Example #5
    def read(self, vars_to_retrieve=None, files=None, first_file=None,
             last_file=None, pattern=None, check_time=True, **kwargs):
        """Read data files into `UngriddedData` object

        Parameters
        ----------
        vars_to_retrieve : :obj:`list` or similar, optional
            list containing variable IDs that are supposed to be read. If None,
            all variables in :attr:`PROVIDES_VARIABLES` are loaded
        files : :obj:`list`, optional
            list of files to be read. If None, then the file list is used that
            is returned on :func:`get_file_list`.
        first_file : :obj:`int`, optional
            index of first file in file list to read. If None, the very first
            file in the list is used
        last_file : :obj:`int`, optional
            index of last file in list to read. If None, the very last file
            in the list is used
        pattern : str, optional
            string pattern for file search (cf :func:`get_file_list`)
        check_time : bool
            if True, the time stamps read from each file are checked against
            the start and stop times encoded in the corresponding file name
        **kwargs
            additional keyword args that are passed on to :func:`read_file`

        Returns
        -------
        UngriddedData
            data object
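
        Example
        -------
        Illustrative sketch only; the reader class and variable below are
        placeholders that depend on the concrete implementation and on local
        data availability:

        >>> reader = ReadGhost()  # hypothetical concrete reader instance
        >>> data = reader.read(vars_to_retrieve='concpm10')
        >>> print(data)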
        """
        if vars_to_retrieve is None:
            vars_to_retrieve = self.DEFAULT_VARS
        elif isinstance(vars_to_retrieve, str):
            vars_to_retrieve = [vars_to_retrieve]

        # make sure to use AeroCom variable names in output data
        vars_to_retrieve = varlist_aerocom(vars_to_retrieve)

        vars_to_read, vars_to_compute = self.check_vars_to_retrieve(vars_to_retrieve)

        if files is None:
            files = self.get_file_list(vars_to_read, pattern=pattern)
        elif isinstance(files, str):
            files = [files]

        if first_file is None:
            first_file = 0
        if last_file is None:
            last_file = len(files)

        files = files[first_file:last_file]

        data_obj = UngriddedData(num_points=1000000)

        meta_key = -1.0
        idx = 0

        #assign metadata object
        metadata = data_obj.metadata
        meta_idx = data_obj.meta_idx
        var_count_glob = -1
        rename = self.var_names_data_inv
        from tqdm import tqdm
        for i in tqdm(range(len(files))):
            _file = files[i]
            metafile = self.get_meta_filename(_file)
            var_to_read = metafile['var_name']
            begin = metafile['start']
            end = metafile['stop']

            var_read = rename[var_to_read]
            stats = self.read_file(_file, var_to_read=var_to_read,
                                   var_to_write=var_read, **kwargs)

            stats, added = self.compute_additional_vars(stats, vars_to_compute)
            if len(stats) == 0:
                const.logger.info('File {} does not contain any of the input '
                                  'variables {}'
                                  .format(_file, vars_to_retrieve))
            vars_avail = [var_read] + added
            vars_to_add = list(np.intersect1d(vars_to_retrieve, vars_avail))
            if len(vars_to_add) == 0:
                continue
            chunksize = 500000
            for stat in stats:
                meta_key += 1
                meta_idx[meta_key] = {}

                meta = stat['meta']
                vi = meta['var_info']

                meta['var_info'] = {}

                metadata[meta_key] = meta
                metadata[meta_key]['data_id'] = self.data_id
                # duplicate for now
                metadata[meta_key]['instrument_name'] = meta['measuring_instrument_name']
                statname = metadata[meta_key]['station_name']
                if '/' in statname:
                    statname = statname.replace('/','-')
                metadata[meta_key]['station_name'] = statname

                times = stat['time'].astype('datetime64[s]')
                timenums = np.float64(times)

                if check_time and (begin > times[0] or end < times[-1]):
                    raise ValueError('Something seems to be off with time '
                                     'dimension...')

                num_vars = len(vars_to_add)
                num_times = len(times)

                totnum = num_times * num_vars

                #check if size of data object needs to be extended
                if (idx + totnum) >= data_obj._ROWNO:
                    #if totnum < data_obj._CHUNKSIZE, then the latter is used
                    data_obj.add_chunk(chunksize)

                for j, var_to_write in enumerate(vars_to_add):
                    values = stat[var_to_write]

                    start = idx + j*num_times
                    stop = start + num_times

                    if not var_to_write in data_obj.var_idx:
                        var_count_glob += 1
                        var_idx = var_count_glob
                        data_obj.var_idx[var_to_write] = var_idx
                    else:
                        var_idx = data_obj.var_idx[var_to_write]

                    meta['var_info'][var_to_write] = vi[var_to_write]
                    #write common meta info for this station (data lon, lat and
                    #altitude are set to station locations)
                    data_obj._data[start:stop,
                                   data_obj._LATINDEX] = meta['latitude']
                    data_obj._data[start:stop,
                                   data_obj._LONINDEX] = meta['longitude']
                    data_obj._data[start:stop,
                                   data_obj._ALTITUDEINDEX] = meta['altitude']
                    data_obj._data[start:stop,
                                   data_obj._METADATAKEYINDEX] = meta_key

                    # write data to data object
                    data_obj._data[start:stop, data_obj._TIMEINDEX] = timenums

                    data_obj._data[start:stop, data_obj._DATAINDEX] = values

                    # add invalid measurements
                    invalid = stat['data_flagged'][var_to_write]
                    data_obj._data[start:stop, data_obj._DATAFLAGINDEX] = invalid

                    data_obj._data[start:stop, data_obj._VARINDEX] = var_idx

                    meta_idx[meta_key][var_to_write] = np.arange(start, stop)

                idx += totnum

        data_obj._data = data_obj._data[:idx]
        data_obj._check_index()
        return data_obj
Example #6
    def read_dataset_post(self,
                          dataset_to_read,
                          vars_to_retrieve,
                          only_cached=False,
                          **kwargs):
        """Read dataset into an instance of :class:`ReadUngridded`

        Parameters
        ----------
        dataset_to_read : str
            name of dataset
        vars_to_retrieve : list
            variable or list of variables to be imported
        only_cached : bool
            if True, no data is reloaded from the database; only data that is
            available as cached objects is loaded (not generally recommended,
            but useful when working offline without access to the database)
        **kwargs
            additional reading constraints. If any are provided, caching is
            deactivated and the data will be read from disk.

        Returns
        --------
        UngriddedData
            data object
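
        Example
        -------
        Illustrative sketch only; the dataset ID and variable below are
        placeholders and must be registered in :attr:`post_compute` together
        with the required auxiliary input datasets:

        >>> reader = ReadUngridded()
        >>> data = reader.read_dataset_post('MyComputedDataset',  # placeholder
        ...                                 ['od550aer'])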
        """
        aux_info = self.post_compute[dataset_to_read]
        loaded = []
        for var in vars_to_retrieve:
            input_data_ids_vars = []
            aux_info_var = aux_info['aux_requires'][var]
            for aux_id, aux_vars in aux_info_var.items():
                if aux_id in self.post_compute:
                    aux_data = self.read_dataset_post(
                        dataset_to_read=aux_id,
                        vars_to_retrieve=aux_vars,
                        only_cached=only_cached,
                        **kwargs)

                else:
                    aux_data = self.read_dataset(aux_id,
                                                 aux_vars,
                                                 only_cached=only_cached,
                                                 **kwargs)
                for aux_var in aux_vars:
                    input_data_ids_vars.append((aux_data, aux_id, aux_var))

            aux_merge_how = aux_info['aux_merge_how'][var]

            if var in aux_info['aux_units']:
                var_unit_out = aux_info['aux_units'][var]
            else:
                var_unit_out = None

            if aux_merge_how == 'eval':
                # function MUST be defined
                aux_fun = aux_info['aux_funs'][var]
            else:
                aux_fun = None

            merged_stats = combine_vardata_ungridded(
                data_ids_and_vars=input_data_ids_vars,
                merge_eval_fun=aux_fun,
                merge_how=aux_merge_how,
                var_name_out=var,
                var_unit_out=var_unit_out,
                data_id_out=aux_info['data_id'])
            loaded.append(UngriddedData.from_station_data(merged_stats))
        first = loaded[0]
        if len(loaded) == 1:
            return first
        for data in loaded[1:]:
            first.append(data)
        return first
Example #7
    def read(self,
             vars_to_retrieve=None,
             files=None,
             first_file=None,
             last_file=None,
             file_pattern=None,
             list_coda_paths=False,
             local_temp_dir=None,
             return_as='numpy',
             apply_quality_flag=0.0):
        """Method that reads list of files as instance of :class:`UngriddedData`

        Parameters
        ----------
        vars_to_retrieve : :obj:`list` or similar, optional
            list containing variable IDs that are supposed to be read. If None,
            all variables in :attr:`PROVIDES_VARIABLES` are loaded
        files : :obj:`list`, optional
            list of files to be read. If None, then the file list is used that
            is returned on :func:`get_file_list`.
        first_file : :obj:`int`, optional
            index of first file in file list to read. If None, the very first
            file in the list is used. Note: is ignored if input parameter
            `file_pattern` is specified.
        last_file : :obj:`int`, optional
            index of last file in list to read. If None, the very last file
            in the list is used. Note: is ignored if input parameter
            `file_pattern` is specified.
        file_pattern : str, optional
            string pattern for file search (cf :func:`get_file_list`)
        list_coda_paths : bool
            if True, the coda field names of the first file are printed and no
            data is read
        local_temp_dir : str, optional
            directory used for temporary extraction of archive files. If None,
            :attr:`LOCAL_TMP_DIR` is used.
        return_as : str
            either 'numpy' or 'dict'; determines how the data read from the
            individual files is stored in the returned object
        apply_quality_flag : float
            quality flag threshold (only used if ``return_as='dict'``)

        Returns
        -------
        UngriddedData
            data object

        Example
        -------
        >>> import pyaerocom as pya
        >>> obj = pya.io.read_aeolus_l2a_data.ReadL2Data()
        >>> testfiles = []
        >>> testfiles.append('/lustre/storeB/project/fou/kl/admaeolus/data.rev.2A02/download/2018-12/01/AE_OPER_ALD_U_N_2A_20181201T033526026_005423993_001590_0001.TGZ')
        >>> data=obj.read(files=testfiles)
        >>> data=obj.read(files=testfiles, vars_to_retrieve='ec355aer')

        """

        import pathlib
        import tarfile
        import os
        import coda

        if local_temp_dir is None:
            local_temp_dir = self.LOCAL_TMP_DIR

        if vars_to_retrieve is None:
            vars_to_retrieve = self.DEFAULT_VARS
        elif isinstance(vars_to_retrieve, str):
            vars_to_retrieve = [vars_to_retrieve]

        if files is None:
            if len(self.files) == 0:
                self.get_file_list(pattern=file_pattern)
            files = self.files

        if file_pattern is None:
            if first_file is None:
                first_file = 0
            if last_file is None:
                last_file = len(files)

            files = files[first_file:last_file]

        self.read_failed = []
        temp_files = {}

        data_obj = UngriddedData(num_points=self._CHUNKSIZE)
        meta_key = 0.0
        idx = 0

        # check if the supplied file is a supported archive file (tar in this case)
        # and extract the files with supported suffixes to const.CACHEDIR
        non_archive_files = []
        for idx, _file in enumerate(sorted(files)):
            # temp = 'reading file: {}'.format(_file)

            self.logger.info('file: {}'.format(_file))
            suffix = pathlib.Path(_file).suffix
            if suffix in self.SUPPORTED_ARCHIVE_SUFFIXES:
                temp = 'opening archive file; using {} as temp dir.'.format(
                    local_temp_dir)
                self.logger.info(temp)
                # untar archive files first
                tarhandle = tarfile.open(_file)
                files_in_tar = tarhandle.getnames()
                for file_in_tar in files_in_tar:
                    if pathlib.Path(
                            file_in_tar).suffix in self.SUPPORTED_SUFFIXES:
                        # extract file to tmp path
                        member = tarhandle.getmember(file_in_tar)
                        temp = 'extracting file {}...'.format(member.name)
                        self.logger.info(temp)
                        tarhandle.extract(member,
                                          path=local_temp_dir,
                                          set_attrs=False)
                        extract_file = os.path.join(local_temp_dir,
                                                    member.name)
                        non_archive_files.append(extract_file)
                        temp_files[extract_file] = True
                tarhandle.close()
            else:
                non_archive_files.append(_file)

        for idx, _file in enumerate(sorted(non_archive_files)):
            # list coda data paths in the 1st file in case the user asked for that
            if idx == 0 and list_coda_paths:
                coda_handle = coda.open(_file)
                root_field_names = coda.get_field_names(coda_handle)
                for field in root_field_names:
                    print(field)
                coda.close(coda_handle)
                data_obj = None
                return data_obj

            file_data = self.read_file(_file,
                                       vars_to_retrieve=vars_to_retrieve,
                                       loglevel=logging.INFO,
                                       return_as=return_as)
            if return_as == 'numpy':
                self.logger.info('{} points read'.format(file_data.shape[0]))
                # the metadata dict is left empty for L2 data
                # the location in the data set is time step dependent!
                if idx == 0:
                    data_obj._data = file_data

                else:
                    data_obj._data = np.append(data_obj._data,
                                               file_data,
                                               axis=0)

                data_obj._idx = data_obj._data.shape[0] + 1
                file_data = None
                # remove file if it was temporary one
                if _file in temp_files:
                    os.remove(_file)
                #     pass
                # tmp_obj = UngriddedData()
                # tmp_obj._data = file_data
                # tmp_obj._idx = data_obj._data.shape[0] + 1
                # data_obj.append(tmp_obj)
                self.logger.info(
                    'size of data object: {}'.format(data_obj._idx - 1))
            elif return_as == 'dict':
                if idx == 0:
                    data_obj._data = {}
                    shape_store = {}
                    index_store = {}
                    file_start_index_arr = [0]
                    # apply quality flags
                    if apply_quality_flag > 0.:
                        qflags = file_data[self._QANAME]
                        keep_indexes = np.where(
                            qflags >= apply_quality_flag)[0]
                        elements_to_add = keep_indexes.size
                    else:
                        keep_indexes = np.arange(0,
                                                 len(file_data[self._QANAME]))
                        elements_to_add = file_data[self._QANAME].shape[0]

                    for _key in file_data:
                        # print('key: {}'.format(_key))
                        shape_store[_key] = file_data[_key].shape
                        index_store[_key] = file_data[_key].shape[0]
                        input_shape = list(file_data[_key].shape)
                        input_shape[0] = self._ROWNO
                        data_obj._data[_key] = np.empty(input_shape,
                                                        dtype=np.float_)
                        if len(input_shape) == 1:
                            data_obj._data[_key][0:file_data[_key].
                                                 shape[0]] = file_data[_key]
                        elif len(input_shape) == 2:
                            data_obj._data[_key][0:file_data[_key].
                                                 shape[0], :] = file_data[_key]
                        elif len(input_shape) == 3:
                            data_obj._data[_key][
                                0:file_data[_key].
                                shape[0], :, :] = file_data[_key]
                        elif len(input_shape) == 4:
                            data_obj._data[_key][
                                0:file_data[_key].
                                shape[0], :, :, :] = file_data[_key]
                        else:
                            pass

                # 2nd + file
                else:
                    if apply_quality_flag > 0.:
                        qflags = file_data[self._QANAME]
                        keep_indexes = np.where(
                            qflags >= apply_quality_flag)[0]
                        elements_to_add = keep_indexes.size

                    file_start_index_arr.append(
                        file_data[self.TSSIZENAME].shape[0])
                    for _key in file_data:
                        if _key in self.STATICFIELDNAMES:
                            print('key: {}'.format(_key))
                            continue
                        # shape_store[_key] = file_data[_key].shape
                        elements_to_add = file_data[_key].shape[0]
                        # extend data_obj._data[_key] if necessary
                        if index_store[_key] + elements_to_add > data_obj._data[
                                _key].shape[0]:
                            current_shape = list(data_obj._data[_key].shape)
                            current_shape[
                                0] = current_shape[0] + self._CHUNKSIZE
                            tmp_data = np.empty(current_shape, dtype=np.float_)
                            if len(current_shape) == 1:
                                tmp_data[0:data_obj._data[_key].
                                         shape[0]] = data_obj._data[_key]
                            elif len(current_shape) == 2:
                                tmp_data[0:data_obj._data[_key].
                                         shape[0], :] = data_obj._data[_key]
                            elif len(current_shape) == 3:
                                tmp_data[0:data_obj._data[_key].
                                         shape[0], :, :] = data_obj._data[_key]
                            elif len(current_shape) == 4:
                                tmp_data[
                                    0:data_obj._data[_key].
                                    shape[0], :, :, :] = data_obj._data[_key]
                            else:
                                pass
                            # keep the enlarged array so that the writes below
                            # fit into the extended buffer
                            data_obj._data[_key] = tmp_data

                        input_shape = list(file_data[_key].shape)
                        if len(input_shape) == 1:
                            data_obj._data[_key][
                                index_store[_key]:index_store[_key] +
                                file_data[_key].shape[0]] = file_data[_key]
                        elif len(input_shape) == 2:
                            data_obj._data[_key][
                                index_store[_key]:index_store[_key] +
                                file_data[_key].shape[0], :] = file_data[_key]
                        elif len(input_shape) == 3:
                            data_obj._data[_key][
                                index_store[_key]:index_store[_key] +
                                file_data[_key].
                                shape[0], :, :] = file_data[_key]
                        elif len(input_shape) == 4:
                            data_obj._data[_key][
                                index_store[_key]:index_store[_key] +
                                file_data[_key].
                                shape[0], :, :, :] = file_data[_key]
                        else:
                            pass
                        index_store[_key] += elements_to_add

                file_data = None
                # remove file if it was temporary one
                if _file in temp_files:
                    os.remove(_file)
            else:
                pass

        # now shorten the data dict to the necessary size
        if return_as == 'dict':
            for _key in data_obj._data:
                data_obj._data[_key] = data_obj._data[_key][:index_store[_key]]
            data_obj._data['file_indexes'] = file_start_index_arr

            # apply the quality flags
            if apply_quality_flag > 0.:
                pass

        return data_obj
Example #8
    def read_dataset(self,
                     dataset_to_read,
                     vars_to_retrieve=None,
                     only_cached=False,
                     **kwargs):
        """Read dataset into an instance of :class:`ReadUngridded`

        Parameters
        ----------
        dataset_to_read : str
            name of dataset
        vars_to_retrieve : str or list
            variable or list of variables to be imported
        only_cached : bool
            if True, no data is reloaded from the database; only data that is
            available as cached objects is loaded (not generally recommended,
            but useful when working offline without access to the database)
        **kwargs
            additional reading constraints. If any are provided, caching is
            deactivated and the data will be read from disk.

        Returns
        --------
        UngriddedData
            data object
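
        Example
        -------
        Illustrative sketch only; dataset ID and variable are placeholders and
        reading requires access to the corresponding observation database:

        >>> reader = ReadUngridded()
        >>> data = reader.read_dataset('AeronetSunV3Lev2.daily',
        ...                            vars_to_retrieve='od550aer')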
        """
        _caching = None
        if len(kwargs) > 0:
            _caching = const.CACHING
            const.CACHING = False

            print_log.info('Received additional reading constraints, '
                           'ignoring caching')

        reader = self.get_reader(dataset_to_read)

        if vars_to_retrieve is not None:
            # Note: self.vars_to_retrieve may be None as well, then
            # default variables of each network are read
            self.vars_to_retrieve = vars_to_retrieve

        if self.vars_to_retrieve is None:
            self.vars_to_retrieve = reader.PROVIDES_VARIABLES

        vars_to_retrieve = varlist_aerocom(self.vars_to_retrieve)

        # data_dir will be None in most cases, but can be specified when
        # creating the instance, by default, data_dir is inferred automatically
        # in the reading class, using database location
        data_dir = self._get_data_dir(dataset_to_read)
        if data_dir is not None:
            if not os.path.exists(data_dir):
                raise FileNotFoundError(
                    'Trying to read {} from specified data_dir {} failed. '
                    'Directory does not exist'.format(dataset_to_read,
                                                      data_dir))
            reader._dataset_path = data_dir
            const.print_log.info(
                'Reading {} from specified data location: {}'.format(
                    dataset_to_read, data_dir))

        # Since this interface enables to load multiple datasets, each of
        # which support a number of variables, here, only the variables are
        # considered that are supported by the dataset
        vars_available = [
            var for var in vars_to_retrieve if reader.var_supported(var)
        ]
        if len(vars_available) == 0:
            raise DataRetrievalError('None of the input variables ({}) is '
                                     'supported by {} interface'.format(
                                         vars_to_retrieve, dataset_to_read))
        cache = CacheHandlerUngridded(reader)
        if not self.ignore_cache:
            # try to load cached data for each variable
            for var in vars_available:
                try:
                    cache.check_and_load(var, force_use_outdated=only_cached)
                except Exception:
                    self.logger.exception(
                        'Fatal: compatibility error between old cache file '
                        'for variable {} and current version of '
                        'code'.format(var))

        if not only_cached:
            vars_to_read = [
                v for v in vars_available if not v in cache.loaded_data
            ]
        else:
            vars_to_read = []

        data_read = None
        if len(vars_to_read) > 0:

            _loglevel = print_log.level
            print_log.setLevel(logging.INFO)
            data_read = reader.read(vars_to_read, **kwargs)
            print_log.setLevel(_loglevel)

            for var in vars_to_read:
                # write the cache file
                if not self.ignore_cache:
                    try:
                        cache.write(data_read, var)
                    except Exception as e:
                        _caching = False
                        print_log.warning(
                            'Failed to write to cache directory. '
                            'Error: {}. Deactivating caching in '
                            'pyaerocom'.format(repr(e)))

        if len(vars_to_read) == len(vars_available):
            data_out = data_read
        else:
            data_out = UngriddedData()
            for var in vars_available:
                if var in cache.loaded_data:
                    data_out.append(cache.loaded_data[var])
            if data_read is not None:
                data_out.append(data_read)

        if _caching is not None:
            const.CACHING = _caching
        return data_out
Example #9
    def read(self, vars_to_retrieve=None):
        """ Method that reads list of files as instance of :class:`UngriddedData`

        Parameters
        ----------
        vars_to_retrieve : :obj:`list` or :obj:`str`, optional
            list containing variable IDs that are supposed to be read. If None,
            all variables in :attr:`PROVIDES_VARIABLES` are loaded

        Returns
        -------
        UngriddedData
            data object
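
        Example
        -------
        Illustrative sketch only; the reader class and variable below are
        placeholders for the concrete implementation and require the
        corresponding data files to be available locally:

        >>> reader = ReadGAW()  # hypothetical concrete reader instance
        >>> data = reader.read(vars_to_retrieve='vmrso2')
        >>> print(data)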

        """

        files = self.get_file_list()
        if vars_to_retrieve is None:
            vars_to_retrieve = self.DEFAULT_VARS
        elif isinstance(vars_to_retrieve, str):
            vars_to_retrieve = [vars_to_retrieve]
        data_obj = UngriddedData()

        meta_key = 0.0
        idx = 0
        varindex = -1

        #assign metadata object
        metadata = data_obj.metadata  # OrderedDict
        meta_idx = data_obj.meta_idx  # OrderedDict

        for file in files:
            filename = os.path.basename(file)
            if not filename in self.FILES_CONTAIN:
                raise IOError(
                    'Invalid file name {}, this should not happen.'.format(
                        filename))
            var_matches = [
                var for var in vars_to_retrieve
                if var in self.FILES_CONTAIN[filename]
            ]
            if len(var_matches) == 0:
                continue
            stat_list = self.read_file(file, vars_to_retrieve=var_matches)
            for stat in stat_list:
                #self.counter += 1
                metadata[meta_key] = OrderedDict()
                metadata[meta_key].update(stat.get_meta())
                metadata[meta_key].update(stat.get_station_coords())
                metadata[meta_key]['data_id'] = self.data_id
                metadata[meta_key]['ts_type'] = self.TS_TYPE
                #metadata[meta_key]['variables'] = stat["variables"]
                # use the station's instrument name if available
                if 'instrument_name' in stat and stat[
                        'instrument_name'] is not None:
                    instr = stat['instrument_name']
                else:
                    instr = self.INSTRUMENT_NAME

                metadata[meta_key]['instrument_name'] = instr
                metadata[meta_key]['data_revision'] = self.data_revision

                # this is a list with indices of this station for each variable
                # not sure yet, if we really need that or if it speeds up things
                meta_idx[meta_key] = OrderedDict()

                num_times = len(stat['dtime'])
                num_vars = len(stat["var_info"])
                temp_vars = list(stat["var_info"].keys())
                tconv = stat['dtime'].astype('datetime64[s]')
                times = np.float64(tconv)
                totnum = num_times * num_vars

                if (idx + totnum) >= data_obj._ROWNO:
                    # This results in an error because it doesn't want to multiply empty with nan
                    data_obj.add_chunk(totnum)

                metadata[meta_key]['var_info'] = OrderedDict()
                for var_count, var in enumerate(temp_vars):

                    values = stat[var]
                    start = idx + var_count * num_times
                    stop = start + num_times

                    if not var in data_obj.var_idx:
                        varindex += 1
                        data_obj.var_idx[var] = varindex
                        var_idx = varindex
                    else:
                        var_idx = data_obj.var_idx[var]

                    metadata[meta_key]['var_info'][var] = stat['var_info'][var]

                    data_obj._data[start:stop,
                                   data_obj._LATINDEX] = stat['latitude']
                    data_obj._data[start:stop,
                                   data_obj._LONINDEX] = stat['longitude']
                    data_obj._data[start:stop,
                                   data_obj._ALTITUDEINDEX] = stat['altitude']
                    data_obj._data[start:stop,
                                   data_obj._METADATAKEYINDEX] = meta_key

                    # write data to data object
                    data_obj._data[start:stop, data_obj._TIMEINDEX] = times
                    data_obj._data[start:stop, data_obj._DATAINDEX] = values
                    data_obj._data[start:stop, data_obj._VARINDEX] = var_idx

                    meta_idx[meta_key][var] = np.arange(start, stop)

                meta_key += 1
                idx += totnum

        data_obj._data = data_obj._data[:idx]
        # sanity check
        data_obj._check_index()
        self.data = data_obj  # keep a reference to the data object on this reader
        return data_obj
Example #10
    def read(self,
             vars_to_retrieve=None,
             files=None,
             first_file=None,
             last_file=None):
        """Method that reads list of files as instance of :class:`UngriddedData`
        
        Parameters
        ----------
        vars_to_retrieve : :obj:`list` or similar, optional
            List containing variable IDs that are supposed to be read. If None, 
            all variables in :attr:`PROVIDES_VARIABLES` are loaded.
        files : :obj:`list`, optional
            List of files to be read. If None, the file list returned by
            :func:`get_file_list` is used.
        first_file : :obj:`int`, optional
            Index of the first file in the file list to be read. If None, the
            very first file in the list is used.
        last_file : :obj:`int`, optional
            Index of the last file in the file list to be read. If None, the
            very last file in the list is used.
            
        Returns
        -------
        UngriddedData
            data object
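
        Example
        -------
        Illustrative sketch only; the reader class and variable below are
        placeholders depending on the concrete subclass and on local data
        availability:

        >>> reader = SomeObsReader()  # hypothetical concrete reader
        >>> data = reader.read(vars_to_retrieve='od550aer')
        >>> print(data)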
        """

        if vars_to_retrieve is None:
            vars_to_retrieve = self.DEFAULT_VARS
        elif isinstance(vars_to_retrieve, str):
            vars_to_retrieve = [vars_to_retrieve]

        if files is None:
            if len(self.files) == 0:
                self.get_file_list()
            files = self.files

        if first_file is None:
            first_file = 0
        if last_file is None:
            last_file = len(files)

        files = files[first_file:last_file]

        data_obj = UngriddedData()
        meta_key = 0.0
        idx = 0

        # Assign metadata object and index
        metadata = data_obj.metadata
        meta_idx = data_obj.meta_idx

        for i, _file in enumerate(files):
            station_data = self.read_file(_file,
                                          vars_to_retrieve=vars_to_retrieve)

            # only the variables in the file
            num_vars = len(station_data.var_info.keys())

            # Fill the metadata dict.
            # The location in the data set is time step dependent
            metadata[meta_key] = od()
            metadata[meta_key].update(station_data.get_meta())
            metadata[meta_key].update(station_data.get_station_coords())
            metadata[meta_key]['variables'] = list(
                station_data.var_info.keys())  #vars_to_retrieve
            if ('instrument_name' in station_data
                    and station_data['instrument_name'] is not None):
                instr = station_data['instrument_name']
            else:
                instr = self.INSTRUMENT_NAME
            metadata[meta_key]['instrument_name'] = instr

            metadata[meta_key]['var_info'] = station_data['var_info']

            # List with indices of this station for each variable
            meta_idx[meta_key] = od()

            num_times = len(station_data['dtime'])

            totnum = num_times * num_vars

            # Check whether the size of the data object needs to be extended
            if (idx + totnum) >= data_obj._ROWNO:
                # if totnum < data_obj._CHUNKSIZE, then the latter is used
                data_obj.add_chunk(totnum)

            for var_idx, var in enumerate(list(station_data.var_info)):
                values = station_data[var]
                start = idx + var_idx * num_times
                stop = start + num_times

                # Write common meta info for this station (data lon, lat and
                # altitude are set to station locations)
                data_obj._data[start:stop,
                               data_obj._LATINDEX] = station_data['latitude']
                data_obj._data[start:stop,
                               data_obj._LONINDEX] = station_data['longitude']
                data_obj._data[
                    start:stop,
                    data_obj._ALTITUDEINDEX] = station_data['altitude']
                data_obj._data[start:stop,
                               data_obj._METADATAKEYINDEX] = meta_key
                data_obj._data[
                    start:stop,
                    data_obj._DATAHEIGHTINDEX] = station_data['dataaltitude']
                data_obj._data[start:stop,
                               data_obj._DATAERRINDEX] = station_data['sd']
                data_obj._data[start:stop,
                               data_obj._DATAFLAGINDEX] = station_data['f']
                data_obj._data[start:stop,
                               data_obj._TIMEINDEX] = station_data['dtime']
                data_obj._data[start:stop, data_obj._DATAINDEX] = values
                data_obj._data[start:stop, data_obj._VARINDEX] = var_idx
                meta_idx[meta_key][var] = np.arange(start, stop)

                if not var in data_obj.var_idx:
                    data_obj.var_idx[var] = var_idx

            idx += totnum
            meta_key = meta_key + 1.

        # Shorten data_obj._data to the right number of points
        data_obj._data = data_obj._data[:idx]
        #data_obj.data_revision[self.DATASET_NAME] = self.data_revision
        self.data = data_obj

        return data_obj
Example #11
    def read(self,
             vars_to_retrieve=None,
             files=None,
             first_file=None,
             last_file=None,
             read_err=None,
             remove_outliers=True,
             pattern=None):
        """Method that reads list of files as instance of :class:`UngriddedData`

        Parameters
        ----------
        vars_to_retrieve : :obj:`list` or similar, optional
            list containing variable IDs that are supposed to be read. If None,
            all variables in :attr:`PROVIDES_VARIABLES` are loaded
        files : :obj:`list`, optional
            list of files to be read. If None, then the file list is used that
            is returned on :func:`get_file_list`.
        first_file : :obj:`int`, optional
            index of first file in file list to read. If None, the very first
            file in the list is used
        last_file : :obj:`int`, optional
            index of last file in list to read. If None, the very last file
            in the list is used
        read_err : bool
            if True, uncertainty data is also read (where available). If
            unspecified (None), then the default is used (cf. :attr:`READ_ERR`)
        remove_outliers : bool
            if True, outliers are removed when reading the individual files
            (passed on to :func:`read_file`)
        pattern : str, optional
            string pattern for file search (cf :func:`get_file_list`)

        Returns
        -------
        UngriddedData
            data object
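
        Example
        -------
        Illustrative sketch only; the reader class and variable below are
        placeholders (e.g. a lidar profile reader) and require local access to
        the corresponding profile data:

        >>> reader = ReadEarlinet()  # hypothetical concrete reader instance
        >>> data = reader.read(vars_to_retrieve='ec532aer', read_err=True)
        >>> print(data)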
        """
        if vars_to_retrieve is None:
            vars_to_retrieve = self.DEFAULT_VARS
        elif isinstance(vars_to_retrieve, str):
            vars_to_retrieve = [vars_to_retrieve]

        if read_err is None:
            read_err = self.READ_ERR

        if files is None:
            if len(self.files) == 0:
                self.get_file_list(vars_to_retrieve, pattern=pattern)
            files = self.files

        if first_file is None:
            first_file = 0
        if last_file is None:
            last_file = len(files)

        files = files[first_file:last_file]

        self.read_failed = []

        data_obj = UngriddedData()
        col_idx = data_obj.index
        meta_key = -1.0
        idx = 0

        #assign metadata object
        metadata = data_obj.metadata
        meta_idx = data_obj.meta_idx

        #last_station_id = ''
        num_files = len(files)

        disp_each = int(num_files * 0.1)
        if disp_each < 1:
            disp_each = 1

        VAR_IDX = -1
        for i, _file in enumerate(files):
            if i % disp_each == 0:
                print("Reading file {} of {} ({})".format(
                    i + 1, num_files,
                    type(self).__name__))
            try:
                stat = self.read_file(_file,
                                      vars_to_retrieve=vars_to_retrieve,
                                      read_err=read_err,
                                      remove_outliers=remove_outliers)
                if not any(
                    [var in stat.vars_available for var in vars_to_retrieve]):
                    self.logger.info("Station {} contains none of the desired "
                                     "variables. Skipping station...".format(
                                         stat.station_name))
                    continue
                #if last_station_id != station_id:
                meta_key += 1
                # Fill the metadata dict
                # the location in the data set is time step dependent!
                # use the lat location here since we have to choose one location
                # in the time series plot
                metadata[meta_key] = od()
                metadata[meta_key].update(stat.get_meta())
                for add_meta in self.KEEP_ADD_META:
                    if add_meta in stat:
                        metadata[meta_key][add_meta] = stat[add_meta]
                #metadata[meta_key]['station_id'] = station_id

                metadata[meta_key]['data_revision'] = self.data_revision
                metadata[meta_key]['variables'] = []
                metadata[meta_key]['var_info'] = od()
                # this is a list with indices of this station for each variable
                # not sure yet, if we really need that or if it speeds up things
                meta_idx[meta_key] = od()
                #last_station_id = station_id

                # single floating point time stamp value
                time = stat.dtime[0]
                for var in stat.vars_available:
                    if not var in data_obj.var_idx:
                        VAR_IDX += 1
                        data_obj.var_idx[var] = VAR_IDX

                    var_idx = data_obj.var_idx[var]

                    val = stat[var]
                    metadata[meta_key]['var_info'][var] = vi = od()
                    if isinstance(val, VerticalProfile):
                        altitude = val.altitude
                        data = val.data
                        add = len(data)
                        err = val.data_err
                        metadata[meta_key]['var_info']['altitude'] = via = od()

                        vi.update(val.var_info[var])
                        via.update(val.var_info['altitude'])
                    else:
                        add = 1
                        altitude = np.nan
                        data = val
                        if var in stat.data_err:
                            err = stat.data_err[var]
                        else:
                            err = np.nan
                    vi.update(stat.var_info[var])
                    stop = idx + add
                    #check if size of data object needs to be extended
                    if stop >= data_obj._ROWNO:
                        #if add < data_obj._CHUNKSIZE, then the latter is used
                        data_obj.add_chunk(add)

                    #write common meta info for this station
                    data_obj._data[idx:stop,
                                   col_idx['latitude']] = stat['latitude']
                    data_obj._data[idx:stop,
                                   col_idx['longitude']] = stat['longitude']
                    data_obj._data[idx:stop,
                                   col_idx['altitude']] = stat['altitude']
                    data_obj._data[idx:stop, col_idx['meta']] = meta_key

                    # write data to data object
                    data_obj._data[idx:stop, col_idx['time']] = time
                    data_obj._data[idx:stop,
                                   col_idx['stoptime']] = stat.stopdtime[0]
                    data_obj._data[idx:stop, col_idx['data']] = data
                    data_obj._data[idx:stop,
                                   col_idx['dataaltitude']] = altitude
                    data_obj._data[idx:stop, col_idx['varidx']] = var_idx

                    if read_err:
                        data_obj._data[idx:stop, col_idx['dataerr']] = err

                    if not var in meta_idx[meta_key]:
                        meta_idx[meta_key][var] = []
                    meta_idx[meta_key][var].extend(list(range(idx, stop)))

                    if not var in metadata[meta_key]['variables']:
                        metadata[meta_key]['variables'].append(var)

                    idx += add

            except Exception as e:
                self.read_failed.append(_file)
                self.logger.exception(
                    'Failed to read file {} (ERR: {})'.format(
                        os.path.basename(_file), repr(e)))

        # shorten data_obj._data to the right number of points
        data_obj._data = data_obj._data[:idx]

        self.data = data_obj
        return data_obj
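# Usage sketch (added for illustration, not part of the original example). The
# keyword arguments follow the signature documented above; `SomeProfileReader`
# is a hypothetical stand-in for whichever concrete reader class defines this
# read() method, and 'var1' for one of its PROVIDES_VARIABLES.
reader = SomeProfileReader()                    # hypothetical concrete reader
data = reader.read(vars_to_retrieve='var1',     # single string is wrapped in a list
                   first_file=0, last_file=10,  # restrict to the first ten files
                   read_err=True)               # also read uncertainties, if available
print(data.metadata[0]['variables'])            # variables found at the first station
print(reader.read_failed)                       # files that raised an exception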
Example #12
    def read(self,
             vars_to_retrieve=None,
             files=None,
             first_file=None,
             last_file=None):
        """Method that reads list of files as instance of :class:`UngriddedData`
        
        Parameters
        ----------
        vars_to_retrieve : :obj:`list` or similar, optional,
            list containing variable IDs that are supposed to be read. If None, 
            all variables in :attr:`PROVIDES_VARIABLES` are loaded
        files : :obj:`list`, optional
            list of files to be read. If None, then the file list is used that
            is returned on :func:`get_file_list`.
        first_file : :obj:`int`, optional
            index of first file in file list to read. If None, the very first
            file in the list is used
        last_file : :obj:`int`, optional
            index of last file in list to read. If None, the very last file 
            in the list is used
            
        Returns
        -------
        UngriddedData
            data object
        """

        if vars_to_retrieve is None:
            vars_to_retrieve = self.DEFAULT_VARS
        elif isinstance(vars_to_retrieve, str):
            vars_to_retrieve = [vars_to_retrieve]

        if files is None:
            if len(self.files) == 0:
                self.get_file_list()
            files = self.files

        if first_file is None:
            first_file = 0
        if last_file is None:
            last_file = len(files)

        files = files[first_file:last_file]

        self.read_failed = []

        data_obj = UngriddedData()
        meta_key = 0.0
        idx = 0

        #assign metadata object
        metadata = data_obj.metadata
        meta_idx = data_obj.meta_idx

        num_vars = len(vars_to_retrieve)
        num_files = len(files)
        disp_each = int(num_files * 0.1)
        if disp_each < 1:
            disp_each = 1

        for i, _file in enumerate(files):

            if i % disp_each == 0:
                print_log.info("Reading file {} of {} ({})".format(
                    i + 1, num_files,
                    type(self).__name__))
            station_data = self.read_file(_file,
                                          vars_to_retrieve=vars_to_retrieve)
            # Fill the metadata dict
            # the location in the data set is time step dependent!
            # use the lat location here since we have to choose one location
            # in the time series plot
            metadata[meta_key] = od()
            metadata[meta_key].update(station_data.get_meta())
            metadata[meta_key].update(station_data.get_station_coords())
            metadata[meta_key]['dataset_name'] = self.DATASET_NAME
            metadata[meta_key]['ts_type'] = self.TS_TYPE
            metadata[meta_key]['variables'] = vars_to_retrieve
            if 'instrument_name' in station_data and station_data[
                    'instrument_name'] is not None:
                instr = station_data['instrument_name']
            else:
                instr = self.INSTRUMENT_NAME
            metadata[meta_key]['instrument_name'] = instr
            # this is a list with indices of this station for each variable
            # not sure yet, if we really need that or if it speeds up things
            meta_idx[meta_key] = od()

            num_times = len(station_data['dtime'])

            #access array containing time stamps
            # TODO: check using index instead (even though not a problem here
            # since all Aerocom data files are of type timeseries)
            times = np.float64(station_data['dtime'])

            totnum = num_times * num_vars

            #check if size of data object needs to be extended
            if (idx + totnum) >= data_obj._ROWNO:
                #if totnum < data_obj._CHUNKSIZE, then the latter is used
                data_obj.add_chunk(totnum)

            for var_idx, var in enumerate(vars_to_retrieve):
                values = station_data[var]
                start = idx + var_idx * num_times
                stop = start + num_times

                #write common meta info for this station (data lon, lat and
                #altitude are set to station locations)
                data_obj._data[start:stop,
                               data_obj._LATINDEX] = station_data['stat_lat']
                data_obj._data[start:stop,
                               data_obj._LONINDEX] = station_data['stat_lon']
                data_obj._data[
                    start:stop,
                    data_obj._ALTITUDEINDEX] = station_data['stat_alt']
                data_obj._data[start:stop,
                               data_obj._METADATAKEYINDEX] = meta_key

                # write data to data object
                data_obj._data[start:stop, data_obj._TIMEINDEX] = times
                data_obj._data[start:stop, data_obj._DATAINDEX] = values
                data_obj._data[start:stop, data_obj._VARINDEX] = var_idx

                meta_idx[meta_key][var] = np.arange(start, stop)

                if not var in data_obj.var_idx:
                    data_obj.var_idx[var] = var_idx

            idx += totnum
            meta_key = meta_key + 1.

        # shorten data_obj._data to the right number of points
        data_obj._data = data_obj._data[:idx]
        data_obj.data_revision[self.DATASET_NAME] = self.data_revision
        self.data = data_obj
        return data_obj
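# Usage sketch (added for illustration, not part of the original example).
# `SomeTimeSeriesReader` is a hypothetical stand-in for the concrete reader
# class defining this read() method, and 'var1' for one of its
# PROVIDES_VARIABLES. The keys of `metadata` and `meta_idx` are the floating
# point meta_key values (0.0, 1.0, ...) assigned per station above.
reader = SomeTimeSeriesReader()                 # hypothetical concrete reader
data = reader.read(vars_to_retrieve=['var1'])
station_meta = data.metadata[0.0]               # metadata of the first station
rows = data.meta_idx[0.0]['var1']               # row indices of 'var1' in _data
print(station_meta['instrument_name'], len(rows))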
Example #13
    def read_dataset(self, dataset_to_read, vars_to_retrieve=None, **kwargs):
        """Read dataset into an instance of :class:`ReadUngridded`
        
        Note
        ----
        This method does not assign loaded data obj to class attribute 
        :attr:`data` (only :func:`read` does)
        
        Parameters
        ----------
        dataset_to_read : str
            name of dataset
        vars_to_retrieve : list
            list of variables to be retrieved. If None (default), the default
            variables of each reading routine are imported
            
        Returns
        -------
        UngriddedData
            data object
        """
        _caching = None
        if len(kwargs) > 0:
            _caching = const.CACHING
            const.CACHING = False

            print_log.info('Received additional reading constraints, '
                           'ignoring caching')
        if vars_to_retrieve is None:
            # Note: self.vars_to_retrieve may be None as well, then
            # default variables of each network are read
            vars_to_retrieve = self.vars_to_retrieve

        reader = self.get_reader(dataset_to_read)

        if vars_to_retrieve is None:
            vars_to_retrieve = reader.PROVIDES_VARIABLES
        elif isinstance(vars_to_retrieve, str):
            vars_to_retrieve = [vars_to_retrieve]

        # Since this interface enables loading multiple datasets, each of
        # which supports a number of variables, only the variables that are
        # supported by this dataset are considered here
        vars_available = [
            var for var in vars_to_retrieve if var in reader.PROVIDES_VARIABLES
        ]

        cache = CacheHandlerUngridded(reader)
        if not self.ignore_cache:
            # check for and load cached data for each variable
            for var in vars_available:
                try:
                    cache.check_and_load(var_name=var)
                except Exception:
                    self.logger.exception(
                        'Fatal: compatibility error between '
                        'old cache file for {} and current '
                        'version of code'.format(var))

        vars_to_read = [
            v for v in vars_available if not v in cache.loaded_data
        ]
        data_read = None
        if len(vars_to_read) > 0:

            _loglevel = print_log.level
            print_log.setLevel(logging.INFO)
            data_read = reader.read(vars_to_read, **kwargs)
            print_log.setLevel(_loglevel)

            for var in vars_to_read:
                # write the cache file
                if not self.ignore_cache:
                    try:
                        cache.write(data_read, var)
                    except Exception as e:
                        _caching = False
                        print_log.warning(
                            'Failed to write to cache directory. '
                            'Error: {}. Deactivating caching in '
                            'pyaerocom'.format(repr(e)))

        if len(vars_to_read) == len(vars_available):
            data_out = data_read
        else:
            data_out = UngriddedData()
            for var in vars_available:
                if var in cache.loaded_data:
                    data_out.append(cache.loaded_data[var])
            if data_read is not None:
                data_out.append(data_read)

        if _caching is not None:
            const.CACHING = _caching
        return data_out
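# Usage sketch (added for illustration, not part of the original example),
# following the doctest shown in the read() docstrings above; the dataset
# constant and the variable name 'od550aer' are assumptions that may need to
# be adapted to the data available locally. Unlike read(), read_dataset()
# does not assign the result to self.data, and cached variables are reused
# unless caching is disabled (e.g. by passing additional reading constraints
# via **kwargs).
from pyaerocom import const
from pyaerocom.io import ReadUngridded

reader = ReadUngridded()
data = reader.read_dataset(const.AERONET_SUN_V3L15_AOD_ALL_POINTS_NAME,
                           vars_to_retrieve='od550aer')
print(data)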