示例#1
0
 def iter_generic_obs(self, sdmxobj, with_value, with_attributes):
     """Yield ``self._ObsTuple(key, value, attributes)`` per observation.

     Two layouts are supported, selected by the truthiness of ``self.dsd``.
     When it is set, each ``Obs`` child of ``sdmxobj._elem`` is read through
     its XML attributes (structure-specific dataset).  Otherwise everything
     is extracted with the callables stored in ``self._paths`` (generic
     dataset).

     :param sdmxobj: object whose ``_elem`` holds the dataset element.
     :param with_value: when false, the value slot of each tuple is ``None``.
     :param with_attributes: when false, the attribute slot is ``None``.
     """
     key_type = attr_type = None
     if not self.dsd:
         # Generic dataset.
         for node in self._paths['generic_obs_path'](sdmxobj._elem):
             key_values = self._paths['obs_key_values_path'](node)
             # The key namedtuple class is created from the first
             # observation and reused for all following ones.
             if key_type is None:
                 key_type = namedtuple_factory(
                     'ObsKey', self._paths['obs_key_id_path'](node))
             key = key_type._make(key_values)
             value = (self._paths['obs_value_path'](node)[0]
                      if with_value else None)
             attrs = None
             if with_attributes:
                 attr_values = self._paths['attr_values_path'](node)
                 attr_ids = self._paths['attr_id_path'](node)
                 attrs = namedtuple_factory(
                     'ObsAttributes', attr_ids)(*attr_values)
             yield self._ObsTuple(key, value, attrs)
         return

     # Structure-specific dataset.
     for node in sdmxobj._elem.iterchildren('Obs'):
         xml_attrs = node.attrib
         # Dimensions present on this observation, in the order of
         # self.dim_ids.
         present_dims = [dim for dim in self.dim_ids if dim in xml_attrs]
         if key_type is None:
             key_type = namedtuple_factory('ObsKey', present_dims)
         key = key_type._make([xml_attrs[dim] for dim in present_dims])
         value = xml_attrs['OBS_VALUE'] if with_value else None
         attrs = None
         if with_attributes:
             # The attribute IDs are fixed by the first observation.
             if attr_type is None:
                 attr_ids = [name for name in self.attrib_ids
                             if name in xml_attrs]
                 attr_type = namedtuple_factory('ObsAttributes', attr_ids)
             attrs = attr_type(*[xml_attrs[name] for name in attr_ids])
         yield self._ObsTuple(key, value, attrs)
示例#2
0
 def iter_generic_series_obs(self,
                             sdmxobj,
                             with_value,
                             with_attributes,
                             reverse_obs=False):
     """Yield ``self._SeriesObsTuple(dim_value, value, attributes)`` for
     every entry of ``sdmxobj._elem['observations']``.

     Entries are sorted with ``self.getitem0`` as key, in reverse order when
     ``reverse_obs`` is true.  The value slot is ``None`` unless
     ``with_value`` is true.  The attribute slot is ``None`` unless
     ``with_attributes`` is true and at least one attribute index of the
     observation is not ``None``.
     """
     ordered = sorted(sdmxobj._elem['observations'].items(),
                      key=self.getitem0,
                      reverse=reverse_obs)
     for position, payload in ordered:
         # This method is only used when each observation has a single
         # dimension (e.g. '2014' for a time series), hence index 0.
         dim_value = self._obs_dim[0]['values'][int(position)]['id']
         value = payload[0] if with_value else None
         attrs = None
         if with_attributes and len(payload) > 1:
             pairs = []
             for value_idx, spec in zip(payload[1:], self._obs_attrib):
                 if value_idx is None:
                     continue
                 attr_id = spec['id']
                 entry = spec['values'][value_idx]
                 # The 'name' of the attribute value stands in for a
                 # missing 'id'.
                 pairs.append((attr_id, entry.get('id', entry['name'])))
             if pairs:
                 attr_ids, attr_values = zip(*pairs)
                 attrs = namedtuple_factory('ObsAttributes',
                                            attr_ids)(*attr_values)
         yield self._SeriesObsTuple(dim_value, value, attrs)
示例#3
0
 def initialize(self, source):
     """Load a JSON data message from *source* and build the message object.

     Besides parsing, this caches the attribute and dimension definitions
     found under ``tree['structure']`` on the reader for later use when
     iterating over series and observations.  A missing ``attributes`` or
     ``dimensions`` section, or a missing level within one, is treated as
     empty.

     :param source: file-like object handed to ``json.load``.
     :return: the ``model.DataMessage`` created for the parsed tree; it is
         also stored as ``self.message``.
     """
     tree = json.load(source)
     # pre-fetch some structures for efficient use in series and obs
     a = tree['structure'].get('attributes', {})
     self._dataset_attrib = a.get('dataSet', [])
     self._series_attrib = a.get('series', [])
     self._obs_attrib = a.get('observation', [])
     d = tree['structure'].get('dimensions', {})
     self._dataset_dim = d.get('dataSet', [])
     self._series_dim = d.get('series', [])
     self._obs_dim = d.get('observation', [])
     self._dataset_dim_key = {dim['keyPosition']: dim['id']
                              for dim in self._dataset_dim}
     self._dataset_dim_values = {dim['keyPosition']: dim['values'][0]['id']
                                 for dim in self._dataset_dim}
     if self._series_dim:
         self._key_len = len(self._dataset_dim) + len(self._series_dim)
         # Map keyPositions of dimensions at series level to dimension IDs,
         # like with dataset-level dims above.
         # In case of a cross-sectional dataset, the only dimension at series
         # level has no keyPosition, e.g. TIME_PERIOD. Instead, the
         # keyPosition of the dim at observation is used to fill the gap.
         # The fallback is computed once and guarded: arguments to dict.get
         # are evaluated eagerly, so indexing self._obs_dim[0] inline would
         # raise IndexError whenever there are no observation-level
         # dimensions, even if every series dimension has a keyPosition.
         if self._obs_dim:
             fallback_position = self._obs_dim[0].get('keyPosition')
         else:
             fallback_position = None
         self._series_dim_key = {
             dim.get('keyPosition', fallback_position): dim['id']
             for dim in self._series_dim}
         self.SeriesKeyTuple = namedtuple_factory(
             'SeriesKeyTuple',
             (self._dataset_dim_key.get(i) or self._series_dim_key.get(i)
              for i in range(self._key_len)))
     else:
         # Dataset must be flat
         self._key_len = len(self._dataset_dim) + len(self._obs_dim)
     self.obs_attr_id = [attr['id'] for attr in self._obs_attrib]
     # init message instance
     self.message = model.DataMessage(self, tree)
     return self.message
示例#4
0
 def iter_generic_series_obs(self, sdmxobj, with_value, with_attributes,
                             reverse_obs=False):
     """Iterate over ``sdmxobj._elem['observations']`` and yield one
     ``self._SeriesObsTuple(dim_value, value, attributes)`` per entry.

     The entries are sorted using ``self.getitem0`` (reversed if
     ``reverse_obs``).  ``value`` is ``None`` when ``with_value`` is false.
     ``attributes`` is ``None`` when ``with_attributes`` is false or when
     the observation carries no attribute index other than ``None``.
     """
     entries = sorted(sdmxobj._elem['observations'].items(),
                      key=self.getitem0, reverse=reverse_obs)
     for obs_key, obs_data in entries:
         # Only one dimension exists at observation level when this method
         # is called (e.g. '2014' for a time series); it sits at index 0.
         dim_value = self._obs_dim[0]['values'][int(obs_key)]['id']
         if with_value:
             measured = obs_data[0]
         else:
             measured = None
         attributes = None
         if with_attributes and len(obs_data) > 1:
             # Keep only the attributes whose value index is set.
             chosen = [(spec, idx)
                       for idx, spec in zip(obs_data[1:], self._obs_attrib)
                       if idx is not None]
             raw = [(spec['id'], spec['values'][idx].get('id'))
                    for spec, idx in chosen]
             if raw:
                 names, values = zip(*raw)
                 attr_type = namedtuple_factory('ObsAttributes', names)
                 attributes = attr_type(*values)
         yield self._SeriesObsTuple(dim_value, measured, attributes)
示例#5
0
 def iter_generic_series_obs(self,
                             sdmxobj,
                             with_value,
                             with_attributes,
                             reverse_obs=False):
     """Yield ``self._SeriesObsTuple(dim_value, value, attributes)`` for
     each item of ``sdmxobj._elem.value['observations']``.

     Items are sorted with ``self.getitem0`` as key (reversed when
     ``reverse_obs`` is true).  The dimension value is looked up in the
     first match of ``$.structure.dimensions.observation[*]``.  The value and
     attribute slots are ``None`` when the corresponding flag is false.
     """
     ordered = sorted(sdmxobj._elem.value['observations'].items(),
                      key=self.getitem0,
                      reverse=reverse_obs)
     dim_matches = parse('$.structure.dimensions.observation[*]').find(
         sdmxobj._elem)
     for item in ordered:
         position, measures = item
         dim_value = dim_matches[0].value['values'][int(position)]['id']
         value = measures[0] if with_value else None
         attrs = None
         if with_attributes:
             # The path callables receive the whole (key, data) item.
             attr_values = self._paths['attr_values_path'](item)
             attr_ids = self._paths['attr_id_path'](item)
             attrs = namedtuple_factory('ObsAttributes',
                                        attr_ids)(*attr_values)
         yield self._SeriesObsTuple(dim_value, value, attrs)
示例#6
0
 def series_attrib(self, sdmxobj):
     """Return the attributes of a series as an ``Attrib`` namedtuple.

     ``sdmxobj._elem['attributes']`` is read as a list of indices, one per
     entry of ``self._series_attrib``; an index of ``None`` means the
     attribute is not set and is skipped.  For each remaining attribute the
     'id' of the selected value is used, or its 'name' if it has no 'id'.

     :return: the namedtuple, or ``None`` if the series has no
         ``attributes`` entry, the entry is empty, or every index in it is
         ``None``.
     """
     value_idx = sdmxobj._elem.get('attributes')
     if not value_idx:
         return None
     attrib_list = [(a['id'],
                     a['values'][i].get('id', a['values'][i]['name']))
                    for i, a in zip(value_idx, self._series_attrib)
                    if i is not None]
     if not attrib_list:
         # All indices were None: unpacking zip(*[]) into two names would
         # raise ValueError, so report "no attributes" instead.
         return None
     attrib_ids, attrib_values = zip(*attrib_list)
     return namedtuple_factory('Attrib', attrib_ids)(*attrib_values)
示例#7
0
    def iter_generic_obs(self, sdmxobj, with_value, with_attributes):
        """Yield ``self._SeriesObsTuple(key, value, attributes)`` for each
        entry of ``sdmxobj._elem.value['observations']``.

        The key of every observation is a ``GenericObservationKey``
        namedtuple combining the dataset-level dimension values with the
        observation-level ones decoded from the colon-separated entry key.

        :param with_value: when false, the value slot is ``None``.
        :param with_attributes: when false, or when the observation has no
            attribute indices, the attribute slot is ``None``.  Attributes
            whose index is ``None`` are reported with the value ``None``.
        """
        # Make type namedtuple for obs_key. It must be
        # merged with any dimension values at dataset level maintaining the
        # key position order.
        # Note that the measure dimension (such as TIME_PERIOD) has no key
        # position. We fill this gap by injecting the highest key position.
        _obs_dim_key = {
            dim.get('keyPosition', self._key_len - 1): dim['id']
            for dim in self._obs_dim
        }
        _GenericObsKey = namedtuple_factory(
            'GenericObservationKey',
            (self._dataset_dim_key.get(d) or _obs_dim_key.get(d)
             for d in range(self._key_len)))
        obs_l = sorted(sdmxobj._elem.value['observations'].items(),
                       key=self.getitem0)
        for dim, value in obs_l:
            # Construct the key for this observation
            key_idx = [int(i) for i in dim.split(':')]
            obs_key_values = [
                d['values'][i]['id'] for i, d in zip(key_idx, self._obs_dim)
            ]
            # Positions fixed at dataset level take their stored value; all
            # other positions consume the observation values in order.
            obs_key = _GenericObsKey._make(
                self._dataset_dim_values.get(d) or obs_key_values.pop(0)
                for d in range(self._key_len))

            # Read the value
            obs_value = value[0] if with_value else None

            # Read any attributes
            if with_attributes and len(value) > 1:
                obs_attr_idx = value[1:]
                # An index of None marks an attribute that is not set for
                # this observation; indexing d['values'] with it would raise
                # TypeError, so such attributes get the value None.
                obs_attr_raw = [
                    (d['id'],
                     d['values'][i].get('id') if i is not None else None)
                    for i, d in zip(obs_attr_idx, self._obs_attrib)
                ]
                if obs_attr_raw:
                    obs_attr_id, obs_attr_values = zip(*obs_attr_raw)
                    obs_attr_type = namedtuple_factory('ObsAttributes',
                                                       obs_attr_id)
                    obs_attr = obs_attr_type(*obs_attr_values)
                else:
                    obs_attr = None
            else:
                obs_attr = None
            yield self._SeriesObsTuple(obs_key, obs_value, obs_attr)
示例#8
0
 def series_attrib(self, sdmxobj):
     """Build the ``Attrib`` namedtuple for the series in ``sdmxobj``.

     The list under ``sdmxobj._elem['attributes']`` holds one value index
     per entry of ``self._series_attrib``.  Entries whose index is ``None``
     are left out.  The value reported for an attribute is the 'id' of the
     selected value, falling back to its 'name'.

     :return: the namedtuple, or ``None`` when there is no ``attributes``
         entry, it is empty, or it contains only ``None`` indices.
     """
     value_idx = sdmxobj._elem.get('attributes')
     if value_idx:
         attrib_list = [(a['id'],
                         a['values'][i].get('id', a['values'][i]['name']))
                        for i, a in zip(value_idx, self._series_attrib)
                        if i is not None]
         # Guard against a list of only-None indices: zip(*[]) yields
         # nothing and the two-name unpacking below would raise ValueError.
         if attrib_list:
             attrib_ids, attrib_values = zip(*attrib_list)
             return namedtuple_factory('Attrib', attrib_ids)(*attrib_values)
     return None
示例#9
0
    def iter_generic_obs(self, sdmxobj, with_value, with_attributes):
        """Yield ``self._SeriesObsTuple(key, value, attributes)`` for each
        entry of ``sdmxobj._elem.value['observations']``.

        Each key is a ``GenericObservationKey`` namedtuple that merges the
        dataset-level dimension values with those decoded from the
        colon-separated entry key.  The value slot is ``None`` when
        ``with_value`` is false.  The attribute slot is ``None`` when
        ``with_attributes`` is false or no attribute is available; an
        attribute whose index is ``None`` is reported with value ``None``.
        """
        # The key type must keep the key position order across dataset-level
        # and observation-level dimensions.  The measure dimension (such as
        # TIME_PERIOD) has no key position, so the highest position is
        # injected for it.
        last_position = self._key_len - 1
        obs_dim_ids = {spec.get('keyPosition', last_position): spec['id']
                       for spec in self._obs_dim}
        key_type = namedtuple_factory(
            'GenericObservationKey',
            (self._dataset_dim_key.get(pos, obs_dim_ids.get(pos))
             for pos in range(self._key_len)))
        ordered = sorted(sdmxobj._elem.value['observations'].items(),
                         key=self.getitem0)
        for raw_key, payload in ordered:
            # Decode the observation-level part of the key lazily; values
            # are consumed only for positions not fixed at dataset level.
            indices = [int(token) for token in raw_key.split(':')]
            pending = (spec['values'][idx]['id']
                       for idx, spec in zip(indices, self._obs_dim))
            key = key_type._make(
                self._dataset_dim_values.get(pos) or next(pending)
                for pos in range(self._key_len))

            measured = value_or_none = None
            if with_value:
                value_or_none = payload[0]
            measured = value_or_none

            attrs = None
            if with_attributes and len(payload) > 1:
                pairs = []
                for idx, spec in zip(payload[1:], self._obs_attrib):
                    attr_id = spec['id']
                    if idx is None:
                        attr_value = None
                    else:
                        attr_value = spec['values'][idx].get('id')
                    pairs.append((attr_id, attr_value))
                if pairs:
                    attr_ids, attr_values = zip(*pairs)
                    attrs = namedtuple_factory(
                        'ObsAttributes', attr_ids)(*attr_values)
            yield self._SeriesObsTuple(key, measured, attrs)
示例#10
0
    def series_key(self, sdmxobj):
        """Return the key of the series in ``sdmxobj`` as a ``SeriesKey``
        namedtuple.

        IDs and values are extracted from ``sdmxobj._elem`` with the
        ``series_key_id_path`` and ``series_key_values_path`` callables.
        """
        elem = sdmxobj._elem
        raw_ids = self._paths['series_key_id_path'](elem)

        # Translate IDs to match with SDMX 2.1.
        renames = {'FREQUENCY': 'FREQ'}
        key_ids = [renames.get(raw, raw) for raw in raw_ids]

        key_values = self._paths['series_key_values_path'](elem)
        return namedtuple_factory('SeriesKey', key_ids)._make(key_values)
示例#11
0
    def series_key(self, sdmxobj):
        """Build the ``SeriesKey`` namedtuple for ``sdmxobj``.

        The dimension IDs come from ``series_key_id_path`` and the values
        from ``series_key_values_path``, both applied to ``sdmxobj._elem``.
        """
        # Translate IDs to match with SDMX 2.1.
        id_map = {'FREQUENCY': 'FREQ'}
        ids = []
        for found in self._paths['series_key_id_path'](sdmxobj._elem):
            ids.append(id_map.get(found, found))

        values = self._paths['series_key_values_path'](sdmxobj._elem)
        key_type = namedtuple_factory('SeriesKey', ids)
        return key_type._make(values)
示例#12
0
def test_concat_namedtuples():
    """Check that ``concat_namedtuples`` returns a tuple whose first field
    is still reachable by name.

    Six namedtuples are built over consecutive slices of the letters A..Y
    (as field names) and the numbers 0..24 (as values), then concatenated.
    """
    numbers = list(range(26))
    letters = [chr(ord('A') + n) for n in numbers]
    bounds = [0, 4, 5, 8, 14, 22, 25]
    pieces = []
    # Pair every bound with its successor to get the slice limits.
    for low, high in zip(bounds, bounds[1:]):
        piece_type = namedtuple_factory('Test', letters[low:high])
        pieces.append(piece_type(*numbers[low:high]))
    merged = concat_namedtuples(*pieces)
    assert isinstance(merged, tuple)
    assert merged.A == 0
示例#13
0
 def iter_generic_series_obs(self, sdmxobj, with_value, with_attributes,
                             reverse_obs=False):
     """Yield ``self._SeriesObsTuple(dim_value, value, attributes)`` for
     the observations found among the children of ``sdmxobj._elem``.

     If ``self.dsd`` is set, all children are read through their XML
     attributes (structure-specific dataset); otherwise only the generic
     ``Obs`` children are visited and decoded with ``self._paths``.
     ``reverse_obs`` is passed on as the ``reversed`` argument of
     ``iterchildren``.  The value and attribute slots are ``None`` when the
     corresponding flag is false.
     """
     if self.dsd:
         # Structure-specific dataset.
         attr_type = None
         for node in sdmxobj._elem.iterchildren(reversed=reverse_obs):
             xml_attrs = node.attrib
             # Value of the dimension at observation.
             dim_value = xml_attrs[self.message.data.dim_at_obs]
             value = xml_attrs['OBS_VALUE'] if with_value else None
             attrs = None
             if with_attributes:
                 # The attribute IDs are fixed by the first observation.
                 if attr_type is None:
                     attr_ids = [name for name in self.attrib_ids
                                 if name in xml_attrs]
                     attr_type = namedtuple_factory(
                         'ObsAttributes', attr_ids)
                 attrs = attr_type(*[xml_attrs[name] for name in attr_ids])
             yield self._SeriesObsTuple(dim_value, value, attrs)
         return

     # Generic dataset.
     children = sdmxobj._elem.iterchildren(
         '{http://www.sdmx.org/resources/sdmxml/schemas/v2_1/data/generic}Obs',
         reversed=reverse_obs)
     for node in children:
         dim_value = self._paths['generic_series_dim_path'](node)[0]
         value = (self._paths['obs_value_path'](node)[0]
                  if with_value else None)
         attrs = None
         if with_attributes:
             attr_values = self._paths['attr_values_path'](node)
             attr_ids = self._paths['attr_id_path'](node)
             attrs = namedtuple_factory(
                 'ObsAttributes', attr_ids)(*attr_values)
         yield self._SeriesObsTuple(dim_value, value, attrs)
示例#14
0
 def series_key(self, sdmxobj):
     """Return the full key of a series as a ``SeriesKey`` namedtuple.

     The key starts with the dataset-level dimensions (each contributing
     the 'id' of its first value) and continues with the series-level
     dimensions, whose values are selected by the colon-separated indices
     stored under ``sdmxobj._elem['_key']``.
     """
     # Dataset-level part of the key.
     ids = [dim['id'] for dim in self._dataset_dim]
     values = [dim['values'][0]['id'] for dim in self._dataset_dim]
     # Series-level part, selected by the indices in '_key'.
     positions = [int(token) for token in sdmxobj._elem['_key'].split(':')]
     ids += [dim['id'] for dim in self._series_dim]
     values += [dim['values'][pos]['id']
                for pos, dim in zip(positions, self._series_dim)]
     return namedtuple_factory('SeriesKey', ids)._make(values)
示例#15
0
 def group_key(self, sdmxobj):
     """Return the key of the group in ``sdmxobj`` as a ``GroupKey``
     namedtuple.

     With ``self.dsd`` set, the key consists of the entries of
     ``self.dim_ids`` that occur among the XML attributes of
     ``sdmxobj._elem``; otherwise IDs and values are extracted with the
     ``group_key_id_path`` and ``group_key_values_path`` callables.
     """
     elem = sdmxobj._elem
     if self.dsd:
         # Structure-specific dataset.
         xml_attrs = elem.attrib
         pairs = [(dim, xml_attrs[dim])
                  for dim in self.dim_ids if dim in xml_attrs]
         key_ids, key_values = zip(*pairs)
     else:
         # Generic dataset.
         key_ids = self._paths['group_key_id_path'](elem)
         key_values = self._paths['group_key_values_path'](elem)
     return namedtuple_factory('GroupKey', key_ids)._make(key_values)
示例#16
0
 def series_key(self, sdmxobj):
     """Return the key of the series in ``sdmxobj`` as a ``SeriesKey``
     namedtuple.

     For a structure-specific dataset (``self.dsd`` set) the key is made of
     those ``self.dim_ids`` that appear as XML attributes on
     ``sdmxobj._elem``.  For a generic dataset the IDs and values come from
     the ``series_key_id_path`` and ``series_key_values_path`` callables.
     """
     if not self.dsd:
         # Generic dataset.
         ids = self._paths['series_key_id_path'](sdmxobj._elem)
         values = self._paths['series_key_values_path'](sdmxobj._elem)
     else:
         # Structure-specific dataset.
         xml_attrs = sdmxobj._elem.attrib
         found = [(dim, xml_attrs[dim])
                  for dim in self.dim_ids if dim in xml_attrs]
         ids, values = zip(*found)
     key_type = namedtuple_factory('SeriesKey', ids)
     return key_type._make(values)
示例#17
0
 def iter_generic_obs(self, sdmxobj, with_value, with_attributes):
     """Yield ``self._ObsTuple(key, value, attributes)`` for each
     observation returned by ``generic_obs_path`` for ``sdmxobj._elem``.

     :param with_value: when false, the value slot is ``None``.
     :param with_attributes: when false, the attribute slot is ``None``.
     """
     # The namedtuple class for the ObsKey is created on first iteration.
     # An explicit None sentinel is used rather than catching the NameError
     # raised by an unbound local, so that unrelated errors inside
     # ``_make`` can never be mistaken for "class not created yet".
     ObsKeyTuple = None
     for obs in self._paths['generic_obs_path'](sdmxobj._elem):
         obs_key_values = self._paths['obs_key_values_path'](obs)
         if ObsKeyTuple is None:
             obs_key_id = self._paths['obs_key_id_path'](obs)
             ObsKeyTuple = namedtuple_factory('ObsKey', obs_key_id)
         obs_key = ObsKeyTuple._make(obs_key_values)
         if with_value:
             obs_value = self._paths['obs_value_path'](obs)[0]
         else:
             obs_value = None
         if with_attributes:
             obs_attr_values = self._paths['attr_values_path'](obs)
             obs_attr_id = self._paths['attr_id_path'](obs)
             obs_attr_type = namedtuple_factory(
                 'ObsAttributes', obs_attr_id)
             obs_attr = obs_attr_type(*obs_attr_values)
         else:
             obs_attr = None
         yield self._ObsTuple(obs_key, obs_value, obs_attr)
示例#18
0
 def iter_generic_obs(self, sdmxobj, with_value, with_attributes):
     """Iterate over the observations that ``generic_obs_path`` finds in
     ``sdmxobj._elem`` and yield ``self._ObsTuple(key, value, attributes)``.

     The value slot is ``None`` unless ``with_value`` is true, and the
     attribute slot is ``None`` unless ``with_attributes`` is true.
     """
     # Class of the observation keys; built from the first observation.
     # Initialised to None explicitly instead of relying on a NameError
     # from an unbound local to detect the first iteration.
     key_type = None
     for obs in self._paths['generic_obs_path'](sdmxobj._elem):
         key_values = self._paths['obs_key_values_path'](obs)
         if key_type is None:
             key_type = namedtuple_factory(
                 'ObsKey', self._paths['obs_key_id_path'](obs))
         obs_key = key_type._make(key_values)

         obs_value = (self._paths['obs_value_path'](obs)[0]
                      if with_value else None)

         obs_attr = None
         if with_attributes:
             attr_values = self._paths['attr_values_path'](obs)
             attr_ids = self._paths['attr_id_path'](obs)
             obs_attr = namedtuple_factory('ObsAttributes',
                                           attr_ids)(*attr_values)
         yield self._ObsTuple(obs_key, obs_value, obs_attr)
示例#19
0
 def series_attrib(self, sdmxobj):
     """Return the attributes of the series in ``sdmxobj`` as an ``Attrib``
     namedtuple.

     With ``self.dsd`` set, the attributes are those ``self.attrib_ids``
     that occur among the XML attributes of ``sdmxobj._elem``; if none
     occur, the namedtuple has no fields.  Otherwise IDs and values are
     extracted with ``attr_id_path`` and ``attr_values_path``.
     """
     if not self.dsd:
         # Generic dataset.
         names = self._paths['attr_id_path'](sdmxobj._elem)
         values = self._paths['attr_values_path'](sdmxobj._elem)
         return namedtuple_factory('Attrib', names)(*values)

     # Structure-specific dataset.
     xml_attrs = sdmxobj._elem.attrib
     found = [(name, xml_attrs[name])
              for name in self.attrib_ids if name in xml_attrs]
     if found:
         names, values = zip(*found)
     else:
         names = values = []
     return namedtuple_factory('Attrib', names)(*values)
示例#20
0
 def series_key(self, sdmxobj):
     """Return the full key of a series as a ``SeriesKey`` namedtuple.

     Dataset-level dimensions (matches of
     ``$.structure.dimensions.dataSet[*]``) come first, each with the 'id'
     of its first value.  They are followed by the series-level dimensions
     (first match of ``$.structure.dimensions.series``), whose values are
     selected by the colon-separated indices in
     ``sdmxobj._elem.value['_key']``.
     """
     # Dataset-level part of the key.
     dataset_matches = parse(
         '$.structure.dimensions.dataSet[*]').find(sdmxobj._elem)
     ids = [match.value['id'] for match in dataset_matches]
     values = [match.value['values'][0]['id'] for match in dataset_matches]
     # Series-level part of the key.
     positions = [int(token)
                  for token in sdmxobj._elem.value['_key'].split(':')]
     series_dims = parse('$.structure.dimensions.series').find(
         sdmxobj._elem)[0].value
     ids += [dim['id'] for dim in series_dims]
     values += [dim['values'][pos]['id']
                for pos, dim in zip(positions, series_dims)]
     return namedtuple_factory('SeriesKey', ids)._make(values)
示例#21
0
 def series_key(self, sdmxobj):
     """Build the ``SeriesKey`` namedtuple for the series in ``sdmxobj``.

     The key is the concatenation of the dataset-level dimensions found by
     ``$.structure.dimensions.dataSet[*]`` (using the 'id' of their first
     value) and the series-level dimensions found by
     ``$.structure.dimensions.series``; for the latter, the value of each
     dimension is picked by the matching index in the colon-separated
     ``_key`` of ``sdmxobj._elem.value``.
     """
     elem = sdmxobj._elem
     # pull down dataset key
     found = parse('$.structure.dimensions.dataSet[*]').find(elem)
     key_ids = []
     key_values = []
     for match in found:
         key_ids.append(match.value['id'])
     for match in found:
         key_values.append(match.value['values'][0]['id'])
     indices = [int(part) for part in elem.value['_key'].split(':')]
     series_dims = parse('$.structure.dimensions.series').find(elem)[0].value
     key_ids.extend([dim['id'] for dim in series_dims])
     key_values.extend([dim['values'][idx]['id']
                        for idx, dim in zip(indices, series_dims)])
     key_type = namedtuple_factory('SeriesKey', key_ids)
     return key_type._make(key_values)
示例#22
0
 def iter_generic_series_obs(self, sdmxobj, with_value, with_attributes,
                             reverse_obs=False):
     """Yield ``self._SeriesObsTuple(dim_value, value, attributes)`` for
     each generic ``Obs`` child of ``sdmxobj._elem``.

     ``reverse_obs`` is passed on as the ``reversed`` argument of
     ``iterchildren``.  The value and attribute slots are ``None`` when
     ``with_value`` / ``with_attributes`` are false.
     """
     children = sdmxobj._elem.iterchildren(
         '{http://www.sdmx.org/resources/sdmxml/schemas/v2_1/data/generic}Obs',
         reversed=reverse_obs)
     for node in children:
         dim_value = self._paths['generic_series_dim_path'](node)[0]
         value = (self._paths['obs_value_path'](node)[0]
                  if with_value else None)
         attrs = None
         if with_attributes:
             attr_values = self._paths['attr_values_path'](node)
             attr_ids = self._paths['attr_id_path'](node)
             attrs = namedtuple_factory(
                 'ObsAttributes', attr_ids)(*attr_values)
         yield self._SeriesObsTuple(dim_value, value, attrs)
示例#23
0
 def initialize(self, source):
     """Load a JSON data message from *source* and build the message object.

     The attribute and dimension definitions under ``tree['structure']``
     are cached on the reader for later use when iterating over series and
     observations.  All three attribute levels and the ``series`` and
     ``observation`` dimension levels must be present; only the ``dataSet``
     dimension level is optional.

     :param source: file-like object handed to ``json.load``.
     :return: the ``model.DataMessage`` created for the parsed tree; it is
         also stored as ``self.message``.
     """
     tree = json.load(source)
     # Pre-fetch some structures for efficient use in series and obs.
     attributes = tree['structure']['attributes']
     self._dataset_attrib = attributes['dataSet']
     self._series_attrib = attributes['series']
     self._obs_attrib = attributes['observation']
     dimensions = tree['structure']['dimensions']
     self._dataset_dim = dimensions.get('dataSet', [])
     self._series_dim = dimensions['series']
     self._obs_dim = dimensions['observation']
     self._dataset_dim_key = dict(
         (dim['keyPosition'], dim['id']) for dim in self._dataset_dim)
     self._dataset_dim_values = dict(
         (dim['keyPosition'], dim['values'][0]['id'])
         for dim in self._dataset_dim)
     if not self._series_dim:
         # Dataset must be flat
         self._key_len = len(self._dataset_dim) + len(self._obs_dim)
     else:
         self._key_len = len(self._dataset_dim) + len(self._series_dim)
         # Map keyPositions of dimensions at series level to dimension IDs,
         # like with dataset-level dims above.  In case of a cross-sectional
         # dataset, the only dimension at series level has no keyPosition,
         # e.g. TIME_PERIOD.  Instead, the keyPosition of the dim at
         # observation is used to fill the gap.
         gap_position = self._obs_dim[0].get('keyPosition')
         self._series_dim_key = dict(
             (dim.get('keyPosition', gap_position), dim['id'])
             for dim in self._series_dim)
         self.SeriesKeyTuple = namedtuple_factory(
             'SeriesKeyTuple',
             (self._dataset_dim_key.get(pos) or self._series_dim_key.get(pos)
              for pos in range(self._key_len)))
     self.obs_attr_id = [attr['id'] for attr in self._obs_attrib]
     # init message instance
     self.message = model.DataMessage(self, tree)
     return self.message
示例#24
0
 def iter_generic_series_obs(self, sdmxobj, with_value, with_attributes,
                             reverse_obs=False):
     """Iterate over ``sdmxobj._elem.value['observations']`` and yield one
     ``self._SeriesObsTuple(dim_value, value, attributes)`` per item.

     Items are sorted using ``self.getitem0`` (reversed if ``reverse_obs``).
     The dimension value is taken from the first match of
     ``$.structure.dimensions.observation[*]``.  ``value`` and
     ``attributes`` are ``None`` when the corresponding flag is false.
     """
     observations = sdmxobj._elem.value['observations']
     entries = sorted(observations.items(), key=self.getitem0,
                      reverse=reverse_obs)
     dim_specs = parse(
         '$.structure.dimensions.observation[*]').find(sdmxobj._elem)
     for entry in entries:
         dim_value = dim_specs[0].value['values'][int(entry[0])]['id']
         measured = entry[1][0] if with_value else None
         if not with_attributes:
             attributes = None
         else:
             # The path callables are applied to the whole (key, data)
             # entry.
             values = self._paths['attr_values_path'](entry)
             names = self._paths['attr_id_path'](entry)
             attr_type = namedtuple_factory('ObsAttributes', names)
             attributes = attr_type(*values)
         yield self._SeriesObsTuple(dim_value, measured, attributes)
示例#25
0
 def iter_generic_series_obs(self,
                             sdmxobj,
                             with_value,
                             with_attributes,
                             reverse_obs=False):
     """Yield ``self._SeriesObsTuple(dim_value, value, attributes)`` for
     every generic ``Obs`` child of ``sdmxobj._elem``.

     The children are requested from ``iterchildren`` with
     ``reversed=reverse_obs``.  ``value`` is ``None`` unless ``with_value``
     is true and ``attributes`` is ``None`` unless ``with_attributes`` is
     true.
     """
     obs_tag = ('{http://www.sdmx.org/resources/sdmxml/schemas/v2_1/'
                'data/generic}Obs')
     for obs in sdmxobj._elem.iterchildren(obs_tag, reversed=reverse_obs):
         dim_value = self._paths['generic_series_dim_path'](obs)[0]
         measured = None
         if with_value:
             measured = self._paths['obs_value_path'](obs)[0]
         attributes = None
         if with_attributes:
             values = self._paths['attr_values_path'](obs)
             names = self._paths['attr_id_path'](obs)
             attr_type = namedtuple_factory('ObsAttributes', names)
             attributes = attr_type(*values)
         yield self._SeriesObsTuple(dim_value, measured, attributes)
示例#26
0
 def series_attrib(self, sdmxobj):
     """Return the attributes extracted from ``sdmxobj._elem`` as an
     ``Attrib`` namedtuple.

     Field names come from ``attr_id_path`` and values from
     ``attr_values_path``.
     """
     elem = sdmxobj._elem
     names = self._paths['attr_id_path'](elem)
     values = self._paths['attr_values_path'](elem)
     attrib_type = namedtuple_factory('Attrib', names)
     return attrib_type(*values)
示例#27
0
 def group_key(self, sdmxobj):
     """Return the key of the group in ``sdmxobj`` as a ``GroupKey``
     namedtuple.

     Field names come from ``group_key_id_path`` and values from
     ``group_key_values_path``, both applied to ``sdmxobj._elem``.
     """
     elem = sdmxobj._elem
     key_ids = self._paths['group_key_id_path'](elem)
     key_values = self._paths['group_key_values_path'](elem)
     return namedtuple_factory('GroupKey', key_ids)._make(key_values)
示例#28
0
 def series_key(self, sdmxobj):
     """Return the key of the series in ``sdmxobj`` as a ``SeriesKey``
     namedtuple.

     Field names come from ``series_key_id_path`` and values from
     ``series_key_values_path``, both applied to ``sdmxobj._elem``.
     """
     elem = sdmxobj._elem
     key_ids = self._paths['series_key_id_path'](elem)
     key_values = self._paths['series_key_values_path'](elem)
     return namedtuple_factory('SeriesKey', key_ids)._make(key_values)
示例#29
0
class Reader(BaseReader):
    """
    Read SDMXJSON 2.1 and expose it as instances from pandasdmx.model

    ``initialize`` caches the dimension and attribute definitions found under
    ``structure`` of the message. The key and attribute helpers below resolve
    the positional indices used in the data part against these cached lists.
    """

    def read_as_str(self, name, sdmxobj, first_only=True):
        """
        Evaluate the compiled path ``name`` against ``sdmxobj._elem``.

        Returns the ``value`` of the first match if ``first_only`` is true,
        otherwise the list of all matched values. Returns None if there is
        no match.
        """
        result = self._paths[name](sdmxobj._elem)
        if result:
            if first_only:
                return result[0].value
            else:
                return [r.value for r in result]

    def initialize(self, source):
        """
        Load the JSON document from the file-like ``source``, cache the
        structural metadata and return the resulting ``model.DataMessage``.

        Attribute and dimension levels ('dataSet', 'series', 'observation')
        that are missing or null are treated as empty lists.
        """
        tree = json.load(source)
        # pre-fetch some structures for efficient use in series and obs
        attributes = tree['structure']['attributes']
        self._dataset_attrib = attributes.get('dataSet') or []
        self._series_attrib = attributes.get('series') or []
        self._obs_attrib = attributes.get('observation') or []
        dimensions = tree['structure']['dimensions']
        self._dataset_dim = dimensions.get('dataSet') or []
        self._series_dim = dimensions.get('series') or []
        self._obs_dim = dimensions.get('observation') or []
        self._dataset_dim_key = {
            dim['keyPosition']: dim['id']
            for dim in self._dataset_dim
        }
        self._dataset_dim_values = {
            dim['keyPosition']: dim['values'][0]['id']
            for dim in self._dataset_dim
        }
        if self._series_dim:
            self._key_len = len(self._dataset_dim) + len(self._series_dim)
            # Map keyPositions of dimensions at series level to dimension IDs, like with dataset-level dims above.
            # In case of cross-sectional dataset, the only dimension at series level has no
            # keyPosition, eg. TIME_PERIOD. Instead, the keyPosition of the dim at observation
            # is used to fill the gap.
            # The fallback is computed once and only if an observation-level
            # dimension exists, so an empty list cannot raise IndexError.
            if self._obs_dim:
                fallback_pos = self._obs_dim[0].get('keyPosition')
            else:
                fallback_pos = None
            self._series_dim_key = {
                dim.get('keyPosition', fallback_pos): dim['id']
                for dim in self._series_dim
            }
            self.SeriesKeyTuple = namedtuple_factory(
                'SeriesKeyTuple',
                (self._dataset_dim_key.get(i) or self._series_dim_key.get(i)
                 for i in range(self._key_len)))
        else:
            # Dataset must be flat
            self._key_len = len(self._dataset_dim) + len(self._obs_dim)
        self.obs_attr_id = [attr['id'] for attr in self._obs_attrib]
        # init message instance
        cls = model.DataMessage
        self.message = cls(self, tree)
        return self.message

    # flag to prevent multiple compiling. See BaseReader.__init__
    _compiled = False

    def write_source(self, filename):
        '''
        Save the source message to ``filename`` as indented JSON with
        sorted keys.
        '''
        with open(filename, 'w') as fp:
            return json.dump(self.message._elem, fp, indent=4, sort_keys=True)

    # Path expressions compiled by ``_compile_paths``. The commented-out
    # entries are kept verbatim from the original and are not available in
    # this reader.
    _paths = {
        #         'footer_text': 'com:Text/text()',
        #         'footer_code': '@code',
        #         'footer_severity': '@severity',
        #         'dataflow_from_msg': 'mes:Structures/str:Dataflows',
        #         'constraint_attachment': 'str:ConstraintAttachment',
        #         'include': '@include',
        #         'id': '@id',
        #         'urn': '@urn',
        #         'url': '@url',
        #         'uri': '@uri',
        #         'agencyID': '@agencyID',
        #         'maintainable_parent_id': '@maintainableParentID',
        #         'value': 'com:Value/text()',
        'headerID':
        '$.header.id',
        'header_prepared':
        '$.header.prepared',
        'header_sender':
        '$.header.sender.*',
        #         'header_receiver': 'mes:Receiver/@*',
        #         'assignment_status': '@assignmentStatus',
        #         'error': 'mes:error/@*',
        #         'ref_version': '@version',
        #         'concept_id': 'str:ConceptIdentity',
        #         'position': '@position',
        #         'isfinal': '@isfinal',
        #         'ref_package': '@package',
        #         'ref_class': '@class',
        #         'ref_target': 'str:Target',
        #         'ref_source': 'str:Source',
        #         'ref_structure': 'str:Structure',
        #         'annotationtype': 'com:AnnotationType/text()',
        #         'generic_obs_path': 'gen:Obs',
        #         'obs_key_id_path': 'gen:ObsKey/gen:Value/@id',
        #         'obs_key_values_path': 'gen:ObsKey/gen:Value/@value',
        #         'series_key_values_path': 'gen:SeriesKey/gen:Value/@value',
        #         'series_key_id_path':        'gen:SeriesKey/gen:Value/@id',
        #         'generic_series_dim_path': 'gen:ObsDimension/@value',
        #         'group_key_values_path': 'gen:GroupKey/gen:Value/@value',
        #         'group_key_id_path': 'gen:GroupKey/gen:Value/@id',
        #         'obs_value_path': 'gen:ObsValue/@value',
        #         'attr_id_path': 'gen:Attributes/gen:Value/@id',
        #         'attr_values_path': 'gen:Attributes/gen:Value/@value',
        #         model.Code: 'str:Code',
        #         model.Categorisation: 'str:Categorisation',
        #         model.CategoryScheme: 'mes:Structures/str:CategorySchemes/str:CategoryScheme',
        #         model.DataStructureDefinition: 'mes:Structures/str:DataStructures/str:DataStructure',
        #         model.DataflowDefinition: 'str:Dataflow',
        #         model.ConceptScheme: 'mes:Structures/str:Concepts/str:ConceptScheme',
        #         model.ContentConstraint: 'mes:Structures/str:Constraints/str:ContentConstraint',
        #         model.Concept: 'str:Concept',
        #         model.Codelist: 'mes:Structures/str:Codelists/str:Codelist',
        #         model.Categorisations: 'mes:Structures/str:Categorisations',
        model.Footer:
        'footer.message',
        #         model.Category: 'str:Category',
        #         model.DimensionDescriptor: 'str:DataStructureComponents/str:DimensionList',
        #         model.Dimension: 'str:Dimension',
        #         model.TimeDimension: 'str:TimeDimension',
        #         model.MeasureDimension: 'str:MeasureDimension',
        #         model.MeasureDescriptor: 'str:DataStructureComponents/str:MeasureList',
        #         model.PrimaryMeasure: 'str:PrimaryMeasure',
        #         model.AttributeDescriptor: 'str:DataStructureComponents/str:AttributeList',
        #         model.DataAttribute: 'str:Attribute',
        #         model.CubeRegion: 'str:CubeRegion',
        #         model.KeyValue: 'com:KeyValue',
        #         model.Ref: 'Ref',
        model.Header:
        '$.header',
        #         model.Annotation: 'com:Annotations/com:Annotation',
        #         model.Group: 'gen:Group',
        #         model.Series: 'gen:Series',
        model.DataSet:
        '$.dataSets[0]',
        #         'int_str_names': './*[local-name() = $name]/@xml:lang',
        #         model.Representation: 'str:LocalRepresentation',
        #         'int_str_values': './*[local-name() = $name]/text()',
        #         'enumeration': 'str:Enumeration',
        #         'texttype': 'str:TextFormat/@textType',
        #         'maxlength': 'str:TextFormat/@maxLength',
        #         # need this? It is just a non-offset Ref
        #         'attr_relationship': '*/Ref/@id',
    }

    @classmethod
    def _compile_paths(cls):
        """Replace every path string in ``cls._paths`` by ``XPath(path)``."""
        for key, path in cls._paths.items():
            cls._paths[key] = XPath(path)

    def international_str(self, name, sdmxobj):
        '''
        return DictLike of xml:lang attributes. If node has no attributes,
        assume that language is 'en'.
        '''
        # NOTE(review): 'int_str_names' and 'int_str_values' are commented
        # out in this class's ``_paths``; unless they are provided elsewhere
        # these lookups raise KeyError - confirm.
        # Get language tokens like 'en', 'fr'...
        elem_attrib = self._paths['int_str_names'](sdmxobj._elem, name=name)
        values = self._paths['int_str_values'](sdmxobj._elem, name=name)
        # Unilingual strings have no attributes. Assume 'en' instead.
        if not elem_attrib:
            elem_attrib = ['en']
        return DictLike(zip(elem_attrib, values))

    def header_error(self, sdmxobj):
        """
        Return ``DictLike(sdmxobj._elem.Error.attrib)``, or None if that
        attribute access raises AttributeError.
        """
        try:
            return DictLike(sdmxobj._elem.Error.attrib)
        except AttributeError:
            return None

    def dim_at_obs(self, sdmxobj):
        """
        Return 'AllDimensions' if there is more than one dimension at
        observation level, otherwise the id of the only one.
        """
        if len(self._obs_dim) > 1:
            return 'AllDimensions'
        else:
            return self._obs_dim[0]['id']

    def structured_by(self, sdmxobj):
        return None  # complete this

    # Types for generic observations
    _ObsTuple = namedtuple_factory('GenericObservation',
                                   ('key', 'value', 'attrib'))
    _SeriesObsTuple = namedtuple_factory('SeriesObservation',
                                         ('dim', 'value', 'attrib'))

    # Operators
    getitem0 = itemgetter(0)
    getitem_key = itemgetter('_key')

    @staticmethod
    def _key_to_indices(key):
        """Split a colon-separated index key such as '0:3:12' into a tuple of ints."""
        return tuple(int(i) for i in key.split(':'))

    @staticmethod
    def _make_attrib(value_idx, attrib_defs):
        """
        Build an 'Attrib' namedtuple from positional value indices.

        ``value_idx[n]`` indexes into ``attrib_defs[n]['values']``; None
        entries are skipped. The value's 'id' is used if present, else its
        'name'. Returns None if nothing remains.
        """
        if not value_idx:
            return None
        attrib_list = []
        for i, a in zip(value_idx, attrib_defs):
            if i is None:
                continue
            value = a['values'][i]
            # Only fall back to 'name' when 'id' is really absent, so that a
            # value without 'name' but with 'id' does not raise KeyError.
            attrib_list.append(
                (a['id'], value['id'] if 'id' in value else value['name']))
        if not attrib_list:
            # Every index was None: nothing to unpack below.
            return None
        attrib_ids, attrib_values = zip(*attrib_list)
        return namedtuple_factory('Attrib', attrib_ids)(*attrib_values)

    def _make_obs_attrib(self, obs_attr_idx):
        """
        Build an 'ObsAttributes' namedtuple from the attribute indices of a
        single observation, skipping None entries. Returns None if nothing
        remains.
        """
        obs_attr_raw = [(d['id'], d['values'][i].get('id'))
                        for i, d in zip(obs_attr_idx, self._obs_attrib)
                        if i is not None]
        if not obs_attr_raw:
            return None
        obs_attr_id, obs_attr_values = zip(*obs_attr_raw)
        return namedtuple_factory('ObsAttributes',
                                  obs_attr_id)(*obs_attr_values)

    def iter_generic_obs(self, sdmxobj, with_value, with_attributes):
        """
        Yield the observations of a flat dataset as
        ``_ObsTuple(key, value, attrib)``, ordered by their numeric index key.

        ``value`` is None unless ``with_value`` is true; ``attrib`` is None
        unless ``with_attributes`` is true and the observation carries
        attribute indices that are not None.
        """
        # Make type namedtuple for obs_key. It must be
        # merged with any dimension values at dataset level maintaining the
        # key position order.
        # Note that the measure dimension (such as TIME_PERIOD) has no key position.
        # We fill this gap by injecting the highest key position.
        _obs_dim_key = {
            dim.get('keyPosition', self._key_len - 1): dim['id']
            for dim in self._obs_dim
        }
        _GenericObsKey = namedtuple_factory(
            'GenericObservationKey',
            (self._dataset_dim_key.get(d) or _obs_dim_key.get(d)
             for d in range(self._key_len)))
        # Sort on the parsed integer indices: sorting the raw strings would
        # put e.g. '0:10' before '0:2'.
        obs_l = sorted(
            ((self._key_to_indices(key), value)
             for key, value in sdmxobj._elem.value['observations'].items()),
            key=self.getitem0)
        for key_idx, value in obs_l:
            # Construct the key for this observation
            obs_key_values = [
                d['values'][i]['id'] for i, d in zip(key_idx, self._obs_dim)
            ]
            obs_key = _GenericObsKey._make(
                self._dataset_dim_values.get(d) or obs_key_values.pop(0)
                for d in range(self._key_len))

            # Read the value
            obs_value = value[0] if with_value else None

            # Read any attributes
            if with_attributes and len(value) > 1:
                obs_attr = self._make_obs_attrib(value[1:])
            else:
                obs_attr = None
            yield self._ObsTuple(obs_key, obs_value, obs_attr)

    def generic_series(self, sdmxobj):
        """
        Yield a ``model.Series`` for each entry under 'series' of the dataset,
        ordered by numeric index key.

        Each series dict gets its own key stored under '_key' so that
        ``series_key`` can resolve it later.
        """
        all_series = sdmxobj._elem.value['series']
        for key, series in all_series.items():
            series['_key'] = key
        # Compare keys as integer tuples rather than strings ('10' < '2').
        for series in sorted(
                all_series.values(),
                key=lambda s: self._key_to_indices(self.getitem_key(s))):
            yield model.Series(self, series, dataset=sdmxobj)

    def generic_groups(self, sdmxobj):
        return []

    def series_key(self, sdmxobj):
        """
        Return the full key of a series as ``SeriesKeyTuple``: dataset-level
        dimension values merged with the series-level values resolved from
        the indices stored under '_key'.
        """
        key_idx = self._key_to_indices(sdmxobj._elem['_key'])
        series_key_values = [
            d['values'][i]['id'] for i, d in zip(key_idx, self._series_dim)
        ]
        full_key_values = [
            self._dataset_dim_values.get(d) or series_key_values.pop(0)
            for d in range(self._key_len)
        ]
        return self.SeriesKeyTuple._make(full_key_values)

    def group_key(self, sdmxobj):
        # NOTE(review): 'group_key_id_path' and 'group_key_values_path' are
        # commented out in this class's ``_paths``; ``generic_groups`` returns
        # no groups, so this is presumably never reached - confirm.
        group_key_id = self._paths['group_key_id_path'](sdmxobj._elem)
        group_key_values = self._paths['group_key_values_path'](sdmxobj._elem)
        GroupKeyTuple = namedtuple_factory('GroupKey', group_key_id)
        return GroupKeyTuple._make(group_key_values)

    def dataset_attrib(self, sdmxobj):
        """
        Return the dataset-level attributes as an 'Attrib' namedtuple, or
        None if the dataset has no (non-None) attribute indices.
        """
        value_idx = sdmxobj._elem.value.get('attributes')
        return self._make_attrib(value_idx, self._dataset_attrib)

    def series_attrib(self, sdmxobj):
        """
        Return the series-level attributes as an 'Attrib' namedtuple, or
        None if the series has no (non-None) attribute indices.
        """
        value_idx = sdmxobj._elem.get('attributes')
        return self._make_attrib(value_idx, self._series_attrib)

    def iter_generic_series_obs(self,
                                sdmxobj,
                                with_value,
                                with_attributes,
                                reverse_obs=False):
        """
        Yield the observations of one series as
        ``_SeriesObsTuple(dim, value, attrib)``, ordered by numeric
        observation index (descending if ``reverse_obs`` is true).
        """
        # Convert the index keys to int before sorting; as strings, '10'
        # would sort before '2'.
        obs_l = sorted(((int(idx), obs)
                        for idx, obs in sdmxobj._elem['observations'].items()),
                       key=self.getitem0,
                       reverse=reverse_obs)
        for idx, obs in obs_l:
            # Value for dim at obs, e.g. '2014' for time series.
            # As this method is called only when each obs has but one
            # dimension, that dimension is at index 0.
            obs_dim_value = self._obs_dim[0]['values'][idx]['id']
            obs_value = obs[0] if with_value else None
            if with_attributes and len(obs) > 1:
                obs_attr = self._make_obs_attrib(obs[1:])
            else:
                obs_attr = None
            yield self._SeriesObsTuple(obs_dim_value, obs_value, obs_attr)
示例#30
0
 def series_attrib(self, sdmxobj):
     """
     Build an 'Attrib' namedtuple from the attribute ids and values matched
     on ``sdmxobj._elem``.
     """
     elem = sdmxobj._elem
     ids = self._paths['attr_id_path'](elem)
     vals = self._paths['attr_values_path'](elem)
     Attrib = namedtuple_factory('Attrib', ids)
     return Attrib(*vals)
示例#31
0
 def group_key(self, sdmxobj):
     """
     Build a 'GroupKey' namedtuple from the group key ids and values matched
     on ``sdmxobj._elem``.
     """
     elem = sdmxobj._elem
     key_ids = self._paths['group_key_id_path'](elem)
     key_values = self._paths['group_key_values_path'](elem)
     return namedtuple_factory('GroupKey', key_ids)._make(key_values)
示例#32
0
 def series_key(self, sdmxobj):
     """
     Build a 'SeriesKey' namedtuple from the series key ids and values
     matched on ``sdmxobj._elem``.
     """
     elem = sdmxobj._elem
     key_ids = self._paths['series_key_id_path'](elem)
     key_values = self._paths['series_key_values_path'](elem)
     return namedtuple_factory('SeriesKey', key_ids)._make(key_values)
示例#33
0
class Reader(BaseReader):
    """
    Read SDMX-ML 2.1 and expose it as instances from pandasdmx.model
    """

    # Namespace prefixes usable in the ``_paths`` expressions below.
    _nsmap = {
        'com':
        'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/common',
        'str':
        'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/structure',
        'mes':
        'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/message',
        'gen':
        'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/data/generic',
        'footer':
        'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/message/footer'
    }

    def initialize(self, source):
        """
        Parse ``source`` and return the message built from its root element.

        A root tag ending in 'Structure' yields a ``model.StructureMessage``,
        one ending in 'Data' a ``model.DataMessage``.

        Raises:
            ValueError: if the root tag ends in neither.
        """
        tree = etree.parse(source)
        root = tree.getroot()
        if root.tag.endswith('Structure'):
            cls = model.StructureMessage
        elif root.tag.endswith('Data'):
            cls = model.DataMessage
        else:
            raise ValueError('Unsupported root tag: %s' % root.tag)
        self.message = cls(self, root)
        return self.message

    # flag to prevent multiple compiling. See BaseReader.__init__
    _compiled = False

    def write_source(self, filename):
        '''
        Save XML source to file by calling `write` on the root element.
        '''
        return self.message._elem.getroottree().write(filename,
                                                      encoding='utf8')

    # Path expressions, keyed by name or by model class; compiled in place
    # by ``_compile_paths``.
    _paths = {
        'footer_text': 'com:Text/text()',
        'footer_code': '@code',
        'footer_severity': '@severity',
        'dataflow_from_msg': 'mes:Structures/str:Dataflows',
        'constraint_attachment': 'str:ConstraintAttachment',
        'include': '@include',
        'id': '@id',
        'urn': '@urn',
        'url': '@url',
        'uri': '@uri',
        'agencyID': '@agencyID',
        'maintainable_parent_id': '@maintainableParentID',
        'value': 'com:Value/text()',
        'headerID': 'mes:ID/text()',
        'header_prepared': 'mes:Prepared/text()',
        'header_sender': 'mes:Sender/@*',
        'header_receiver': 'mes:Receiver/@*',
        'assignment_status': '@assignmentStatus',
        'error': 'mes:error/@*',
        'ref_version': '@version',
        'concept_identity': 'str:ConceptIdentity',
        'position': '@position',
        'isfinal': '@isfinal',
        'ref_package': '@package',
        'ref_class': '@class',
        'ref_target': 'str:Target',
        'ref_source': 'str:Source',
        'ref_structure': 'str:Structure',
        'annotationtype': 'com:AnnotationType/text()',
        'structured_by': 'mes:Structure/@structureID',
        'dim_at_obs': '//mes:Header/mes:Structure/@dimensionAtObservation',
        'generic_obs_path': 'gen:Obs',
        'obs_key_id_path': 'gen:ObsKey/gen:Value/@id',
        'obs_key_values_path': 'gen:ObsKey/gen:Value/@value',
        'series_key_values_path': 'gen:SeriesKey/gen:Value/@value',
        'series_key_id_path': 'gen:SeriesKey/gen:Value/@id',
        'generic_series_dim_path': 'gen:ObsDimension/@value',
        'group_key_values_path': 'gen:GroupKey/gen:Value/@value',
        'group_key_id_path': 'gen:GroupKey/gen:Value/@id',
        'obs_value_path': 'gen:ObsValue/@value',
        'attr_id_path': 'gen:Attributes/gen:Value/@id',
        'attr_values_path': 'gen:Attributes/gen:Value/@value',
        model.Code: 'str:Code',
        model.Categorisation: 'str:Categorisation',
        model.CategoryScheme:
        'mes:Structures/str:CategorySchemes/str:CategoryScheme',
        model.DataStructureDefinition:
        'mes:Structures/str:DataStructures/str:DataStructure',
        model.DataflowDefinition: 'str:Dataflow',
        model.ConceptScheme: 'mes:Structures/str:Concepts/str:ConceptScheme',
        model.ContentConstraint:
        'mes:Structures/str:Constraints/str:ContentConstraint',
        model.Concept: 'str:Concept',
        model.Codelist: 'mes:Structures/str:Codelists/str:Codelist',
        model.Categorisations: 'mes:Structures/str:Categorisations',
        model.Footer: 'footer:Footer/footer:Message',
        model.Category: 'str:Category',
        model.DimensionDescriptor:
        'str:DataStructureComponents/str:DimensionList',
        model.Dimension: 'str:Dimension',
        model.TimeDimension: 'str:TimeDimension',
        model.MeasureDimension: 'str:MeasureDimension',
        model.MeasureDescriptor: 'str:DataStructureComponents/str:MeasureList',
        model.PrimaryMeasure: 'str:PrimaryMeasure',
        model.AttributeDescriptor:
        'str:DataStructureComponents/str:AttributeList',
        model.DataAttribute: 'str:Attribute',
        model.CubeRegion: 'str:CubeRegion',
        model.KeyValue: 'com:KeyValue',
        model.Ref: 'Ref',
        model.Header: 'mes:Header',
        model.Annotation: 'com:Annotations/com:Annotation',
        model.Group: 'gen:Group',
        model.Series: 'gen:Series',
        model.DataSet: 'mes:DataSet',
        'int_str_names': './*[local-name() = $name]/@xml:lang',
        model.Representation: 'str:LocalRepresentation',
        'int_str_values': './*[local-name() = $name]/text()',
        'enumeration': 'str:Enumeration',
        'texttype': 'str:TextFormat/@textType',
        'maxlength': 'str:TextFormat/@maxLength',
        # need this? It is just a non-offset Ref
        'attr_relationship': '*/Ref/@id',
        'cat_scheme_id': '../@id'
    }

    @classmethod
    def _compile_paths(cls):
        """
        Replace every path string in ``cls._paths`` by an ``XPath`` object
        compiled with ``cls._nsmap`` and ``smart_strings=False``.
        """
        for key, path in cls._paths.items():
            cls._paths[key] = XPath(path,
                                    namespaces=cls._nsmap,
                                    smart_strings=False)

    def international_str(self, name, sdmxobj):
        '''
        return DictLike of xml:lang attributes. If node has no attributes,
        assume that language is 'en'.
        '''
        # Get language tokens like 'en', 'fr'...
        elem_attrib = self._paths['int_str_names'](sdmxobj._elem, name=name)
        values = self._paths['int_str_values'](sdmxobj._elem, name=name)
        # Unilingual strings have no attributes. Assume 'en' instead.
        if not elem_attrib:
            elem_attrib = ['en']
        return DictLike(zip(elem_attrib, values))

    def header_error(self, sdmxobj):
        """
        Return ``DictLike(sdmxobj._elem.Error.attrib)``, or None if that
        attribute access raises AttributeError.
        """
        # NOTE(review): ``.Error`` attribute access presumably relies on the
        # element type supporting child lookup by attribute - confirm.
        try:
            return DictLike(sdmxobj._elem.Error.attrib)
        except AttributeError:
            return None

    def dim_at_obs(self, sdmxobj):
        return self.read_as_str('dim_at_obs', sdmxobj)

    def structured_by(self, sdmxobj):
        return self.read_as_str('structured_by', sdmxobj)

    # Types for generic observations
    _ObsTuple = namedtuple_factory('GenericObservation',
                                   ('key', 'value', 'attrib'))
    _SeriesObsTuple = namedtuple_factory('SeriesObservation',
                                         ('dim', 'value', 'attrib'))

    def iter_generic_obs(self, sdmxobj, with_value, with_attributes):
        """
        Yield ``_ObsTuple(key, value, attrib)`` for each generic observation
        directly under ``sdmxobj._elem``.

        ``value`` is None unless ``with_value`` is true and ``attrib`` is
        None unless ``with_attributes`` is true.
        """
        # The namedtuple class for the ObsKey is created on first iteration
        # from the key ids of the first observation and reused afterwards.
        ObsKeyTuple = None
        for obs in self._paths['generic_obs_path'](sdmxobj._elem):
            obs_key_values = self._paths['obs_key_values_path'](obs)
            if ObsKeyTuple is None:
                obs_key_id = self._paths['obs_key_id_path'](obs)
                ObsKeyTuple = namedtuple_factory('ObsKey', obs_key_id)
            obs_key = ObsKeyTuple._make(obs_key_values)
            if with_value:
                obs_value = self._paths['obs_value_path'](obs)[0]
            else:
                obs_value = None
            if with_attributes:
                obs_attr_values = self._paths['attr_values_path'](obs)
                obs_attr_id = self._paths['attr_id_path'](obs)
                obs_attr_type = namedtuple_factory('ObsAttributes',
                                                   obs_attr_id)
                obs_attr = obs_attr_type(*obs_attr_values)
            else:
                obs_attr = None
            yield self._ObsTuple(obs_key, obs_value, obs_attr)

    def generic_series(self, sdmxobj):
        """Yield a ``model.Series`` for each series element of the dataset."""
        path = self._paths[model.Series]
        for series in path(sdmxobj._elem):
            yield model.Series(self, series, dataset=sdmxobj)

    def generic_groups(self, sdmxobj):
        """Yield a ``model.Group`` for each group element of the dataset."""
        path = self._paths[model.Group]
        for group in path(sdmxobj._elem):
            yield model.Group(self, group)

    def series_key(self, sdmxobj):
        """Return the series key as a 'SeriesKey' namedtuple."""
        series_key_id = self._paths['series_key_id_path'](sdmxobj._elem)
        series_key_values = self._paths['series_key_values_path'](
            sdmxobj._elem)
        SeriesKeyTuple = namedtuple_factory('SeriesKey', series_key_id)
        return SeriesKeyTuple._make(series_key_values)

    def group_key(self, sdmxobj):
        """Return the group key as a 'GroupKey' namedtuple."""
        group_key_id = self._paths['group_key_id_path'](sdmxobj._elem)
        group_key_values = self._paths['group_key_values_path'](sdmxobj._elem)
        GroupKeyTuple = namedtuple_factory('GroupKey', group_key_id)
        return GroupKeyTuple._make(group_key_values)

    def series_attrib(self, sdmxobj):
        """Return the attributes of ``sdmxobj._elem`` as an 'Attrib' namedtuple."""
        attr_id = self._paths['attr_id_path'](sdmxobj._elem)
        attr_values = self._paths['attr_values_path'](sdmxobj._elem)
        return namedtuple_factory('Attrib', attr_id)(*attr_values)

    # Dataset-level attributes are read with the same paths.
    dataset_attrib = series_attrib

    def iter_generic_series_obs(self,
                                sdmxobj,
                                with_value,
                                with_attributes,
                                reverse_obs=False):
        """
        Yield ``_SeriesObsTuple(dim, value, attrib)`` for each generic ``Obs``
        child of ``sdmxobj._elem``; ``reverse_obs`` is passed to
        ``iterchildren`` as ``reversed``.
        """
        for obs in sdmxobj._elem.iterchildren(
                '{http://www.sdmx.org/resources/sdmxml/schemas/v2_1/data/generic}Obs',
                reversed=reverse_obs):
            obs_dim = self._paths['generic_series_dim_path'](obs)[0]
            if with_value:
                obs_value = self._paths['obs_value_path'](obs)[0]
            else:
                obs_value = None
            if with_attributes:
                obs_attr_values = self._paths['attr_values_path'](obs)
                obs_attr_id = self._paths['attr_id_path'](obs)
                obs_attr_type = namedtuple_factory('ObsAttributes',
                                                   obs_attr_id)
                obs_attr = obs_attr_type(*obs_attr_values)
            else:
                obs_attr = None
            yield self._SeriesObsTuple(obs_dim, obs_value, obs_attr)
示例#34
0
文件: api.py 项目: dr-leo/pandaSDMX
    def preview_data(self, flow_id, key=None, count=True, total=True, dsd=None):
        '''
        Get keys or number of series for a prospective dataset query allowing for
        keys with multiple values per dimension.
        It downloads the complete list of series keys for a dataflow rather than using constraints and DSD. This feature is,
        however, not supported by all data providers.
        ECB, IMF_SDMXCENTRAL and UNSD are known to work.

        Args:

        flow_id(str): dataflow id

        key(dict): optional key mapping dimension names to values or lists of values.
            Must have been validated before. It is not checked if key values
            are actually valid dimension names and values. Default: None

        count(bool): if True (default), return the number of series
            of the dataset designated by flow_id and key. If False,
            the actual keys are returned as a pandas DataFrame or dict of dataframes, depending on
            the value of 'total'.

        total(bool): if True (default), return the aggregate number
            of series or a single dataframe (depending on the value of 'count'). If False,
            return a dict mapping keys to dataframes of series keys.
            E.g., if key={'COUNTRY':'IT+CA+AU'}, the dict will
            have 3 items describing the series keys for each country
            respectively. If 'count' is True, dict values will be int rather than
            PD.DataFrame.

        dsd: passed through unchanged to ``self.series_keys``.

        Counts are 0 (rather than an error) when no series matches.
        '''
        all_keys = self.series_keys(flow_id, dsd=dsd)
        # Handle the special case that no key is provided
        if not key:
            return all_keys.shape[0] if count else all_keys

        # So there is a key specifying at least one dimension value.
        # Wrap single values in 1-elem list for uniform treatment
        key_l = self.prepare_key(key)
        # order dim_names that are present in the key
        dim_names = [k for k in all_keys if k in key]
        # Drop columns that are not in the key
        key_df = all_keys.loc[:, dim_names]
        if total:
            # Boolean series marking the rows that match on every dimension.
            # ``isin`` is evaluated once for all columns.
            matched = key_df.isin(key_l)
            bool_series = reduce(and_, (matched[col] for col in dim_names))
            if count:
                # Summing the booleans also works when nothing matches,
                # unlike looking up ``True`` in ``value_counts()``.
                return int(bool_series.sum())
            return all_keys[bool_series]

        # Dict of value combinations as dict keys
        key_product = product(*(key_l[k] for k in dim_names))
        # Replace key tuples by namedtuples
        PartialKey = namedtuple_factory('PartialKey', dim_names)

        matches = {}
        for combination in key_product:
            matched = key_df.isin(
                {name: [value] for name, value in zip(dim_names, combination)})
            # ``_make`` spreads the tuple over the fields; calling the class
            # with the tuple itself would pass it as one single argument.
            matches[PartialKey._make(combination)] = reduce(
                and_, (matched[col] for col in dim_names))

        if count:
            # Number of series per key
            return {k: int(v.sum()) for k, v in matches.items()}
        # dict mapping each key to DataFrame with selected key-set
        return {k: all_keys[v] for k, v in matches.items()}
示例#35
0
class Reader(BaseReader):
    """
    Read SDMXJSON 2.1 and expose it as instances from pandasdmx.model

    The JSON document is parsed in ``initialize`` and wrapped in a
    ``model.DataMessage``. Model objects call back into this reader to
    extract keys, attributes, series and observations from their ``_elem``.

    NOTE(review): ``international_str``, ``iter_generic_obs``, ``group_key``
    and the attribute branch of ``iter_generic_series_obs`` look up
    ``_paths`` keys that exist only as commented-out entries in this class.
    Unless those keys are supplied elsewhere, these lookups raise KeyError.
    """

    def read_as_str(self, name, sdmxobj, first_only=True):
        '''
        Apply the path stored under ``_paths[name]`` to ``sdmxobj._elem``.

        Returns the ``value`` of the first match if ``first_only`` is true,
        otherwise a list with the ``value`` of every match. Returns None
        when the path yields no match.
        '''
        result = self._paths[name](sdmxobj._elem)
        if not result:
            return None
        if first_only:
            return result[0].value
        return [r.value for r in result]

    def initialize(self, source):
        '''
        Parse JSON from the file-like ``source``, wrap the resulting tree
        in a ``model.DataMessage``, store it as ``self.message`` and
        return it.
        '''
        tree = json.load(source)
        self.message = model.DataMessage(self, tree)
        return self.message

    # flag to prevent multiple compiling. See BaseReader.__init__
    _compiled = False

    def write_source(self, filename):
        '''
        Save source to file by serializing the message's root element
        as JSON into ``filename``.
        '''
        # json.dumps() only builds a string and takes no file argument;
        # json.dump() is the variant that writes to an open file.
        with open(filename, 'w') as target:
            json.dump(self.message._elem, target)

    # Path expressions keyed by name or by model class. The commented-out
    # entries carry XML-style expressions and are currently inactive.
    _paths = {
        #         'footer_text': 'com:Text/text()',
        #         'footer_code': '@code',
        #         'footer_severity': '@severity',
        #         'dataflow_from_msg': 'mes:Structures/str:Dataflows',
        #         'constraint_attachment': 'str:ConstraintAttachment',
        #         'include': '@include',
        #         'id': '@id',
        #         'urn': '@urn',
        #         'url': '@url',
        #         'uri': '@uri',
        #         'agencyID': '@agencyID',
        #         'maintainable_parent_id': '@maintainableParentID',
        #         'value': 'com:Value/text()',
        'headerID': 'id',
        #         'header_prepared': 'mes:Prepared/text()',
        #         'header_sender': 'mes:Sender/@*',
        #         'header_receiver': 'mes:Receiver/@*',
        #         'assignment_status': '@assignmentStatus',
        #         'error': 'mes:error/@*',
        #         'ref_version': '@version',
        #         'concept_id': 'str:ConceptIdentity',
        #         'position': '@position',
        #         'isfinal': '@isfinal',
        #         'ref_package': '@package',
        #         'ref_class': '@class',
        #         'ref_target': 'str:Target',
        #         'ref_source': 'str:Source',
        #         'ref_structure': 'str:Structure',
        #         'annotationtype': 'com:AnnotationType/text()',
        'structured_by': '$.structure.links',
        'dim_at_obs': '$.structure.dimensions.observations',
        #         'generic_obs_path': 'gen:Obs',
        #         'obs_key_id_path': 'gen:ObsKey/gen:Value/@id',
        #         'obs_key_values_path': 'gen:ObsKey/gen:Value/@value',
        #         'series_key_values_path': 'gen:SeriesKey/gen:Value/@value',
        #         'series_key_id_path':        'gen:SeriesKey/gen:Value/@id',
        #         'generic_series_dim_path': 'gen:ObsDimension/@value',
        #         'group_key_values_path': 'gen:GroupKey/gen:Value/@value',
        #         'group_key_id_path': 'gen:GroupKey/gen:Value/@id',
        #         'obs_value_path': 'gen:ObsValue/@value',
        #         'attr_id_path': 'gen:Attributes/gen:Value/@id',
        #         'attr_values_path': 'gen:Attributes/gen:Value/@value',
        #         model.Code: 'str:Code',
        #         model.Categorisation: 'str:Categorisation',
        #         model.CategoryScheme: 'mes:Structures/str:CategorySchemes/str:CategoryScheme',
        #         model.DataStructureDefinition: 'mes:Structures/str:DataStructures/str:DataStructure',
        #         model.DataflowDefinition: 'str:Dataflow',
        #         model.ConceptScheme: 'mes:Structures/str:Concepts/str:ConceptScheme',
        #         model.ContentConstraint: 'mes:Structures/str:Constraints/str:ContentConstraint',
        #         model.Concept: 'str:Concept',
        #         model.Codelist: 'mes:Structures/str:Codelists/str:Codelist',
        #         model.Categorisations: 'mes:Structures/str:Categorisations',
        model.Footer: 'footer.message',
        #         model.Category: 'str:Category',
        #         model.DimensionDescriptor: 'str:DataStructureComponents/str:DimensionList',
        #         model.Dimension: 'str:Dimension',
        #         model.TimeDimension: 'str:TimeDimension',
        #         model.MeasureDimension: 'str:MeasureDimension',
        #         model.MeasureDescriptor: 'str:DataStructureComponents/str:MeasureList',
        #         model.PrimaryMeasure: 'str:PrimaryMeasure',
        #         model.AttributeDescriptor: 'str:DataStructureComponents/str:AttributeList',
        #         model.DataAttribute: 'str:Attribute',
        #         model.CubeRegion: 'str:CubeRegion',
        #         model.KeyValue: 'com:KeyValue',
        #         model.Ref: 'Ref',
        model.Header: 'header',
        #         model.Annotation: 'com:Annotations/com:Annotation',
        #         model.Group: 'gen:Group',
        #         model.Series: 'gen:Series',
        model.DataSet: 'dataSets[0]',
        #         'int_str_names': './*[local-name() = $name]/@xml:lang',
        #         model.Representation: 'str:LocalRepresentation',
        #         'int_str_values': './*[local-name() = $name]/text()',
        #         'enumeration': 'str:Enumeration',
        #         'texttype': 'str:TextFormat/@textType',
        #         'maxlength': 'str:TextFormat/@maxLength',
        #         # need this? It is just a non-offset Ref
        #         'attr_relationship': '*/Ref/@id',
    }

    @classmethod
    def _compile_paths(cls):
        '''
        Replace, in place, every path expression in ``_paths`` by the
        object ``XPath`` builds from it.
        '''
        # Only values of existing keys are rebound, so the dict keeps its
        # size while being iterated.
        for key, path in cls._paths.items():
            cls._paths[key] = XPath(path)

    def international_str(self, name, sdmxobj):
        '''
        Return a DictLike pairing the language tokens found for ``name``
        with the corresponding string values. If no language tokens are
        found, assume that the language is 'en'.
        '''
        # Get language tokens like 'en', 'fr'...
        elem_attrib = self._paths['int_str_names'](sdmxobj._elem, name=name)
        values = self._paths['int_str_values'](sdmxobj._elem, name=name)
        # Unilingual strings have no attributes. Assume 'en' instead.
        if not elem_attrib:
            elem_attrib = ['en']
        return DictLike(zip(elem_attrib, values))

    def header_error(self, sdmxobj):
        '''
        Return a DictLike built from ``sdmxobj._elem.Error.attrib``, or
        None if that attribute chain is not available.
        '''
        try:
            return DictLike(sdmxobj._elem.Error.attrib)
        except AttributeError:
            return None

    # Types for generic observations
    _ObsTuple = namedtuple_factory('GenericObservation',
                                   ('key', 'value', 'attrib'))
    _SeriesObsTuple = namedtuple_factory('SeriesObservation',
                                         ('dim', 'value', 'attrib'))

    def iter_generic_obs(self, sdmxobj, with_value, with_attributes):
        '''
        Yield one ``GenericObservation(key, value, attrib)`` per
        observation found under ``sdmxobj._elem``.

        ``value`` is None unless ``with_value`` is true; ``attrib`` is
        None unless ``with_attributes`` is true.
        '''
        # The namedtuple class for the observation key is created on the
        # first iteration and reused afterwards.
        ObsKeyTuple = None
        for obs in self._paths['generic_obs_path'](sdmxobj._elem):
            obs_key_values = self._paths['obs_key_values_path'](obs)
            if ObsKeyTuple is None:
                obs_key_id = self._paths['obs_key_id_path'](obs)
                ObsKeyTuple = namedtuple_factory('ObsKey', obs_key_id)
            obs_key = ObsKeyTuple._make(obs_key_values)
            if with_value:
                obs_value = self._paths['obs_value_path'](obs)[0]
            else:
                obs_value = None
            if with_attributes:
                obs_attr_values = self._paths['attr_values_path'](obs)
                obs_attr_id = self._paths['attr_id_path'](obs)
                obs_attr_type = namedtuple_factory('ObsAttributes',
                                                   obs_attr_id)
                obs_attr = obs_attr_type(*obs_attr_values)
            else:
                obs_attr = None
            yield self._ObsTuple(obs_key, obs_value, obs_attr)

    @staticmethod
    def getitem_key(obj):
        '''
        Sort key for series matches: the '_key' entry that
        ``generic_series`` stores on each series.
        '''
        return obj.value['_key']

    def generic_series(self, sdmxobj):
        '''
        Yield a ``model.Series`` for every series of the dataset
        ``sdmxobj``, ordered by series key.
        '''
        # Store each series' key inside the series itself so that it is
        # still reachable once only the series value is at hand
        # (see ``series_key``).
        for key, series in sdmxobj._elem.value['series'].items():
            series['_key'] = key
        # NOTE(review): keys are compared as strings here, i.e.
        # lexicographically.
        for series in sorted(parse('series.*').find(sdmxobj._elem),
                             key=self.getitem_key):
            yield model.Series(self, series, dataset=sdmxobj)

    def generic_groups(self, sdmxobj):
        '''
        Return the groups of ``sdmxobj``; always an empty list here.
        '''
        return []

    def series_key(self, sdmxobj):
        '''
        Return the full key of the series ``sdmxobj`` as a 'SeriesKey'
        namedtuple: dataset-level dimensions first, then series-level
        dimensions.
        '''
        # pull down dataset key
        dataset_dim = parse('$.structure.dimensions.dataSet[*]').find(
            sdmxobj._elem)
        full_key_ids = [d.value['id'] for d in dataset_dim]
        # For dataset-level dimensions the first listed value is used.
        full_key_values = [d.value['values'][0]['id'] for d in dataset_dim]
        # The series' '_key' is a ':'-separated list of indices, one per
        # series-level dimension.
        key_idx = [int(i) for i in sdmxobj._elem.value['_key'].split(':')]
        struct_dim = parse('$.structure.dimensions.series').find(
            sdmxobj._elem)[0].value
        series_key_ids = [d['id'] for d in struct_dim]
        series_key_values = [
            d['values'][i]['id'] for i, d in zip(key_idx, struct_dim)
        ]
        full_key_ids.extend(series_key_ids)
        full_key_values.extend(series_key_values)
        SeriesKeyTuple = namedtuple_factory('SeriesKey', full_key_ids)
        return SeriesKeyTuple._make(full_key_values)

    def group_key(self, sdmxobj):
        '''
        Return the key of the group ``sdmxobj`` as a 'GroupKey'
        namedtuple.
        '''
        group_key_id = self._paths['group_key_id_path'](sdmxobj._elem)
        group_key_values = self._paths['group_key_values_path'](sdmxobj._elem)
        GroupKeyTuple = namedtuple_factory('GroupKey', group_key_id)
        return GroupKeyTuple._make(group_key_values)

    def _attrib_pairs(self, sdmxobj, struct_path):
        '''
        Resolve the attribute indices stored on ``sdmxobj`` against the
        attribute definitions found at the JSONPath ``struct_path``.

        Returns a list of ``(attribute id, value)`` pairs, where the value
        is the referenced entry's 'id' or, lacking that, its 'name'.
        Returns None if ``sdmxobj`` carries no attribute indices.
        '''
        value_idx = sdmxobj._elem.value.get('attributes')
        if not value_idx:
            return None
        struct_attrib = parse(struct_path).find(sdmxobj._elem)[0].value
        return [(a['id'], a['values'][i].get('id', a['values'][i]['name']))
                for i, a in zip(value_idx, struct_attrib)]

    def dataset_attrib(self, sdmxobj):
        '''
        Return the dataset-level attributes of ``sdmxobj`` as a list of
        ``(id, value)`` pairs, or None if there are none.
        '''
        return self._attrib_pairs(sdmxobj, '$.structure.attributes.dataset')

    def series_attrib(self, sdmxobj):
        '''
        Return the series-level attributes of ``sdmxobj`` as a list of
        ``(id, value)`` pairs, or None if there are none.
        '''
        return self._attrib_pairs(sdmxobj, '$.structure.attributes.series')

    # Kept as a class attribute for code that may still refer to it.
    getitem0 = itemgetter(0)

    def iter_generic_series_obs(self,
                                sdmxobj,
                                with_value,
                                with_attributes,
                                reverse_obs=False):
        '''
        Yield one ``SeriesObservation(dim, value, attrib)`` per
        observation of the series ``sdmxobj``.

        Observations are ordered by their numeric index into the
        observation dimension, descending if ``reverse_obs`` is true.
        ``value`` is None unless ``with_value`` is true; ``attrib`` is
        None unless ``with_attributes`` is true.
        '''
        # Observation keys are index strings. Compare them as integers so
        # that e.g. '10' is placed after '2' rather than before it.
        obs_l = sorted(sdmxobj._elem.value['observations'].items(),
                       key=lambda item: int(item[0]),
                       reverse=reverse_obs)
        obs_dim_l = parse('$.structure.dimensions.observation[*]').find(
            sdmxobj._elem)
        for obs in obs_l:
            # Translate the index into the id of the dimension value.
            obs_dim = obs_dim_l[0].value['values'][int(obs[0])]['id']
            if with_value:
                # First element of the observation array.
                obs_value = obs[1][0]
            else:
                obs_value = None
            if with_attributes:
                obs_attr_values = self._paths['attr_values_path'](obs)
                obs_attr_id = self._paths['attr_id_path'](obs)
                obs_attr_type = namedtuple_factory('ObsAttributes',
                                                   obs_attr_id)
                obs_attr = obs_attr_type(*obs_attr_values)
            else:
                obs_attr = None
            yield self._SeriesObsTuple(obs_dim, obs_value, obs_attr)
示例#36
0
    def preview_data(self, flow_id, key=None, count=True, total=True):
        '''
        Get keys or number of series for a prospective dataset query allowing for
        keys with multiple values per dimension.
        It downloads the complete list of series keys for a dataflow rather than using constraints and DSD. This feature is,
        however, not supported by all data providers.
        ECB and UNSD are known to work.

        Args:

        flow_id(str): dataflow id

        key(dict): optional key mapping dimension names to values or lists of values.
            A string value may combine several values with '+', e.g. 'IT+CA+AU'.
            Must have been validated before. It is not checked if key values
            are actually valid dimension names and values. Default: None

        count(bool): if True (default), return the number of series
            of the dataset designated by flow_id and key. If False,
            the actual keys are returned as a pandas DataFrame or dict of dataframes, depending on
            the value of 'total'.

        total(bool): if True (default), return the aggregate number
            of series or a single dataframe (depending on the value of 'count'). If False,
            return a dict mapping keys to dataframes of series keys.
            E.g., if key={'COUNTRY':'IT+CA+AU'}, the dict will
            have 3 items describing the series keys for each country
            respectively. If 'count' is True, dict values will be int rather than
            PD.DataFrame.
        '''
        all_keys = self.series_keys(flow_id)
        # Handle the special case that no key is provided
        if not key:
            return all_keys.shape[0] if count else all_keys

        # So there is a key specifying at least one dimension value.
        # Turn string values into lists for uniform treatment, splitting
        # '+'-joined values into their components.
        key_l = {
            k: v.split('+') if isinstance(v, str_type) else v
            for k, v in key.items()
        }
        # order dim_names that are present in the key
        dim_names = [k for k in all_keys if k in key]
        # Drop columns that are not in the key
        key_df = all_keys.loc[:, dim_names]
        if total:
            # Boolean series flagging the series keys matching every
            # dimension of the key.
            isin_df = key_df.isin(key_l)
            bool_series = reduce(and_, (isin_df[col] for col in dim_names))
            if count:
                # Summing counts the True entries and yields 0 when there
                # is no match at all.
                return int(bool_series.sum())
            return all_keys[bool_series]

        # One entry per combination of the requested dimension values.
        # Replace key tuples by namedtuples
        PartialKey = namedtuple_factory('PartialKey', dim_names)
        matches = {}
        for combo in product(*(key_l[k] for k in dim_names)):
            combo_isin = key_df.isin(
                {name: [value] for name, value in zip(dim_names, combo)})
            matches[PartialKey._make(combo)] = reduce(
                and_, (combo_isin[col] for col in dim_names))

        if not count:
            # dict mapping each key to DataFrame with selected key-set
            return {k: all_keys[v] for k, v in matches.items()}
        # Number of series per key
        return {k: int(v.sum()) for k, v in matches.items()}