def _lbl2pos(self, lbl):
    """Translate a category label into the position of that category.

    :param lbl: label of the category
    :returns: integer position of the category carrying the label ``lbl``
    :raises JsonStatException: if the dimension is not initialized or no
        category has the label ``lbl``
    """
    if not self.__valid:
        raise JsonStatException("dimension '{}': is not initialized".format(self.__did))

    # The membership test must be done on the label table, i.e. the very table
    # that is indexed below; testing the index table rejected valid labels.
    # Other code in this file guards against the label table being None, so
    # the same guard is applied here.
    if self.__lbl2cat is None or lbl not in self.__lbl2cat:
        raise JsonStatException("dimension '{}': do not have label {}".format(self.__did, lbl))

    return self.__lbl2cat[lbl].pos
def _idx2pos(self, idx):
    """Translate a category index into the position of that category.

    :param idx: index of the category, for ex.: "2013"
    :returns: integer position of the category
    :raises JsonStatException: if the dimension is not initialized or the
        index ``idx`` is unknown
    """
    if not self.__valid:
        msg = "dimension '{}': is not initialized".format(self.__did)
        raise JsonStatException(msg)

    if idx in self.__idx2cat:
        return self.__idx2cat[idx].pos

    msg = "dimension '{}': do not have index '{}'".format(self.__did, idx)
    raise JsonStatException(msg)
def all_pos(self, blocked_dims=None, order=None):
    """Generate every position vector of the dataset, like an odometer.

    Each yielded item is a fresh list holding one category position per
    dimension. Dimensions listed in ``blocked_dims`` stay fixed on the given
    category; all the others run through ``0 .. size - 1``. The last entry
    of the iteration order changes fastest.

    This is a generator, so argument errors are raised when the iteration
    starts and not when ``all_pos`` is called.

    :param blocked_dims: dict mapping a dimension spec (as accepted by
        ``self.dimension``) to the category index that must stay fixed,
        for ex.: {"year": "2013", "country": "IT"}; ``None`` means no
        dimension is blocked
    :param order: iteration order of the dimensions, given either as a list
        of dimension positions (integers) or as a list of dimension ids;
        ``None`` means the natural order
    :returns: generator of lists of integers
    :raises JsonStatException: if ``order`` does not list every dimension
    """
    if blocked_dims is None:
        blocked_dims = {}

    nr_dim = len(self.__pos2dim)
    if order is not None:
        if len(order) != nr_dim:
            msg = "length of the order vector is different from number of dimension {}".format(
                nr_dim)
            raise JsonStatException(msg)
        # Inspect the FIRST element to decide whether ids must be translated
        # into positions: looking at order[1] failed with IndexError on
        # datasets having a single dimension. An empty order has nothing to
        # translate.
        if order and not isinstance(order[0], int):
            order = [self.__did2dim[iid].pos for iid in order]

    vec_pos_blocked = nr_dim * [False]
    vec_pos = nr_dim * [0]
    for (cat, idx) in blocked_dims.items():
        d = self.dimension(cat)
        vec_pos_blocked[d.pos] = True
        vec_pos[d.pos] = d._idx2pos(idx)

    pos2size = self.__pos2size
    vec_dimension_reorder = range(nr_dim) if order is None else order

    nrd = nr_dim - 1
    while nrd >= 0:
        yield list(vec_pos)  # make a shallow copy of vec_pos

        # restart from the fastest changing dimension
        nrd = nr_dim - 1
        cur_dim = vec_dimension_reorder[nrd]
        # if the position is not blocked the digit can be advanced
        if not vec_pos_blocked[cur_dim]:
            vec_pos[cur_dim] += 1

        # carry: while there is still a dimension to examine and the current
        # one has overflowed its size or is blocked
        while nrd >= 0 and \
                (vec_pos[cur_dim] == pos2size[cur_dim] or vec_pos_blocked[cur_dim]):
            # if the position is not blocked its value restarts from zero
            if not vec_pos_blocked[cur_dim]:
                vec_pos[cur_dim] = 0

            # examine the next position
            nrd -= 1
            # if the dimension just examined was not the first one
            if nrd >= 0:
                cur_dim = vec_dimension_reorder[nrd]
                # if the current dimension is not blocked advance it
                if not vec_pos_blocked[cur_dim]:
                    vec_pos[cur_dim] += 1
def dcat_to_lint(self, dims):
    """Convert a dict of dimension -> category into a list of positions.

    ::

        {"country": "AU", "year": 2014} -> [1, 2]

    The result has one slot per dimension of the dataset; dimensions that
    are not mentioned in ``dims`` keep position 0.

    :param dims: keys are dimensions (id or label), values are categories
                 "country" is the id of a dimension
                 "AU" is a category of that dimension
    :returns: a list of integers
    :raises JsonStatException: if a key is neither the id nor the label of
        a dimension
    """
    positions = [0] * len(self.__pos2dim)

    for key, category_spec in dims.items():
        if key in self.__did2dim:
            # key is an id
            dimension = self.__did2dim[key]
        elif key in self.__lbl2dim:
            # key is a label
            dimension = self.__lbl2dim[key]
        else:
            # key is neither an id nor a label
            known = ", ".join("'{}'".format(d.did) for d in self.__pos2dim)
            template = "dataset '{}': category '{}' don't exists allowed categories are: {}"
            raise JsonStatException(template.format(self.__name, key, known))

        positions[dimension.pos] = dimension.category(category_spec).pos

    return positions
def _2idx(self, *args, **kargs):
    """Convert the arguments accepted by ``data`` into an integer index.

    Supported call shapes:

    - ``_2idx(<int>)``: the integer is returned unchanged
    - ``_2idx([i1, i2, i3])``: list of positions, one per dimension
    - ``_2idx({k1: v1, k2: v2})``: dict of dimension -> category
    - ``_2idx(k1=v1, k2=v2)``: same as the dict form, via keywords

    :returns: integer index
    :raises JsonStatException: for any other combination of parameters
    """
    if not args:
        # data(k1=v1, k2=v2)
        return self.lint_as_idx(self.dcat_to_lint(kargs))

    if len(args) == 1:
        spec = args[0]
        if isinstance(spec, int):
            # data(int)
            return spec
        if isinstance(spec, list):
            # data([i1, i2, i3])
            return self.lint_as_idx(spec)
        if isinstance(spec, dict):
            # data({k1: v1, k2: v2})
            return self.lint_as_idx(self.dcat_to_lint(spec))

    raise JsonStatException("unexpected parameters")
def __parse_json_index_helper(self, idx, pos):
    """Register the category having index ``idx`` at position ``pos``.

    A new JsonStatCategory (without label) is stored both in the
    position -> category table and in the index -> category table.

    :param idx: index of the category
    :param pos: position of the category inside the dimension
    :raises JsonStatException: if ``pos`` is not smaller than the size of
        the dimension
    """
    if pos >= self.__size:
        template = "dimension '{}': index {} is greater than size {}"
        raise JsonStatException(template.format(self.__did, pos, self.__size))

    category = JsonStatCategory(pos=pos, index=idx, label=None)
    # chained assignment stores left to right: by position first, then by index
    self.__pos2cat[pos] = self.__idx2cat[idx] = category
def data(self, *args, **kargs):
    """Returns a JsonStatValue containing value and status of a datapoint

    The datapoint is selected according to the parameters

    :param args:
        - data(<int>) where the integer is the index of the datapoint
        - data(<list>) where lst = [i1,i2,i3,...]
          each i indicates a position inside a dimension
          len(lst) == number of dimensions
        - data(<dict>) where dict is {k1:v1, k2:v2, ...}
          dimensions of size 1 can be omitted
    :param kargs:
        - data(k1=v1,k2=v2,...) where
          **ki** are the id or label of a dimension
          **vi** are the index or label of a category
          dimensions of size 1 can be omitted
          ex.: data(country="AU", year="2014")
    :returns: a JsonStatValue object
    :raises JsonStatException: if the dataset is not initialized or the
        parameters cannot be decoded

    >>> import os, jsonstat  # doctest: +ELLIPSIS
    >>> filename = os.path.join(jsonstat._examples_dir, "www.json-stat.org", "oecd-canada-col.json")
    >>> dataset = jsonstat.from_file(filename).dataset(0)
    >>> dataset.data(0)
    JsonStatValue(idx=0, value=5.943826289, status=None)
    >>> dataset.data(concept='UNR', area='AU', year='2003')
    JsonStatValue(idx=0, value=5.943826289, status=None)
    >>> dataset.data(area='AU', year='2003')
    JsonStatValue(idx=0, value=5.943826289, status=None)
    >>> dataset.data({'area':'AU', 'year':'2003'})
    JsonStatValue(idx=0, value=5.943826289, status=None)
    """
    if not self.__valid:
        raise JsonStatException('dataset not initialized')

    # decoding args
    # The index is kept as an integer: a list cannot be indexed by a string,
    # and the examples above show that the returned idx is an integer.
    idx = self._2idx(*args, **kargs)

    #
    # value
    #
    values = self.__value
    if isinstance(values, dict):
        # dict container: accept the integer key, otherwise fall back to the
        # string key (keys of a parsed JSON object are strings)
        value = values[idx] if idx in values else values[str(idx)]
    else:
        value = values[idx]

    #
    # status
    #
    statuses = self.__status
    if statuses is None:
        status = None
    elif isinstance(statuses, str):
        # a single status shared by every datapoint
        status = statuses
    elif isinstance(statuses, list) and len(statuses) == 1:
        # a one element list is a status shared by every datapoint
        status = statuses[0]
    elif isinstance(statuses, dict):
        # datapoints missing from the dict have no status
        status = statuses.get(idx, statuses.get(str(idx)))
    else:
        status = statuses[idx]

    return JsonStatValue(idx, value, status)
def _pos2cat(self, pos):
    """Return the category stored at position ``pos``.

    :param pos: integer
    :returns: the category at position ``pos``, or None when the
        position -> category table is not available
        ex.: JsonStatCategory(index='2013', label='2013', pos=pos)
    :raises JsonStatException: if the dimension is not initialized
    """
    if not self.__valid:
        msg = "dimension '{}': is not initialized".format(self.__did)
        raise JsonStatException(msg)

    table = self.__pos2cat
    return None if table is None else table[pos]
def category(self, spec):
    """Look up a JsonStatCategory by position, index or label.

    The lookup is attempted in this order: position (only for an integer
    smaller than the number of categories), then index, then label.

    :param spec: can be an index (string) or a label (string) or a position (integer)
    :returns: a JsonStatCategory
    :raises JsonStatException: if the dimension is not initialized or
        ``spec`` matches nothing
    """
    if not self.__valid:
        msg = "dimension '{}': is not initialized".format(self.__did)
        raise JsonStatException(msg)

    # by position
    if isinstance(spec, int) and spec < len(self.__pos2cat):
        return self.__pos2cat[spec]

    # by index, tried before labels
    if spec in self.__idx2cat:
        return self.__idx2cat[spec]

    # by label, when labels are available
    if self.__lbl2cat is not None and spec in self.__lbl2cat:
        return self.__lbl2cat[spec]

    msg = "dimension '{}': unknown index or label '{}'".format(self.__did, spec)
    raise JsonStatException(msg)
def dimension(self, spec):
    """Return a JsonStatDimension selected by ``spec``.

    :param spec: spec can be:
                 - (string) the id of the dimension
                 - (int) the position of the dimension
    :returns: a JsonStatDimension
    :raises JsonStatException: if ``spec`` is not an integer and is not a
        known dimension id
    """
    # exact type test: only plain integers are treated as positions
    if type(spec) is int:
        return self.__pos2dim[spec]

    if spec in self.__did2dim:
        return self.__did2dim[spec]

    template = "dataset '{}': unknown dimension '{}' know dimensions ids are: {}"
    raise JsonStatException(
        template.format(self.__name, spec, ", ".join(dim.did for dim in self.__pos2dim)))
def __write_page_to_cache(self, pathname, content):
    """Write ``content`` into the file ``pathname``.

    Nothing is done when ``pathname`` is None. The cache directory is
    created on demand before the file is written.

    :param pathname: file to write, or None to skip writing
    :param content: text to store into the file
    :raises JsonStatException: if the cache directory path exists but is
        not a directory
    """
    if pathname is None:
        return

    cache_dir = self.__cache_dir

    # create the cache directory only the first time it is needed
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    if not os.path.isdir(cache_dir):
        raise JsonStatException("cache_dir '{}' is not a directory".format(cache_dir))

    # note:
    # in python 3 the file must be opened without the b (binary) option to
    # write a string, otherwise the following error is generated
    # TypeError: a bytes-like object is required, not 'str'
    with open(pathname, 'w') as out:
        out.write(content)
def __parse_dimensions(self, json_data_dimension, json_data_roles, pos2iid):
    """Parse the dimensions of a json-stat dataset

    it is used for format v1 and v2

    For every dimension id listed in ``pos2iid`` a JsonStatDimension is
    built, loaded from its json and registered by id, by position and,
    when it has a label, by label.

    :param json_data_dimension: dict mapping a dimension id to its json
    :param json_data_roles: dict mapping a role to a list of dimension ids,
        or None
    :param pos2iid: dimension ids, ordered by position
    :raises JsonStatException: if a dimension id is missing from
        ``json_data_dimension``
    """
    # parsing roles: invert role -> [ids] into id -> role
    dim_roles = {}
    if json_data_roles is not None:
        for role_name, dim_ids in json_data_roles.items():
            for dim_id in dim_ids:
                dim_roles[dim_id] = role_name

    # parsing each dimension
    self.__pos2dim = [None] * self.__dim_nr
    for position, dim_id in enumerate(pos2iid):
        size = self.__pos2size[position]

        if dim_id not in json_data_dimension:
            template = "dataset '{}': malformed json: missing key {} in dimension"
            raise JsonStatException(template.format(self.__name, dim_id))

        dim = JsonStatDimension(dim_id, size, position, dim_roles.get(dim_id))
        dim.from_json(json_data_dimension[dim_id])

        self.__did2dim[dim_id] = dim
        self.__pos2dim[position] = dim
        if dim.label is not None:
            self.__lbl2dim[dim.label] = dim
def __parse_category(self, json_data_category):
    """Parse the 'category' member, which describes the possible values of a dimension.

    See https://json-stat.org/format/#category

    The 'index' and 'label' members are parsed when present, then the
    result is validated against the size of the dimension. The 'unit'
    member, when present, is stored as is.

    :param json_data_category: dict holding the json of 'category'
    :raises JsonStatMalformedJson: if both 'index' and 'label' are missing,
        if there are more labels than indexes, if the number of indexes
        does not match the size, or if a position has no category
    :raises JsonStatException: if 'unit' is present but the role of the
        dimension is not 'metric'

    jsonschema for category is about::

        "category": {
            "type": "object",
            "properties": {
                "index": {"$ref": "#/definitions/category_index"},
                "label": {"type": "object"},
                "unit": {"$ref": "#/definitions/category_index"},
                "child": {"type": "object", "properties": {"additionalProperties": {"type": "array"}}},
                "coordinates": {"type": "object", "properties": {"additionalProperties": {"type": "array"}}},
                "note": {"type": "array"}
            },
            "additionalProperties": false
        },
    """
    # validate: label or index must be present
    if 'index' not in json_data_category and 'label' not in json_data_category:
        msg = "dimension '{}': one of keys 'label' or 'index' must be presents"
        # the placeholder must be filled with the dimension id
        raise JsonStatMalformedJson(msg.format(self.__did))

    if 'index' in json_data_category:
        self.__parse_json_index(json_data_category)

    if 'label' in json_data_category:
        self.__parse_json_label(json_data_category)

    # validate: number of indexes and labels
    if self.__idx2cat is not None and self.__lbl2cat is not None:
        # TODO: a plain mismatch between the number of indexes and the number
        # of labels cannot raise an exception (see hierarchy.json), a warning
        # should be emitted instead. Only labels outnumbering indexes is an
        # error.
        if len(self.__idx2cat) < len(self.__lbl2cat):
            msg = "dimension '{}': the number of labels ({}) are greater than number of indexes ({})"
            msg = msg.format(self.__did, len(self.__lbl2cat), len(self.__idx2cat))
            raise JsonStatMalformedJson(msg)

    # validate: indexes must be consistent with size
    if self.__size != len(self.__idx2cat):
        msg = "dimension '{}': malformed json: number of indexes {} not match with size {}"
        msg = msg.format(self.__did, len(self.__idx2cat), self.__size)
        raise JsonStatMalformedJson(msg)

    # validate: no hole in the indexes
    if any(v is None for v in self.__pos2cat):
        msg = "dimension '{}':hole in index".format(self.__did)
        raise JsonStatMalformedJson(msg)

    # TODO: parse 'unit' (for now it is stored without any parsing)
    #
    # jsonschema for unit is about:
    # "category_unit": {
    #     "type": "object",
    #     "properties": {
    #         "additionalProperties": {
    #             "type": "object",
    #             "properties": {"label": {"type": "string"},
    #                            "decimals": {"type": "number"},
    #                            "type": {"type": "string"},
    #                            "base": {"type": "string"},
    #                            "multiplier": {"type": "number"},
    #                            "position": {"type": "string"}},
    #             "additionalProperties": false
    #         }
    #     }
    # },
    #
    # examples:
    # "unit" : {
    #     "exp" : {
    #         "decimals": 1,
    #         "label" : "millions",
    #         "symbol" : "$",
    #         "position" : "start"
    #     }
    # }
    #
    # "category" : {
    #     "label" : {
    #         "UNR" : "unemployment rate"
    #     },
    #     "unit" : {
    #         "UNR" : {
    #             "label" : "%",
    #             "decimals" : 9,
    #             "type" : "ratio",
    #             "base" : "per cent",
    #             "multiplier" : 0
    #         }
    #     }
    # }
    if 'unit' in json_data_category:
        if self.__role != "metric":
            msg = "dimension {}: 'unit' can be used only when role is 'metric'"
            msg = msg.format(self.__did)
            # the exception used to be built but never raised, so the check
            # had no effect
            raise JsonStatException(msg)
        self.__unit = json_data_category['unit']