def test_get_pointer(root_table):
    """Check the headers ``get_pointer`` builds for root, child and grandchild tables."""
    items = add_child_table(root_table, "/tender/items", "tender", "items")
    classifications = add_child_table(
        items,
        "/tender/items/additionalClassifications",
        "items",
        "additionalClassifications",
    )

    # Each case: (table, absolute path, index-free path, extra keyword arguments, expected header).
    # The order matches the sequence in which the calls are meant to run.
    cases = [
        (
            classifications,
            "/tender/items/0/additionalClassifications/0/id",
            "/tender/items/additionalClassifications/id",
            {},
            "/tender/items/additionalClassifications/id",
        ),
        (
            items,
            "/tender/items/0/additionalClassifications/0/id",
            "/tender/items/additionalClassifications/id",
            {},
            "/tender/items/additionalClassifications/0/id",
        ),
        (
            items,
            "/tender/items/0/additionalClassifications/0",
            "/tender/items/additionalClassifications",
            {},
            "/tender/items/additionalClassifications/0",
        ),
        (
            items,
            "/tender/items/0/additionalClassifications",
            "/tender/items/additionalClassifications",
            {},
            "/tender/items/additionalClassifications",
        ),
        (
            root_table,
            "/tender/items/0/additionalClassifications/0/id",
            "/tender/items/additionalClassifications/id",
            {},
            "/tender/items/0/additionalClassifications/0/id",
        ),
        (root_table, "/tender/items/0/id", "/tender/items/id", {}, "/tender/items/0/id"),
        (items, "/tender/items/0/id", "/tender/items/id", {}, "/tender/items/id"),
        (root_table, "/tender/id", "/tender/id", {}, "/tender/id"),
        (root_table, "/tender/items", "/tender/items", {"index": "0"}, "/tender/items/0"),
        (root_table, "/tender", "/tender", {"index": "0"}, "/tender"),
    ]

    for table, abs_path, path, extra, expected in cases:
        assert get_pointer(table, abs_path, path, True, **extra) == expected
def set_preview_path(self, abs_path, path, value, max_items):
    """Store a preview value in this table and in each of its ancestors.

    The value is always written to the last combined preview row. It is
    written to the last regular preview row only when the header is one of
    the combined columns and, if ``path`` belongs to an array, that array's
    recorded size is below ``max_items``.

    :param abs_path: The column's full JSON path
    :param path: The column's JSON path without array indexes
    :param value: The value to place in the preview rows
    :param max_items: Size limit checked against ``self.arrays`` for array paths
    """
    column_key = get_pointer(self, abs_path, path, True)
    array_path = self.is_array(path)

    self.preview_rows_combined[-1][column_key] = value

    # Short-circuit order matters: ``self.arrays`` is only consulted for a
    # known combined column whose path belongs to an array.
    if column_key in self.combined_columns and (
        not array_path or self.arrays[array_path] < max_items
    ):
        self.preview_rows[-1][column_key] = value

    if self.is_root:
        return
    self.parent.set_preview_path(abs_path, path, value, max_items)
def inc_column(self, abs_path, path):
    """
    Count one more non-empty cell for the column, in this table and in each ancestor table.

    :param abs_path: The column's full JSON path
    :param path: The column's JSON path without array indexes
    """
    column_key = get_pointer(self, abs_path, path, True)
    tracked = self.combined_columns
    if column_key in tracked:
        tracked[column_key].hits += 1

    # The root table has no parent to notify.
    if self.is_root:
        return
    self.parent.inc_column(abs_path, path)
def inc_column(self, abs_path, path):
    """Bump the hit counter of the column in every collection that holds it.

    The counter is increased in ``columns``, ``combined_columns`` and
    ``additional_columns`` (wherever the header is present), and the call is
    then repeated on the parent table unless this table is the root.

    :param abs_path: Full column jsonpath
    :param path: Path without indexes
    """
    column_key = get_pointer(self, abs_path, path, True)
    for tracked in (self.columns, self.combined_columns, self.additional_columns):
        if column_key in tracked:
            tracked[column_key].hits += 1

    # The root table has no parent to notify.
    if self.is_root:
        return
    self.parent.inc_column(abs_path, path)
def flatten(self, releases):
    """Flatten releases into per-table rows.

    Each release is walked with an explicit stack instead of recursion.
    Nested objects and objects found inside arrays are pushed onto the stack;
    every other value is written into the most recent row of the table that
    owns its path.

    :param releases: releases as iterable object; each one is read with
        ``release["ocid"]`` and ``release.get("buyer", {})``
    :return: Generator yielding ``(counter, rows)`` for each release, where
        ``counter`` is the release's position in ``releases`` and ``rows`` is
        the ``Rows`` object whose ``data`` maps a table name to its list of rows
    """
    for counter, release in enumerate(releases):
        # Stack entries: (abs_path, path, parent_key, parent, record, repeat).
        # ``abs_path`` may carry array indexes, ``path`` never does.
        to_flatten = deque([("", "", "", {}, release, {})])
        rows = Rows(ocid=release["ocid"], buyer=release.get("buyer", {}), data=defaultdict(list))
        while to_flatten:
            # pop() takes the most recently appended entry (assuming ``deque``
            # is collections.deque). ``parent`` is unpacked but not read below.
            abs_path, path, parent_key, parent, record, repeat = to_flatten.pop()
            table = self._path_map.get(path)
            if path == "/buyer":
                # only useful in analysis
                continue
            if table:
                # Strict match /tender /parties etc., so this is a new row
                row = rows.new_row(table, record.get("id", ""))
                only = self.options.selection[table.name].only
                if only:
                    # Keep only the columns named in the table's ``only`` option.
                    row = {col: col_v for col, col_v in row.items() if col in only}
                if table.is_root:
                    # A root table starts with a fresh, empty set of repeated values.
                    repeat = {}
                if repeat:
                    row.update(repeat)
                rows.data[table.name].append(row)
            for key, item in record.items():
                pointer = SEPARATOR.join((path, key))
                abs_pointer = SEPARATOR.join((abs_path, key))
                table = self.get_table(pointer)
                if not table:
                    # No table is known for this path, so the value is skipped.
                    continue
                item_type = table.types.get(pointer)
                options = self.options.selection[table.name]
                split = options.split
                if pointer in options.repeat:
                    # Mutates the dict in place, so entries already on the stack
                    # that carry this same ``repeat`` object see the value too.
                    repeat[pointer] = item
                if isinstance(item, dict):
                    to_flatten.append((abs_pointer, pointer, key, record, item, repeat))
                elif isinstance(item, list):
                    if item_type == JOINABLE:
                        # Joinable arrays collapse into a single cell of stringified items.
                        value = JOINABLE_SEPARATOR.join((str(i) for i in item))
                        rows.data[table.name][-1][pointer] = value
                    else:
                        if self.options.count and table.splitted:
                            # The count column is named after the array's header
                            # with a "Count" suffix.
                            abs_pointer = get_pointer(
                                table,
                                abs_pointer,
                                pointer,
                                split,
                            )
                            abs_pointer += "Count"
                            if abs_pointer in table:
                                rows.data[table.name][-1][abs_pointer] = len(item)
                        for index, value in enumerate(item):
                            # Only dict items are queued; other array items are
                            # not handled in this branch.
                            if isinstance(value, dict):
                                # Rebuilt from abs_path/key because ``abs_pointer``
                                # may have been overwritten by the count column above.
                                abs_pointer = get_pointer(
                                    table,
                                    SEPARATOR.join((abs_path, key)),
                                    pointer,
                                    split,
                                    index=str(index),
                                )
                                to_flatten.append((
                                    abs_pointer,
                                    pointer,
                                    key,
                                    record,
                                    value,
                                    repeat,
                                ))
                else:
                    if table.is_combined:
                        # For combined tables both paths are reduced to
                        # "/<parent_key>/<key>".
                        pointer = SEPARATOR + SEPARATOR.join(
                            (parent_key, key))
                        abs_pointer = pointer
                    if not table.is_root:
                        root = get_root(table)
                        unnest = self.options.selection[root.name].unnest
                        if unnest and abs_pointer in unnest:
                            # Unnested columns go to the root table's latest row
                            # and are not written to this table.
                            rows.data[root.name][-1][abs_pointer] = item
                            continue
                    pointer = get_pointer(table, abs_pointer, pointer, split)
                    if pointer in table.combined_columns:
                        rows.data[table.name][-1][pointer] = item
        yield counter, rows
def flatten(self, releases):
    """Flatten releases into per-table rows.

    Each release is walked with an explicit stack instead of recursion.
    Nested objects and objects found inside arrays are pushed onto the stack;
    every other value is written into the most recent row of the table that
    owns its path.

    :param releases: releases as iterable object; each one must provide
        ``release["ocid"]`` and ``release["id"]``
    :return: Generator yielding ``(counter, rows)`` for each release, where
        ``counter`` is the release's position in ``releases`` and ``rows`` maps
        a table name to its list of row dicts
    :raises KeyError: if a release has no ``"ocid"`` or ``"id"`` key
    """
    for counter, release in enumerate(releases):
        rows = defaultdict(list)
        # Stack entries: (abs_path, path, parent_key, parent, record, repeat).
        # ``abs_path`` may carry array indexes, ``path`` never does.
        to_flatten = deque([("", "", "", {}, release, {})])
        separator = "/"
        ocid = release["ocid"]
        top_level_id = release["id"]

        while to_flatten:
            abs_path, path, parent_key, parent, record, repeat = to_flatten.pop()
            table = self._path_cache.get(path)
            if table:
                # Strict match /tender /parties etc., so this is a new row
                row_id = generate_row_id(ocid, record.get("id", ""), parent_key, top_level_id)
                new_row = {
                    "rowID": row_id,
                    "id": top_level_id,
                    # ``parent`` is the record that contained this one.
                    "parentID": parent.get("id"),
                    "ocid": ocid,
                }
                if table.is_root:
                    # A root table starts with a fresh, empty set of repeated values.
                    repeat = {}
                if repeat:
                    new_row.update(repeat)
                rows[table.name].append(new_row)

            for key, item in record.items():
                pointer = separator.join((path, key))
                abs_pointer = separator.join((abs_path, key))
                table = self._lookup_cache.get(pointer) or self._types_cache.get(pointer)
                if not table:
                    # Neither cache knows this path, so the value is skipped.
                    continue
                item_type = table.types.get(pointer)
                options = self.options.selection[table.name]
                split = options.split
                if pointer in options.repeat:
                    # Mutates the dict in place, so entries already on the stack
                    # that carry this same ``repeat`` object see the value too.
                    repeat[pointer] = item

                if isinstance(item, dict):
                    to_flatten.append((abs_pointer, pointer, key, record, item, repeat))
                elif isinstance(item, list):
                    if item_type == JOINABLE:
                        # Stringify each item first: str.join raises TypeError
                        # on non-string items (e.g. numbers in the array).
                        value = JOINABLE_SEPARATOR.join(str(i) for i in item)
                        rows[table.name][-1][pointer] = value
                    else:
                        if self.options.count and pointer not in table.path and split and table.should_split:
                            # The count column is named after the array's header
                            # with a "Count" suffix.
                            abs_pointer = get_pointer(
                                table,
                                abs_pointer,
                                pointer,
                                split,
                                separator=separator,
                            )
                            abs_pointer += "Count"
                            if abs_pointer in table:
                                rows[table.name][-1][abs_pointer] = len(item)
                        for index, value in enumerate(item):
                            # Only dict items are queued; other array items are
                            # not handled in this branch.
                            if isinstance(value, dict):
                                # Rebuilt from abs_path/key because ``abs_pointer``
                                # may have been overwritten by the count column above.
                                abs_pointer = get_pointer(
                                    table,
                                    separator.join((abs_path, key)),
                                    pointer,
                                    split,
                                    separator=separator,
                                    index=str(index),
                                )
                                to_flatten.append((
                                    abs_pointer,
                                    pointer,
                                    key,
                                    record,
                                    value,
                                    repeat,
                                ))
                else:
                    if not table.is_root:
                        root = get_root(table)
                        unnest = self.options.selection[root.name].unnest
                        if unnest and abs_pointer in unnest:
                            # Unnested columns go to the root table's latest row
                            # and are not written to this table.
                            rows[root.name][-1][abs_pointer] = item
                            continue
                    pointer = get_pointer(table, abs_pointer, pointer, split, separator=separator)
                    rows[table.name][-1][pointer] = item
        yield counter, rows