示例#1
0
def test_get_pointer(root_table):
    """Check ``get_pointer`` results for the root, child and nested child tables."""
    items_table = add_child_table(root_table, "/tender/items", "tender",
                                  "items")
    classifications_table = add_child_table(
        items_table,
        "/tender/items/additionalClassifications",
        "items",
        "additionalClassifications",
    )

    # Each case: (table, absolute path, index-free path, extra kwargs, expected).
    # The order matches the sequence of checks this test has always performed.
    cases = (
        (
            classifications_table,
            "/tender/items/0/additionalClassifications/0/id",
            "/tender/items/additionalClassifications/id",
            {},
            "/tender/items/additionalClassifications/id",
        ),
        (
            items_table,
            "/tender/items/0/additionalClassifications/0/id",
            "/tender/items/additionalClassifications/id",
            {},
            "/tender/items/additionalClassifications/0/id",
        ),
        (
            items_table,
            "/tender/items/0/additionalClassifications/0",
            "/tender/items/additionalClassifications",
            {},
            "/tender/items/additionalClassifications/0",
        ),
        (
            items_table,
            "/tender/items/0/additionalClassifications",
            "/tender/items/additionalClassifications",
            {},
            "/tender/items/additionalClassifications",
        ),
        (
            root_table,
            "/tender/items/0/additionalClassifications/0/id",
            "/tender/items/additionalClassifications/id",
            {},
            "/tender/items/0/additionalClassifications/0/id",
        ),
        (
            root_table,
            "/tender/items/0/id",
            "/tender/items/id",
            {},
            "/tender/items/0/id",
        ),
        (
            items_table,
            "/tender/items/0/id",
            "/tender/items/id",
            {},
            "/tender/items/id",
        ),
        (
            root_table,
            "/tender/id",
            "/tender/id",
            {},
            "/tender/id",
        ),
        (
            root_table,
            "/tender/items",
            "/tender/items",
            {"index": "0"},
            "/tender/items/0",
        ),
        (
            root_table,
            "/tender",
            "/tender",
            {"index": "0"},
            "/tender",
        ),
    )

    for table, abs_path, path, extra, expected in cases:
        assert get_pointer(table, abs_path, path, True, **extra) == expected
示例#2
0
 def set_preview_path(self, abs_path, path, value, max_items):
     """
     Store a value in the latest preview rows of this table and its ancestors.

     The value always goes into the last combined preview row. It goes into
     the last regular preview row only when the column is one of the combined
     columns and either the path is not reported as an array or the entry in
     ``self.arrays`` for that array is below ``max_items``.

     :param abs_path: The column's full JSON path
     :param path: The column's JSON path without array indexes
     :param value: The value to store
     :param max_items: Upper bound compared against ``self.arrays[array]``
     """
     column = get_pointer(self, abs_path, path, True)
     array_key = self.is_array(path)
     self.preview_rows_combined[-1][column] = value

     # ``self.arrays`` is consulted only for known columns inside an array.
     if column in self.combined_columns and (
             not array_key or self.arrays[array_key] < max_items):
         self.preview_rows[-1][column] = value

     if self.is_root:
         return
     self.parent.set_preview_path(abs_path, path, value, max_items)
示例#3
0
    def inc_column(self, abs_path, path):
        """
        Add one hit to the matching combined column, here and in every ancestor.

        :param abs_path: The column's full JSON path
        :param path: The column's JSON path without array indexes
        """
        column_key = get_pointer(self, abs_path, path, True)
        combined = self.combined_columns
        if column_key in combined:
            combined[column_key].hits += 1

        # The root table has no parent to propagate to.
        if self.is_root:
            return
        self.parent.inc_column(abs_path, path)
示例#4
0
文件: spec.py 项目: lttga/test2
    def inc_column(self, abs_path, path):
        """Add one hit to every column mapping that knows this column.

        The regular, combined and additional column mappings are checked in
        that order, then the call is forwarded to the parent table unless this
        table is the root.

        :param abs_path: Full column jsonpath
        :param path: Path without indexes
        """
        column_key = get_pointer(self, abs_path, path, True)
        for attr_name in ("columns", "combined_columns", "additional_columns"):
            mapping = getattr(self, attr_name)
            if column_key in mapping:
                mapping[column_key].hits += 1

        if self.is_root:
            return
        self.parent.inc_column(abs_path, path)
示例#5
0
    def flatten(self, releases):
        """Flatten releases into table rows.

        Every release is walked with an explicit stack instead of recursion.
        Objects whose index-free path is registered in ``self._path_map`` start
        a new row in the matching table; scalar values are written into the
        latest row of the table returned by ``self.get_table``.

        :param releases: releases as iterable object
        :return: Iterator of ``(counter, rows)`` tuples, one per release, where
            ``counter`` is the position of the release in ``releases`` and
            ``rows`` is the ``Rows`` object filled for that release
        """

        for counter, release in enumerate(releases):
            # Work stack of (abs_path, path, parent_key, parent, record, repeat)
            # tuples, seeded with the release itself.
            to_flatten = deque([("", "", "", {}, release, {})])
            rows = Rows(ocid=release["ocid"],
                        buyer=release.get("buyer", {}),
                        data=defaultdict(list))

            while to_flatten:
                # pop() takes the most recently appended entry, so the latest
                # pushed object is processed first.
                abs_path, path, parent_key, parent, record, repeat = to_flatten.pop(
                )

                table = self._path_map.get(path)
                if path == "/buyer":
                    # only useful in analysis
                    continue
                if table:
                    # Strict match /tender /parties etc., so this is a new row
                    row = rows.new_row(table, record.get("id", ""))
                    only = self.options.selection[table.name].only
                    if only:
                        # Keep just the columns listed in the ``only`` option.
                        row = {
                            col: col_v
                            for col, col_v in row.items() if col in only
                        }
                    if table.is_root:
                        # Rebinds the local name only; a dict already shared
                        # with other stack entries is left untouched.
                        repeat = {}
                    if repeat:
                        row.update(repeat)
                    rows.data[table.name].append(row)
                for key, item in record.items():
                    # ``pointer`` extends the index-free path, ``abs_pointer``
                    # extends the path that may carry array indexes.
                    pointer = SEPARATOR.join((path, key))
                    abs_pointer = SEPARATOR.join((abs_path, key))
                    table = self.get_table(pointer)
                    if not table:
                        # No table is known for this pointer, so skip the value.
                        continue

                    item_type = table.types.get(pointer)
                    options = self.options.selection[table.name]
                    split = options.split
                    if pointer in options.repeat:
                        # Remembered so it can be copied into rows created from
                        # stack entries that carry this ``repeat`` mapping.
                        repeat[pointer] = item

                    if isinstance(item, dict):
                        # Nested object: queue it for later processing.
                        to_flatten.append(
                            (abs_pointer, pointer, key, record, item, repeat))
                    elif isinstance(item, list):
                        if item_type == JOINABLE:
                            # Joinable lists collapse into one string cell on
                            # the latest row of the table.
                            value = JOINABLE_SEPARATOR.join(
                                (str(i) for i in item))
                            rows.data[table.name][-1][pointer] = value
                        else:
                            if self.options.count and table.splitted:
                                # Store the list length under "<column>Count"
                                # when the table contains such a column.
                                abs_pointer = get_pointer(
                                    table,
                                    abs_pointer,
                                    pointer,
                                    split,
                                )
                                abs_pointer += "Count"
                                if abs_pointer in table:
                                    rows.data[table.name][-1][
                                        abs_pointer] = len(item)
                            for index, value in enumerate(item):
                                # Only dict elements are queued; other element
                                # types are not handled in this branch.
                                if isinstance(value, dict):
                                    abs_pointer = get_pointer(
                                        table,
                                        SEPARATOR.join((abs_path, key)),
                                        pointer,
                                        split,
                                        index=str(index),
                                    )
                                    to_flatten.append((
                                        abs_pointer,
                                        pointer,
                                        key,
                                        record,
                                        value,
                                        repeat,
                                    ))
                    else:
                        # Anything that is neither a dict nor a list is treated
                        # as a cell value.
                        if table.is_combined:
                            # Combined tables address the value by parent key
                            # and key only.
                            pointer = SEPARATOR + SEPARATOR.join(
                                (parent_key, key))
                            abs_pointer = pointer
                        if not table.is_root:
                            root = get_root(table)
                            unnest = self.options.selection[root.name].unnest
                            if unnest and abs_pointer in unnest:
                                # Unnested values go to the latest row of the
                                # root table instead of this table.
                                rows.data[root.name][-1][abs_pointer] = item
                                continue

                        pointer = get_pointer(table, abs_pointer, pointer,
                                              split)
                        # Values for columns outside combined_columns are dropped.
                        if pointer in table.combined_columns:
                            rows.data[table.name][-1][pointer] = item
            yield counter, rows
示例#6
0
文件: flatten.py 项目: lttga/test2
    def flatten(self, releases):
        """Flatten releases into table rows.

        Every release is walked with an explicit stack instead of recursion.
        Objects whose index-free path is registered in ``self._path_cache``
        start a new row in the matching table; scalar values are written into
        the latest row of the table found for their pointer.

        :param releases: releases as iterable object
        :return: Iterator of ``(counter, rows)`` tuples, one per release, where
            ``counter`` is the position of the release in ``releases`` and
            ``rows`` maps a table name to the list of rows built from it
        """
        # Loop-invariant, so it is defined once instead of once per release.
        separator = "/"

        for counter, release in enumerate(releases):
            rows = defaultdict(list)
            # Work stack of (abs_path, path, parent_key, parent, record, repeat)
            # tuples, seeded with the release itself.
            to_flatten = deque([("", "", "", {}, release, {})])
            ocid = release["ocid"]
            top_level_id = release["id"]

            while to_flatten:
                # pop() takes the most recently appended entry, so the latest
                # pushed object is processed first.
                abs_path, path, parent_key, parent, record, repeat = (
                    to_flatten.pop())

                table = self._path_cache.get(path)
                if table:
                    # Strict match /tender /parties etc., so this is a new row
                    row_id = generate_row_id(ocid, record.get("id", ""),
                                             parent_key, top_level_id)
                    new_row = {
                        "rowID": row_id,
                        "id": top_level_id,
                        "parentID": parent.get("id"),
                        "ocid": ocid,
                    }
                    if table.is_root:
                        # Rebinds the local name only; a dict already shared
                        # with other stack entries is left untouched.
                        repeat = {}
                    if repeat:
                        new_row.update(repeat)
                    rows[table.name].append(new_row)

                for key, item in record.items():
                    # ``pointer`` extends the index-free path, ``abs_pointer``
                    # extends the path that may carry array indexes.
                    pointer = separator.join((path, key))
                    abs_pointer = separator.join((abs_path, key))

                    table = self._lookup_cache.get(
                        pointer) or self._types_cache.get(pointer)
                    if not table:
                        # No table is known for this pointer, so skip the value.
                        continue

                    item_type = table.types.get(pointer)
                    options = self.options.selection[table.name]
                    split = options.split

                    if pointer in options.repeat:
                        # Remembered so it can be copied into rows created from
                        # stack entries that carry this ``repeat`` mapping.
                        repeat[pointer] = item

                    if isinstance(item, dict):
                        # Nested object: queue it for later processing.
                        to_flatten.append(
                            (abs_pointer, pointer, key, record, item, repeat))
                    elif isinstance(item, list):
                        if item_type == JOINABLE:
                            # str() keeps the join from raising TypeError when
                            # the list holds non-string values such as numbers.
                            value = JOINABLE_SEPARATOR.join(
                                str(i) for i in item)
                            rows[table.name][-1][pointer] = value
                        else:
                            if (self.options.count
                                    and pointer not in table.path and split
                                    and table.should_split):
                                # Store the list length under "<column>Count"
                                # when the table contains such a column.
                                abs_pointer = get_pointer(
                                    table,
                                    abs_pointer,
                                    pointer,
                                    split,
                                    separator=separator,
                                )
                                abs_pointer += "Count"
                                if abs_pointer in table:
                                    rows[table.name][-1][abs_pointer] = len(
                                        item)
                            for index, value in enumerate(item):
                                # Only dict elements are queued; other element
                                # types are not handled in this branch.
                                if isinstance(value, dict):
                                    abs_pointer = get_pointer(
                                        table,
                                        separator.join((abs_path, key)),
                                        pointer,
                                        split,
                                        separator=separator,
                                        index=str(index),
                                    )
                                    to_flatten.append((
                                        abs_pointer,
                                        pointer,
                                        key,
                                        record,
                                        value,
                                        repeat,
                                    ))
                    else:
                        # Anything that is neither a dict nor a list is treated
                        # as a cell value.
                        if not table.is_root:
                            root = get_root(table)
                            unnest = self.options.selection[root.name].unnest
                            if unnest and abs_pointer in unnest:
                                # Unnested values go to the latest row of the
                                # root table instead of this table.
                                rows[root.name][-1][abs_pointer] = item
                                continue
                        pointer = get_pointer(table,
                                              abs_pointer,
                                              pointer,
                                              split,
                                              separator=separator)
                        rows[table.name][-1][pointer] = item
            yield counter, rows