Пример #1
0
def test_shape_mismatch_data():
    """Merging a 10-element and a 5-element vector must raise ``LengthMismatch``."""
    # Ten-element persistent vector in the first package.
    long_dp = create_datapackage()
    long_dp.add_persistent_vector(
        matrix="sa_matrix",
        data_array=np.arange(10),
        name="sa-data-vector",
        indices_array=np.array(
            list(zip(range(10), range(10, 20))), dtype=INDICES_DTYPE
        ),
    )

    # Five-element persistent vector in the second package.
    short_dp = create_datapackage()
    short_dp.add_persistent_vector(
        matrix="sa_matrix",
        data_array=np.arange(5),
        name="sa-data-vector2",
        indices_array=np.array(
            list(zip(range(5), range(10, 15))), dtype=INDICES_DTYPE
        ),
    )

    merge_arguments = dict(
        first_dp=long_dp,
        first_resource_group_label="sa-data-vector",
        second_dp=short_dp,
        second_resource_group_label="sa-data-vector2",
        mask_array=np.zeros((5,), dtype=bool),
    )
    with pytest.raises(LengthMismatch):
        merge_datapackages_with_mask(**merge_arguments)
Пример #2
0
def test_interface_error():
    """Merging a persistent vector with an interface-backed vector raises ``ValueError``."""

    class Dummy:
        pass

    def make_indices():
        # Fresh (i, i + 10) index pairs for i in 0..9; one array per package.
        return np.array(
            list(zip(range(10), range(10, 20))), dtype=INDICES_DTYPE
        )

    static_dp = create_datapackage()
    static_dp.add_persistent_vector(
        matrix="sa_matrix",
        data_array=np.arange(10),
        name="sa-data-vector",
        indices_array=make_indices(),
    )

    dynamic_dp = create_datapackage()
    dynamic_dp.add_dynamic_vector(
        interface=Dummy(),
        indices_array=make_indices(),
        matrix="sa_matrix",
        name="sa-vector-interface",
    )

    merge_arguments = dict(
        first_dp=static_dp,
        first_resource_group_label="sa-data-vector",
        second_dp=dynamic_dp,
        second_resource_group_label="sa-vector-interface",
        mask_array=np.zeros((10,), dtype=bool),
    )
    with pytest.raises(ValueError):
        merge_datapackages_with_mask(**merge_arguments)
def generate_local_sa_biosphere_datapackage(cutoff=1e-4, const_factor=10):
    """Write the "local sa biosphere" datapackage to ``local-sa-biosphere.zip``.

    The exchanges returned by ``filter_uncertain_biosphere_exchanges(lca, cutoff)``
    supply the amounts and the (input id, output id) index pairs of a single
    persistent array resource for ``biosphere_matrix``.

    Args:
        cutoff: Passed straight to ``filter_uncertain_biosphere_exchanges``.
        const_factor: Value placed on the diagonal of the scaling matrix.

    Returns nothing; the archive is written under ``DATA_DIR`` by
    ``finalize_serialization()``.
    """

    lca = setup_bw_project_archetypes()
    uncertain_biosphere_exchanges = filter_uncertain_biosphere_exchanges(
        lca, cutoff)

    # Zip archive opened for writing; it is filled when serialization is finalized.
    dp = bwp.create_datapackage(
        fs=ZipFS(str(DATA_DIR / "local-sa-biosphere.zip"), write=True),
        name="local sa biosphere",
    )

    amounts = np.array([exc.amount for exc in uncertain_biosphere_exchanges])
    # One sample (column) per exchange.
    num_samples = len(amounts)
    # NOTE(review): np.tile(amounts, num_samples) is 1-D with num_samples**2
    # elements, while the np.diag(...) operand is (num_samples, num_samples).
    # These shapes do not broadcast when num_samples > 1, so this line looks
    # like it raises for more than one exchange -- confirm the intended shape.
    data_array = np.tile(amounts, num_samples) * (np.diag(
        np.ones(num_samples) * const_factor))

    # One (row, col) pair per exchange: input id and output id.
    indices_array = np.array(
        [(exc.input.id, exc.output.id)
         for exc in uncertain_biosphere_exchanges],
        dtype=bwp.INDICES_DTYPE,
    )

    # All inputs -> all True
    flip_array = np.ones(len(indices_array), dtype=bool)

    dp.add_persistent_array(
        matrix="biosphere_matrix",
        data_array=data_array,
        name="local sa biosphere",
        indices_array=indices_array,
        flip_array=flip_array,
    )

    dp.finalize_serialization()
Пример #4
0
def create_dp(vector, array, distributions):
    """Build an in-memory datapackage for matrix ``"foo"`` with optional resources.

    Args:
        vector: If truthy, add the four-element persistent vector ``"vector"``.
        array: If truthy, add the persistent array ``"array"`` (four rows,
            one column).
        distributions: If truthy, add the single-row persistent vector
            ``"distributions"`` that carries only a distributions array.

    Returns:
        The datapackage holding whichever resources were requested.
    """
    dp = bwp.create_datapackage()
    if distributions:
        dp.add_persistent_vector(
            matrix="foo",
            name="distributions",
            indices_array=np.array([(0, 0)], dtype=bwp.INDICES_DTYPE),
            distributions_array=np.array(
                [
                    # ``np.nan`` rather than ``np.NaN``: the capitalized alias
                    # was removed in NumPy 2.0.
                    (4, 0.5, np.nan, np.nan, 0.2, 0.8, False),
                ],
                dtype=bwp.UNCERTAINTY_DTYPE,
            ),
        )
    if vector:
        dp.add_persistent_vector(
            matrix="foo",
            name="vector",
            indices_array=np.array(
                [(10, 10), (12, 9), (14, 8), (18, 7)], dtype=bwp.INDICES_DTYPE
            ),
            data_array=np.array([11, 12.3, 14, 125]),
        )
    if array:
        dp.add_persistent_array(
            matrix="foo",
            name="array",
            indices_array=np.array(
                [(1, 0), (2, 1), (5, 1), (8, 1)], dtype=bwp.INDICES_DTYPE
            ),
            # Transposed so the data has one row per index and a single column.
            data_array=np.array([[1, 2.3, 4, 25]]).T,
        )
    return dp
Пример #5
0
    def process(self, **extra_metadata):
        """Serialize the loaded data as a zipped datapackage.

        Every row returned by ``self.load()`` is passed through
        ``self.process_row`` and the results are stored as one persistent
        vector for ``self.matrix`` in a ``ZipFS`` archive opened at
        ``self.filepath_processed()``.

        Args:
            **extra_metadata: Forwarded unchanged to
                ``add_persistent_vector_from_iterator``.

        Returns nothing; the output is the archive written by
        ``finalize_serialization()``.
        """
        rows = self.load()

        archive = ZipFS(str(self.filepath_processed()), write=True)
        package = create_datapackage(
            fs=archive,
            name=self.filename_processed(),
            sum_intra_duplicates=True,
            sum_inter_duplicates=False,
        )

        package.add_persistent_vector_from_iterator(
            matrix=self.matrix,
            name=clean_datapackage_name(str(self.name) + " matrix data"),
            # Rows are converted lazily while the vector is being built.
            dict_iterator=(self.process_row(record) for record in rows),
            nrows=len(rows),
            **extra_metadata,
        )
        package.finalize_serialization()
def create_ordering_datapackages():
    """Write the two fixture archives ``a-first.zip`` and ``b-second.zip``.

    Both archives are created under ``dirpath``, filled by ``add_data`` and
    finalized; they differ only in file name, package name and id.
    """
    fixtures = (
        ("a-first.zip", "test-fixture-a", "fixture-a"),
        ("b-second.zip", "test-fixture-b", "fixture-b"),
    )
    for archive_name, package_name, package_id in fixtures:
        package = create_datapackage(
            fs=ZipFS(str(dirpath / archive_name), write=True),
            name=package_name,
            id_=package_id,
        )
        add_data(package)
        package.finalize_serialization()
Пример #7
0
def test_integration_test_in_memory():
    """A package created with ``fs=None`` uses ``MemoryFS`` and passes the fixture checks."""
    package = create_datapackage(
        fs=None,
        name="test-fixture",
        id_="fixture-42",
    )
    assert isinstance(package.fs, MemoryFS)

    add_data(package)
    check_metadata(package)
    check_data(package)
Пример #8
0
def erg():
    """Return an in-memory datapackage with one vector and one array resource.

    Resource ``"first"`` (matrix ``"one"``) carries data, flip and
    distributions arrays; resource ``"second"`` (matrix ``"two"``) is a
    3 x 4 persistent array. Both share the same three index pairs.
    """
    package = create_datapackage(
        fs=None,
        name="frg-fixture",
        id_="something something danger zone",
    )

    shared_indices = np.array([(0, 1), (2, 3), (4, 5)], dtype=INDICES_DTYPE)

    # Three rows that differ only in their first field (5, 4, 0).
    uncertainties = np.array(
        [(kind, 1, 2, 3, 4, 5, False) for kind in (5, 4, 0)],
        dtype=UNCERTAINTY_DTYPE,
    )

    package.add_persistent_vector(
        matrix="one",
        data_array=np.arange(3),
        name="first",
        indices_array=shared_indices,
        distributions_array=uncertainties,
        nrows=3,
        flip_array=np.array([True, False, True], dtype=bool),
    )
    package.add_persistent_array(
        matrix="two",
        data_array=np.arange(12).reshape(3, 4),
        indices_array=shared_indices,
        name="second",
    )
    return package
Пример #9
0
def empty_biosphere():
    """Write the ``empty_biosphere.zip`` fixture into ``fixture_dir``.

    The archive holds a three-element technosphere vector and a one-element
    characterization vector; no biosphere resource is added.
    """
    # Flow 1: The flow
    # Activity 1: The activity
    archive = ZipFS(str(fixture_dir / "empty_biosphere.zip"), write=True)
    package = create_datapackage(fs=archive)

    package.add_persistent_vector(
        matrix="technosphere_matrix",
        data_array=np.array([1, 2, 3]),
        name="eb-technosphere",
        indices_array=np.array([(2, 1), (1, 1), (2, 2)], dtype=INDICES_DTYPE),
        nrows=3,
        flip_array=np.array([True, False, False], dtype=bool),
    )

    package.add_persistent_vector(
        matrix="characterization_matrix",
        data_array=np.array([1]),
        name="eb-characterization",
        indices_array=np.array([(1, 0)], dtype=INDICES_DTYPE),
        global_index=0,
        nrows=1,
    )

    package.finalize_serialization()
Пример #10
0
def process_delta_database(name, tech, bio, dependents):
    """Write the processed datapackage for database ``name`` from given iterators.

    A modification of ``bw2data.backends.base.SQLiteBackend.process`` that
    skips retrieving exchange data from the database: the technosphere and
    biosphere rows are supplied by the caller.

    Args:
        name: Database name; also used to build the resource names.
        tech: Iterator handed to the ``technosphere_matrix`` resource.
        bio: Iterator handed to the ``biosphere_matrix`` resource.
        dependents: Set of database names; stored (minus ``name``, sorted) as
            the ``depends`` metadata.
    """
    print("Tech:", tech)
    print("Bio:", bio)

    database = bd.Database(name)
    database.metadata["processed"] = datetime.datetime.now().isoformat()

    # Geomapping source: integer dataset ids and locations of the processes
    # stored for this database.
    geo_query = ActivityDataset.select(
        ActivityDataset.id, ActivityDataset.location
    ).where(ActivityDataset.database == name, ActivityDataset.type == "process")

    # self.filepath_processed checks if data is dirty,
    # and processes if it is. This causes an infinite loop.
    # So we construct the filepath ourselves.
    archive_path = str(database.dirpath_processed() / database.filename_processed())

    package = bwp.create_datapackage(
        fs=ZipFS(archive_path, write=True),
        name=bwp.clean_datapackage_name(name),
        sum_intra_duplicates=True,
        sum_inter_duplicates=False,
    )

    package.add_persistent_vector_from_iterator(
        matrix="inv_geomapping_matrix",
        name=bwp.clean_datapackage_name(name + " inventory geomapping matrix"),
        dict_iterator=(
            {
                "row": dataset_id,
                # Fall back to the global location when none is stored.
                "col": bd.geomapping[
                    bd.backends.utils.retupleize_geo_strings(location)
                    or bd.config.global_location
                ],
                "amount": 1,
            }
            for dataset_id, location in geo_query.tuples()
        ),
        nrows=geo_query.count(),
    )
    package.add_persistent_vector_from_iterator(
        matrix="biosphere_matrix",
        name=bwp.clean_datapackage_name(name + " biosphere matrix"),
        dict_iterator=bio,
    )
    package.add_persistent_vector_from_iterator(
        matrix="technosphere_matrix",
        name=bwp.clean_datapackage_name(name + " technosphere matrix"),
        dict_iterator=tech,
    )
    package.finalize_serialization()

    database.metadata["depends"] = sorted(dependents.difference({name}))
    database.metadata["dirty"] = False
    database._metadata.flush()
Пример #11
0
def aggregation(**kwargs):
    """Return a datapackage whose ``"vector"`` resource repeats the index ``(4, 2)``.

    ``kwargs`` are forwarded unchanged to ``bwp.create_datapackage``.
    """
    package = bwp.create_datapackage(**kwargs)

    # The (4, 2) pair appears twice, with values 4 and 17.
    index_pairs = [(0, 0), (2, 1), (4, 2), (4, 2), (8, 3)]
    values = [1, 2.3, 4, 17, 25]

    package.add_persistent_vector(
        matrix="foo",
        name="vector",
        indices_array=np.array(index_pairs, dtype=bwp.INDICES_DTYPE),
        data_array=np.array(values),
    )
    return package
Пример #12
0
def diagonal(**kwargs):
    """Return a datapackage with a four-element ``"vector"`` and a flip array.

    ``kwargs`` are forwarded unchanged to ``bwp.create_datapackage``. Only the
    second element is marked in the flip array.
    """
    package = bwp.create_datapackage(**kwargs)

    index_pairs = [(0, 1), (1, 1), (2, 0), (3, 1)]
    values = [1, 2.3, 4, 25]
    flips = [False, True, False, False]

    package.add_persistent_vector(
        matrix="foo",
        name="vector",
        indices_array=np.array(index_pairs, dtype=bwp.INDICES_DTYPE),
        data_array=np.array(values),
        flip_array=np.array(flips, dtype=bool),
    )
    return package
Пример #13
0
def test_add_persistent_array_data_shapemismatch_nrows():
    """A 4-row data array with only 3 index pairs must raise ``ShapeMismatch``."""
    package = create_datapackage()
    four_rows = np.arange(12).reshape((4, 3))
    three_indices = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE)

    call_arguments = dict(
        matrix="sa_matrix",
        data_array=four_rows,
        name="sa-data-vector",
        indices_array=three_indices,
    )
    with pytest.raises(ShapeMismatch):
        package.add_persistent_array(**call_arguments)
Пример #14
0
def test_add_persistent_vector_data_shapemismatch_ndimensions():
    """Passing two-dimensional data to ``add_persistent_vector`` raises ``ShapeMismatch``."""
    package = create_datapackage()
    two_dimensional = np.array([[2, 7, 12], [4, 5, 15]])
    three_indices = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE)

    call_arguments = dict(
        matrix="sa_matrix",
        data_array=two_dimensional,
        name="sa-data-vector",
        indices_array=three_indices,
    )
    with pytest.raises(ShapeMismatch):
        package.add_persistent_vector(**call_arguments)
Пример #15
0
def test_exclude_no_match():
    """``exclude`` with a filter that matches nothing returns a new, same-sized package."""
    target = "sa-data-vector.indices"

    def resource_names(package):
        return [resource["name"] for resource in package.resources]

    original = create_datapackage()
    add_data(original)
    assert isinstance(original.fs, MemoryFS)

    size_before = len(original)
    assert target in resource_names(original)

    filtered = original.exclude({"foo": "bar"})
    assert filtered is not original
    # Neither package lost the resource.
    assert target in resource_names(original)
    assert target in resource_names(filtered)
    assert len(filtered) == size_before
Пример #16
0
def test_add_dynamic_vector_flip_shapemistmatch():
    """A 4-element flip array with 3 index pairs must raise ``ShapeMismatch``.

    The indices array is built with ``INDICES_DTYPE`` so that the only
    inconsistency in the call is the length of ``flip_array``.
    """
    dp = create_datapackage()
    flip_array = np.array([0, 1, 0, 1], dtype=bool)
    # Previously built with ``dtype=ShapeMismatch`` (the exception class),
    # which is not a valid indices dtype.
    indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE)
    with pytest.raises(ShapeMismatch):
        dp.add_dynamic_vector(
            matrix="sa_matrix",
            interface=Dummy(),
            name="sa-data-vector",
            flip_array=flip_array,
            indices_array=indices_array,
        )
Пример #17
0
    def write_exchanges(self, technosphere, biosphere, dependents):
        """Write IO data directly to a processed datapackage.

        Three persistent vectors are written to a ``ZipFS`` archive at
        ``self.filepath_processed()``: the inventory geomapping (one row per
        object in ``self``), the technosphere and the biosphere. Afterwards
        the ``depends`` and ``processed`` entries of this database in
        ``databases`` are updated and flushed.

        Args:
            technosphere: Iterator for the ``technosphere_matrix`` resource.
            biosphere: Iterator for the ``biosphere_matrix`` resource.
            dependents: Iterable of database names; stored sorted, without
                ``self.name``.
        """
        print("Starting IO table write")

        package = create_datapackage(
            fs=ZipFS(str(self.filepath_processed()), write=True),
            name=clean_datapackage_name(self.name),
            sum_intra_duplicates=True,
            sum_inter_duplicates=False,
        )

        # One geomapping row per dataset; a missing location falls back to
        # the configured global location.
        geomapping_rows = (
            {
                "row": dataset.id,
                "col": geomapping[dataset["location"] or config.global_location],
                "amount": 1,
            }
            for dataset in self
        )
        package.add_persistent_vector_from_iterator(
            dict_iterator=geomapping_rows,
            matrix="inv_geomapping_matrix",
            name=clean_datapackage_name(
                self.name + " inventory geomapping matrix"
            ),
            nrows=len(self),
        )

        print("Adding technosphere matrix")
        package.add_persistent_vector_from_iterator(
            matrix="technosphere_matrix",
            name=clean_datapackage_name(self.name + " technosphere matrix"),
            dict_iterator=technosphere,
        )

        print("Adding biosphere matrix")
        package.add_persistent_vector_from_iterator(
            matrix="biosphere_matrix",
            name=clean_datapackage_name(self.name + " biosphere matrix"),
            dict_iterator=biosphere,
        )
        package.finalize_serialization()

        other_databases = set(dependents).difference({self.name})
        databases[self.name]["depends"] = sorted(other_databases)
        databases[self.name]["processed"] = datetime.datetime.now().isoformat()
        databases.flush()
Пример #18
0
def test_add_dynamic_vector_flip_dtype():
    """A flip array built without ``dtype=bool`` must raise ``WrongDatatype``."""
    package = create_datapackage()
    # Deliberately left at NumPy's default dtype for a list of ints.
    integer_flips = np.array([0, 1, 0])
    three_indices = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE)

    call_arguments = dict(
        matrix="sa_matrix",
        interface=Dummy(),
        name="sa-data-vector",
        flip_array=integer_flips,
        indices_array=three_indices,
    )
    with pytest.raises(WrongDatatype):
        package.add_dynamic_vector(**call_arguments)
Пример #19
0
def test_add_resource_with_same_name():
    """Adding ``"sa-data-vector"`` after ``add_data`` has run raises ``NonUnique``."""
    package = create_datapackage()
    # Presumably registers a resource group named "sa-data-vector" already.
    add_data(package)

    duplicate = dict(
        matrix="sa_matrix",
        data_array=np.array([2, 7, 12]),
        name="sa-data-vector",
        indices_array=np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE),
    )
    with pytest.raises(NonUnique):
        package.add_persistent_vector(**duplicate)
Пример #20
0
def test_del_resource_group_in_memory():
    """Deleting group ``"sa-data-vector"`` removes three resources in place."""
    target = "sa-data-vector.indices"

    def resource_names(package):
        return [resource["name"] for resource in package.resources]

    package = create_datapackage()
    add_data(package)
    assert isinstance(package.fs, MemoryFS)

    size_before = len(package)
    assert target in resource_names(package)

    package.del_resource_group("sa-data-vector")
    assert target not in resource_names(package)

    # Every view of the package shrinks by the same three entries.
    expected = size_before - 3
    assert len(package) == expected
    assert len(package.data) == expected
    assert len(package.metadata["resources"]) == expected
    assert len(package.resources) == expected
Пример #21
0
def test_add_persistent_array_flip_shapemistmatch():
    """A 4-element flip array with 3 index pairs must raise ``ShapeMismatch``.

    The data array has three rows and the indices array is built with
    ``INDICES_DTYPE``, so the only inconsistency in the call is the length of
    ``flip_array``.
    """
    dp = create_datapackage()
    data_array = np.arange(12).reshape(3, 4)
    flip_array = np.array([0, 1, 0, 1], dtype=bool)
    # Previously built with ``dtype=ShapeMismatch`` (the exception class),
    # which is not a valid indices dtype.
    indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE)
    with pytest.raises(ShapeMismatch):
        dp.add_persistent_array(
            matrix="sa_matrix",
            data_array=data_array,
            name="sa-data-vector",
            flip_array=flip_array,
            indices_array=indices_array,
        )
Пример #22
0
def test_add_persistent_array_flip_dtype():
    """A flip array built without ``dtype=bool`` must raise ``WrongDatatype``."""
    package = create_datapackage()
    # Deliberately left at NumPy's default dtype for a list of ints.
    integer_flips = np.array([0, 1, 0])

    call_arguments = dict(
        matrix="sa_matrix",
        data_array=np.arange(12).reshape((3, 4)),
        name="sa-data-vector",
        flip_array=integer_flips,
        indices_array=np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE),
    )
    with pytest.raises(WrongDatatype):
        package.add_persistent_array(**call_arguments)
Пример #23
0
def test_reindex_custom_id_field_datapackage():
    """``reindex`` with ``id_field_datapackage="bar"`` rewrites ids 11-16 to 21-26.

    The arrays fetched before the call are checked again afterwards, so the
    test relies on ``reindex`` updating them in place.
    """
    package = create_datapackage()
    add_data(package, "bar")

    # Destination rows: ids 21..26 with their "a" and "c" attributes; "d" is
    # 11 everywhere.
    destination = [
        {"id": new_id, "a": a_value, "c": c_value, "d": 11}
        for new_id, a_value, c_value in (
            (21, 1, 3),
            (22, 2, 4),
            (23, 1, 4),
            (24, 3, 5),
            (25, 4, 5),
            (26, 4, 6),
        )
    ]

    indices, _ = package.get_resource("vector.indices")
    frame, _ = package.get_resource("vector-csv-rows")
    assert np.allclose(indices["row"], np.array([11, 11, 13]))
    assert np.allclose(frame["bar"], np.array([11, 12, 13, 14, 15, 16]))

    reindex(
        package,
        "vector-csv-rows",
        destination,
        id_field_datapackage="bar",
    )

    assert np.allclose(indices["row"], np.array([21, 21, 23]))
    assert np.allclose(frame["bar"], np.array([21, 22, 23, 24, 25, 26]))
Пример #24
0
def test_exclude_multiple_matrix():
    """Excluding matrix ``"sa_matrix"`` leaves a new package with two resources."""
    target = "sa-data-vector.indices"

    def resource_names(package):
        return [resource["name"] for resource in package.resources]

    original = create_datapackage()
    add_data(original)
    assert isinstance(original.fs, MemoryFS)
    assert target in resource_names(original)

    filtered = original.exclude({"matrix": "sa_matrix"})
    assert filtered is not original
    # The source package is untouched; only the copy lost the resource.
    assert target in resource_names(original)
    assert target not in resource_names(filtered)

    expected = 2
    assert len(filtered) == expected
    assert len(filtered.data) == expected
    assert len(filtered.metadata["resources"]) == expected
    assert len(filtered.resources) == expected
Пример #25
0
def test_exclude_basic():
    """Excluding group ``"sa-data-vector"`` yields a new package three resources smaller."""
    target = "sa-data-vector.indices"

    def resource_names(package):
        return [resource["name"] for resource in package.resources]

    original = create_datapackage()
    add_data(original)
    assert isinstance(original.fs, MemoryFS)

    size_before = len(original)
    assert target in resource_names(original)

    filtered = original.exclude({"group": "sa-data-vector"})
    assert filtered is not original
    # The source package is untouched; only the copy lost the resource.
    assert target in resource_names(original)
    assert target not in resource_names(filtered)

    expected = size_before - 3
    assert len(filtered) == expected
    assert len(filtered.data) == expected
    assert len(filtered.metadata["resources"]) == expected
    assert len(filtered.resources) == expected
Пример #26
0
def test_one_empty_after_custom_filter():
    """With a ``col > 0`` filter the mapped matrix sums to 21.

    The first vector only has column 0 and the second only column 1, so 21 is
    the sum of the second vector's data (10 + 11).
    """

    def positive_columns(indices):
        return indices['col'] > 0

    package = bwp.create_datapackage()
    package.add_persistent_vector(
        matrix="foo",
        data_array=np.arange(2),
        indices_array=np.array([(0, 0), (1, 0)], dtype=bwp.INDICES_DTYPE),
    )
    package.add_persistent_vector(
        matrix="foo",
        data_array=np.arange(10, 12),
        indices_array=np.array([(0, 1), (1, 1)], dtype=bwp.INDICES_DTYPE),
    )

    mapped = MappedMatrix(
        packages=[package],
        matrix="foo",
        custom_filter=positive_columns,
    )
    assert mapped.matrix.sum() == 21
Пример #27
0
def test_all_empty_after_custom_filter():
    """A ``col > 0`` filter on column-0-only data raises ``EmptyArray`` unless ``empty_ok=True``."""

    def positive_columns(indices):
        return indices['col'] > 0

    package = bwp.create_datapackage()
    package.add_persistent_vector(
        matrix="foo",
        data_array=np.arange(2),
        indices_array=np.array([(0, 0), (1, 0)], dtype=bwp.INDICES_DTYPE),
    )

    shared_arguments = dict(
        packages=[package],
        matrix="foo",
        custom_filter=positive_columns,
    )
    with pytest.raises(EmptyArray):
        MappedMatrix(**shared_arguments)

    # With empty_ok the constructor succeeds and the result is truthy.
    assert MappedMatrix(empty_ok=True, **shared_arguments)
Пример #28
0
def test_integration_test_fs_temp_directory():
    """A package serialized to a temporary ``OSFS`` directory can be loaded and re-checked."""
    with tempfile.TemporaryDirectory() as workdir:
        written = create_datapackage(
            fs=OSFS(workdir),
            name="test-fixture",
            id_="fixture-42",
        )
        add_data(written)
        written.finalize_serialization()

        check_metadata(written)
        check_data(written)

        # Read the same directory back through a fresh filesystem handle.
        reloaded = load_datapackage(OSFS(workdir))
        check_metadata(reloaded, False)
        check_data(reloaded)

        reloaded.fs.close()
Пример #29
0
def test_add_persistent_vector_distributions_shapemismatch():
    """Two distribution rows with three index pairs must raise ``ShapeMismatch``."""
    dp = create_datapackage()
    # ``np.nan`` rather than ``np.NaN``: the capitalized alias was removed in
    # NumPy 2.0 and would fail before the code under test is reached.
    distributions_array = np.array(
        [
            (3, 1.3, 2.5, np.nan, np.nan, np.nan, False),
            (0, 1.3, 2.5, np.nan, np.nan, np.nan, False),
        ],
        dtype=UNCERTAINTY_DTYPE,
    )
    indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE)
    with pytest.raises(ShapeMismatch):
        dp.add_persistent_vector(
            matrix="sa_matrix",
            distributions_array=distributions_array,
            name="sa-data-vector",
            indices_array=indices_array,
        )
Пример #30
0
def test_exclude_multiple_filters():
    """Excluding on both group and matrix yields a new package two resources smaller."""
    target = "sa-array-interface.indices"

    def resource_names(package):
        return [resource["name"] for resource in package.resources]

    original = create_datapackage()
    add_data(original)
    assert isinstance(original.fs, MemoryFS)

    size_before = len(original)
    assert target in resource_names(original)

    filtered = original.exclude(
        {"group": "sa-array-interface", "matrix": "sa_matrix"}
    )
    assert filtered is not original
    # The source package is untouched; only the copy lost the resource.
    assert target in resource_names(original)
    assert target not in resource_names(filtered)

    expected = size_before - 2
    assert len(filtered) == expected
    assert len(filtered.data) == expected
    assert len(filtered.metadata["resources"]) == expected
    assert len(filtered.resources) == expected