Example #1
def test_set_nested():
    with set({"abc": {"x": 123}}):
        assert config["abc"] == {"x": 123}
        with set({"abc.y": 456}):
            assert config["abc"] == {"x": 123, "y": 456}
        assert config["abc"] == {"x": 123}
    assert "abc" not in config
Example #2
def test_set_nested():
    with set({'abc': {'x': 123}}):
        assert config['abc'] == {'x': 123}
        with set({'abc.y': 456}):
            assert config['abc'] == {'x': 123, 'y': 456}
        assert config['abc'] == {'x': 123}
    assert 'abc' not in config
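Note: a minimal sketch of the dotted-key behaviour the two tests above rely on. Keys containing dots expand into nested dictionaries and merge with values that are already set:

import dask

# "abc.y" expands to {"abc": {"y": ...}} and merges into the existing
# "abc" mapping; on exiting each block the previous value is restored
with dask.config.set({"abc": {"x": 1}}):
    with dask.config.set({"abc.y": 2}):
        assert dask.config.get("abc") == {"x": 1, "y": 2}
    assert dask.config.get("abc") == {"x": 1}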
Example #4
def test_get_context_using_python3_posix():
    """ get_context() respects configuration.

    If the default context is changed, this test will need to change too.
    """
    assert get_context() is multiprocessing.get_context(None)
    with config.set({"multiprocessing.context": "forkserver"}):
        assert get_context() is multiprocessing.get_context("forkserver")
    with config.set({"multiprocessing.context": "spawn"}):
        assert get_context() is multiprocessing.get_context("spawn")
Example #5
def initialize(self) -> Client:
    if not self.initialized:
        config.set(
            temporary_directory=ask_parallel_actions_dask_temp_dir())
        self.client = Client(
            processes=ask_parallel_actions_dask_use_process(),
            threads_per_worker=1,
            n_workers=ask_parallel_actions_count(),
        )
        self.initialized = True
    return self.client
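Note: unlike the `with` form in the tests above, `initialize` calls config.set directly, which mutates the global configuration for the rest of the process. A minimal sketch using dask's real "temporary-directory" key (underscores and hyphens are interchangeable in config keys):

import dask

# set() without a `with` block persists in the global config
dask.config.set(temporary_directory="/tmp/scratch")
assert dask.config.get("temporary-directory") == "/tmp/scratch"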
Example #6
def test_set_kwargs():
    with set(foo__bar=1, foo__baz=2):
        assert config["foo"] == {"bar": 1, "baz": 2}
    assert "foo" not in config

    # Mix kwargs and dict, kwargs override
    with set({"foo.bar": 1, "foo.baz": 2}, foo__buzz=3, foo__bar=4):
        assert config["foo"] == {"bar": 4, "baz": 2, "buzz": 3}
    assert "foo" not in config

    # Mix kwargs and nested dict, kwargs override
    with set({"foo": {"bar": 1, "baz": 2}}, foo__buzz=3, foo__bar=4):
        assert config["foo"] == {"bar": 4, "baz": 2, "buzz": 3}
    assert "foo" not in config
Example #7
def test_files_per_partition():
    files3 = {f"{n:02}.txt": f"line from {n:02}" for n in range(20)}
    with filetexts(files3):
        # single-threaded scheduler to ensure the warning happens in the
        # same thread as the pytest.warns
        with config.set({"scheduler": "single-threaded"}):
            with pytest.warns(UserWarning):
                b = read_text("*.txt", files_per_partition=10)
                l = len(b.take(100, npartitions=1))

            assert l == 10, "10 files should be grouped into one partition"

            assert b.count().compute() == 20, "All 20 lines should be read"

            with pytest.warns(UserWarning):
                b = read_text("*.txt",
                              files_per_partition=10,
                              include_path=True)
                p = b.take(100, npartitions=1)

            p_paths = tuple(zip(*p))[1]
            p_unique_paths = set(p_paths)
            assert len(p_unique_paths) == 10

            b_paths = tuple(zip(*b.compute()))[1]
            b_unique_paths = set(b_paths)
            assert len(b_unique_paths) == 20
Example #8
def test_arg_reductions(dfunc, func):
    x = np.random.random((10, 10, 10))
    a = da.from_array(x, chunks=(3, 4, 5))

    assert_eq(dfunc(a), func(x))
    assert_eq(dfunc(a, 0), func(x, 0))
    assert_eq(dfunc(a, 1), func(x, 1))
    assert_eq(dfunc(a, 2), func(x, 2))
    with config.set(split_every=2):
        assert_eq(dfunc(a), func(x))
        assert_eq(dfunc(a, 0), func(x, 0))
        assert_eq(dfunc(a, 1), func(x, 1))
        assert_eq(dfunc(a, 2), func(x, 2))
    if _numpy_122:
        assert_eq(dfunc(a, keepdims=True), func(x, keepdims=True))

    pytest.raises(ValueError, lambda: dfunc(a, 3))
    pytest.raises(TypeError, lambda: dfunc(a, (0, 1)))

    x2 = np.arange(10)
    a2 = da.from_array(x2, chunks=3)
    assert_eq(dfunc(a2), func(x2))
    assert_eq(dfunc(a2, 0), func(x2, 0))
    assert_eq(dfunc(a2, 0, split_every=2), func(x2, 0))

    x3 = np.array(1)
    a3 = da.from_array(x3)
    assert_eq(dfunc(a3), func(x3))
Example #9
    def fix_dask_settings(self):
        """
        Fix "standard" dask behaviour for time+space testing.

        Currently this means the single-threaded scheduler with a known
        chunk size, optimised to save space so we can test the largest data.

        """

        import dask.config as dcfg

        # Use the single-threaded scheduler to avoid process-switching costs
        # and minimise memory usage (generally slower, but uses less memory).
        dcfg.set(scheduler="single-threaded")
        # Configure iris._lazy_data.as_lazy_data to aim for 128 MiB chunks
        dcfg.set({"array.chunk-size": "128MiB"})
Example #10
def test_take_uses_config():
    chunks = ((1, 1, 1, 1), (500,), (500,))
    index = np.array([0, 1] + [2] * 101 + [3])
    itemsize = 8
    with config.set(**{"array.chunk-size": "10GB"}):
        chunks2, dsk = take("a", "b", chunks, index, itemsize)
    assert chunks2 == ((1, 1, 101, 1), (500,), (500,))
    assert len(dsk) == 4
Example #11
def test_take_uses_config():
    with dask.config.set({"array.slicing.split-large-chunks": True}):
        chunks = ((1, 1, 1, 1), (500, ), (500, ))
        index = np.array([0, 1] + [2] * 101 + [3])
        itemsize = 8
        with config.set({"array.chunk-size": "10GB"}):
            chunks2, dsk = take("a", "b", chunks, index, itemsize)
        assert chunks2 == ((1, 1, 101, 1), (500, ), (500, ))
        assert len(dsk) == 4
Example #12
    def setup(self, resource, steal_interval):
        config.set(
            {"distributed.scheduler.work-stealing-interval": steal_interval})
        rdict = {"resource": resource} if resource else None
        cluster = LocalCluster(n_workers=1,
                               threads_per_worker=1,
                               resources=rdict,
                               worker_class=Worker)

        spec = copy.deepcopy(cluster.new_worker_spec())

        if resource:
            del spec[1]['options']['resources']
        cluster.worker_spec.update(spec)
        cluster.scale(2)
        client = Client(cluster)

        self.client = client
Example #13
def test_custom_context_ignored_elsewhere():
    """ On Python 2/Windows, setting 'multiprocessing.context' doesn't explode.

    The presumption is that it is simply unused where unsupported; mostly we
    care that nothing breaks.
    """
    assert get({'x': (inc, 1)}, 'x') == 2
    with pytest.warns(UserWarning):
        with config.set({"multiprocessing.context": "forkserver"}):
            assert get({'x': (inc, 1)}, 'x') == 2
Example #14
def test_meta_commands(c, client, capsys):
    _meta_commands("?", context=c, client=client)
    captured = capsys.readouterr()
    assert "Commands" in captured.out

    _meta_commands("help", context=c, client=client)
    captured = capsys.readouterr()
    assert "Commands" in captured.out

    _meta_commands("\\d?", context=c, client=client)
    captured = capsys.readouterr()
    assert "Commands" in captured.out

    _meta_commands("\\l", context=c, client=client)
    captured = capsys.readouterr()
    assert "Schemas" in captured.out

    _meta_commands("\\dt", context=c, client=client)
    captured = capsys.readouterr()
    assert "Tables" in captured.out

    _meta_commands("\\dm", context=c, client=client)
    captured = capsys.readouterr()
    assert "Models" in captured.out

    _meta_commands("\\df", context=c, client=client)
    captured = capsys.readouterr()
    assert "Functions" in captured.out

    _meta_commands("\\de", context=c, client=client)
    captured = capsys.readouterr()
    assert "Experiments" in captured.out

    c.create_schema("test_schema")
    _meta_commands("\\dss test_schema", context=c, client=client)
    assert c.schema_name == "test_schema"

    _meta_commands("\\dss not_exists", context=c, client=client)
    captured = capsys.readouterr()
    assert "Schema not_exists not available\n" == captured.out

    with pytest.raises(
            OSError,
            match="Timed out .* to tcp://localhost:8787 after 5 s",
    ):
        with dask_config.set({"distributed.comm.timeouts.connect": 5}):
            client = _meta_commands("\\dsc localhost:8787",
                                    context=c,
                                    client=client)
            assert client.scheduler.__dict__["addr"] == "localhost:8787"
Example #15
def test_custom_context_used_python3_posix():
    """ The 'multiprocessing.context' config is used to create the pool.

    We assume the default is 'fork', and therefore test for 'spawn'. If the
    default context is changed, this test will need to be modified accordingly.
    """
    # We check for spawn by ensuring subprocess doesn't have modules only
    # parent process should have:
    def check_for_pytest():
        import sys
        return "FAKE_MODULE_FOR_TEST" in sys.modules

    import sys
    sys.modules["FAKE_MODULE_FOR_TEST"] = 1
    try:
        with config.set({"multiprocessing.context": "spawn"}):
            result = get({"x": (check_for_pytest,)}, "x")
        assert not result
    finally:
        del sys.modules["FAKE_MODULE_FOR_TEST"]
Example #16
def test_arg_reductions(dfunc, func):
    x = np.random.random((10, 10, 10))
    a = da.from_array(x, chunks=(3, 4, 5))

    assert_eq(dfunc(a), func(x))
    assert_eq(dfunc(a, 0), func(x, 0))
    assert_eq(dfunc(a, 1), func(x, 1))
    assert_eq(dfunc(a, 2), func(x, 2))
    with config.set(split_every=2):
        assert_eq(dfunc(a), func(x))
        assert_eq(dfunc(a, 0), func(x, 0))
        assert_eq(dfunc(a, 1), func(x, 1))
        assert_eq(dfunc(a, 2), func(x, 2))

    pytest.raises(ValueError, lambda: dfunc(a, 3))
    pytest.raises(TypeError, lambda: dfunc(a, (0, 1)))

    x2 = np.arange(10)
    a2 = da.from_array(x2, chunks=3)
    assert_eq(dfunc(a2), func(x2))
    assert_eq(dfunc(a2, 0), func(x2, 0))
    assert_eq(dfunc(a2, 0, split_every=2), func(x2, 0))
Example #17
def test_set():
    with set(abc=123):
        assert config['abc'] == 123
        with set(abc=456):
            assert config['abc'] == 456
        assert config['abc'] == 123

    assert 'abc' not in config

    with set({'abc': 123}):
        assert config['abc'] == 123

    with set({'abc.x': 1, 'abc.y': 2, 'abc.z.a': 3}):
        assert config['abc'] == {'x': 1, 'y': 2, 'z': {'a': 3}}

    d = {}
    set({'abc.x': 123}, config=d)
    assert d['abc']['x'] == 123
Example #18
def test_set():
    with set(abc=123):
        assert config["abc"] == 123
        with set(abc=456):
            assert config["abc"] == 456
        assert config["abc"] == 123

    assert "abc" not in config

    with set({"abc": 123}):
        assert config["abc"] == 123
    assert "abc" not in config

    with set({"abc.x": 1, "abc.y": 2, "abc.z.a": 3}):
        assert config["abc"] == {"x": 1, "y": 2, "z": {"a": 3}}
    assert "abc" not in config

    d = {}
    set({"abc.x": 123}, config=d)
    assert d["abc"]["x"] == 123
Example #19
def test_tree_reduce_set_options():
    x = da.from_array(np.arange(242).reshape((11, 22)), chunks=(3, 4))
    with config.set(split_every={0: 2, 1: 3}):
        assert_max_deps(x.sum(), 2 * 3)
        assert_max_deps(x.sum(axis=0), 2)
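Note: split_every bounds the fan-in of each step of dask's tree reductions, so smaller values produce deeper trees with more intermediate tasks. A rough sketch (exact task counts are an implementation detail):

import dask
import dask.array as da

x = da.ones((100,), chunks=10)  # 10 chunks along axis 0
with dask.config.set(split_every=2):
    deep = x.sum()
with dask.config.set(split_every=8):
    shallow = x.sum()
# the low-fan-in graph needs more aggregation steps
assert len(deep.__dask_graph__()) > len(shallow.__dask_graph__())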
Example #20
def test_set_hard_to_copyables():
    import threading
    with set(x=threading.Lock()):
        with set(y=1):
            pass
Example #21
File: base.py Project: jakirkham/dask
@contextmanager  # requires: from contextlib import contextmanager
def annotate(**annotations):
    """Context Manager for setting HighLevelGraph Layer annotations.

    Annotations are metadata or soft constraints associated with
    tasks that dask schedulers may choose to respect: They signal intent
    without enforcing hard constraints. As such, they are
    primarily designed for use with the distributed scheduler.

    Almost any object can serve as an annotation, but small Python objects
    are preferred, while large objects such as NumPy arrays are discouraged.

    Callables supplied as an annotation should take a single *key* argument and
    produce the appropriate annotation. Individual task keys in the annotated collection
    are supplied to the callable.

    Parameters
    ----------
    **annotations : key-value pairs

    Examples
    --------

    All tasks within array A should have priority 100 and be retried 3 times
    on failure.

    >>> import dask
    >>> import dask.array as da
    >>> with dask.annotate(priority=100, retries=3):
    ...     A = da.ones((10000, 10000))

    Prioritise tasks within Array A on flattened block ID.

    >>> nblocks = (10, 10)
    >>> with dask.annotate(priority=lambda k: k[1]*nblocks[1] + k[2]):
    ...     A = da.ones((1000, 1000), chunks=(100, 100))

    Annotations may be nested.

    >>> with dask.annotate(priority=1):
    ...     with dask.annotate(retries=3):
    ...         A = da.ones((1000, 1000))
    ...     B = A + 1
    """

    # Sanity check annotations used in place of
    # legacy distributed Client.{submit, persist, compute} keywords
    if "workers" in annotations:
        if isinstance(annotations["workers"], (list, set, tuple)):
            annotations["workers"] = list(annotations["workers"])
        elif isinstance(annotations["workers"], str):
            annotations["workers"] = [annotations["workers"]]
        elif callable(annotations["workers"]):
            pass
        else:
            raise TypeError(
                "'workers' annotation must be a sequence of str, a str or a callable, but got %s."
                % annotations["workers"]
            )

    if (
        "priority" in annotations
        and not isinstance(annotations["priority"], Number)
        and not callable(annotations["priority"])
    ):
        raise TypeError(
            "'priority' annotation must be a Number or a callable, but got %s"
            % annotations["priority"]
        )

    if (
        "retries" in annotations
        and not isinstance(annotations["retries"], Number)
        and not callable(annotations["retries"])
    ):
        raise TypeError(
            "'retries' annotation must be a Number or a callable, but got %s"
            % annotations["retries"]
        )

    if (
        "resources" in annotations
        and not isinstance(annotations["resources"], dict)
        and not callable(annotations["resources"])
    ):
        raise TypeError(
            "'resources' annotation must be a dict, but got %s"
            % annotations["resources"]
        )

    if (
        "allow_other_workers" in annotations
        and not isinstance(annotations["allow_other_workers"], bool)
        and not callable(annotations["allow_other_workers"])
    ):
        raise TypeError(
            "'allow_other_workers' annotations must be a bool or a callable, but got %s"
            % annotations["allow_other_workers"]
        )

    prev_annotations = config.get("annotations", {})
    new_annotations = {
        **prev_annotations,
        **{f"annotations.{k}": v for k, v in annotations.items()},
    }

    with config.set(new_annotations):
        yield
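Note: as the implementation above shows, annotations are stored as ordinary config entries under an "annotations." prefix, so inside the context they can be inspected like any other key. A minimal sketch:

import dask

with dask.annotate(priority=100):
    assert dask.config.get("annotations.priority") == 100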
Example #22
def test_tree_reduce_set_options():
    x = da.from_array(np.arange(242).reshape((11, 22)), chunks=(3, 4))
    with config.set(split_every={0: 2, 1: 3}):
        assert_max_deps(x.sum(), 2 * 3)
        assert_max_deps(x.sum(axis=()), 1)
        assert_max_deps(x.sum(axis=0), 2)
Example #23
def setup_temp_root(**kwargs):
    """ Setup a temporary file root for testing purposes. """
    path = tempfile.mkdtemp(**kwargs)
    config.set({"geomodeling.root": path})
    return path
Example #24
    def generate_scripts(self):
        self.log_file.write("Reading from: \t" + self.cf.smx_path)
        self.log_file.write("Output folder: \t" + self.cf.output_path)
        self.log_file.write("SMX files:")
        print("Reading from: \t" + self.cf.smx_path)
        print("Output folder: \t" + self.cf.output_path)
        print("Scripts to be generated: \t" + self.scripts_flag)
        print("SMX files:")
        filtered_sources = []
        self.start_time = dt.datetime.now()

        try:
            smx_files = funcs.get_smx_files(self.cf.smx_path, self.smx_ext,
                                            self.sheets)
            for smx in smx_files:
                try:
                    self.count_smx = self.count_smx + 1
                    smx_file_path = self.cf.smx_path + "/" + smx
                    smx_file_name = os.path.splitext(smx)[0]
                    print("\t" + smx_file_name)
                    self.log_file.write("\t" + smx_file_name)
                    home_output_path = self.cf.output_path + "/" + smx_file_name

                    # self.parallel_remove_output_home_path.append(delayed(md.remove_folder)(home_output_path))
                    self.parallel_create_output_home_path.append(
                        delayed(md.create_folder)(home_output_path))

                    # COPY SMX USED INTO PATH OF ITS UDI SCRIPTS
                    smx_file_path_destination = os.path.join(
                        home_output_path, "USED_SMX_FILE")
                    self.parallel_create_smx_copy_path.append(
                        delayed(md.create_folder)(smx_file_path_destination))
                    smx_file_path_destination += '/' + smx_file_name + '.xlsx'
                    self.parallel_used_smx_copy.append(
                        delayed(shutil.copy)(smx_file_path,
                                             smx_file_path_destination))

                    self.parallel_templates.append(
                        delayed(gcfr.gcfr)(self.cf, home_output_path))
                    ##################################### end of read_smx_folder ################################
                    if self.cf.source_names:
                        System_sht_filter = [[
                            'Source system name', self.cf.source_names
                        ]]
                    else:
                        System_sht_filter = None

                    System = funcs.read_excel(smx_file_path,
                                              sheet_name=self.System_sht)
                    teradata_sources = System[System['Source type'] ==
                                              'TERADATA']
                    teradata_sources = funcs.df_filter(teradata_sources,
                                                       System_sht_filter,
                                                       False)
                    self.count_sources = self.count_sources + len(
                        teradata_sources.index)

                    Supplements = delayed(funcs.read_excel)(
                        smx_file_path, sheet_name=self.Supplements_sht)
                    Data_types = delayed(funcs.read_excel)(
                        smx_file_path, sheet_name=self.Data_types_sht)
                    Column_mapping = delayed(funcs.read_excel)(
                        smx_file_path, sheet_name=self.Column_mapping_sht)
                    BMAP_values = delayed(funcs.read_excel)(
                        smx_file_path, sheet_name=self.BMAP_values_sht)
                    BMAP = delayed(funcs.read_excel)(smx_file_path,
                                                     sheet_name=self.BMAP_sht)
                    BKEY = delayed(funcs.read_excel)(smx_file_path,
                                                     sheet_name=self.BKEY_sht)
                    Core_tables = delayed(funcs.read_excel)(
                        smx_file_path, sheet_name=self.Core_tables_sht)
                    Core_tables = delayed(funcs.rename_sheet_reserved_word)(
                        Core_tables, Supplements, 'TERADATA',
                        ['Column name', 'Table name'])
                    RI_relations = delayed(funcs.read_excel)(
                        smx_file_path, sheet_name=self.RI_relations_sht)
                    ##################################### end of read_smx_sheet ################################

                    for system_index, system_row in teradata_sources.iterrows(
                    ):
                        try:
                            Loading_Type = system_row['Loading type'].upper()
                            if Loading_Type != "":
                                source_name = system_row['Source system name']

                                filtered_sources.append(source_name)

                                source_name_filter = [[
                                    'Source', [source_name]
                                ]]
                                core_layer_filter = [['Layer', ["CORE"]]]
                                stg_layer_filter = [['Layer', ["STG"]]]
                                stg_source_name_filter = [[
                                    'Source system name', [source_name]
                                ]]

                                Table_mapping = delayed(funcs.read_excel)(
                                    smx_file_path, self.Table_mapping_sht,
                                    source_name_filter)

                                core_Table_mapping = delayed(funcs.df_filter)(
                                    Table_mapping, core_layer_filter, False)
                                stg_Table_mapping = delayed(funcs.df_filter)(
                                    Table_mapping, stg_layer_filter, False)

                                STG_tables = delayed(funcs.read_excel)(
                                    smx_file_path, self.STG_tables_sht,
                                    stg_source_name_filter)
                                STG_tables_export = delayed(funcs.read_excel)(
                                    smx_file_path, self.STG_tables_sht,
                                    stg_source_name_filter)

                                STG_tables = delayed(
                                    funcs.rename_sheet_reserved_word)(
                                        STG_tables, Supplements, 'TERADATA',
                                        ['Column name', 'Table name'])

                                main_output_path = home_output_path + "/" + Loading_Type + "/" + source_name
                                source_output_path = os.path.join(
                                    main_output_path, "UDI")
                                source_smx_output_path = os.path.join(
                                    source_output_path, "Source smx")

                                output_path_testing = os.path.join(
                                    main_output_path, "TestCases_scripts")
                                process_check_output_path_testing = os.path.join(
                                    output_path_testing,
                                    "PROCESS_CHECK_Cases_scripts")
                                cso_output_path_testing = os.path.join(
                                    output_path_testing, "CSO_Cases_scripts")
                                nulls_output_path_testing = os.path.join(
                                    output_path_testing, "NULLS_Cases_scripts")
                                duplicate_output_path_testing = os.path.join(
                                    output_path_testing,
                                    "DUPLICATE_Cases_scripts")
                                data_src_output_path_testing = os.path.join(
                                    output_path_testing,
                                    "DATA_SRC_Cases_scripts")
                                bmaps_output_path_testing = os.path.join(
                                    output_path_testing, "BMAPS_Cases_scripts")
                                history_output_path_testing = os.path.join(
                                    output_path_testing,
                                    "HISTORY_Cases_scripts")
                                ri_output_path_testing = os.path.join(
                                    output_path_testing, "RI_Cases_scripts")
                                input_view_output_path_testing = os.path.join(
                                    output_path_testing,
                                    "SMX QUALITY TESTING SCRIPTS")
                                compare_stg_counts_output_path_testing = os.path.join(
                                    output_path_testing, "STG TESTING SCRIPTS")

                                self.parallel_create_output_source_path.append(
                                    delayed(
                                        md.create_folder)(main_output_path))

                                #UDI SCRIPTS
                                if 'UDI' in self.scripts_flag:
                                    self.parallel_create_output_source_path.append(
                                        delayed(md.create_folder)(
                                            source_output_path))
                                    self.parallel_templates.append(
                                        delayed(D000.d000)(self.cf,
                                                           source_output_path,
                                                           source_name,
                                                           core_Table_mapping,
                                                           STG_tables, BKEY))
                                    self.parallel_templates.append(
                                        delayed(D001.d001)(self.cf,
                                                           source_output_path,
                                                           source_name,
                                                           STG_tables))
                                    self.parallel_templates.append(
                                        delayed(D002.d002)(self.cf,
                                                           source_output_path,
                                                           Core_tables,
                                                           core_Table_mapping))
                                    self.parallel_templates.append(
                                        delayed(D003.d003)(self.cf,
                                                           source_output_path,
                                                           source_name,
                                                           STG_tables,
                                                           BMAP_values, BMAP))

                                    self.parallel_templates.append(
                                        delayed(D110.d110)(self.cf,
                                                           source_output_path,
                                                           stg_Table_mapping,
                                                           STG_tables,
                                                           Loading_Type))

                                    self.parallel_templates.append(
                                        delayed(D200.d200)(self.cf,
                                                           source_output_path,
                                                           STG_tables,
                                                           Loading_Type))
                                    self.parallel_templates.append(
                                        delayed(D210.d210)(self.cf,
                                                           source_output_path,
                                                           STG_tables,
                                                           Loading_Type))

                                    self.parallel_templates.append(
                                        delayed(D215.d215)(self.cf,
                                                           source_output_path,
                                                           source_name,
                                                           system_row,
                                                           STG_tables))

                                    self.parallel_templates.append(
                                        delayed(D320.d320)(self.cf,
                                                           source_output_path,
                                                           STG_tables, BKEY))
                                    self.parallel_templates.append(
                                        delayed(D330.d330)(self.cf,
                                                           source_output_path,
                                                           STG_tables, BKEY))
                                    self.parallel_templates.append(
                                        delayed(D340.d340)(self.cf,
                                                           source_output_path,
                                                           STG_tables, BKEY))

                                    self.parallel_templates.append(
                                        delayed(D300.d300)(self.cf,
                                                           source_output_path,
                                                           STG_tables, BKEY))
                                    self.parallel_templates.append(
                                        delayed(D400.d400)(self.cf,
                                                           source_output_path,
                                                           STG_tables))
                                    self.parallel_templates.append(
                                        delayed(D410.d410)(self.cf,
                                                           source_output_path,
                                                           STG_tables))
                                    self.parallel_templates.append(
                                        delayed(D415.d415)(self.cf,
                                                           source_output_path,
                                                           STG_tables))
                                    self.parallel_templates.append(
                                        delayed(D420.d420)(self.cf,
                                                           source_output_path,
                                                           STG_tables, BKEY,
                                                           BMAP, Loading_Type))

                                    self.parallel_templates.append(
                                        delayed(D600.d600)(self.cf,
                                                           source_output_path,
                                                           core_Table_mapping,
                                                           Core_tables))
                                    self.parallel_templates.append(
                                        delayed(D607.d607)(self.cf,
                                                           source_output_path,
                                                           Core_tables,
                                                           BMAP_values))
                                    self.parallel_templates.append(
                                        delayed(D608.d608)(self.cf,
                                                           source_output_path,
                                                           source_name,
                                                           STG_tables,
                                                           Core_tables,
                                                           BMAP_values))
                                    self.parallel_templates.append(
                                        delayed(D609.d609)(self.cf,
                                                           source_output_path,
                                                           core_Table_mapping,
                                                           Core_tables))
                                    self.parallel_templates.append(
                                        delayed(D610.d610)(self.cf,
                                                           source_output_path,
                                                           core_Table_mapping,
                                                           STG_tables,
                                                           source_name))
                                    self.parallel_templates.append(
                                        delayed(D615.d615)(self.cf,
                                                           source_output_path,
                                                           Core_tables))
                                    self.parallel_templates.append(
                                        delayed(D620.d620)(
                                            self.cf, source_output_path,
                                            core_Table_mapping, Column_mapping,
                                            Core_tables, Loading_Type, 'UDI',
                                            STG_tables))
                                    self.parallel_templates.append(
                                        delayed(D630.d630)(self.cf,
                                                           source_output_path,
                                                           core_Table_mapping))
                                    self.parallel_templates.append(
                                        delayed(D640.d640)(self.cf,
                                                           source_output_path,
                                                           source_name,
                                                           core_Table_mapping))

                                #TESTING SCRIPTS
                                if 'Testing' in self.scripts_flag:
                                    #CREATING  PATHS FOR THE OUTPUT SCRIPTS
                                    self.parallel_create_output_source_path.append(
                                        delayed(md.create_folder)(
                                            output_path_testing))
                                    self.parallel_create_output_source_path.append(
                                        delayed(md.create_folder)(
                                            process_check_output_path_testing))
                                    self.parallel_create_output_source_path.append(
                                        delayed(md.create_folder)(
                                            cso_output_path_testing))
                                    self.parallel_create_output_source_path.append(
                                        delayed(md.create_folder)(
                                            nulls_output_path_testing))
                                    self.parallel_create_output_source_path.append(
                                        delayed(md.create_folder)(
                                            duplicate_output_path_testing))
                                    self.parallel_create_output_source_path.append(
                                        delayed(md.create_folder)(
                                            data_src_output_path_testing))
                                    self.parallel_create_output_source_path.append(
                                        delayed(md.create_folder)(
                                            bmaps_output_path_testing))
                                    self.parallel_create_output_source_path.append(
                                        delayed(md.create_folder)(
                                            history_output_path_testing))
                                    self.parallel_create_output_source_path.append(
                                        delayed(md.create_folder)(
                                            ri_output_path_testing))
                                    self.parallel_create_output_source_path.append(
                                        delayed(md.create_folder)(
                                            input_view_output_path_testing))
                                    self.parallel_create_output_source_path.append(
                                        delayed(md.create_folder)
                                        (compare_stg_counts_output_path_testing
                                         ))

                                    #
                                    self.parallel_templates.append(
                                        delayed(testing_script_01.
                                                source_testing_script)(
                                                    self.cf,
                                                    output_path_testing,
                                                    source_name,
                                                    core_Table_mapping,
                                                    Column_mapping, STG_tables,
                                                    BKEY))
                                    self.parallel_templates.append(
                                        delayed(testing_script_02.
                                                source_testing_script)(
                                                    self.cf,
                                                    output_path_testing,
                                                    source_name,
                                                    core_Table_mapping,
                                                    Core_tables))
                                    self.parallel_templates.append(
                                        delayed(PROCESS_CHECK_TEST_SHEET.
                                                process_check)
                                        (self.cf,
                                         process_check_output_path_testing,
                                         source_name, core_Table_mapping,
                                         Core_tables))
                                    self.parallel_templates.append(
                                        delayed(CSO_TEST_SHEET.cso_check)(
                                            self.cf, cso_output_path_testing,
                                            source_name, core_Table_mapping,
                                            Column_mapping))
                                    self.parallel_templates.append(
                                        delayed(NULLS_TEST_SHEET.nulls_check)(
                                            self.cf, nulls_output_path_testing,
                                            core_Table_mapping, Core_tables))
                                    self.parallel_templates.append(
                                        delayed(
                                            DUP_TEST_SHEET.duplicates_check)(
                                                self.cf,
                                                duplicate_output_path_testing,
                                                core_Table_mapping,
                                                Core_tables))
                                    self.parallel_templates.append(
                                        delayed(
                                            DATA_SRC_TEST_SHEET.data_src_check)
                                        (self.cf, data_src_output_path_testing,
                                         source_name, core_Table_mapping,
                                         Column_mapping))
                                    self.parallel_templates.append(
                                        delayed(
                                            BMAP_CHECK_TEST_SHEET.bmap_check)(
                                                self.cf,
                                                bmaps_output_path_testing,
                                                core_Table_mapping,
                                                Core_tables, BMAP_values))
                                    self.parallel_templates.append(
                                        delayed(BMAP_DUP_CD_TEST_SHEET.
                                                bmap_dup_check)(
                                                    self.cf,
                                                    bmaps_output_path_testing,
                                                    core_Table_mapping,
                                                    Core_tables, BMAP_values))
                                    self.parallel_templates.append(
                                        delayed(BMAP_DUP_DESC_TEST_SHEET.
                                                bmap_dup_desc_check)(
                                                    self.cf,
                                                    bmaps_output_path_testing,
                                                    core_Table_mapping,
                                                    Core_tables, BMAP_values))
                                    self.parallel_templates.append(
                                        delayed(BMAP_NULL_TEST_SHEET.
                                                bmap_null_check)(
                                                    self.cf,
                                                    bmaps_output_path_testing,
                                                    core_Table_mapping,
                                                    Core_tables, BMAP_values))
                                    self.parallel_templates.append(
                                        delayed(BMAP_UNMATCHED_TEST_SHEET.
                                                bmap_unmatched_values_check)(
                                                    self.cf,
                                                    bmaps_output_path_testing,
                                                    core_Table_mapping,
                                                    Core_tables, BMAP,
                                                    BMAP_values))
                                    self.parallel_templates.append(
                                        delayed(
                                            HIST_STRT_END_NULL_TEST_SHEET.
                                            hist_start_end_null_check)(
                                                self.cf,
                                                history_output_path_testing,
                                                core_Table_mapping,
                                                Core_tables))
                                    self.parallel_templates.append(
                                        delayed(
                                            HIST_DUP_TEST_SHEET.hist_dup_check)
                                        (self.cf, history_output_path_testing,
                                         core_Table_mapping, Core_tables))
                                    self.parallel_templates.append(
                                        delayed(
                                            HIST_STRT_GRT_END_TEST_SHEET.
                                            hist_start_end_null_check)(
                                                self.cf,
                                                history_output_path_testing,
                                                core_Table_mapping,
                                                Core_tables))
                                    self.parallel_templates.append(
                                        delayed(
                                            HIST_TIME_GAP_TEST_SHEET.
                                            hist_timegap_check)(
                                                self.cf,
                                                history_output_path_testing,
                                                core_Table_mapping,
                                                Core_tables))
                                    self.parallel_templates.append(
                                        delayed(
                                            HIST_STRT_NULL_TEST_SHEET.
                                            hist_start_null_check)(
                                                self.cf,
                                                history_output_path_testing,
                                                core_Table_mapping,
                                                Core_tables))
                                    self.parallel_templates.append(
                                        delayed(RI_TEST_SHEET.ri_check)(
                                            self.cf, ri_output_path_testing,
                                            core_Table_mapping, RI_relations))
                                    self.parallel_templates.append(
                                        delayed(D620.d620)(
                                            self.cf,
                                            input_view_output_path_testing,
                                            core_Table_mapping, Column_mapping,
                                            Core_tables, Loading_Type,
                                            'TESTING', STG_tables))
                                    self.parallel_templates.append(
                                        delayed(
                                            compare_testing_inputview.
                                            compare_views_check)(
                                                self.cf,
                                                input_view_output_path_testing,
                                                core_Table_mapping,
                                                'FROM_TESTING_TO_UDI'))
                                    self.parallel_templates.append(
                                        delayed(
                                            compare_testing_inputview.
                                            compare_views_check)(
                                                self.cf,
                                                input_view_output_path_testing,
                                                core_Table_mapping,
                                                'FROM_UDI_TO_TESTING'))
                                    self.parallel_templates.append(
                                        delayed(stgCounts.stgCounts)
                                        (self.cf,
                                         compare_stg_counts_output_path_testing,
                                         system_row, STG_tables, Loading_Type,
                                         'Accepted'))
                                    self.parallel_templates.append(
                                        delayed(stgCounts.stgCounts)
                                        (self.cf,
                                         compare_stg_counts_output_path_testing,
                                         system_row, STG_tables, Loading_Type,
                                         'All'))
                                    self.parallel_templates.append(
                                        delayed(dataValidation.dataValidation)
                                        (self.cf,
                                         compare_stg_counts_output_path_testing,
                                         source_name, system_row, STG_tables,
                                         Loading_Type, 'Accepted'))
                                    self.parallel_templates.append(
                                        delayed(dataValidation.dataValidation)
                                        (self.cf,
                                         compare_stg_counts_output_path_testing,
                                         source_name, system_row, STG_tables,
                                         Loading_Type, 'All'))

                                # TESTING SCRIPTS
                                if 'Source smx' in self.scripts_flag:
                                    self.parallel_create_output_source_path.append(
                                        delayed(md.create_folder)(
                                            source_smx_output_path))
                                    self.parallel_templates.append(
                                        delayed(
                                            generate_source_smx.source_smx)(
                                                STG_tables_export,
                                                Table_mapping, Column_mapping,
                                                System, BKEY, BMAP,
                                                BMAP_values, Supplements,
                                                Core_tables, Data_types,
                                                source_smx_output_path))

                        except Exception as e_source:
                            # print(error)

                            # log: smx_file_name, source_name
                            print(system_row.to_dict())
                            funcs.SMXFilesLogError(
                                self.cf.output_path, smx,
                                str(system_row.to_dict()),
                                traceback.format_exc()).log_error()
                            self.count_sources = self.count_sources - 1

                except Exception as e_smx_file:
                    # print(error)
                    funcs.SMXFilesLogError(self.cf.output_path, smx, None,
                                           traceback.format_exc()).log_error()
                    self.count_smx = self.count_smx - 1

        except Exception as e1:
            # print(error)
            # traceback.print_exc()
            self.elapsed_time = dt.datetime.now() - self.start_time
            funcs.SMXFilesLogError(self.cf.output_path, None, None,
                                   traceback.format_exc()).log_error()

        if len(self.parallel_templates) > 0:
            sources = funcs.list_to_string(filtered_sources, ', ')
            print("Sources:", sources)
            self.log_file.write("Sources:" + sources)
            scheduler_value = 'processes' if self.cf.read_sheets_parallel == 1 else ''
            with config.set(scheduler=scheduler_value):
                compute(*self.parallel_create_output_home_path)
                compute(*self.parallel_create_smx_copy_path)
                compute(*self.parallel_used_smx_copy)
                compute(*self.parallel_create_output_source_path)
                compute(*self.parallel_templates)
            self.error_message = ""
        else:
            self.error_message = "No SMX Files Found!"

        with ProgressBar():
            smx_files = " smx files" if self.count_smx > 1 else " smx file"
            smx_file_sources = " sources" if self.count_sources > 1 else " source"
            print("Start generating " + str(len(self.parallel_templates)) +
                  " script for " + str(self.count_sources) + smx_file_sources +
                  " from " + str(self.count_smx) + smx_files)
            self.log_file.write(
                str(len(self.parallel_templates)) + " script generated for " +
                str(self.count_sources) + smx_file_sources + " from " +
                str(self.count_smx) + smx_files)
            self.elapsed_time = dt.datetime.now() - self.start_time
            self.log_file.write("Elapsed Time: " + str(self.elapsed_time))

        if sys.platform == "win32":
            os.startfile(self.cf.output_path)
        else:
            opener = "open" if sys.platform == "darwin" else "xdg-open"
            subprocess.call([opener, self.cf.output_path])

        self.log_file.close()
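Note: the scheduler= key used by generate_scripts selects how compute() executes the delayed graph; "processes", "threads" and "synchronous" are the built-in named schedulers (an empty string is falsy, so dask appears to fall back to the collection default). A minimal sketch:

import dask
from dask import compute, delayed

@delayed
def inc(x):
    return x + 1

with dask.config.set(scheduler="synchronous"):
    (result,) = compute(inc(1))  # runs in the calling thread
assert result == 2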
Example #25
def test_set_hard_to_copyables():
    import threading

    with set(x=threading.Lock()):
        with set(y=1):
            pass
Example #26
File: base.py Project: yjx520/simpeg
def dask_linear_operator(self):
    self.nC = self.modelMap.shape[0]

    n_data_comp = len(self.survey.components)
    components = np.array(list(self.survey.components.keys()))
    active_components = np.hstack(
        [np.c_[values] for values in self.survey.components.values()]
    ).tolist()

    row = delayed(self.evaluate_integral, pure=True)
    rows = [
        array.from_delayed(
            row(receiver_location, components[component]),
            dtype=np.float32,
            shape=(n_data_comp, self.nC),
        )
        for receiver_location, component in zip(
            self.survey.receiver_locations.tolist(), active_components
        )
    ]
    stack = array.vstack(rows)

    # Chunking options
    if self.chunk_format == "row" or self.store_sensitivities == "forward_only":
        config.set({"array.chunk-size": f"{self.max_chunk_size}MiB"})
        # Auto-chunking by rows is faster and more memory-efficient for
        # very large problems' sensitivity and forward calculations
        stack = stack.rechunk({0: "auto", 1: -1})

    elif self.chunk_format == "equal":
        # Manual chunks for equal number of blocks along rows and columns.
        # Optimal for Jvec and Jtvec operations
        row_chunk, col_chunk = compute_chunk_sizes(*stack.shape, self.max_chunk_size)
        stack = stack.rechunk((row_chunk, col_chunk))
    else:
        # Auto chunking by columns is faster for Inversions
        config.set({"array.chunk-size": f"{self.max_chunk_size}MiB"})
        stack = stack.rechunk({0: -1, 1: "auto"})

    if self.store_sensitivities == "disk":
        sens_name = self.sensitivity_path + "sensitivity.zarr"
        if os.path.exists(sens_name):
            kernel = array.from_zarr(sens_name)
            if np.all(
                np.r_[
                    np.any(np.r_[kernel.chunks[0]] == stack.chunks[0]),
                    np.any(np.r_[kernel.chunks[1]] == stack.chunks[1]),
                    np.r_[kernel.shape] == np.r_[stack.shape],
                ]
            ):
                # Check that loaded kernel matches supplied data and mesh
                print("Zarr file detected with same shape and chunksize ... re-loading")
                return kernel
        else:
            print("Writing Zarr file to disk")
            with ProgressBar():
                print("Saving kernel to zarr: " + sens_name)
                kernel = array.to_zarr(
                    stack, sens_name, compute=True, return_stored=True, overwrite=True
                )
    elif self.store_sensitivities == "forward_only":
        with ProgressBar():
            print("Forward calculation: ")
            pred = (stack @ self.model).compute()
        return pred
    else:
        print(stack.chunks)
        with ProgressBar():
            print("Computing sensitivities to local ram")
            kernel = array.asarray(stack.compute())
    return kernel
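Note: in the snippet above, "array.chunk-size" steers the "auto" dimension of rechunk. A minimal sketch (the resulting sizes are approximate):

import dask
import dask.array as da

x = da.ones((100_000, 100), dtype="float32")
with dask.config.set({"array.chunk-size": "1MiB"}):
    y = x.rechunk({0: "auto", 1: -1})
# each block targets roughly 1 MiB: 1 MiB / (100 cols * 4 bytes) ≈ 2621 rows
print(y.chunks[0][:3])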
Example #27
def test_dask_setconfig():
    dask_config.set({"sql.foo.bar": 1})
    with dask_config.set({"sql.foo.baz": "2"}):
        assert dask_config.get("sql.foo") == {"bar": 1, "baz": "2"}
    assert dask_config.get("sql.foo") == {"bar": 1}
    dask_config.refresh()
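Note: the refresh() call above is the cleanup for the unscoped set(); it rebuilds the configuration from defaults, YAML files and environment variables, discarding ad-hoc mutations. A minimal sketch (assuming no installed package registers "sql" defaults):

import dask.config as dask_config

dask_config.set({"sql.foo.bar": 1})  # unscoped: persists in global config
assert dask_config.get("sql.foo.bar") == 1
dask_config.refresh()  # rebuilt from defaults; the ad-hoc key is gone
assert dask_config.get("sql.foo.bar", None) is None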
Example #28
    def generate_scripts(self):
        self.log_file.write("Reading from: \t" + self.cf.smx_path)
        self.log_file.write("Output folder: \t" + self.cf.output_path)
        self.log_file.write("SMX files:")
        print("Reading from: \t" + self.cf.smx_path)
        print("Output folder: \t" + self.cf.output_path)
        print("SMX files:")
        filtered_sources = []
        self.start_time = dt.datetime.now()

        try:
            smx_files = funcs.get_smx_files(self.cf.smx_path, self.smx_ext,
                                            self.staging_sheets,
                                            self.smx_sheets,
                                            self.scripts_generation_flag)
            for smx in smx_files:
                try:
                    self.count_smx = self.count_smx + 1
                    self.count_sources = 1
                    smx_file_path = self.cf.smx_path + "/" + smx
                    smx_file_name = os.path.splitext(smx)[0]
                    print("\t" + smx_file_name)
                    self.log_file.write("\t" + smx_file_name)
                    home_output_path = self.cf.output_path + "/" + smx_file_name + "/"
                    self.parallel_create_output_home_path.append(
                        delayed(md.create_folder)(home_output_path))
                    if self.scripts_generation_flag == 'Staging Tables':
                        main_output_path = home_output_path + "/" + "DDLs"
                        bteq_stg_dm_scripts_output_path = home_output_path + "/" + "BTEQ_Scripts" + "/" + "BTEQ_STG_TO_DATAMART_SCRIPTS"
                        bteq_stg_oi_scripts_output_path = home_output_path + "/" + "BTEQ_Scripts" + "/" + "BTEQ_STG_TO_OI_SCRIPTS"
                        self.parallel_create_output_source_path.append(
                            delayed(md.create_folder)(main_output_path))
                        self.parallel_create_output_source_path.append(
                            delayed(md.create_folder)(
                                bteq_stg_dm_scripts_output_path))
                        self.parallel_create_output_source_path.append(
                            delayed(md.create_folder)(
                                bteq_stg_oi_scripts_output_path))
                        Data_Types = delayed(funcs.read_excel)(
                            smx_file_path, sheet_name=self.Data_types_sht)
                        STG_tables = delayed(funcs.read_excel)(
                            smx_file_path, sheet_name=self.STG_tables_sht)
                        self.parallel_templates.append(
                            delayed(Staging_DDL.stg_temp_DDL)(self.cf,
                                                              main_output_path,
                                                              STG_tables,
                                                              Data_Types,
                                                              'Staging'))
                        self.parallel_templates.append(
                            delayed(Staging_DDL.stg_temp_DDL)(self.cf,
                                                              main_output_path,
                                                              STG_tables,
                                                              Data_Types,
                                                              'Data_mart'))
                        self.parallel_templates.append(
                            delayed(Staging_DDL.stg_temp_DDL)(self.cf,
                                                              main_output_path,
                                                              STG_tables,
                                                              Data_Types,
                                                              'OI_staging'))
                        self.parallel_templates.append(
                            delayed(Staging_DDL.stg_temp_DDL)(self.cf,
                                                              main_output_path,
                                                              STG_tables,
                                                              Data_Types,
                                                              'UV_staging'))
                        self.parallel_templates.append(
                            delayed(Staging_DDL.stg_temp_DDL)(self.cf,
                                                              main_output_path,
                                                              STG_tables,
                                                              Data_Types,
                                                              'LOG_staging'))

                        self.parallel_templates.append(
                            delayed(BTEQ_Scripts.bteq_temp_script)(
                                self.cf, bteq_stg_dm_scripts_output_path,
                                STG_tables, 'from stg to datamart'))
                        self.parallel_templates.append(
                            delayed(BTEQ_Scripts.bteq_temp_script)(
                                self.cf, bteq_stg_oi_scripts_output_path,
                                STG_tables, 'from stg to oi'))
                    elif self.scripts_generation_flag == 'SMX':
                        main_output_path_apply = home_output_path + "/" + "APPLY_SCRIPTS"
                        main_output_path_sgk = home_output_path + "/" + "SGK"
                        main_output_path_TFN = home_output_path + "/" + "TFN"
                        secondary_output_path_TFN = home_output_path + "/" + "SPECIAL_ATTENTION" + "/" + "TFN"
                        secondary_output_path_HIST = home_output_path + "/" + "SPECIAL_ATTENTION"
                        historyLegacy_subsequent_histLoads_path = home_output_path + "/" + "APPLY_SCRIPTS" + "/" + "Apply_History_Legacy/SUBSEQUENT_LOADS"
                        #historyLegacy_subsequent_histLoads_path_secondary = home_output_path + "/" + "SPECIAL_ATTENTION"
                        source_name = self.cf.sgk_source
                        self.parallel_create_output_source_path.append(
                            delayed(md.create_folder)(main_output_path_apply))
                        self.parallel_create_output_source_path.append(
                            delayed(md.create_folder)(main_output_path_sgk))
                        self.parallel_create_output_source_path.append(
                            delayed(md.create_folder)(main_output_path_TFN))
                        self.parallel_create_output_source_path.append(
                            delayed(
                                md.create_folder)(secondary_output_path_TFN))
                        self.parallel_create_output_source_path.append(
                            delayed(
                                md.create_folder)(secondary_output_path_HIST))
                        self.parallel_create_output_source_path.append(
                            delayed(md.create_folder)(
                                historyLegacy_subsequent_histLoads_path))
                        smx_sheet = delayed(funcs.read_excel)(
                            smx_file_path, sheet_name=self.smx_sheet)

                        if source_name != 'ALL':
                            smx_sheet = smx_sheet[smx_sheet['Stg_Schema'] ==
                                                  source_name]
                        Rid_list = self.cf.Rid_List

                        print("RIDLIST", Rid_list)
                        if not Rid_list:
                            smx_sheet = smx_sheet
                        else:
                            smx_sheet = smx_sheet[smx_sheet.Record_ID.isin(
                                Rid_list)]

                        hist_legacy_subsheet = funcs.histLegacy_To_hist_for_subsequent_runs_df(
                            smx_sheet)

                        self.parallel_templates.append(
                            delayed(History_Legacy_Apply.history_legacy_apply)(
                                self.cf, main_output_path_apply,
                                secondary_output_path_HIST, smx_sheet))
                        self.parallel_templates.append(
                            delayed(History_Apply.history_apply)(
                                self.cf, main_output_path_apply,
                                secondary_output_path_HIST,
                                hist_legacy_subsheet, False))

                        self.parallel_templates.append(
                            delayed(Apply_Insert_Upsert.apply_insert_upsert)(
                                self.cf, main_output_path_apply, smx_sheet,
                                "Apply_Insert"))
                        self.parallel_templates.append(
                            delayed(Apply_Insert_Upsert.apply_insert_upsert)(
                                self.cf, main_output_path_apply, smx_sheet,
                                "Apply_Upsert"))
                        self.parallel_templates.append(
                            delayed(Apply_Insert_Upsert.apply_insert_upsert)(
                                self.cf, main_output_path_apply, smx_sheet,
                                "Apply_Delete_Insert"))
                        self.parallel_templates.append(
                            delayed(History_Apply.history_apply)(
                                self.cf, main_output_path_apply,
                                secondary_output_path_HIST, smx_sheet, True))

                        self.parallel_templates.append(
                            delayed(History_Delete_Insert_Apply.
                                    history_delete_insert_apply)(
                                        self.cf, main_output_path_apply,
                                        secondary_output_path_HIST, smx_sheet))
                        self.parallel_templates.append(
                            delayed(SGK_insertion.sgk_insertion)(
                                self.cf, main_output_path_sgk, smx_sheet))
                        self.parallel_templates.append(
                            delayed(TFN_insertion.TFN_insertion)(
                                self.cf, main_output_path_TFN,
                                secondary_output_path_TFN, smx_sheet))

                except Exception as e_smx_file:
                    # print(error)
                    funcs.SMXFilesLogError(self.cf.output_path, smx, None,
                                           traceback.format_exc()).log_error()
                    self.count_smx = self.count_smx - 1
        except Exception as e1:
            self.elapsed_time = dt.datetime.now() - self.start_time
            funcs.SMXFilesLogError(self.cf.output_path, None, None,
                                   traceback.format_exc()).log_error()

        if len(self.parallel_templates) > 0:
            sources = funcs.list_to_string(filtered_sources, ', ')
            print("Sources:", sources)
            self.log_file.write("Sources:" + sources)
            # An empty string is falsy, so dask falls back to its default scheduler
            scheduler_value = 'processes' if self.cf.read_sheets_parallel == 1 else ''
            with config.set(scheduler=scheduler_value):
                compute(*self.parallel_create_output_home_path)
                compute(*self.parallel_create_output_source_path)
            self.error_message = ""
        else:
            self.error_message = "No SMX Files Found!"

        with ProgressBar():
            smx_files = " smx files" if self.count_smx > 1 else " smx file"
            smx_file_sources = " sources" if self.count_sources > 1 else " source"
            scripts_word = " scripts" if len(self.parallel_templates) > 1 else " script"
            print("Start generating " + str(len(self.parallel_templates)) +
                  scripts_word + " for " + str(self.count_sources) +
                  smx_file_sources + " from " + str(self.count_smx) + smx_files)
            compute(*self.parallel_templates)
            self.log_file.write(
                str(len(self.parallel_templates)) + scripts_word +
                " generated for " + str(self.count_sources) + smx_file_sources +
                " from " + str(self.count_smx) + smx_files)
            self.elapsed_time = dt.datetime.now() - self.start_time
            self.log_file.write("Elapsed Time: " + str(self.elapsed_time))

        if sys.platform == "win32":
            os.startfile(self.cf.output_path)
        else:
            opener = "open" if sys.platform == "darwin" else "xdg-open"
            subprocess.call([opener, self.cf.output_path])

        self.log_file.close()
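The method above follows a build-then-execute pattern: tasks accumulate as dask.delayed calls and run in one batch under a scheduler chosen via dask.config.set. A minimal sketch, with create_folder standing in for md.create_folder:

from dask import compute, config, delayed

def create_folder(path):                  # illustrative stand-in for md.create_folder
    print("creating", path)

tasks = [delayed(create_folder)(f"out/{i}") for i in range(3)]
with config.set(scheduler="threads"):     # the method above picks "processes" or the default
    compute(*tasks)                        # execute the whole batch at once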
Example #29
    def test_safe_file_url(self):
        f = utils.safe_file_url
        if not sys.platform.startswith("win"):
            # prepends file:// if necessary
            assert f("/tmp") == "file:///tmp"
            assert f("/tmp", "/") == "file:///tmp"

            # absolute input
            assert f("file:///tmp") == "file:///tmp"
            assert f("file:///tmp", "/") == "file:///tmp"
            assert f("file://tmp", "/") == "file:///tmp"

            # relative input
            assert f("path", "/tmp/abs") == "file:///tmp/abs/path"
            assert f("../abs/path", "/tmp/abs") == "file:///tmp/abs/path"

            # raise on unknown protocol
            with pytest.raises(NotImplementedError):
                f("unknown://tmp")

            # paths outside of 'start'
            assert f("file://../x", "/tmp") == "file:///x"
            assert f("/etc/abs", "/tmp") == "file:///etc/abs"
            assert f("../", "/tmp") == "file:///"

            # raise on path outside start when strict-file-paths=True
            with config.set({"geomodeling.strict-file-paths": True}):
                with pytest.raises(IOError):
                    f("file://../x", "/tmp")
                with pytest.raises(IOError):
                    f("/etc/abs", "/tmp")
                with pytest.raises(IOError):
                    f("../", "/tmp")
        else:
            # prepends file:// if necessary
            assert f("C:\\tmp") == "file://C:\\tmp"
            assert f("C:\\tmp", "C:\\") == "file://C:\\tmp"

            # absolute input
            assert f("file://C:\\tmp") == "file://C:\\tmp"
            assert f("file://C:\\tmp", "C:\\") == "file://C:\\tmp"
            assert f("file://tmp", "C:\\") == "file://C:\\tmp"

            # relative input
            assert f("path", "C:\\tmp\\abs") == "file://C:\\tmp\\abs\\path"
            assert f("..\\abs\\path", "C:\\tmp\\abs") == "file://C:\\tmp\\abs\\path"

            # raise on unknown protocol
            with pytest.raises(NotImplementedError):
                f("unknown://tmp")

            # paths outside of 'start'
            assert f("file://..\\x", "C:\\tmp") == "file://C:\\x"
            assert f("D:\\tmp", "C:\\tmp") == "file://D:\\tmp"
            assert f("..\\", "C:\\tmp") == "file://C:\\"

            # raise on path outside start when strict-file-paths=True
            with config.set({"geomodeling.strict-file-paths": True}):
                with pytest.raises(IOError):
                    f("file://..\\x", "C:\\tmp")
                with pytest.raises(IOError):
                    f("D:\\tmp", "C:\\tmp")
                with pytest.raises(IOError):
                    f("..\\", "C:\\tmp")
Example #30
        del X, Y

        X, Y = self.make_dataset(table=self.t, var='SB', additional=['L_MAX', 'LAYER', 'SOL_Z'])
        X, Y = self.duplicate_dataset(X, Y)
        self.SB = continuous.model('SB', X, Y, x, logger=logger, load_save=True)
        del X, Y

        X, Y = self.make_dataset(table=self.t, var='CS', additional=['L_MAX', 'LAYER', 'SOL_Z', 'SOL_SAND'])
        X, Y = self.duplicate_dataset(X, Y)
        x = self.modify_dataset(x, series=self.SOL_Z.Y_mod, name='SOL_SAND')
        self.CS = continuous.model('CS', X, Y, x, logger=logger, load_save=True)
        del X, Y

        X, Y = self.make_dataset(table=self.t, var='FS', additional=['L_MAX', 'LAYER', 'SOL_Z', 'SOL_SAND'])
        X, Y = self.duplicate_dataset(X, Y)
        self.FS = continuous.model('FS', X, Y, x, logger=logger, load_save=True)
        del X, Y, x

        self.write_results()

if __name__ == '__main__':
    from dask.diagnostics import ProgressBar
    from dask import config
    from multiprocessing import freeze_support
    freeze_support()
    pbar = ProgressBar()
    pbar.register()
    config.set(scheduler='processes')
    main(r"C:\Users\putzr\Documents\GitHub\sleepy\model\training.txt",
         r"C:\Users\putzr\Documents\GitHub\sleepy\model\modelling.txt").run()
    pbar.unregister()
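A brief sketch of why the __main__ guard and freeze_support() above matter when using the processes scheduler on Windows: worker processes re-import the main module, so any scheduling code outside the guard would run again in every child. The square function is an illustrative workload.

import dask
from dask import config
from multiprocessing import freeze_support

def square(x):                            # illustrative workload
    return x * x

if __name__ == '__main__':
    freeze_support()                      # needed only when frozen into an executable
    config.set(scheduler='processes')
    print(dask.compute(*[dask.delayed(square)(i) for i in range(4)]))  # (0, 1, 4, 9)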
Example #31
    def generate_scripts(self):
        self.log_file.write("Reading from: \t" + self.cf.smx_path)
        self.log_file.write("Output folder: \t" + self.cf.output_path)
        self.log_file.write("SMX files:")
        print("Reading from: \t" + self.cf.smx_path)
        print("Output folder: \t" + self.cf.output_path)
        print("SMX files:")
        filtered_sources = []
        self.start_time = dt.datetime.now()
        try:
            smx_files = funcs.get_smx_files(self.cf.smx_path, self.smx_ext,
                                            self.sheets)
            for smx in smx_files:
                try:
                    self.count_smx = self.count_smx + 1
                    smx_file_path = self.cf.smx_path + "/" + smx
                    smx_file_name = os.path.splitext(smx)[0]
                    print("\t" + smx_file_name)
                    self.log_file.write("\t" + smx_file_name)
                    home_output_path = self.cf.output_path + "/" + smx_file_name + "/"

                    # self.parallel_remove_output_home_path.append(delayed(md.remove_folder)(home_output_path))
                    self.parallel_create_output_home_path.append(
                        delayed(md.create_folder)(home_output_path))

                    self.parallel_templates.append(
                        delayed(gcfr.gcfr)(self.cf, home_output_path))
                    ##################################### end of read_smx_folder ################################
                    if self.cf.source_names:
                        System_sht_filter = [[
                            'Source system name', self.cf.source_names
                        ]]
                    else:
                        System_sht_filter = None

                    System = funcs.read_excel(smx_file_path,
                                              sheet_name=self.System_sht)
                    teradata_sources = System[System['Source type'] ==
                                              'TERADATA']
                    teradata_sources = funcs.df_filter(teradata_sources,
                                                       System_sht_filter,
                                                       False)
                    self.count_sources = self.count_sources + len(
                        teradata_sources.index)

                    Supplements = delayed(funcs.read_excel)(
                        smx_file_path, sheet_name=self.Supplements_sht)
                    Column_mapping = delayed(funcs.read_excel)(
                        smx_file_path, sheet_name=self.Column_mapping_sht)
                    BMAP_values = delayed(funcs.read_excel)(
                        smx_file_path, sheet_name=self.BMAP_values_sht)
                    BMAP = delayed(funcs.read_excel)(smx_file_path,
                                                     sheet_name=self.BMAP_sht)
                    BKEY = delayed(funcs.read_excel)(smx_file_path,
                                                     sheet_name=self.BKEY_sht)
                    Core_tables = delayed(funcs.read_excel)(
                        smx_file_path, sheet_name=self.Core_tables_sht)
                    Core_tables = delayed(funcs.rename_sheet_reserved_word)(
                        Core_tables, Supplements, 'TERADATA',
                        ['Column name', 'Table name'])
                    ##################################### end of read_smx_sheet ################################

                    for system_index, system_row in teradata_sources.iterrows():
                        try:
                            Loading_Type = system_row['Loading type'].upper()
                            if Loading_Type != "":
                                source_name = system_row['Source system name']
                                filtered_sources.append(source_name)

                                source_name_filter = [[
                                    'Source', [source_name]
                                ]]
                                core_layer_filter = [['Layer', ["CORE"]]]
                                stg_layer_filter = [['Layer', ["STG"]]]
                                stg_source_name_filter = [[
                                    'Source system name', [source_name]
                                ]]

                                Table_mapping = delayed(funcs.read_excel)(
                                    smx_file_path, self.Table_mapping_sht,
                                    source_name_filter)

                                core_Table_mapping = delayed(funcs.df_filter)(
                                    Table_mapping, core_layer_filter, False)
                                stg_Table_mapping = delayed(funcs.df_filter)(
                                    Table_mapping, stg_layer_filter, False)

                                STG_tables = delayed(funcs.read_excel)(
                                    smx_file_path, self.STG_tables_sht,
                                    stg_source_name_filter)
                                STG_tables = delayed(
                                    funcs.rename_sheet_reserved_word)(
                                        STG_tables, Supplements, 'TERADATA',
                                        ['Column name', 'Table name'])

                                source_output_path = home_output_path + "/" + Loading_Type + "/" + source_name

                                self.parallel_create_output_source_path.append(
                                    delayed(
                                        md.create_folder)(source_output_path))

                                self.parallel_templates.append(
                                    delayed(D000.d000)(self.cf,
                                                       source_output_path,
                                                       source_name,
                                                       core_Table_mapping,
                                                       STG_tables, BKEY))
                                self.parallel_templates.append(
                                    delayed(D001.d001)(self.cf,
                                                       source_output_path,
                                                       source_name,
                                                       STG_tables))
                                self.parallel_templates.append(
                                    delayed(D002.d002)(self.cf,
                                                       source_output_path,
                                                       Core_tables,
                                                       core_Table_mapping))
                                self.parallel_templates.append(
                                    delayed(D003.d003)(self.cf,
                                                       source_output_path,
                                                       BMAP_values, BMAP))

                                self.parallel_templates.append(
                                    delayed(D110.d110)(self.cf,
                                                       source_output_path,
                                                       stg_Table_mapping,
                                                       STG_tables,
                                                       Loading_Type))

                                self.parallel_templates.append(
                                    delayed(D200.d200)(self.cf,
                                                       source_output_path,
                                                       STG_tables,
                                                       Loading_Type))
                                self.parallel_templates.append(
                                    delayed(D210.d210)(self.cf,
                                                       source_output_path,
                                                       STG_tables,
                                                       Loading_Type))

                                self.parallel_templates.append(
                                    delayed(D300.d300)(self.cf,
                                                       source_output_path,
                                                       STG_tables, BKEY))
                                self.parallel_templates.append(
                                    delayed(D320.d320)(self.cf,
                                                       source_output_path,
                                                       STG_tables, BKEY))
                                self.parallel_templates.append(
                                    delayed(D330.d330)(self.cf,
                                                       source_output_path,
                                                       STG_tables, BKEY))
                                self.parallel_templates.append(
                                    delayed(D340.d340)(self.cf,
                                                       source_output_path,
                                                       STG_tables, BKEY))

                                # self.parallel_templates.append(delayed(D400.d400)(self.cf, source_output_path, STG_tables))
                                # self.parallel_templates.append(delayed(D410.d410)(self.cf, source_output_path, STG_tables))
                                # self.parallel_templates.append(delayed(D415.d415)(self.cf, source_output_path, STG_tables))
                                self.parallel_templates.append(
                                    delayed(D420.d420)(self.cf,
                                                       source_output_path,
                                                       STG_tables, BKEY, BMAP,
                                                       Loading_Type))

                                self.parallel_templates.append(
                                    delayed(D600.d600)(self.cf,
                                                       source_output_path,
                                                       core_Table_mapping,
                                                       Core_tables))
                                self.parallel_templates.append(
                                    delayed(D607.d607)(self.cf,
                                                       source_output_path,
                                                       Core_tables,
                                                       BMAP_values))
                                self.parallel_templates.append(
                                    delayed(D608.d608)(self.cf,
                                                       source_output_path,
                                                       Core_tables,
                                                       BMAP_values))
                                self.parallel_templates.append(
                                    delayed(D610.d610)(self.cf,
                                                       source_output_path,
                                                       core_Table_mapping))
                                self.parallel_templates.append(
                                    delayed(D615.d615)(self.cf,
                                                       source_output_path,
                                                       Core_tables))
                                self.parallel_templates.append(
                                    delayed(D620.d620)(self.cf,
                                                       source_output_path,
                                                       core_Table_mapping,
                                                       Column_mapping,
                                                       Core_tables,
                                                       Loading_Type))
                                self.parallel_templates.append(
                                    delayed(D630.d630)(self.cf,
                                                       source_output_path,
                                                       core_Table_mapping))
                                self.parallel_templates.append(
                                    delayed(D640.d640)(self.cf,
                                                       source_output_path,
                                                       source_name,
                                                       core_Table_mapping))

                                self.parallel_templates.append(
                                    delayed(
                                        testing_script_01.source_testing_script
                                    )(self.cf, source_output_path, source_name,
                                      core_Table_mapping, Column_mapping,
                                      STG_tables, BKEY))
                                self.parallel_templates.append(
                                    delayed(
                                        testing_script_02.source_testing_script
                                    )(self.cf, source_output_path, source_name,
                                      Table_mapping, Core_tables))

                        except Exception as e_source:
                            # print(error)

                            # log: smx_file_name, source_name
                            print(system_row.to_dict())
                            funcs.SMXFilesLogError(
                                self.cf.output_path, smx,
                                str(system_row.to_dict()),
                                traceback.format_exc()).log_error()
                            self.count_sources = self.count_sources - 1

                except Exception as e_smx_file:
                    # print(error)
                    funcs.SMXFilesLogError(self.cf.output_path, smx, None,
                                           traceback.format_exc()).log_error()
                    self.count_smx = self.count_smx - 1

        except Exception as e1:
            # print(error)
            # traceback.print_exc()
            funcs.SMXFilesLogError(self.cf.output_path, None, None,
                                   traceback.format_exc()).log_error()

        if len(self.parallel_templates) > 0:
            sources = funcs.list_to_string(filtered_sources, ', ')
            print("Sources:", sources)
            self.log_file.write("Sources:" + sources)
            # An empty string is falsy, so dask falls back to its default scheduler
            scheduler_value = 'processes' if self.cf.read_sheets_parallel == 1 else ''
            with config.set(scheduler=scheduler_value):
                # compute(*self.parallel_remove_output_home_path)
                compute(*self.parallel_create_output_home_path)
                compute(*self.parallel_create_output_source_path)

                with ProgressBar():
                    smx_files = " smx files" if self.count_smx > 1 else " smx file"
                    smx_file_sources = " sources" if self.count_sources > 1 else " source"
                    scripts_word = " scripts" if len(self.parallel_templates) > 1 else " script"
                    print("Start generating " +
                          str(len(self.parallel_templates)) + scripts_word +
                          " for " + str(self.count_sources) + smx_file_sources +
                          " from " + str(self.count_smx) + smx_files)
                    compute(*self.parallel_templates)
                    self.log_file.write(
                        str(len(self.parallel_templates)) + scripts_word +
                        " generated for " + str(self.count_sources) +
                        smx_file_sources + " from " + str(self.count_smx) +
                        smx_files)
                    self.elapsed_time = dt.datetime.now() - self.start_time
                    self.log_file.write("Elapsed Time: " +
                                        str(self.elapsed_time))
            self.error_message = ""
            os.startfile(self.cf.output_path)  # Windows-only; opens the output folder
        else:
            self.error_message = "No SMX Files Found!"

        self.log_file.close()
def test_get_context_always_default():
    """On Python 2/Windows, get_context() always returns the same context."""
    assert get_context() is multiprocessing
    with pytest.warns(UserWarning):
        with config.set({"multiprocessing.context": "forkserver"}):
            assert get_context() is multiprocessing
Example #33
    def sql(
        self,
        sql: str,
        return_futures: bool = True,
        dataframes: Dict[str, Union[dd.DataFrame, pd.DataFrame]] = None,
        gpu: bool = False,
        config_options: Dict[str, Any] = None,
    ) -> Union[dd.DataFrame, pd.DataFrame]:
        """
        Query the registered tables with the given SQL.
        The SQL follows approximately the PostgreSQL standard; however, not all
        operations are implemented yet.
        In general, only SELECT statements (no data manipulation) work.

        For more information, see :ref:`sql`.

        Example:
            In this example, a query is called
            using the registered tables and then
            executed using dask.

            .. code-block:: python

                result = c.sql("SELECT a, b FROM my_table")
                print(result.compute())

        Args:
            sql (:obj:`str`): The query string to execute
            return_futures (:obj:`bool`): Return the unexecuted dask dataframe or the data itself.
                Defaults to returning the dask dataframe.
            dataframes (:obj:`Dict[str, dask.dataframe.DataFrame]`): additional Dask or pandas dataframes
                to register before executing this query
            gpu (:obj:`bool`): Whether or not to load the additional Dask or pandas dataframes (if any) on GPU;
                requires cuDF / dask-cuDF if enabled. Defaults to False.
            config_options (:obj:`Dict[str,Any]`): Specific configuration options to pass during
                query execution

        Returns:
            :obj:`dask.dataframe.DataFrame`: the data frame created by this query.

        """
        with dask_config.set(config_options):
            if dataframes is not None:
                for df_name, df in dataframes.items():
                    self.create_table(df_name, df, gpu=gpu)

            rel, select_names, _ = self._get_ral(sql)

            dc = RelConverter.convert(rel, context=self)

            if dc is None:
                return

            if select_names:
                # Rename any columns named EXPR$* to a more human readable name
                cc = dc.column_container
                cc = cc.rename({
                    df_col: select_name
                    for df_col, select_name in zip(cc.columns, select_names)
                })
                dc = DataContainer(dc.df, cc)

            df = dc.assign()
            if not return_futures:
                df = df.compute()

        return df
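A hedged usage sketch for config_options, assuming dask-sql's Context and an in-memory pandas frame; "sql.foo.bar" is a placeholder key, not a documented option. The options are applied via dask.config.set only for the duration of the query.

import pandas as pd
from dask_sql import Context

c = Context()
c.create_table("my_table", pd.DataFrame({"a": [1, 2], "b": [30, 40]}))
result = c.sql(
    "SELECT a, b FROM my_table",
    return_futures=False,                 # compute eagerly, return pandas
    config_options={"sql.foo.bar": 1},    # placeholder option key
)
print(result)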
Example #34
def teardown_temp_root(path):
    """ Delete the temporary file root. """
    shutil.rmtree(path)
    config.set({"geomodeling.root": defaults["root"]})