def func():
    with worker_client() as ee:
        x = ee.submit(inc, 1, workers=a_address)
        y = ee.submit(inc, 2, workers=b_address)

        xx, yy = ee.gather([x, y])
    return xx, yy
def f(i):
    with worker_client(separate_thread=False) as client:
        get_worker().count += 1
        assert get_worker().count <= 3
        sleep(random.random() / 40)
        assert get_worker().count <= 3
        get_worker().count -= 1
    return i
def mysum():
    result = 0
    sub_tasks = [delayed(double)(i) for i in range(100)]

    with worker_client() as lc:
        futures = lc.compute(sub_tasks)
        for f in as_completed(futures):
            result += f.result()
    return result
Example #4
def f(i):
    with worker_client() as c:
        q = Queue('x', client=c)
        for _ in range(100):
            future = q.get()
            x = future.result()
            y = c.submit(inc, x)
            q.put(y)
            sleep(0.01)
        result = q.get().result()
        return result
def func():
    with worker_client() as c:
        correct = True
        for data in [[1, 2], (1, 2), {1, 2}]:
            futures = c.scatter(data)
            correct &= type(futures) == type(data)

        o = object()
        futures = c.scatter({'x': o})
        correct &= get_worker().data['x'] is o
        return correct
Example #6
def f(i):
    with worker_client() as c:
        v = Variable('x', client=c)
        for _ in range(NITERS):
            future = v.get()
            x = future.result()
            y = c.submit(inc, x)
            v.set(y)
            sleep(0.01 * random.random())
        result = v.get().result()
        sleep(0.1)  # allow fire-and-forget messages to clear
        return result
def func():
    with worker_client() as c:
        futures = c.scatter([1, 2, 3, 4, 5])
        assert isinstance(futures, (list, tuple))
        assert len(futures) == 5

        x = dict(get_worker().data)
        y = {f.key: i for f, i in zip(futures, [1, 2, 3, 4, 5])}
        assert x == y

        total = c.submit(sum, futures)
        return total.result()
Example #8
def func():
    with worker_client() as c:
        x = np.ones(5)
        future = c.scatter(x)
        assert future.type == np.ndarray

def f():
    with worker_client() as lc:
        return lc.loop is get_worker().loop
Example #10
def full_func(*args, **kwargs):
    print('DECORATOR distributed...', args)

    # Only intercept calls whose first argument is an I/O path
    if not is_io_path(args[0]):
        return func(*args, **kwargs)

    from distributed import worker_client
    with worker_client(timeout=1000) as e:
        # Collect modification times on a file worker (tasks requiring the 'files' resource)
        mtimes = e.submit(recursive_func_application_with_list_output,
                          *(list(args), get_mtime_from_path),
                          resources={'files': 1}).result()
        highest_mtime = np.array(mtimes[1:]).max()

        # If the output already exists and is newer than every input, skip the computation
        if mtimes[0] != -1:
            if mtimes[0] >= highest_mtime:
                return args[0]

        # Pre-process the remaining arguments on a file worker
        nargs = []
        for iarg, arg in enumerate(args):
            if not iarg:
                continue
            res = e.submit(recursive_func_application,
                           *(arg, process_input_element),
                           resources={'files': 1}).result()
            nargs.append(res)

        result = func(*nargs, **kwargs)

        # Post-process the result on a file worker
        nresult = e.submit(process_output_element,
                           *(result, args[0]),
                           resources={'files': 1}).result()
    return nresult
def f():
    with worker_client():
        pass

    return threading.current_thread() in get_worker().executor._threads
def func(x):
    with worker_client() as wc:
        y = wc.submit(lambda: 1 + x)
        return wc.gather(y)

def func():
    with worker_client(timeout=0) as wc:
        print("hello")

def mysum():
    with worker_client() as c:
        with c.get_executor() as e:
            return sum(e.map(double, range(30)))
Example #15
def f():
    with worker_client():
        return dask.delayed(lambda x: x)(1).compute()
Example #16
def func(x):
    with worker_client() as wc:
        y = wc.submit(lambda: 1 + x)
        return wc.gather(y)
Example #17
def func():
    with worker_client(timeout=0) as wc:
        print('hello')
Example #18
def f(x):
    with worker_client() as c:
        return True
Example #19
def add(x, y):
    with distributed.worker_client():
        time.sleep(30 * 60)
    return x + y
Example #20
def long(delay):
    with worker_client() as c:
        sleep(delay)
Example #21
    def run(self,
            matrices: Optional[Union[Union[str, Path], List[Path]]] = None,
            filepath_column: str = "filepath",
            **kwargs) -> List[Path]:
        """
        Invert the list of matrices provided.

        If running from the command line, this will look up the manifest produced by
        the prior step for matrix retrieval. If running in the workflow, it uses the
        direct output of the prior step. (A small standalone sketch of the manifest
        resolution follows this method.)

        Parameters
        ----------
        matrices: Optional[Union[Union[str, Path], List[Path]]]
            A path to a CSV manifest to use, or a list of paths to serialized arrays
            to invert directly.
            Default: self.step_local_staging_dir.parent / "mappedraw" / manifest.csv
        filepath_column: str
            If providing a path to a CSV manifest, the column to use for matrices.
            Default: "filepath"

        Returns
        -------
        inverted: List[Path]
            The list of paths to the inverted matrices.
        """
        # Default matrices value
        if matrices is None:
            matrices = self.step_local_staging_dir.parent / "mappedraw" / "manifest.csv"

        # Get the matrices from the csv if provided a path
        if isinstance(matrices, (str, Path)):
            # Resolve the filepath and check for existence
            matrices = Path(matrices).resolve(strict=True)

            # Read csv
            raw_data = pd.read_csv(matrices)

            # Convert the specified column into a list of paths
            matrices = [Path(f) for f in raw_data[filepath_column]]

        # Storage dir
        inverted_dir = self.step_local_staging_dir / "inverted"

        # Connect to an executor
        with worker_client() as client:
            # Submit one inversion task per matrix
            futures = client.map(
                self._invert_array,
                matrices,
                [inverted_dir for i in range(len(matrices))],
            )

            # Blocking until all are done
            inversion_infos = client.gather(futures)

        # Configure manifest dataframe for storage tracking
        self.manifest = pd.DataFrame(index=range(len(matrices)),
                                     columns=["filepath"])
        for i, path in inversion_infos:
            self.manifest.at[i, "filepath"] = path

        # Save the manifest
        self.manifest.to_csv(self.step_local_staging_dir / "manifest.csv",
                             index=False)

        # Return list of paths
        return list(self.manifest["filepath"])
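The docstring above describes how this step resolves its input manifest before mapping work across the cluster. That manifest-resolution logic can be exercised on its own; a minimal, standalone sketch (the tiny CSV built here is made up for illustration):

import pandas as pd
from pathlib import Path

# Build a tiny manifest like the one the prior step would produce
manifest = Path("manifest.csv")
pd.DataFrame({"filepath": ["a.npy", "b.npy"]}).to_csv(manifest, index=False)

# Same resolution steps as in run(): read the CSV, then turn the column into Paths
raw_data = pd.read_csv(manifest)
matrices = [Path(f) for f in raw_data["filepath"]]
print(matrices)  # e.g. [PosixPath('a.npy'), PosixPath('b.npy')]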
def f():
    with worker_client() as lc:
        return lc.loop is get_worker().loop
Example #23
def f():
    with worker_client():
        pass

    return threading.current_thread() in get_worker().executor._threads
def f(x):
    with worker_client() as c:
        return True
Example #25
def _process_table_identifiers(
        pdf: DataFrame,
        dimension_combinations: Optional[List[List[str]]] = None,
        max_combination_length: int = 5) -> List[List[str]]:
    """
    Dask wrapper around extracting identifiers from a single sampled table (pdf).

    This method submits multiple sub-tasks to identify possible identifier combinations, waits for them to complete,
    and returns one or more dimension combinations.

    Note that the `worker_client` call forces the task to secede from the Worker's thread pool, so it does not
    block any other computations and cannot cause a deadlock while waiting for sub-tasks to finish. (A minimal
    sketch of this pattern follows the function.)
    """
    with timed_block('[idparser] Computing number of rows took {:.3f} seconds',
                     logger, logging.DEBUG):
        num_rows = len(pdf)

    with timed_block('[idparser] Pruning columns took {:.3f} seconds', logger,
                     logging.DEBUG):
        # filter out columns that contain at least X% null values - null values can't be parts of the primary key
        columns = [
            col for col, count in pdf.count().compute().items()
            if count / num_rows >= NON_NULL_VALUES_RATIO
        ]

    with worker_client(separate_thread=True) as client:  # type: Client
        with timed_block(
                '[idparser] Generating combinations took {:.3f} seconds',
                logger, logging.DEBUG):
            # explore all possible dimension combinations if none are provided
            if dimension_combinations is None:
                all_possible_combinations = itertools.chain.from_iterable(
                    itertools.combinations(columns, i) for i in range(
                        1,
                        min(max_combination_length, len(columns)) + 1))
                generated_combinations: List[List[str]] = [
                    sorted(combination)
                    for combination in all_possible_combinations
                ]
            else:
                generated_combinations = dimension_combinations

        with timed_block(
                '[idparser] Waiting for all combination tasks took {:.3f} seconds',
                logger, logging.DEBUG):
            with timed_block(
                    '[idparser] Submitting all combination tasks took {:.3f} seconds',
                    logger, logging.DEBUG):
                # submit "per dimension combination" tasks
                futures = client.map(
                    lambda combination:
                    _process_possible_identifier_combination(pdf, combination),
                    generated_combinations,
                    key=[
                        f'comb_{combination}_{str(uuid4())}'
                        for combination in generated_combinations
                    ],
                    # priority=100,
                    # batch_size=32,
                    retries=2,
                )
            results = client.gather(futures)

    return [
        dimensions for dimensions, num_duplicates in results
        if num_duplicates == 0
    ]
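As the docstring above notes, worker_client secedes the calling task from the worker's thread pool, so it can wait on sub-tasks without blocking other work or deadlocking. A minimal sketch of that pattern, using illustrative names (square, aggregate) and a throwaway local cluster:

from distributed import Client, worker_client

def square(x):
    return x * x

def aggregate(xs):
    # Launch sub-tasks from inside a running task; worker_client secedes
    # from the worker's thread pool, so gathering here cannot deadlock.
    with worker_client() as client:
        futures = client.map(square, xs)
        return sum(client.gather(futures))

if __name__ == "__main__":
    client = Client()  # local cluster for illustration
    print(client.submit(aggregate, list(range(10))).result())  # 285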
def func(x):
    with worker_client() as c:
        x = c.submit(inc, x)
        y = c.submit(double, x)
        result = x.result() + y.result()
        return result
Example #27
def func():
    with worker_client() as c:
        x = np.ones(5)
        future = c.scatter(x)
        assert future.type == np.ndarray

def f():
    with worker_client():
        return dask.delayed(lambda x: x)(1).compute()
Example #29
def f():
    with worker_client() as lc:
        return lc.loop is lc.worker.loop
Example #30
def func(x):
    with worker_client() as c:
        x = c.submit(inc, x)
        y = c.submit(double, x)
        result = x.result() + y.result()
        return result
Example #31
def long(delay):
    with worker_client() as c:
        sleep(delay)
Example #32
def mysum():
    with worker_client() as c:
        with c.get_executor() as e:
            return sum(e.map(double, range(30)))
Example #33
def go(self):
    with worker_client() as wc:
        futures = [wc.submit(self.go_, pset_i=i, **pset)
                   for i, pset in self.iterpsets()]
        futures = wc.gather(futures)
    return futures