Example #1
    def _execute(self, future_or_delayed, *args, **kwargs):
        """
        Execute Delayed jobs using the distributed Client to get a future object. Save the key of the future object for later use.

        Args:
            future_or_delayed (dask.Delayed or dask.distributed.Future): dask task object to track using TethysJobs.
        """  # noqa: E501
        if not isinstance(future_or_delayed, Future) and not isinstance(future_or_delayed, Delayed):
            raise ValueError('Must pass a valid instance of Delayed or Future.')

        if isinstance(future_or_delayed, Delayed):
            future = self.client.compute(future_or_delayed)
        else:
            future = future_or_delayed

        self.key = future.key

        # NOTE: Job may not actually be running at this point, but we don't have another
        # way to know when the first node in the workflow starts running.
        self._status = 'RUN'
        self.start_time = timezone.now()

        # Send key to the dask scheduler so the scheduler knows which jobs to send status updates to Tethys.
        self.client.set_metadata(self.key, True)

        # Save updated attributes
        self.save()

        # Must use fire and forget to ensure job runs after the future goes out of scope.
        fire_and_forget(future)
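
A minimal, self-contained sketch of the pattern above (the workload and the default local Client are placeholders, not part of the Tethys code): convert a Delayed into a Future with client.compute(), optionally tag its key with set_metadata, then hand it to fire_and_forget so the scheduler keeps it alive once the local reference is dropped.

from dask import delayed
from dask.distributed import Client, fire_and_forget

client = Client()                      # stand-in for the job's configured scheduler
task = delayed(sum)([1, 2, 3])         # any Delayed workflow
future = client.compute(task)          # Delayed -> Future
client.set_metadata(future.key, True)  # optional: mark the key for scheduler-side tracking
fire_and_forget(future)                # keeps running even after `future` goes out of scope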
Example #2
    def _get_temp_file(self) -> None:
        """
        Creates self._File and writes self._DataFrame to it. The file always
        uses gzip compression and ends in .gz. Overwriting may cause a critical
        error and data corruption. Waits for any previous file write to
        complete.
        """
        def _get_temp_file_1(
                temp_folder_location: str = self._temporary_folder_location,
                dataset_name: str = self._dataset_name,
                log: callable = self.log,
                client: dd.Client = self._client,
                DataFrame: object = self._DataFrame) -> dd.Future:
            try:
                file: object = NamedTemporaryFile(dir=temp_folder_location,
                                                  suffix="__" + dataset_name +
                                                  '.gz')
            except OSError as err:
                log(err, 'DEBUG')
                raise DatasetNameError(dataset_name)
            File: object = client.submit(DataFrame.to_parquet,
                                         file,
                                         compression='gzip')
            return File

        def _get_temp_file_2() -> None:
            self._File = self._File.result()
            self.log("File saved")

        if self._File is not None:
            dd.wait(self._File)
        self._File = self._client.submit(_get_temp_file_1)
        self._File.add_done_callback(_get_temp_file_2)
        dd.fire_and_forget(self._File)
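
The wait / submit / add_done_callback / fire_and_forget chain above can be reduced to the following sketch (the write_batch function and the default local Client are placeholders):

from dask.distributed import Client, fire_and_forget, wait

client = Client()

def write_batch(i):
    return f"batch {i} written"

previous = None
for i in range(3):
    if previous is not None:
        wait(previous)                                   # let the prior write finish first
    fut = client.submit(write_batch, i)
    fut.add_done_callback(lambda f: print(f.result()))   # runs once the task completes
    fire_and_forget(fut)
    previous = fut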
Example #3
def plugin_f_and_f(dump, plugin, params, user_pk):
    """
    Fire and forget plugin on dask
    """
    dask_client = Client(settings.DASK_SCHEDULER_URL)
    fire_and_forget(
        dask_client.submit(run_plugin, dump, plugin, params, user_pk))
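
Examples #3, #8 and #9 share the same shape: a short-lived handler connects to the scheduler, submits one call and returns immediately. A generic sketch of that wrapper (the scheduler URL and task function are placeholders):

from dask.distributed import Client, fire_and_forget

DASK_SCHEDULER_URL = "tcp://127.0.0.1:8786"   # placeholder address

def heavy_task(payload):
    ...                                       # long-running work executed on the cluster

def enqueue(payload):
    """Submit heavy_task and return at once; the scheduler keeps the task alive."""
    client = Client(DASK_SCHEDULER_URL)
    fire_and_forget(client.submit(heavy_task, payload))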
Example #4
    def submit(
        self,
        client: Client = None,
        scheduler_address: str = None,
        priority: int = None,
        resources: Dict[str, Any] = None,
        show_progress=False,
        **kwargs,
    ) -> None:

        if not priority:
            priority = self.priority

        if not resources:
            resources = self.resources

        if not client:
            client = Client(scheduler_address)

        self.scheduler_address = client.scheduler.address

        computation = client.compute(self.graph,
                                     retries=3,
                                     priority=priority,
                                     resources=resources)
        if show_progress:
            progress(computation)
        fire_and_forget(computation)
        if scheduler_address:
            client.close()
        return None
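
A reduced sketch of the core of this submit method, with retries, priority and the optional progress bar; the graph is a stand-in, and resources is omitted because it only matters when workers are started with matching resource labels:

import dask
from dask.distributed import Client, fire_and_forget, progress

client = Client()                                   # stand-in for Client(scheduler_address)
graph = dask.delayed(sum)([dask.delayed(abs)(i) for i in range(10)])

computation = client.compute(graph, retries=3, priority=10)
progress(computation)                               # optional: show progress (blocks in a plain script)
fire_and_forget(computation)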
Example #5
    def _get_data(self) -> None:
        """
        Retrieve self._DataFrame from the connected database using self._sql
        and self._connection.
        """
        def _get_data_1(
                backup_folder_location: str = self._backup_folder_location,
                dataset_name: str = self._dataset_name,
                log: callable = self.log,
                sql: str = self._sql,
                connection: object = self._connection) -> pd.DataFrame:
            file_location: str = os.path.join(backup_folder_location,
                                              dataset_name + '.gz')
            if not os.path.isfile(file_location):
                log("Querying database")
                DataFrame = pd.read_sql(sql, connection)
            else:
                log("Reading backup file")
                DataFrame = pd.read_parquet(file_location)
            return DataFrame

        def _get_data_2() -> None:
            self._DataFrame = self._DataFrame.result()
            self.log("DataFrame retrieved")
            if len(self._DataFrame.index) == 0:
                self.log("Query was empty", 'WARNING')

        if self._DataFrame is not None:
            dd.wait(self._DataFrame)
        self._DataFrame = self._client.submit(_get_data_1)
        self._DataFrame.add_done_callback(_get_data_2)
        dd.fire_and_forget(self._DataFrame)
Example #7
    def refit_task(project_id, model_id, selected_models: dict):
        docker = bool(os.getenv("IN_DOCKER", False))

        if docker:
            client = Client(dask_scheduler)
            fire_and_forget(client.submit(DaskTasks.set_prediction_estimators, int(project_id), int(model_id), selected_models))
        else:
            DaskTasks.set_prediction_estimators(int(project_id), int(model_id), selected_models)
Example #8
def index_f_and_f(dump_pk, user_pk):
    """
    Run all plugin for a new index on dask
    """
    dask_client = Client(settings.DASK_SCHEDULER_URL)
    fire_and_forget(
        dask_client.submit(unzip_then_run, dump_pk, user_pk, settings.ELASTICSEARCH_URL)
    )
Example #9
def plugin_f_and_f(dump, plugin, params):
    """
    Fire and forget plugin on dask
    """
    dask_client = Client(settings.DASK_SCHEDULER_URL)
    fire_and_forget(
        dask_client.submit(run_plugin, dump, plugin, settings.ELASTICSEARCH_URL, params)
    )
Example #10
    def setup_task(project_id, dataset_id, pipeline_id):
        docker = bool(os.getenv("IN_DOCKER", False))

        if docker:
            client = Client(dask_scheduler)
            fire_and_forget(client.submit(DaskTasks.execute_task, int(project_id), int(dataset_id), int(pipeline_id)))
        else:
            DaskTasks.execute_task(int(project_id), int(dataset_id), int(pipeline_id))
Example #11
def load_valid_tiles(ds, save=False):
    import imageio

    if save:
        dst_dir = create_dst_dir("mip")

    # generate thumbnails
    images = []
    for j, (_, ds_x) in enumerate(ds.groupby("tile_y")):
        for i, (_, tile) in enumerate(ds_x.groupby("tile_x")):
            print(f".. iter (i: {i}, j: {j})")

            uuid = tile.loc["488_1X"].values[0]
            # ignore missing tiles
            if not uuid:
                continue

            if save:
                data = ds[uuid].max(axis=0)
                dst_path = os.path.join(dst_dir, f"tile-{i:03d}-{j:03d}_mip.tif")
                # delayed(imageio.imwrite)(dst_path, data)

                future = client.submit(imageio.imwrite, dst_path, data)
                fire_and_forget(future)

            images.append(((j, i), uuid))

    #   2 3 4
    # 6
    # 7
    #
    # generate neighbor linkage
    links = dict()
    for (apos, auuid), (bpos, buuid) in product(images[:-1], images[1:]):
        if apos == bpos:
            continue

        if bpos < apos:
            apos, bpos = bpos, apos
            auuid, buuid = buuid, auuid
        print(f"{apos} <> {bpos}")

        if (apos, bpos) in links:
            print(".. duplicate")
            continue
        else:
            aj, ai = apos
            bj, bi = bpos
            if ((aj - 1 == bj or aj + 1 == bj) and (ai == bi)) or (
                (ai - 1 == bi or ai + 1 == bi) and (aj == bj)
            ):
                print(".. NEW NEIGHBOR")
                links[(apos, bpos)] = (auuid, buuid)
            else:
                print(".. not neighbor")

    return images, links
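
The per-tile save loop above boils down to: build the tile, submit the writer, forget the future. A standalone sketch with a NumPy writer standing in for imageio.imwrite (a local cluster is assumed, so client and workers share a filesystem):

import numpy as np
from dask.distributed import Client, fire_and_forget

client = Client()

def write_tile(path, data):
    np.save(path, data)        # stand-in for imageio.imwrite

for i in range(4):
    data = np.random.random((64, 64))
    fut = client.submit(write_tile, f"tile-{i:03d}.npy", data)
    fire_and_forget(fut)       # the writes keep running; nothing is ever gathered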
Example #12
    def setup_task(dataset_id, amodel_id, prepro_id=None):

        dataset = Dataset.objects.get(id=int(dataset_id))
        amodel = AnalyticalModel.objects.get(id=int(amodel_id))
        amodel.dataset = dataset.id
        amodel.save()

        client = Client(dask_scheduler)
        df = pd.read_csv(StringIO(bytes(dataset.data).decode())).drop("ID",
                                                                      axis=1)
        # add preprocessing to task
        fire_and_forget(
            client.submit(DaskTasks.execute_task, df, int(amodel.id),
                          str(amodel.name), int(dataset_id)))
Example #13
def run_and_score_submission(client, submission):
    """
    Runs public and private, plus scoring
    """
    delayed_conditional = dask.delayed(True)
    for is_public in (True, False):
        delayed_conditional = _trigger_submission_run(
            submission, delayed_conditional, is_public=is_public
        )

    if settings.VISUALIZE_DASK_GRAPH:
        delayed_conditional.visualize(filename="task_graph.svg")

    future = client.submit(delayed_conditional.compute)  # pylint:disable=no-member
    logger.info("Future key: %s", future.key)

    dd.fire_and_forget(future)
    return future
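
The trick above is submitting delayed_conditional.compute itself as a single task, so the whole conditional graph runs inside one worker. A minimal sketch of that pattern (the chain of step calls is a stand-in for the real submission stages):

import dask
from dask.distributed import Client, fire_and_forget

client = Client()

def step(name, previous_ok):
    return previous_ok          # placeholder for "run this stage if the previous one succeeded"

conditional = dask.delayed(True)
for stage in ("public", "private", "scoring"):
    conditional = dask.delayed(step)(stage, conditional)

future = client.submit(conditional.compute)   # the graph is evaluated inside one worker task
fire_and_forget(future)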
Example #14
    def launch(s, jid, job):
        """Launch a task.
        jid is a job id, job is a db.Job.  This modifies job.
        Caller should provide a transaction and commit if launch returns.
        Otherwise we may have a running job that is not in the database.
        """
        canc = Bogo_var()
        canc.set(False)
        spec = Task_spec(jid, job)
        fut = s.client.compute(model.task(spec, canc))
        s.tasks[jid] = fut, canc
        job.status = db.Job_status.SCHEDULED
        fut.add_done_callback(lambda f: s.task_done(jid, f))
        dd.fire_and_forget(fut)
        if s.monitor is not None:
            try:
                s.monitor(jid, fut)
            except Exception:
                s.logger.exception("monitor.launch failed for job %s", jid)
Example #15
    def train_model(id):
        model, config, dataset = Dataset.model_from_id(id)

        # Check if training is already done or in progress
        if model.status == "done":
            return {"error": "Model is already trained"}, 409
        if model.status not in ["not started", "error"]:
            return {"error": "Model is currently training"}, 409

        app.logger.info(f"Starting training dataset {dataset.name}")
        app.logger.info(f"config: {config.to_json()}")
        app.logger.info(f"model: {model.to_json()}")
        app.logger.info(f"Found configuration {config}")

        # update status
        model.status = "starting"
        dataset.save()

        fut = client.submit(training.train_model, id)
        fire_and_forget(fut)
        return {"status": model.status}, 202
Example #16
    def submit(self,
               client: Client = None,
               client_address: str = None,
               priority: int = None) -> None:

        if not priority:
            priority = self.priority

        owns_client = client is None
        if owns_client:
            client = Client(client_address)
        computation = client.compute(self.graph, retries=3, priority=priority)
        fire_and_forget(computation)
        self.status = "submitted"
        if owns_client:
            # if a client is provided, we assume the user will close it on their end
            client.close()

        schema = "administration"
        table = "workflow_queue"
        engine = os.getenv("QUEUE_ENGINE") or "mssql+pyodbc://redshift_acoe"
        self.submit_to_queue(engine, schema, table, priority)
        return None
Example #17
    def run(
        self,
        pars,
        sims,
        sim_status,
        indices,
        collect_in_memory: bool = True,
        batch_size: Optional[int] = None,
    ):
        """Run the simulator on the input parameters.

        Args:
            pars: array with all the input parameters. Should have shape
                (num. samples, num. parameters)
            sims: dictionary of arrays in which to store the simulation output.
                All arrays should have the number of samples as the size of the
                first dimension
            sim_status: array in which to store the simulation status (size
                should be equal to the number of samples)
            indices: indices of the samples that need to be run by the
                simulator
            collect_in_memory: if True, collect the simulation output in
                memory; if False, instruct Dask workers to save the output to
                the corresponding arrays. The latter option is asynchronous,
                so this method returns immediately.
            batch_size: simulations will be submitted in batches of the
                specified size
        """
        self.set_dask_cluster(self.cluster)

        # open parameter array as Dask array
        chunks = getattr(pars, "chunks", "auto")
        z = da.from_array(pars, chunks=chunks)
        idx = da.from_array(indices, chunks=(batch_size or -1, ))
        z = z[idx]

        z = z.persist()  # load the parameters in the distributed memory

        # block-wise run the model function on the parameter array
        out = da.map_blocks(
            _run_model_chunk,
            z,
            model=self.model,
            sim_shapes=self.sim_shapes,
            fail_on_non_finite=self.fail_on_non_finite,
            drop_axis=1,
            dtype=np.object,
        )

        # FIXME: Deprecated?
        #        print("Simulator: Running...")
        #        bag = db.from_sequence(z, npartitions=npartitions)
        #        bag = bag.map(_run_one_sample, self.model, self.fail_on_non_finite)
        #        result = bag.compute(scheduler=self.client or "processes")
        #        print("Simulator: ...done.")
        #        return result

        # split result dictionary and simulation status array
        results = out.map_blocks(getitem, 0, dtype=np.object)
        status = out.map_blocks(getitem, 1, meta=np.array(()), dtype=np.int)

        # unpack array of dictionaries to dictionary of arrays
        result_dict = {}
        for obs, shape in self.sim_shapes.items():
            result_dict[obs] = results.map_blocks(
                getitem,
                obs,
                new_axis=[i + 1 for i in range(len(shape))],
                chunks=(z.chunks[0], *shape),
                meta=np.array(()),
                dtype=np.float,
            )

        sources = [result_dict[k] for k in self.sim_shapes.keys()]
        targets = [sims[k] for k in self.sim_shapes.keys()]

        if collect_in_memory:
            # submit computation and collect results
            *sources, status = self.client.compute([*sources, status],
                                                   sync=True)

            # update simulation results
            for source, target in zip(sources, targets):
                target[indices.tolist()] = source

            # finally, update the simulation status
            sim_status[indices.tolist()] = status

        else:
            sources = da.store(
                sources=sources,
                targets=targets,
                regions=(indices.tolist(), ),
                lock=False,
                compute=False,
                return_stored=True,
            )

            # submit computation
            *sources, status = self.client.persist([*sources, status])

            # the following dummy array is generated after results are stored.
            zeros_when_done = [
                source.map_blocks(
                    lambda x: np.zeros(x.shape[0], dtype=np.int),
                    chunks=(source.chunks[0], ),
                    drop_axis=[i for i in range(1, source.ndim)],
                    meta=np.array((), dtype=np.int),
                    dtype=np.int,
                ) for source in sources
            ]
            status = sum([*zeros_when_done, status])
            status = status.store(
                target=sim_status,
                regions=(indices.tolist(), ),
                lock=False,
                compute=False,
                return_stored=True,
            )
            # when the simulation results are stored, we can update the status
            status = self.client.persist(status)
            fire_and_forget(status)
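
The asynchronous branch (collect_in_memory=False) hinges on da.store(..., compute=False, return_stored=True) followed by client.persist and fire_and_forget. A minimal sketch of that store-and-forget pattern, assuming zarr is installed and that client and workers share a filesystem:

import dask.array as da
import zarr
from dask.distributed import Client, fire_and_forget

client = Client()

x = da.random.random((100,), chunks=(10,))
target = zarr.open("output.zarr", mode="w", shape=x.shape, chunks=(10,), dtype="f8")

stored = da.store([x], [target], lock=False, compute=False, return_stored=True)
stored = client.persist(list(stored))   # submit the chunk writes without blocking
fire_and_forget(stored)                 # keep them running after `stored` goes out of scope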
Example #18
    async def send_computation_tasks(
        self,
        user_id: UserID,
        project_id: ProjectID,
        cluster_id: ClusterID,
        tasks: Dict[NodeID, Image],
        callback: Callable[[], None],
        remote_fct: Callable = None,
    ):
        """actually sends the function remote_fct to be remotely executed. if None is kept then the default
        function that runs container will be started."""
        def _done_dask_callback(dask_future: Future):
            job_id = dask_future.key
            logger.debug("Dask future %s completed", job_id)
            # remove the future from the dict to remove any handle to the future, so the worker can free the memory
            self._taskid_to_future_map.pop(job_id)
            callback()

        def _comp_sidecar_fct(job_id: str, user_id: int, project_id: ProjectID,
                              node_id: NodeID) -> None:
            """This function is serialized by the Dask client and sent over to the Dask sidecar(s)
            Therefore, (screaming here) DO NOT MOVE THAT IMPORT ANYWHERE ELSE EVER!!"""
            from simcore_service_dask_sidecar.tasks import (
                run_task_in_service,  # type: ignore
            )

            run_task_in_service(job_id, user_id, project_id, node_id)

        if remote_fct is None:
            remote_fct = _comp_sidecar_fct

        for node_id, node_image in tasks.items():
            # NOTE: the job id is used to create a folder in the sidecar,
            # so it must be a valid file name too
            # Also, it must be unique
            # and it is shown in the Dask scheduler dashboard website
            job_id = f"{node_image.name}_{node_image.tag}__projectid_{project_id}__nodeid_{node_id}__{uuid4()}"
            dask_resources = _from_node_reqs_to_dask_resources(
                node_image.node_requirements)
            # add the cluster ID here
            dask_resources.update({
                f"{self.settings.DASK_CLUSTER_ID_PREFIX}{cluster_id}":
                CLUSTER_RESOURCE_MOCK_USAGE
            })

            _check_valid_connection_to_scheduler(self.client)
            _check_cluster_able_to_run_pipeline(
                node_id=node_id,
                scheduler_info=self.client.scheduler_info(),
                task_resources=dask_resources,
                node_image=node_image,
                cluster_id_prefix=self.settings.
                DASK_CLUSTER_ID_PREFIX,  # type: ignore
                cluster_id=cluster_id,
            )
            try:
                task_future = self.client.submit(
                    remote_fct,
                    job_id,
                    user_id,
                    project_id,
                    node_id,
                    key=job_id,
                    resources=dask_resources,
                    retries=0,
                )
            except Exception:
                # Dask raises a base Exception here in case of connection error, this will raise a more precise one
                _check_valid_connection_to_scheduler(self.client)
                # if the connection is good, then the problem is different, so we re-raise
                raise
            task_future.add_done_callback(_done_dask_callback)
            self._taskid_to_future_map[job_id] = task_future
            fire_and_forget(
                task_future
            )  # this should ensure the task will run even if the future goes out of scope
            logger.debug("Dask task %s started", task_future.key)
        "/nfs/paper-big-data-engines/utils.py")  # Allow workers to use module
    client.upload_file(
        "/nfs/paper-big-data-engines/incrementation/Increment.py")

    # Read images
    paths = crawl_dir(os.path.abspath(args.bb_dir))
    client.scatter(paths)

    results = []
    for path in paths:
        img = client.submit(read_img, path, start=start, args=args)

        # Increment the data n time:
        for _ in range(args.iterations):
            img = client.submit(increment,
                                img,
                                delay=args.delay,
                                start=start,
                                args=args)

        # Save the data
        results.append(client.submit(save_results, img, start=start,
                                     args=args))

        # Execute the tasks
        fire_and_forget(img)

    client.gather(results)

    client.close()
Example #20
    inq = Queue('inq')
    outq = Queue('outq')
    lock = Lock('x')

    stopiter = Variable(False)
    brake = Variable(True)


    saver_started = False
    workers_started = False

    # start workers
    for workers in range(NCORE * ncpu):
        w = client.submit(calculate_small_parsimony, inq=None, outq=None,
                          stopiter=stopiter, treefile=treefile,
                          bootstrap_replicates=bootstrap_replicates,
                          matfile=alnfile + '.h5', row_index=remote_index,
                          iolock=lock, verbose=False)
        fire_and_forget(w)

    s = client.submit(collect_futures, queue=None, stopiter=stopiter,
                      brake=brake, runName=runName, nucleotides_only=False)
    saver_started = True
    fire_and_forget(s)

    for annot_index,annot_row in annotation.iterrows():
        #indexing starts at 1 for blast
        #####switch to sending the coordinates and masking for the matrix
        for j,codon in enumerate(range(annot_row.qstart-1, annot_row.qend-1 , 3 )):
            positions = []
            for col in [codon, codon+1 , codon+2]:
                if col in informativesites:
                    positions.append( (col, None) )
                else:
                    #just add the alignment character if it doesnt change.
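
The snippet above (cut off mid-loop) drives long-lived workers through distributed Queue, Variable and Lock objects. A simplified, runnable version of that worker-pool pattern, using a sentinel value on the queue instead of a stop Variable:

from dask.distributed import Client, Queue, fire_and_forget

client = Client()
inq = Queue("inq")

def consume(q):
    total = 0
    while True:
        item = q.get()           # blocks until something is put on the queue
        if item is None:         # sentinel: shut this consumer down
            return total
        total += item

for _ in range(2):
    fire_and_forget(client.submit(consume, inq, pure=False))

for i in range(10):
    inq.put(i)
for _ in range(2):
    inq.put(None)                # one sentinel per consumer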
Example #21
import dask.bag as db
from dask import delayed
from dask.distributed import Client, fire_and_forget
from dask_jobqueue import SLURMCluster

cluster = SLURMCluster(cores=40,
                       processes=40,
                       memory='250GB',
                       queue='scavenger',
                       walltime='02:00')

cluster.start_workers(2)
client = Client(cluster)

if __name__ == '__main__':

    guest = load_molecule('aaa.res')
    trial_xyz, trial_rad = perturb_mol(guest)
    trial_xyz = delayed(trial_xyz)
    trial_rad = delayed(trial_rad)
    structures = load_crystals()

    bag = db.from_sequence([(structure, trial_xyz, trial_rad)
                            for structure in structures],
                           partition_size=40)

    guest_hits = client.map(gen_guests, bag)

    fire_and_forget(client.map(save_expansion, guest_hits))

    inserts = client.map(insert_guests, guest_hits)

    reopts = client.map(reoptimize_inserts, inserts)
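
A compact sketch of the map-then-forget shape used above, with stdlib callables standing in for the chemistry functions: only the stage we care about is gathered, while the side-effect stage is submitted and forgotten.

from dask.distributed import Client, fire_and_forget

client = Client()

inputs = list(range(8))
hits = client.map(pow, inputs, inputs)        # one future per input
fire_and_forget(client.map(print, hits))      # side-effect stage, never gathered
results = client.gather(hits)                 # the downstream stage we actually collect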