Exemplo n.º 1
0
    def local_worker(self, *args, kf: KTSFrame):
        """Run ``self.compute`` on ``kf`` in the current process and report progress.

        Progress goes through ``rs`` when ``in_worker()`` is true and through
        the report stored in ``kf.__meta__['report']`` otherwise. When
        ``self.verbose`` is falsy nothing is reported and IO is suppressed.

        Args:
            *args: Positional arguments forwarded to ``self.compute`` ahead of ``kf``.
            kf: Frame the computation runs on; its ``_state`` may gain a
                ``'__columns'`` entry as a side effect.

        Returns:
            Tuple ``(res_kf, res_state, stats.data)``. ``res_state`` is
            ``kf._state`` when the state is returned and ``None`` otherwise.
        """
        run_id = RunID(kf._scope, kf._fold, kf.hash())

        # Caching policy, see
        # https://github.com/konodyuk/kts/tree/master/kts/core#caching-policy
        # A not cached FC that already carries state has saved it on an earlier
        # call, so it is not returned again. Cached FCs and first calls of not
        # cached FCs fall back to kf._train.
        if not self.cache and bool(kf._state):
            return_state = False
        else:
            return_state = kf._train

        stats = Stats(kf)

        # Pick the IO context and emit the "0 of 1" progress mark.
        report = None
        remote = in_worker()
        if not self.verbose:
            io = self.suppress_io()
        elif remote:
            io = self.remote_io(run_id)
            rs.send(ProgressSignal(0, 1, None, None, None, run_id))
        else:
            report = kf.__meta__['report']
            io = self.local_io(report, run_id)
            report.update(run_id, 0, 1)

        with stats, io, self.suppress_stderr(), pbar.local_mode(report, run_id):
            res_kf = self.compute(*args, kf)

        # Remember the produced column names once, if the result exposes them.
        if 'columns' in dir(res_kf):
            if '__columns' not in kf._state:
                kf._state['__columns'] = list(res_kf.columns)

        res_state = kf._state if return_state else None

        # Emit the "1 of 1" progress mark together with the elapsed time.
        if self.verbose:
            if in_worker():
                rs.send(ProgressSignal(1, 1, stats.data['took'], None, None, run_id))
            else:
                kf.__meta__['report'].update(run_id, 1, 1, stats.data['took'])
        return res_kf, res_state, stats.data
Exemplo n.º 2
0
def worker(self, *args, df: pd.DataFrame, meta: Dict):
    """Wrap ``df`` into a ``KTSFrame`` and run ``self.compute`` on it.

    Args:
        *args: Positional arguments forwarded to ``self.compute`` ahead of the frame.
        df: Input data; wrapped as ``KTSFrame(df, meta=meta)``.
        meta: Frame metadata. Must contain ``'pid'`` and must not contain
            ``'run_manager'`` or ``'report'``.

    Returns:
        Tuple ``(res_kf, res_state, stats.data)`` on success, where
        ``res_state`` is ``kf._state`` if ``kf._train`` was truthy before the
        computation and ``None`` otherwise. If ``self.compute`` raises, the
        formatted traceback is sent as an ``ErrorSignal`` and
        ``(None, None, None)`` is returned.
    """
    for forbidden_key in ('run_manager', 'report'):
        assert forbidden_key not in meta
    assert 'pid' in meta

    pid = meta['pid']
    signal.pid = pid
    address_manager.pid = pid

    kf = KTSFrame(df, meta=meta)
    kf.__meta__['remote'] = True
    return_state = kf._train

    if not self.verbose:
        io = self.suppress_io()
    else:
        rs.send(ProgressSignal(0, 1, None, None, None))
        io = self.remote_io()
    # Publish the PID of the process executing this call.
    rs.send(RunPID(os.getpid()))

    stats = Stats(df)
    with stats, io, self.suppress_stderr():
        try:
            res_kf = self.compute(*args, kf)
        except BaseException:
            # NOTE(review): deliberately as broad as a bare ``except`` -- this
            # also intercepts KeyboardInterrupt/SystemExit and reports them as
            # an error; confirm this is intended before narrowing it.
            rs.send(rs.ErrorSignal(traceback.format_exc()))
            return None, None, None

    # Remember the produced column names once, if the result exposes them.
    if 'columns' in dir(res_kf):
        if '__columns' not in kf._state:
            kf._state['__columns'] = list(res_kf.columns)

    res_state = kf._state if return_state else None
    if self.verbose:
        rs.send(ProgressSignal(1, 1, stats.data['took'], None, None))
    return res_kf, res_state, stats.data
Exemplo n.º 3
0
 def sync(self, run_id, res_df, res_state, stats, df):
     """Hand the results of a run over to the run manager.

     Outside a worker the results go straight to
     ``df.__meta__['run_manager'].sync``. Inside a worker every item that is
     not already an ``ObjectID`` is stored with ``ray.put`` and a ``Sync``
     message is sent through ``rs``.

     Args:
         run_id: Identifier of the run being synchronised.
         res_df: Result frame; replaced by ``None`` when ``self.cache`` is falsy.
         res_state: Result state.
         stats: Run statistics.
         df: Frame whose ``__meta__`` holds the run manager (local path only).
     """
     # The result frame is not saved to cache if the FC is not cached, see
     # https://github.com/konodyuk/kts/tree/master/kts/core#caching-policy
     frame = res_df if self.cache else None

     if not in_worker():
         manager = df.__meta__['run_manager']
         manager.sync(run_id, frame, res_state, stats)
         return

     payload = [
         item if isinstance(item, ObjectID) else ray.put(item)
         for item in (frame, res_state, stats)
     ]
     rs.send(Sync(run_id, *payload))
Exemplo n.º 4
0
 def request_resource(self, key, df):
     """Fetch the resource registered under ``key``.

     Outside a worker the resource is taken from
     ``df.__meta__['run_manager']`` and dereferenced with ``ray.get`` if it is
     an ``ObjectID``. Inside a worker a ``ResourceRequest`` is sent and the
     address manager is polled every 0.01 s until it has ``key`` with a
     timestamp that is not older than the request; there is no timeout.

     Args:
         key: Resource identifier.
         df: Frame whose ``__meta__`` holds the run manager (local path only).

     Returns:
         The resolved resource object.
     """
     if not in_worker():
         found = df.__meta__['run_manager'].get_resource(key)
         return ray.get(found) if isinstance(found, ObjectID) else found

     requested_at = time.time()
     rs.send(ResourceRequest(key))
     manager = get_address_manager()

     # Wait until the key is present and its entry was written after our
     # request; the timestamp is only queried once the key is known.
     while True:
         if ray.get(manager.has.remote(key)):
             stamp = ray.get(manager.timestamp.remote(key))
             if not stamp < requested_at:
                 break
         time.sleep(0.01)

     # The manager stores an address which itself has to be dereferenced.
     location = ray.get(manager.get.remote(key))
     return ray.get(location)
Exemplo n.º 5
0
 def __iter__(self):
     """Yield the items of ``self.iterable`` while sending progress signals.

     A signal with value 0 is sent before the first item and one with value
     ``self.total`` after the iterable is exhausted. In between, a signal is
     sent after an item whenever at least ``self._min_interval`` seconds have
     passed since the previous in-loop signal. ``self.update_time`` is called
     with the count of consumed items and the current time after each item.
     """
     def emit(value, took, eta):
         # All signals share the same total and title.
         rs.send(ProgressSignal(value=value, total=self.total, took=took, eta=eta, title=self.title))

     emit(0, None, None)
     self.update_time(0, time.time())

     last_sent = 0.
     for done, item in enumerate(self.iterable, start=1):
         yield item
         now = time.time()
         self.update_time(done, now)
         if now - last_sent >= self._min_interval:
             emit(done, self.took, self.eta)
             last_sent = now

     emit(self.total, self.took, None)