def _estimate_progress_completion_time(self, now):
    """
    Estimate the moment when the underlying process is expected to reach completion.

    This function should only return future times. Also this function is not allowed to return time moments less
    than ``self._next_poll_time`` if the actual progress is below 100% (this is because we won't know that the
    process has finished until we poll the external progress function).

    :param now: current timestamp; must not be later than ``self._next_poll_time``.
    :returns: the estimated timestamp of completion.
    """
    assert self._next_poll_time >= now
    tlast, wlast = self._progress_data[-1]

    # If reached 100%, make sure that we finish as soon as possible, but maybe not immediately
    if wlast == self._maxval:
        if self._v0 == 0:
            # With zero modelled speed the expression below would raise ZeroDivisionError. A model that is
            # already at 100% can finish right away; otherwise the model would never get there by itself, so
            # we allow the longest permitted delay.
            return now if self._x0 >= 1 else now + self.FINISH_DELAY
        # Time at which the model, moving at its current speed, would reach the 100% mark
        current_completion_time = (1 - self._x0) / self._v0 + self._t0
        return clamp(current_completion_time, now, now + self.FINISH_DELAY)

    # Calculate the approximate speed of the raw progress based on recent data: walk the history from the
    # second-to-last record backwards, weighting older records by increasing powers of GAMMA.
    tacc, wacc = 0, 0
    factor = self.GAMMA
    for t, x in self._progress_data[-2::-1]:
        tacc += factor * (tlast - t)
        wacc += factor * (wlast - x)
        factor *= self.GAMMA
        # Remaining records would carry a negligible weight
        if factor < 1e-2:
            break

    # If there was no progress at all, then just assume it's 5 minutes from now
    if wacc == 0:
        return now + 300

    # Estimate the completion time assuming linear progress
    t_estimate = tlast + tacc * (self._maxval - wlast) / wacc

    # Adjust the estimate if it looks like it may happen too soon
    if t_estimate <= self._next_poll_time:
        t_estimate = self._next_poll_time + self.FINISH_DELAY
    return t_estimate
def _store_model_progress(self, res, now):
    """
    Record a fresh progress observation and schedule the next poll.

    The observation is appended to ``self._progress_data`` and ``self._next_poll_time`` is updated.

    :param res: tuple (progress level, poll delay). A negative poll delay means that the delay should be guessed
        via ``self._guess_next_poll_interval()``.
    :param now: current timestamp.
    """
    level, poll_delay = res
    bounded_level = clamp(level, 0, self._maxval)
    self._progress_data.append((now, bounded_level))

    # The guess must be made only *after* the new record has been pushed onto ``self._progress_data``.
    if poll_delay < 0:
        poll_delay = self._guess_next_poll_interval()

    interval = clamp(poll_delay, self.MIN_PROGRESS_CHECK_INTERVAL, self.MAX_PROGRESS_CHECK_INTERVAL)
    self._next_poll_time = now + interval
def _compute_progress_at_time(self, t):
    """
    Evaluate the progress model at the time moment ``t``.

    The model is described by the attributes ``_t0``, ``_x0``, ``_v0``, ``_ve`` and the constant ``BETA``.

    :param t: the time moment at which to evaluate the model.
    :returns: tuple (x, v) of the progress level and progress speed.
    """
    elapsed = t - self._t0
    speed_gap = self._v0 - self._ve
    # Part of the speed gap that still remains after ``elapsed`` (decays exponentially at rate BETA)
    remaining_gap = speed_gap * math.exp(-self.BETA * elapsed)

    speed = self._ve + remaining_gap
    level = self._x0 + self._ve * elapsed + (speed_gap - remaining_gap) / self.BETA
    return clamp(level, 0, 1), speed
def _refresh_job_status(self):
    """
    Fetch the latest state of the job from the server and cache it on this object.

    Updates ``self.job``, ``self.status``, ``self.progress``, ``self.exception`` and ``self.warnings`` from the
    server's response, and decrements ``self._poll_count``.

    :returns: the job's progress level, adjusted to agree with the reported status.
    :raises StopIteration: if no polls remain, or if the job's status is "FAILED" or "CANCELLED".
    """
    if self._poll_count <= 0:
        raise StopIteration("")

    response = h2o.api("GET /3/Jobs/%s" % self.job_key)
    # The job description is stored either under the "jobs" or under the "job" key
    if "jobs" in response:
        self.job = response["jobs"][0]
    else:
        self.job = response["job"][0]

    job = self.job
    self.status = job["status"]
    self.progress = job["progress"]
    self.exception = job["exception"]
    if "warnings" in job:
        self.warnings = job["warnings"]
    else:
        self.warnings = None
    self._poll_count -= 1

    # The server may occasionally report a job at 100% while its status is still "RUNNING": in that case the
    # progress is shown as 99% instead. Conversely, a job may be reported at 0% with status "DONE": then the
    # progress is forced to 100%.
    status = self.status
    if status == "CREATED":
        self.progress = 0
    elif status == "RUNNING":
        self.progress = clamp(self.progress, 0, 0.99)
    elif status == "DONE":
        self.progress = 1
    elif status == "FAILED":
        raise StopIteration("failed")
    elif status == "CANCELLED":
        raise StopIteration("cancelled by the server")
    return self.progress