Example #1
  async def set_finished(self, job_id, succeeded):
    """Mark an started job as finished."""
    now = format_utc(utcnow())
    async with self.lock:
      if job_id not in self._state['started_jobs']:
        return False

      job = self._state['started_jobs'][job_id]
      job['finished'] = now
      job['duration'] = (
        diff_sec(parse_utc(now), parse_utc(job['started'])))
      job['pid'] = None
      job['succeeded'] = succeeded

      if succeeded:
        await self.history.update(job)

      self._state['finished_jobs'][job_id] = job
      del self._state['started_jobs'][job_id]

      if succeeded:
        self.logger.info(f'Finished job {job_id} [succeeded]')
      else:
        self.logger.warning(f'Finished job {job_id} [FAILED]')
      self.lock.notify_all()
      self._check_empty()
      self._schedule_dump()
      return True
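Below is a hypothetical caller for set_finished, sketching how a runner might report a subprocess's outcome; the queue object and the (job_id, cmd) pairing are assumptions for illustration, not part of the example above.

import asyncio

# Hypothetical usage sketch (assumed API): run a job's command and
# record the outcome through set_finished.
async def run_and_finish(queue, job_id, cmd):
  proc = await asyncio.create_subprocess_exec(*cmd)
  returncode = await proc.wait()
  # A zero exit code counts as success; set_finished then moves the
  # job from started_jobs to finished_jobs and updates the history.
  return await queue.set_finished(job_id, returncode == 0)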
Example #2
  async def _handle_du(self):
    """summarize parameters with time information"""
    params = await self._execute_chain('params')

    param_id_max_len = self._get_param_id_max_len(params)

    for param in params:
      meta = param['_']

      line = f'{get_param_id(param):{param_id_max_len}} '
      line += f'{get_hash_id(param)} '

      if meta['finished'] is not None:
        finished = meta['finished']
        if self.args.local:
          finished = format_local(parse_utc(finished))
        line += f"[{finished.partition('.')[0]:>19}] "
      else:
        line += ' ' * (19 + 3)

      if meta['duration'] is not None:
        line += f"[{format_sec_short(meta['duration']):>7}] "
      else:
        line += ' ' * (7 + 3)

      if meta['succeeded'] is None:
        line += '           '  # 11 spaces to match the status column width
      elif meta['succeeded']:
        line += 'succeeded  '
      else:
        line += 'FAILED     '

      line += get_name(param)

      print(line)
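The fixed-width columns above rely on nested f-string format specs. A minimal standalone demonstration of those idioms (all values are invented for illustration, not taken from the project):

# Demonstration of the alignment idioms used in _handle_du.
param_id_max_len = 8
finished = '2000-01-02 03:04:05.678901'
line = f'{"p-42":{param_id_max_len}} '          # left-pad the id column
line += f"[{finished.partition('.')[0]:>19}] "  # strip microseconds, right-align
line += f"[{'1:01:01':>7}] "
line += 'succeeded  '
line += 'my-experiment'
print(line)  # p-42     [2000-01-02 03:04:05] [1:01:01] succeeded  my-experiment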
Example #3
    async def estimate_remaining_time(self, state, oneshot, use_similar):
        """Estimate the remaining time using the queue state."""

        now = utcnow()

        epsilon = 0.001  # Potential underestimation until the progress reaches 0.1%

        # Future parallelism cannot be higher than the remaining job count
        concurrency = max(
            1.,
            min(state['concurrency'],
                len(state['started_jobs']) + len(state['queued_jobs'])))

        hash_ids = await self.history.hash_ids()
        history_list = await self.history.history_list(hash_ids)
        history_map = dict(zip(hash_ids, history_list))

        if use_similar:
            fid_mapping, fvec_map = await self._make_fid_mapping(
                history_map, hash_ids)

        # Estimate average per-job duration
        known_hash_ids = set()
        known_duration = 0.
        known_count = 0

        # Consider recent jobs first (in case some jobs have duplicate hash_id)
        for job in reversed(state['started_jobs']):
            hash_id = get_hash_id(job['param'])
            if hash_id in known_hash_ids:
                continue
            known_hash_ids.add(hash_id)

            if job['started'] is None:
                started = now
            else:
                started = parse_utc(job['started'])

            status = job.get('status')
            if status and status.get('progress', 0.) >= epsilon:
                # Extrapolate total duration from elapsed time and progress
                known_duration += diff_sec(now, started) / status['progress']
                known_count += 1

        for hash_id, history in history_map.items():
            if hash_id in known_hash_ids:
                continue
            known_hash_ids.add(hash_id)

            if history['duration'] is not None:
                known_duration += history['duration']
                known_count += 1

        avg_duration = known_duration / max(known_count, 1)

        remaining_time_map = {}

        for job in state['finished_jobs']:
            remaining_time_map[job['job_id']] = 0.

        # Calculate started jobs' remaining time
        remaining_duration = 0.
        for job in state['started_jobs']:
            hash_id = get_hash_id(job['param'])
            history = history_map.get(hash_id, None)

            if job['started'] is None:
                started = now
            else:
                started = parse_utc(job['started'])

            status = job.get('status')
            if status and status.get('progress', 0.) >= epsilon:
                # Extrapolate total duration from elapsed time and progress
                exp_duration = diff_sec(now, started) / status['progress']
                remaining_duration += max(
                    exp_duration - diff_sec(now, started), 0.)
            elif history and history['duration'] is not None:
                remaining_duration += max(
                    history['duration'] - diff_sec(now, started), 0.)
            else:
                if use_similar:
                    exp_duration = (await self._find_closest_duration(
                        history_map, fid_mapping, fvec_map, job['param']))
                    if exp_duration is None:
                        exp_duration = avg_duration
                else:
                    exp_duration = avg_duration

                remaining_duration += max(
                    exp_duration - diff_sec(now, started), 0.)

            # Take into account concurrency
            remaining_time_map[
                job['job_id']] = remaining_duration / concurrency

        # Calculate queued jobs' remaining time
        if not oneshot:
            for job in state['queued_jobs']:
                hash_id = get_hash_id(job['param'])
                history = history_map.get(hash_id, None)

                if history and history['duration'] is not None:
                    remaining_duration += history['duration']
                else:
                    if use_similar:
                        exp_duration = (await self._find_closest_duration(
                            history_map, fid_mapping, fvec_map, job['param']))
                        if exp_duration is None:
                            exp_duration = avg_duration
                    else:
                        exp_duration = avg_duration
                    remaining_duration += exp_duration

                # Take into account concurrency
                remaining_time_map[
                    job['job_id']] = remaining_duration / concurrency
        else:
            for job in state['queued_jobs']:
                remaining_time_map[
                    job['job_id']] = remaining_duration / concurrency

        # Take into account concurrency
        remaining_time = remaining_duration / concurrency

        return remaining_time, remaining_time_map
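The core of the estimate is linear extrapolation: if a job has been running for elapsed seconds and reports a progress fraction p >= epsilon, its expected total duration is elapsed / p. A worked example with made-up numbers:

# Worked example of the progress-based extrapolation (numbers invented):
# a job running for 30 s at 25% progress is expected to take
# 30 / 0.25 = 120 s in total, so 120 - 30 = 90 s remain.
elapsed = 30.
progress = 0.25
exp_duration = elapsed / progress            # 120.0
remaining = max(exp_duration - elapsed, 0.)  # 90.0
assert remaining == 90.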
Example #4
def test_diff_sec():
    t1 = parse_utc('2000-01-02 03:04:05.678901')
    t2 = parse_utc('2000-01-02 02:03:04.678900')
    assert diff_sec(t1, t2) == 3600 + 60 + 1 + 0.000001
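The test pins down diff_sec's contract: a signed difference in seconds with microsecond precision. A minimal implementation consistent with it, assuming parse_utc returns datetime.datetime objects (a sketch, not necessarily the project's code):

def diff_sec(t1, t2):
    # Difference t1 - t2 expressed in (fractional) seconds
    return (t1 - t2).total_seconds()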
Example #5
def test_parse_utc_format_utc_short():
    assert format_utc_short(
        parse_utc('2000-01-02 03:04:05.678901')) == '2000-01-02 03:04:05'
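A sketch of format_utc_short consistent with this test, assuming a datetime input; the real helper may differ:

def format_utc_short(t):
    # Drop the fractional-second part when rendering
    return t.strftime('%Y-%m-%d %H:%M:%S')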
Example #6
def test_as_utc_as_local_as_utc():
    t = parse_utc('2000-01-02 03:04:05.678901')
    assert t == as_utc(as_local(t))

    t = parse_local('2000-01-02 03:04:05.678901')
    assert t == as_local(as_utc(t))
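One way to satisfy this round-trip property is plain datetime.astimezone, assuming the parse helpers return timezone-aware datetimes; this is a sketch, not necessarily how the project implements it:

from datetime import timezone

def as_utc(t):
    return t.astimezone(timezone.utc)

def as_local(t):
    # No argument means the system's local timezone
    return t.astimezone()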
Example #7
def test_as_utc():
    t = parse_utc('2000-01-02 03:04:05.678901')
    assert t == as_utc(t)
Example #8
def test_job_elapsed_time_started_job(mock_utcnow):
    mock_utcnow.return_value = parse_utc('2000-01-02 03:04:15.678901')
    job = {'finished': None, 'started': '2000-01-02 03:04:05.678901'}
    assert job_elapsed_time(job) == 10.
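A sketch of job_elapsed_time that matches this test, reusing the parse_utc, utcnow, and diff_sec helpers exercised above (assumed behavior, not necessarily the project's implementation):

def job_elapsed_time(job):
    # Running jobs are measured up to now; finished jobs up to 'finished'
    end = utcnow() if job['finished'] is None else parse_utc(job['finished'])
    return diff_sec(end, parse_utc(job['started']))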