def test_delete_archived_tasks(self, swh_scheduler): self._create_task_types(swh_scheduler) _time = utcnow() recurring = tasks_from_template(TEMPLATES["git"], _time, 12) oneshots = tasks_from_template(TEMPLATES["hg"], _time, 12) total_tasks = len(recurring) + len(oneshots) pending_tasks = swh_scheduler.create_tasks(recurring + oneshots) backend_tasks = [{ "task": task["id"], "backend_id": str(uuid.uuid4()), "scheduled": utcnow(), } for task in pending_tasks] swh_scheduler.mass_schedule_task_runs(backend_tasks) _tasks = [] percent = random.randint(0, 100) # random election removal boundary for task in backend_tasks: t = swh_scheduler.end_task_run(task["backend_id"], status="eventful") c = random.randint(0, 100) if c <= percent: _tasks.append({"task_id": t["task"], "task_run_id": t["id"]}) swh_scheduler.delete_archived_tasks(_tasks) all_tasks = [task["id"] for task in swh_scheduler.search_tasks()] tasks_count = len(all_tasks) tasks_run_count = len(swh_scheduler.get_task_runs(all_tasks)) assert tasks_count == total_tasks - len(_tasks) assert tasks_run_count == total_tasks - len(_tasks)
def test_origin_visit_stats_upsert_cardinality_failing( self, swh_scheduler) -> None: """Batch upsert does not support altering multiple times the same origin-visit-status """ with pytest.raises(SchedulerException, match="CardinalityViolation"): swh_scheduler.origin_visit_stats_upsert([ OriginVisitStats( url="foo", visit_type="git", last_eventful=None, last_uneventful=utcnow(), last_notfound=None, last_failed=None, last_snapshot=None, ), OriginVisitStats( url="foo", visit_type="git", last_eventful=None, last_uneventful=utcnow(), last_notfound=None, last_failed=None, last_snapshot=None, ), ])
def test_journal_client_origin_visit_status_from_journal_ignored_status( swh_scheduler): """Only final statuses (full, partial) are important, the rest remain ignored.""" # Trace method calls on the swh_scheduler swh_scheduler = Mock(wraps=swh_scheduler) visit_statuses = [ { "origin": "foo", "visit": 1, "status": "created", "date": utcnow(), "type": "git", "snapshot": None, }, { "origin": "bar", "visit": 1, "status": "ongoing", "date": utcnow(), "type": "svn", "snapshot": None, }, ] process_journal_objects({"origin_visit_status": visit_statuses}, scheduler=swh_scheduler) # All messages have been ignored: no stats have been upserted swh_scheduler.origin_visit_stats_upsert.assert_not_called()
def test_origin_visit_stats_upsert_batch(self, swh_scheduler) -> None: """Batch upsert is ok""" visit_stats = [ OriginVisitStats( url="foo", visit_type="git", last_eventful=utcnow(), last_uneventful=None, last_failed=None, last_notfound=None, last_snapshot=hash_to_bytes( "d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), ), OriginVisitStats( url="bar", visit_type="git", last_eventful=None, last_uneventful=utcnow(), last_notfound=None, last_failed=None, last_snapshot=hash_to_bytes( "fffcc0710eb6cf9efd5b920a8453e1e07157bfff"), ), ] swh_scheduler.origin_visit_stats_upsert(visit_stats) for visit_stat in swh_scheduler.origin_visit_stats_get([ (vs.url, vs.visit_type) for vs in visit_stats ]): assert visit_stat is not None
def test_get_task_runs_with_executed(self, swh_scheduler): """Some tasks have been executed, get_task_runs() should not return an empty list. limit should behave as expected. """ self._create_task_types(swh_scheduler) _time = utcnow() recurring = tasks_from_template(TEMPLATES["git"], _time, 12) oneshots = tasks_from_template(TEMPLATES["hg"], _time, 12) pending_tasks = swh_scheduler.create_tasks(recurring + oneshots) backend_tasks = [{ "task": task["id"], "backend_id": str(uuid.uuid4()), "scheduled": utcnow(), } for task in pending_tasks] swh_scheduler.mass_schedule_task_runs(backend_tasks) btask = backend_tasks[0] ts = utcnow() swh_scheduler.start_task_run(btask["backend_id"], metadata={"something": "stupid"}, timestamp=ts) runs = swh_scheduler.get_task_runs(task_ids=[btask["task"]]) assert len(runs) == 1 assert subdict(runs[0], excl=("id")) == { "task": btask["task"], "backend_id": btask["backend_id"], "scheduled": btask["scheduled"], "started": ts, "ended": None, "metadata": { "something": "stupid" }, "status": "started", } ts2 = utcnow() swh_scheduler.end_task_run( btask["backend_id"], metadata={"other": "stuff"}, timestamp=ts2, status="eventful", ) runs = swh_scheduler.get_task_runs(task_ids=[btask["task"]]) assert len(runs) == 1 assert subdict(runs[0], excl=("id")) == { "task": btask["task"], "backend_id": btask["backend_id"], "scheduled": btask["scheduled"], "started": ts, "ended": ts2, "metadata": { "something": "stupid", "other": "stuff" }, "status": "eventful", }
def test_create_tasks(self, swh_scheduler): self._create_task_types(swh_scheduler) num_git = 100 tasks_1 = tasks_from_template(TEMPLATES["git"], utcnow(), num_git) tasks_2 = tasks_from_template(TEMPLATES["hg"], utcnow(), num_priorities=NUM_PRIORITY_TASKS) tasks = tasks_1 + tasks_2 # tasks are returned only once with their ids ret1 = swh_scheduler.create_tasks(tasks + tasks) set_ret1 = set([t["id"] for t in ret1]) # creating the same set result in the same ids ret = swh_scheduler.create_tasks(tasks) set_ret = set([t["id"] for t in ret]) # Idempotence results assert set_ret == set_ret1 assert len(ret) == len(ret1) ids = set() actual_priorities = defaultdict(int) for task, orig_task in zip(ret, tasks): task = copy.deepcopy(task) task_type = TASK_TYPES[orig_task["type"].split("-")[-1]] assert task["id"] not in ids assert task["status"] == "next_run_not_scheduled" assert task["current_interval"] == task_type["default_interval"] assert task["policy"] == orig_task.get("policy", "recurring") priority = task.get("priority") actual_priorities[priority] += 1 assert task["retries_left"] == (task_type["num_retries"] or 0) ids.add(task["id"]) del task["id"] del task["status"] del task["current_interval"] del task["retries_left"] if "policy" not in orig_task: del task["policy"] if "priority" not in orig_task: del task["priority"] assert task == orig_task expected_priorities = NUM_PRIORITY_TASKS.copy() expected_priorities[None] += num_git assert dict(actual_priorities) == expected_priorities
def try_perform_actions(actions=actions): logger.debug("Try perform pending actions") if actions["queue"] and ( len(actions["queue"]) > ACTION_QUEUE_MAX_LENGTH or utcnow() - actions["last_send"] > ACTION_SEND_DELAY ): perform_actions(actions)
def test_peek_ready_tasks_returns_only_no_priority_tasks( self, swh_scheduler): """Peek ready tasks only return standard tasks (no priority)""" self._create_task_types(swh_scheduler) t = utcnow() task_type = TEMPLATES["git"]["type"] # Create tasks with and without priorities tasks = tasks_from_template( TEMPLATES["git"], t, num_priorities=NUM_PRIORITY_TASKS, ) count_priority = 0 for task in tasks: count_priority += 0 if task.get("priority") is None else 1 assert count_priority > 0, "Some created tasks should have some priority" random.shuffle(tasks) swh_scheduler.create_tasks(tasks) # take all available no priority tasks ready_tasks = swh_scheduler.peek_ready_tasks(task_type) assert len(ready_tasks) == len(tasks) - count_priority # No read task should have any priority for task in ready_tasks: assert task.get("priority") is None
def test_metrics_origins_never_visited(self, swh_scheduler, listed_origins): swh_scheduler.record_listed_origins(listed_origins) # Pretend that we've recorded a visit on one origin visited_origin = listed_origins[0] swh_scheduler.origin_visit_stats_upsert([ OriginVisitStats( url=visited_origin.url, visit_type=visited_origin.visit_type, last_eventful=utcnow(), last_uneventful=None, last_failed=None, last_notfound=None, last_snapshot=hash_to_bytes( "d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), ), ]) ret = swh_scheduler.update_metrics(lister_id=visited_origin.lister_id) for metric in ret: if metric.visit_type == visited_origin.visit_type: # We visited one of these origins assert metric.origins_known - metric.origins_never_visited == 1 else: # But none of these have been visited assert metric.origins_known == metric.origins_never_visited
def respawn_tasks(ctx, task_ids: List[str], next_run: datetime.datetime): """Respawn tasks. Respawn tasks given by their ids (see the 'task list' command to find task ids) at the given date (immediately by default). Eg. swh-scheduler task respawn 1 3 12 """ from swh.scheduler.utils import utcnow scheduler = ctx.obj["scheduler"] if not scheduler: raise ValueError("Scheduler class (local/remote) must be instantiated") if next_run is None: next_run = utcnow() output = [] task_ids_int = [int(id_) for id_ in task_ids] scheduler.set_status_tasks(task_ids_int, status="next_run_not_scheduled", next_run=next_run) output.append("Respawn tasks %s\n" % (task_ids_int, )) click.echo("\n".join(output))
def perform_actions(actions, backend=backend): logger.info("Perform %s pending actions" % len(actions["queue"])) action_map = { "start_task_run": backend.start_task_run, "end_task_run": backend.end_task_run, } messages = [] db = backend.get_db() try: cursor = db.cursor(None) for action in actions["queue"]: messages.append(action["message"]) function = action_map[action["action"]] args = action.get("args", ()) kwargs = action.get("kwargs", {}) kwargs["cur"] = cursor function(*args, **kwargs) except Exception: db.conn.rollback() else: db.conn.commit() finally: backend.put_db(db) for message in messages: if not message.acknowledged: message.ack() actions["queue"] = [] actions["last_send"] = utcnow()
def schedule_task_run(self, task_id, backend_id, metadata=None, timestamp=None, db=None, cur=None): """Mark a given task as scheduled, adding a task_run entry in the database. Args: task_id (int): the identifier for the task being scheduled backend_id (str): the identifier of the job in the backend metadata (dict): metadata to add to the task_run entry timestamp (datetime.datetime): the instant the event occurred Returns: a fresh task_run entry """ if metadata is None: metadata = {} if timestamp is None: timestamp = utcnow() cur.execute( "select * from swh_scheduler_schedule_task_run(%s, %s, %s, %s)", (task_id, backend_id, metadata, timestamp), ) return cur.fetchone()
def start_task_run(self, backend_id, metadata=None, timestamp=None, db=None, cur=None): """Mark a given task as started, updating the corresponding task_run entry in the database. Args: backend_id (str): the identifier of the job in the backend metadata (dict): metadata to add to the task_run entry timestamp (datetime.datetime): the instant the event occurred Returns: the updated task_run entry """ if metadata is None: metadata = {} if timestamp is None: timestamp = utcnow() cur.execute( "select * from swh_scheduler_start_task_run(%s, %s, %s)", (backend_id, metadata, timestamp), ) return cur.fetchone()
def test_journal_client_origin_visit_status_from_journal_last_successful( swh_scheduler): visit_statuses = [ { "origin": "bar", "visit": 1, "status": "partial", "date": utcnow(), "type": "git", "snapshot": hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), }, { "origin": "foo", "visit": 1, "status": "full", "date": DATE1, "type": "git", "snapshot": hash_to_bytes("eeecc0710eb6cf9efd5b920a8453e1e07157bfff"), }, { "origin": "foo", "visit": 2, "status": "partial", "date": DATE2, "type": "git", "snapshot": hash_to_bytes("aaacc0710eb6cf9efd5b920a8453e1e07157baaa"), }, { "origin": "foo", "visit": 3, "status": "full", "date": DATE3, "type": "git", "snapshot": hash_to_bytes("dddcc0710eb6cf9efd5b920a8453e1e07157bddd"), }, ] process_journal_objects({"origin_visit_status": visit_statuses}, scheduler=swh_scheduler) actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get([("foo", "git")]) assert_visit_stats_ok( actual_origin_visit_stats[0], OriginVisitStats( url="foo", visit_type="git", last_successful=DATE3, last_visit=DATE3, last_visit_status=LastVisitStatus.successful, last_snapshot=hash_to_bytes( "dddcc0710eb6cf9efd5b920a8453e1e07157bddd"), next_position_offset=0, successive_visits=3, ), )
def test_get_task_runs_with_scheduled(self, swh_scheduler): """Some tasks have been scheduled but not executed yet, get_task_runs() should not return an empty list. limit should behave as expected. """ self._create_task_types(swh_scheduler) _time = utcnow() recurring = tasks_from_template(TEMPLATES["git"], _time, 12) oneshots = tasks_from_template(TEMPLATES["hg"], _time, 12) total_tasks = len(recurring) + len(oneshots) pending_tasks = swh_scheduler.create_tasks(recurring + oneshots) backend_tasks = [{ "task": task["id"], "backend_id": str(uuid.uuid4()), "scheduled": utcnow(), } for task in pending_tasks] swh_scheduler.mass_schedule_task_runs(backend_tasks) assert not swh_scheduler.get_task_runs(task_ids=[total_tasks + 1]) btask = backend_tasks[0] runs = swh_scheduler.get_task_runs(task_ids=[btask["task"]]) assert len(runs) == 1 run = runs[0] assert subdict(run, excl=("id", )) == { "task": btask["task"], "backend_id": btask["backend_id"], "scheduled": btask["scheduled"], "started": None, "ended": None, "metadata": None, "status": "scheduled", } runs = swh_scheduler.get_task_runs( task_ids=[bt["task"] for bt in backend_tasks], limit=2) assert len(runs) == 2 runs = swh_scheduler.get_task_runs( task_ids=[bt["task"] for bt in backend_tasks]) assert len(runs) == total_tasks keys = ("task", "backend_id", "scheduled") assert (sorted([subdict(x, keys) for x in runs], key=lambda x: x["task"]) == backend_tasks)
def _check_grab_next_visit(self, swh_scheduler, visit_type, policy, expected): """Calls grab_next_visits with the passed policy, and check that all the origins returned are the expected ones (in the same order), and that no extra origins are returned. Also checks the origin visits have been marked as scheduled, and are only re-scheduled a week later""" assert len(expected) != 0 before = utcnow() ret = swh_scheduler.grab_next_visits( visit_type=visit_type, # Request one more than expected to check that no extra origin is returned count=len(expected) + 1, policy=policy, ) after = utcnow() assert ret == expected visit_stats_list = swh_scheduler.origin_visit_stats_get([ (origin.url, origin.visit_type) for origin in expected ]) assert len(visit_stats_list) == len(expected) for visit_stats in visit_stats_list: # Check that last_scheduled got updated assert before <= visit_stats.last_scheduled <= after # They should not be scheduled again ret = swh_scheduler.grab_next_visits( visit_type=visit_type, count=len(expected) + 1, policy=policy, ) assert ret == [], "grab_next_visits returned already-scheduled origins" # But a week later, they should ret = swh_scheduler.grab_next_visits( visit_type=visit_type, count=len(expected) + 1, policy=policy, timestamp=after + datetime.timedelta(days=7), ) # We need to sort them because their 'last_scheduled' field is updated to # exactly the same value, so the order is not deterministic assert sorted(ret) == sorted( expected), "grab_next_visits didn't reschedule visits after a week"
def test_update_metrics_twice(self, swh_scheduler, listed_origins): swh_scheduler.record_listed_origins(listed_origins) ts = utcnow() ret = swh_scheduler.update_metrics(timestamp=ts) assert all(metric.last_update == ts for metric in ret) second_ts = ts + datetime.timedelta(seconds=1) ret = swh_scheduler.update_metrics(timestamp=second_ts) assert all(metric.last_update == second_ts for metric in ret)
def test_search_tasks(self, swh_scheduler): def make_real_dicts(lst): """RealDictRow is not a real dict.""" return [dict(d.items()) for d in lst] self._create_task_types(swh_scheduler) t = utcnow() tasks = tasks_from_template(TEMPLATES["git"], t, 100) tasks = swh_scheduler.create_tasks(tasks) assert make_real_dicts( swh_scheduler.search_tasks()) == make_real_dicts(tasks)
def task_failed(event, message): logger.debug("task_failed: event: %s" % event) logger.debug(" message: %s" % message) queue_action( { "action": "end_task_run", "args": [event["uuid"]], "kwargs": {"timestamp": utcnow(), "status": "failed",}, "message": message, } )
def test_journal_client_origin_visit_status_from_journal_last_failed( swh_scheduler): visit_statuses = [ { "origin": "foo", "visit": 1, "status": "partial", "date": utcnow(), "type": "git", "snapshot": None, }, { "origin": "bar", "visit": 1, "status": "full", "date": DATE1, "type": "git", "snapshot": None, }, { "origin": "bar", "visit": 2, "status": "full", "date": DATE2, "type": "git", "snapshot": None, }, { "origin": "bar", "visit": 3, "status": "full", "date": DATE3, "type": "git", "snapshot": None, }, ] process_journal_objects({"origin_visit_status": visit_statuses}, scheduler=swh_scheduler) actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get([("bar", "git")]) assert_visit_stats_ok( actual_origin_visit_stats[0], OriginVisitStats( url="bar", visit_type="git", last_visit=DATE3, last_visit_status=LastVisitStatus.failed, next_position_offset=6, successive_visits=3, ), )
def test_journal_client_origin_visit_status_after_grab_next_visits( swh_scheduler, stored_lister): """Ensure OriginVisitStat entries created in the db as a result of calling grab_next_visits() do not mess the OriginVisitStats upsert mechanism. """ listed_origins = [ ListedOrigin(lister_id=stored_lister.id, url=url, visit_type=visit_type) for (url, visit_type) in set( (v["origin"], v["type"]) for v in VISIT_STATUSES_2) ] swh_scheduler.record_listed_origins(listed_origins) before = utcnow() swh_scheduler.grab_next_visits(visit_type="git", count=10, policy="oldest_scheduled_first") after = utcnow() assert swh_scheduler.origin_visit_stats_get([("cavabarder", "hg")]) == [] assert swh_scheduler.origin_visit_stats_get([("cavabarder", "git") ])[0] is not None process_journal_objects({"origin_visit_status": VISIT_STATUSES_2}, scheduler=swh_scheduler) for url in ("cavabarder", "iciaussi"): ovs = swh_scheduler.origin_visit_stats_get([(url, "git")])[0] assert before <= ovs.last_scheduled <= after ovs = swh_scheduler.origin_visit_stats_get([(url, "hg")])[0] assert ovs.last_scheduled is None ovs = swh_scheduler.origin_visit_stats_get([("cavabarder", "git")])[0] assert ovs.last_successful == DATE1 + 5 * ONE_DAY assert ovs.last_visit == DATE1 + 5 * ONE_DAY assert ovs.last_visit_status == LastVisitStatus.successful assert ovs.last_snapshot == hash_to_bytes( "5555555555555555555555555555555555555555")
def task_started(event, message): logger.debug("task_started: %s %s", event["type"], event.get("name", "N/A")) queue_action( { "action": "start_task_run", "args": [event["uuid"]], "kwargs": { "timestamp": utcnow(), "metadata": {"worker": event["hostname"],}, }, "message": message, } )
def test_get_task_runs_no_task_executed(self, swh_scheduler): """No task has been executed yet, get_task_runs() should always return an empty list. """ self._create_task_types(swh_scheduler) _time = utcnow() recurring = tasks_from_template(TEMPLATES["git"], _time, 12) oneshots = tasks_from_template(TEMPLATES["hg"], _time, 12) swh_scheduler.create_tasks(recurring + oneshots) assert not swh_scheduler.get_task_runs(task_ids=()) assert not swh_scheduler.get_task_runs(task_ids=(1, 2, 3)) assert not swh_scheduler.get_task_runs(task_ids=(1, 2, 3), limit=10)
def test_get_tasks(self, swh_scheduler): self._create_task_types(swh_scheduler) t = utcnow() tasks = tasks_from_template(TEMPLATES["git"], t, 100) tasks = swh_scheduler.create_tasks(tasks) random.shuffle(tasks) while len(tasks) > 1: length = random.randrange(1, len(tasks)) cur_tasks = sorted(tasks[:length], key=lambda x: x["id"]) tasks[:length] = [] ret = swh_scheduler.get_tasks(task["id"] for task in cur_tasks) # result is not guaranteed to be sorted ret.sort(key=lambda x: x["id"]) assert ret == cur_tasks
def process_event(event, scheduler_backend): uuid = event.get("uuid") if not uuid: return event_type = event["type"] statsd.increment("swh_scheduler_listener_handled_event_total", tags={"event_type": event_type}) if event_type == "task-started": scheduler_backend.start_task_run( uuid, timestamp=utcnow(), metadata={"worker": event.get("hostname")}, ) elif event_type == "task-result": result = event["result"] status = None if isinstance(result, dict) and "status" in result: status = result["status"] if status == "success": status = "eventful" if result.get("eventful") else "uneventful" if status is None: status = "eventful" if result else "uneventful" scheduler_backend.end_task_run(uuid, timestamp=utcnow(), status=status, result=result) elif event_type == "task-failed": scheduler_backend.end_task_run(uuid, timestamp=utcnow(), status="failed")
def test_schedule_tasks(swh_scheduler): csv_data = (b'swh-test-ping;[["arg1", "arg2"]];{"key": "value"};' + utcnow().isoformat().encode() + b"\n" + b'swh-test-ping;[["arg3", "arg4"]];{"key": "value"};' + utcnow().isoformat().encode() + b"\n") with tempfile.NamedTemporaryFile(suffix=".csv") as csv_fd: csv_fd.write(csv_data) csv_fd.seek(0) result = invoke(swh_scheduler, False, ["task", "schedule", "-d", ";", csv_fd.name]) expected = r""" Created 2 tasks Task 1 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: recurring Args: \['arg1', 'arg2'\] Keyword args: key: 'value' Task 2 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: recurring Args: \['arg3', 'arg4'\] Keyword args: key: 'value' """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
def schedule_task(ctx, type, options, policy, priority, next_run): """Schedule one task from arguments. The first argument is the name of the task type, further ones are positional and keyword argument(s) of the task, in YAML format. Keyword args are of the form key=value. Usage sample: swh-scheduler --database 'service=swh-scheduler' \ task add list-pypi swh-scheduler --database 'service=swh-scheduler' \ task add list-debian-distribution --policy=oneshot distribution=stretch Note: if the priority is not given, the task won't have the priority set, which is considered as the lowest priority level. """ from swh.scheduler.utils import utcnow from .utils import parse_options scheduler = ctx.obj["scheduler"] if not scheduler: raise ValueError("Scheduler class (local/remote) must be instantiated") now = utcnow() (args, kw) = parse_options(options) task = { "type": type, "policy": policy, "priority": priority, "arguments": { "args": args, "kwargs": kw, }, "next_run": next_run or now, } created = scheduler.create_tasks([task]) output = [ "Created %d tasks\n" % len(created), ] for task in created: output.append(pretty_print_task(task)) click.echo("\n".join(output))
def grab_ready_priority_tasks( self, task_type: str, timestamp: Optional[datetime.datetime] = None, num_tasks: Optional[int] = None, db=None, cur=None, ) -> List[Dict]: if timestamp is None: timestamp = utcnow() cur.execute( """select * from swh_scheduler_grab_any_ready_priority_tasks( %s, %s, %s :: bigint)""", (task_type, timestamp, num_tasks), ) logger.debug("GRAB %s => %s", task_type, cur.rowcount) return cur.fetchall()
def test_grab_ready_priority_tasks(self, swh_scheduler): """check the grab and peek priority tasks endpoint behave as expected""" self._create_task_types(swh_scheduler) t = utcnow() task_type = TEMPLATES["git"]["type"] num_tasks = 100 # Create tasks with and without priorities tasks0 = tasks_with_priority_from_template( TEMPLATES["git"], t, num_tasks, "high", ) tasks1 = tasks_with_priority_from_template( TEMPLATES["hg"], t, num_tasks, "low", ) tasks2 = tasks_with_priority_from_template( TEMPLATES["hg"], t, num_tasks, "normal", ) tasks = tasks0 + tasks1 + tasks2 random.shuffle(tasks) swh_scheduler.create_tasks(tasks) ready_tasks = swh_scheduler.peek_ready_priority_tasks(task_type, num_tasks=50) grabbed_tasks = swh_scheduler.grab_ready_priority_tasks(task_type, num_tasks=50) for peeked, grabbed in zip(ready_tasks, grabbed_tasks): assert peeked["status"] == "next_run_not_scheduled" del peeked["status"] assert grabbed["status"] == "next_run_scheduled" del grabbed["status"] assert peeked == grabbed assert peeked["priority"] == grabbed["priority"] assert peeked["priority"] is not None
def test_origin_visit_stats_upsert_with_snapshot(self, swh_scheduler) -> None: eventful_date = utcnow() url = "https://github.com/666/test" visit_stats = OriginVisitStats( url=url, visit_type="git", last_eventful=eventful_date, last_uneventful=None, last_failed=None, last_notfound=None, last_snapshot=hash_to_bytes( "d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), ) swh_scheduler.origin_visit_stats_upsert([visit_stats]) assert swh_scheduler.origin_visit_stats_get([(url, "git") ]) == [visit_stats] assert swh_scheduler.origin_visit_stats_get([(url, "svn")]) == []