Пример #1
0
def test_list_tasks_id(swh_scheduler):
    """``task list --task-id 2`` must print only the second of three tasks."""
    swh_scheduler.create_tasks([
        create_task_dict("swh-test-ping", "oneshot", key=key)
        for key in ("value1", "value2", "value3")
    ])

    cli_args = ["task", "list", "--task-id", "2"]
    outcome = invoke(swh_scheduler, False, cli_args)

    # The expected text is a regular expression: the parenthesised part of
    # the "Next run" line is matched with a wildcard.
    expected = r"""
Found 1 tasks

Task 2
  Next run: today \(.*\)
  Interval: 1 day, 0:00:00
  Type: swh-test-ping
  Policy: oneshot
  Status: next_run_not_scheduled
  Priority:\x20
  Args:
  Keyword args:
    key: 'value2'

""".lstrip()
    output = outcome.output
    assert outcome.exit_code == 0, output
    assert re.fullmatch(expected, output, re.MULTILINE), output
Пример #2
0
def test_list_pending_tasks_filter_2(swh_scheduler):
    """``task list-pending swh-test-ping`` must skip tasks of another type.

    Two tasks are created, one per task type; only the ``swh-test-ping``
    one (task 2) is expected in the listing.
    """
    multiping_task = create_task_dict("swh-test-multiping", "oneshot", key="value")
    ping_task = create_task_dict("swh-test-ping", "oneshot", key="value2")
    swh_scheduler.create_tasks([multiping_task, ping_task])

    cli_args = ["task", "list-pending", "swh-test-ping"]
    outcome = invoke(swh_scheduler, False, cli_args)

    expected = r"""
Found 1 swh-test-ping tasks

Task 2
  Next run: today \(.*\)
  Interval: 1 day, 0:00:00
  Type: swh-test-ping
  Policy: oneshot
  Args:
  Keyword args:
    key: 'value2'

""".lstrip()
    output = outcome.output
    assert outcome.exit_code == 0, output
    assert re.fullmatch(expected, output, re.MULTILINE), output
Пример #3
0
def test_list_pending_tasks_before(swh_scheduler):
    """``task list-pending --before <date>`` must only list earlier tasks.

    Task 1 is pushed three days ahead and task 2 one day ahead; with a
    ``--before`` date two days from today, only task 2 is expected.
    """
    task1 = create_task_dict("swh-test-ping", "oneshot", key="value")
    task2 = create_task_dict("swh-test-ping", "oneshot", key="value2")
    for task, delay_days in ((task1, 3), (task2, 1)):
        task["next_run"] += datetime.timedelta(days=delay_days)
    swh_scheduler.create_tasks([task1, task2])

    cutoff = datetime.date.today() + datetime.timedelta(days=2)
    cli_args = [
        "task",
        "list-pending",
        "swh-test-ping",
        "--before",
        cutoff.isoformat(),
    ]
    outcome = invoke(swh_scheduler, False, cli_args)

    expected = r"""
Found 1 swh-test-ping tasks

Task 2
  Next run: tomorrow \(.*\)
  Interval: 1 day, 0:00:00
  Type: swh-test-ping
  Policy: oneshot
  Args:
  Keyword args:
    key: 'value2'

""".lstrip()
    output = outcome.output
    assert outcome.exit_code == 0, output
    assert re.fullmatch(expected, output, re.MULTILINE), output
Пример #4
0
def test_list_pending_tasks(swh_scheduler):
    """``task list-pending`` lists ready tasks, and none once they are grabbed.

    Task 2 is delayed by one day, so only task 1 is expected in the first
    listing. After ``grab_ready_tasks`` the listing is expected to be empty.
    """

    def list_pending():
        # Run the same CLI command for both phases of the test.
        return invoke(
            swh_scheduler, False, ["task", "list-pending", "swh-test-ping"])

    task1 = create_task_dict("swh-test-ping", "oneshot", key="value1")
    task2 = create_task_dict("swh-test-ping", "oneshot", key="value2")
    task2["next_run"] += datetime.timedelta(days=1)
    swh_scheduler.create_tasks([task1, task2])

    # Phase 1: only the task due today shows up.
    first = list_pending()

    expected = r"""
Found 1 swh-test-ping tasks

Task 1
  Next run: today \(.*\)
  Interval: 1 day, 0:00:00
  Type: swh-test-ping
  Policy: oneshot
  Args:
  Keyword args:
    key: 'value1'

""".lstrip()
    assert first.exit_code == 0, first.output
    assert re.fullmatch(expected, first.output, re.MULTILINE), first.output

    # Phase 2: once the ready tasks have been grabbed, nothing is pending.
    swh_scheduler.grab_ready_tasks("swh-test-ping")

    second = list_pending()

    expected = r"""
Found 0 swh-test-ping tasks

""".lstrip()
    assert second.exit_code == 0, second.output
    assert re.fullmatch(expected, second.output, re.MULTILINE), second.output
Пример #5
0
def test_create_task_dict(mock_datetime):
    """Check the dict produced by ``utils.create_task_dict``.

    ``priority`` and ``retries_left`` are expected as top-level keys, while
    the remaining keyword argument ends up under ``arguments.kwargs``. The
    mocked ``now`` is expected to be called exactly once with ``tz=utc``.
    """
    mock_datetime.now.return_value = "date"

    positional_args = ("arg0", "arg1")
    actual_task = utils.create_task_dict(
        "task-type",
        "recurring",
        *positional_args,
        priority="low",
        other_stuff="normal",
        retries_left=3,
    )

    expected_arguments = {
        "args": ("arg0", "arg1"),
        "kwargs": {"other_stuff": "normal"},
    }
    expected_task = {
        "type": "task-type",
        "policy": "recurring",
        "next_run": "date",
        "arguments": expected_arguments,
        "priority": "low",
        "retries_left": 3,
    }

    assert actual_task == expected_task
    mock_datetime.now.assert_called_once_with(tz=timezone.utc)
Пример #6
0
def test_list_tasks(swh_scheduler):
    """``task list`` must show every task together with its status.

    Task 1 is delayed by three days and two hours and is expected to stay
    ``next_run_not_scheduled``; task 2 is expected as ``next_run_scheduled``
    after the ``grab_ready_tasks`` call.
    """
    task1 = create_task_dict("swh-test-ping", "oneshot", key="value1")
    task2 = create_task_dict("swh-test-ping", "oneshot", key="value2")
    task1["next_run"] += datetime.timedelta(days=3, hours=2)
    swh_scheduler.create_tasks([task1, task2])

    swh_scheduler.grab_ready_tasks("swh-test-ping")

    outcome = invoke(swh_scheduler, False, ["task", "list"])

    expected = r"""
Found 2 tasks

Task 1
  Next run: .+ \(.*\)
  Interval: 1 day, 0:00:00
  Type: swh-test-ping
  Policy: oneshot
  Status: next_run_not_scheduled
  Priority:\x20
  Args:
  Keyword args:
    key: 'value1'

Task 2
  Next run: today \(.*\)
  Interval: 1 day, 0:00:00
  Type: swh-test-ping
  Policy: oneshot
  Status: next_run_scheduled
  Priority:\x20
  Args:
  Keyword args:
    key: 'value2'

""".lstrip()
    output = outcome.output
    assert outcome.exit_code == 0, output
    assert re.fullmatch(expected, output, re.MULTILINE), output
Пример #7
0
    def task_dict(self, origin_type, origin_url, **kwargs):
        """Build the task dict for a recurring origin update.

        Extra keyword arguments are accepted but not used here.

        Args:
            origin_type (string): suffix of the 'origin-update-' task type
            origin_url (string): forwarded as the task's positional argument
        Returns:
            the dict returned by utils.create_task_dict
        """
        return utils.create_task_dict(
            'origin-update-%s' % origin_type,
            'recurring',
            origin_url,
        )
Пример #8
0
def test_list_pending_tasks_limit(swh_scheduler):
    """``task list-pending --limit 3`` must list exactly three tasks.

    Ten ``swh-test-ping`` tasks are created; with ``--limit 3`` the header
    count and the number of listed entries are both expected to be three.
    """
    swh_scheduler.create_tasks([
        create_task_dict("swh-test-ping", "oneshot", key="value%d" % i)
        for i in range(10)
    ])

    result = invoke(
        swh_scheduler,
        False,
        [
            "task",
            "list-pending",
            "swh-test-ping",
            "--limit",
            "3",
        ],
    )

    # The "Found N" header must agree with the three entries listed below it
    # (it previously said 2, contradicting both the limit and the listing).
    expected = r"""
Found 3 swh-test-ping tasks

Task 1
  Next run: today \(.*\)
  Interval: 1 day, 0:00:00
  Type: swh-test-ping
  Policy: oneshot
  Args:
  Keyword args:
    key: 'value0'

Task 2
  Next run: today \(.*\)
  Interval: 1 day, 0:00:00
  Type: swh-test-ping
  Policy: oneshot
  Args:
  Keyword args:
    key: 'value1'

Task 3
  Next run: today \(.*\)
  Interval: 1 day, 0:00:00
  Type: swh-test-ping
  Policy: oneshot
  Args:
  Keyword args:
    key: 'value2'

""".lstrip()
    assert result.exit_code == 0, result.output
    assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
def test_scheduler_fixture(swh_scheduler_celery_app,
                           swh_scheduler_celery_worker, swh_scheduler):
    """Test that the scheduler fixture works properly."""
    ping_type = swh_scheduler.get_task_type("swh-test-ping")

    assert ping_type
    assert ping_type["backend_name"] == TASK_PING

    ping_task = create_task_dict("swh-test-ping", "oneshot")
    swh_scheduler.create_tasks([ping_task])

    backend_tasks = run_ready_tasks(swh_scheduler, swh_scheduler_celery_app)
    assert backend_tasks
    for backend_task in backend_tasks:
        # Fetching the result makes sure the task completed
        async_result = AsyncResult(id=backend_task["backend_id"])
        async_result.get()
def test_task_exception(swh_scheduler_celery_app, swh_scheduler_celery_worker,
                        swh_scheduler):
    """Fetching the result of a ``swh-test-error`` task must raise.

    A single task is scheduled and run; reading its result is expected to
    raise ``NotImplementedError``.
    """
    error_type = swh_scheduler.get_task_type("swh-test-error")
    assert error_type
    assert error_type["backend_name"] == TASK_ERROR

    error_task = create_task_dict("swh-test-error", "oneshot")
    swh_scheduler.create_tasks([error_task])

    backend_tasks = run_ready_tasks(swh_scheduler, swh_scheduler_celery_app)
    assert len(backend_tasks) == 1

    pending = AsyncResult(id=backend_tasks[0]["backend_id"])
    with pytest.raises(NotImplementedError):
        pending.get()
Пример #11
0
def test_list_pending_tasks_filter(swh_scheduler):
    """``task list-pending swh-test-ping`` must report no task when the only
    existing task has the ``swh-test-multiping`` type."""
    swh_scheduler.create_tasks([
        create_task_dict("swh-test-multiping", "oneshot", key="value"),
    ])

    cli_args = ["task", "list-pending", "swh-test-ping"]
    outcome = invoke(swh_scheduler, False, cli_args)

    expected = r"""
Found 0 swh-test-ping tasks

""".lstrip()
    output = outcome.output
    assert outcome.exit_code == 0, output
    assert re.fullmatch(expected, output, re.MULTILINE), output
Пример #12
0
    def task_dict(self, origin_type, origin_url, **kwargs):
        """(Override) Return task dict for loading a npm package into the archive

        This is overridden from the lister_base as more information is
        needed for the ingestion task creation: the task policy is read from
        ``self.config['loading_task_policy']``, and the ``name`` and
        ``html_url`` keyword arguments (``None`` when absent) are forwarded
        as the package name and the package metadata url.

        """
        return create_task_dict(
            'origin-update-%s' % origin_type,
            self.config['loading_task_policy'],
            kwargs.get('name'),
            origin_url,
            package_metadata_url=kwargs.get('html_url'),
        )
Пример #13
0
    def task_dict(self, origin_type, origin_url, **kwargs):
        """(Override) Return task format dict

        This is overridden from the lister_base as more information is
        needed for the ingestion task creation: the ``name`` and
        ``html_url`` keyword arguments (``None`` when absent) are forwarded
        as the project name and the project metadata url of a recurring
        'origin-update-<origin_type>' task.

        """
        return utils.create_task_dict(
            'origin-update-%s' % origin_type,
            'recurring',
            kwargs.get('name'),
            origin_url,
            project_metadata_url=kwargs.get('html_url'),
        )
def test_run_ready_task_with_priority(swh_scheduler_celery_app,
                                      swh_scheduler_celery_worker,
                                      swh_scheduler):
    """Ensure scheduler runner schedules priority tasks ready for scheduling"""
    task_type_name = "swh-test-add"
    task_type = swh_scheduler.get_task_type(task_type_name)
    assert task_type
    assert task_type["backend_name"] == TASK_ADD

    # (policy, positional args of the add task, priority)
    task_inputs = [
        ("oneshot", (10, 22), "low"),
        ("oneshot", (20, 10), "normal"),
        ("recurring", (30, 10), "high"),
    ]

    tasks = swh_scheduler.create_tasks(
        create_task_dict(task_type_name, policy, *args, priority=priority)
        for policy, args, priority in task_inputs)

    assert len(tasks) == len(task_inputs)

    for created in tasks:
        assert created["status"] == "next_run_not_scheduled"
        assert created["priority"] is not None
    task_ids = {created["id"] for created in tasks}

    backend_tasks = run_ready_tasks(swh_scheduler,
                                    swh_scheduler_celery_app,
                                    task_types=[],
                                    with_priority=True)
    assert len(backend_tasks) == len(tasks)

    scheduled_tasks = swh_scheduler.search_tasks(task_type=task_type_name)
    assert len(scheduled_tasks) == len(tasks)
    for scheduled in scheduled_tasks:
        assert scheduled["status"] == "next_run_scheduled"
        assert scheduled["id"] in task_ids

    # Ensure each priority task is indeed scheduled to the queue backend:
    # the backend result of each task must be the sum of its arguments.
    for (_, args, _), backend_task in zip(task_inputs, backend_tasks):
        value = AsyncResult(id=backend_task["backend_id"]).get()
        assert value == sum(args)
Пример #15
0
def schedule_origin_batches(scheduler, task_type, origins, origin_batch_size,
                            kwargs):
    """Group origins into "oneshot" tasks and schedule them in batches.

    Origins are consumed ``origin_batch_size`` at a time; each group becomes
    the single positional argument of one task. Tasks are submitted to the
    scheduler at most ``TASK_BATCH_SIZE`` at a time, and a progress line is
    echoed after every batch.

    Args:
        scheduler: object exposing ``create_tasks``; when falsy, tasks are
            built and counted but not submitted
        task_type: task type name forwarded to ``create_task_dict``
        origins: iterable of origins (iterator or re-iterable container)
        origin_batch_size: maximum number of origins per task
        kwargs (dict): extra keyword arguments forwarded to
            ``create_task_dict``
    """
    from itertools import islice

    from swh.scheduler.utils import create_task_dict

    # islice() only makes progress on a real iterator. Without this, a list
    # or tuple would be re-sliced from its start on every pass and the loop
    # below would never terminate.
    origins = iter(origins)

    nb_origins = 0
    nb_tasks = 0

    while True:
        task_batch = []
        for _ in range(TASK_BATCH_SIZE):
            # Group origins
            origin_batch = list(islice(origins, origin_batch_size))
            if not origin_batch:
                break
            nb_origins += len(origin_batch)

            # Create a task for these origins
            task_batch.append(
                create_task_dict(task_type, "oneshot", origin_batch, **kwargs))

        # Schedule a batch of tasks
        if not task_batch:
            break
        nb_tasks += len(task_batch)
        if scheduler:
            scheduler.create_tasks(task_batch)
        click.echo("Scheduled %d tasks (%d origins)." % (nb_tasks, nb_origins))

    # Print final status.
    if nb_tasks:
        click.echo("Done.")
    else:
        click.echo("Nothing to do (no origin metadata matched the criteria).")
Пример #16
0
def process_origin_visits(visits, scheduler, task_names):
    """Schedule origin-metadata tasks for the visits whose status is "full".

    Nothing is scheduled when ``task_names`` has no truthy
    ``"origin_metadata"`` entry. Otherwise the full visits are grouped by
    ``MAX_ORIGINS_PER_TASK`` and one "oneshot" task (``retries_left=1``) is
    created per group, taking the list of origin urls as its argument.

    Args:
        visits: iterable of visit dicts with "status" and "origin" keys;
            "origin" is either the url string or a dict with a "url" key
        scheduler: object exposing ``create_tasks``
        task_names (dict): maps "origin_metadata" to the task type to use
    """
    logging.debug("processing origin visits %r", visits)

    metadata_task_type = task_names.get("origin_metadata")
    if not metadata_task_type:
        return

    full_visits = [v for v in visits if v["status"] == "full"]
    task_dicts = []
    for visit_batch in grouper(full_visits, MAX_ORIGINS_PER_TASK):
        visit_urls = [
            v["origin"] if isinstance(v["origin"], str) else v["origin"]["url"]
            for v in visit_batch
        ]
        task_dicts.append(
            create_task_dict(
                task_names["origin_metadata"],
                "oneshot",
                visit_urls,
                retries_left=1,
            ))

    if task_dicts:
        scheduler.create_tasks(task_dicts)