def test_lister_gitlab_url_computation(url, swh_scheduler):
    """Check how the lister normalizes its base url and builds page urls.

    The ``url`` attribute kept by the lister must not end with a slash, and
    the url returned by ``page_url()`` must start with that base url followed
    by ``/projects``.
    """
    gitlab_lister = GitLabLister(scheduler=swh_scheduler, url=url)
    stored_url_has_trailing_slash = gitlab_lister.url.endswith("/")
    assert not stored_url_has_trailing_slash

    first_page_url = gitlab_lister.page_url()
    # the "/" separator must sit between the base url and the endpoint name
    expected_prefix = f"{gitlab_lister.url}/projects"
    assert first_page_url.startswith(expected_prefix)
def test_lister_gitlab_heptapod(datadir, swh_scheduler, requests_mock):
    """List one mocked Heptapod page and check every origin is typed ``hg``.

    The test asserts that the recorded page only carries ``hg`` and ``hg_git``
    entries, that the run reports a single page, and that each origin stored
    in the scheduler has the ``hg`` visit type, an url on the instance and a
    ``last_update`` value.
    """
    lister_name = "heptapod"
    forge = "foss.heptapod.net"
    heptapod_lister = GitLabLister(
        scheduler=swh_scheduler,
        url=api_url(forge),
        name=lister_name,
        instance=forge,
    )
    assert heptapod_lister.LISTER_NAME == lister_name

    page = gitlab_page_response(datadir, forge, 1)

    mocked_answers = [{"json": page}]
    requests_mock.get(
        heptapod_lister.page_url(),
        mocked_answers,
        additional_matcher=_match_request,
    )

    stats = heptapod_lister.run()
    origin_count = len(page)

    # the recorded fixture must only describe mercurial-backed projects
    for project in page:
        assert project["vcs_type"] in ("hg", "hg_git")

    assert stats == ListerStats(pages=1, origins=origin_count)

    lister_id = heptapod_lister.lister_obj.id
    recorded = heptapod_lister.scheduler.get_listed_origins(lister_id).results
    assert len(recorded) == origin_count

    origin_prefix = f"https://{forge}"
    for origin in recorded:
        assert origin.visit_type == "hg"
        assert origin.url.startswith(origin_prefix)
        assert origin.last_update is not None
def test_lister_gitlab(datadir, swh_scheduler, requests_mock):
    """Run a full listing over a single mocked page of gitlab.com projects.

    The run must report one page holding as many origins as the recorded
    response, and each origin stored in the scheduler must be a ``git`` one
    with an url on the instance and a ``last_update`` value.
    """
    forge = "gitlab.com"
    gitlab_lister = GitLabLister(
        scheduler=swh_scheduler, url=api_url(forge), instance=forge
    )

    page = gitlab_page_response(datadir, forge, 1)

    mocked_answers = [{"json": page}]
    requests_mock.get(
        gitlab_lister.page_url(),
        mocked_answers,
        additional_matcher=_match_request,
    )

    stats = gitlab_lister.run()
    origin_count = len(page)
    assert stats == ListerStats(pages=1, origins=origin_count)

    lister_id = gitlab_lister.lister_obj.id
    recorded = gitlab_lister.scheduler.get_listed_origins(lister_id).results
    assert len(recorded) == origin_count

    origin_prefix = f"https://{forge}"
    for origin in recorded:
        assert origin.visit_type == "git"
        assert origin.url.startswith(origin_prefix)
        assert origin.last_update is not None
def test_lister_gitlab_with_pages(swh_scheduler, requests_mock, datadir):
    """Follow the ``Link: rel=next`` header from a first page to a second one.

    Two recorded pages are served; the first answer advertises the second url
    in its ``Link`` header. The run must report two pages and the origins of
    both responses.
    """
    forge = "gite.lirmm.fr"
    gitlab_lister = GitLabLister(scheduler=swh_scheduler, url=api_url(forge))

    first_page = gitlab_page_response(datadir, forge, 1)
    second_page = gitlab_page_response(datadir, forge, 2)

    first_url = gitlab_lister.page_url()
    second_url = gitlab_lister.page_url(2)

    # first answer points at the second url through its Link header
    next_link = {"Link": f"<{second_url}>; rel=next"}
    requests_mock.get(
        first_url,
        [{"json": first_page, "headers": next_link}],
        additional_matcher=_match_request,
    )

    # second answer has no Link header
    requests_mock.get(
        second_url,
        [{"json": second_page}],
        additional_matcher=_match_request,
    )

    stats = gitlab_lister.run()

    origin_count = len(first_page) + len(second_page)
    assert stats == ListerStats(pages=2, origins=origin_count)

    lister_id = gitlab_lister.lister_obj.id
    recorded = gitlab_lister.scheduler.get_listed_origins(lister_id).results
    assert len(recorded) == origin_count

    origin_prefix = f"https://{forge}"
    for origin in recorded:
        assert origin.visit_type == "git"
        assert origin.url.startswith(origin_prefix)
        assert origin.last_update is not None
# Example #5
# 0
def test_lister_gitlab_rate_limit(swh_scheduler, requests_mock, datadir, mocker):
    """Retry a page that first answers twice with a rate-limit response.

    The second page url answers ``forbidden`` with ``RateLimit-Remaining: 0``
    two times before serving its content. The run must still report both
    pages, and the patched ``sleep`` of the retry logic must have been called
    with ``1`` then ``WAIT_EXP_BASE``.
    """
    forge = "gite.lirmm.fr"
    gitlab_lister = GitLabLister(
        scheduler=swh_scheduler, url=api_url(forge), instance=forge
    )

    first_url = gitlab_lister.page_url()
    first_page = gitlab_page_response(datadir, forge, 1)
    second_url = gitlab_lister.page_url(2)
    second_page = gitlab_page_response(datadir, forge, 2)

    next_link = {"Link": f"<{second_url}>; rel=next"}
    requests_mock.get(
        first_url,
        [{"json": first_page, "headers": next_link}],
        additional_matcher=_match_request,
    )

    # two rate-limited answers, each built as its own dict
    rate_limited_answers = [
        {"status_code": codes.forbidden, "headers": {"RateLimit-Remaining": "0"}}
        for _ in range(2)
    ]
    successful_answer = {"json": second_page}
    requests_mock.get(
        second_url,
        [*rate_limited_answers, successful_answer],
        additional_matcher=_match_request,
    )

    # patch the sleep used by the retry behavior so the test stays fast
    retry_state = gitlab_lister.get_page_result.retry
    mock_sleep = mocker.patch.object(retry_state, "sleep")

    stats = gitlab_lister.run()

    origin_count = len(first_page) + len(second_page)
    assert stats == ListerStats(pages=2, origins=origin_count)

    expected_sleeps = [1, WAIT_EXP_BASE]
    assert_sleep_calls(mocker, mock_sleep, expected_sleeps)
def test_lister_gitlab_credentials(swh_scheduler):
    """Gitlab lister supports credentials configuration

    A credential entry is configured for the instance and the lister session
    is expected to carry it as a ``Bearer`` token in its ``Authorization``
    header.
    """
    instance = "gitlab"
    # Single source of truth for the secret: the previous placeholder password
    # ("******") could never match the "Bearer api-token" header asserted below.
    # NOTE(review): assumes the lister sends the credential's "password" as the
    # API token; confirm against the lister implementation.
    api_token = "api-token"
    credentials = {
        "gitlab": {instance: [{"username": "******", "password": api_token}]}
    }
    url = api_url(instance)
    lister = GitLabLister(
        scheduler=swh_scheduler, url=url, instance=instance, credentials=credentials
    )
    assert lister.session.headers["Authorization"] == f"Bearer {api_token}"
def test_lister_gitlab_http_error_500(swh_scheduler, requests_mock, datadir):
    """A page answering HTTP 500 is skipped and listing resumes afterwards.

    The first page links to a second url which only answers with status 500.
    The run is still expected to report two pages, made of the first and the
    third recorded responses.
    """
    forge = "gite.lirmm.fr"
    gitlab_lister = GitLabLister(
        scheduler=swh_scheduler, url=api_url(forge), instance=forge
    )

    first_url = gitlab_lister.page_url()
    first_page = gitlab_page_response(datadir, forge, 1)
    # failing and following urls are derived from multiples of ``per_page``
    failing_url = gitlab_lister.page_url(gitlab_lister.per_page)
    third_url = gitlab_lister.page_url(2 * gitlab_lister.per_page)
    third_page = gitlab_page_response(datadir, forge, 3)

    next_link = {"Link": f"<{failing_url}>; rel=next"}
    requests_mock.get(
        first_url,
        [{"json": first_page, "headers": next_link}],
        additional_matcher=_match_request,
    )

    # this url never answers anything but a server error
    requests_mock.get(
        failing_url,
        [{"status_code": 500}],
        additional_matcher=_match_request,
    )

    requests_mock.get(
        third_url,
        [{"json": third_page}],
        additional_matcher=_match_request,
    )

    stats = gitlab_lister.run()

    origin_count = len(first_page) + len(third_page)
    assert stats == ListerStats(pages=2, origins=origin_count)
def test_lister_gitlab_http_errors(
    swh_scheduler, requests_mock, datadir, mocker, status_code
):
    """Retry a page whose first answer is an HTTP error status.

    The second page url first answers with ``status_code`` and then serves its
    content. The run must report both pages and the patched ``sleep`` of the
    retry logic must have been called once, with ``1``.

    NOTE(review): ``status_code`` is presumably injected by a parametrization
    that is not visible next to this definition; confirm it is in place.
    """
    forge = "gite.lirmm.fr"
    gitlab_lister = GitLabLister(
        scheduler=swh_scheduler, url=api_url(forge), instance=forge
    )

    first_url = gitlab_lister.page_url()
    first_page = gitlab_page_response(datadir, forge, 1)
    second_url = gitlab_lister.page_url(2)
    second_page = gitlab_page_response(datadir, forge, 2)

    next_link = {"Link": f"<{second_url}>; rel=next"}
    requests_mock.get(
        first_url,
        [{"json": first_page, "headers": next_link}],
        additional_matcher=_match_request,
    )

    # one failure with the given status, then a regular answer
    failing_answer = {"status_code": status_code}
    successful_answer = {"json": second_page}
    requests_mock.get(
        second_url,
        [failing_answer, successful_answer],
        additional_matcher=_match_request,
    )

    # patch the sleep used by the retry behavior so the test stays fast
    retry_state = gitlab_lister.get_page_result.retry
    mock_sleep = mocker.patch.object(retry_state, "sleep")

    stats = gitlab_lister.run()

    origin_count = len(first_page) + len(second_page)
    assert stats == ListerStats(pages=2, origins=origin_count)

    expected_sleeps = [1]
    assert_sleep_calls(mocker, mock_sleep, expected_sleeps)
# Example #9
# 0
def list_gitlab_full(**lister_args):
    """Run a non-incremental GitLab listing and return its stats as a dict.

    Args:
        **lister_args: keyword arguments forwarded to
            ``GitLabLister.from_configfile`` next to ``incremental=False``.

    Returns:
        The value of ``.dict()`` called on the result of the lister's ``run()``.
    """
    full_lister = GitLabLister.from_configfile(incremental=False, **lister_args)
    stats = full_lister.run()
    return stats.dict()
# Example #10
# 0
def test_lister_gitlab_incremental(swh_scheduler, requests_mock, datadir):
    """Run two incremental listings in a row and check the saved state.

    The first run lists pages 1 and 2 and must leave the second page url in
    ``state.last_seen_next_link``. A second lister built with the same
    arguments is then served pages 2 and 3 and must end with the third page
    url in its state. Both listers must share the same lister id, and the
    scheduler must hold the origins of the three recorded pages.
    """
    forge = "gite.lirmm.fr"
    base_url = api_url(forge)
    first_lister = GitLabLister(
        scheduler=swh_scheduler, url=base_url, instance=forge, incremental=True
    )

    first_url = first_lister.page_url()
    first_page = gitlab_page_response(datadir, forge, 1)
    second_url = first_lister.page_url(2)
    second_page = gitlab_page_response(datadir, forge, 2)
    third_url = first_lister.page_url(3)
    third_page = gitlab_page_response(datadir, forge, 3)

    # first run: page 1 links to page 2, which has no next link
    link_to_second = {"Link": f"<{second_url}>; rel=next"}
    requests_mock.get(
        first_url,
        [{"json": first_page, "headers": link_to_second}],
        additional_matcher=_match_request,
    )
    requests_mock.get(
        second_url,
        [{"json": second_page}],
        additional_matcher=_match_request,
    )

    first_stats = first_lister.run()

    first_run_origins = len(first_page) + len(second_page)
    assert first_stats == ListerStats(pages=2, origins=first_run_origins)
    assert first_lister.state.last_seen_next_link == second_url

    second_lister = GitLabLister(
        scheduler=swh_scheduler, url=base_url, instance=forge, incremental=True
    )

    # second run: page 2 is registered again, this time linking to page 3
    link_to_third = {"Link": f"<{third_url}>; rel=next"}
    requests_mock.get(
        second_url,
        [{"json": second_page, "headers": link_to_third}],
        additional_matcher=_match_request,
    )
    requests_mock.get(
        third_url,
        [{"json": third_page}],
        additional_matcher=_match_request,
    )

    second_stats = second_lister.run()

    second_run_origins = len(second_page) + len(third_page)
    assert second_stats == ListerStats(pages=2, origins=second_run_origins)
    assert second_lister.state.last_seen_next_link == third_url

    assert first_lister.lister_obj.id == second_lister.lister_obj.id
    lister_id = second_lister.lister_obj.id
    recorded = second_lister.scheduler.get_listed_origins(lister_id).results

    assert len(recorded) == len(first_page) + len(second_page) + len(third_page)

    origin_prefix = f"https://{forge}"
    for origin in recorded:
        assert origin.visit_type == "git"
        assert origin.url.startswith(origin_prefix)
        assert origin.last_update is not None