def test_delic_html_parser_srcset():
    '''Test parsing of srcset attributes.

    Feeds an <img> tag whose srcset lists two root-relative candidates
    (each followed by a width descriptor) and expects one Link per
    candidate to be put on the queue, as absolute URLs on example.com.
    '''
    # Setup mocks
    link_queue = mock.MagicMock()
    checked_urls = []

    # Setup test data (plain literal: nothing is interpolated here, so the
    # string does not need an f-prefix)
    html_snippet = '<img srcset="/test.png 500w, /test2.png 1000w" />'

    # Call parser
    parser = DelicHTMLParser(
        delic_config=get_test_config(),
        base_url='http://example.com',
        link_queue=link_queue,
        checked_urls=checked_urls,
        page='http://example.com/subfolder/index.html',
    )
    parser.feed(html_snippet)

    # Assert results: both srcset candidates were queued, in document order
    link_queue.put.assert_has_calls([
        mock.call(
            Link(page='http://example.com/subfolder/index.html',
                 url='http://example.com/test.png')),
        mock.call(
            Link(page='http://example.com/subfolder/index.html',
                 url='http://example.com/test2.png')),
    ])
def test_check_link_invalid_status_code(mock_parser):
    '''A non-standard status code must be reported verbatim.

    The HEAD request is answered with status 999. The link is expected to
    end up in the broken list with both the code and the configured body
    text visible in its status, and the parser must never be fed.
    '''
    # Register the fake HEAD response
    target_url = 'http://example.com/test.html'
    responses.add(
        responses.HEAD,
        target_url,
        status=999,
        body='test-invalid-status',
    )

    # Collaborators
    parser_instance = mock_parser.return_value
    broken = []
    link_under_test = Link(
        page='http://example.com/index.html',
        url=target_url,
    )

    # Exercise
    check_link(
        config=get_test_config(),
        link_queue=None,
        checked_urls=[],
        broken_links=broken,
        base_url='http://example.com',
        link=link_under_test,
    )

    # Verify
    parser_instance.feed.assert_not_called()
    assert len(broken) == 1
    reported_status = broken[0].status
    assert '999' in reported_status
    assert 'test-invalid-status' in reported_status
def test_delic_html_parser_absolute_url(schema, schema_accepted, tag, attr,
                                        tag_accepted):
    '''Absolute URLs are queued only for accepted schema/tag combinations.

    Builds a single element ``<tag attr="schema://example.com/test">`` and
    feeds it to the parser. A Link is expected on the queue only when both
    ``schema_accepted`` and ``tag_accepted`` are true; otherwise nothing
    may be queued.
    '''
    page_url = 'http://example.com/test.html'
    queue_mock = mock.MagicMock()

    # Markup under test
    markup = f'<{tag} {attr}="{schema}://example.com/test">Test</{tag}>'

    # Run the parser over the markup
    html_parser = DelicHTMLParser(
        delic_config=get_test_config(),
        base_url='http://example.com',
        link_queue=queue_mock,
        checked_urls=[],
        page=page_url,
    )
    html_parser.feed(markup)

    # Rejected combination: the queue must stay untouched
    if not (schema_accepted and tag_accepted):
        queue_mock.put.assert_not_called()
        return

    # Accepted combination: the URL is queued unchanged
    queue_mock.put.assert_called_with(
        Link(page=page_url, url=f'{schema}://example.com/test'))
def test_check_link_retry_with_get(mock_parser, status, success):
    '''HEAD answered with 405 (Method not allowed) triggers a GET retry.

    The GET response carries the parametrized ``status``; ``success`` tells
    whether that status should leave the broken list empty. The parser is
    never expected to be fed in this scenario.
    '''
    # Fake responses: HEAD is refused, GET decides the outcome
    target_url = 'http://example.com/test.html'
    responses.add(responses.HEAD, target_url, status=405)
    responses.add(responses.GET, target_url, status=status)

    # Collaborators
    parser_instance = mock_parser.return_value
    broken = []
    link_under_test = Link(
        page='http://example.com/index.html',
        url=target_url,
    )

    # Exercise
    check_link(
        config=get_test_config(),
        link_queue=None,
        checked_urls=[],
        broken_links=broken,
        base_url='http://example.com',
        link=link_under_test,
    )

    # Verify
    parser_instance.feed.assert_not_called()
    expected_broken_count = 0 if success else 1
    assert len(broken) == expected_broken_count
    if not success:
        assert str(status) in broken[0].status
def test_check_link_external_html_page(mock_parser, status, success):
    '''External HTML pages are checked with HEAD only and never parsed.

    Only a HEAD response (content type text/html) is registered for the
    external URL. ``success`` tells whether the parametrized ``status``
    should leave the broken list empty.
    '''
    # Fake HEAD response for a host outside the base URL
    external_url = 'http://external.com'
    responses.add(
        responses.HEAD,
        external_url,
        status=status,
        content_type='text/html',
    )

    # Collaborators
    parser_instance = mock_parser.return_value
    broken = []
    link_under_test = Link(
        page='http://example.com/index.html',
        url=external_url,
    )

    # Exercise
    check_link(
        config=get_test_config(),
        link_queue=None,
        checked_urls=[],
        broken_links=broken,
        base_url='http://example.com',
        link=link_under_test,
    )

    # Verify: the page content is never handed to the parser
    parser_instance.feed.assert_not_called()
    if success:
        assert not broken
        return
    assert len(broken) == 1
    assert str(status) in broken[0].status
def test_check_link_internal_html_page(mock_parser, status, success):
    '''Internal HTML pages are downloaded and handed to the parser.

    HEAD answers with the parametrized ``status`` and a text/html content
    type; a GET for the same URL serves the body 'test-html-page'. On
    success that body must reach the parser; otherwise the link is reported
    as broken and the parser stays untouched.
    '''
    # Fake responses
    page_url = 'http://example.com/test.html'
    responses.add(
        responses.HEAD,
        page_url,
        status=status,
        content_type='text/html',
    )
    responses.add(responses.GET, page_url, body='test-html-page', status=200)

    # Collaborators
    parser_instance = mock_parser.return_value
    broken = []
    link_under_test = Link(page='http://example.com/index.html', url=page_url)

    # Exercise
    check_link(
        config=get_test_config(),
        link_queue=None,
        checked_urls=[],
        broken_links=broken,
        base_url='http://example.com',
        link=link_under_test,
    )

    # Verify
    if not success:
        assert len(broken) == 1
        assert str(status) in broken[0].status
        parser_instance.feed.assert_not_called()
        return
    assert not broken
    parser_instance.feed.assert_called_with('test-html-page')
def test_check_link_worker_success(mock_check_link):
    '''The worker takes a link off the queue and passes it to check_link.

    The mocked queue yields one link and then raises Empty, which
    propagates out of the worker and ends the call. Afterwards the link's
    URL must be in the checked list, the queue item marked done, and
    check_link called exactly once with the worker's arguments.
    '''
    base_url = 'http://example.com'
    link_url = 'http://example.com/test.html'

    # Queue mock: one link, then Empty
    queue_mock = mock.MagicMock()
    queued_link = Link(page='http://example.com/index.html', url=link_url)
    queue_mock.get.side_effect = [queued_link, Empty]
    visited = []

    # Run the worker until the queue signals Empty
    with pytest.raises(Empty):
        check_link_worker(
            get_test_config(),
            queue_mock,
            visited,
            [],
            base_url,
        )

    # Queue interaction
    assert queue_mock.get.call_count == 2
    assert queue_mock.task_done.call_count == 1

    # URL bookkeeping
    assert visited == [link_url]

    # Delegation to check_link
    assert mock_check_link.call_count == 1
    mock_check_link.assert_called_with(
        get_test_config(),
        queue_mock,
        [link_url],
        [],
        base_url,
        queued_link,
    )
def test_check_link_worker_fail(mock_check_link):
    '''A RequestException from check_link is recorded as a broken link.

    check_link is mocked to raise RequestException(999,
    'test-request-exception'). The worker is expected to keep going (the
    second queue read raises Empty and ends the call), mark the item done
    and add an entry whose status mentions the exception text.
    '''
    base_url = 'http://example.com'
    link_url = 'http://example.com/test.html'

    # Queue mock: a copy of the link, then Empty
    queue_mock = mock.MagicMock()
    queued_link = Link(page='http://example.com/index.html', url=link_url)
    queue_mock.get.side_effect = [queued_link.copy(), Empty]

    # check_link blows up on every call
    mock_check_link.side_effect = RequestException(
        999,
        'test-request-exception',
    )

    visited = []
    broken = []

    # Run the worker until the queue signals Empty
    with pytest.raises(Empty):
        check_link_worker(
            get_test_config(),
            queue_mock,
            visited,
            broken,
            base_url,
        )

    # Queue interaction
    assert queue_mock.get.call_count == 2
    assert queue_mock.task_done.call_count == 1

    # Bookkeeping: URL visited, failure recorded
    assert visited == [link_url]
    assert len(broken) == 1
    assert 'test-request-exception' in broken[0].status

    # Delegation to check_link (broken list content is not pinned here)
    assert mock_check_link.call_count == 1
    mock_check_link.assert_called_with(
        get_test_config(),
        queue_mock,
        [link_url],
        mock.ANY,
        base_url,
        queued_link,
    )
def test_check_site(mock_queue, mock_thread):
    '''Should start the worker threads and feed initial link to queue.

    With the queue and thread classes mocked, no link is ever processed, so
    the returned SiteResult is expected to report zero checked and zero
    broken URLs for the site.
    '''
    # Setup mocks
    mock_queue_instance: mock.MagicMock = mock_queue.return_value
    mock_thread_instance: mock.MagicMock = mock_thread.return_value

    # Call function
    result = check_site(get_test_config(), 'http://example.com', 4)

    # Expected result
    expected = SiteResult(
        site='http://example.com',
        summary=SiteResultSummary(
            urls_checked=0,
            urls_broken=0,
        ),
        details=SiteResultDetails(broken=[], ),
    )

    # Assert results: one daemon thread per requested worker, all sharing
    # the same queue and (still empty) result lists
    assert mock_thread.call_count == 4
    assert mock_thread.call_args_list == 4 * [
        mock.call(
            target=check_link_worker,
            args=(
                get_test_config(),
                mock_queue_instance,
                [],
                [],
                'http://example.com',
            ),
            daemon=True,
        )
    ]
    assert mock_thread_instance.start.call_count == 4

    # The site root is the seed link, and the queue is joined afterwards
    mock_queue_instance.put.assert_called_with(
        Link(
            page='',
            url='http://example.com',
        ))
    mock_queue_instance.join.assert_called_with()

    # The return value was previously computed but never verified
    assert result == expected
def test_delic_html_parser_relative_url(link, expected_url):
    '''Relative hrefs are queued as the expected resolved URL.

    Feeds a single anchor whose href is the parametrized ``link`` while the
    parser is on 'http://example.com/subfolder/index.html', and expects a
    Link with ``expected_url`` on the queue.
    '''
    page_url = 'http://example.com/subfolder/index.html'
    queue_mock = mock.MagicMock()

    # Markup under test
    markup = f'<a href="{link}">Test</a>'

    # Run the parser over the markup
    html_parser = DelicHTMLParser(
        delic_config=get_test_config(),
        base_url='http://example.com',
        link_queue=queue_mock,
        checked_urls=[],
        page=page_url,
    )
    html_parser.feed(markup)

    # Verify the queued link
    queue_mock.put.assert_called_with(Link(page=page_url, url=expected_url))