Пример #1
0
def test_policy_constructor_blank_name():
    ''' A policy document with a blank name must fail validation. '''
    created_at = datetime.now(timezone.utc)
    updated_at = datetime.now(timezone.utc) + timedelta(minutes=1)
    doc = {
        'id': '01b60eeb-2ac9-4f41-9b0c-47dcbcf637f7',
        # The empty name is the condition under test.
        'name': '',
        'created_at': created_at,
        'updated_at': updated_at,
        'authentication': {
            'enabled': True
        },
        'limits': {
            'max_cost': 10,
        },
        'mime_type_rules': [
            {
                'match': 'MATCHES',
                'pattern': '^text/',
                'save': True
            },
            {
                'save': False
            },
        ],
        'proxy_rules': [
            {
                'proxy_url': 'socks5://localhost:1234'
            },
        ],
        'robots_txt': {
            'usage': 'IGNORE',
        },
        'url_normalization': {
            'enabled': True,
            'strip_parameters': ['PHPSESSID'],
        },
        'url_rules': [
            {
                'action': 'ADD',
                'amount': 1,
                'match': 'MATCHES',
                'pattern': '^https?://({SEED_DOMAINS})/'
            },
            {
                'action': 'MULTIPLY',
                'amount': 0
            },
        ],
        'user_agents': [{
            'name': 'Test User Agent'
        }]
    }
    # The constructor is expected to raise, so its result is deliberately not
    # bound to a name.
    with pytest.raises(PolicyValidationError):
        Policy(doc, version='1.0.0', seeds=[])
Пример #2
0
def make_policy(proxy=None):
    ''' Build a sample policy, optionally with the given proxy rules. '''
    timestamp = datetime(2018, 12, 31, 13, 47, 0)
    # A falsy ``proxy`` (including the default ``None``) means no proxy rules.
    proxy_rules = proxy if proxy else []
    mime_type_rules = [
        {'match': 'MATCHES', 'pattern': '^text/', 'save': True},
        {'save': False},
    ]
    url_rules = [
        {
            'action': 'ADD',
            'amount': 1,
            'match': 'MATCHES',
            'pattern': '^https?://({SEED_DOMAINS})/',
        },
        {'action': 'MULTIPLY', 'amount': 0},
    ]
    policy_doc = {
        'id': '01b60eeb-2ac9-4f41-9b0c-47dcbcf637f7',
        'name': 'Test',
        'created_at': timestamp,
        'updated_at': timestamp,
        'authentication': {'enabled': False},
        'limits': {'max_cost': 10, 'max_duration': 3600, 'max_items': 10_000},
        'mime_type_rules': mime_type_rules,
        'proxy_rules': proxy_rules,
        'robots_txt': {'usage': 'IGNORE'},
        'url_normalization': {'enabled': True, 'strip_parameters': []},
        'url_rules': url_rules,
        'user_agents': [{'name': 'Test User Agent'}],
    }
    return Policy(policy_doc, '1.0.0', ['https://seeds.example'])
Пример #3
0
def make_policy():
    ''' Build the sample policy seeded with ``https://frontier.example``. '''
    timestamp = datetime(2018, 12, 31, 13, 47, 0)
    limits = {'max_cost': 10, 'max_duration': 3600, 'max_items': 10_000}
    save_text_only = [
        {'match': 'MATCHES', 'pattern': '^text/', 'save': True},
        {'save': False},
    ]
    seed_domains_only = [
        {
            'action': 'ADD',
            'amount': 1,
            'match': 'MATCHES',
            'pattern': '^https?://({SEED_DOMAINS})/',
        },
        {'action': 'MULTIPLY', 'amount': 0},
    ]
    document = {
        'id': 'bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb',
        'name': 'Test',
        'created_at': timestamp,
        'updated_at': timestamp,
        'authentication': {'enabled': True},
        'limits': limits,
        'mime_type_rules': save_text_only,
        'proxy_rules': [],
        'robots_txt': {'usage': 'IGNORE'},
        'url_normalization': {'enabled': True, 'strip_parameters': ['b']},
        'url_rules': seed_domains_only,
        'user_agents': [{'name': 'Test User Agent'}],
    }
    return Policy(document, '1.0.0', ['https://frontier.example'])
Пример #4
0
def test_convert_policy_pb_to_doc_captcha():
    ''' The protobuf CAPTCHA solver ID is rendered as a UUID string. '''
    created = datetime.now(timezone.utc)
    updated = datetime.now(timezone.utc) + timedelta(minutes=1)
    solver_id = \
        b'\xe2\x72\x23\xd3\x85\xef\x4e\x89\x8f\xc8\xdb\xcf\x8d\xf0\xce\x97'

    message = starbelly.starbelly_pb2.Policy()
    message.policy_id = \
        b'\x01\xb6\x0e\xeb*\xc9OA\x9b\x0cG\xdc\xbc\xf67\xf7'
    message.name = 'Test'
    message.created_at = created.isoformat()
    message.updated_at = updated.isoformat()
    message.captcha_solver_id = solver_id

    converted = Policy.convert_pb_to_doc(message)
    expected = 'e27223d3-85ef-4e89-8fc8-dbcf8df0ce97'
    assert converted['captcha_solver_id'] == expected
Пример #5
0
def make_policy(usage, user_agent):
    '''
    Build a sample policy with the given robots.txt usage and user agent name.
    '''
    timestamp = datetime(2018, 12, 31, 13, 47, 0)
    robots_txt = {'usage': usage}
    user_agents = [{'name': user_agent}]
    policy_doc = {
        'id': '01b60eeb-2ac9-4f41-9b0c-47dcbcf637f7',
        'name': 'Test',
        'created_at': timestamp,
        'updated_at': timestamp,
        'authentication': {'enabled': False},
        'limits': {'max_cost': 10},
        'mime_type_rules': [{'save': True}],
        'proxy_rules': [],
        'robots_txt': robots_txt,
        'url_normalization': {'enabled': True, 'strip_parameters': []},
        'url_rules': [{'action': 'MULTIPLY', 'amount': 0}],
        'user_agents': user_agents,
    }
    return Policy(policy_doc, '1.0.0', ['https://seeds.example'])
Пример #6
0
def test_policy_constructor_captcha():
    ''' A document with a ``captcha_solver`` yields a ``CaptchaSolver``. '''
    created = datetime.now(timezone.utc)
    updated = datetime.now(timezone.utc) + timedelta(minutes=1)
    solver_doc = {
        'id': b'captcha1',
        'name': 'CAPTCHA Solver 1',
        'service_url': 'https://solver.example',
        'api_key': 'test-key',
        'require_phrase': False,
        'case_sensitive': False,
        'characters': 'abcdefg',
        'require_math': False,
        'min_length': 6,
        'max_length': 6,
    }
    mime_type_rules = [
        {'match': 'MATCHES', 'pattern': '^text/', 'save': True},
        {'save': False},
    ]
    url_rules = [
        {
            'action': 'ADD',
            'amount': 1,
            'match': 'MATCHES',
            'pattern': '^https?://({SEED_DOMAINS})/',
        },
        {'action': 'MULTIPLY', 'amount': 0},
    ]
    policy_doc = {
        'id': '01b60eeb-2ac9-4f41-9b0c-47dcbcf637f7',
        'name': 'Test',
        'created_at': created,
        'updated_at': updated,
        'authentication': {'enabled': True},
        'captcha_solver': solver_doc,
        'limits': {'max_cost': 10},
        'mime_type_rules': mime_type_rules,
        'proxy_rules': [{'proxy_url': 'socks5://localhost:1234'}],
        'robots_txt': {'usage': 'IGNORE'},
        'url_normalization': {
            'enabled': True,
            'strip_parameters': ['PHPSESSID'],
        },
        'url_rules': url_rules,
        'user_agents': [{'name': 'Test User Agent'}],
    }
    built = Policy(policy_doc, version='1.0.0', seeds=[])
    assert isinstance(built.captcha_solver, CaptchaSolver)
Пример #7
0
def test_policy_constructor():
    ''' Each section of a policy document becomes its own policy object. '''
    created = datetime.now(timezone.utc)
    updated = datetime.now(timezone.utc) + timedelta(minutes=1)
    mime_type_rules = [
        {'match': 'MATCHES', 'pattern': '^text/', 'save': True},
        {'save': False},
    ]
    url_rules = [
        {
            'action': 'ADD',
            'amount': 1,
            'match': 'MATCHES',
            'pattern': '^https?://({SEED_DOMAINS})/',
        },
        {'action': 'MULTIPLY', 'amount': 0},
    ]
    policy_doc = {
        'id': '01b60eeb-2ac9-4f41-9b0c-47dcbcf637f7',
        'name': 'Test',
        'created_at': created,
        'updated_at': updated,
        'authentication': {'enabled': True},
        'limits': {'max_cost': 10},
        'mime_type_rules': mime_type_rules,
        'proxy_rules': [{'proxy_url': 'socks5://localhost:1234'}],
        'robots_txt': {'usage': 'IGNORE'},
        'url_normalization': {
            'enabled': True,
            'strip_parameters': ['PHPSESSID'],
        },
        'url_rules': url_rules,
        'user_agents': [{'name': 'Test User Agent'}],
    }
    built = Policy(policy_doc, version='1.0.0', seeds=[])

    assert isinstance(built.authentication, PolicyAuthentication)
    # The document has no CAPTCHA solver section.
    assert built.captcha_solver is None
    remaining_sections = (
        ('limits', PolicyLimits),
        ('mime_type_rules', PolicyMimeTypeRules),
        ('proxy_rules', PolicyProxyRules),
        ('robots_txt', PolicyRobotsTxt),
        ('url_normalization', PolicyUrlNormalization),
        ('url_rules', PolicyUrlRules),
        ('user_agents', PolicyUserAgents),
    )
    for attribute, expected_type in remaining_sections:
        assert isinstance(getattr(built, attribute), expected_type)
Пример #8
0
def test_convert_policy_doc_to_pb():
    '''
    Convert a policy document to a protobuf message and check the fields of
    the resulting message section by section.
    '''
    created_at = datetime.now(timezone.utc)
    updated_at = datetime.now(timezone.utc) + timedelta(minutes=1)
    doc = {
        'id':
        '01b60eeb-2ac9-4f41-9b0c-47dcbcf637f7',
        'name':
        'Test',
        'created_at':
        created_at,
        'updated_at':
        updated_at,
        'authentication': {
            'enabled': True
        },
        'limits': {
            'max_cost': 10,
            'max_duration': 3600,
            'max_items': 10_000,
        },
        'mime_type_rules': [
            {
                'match': 'MATCHES',
                'pattern': '^text/',
                'save': True
            },
            {
                'save': False
            },
        ],
        'proxy_rules': [
            {
                'proxy_url': 'socks5://localhost:1234',
                'pattern': r'\.onion',
                'match': 'MATCHES'
            },
        ],
        'robots_txt': {
            'usage': 'IGNORE',
        },
        'url_normalization': {
            'enabled': True,
            'strip_parameters': ['PHPSESSID'],
        },
        'url_rules': [
            {
                'action': 'ADD',
                'amount': 1,
                'match': 'MATCHES',
                'pattern': '^https?://({SEED_DOMAINS})/'
            },
            {
                'action': 'MULTIPLY',
                'amount': 0
            },
        ],
        'user_agents': [{
            'name': 'Test User Agent'
        }]
    }
    pb = starbelly.starbelly_pb2.Policy()
    Policy.convert_doc_to_pb(doc, pb)
    # The 16 raw bytes below are the binary form of the document's UUID.
    assert pb.policy_id == b'\x01\xb6\x0e\xeb*\xc9OA\x9b\x0cG\xdc\xbc\xf67\xf7'
    assert pb.name == 'Test'
    assert pb.created_at == created_at.isoformat()
    assert pb.updated_at == updated_at.isoformat()

    # Authentication
    assert pb.authentication.enabled

    # Limits
    assert pb.limits.max_cost == 10

    # MIME type rules
    assert len(pb.mime_type_rules) == 2
    assert pb.mime_type_rules[0].match == MATCH_ENUM.Value('MATCHES')
    # Compare with ``==``: ``assert value, expected`` would treat the expected
    # string as the failure message and pass for any non-empty pattern.
    assert pb.mime_type_rules[0].pattern == '^text/'
    assert pb.mime_type_rules[0].save
    assert not pb.mime_type_rules[1].save

    # Proxy rules
    assert len(pb.proxy_rules) == 1
    assert pb.proxy_rules[0].proxy_url == 'socks5://localhost:1234'

    # Robots.txt
    assert pb.robots_txt.usage == USAGE_ENUM.Value('IGNORE')

    # URL normalization
    assert pb.url_normalization.enabled
    assert pb.url_normalization.strip_parameters == ['PHPSESSID']

    # URL rules
    assert len(pb.url_rules) == 2
    assert pb.url_rules[0].action == ACTION_ENUM.Value('ADD')
    assert pb.url_rules[0].amount == 1
    assert pb.url_rules[0].match == MATCH_ENUM.Value('MATCHES')
    assert pb.url_rules[0].pattern == '^https?://({SEED_DOMAINS})/'
    assert pb.url_rules[1].action == ACTION_ENUM.Value('MULTIPLY')
    assert pb.url_rules[1].amount == 0

    # User agents
    assert len(pb.user_agents) == 1
    assert pb.user_agents[0].name == 'Test User Agent'
Пример #9
0
def test_convert_policy_pb_to_doc():
    '''
    Populate a protobuf policy message, convert it to a document, and check
    the document section by section.
    '''
    created = datetime.now(timezone.utc)
    updated = datetime.now(timezone.utc) + timedelta(minutes=1)
    matches = MATCH_ENUM.Value('MATCHES')
    seed_pattern = '^https?://({SEED_DOMAINS})/'

    # --- Build the protobuf message ---
    message = starbelly.starbelly_pb2.Policy()
    message.policy_id = \
        b'\x01\xb6\x0e\xeb*\xc9OA\x9b\x0cG\xdc\xbc\xf67\xf7'
    message.name = 'Test'
    message.created_at = created.isoformat()
    message.updated_at = updated.isoformat()

    # Authentication
    message.authentication.enabled = True

    # Limits
    message.limits.max_cost = 10
    message.limits.max_duration = 3600
    message.limits.max_items = 10_000

    # MIME type rules
    pb_text_rule = message.mime_type_rules.add()
    pb_text_rule.match = matches
    pb_text_rule.pattern = '^text/'
    pb_text_rule.save = True
    pb_default_mime_rule = message.mime_type_rules.add()
    pb_default_mime_rule.save = False

    # Proxy rules
    pb_proxy_rule = message.proxy_rules.add()
    pb_proxy_rule.proxy_url = 'socks5://localhost:1234'
    pb_proxy_rule.pattern = r'\.onion'
    pb_proxy_rule.match = matches

    # Robots.txt
    message.robots_txt.usage = USAGE_ENUM.Value('IGNORE')

    # URL normalization
    message.url_normalization.enabled = True
    message.url_normalization.strip_parameters.append('PHPSESSID')

    # URL rules
    pb_seed_rule = message.url_rules.add()
    pb_seed_rule.action = ACTION_ENUM.Value('ADD')
    pb_seed_rule.amount = 1
    pb_seed_rule.match = matches
    pb_seed_rule.pattern = seed_pattern
    pb_default_url_rule = message.url_rules.add()
    pb_default_url_rule.action = ACTION_ENUM.Value('MULTIPLY')
    pb_default_url_rule.amount = 0

    # User agents
    pb_agent = message.user_agents.add()
    pb_agent.name = 'Test User Agent'

    # --- Convert and verify the document ---
    converted = Policy.convert_pb_to_doc(message)
    assert converted['id'] == '01b60eeb-2ac9-4f41-9b0c-47dcbcf637f7'
    assert converted['name'] == 'Test'
    assert converted['created_at'] == created
    assert converted['updated_at'] == updated

    # Authentication
    assert converted['authentication']['enabled']

    # Limits
    assert converted['limits']['max_cost'] == 10

    # MIME type rules
    mime_rules = converted['mime_type_rules']
    assert len(mime_rules) == 2
    text_rule = mime_rules[0]
    default_mime_rule = mime_rules[1]
    assert text_rule['match'] == 'MATCHES'
    assert text_rule['pattern'] == '^text/'
    assert text_rule['save']
    assert not default_mime_rule['save']

    # Proxy rules
    proxy_rules = converted['proxy_rules']
    assert len(proxy_rules) == 1
    assert proxy_rules[0]['proxy_url'] == 'socks5://localhost:1234'

    # Robots.txt
    assert converted['robots_txt']['usage'] == 'IGNORE'

    # URL normalization
    normalization = converted['url_normalization']
    assert normalization['enabled']
    assert normalization['strip_parameters'] == ['PHPSESSID']

    # URL rules
    url_rules = converted['url_rules']
    assert len(url_rules) == 2
    seed_rule = url_rules[0]
    default_url_rule = url_rules[1]
    assert seed_rule['action'] == 'ADD'
    assert seed_rule['amount'] == 1
    assert seed_rule['match'] == 'MATCHES'
    assert seed_rule['pattern'] == seed_pattern
    assert default_url_rule['action'] == 'MULTIPLY'
    assert default_url_rule['amount'] == 0

    # User agents
    agents = converted['user_agents']
    assert len(agents) == 1
    assert agents[0]['name'] == 'Test User Agent'
Пример #10
0
def test_convert_policy_doc_to_pb_captcha():
    ''' The document's CAPTCHA solver UUID is written to the message as bytes. '''
    created = datetime.now(timezone.utc)
    updated = datetime.now(timezone.utc) + timedelta(minutes=1)
    mime_type_rules = [
        {'match': 'MATCHES', 'pattern': '^text/', 'save': True},
        {'save': False},
    ]
    url_rules = [
        {
            'action': 'ADD',
            'amount': 1,
            'match': 'MATCHES',
            'pattern': '^https?://({SEED_DOMAINS})/',
        },
        {'action': 'MULTIPLY', 'amount': 0},
    ]
    policy_doc = {
        'id': '01b60eeb-2ac9-4f41-9b0c-47dcbcf637f7',
        'name': 'Test',
        'created_at': created,
        'updated_at': updated,
        'captcha_solver_id': 'e27223d3-85ef-4e89-8fc8-dbcf8df0ce97',
        'authentication': {'enabled': True},
        'limits': {'max_cost': 10},
        'mime_type_rules': mime_type_rules,
        'proxy_rules': [{'proxy_url': 'socks5://localhost:1234'}],
        'robots_txt': {'usage': 'IGNORE'},
        'url_normalization': {
            'enabled': True,
            'strip_parameters': ['PHPSESSID'],
        },
        'url_rules': url_rules,
        'user_agents': [{'name': 'Test User Agent'}],
    }
    message = starbelly.starbelly_pb2.Policy()
    Policy.convert_doc_to_pb(policy_doc, message)
    expected_solver_id = \
        b'\xe2\x72\x23\xd3\x85\xef\x4e\x89\x8f\xc8\xdb\xcf\x8d\xf0\xce\x97'
    assert message.captcha_solver_id == expected_solver_id
Пример #11
0
def test_policy_replace_mime_rules():
    '''
    Replacing the MIME type rules gives a policy that shares every other
    section with the original but applies the new rules.
    '''
    created = datetime.now(timezone.utc)
    updated = datetime.now(timezone.utc) + timedelta(minutes=1)
    text_only_rules = [
        {'match': 'MATCHES', 'pattern': '^text/', 'save': True},
        {'save': False},
    ]
    application_only_rules = [
        {'match': 'MATCHES', 'pattern': '^application/', 'save': True},
        {'save': False},
    ]
    url_rules = [
        {
            'action': 'ADD',
            'amount': 1,
            'match': 'MATCHES',
            'pattern': '^https?://({SEED_DOMAINS})/',
        },
        {'action': 'MULTIPLY', 'amount': 0},
    ]
    policy_doc = {
        'id': '01b60eeb-2ac9-4f41-9b0c-47dcbcf637f7',
        'name': 'Test',
        'created_at': created,
        'updated_at': updated,
        'authentication': {'enabled': True},
        'limits': {'max_cost': 10},
        'mime_type_rules': text_only_rules,
        'proxy_rules': [{'proxy_url': 'socks5://localhost:1234'}],
        'robots_txt': {'usage': 'IGNORE'},
        'url_normalization': {
            'enabled': True,
            'strip_parameters': ['PHPSESSID'],
        },
        'url_rules': url_rules,
        'user_agents': [{'name': 'Test User Agent'}],
    }
    original = Policy(policy_doc, version='1.0.0', seeds=[])
    replaced = original.replace_mime_type_rules(application_only_rules)

    # Every section other than the MIME type rules is the very same object.
    shared_sections = (
        'authentication',
        'captcha_solver',
        'limits',
        'proxy_rules',
        'robots_txt',
        'url_normalization',
        'url_rules',
        'user_agents',
    )
    for section in shared_sections:
        assert getattr(original, section) is getattr(replaced, section)

    # The MIME type rules are a new object with the opposite verdicts.
    old_rules = original.mime_type_rules
    new_rules = replaced.mime_type_rules
    assert old_rules is not new_rules
    assert old_rules.should_save('text/plain')
    assert not old_rules.should_save('application/json')
    assert not new_rules.should_save('text/plain')
    assert new_rules.should_save('application/json')
Пример #12
0
async def test_crawl_extractor(nursery):
    '''
    Send one downloaded HTML page through a ``CrawlExtractor`` and check the
    resulting database calls, frontier size statistic and robots.txt checks.
    '''
    # Create test fixtures.
    job_id = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa'
    db = Mock()
    db.delete_frontier_item = AsyncMock()
    db.insert_frontier_items = AsyncMock()
    # Zero-capacity channels: the test feeds responses in through
    # `to_extractor` and reads the extractor's output from `from_extractor`.
    to_extractor, extractor_recv = trio.open_memory_channel(0)
    extractor_send, from_extractor = trio.open_memory_channel(0)
    created_at = datetime(2018,12,31,13,47,00)
    policy_doc = {
        'id': 'bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb',
        'name': 'Test',
        'created_at': created_at,
        'updated_at': created_at,
        'authentication': {
            'enabled': False,
        },
        'limits': {
            'max_cost': 10,
            'max_duration': 3600,
            'max_items': 10_000,
        },
        'mime_type_rules': [
            {'match': 'MATCHES', 'pattern': '^text/', 'save': True},
            {'save': False},
        ],
        'proxy_rules': [],
        'robots_txt': {
            'usage': 'IGNORE',
        },
        'url_normalization': {
            'enabled': True,
            'strip_parameters': ['b'],
        },
        'url_rules': [
            {'action': 'ADD', 'amount': 1, 'match': 'MATCHES',
             'pattern': '^https?://({SEED_DOMAINS})/'},
            {'action': 'MULTIPLY', 'amount': 0},
        ],
        'user_agents': [
            {'name': 'Test User Agent'}
        ]
    }
    policy = Policy(policy_doc, '1.0.0', ['https://extractor.example'])
    downloader = Mock()
    robots_txt_manager = Mock()
    robots_txt_manager.is_allowed = AsyncMock(return_value=True)
    # NOTE(review): presumably the hash of the `old-url` link in the HTML
    # below -- confirm against the extractor's URL hashing.
    old_urls = {b'\xd2\x1b\x9b(p-\xed\xb2\x10\xdf\xf0\xa8\xe1\xa2*<'}
    stats_dict = {'frontier_size': 0}
    extractor = CrawlExtractor(job_id, db, extractor_send, extractor_recv,
        policy, downloader, robots_txt_manager, old_urls, stats_dict,
        batch_size=3)
    assert repr(extractor) == '<CrawlExtractor job_id=aaaaaaaa>'
    # Run the extractor in the background before any response is sent to it.
    nursery.start_soon(extractor.run)

    # The HTML document has 5 valid links (enough to create two batches when
    # `batch_size` is set to 3) as well as 1 link that's out of domain (should
    # not be added to frontier) and 1 link that's in `old_urls` (also should
    # not be added to frontier).
    html_body = \
    b'''<!DOCTYPE html>
        <html>
            <head><meta charset="UTF-8"><title>Test</title></head>
            <body>
                <a href='http://extractor.example/alpha'>Alpha</a>
                <a href='http://extractor.example/bravo'>Bravo</a>
                <a href='http://extractor.example/charlie'>Charlie</a>
                <a href='http://invalid.example/'>Invalid</a>
                <a href='http://extractor.example/delta'>Delta</a>
                <a href='http://extractor.example/echo'>Echo</a>
                <a href='http://extractor.example/old-url'>Echo</a>
            </body>
        </html>'''
    response = DownloadResponse(
        frontier_id='bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb',
        cost=1.0,
        url='https://extractor.example',
        canonical_url='https://extractor.example',
        content_type='text/html',
        body=html_body,
        started_at=datetime(2019, 2, 1, 10, 2, 0, tzinfo=timezone.utc),
        completed_at=datetime(2019, 2, 1, 10, 2, 0, tzinfo=timezone.utc),
        exception=None,
        status_code=200,
        headers=dict()
    )
    # Hand the response to the extractor, then wait for its output so the
    # assertions below run after the response has been handled.
    await to_extractor.send(response)
    await from_extractor.receive()
    # The item should be deleted from the frontier:
    assert db.delete_frontier_item.call_count == 1
    # NOTE(review): assumes `AsyncMock.call_args[0]` is the first positional
    # argument of the last call -- confirm against the project's AsyncMock.
    assert db.delete_frontier_item.call_args[0] == \
        'bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb'
    # The insert function should be called twice: once with three items
    # (alpha, bravo, charlie), and once with two items (delta, echo).
    assert db.insert_frontier_items.call_count == 2
    assert len(db.insert_frontier_items.call_args[0]) == 2
    assert stats_dict['frontier_size'] == 5
    # Six of the seven links reach the robots.txt check; which link is
    # filtered out earlier is not visible from this test.
    assert robots_txt_manager.is_allowed.call_count == 6
Пример #13
0
    assert server_db.get_policy.call_args[0] == policy_id
    assert response1.policy.name == 'Test Policy'

    # List policies
    command2 = new_request(2)
    command2.list_policies.page.limit = 10
    command2.list_policies.page.offset = 0
    response2 = await send_test_command(client, command2)
    assert response2.list_policies.total == 1
    assert response2.list_policies.policies[0].name == 'Test Policy'

    # Set policy
    command3 = new_request(3)
    policy2_doc = policy_doc.copy()
    del policy2_doc['id']
    Policy.convert_doc_to_pb(policy2_doc, command3.set_policy.policy)
    response3 = await send_test_command(client, command3)
    assert response3.new_policy.policy_id == b'\xaa' * 16

    # Delete policy
    command4 = new_request(4)
    command4.delete_policy.policy_id = b'\xaa' * 16
    response4 = await send_test_command(client, command4)
    assert response4.is_success
    assert server_db.delete_policy.call_args[0] == policy_id


@fail_after(3)
async def test_list_rate_limits(client, server_db):
    token = b'\xaa' * 16
    rate_limit = {
Пример #14
0
def make_policy(captcha_port=80):
    '''
    Build a sample policy whose CAPTCHA solver service URL points at
    ``127.0.0.1`` on the given port.
    '''
    timestamp = datetime(2019, 1, 28, 14, 26, 0, tzinfo=timezone.utc)
    service_url = 'http://127.0.0.1:{}'.format(captcha_port)
    captcha_solver = {
        'id': '01b60eeb-2ac9-4f41-9b0c-47dcbcf637f8',
        'name': 'Example CAPTCHA',
        'service_url': service_url,
        'api_key': None,
        'require_phrase': False,
        'case_sensitive': True,
        'characters': 'ALPHANUMERIC',
        'require_math': False,
    }
    mime_type_rules = [
        {'match': 'MATCHES', 'pattern': '^text/', 'save': True},
        {'save': False},
    ]
    url_rules = [
        {
            'action': 'ADD',
            'amount': 1,
            'match': 'MATCHES',
            'pattern': '^https?://({SEED_DOMAINS})/',
        },
        {'action': 'MULTIPLY', 'amount': 0},
    ]
    document = {
        'id': '01b60eeb-2ac9-4f41-9b0c-47dcbcf637f7',
        'name': 'Test',
        'created_at': timestamp,
        'updated_at': timestamp,
        'authentication': {'enabled': False},
        'captcha_solver': captcha_solver,
        'limits': {'max_cost': 10, 'max_duration': 3600, 'max_items': 10_000},
        'mime_type_rules': mime_type_rules,
        'proxy_rules': [],
        'robots_txt': {'usage': 'IGNORE'},
        'url_normalization': {'enabled': True, 'strip_parameters': []},
        'url_rules': url_rules,
        'user_agents': [{'name': 'Test User Agent'}],
    }
    return Policy(document, '1.0.0', ['https://login.example'])