def test_db_empty():
    """Invoke the handler with nothing inserted and expect no filtered items.

    Both tables are created through ``helper.create_table`` and no items are
    put into either of them before the handler runs.
    """
    table_env = {
        'SCRAPED_ADVERTS_TABLE': 'scraped',
        'FILTERED_ADVERTS_TABLE': 'filtered'
    }
    helper.set_env(table_env)

    dynamodb = boto3.resource('dynamodb')
    helper.create_table(dynamodb, 'scraped', 'title_hash', 'S')
    filtered_table = helper.create_table(dynamodb, 'filtered', 'title_hash',
                                         'S')

    lambda_handler.handle(None, None)

    scan_result = filtered_table.scan()
    assert len(scan_result['Items']) == 0
def test_db_not_empty():
    """Load fixture items into the scraped table and expect filtered output.

    Every item from ``data_files/test_items.json`` is flagged
    ``processed = False`` and put into the scraped table; after the handler
    runs, a scan of the filtered table must return at least one item.
    """
    helper.set_env({
        'SCRAPED_ADVERTS_TABLE': 'scraped',
        'FILTERED_ADVERTS_TABLE': 'filtered'
    })
    db = boto3.resource('dynamodb')
    scraped_table = helper.create_table(db, 'scraped', 'title_hash', 'S')
    filtered_table = helper.create_table(db, 'filtered', 'title_hash', 'S')
    # Use a context manager so the fixture file is closed deterministically
    # instead of leaking the handle until garbage collection.
    with open('data_files/test_items.json', 'r') as fixture_file:
        test_items = json.load(fixture_file)
    for item in test_items['items']:
        item['processed'] = False
        scraped_table.put_item(Item=item)
    lambda_handler.handle(None, None)
    filtered_res = filtered_table.scan()
    assert len(filtered_res['Items']) > 0
# Example #3
0
def test_db_cleaner_all_to_clear():
    """Populate the scraped table with 16-day-old timestamps and expect it
    to be empty once the handler has run.

    The same timestamp (now minus 16 days, in whole seconds) is passed to
    ``populate_table`` for both of its time arguments.
    """
    helper.set_env({'SCRAPED_ADVERTS_TABLE': 'scraped'})
    scraped_table = helper.create_table(boto3.resource('dynamodb'), 'scraped',
                                        'title_hash', 'S')

    sixteen_days_in_seconds = 16 * 24 * 60 * 60
    stale_timestamp = int(time.time()) - sixteen_days_in_seconds
    populate_table(scraped_table, stale_timestamp, stale_timestamp)

    lambda_handler.handle(None, None)

    remaining = scraped_table.scan()
    assert len(remaining['Items']) == 0
# Example #4
0
 def _set_output(self, data):
     """Open the output file and create its payload table on first use.

     Does nothing when ``self._output`` is already set. Otherwise the output
     format is set to ``Format.h5``, the file named by ``self._output_name``
     is opened for writing with ``data.filters``, and ``create_table`` is
     called with ``data``'s node path and description; its result is stored
     in ``self._payload``.
     """
     import tables

     # Output was configured by an earlier call: leave it untouched.
     if self._output is not None:
         return

     self._output_format = Format.h5

     # Read everything needed from ``data`` before opening the file.
     source_filters = data.filters
     source_path = data._v_pathname
     source_description = data.description

     self._output = tables.open_file(
         self._output_name, 'w', filters=source_filters)
     self._payload = create_table(
         self._output, source_path, source_description)
def test_adverts_controller_get_valid_pages():
    """Request pages 0 through 4 and compare each body with its fixture.

    For every page index the handler response body is parsed as JSON and
    must equal the contents of
    ``data_files/filtered_adverts_page_<index>.json``.
    """
    helper.set_env({'FILTERED_ADVERTS_TABLE': 'filtered'})
    populate_table(
        helper.create_table(boto3.resource('dynamodb'), 'filtered',
                            'title_hash', 'S'))

    for i in range(5):
        response = lambda_handler.handle(
            {'queryStringParameters': {
                'page': str(i)
            }}, None)
        assert response is not None
        response_items = json.loads(response['body'])
        # Close each fixture file as soon as it is parsed; the bare open()
        # previously leaked one file handle per iteration.
        fixture_path = 'data_files/filtered_adverts_page_{}.json'.format(i)
        with open(fixture_path, 'r') as fixture_file:
            test_response_items = json.load(fixture_file)
        assert response_items == test_response_items
def test_adverts_get_non_existing_page():
    """Request out-of-range pages and check the exact response body.

    Pages ``-1`` and ``1000`` are requested in that order; each response
    must be non-None and carry the expected JSON string with an empty
    ``items`` list.
    """
    helper.set_env({'FILTERED_ADVERTS_TABLE': 'filtered'})
    filtered_table = helper.create_table(boto3.resource('dynamodb'),
                                         'filtered', 'title_hash', 'S')
    populate_table(filtered_table)

    # (requested page, exact expected response body)
    cases = (
        ('-1',
         '{"items": [], "page": -1, "number_of_pages": 5, "count": 112, "page_count": 0}'),
        ('1000',
         '{"items": [], "page": 1000, "number_of_pages": 5, "count": 112, "page_count": 0}'),
    )
    for requested_page, expected_body in cases:
        event = {'queryStringParameters': {'page': requested_page}}
        response = lambda_handler.handle(event, None)
        assert response is not None
        assert response['body'] == expected_body
# Example #7
0
# Walk every collected link, then fetch and save the table for each session
count = 1
tot = len(to_parse)
for url in to_parse:
    # Split the URL on '/'; element 6 is taken as the country and element 7
    # is dropped before the URL is rebuilt
    holders = url.split('/')
    COUNTRY = holders[6]
    holders.pop(7)

    # Rebuild the remote base URL and derive the local output directory
    temp_url = F1 + '/'.join(holders) + '/'
    base_loc = 'data/' + YEAR + '/' + COUNTRY + '/'

    print('\n=====', COUNTRY.upper(), '=====', str(count), 'out of', str(tot))
    # Practice sessions first, then qualifying, then the race pages; each
    # page is written next to the others with '.html' swapped for '.txt'
    for session_page in (PRACTICE1, PRACTICE2, PRACTICE3, QUALIFYING,
                         STARTING, FASTEST, RACE):
        write_file(create_table(temp_url + session_page),
                   base_loc + session_page.replace('.html', '.txt'))