def get_content_chunk():
    """
    Hand out metadata for a batch of crawled chunks on each request.

    Asks the database for the first ``number_of_chunks`` crawled chunks,
    records every returned chunk in the index builder relation with task
    'building' for the requesting address (``request.remote_addr``), and
    returns the chunk metadata to the caller.

    :return: JSON list of ``{'chunk_id': ..., 'host': ...}`` dicts, empty
             JSON list if no content chunks
    """
    # Local import keeps this block self-contained; the module's import
    # section is not touched.
    import logging

    # Get the first number_of_chunks crawled chunks
    results = db_manager.get_first_n_crawled_chunks(number_of_chunks)
    # Diagnostic only: logged at debug level instead of printed to stdout
    logging.getLogger(__name__).debug('Crawled chunks fetched: %s', results)

    # Guard covers both an empty result and a missing (None) result
    if not results:
        return jsonify([])

    temp_chunks = []
    for chunk in results:
        temp_chunks.append({
            'chunk_id': chunk['chunk_id'],
            'host': chunk['c_host'],
        })
        # Insert to index builder relation and mark chunk as 'building'
        db_manager.operate_on_index_builder_relation(
            'INSERT',
            chunk['chunk_id'],
            host=request.remote_addr,
            task='building')
    return jsonify(temp_chunks)
def test_get_first_n_crawled_chunk_ids():
    """
    Smoke-test ``DatabaseManager.get_first_n_crawled_chunks``.

    Inserts a throwaway chunk, host and crawler row (task 'crawled'),
    requests one crawled chunk, and prints PASSED when exactly one row
    comes back, FAILED otherwise. The fixture rows are removed in a
    ``finally`` clause so they do not linger in the database when setup
    or the query raises.
    """
    db_manager = DatabaseManager()
    try:
        # Setup test env
        db_manager.operate_on_chunk_relation('INSERT', chunk_id='101c')
        db_manager.operate_on_host_relation(
            'INSERT', host='101.101.101.101:101', type='Test Server')
        db_manager.operate_on_crawler_relation(
            'INSERT',
            chunk_id='101c',
            host='101.101.101.101:101',
            task='crawled')

        # Test
        result = db_manager.get_first_n_crawled_chunks(1)
        if len(result) == 1:
            print('> PASSED | get_first_n_crawled_chunk_ids()')
        else:
            print('> FAILED | get_first_n_crawled_chunk_ids()')
    finally:
        # Clean up test env (runs even if setup or the query failed)
        db_manager.operate_on_chunk_relation('DELETE', chunk_id='101c')
        db_manager.operate_on_host_relation(
            'DELETE', host='101.101.101.101:101')
        db_manager.operate_on_crawler_relation('DELETE', chunk_id='101c')