def get_latest_available_date(model, test_corpus, bucket=EMMAA_BUCKET_NAME):
    """Return the latest date for which both stats files exist on S3.

    Looks up the most recent ``model_stats`` and ``test_stats`` dates for
    the given model/test corpus. If they differ, walks backwards (up to 29
    days) from the earlier of the two until a date is found for which both
    files are available.

    Parameters
    ----------
    model : str
        Name of the model.
    test_corpus : str
        Name of the test corpus.
    bucket : str
        S3 bucket to look in.

    Returns
    -------
    str or None
        The latest available date as a ``YYYY-MM-DD`` string, or None if
        the test corpus is missing, a stats file is absent, or no common
        date is found within the 30-day window.
    """
    if not test_corpus:
        logger.error('Test corpus is missing, cannot find latest date')
        return
    model_date = last_updated_date(model, 'model_stats', extension='.json',
                                   bucket=bucket)
    test_date = last_updated_date(model, 'test_stats', tests=test_corpus,
                                  extension='.json', bucket=bucket)
    # Guard: min() below would raise TypeError if either date is None
    # (i.e. the stats file does not exist at all).
    if not model_date or not test_date:
        logger.info(f'Stats are not available for {model} model and '
                    f'{test_corpus}.')
        return
    if model_date == test_date:
        logger.info(f'Latest available date for {model} model and '
                    f'{test_corpus} is {model_date}.')
        return model_date
    # Dates differ; the best candidate is the earlier of the two.
    min_date = min(model_date, test_date)
    if is_available(model, test_corpus, min_date, bucket=bucket):
        logger.info(f'Latest available date for {model} model and '
                    f'{test_corpus} is {min_date}.')
        return min_date
    # Walk back one day at a time looking for a date with both files.
    min_date_obj = datetime.strptime(min_date, "%Y-%m-%d")
    for day_count in range(1, 30):
        # Format back to a string: is_available is called with string
        # dates above, and all other return paths yield date strings.
        earlier_date = (
            min_date_obj - timedelta(days=day_count)).strftime("%Y-%m-%d")
        if is_available(model, test_corpus, earlier_date, bucket=bucket):
            logger.info(f'Latest available date for {model} model and '
                        f'{test_corpus} is {earlier_date}.')
            return earlier_date
    logger.info(f'Could not find latest available date for {model} model '
                f'and {test_corpus}.')
def test_run_model_tests_from_s3():
    """Check that running model tests from S3 uploads test results."""
    # Local imports are recommended when using moto
    from emmaa.model_tests import run_model_tests_from_s3, ModelManager
    from emmaa.model import last_updated_date
    client = setup_bucket(add_tests=True, add_mm=True)
    # Shared lookup arguments for the results-file date query.
    results_kwargs = dict(tests='simple_tests', extension='.json',
                          bucket=TEST_BUCKET_NAME)
    # No results should exist before the run
    assert not last_updated_date('test', 'test_results', **results_kwargs)
    mm = run_model_tests_from_s3('test', 'simple_tests', upload_results=True,
                                 bucket=TEST_BUCKET_NAME)
    assert isinstance(mm, ModelManager)
    # The run should have uploaded results
    assert last_updated_date('test', 'test_results', **results_kwargs)
def test_last_updated():
    """Check last_updated_date for various file types and date formats."""
    # Local imports are recommended when using moto
    from emmaa.model import last_updated_date
    client = setup_bucket(add_model=True, add_results=True,
                          add_model_stats=True, add_test_stats=True)

    def assert_date_found(pattern, *args, **kwargs):
        # Helper: the key must exist and match the expected date pattern.
        key_str = last_updated_date(*args, bucket=TEST_BUCKET_NAME, **kwargs)
        assert key_str
        assert re.search(pattern, key_str).group()

    # Test for different file types
    assert_date_found(RE_DATETIMEFORMAT, 'test', 'model', 'datetime',
                      extension='.pkl')
    assert_date_found(RE_DATETIMEFORMAT, 'test', 'test_results', 'datetime',
                      'simple_tests', extension='.json')
    assert_date_found(RE_DATETIMEFORMAT, 'test', 'test_stats', 'datetime',
                      'simple_tests', extension='.json')
    assert_date_found(RE_DATETIMEFORMAT, 'test', 'model_stats', 'datetime',
                      extension='.json')
    # Test for different date format
    assert_date_found(RE_DATEFORMAT, 'test', 'model', 'date',
                      extension='.pkl')
    # Test with wrong extension
    key_str = last_updated_date('test', 'test_stats', 'datetime',
                                'simple_tests', extension='.pkl',
                                bucket=TEST_BUCKET_NAME)
    assert not key_str
def get_model_tests_page(model):
    """Render the detail page for a single test of a model.

    Query-string parameters (Flask ``request.args``): ``model_type``,
    ``test_hash``, ``test_corpus`` (required), ``date``. Aborts with 404
    if the corpus is missing, the model type is unknown, the stats file
    is not found, or the test hash has no result for the given date.
    """
    model_type = request.args.get('model_type')
    test_hash = request.args.get('test_hash')
    test_corpus = request.args.get('test_corpus')
    if not test_corpus:
        abort(Response('Test corpus has to be provided', 404))
    date = request.args.get('date')
    if model_type not in ALL_MODEL_TYPES:
        abort(Response(f'Model type {model_type} does not exist', 404))
    # Load the test stats JSON and the S3 key it came from.
    test_stats, file_key = get_model_stats(model, 'test', tests=test_corpus,
                                           date=date)
    if not test_stats:
        abort(Response(f'Data for {model} for {date} was not found', 404))
    try:
        current_test = \
            test_stats['test_round_summary']['all_test_results'][test_hash]
    except KeyError:
        abort(Response(f'Result for this test does not exist for {date}',
                       404))
    # Only show model types actually present in this round's summary.
    current_model_types = [mt for mt in ALL_MODEL_TYPES if mt in
                           test_stats['test_round_summary']]
    test = current_test["test"]
    test_status, path_list = current_test[model_type]
    # Curation labels keyed by statement hash (correct/incorrect).
    correct, incorrect = _label_curations()
    if isinstance(path_list, list):
        # Annotate every statement in every path edge with its curation
        # status; note this mutates the stmt lists in place.
        for path in path_list:
            for edge in path['edge_list']:
                for stmt in edge['stmts']:
                    cur = ''
                    url = stmt[0]
                    if 'stmt_hash' in url:
                        # Statement hashes are carried in the URL query
                        # string; extract them to look up curations.
                        stmt_hashes = parse.parse_qs(
                            parse.urlparse(url).query)['stmt_hash']
                        cur = _set_curation(stmt_hashes, correct, incorrect)
                    stmt.append(cur)
    latest_date = get_latest_available_date(model, test_corpus)
    # Position of the current stats file among all stats files on S3,
    # used to derive prev/next navigation dates (index 0 = newest).
    prefix = f'stats/{model}/test_stats_{test_corpus}_'
    cur_ix = find_index_of_s3_file(file_key, EMMAA_BUCKET_NAME, prefix)
    if test_hash in test_stats['tests_delta']['applied_hashes_delta'][
            'added']:
        # Test was first applied in this round: nothing earlier to show.
        prev_date = None
    elif (cur_ix + 1) < find_number_of_files_on_s3(EMMAA_BUCKET_NAME, prefix,
                                                   '.json'):
        # An older stats file exists: link to its date.
        prev_date = last_updated_date(model, 'test_stats', 'date',
                                      tests=test_corpus, extension='.json',
                                      n=(cur_ix + 1),
                                      bucket=EMMAA_BUCKET_NAME)
    else:
        prev_date = None
    if cur_ix > 0:
        # A newer stats file exists: link to its date.
        next_date = last_updated_date(model, 'test_stats', 'date',
                                      tests=test_corpus, extension='.json',
                                      n=(cur_ix - 1),
                                      bucket=EMMAA_BUCKET_NAME)
    else:
        next_date = None
    return render_template('tests_template.html', link_list=link_list,
                           model=model, model_type=model_type,
                           all_model_types=current_model_types,
                           test_hash=test_hash, test=test,
                           test_status=test_status, path_list=path_list,
                           formatted_names=FORMATTED_TYPE_NAMES, date=date,
                           latest_date=latest_date, prev=prev_date,
                           next=next_date)