Exemplo n.º 1
0
def get_latest_available_date(model, test_corpus, bucket=EMMAA_BUCKET_NAME):
    """Return the latest date for which both model and test stats exist.

    The most recent model stats date and test stats date are looked up
    with ``last_updated_date``. If they differ, the earlier of the two is
    checked with ``is_available``; if that fails, up to 29 preceding days
    are checked one by one.

    Parameters
    ----------
    model : str
        Name of the model, forwarded to ``last_updated_date`` and
        ``is_available``.
    test_corpus : str
        Name of the test corpus. If empty, an error is logged and None is
        returned.
    bucket : str
        Name of the bucket to look in, forwarded to the helpers.

    Returns
    -------
    str or None
        A date string in ``%Y-%m-%d`` format, or None if the test corpus is
        missing, either stats date could not be found, or no common date
        was found within the searched window.
    """
    date_format = "%Y-%m-%d"
    if not test_corpus:
        logger.error('Test corpus is missing, cannot find latest date')
        return None
    model_date = last_updated_date(model,
                                   'model_stats',
                                   extension='.json',
                                   bucket=bucket)
    test_date = last_updated_date(model,
                                  'test_stats',
                                  tests=test_corpus,
                                  extension='.json',
                                  bucket=bucket)
    # last_updated_date can come back empty; min() on a None and a str would
    # raise TypeError, and two Nones would be reported as a valid date.
    if not model_date or not test_date:
        logger.info(f'Could not find latest available date for {model} model '
                    f'and {test_corpus}.')
        return None
    if model_date == test_date:
        logger.info(f'Latest available date for {model} model and '
                    f'{test_corpus} is {model_date}.')
        return model_date
    min_date = min(model_date, test_date)
    if is_available(model, test_corpus, min_date, bucket=bucket):
        logger.info(f'Latest available date for {model} model and '
                    f'{test_corpus} is {min_date}.')
        return min_date
    min_date_obj = datetime.strptime(min_date, date_format)
    for day_count in range(1, 30):
        # Keep the date a string in the same format as min_date so that
        # is_available and the caller get the same type on every path.
        earlier_date = (
            min_date_obj - timedelta(days=day_count)).strftime(date_format)
        if is_available(model, test_corpus, earlier_date, bucket=bucket):
            logger.info(f'Latest available date for {model} model and '
                        f'{test_corpus} is {earlier_date}.')
            return earlier_date
    logger.info(f'Could not find latest available date for {model} model '
                f'and {test_corpus}.')
    return None
Exemplo n.º 2
0
def test_run_model_tests_from_s3():
    """Check that running model tests from S3 uploads a results file.

    No 'test_results' JSON is expected for the 'simple_tests' corpus of the
    'test' model before the run; after running with ``upload_results=True``
    one has to be found, and the run has to return a ModelManager.
    """
    # Local imports are recommended when using moto
    from emmaa.model_tests import run_model_tests_from_s3, ModelManager
    from emmaa.model import last_updated_date

    def _latest_results():
        # Same lookup is needed before and after the run.
        return last_updated_date('test',
                                 'test_results',
                                 tests='simple_tests',
                                 extension='.json',
                                 bucket=TEST_BUCKET_NAME)

    client = setup_bucket(add_tests=True, add_mm=True)

    # There should not be any results
    results_before = _latest_results()
    assert not results_before

    manager = run_model_tests_from_s3('test',
                                      'simple_tests',
                                      upload_results=True,
                                      bucket=TEST_BUCKET_NAME)
    assert isinstance(manager, ModelManager)

    # Results are saved now
    results_after = _latest_results()
    assert results_after
Exemplo n.º 3
0
def test_last_updated():
    """Check ``last_updated_date`` across file types and date formats.

    For each file type a 'datetime' lookup has to return a value matching
    RE_DATETIMEFORMAT, a 'date' lookup has to match RE_DATEFORMAT, and a
    lookup with a non-matching extension has to return a falsy value.
    """
    # Local imports are recommended when using moto
    from emmaa.model import last_updated_date
    client = setup_bucket(add_model=True,
                          add_results=True,
                          add_model_stats=True,
                          add_test_stats=True)

    # Test for different file types: (positional arguments, extension)
    datetime_cases = (
        (('test', 'model', 'datetime'), '.pkl'),
        (('test', 'test_results', 'datetime', 'simple_tests'), '.json'),
        (('test', 'test_stats', 'datetime', 'simple_tests'), '.json'),
        (('test', 'model_stats', 'datetime'), '.json'),
    )
    for positional, ext in datetime_cases:
        key_str = last_updated_date(*positional,
                                    extension=ext,
                                    bucket=TEST_BUCKET_NAME)
        assert key_str
        assert re.search(RE_DATETIMEFORMAT, key_str).group()

    # Test for different date format
    date_only = last_updated_date('test',
                                  'model',
                                  'date',
                                  extension='.pkl',
                                  bucket=TEST_BUCKET_NAME)
    assert date_only
    assert re.search(RE_DATEFORMAT, date_only).group()

    # Test with wrong extension
    wrong_ext = last_updated_date('test',
                                  'test_stats',
                                  'datetime',
                                  'simple_tests',
                                  extension='.pkl',
                                  bucket=TEST_BUCKET_NAME)
    assert not wrong_ext
Exemplo n.º 4
0
def get_model_tests_page(model):
    """Render the page with the result of a single test for a model.

    Reads ``model_type``, ``test_hash``, ``test_corpus`` and ``date`` from
    the request query string, loads the test stats for the model, annotates
    every statement in the result paths with a curation label and works out
    the previous and next dates that can be navigated to.

    Aborts with a 404 response when the test corpus is missing, the model
    type is not in ALL_MODEL_TYPES, the stats cannot be loaded or the test
    hash is not among the test results.

    NOTE(review): ``link_list`` is not defined in this function; presumably
    it is a module-level name - confirm.
    """
    query_args = request.args
    model_type = query_args.get('model_type')
    test_hash = query_args.get('test_hash')
    test_corpus = query_args.get('test_corpus')
    if not test_corpus:
        abort(Response('Test corpus has to be provided', 404))
    date = query_args.get('date')
    if model_type not in ALL_MODEL_TYPES:
        abort(Response(f'Model type {model_type} does not exist', 404))

    test_stats, file_key = get_model_stats(
        model, 'test', tests=test_corpus, date=date)
    if not test_stats:
        abort(Response(f'Data for {model} for {date} was not found', 404))

    round_summary = test_stats['test_round_summary']
    try:
        current_test = round_summary['all_test_results'][test_hash]
    except KeyError:
        abort(Response(f'Result for this test does not exist for {date}', 404))

    # Only offer the model types that have results in this test round.
    current_model_types = []
    for mt in ALL_MODEL_TYPES:
        if mt in test_stats['test_round_summary']:
            current_model_types.append(mt)

    test = current_test["test"]
    test_status, path_list = current_test[model_type]
    correct, incorrect = _label_curations()

    # path_list is only walked when it is a list; each statement gets one
    # extra element appended: its curation label, or '' if its URL (first
    # element) carries no statement hash.
    paths = path_list if isinstance(path_list, list) else []
    for path in paths:
        for edge in path['edge_list']:
            for stmt in edge['stmts']:
                url = stmt[0]
                if 'stmt_hash' in url:
                    query = parse.urlparse(url).query
                    stmt_hashes = parse.parse_qs(query)['stmt_hash']
                    curation = _set_curation(stmt_hashes, correct, incorrect)
                else:
                    curation = ''
                stmt.append(curation)

    latest_date = get_latest_available_date(model, test_corpus)
    prefix = f'stats/{model}/test_stats_{test_corpus}_'
    cur_ix = find_index_of_s3_file(file_key, EMMAA_BUCKET_NAME, prefix)

    def _stats_date_at(position):
        # Date of the test stats file at the given position in the listing.
        return last_updated_date(model,
                                 'test_stats',
                                 'date',
                                 tests=test_corpus,
                                 extension='.json',
                                 n=position,
                                 bucket=EMMAA_BUCKET_NAME)

    # No previous date when the test was added in this round or when there
    # is no file after the current one in the listing.
    prev_date = None
    added_hashes = test_stats['tests_delta']['applied_hashes_delta']['added']
    if test_hash not in added_hashes:
        file_count = find_number_of_files_on_s3(
            EMMAA_BUCKET_NAME, prefix, '.json')
        if (cur_ix + 1) < file_count:
            prev_date = _stats_date_at(cur_ix + 1)

    next_date = _stats_date_at(cur_ix - 1) if cur_ix > 0 else None

    return render_template('tests_template.html',
                           link_list=link_list,
                           model=model,
                           model_type=model_type,
                           all_model_types=current_model_types,
                           test_hash=test_hash,
                           test=test,
                           test_status=test_status,
                           path_list=path_list,
                           formatted_names=FORMATTED_TYPE_NAMES,
                           date=date,
                           latest_date=latest_date,
                           prev=prev_date,
                           next=next_date)