示例#1
0
def test_import_empty_tsv(config, database, caplog):
    """Import a source directory whose TSV carries no data.

    The import job is expected to report exactly one error.
    """
    caplog.set_level(logging.DEBUG)
    project_id = create_project(ADMIN_USER_ID, "Test LS 3")

    import_req = ImportReq(source_path=str(EMPTY_TSV_DIR))
    with FileImport(project_id, import_req) as importer:
        response: ImportRsp = importer.run(ADMIN_USER_ID)
    finished_job = wait_for_stable(response.job_id)
    check_job_errors(finished_job)
    reported = get_job_errors(finished_job)
    assert len(reported) == 1
示例#2
0
    def test_import_classif_issue(config, database, caplog):
        """Import a TSV that references an unknown classification id.

        The job must report a single error naming the missing id.
        """
        caplog.set_level(logging.DEBUG)
        project_id = create_project(ADMIN_USER_ID, "Test LS 5")

        import_req = ImportReq(source_path=str(ISSUES_DIR2))
        with FileImport(project_id, import_req) as importer:
            response: ImportRsp = importer.run(ADMIN_USER_ID)
        finished_job = wait_for_stable(response.job_id)
        check_job_errors(finished_job)
        expected = [
            "Some specified classif_id don't exist, correct them prior to reload: 99999999"
        ]
        assert get_job_errors(finished_job) == expected
示例#3
0
def test_import_breaking_unicity(config, database, caplog):
    """Check that an import breaking the Sample/Acquisition hierarchy is refused.

    Uniqueness rules being exercised:
        - a Sample orig_id is unique per project;
        - an Acquisition orig_id is unique per project and belongs to a
          single Sample;
        - a Process orig_id is unique per acquisition (structurally, as it
          is a 1<->1 relationship).

    So, given S("a") -> A("b") -> P("c"), importing S("a2") -> A("b") is
    illegal, and the reported message should say that acquisition 'b'
    already belongs to sample 'a' and so cannot be created under 'a2'.

    Cannot run by itself: it reuses the project made by an earlier test.
    """
    caplog.set_level(logging.DEBUG)
    # The target project comes from the first test of the series
    found_project = search_unique_project(ADMIN_USER_ID, "Test Create Update")
    project_id = found_project.projid
    import_req = ImportReq(source_path=str(BREAKING_HIERARCHY_DIR))

    with FileImport(project_id, import_req) as importer:
        response: ImportRsp = importer.run(ADMIN_USER_ID)
    finished_job = wait_for_stable(response.job_id)
    reported = check_job_errors(finished_job)
    expected_message = (
        "Acquisition 'generic_m106_mn01_n1_sml' is already associated with sample "
        "'{'m106_mn01_n1_sml'}', it cannot be associated as well with "
        "'m106_mn01_n1_sml_brk"
    )
    assert reported == [expected_message]
示例#4
0
def test_import_again_not_skipping_nor_imgs(config, database, caplog):
    """Re-import into the same project without skipping TSVs or images.

    Cannot run by itself: it reuses the project made by an earlier test.
    Eleven "Duplicate object" errors are expected from the job.
    """
    caplog.set_level(logging.DEBUG)
    # The target project comes from the first test of the series
    found_project = search_unique_project(ADMIN_USER_ID, "Test Create Update")
    project_id = found_project.projid
    import_req = ImportReq(source_path=str(PLAIN_DIR))
    with FileImport(project_id, import_req) as importer:
        response: ImportRsp = importer.run(ADMIN_USER_ID)
    finished_job = wait_for_stable(response.job_id)
    check_job_errors(finished_job)
    # Count only the duplicate-related messages; other errors are ignored here
    duplicate_count = sum(
        1 for message in get_job_errors(finished_job)
        if "Duplicate object" in message
    )
    assert duplicate_count == 11
示例#5
0
def test_import_issues(config, database, caplog):
    """Import a TSV containing many problems and check every reported error.

    The comparison is a list equality, so both the wording and the order of
    the messages are checked.
    """
    caplog.set_level(logging.DEBUG)
    prj_id = create_project(ADMIN_USER_ID, "Test LS 4")

    params = ImportReq(source_path=str(ISSUES_DIR))
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    check_job_errors(job)
    errors = get_job_errors(job)
    assert errors == [
        "Invalid Header 'nounderscorecol' in file ecotaxa_m106_mn01_n3_sml.tsv. Format must be Table_Field. Field ignored",
        "Invalid Header 'unknown_target' in file ecotaxa_m106_mn01_n3_sml.tsv. Unknown table prefix. Field ignored",
        "Invalid Type '[H]' for Field 'object_wrongtype' in file ecotaxa_m106_mn01_n3_sml.tsv. Incorrect Type. Field ignored",
        "Invalid float value 'a' for Field 'object_buggy_float' in file ecotaxa_m106_mn01_n3_sml.tsv.",
        "Invalid Lat. value '100' for Field 'object_lat' in file ecotaxa_m106_mn01_n3_sml.tsv. Incorrect range -90/+90°.",
        "Invalid Long. value '200' for Field 'object_lon' in file ecotaxa_m106_mn01_n3_sml.tsv. Incorrect range -180/+180°.",
        "Invalid Date value '20140433' for Field 'object_date' in file ecotaxa_m106_mn01_n3_sml.tsv.",
        "Invalid Time value '9920' for Field 'object_time' in file ecotaxa_m106_mn01_n3_sml.tsv.",
        "Invalid Annotation Status 'predit' for Field 'object_annotation_status' in file ecotaxa_m106_mn01_n3_sml.tsv.",
        "Missing Image 'm106_mn01_n3_sml_1081.jpg2' in file ecotaxa_m106_mn01_n3_sml.tsv. ",
        "Error while reading image 'm106_mn01_n3_sml_corrupted_image.jpg' "
        "from file ecotaxa_m106_mn01_n3_sml.tsv: cannot identify image file '.../m106_mn01_n3_sml_corrupted_image.jpg' <class 'PIL.UnidentifiedImageError'>",
        "Missing object_id in line '5' of file ecotaxa_m106_mn01_n3_sml.tsv. ",
        "Missing Image 'nada.png' in file ecotaxa_m106_mn01_n3_sml.tsv. "
    ]


# NOTE(review): this test used to be defined *inside* test_import_issues, after
# its final assert, so pytest never collected nor executed it. It now lives at
# module level; confirm it passes now that it actually runs.
# @pytest.mark.skip()
def test_import_classif_issue(config, database, caplog):
    """Import a TSV that references an unknown classification id.

    The job must report a single error naming the missing id.
    """
    caplog.set_level(logging.DEBUG)
    prj_id = create_project(ADMIN_USER_ID, "Test LS 5")

    params = ImportReq(source_path=str(ISSUES_DIR2))
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    check_job_errors(job)
    errors = get_job_errors(job)
    assert errors == [
        "Some specified classif_id don't exist, correct them prior to reload: 99999999"
    ]
示例#6
0
def test_import_again_irrelevant_skipping(config, database, caplog):
    """Re-import similar files into the same project, with both skip options on.

    Cannot run by itself: it reuses the project made by an earlier test.
    At least one job error must mention non-compliant new TSV file(s).
    """
    caplog.set_level(logging.DEBUG)
    # The target project comes from the first test of the series
    found_project = search_unique_project(ADMIN_USER_ID, "Test Create Update")
    project_id = found_project.projid
    import_req = ImportReq(source_path=str(EMPTY_TSV_IN_UPD_DIR),
                           skip_loaded_files=True,
                           skip_existing_objects=True)
    with FileImport(project_id, import_req) as importer:
        response: ImportRsp = importer.run(ADMIN_USER_ID)
    finished_job = wait_for_stable(response.job_id)
    check_job_errors(finished_job)
    # Scan every reported message, keeping the ones about non-compliant TSVs
    non_compliant = [
        message for message in get_job_errors(finished_job)
        if "new TSV file(s) are not compliant" in message
    ]
    assert len(non_compliant) > 0
示例#7
0
def test_import_too_many_custom_columns(config, database, caplog):
    """Import a TSV with more custom columns than can be mapped.

    Not a realistic case, but it simulates what happens when importing into
    a project that already has mappings. Three acquisition custom fields are
    expected to be rejected, in order.
    """
    caplog.set_level(logging.DEBUG)
    project_id = create_project(ADMIN_USER_ID, "Test LS 6")

    import_req = ImportReq(source_path=str(ISSUES_DIR3))
    with FileImport(project_id, import_req) as importer:
        response: ImportRsp = importer.run(ADMIN_USER_ID)
    finished_job = wait_for_stable(response.job_id)
    check_job_errors(finished_job)
    expected = [
        'Field acq_cus29, in file ecotaxa_m106_mn01_n3_sml.tsv, cannot be mapped. Too '
        'many custom fields, or bad type.',
        'Field acq_cus30, in file ecotaxa_m106_mn01_n3_sml.tsv, cannot be mapped. Too '
        'many custom fields, or bad type.',
        'Field acq_cus31, in file ecotaxa_m106_mn01_n3_sml.tsv, cannot be mapped. Too '
        'many custom fields, or bad type.'
    ]
    reported = get_job_errors(finished_job)
    assert reported == expected
示例#8
0
def test_import_sparse(config, database, caplog):
    """Import a sparse file in which some columns are missing.

    Two errors are expected, about the mandatory acq_id and sample_id fields.
    Afterwards the captured log is printed and the project is dumped to
    "chk.dmp".
    """
    caplog.set_level(logging.DEBUG)
    project_id = create_project(ADMIN_USER_ID, "Test Sparse")

    import_req = ImportReq(source_path=str(SPARSE_DIR))
    with FileImport(project_id, import_req) as importer:
        response: ImportRsp = importer.run(ADMIN_USER_ID)
    finished_job = wait_for_stable(response.job_id)
    reported = check_job_errors(finished_job)
    expected = [
        "In ecotaxa_20160719B-163000ish-HealyVPR08-2016_d200_h18_roi.tsv, field acq_id is mandatory as there are some acq columns: ['acq_hardware', 'acq_imgtype', 'acq_instrument'].",
        "In ecotaxa_20160719B-163000ish-HealyVPR08-2016_d200_h18_roi.tsv, field sample_id is mandatory as there are some sample columns: ['sample_program', 'sample_ship', 'sample_stationid']."
    ]
    assert reported == expected
    print("\n".join(caplog.messages))
    with AsciiDumper() as dumper:
        dumper.run(projid=project_id, out="chk.dmp")