def make_config(data_file):
    """Return a ``Config`` whose paths are rooted at ``base_dir``.

    ``data_file`` is joined onto ``base_dir``. ``base_dir`` and ``tmpdir``
    are not defined in this function, so they must be supplied by the
    surrounding scope.
    """
    settings = {}
    settings['output_dir'] = str(tmpdir)
    # Joining with "." keeps base_dir itself, with a trailing "/." component.
    settings['base_dir'] = os.path.join(base_dir, ".")
    settings['data_file'] = os.path.join(base_dir, data_file)
    settings['config_dir'] = os.path.join(base_dir, "config")
    return Config(settings)
 def make_config(data_file):
     """Return a ``Config`` rooted at ``base_dir``, including an ontology path.

     ``data_file`` is resolved relative to ``base_dir``. ``base_dir`` and
     ``tmpdir`` are not defined here and must come from the surrounding scope.
     """

     def under_base(relative):
         # Every path except the output directory lives below base_dir.
         return os.path.join(base_dir, relative)

     return Config({
         'output_dir': str(tmpdir),
         'base_dir': under_base("."),
         'data_file': under_base(data_file),
         'config_dir': under_base("config"),
         'ontology': under_base('config/ppo-reasoned-no-imports.owl'),
     })
    def make_config(data_file):
        """Return a ``Config`` built from a ``Namespace`` of fixed test options.

        Only ``data_file`` varies: it is joined onto ``base_dir``, which is
        not defined here and must come from the surrounding scope.
        """
        options = {
            'chunk_size': 50000,
            'config_dir': 'test/config',
            'data_file': os.path.join(base_dir, data_file),
            'drop_invalid': True,
            'input_dir': 'test/data',
            'log_file': False,
            'num_processes': 4,
            'ontology': 'test/test-ontology.owl',
            'output_dir': 'test/data',
            'preprocessor': None,
            'project': 'test',
            'project_base': 'projects',
            'reasoner_config': None,
            'split_data_column': None,
            'verbose': True,
        }
        parsed_args = Namespace(**options)

        # Config receives the namespace attributes as keyword arguments.
        return Config(**vars(parsed_args))
Exemplo n.º 4
0
    def make_config(data_file):
        """Return a ``Config`` built from a ``Namespace`` of fixed test options.

        Input and output use separate ``test/data/input`` and
        ``test/data/output`` directories. ``data_file`` is joined onto
        ``base_dir``, which must come from the surrounding scope.
        """
        # An earlier, disabled variant of this call pointed ``ontology`` at the
        # 2018-07-31 PPO release on raw.githubusercontent.com; the local test
        # ontology is used here instead.
        cli_args = Namespace(
            chunk_size=50000,
            config_dir='test/config',
            data_file=os.path.join(base_dir, data_file),
            drop_invalid=True,
            input_dir='test/data/input',
            log_file=False,
            num_processes=4,
            ontology='test/test-ontology.owl',
            output_dir='test/data/output',
            preprocessor=None,
            project='test',
            project_base='projects',
            reasoner_config=None,
            split_data_column=None,
            verbose=True,
        )

        # Config receives the namespace attributes as keyword arguments.
        return Config(**vars(cli_args))
def test_config(tmpdir):
    """Check attribute access and rule parsing on a ``Config`` built from a
    ``Namespace``.

    The ``tmpdir`` argument is accepted but not used by this test; all paths
    are the fixed relative ``test/...`` locations below.
    """
    # Mimic the argument constructor from process:
    ns = Namespace(chunk_size=50000,
                   config_dir='test/config',
                   data_file=None,
                   drop_invalid=True,
                   input_dir='test/data/input',
                   log_file=False,
                   num_processes=4,
                   ontology='test/test-ontology.owl',
                   output_dir='test/data/output',
                   preprocessor=None,
                   project='test',
                   project_base='projects',
                   reasoner_config=None,
                   split_data_column=None,
                   verbose=True)

    # Build the Config class from the namespace attributes
    config = Config(**vars(ns))

    # Test that attributes are accessible
    assert config.output_dir == 'test/data/output'

    # verify that a nonexistent attribute returns None
    assert config.doesnt_exist is None

    # verify that 5 rules were parsed
    assert len(config.rules) == 5
    # should split | delimited rule columns
    assert isinstance(config.rules[0]['columns'], list)
    # should assign default error level
    for rule in config.rules:
        assert rule['level'] in ["error", "warning"]
Exemplo n.º 6
0
def test_config(tmpdir):
    """Check that ``Config`` exposes its arguments and parses the config dir.

    Builds a ``Config`` from a settings dict plus one keyword argument, then
    asserts on the parsed rules, the phenophase description list, the
    entities and the relations.

    Args:
        tmpdir: directory object used as the output directory; it must
            support ``str()`` and ``.join()`` (presumably pytest's ``tmpdir``
            fixture).
    """
    base_dir = os.path.dirname(__file__)
    config = Config(
        {
            'output_dir': str(tmpdir),
            'base_dir': os.path.join(base_dir, "."),
            'data_file': os.path.join(base_dir, "data/invalid_input.csv"),
            'config_dir': os.path.join(base_dir, "config")
        },
        kw=False)

    # verify that passed in args & kwargs are accessible as attributes.
    # Identity check: `== False` would also accept 0.
    assert config.kw is False
    assert config.output_dir == tmpdir

    # should setup some attributes
    # NOTE(review): an older comment here said these checks were failing and
    # switched off, but this assertion is active.
    assert config.invalid_data_file == tmpdir.join('invalid_data.csv')

    # verify that a nonexistent attribute returns None
    assert config.doesnt_exist is None

    # verify rules were parsed 5 rules + 1 default
    assert len(config.rules) == 6
    # should split | delimited rule columns
    assert isinstance(config.rules[0]['columns'], list)
    # should assign default error level
    for rule in config.rules:
        assert rule['level'] in ["error", "warning"]

    # should parse phenophase_descriptions file
    descriptions = config.lists['phenophase_descriptions.csv']
    expected_descriptions = [
        ('Reproductive', 'http://purl.obolibrary.org/obo/PPO_0002025'),
        ('Flowering', 'http://purl.obolibrary.org/obo/PPO_0002035'),
        ('Fruiting', 'http://purl.obolibrary.org/obo/PPO_0002045'),
    ]
    for field, defined_by in expected_descriptions:
        assert {'field': field, 'defined_by': defined_by} in descriptions

    # should be 3 valid phenophase_descriptions list items
    assert len(descriptions) == 3

    # should parse entities and perform label substitution
    rdf_type = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
    presence_entity = {
        'alias': 'plantStructurePresence',
        'concept_uri': rdf_type,
        'unique_key': 'record_id',
        'identifier_root': 'http://n2t.net/ark:/21547/Anl2',
        'columns': [('phenophase_name', rdf_type)],
    }
    observing_entity = {
        'alias': 'phenologicalObservingProcess',
        'concept_uri': 'http://purl.obolibrary.org/obo/BCO_0000003',
        'unique_key': 'record_id',
        'identifier_root': 'http://n2t.net/ark:/21547/Anm2',
        'columns': [
            ('record_id', 'http://rs.tdwg.org/dwc/terms/EventID'),
            ('latitude', 'http://rs.tdwg.org/dwc/terms/decimalLatitude'),
            ('longitude', 'http://rs.tdwg.org/dwc/terms/decimalLongitude'),
            ('year', 'http://rs.tdwg.org/dwc/terms/year'),
            ('day_of_year', 'http://rs.tdwg.org/dwc/terms/startDayOfYear'),
            ('source', 'http://purl.org/dc/elements/1.1/creator'),
        ],
    }
    assert presence_entity in config.entities
    assert observing_entity in config.entities

    assert len(config.entities) == 2

    # should parse relations and perform label substitution
    assert {
        'subject_entity_alias': 'plantStructurePresence',
        'predicate': 'http://purl.obolibrary.org/obo/OBI_0000295',
        'object_entity_alias': 'phenologicalObservingProcess'
    } in config.relations
Exemplo n.º 7
0
def test_config(tmpdir):
    """Check that ``Config`` exposes its arguments and parses the config dir.

    Builds a ``Config`` from a settings dict plus one keyword argument, then
    asserts on the parsed rules, entities and relations.

    Args:
        tmpdir: directory object used as the output directory; it must
            support ``str()`` and ``.join()`` (presumably pytest's ``tmpdir``
            fixture).
    """
    base_dir = os.path.dirname(__file__)
    config = Config(
        {
            'output_dir': str(tmpdir),
            'base_dir': os.path.join(base_dir, "."),
            'data_file': os.path.join(base_dir, "data/invalid_input.csv"),
            'config_dir': os.path.join(base_dir, "config")
        },
        kw=False)

    # verify that passed in args & kwargs are accessible as attributes.
    # Identity check: `== False` would also accept 0.
    assert config.kw is False
    assert config.output_dir == tmpdir

    # should setup some attributes
    # NOTE(review): an older comment here said these checks were failing and
    # switched off, but this assertion is active.
    assert config.invalid_data_file == tmpdir.join('invalid_data.csv')

    # verify that a nonexistent attribute returns None
    assert config.doesnt_exist is None

    # TODO: confirm the exact rule count and pin it here. The previous
    # placeholder compared len() against -1, which can never be true, so the
    # test always failed. Non-emptiness is the minimum the index below needs.
    assert len(config.rules) > 0
    # should split | delimited rule columns
    assert isinstance(config.rules[0]['columns'], list)
    # should assign default error level
    for rule in config.rules:
        assert rule['level'] in ["error", "warning"]

    # should parse entities and perform label substitution
    # NOTE(review): 'occurence_id' is spelled with a single "r" -- presumably
    # it mirrors the config file; confirm before changing.
    organism_entity = {
        'alias': 'vertebrateOrganism',
        'concept_uri': 'http://purl.obolibrary.org/obo/NCBITaxon_7742',
        'unique_key': 'occurence_id',
        'identifier_root': 'urn:vertOrganism',
        'columns': [('genus', 'http://rs.tdwg.org/dwc/terms/genus')],
    }
    assert organism_entity in config.entities

    # NOTE(review): the 'event_date' URI ends in ".../terms/even", which looks
    # truncated -- verify against the mapping file.
    trait_process_entity = {
        'alias': 'organismalTraitObsProc',
        'concept_uri': 'http://purl.obolibrary.org/obo/OVT_0000002',
        'unique_key': 'occurence_id',
        'identifier_root': 'urn:traitObsProc',
        'columns': [
            ('record_id', 'http://rs.tdwg.org/dwc/terms/EventID'),
            ('latitude', 'http://rs.tdwg.org/dwc/terms/decimalLatitude'),
            ('longitude', 'http://rs.tdwg.org/dwc/terms/decimalLongitude'),
            ('year', 'http://rs.tdwg.org/dwc/terms/year'),
            ('event_date', 'http://rs.tdwg.org/dwc/terms/even'),
        ],
    }
    assert trait_process_entity in config.entities

    assert len(config.entities) == 2

    # should parse relations and perform label substitution
    # NOTE(review): 'organismalTraitObsgProc' differs from the entity alias
    # 'organismalTraitObsProc' asserted above -- confirm which spelling the
    # relations config actually uses.
    assert {
        'subject_entity_alias': 'vertebrateOrganism',
        'predicate': 'http://purl.obolibrary.org/obo/OBI_0000295',
        'object_entity_alias': 'organismalTraitObsgProc'
    } in config.relations