def test_forge_reset_query():
    """A query built up on a Forge instance is discarded by reset_query()."""
    client = forge.Forge()
    # Add a term that would normally return results...
    client.match_field("elements", "Al")
    # ...then wipe it out.
    client.reset_query()
    # With no query specified, search() yields nothing.
    assert client.search() == []
def test_forge_match_resource_types():
    """match_resource_types should restrict hits to the requested mdf.resource_type."""
    # A single type
    single = forge.Forge()
    single.match_resource_types("record")
    hits_single = single.search(limit=10)
    assert check_field(hits_single, "mdf.resource_type", "record") == 0
    # Two types at once
    double = forge.Forge()
    double.match_resource_types(["collection", "dataset"])
    hits_double = double.search()
    assert check_field(hits_double, "mdf.resource_type", "record") == -1
    # TODO: Re-enable this assert after we get collections in MDF
    # assert check_field(hits_double, "mdf.resource_type", "dataset") == 2
    # Zero types: no-op that returns the same instance for chaining
    empty = forge.Forge()
    assert empty.match_resource_types("") == empty
def test_forge_exclude_field():
    """exclude_field should remove matching entries from the result set."""
    client = forge.Forge(index="mdf")
    # Exclude Al-containing entries from a single-source search
    client.exclude_field("mdf.elements", "Al")
    client.match_field("mdf.source_name", "core_mof")
    hits = client.search()
    # -1 from check_field means no result contained the excluded value
    assert check_field(hits, "mdf.elements", "Al") == -1
def test_forge_aggregate_source():
    """aggregate_source should return all entries for a source, past the search limit.

    NOTE(review): a second ``test_forge_aggregate_source`` is defined later in
    this file; pytest only collects the last definition — confirm and dedupe.
    """
    # Test limit
    f1 = forge.Forge()
    res1 = f1.aggregate_source("amcs")
    # isinstance() is the idiomatic type check (PEP 8), not `type(x) is ...`,
    # and matches the style used by the sibling aggregate tests.
    assert isinstance(res1, list)
    # amcs holds more entries than the normal 10k search cap
    assert len(res1) > 10000
    assert isinstance(res1[0], dict)
def test_forge_match_tags():
    """match_tags should filter results by values in mdf.tags."""
    client = forge.Forge(index="mdf")
    # Pull a real tag from a known record to query with
    seed = client.search("mdf.source_name:trinkle_elastic_fe_bcc", advanced=True, limit=1)
    first_tag = seed[0]["mdf"]["tags"][0]
    # A single tag
    hits1 = client.match_tags(first_tag).search()
    assert check_field(hits1, "mdf.tags", first_tag) == 2
    quoted_tag = "\"ab initio\""
    client.match_tags(quoted_tag)
    hits2 = client.search()
    # Elasticsearch splits ["ab-initio"] into ["ab", "initio"]
    assert check_field(hits2, "mdf.tags", "ab-initio") == 2
    # Multiple tags
    many_tags = ["\"density functional theory calculations\"", "\"X-ray\""]
    hits3 = client.match_tags(many_tags, match_all=True).search()
    # "source_name": "ge_nanoparticles",
    # "tags": [ "amorphization","density functional theory calculations","Ge nanoparticles",
    #           "high pressure","phase transformation","Raman","X-ray absorption","zip" ]
    assert check_field(hits3, "mdf.tags", "Raman") == 1
    assert check_field(hits3, "mdf.tags", "X-ray absorption") == 1
    assert check_field(hits3, "mdf.tags", "density functional theory calculations") == 1
    # An empty tag is a no-op and returns the instance for chaining
    assert client.match_tags("") == client
def test_forge_aggregate_source():
    """aggregate_source should return every entry for a source, beyond the search cap."""
    # Test limit
    client = forge.Forge(index="mdf")
    entries = client.aggregate_source("amcs")
    assert isinstance(entries, list)
    # More entries than a normal search could return
    assert len(entries) > 10000
    assert isinstance(entries[0], dict)
def test_forge_reset_query():
    """After reset_query(), a previously-built query no longer applies."""
    client = forge.Forge()
    # This term would match results on its own...
    client.match_term("data")
    # ...but resetting clears it.
    client.reset_query()
    # An empty query returns an empty result list.
    assert client.search() == []
def test_forge_search_by_tags():
    """search_by_tags: single tag, match_all=True, and match_all=False behaviors."""
    # One tag
    client1 = forge.Forge()
    one_tag = "DFT"
    hits1 = client1.search_by_tags(one_tag)
    assert check_field(hits1, "mdf.tags", "DFT") == 2
    # All tags required
    client2 = forge.Forge()
    all_tags = ["\"Density Functional Theory\"", "\"X-ray\""]
    hits_all = client2.search_by_tags(all_tags, match_all=True)
    # Any tag sufficient
    client3 = forge.Forge()
    any_tags = ["\"Density Functional Theory\"", "\"X-ray\""]
    hits_any = client3.search_by_tags(any_tags, match_all=False)
    # The match-all results are a strict subset of the match-any results
    assert len(hits_any) > len(hits_all)
    assert all([r in hits_any for r in hits_all])
def test_forge_match_elements():
    """match_elements should filter by one or many entries in mdf.elements."""
    # One element
    single = forge.Forge()
    single.match_elements("Al")
    hits1 = single.search()
    assert hits1 != []
    presence = check_field(hits1, "mdf.elements", "Al")
    assert presence in (0, 1)
    # Multiple elements
    multi = forge.Forge()
    multi.match_elements(["Al", "Cu"])
    hits2 = multi.search()
    assert check_field(hits2, "mdf.elements", "Al") == 1
    assert check_field(hits2, "mdf.elements", "Cu") == 1
    # Empty input is a no-op returning the instance for chaining
    empty = forge.Forge()
    assert empty.match_elements("") == empty
def test_forge_chaining():
    """Chained match_field calls must build the same query as sequential calls."""
    client = forge.Forge(index="mdf")
    # Sequential style
    client.match_field("source_name", "cip")
    client.match_field("elements", "Al")
    sequential = client.search()
    # Fluent/chained style
    chained = client.match_field("source_name", "cip").match_field("elements", "Al").search()
    # Both result sets must be equal (mutual containment)
    assert all([r in chained for r in sequential]) and all([r in sequential for r in chained])
def test_forge_exclude_range():
    """exclude_range should cut values outside/inside the given bounds."""
    # Single-value use: exclude everything lexically outside "Al"
    client1 = forge.Forge()
    client1.exclude_range("mdf.elements", "Am", "*")
    client1.exclude_range("mdf.elements", "*", "Ak")
    hits1, meta1 = client1.search(info=True)
    al_presence = check_field(hits1, "mdf.elements", "Al")
    assert al_presence == 0 or al_presence == 2
    client2 = forge.Forge()
    hits2, meta2 = client2.search("mdf.elements:Al", advanced=True, info=True)
    assert meta1["total_query_matches"] <= meta2["total_query_matches"]
    # Non-matching use, test inclusive: an exclusive Al-Al exclusion removes nothing extra
    client3 = forge.Forge()
    client3.exclude_range("mdf.elements", "Am", "*")
    client3.exclude_range("mdf.elements", "*", "Ak")
    client3.exclude_range("mdf.elements", "Al", "Al", inclusive=False)
    hits3, meta3 = client3.search(info=True)
    assert meta1["total_query_matches"] == meta3["total_query_matches"]
def test_forge_match_sources():
    """match_sources should filter results by mdf.source_name."""
    # One source
    single = forge.Forge()
    single.match_sources("nist_janaf")
    hits1 = single.search()
    assert hits1 != []
    assert check_field(hits1, "mdf.source_name", "nist_janaf") == 0
    # Multiple sources
    multi = forge.Forge()
    multi.match_sources(["nist_janaf", "hopv"])
    hits2 = multi.search()
    # The single-source hits must be a subset of the multi-source hits
    assert len(hits2) > len(hits1)
    assert all([r in hits2 for r in hits1])
    assert check_field(hits2, "mdf.source_name", "nist_janaf") == 2
    # Empty input is a no-op returning the instance for chaining
    empty = forge.Forge()
    assert empty.match_sources("") == empty
def test_forge_search_by_titles():
    """search_by_titles should match exact quoted titles and partial words."""
    client = forge.Forge(index="mdf")
    # Exact quoted title: every hit carries it
    exact = ["\"AMCS - Tungsten\""]
    hits_exact = client.search_by_titles(exact)
    assert check_field(hits_exact, "mdf.title", "AMCS - Tungsten") == 0
    # Single word: only some hits carry the full title
    partial = ["Tungsten"]
    hits_partial = client.search_by_titles(partial)
    assert check_field(hits_partial, "mdf.title", "AMCS - Tungsten") == 2
def test_forge_match_titles():
    """match_titles should filter results by one or more mdf.title values."""
    # One title
    single = forge.Forge()
    one_title = '"OQMD - Na1Y2Zr1"'
    hits1 = single.match_titles(one_title).search()
    assert hits1 != []
    assert check_field(hits1, "mdf.title", "OQMD - Na1Y2Zr1") == 0
    # Multiple titles
    multi = forge.Forge()
    two_titles = ['"AMCS - Tungsten"', '"Cytochrome QSAR"']
    hits2 = multi.match_titles(two_titles).search()
    assert hits2 != []
    assert check_field(hits2, "mdf.title", "Cytochrome QSAR - C13F2N6O") == 2
    # Empty input is a no-op returning the instance for chaining
    empty = forge.Forge()
    assert empty.match_titles("") == empty
def test_forge_http_return():
    """http_return should download the files behind results into a list of contents."""
    client = forge.Forge()
    # A single-file result
    contents1 = client.http_return(example_result1)
    assert isinstance(contents1, list)
    assert contents1 == ["This is a test document for Forge testing. Please do not remove.\n"]
    # A result referencing multiple files
    contents2 = client.http_return(example_result2)
    assert isinstance(contents2, list)
    assert contents2 == ["This is a test document for Forge testing. Please do not remove.\n",
                         "This is a second test document for Forge testing. Please do not remove.\n"]
def test_forge_aggregate():
    """aggregate should use the current query, return results, and honor reset_query.

    Checks that the query survives when reset_query=False and that a repeat
    aggregate over the same (retained) query yields the same set of entries.
    """
    client = forge.Forge()
    client.match_field("mdf.source_name", "nist_xps_db")
    first_pass = client.aggregate(reset_query=False)
    assert len(first_pass) > 10000
    second_pass = client.aggregate()
    # Both passes must contain exactly the same entries
    assert all([r in second_pass for r in first_pass]) and all([r in first_pass for r in second_pass])
def test_forge_http_stream():
    """http_stream should yield file contents lazily as a generator.

    Covers a single-file result and a multi-file result.
    """
    f = forge.Forge()
    # Simple case
    res1 = f.http_stream(example_result1)
    assert isinstance(res1, types.GeneratorType)
    # Use the next() builtin rather than calling __next__() directly (idiom)
    assert next(res1) == "This is a test document for Forge testing. Please do not remove.\n"
    # With multiple files
    res2 = f.http_stream(example_result2)
    assert isinstance(res2, types.GeneratorType)
    assert next(res2) == "This is a test document for Forge testing. Please do not remove.\n"
    assert next(res2) == "This is a second test document for Forge testing. Please do not remove.\n"
def test_forge_fetch_datasets_from_results():
    """fetch_datasets_from_results should map record entries to their parent datasets."""
    client = forge.Forge(index="mdf")
    # Record from OQMD
    oqmd_record = client.search("mdf.source_name:oqmd AND mdf.resource_type:record",
                                advanced=True, limit=1)
    # Record from OQMD with info
    oqmd_record_info = client.search("mdf.source_name:oqmd AND mdf.resource_type:record",
                                     advanced=True, limit=1, info=True)
    # Records from JANAF
    janaf_records = client.search("mdf.source_name:nist_janaf AND mdf.resource_type:record",
                                  advanced=True, limit=2)
    # Dataset for NIST XPS DB
    xps_dataset = client.search(
        "mdf.source_name:nist_xps_db AND mdf.resource_type:dataset", advanced=True)
    # The canonical dataset entries to compare against
    oqmd = client.search("mdf.source_name:oqmd AND mdf.resource_type:dataset",
                         advanced=True)[0]
    nist_janaf = client.search(
        "mdf.source_name:nist_janaf AND mdf.resource_type:dataset", advanced=True)[0]
    # A single record resolves to its dataset
    fetched_single = client.fetch_datasets_from_results(oqmd_record[0])
    assert fetched_single[0] == oqmd
    # Results packaged with info also resolve
    fetched_info = client.fetch_datasets_from_results(oqmd_record_info)
    assert fetched_info[0] == oqmd
    # Records from multiple sources yield each distinct dataset once
    combined = oqmd_record + janaf_records
    fetched_multi = client.fetch_datasets_from_results(combined)
    assert len(fetched_multi) == 2
    assert oqmd in fetched_multi
    assert nist_janaf in fetched_multi
    # A dataset entry maps to itself
    fetched_dataset = client.fetch_datasets_from_results(xps_dataset)
    assert fetched_dataset == xps_dataset
    # With no argument, the current query supplies the entries
    client.match_sources("nist_xps_db")
    assert client.fetch_datasets_from_results() == xps_dataset
    # Unknown resource types yield nothing
    unknown_entry = {"mdf": {"resource_type": "unknown"}}
    assert client.fetch_datasets_from_results(unknown_entry) == []
def test_forge_match_range():
    """match_range should support single values, exclusive bounds, and real ranges."""
    # Single-value use: Al..Al behaves like a direct field match
    client1 = forge.Forge()
    client1.match_range("mdf.elements", "Al", "Al")
    hits1, meta1 = client1.search(info=True)
    assert check_field(hits1, "mdf.elements", "Al") == 1
    client2 = forge.Forge()
    hits2, meta2 = client2.search("mdf.elements:Al", advanced=True, info=True)
    assert meta1["total_query_matches"] == meta2["total_query_matches"]
    # Non-matching use, test inclusive: exclusive Al..Al matches nothing
    client3 = forge.Forge()
    client3.match_range("mdf.elements", "Al", "Al", inclusive=False)
    assert client3.search() == []
    # An actual range widens the match set
    client4 = forge.Forge()
    client4.match_range("mdf.elements", "Al", "Cu")
    hits4, meta4 = client4.search(info=True)
    assert meta1["total_query_matches"] < meta4["total_query_matches"]
    assert (check_field(hits4, "mdf.elements", "Al") >= 0
            or check_field(hits4, "mdf.elements", "Cu") >= 0)
def test_forge_exclusive_match():
    """exclusive_match should return entries containing ONLY the given values."""
    client = forge.Forge(index="mdf")
    # Exactly Al, nothing else
    client.exclusive_match("mdf.elements", "Al")
    only_al = client.search()
    assert check_field(only_al, "mdf.elements", "Al") == 0
    # Exactly the pair Al+Cu: both present, no other element
    client.exclusive_match("mdf.elements", ["Al", "Cu"])
    al_cu = client.search()
    assert check_field(al_cu, "mdf.elements", "Al") == 1
    assert check_field(al_cu, "mdf.elements", "Cu") == 1
    assert check_field(al_cu, "mdf.elements", "Cp") == -1
    assert check_field(al_cu, "mdf.elements", "Fe") == -1
def test_forge_aggregate():
    """aggregate should honor the current query and the reset_query flag.

    First pass keeps the query alive (reset_query=False); the second pass
    reuses and then clears it, returning the same number of matching entries.
    """
    client = forge.Forge(index="mdf")
    client.match_field("mdf.source_name", "nist_xps_db")
    first_pass = client.aggregate(reset_query=False, index="mdf")
    assert len(first_pass) > 10000
    assert check_field(first_pass, "mdf.source_name", "nist_xps_db") == 0
    second_pass = client.aggregate()
    assert len(second_pass) == len(first_pass)
    assert check_field(second_pass, "mdf.source_name", "nist_xps_db") == 0
def test_forge_search_by_elements():
    """search_by_elements must equal the equivalent match_sources+match_elements query."""
    client = forge.Forge(index="mdf")
    wanted_elements = ["Cu", "Al"]
    wanted_sources = ["oqmd", "nist_xps_db"]
    # Build the query by hand...
    manual_hits, manual_info = client.match_sources(wanted_sources).match_elements(
        wanted_elements).search(limit=10000, info=True)
    # ...and via the convenience helper.
    helper_hits, helper_info = client.search_by_elements(
        wanted_elements, wanted_sources, limit=10000, info=True)
    # The two result sets must be identical (mutual containment)
    assert all([r in helper_hits for r in manual_hits]) and all([r in manual_hits for r in helper_hits])
    assert check_field(manual_hits, "mdf.elements", "Al") == 1
    assert check_field(manual_hits, "mdf.source_name", "oqmd") == 2
def test_forge_match_field():
    """match_field should filter by a field value; the query clears after search()."""
    client = forge.Forge(index="mdf")
    # Basic usage
    client.match_field("mdf.source_name", "nist_janaf")
    hits1 = client.search()
    assert check_field(hits1, "mdf.source_name", "nist_janaf") == 0
    # The query is consumed by search(): a second call returns nothing
    assert client.search() == []
    # Also exercises check_field's "some but not all" return value
    client.match_field("mdf.elements", "Al")
    hits2 = client.search()
    assert check_field(hits2, "mdf.elements", "Al") == 1
def test_forge_match_ids():
    """match_ids should filter results by one or many mdf.mdf_id values."""
    # Grab a couple of real IDs to search for
    seed_client = forge.Forge()
    seed = seed_client.search("mdf.source_name:nist_janaf", advanced=True, limit=2)
    first_id = seed[0]["mdf"]["mdf_id"]
    second_id = seed[1]["mdf"]["mdf_id"]
    # A single ID
    single = forge.Forge()
    single.match_ids(first_id)
    hits1 = single.search()
    assert hits1 != []
    assert check_field(hits1, "mdf.mdf_id", first_id) == 0
    # Multiple IDs
    multi = forge.Forge()
    multi.match_ids([first_id, second_id])
    hits2 = multi.search()
    # The single-ID hits must be a subset of the multi-ID hits
    assert len(hits2) > len(hits1)
    assert all([r in hits2 for r in hits1])
    assert check_field(hits2, "mdf.mdf_id", second_id) == 2
    # Empty input is a no-op returning the instance for chaining
    empty = forge.Forge()
    assert empty.match_ids("") == empty
def test_forge_search(capsys):
    """search should report empty queries, honor info=, and respect limit=.

    Uses pytest's capsys fixture to inspect the printed error message.
    """
    # Error on no query
    f1 = forge.Forge()
    assert f1.search() == []
    out, err = capsys.readouterr()
    assert "Error: No query specified" in out
    # Return info if requested; isinstance() is the idiomatic type check
    # (PEP 8) rather than `type(x) is ...`.
    f2 = forge.Forge()
    res2 = f2.search(q="Al", info=False)
    assert isinstance(res2, list)
    assert isinstance(res2[0], dict)
    f3 = forge.Forge()
    res3 = f3.search(q="Al", info=True)
    assert isinstance(res3, tuple)
    assert isinstance(res3[0], list)
    assert isinstance(res3[0][0], dict)
    assert isinstance(res3[1], dict)
    # Check limit
    f4 = forge.Forge()
    res4 = f4.search("oqmd", limit=3)
    assert len(res4) == 3
def test_forge_match_elements():
    """match_elements on a shared client: one element, many elements, empty input."""
    client = forge.Forge(index="mdf")
    # One element
    client.match_elements("Al")
    al_hits = client.search()
    assert al_hits != []
    al_presence = check_field(al_hits, "mdf.elements", "Al")
    assert al_presence in (0, 1)
    # Multiple elements
    client.match_elements(["Al", "Cu"])
    pair_hits = client.search()
    assert check_field(pair_hits, "mdf.elements", "Al") == 1
    assert check_field(pair_hits, "mdf.elements", "Cu") == 1
    # Empty input is a no-op returning the instance for chaining
    assert client.match_elements("") == client
def test_forge_globus_download():
    """globus_download should transfer result files to disk.

    Covers the default destination, dest= with preserve_dir=True, and a
    multi-file result. Each created file is removed again afterwards.
    """
    f = forge.Forge()
    # Simple case (return value unused — the observable effect is the file on disk)
    f.globus_download(example_result1)
    assert os.path.exists("./test_fetch.txt")
    os.remove("./test_fetch.txt")
    # With dest and preserve_dir
    dest_path = os.path.expanduser("~/mdf")
    f.globus_download(example_result1, dest=dest_path, preserve_dir=True)
    assert os.path.exists(os.path.join(dest_path, "test", "test_fetch.txt"))
    os.remove(os.path.join(dest_path, "test", "test_fetch.txt"))
    os.rmdir(os.path.join(dest_path, "test"))
    # With multiple files
    f.globus_download(example_result2, dest=dest_path)
    assert os.path.exists(os.path.join(dest_path, "test_fetch.txt"))
    assert os.path.exists(os.path.join(dest_path, "test_multifetch.txt"))
    os.remove(os.path.join(dest_path, "test_fetch.txt"))
    os.remove(os.path.join(dest_path, "test_multifetch.txt"))
def test_forge_http_download(capsys):
    """http_download: default dest, name-collision suffixes, dest=/preserve_dir,
    multi-file results, the file-count cap, and missing-file error reporting."""
    client = forge.Forge(index="mdf")
    # Simple case
    client.http_download(example_result1)
    assert os.path.exists("./test_fetch.txt")
    # Conflicting filenames get "(n)" suffixes instead of overwriting
    client.http_download(example_result1)
    assert os.path.exists("./test_fetch(1).txt")
    client.http_download(example_result1)
    assert os.path.exists("./test_fetch(2).txt")
    for leftover in ("./test_fetch.txt", "./test_fetch(1).txt", "./test_fetch(2).txt"):
        os.remove(leftover)
    # dest= plus preserve_dir=True, fed a (results, info) tuple
    dest_path = os.path.expanduser("~/mdf")
    client.http_download(([example_result1], {
        "info": None
    }), dest=dest_path, preserve_dir=True)
    nested_file = os.path.join(dest_path, "test", "test_fetch.txt")
    assert os.path.exists(nested_file)
    os.remove(nested_file)
    os.rmdir(os.path.join(dest_path, "test"))
    # A result with multiple files
    client.http_download(example_result2, dest=dest_path)
    assert os.path.exists(os.path.join(dest_path, "test_fetch.txt"))
    assert os.path.exists(os.path.join(dest_path, "test_multifetch.txt"))
    os.remove(os.path.join(dest_path, "test_fetch.txt"))
    os.remove(os.path.join(dest_path, "test_multifetch.txt"))
    # Too many files is refused
    assert client.http_download(list(range(10001)))["success"] is False
    # "Missing" files print an error and create nothing
    client.http_download(example_result_missing)
    out, err = capsys.readouterr()
    assert not os.path.exists("./missing.txt")
    assert ("Error 404 when attempting to access "
            "'https://data.materialsdatafacility.org/test/missing.txt'") in out
def test_forge_exclude_range():
    """exclude_range on a shared client, including the no-bounds no-op."""
    # Single-value use: exclude everything lexically outside "Al"
    client = forge.Forge(index="mdf")
    client.exclude_range("mdf.elements", "Am", "*")
    client.exclude_range("mdf.elements", "*", "Ak")
    hits1, meta1 = client.search(info=True)
    al_presence = check_field(hits1, "mdf.elements", "Al")
    assert al_presence == 0 or al_presence == 2
    hits2, meta2 = client.search("mdf.elements:Al", advanced=True, info=True)
    assert meta1["total_query_matches"] <= meta2["total_query_matches"]
    # Non-matching use, test inclusive: exclusive Al-Al exclusion removes nothing extra
    client.exclude_range("mdf.elements", "Am", "*")
    client.exclude_range("mdf.elements", "*", "Ak")
    client.exclude_range("mdf.elements", "Al", "Al", inclusive=False)
    hits3, meta3 = client.search(info=True)
    assert meta1["total_query_matches"] == meta3["total_query_matches"]
    # No bounds at all is a no-op returning the instance for chaining
    assert client.exclude_range("field", start=None, stop=None) == client
def test_forge_match_years(capsys):
    """match_years: single/multiple years, invalid input errors, and ranges."""
    # One year of data/results
    client = forge.Forge(index="mdf")
    hits_2015 = client.match_years("2015").search()
    assert hits_2015 != []
    assert check_field(hits_2015, "mdf.year", 2015) == 0
    # Multiple years (mixed str/int input)
    hits_multi = client.match_years(years=["2015", 2011]).search()
    assert check_field(hits_multi, "mdf.year", 2011) == 2
    # Invalid inputs print an error (captured via capsys)
    client.match_years(["20x5"]).search()
    out, err = capsys.readouterr()
    assert "Invalid year: '20x5'" in out
    client.match_years(start="20x5").search()
    out, err = capsys.readouterr()
    assert "Invalid start year: '20x5'" in out
    client.match_years(stop="20x5").search()
    out, err = capsys.readouterr()
    assert "Invalid stop year: '20x5'" in out
    # No arguments is a no-op returning the instance for chaining
    assert client.match_years() == client
    # Ranges: inclusive single-year range matches that year everywhere
    hits_incl = client.match_years(start=2015, stop=2015, inclusive=True).search()
    assert check_field(hits_incl, "mdf.year", 2015) == 0
    # Exclusive range keeps only the strictly-interior years
    hits_excl = client.match_years(start=2014, stop=2017, inclusive=False).search()
    assert check_field(hits_excl, "mdf.year", 2013) == -1
    assert check_field(hits_excl, "mdf.year", 2014) == -1
    assert check_field(hits_excl, "mdf.year", 2015) == 2
    assert check_field(hits_excl, "mdf.year", 2016) == 2
    assert check_field(hits_excl, "mdf.year", 2017) == -1
    # An exclusive empty range matches nothing
    assert client.match_years(start=2015, stop=2015, inclusive=False).search() == []