def main():
    """
    Command-line entry point: generate JSON and pyessv CVs for one version
    of the spreadsheets, writing output under the version's directory.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-s", "--source-dir", required=True,
        help=("Source directory, as downloaded and produced by "
              "`download-from-drive` script."))
    parser.add_argument(
        "-v", "--version", required=True, choices=ALL_VERSIONS,
        help=f"Version of the spreadsheets to use (e.g. '{CURRENT_VERSION}').")
    args = parser.parse_args(sys.argv[1:])

    # Fail fast if the source directory does not exist
    if not os.path.isdir(args.source_dir):
        parser.error(f"No such directory '{args.source_dir}'")

    version_dir = os.path.join(args.source_dir, args.version)
    handler = SpreadsheetHandler(version_dir)

    # Output locations: JSON CVs and pyessv vocabularies, both created
    # under the version directory if not already present
    cvs_dir = os.path.join(version_dir, "AMF_CVs")
    pyessv_dir = os.path.join(version_dir, "amf-pyessv-vocabs")
    for directory in (cvs_dir, pyessv_dir):
        if not os.path.isdir(directory):
            os.makedirs(directory)

    handler.write_cvs(cvs_dir, write_pyessv=True, pyessv_root=pyessv_dir)
def main():
    """
    Command-line entry point: read spreadsheet data from a directory and
    write JSON CVs (and pyessv CVs) to the given output locations.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "spreadsheets_dir",
        help="Directory containing spreadsheet data, as produced by "
             "download_from_drive.py")
    parser.add_argument("output_dir",
                        help="Directory to write output JSON CVs to")
    # Note: default dir is not actually set in this code -- if not given
    # just use pyessv's default. Will need to update the help text if this
    # ever changes...
    parser.add_argument(
        "--pyessv-dir", default=None, dest="pyessv_root",
        help="Directory to write pyessv CVs to "
             "[default: ~/.esdoc/pyessv-archive/]")
    args = parser.parse_args(sys.argv[1:])

    if not os.path.isdir(args.spreadsheets_dir):
        parser.error("No such directory '{}'".format(args.spreadsheets_dir))

    # Create output directories if necessary. Use makedirs so that missing
    # intermediate directories are created too -- os.mkdir would raise
    # FileNotFoundError if a parent directory did not exist.
    for dirname in (args.output_dir, args.pyessv_root):
        if dirname and not os.path.isdir(dirname):
            os.makedirs(dirname)

    sh = SpreadsheetHandler(args.spreadsheets_dir)
    sh.write_cvs(args.output_dir, write_pyessv=True,
                 pyessv_root=args.pyessv_root)
def test_basic(self, spreadsheets_dir, tmpdir):
    """
    End-to-end check: variable and dimension TSVs produce JSON CVs with the
    expected filenames and contents.
    """
    # variables
    s_dir = spreadsheets_dir
    prod = s_dir.join("product-definitions")
    var = (prod.mkdir("tsv").mkdir("my-great-product").join(
        "variables-specific.tsv"))
    var.write("\n".join(
        ("Variable\tAttribute\tValue", "wind_speed\t\t", "\tname\twind_speed",
         "\ttype\tfloat32", "eastward_wind\t\t", "\tname\teastward_wind",
         "\tunits\tm s-1")))

    # dimensions
    # (dead code removed here: an unused empty top-level 'other-cool-product'
    # directory was created, and 'dim' was assigned a path that was then
    # immediately overwritten by the assignment below)
    dim = (prod.mkdir("other-cool-product").mkdir(
        "other-cool-product.xlsx").join("Dimensions - Specific.tsv"))
    dim.write("\n".join(
        ("Name\tLength\tunits", "layer_index\t<i>\t1", "other\t42\tm")))

    output = tmpdir.mkdir("cvs")
    sh = SpreadsheetHandler(str(s_dir))
    sh.write_cvs(str(output))

    var_cv = output.join("AMF_product_my-great-product_variable.json")
    dim_cv = output.join("AMF_product_other-cool-product_dimension.json")
    assert var_cv.check()
    assert dim_cv.check()

    decoded = []
    for f in (var_cv, dim_cv):
        try:
            decoded.append(json.load(f))
        except json.decoder.JSONDecodeError:
            assert False, "{} is invalid JSON".format(str(f))

    # check variables - variable CV
    assert decoded[0] == {
        "product_my-great-product_variable": {
            "wind_speed": {
                "type": "float32"
            },
            "eastward_wind": {
                "units": "m s-1"
            }
        }
    }
    # check dimensions CV
    assert decoded[1] == {
        "product_other-cool-product_dimension": {
            "layer_index": {
                "length": "<i>",
                "units": "1"
            },
            "other": {
                "length": "42",
                "units": "m"
            }
        }
    }
def test_pyessv_cvs_are_generated(self, spreadsheets_dir, tmpdir):
    """
    Check that pyessv CVs are written alongside the JSON CVs, and that the
    scientist and product terms contain the expected data.
    """
    # Create spreadsheets to generate some CVs
    sheets = spreadsheets_dir
    # Create scientists CV, to test that @ are allowed in namespaces
    creators_tsv = sheets.join("Vocabularies.xlsx").join("Creators.tsv")
    creators_tsv.write("\n".join((
        "name\temail\torcid\tconfirmed",
        "Bob Smith\[email protected]\thttps://orcid.org/123\tyes",
        "Jane Smith\[email protected]\thttps://orcid.org/999\tyes",
    )))
    # Create products CV, since this is a list rather dict like other CVs
    products_tsv = sheets.join("Vocabularies.xlsx").join("Data Products.tsv")
    products_tsv.write("\n".join(
        ("Data Product", "snr-winds", "aerosol-backscatter")))

    # Write JSON CVs and pyessv CVs
    handler = SpreadsheetHandler(str(sheets))
    json_cvs_output = tmpdir.mkdir("json_cvs")
    pyessv_cvs_output = tmpdir.mkdir("pyessv_cvs")
    handler.write_cvs(str(json_cvs_output), write_pyessv=True,
                      pyessv_root=str(pyessv_cvs_output))

    root = pyessv_cvs_output.join("ncas")
    assert root.join("MANIFEST").check()
    assert root.join("amf").check()

    # Check the contents of some CVs: one term file per scientist, whose
    # 'data' payload matches the corresponding TSV row
    scientist_dir = root.join("amf").join("scientist")
    expected_terms = [
        ("*****@*****.**", {
            "primary_email": "*****@*****.**",
            "previous_emails": [],
            "name": "Bob Smith",
            "orcid": "https://orcid.org/123"
        }),
        ("*****@*****.**", {
            "primary_email": "*****@*****.**",
            "previous_emails": [],
            "name": "Jane Smith",
            "orcid": "https://orcid.org/999"
        }),
    ]
    for term_name, expected_data in expected_terms:
        term = scientist_dir.join(term_name)
        assert term.check()
        term_decoded = json.load(term)
        assert "data" in term_decoded
        assert term_decoded["data"] == expected_data

    product_term = root.join("amf").join("product").join("snr-winds")
    assert product_term.check()
def test_duplicate_instrument_id(self, spreadsheets_dir, tmpdir):
    """
    Check that if there are two instruments with the same ID, a warning is
    printed and one of them is overwritten
    """
    from contextlib import redirect_stderr

    s_dir = spreadsheets_dir
    instr = s_dir.join("Vocabularies.xlsx").join(
        "Instrument Name & Descriptors.tsv")
    instr.write("\n".join(
        ("Old Instrument Name\tNew Instrument Name\tDescriptor",
         "old1\tmyinstr\tFirst instrument",
         "old2\tmyinstr\tSecond instrument")))
    output = tmpdir.mkdir("cvs")

    # Capture stderr via a context manager so it is restored even if
    # write_cvs raises (previously sys.stderr was reassigned by hand, and
    # a failure would leave it redirected for the rest of the test run)
    stderr = StringIO()
    sh = SpreadsheetHandler(str(s_dir))
    with redirect_stderr(stderr):
        sh.write_cvs(str(output))

    instr_output = output.join("AMF_instrument.json")
    assert instr_output.check()
    # The first row wins; the duplicate second row is dropped
    assert json.load(instr_output) == {
        "instrument": {
            "myinstr": {
                "instrument_id": "myinstr",
                "previous_instrument_ids": ["old1"],
                "description": "First instrument"
            }
        }
    }
    stderr_contents = stderr.getvalue().lower()
    assert "duplicate instrument name" in stderr_contents

    # Normal case: warning not shown
    instr.write("\n".join(
        ("Old Instrument Name\tNew Instrument Name\tDescriptor",
         "old1\tmyinstr1\tFirst instrument",
         "old2\tmyinstr2\tSecond instrument")))
    stderr = StringIO()
    sh = SpreadsheetHandler(str(s_dir))
    with redirect_stderr(stderr):
        sh.write_cvs(str(output))
    stderr_contents = stderr.getvalue().lower()
    assert "duplicate instrument name" not in stderr_contents
def test_product(self, spreadsheets_dir, tmpdir):
    """The product CV should be a flat list of product names, in order."""
    product_names = ["snr-winds", "aerosol-backscatter", "aerosol-extinction",
                     "cloud-base", "o3-concentration-profiles"]
    sheet = spreadsheets_dir.join("Vocabularies.xlsx").join(
        "Data Products.tsv")
    sheet.write("\n".join(["Data Product"] + product_names))

    handler = SpreadsheetHandler(str(spreadsheets_dir))
    out_dir = tmpdir.mkdir("cvs")
    handler.write_cvs(str(out_dir))

    prod_cv = out_dir.join("AMF_product.json")
    assert prod_cv.check()
    assert json.load(prod_cv) == {"product": product_names}
def test_instruments(self, spreadsheets_dir, tmpdir):
    """
    Instrument CV: old names may be absent, comma-separated lists, or padded
    with extraneous whitespace; all should be normalised in the output.
    """
    rows = (
        # Include some missing old names, some multiple names, and
        # extraneous whitespace
        "Old Instrument Name\tNew Instrument Name\tDescriptor",
        "man-radar-1290mhz\tncas-radar-wind-profiler-1\tNCAS Mobile Radar Wind Profiler unit 1",
        "\tncas-ceilometer-4\t NCAS Lidar Ceilometer unit 4",
        "man-o3lidar\tncas-o3-lidar-1\tNCAS Mobile O3 lidar unit 1",
        "cv-met-tower, cv-met-webdaq\tncas-aws-7\tNCAS Automatic Weather Station unit 7",
    )
    sheet = spreadsheets_dir.join("Vocabularies.xlsx").join(
        "Instrument Name & Descriptors.tsv")
    sheet.write("\n".join(rows))

    handler = SpreadsheetHandler(str(spreadsheets_dir))
    out_dir = tmpdir.mkdir("cvs")
    handler.write_cvs(str(out_dir))

    cv_file = out_dir.join("AMF_instrument.json")
    assert cv_file.check()

    # Expected entries: (instrument id, previous ids, description)
    expected_entries = [
        ("ncas-radar-wind-profiler-1", ["man-radar-1290mhz"],
         "NCAS Mobile Radar Wind Profiler unit 1"),
        ("ncas-ceilometer-4", [], "NCAS Lidar Ceilometer unit 4"),
        ("ncas-o3-lidar-1", ["man-o3lidar"], "NCAS Mobile O3 lidar unit 1"),
        ("ncas-aws-7", ["cv-met-tower", "cv-met-webdaq"],
         "NCAS Automatic Weather Station unit 7"),
    ]
    assert json.load(cv_file) == {
        "instrument": {
            instr_id: {
                "instrument_id": instr_id,
                "previous_instrument_ids": old_ids,
                "description": description
            }
            for instr_id, old_ids, description in expected_entries
        }
    }
def get_var_inner_cv(self, s_dir, tsv):
    """
    Create a TSV from the given list of lists of columns, and process it as
    a variable TSV file. Return the inner dictionary of the generated JSON
    CV
    """
    product_dir = (s_dir.join("Product Definition Spreadsheets")
                        .mkdir("wind-speed").mkdir("wind-speed.xlsx"))
    var_sheet = product_dir.join("Variables - Specific.tsv")
    var_sheet.write("\n".join("\t".join(row) for row in tsv))

    # Output goes to a sibling of s_dir -- presumably so the generated CVs
    # are not picked up as input spreadsheets; TODO confirm
    out_dir = s_dir.mkdir("../output")
    handler = SpreadsheetHandler(str(s_dir))
    handler.write_cvs(str(out_dir))

    cv_file = out_dir.join("AMF_product_wind-speed_variable.json")
    assert cv_file.check()
    decoded = json.load(cv_file)
    assert "product_wind-speed_variable" in decoded
    return decoded["product_wind-speed_variable"]
def test_common(self, spreadsheets_dir, tmpdir):
    """
    Common variable/dimension sheets produce per-scope CV JSON files and
    YAML files for the variable sheets.
    """
    common_dir = spreadsheets_dir.join("Common.xlsx")

    common_dir.join("Variables - Air.tsv").write("\n".join(
        ("Variable\tAttribute\tValue", "some_air_variable\t\t",
         "\tthingy\tthis_thing", "\ttype\tfloat32")))
    common_dir.join("Variables - Sea.tsv").write("\n".join(
        ("Variable\tAttribute\tValue", "some_sea_variable\t\t",
         "\tthingy\tthat_thing", "\ttype\tstring")))
    common_dir.join("Dimensions - Land.tsv").write(
        "\n".join(("Name\tLength\tunits", "some_dim\t42\tm")))

    handler = SpreadsheetHandler(str(spreadsheets_dir))
    cv_output = tmpdir.mkdir("cvs")
    yaml_output = tmpdir.mkdir("yaml")
    handler.write_cvs(str(cv_output))
    handler.write_yaml(str(yaml_output))

    # Check CV and YAML files exist
    var_air_output = cv_output.join("AMF_product_common_variable_air.json")
    assert var_air_output.check()
    for json_name in ("AMF_product_common_variable_sea.json",
                      "AMF_product_common_dimension_land.json"):
        assert cv_output.join(json_name).check()
    for yml_name in ("AMF_product_common_variable_air.yml",
                     "AMF_product_common_variable_sea.yml"):
        assert yaml_output.join(yml_name).check()

    # Check the content of one of the CVs
    assert json.load(var_air_output) == {
        "product_common_variable_air": {
            "some_air_variable": {
                "thingy": "this_thing",
                "type": "float32"
            }
        }
    }
def test_platform(self, spreadsheets_dir, tmpdir):
    """Platform CV maps each platform ID to its ID and description."""
    platforms = [
        ("wao", "weybourne atmospheric observatory"),
        ("cvao", "cape verde atmospheric observatory"),
    ]
    sheet = spreadsheets_dir.join("Vocabularies.xlsx").join("Platforms.tsv")
    sheet.write("\n".join(
        ["Platform ID\tPlatform Description"] +
        ["{}\t{}".format(pid, desc) for pid, desc in platforms]))

    out_dir = tmpdir.mkdir("cvs")
    handler = SpreadsheetHandler(str(spreadsheets_dir))
    handler.write_cvs(str(out_dir))

    cv_file = out_dir.join("AMF_platform.json")
    assert cv_file.check()
    assert json.load(cv_file) == {
        "platform": {
            pid: {"platform_id": pid, "description": desc}
            for pid, desc in platforms
        }
    }
def test_scientist(self, spreadsheets_dir, tmpdir):
    """
    Scientist CV: the 'confirmed' and 'orcid' columns are optional; rows
    without an orcid get orcid=None in the output.
    """
    s_dir = spreadsheets_dir
    plat = s_dir.join("Vocabularies.xlsx").join("Creators.tsv")
    plat.write("\n".join((
        "name\temail\torcid\tconfirmed",
        # With 'confirmed' column
        "Bob Smith\[email protected]\thttps://orcid.org/123\tyes",
        "Bob Smath\[email protected]\thttps://orcid.org/234\tno",
        # and without
        "Dave Jones\[email protected]\thttps://orcid.org/345",
        # Without orcid
        "Paul Jones\[email protected]\t\tyes",
        "Paul Janes\[email protected]\t",
        "Paul Junes\[email protected]")))
    output = tmpdir.mkdir("cvs")
    sh = SpreadsheetHandler(str(s_dir))
    sh.write_cvs(str(output))

    sci_output = output.join("AMF_scientist.json")
    assert sci_output.check()
    # Parse the file once (previously it was parsed twice and the decoded
    # JSON printed to stdout -- leftover debugging, now removed)
    decoded = json.load(sci_output)
    assert decoded == {
        "scientist": {
            "*****@*****.**": {
                "name": "Bob Smith",
                "primary_email": "*****@*****.**",
                "previous_emails": [],
                "orcid": "https://orcid.org/123"
            },
            "*****@*****.**": {
                "name": "Bob Smath",
                "primary_email": "*****@*****.**",
                "previous_emails": [],
                "orcid": "https://orcid.org/234"
            },
            "*****@*****.**": {
                "name": "Dave Jones",
                "primary_email": "*****@*****.**",
                "previous_emails": [],
                "orcid": "https://orcid.org/345"
            },
            "*****@*****.**": {
                "name": "Paul Jones",
                "primary_email": "*****@*****.**",
                "previous_emails": [],
                "orcid": None
            },
            "*****@*****.**": {
                "name": "Paul Janes",
                "primary_email": "*****@*****.**",
                "previous_emails": [],
                "orcid": None
            },
            "*****@*****.**": {
                "name": "Paul Junes",
                "primary_email": "*****@*****.**",
                "previous_emails": [],
                "orcid": None
            }
        }
    }