def get_tsv_errors(tsv_path, schema_name, optional_fields=None, offline=None, encoding=None):
    '''
    Validate the TSV.

    Returns an error description on failure — a string for file-level
    problems, a dict for schema-lookup OS errors — or the result of
    get_table_errors() for row-level validation.
    '''
    # BUG FIX: the default was a shared mutable list (optional_fields=[]);
    # use None as the sentinel and create a fresh list per call.
    if optional_fields is None:
        optional_fields = []
    logging.info(f'Validating {schema_name} TSV...')
    if not Path(tsv_path).exists():
        return 'File does not exist'
    try:
        rows = dict_reader_wrapper(tsv_path, encoding=encoding)
    except IsADirectoryError:
        return 'Expected a TSV, but found a directory'
    except UnicodeDecodeError as e:
        return get_context_of_decode_error(e)
    if not rows:
        return 'File has no data rows.'
    # Legacy TSVs have no 'version' column; treat them as version '0'.
    version = rows[0].get('version', '0')
    try:
        # Names of the non-assay ("other") schemas, derived from the
        # filenames under table-schemas/others (e.g. "foo-v1.yaml" -> "foo").
        others = [
            p.stem.split('-v')[0] for p in
            (Path(__file__).parent / 'table-schemas/others').iterdir()
        ]
        if schema_name in others:
            schema = get_other_schema(schema_name, version, offline=offline)
        else:
            schema = get_table_schema(schema_name, version,
                                      offline=offline,
                                      optional_fields=optional_fields)
    except OSError as e:
        # Typically a missing schema file: report {message: filename}.
        return {e.strerror: Path(e.filename).name}
    return get_table_errors(tsv_path, schema)
def main():
    '''Generate docs, versioned schema YAML, and data-entry templates
    for one type into the target directory.'''
    parser = argparse.ArgumentParser()
    parser.add_argument('type', help='What type to generate')
    parser.add_argument('target', type=dir_path, help='Directory to write output to')
    args = parser.parse_args()

    schema_versions = dict_schema_versions()
    versions = sorted(schema_versions[args.type])
    assert versions, f'No versions for {args.type}'
    max_version = max(versions)

    is_assay = get_is_assay(args.type)
    if is_assay:
        table_schemas = {v: get_table_schema(args.type, v) for v in versions}
        directory_schema = get_directory_schema(args.type)
    else:
        table_schemas = {v: get_other_schema(args.type, v) for v in versions}
        directory_schema = {}

    target = Path(args.target)

    # README.md: a pointer to the GitHub Pages copy of the docs.
    url = f'https://hubmapconsortium.github.io/ingest-validation-tools/{args.type}/'
    (target / 'README.md').write_text(f'Moved to [github pages]({url}).')

    # index.md: the generated documentation itself.
    (target / 'index.md').write_text(
        generate_readme_md(table_schemas, directory_schema,
                           args.type, is_assay=is_assay))

    # YAML: one file per version, sanity-checking the version constraint.
    for v in versions:
        schema = table_schemas[v]
        first_field = schema['fields'][0]
        if first_field['name'] == 'version':
            assert first_field['constraints']['enum'] == [v], \
                f'Wrong version constraint in {args.type}-v{v}.yaml'
        assert schema['fields'][0]
        (target / f'v{v}.yaml').write_text(
            '# Generated YAML: PRs should not start here!\n' + dump_yaml(schema))

    # Data entry templates for the newest version only.
    max_schema = table_schemas[max_version]
    (target / get_tsv_name(args.type, is_assay=is_assay)).write_text(
        generate_template_tsv(max_schema))
    create_xlsx(max_schema,
                target / get_xlsx_name(args.type, is_assay=is_assay),
                idempotent=True,
                sheet_name='Export as TSV')
def main():
    '''Generate the README, versioned schema YAML files, and data-entry
    templates for a single type into the target directory.'''
    parser = argparse.ArgumentParser()
    parser.add_argument('type', help='What type to generate')
    parser.add_argument('target', type=dir_path, help='Directory to write output to')
    args = parser.parse_args()
    schema_versions = dict_schema_versions()
    versions = sorted(schema_versions[args.type])
    # Fail early with a clear message rather than letting max() raise a
    # bare ValueError when no versions are registered for this type.
    assert versions, f'No versions for {args.type}'
    max_version = max(versions)
    is_assay = get_is_assay(args.type)
    if is_assay:
        table_schemas = {v: get_table_schema(args.type, v) for v in versions}
        directory_schema = get_directory_schema(args.type)
    else:
        # Non-assay ("other") types have no directory schema.
        table_schemas = {v: get_other_schema(args.type, v) for v in versions}
        directory_schema = {}
    # README:
    with open(Path(args.target) / 'README.md', 'w') as f:
        f.write(
            generate_readme_md(table_schemas, directory_schema,
                               args.type, is_assay=is_assay))
    # YAML: one generated file per version.
    for v in versions:
        with open(Path(args.target) / f'v{v}.yaml', 'w') as f:
            f.write('# Generated YAML: PRs should not start here!\n'
                    + dump_yaml(table_schemas[v]))
    # Data entry templates, built from the newest version only:
    with open(
            Path(args.target) / get_tsv_name(args.type, is_assay=is_assay),
            'w') as f:
        max_schema = table_schemas[max_version]
        f.write(generate_template_tsv(max_schema))
    create_xlsx(max_schema,
                Path(args.target) / get_xlsx_name(args.type, is_assay=is_assay),
                idempotent=True,
                sheet_name='Export as TSV')