def _assay_to_schema_name(assay_type, source_project):
    '''
    Scan the assay table schemas for the one whose enums accept this
    assay_type / source_project pair (source_project may be None),
    and return that schema's name with the version suffix removed.

    >>> _assay_to_schema_name('PAS microscopy', None)
    'stained'

    >>> _assay_to_schema_name('snRNAseq', None)
    'scrnaseq'

    >>> _assay_to_schema_name('snRNAseq', 'HCA')
    'scrnaseq-hca'

    Or, if a match can not be found (try-except just for shorter lines):

    >>> try: _assay_to_schema_name('PAS microscopy', 'HCA')
    ... except PreflightError as e: print(e)
    No schema where 'PAS microscopy' is assay_type and 'HCA' is source_project

    >>> try: _assay_to_schema_name('snRNAseq', 'Bad Project')
    ... except PreflightError as e: print(e)
    No schema where 'snRNAseq' is assay_type and 'Bad Project' is source_project

    >>> try: _assay_to_schema_name('Bad assay', None)
    ... except PreflightError as e: print(e)
    No schema where 'Bad assay' is assay_type

    >>> try: _assay_to_schema_name('Bad assay', 'HCA')
    ... except PreflightError as e: print(e)
    No schema where 'Bad assay' is assay_type and 'HCA' is source_project
    '''
    assays_dir = Path(__file__).parent / 'table-schemas' / 'assays'
    for schema_path in assays_dir.glob('*.yaml'):
        schema = load_yaml(schema_path)
        allowed_assays = get_field_enum('assay_type', schema)
        allowed_projects = get_field_enum('source_project', schema)

        if assay_type not in allowed_assays:
            continue

        if source_project:
            # A project was requested: the schema must list it explicitly.
            if not allowed_projects or source_project not in allowed_projects:
                continue
        elif allowed_projects:
            # No project requested, but this schema is project-specific.
            continue

        # Greedy match, so only the final "-v<digits>" is left off.
        versionless = re.match(r'.+(?=-v\d+)', schema_path.stem)
        return versionless[0]

    # The project clause is keyed on "is not None" (not truthiness),
    # so an empty-string project is still reported in the message.
    project_clause = (
        f" and '{source_project}' is source_project"
        if source_project is not None
        else ''
    )
    raise PreflightError(
        f"No schema where '{assay_type}' is assay_type" + project_clause
    )
def get_other_schema(schema_name, version, offline=None):
    '''
    Load a versioned schema from the 'others' table-schema directory,
    hand each of its fields to _add_constraints, and return the schema.

    The list of all field names in the schema is passed along with
    every field, together with the caller's offline setting.
    '''
    filename = _get_schema_filename(schema_name, version)
    schema = load_yaml(_table_schemas_path / 'others' / filename)
    all_fields = schema['fields']
    field_names = [each['name'] for each in all_fields]
    for each in all_fields:
        # A fresh empty list per call: these schemas have no optional fields.
        _add_constraints(
            each,
            optional_fields=[],
            offline=offline,
            names=field_names,
        )
    return schema
def _assay_to_schema_name(name):
    '''
    Given an assay name, read all the schemas until one matches.
    Return the schema name, but not the version.

    A schema matches when its 'assay_type' field lists the given name
    in its enum constraint. Schema files are read from the
    'table-schemas/assays' directory next to this module.

    Raises PreflightError if no schema lists the name.
    '''
    assays_dir = Path(__file__).parent / 'table-schemas' / 'assays'
    for path in assays_dir.glob('*.yaml'):
        schema = load_yaml(path)
        for field in schema['fields']:
            if field['name'] != 'assay_type':
                continue
            if name in field['constraints']['enum']:
                # Split from the right so only the trailing "-v<version>" is
                # dropped; splitting from the left would truncate a schema
                # name that itself contains "-v".
                return path.stem.rsplit('-v', 1)[0]
    raise PreflightError(f"Can't find schema where '{name}' is in the enum for assay_type")
def get_table_schema(schema_name, version, optional_fields=None, offline=None):
    '''
    Load the assay table schema '<schema_name>-v<version>.yaml' and run
    the per-field helpers over it before returning it.

    For every field, in order: _add_level_1_description,
    _validate_level_1_enum, _add_constraints, _validate_field.

    schema_name, version: together select the YAML file under 'assays'.
    optional_fields: forwarded to _add_constraints; omitted or None
        means an empty list.
    offline: forwarded to _add_constraints unchanged.

    Returns the loaded schema.
    '''
    # Use a None sentinel instead of a mutable default, so calls never
    # share (and cannot accidentally grow) one list object.
    if optional_fields is None:
        optional_fields = []
    schema = load_yaml(_table_schemas_path / 'assays' / f'{schema_name}-v{version}.yaml')
    for field in schema['fields']:
        # Return values are ignored; presumably the helpers update
        # the field in place or raise -- confirm against the helpers.
        _add_level_1_description(field)
        _validate_level_1_enum(field)
        _add_constraints(field, optional_fields, offline=offline)
        _validate_field(field)
    return schema
def get_directory_schema(directory_type):
    '''
    Load the directory schema YAML named after directory_type and add
    the two optional "extras/" entries to the end of it.
    '''
    schema = load_yaml(_directory_schemas_path / f'{directory_type}.yaml')
    extras_entry = {
        'pattern': r'extras/.*',
        'description': 'Free-form descriptive information supplied by the TMC',
        'required': False,
    }
    thumbnail_entry = {
        'pattern': r'extras/thumbnail\.(png|jpg)',
        'description': 'Optional thumbnail image which may be shown in search interface',
        'required': False,
    }
    # NOTE(review): "+=" extends the loaded object in place (assuming it is
    # a list). If load_yaml ever caches its result, repeated calls would
    # keep appending -- confirm against load_yaml.
    schema += [extras_entry, thumbnail_entry]
    return schema
def get_table_schema(schema_name, version, optional_fields=None, offline=None):
    '''
    Load a versioned assay table schema and run the per-field helpers
    over it before returning it.

    For every field, in order: _add_level_1_description,
    _validate_level_1_enum, _add_constraints, _validate_field.
    _add_constraints also receives the names of all fields in the schema.

    schema_name, version: passed to _get_schema_filename to pick the
        YAML file under 'assays'.
    optional_fields: forwarded to _add_constraints; omitted or None
        means an empty list.
    offline: forwarded to _add_constraints unchanged.

    Returns the loaded schema.
    '''
    # Use a None sentinel instead of a mutable default, so calls never
    # share (and cannot accidentally grow) one list object.
    if optional_fields is None:
        optional_fields = []
    schema = load_yaml(
        _table_schemas_path / 'assays' / _get_schema_filename(schema_name, version)
    )
    names = [field['name'] for field in schema['fields']]
    for field in schema['fields']:
        # Return values are ignored; presumably the helpers update
        # the field in place or raise -- confirm against the helpers.
        _add_level_1_description(field)
        _validate_level_1_enum(field)
        _add_constraints(field, optional_fields, offline=offline, names=names)
        _validate_field(field)
    return schema
def get_other_schema(schema_name, version, offline=None):
    '''
    Load '<schema_name>-v<version>.yaml' from the 'others' table-schema
    directory, hand each field to _add_constraints, and return the schema.
    '''
    filename = f'{schema_name}-v{version}.yaml'
    schema = load_yaml(_table_schemas_path / 'others' / filename)
    for each in schema['fields']:
        # A fresh empty list per call: these schemas have no optional fields.
        _add_constraints(each, optional_fields=[], offline=offline)
    return schema
def _assay_to_schema_name(assay_type, source_project):
    '''
    Given an assay name, and a source_project (may be None),
    read all the schemas until one matches.
    Return the schema name, but not the version.

    A schema matches when its assay_type enum lists the assay, and either
    it has a source_project field whose enum lists the given project,
    or it has no source_project field and no project was given.
    Schemas without an assay_type field are skipped.

    Raises PreflightError if no schema matches.

    >>> _assay_to_schema_name('PAS microscopy', None)
    'stained'

    >>> _assay_to_schema_name('snRNAseq', None)
    'scrnaseq'

    >>> _assay_to_schema_name('snRNAseq', 'HCA')
    'scrnaseq-hca'

    Or, if a match can not be found (try-except just for shorter lines):

    >>> try: _assay_to_schema_name('PAS microscopy', 'HCA')
    ... except PreflightError as e: print(e)
    No schema where 'PAS microscopy' is assay_type and 'HCA' is source_project

    >>> try: _assay_to_schema_name('snRNAseq', 'Bad Project')
    ... except PreflightError as e: print(e)
    No schema where 'snRNAseq' is assay_type and 'Bad Project' is source_project

    >>> try: _assay_to_schema_name('Bad assay', None)
    ... except PreflightError as e: print(e)
    No schema where 'Bad assay' is assay_type

    >>> try: _assay_to_schema_name('Bad assay', 'HCA')
    ... except PreflightError as e: print(e)
    No schema where 'Bad assay' is assay_type and 'HCA' is source_project
    '''
    for path in (Path(__file__).parent / 'table-schemas' / 'assays').glob('*.yaml'):
        schema = load_yaml(path)
        assay_type_fields = [
            f for f in schema['fields'] if f['name'] == 'assay_type'
        ]
        source_project_fields = [
            f for f in schema['fields'] if f['name'] == 'source_project'
        ]

        # Because names are unique, these lists should not contain more than one field:
        assert len(assay_type_fields) <= 1
        assert len(source_project_fields) <= 1

        if not assay_type_fields:
            # A schema with no assay_type field can not match any assay;
            # skip it rather than failing with IndexError on [0] below.
            continue
        if assay_type not in assay_type_fields[0]['constraints']['enum']:
            continue

        if source_project_fields:
            # Project-specific schema: only usable when a project is given.
            if not source_project:
                continue

        if source_project:
            if not source_project_fields:
                continue
            if source_project not in source_project_fields[0]['constraints']['enum']:
                continue

        # Greedy match, so only the final "-v<digits>" is left off.
        return re.match(r'.+(?=-v\d+)', path.stem)[0]

    message = f"No schema where '{assay_type}' is assay_type"
    if source_project is not None:
        message += f" and '{source_project}' is source_project"
    raise PreflightError(message)