def test_dataset_card(dataset_name):
    """Run every dataset-card check for one dataset and report all failures together.

    Three independent checks are run against ``datasets/<dataset_name>/README.md``:
    a strict README parse, a README validation with parsing errors suppressed,
    and a validation of the YAML metadata. A failing check does not stop the
    following ones; each failure is recorded and reported at the end.

    Args:
        dataset_name: Name of the dataset directory under ``repo_path / "datasets"``.

    Raises:
        AssertionError: If the card file does not exist.
        ValueError: If at least one check failed; the message is the
            newline-joined list of all recorded reports.
    """
    card_path = repo_path / "datasets" / dataset_name / "README.md"
    assert card_path.exists()

    problems = []

    # Strict parse: only the raised error (if any) matters, not the result.
    try:
        ReadMe.from_readme(card_path)
    except Exception as parsing_error:
        problems.append(
            f"The following issues have been found in the dataset cards:\nREADME Parsing:\n{parsing_error}"
        )

    # Parse again with parsing errors suppressed, then validate the result.
    try:
        ReadMe.from_readme(card_path, suppress_parsing_errors=True).validate()
    except Exception as validation_error:
        problems.append(
            f"The following issues have been found in the dataset cards:\nREADME Validation:\n{validation_error}"
        )

    # The YAML metadata is loaded and validated independently of the README body.
    try:
        DatasetMetadata.from_readme(card_path).validate()
    except Exception as tags_error:
        problems.append(
            f"The following issues have been found in the dataset cards:\nYAML tags:\n{tags_error}"
        )

    if not problems:
        return
    raise ValueError("\n".join(problems))
def test_readme_from_readme_suppress_parsing_errors(readme_md):
    """Check that ``ReadMe.from_readme`` does not raise when parsing errors are suppressed.

    The markdown text is written to a ``README.md`` inside a temporary
    directory and loaded with ``suppress_parsing_errors=True``. The test
    passes as long as the call returns without raising.

    Args:
        readme_md: Markdown content of the card to load (supplied by the test
            setup, presumably via parametrization — defined outside this block).
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        path = Path(tmp_dir) / "README.md"
        # Write the fixture as UTF-8 explicitly: without an encoding the bytes
        # on disk depend on the platform locale, which can fail or corrupt
        # non-ASCII card content on non-UTF-8 systems.
        path.write_text(readme_md, encoding="utf-8")
        ReadMe.from_readme(path, example_yaml_structure, suppress_parsing_errors=True)
def test_readme_from_readme_parsing_errors(readme_md, expected_error):
    """Check that ``ReadMe.from_readme`` raises the expected ``ValueError``.

    The markdown text is written to a ``README.md`` inside a temporary
    directory, then loaded. The raised error message must match
    ``expected_error`` once its ``{path}`` placeholder has been filled with
    the temporary file path.

    Args:
        readme_md: Markdown content of the card to load.
        expected_error: Message template formatted with ``path=<file path>``.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        path = Path(tmp_dir) / "README.md"
        # Write the fixture as UTF-8 explicitly: without an encoding the bytes
        # on disk depend on the platform locale, which can fail or corrupt
        # non-ASCII card content on non-UTF-8 systems.
        path.write_text(readme_md, encoding="utf-8")
        expected_message = expected_error.format(path=path)
        # The message is matched literally, so escape regex metacharacters.
        with pytest.raises(ValueError, match=re.escape(expected_message)):
            ReadMe.from_readme(path, example_yaml_structure)
def test_dataset_card(dataset_name):
    """Load one dataset card as README and as YAML metadata, reporting all failures.

    Both loaders are run against ``datasets/<dataset_name>/README.md``. A
    failure in the first does not prevent the second from running; every
    failure is recorded and reported at the end.

    Args:
        dataset_name: Name of the dataset directory under ``repo_path / "datasets"``.

    Raises:
        AssertionError: If the card file does not exist.
        ValueError: If at least one loader raised; the message is the
            newline-joined list of all recorded reports.
    """
    card_path = repo_path / "datasets" / dataset_name / "README.md"
    assert card_path.exists()

    problems = []

    # README body check.
    try:
        ReadMe.from_readme(card_path)
    except Exception as card_error:
        problems.append(
            f"The following issues have been found in the dataset cards:\nREADME:\n{card_error}"
        )

    # YAML metadata check, run regardless of the README outcome.
    try:
        DatasetMetadata.from_readme(card_path)
    except Exception as tags_error:
        problems.append(
            f"The following issues have been found in the dataset cards:\nYAML tags:\n{tags_error}"
        )

    if not problems:
        return
    raise ValueError("\n".join(problems))
def test_readme_from_readme_correct(readme_md, expected_dict):
    """Check the dictionary produced by ``ReadMe.from_readme`` for a well-formed card.

    The markdown text is written to a ``README.md`` inside a temporary
    directory, loaded, and converted with ``to_dict()``. The root entry must
    be named after the file path, have empty text, be flagged as empty, and
    carry the expected subsections.

    Args:
        readme_md: Markdown content of the card to load.
        expected_dict: Expected dictionary; only its ``"subsections"`` entry
            is compared here.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        path = Path(tmp_dir) / "README.md"
        # Write the fixture as UTF-8 explicitly: without an encoding the bytes
        # on disk depend on the platform locale, which can fail or corrupt
        # non-ASCII card content on non-UTF-8 systems.
        path.write_text(readme_md, encoding="utf-8")
        out = ReadMe.from_readme(path, example_yaml_structure).to_dict()
        assert out["name"] == path
        assert out["text"] == ""
        assert out["is_empty_text"]
        assert out["subsections"] == expected_dict["subsections"]
# Select the dataset cards to validate: either one README.md path per entry of
# the "datasets" directory, or only the README files reported as changed.
if args.check_all:
    readmes = [dd / "README.md" for dd in (repo_path / "datasets").iterdir()]
else:
    # get_changed_files is defined elsewhere; its results are used as Path
    # objects here (exists / name / parent).
    changed_files = get_changed_files(repo_path)
    readmes = [
        f
        for f in changed_files
        # Keep only existing files named README.md (case-insensitive) that sit
        # exactly at datasets/<name>/README.md.
        if f.exists() and f.name.lower() == "readme.md" and f.parent.parent.name == "datasets"
    ]

# Validate each card in sorted order, collecting every failure instead of
# stopping at the first one.
failed: List[Path] = []
for readme in sorted(readmes):
    try:
        ReadMe.from_readme(readme)
        logging.debug(f"✅️ Validated '{readme.relative_to(repo_path)}'")
    except ValueError as e:
        # A ValueError is reported as a validation failure.
        failed.append(readme)
        logging.warning(
            f"❌ Validation failed for '{readme.relative_to(repo_path)}':\n{e}"
        )
    except Exception as e:
        # Any other exception is reported separately as unexpected, but still
        # counts as a failure for the exit status.
        failed.append(readme)
        logging.warning(
            f"⁉️ Something unexpected happened on '{readme.relative_to(repo_path)}':\n{e}"
        )

if failed:
    logging.info(f"❌ Failed on {len(failed)} files.")
    # Raise SystemExit directly rather than calling exit(): the exit() helper
    # is injected by the site module for interactive use and is not available
    # when the interpreter runs without site (e.g. `python -S`).
    raise SystemExit(1)
def test_readme_from_string_suppress_parsing_errors(readme_md):
    """Check that ``ReadMe.from_string`` does not raise when parsing errors are suppressed.

    The test passes as long as the call returns without raising; the
    returned object is not inspected.

    Args:
        readme_md: Markdown content of the card to parse.
    """
    ReadMe.from_string(
        readme_md,
        example_yaml_structure,
        suppress_parsing_errors=True,
    )
def test_readme_from_string_parsing_errors(readme_md, expected_error):
    """Check that ``ReadMe.from_string`` raises the expected ``ValueError``.

    The expected message is ``expected_error`` with its ``{path}``
    placeholder filled with ``"root"``, matched literally against the raised
    error.

    Args:
        readme_md: Markdown content of the card to parse.
        expected_error: Message template formatted with ``path="root"``.
    """
    expected_message = expected_error.format(path="root")
    # Escape the message so regex metacharacters in it are matched literally.
    pattern = re.escape(expected_message)
    with pytest.raises(ValueError, match=pattern):
        ReadMe.from_string(readme_md, example_yaml_structure)
def test_readme_from_string_correct(readme_md, expected_dict):
    """Check that a well-formed card parsed from a string yields the expected dictionary.

    Args:
        readme_md: Markdown content of the card to parse.
        expected_dict: Dictionary that ``to_dict()`` on the parsed card must equal.
    """
    parsed = ReadMe.from_string(readme_md, example_yaml_structure)
    actual = parsed.to_dict()
    assert actual == expected_dict