def _get_derived_website_content_data(
    request_data: dict, site_config: SiteConfig, website_pk: str
) -> dict:
    """
    Compute the extra values to merge into the request data when a WebsiteContent object is created.

    The returned dict may contain:
      - "text_id": a freshly generated id, when the request did not supply one
      - "is_page_content": set when a config item matches the requested "type"
      - "dirpath": the config item's file target, when the request gave no dirpath and the item is page content
      - "filename": a valid new filename built from the slug value, when a slug value could be found
    """
    derived = {}
    if "text_id" not in request_data:
        derived["text_id"] = uuid_string()

    content_type = request_data.get("type")
    config_item = None
    if content_type is not None:
        config_item = site_config.find_item_by_name(name=content_type)

    is_page_content = False
    if site_config and config_item is not None:
        is_page_content = site_config.is_page_content(config_item)
        derived["is_page_content"] = is_page_content

    dirpath = request_data.get("dirpath")
    if dirpath is None and config_item is not None and is_page_content:
        dirpath = config_item.file_target
        derived["dirpath"] = dirpath

    # The config item may name the field that supplies the slug; otherwise use the title.
    slug_key = None if config_item is None else config_item.item.get("slug")
    slug_key = slug_key or "title"

    # Look for the slug value in the derived data, then the request, then the request's metadata.
    slug = derived.get(slug_key) or request_data.get(slug_key)
    if not slug:
        slug = request_data.get("metadata", {}).get(slug_key)

    if slug is None:
        return derived

    filename_base = slugify(get_valid_base_filename(slug, content_type))
    derived["filename"] = get_valid_new_filename(
        website_pk=website_pk,
        dirpath=dirpath,
        filename_base=filename_base,
    )
    return derived
def test_data_file_deserialize(serializer_cls, file_content):
    """
    Deserializing data file contents with JsonFileSerializer or YamlFileSerializer should
    produce a content object populated from the matching "file" config item
    """
    website = WebsiteFactory.create()
    site_config = SiteConfig(website.starter.config)
    # Use the first config item that declares a "file" destination
    file_items = (item for item in site_config.iter_items() if "file" in item.item)
    file_config_item = next(file_items)
    serializer = serializer_cls(site_config)
    website_content = serializer.deserialize(
        website=website,
        filepath=file_config_item.item["file"],
        file_contents=file_content,
    )
    assert website_content.title == "Content Title"
    assert website_content.type == file_config_item.item["name"]
    assert website_content.text_id == file_config_item.item["name"]
    assert website_content.is_page_content is False
    assert website_content.metadata == {
        "tags": ["Design"],
        "description": "**This** is the description",
    }
def handle(self, *args, **options):
    """
    Merge the starter's generated default metadata into the metadata of matching WebsiteContent objects.

    Content is selected by starter slug and content type, optionally narrowed by a website
    name/short_id prefix and by website source. Only objects whose metadata keys differ from
    the generated default keys are updated; existing values win over defaults.
    """
    filter_str = options["filter"].lower()
    starter_str = options["starter"]
    source_str = options["source"]
    type_str = options["type"]

    content_qset = WebsiteContent.objects.filter(
        website__starter__slug=starter_str, type=type_str
    )
    if filter_str:
        matches_name = Q(website__name__startswith=filter_str)
        matches_short_id = Q(website__short_id__startswith=filter_str)
        content_qset = content_qset.filter(matches_name | matches_short_id)
    if source_str:
        content_qset = content_qset.filter(website__source=source_str)

    self.stdout.write(
        f"Update {type_str} metadata for websites based on starter {starter_str}, source={source_str}"
    )

    starter = WebsiteStarter.objects.get(slug=starter_str)
    base_metadata = SiteConfig(starter.config).generate_item_metadata(
        type_str, cls=WebsiteContent
    )

    with transaction.atomic():
        for content in content_qset.iterator():
            # Nothing to do when the key sets already match exactly
            if set(base_metadata) == set(content.metadata):
                continue
            content.metadata = {**base_metadata, **content.metadata}
            content.save()

    self.stdout.write(
        f"Done Updating {type_str} metadata for websites based on starter {starter_str}, source {source_str}"
    )
def test_generate_item_metadata(parsed_site_config, cls, resource_type, file_type, with_kwargs):
    """SiteConfig.generate_item_metadata should build the expected metadata dict for the resource item"""
    if with_kwargs:
        kwargs = {"resourcetype": resource_type, "file_type": file_type}
        expected_resource_type = resource_type or ""
        expected_file_type = file_type or ""
    else:
        kwargs = {}
        expected_resource_type = ""
        expected_file_type = ""

    expected_data = {
        "description": "",
        "resourcetype": expected_resource_type,
        "file_type": expected_file_type,
        "learning_resource_types": [],
        "license": "",
        "image_metadata": {"image-alt": "", "caption": "", "credit": ""},
        "video_metadata": {"youtube_id": "", "video_speakers": "", "video_tags": ""},
        "video_files": {
            "video_thumbnail_file": "",
            "video_captions_file": "",
            "video_transcript_file": "",
        },
    }
    # "title" and "file" are only expected in the output when no class is passed
    if not cls:
        expected_data.update({"title": "", "file": ""})

    site_config = SiteConfig(parsed_site_config)
    generated = site_config.generate_item_metadata("resource", cls, **kwargs)
    assert generated == expected_data
def apply_rule(data):
    """
    Validate that any config item using the 'menu' widget uses no other widget types.

    Args:
        data (dict): The parsed site config data

    Returns:
        list of str: A single-element list holding the error message (one indented line per
            offending config item) if any violations were found, otherwise an empty list
    """
    faulty_path_tuples = {}
    site_config = SiteConfig(data)
    for config_item in site_config.iter_items():
        # .get() is used so that a field without a "widget" key is treated as a non-menu
        # field instead of raising KeyError while validating.
        non_menu_fields, menu_fields = partition_to_lists(
            config_item.fields,
            predicate=lambda field: field.get("widget") == CONTENT_MENU_FIELD,
        )
        # Only items that mix menu fields with other fields are a violation
        if not menu_fields or not non_menu_fields:
            continue
        faulty_path_tuples[config_item.name] = (
            config_item.path,
            # NOTE(review): a field with no explicit widget is reported as "string" - this assumes
            # "string" is the default widget; confirm against the config schema.
            ", ".join(field.get("widget", "string") for field in non_menu_fields),
        )
    if faulty_path_tuples:
        return [
            "Config with 'menu' fields must not have any fields with other widget types.\n{}".format(
                "\n".join(
                    [
                        f"{' ' * 8}'{name}' ({path_fields_tuple[0]}) – widgets: {path_fields_tuple[1]}"
                        for name, path_fields_tuple in faulty_path_tuples.items()
                    ]
                ),
            )
        ]
    return []
def test_find_config_item_by_filepath(basic_site_config):
    """SiteConfig.find_item_by_filepath should return the matching config item, or None for an unknown path"""
    site_config = SiteConfig(basic_site_config)
    all_config_items = list(site_config.iter_items())

    found_item = site_config.find_item_by_filepath("data/metadata.json")
    assert found_item == all_config_items[3]

    missing_item = site_config.find_item_by_filepath("bad/path")
    assert missing_item is None
def test_find_config_item_name_singleton(basic_site_config):
    """SiteConfig.find_item_by_name should return a file (singleton) config item by name, or None for an unknown name"""
    site_config = SiteConfig(basic_site_config)
    file_items = (item for item in site_config.iter_items() if item.is_file_item())
    config_item = next(file_items)
    assert config_item is not None

    looked_up = site_config.find_item_by_name(config_item.name)
    assert looked_up == config_item

    unknown = site_config.find_item_by_name("other-name-123")
    assert unknown is None
def get_content_context(self, instance):  # pylint:disable=too-many-branches
    """
    Serialize the WebsiteContent objects referenced by "relation" and "menu" fields
    in the instance's metadata.

    Returns None unless the serializer context has a truthy "content_context" value.
    Otherwise, the text_ids referenced by the metadata are grouped by website name,
    the matching WebsiteContent objects are fetched, and their serialized data
    (via WebsiteContentDetailSerializer) is returned.
    """
    if not self.context or not self.context.get("content_context"):
        return None
    lookup = defaultdict(list)  # website name -> list of text_id
    metadata = instance.metadata or {}
    site_config = SiteConfig(instance.website.starter.config)
    for field in site_config.iter_fields():  # pylint:disable=too-many-nested-blocks
        widget = field.field.get("widget")
        if widget in ("relation", "menu"):
            try:
                # A field with a parent field stores its value one level down,
                # under the parent field's name
                if field.parent_field is None:
                    value = metadata.get(field.field["name"])
                else:
                    value = metadata.get(field.parent_field["name"], {}).get(field.field["name"])
                if widget == "relation":
                    content = value["content"]
                    website_name = value["website"]
                    # A single text_id string is normalized to a one-element list
                    if isinstance(content, str):
                        content = [content]
                    if (isinstance(content, list) and len(content) > 0 and isinstance(content[0], list)):
                        # this is the data from a 'global' relation widget,
                        # which is a list of [content_uuid, website_name]
                        # tuples
                        for [content_uuid, website_name] in content:
                            lookup[website_name].extend([content_uuid])
                    else:
                        lookup[website_name].extend(content)
                elif widget == "menu":
                    # Menu items are looked up in the instance's own website; identifiers
                    # starting with the external prefix are skipped
                    website_name = instance.website.name
                    lookup[website_name].extend([
                        item["identifier"] for item in value if not item["identifier"].startswith(
                            constants.EXTERNAL_IDENTIFIER_PREFIX)
                    ])
            except (AttributeError, KeyError, TypeError):
                # Either missing or malformed relation field value
                continue
    contents = []
    # One query per referenced website
    for website_name, text_ids in lookup.items():
        contents.extend(
            WebsiteContent.objects.filter(website__name=website_name, text_id__in=text_ids))
    # NOTE(review): "content_context" is False for the nested serializer, presumably so the
    # referenced content does not recursively resolve its own references - confirm.
    return WebsiteContentDetailSerializer(contents, many=True, context={
        "content_context": False
    }).data
def test_find_file_field(basic_site_config, content_type, field_name):
    """SiteConfig.find_file_field should return the file field of the config item, or None if it has none"""
    site_config = SiteConfig(basic_site_config)
    # Pick the first config item whose name matches the content type (None if there is no match)
    config_item = None
    for item in site_config.iter_items():
        if item.name == content_type:
            config_item = item
            break
    file_field = site_config.find_file_field(config_item)
    if not field_name:
        assert file_field is None
    else:
        assert file_field["name"] == "image"
def test_is_page_content(basic_site_config, content_dir, folder_file_target, exp_result):
    """
    SiteConfig.is_page_content should be True when the folder target of a repeatable config item
    starts with the content directory set in the site config (or a default value)
    """
    site_config = SiteConfig(basic_site_config)
    site_config.raw_data[WEBSITE_CONFIG_CONTENT_DIR_KEY] = content_dir
    folder_items = (item for item in site_config.iter_items() if item.is_folder_item())
    config_item = next(folder_items)
    config_item.item["folder"] = folder_file_target
    result = site_config.is_page_content(config_item)
    assert result is exp_result
def create_gdrive_resource_content(drive_file: DriveFile):
    """
    Create a WebsiteContent resource from a Google Drive file, or update the file of
    the resource already linked to it.

    On success the resource is attached to the drive file and its status is set to
    COMPLETE. On any error the exception is logged, a sync error message is stored on
    the drive file, and its status is set to FAILED; the error is not re-raised.

    Args:
        drive_file (DriveFile): The drive file to create/update a resource for
    """
    try:
        resource_type = get_resource_type(drive_file.s3_key)
        resource = drive_file.resource
        if not resource:
            # Build the site config once and reuse it for both the lookup and the metadata
            site_config = SiteConfig(drive_file.website.starter.config)
            config_item = site_config.find_item_by_name(name=CONTENT_TYPE_RESOURCE)
            dirpath = config_item.file_target if config_item else None
            basename, _ = os.path.splitext(drive_file.name)
            filename = get_valid_new_filename(
                website_pk=drive_file.website.pk,
                dirpath=dirpath,
                filename_base=slugify(
                    get_valid_base_filename(basename, CONTENT_TYPE_RESOURCE)
                ),
            )
            # Every configured resource type field receives the same detected resource type
            resource_type_fields = {
                field: resource_type for field in settings.RESOURCE_TYPE_FIELDS
            }
            resource = WebsiteContent.objects.create(
                website=drive_file.website,
                title=drive_file.name,
                file=drive_file.s3_key,
                type=CONTENT_TYPE_RESOURCE,
                is_page_content=True,
                dirpath=dirpath,
                filename=filename,
                metadata={
                    **site_config.generate_item_metadata(
                        CONTENT_TYPE_RESOURCE,
                        cls=WebsiteContent,
                        file_type=drive_file.mime_type,
                        **resource_type_fields,
                    )
                },
            )
        else:
            resource.file = drive_file.s3_key
            resource.save()
        drive_file.resource = resource
        drive_file.update_status(DriveFileStatus.COMPLETE)
    # Catch Exception rather than using a bare except, so KeyboardInterrupt/SystemExit
    # still propagate instead of being recorded as a sync failure.
    except Exception:  # pylint:disable=broad-except
        log.exception("Error creating resource for drive file %s", drive_file.file_id)
        drive_file.sync_error = (
            f"Could not create a resource from google drive file {drive_file.name}"
        )
        drive_file.update_status(DriveFileStatus.FAILED)
def test_get_destination_url(is_page_content, dirpath, filename, expected):
    """get_destination_url should build the expected url for a piece of content"""
    content = WebsiteContentFactory.create(
        is_page_content=is_page_content,
        dirpath=dirpath,
        filename=filename,
    )
    site_config = SiteConfig(content.website.starter.config)
    destination_url = get_destination_url(content, site_config)
    assert destination_url == expected
def test_get_destination_filepath_errors(mocker, has_missing_name, is_bad_config_item):
    """
    get_destination_filepath should log an error and return None when the site config has no item
    with the given name, or when the matching config item has no properly configured destination.
    """
    patched_log = mocker.patch("content_sync.utils.log")
    # From basic-site-config.yml
    config_item_name = "blog"
    if is_bad_config_item:
        badly_configured_item = ConfigItem(
            item={"name": config_item_name, "poorly": "configured"}
        )
        mocker.patch.object(
            SiteConfig,
            "find_item_by_name",
            return_value=badly_configured_item,
        )
    starter = WebsiteStarterFactory.build()
    if has_missing_name:
        content_type = "non-existent-config-name"
    else:
        content_type = config_item_name
    content = WebsiteContentFactory.build(is_page_content=False, type=content_type)
    site_config = SiteConfig(starter.config)
    return_value = get_destination_filepath(content=content, site_config=site_config)
    patched_log.error.assert_called_once()
    assert return_value is None
def _transform_hugo_menu_data(website_content: WebsiteContent, site_config: SiteConfig) -> dict:
    """
    Adds 'url' property to internal links in menu data.

    Each menu item is copied before its 'url' is set, so the metadata stored on the
    WebsiteContent object is left untouched.

    Args:
        website_content (WebsiteContent): The content object whose metadata holds the menu data
        site_config (SiteConfig): The site config used to find menu fields and build urls

    Returns:
        dict: All values that will be serialized to the target file, including the
            transformed "menu" fields.
    """
    config_item = site_config.find_item_by_name(website_content.type)
    menu_fields = {
        field["name"]
        for field in config_item.fields
        if field.get("widget") == CONTENT_MENU_FIELD
    }
    transformed_menu_fields = {}
    for field_name, field_data in website_content.metadata.items():
        if field_name not in menu_fields:
            continue
        uuid_content_map = _get_uuid_content_map(field_data)
        result_menu_items = []
        for menu_item in field_data:
            # Copy the item: assigning 'url' on the original dict would silently
            # modify website_content.metadata as a side effect of serializing.
            updated_menu_item = menu_item.copy()
            # Add/update the 'url' value if this is an internal link
            if menu_item["identifier"] in uuid_content_map:
                menu_item_content = uuid_content_map[menu_item["identifier"]]
                updated_menu_item["url"] = get_destination_url(
                    menu_item_content, site_config
                )
            result_menu_items.append(updated_menu_item)
        transformed_menu_fields[field_name] = result_menu_items
    return {**website_content.metadata, **transformed_menu_fields}
def for_content(
    site_config: SiteConfig, website_content: WebsiteContent
) -> BaseContentFileSerializer:
    """
    Given a WebsiteContent object and site config, returns a serializer object of the correct type for
    serializing the WebsiteContent object into file contents.

    Page content is always serialized with the Hugo markdown serializer. Other content is
    serialized according to the file extension of its config item's file target.

    Raises:
        ValueError: If the content is not page content and has no file destination in the config
            (including when no config item matches the content type), or if the destination
            file extension is not supported.
    """
    if website_content.is_page_content:
        return HugoMarkdownFileSerializer(site_config=site_config)
    config_item = site_config.find_item_by_name(website_content.type)
    # find_item_by_name returns None for an unknown type; treat that like a missing destination
    # so callers get the ValueError below rather than an AttributeError.
    destination_filepath = config_item.file_target if config_item is not None else None
    if not destination_filepath:
        raise ValueError(
            f"WebsiteContent object is not page content, but has no 'file' destination in config ({website_content.text_id})."
        )
    file_ext = get_file_extension(destination_filepath)
    if file_ext == "json":
        cls = JsonFileSerializer
    elif file_ext in {"yml", "yaml"}:
        # HACK: Hugo-specific logic for properly transforming data if the "menu" widget is used
        if _has_menu_fields(config_item):
            cls = HugoMenuYamlFileSerializer
        else:
            cls = YamlFileSerializer
    else:
        raise ValueError(
            f"Website content cannot be serialized to a file ({website_content.text_id})."
        )
    return cls(site_config=site_config)
def _untransform_hugo_menu_data(data: dict, filepath: str, site_config: SiteConfig) -> dict:
    """
    Strips the 'url' property from internal links in serialized menu data.

    Returns a dict of every value to be deserialized into website content, with the "menu"
    fields replaced by their cleaned-up copies.
    """
    config_item = site_config.find_item_by_filepath(filepath)
    menu_field_names = set()
    for field in config_item.fields:
        if field.get("widget") == CONTENT_MENU_FIELD:
            menu_field_names.add(field["name"])

    cleaned_menu_fields = {}
    for field_name, menu_items in data.items():
        if field_name in menu_field_names:
            cleaned_items = []
            for menu_item in menu_items:
                item_copy = menu_item.copy()
                # Internal links are identified by a uuid identifier; their url is not kept
                if is_valid_uuid(item_copy["identifier"]):
                    item_copy.pop("url", None)
                cleaned_items.append(item_copy)
            cleaned_menu_fields[field_name] = cleaned_items
    return {**data, **cleaned_menu_fields}
def test_factory_for_file_invalid():
    """ContentFileSerializerFactory.for_file should raise a ValueError for an unsupported file type"""
    starter = WebsiteStarterFactory.build()
    site_config = SiteConfig(starter.config)
    unsupported_filepath = "/path/to/myfile.tar.gz"
    with pytest.raises(ValueError):
        assert ContentFileSerializerFactory.for_file(
            site_config=site_config,
            filepath=unsupported_filepath,
        )
def test_hugo_file_serialize(markdown, exp_sections):
    """HugoMarkdownFileSerializer.serialize should produce file contents with the expected front matter and body"""
    metadata = {"metadata1": "dummy value 1", "metadata2": "dummy value 2"}
    content = WebsiteContentFactory.create(
        text_id="abcdefg",
        title="Content Title",
        type="sometype",
        markdown=markdown,
        metadata=metadata,
    )
    site_config = SiteConfig(content.website.starter.config)
    serializer = HugoMarkdownFileSerializer(site_config)
    file_content = serializer.serialize(website_content=content)

    section_delimiter = re.compile(r"^---\n", re.MULTILINE)
    # Splitting yields a blank string as the first item even though the file contents begin
    # with the delimiter, so empty parts are dropped.
    md_file_sections = list(filter(None, section_delimiter.split(file_content)))
    assert len(md_file_sections) == exp_sections

    front_matter = md_file_sections[0]
    front_matter_lines = [line for line in sorted(front_matter.split("\n")) if line]
    expected_lines = [
        f"title: {content.title}",
        f"content_type: {content.type}",
        f"uid: {content.text_id}",
    ]
    expected_lines.extend(f"{k}: {v}" for k, v in metadata.items())
    assert front_matter_lines == sorted(expected_lines)

    if exp_sections > 1:
        assert md_file_sections[1] == markdown
def test_content_dir(basic_site_config, content_dir_value, exp_result):
    """SiteConfig.content_dir should return the configured content dir, or a default when it is not set"""
    updated_site_config = basic_site_config.copy()
    if content_dir_value is not None:
        updated_site_config[WEBSITE_CONFIG_CONTENT_DIR_KEY] = content_dir_value
    else:
        # Simulate a config that does not define the content dir at all
        updated_site_config.pop(WEBSITE_CONFIG_CONTENT_DIR_KEY)
    site_config = SiteConfig(updated_site_config)
    assert site_config.content_dir == exp_result
def test_website_publish_serializer_base_url(settings, is_root_site):
    """
    WebsitePublishSerializer should return an empty base_url for the root site, and the
    root url path joined with the site name otherwise
    """
    site = WebsiteFactory.create()
    site_config = SiteConfig(site.starter.config)
    if is_root_site:
        settings.ROOT_WEBSITE_NAME = site.name
    else:
        settings.ROOT_WEBSITE_NAME = "some_other_root_name"
    serializer = WebsitePublishSerializer(site)
    base_url = serializer.data["base_url"]
    if is_root_site:
        expected_base_url = ""
    else:
        expected_base_url = f"{site_config.root_url_path}/{site.name}".strip("/")
    assert base_url == expected_base_url
def upload_file_to(self, filename):
    """
    Build the upload path for a file: the site's root url path, the website name, and the
    filename prefixed with this object's text_id (dashes removed). Empty parts are left out.
    """
    site_config = SiteConfig(self.website.starter.config)
    prefixed_filename = f"{self.text_id.replace('-', '')}_{filename}"
    candidate_parts = (
        site_config.root_url_path,
        self.website.name,
        prefixed_filename,
    )
    return "/".join(segment for segment in candidate_parts if segment != "")
def test_factory_for_file(filepath, exp_serializer_cls):
    """ContentFileSerializerFactory.for_file should return a serializer of the expected class for the filepath"""
    starter = WebsiteStarterFactory.build()
    site_config = SiteConfig(starter.config)
    serializer = ContentFileSerializerFactory.for_file(
        site_config=site_config,
        filepath=filepath,
    )
    assert isinstance(serializer, exp_serializer_cls)
def mock_api_wrapper(settings, mocker, db_data):
    """Build a GithubApiWrapper for the test website, with the Github client patched out"""
    settings.GIT_TOKEN = "faketoken"
    settings.GIT_ORGANIZATION = "fake_org"
    settings.CONTENT_SYNC_RETRIES = 3
    mocker.patch("content_sync.apis.github.Github", autospec=True)
    website = db_data.website
    site_config = SiteConfig(website.starter.config)
    return GithubApiWrapper(website=website, site_config=site_config)
def test_factory_for_content_hugo_markdown():
    """
    ContentFileSerializerFactory.for_content should pick the Hugo markdown serializer
    when the content object is page content.
    """
    content = WebsiteContentFactory.build(is_page_content=True)
    site_config = SiteConfig(content.website.starter.config)
    serializer = ContentFileSerializerFactory.for_content(site_config, content)
    assert isinstance(serializer, HugoMarkdownFileSerializer)
def upload_file_to(self, filename):
    """
    Build the upload path for a file: the site's root url path, the website name, and the
    filename prefixed with the second-to-last segment of source_key. Empty parts are left out.
    """
    site_config = SiteConfig(self.website.starter.config)
    key_segments = self.source_key.split("/")
    source_folder = key_segments[-2]
    candidate_parts = (
        site_config.root_url_path,
        self.website.name,
        f"{source_folder}_{filename}",
    )
    return "/".join(segment for segment in candidate_parts if segment != "")
def test_site_config_iter_items(basic_site_config):
    """SiteConfig.iter_items should yield every config item, including items nested under "files" """
    site_config = SiteConfig(basic_site_config)
    config_items = list(site_config.iter_items())
    assert len(config_items) == 5

    collections = basic_site_config["collections"]
    # The first three items are the top-level collections, in order
    top_level_paths = ("collections.0", "collections.1", "collections.2")
    for index, path in enumerate(top_level_paths):
        assert config_items[index] == ConfigItem(
            item=collections[index], parent_item=None, path=path
        )

    # The fourth item is the first file nested under the third collection
    files_collection = collections[2]
    assert config_items[3] == ConfigItem(
        item=files_collection["files"][0],
        parent_item=files_collection,
        path="collections.2.files.0",
    )
def apply_rule(data):
    """
    Check that every 'folder' config item points to the site's content directory.

    Returns a single-element list with the error message (one indented line per offending
    item) when violations are found, otherwise an empty list.
    """
    site_config = SiteConfig(data)
    faulty_paths = {}
    for config_item in site_config.iter_items():
        if not config_item.is_folder_item():
            continue
        if site_config.is_page_content(config_item):
            continue
        faulty_paths[config_item.name] = config_item.path

    if not faulty_paths:
        return []

    content_dir = site_config.content_dir
    offender_lines = "\n".join(
        f"{' ' * 8}'{name}' ({path})" for name, path in faulty_paths.items()
    )
    message = "Found 'folder' item(s) that do not point to the content directory ({}).\n{}".format(
        content_dir,
        offender_lines,
    )
    return [message]
def __init__(self, website: Website, site_config: Optional[SiteConfig]):
    """
    Set up the Github API backend for a specific website.

    When no site config is given, one is built from the website's starter config. The
    Github client is pointed at settings.GIT_API_URL only when that setting is not None.
    """
    self.website = website
    if site_config:
        self.site_config = site_config
    else:
        self.site_config = SiteConfig(self.website.starter.config)
    self.repo = None

    token = get_token()
    client_kwargs = {}
    if settings.GIT_API_URL is not None:
        client_kwargs["base_url"] = settings.GIT_API_URL
    self.git = Github(login_or_token=token, **client_kwargs)
    self.org = self.git.get_organization(settings.GIT_ORGANIZATION)
def apply_rule(data):
    """
    Check that every 'title' field is required and uses the 'string' widget.

    Returns a single-element list with the error message (one indented line per offending
    config item) when violations are found, otherwise an empty list.
    """
    site_config = SiteConfig(data)
    faulty_paths = {}
    for config_item in site_config.iter_items():
        title_field = first_or_none(
            [field for field in config_item.fields if field["name"] == "title"]
        )
        if title_field is None:
            continue
        # A title field passes only if "required" is set to something other than False
        # and its widget (defaulting to "string") is "string"
        is_required = title_field.get("required", False) is not False
        if is_required and title_field.get("widget", "string") == "string":
            continue
        faulty_paths[config_item.name] = config_item.path

    if not faulty_paths:
        return []

    offender_lines = "\n".join(
        f"{' ' * 8}'{name}' ({path})" for name, path in faulty_paths.items()
    )
    message = "'title' fields must use the 'string' widget, and must be set to be required.\n{}".format(
        offender_lines,
    )
    return [message]
def test_get_destination_url_errors(mocker):
    """
    get_destination_url should log an error and return None when called with a
    WebsiteContent object that does not have is_page_content set to true
    """
    patched_log = mocker.patch("content_sync.utils.log")
    # From basic-site-config.yml
    config_item_name = "blog"
    starter = WebsiteStarterFactory.build()
    content = WebsiteContentFactory.build(is_page_content=False, type=config_item_name)
    site_config = SiteConfig(starter.config)
    return_value = get_destination_url(content=content, site_config=site_config)
    patched_log.error.assert_called_once()
    assert return_value is None