class YoutubeFlagsSchema(SerializableSchema):
    """Flags for building a ZIM from a Youtube channel, user or playlist(s).

    Each attribute declares one flag. ``metadata`` carries the user-facing
    label/description (and optional placeholder / secret marker), while
    ``data_key`` is the external flag name when it differs from the Python
    attribute name. Declaration order is kept (``Meta.ordered``).
    """

    class Meta:
        ordered = True

    kind = StringEnum(
        metadata={
            "label": "Type",
            "description": "Type of collection. Only `playlist` accepts multiple IDs.",
        },
        validate=validate.OneOf(["channel", "playlist", "user"]),
        data_key="type",
        required=True,
    )
    ident = fields.String(
        metadata={
            "label": "Youtube ID",
            "description": "Youtube ID of the collection. "
            "Separate multiple playlists with commas.",
        },
        data_key="id",
        required=True,
    )
    # NOTE(review): unlike `optimization_cache` below, this credential is not
    # flagged `"secret": True` -- confirm whether it should be.
    api_key = fields.String(
        metadata={"label": "API Key", "description": "Youtube API Token"},
        data_key="api-key",
        required=True,
    )
    name = fields.String(
        metadata={
            "label": "ZIM Name",
            "description": "Used as identifier and filename (date will be appended)",
            "placeholder": "mychannel_eng_all",
        },
        required=True,
    )
    video_format = StringEnum(
        metadata={
            "label": "Video format",
            "description": "Format to download/transcode video to. webm is smaller",
        },
        validate=validate.OneOf(["webm", "mp4"]),
        data_key="format",
    )
    low_quality = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Low Quality",
            "description": "Re-encode video using stronger compression",
        },
        data_key="low-quality",
    )
    concurrency = fields.Integer(
        metadata={
            "label": "Concurrency",
            "description": "Number of concurrent threads to use",
        },
    )
    dateafter = fields.String(
        metadata={
            "label": "Only after date",
            "description": "Custom filter to download videos uploaded on "
            "or after specified date. Format: YYYYMMDD or "
            "(now|today)[+-][0-9](day|week|month|year)(s)?",
        }
    )
    optimization_cache = fields.Url(
        metadata={
            "label": "Optimization Cache URL",
            "description": "S3 Storage URL including credentials and bucket",
            "secret": True,
        },
        data_key="optimization-cache",
    )
    # Strict truthy/falsy lists, like every other Boolean flag of this schema:
    # only real booleans are accepted, not marshmallow's default coercions
    # ("yes", "on", "1", ...).
    use_any_optimized_version = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Use any optimized version",
            "description": "Use the cached files if present, whatever the version",
        },
        data_key="use-any-optimized-version",
    )
    all_subtitles = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "All Subtitles",
            "description": "Include auto-generated subtitles",
        },
        data_key="all-subtitles",
    )
    pagination = fields.Integer(
        metadata={
            "label": "Pagination",
            "description": "Number of videos per page (40 otherwise)",
        },
    )
    autoplay = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Auto-play",
            "description": "Enable autoplay on video articles "
            "(home never have autoplay).",
        },
    )
    # Value is checked by `validate_output` (defined elsewhere in the project).
    output = fields.String(
        metadata={
            "label": "Output folder",
            "placeholder": "/output",
            "description": "Output folder for ZIM file or build folder. "
            "Leave it as `/output`",
        },
        missing="/output",
        default="/output",
        validate=validate_output,
    )
    zim_file = fields.String(
        metadata={
            "label": "ZIM filename",
            "description": "ZIM file name (based on --name if not provided)",
        },
        data_key="zim-file",
    )
    language = fields.String(
        metadata={
            "label": "Language",
            "description": "ISO-639-3 (3 chars) language code of content",
        }
    )
    locale = fields.String(
        metadata={
            "label": "Locale",
            "description": "Locale name to use for translations (if avail) "
            "and time representations. Defaults to --language or English.",
        }
    )
    title = fields.String(
        metadata={
            "label": "Title",
            "description": "Custom title for your project and ZIM. Default to "
            "Channel name (of first video if playlists)",
        }
    )
    description = fields.String(metadata={"label": "Description", "description": ""})
    creator = fields.String(
        metadata={
            "label": "Content Creator",
            "description": "Name of content creator. Defaults to Channel name "
            "or “Youtube Channels”",
        }
    )
    tags = fields.String(
        metadata={
            "label": "ZIM Tags",
            "description": "List of Tags for the ZIM file. "
            "_videos:yes added automatically",
        }
    )
    profile = fields.Url(
        metadata={
            "label": "Profile Image",
            "description": "Custom profile image. Squared. "
            "Will be resized to 100x100px",
        }
    )
    banner = fields.Url(
        metadata={
            "label": "Banner Image",
            "description": "Custom banner image. Will be resized to 1060x175px",
        }
    )
    main_color = HexColor(
        metadata={
            "label": "Main Color",
            "description": "Custom color. Hex/HTML syntax (#DEDEDE). "
            "Default to main color of profile image.",
        },
        data_key="main-color",
    )
    secondary_color = HexColor(
        metadata={
            "label": "Secondary Color",
            "description": "Custom secondary color. Hex/HTML syntax (#DEDEDE). "
            "Default to secondary color of profile image.",
        },
        data_key="secondary-color",
    )
    debug = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={"label": "Debug", "description": "Enable verbose output"},
    )
class OpenedxFlagsSchema(SerializableSchema):
    """Flags for building a ZIM from a course hosted on an openedx instance.

    Each attribute declares one flag. ``metadata`` carries the user-facing
    label/description (and optional placeholder / secret marker), while
    ``data_key`` is the external flag name. Declaration order is kept
    (``Meta.ordered``).
    """

    class Meta:
        ordered = True

    # --- course location and credentials ---------------------------------
    course_url = fields.Url(
        metadata={
            "label": "Course URL",
            "description": "URL of the course you want to scrape",
        },
        data_key="course-url",
        required=True,
    )
    email = fields.String(
        metadata={
            "label": "Registered e-mail",
            "description": "The registered e-mail ID on the openedx instance",
        },
        data_key="email",
        required=True,
    )
    password = fields.String(
        metadata={
            "label": "Password",
            "description": "Password to the account registered on the openedx instance",
            "secret": True,
        },
        data_key="password",
        required=True,
    )

    # --- instance-specific paths -----------------------------------------
    instance_login_page = fields.String(
        metadata={
            "label": "Login page path",
            "description": "The login path in the instance. Must start with /",
            "placeholder": "/login_ajax",
        },
        data_key="instance-login-page",
    )
    instance_course_page = fields.String(
        metadata={
            "label": "Course page path",
            "description": "The path to the course page after the course ID. "
            "Must start with /",
            "placeholder": "/course",
        },
        data_key="instance-course-page",
    )
    instance_course_prefix = fields.String(
        metadata={
            "label": "Course prefix path",
            "description": "The prefix in the path before the course ID. "
            "Must start and end with /",
            "placeholder": "/courses/",
        },
        data_key="instance-course-prefix",
    )
    favicon_url = fields.Url(
        metadata={
            "label": "Favicon URL",
            "description": "URL pointing to a favicon image. "
            "Recommended size >= (48px x 48px)",
            "placeholder": "https://github.com/edx/edx-platform/raw/master/lms/static/images/favicon.ico",
        },
        data_key="favicon-url",
    )

    # --- content selection (strict booleans: only True/False accepted) ----
    ignore_missing_xblocks = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Ignore unsupported xblocks",
            "description": "Ignore unsupported content (xblock(s))",
        },
        data_key="ignore-missing-xblocks",
    )
    add_wiki = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Include wiki",
            "description": "Add wiki (if available) to the ZIM",
        },
        data_key="add-wiki",
    )
    add_forum = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Include forum",
            "description": "Add forum/discussion (if available) to the ZIM",
        },
        data_key="add-forum",
    )
    remove_seq_nav = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "No top sequential navigation",
            "description": "Remove the top sequential navigation bar in the ZIM",
        },
        data_key="remove-seq-nav",
    )

    # --- video handling ---------------------------------------------------
    video_format = StringEnum(
        metadata={
            "label": "Video format",
            "description": "Format to download/transcode video to. webm is smaller",
        },
        validate=validate.OneOf(["webm", "mp4"]),
        data_key="format",
    )
    low_quality = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Low Quality",
            "description": "Re-encode video using stronger compression",
        },
        data_key="low-quality",
    )
    autoplay = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Autoplay videos",
            "description": "Enable autoplay on videos. "
            "Behavior differs on platforms/browsers",
        },
        data_key="autoplay",
    )

    # --- ZIM metadata -----------------------------------------------------
    name = fields.String(
        metadata={
            "label": "Name",
            "description": "ZIM name. "
            "Used as identifier and filename (date will be appended)",
            "placeholder": "topic_eng",
        },
        data_key="name",
        required=True,
    )
    title = fields.String(
        metadata={
            "label": "Title",
            "description": "Custom title for your ZIM. Based on MOOC otherwise",
        },
        data_key="title",
    )
    description = fields.String(
        metadata={
            "label": "Description",
            "description": "Custom description for your ZIM. Based on MOOC otherwise",
        },
        data_key="description",
    )
    creator = fields.String(
        metadata={
            "label": "Content Creator",
            "description": "Name of content creator. Defaults to edX",
        },
        data_key="creator",
    )
    tags = fields.String(
        metadata={
            "label": "ZIM Tags",
            "description": "List of comma-separated Tags for the ZIM file. "
            "category:other, and openedx added automatically",
        },
        data_key="tags",
    )

    # --- optimization cache -----------------------------------------------
    optimization_cache = fields.Url(
        metadata={
            "label": "Optimization Cache URL",
            "description": "URL with credentials and bucket name to S3 Optimization Cache",
            "secret": True,
        },
        data_key="optimization-cache",
    )
    use_any_optimized_version = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Use any optimized version",
            "description": "Use the cached files if present, whatever the version",
        },
        data_key="use-any-optimized-version",
    )

    # --- output locations (checked by `validate_output`, defined elsewhere)
    output = fields.String(
        metadata={
            "label": "Output folder",
            "placeholder": "/output",
            "description": "Output folder for ZIM file(s). Leave it as `/output`",
        },
        missing="/output",
        default="/output",
        validate=validate_output,
        data_key="output",
    )
    tmp_dir = fields.String(
        metadata={
            "label": "Temp folder",
            "description": "Where to create temporary build folder. "
            "Leave it as `/output`",
        },
        missing="/output",
        default="/output",
        validate=validate_output,
        data_key="tmp-dir",
    )
    zim_file = fields.String(
        metadata={
            "label": "ZIM filename",
            "description": "ZIM file name (based on ZIM name if not provided)",
        },
        data_key="zim-file",
    )

    # --- runtime behaviour ------------------------------------------------
    debug = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={"label": "Debug", "description": "Enable verbose output"},
    )
    threads = fields.Integer(
        metadata={
            "label": "Threads",
            "description": "Number of parallel threads to use while downloading",
        },
        validate=validate.Range(min=1),
    )
    locale = fields.String(
        metadata={
            "label": "Locale",
            "description": "The locale to use for the translations in ZIM",
        }
    )
class TedFlagsSchema(SerializableSchema):
    """Flags for building ZIM file(s) from TED topics and/or playlists.

    Two modes are supported: a single ZIM (``name`` is then required) or one
    ZIM per topic/playlist when ``indiv_zims`` is set (``name_format`` is
    then required). That cross-field rule is enforced by the schema-level
    hook at the bottom of the class. Declaration order is kept
    (``Meta.ordered``).
    """

    class Meta:
        ordered = True

    indiv_zims = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Individual ZIM mode",
            "description": "Whether to produce one ZIM per topic/playlist",
        },
        data_key="indiv-zims",
    )

    # --- content selection --------------------------------------------------
    topics = fields.String(
        metadata={
            "label": "Topics",
            "description": "Comma-separated list of topics to scrape; "
            "as given on ted.com/talks. Pass all for all topics",
        },
    )
    playlists = fields.String(
        metadata={
            "label": "TED Playlists",
            "description": "Comma-separated list of TED playlist IDs to scrape. "
            "Pass all for all playlists",
        },
    )
    languages = fields.String(
        metadata={
            "label": "Languages",
            "description": "Comma-separated list of languages to filter videos",
        },
    )
    subtitles_enough = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Subtitles enough?",
            "description": "Whether to include videos that have a subtitle in "
            "requested language(s) if audio is in another language",
        },
    )
    subtitles = fields.String(
        metadata={
            "label": "Subtitles Setting",
            "description": "Language setting for subtitles. "
            "all: include all available subtitles, "
            "matching (default): only subtitles matching language(s), "
            "none: include no subtitle. "
            "Also accepts comma-separated list of language(s)",
        },
    )

    # --- video handling -----------------------------------------------------
    video_format = StringEnum(
        metadata={
            "label": "Video format",
            "description": "Format to download/transcode video to. webm is smaller",
        },
        validate=validate.OneOf(["webm", "mp4"]),
        data_key="format",
    )
    low_quality = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Low Quality",
            "description": "Re-encode video using stronger compression",
        },
        data_key="low-quality",
    )
    autoplay = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Auto-play",
            "description": "Enable autoplay on video articles. "
            "Behavior differs on platforms/browsers.",
        },
    )

    # --- ZIM metadata (plain value + per-ZIM "format" variant) --------------
    name = fields.String(
        metadata={
            "label": "Name",
            "description": "ZIM name. "
            "Used as identifier and filename (date will be appended)",
            "placeholder": "topic_eng",
        },
    )
    name_format = fields.String(
        metadata={
            "label": "Name Format",
            "description": "Format for building individual --name argument. "
            "Use variable {identity} for playlist id or topic name",
            "placeholder": "{identity}_eng",
        },
        data_key="name-format",
    )
    title = fields.String(
        metadata={
            "label": "Title",
            "description": "Custom title for your ZIM. Based on selection otherwise",
        }
    )
    title_format = fields.String(
        metadata={
            "label": "Title Format",
            "description": "Custom title format for individual ZIMs",
        },
        data_key="title-format",
    )
    description = fields.String(
        metadata={
            "label": "Description",
            "description": "Custom description for your ZIM. "
            "Based on selection otherwise",
        }
    )
    description_format = fields.String(
        metadata={
            "label": "Description Format",
            "description": "Custom description format for individual ZIMs",
        },
        data_key="description-format",
    )
    creator = fields.String(
        metadata={
            "label": "Content Creator",
            "description": "Name of content creator. Defaults to TED",
        }
    )
    tags = fields.String(
        metadata={
            "label": "ZIM Tags",
            "description": "List of comma-separated Tags for the ZIM file. "
            "category:ted, ted, and _videos:yes added automatically",
        }
    )

    # --- optimization cache -------------------------------------------------
    optimization_cache = fields.Url(
        metadata={
            "label": "Optimization Cache URL",
            "description": "URL with credentials and bucket name to S3 Optimization Cache",
            "secret": True,
        },
        data_key="optimization-cache",
    )
    use_any_optimized_version = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Use any optimized version",
            "description": "Use the cached files if present, whatever the version",
        },
        data_key="use-any-optimized-version",
    )

    # --- output locations (checked by `validate_output`, defined elsewhere) -
    output = fields.String(
        metadata={
            "label": "Output folder",
            "placeholder": "/output",
            "description": "Output folder for ZIM file(s). Leave it as `/output`",
        },
        missing="/output",
        default="/output",
        validate=validate_output,
    )
    tmp_dir = fields.String(
        metadata={
            "label": "Temp folder",
            "description": "Where to create temporary build folder. "
            "Leave it as `/output`",
        },
        missing="/output",
        default="/output",
        validate=validate_output,
        data_key="tmp-dir",
    )
    metadata_from = fields.String(
        metadata={
            "label": "Metadata JSON",
            "description": "File path or URL to a JSON file holding custom metadata "
            "for individual playlists/topics",
        },
        data_key="metadata-from",
    )
    zim_file = fields.String(
        metadata={
            "label": "ZIM filename",
            "description": "ZIM file name (based on ZIM name if not provided)",
        },
        data_key="zim-file",
    )
    zim_file_format = fields.String(
        metadata={
            "label": "ZIM filename format",
            "description": "Format for building individual --zim-file argument "
            "for individual ZIMs. Uses --name-format otherwise",
        },
        data_key="zim-file-format",
    )
    debug = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={"label": "Debug", "description": "Enable verbose output"},
    )

    # NOTE(review): if SerializableSchema derives from marshmallow's Schema,
    # naming this hook `validate` overrides the inherited `Schema.validate()`
    # method. The name is kept so the class interface does not change;
    # consider renaming after checking callers.
    @validates_schema
    def validate(self, data, **kwargs):
        """Enforce the naming flag required by the selected mode.

        Raises:
            ValidationError: when ``indiv_zims`` is set without a
                ``name_format``, or when it is unset and ``name`` is missing
                or empty.
        """
        if data.get("indiv_zims"):
            if not data.get("name_format"):
                raise ValidationError("name-format required in individual ZIMs mode")
        elif not data.get("name"):
            raise ValidationError("name required in normal mode")
class ZimitFlagsSchema(SerializableSchema):
    """Flags for crawling a website and building a ZIM out of it.

    Each attribute declares one flag. ``metadata`` carries the user-facing
    label/description, while ``data_key`` is the external flag name when it
    differs from the Python attribute name (several are camelCase here).
    Declaration order is kept (``Meta.ordered``).
    """

    class Meta:
        ordered = True

    # --- ZIM identity and metadata ----------------------------------------
    url = fields.Url(
        metadata={
            "label": "URL",
            "description": "The URL to start crawling from and main page for ZIM",
        },
        required=True,
    )
    name = fields.String(
        metadata={
            "label": "Name",
            "description": "Name of the ZIM. "
            "Used to compose filename if not otherwise defined",
        },
        required=True,
    )
    lang = fields.String(
        metadata={
            "label": "Language",
            "description": "ISO-639-3 (3 chars) language code of content. "
            "Default to `eng`",
        }
    )
    title = fields.String(
        metadata={
            "label": "Title",
            "description": "Custom title for ZIM. Default to title of main page",
        }
    )
    description = fields.String(
        metadata={"label": "Description", "description": "Description for ZIM"}
    )
    favicon = fields.Url(
        metadata={
            "label": "Favicon",
            "description": "URL for Favicon. "
            "If unspecified, will attempt to use the one used from main page.",
        },
        required=False,
    )
    zim_file = fields.String(
        metadata={
            "label": "ZIM filename",
            "description": "ZIM file name (based on --name if not provided)",
        },
        data_key="zim-file",
    )
    tags = fields.String(
        metadata={
            "label": "ZIM Tags",
            "description": "List of Tags for the ZIM file.",
        }
    )
    creator = fields.String(
        metadata={
            "label": "Content Creator",
            "description": "Name of content creator.",
        }
    )
    source = fields.String(
        metadata={
            "label": "Content Source",
            "description": "Source name/URL of content",
        }
    )

    # --- crawl tuning -----------------------------------------------------
    workers = fields.Integer(
        metadata={
            "label": "Workers",
            "description": "The number of workers to run in parallel. Default to 1",
        },
        required=False,
    )
    include_domains = fields.String(
        metadata={
            "label": "Include domains",
            "description": "Limit to URLs from only certain domains. "
            "If not set, all URLs are included.",
        },
        data_key="include-domains",
        required=False,
    )
    exclude = fields.String(
        metadata={
            "label": "Exclude",
            "description": "Regex of URLs that should be excluded from the crawl.",
        },
        required=False,
    )
    wait_until = fields.String(
        metadata={
            "label": "WaitUntil",
            "description": "Puppeteer page.goto() condition to wait for "
            "before continuing. Default to `load`",
        },
        data_key="waitUntil",
        required=False,
    )
    limit = fields.Integer(
        metadata={
            "label": "Limit",
            "description": "Limit crawl to this number of pages. 0 means no-limit.",
        },
    )
    timeout = fields.Integer(
        metadata={
            "label": "Timeout",
            "description": "Timeout for each page to load (in millis). "
            "Default to 30000",
        },
        required=False,
    )
    scope = fields.String(
        metadata={
            "label": "Scope",
            "description": "The scope of current page that should be included in the "
            "crawl (defaults to the domain of URL)",
        },
        required=False,
    )
    scroll = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Scroll",
            "description": "If set, will autoscroll pages to bottom.",
        },
        required=False,
    )
    new_context = StringEnum(
        metadata={
            "label": "New Context",
            "description": "The context for each new capture. Defaults to page",
        },
        validate=validate.OneOf(["page", "session", "browser"]),
        data_key="newContext",
        required=False,
    )
    custom_css = fields.Url(
        metadata={
            "label": "Custom CSS",
            "description": "URL to a CSS file to inject into pages",
        },
        data_key="custom-css",
        required=False,
    )
    verbose = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Verbose mode",
            "description": "Whether to display additional logs",
        },
        required=False,
    )

    # --- output locations -------------------------------------------------
    # `output` is checked by `validate_output` (defined elsewhere).
    output = fields.String(
        metadata={
            "label": "Output folder",
            "placeholder": "/output",
            "description": "Output folder for ZIM file(s). Leave it as `/output`",
        },
        missing="/output",
        default="/output",
        validate=validate_output,
    )
    # Only the default path is accepted (validate.Equal).
    stats_filename = fields.String(
        metadata={
            "label": "Stats filename",
            "placeholder": "/output/task_progress.json",
            "description": "Scraping progress file. "
            "Leave it as `/output/task_progress.json`",
        },
        data_key="statsFilename",
        missing="/output/task_progress.json",
        default="/output/task_progress.json",
        validate=validate.Equal("/output/task_progress.json"),
    )

    # --- crawler environment ----------------------------------------------
    replay_viewer_source = fields.Url(
        metadata={
            "label": "Replay Viewer Source",
            "description": "URL from which to load the ReplayWeb.page "
            "replay viewer from",
        },
        data_key="replay-viewer-source",
        required=False,
    )
    use_sitemap = fields.Url(
        metadata={
            "label": "Use sitemap",
            "description": "Use as sitemap to get additional URLs for the crawl "
            "(usually at /sitemap.xml)",
        },
        data_key="useSitemap",
        required=False,
    )
    # Accepted values are decided by `validate_devicelist` (defined elsewhere).
    mobile_device = StringEnum(
        metadata={
            "label": "As device",
            "description": "Device to crawl as. Defaults to `Iphone X`. "
            "See Puppeteer's DeviceDescriptors.",
        },
        data_key="mobileDevice",
        required=False,
        validate=validate_devicelist,
    )
    admin_email = fields.String(
        metadata={
            "label": "Admin Email",
            "description": "Admin Email for crawler: used in UserAgent "
            "so website admin can contact us",
        },
        data_key="adminEmail",
        required=False,
    )
class MWOfflinerFlagsSchema(SerializableSchema):
    """Flags for building ZIM file(s) from a mediawiki with mwoffliner.

    Attribute names are camelCase and, except for ``formats`` (exposed as
    ``format``), are used as-is as external flag names. ``metadata`` carries
    the user-facing label/description and optional secret marker.
    Declaration order is kept (``Meta.ordered``).
    """

    class Meta:
        ordered = True

    # --- source wiki and contact ------------------------------------------
    mwUrl = fields.URL(
        required=True,
        metadata={
            "label": "Wiki URL",
            "description": "The URL of the mediawiki to scrape",
        },
    )
    adminEmail = fields.Email(
        required=True,
        metadata={
            "label": "Admin Email",
            "description": "Email of the mwoffliner user which will be put "
            "in the HTTP user-agent string",
        },
    )
    articleList = fields.URL(
        metadata={
            "label": "Article List",
            "description": "URL to an UTF-8 tsv file containing article names "
            "to include (one per line)",
        }
    )

    # --- ZIM customisation ------------------------------------------------
    customMainPage = fields.String(
        metadata={
            "label": "Main Page",
            "description": "Article Name to use as home page. "
            "Automatically built or guessed otherwise.",
        }
    )
    customZimTitle = fields.String(
        metadata={
            "label": "ZIM Title",
            "description": "Custom ZIM title. Wiki name otherwise.",
        }
    )
    customZimDescription = fields.String(metadata={"label": "ZIM Description"})
    customZimFavicon = fields.Url(
        metadata={
            "label": "ZIM favicon",
            "description": "URL to a png to use as favicon. Will be resized to 48x48px.",
        }
    )
    customZimTags = fields.String(
        metadata={
            "label": "ZIM Tags",
            "description": "Semi-colon separated list of ZIM tags",
        }
    )
    publisher = fields.String(
        metadata={
            "label": "Publisher",
            "description": "ZIM publisher metadata. `Kiwix` otherwise.",
        }
    )
    filenamePrefix = fields.String(
        metadata={
            "label": "Filename prefix",
            "description": "Custom filename up to the formats and date parts.",
        }
    )
    # List of flavours; each item must be one of the values below (the empty
    # string being the "full, no suffix" flavour per the description).
    formats = ListOfStringEnum(
        fields.String(
            validate=validate.OneOf(
                [
                    "nodet,nopic:mini",
                    "nodet:mini",
                    "nopic:nopic",
                    "novid:maxi",
                    "",
                    "nodet",
                    "nopic",
                    "novid",
                    "nodet,nopic",
                ]
            )
        ),
        data_key="format",
        metadata={
            "label": "Flavours",
            "description": "Which flavours to build, as `<flavour>:<custom-suffix>`. "
            "Empty option is full without suffix.",
        },
    )
    customFlavour = StringEnum(
        metadata={
            "label": "Custom Flavour",
            "description": "Custom processor to filter and process articles (see extensions/*.js)",
        },
        validate=validate.OneOf(
            ["/tmp/mwoffliner/extensions/wiktionary_fr.js"]  # nosec
        ),
    )
    optimisationCacheUrl = fields.Url(
        metadata={
            "label": "Optimisation Cache URL",
            "description": "S3 Storage URL including credentials and bucket",
            "secret": True,
        }
    )

    # --- processing options (strict booleans: only True/False accepted) ----
    zstd = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Use Zstandard compression",
            "description": "Use Zstandard as ZIM compression (Lzma otherwise)",
        },
    )
    addNamespaces = fields.String(
        metadata={
            "label": "Add Namespaces",
            "description": "Include additional namespaces (comma separated numbers)",
        }
    )
    getCategories = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Add categories",
            "description": "[WIP] Download category pages",
        },
    )
    keepEmptyParagraphs = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Keep empty paragraphs",
            "description": "Keep all paragraphs, even empty ones.",
        },
    )
    minifyHtml = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Minify HTML",
            "description": "Try to reduce the size of the HTML",
        },
    )

    # --- mediawiki endpoints and credentials ------------------------------
    mwWikiPath = fields.String(
        metadata={
            "label": "Wiki Path",
            "description": "Mediawiki wiki base path. Otherwise `/wiki/`.",
        }
    )
    mwApiPath = fields.String(
        metadata={
            "label": "API Path",
            "description": "Mediawiki API path. Otherwise `/w/api.php`.",
        }
    )
    mwModulePath = fields.String(
        metadata={
            "label": "Module Path",
            "description": "Mediawiki module load path. Otherwise `/w/load.php`.",
        }
    )
    mwDomain = fields.String(
        metadata={
            "label": "User Domain",
            "description": "Mediawiki user domain (for private wikis)",
        }
    )
    mwUsername = fields.String(
        metadata={
            "label": "Username",
            "description": "Mediawiki username (for private wikis)",
        }
    )
    # Flagged as secret like the other credential-bearing fields
    # (`optimisationCacheUrl` here, `password` in the Openedx schema).
    # NOTE(review): how consumers treat "secret" is not visible here --
    # presumably the value is masked; confirm.
    mwPassword = fields.String(
        metadata={
            "label": "Password",
            "description": "Mediawiki user password (for private wikis)",
            "secret": True,
        }
    )

    # --- filesystem locations ---------------------------------------------
    osTmpDir = fields.String(
        metadata={
            "label": "OS Temp Dir",
            "description": "Override default operating system temporary directory path "
            "environment variable",
        }
    )
    # Value is checked by `validate_output` (defined elsewhere).
    outputDirectory = fields.String(
        metadata={
            "label": "Output folder",
            "placeholder": "/output",
            "description": "Output folder for ZIM file or build folder. "
            "Leave it as `/output`",
        },
        missing="/output",
        default="/output",
        validate=validate_output,
    )

    # --- network / runtime behaviour --------------------------------------
    noLocalParserFallback = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Don't fallback to local Parser",
            "description": "Don't fall back to a local MCS or Parsoid, only use remote APIs",
        },
    )
    requestTimeout = fields.Integer(
        metadata={
            "label": "Request Timeout",
            "description": "Request timeout (in seconds)",
        },
        validate=validate.Range(min=1),
    )
    speed = fields.Float(
        metadata={
            "label": "Speed",
            "description": "Multiplicator for the number of parallel HTTP requests "
            "on Parsoid backend. Otherwise `1`. Reduce on throttled Wikis.",
        }
    )
    withoutZimFullTextIndex = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Without Full Text Index",
            "description": "Don't include a fulltext search index to the ZIM",
        },
    )
    verbose = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Verbose",
            "description": "Print debug information to the stdout",
        },
    )
    webp = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Webp",
            "description": "Convert images to Webp",
        },
    )
class OpenedxFlagsSchema(SerializableSchema):
    """Flags for building a ZIM from a course hosted on an openedx instance.

    Each attribute declares one flag. ``metadata`` carries the user-facing
    label/description (and optional placeholder / secret marker), while
    ``data_key`` is the external flag name. Declaration order is kept
    (``Meta.ordered``).
    """

    class Meta:
        ordered = True

    # --- course location and credentials ---------------------------------
    course_url = fields.Url(
        metadata={
            "label": "Course URL",
            "description": "URL of the course you want to scrape",
        },
        data_key="course-url",
        required=True,
    )
    email = fields.String(
        metadata={
            "label": "Registered e-mail",
            "description": "The registered e-mail ID on the openedx instance",
        },
        data_key="email",
        required=True,
    )
    password = fields.String(
        metadata={
            "label": "Password",
            "description": "Password to the account registered on the openedx instance",
            "secret": True,
        },
        data_key="password",
        required=True,
    )

    # --- content selection (strict booleans: only True/False accepted) ----
    ignore_missing_xblocks = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Ignore unsupported xblocks",
            "description": "Ignore unsupported content (xblock(s))",
        },
        data_key="ignore-missing-xblocks",
    )
    add_wiki = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Include wiki",
            "description": "Add wiki (if available) to the ZIM",
        },
        data_key="add-wiki",
    )
    add_forum = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Include forum",
            "description": "Add forum/discussion (if available) to the ZIM",
        },
        data_key="add-forum",
    )

    # --- video handling ---------------------------------------------------
    video_format = StringEnum(
        metadata={
            "label": "Video format",
            "description": "Format to download/transcode video to. webm is smaller",
        },
        validate=validate.OneOf(["webm", "mp4"]),
        data_key="format",
    )
    low_quality = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Low Quality",
            "description": "Re-encode video using stronger compression",
        },
        data_key="low-quality",
    )

    # --- ZIM metadata -----------------------------------------------------
    name = fields.String(
        metadata={
            "label": "Name",
            "description": "ZIM name. "
            "Used as identifier and filename (date will be appended)",
            "placeholder": "topic_eng",
        },
        data_key="name",
        required=True,
    )
    title = fields.String(
        metadata={
            "label": "Title",
            "description": "Custom title for your ZIM. Based on MOOC otherwise",
        },
        data_key="title",
    )
    description = fields.String(
        metadata={
            "label": "Description",
            "description": "Custom description for your ZIM. Based on MOOC otherwise",
        },
        data_key="description",
    )
    creator = fields.String(
        metadata={
            "label": "Content Creator",
            "description": "Name of content creator. Defaults to edX",
        },
        data_key="creator",
    )
    tags = fields.String(
        metadata={
            "label": "ZIM Tags",
            "description": "List of comma-separated Tags for the ZIM file. "
            "category:openedx, and openedx added automatically",
        },
        data_key="tags",
    )

    # --- optimization cache -----------------------------------------------
    optimization_cache = fields.Url(
        metadata={
            "label": "Optimization Cache URL",
            "description": "URL with credentials and bucket name to S3 Optimization Cache",
            "secret": True,
        },
        data_key="optimization-cache",
    )
    use_any_optimized_version = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Use any optimized version",
            "description": "Use the cached files if present, whatever the version",
        },
        data_key="use-any-optimized-version",
    )

    # --- output locations (checked by `validate_output`, defined elsewhere)
    output = fields.String(
        metadata={
            "label": "Output folder",
            "placeholder": "/output",
            "description": "Output folder for ZIM file(s). Leave it as `/output`",
        },
        missing="/output",
        default="/output",
        validate=validate_output,
        data_key="output",
    )
    tmp_dir = fields.String(
        metadata={
            "label": "Temp folder",
            "description": "Where to create temporary build folder. "
            "Leave it as `/output`",
        },
        missing="/output",
        default="/output",
        validate=validate_output,
        data_key="tmp-dir",
    )
    zim_file = fields.String(
        metadata={
            "label": "ZIM filename",
            "description": "ZIM file name (based on ZIM name if not provided)",
        },
        data_key="zim-file",
    )
    debug = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={"label": "Debug", "description": "Enable verbose output"},
    )
class YoutubeFlagsSchema(SerializableSchema):
    """Flags for building ZIM file(s) from a Youtube channel, user or playlist(s).

    Two modes are supported: a single ZIM (``name`` is then required) or one
    ZIM per playlist when ``indiv_playlists`` is set (``playlists_name`` is
    then required). That cross-field rule is enforced by the schema-level
    hook at the bottom of the class. Declaration order is kept
    (``Meta.ordered``).
    """

    class Meta:
        ordered = True

    indiv_playlists = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Playlists mode",
            "description": "Build one ZIM per playlist of the channel or user",
        },
        data_key="indiv-playlists",
    )

    # --- collection to scrape ---------------------------------------------
    kind = StringEnum(
        metadata={
            "label": "Type",
            "description": "Type of collection. Only `playlist` accepts multiple IDs.",
        },
        validate=validate.OneOf(["channel", "playlist", "user"]),
        data_key="type",
        required=True,
    )
    ident = fields.String(
        metadata={
            "label": "Youtube ID",
            "description": "Youtube ID of the collection. "
            "Separate multiple playlists with commas.",
        },
        data_key="id",
        required=True,
    )
    # NOTE(review): unlike `optimization_cache` below, this credential is not
    # flagged `"secret": True` -- confirm whether it should be.
    api_key = fields.String(
        metadata={"label": "API Key", "description": "Youtube API Token"},
        data_key="api-key",
        required=True,
    )
    name = fields.String(
        metadata={
            "label": "ZIM Name",
            "description": "Used as identifier and filename (date will be appended)",
            "placeholder": "mychannel_eng_all",
        },
    )
    playlists_name = fields.String(
        metadata={
            "label": "Playlists name",
            "description": "Format for building individual --name argument. "
            "Required in playlist mode. Variables: {title}, {description}, "
            "{playlist_id}, {slug} (from title), {creator_id}, {creator_name}",
        },
        data_key="playlists-name",
    )

    # --- video handling ---------------------------------------------------
    video_format = StringEnum(
        metadata={
            "label": "Video format",
            "description": "Format to download/transcode video to. webm is smaller",
        },
        validate=validate.OneOf(["webm", "mp4"]),
        data_key="format",
    )
    low_quality = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Low Quality",
            "description": "Re-encode video using stronger compression",
        },
        data_key="low-quality",
    )
    concurrency = fields.Integer(
        metadata={
            "label": "Concurrency",
            "description": "Number of concurrent threads to use",
        },
    )
    dateafter = fields.String(
        metadata={
            "label": "Only after date",
            "description": "Custom filter to download videos uploaded on "
            "or after specified date. Format: YYYYMMDD or "
            "(now|today)[+-][0-9](day|week|month|year)(s)?",
        }
    )

    # --- optimization cache -----------------------------------------------
    optimization_cache = fields.Url(
        metadata={
            "label": "Optimization Cache URL",
            "description": "S3 Storage URL including credentials and bucket",
            "secret": True,
        },
        data_key="optimization-cache",
    )
    use_any_optimized_version = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Use any optimized version",
            "description": "Use the cached files if present, whatever the version",
        },
        data_key="use-any-optimized-version",
    )

    # --- presentation -----------------------------------------------------
    all_subtitles = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "All Subtitles",
            "description": "Include auto-generated subtitles",
        },
        data_key="all-subtitles",
    )
    pagination = fields.Integer(
        metadata={
            "label": "Pagination",
            "description": "Number of videos per page (40 otherwise)",
        },
    )
    autoplay = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "Auto-play",
            "description": "Enable autoplay on video articles "
            "(home never have autoplay).",
        },
    )

    # --- output locations (checked by `validate_output`, defined elsewhere)
    output = fields.String(
        metadata={
            "label": "Output folder",
            "placeholder": "/output",
            "description": "Output folder for ZIM file(s). Leave it as `/output`",
        },
        missing="/output",
        default="/output",
        validate=validate_output,
    )
    tmp_dir = fields.String(
        metadata={
            "label": "Temp folder",
            "placeholder": "/output",
            "description": "Where to create temporary build folder. "
            "Leave it as `/output`",
        },
        missing="/output",
        default="/output",
        validate=validate_output,
        data_key="tmp-dir",
    )
    zim_file = fields.String(
        metadata={
            "label": "ZIM filename",
            "description": "ZIM file name (based on --name if not provided). "
            "Include {period} to insert date period dynamically",
        },
        data_key="zim-file",
    )
    playlists_zim_file = fields.String(
        metadata={
            "label": "Playlists ZIM filename",
            "description": "Format for building individual --zim-file argument. "
            "Uses --playlists-name otherwise",
        },
        data_key="playlists-zim-file",
    )

    # --- ZIM metadata (plain value + per-playlist variant) ----------------
    language = fields.String(
        metadata={
            "label": "Language",
            "description": "ISO-639-3 (3 chars) language code of content",
        }
    )
    locale = fields.String(
        metadata={
            "label": "Locale",
            "description": "Locale name to use for translations (if avail) "
            "and time representations. Defaults to --language or English.",
        }
    )
    title = fields.String(
        metadata={
            "label": "Title",
            "description": "Custom title for your project and ZIM. Default to "
            "Channel name (of first video if playlists)",
        }
    )
    playlists_title = fields.String(
        metadata={
            "label": "Playlists title",
            "description": "Custom title format for individual playlist ZIM",
        },
        data_key="playlists-title",
    )
    description = fields.String(
        metadata={"label": "Description", "description": "Description for ZIM"}
    )
    playlists_description = fields.String(
        metadata={
            "label": "Playlists description",
            "description": "Custom description format for individual playlist ZIM",
        },
        data_key="playlists-description",
    )
    creator = fields.String(
        metadata={
            "label": "Content Creator",
            "description": "Name of content creator. Defaults to Channel name "
            "or “Youtube Channels”",
        }
    )
    tags = fields.String(
        metadata={
            "label": "ZIM Tags",
            "description": "List of Tags for the ZIM file. "
            "_videos:yes added automatically",
        }
    )
    metadata_from = fields.String(
        metadata={
            "label": "Metadata JSON",
            "description": "File path or URL to a JSON file holding custom metadata "
            "for individual playlists",
        },
        data_key="metadata-from",
    )

    # --- branding ---------------------------------------------------------
    profile = fields.Url(
        metadata={
            "label": "Profile Image",
            "description": "Custom profile image. Squared. "
            "Will be resized to 100x100px",
        }
    )
    banner = fields.Url(
        metadata={
            "label": "Banner Image",
            "description": "Custom banner image. Will be resized to 1060x175px",
        }
    )
    main_color = HexColor(
        metadata={
            "label": "Main Color",
            "description": "Custom color. Hex/HTML syntax (#DEDEDE). "
            "Default to main color of profile image.",
        },
        data_key="main-color",
    )
    secondary_color = HexColor(
        metadata={
            "label": "Secondary Color",
            "description": "Custom secondary color. Hex/HTML syntax (#DEDEDE). "
            "Default to secondary color of profile image.",
        },
        data_key="secondary-color",
    )
    debug = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={"label": "Debug", "description": "Enable verbose output"},
    )

    # NOTE(review): if SerializableSchema derives from marshmallow's Schema,
    # naming this hook `validate` overrides the inherited `Schema.validate()`
    # method. The name is kept so the class interface does not change;
    # consider renaming after checking callers.
    @validates_schema
    def validate(self, data, **kwargs):
        """Enforce the naming flag required by the selected mode.

        Raises:
            ValidationError: when ``indiv_playlists`` is set without a
                ``playlists_name``, or when it is unset and ``name`` is
                missing or empty.
        """
        if data.get("indiv_playlists"):
            if not data.get("playlists_name"):
                raise ValidationError("playlists-name required in playlists mode")
        elif not data.get("name"):
            raise ValidationError("name required in normal mode")