def handle(self, *args, **options):
    if not settings.CONTENT_SYNC_PIPELINE_BACKEND:
        self.stderr.write("Pipeline backend is not configured")
        return
    self.stdout.write("Creating theme asset pipeline")
    is_verbose = options["verbosity"] > 1
    unpause = options["unpause"]

    if is_verbose:
        self.stdout.write("Upserting theme assets pipeline")

    start = now_in_utc()
    task = upsert_theme_assets_pipeline.delay(unpause=unpause)
    self.stdout.write(f"Started celery task {task} to upsert theme assets pipeline")
    self.stdout.write("Waiting on task...")
    result = task.get()
    if result is not True:
        raise CommandError(f"Some errors occurred: {result}")
    total_seconds = (now_in_utc() - start).total_seconds()
    self.stdout.write(
        "Pipeline upsert finished, took {} seconds".format(total_seconds)
    )

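# Hedged sketch (not from the source): an `add_arguments` definition that would supply
# the options this handler reads. Only `unpause` needs declaring, since `verbosity` is
# provided by Django's BaseCommand; the flag name is an assumption inferred from the
# options["unpause"] lookup above.
def add_arguments(self, parser):
    parser.add_argument(
        "--unpause",
        dest="unpause",
        action="store_true",
        help="Unpause the theme assets pipeline after upserting it",
    )
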
def handle(self, *args, **options):
    if not settings.CONTENT_SYNC_PIPELINE_BACKEND:
        self.stderr.write("Pipeline backend is not configured")
        return
    self.stdout.write("Creating website pipelines")
    filter_str = options["filter"].lower()
    starter_str = options["starter"]
    source_str = options["source"]
    chunk_size = int(options["chunk_size"])
    create_backend = options["create_backend"]
    is_verbose = options["verbosity"] > 1
    unpause = options["unpause"]

    if filter_str:
        website_qset = Website.objects.filter(
            Q(name__startswith=filter_str) | Q(title__startswith=filter_str)
        )
    else:
        website_qset = Website.objects.all()
    if starter_str:
        website_qset = website_qset.filter(starter__slug=starter_str)
    if source_str:
        website_qset = website_qset.filter(source=source_str)
    website_names = list(website_qset.values_list("name", flat=True))

    if is_verbose:
        self.stdout.write(
            f"Upserting pipelines for the following sites: {','.join(website_names)}"
        )

    start = now_in_utc()
    task = upsert_pipelines.delay(
        website_names,
        chunk_size=chunk_size,
        create_backend=create_backend,
        unpause=unpause,
    )
    self.stdout.write(
        f"Started celery task {task} to upsert pipelines for {len(website_names)} sites"
    )
    self.stdout.write("Waiting on task...")
    result = task.get()
    if set(result) != {True}:
        raise CommandError(f"Some errors occurred: {result}")
    total_seconds = (now_in_utc() - start).total_seconds()
    self.stdout.write(
        "Pipeline upserts finished, took {} seconds".format(total_seconds)
    )

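# Hedged sketch (not from the source): one plausible `add_arguments` for the options
# consumed above. Flag names, defaults, and help text are assumptions; only the
# options[...] keys themselves come from the handler.
def add_arguments(self, parser):
    parser.add_argument(
        "--filter", dest="filter", default="",
        help="Only upsert pipelines for sites whose name or title starts with this string",
    )
    parser.add_argument(
        "--starter", dest="starter", default="",
        help="Only upsert pipelines for sites using this starter slug",
    )
    parser.add_argument(
        "--source", dest="source", default="",
        help="Only upsert pipelines for sites with this source",
    )
    parser.add_argument(
        "--chunk-size", dest="chunk_size", default=500,
        help="Number of sites to process per celery task chunk",
    )
    parser.add_argument(
        "--create-backend", dest="create_backend", action="store_true",
        help="Create website backends if they do not already exist",
    )
    parser.add_argument(
        "--unpause", dest="unpause", action="store_true",
        help="Unpause the pipelines after upserting them",
    )
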
def handle(self, *args, **options): self.stdout.write(f"Creating website permission groups") start = now_in_utc() filter_str = options["filter"].lower() is_verbose = options["verbosity"] > 1 total_websites = 0 total_created = 0 total_updated = 0 total_owners = 0 # Redo global groups too in case permissions changed if not filter_str: created, updated = create_global_groups() self.stdout.write( f"Global groups: created {created} groups, updated {updated} groups" ) if not options["only-global"]: if filter_str: website_qset = Website.objects.filter( Q(name__icontains=filter_str) | Q(title__icontains=filter_str) ) else: website_qset = Website.objects.all() for website in website_qset.iterator(): created, updated, owner_updated = setup_website_groups_permissions( website ) total_websites += 1 total_created += created total_updated += updated total_owners += 1 if owner_updated else 0 if is_verbose: self.stdout.write( f"{website.name} groups: created {created}, updated {updated}, owner updated: {str(owner_updated)}" ) total_seconds = (now_in_utc() - start).total_seconds() self.stdout.write( "Creation of website permission groups finished, took {} seconds".format( total_seconds ) ) self.stdout.write( f"{total_websites} websites processed, {total_created} groups created, {total_updated} groups updated, {total_owners} updated" )
def handle(self, *args, **options):
    prefix = options["prefix"]
    if prefix:
        # make sure it ends with a '/'
        prefix = prefix.rstrip("/") + "/"
    bucket_name = options["bucket"]
    filter_str = options["filter"]
    limit = options["limit"]
    delete_unpublished = options["delete_unpublished"]
    delete_from_git = options["delete_from_git"]

    if options["list"] is True:
        course_paths = list(
            fetch_ocw2hugo_course_paths(
                bucket_name, prefix=prefix, filter_list=[filter_str]
            )
        )
        pydoc.pager("\n".join(course_paths))
        return

    self.stdout.write(f"Importing OCW courses from '{bucket_name}' bucket")
    start = now_in_utc()
    task = import_ocw2hugo_courses.delay(
        bucket_name=bucket_name,
        prefix=prefix,
        filter_str=filter_str,
        limit=limit,
        delete_unpublished=delete_unpublished,
        chunk_size=options["chunks"],
    )
    self.stdout.write(f"Starting task {task}...")
    task.get()
    total_seconds = (now_in_utc() - start).total_seconds()
    self.stdout.write(
        "OCW course import finished, took {} seconds".format(total_seconds)
    )

    if settings.CONTENT_SYNC_BACKEND and not options["skip_sync"]:
        self.stdout.write("Syncing all unsynced courses to the designated backend")
        start = now_in_utc()
        task = sync_unsynced_websites.delay(
            create_backends=True,
            delete=delete_from_git,
        )
        self.stdout.write(f"Starting task {task}...")
        task.get()
        total_seconds = (now_in_utc() - start).total_seconds()
        self.stdout.write(
            "Backend sync finished, took {} seconds".format(total_seconds)
        )

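# Hedged usage sketch: invoking the importer via django.core.management.call_command.
# The command name "import_ocw_course_sites" and all option values are assumptions for
# illustration only; the option keys mirror what handle() reads above, and assume the
# command's add_arguments supplies defaults for the remaining options.
from django.core.management import call_command

call_command(
    "import_ocw_course_sites",   # assumed command name
    bucket="ocw-content-storage",  # assumed S3 bucket
    prefix="output/",              # a trailing "/" is re-applied by handle() anyway
    filter="18.01",                # assumed course-name filter
    limit=10,
    list=True,                     # just page through matching course paths, no import
)
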
def test_websites_endpoint_list(drf_client, filter_by_type, websites, settings):
    """Test new websites endpoint for lists"""
    website_type = settings.OCW_IMPORT_STARTER_SLUG if filter_by_type else None
    filter_by_type = website_type is not None
    now = now_in_utc()

    expected_websites = websites.courses
    if filter_by_type:
        resp = drf_client.get(reverse("websites_api-list"), {"type": website_type})
        assert resp.data.get("count") == 3
    else:
        expected_websites.extend(websites.noncourses)
        resp = drf_client.get(reverse("websites_api-list"))
        assert resp.data.get("count") == 5

    for idx, site in enumerate(
        sorted(expected_websites, reverse=True, key=lambda site: site.publish_date)
    ):
        assert resp.data.get("results")[idx]["uuid"] == str(site.uuid)
        assert resp.data.get("results")[idx]["starter"]["slug"] == (
            settings.OCW_IMPORT_STARTER_SLUG if filter_by_type else site.starter.slug
        )
        assert resp.data.get("results")[idx]["publish_date"] <= now.strftime(
            ISO_8601_FORMAT
        )

def pipeline_status(self, request, name=None):
    """Process webhook requests from concourse pipeline runs"""
    website = get_object_or_404(Website, name=name)
    data = request.data
    version = data["version"]
    publish_status = data.get("status")
    update_website_status(website, version, publish_status, now_in_utc())
    return Response(status=200)

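# Illustrative only: the shape of a Concourse webhook payload this view expects, based
# on the keys read above ("version" is required, "status" is optional). The concrete
# values are assumptions.
example_payload = {
    "version": "live",      # or "draft"
    "status": "succeeded",  # may be omitted while a build is still running
}
# A client would POST this JSON to the pipeline_status route for a given website name.
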
def handle(self, *args, **options): self.stdout.write("Resetting synced checksums to null") start = now_in_utc() type_str = options["type"].lower() create_backends = options["create_backends"] filter_str = options["filter"].lower() starter_str = options["starter"].lower() source_str = options["source"].lower() skip_sync = options["skip_sync"] content_qset = ContentSyncState.objects.exclude(synced_checksum__isnull=True) if type_str: content_qset = content_qset.filter(Q(content__type=type_str)) if filter_str: content_qset = content_qset.filter( Q(content__website__name__startswith=filter_str) | Q(content__website__short_id__startswith=filter_str) ) if starter_str: content_qset = content_qset.filter( content__website__starter__slug=starter_str ) if source_str: content_qset = content_qset.filter(content__website__source=source_str) content_qset.update(synced_checksum=None, data=None) total_seconds = (now_in_utc() - start).total_seconds() self.stdout.write( "Clearing of content sync state complete, took {} seconds".format( total_seconds ) ) if settings.CONTENT_SYNC_BACKEND and not skip_sync: self.stdout.write("Syncing all unsynced websites to the designated backend") start = now_in_utc() task = sync_unsynced_websites.delay(create_backends=create_backends) self.stdout.write(f"Starting task {task}...") task.get() total_seconds = (now_in_utc() - start).total_seconds() self.stdout.write( "Backend sync finished, took {} seconds".format(total_seconds) )
def handle(self, *args, **options): self.stdout.write("Fixing repos for imported OCW sites") start = now_in_utc() errors = 0 websites = (Website.objects.exclude(short_id__endswith="-2").filter( source="ocw-import", short_id__regex=r".+\-\d{1,2}$").order_by("name")) self.stdout.write(f"Repairing repos for {websites.count()} sites") for website in websites: try: with transaction.atomic(): short_id_secs = website.short_id.split("-") base_repo, idx = ("-".join(short_id_secs[:-1]), short_id_secs[-1]) website.short_id = f"{base_repo}-2" website.save() ContentSyncState.objects.filter( content__website=website).update(synced_checksum=None, data=None) backend = get_sync_backend(website) backend.sync_all_content_to_backend() get_sync_pipeline(website).upsert_pipeline() for i in range(3, int(idx) + 1): try: backend.api.org.get_repo( f"{base_repo}-{i}").delete() except GithubException as ge: if ge.status != 404: raise except Exception as exc: # pylint:disable=broad-except self.stderr.write( f"Error occurred repairing repo for {website.name}: {exc}") errors += 1 total_seconds = (now_in_utc() - start).total_seconds() if errors == 0: self.stdout.write( f"Repo repair finished, took {total_seconds} seconds") else: self.stderr.write( f"Repo repair finished with {errors} errors, took {total_seconds} seconds" )
def handle(self, *args, **options):
    prefix = options["prefix"]
    if prefix:
        # make sure it ends with a '/'
        prefix = prefix.rstrip("/") + "/"
    bucket_name = options["bucket"]
    filter_json = options["filter_json"]
    limit = options["limit"]
    create_new = options["create_new"]
    content_field = options["content_field"]

    if filter_json:
        with open(filter_json) as input_file:
            filter_list = json.load(input_file)
    elif options["filter"]:
        filter_list = [
            name.strip() for name in options["filter"].split(",") if name
        ]
    else:
        filter_list = None

    if not create_new and not content_field:
        self.stderr.write("Either --content-field or --create-new is required")
        return

    self.stdout.write(f"Updating OCW courses from '{bucket_name}' bucket")
    start = now_in_utc()
    task = update_ocw_resource_data.delay(
        bucket_name=bucket_name,
        prefix=prefix,
        filter_list=filter_list,
        limit=limit,
        chunk_size=options["chunks"],
        content_field=options["content_field"],
        create_new_content=create_new,
    )
    self.stdout.write(f"Starting task {task}...")
    task.get()
    total_seconds = (now_in_utc() - start).total_seconds()
    self.stdout.write(
        "OCW Content Update finished, took {} seconds".format(total_seconds)
    )

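# Hedged sketch: the --filter-json file is read with json.load() and passed straight
# through as filter_list, so it is expected to contain a JSON array of site names.
# The file name and site names below are made up for illustration.
import json

with open("filter.json", "w") as out:
    json.dump(
        [
            "18.01-single-variable-calculus-fall-2006",
            "8.01-physics-i-fall-2016",
        ],
        out,
    )
# The comma-separated --filter option produces an equivalent in-memory list instead.
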
def get_queryset(self):
    """
    Generate a QuerySet for fetching websites.
    """
    ordering = self.request.query_params.get("sort", "-updated_on")
    website_type = self.request.query_params.get("type", None)
    search = self.request.query_params.get("search", None)
    resourcetype = self.request.query_params.get("resourcetype", None)
    published = self.request.query_params.get("published", None)
    user = self.request.user

    if self.request.user.is_anonymous:
        # Anonymous users should get a list of all published websites (used for ocw-www carousel)
        ordering = "-publish_date"
        queryset = Website.objects.filter(
            publish_date__lte=now_in_utc(),
            # Replace this after imported ocw sites have metadata stored in WebsiteContent objects
            metadata__isnull=False,
        )
    elif is_global_admin(user):
        # Global admins should get a list of all websites, published or not.
        queryset = Website.objects.all()
    else:
        # Other authenticated users should get a list of websites they are editors/admins/owners for.
        queryset = get_objects_for_user(user, constants.PERMISSION_VIEW)

    if search is not None and search != "":
        # The search query param is used in the react-select typeahead, and should
        # match on the title, name, and short_id
        search_filter = Q(search=SearchQuery(search)) | Q(search__icontains=search)
        if "." in search:
            # postgres text search behaves oddly with periods but not dashes
            search_filter = search_filter | Q(
                search=SearchQuery(search.replace(".", "-"))
            )
        queryset = queryset.annotate(
            search=SearchVector("name", "title", "short_id")
        ).filter(search_filter)

    if resourcetype is not None:
        queryset = queryset.filter(metadata__resourcetype=resourcetype)
    if website_type is not None:
        queryset = queryset.filter(starter__slug=website_type)
    if published is not None:
        published = _parse_bool(published)
        queryset = queryset.filter(publish_date__isnull=not published)

    return queryset.select_related("starter").order_by(ordering)

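# Hedged sketch of the _parse_bool helper referenced above; it is not shown in this
# excerpt, so this is one plausible implementation: treat common truthy query-string
# values as True and everything else as False.
def _parse_bool(value):
    """Interpret a query parameter string as a boolean"""
    return str(value).lower() in ("1", "true", "yes")
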
def handle(self, *args, **options):
    if not settings.CONTENT_SYNC_PIPELINE_BACKEND:
        self.stderr.write("Pipeline backend is not configured")
        return
    self.stdout.write("Creating mass publish pipelines")
    unpause = options["unpause"]
    start = now_in_utc()
    for version in (VERSION_DRAFT, VERSION_LIVE):
        pipeline = get_mass_publish_pipeline(version)
        pipeline.upsert_pipeline()
        self.stdout.write(f"Created {version} mass publish pipeline")
        if unpause:
            pipeline.unpause()
            self.stdout.write(f"Unpaused {version} mass publish pipeline")
    total_seconds = (now_in_utc() - start).total_seconds()
    self.stdout.write(
        "Pipeline upsert finished, took {} seconds".format(total_seconds)
    )

def publish(self, request, name=None):
    """Trigger a publish task for the website"""
    try:
        website = self.get_object()
        Website.objects.filter(pk=website.pk).update(
            has_unpublished_live=False,
            live_publish_status=constants.PUBLISH_STATUS_NOT_STARTED,
            live_publish_status_updated_on=now_in_utc(),
            latest_build_id_live=None,
            live_last_published_by=request.user,
        )
        trigger_publish(website.name, VERSION_LIVE)
        return Response(status=200)
    except Exception as exc:  # pylint: disable=broad-except
        log.exception("Error publishing %s", name)
        return Response(status=500, data={"details": str(exc)})

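# Hedged sketch: a pytest-style check of the publish action, in the spirit of the
# websites endpoint test above. The fixture names (drf_client, admin_user, website)
# and the "websites_api-publish" URL name are assumptions, not taken from the source.
def test_websites_endpoint_publish(drf_client, admin_user, website):
    """The publish action should return 200 and reset the live publish status"""
    drf_client.force_login(admin_user)
    resp = drf_client.post(
        reverse("websites_api-publish", kwargs={"name": website.name})
    )
    assert resp.status_code == 200
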
def handle(self, *args, **options):
    if not settings.CONTENT_SYNC_PIPELINE_BACKEND:
        self.stderr.write("Pipeline backend is not configured for publishing")
        return

    filter_json = options["filter_json"]
    version = options["version"].lower()
    starter_str = options["starter"]
    source_str = options["source"]
    chunk_size = int(options["chunk_size"])
    prepublish = options["prepublish"]
    is_verbose = options["verbosity"] > 1

    if filter_json:
        with open(filter_json) as input_file:
            filter_list = json.load(input_file)
    else:
        filter_list = [
            name.strip() for name in options["filter"].split(",") if name
        ]

    website_qset = Website.objects.filter(starter__source=STARTER_SOURCE_GITHUB)
    if filter_list:
        website_qset = website_qset.filter(name__in=filter_list)
    if starter_str:
        website_qset = website_qset.filter(starter__slug=starter_str)
    if source_str:
        website_qset = website_qset.filter(source=source_str)
    if source_str != WEBSITE_SOURCE_OCW_IMPORT:
        # do not publish any unpublished sites
        if version == VERSION_DRAFT:
            website_qset = website_qset.exclude(draft_publish_status__isnull=True)
        else:
            website_qset = website_qset.exclude(live_publish_status__isnull=True)
    website_names = list(website_qset.values_list("name", flat=True))

    self.stdout.write(
        f"Triggering website {version} builds, source is {source_str}"
    )
    start = now_in_utc()
    task = publish_websites.delay(
        website_names, version, chunk_size=chunk_size, prepublish=prepublish
    )
    self.stdout.write(
        f"Started task {task} to publish {version} versions for {len(website_names)} sites, source={source_str}"
    )
    if is_verbose:
        self.stdout.write(f"{','.join(website_names)}")
    self.stdout.write("Waiting on task...")
    result = task.get()
    if set(result) != {True}:
        raise CommandError("Some errors occurred, check sentry for details")
    total_seconds = (now_in_utc() - start).total_seconds()
    self.stdout.write(
        "Publishing tasks finished, took {} seconds".format(total_seconds)
    )

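# Hedged usage sketch: triggering a mass draft publish for a couple of sites via
# call_command. The command name "publish_websites" and the option values are
# assumptions; only the option keys (filter, version, chunk_size, prepublish) come
# from handle() above, and the sketch assumes add_arguments supplies defaults for
# the remaining options.
from django.core.management import call_command

call_command(
    "publish_websites",           # assumed command name
    filter="site-one,site-two",   # comma-separated names, parsed into filter_list
    version="draft",              # or "live"
    chunk_size=100,
    prepublish=False,
)
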