示例#1
0
    def handle(self, *args, **options):
        """Ingest built workshop datafiles into the database.

        For every ``(slug, path)`` pair returned by
        ``get_all_existing_workshops`` (restricted by the ``name`` option when
        given), the YAML file ``{path}/{slug}.yml`` is loaded and its content
        is written, in order, to the workshop, frontmatter, praxis and lesson
        models. Afterwards the collected log output is saved.

        Options read: ``verbose``, ``silent``, ``name``, ``no_reminder``.
        """
        log = Logger(path=__file__,
                     force_verbose=options.get('verbose'),
                     force_silent=options.get('silent'))
        # The instance is not used by this command. The call is retained
        # because `Input` is defined outside this block and its constructor
        # may have side effects.
        Input(path=__file__)

        # Image helpers. None of them depend on the workshop being processed,
        # so they are defined once rather than on every loop iteration.
        def _get_valid_name(filename):
            return filename.replace(
                '@', '')  # TODO: should exist a built-in for django here?

        def _get_media_path(valid_filename):
            return settings.MEDIA_ROOT + '/' + Workshop.image.field.upload_to + valid_filename

        def _get_media_url(valid_filename):
            return Workshop.image.field.upload_to + valid_filename

        def _image_exists(valid_filename):
            return os.path.exists(_get_media_path(valid_filename))

        def _get_default_image():
            return Workshop.image.field.default

        # Only query the workshops once: by name if requested, otherwise all.
        requested_name = options.get('name')
        if requested_name:
            workshops = get_all_existing_workshops(requested_name)
        else:
            workshops = get_all_existing_workshops()

        for slug, path in workshops:
            DATAFILE = f'{path}/{slug}.yml'

            d = get_yaml(DATAFILE, log=log)

            # Separate out data
            imagedata = d.get('image')
            sections = d.get('sections')
            frontmatterdata = sections.get('frontmatter')
            praxisdata = sections.get('theory-to-practice')
            lessondata = sections.get('lessons')

            full_name = d.get('name')

            # 1. ENTER WORKSHOP
            workshop_defaults = {
                'parent_backend': d.get('parent_backend'),
                'parent_branch': d.get('parent_branch'),
                'parent_repo': d.get('parent_repo'),
            }
            # Only touch the alt text when the datafile has image data;
            # indexing into a missing image entry would raise before the
            # "no image" branch below could assign the default image.
            if imagedata:
                workshop_defaults['image_alt'] = imagedata['alt']

            workshop, _ = Workshop.objects.update_or_create(
                name=full_name,
                slug=dhri_slugify(full_name),
                defaults=workshop_defaults)

            if imagedata:
                source_file = imagedata['url']
                valid_filename = _get_valid_name(
                    slug + '-' + os.path.basename(source_file))
                try:
                    # Copy the image when it is new or when its content
                    # differs from the copy in the media folder. The
                    # comparison sits inside the `try` because it also raises
                    # FileNotFoundError when the source file is missing.
                    needs_copy = not _image_exists(
                        valid_filename) or not filecmp.cmp(
                            source_file,
                            _get_media_path(valid_filename),
                            shallow=False)
                    if needs_copy:
                        with open(source_file, 'rb') as f:
                            workshop.image = File(f, name=valid_filename)
                            workshop.save()
                except FileNotFoundError:
                    log.error(
                        f'File `{source_file}` could not be found. Did you run `python manage.py buildworkshop` before you ran this command?'
                    )
                workshop.image.name = _get_media_url(valid_filename)
                workshop.save()
            else:
                log.warning(
                    f'Workshop {workshop.name} does not have an image assigned to it. Add filepaths to an existing file in your datafile ({DATAFILE}) if you want to update the specific workshop. Default workshop image (`{os.path.basename(_get_default_image())}`) will be assigned.'
                )
                workshop.image.name = Workshop.image.field.default
                workshop.save()

                if not _image_exists(
                        _get_valid_name(os.path.basename(
                            _get_default_image()))):
                    log.warning(
                        f'Default workshop image does not exist. You will want to add it manually to the correct folder: {_get_media_path("")}'
                    )

            # Saving the slug in a format that matches the GitHub repositories (special method `save_slug`)
            workshop.slug = slug
            workshop.save_slug()

            # 2. ENTER FRONTMATTER
            frontmatter, _ = Frontmatter.objects.update_or_create(
                workshop=workshop,
                defaults={
                    'abstract': frontmatterdata.get('abstract'),
                    'estimated_time': frontmatterdata.get('estimated_time')
                })

            for point in frontmatterdata.get('ethical_considerations') or []:
                EthicalConsideration.objects.update_or_create(
                    frontmatter=frontmatter, label=point.get('annotation'))

            for point in frontmatterdata.get('learning_objectives') or []:
                LearningObjective.objects.update_or_create(
                    frontmatter=frontmatter, label=point.get('annotation'))

            # Each datafile key below has the same name as the frontmatter
            # relation the resources are added to.
            frontmatter_categories = {
                'projects': Resource.PROJECT,
                'readings': Resource.READING,
                'cheat_sheets': Resource.CHEATSHEET,
                'datasets': Resource.DATASET,
            }
            for cat, category in frontmatter_categories.items():
                points = frontmatterdata.get(cat)
                if not points:
                    continue

                add_field = getattr(frontmatter, cat)
                if not add_field or not category:
                    log.error(
                        f'Cannot interpret category `{cat}`. Make sure the script is correct and corresponds with the database structure.'
                    )
                    # Nothing can be stored for this category if the error
                    # above did not abort the command.
                    continue

                for point in points:
                    obj, _ = Resource.objects.update_or_create(
                        category=category,
                        title=point.get('linked_text'),
                        url=point.get('url'),
                        annotation=point.get('annotation'))
                    if obj not in add_field.all():
                        add_field.add(obj)

            for point in frontmatterdata.get('contributors') or []:
                profile = None
                try:
                    profile = Profile.objects.get(
                        user__first_name=point.get('first_name'),
                        user__last_name=point.get('last_name'))
                except (Profile.DoesNotExist,
                        Profile.MultipleObjectsReturned):
                    # Fall back to comparing full names; the last matching
                    # profile wins.
                    for p in Profile.objects.all():
                        if f'{p.user.first_name} {p.user.last_name}' == point.get(
                                'full_name'):
                            profile = p
                            log.info(
                                f'In-depth search revealed a profile matching the full name for `{workshop.name}` contributor `{point.get("first_name")} {point.get("last_name")}`. It may or may not be the correct person, so make sure you verify it manually.'
                            )

                    # Test the search result, not the loop variable, which is
                    # unbound when there are no profiles at all.
                    if not profile:
                        log.info(
                            f'Could not find user profile on the curriculum website for contributor `{point.get("full_name")}` (searching by first name `{point.get("first_name")}` and last name `{point.get("last_name")}`).'
                        )

                contributor, _ = Contributor.objects.update_or_create(
                    first_name=point.get('first_name'),
                    last_name=point.get('last_name'),
                    defaults={
                        'url': point.get('link'),
                        'profile': profile
                    })

                Collaboration.objects.update_or_create(
                    frontmatter=frontmatter,
                    contributor=contributor,
                    defaults={
                        'current': point.get('current'),
                        'role': point.get('role')
                    })

            # 3. ENTER PRAXIS
            praxis, _ = Praxis.objects.update_or_create(
                workshop=workshop,
                defaults={
                    'intro': praxisdata.get('intro'),
                })

            ordered_models = {
                'discussion_questions': DiscussionQuestion,
                'next_steps': NextStep,
            }
            for cat, model in ordered_models.items():
                # TODO: Should we pull out order manually here? Not necessary, right?
                for order, point in enumerate(praxisdata.get(cat) or [],
                                              start=1):
                    model.objects.update_or_create(
                        praxis=praxis,
                        label=point.get('annotation'),
                        defaults={'order': order})

            # As above, each key has the same name as the praxis relation.
            praxis_categories = {
                'further_readings': Resource.READING,
                'further_projects': Resource.PROJECT,
                'tutorials': Resource.TUTORIAL,
            }
            for cat, category in praxis_categories.items():
                points = praxisdata.get(cat)
                if not points:
                    continue

                add_field = getattr(praxis, cat)
                if not add_field or not category:
                    log.error(
                        f'Cannot interpret category `{cat}`. Make sure the script is correct and corresponds with the database structure.'
                    )
                    continue

                for point in points:
                    try:
                        obj, _ = Resource.objects.update_or_create(
                            category=category,
                            title=point.get('linked_text'),
                            url=point.get('url'),
                            annotation=point.get('annotation'))
                        if obj not in add_field.all():
                            add_field.add(obj)
                    except IntegrityError:
                        obj = Resource.objects.get(
                            category=category,
                            title=point.get('linked_text'),
                            url=point.get('url'),
                        )
                        obj.annotation = point.get('annotation')
                        # Persist the new annotation; without the save the
                        # assignment above is lost.
                        obj.save()
                        if obj not in add_field.all():
                            add_field.add(obj)
                        log.info(
                            f'Another resource with the same URL, title, and category already existed so updated with a new annotation: **{point.get("linked_text")} (old)**\n{point.get("annotation")}\n-------\n**{obj.title} (new)**\n{obj.annotation}'
                        )

            # 4. ENTER LESSONS
            for lessoninfo in lessondata:
                lesson, _ = Lesson.objects.update_or_create(
                    workshop=workshop,
                    title=lessoninfo.get('header'),
                    defaults={
                        'order': lessoninfo.get('order'),
                        'text': lessoninfo.get('content'),
                    })

                # A lesson without a `lesson_images` entry has no images.
                for image in lessoninfo.get('lesson_images') or []:
                    LessonImage.objects.update_or_create(url=image.get('path'),
                                                         lesson=lesson,
                                                         alt=image.get('alt'))

                challenge_data = lessoninfo.get('challenge')
                solution_data = lessoninfo.get('solution')

                if solution_data and not challenge_data:
                    log.error(
                        f'Lesson `{lesson.title}` (in workshop {workshop}) has a solution but no challenge. Correct the files on GitHub and rerun the buildworkshop command and then re-attempt the ingestworkshop command. Alternatively, you can change the datafile content manually.'
                    )

                if challenge_data:
                    challenge, _ = Challenge.objects.update_or_create(
                        lesson=lesson,
                        title=challenge_data.get('header'),
                        defaults={'text': challenge_data.get('content')})

                    if solution_data:
                        Solution.objects.update_or_create(
                            challenge=challenge,
                            title=solution_data.get('header'),
                            defaults={'text': solution_data.get('content')})

                evaluation_data = lessoninfo.get('evaluation')
                if evaluation_data:
                    evaluation, _ = Evaluation.objects.get_or_create(
                        lesson=lesson)
                    for point in evaluation_data.get('content'):
                        question, _ = Question.objects.update_or_create(
                            evaluation=evaluation, label=point.get('question'))
                        # Answers are grouped under keys; only the group
                        # keyed 'correct' is stored as correct.
                        for group, answers in point.get('answers').items():
                            is_correct = group == 'correct'
                            for answertext in answers:
                                Answer.objects.update_or_create(
                                    question=question,
                                    label=answertext,
                                    defaults={'is_correct': is_correct})

                keyword_data = lessoninfo.get('keywords')
                if keyword_data:
                    # keyword_data.get('header') # TODO: not doing anything with keyword header yet
                    for keyword in keyword_data.get('content'):
                        terms = Term.objects.filter(term__iexact=keyword)
                        # Count once instead of once per branch.
                        match_count = terms.count()
                        if match_count == 1:
                            lesson.terms.add(terms[0])
                        elif match_count == 0:
                            log.warning(
                                f'Keyword `{keyword}` (used in lesson `{lesson.title}`, workshop `{workshop}` cannot be found in the existing glossary. Are you sure it is in the glossary and synchronized with the database? Make sure the data file for glossary is available ({GLOSSARY_FILE}) and that the term is defined in the file. Then run python manage.py ingestglossary.'
                            )
                        else:
                            log.error(
                                f'Multiple definitions of `{keyword}` exists in the database. Try resetting the glossary and rerun python manage.py ingestglossary before you run the ingestworkshop command again.'
                            )

        log.log('Added/updated workshops: ' +
                ', '.join([x[0] for x in workshops]))
        if not options.get('no_reminder'):
            log.log(
                'Do not forget to run `ingestprerequisites` after running the `ingestworkshop` command (without the --name flag).',
                color='yellow')

        # Run all three saves before testing the results: an `or` chain stops
        # at the first truthy return value and would skip the remaining files.
        saved = [
            log._save(data='ingestworkshop', name='warnings.md',
                      warnings=True),
            log._save(data='ingestworkshop',
                      name='logs.md',
                      warnings=False,
                      logs=True),
            log._save(data='ingestworkshop',
                      name='info.md',
                      warnings=False,
                      logs=False,
                      info=True),
        ]
        if any(saved):
            log.log(
                f'Log files with any warnings and logging information is now available in: `{log.LOG_DIR}`',
                force=True)
示例#2
0
class GitHubParser():
    """Turn markdown snippets into HTML and post-process the result.

    Conversion is delegated to `GitHubParserCache`; this class rewrites
    links that point at DHRI-Curriculum repositories and offers a few
    string/HTML helpers.
    """

    def __init__(self, string: str = None, log=None):
        """Store the logger to use.

        Args:
            string: Accepted for compatibility; not used by the constructor.
            log: Logger to report to. A `Logger` named ``github-parser`` is
                created when none is given.
        """
        self.log = Logger(name='github-parser') if log is None else log

    def convert(self, string):
        """Return the stripped ``markdown`` entry of the cache data for `string`."""
        c = GitHubParserCache(string=string)
        return (c.data.get('markdown', '').strip())

    def strip_from_p(self, html):
        """Return the inner markup of the first ``<p>`` in `html`.

        `html` is returned unchanged when it contains no paragraph.
        """
        soup = BeautifulSoup(html, 'lxml')
        if soup.p:
            return ''.join([str(x) for x in soup.p.children])
        return html

    def _fix_link(self, tag):
        """Rewrite the ``href`` of `tag` when it points at curriculum content.

        Only absolute links (containing an ``http:`` or ``https:`` path
        element) that also contain a ``DHRI-Curriculum`` path element are
        considered. Glossary terms, insights, installation guides and known
        workshops are redirected to shortcut URLs on the curriculum website;
        every other link is left untouched.

        Args:
            tag: Anchor element; anything offering ``.get('href')`` and item
                assignment.

        Returns:
            The same `tag` object, possibly with a modified ``href``.
        """
        def find_workshop(elements):
            # '{GH_CURRICULUM}' is a literal sentinel meaning "the link ends
            # at the organisation itself"; '' means "no known workshop".
            if elements[-1] == 'DHRI-Curriculum':
                return '{GH_CURRICULUM}'
            known_workshops = [x[0] for x in AUTO_REPOS]
            for element in elements:
                if element in known_workshops:
                    return element
            return ''

        # Anchors without an href (e.g. named anchors) have nothing to fix;
        # indexing them directly would raise KeyError.
        href = tag.get('href')
        if not href:
            return tag

        elements = href.split('/')

        # Local (`#...`) and relative links are never rewritten.
        is_absolute = 'http:' in elements or 'https:' in elements
        if not is_absolute or 'DHRI-Curriculum' not in elements:
            return tag

        if 'glossary' in elements and 'terms' in elements:
            term = elements[-1].replace('.md', '')
            self.log.info(
                f'Found link to an **glossary term** and adding shortcut link to: curriculum.dhinstitutes.org/shortcuts/term/{term}'
            )
            tag['href'] = f'https://curriculum.dhinstitutes.org/shortcuts/term/{term}'
        elif 'insights' in elements and 'pages' in elements:
            insight = elements[-1].replace(".md", "")
            self.log.info(
                f'Found link to an **insight** and adding shortcut link to: curriculum.dhinstitutes.org/shortcuts/insight/{insight}'
            )
            tag['href'] = f'https://curriculum.dhinstitutes.org/shortcuts/insight/{insight}'
        elif 'install' in elements and 'guides' in elements:
            install = elements[-1].replace(".md", "")
            self.log.info(
                f'Found link to an **installation** and adding shortcut link to: curriculum.dhinstitutes.org/shortcuts/install/{install}'
            )
            tag['href'] = f'https://curriculum.dhinstitutes.org/shortcuts/install/{install}'
        elif 'raw.githubusercontent.com' in elements:
            raw_link = '/'.join(elements)
            self.log.info(
                f'Found link to **raw file** and will not change link: {raw_link}'
            )
        else:
            workshop = find_workshop(elements)
            if workshop == '{GH_CURRICULUM}':
                gh_link = '/'.join(elements)
                self.log.info(
                    f'Link found to **the DHRI Curriculum on GitHub**, linking to it: {gh_link}'
                )
            elif workshop == '':
                gh_link = '/'.join(elements)
                self.log.warning(
                    f'Found link to workshop, which is not currently being loaded into the website, will therefore redirect to **workshop on GitHub**: {gh_link}'
                )
            else:
                self.log.info(
                    f'Found link to **workshop** which (will) exist(s) on website, so changing to that: curriculum.dhinstitutes.org/workshops/{workshop}'
                )
                tag['href'] = f'https://curriculum.dhinstitutes.org/shortcuts/workshop/{workshop}'
        return tag

    def fix_html(self, text):
        """Convert `text` to HTML, curl its quotes and fix curriculum links.

        Args:
            text: Source text; falsy input yields ``''``.

        Returns:
            The HTML string. For single-line input that converts to exactly
            one paragraph, only the inner markup of that paragraph is
            returned; otherwise all children of ``<body>`` are returned.
        """
        if not text:
            return ''

        # Decided on the raw input, before conversion.
        multiline = '\n' in text

        # Make text into HTML...
        text = self.convert(text)
        text = smartypants.smartypants(text)  # curly quote it

        soup = BeautifulSoup(text, 'lxml')
        body = soup.body
        if body is None:
            # No <body> was produced (presumably because the converted
            # markup was empty), so there is nothing to return.
            return ''

        # TODO: Drop links that have no text
        for tag in soup.find_all('a'):
            self._fix_link(tag)

        if not multiline and len(list(body.children)) == 1 and body.p:
            # We only have one paragraph, so return the _text only_ from the p
            return ''.join([str(x) for x in body.p.children])

        return ''.join([str(x) for x in body.children])

    def quote_converter(self, string, reverse=False):
        """Takes a string and returns it with dumb quotes, single and double,
        replaced by smart quotes. Accounts for the possibility of HTML tags
        within the string.

        Args:
            string: Text to convert; ``None`` and ``''`` are returned as is.
            reverse: When true, replace smart quotes with dumb quotes instead.

        Raises:
            TypeError: If `string` is neither ``None`` nor a ``str``.
        """
        if string is None:
            return None

        if not isinstance(string, str):
            # Raise instead of printing and calling exit(), which ended the
            # whole process with a success status.
            raise TypeError(
                f'quote_converter expected a string, got {type(string).__name__}: {string!r}'
            )

        if string == '':
            return string

        if reverse:
            string = string.replace('“', '"').replace('”', '"')
            string = string.replace('‘', "'").replace("’", "'")
            return string

        # Find dumb double quotes coming directly after letters or punctuation,
        # and replace them with right double quotes.
        string = re.sub(r'([a-zA-Z0-9.,?!;:\'\"])"', r'\1”', string)
        # Find any remaining dumb double quotes and replace them with
        # left double quotes.
        string = string.replace('"', '“')

        # Follow the same process with dumb/smart single quotes
        string = re.sub(r"([a-zA-Z0-9.,?!;:\"\'])'", r'\1’', string)
        string = string.replace("'", '‘')

        return string
示例#3
0
    def handle(self, *args, **options):
        """Ingest the glossary datafile into the database.

        Every entry of the YAML file at ``FULL_PATH`` is stored as a `Term`
        together with its tutorials, readings and cheat sheets. An existing
        term is only overwritten after the user confirms, unless the
        ``force`` option is set. Afterwards the collected log output is saved.

        Options read: ``verbose``, ``silent``, ``force``.
        """
        log = Logger(path=__file__,
                     force_verbose=options.get('verbose'),
                     force_silent=options.get('silent'))
        prompt = Input(path=__file__)

        test_for_required_files(REQUIRED_PATHS=REQUIRED_PATHS, log=log)
        data = get_yaml(FULL_PATH, log=log)

        for termdata in data:
            term_name = termdata.get('term')
            try:
                term, created = Term.objects.get_or_create(term=term_name)
            except IntegrityError:
                # Creation failed, so look the term up by its slug instead.
                try:
                    term = Term.objects.get(slug=dhri_slugify(term_name))
                except (Term.DoesNotExist, Term.MultipleObjectsReturned):
                    log.error('An unknown error occurred. Try')
                    # Without a term there is nothing to update if the error
                    # above did not abort the command.
                    continue
                created = False

            # Ask before anything is overwritten, so that declining really
            # leaves the stored definition untouched.
            if not created and not options.get('force'):
                choice = prompt.ask(
                    f'Term `{termdata.get("term")}` already exists. Update with new definition? [y/N]'
                )
                if choice.lower() != 'y':
                    continue

            term.term = term_name
            term.explication = termdata.get('explication')
            term.save()

            # NOTE(review): this looks redundant with the save() above. It is
            # kept because Term.save() is not visible from here.
            Term.objects.filter(term=term_name).update(
                explication=termdata.get('explication'))

            term.refresh_from_db()

            # Each datafile key has the same name as the relation on the term.
            term_categories = {
                'tutorials': Resource.TUTORIAL,
                'readings': Resource.READING,
                'cheat_sheets': Resource.CHEATSHEET,
            }
            for cat, category in term_categories.items():
                points = termdata.get(cat)
                if not points:
                    continue

                add_field = getattr(term, cat)
                if not add_field or not category:
                    log.error(
                        f'Cannot interpret category `{cat}`. Make sure the script is correct and corresponds with the database structure.'
                    )
                    continue

                for point in points:
                    try:
                        obj, _ = Resource.objects.update_or_create(
                            category=category,
                            title=point.get('linked_text'),
                            url=point.get('url'),
                            annotation=point.get('annotation'))
                        if obj not in add_field.all():
                            add_field.add(obj)
                    except IntegrityError:
                        obj = Resource.objects.get(
                            category=category,
                            title=point.get('linked_text'),
                            url=point.get('url'),
                        )
                        obj.annotation = point.get('annotation')
                        # Persist the new annotation; without the save the
                        # assignment above is lost.
                        obj.save()
                        if obj not in add_field.all():
                            add_field.add(obj)
                        log.info(
                            f'Another resource with the same URL, title, and category already existed so updated with a new annotation: **{point.get("linked_text")} (old)**\n{point.get("annotation")}\n-------\n**{obj.title} (new)**\n{obj.annotation}'
                        )

        log.log('Added/updated terms: ' +
                ', '.join([x.get('term') for x in data]))

        # Run all three saves before testing the results: an `or` chain stops
        # at the first truthy return value and would skip the remaining files.
        saved = [
            log._save(data='ingestglossary', name='warnings.md',
                      warnings=True),
            log._save(data='ingestglossary',
                      name='logs.md',
                      warnings=False,
                      logs=True),
            log._save(data='ingestglossary',
                      name='info.md',
                      warnings=False,
                      logs=False,
                      info=True),
        ]
        if any(saved):
            log.log(
                f'Log files with any warnings and logging information is now available in: `{log.LOG_DIR}`',
                force=True)
示例#4
0
    def handle(self, *args, **options):
        log = Logger(path=__file__,
                     force_verbose=options.get('verbose'),
                     force_silent=options.get('silent'))
        input = Input(path=__file__)

        test_for_required_files(REQUIRED_PATHS=REQUIRED_PATHS, log=log)
        data = get_yaml(FULL_PATH, log=log)

        for installdata in data:
            for operating_system in installdata.get('instructions'):
                software, created = Software.objects.get_or_create(
                    operating_system=operating_system,
                    software=installdata.get('software'))
                instruction, created = Instruction.objects.update_or_create(
                    software=software,
                    defaults={
                        'what': installdata.get('what'),
                        'why': installdata.get('why')
                    })

                original_file = installdata.get('image')
                if original_file:
                    if instruction_image_exists(original_file) and filecmp.cmp(
                            original_file,
                            get_instruction_image_path(original_file),
                            shallow=False) == True:
                        log.log(
                            f'Instruction image already exists. Ensuring path is in database: `{get_instruction_image_path(original_file)}`'
                        )
                        instruction.image.name = get_instruction_image_path(
                            original_file, True)
                        instruction.save()
                    else:
                        with open(original_file, 'rb') as f:
                            instruction.image = File(f,
                                                     name=os.path.basename(
                                                         f.name))
                            instruction.save()
                        if filecmp.cmp(
                                original_file,
                                get_instruction_image_path(original_file)):
                            log.info(
                                f'Instruction image has been updated so being copied to media path: `{get_instruction_image_path(original_file)}`'
                            )
                        else:
                            log.info(
                                f'Instruction image has been copied to media path: `{get_instruction_image_path(original_file)}`'
                            )
                else:
                    log.warning(
                        f'An image for `{software}` does not exist. A default image will be saved instead. If you want a particular image for the installation instructions, follow the documentation.'
                    )
                    instruction.image.name = get_default_instruction_image()
                    instruction.save()

                for stepdata in installdata.get('instructions').get(
                        operating_system):
                    step, created = Step.objects.update_or_create(
                        instruction=instruction,
                        order=stepdata.get('step'),
                        defaults={
                            'header': stepdata.get('header'),
                            'text': stepdata.get('html')
                        })

                    for order, d in enumerate(stepdata.get('screenshots'),
                                              start=1):
                        path = d['path']
                        alt_text = d['alt']
                        if os.path.exists(get_screenshot_media_path(
                                path)) and filecmp.cmp(
                                    path,
                                    get_screenshot_media_path(path),
                                    shallow=False) == True:
                            s, _ = Screenshot.objects.get_or_create(
                                step=step, alt_text=alt_text, order=order)
                            s.image = get_screenshot_media_path(
                                path, relative_to_upload_field=True)
                            s.save()
                            log.log(
                                f'Screenshot already exists: `{get_screenshot_media_path(path)}`'
                            )
                        else:
                            s, _ = Screenshot.objects.get_or_create(
                                step=step, alt_text=alt_text, order=order)
                            with open(path, 'rb') as f:
                                s.image = File(f,
                                               name=os.path.basename(f.name))
                                s.save()
                            if filecmp.cmp(path,
                                           get_screenshot_media_path(path),
                                           shallow=False) == False:
                                log.log(
                                    f'Screenshot was updated so re-saved: `{get_screenshot_media_path(path)}`'
                                )
                            else:
                                log.log(
                                    f'New screenshot saved: `{get_screenshot_media_path(path)}`'
                                )

        log.log('Added/updated installation instructions: ' +
                ', '.join([f'{x["software"]}' for x in data]))

        if log._save(data='ingestinstalls', name='warnings.md',
                     warnings=True) or log._save(data='ingestinstalls',
                                                 name='logs.md',
                                                 warnings=False,
                                                 logs=True) or log._save(
                                                     data='ingestinstalls',
                                                     name='info.md',
                                                     warnings=False,
                                                     logs=False,
                                                     info=True):
            log.log(
                f'Log files with any warnings and logging information is now available in: `{log.LOG_DIR}`',
                force=True)
    def handle(self, *args, **options):
        """Ingest insights from the YAML file at ``FULL_PATH`` into the database.

        For every entry in the loaded YAML data this method:

        1. creates or updates the ``Insight`` (matched on its title),
        2. attaches the insight's image, or a default image when the entry
           has no image URL,
        3. creates or updates one ``Section`` per key under ``sections``,
        4. creates or updates one ``OperatingSystemSpecificSection`` per key
           under ``os_specific``, linked to a section of the same insight.

        Finally the warning/log/info files are written through the logger.

        Args:
            *args: Unused positional arguments.
            **options: Command options; ``verbose`` and ``silent`` are
                forwarded to the ``Logger``.
        """
        # Local import so the method does not depend on a ``self.os``
        # attribute (the original called ``self.os.path.basename``).
        import os

        log = Logger(path=__file__,
                     force_verbose=options.get('verbose'),
                     force_silent=options.get('silent'))
        # NOTE(review): ``input`` is never used below and shadows the builtin;
        # kept in case constructing ``Input`` has side effects — TODO confirm.
        input = Input(path=__file__)

        test_for_required_files(REQUIRED_PATHS=REQUIRED_PATHS, log=log)
        data = get_yaml(FULL_PATH, log=log)

        for insightdata in data:
            # A missing ``image`` key is treated like an entry without an
            # image so that the default-image branch below can handle it.
            imagedata = insightdata.get('image') or {}

            # TODO: Insights and Software are also connected in a database table (insight_insight_software) but this relationship is not developed yet.
            insight, _ = Insight.objects.update_or_create(
                title=insightdata.get('insight'),
                defaults={
                    'text': insightdata.get('introduction'),
                    'image_alt': imagedata.get('alt')
                })

            original_file = imagedata.get('url')
            if original_file:
                media_path = get_insight_image_path(original_file)
                # Record this BEFORE saving: once the file has been copied
                # we can no longer tell "new" from "updated".
                already_in_media = insight_image_exists(original_file)

                if already_in_media and filecmp.cmp(
                        original_file, media_path, shallow=False):
                    log.log(
                        f'Insight image already exists. Connecting existing paths to database: `{media_path}`'
                    )
                    insight.image.name = get_insight_image_path(
                        original_file, True)
                    insight.save()
                else:
                    with open(original_file, 'rb') as f:
                        insight.image = File(f,
                                             name=os.path.basename(f.name))
                        insight.save()

                    if already_in_media:
                        log.info(
                            f'Insight image has been updated and thus was copied to the media path: `{media_path}`'
                        )
                    else:
                        log.info(
                            f'Insight image was not found and is copied to media path: `{media_path}`'
                        )
            else:
                log.warning(
                    f'An image for `{insight}` does not exist. A default image will be saved instead. If you want a particular image for the insight, follow the documentation.'
                )
                insight.image.name = get_default_insight_image()
                insight.save()

            # ``sections`` maps a section title to its data.
            sections = insightdata.get('sections') or {}
            for title, sectiondata in sections.items():
                Section.objects.update_or_create(
                    insight=insight,
                    title=title,
                    defaults={
                        'order': sectiondata.get('order'),
                        'text': sectiondata.get('content')
                    })

            # ``os_specific`` is optional, just like ``sections``.
            os_specific = insightdata.get('os_specific') or {}
            for operating_system, osdata in os_specific.items():
                # Scope the lookup to this insight: section titles are only
                # matched per insight above, so a bare title lookup could hit
                # another insight's section or several rows at once.
                related_section = Section.objects.get(
                    insight=insight,
                    title=osdata.get('related_section'))

                OperatingSystemSpecificSection.objects.update_or_create(
                    section=related_section,
                    operating_system=operating_system,
                    defaults={'text': osdata.get('content')})

        log.log('Added/updated insights: ' +
                ', '.join(x.get("insight") for x in data))

        # Evaluate every save eagerly. Chaining the calls with ``or`` would
        # short-circuit and skip the remaining files as soon as one call
        # returned a truthy value.
        saved = [
            log._save(data='ingestinsights',
                      name='warnings.md',
                      warnings=True),
            log._save(data='ingestinsights',
                      name='logs.md',
                      warnings=False,
                      logs=True),
            log._save(data='ingestinsights',
                      name='info.md',
                      warnings=False,
                      logs=False,
                      info=True),
        ]
        if any(saved):
            log.log(
                f'Log files with any warnings and logging information is now available in: `{log.LOG_DIR}`',
                force=True)