def _build_tree(node, sourcetree): for child_source_node in sourcetree: try: main_file = child_source_node['files'][0] if 'files' in child_source_node else {} kind = guess_content_kind(path=main_file.get('path'), web_video_data=main_file.get('youtube_id') or main_file.get('web_url'), questions=child_source_node.get("questions")) except UnknownContentKindError: continue if kind == content_kinds.TOPIC: child_node = nodes.TopicNode( source_id=child_source_node["id"], title=child_source_node["title"], author=child_source_node.get("author"), description=child_source_node.get("description"), thumbnail=child_source_node.get("thumbnail"), ) node.add_child(child_node) source_tree_children = child_source_node.get("children", []) _build_tree(child_node, source_tree_children) elif kind == content_kinds.EXERCISE: # node_data = json.dumps(child_source_node) if int(len(child_source_node['questions'])) < 5: exercise_data = { 'mastery_model': exercises.DO_ALL, 'randomize': True, } else: exercise_data={ 'mastery_model': exercises.M_OF_N, 'randomize': True, 'm': 4, 'n': 5, } child_node = nodes.ExerciseNode( source_id=child_source_node["id"], title=child_source_node["title"], license=child_source_node.get("license"), author=child_source_node.get("author"), description=child_source_node.get("description"), exercise_data=exercise_data, copyright_holder='GreyKite Technologies Pvt. Ltd.', thumbnail=child_source_node.get("thumbnail"), ) add_files(child_node, child_source_node.get("files") or []) for q in child_source_node.get("questions"): question = create_question(q) child_node.add_question(question) node.add_child(child_node) else: # unknown content file format continue return node
def fetch_assessment_topic_items(driver, topic_node, topic_url,
                                 topic_short_title, thumbnail=None):
    """Fetch the individual assessment items for a given topic.

    Starting at `topic_url`, loads each item page with `driver`, converts it
    with `fetch_assessment_item` and follows the returned next-item URL until
    that URL is falsy. Every QUESTIONS_PER_EXERCISE consecutive items are
    grouped into one exercise node, which is added to `topic_node`.

    Args:
        driver: browser driver; `get(url)` and `current_url` are used here.
        topic_node: node that receives the exercise nodes via `add_child`.
        topic_url: URL of the first assessment item. If falsy, nothing is
            fetched and no exercise node is created.
        topic_short_title: title prefix handed to `_title_exercise`.
        thumbnail: optional thumbnail passed to every exercise node.

    Returns:
        None. The exercise nodes are attached to `topic_node` in place.
    """
    next_item_url = topic_url
    item_count = 0
    # Stay None until the first item is seen so an empty topic does not hit
    # an unbound local when the last exercise is re-titled below.
    exercise_node = None
    first_item_index_in_exercise = 0

    while next_item_url:
        driver.get(next_item_url)
        current_url = driver.current_url
        item_id = current_url.split('/')[-1]
        print(' Fetching question %s (%s)' % (item_count + 1, current_url))

        # Start a new exercise node every QUESTIONS_PER_EXERCISE questions.
        if item_count % QUESTIONS_PER_EXERCISE == 0:
            first_item_index_in_exercise = item_count
            exercise_title = _title_exercise(
                topic_short_title,
                item_count + 1,
                item_count + QUESTIONS_PER_EXERCISE)
            exercise_node = nodes.ExerciseNode(
                source_id=item_id,
                title=exercise_title,
                license=LICENSE,
                thumbnail=thumbnail,
                exercise_data={'randomize': False})
            topic_node.add_child(exercise_node)

        # Now try to convert the page HTML into an assessment item, retrying
        # on error, and then skipping any missing images after a few failed
        # retries.
        for attempt in range(4):
            try:
                page_html = get_generated_html_from_driver(driver)
                question, next_item_url = fetch_assessment_item(
                    page_html, item_id)
                break
            except Exception as e:  # deliberately broad: any failure retries
                wait_time = 2 ** attempt
                print("Got an error, retrying after a wait of %s seconds. "
                      "Error was: %s" % (wait_time, str(e)))
                # Reload the page before the next attempt.
                driver.get(current_url)
                time.sleep(wait_time)
        else:
            # Every attempt failed; a failure in this last call propagates.
            print("Going to try skipping any missing images")
            page_html = get_generated_html_from_driver(driver)
            question, next_item_url = fetch_assessment_item(
                page_html, item_id, skip_missing_images=True)

        exercise_node.add_question(question)
        item_count += 1

    # Re-title the last exercise in the topic, which may hold fewer than
    # QUESTIONS_PER_EXERCISE items (e.g. re-title it "Genetics 10-12").
    if exercise_node is not None:
        exercise_node.title = _title_exercise(
            topic_short_title, first_item_index_in_exercise + 1, item_count)
def generate_pdf_nodes(data, topic, source=""):
    """
    Build the nodes for a pdf and attach them to `topic`.

    Each entry of `data` is either a section (truthy 'header') or a chapter
    (truthy 'chapter'); entries with neither are ignored. A section becomes a
    sub-topic and its 'chapters' are processed recursively. A chapter becomes
    a document node followed by one exercise node per listed exercise.

    Args:
        - data (iterable of dict) pdf details (split pdfs, file paths, exercises, etc.)
        - topic (TopicNode) node to add sub nodes to
        - source (str) unique string associated with this pdf; used as the
          prefix of every generated source id
    Returns None
    """
    for entry in data:
        section_title = entry.get('header')
        if section_title:
            # Section: create a sub-topic and descend into its chapters.
            section_id = "{}-{}".format(source, section_title)
            section_node = nodes.TopicNode(title=section_title, source_id=section_id)
            topic.add_child(section_node)
            generate_pdf_nodes(entry['chapters'], section_node, source=section_id)
            continue

        chapter_title = entry.get("chapter")
        if not chapter_title:
            continue

        # Chapter: one document node for the pdf itself ...
        document_id = "{}-{}".format(source, chapter_title)
        topic.add_child(
            nodes.DocumentNode(
                title=chapter_title,
                source_id=document_id,
                copyright_holder=COPYRIGHT_HOLDER,
                license=LICENSE,
                files=[files.DocumentFile(entry['path'])],
            )
        )

        # ... followed by its exercises, numbered from 0 in the source id.
        chapter_exercises = entry.get("exercises") or []
        for position, exercise_info in enumerate(chapter_exercises):
            node_for_exercise = nodes.ExerciseNode(
                title=chapter_title,
                source_id="{} Exercise {}".format(document_id, position),
                description=exercise_info.get('description'),
                copyright_holder=COPYRIGHT_HOLDER,
                license=LICENSE,
            )
            topic.add_child(node_for_exercise)
            create_exercise_questions(
                node_for_exercise, exercise_info.get('questions') or [])
def _build_tree(node, sourcetree): """ Parse nodes given in `sourcetree` and add as children of `node`. """ for child_source_node in sourcetree: try: main_file = child_source_node['files'][0] if 'files' in child_source_node else {} kind = guess_content_kind(path=main_file.get('path'), web_video_data=main_file.get('youtube_id') or main_file.get('web_url'), questions=child_source_node.get("questions")) except UnknownContentKindError: continue if kind == content_kinds.TOPIC: child_node = nodes.TopicNode( source_id=child_source_node["id"], title=child_source_node["title"], author=child_source_node.get("author"), description=child_source_node.get("description"), thumbnail=child_source_node.get("thumbnail"), ) node.add_child(child_node) source_tree_children = child_source_node.get("children", []) _build_tree(child_node, source_tree_children) elif kind == content_kinds.VIDEO: child_node = nodes.VideoNode( source_id=child_source_node["id"], title=child_source_node["title"], license=get_license(child_source_node.get("license"), description="Description of license"), author=child_source_node.get("author"), description=child_source_node.get("description"), derive_thumbnail=True, # video-specific data thumbnail=child_source_node.get('thumbnail'), ) add_files(child_node, child_source_node.get("files") or []) node.add_child(child_node) elif kind == content_kinds.AUDIO: child_node = nodes.AudioNode( source_id=child_source_node["id"], title=child_source_node["title"], license=child_source_node.get("license"), author=child_source_node.get("author"), description=child_source_node.get("description"), thumbnail=child_source_node.get("thumbnail"), ) add_files(child_node, child_source_node.get("files") or []) node.add_child(child_node) elif kind == content_kinds.DOCUMENT: child_node = nodes.DocumentNode( source_id=child_source_node["id"], title=child_source_node["title"], license=child_source_node.get("license"), author=child_source_node.get("author"), 
description=child_source_node.get("description"), thumbnail=child_source_node.get("thumbnail"), ) add_files(child_node, child_source_node.get("files") or []) node.add_child(child_node) elif kind == content_kinds.EXERCISE: child_node = nodes.ExerciseNode( source_id=child_source_node["id"], title=child_source_node["title"], license=child_source_node.get("license"), author=child_source_node.get("author"), description=child_source_node.get("description"), exercise_data={}, # Just set to default thumbnail=child_source_node.get("thumbnail"), ) add_files(child_node, child_source_node.get("files") or []) for q in child_source_node.get("questions"): question = create_question(q) child_node.add_question(question) node.add_child(child_node) elif kind == content_kinds.HTML5: child_node = nodes.HTML5AppNode( source_id=child_source_node["id"], title=child_source_node["title"], license=child_source_node.get("license"), author=child_source_node.get("author"), description=child_source_node.get("description"), thumbnail=child_source_node.get("thumbnail"), ) add_files(child_node, child_source_node.get("files") or []) node.add_child(child_node) else: # unknown content file format continue return node
def build_tree_from_json(parent_node, sourcetree):
    """
    Recursively parse nodes in the list `sourcetree` and add them as children
    to the `parent_node`. Usually called with `parent_node` being a `ChannelNode`.

    Args:
        parent_node: node that receives the children through `add_child`.
        sourcetree (list of dict): json node descriptions. Every entry needs
            a 'kind' and a 'title'; every non-topic entry also needs a
            'source_id' and a 'license' dict, which is expanded into
            `get_license(**license)`.

    Returns:
        `parent_node`, with the converted nodes attached.

    Raises:
        NotImplementedError: if an entry's kind is not an expected node type.
        KeyError: if a required key is missing from an entry.
    """
    EXPECTED_NODE_TYPES = [
        TOPIC_NODE, VIDEO_NODE, AUDIO_NODE,
        EXERCISE_NODE, DOCUMENT_NODE, HTML5_NODE,
    ]

    def content_fields(entry):
        """Return the keyword arguments shared by every non-topic node."""
        return dict(
            source_id=entry["source_id"],
            title=entry["title"],
            license=get_license(**entry['license']),
            author=entry.get("author"),
            description=entry.get("description"),
            language=entry.get('language', None),
            thumbnail=entry.get("thumbnail"),
        )

    for source_node in sourcetree:
        kind = source_node['kind']
        if kind not in EXPECTED_NODE_TYPES:
            # %-style logging args: a non-string kind (e.g. json null) must
            # not raise TypeError here and hide the NotImplementedError.
            LOGGER.critical('Unexpected node type found: %s', kind)
            raise NotImplementedError(
                'Unexpected node type found in json data.')

        # The membership check above guarantees that one branch matches.
        if kind == TOPIC_NODE:
            child_node = nodes.TopicNode(
                source_id=source_node.get("source_id", None),
                title=source_node["title"],
                author=source_node.get("author"),
                description=source_node.get("description"),
                language=source_node.get('language', None),
                thumbnail=source_node.get("thumbnail"),
            )
            parent_node.add_child(child_node)
            build_tree_from_json(child_node, source_node.get("children") or [])

        elif kind == VIDEO_NODE:
            child_node = nodes.VideoNode(
                derive_thumbnail=source_node.get(
                    'derive_thumbnail', True),  # video-specific option
                **content_fields(source_node)
            )
            add_files(child_node, source_node.get("files") or [])
            parent_node.add_child(child_node)

        elif kind == AUDIO_NODE:
            child_node = nodes.AudioNode(**content_fields(source_node))
            add_files(child_node, source_node.get("files") or [])
            parent_node.add_child(child_node)

        elif kind == EXERCISE_NODE:
            child_node = nodes.ExerciseNode(
                questions=[],
                **content_fields(source_node)
            )
            add_questions(child_node, source_node.get("questions") or [])
            parent_node.add_child(child_node)

        elif kind == DOCUMENT_NODE:
            child_node = nodes.DocumentNode(**content_fields(source_node))
            add_files(child_node, source_node.get("files") or [])
            parent_node.add_child(child_node)

        elif kind == HTML5_NODE:
            child_node = nodes.HTML5AppNode(**content_fields(source_node))
            add_files(child_node, source_node.get("files") or [])
            parent_node.add_child(child_node)

    return parent_node
def build_tree_from_json(parent_node, sourcetree):
    """
    Recursively parse nodes in the list `sourcetree` and add them as children
    to the `parent_node`. Usually called with `parent_node` being a `ChannelNode`.

    Args:
        parent_node: node that receives the children through `add_child`.
        sourcetree (list of dict): json node descriptions. Every entry needs
            a 'kind' and a 'title'; every non-topic entry also needs a
            'source_id' and a 'license' dict, which is expanded into
            `get_license(**license)`. A missing 'role' defaults to
            `roles.LEARNER`.

    Returns:
        `parent_node`, with the converted nodes attached.

    Raises:
        NotImplementedError: if an entry's kind is not an expected node kind.
        KeyError: if a required key is missing from an entry.
    """
    EXPECTED_NODE_TYPES = [
        TOPIC_NODE, VIDEO_NODE, AUDIO_NODE, EXERCISE_NODE,
        DOCUMENT_NODE, HTML5_NODE, SLIDESHOW_NODE,
    ]

    def content_fields(entry):
        """Return the keyword arguments shared by every non-topic node."""
        return dict(
            source_id=entry['source_id'],
            title=entry['title'],
            description=entry.get('description'),
            license=get_license(**entry['license']),
            author=entry.get('author'),
            aggregator=entry.get('aggregator'),
            provider=entry.get('provider'),
            role=entry.get('role', roles.LEARNER),
            language=entry.get('language'),
            thumbnail=entry.get('thumbnail'),
            tags=entry.get('tags'),
        )

    for source_node in sourcetree:
        kind = source_node['kind']
        if kind not in EXPECTED_NODE_TYPES:
            # %-style logging args: a non-string kind (e.g. json null) must
            # not raise TypeError here and hide the NotImplementedError.
            LOGGER.critical('Unexpected node kind found: %s', kind)
            raise NotImplementedError('Unexpected node kind found in json data.')

        # The membership check above guarantees that one branch matches.
        if kind == TOPIC_NODE:
            child_node = nodes.TopicNode(
                source_id=source_node.get('source_id', None),
                title=source_node['title'],
                description=source_node.get('description'),
                author=source_node.get('author'),
                aggregator=source_node.get('aggregator'),
                provider=source_node.get('provider'),
                # no role for topics (computed dynamically from descendants)
                language=source_node.get('language'),
                thumbnail=source_node.get('thumbnail'),
                tags=source_node.get('tags'),
            )
            parent_node.add_child(child_node)
            build_tree_from_json(child_node, source_node.get('children') or [])

        elif kind == VIDEO_NODE:
            child_node = nodes.VideoNode(
                derive_thumbnail=source_node.get('derive_thumbnail', True),  # video-specific option
                **content_fields(source_node)
            )
            add_files(child_node, source_node.get('files') or [])
            parent_node.add_child(child_node)

        elif kind == AUDIO_NODE:
            child_node = nodes.AudioNode(**content_fields(source_node))
            add_files(child_node, source_node.get('files') or [])
            parent_node.add_child(child_node)

        elif kind == EXERCISE_NODE:
            child_node = nodes.ExerciseNode(
                exercise_data=source_node.get('exercise_data'),
                questions=[],
                **content_fields(source_node)
            )
            add_questions(child_node, source_node.get('questions') or [])
            parent_node.add_child(child_node)

        elif kind == DOCUMENT_NODE:
            child_node = nodes.DocumentNode(**content_fields(source_node))
            add_files(child_node, source_node.get('files') or [])
            parent_node.add_child(child_node)

        elif kind == HTML5_NODE:
            child_node = nodes.HTML5AppNode(**content_fields(source_node))
            add_files(child_node, source_node.get('files') or [])
            parent_node.add_child(child_node)

        elif kind == SLIDESHOW_NODE:
            child_node = nodes.SlideshowNode(**content_fields(source_node))
            add_files(child_node, source_node.get('files') or [])
            parent_node.add_child(child_node)

    return parent_node
def upload_content(self, data, access_token, channel):
    """
    Convert the nested `data` mapping into nodes and attach them to `channel`.

    `data` is walked as language -> grade -> subject -> chapter -> topic.
    Language, subject, chapter and topic names are title-cased. Each level
    becomes a topic node whose source id is the dash-joined path of names
    down to that level. A topic named "Chapter Assessment" becomes an
    exercise directly under its chapter. Any other topic becomes a topic node
    holding one video node per link of its "video" content and one exercise
    node for every other content type.

    A video that fails to load is reported on stdout, recorded through
    `self.add_to_failed` and skipped.

    Args:
        data (dict): nested content description as outlined above.
        access_token: passed through to `self.video_node_from_dropbox`.
        channel: node that receives the language topics via `add_child`.

    Returns:
        `channel`.
    """

    def make_topic(title, source_id, lang_name):
        # Every topic level shares author, thumbnail and language lookup.
        return nodes.TopicNode(
            title=title,
            source_id=source_id,
            author="TicTacLearn",
            description='',
            thumbnail=TTL_MAIN_LOGO,
            language=getlang_by_name(lang_name),
        )

    def make_assessment(source_id, title, description, questions, **extra):
        # Mastery requires every question of the assessment to be answered.
        return nodes.ExerciseNode(
            source_id=source_id,
            title=title,
            author="TicTacLearn",
            description=description,
            license=licenses.CC_BYLicense("TicTacLearn"),
            thumbnail=TTL_MAIN_LOGO,
            exercise_data={
                "mastery_model": exercises.M_OF_N,
                "m": len(questions),
                "n": len(questions),
                "randomize": True,
            },
            questions=questions,
            **extra
        )

    for language_name, grades in data.items():
        # convert to title to apply title case for node titles
        language_name = language_name.title()
        language_topic = make_topic(language_name, language_name, language_name)

        for grade, subjects in grades.items():
            grade_id = "{}-{}".format(language_name, grade)
            grade_topic = make_topic(
                'Grade {}'.format(grade), grade_id, language_name)

            for subject, chapters in subjects.items():
                subject = subject.title()
                subject_id = "{}-{}".format(grade_id, subject)
                subject_topic = make_topic(subject, subject_id, language_name)

                for chapter, topics in chapters.items():
                    chapter = chapter.title()
                    chapter_id = "{}-{}".format(subject_id, chapter)
                    chapter_topic = make_topic(chapter, chapter_id, language_name)

                    for topic, topic_content in topics.items():
                        topic = topic.title()
                        topic_id = "{}-{}".format(chapter_id, topic)

                        if topic == "Chapter Assessment":
                            questions = self.create_question(topic_content.items())
                            chapter_exercise = make_assessment(
                                topic_id, topic, "Chapter Assessment", questions,
                                language=getlang_by_name(language_name))
                            chapter_topic.add_child(chapter_exercise)
                            continue

                        inner_topic = make_topic(topic, topic_id, language_name)

                        for content_type, content in topic_content.items():
                            if content_type != "video":
                                # content type is assessment
                                questions = self.create_question(content.items())
                                assessment_title = "{} Assessment".format(topic)
                                inner_topic.add_child(make_assessment(
                                    "{}-Assessment".format(topic_id),
                                    assessment_title,
                                    assessment_title,
                                    questions))
                                continue

                            for link, details in content.items():
                                try:
                                    video = self.video_node_from_dropbox(
                                        details, link, access_token)
                                    inner_topic.add_child(video)
                                except Exception as e:
                                    print(e)
                                    print("Error getting video from dropbox with link: {}"
                                          .format(link))
                                    self.add_to_failed(link, details, content_type)

                        chapter_topic.add_child(inner_topic)

                    subject_topic.add_child(chapter_topic)

                grade_topic.add_child(subject_topic)

            language_topic.add_child(grade_topic)

        channel.add_child(language_topic)

    return channel
def convert_ka_node_to_ricecooker_node(ka_node):
    """
    Convert a Khan Academy node into the matching ricecooker node.

    Topics are converted recursively, keeping only the children that convert
    to a truthy node. Exercises get one `PerseusQuestion` per assessment
    item. Videos get a low-resolution mp4 (falling back to the regular mp4)
    plus, for English videos, one subtitle file per subtitle language.
    Descriptions are truncated to 400 characters.

    Args:
        ka_node: a `KhanTopic`, `KhanExercise`, `KhanVideo` or `KhanArticle`.

    Returns:
        The converted node, or None when the node's slug is blacklisted, the
        node is an article (not supported yet) or its type is not handled.

    Raises:
        Exception: if a video's license is not in `LICENSE_MAPPING`.
    """
    if ka_node.slug in SLUG_BLACKLIST:
        return None

    # A missing description is treated as empty rather than failing to slice.
    description = (ka_node.description or "")[:400]

    if isinstance(ka_node, KhanTopic):
        topic = nodes.TopicNode(
            source_id=ka_node.id,
            title=ka_node.title,
            description=description,
        )
        for ka_subtopic in ka_node.children:
            subtopic = convert_ka_node_to_ricecooker_node(ka_subtopic)
            if subtopic:
                topic.add_child(subtopic)
        return topic

    if isinstance(ka_node, KhanExercise):
        exercise = nodes.ExerciseNode(
            source_id=ka_node.id,
            title=ka_node.title,
            description=description,
            license=licenses.SpecialPermissionsLicense(
                copyright_holder="Khan Academy",
                description="Permission granted to distribute through Kolibri for non-commercial use",
            ),  # need to formalize with KA
            thumbnail=ka_node.thumbnail,
        )
        for ka_assessment_item in ka_node.get_assessment_items():
            # Build each question from the loop item itself.
            exercise.add_question(
                PerseusQuestion(
                    id=ka_assessment_item.id,
                    raw_data=ka_assessment_item.data,
                    source_url=ka_assessment_item.source_url,
                )
            )
        return exercise

    if isinstance(ka_node, KhanVideo):
        # TODO: Use traditional compression here to avoid breaking existing KA downloads?
        video_files = [
            VideoFile(
                ka_node.download_urls.get(
                    "mp4-low", ka_node.download_urls.get("mp4")))
        ]

        # if the video is in English, include any subtitles available along with it
        if ka_node.lang == "en":
            for lang_code in ka_node.get_subtitle_languages():
                # NOTE(review): assumes the KA video id is the YouTube id
                # expected by YouTubeSubtitleFile - confirm.
                video_files.append(
                    YouTubeSubtitleFile(ka_node.id, language=lang_code))

        # convert KA's license format into our own license classes
        if ka_node.license not in LICENSE_MAPPING:
            raise Exception("Unknown license on video {}: {}".format(
                ka_node.id, ka_node.license))
        video_license = LICENSE_MAPPING[ka_node.license]

        return nodes.VideoNode(
            source_id=ka_node.id,
            title=ka_node.title,
            description=description,
            license=video_license,
            thumbnail=ka_node.thumbnail,
            files=video_files,
        )

    if isinstance(ka_node, KhanArticle):
        # TODO: articles are not converted yet
        return None

    return None