def run(self):
    """Preprocess each OBS project: copy root-level markdown files and emit
    one markdown file per chapter into the output directory.

    :return: True always
    """
    for project in self.rc.projects:
        project_dir = os.path.join(self.source_dir, project.path)
        # Copy every markdown file sitting in the project root, skipping
        # ignored names and files already present in the output directory.
        for src in glob(os.path.join(project_dir, '*.md')):
            base = os.path.basename(src)
            dest = os.path.join(self.output_dir, base)
            if os.path.isfile(src) and not os.path.exists(dest) and base not in self.ignoreFiles:
                copy(src, dest)
        if self.is_chunked(project):
            # Chunked project: stitch each chapter's frames into one markdown file.
            for chapter in self.get_chapters(project_dir):
                parts = ['# {0}\n\n'.format(chapter['title'])]
                for frame in chapter['frames']:
                    parts.append('![Frame {0}](https://cdn.door43.org/obs/jpg/360px/obs-en-{0}.jpg)\n\n'
                                 .format(frame.get('id')))
                    parts.append(frame['text'] + '\n\n')
                parts.append('_{0}_\n'.format(chapter['reference']))
                target = os.path.join(self.output_dir, '{0}.md'.format(chapter.get('id')))
                write_file(target, ''.join(parts))
        else:
            # Non-chunked project: prefer 01.md, fall back to intro.md.
            for chapter in self.rc.chapters(project.identifier):
                chosen = None
                for candidate in ('01.md', 'intro.md'):
                    candidate_path = os.path.join(project_dir, chapter, candidate)
                    if os.path.isfile(candidate_path):
                        chosen = candidate_path
                        break
                if chosen:
                    copy(chosen, os.path.join(self.output_dir, '{0}.md'.format(chapter)))
    return True
def update_project_file(build_log, output_dir):
    """Update the user's project.json on the CDN with the commit described by build_log.

    :param dict build_log: build log of the commit; requires commit_id,
        repo_owner, repo_name, created_at, status, success; started_at and
        ended_at are optional
    :param string output_dir: local directory where project.json is written
        before being uploaded
    :return: the updated project.json dict
    """
    commit_id = build_log['commit_id']
    user_name = build_log['repo_owner']
    repo_name = build_log['repo_name']
    project_json_key = 'u/{0}/{1}/project.json'.format(user_name, repo_name)
    project_json = App.cdn_s3_handler().get_json(project_json_key)
    project_json['user'] = user_name
    project_json['repo'] = repo_name
    project_json['repo_url'] = 'https://{0}/{1}/{2}'.format(App.gogs_url, user_name, repo_name)
    commit = {
        'id': commit_id,
        'created_at': build_log['created_at'],
        'status': build_log['status'],
        'success': build_log['success'],
        # Optional timestamps default to None when absent from the build log
        # (dict.get replaces the previous membership-test-then-index pattern).
        'started_at': build_log.get('started_at'),
        'ended_at': build_log.get('ended_at'),
    }
    if 'commits' not in project_json:
        project_json['commits'] = []
    # Drop any previous entry for this commit, then append the fresh one.
    commits = [c for c in project_json['commits'] if c['id'] != commit_id]
    commits.append(commit)
    project_json['commits'] = commits
    project_file = os.path.join(output_dir, 'project.json')
    write_file(project_file, project_json)
    App.cdn_s3_handler().upload_file(project_file, project_json_key, cache_time=0)
    return project_json
def run(self):
    """Compile each project's ToC sections into a numbered markdown manual
    and copy its toc.yaml / config.yaml alongside.

    :return: True always
    """
    for idx, project in enumerate(self.rc.projects):
        self.section_container_id = 1
        toc = self.rc.toc(project.identifier)
        title = (self.manual_title_map[project.identifier]
                 if project.identifier in self.manual_title_map
                 else '{0} Manual'.format(project.identifier.title()))
        pieces = ['# {0}\n\n'.format(title)]
        for section in toc['sections']:
            pieces.append(self.compile_section(project, section, 2))
        markdown = self.fix_links(''.join(pieces))
        prefix = str(idx + 1).zfill(2)
        output_file = os.path.join(self.output_dir, '{0}-{1}.md'.format(prefix, project.identifier))
        write_file(output_file, markdown)
        # Copy the toc and config.yaml file to the output dir so they can be used to
        # generate the ToC on live.door43.org
        toc_file = os.path.join(self.source_dir, project.path, 'toc.yaml')
        if os.path.isfile(toc_file):
            copy(toc_file, os.path.join(self.output_dir, '{0}-{1}-toc.yaml'.format(prefix, project.identifier)))
        config_file = os.path.join(self.source_dir, project.path, 'config.yaml')
        if os.path.isfile(config_file):
            copy(config_file, os.path.join(self.output_dir, '{0}-{1}-config.yaml'.format(prefix, project.identifier)))
    return True
def convert_obs(self):
    """Convert every OBS markdown file in files_dir to HTML in output_dir.

    Non-markdown files are copied over unchanged on a best-effort basis.
    """
    self.log.info('Processing OBS markdown files')
    # find the first directory that has md files.
    files = get_files(directory=self.files_dir, exclude=self.EXCLUDED_FILES)
    current_dir = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(current_dir, 'templates', 'template.html')) as template_file:
        html_template = string.Template(template_file.read())
    found_chapters = {}
    for filename in files:
        if filename.endswith('.md'):
            # Convert files that are markdown files
            with codecs.open(filename, 'r', 'utf-8-sig') as md_file:
                md = md_file.read()
            html = markdown.markdown(md)
            html = html_template.safe_substitute(title=self.source.upper(), content=html)
            base_name = os.path.splitext(os.path.basename(filename))[0]
            found_chapters[base_name] = True
            html_filename = base_name + ".html"
            output_file = os.path.join(self.output_dir, html_filename)
            write_file(output_file, html)
            self.log.info('Converted {0} to {1}.'.format(os.path.basename(filename),
                                                         os.path.basename(html_filename)))
        else:
            # Directly copy over files that are not markdown files
            try:
                output_file = os.path.join(self.output_dir, os.path.basename(filename))
                if not os.path.exists(output_file):
                    copyfile(filename, output_file)
            except (IOError, OSError) as e:
                # Copying stays best-effort, but failures are logged instead of
                # being silently swallowed by a bare except.
                self.log.warning('Failed to copy {0}: {1}'.format(filename, e))
    self.log.info('Finished processing OBS Markdown files.')
def update_project_json(self, commit_id, job, repo_name, repo_owner):
    """
    Refresh the user's project.json on the CDN with this job's commit record.

    :param string commit_id:
    :param TxJob job:
    :param string repo_name:
    :param string repo_owner:
    :return:
    """
    key = 'u/{0}/{1}/project.json'.format(repo_owner, repo_name)
    project_json = App.cdn_s3_handler().get_json(key)
    project_json['user'] = repo_owner
    project_json['repo'] = repo_name
    project_json['repo_url'] = 'https://git.door43.org/{0}/{1}'.format(repo_owner, repo_name)
    commit = {
        'id': commit_id,
        'created_at': job.created_at,
        'status': job.status,
        'success': job.success,
        'started_at': None,
        'ended_at': None,
    }
    project_json.setdefault('commits', [])
    # Replace any stale record of this commit with the new one.
    kept = [c for c in project_json['commits'] if c['id'] != commit_id]
    kept.append(commit)
    project_json['commits'] = kept
    project_file = os.path.join(self.base_temp_dir, 'project.json')
    write_file(project_file, project_json)
    App.cdn_s3_handler().upload_file(project_file, key)
def mock_s3_bible_project(self, test_file_name, project_key, multi_part=False):
    """Upload a converted bible project fixture into the mock S3 buckets.

    :param string test_file_name: zip fixture name under resources/converted_projects
    :param string project_key: S3 key prefix for the project
    :param bool multi_part: when True, html files get a right-sidebar div
        injected and files are mirrored to the door43 bucket as well
    """
    converted_proj_dir = os.path.join(self.resources_dir, 'converted_projects')
    test_file_base = test_file_name.split('.zip')[0]
    zip_file = os.path.join(converted_proj_dir, test_file_name)
    out_dir = os.path.join(self.temp_dir, test_file_base)
    unzip(zip_file, out_dir)
    project_dir = os.path.join(out_dir, test_file_base) + os.path.sep
    self.project_files = file_utils.get_files(out_dir)
    self.project_key = project_key
    for filename in self.project_files:
        # Make sure it is a bucket path
        sub_path = filename.split(project_dir)[1].replace(os.path.sep, '/')
        App.cdn_s3_handler().upload_file(filename, '{0}/{1}'.format(project_key, sub_path))
        if multi_part:  # copy files from cdn to door43
            base_name = os.path.basename(filename)
            if '.html' in base_name:
                with codecs.open(filename, 'r', 'utf-8-sig') as f:
                    soup = BeautifulSoup(f, 'html.parser')
                # add nav tag
                new_tag = soup.new_tag('div', id='right-sidebar')
                soup.body.append(new_tag)
                html = unicode(soup)
                file_utils.write_file(filename, html.encode('ascii', 'xmlcharrefreplace'))
            App.door43_s3_handler().upload_file(filename, '{0}/{1}'.format(project_key, base_name))
    # u, user, repo = project_key
    # NOTE(review): the door43 mirror above is assumed to apply to every file
    # when multi_part is set (not only html files) — confirm against callers.
    App.door43_s3_handler().upload_file(os.path.join(self.resources_dir, 'templates', 'project-page.html'),
                                        'templates/project-page.html')
def test_callbackMultpleJob_build_error(self):
    """Multi-part job whose build log carries a convert error should surface
    an 'errors' status from the linter callback."""
    # given
    self.results_key = 'u/tx-manager-test-data/en-ulb/22f3d09f7a'
    self.lint_callback_data['s3_results_key'] = self.results_key + '/2'
    self.lint_callback_data['identifier'] = '1234567890/4/2/03-LEV.usfm'
    self.unzip_resource_files("en_ulb.zip", convert_finished=False)
    log_path = self.get_source_path('build_log.json')
    log_data = file_utils.load_json_object(log_path)
    log_data['errors'].append('convert error')
    log_data['success'] = False
    log_data['status'] = 'errors'
    file_utils.write_file(log_path, log_data)
    self.finish_convert(self.source_folder)
    self.expected_error_count = 1
    self.expected_success = False
    self.expected_status = "errors"
    self.expected_log_count = 36
    self.expected_multipart = None
    linter_cb = self.mock_client_linter_callback()
    # when
    results = linter_cb.process_callback()
    # then
    self.validate_results(results, linter_cb)
def set_deployed_flags(self, project_key, part_count, skip=-1):
    """Upload a 'deployed' flag file for each part of a multi-part project.

    :param string project_key: S3 key prefix of the project
    :param int part_count: number of parts to flag
    :param int skip: part index to leave unflagged (default -1 = flag all)
    """
    # mkstemp instead of the deprecated, race-prone mktemp; the fd is closed
    # immediately since only the path is needed.
    fd, tempf = tempfile.mkstemp(prefix="temp", suffix="deployed")
    os.close(fd)
    try:
        file_utils.write_file(tempf, ' ')
        for i in range(0, part_count):
            if i != skip:
                key = '{0}/{1}/deployed'.format(project_key, i)
                App.cdn_s3_handler().upload_file(tempf, key, cache_time=0)
    finally:
        # Remove the temp file even if an upload raises.
        os.remove(tempf)
def template_converted_files(self, build_log, download_key, output_dir, repo_name, resource_type, s3_commit_key,
                             source_dir, start, template_file):
    """Download converted files from the CDN, apply the deploy template and
    refresh the templater index.

    :param dict build_log: build log of the conversion (errors/message used
        when no html output exists)
    :param string download_key: CDN key prefix to download from
    :param string output_dir: local directory templated files are written to
    :param string repo_name: repository name (used in the placeholder page)
    :param string resource_type: resource type used to pick the templater
    :param string s3_commit_key: S3 key of the commit (for index.json)
    :param string source_dir: local directory the download lands in
    :param float start: deploy start time, for elapsed-seconds logging
    :return: (source_dir, success) tuple
    """
    App.cdn_s3_handler().download_dir(download_key + '/', source_dir)
    source_dir = os.path.join(source_dir, download_key.replace('/', os.path.sep))
    elapsed_seconds = int(time.time() - start)
    App.logger.debug("deploy download completed in " + str(elapsed_seconds) + " seconds")
    html_files = sorted(glob(os.path.join(source_dir, '*.html')))
    if len(html_files) < 1:
        # No converted html exists - write a placeholder index.html showing
        # either the build errors or a "no content yet" message.
        content = ''
        if len(build_log['errors']) > 0:
            content += """
            <div style="text-align:center;margin-bottom:20px">
                <i class="fa fa-times-circle-o" style="font-size: 250px;font-weight: 300;color: red"></i>
                <br/>
                <h2>Critical!</h2>
                <h3>Here is what went wrong with this build:</h3>
            </div>
        """
            content += '<div><ul><li>' + '</li><li>'.join(build_log['errors']) + '</li></ul></div>'
        else:
            content += '<h1 class="conversion-requested">{0}</h1>'.format(build_log['message'])
            content += '<p><i>No content is available to show for {0} yet.</i></p>'.format(repo_name)
        html = """
        <html lang="en">
            <head>
                <title>{0}</title>
            </head>
            <body>
                <div id="content">{1}</div>
            </body>
        </html>""".format(repo_name, content)
        repo_index_file = os.path.join(source_dir, 'index.html')
        write_file(repo_index_file, html)
    # merge the source files with the template
    templater = init_template(resource_type, source_dir, output_dir, template_file)
    try:
        self.run_templater(templater)
        success = True
    except Exception as e:
        App.logger.error("Error applying template {0} to resource type {1}:".format(template_file, resource_type))
        App.logger.error(e.message)
        App.logger.error('{0}: {1}'.format(str(e), traceback.format_exc()))
        self.close()
        success = False
    if success:
        # update index of templated files
        index_json_fname = 'index.json'
        index_json = self.get_templater_index(s3_commit_key, index_json_fname)
        App.logger.debug("initial 'index.json': " + json.dumps(index_json)[:256])
        self.update_index_key(index_json, templater, 'titles')
        self.update_index_key(index_json, templater, 'chapters')
        self.update_index_key(index_json, templater, 'book_codes')
        App.logger.debug("final 'index.json': " + json.dumps(index_json)[:256])
        self.write_data_to_file(output_dir, s3_commit_key, index_json_fname, index_json)
    return source_dir, success
def test_write_file_json(self):
    """
    A call to `write_file` where the content is an object (as opposed to a string).
    """
    payload = {"one": 1, "two": 2, "child": {"numbers": [3, 4, 5]}}
    _, self.tmp_file = tempfile.mkstemp()
    file_utils.write_file(self.tmp_file, payload)
    with open(self.tmp_file, "r") as f:
        self.assertEqual(json.load(f), payload)
def replace_verse_to_end(self, out_dir, file_name, chapter, start_vs, replace):
    """Replace everything from verse start_vs of the given chapter to the end
    of the book file with `replace`."""
    path = os.path.join(out_dir, file_name)
    text = read_file(path)
    # Locate the zero-padded chapter marker, then the verse marker within it.
    chapter_pos = text.find('\\c {0:02d}'.format(chapter))
    head = text[:chapter_pos]
    tail = text[chapter_pos:]
    verse_pos = tail.find('\\v {0} '.format(start_vs))
    write_file(path, head + tail[:verse_pos] + replace)
def replace_chapter(self, out_dir, file_name, start_ch, end_ch, replace):
    """Replace the text between the start_ch and end_ch chapter markers with `replace`."""
    path = os.path.join(out_dir, file_name)
    text = read_file(path)
    start_pos = text.find('\\c {0:02d}'.format(start_ch))
    end_pos = text.find('\\c {0:02d}'.format(end_ch))
    write_file(path, text[:start_pos] + replace + text[end_pos:])
def replace_tag(self, out_dir, file_name, tag, replace):
    """Replace one USFM marker (from '\\<tag>' up to the next backslash) with `replace`."""
    path = os.path.join(out_dir, file_name)
    text = read_file(path)
    start_pos = text.find('\\{0}'.format(tag))
    end_pos = text.find('\\', start_pos + 1)
    write_file(path, text[:start_pos] + replace + text[end_pos:])
def mock_download_file(self, url, target):
    """Test double for download_file: copies canned fixtures based on the URL."""
    if self.raiseDownloadException:
        raise Exception
    file_name = os.path.basename(url)
    if '.zip' in file_name:
        shutil.copyfile(self.source_zip, target)
        return
    if file_name == 'build_log.json':
        file_utils.write_file(target, self.build_log_json)
    elif file_name == 'project.json':
        file_utils.write_file(target, self.project_json)
def __init__(self, rc, source_dir, output_dir):
    """
    Store the resource container and directories, and write its manifest.

    :param RC rc:
    :param string source_dir:
    :param string output_dir:
    """
    self.rc = rc
    self.source_dir = source_dir  # Local directory
    self.output_dir = output_dir  # Local directory
    # Write out the new manifest file based on the resource container
    write_file(os.path.join(self.output_dir, 'manifest.yaml'), self.rc.as_dict())
def convert_markdown(self):
    """Convert every markdown file in files_dir to HTML in output_dir.

    Honors an exclusive-convert list when one is configured, rewrites anchor
    headers, and copies non-markdown files over unchanged (best effort).
    """
    self.log.info('Processing Markdown files')
    # find the first directory that has md files.
    files = get_files(directory=self.files_dir, exclude=self.EXCLUDED_FILES)
    convert_only_list = self.check_for_exclusive_convert()
    current_dir = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(current_dir, 'templates', 'template.html')) as template_file:
        html_template = string.Template(template_file.read())
    found_chapters = {}
    for filename in files:
        if filename.endswith('.md'):
            base_name = os.path.basename(filename)
            if convert_only_list and (base_name not in convert_only_list):  # see if this is a file we are to convert
                continue
            # Convert files that are markdown files
            with codecs.open(filename, 'r', 'utf-8-sig') as md_file:
                md = md_file.read()
            if self.resource in ['ta']:
                html = markdown2.markdown(md, extras=['markdown-in-html', 'tables'])
            else:
                html = markdown.markdown(md)
            html = html_template.safe_substitute(title=self.resource.upper(), content=html)
            # Change headers like <h1><a id="verbs"/>Verbs</h1> to <h1 id="verbs">Verbs</h1>
            soup = BeautifulSoup(html, 'html.parser')
            for tag in soup.findAll('a', {'id': True}):
                if tag.parent and tag.parent.name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
                    tag.parent['id'] = tag['id']
                    tag.parent['class'] = tag.parent.get('class', []) + ['section-header']
                    tag.extract()
            html = unicode(soup)
            base_name = os.path.splitext(os.path.basename(filename))[0]
            found_chapters[base_name] = True
            html_filename = base_name + ".html"
            output_file = os.path.join(self.output_dir, html_filename)
            write_file(output_file, html)
            self.log.info('Converted {0} to {1}.'.format(os.path.basename(filename),
                                                         os.path.basename(html_filename)))
        else:
            # Directly copy over files that are not markdown files
            try:
                output_file = os.path.join(self.output_dir, os.path.basename(filename))
                if not os.path.exists(output_file):
                    copyfile(filename, output_file)
            except (IOError, OSError) as e:
                # Best-effort copy, but log failures instead of silently
                # swallowing everything with a bare except.
                self.log.warning('Failed to copy {0}: {1}'.format(filename, e))
    self.log.info('Finished processing Markdown files.')
def upload_build_log_to_s3(self, build_log, s3_commit_key, part=''):
    """
    Write the build log to a temp file and upload it to the CDN bucket.

    :param dict build_log:
    :param string s3_commit_key:
    :param string part:
    :return:
    """
    local_path = os.path.join(self.base_temp_dir, 'build_log.json')
    write_file(local_path, build_log)
    upload_key = '{0}/{1}build_log.json'.format(s3_commit_key, part)
    App.logger.debug('Saving build log to ' + upload_key)
    App.cdn_s3_handler().upload_file(local_path, upload_key, cache_time=0)
def run(self):
    """Compile translationWords term files into one markdown file per section
    and build an index.json of titles, chapters and book codes.

    :return: True always
    """
    index_json = {
        'titles': {},
        'chapters': {},
        'book_codes': {}
    }
    # Level-1 markdown title / generic markdown header matchers.
    title_re = re.compile('^# +(.*?) *#*$', flags=re.MULTILINE)
    headers_re = re.compile('^(#+) +(.+?) *#*$', flags=re.MULTILINE)
    for idx, project in enumerate(self.rc.projects):
        term_text = {}
        section_dirs = sorted(glob(os.path.join(self.source_dir, project.path, '*')))
        for section_dir in section_dirs:
            section = os.path.basename(section_dir)
            if section not in self.section_titles:
                continue
            key = '{0}.html'.format(section)
            index_json['titles'][key] = self.section_titles[section]
            index_json['chapters'][key] = {}
            index_json['book_codes'][key] = section
            term_files = sorted(glob(os.path.join(section_dir, '*.md')))
            for term_file in term_files:
                term = os.path.splitext(os.path.basename(term_file))[0]
                text = read_file(term_file)
                if title_re.search(text):
                    title = title_re.search(text).group(1)
                    text = title_re.sub(r'# <a id="{0}"/>\1 #'.format(term), text)  # inject the term by the title
                else:
                    title = os.path.splitext(os.path.basename(term_file))[0]  # No title found, so using term
                # Demote every header one level so terms nest under the section title.
                text = headers_re.sub(r'#\1 \2', text)
                index_json['chapters'][key][term] = title
                term_text[term] = text
            # Sort terms by title and add to markdown
            markdown = ''
            titles = index_json['chapters'][key]
            terms_sorted_by_title = sorted(titles, key=lambda i: titles[i].lower())
            for term in terms_sorted_by_title:
                if markdown:
                    markdown += '<hr>\n\n'
                markdown += term_text[term] + '\n\n'
            markdown = '# <a id="tw-section-{0}"/>{1}\n\n'.format(section, self.section_titles[section]) + markdown
            markdown = self.fix_links(markdown, section)
            output_file = os.path.join(self.output_dir, '{0}.md'.format(section))
            write_file(output_file, markdown)
        config_file = os.path.join(self.source_dir, project.path, 'config.yaml')
        if os.path.isfile(config_file):
            copy(config_file, os.path.join(self.output_dir, 'config.yaml'))
    output_file = os.path.join(self.output_dir, 'index.json')
    write_file(output_file, index_json)
    return True
def run(self):
    """Merge translationQuestions chunks into one markdown file per book and
    write an index.json of titles, chapters and book codes.

    :return: True always
    """
    index_json = {
        'titles': {},
        'chapters': {},
        'book_codes': {}
    }
    headers_re = re.compile('^(#+) +(.+?) *#*$', flags=re.MULTILINE)
    for idx, project in enumerate(self.rc.projects):
        if project.identifier in BOOK_NAMES:
            markdown = ''
            book = project.identifier.lower()
            html_file = '{0}-{1}.html'.format(BOOK_NUMBERS[book], book.upper())
            index_json['book_codes'][html_file] = book
            name = BOOK_NAMES[book]
            index_json['titles'][html_file] = name
            chapter_dirs = sorted(glob(os.path.join(self.source_dir, project.path, '*')))
            markdown += '# <a id="tq-{0}"/> {1}\n\n'.format(book, name)
            index_json['chapters'][html_file] = []
            for chapter_dir in chapter_dirs:
                chapter = os.path.basename(chapter_dir)
                link = 'tq-chapter-{0}-{1}'.format(book, chapter.zfill(3))
                index_json['chapters'][html_file].append(link)
                markdown += '## <a id="{0}"/> {1} {2}\n\n'.format(link, name, chapter.lstrip('0'))
                chunk_files = sorted(glob(os.path.join(chapter_dir, '*.md')))
                for chunk_idx, chunk_file in enumerate(chunk_files):
                    # Chunk file names are zero-padded start verses; the end
                    # verse is one less than the next chunk's start, or the
                    # chapter's last verse for the final chunk.
                    start_verse = os.path.splitext(os.path.basename(chunk_file))[0].lstrip('0')
                    if chunk_idx < len(chunk_files) - 1:
                        end_verse = str(int(os.path.splitext(os.path.basename(chunk_files[chunk_idx + 1]))[0]) - 1)
                    else:
                        end_verse = BOOK_CHAPTER_VERSES[book][chapter.lstrip('0')]
                    link = 'tq-chunk-{0}-{1}-{2}'.format(book, str(chapter).zfill(3), str(start_verse).zfill(3))
                    markdown += '### <a id="{0}"/>{1} {2}:{3}{4}\n\n'.\
                        format(link, name, chapter.lstrip('0'), start_verse,
                               '-' + end_verse if start_verse != end_verse else '')
                    text = read_file(chunk_file) + '\n\n'
                    text = headers_re.sub(r'\1### \2', text)  # This will bump any header down 3 levels
                    markdown += text
            file_path = os.path.join(self.output_dir, '{0}-{1}.md'.format(BOOK_NUMBERS[book], book.upper()))
            write_file(file_path, markdown)
        else:
            App.logger.debug('TqPreprocessor: extra project found: {0}'.format(project.identifier))
    # Write out index.json
    output_file = os.path.join(self.output_dir, 'index.json')
    write_file(output_file, index_json)
    return True
def run(self):
    """Copy or assemble each bible project into the output directory.

    Handles three layouts: a single project file, a directory of files with
    the resource's extension, or chapter/chunk subdirectories that are merged
    into a single book file.

    :return: True always
    """
    for idx, project in enumerate(self.rc.projects):
        project_path = os.path.join(self.source_dir, project.path)
        if os.path.isfile(project_path):
            # Case #1: Project path is a file, then we copy the file over to the output dir
            if project.identifier.lower() in BOOK_NUMBERS:
                filename = '{0}-{1}.{2}'.format(BOOK_NUMBERS[project.identifier.lower()],
                                                project.identifier.upper(), self.rc.resource.file_ext)
            else:
                filename = '{0}-{1}.{2}'.format(str(idx + 1).zfill(2), project.identifier,
                                                self.rc.resource.file_ext)
            copy(project_path, os.path.join(self.output_dir, filename))
        else:
            # Case #2: It's a directory of files, so we copy them over to the output directory
            files = glob(os.path.join(project_path, '*.{0}'.format(self.rc.resource.file_ext)))
            if len(files):
                for file_path in files:
                    output_file_path = os.path.join(self.output_dir, os.path.basename(file_path))
                    if os.path.isfile(file_path) and not os.path.exists(output_file_path) \
                            and os.path.basename(file_path) not in self.ignoreFiles:
                        copy(file_path, output_file_path)
            else:
                # Case #3: The project path is multiple chapters, so we piece them together
                chapters = self.rc.chapters(project.identifier)
                App.logger.debug("Merging chapters in '{0}'".format(project.identifier))
                if len(chapters):
                    text = ''
                    for chapter in chapters:
                        text = self.mark_chapter(project.identifier, chapter, text)
                        for chunk in self.rc.chunks(project.identifier, chapter):
                            text = self.mark_chunk(project.identifier, chapter, chunk, text)
                            text += read_file(os.path.join(project_path, chapter, chunk)) + "\n\n"
                    if project.identifier.lower() in BOOK_NUMBERS:
                        filename = '{0}-{1}.{2}'.format(BOOK_NUMBERS[project.identifier.lower()],
                                                        project.identifier.upper(), self.rc.resource.file_ext)
                    else:
                        filename = '{0}-{1}.{2}'.format(str(idx + 1).zfill(2), project.identifier,
                                                        self.rc.resource.file_ext)
                    write_file(os.path.join(self.output_dir, filename), text)
    return True
def test_lint_broken_links(self, mock_invoke_markdown_linter):
    """TnLinter should warn once per missing book plus once per markdown issue."""
    # given
    mock_invoke_markdown_linter.return_value = {  # Don't care about markdown linting here, just specific tw linting
        '/tmp/tmp_lint_EYZ5zV/en_tn/2th/front/intro.md': [
            {
                'errorContext': 'dummy error message',
                'lineNumber': 42,
                'ruleDescription': 'dummy rule'
            }
        ]
    }
    expected_warnings = 64 + 1  # 64 missing books + 1 markdown warning
    zip_file = os.path.join(self.resources_dir, 'tn_linter', 'en_tn.zip')
    out_dir = self.unzip_resource(zip_file)
    # remove everything past genesis
    for book_id in BOOK_NUMBERS:
        book = '{0}-{1}'.format(BOOK_NUMBERS[book_id], book_id.upper())
        link = self.get_link_for_book(book)
        book_path = os.path.join(out_dir, 'en_tn', link)
        if os.path.exists(book_path) and book > "02":
            file_utils.remove_tree(book_path)
    # put a verse in exo so that we can test that there is some content there
    file_utils.write_file(os.path.join(out_dir, 'en_tn/exo/01/05.md'), 'dummy')
    # create chapter in lev with no md files so that we can test that there is no content there
    file_utils.write_file(os.path.join(out_dir, 'en_tn/lev/01/readme.txt'), 'dummy')
    new_zip = self.create_new_zip(out_dir)
    linter = TnLinter(source_file=new_zip, commit_data=self.commit_data)
    # when
    linter.run()
    # then
    self.verify_results_warnings_count(expected_warnings, linter)
def test_callbackSimpleJob_build_error(self):
    """Single job whose build log carries a convert error should finish with 'errors'."""
    # given
    self.unzip_resource_files("id_mat_ulb.zip", convert_finished=False)
    log_path = self.get_source_path('build_log.json')
    log_data = file_utils.load_json_object(log_path)
    log_data['errors'].append('convert error')
    log_data['success'] = False
    log_data['status'] = 'errors'
    file_utils.write_file(log_path, log_data)
    self.finish_convert(self.source_folder)
    self.expected_log_count = 9
    self.expected_error_count = 1
    self.expected_success = False
    self.expected_status = "errors"
    linter_cb = self.mock_client_linter_callback()
    # when
    results = linter_cb.process_callback()
    # then
    self.validate_results_and_log(results, linter_cb, self.expected_success, self.expected_status)
def mock_s3_tn_project(self, part):
    """Populate the mock CDN/door43 buckets with a converted tN project part."""
    zip_file = os.path.join(self.resources_dir, 'converted_projects', 'en_tn_converted.zip')
    out_dir = os.path.join(self.temp_dir, 'en_tn_converted')
    unzip(zip_file, out_dir)
    src_dir = os.path.join(out_dir, 'en_tn_converted')
    self.project_files = [f for f in os.listdir(src_dir) if os.path.isfile(os.path.join(src_dir, f))]
    self.project_key = 'u/door43/en_tn/12345678'
    build_log_file = os.path.join(src_dir, 'build_log.json')
    build_log = file_utils.load_json_object(build_log_file)
    build_log['part'] = part
    file_utils.write_file(build_log_file, build_log)
    cdn = App.cdn_s3_handler()
    prefix = '{0}/{1}'.format(self.project_key, part)
    cdn.upload_file(build_log_file, prefix + '/build_log.json')
    cdn.upload_file(os.path.join(src_dir, 'index.json'), prefix + '/index.json')
    # The 'finished' flag is just another copy of the build log.
    cdn.upload_file(build_log_file, prefix + '/finished')
    cdn.upload_file(os.path.join(src_dir, '01-GEN.html'), prefix + '/01-GEN.html')
    cdn.upload_file(os.path.join(src_dir, 'project.json'), 'u/door43/en_tq/project.json')
    App.door43_s3_handler().upload_file(os.path.join(self.resources_dir, 'templates', 'project-page.html'),
                                        'templates/project-page.html')
def test_callbackMultpleJob_first_merged(self):
    """First part of a multi-part job: its lint log should merge into the build log."""
    # given
    self.results_key = 'u/tx-manager-test-data/en-ulb/22f3d09f7a'
    self.unzip_resource_files("en_ulb.zip", convert_finished=True)
    self.lint_callback_data['s3_results_key'] = self.results_key + '/0'
    self.lint_callback_data['identifier'] = '1234567890/4/0/01-GEN.usfm'
    build_log = file_utils.load_json_object(self.get_source_path('build_log.json'))
    lint_log = file_utils.load_json_object(self.get_source_path(file_name='lint_log.json'))
    build_log['log'] += lint_log['log']
    file_utils.write_file(self.get_source_path(file_name='merged.json'), build_log)
    self.expected_log_count = 36
    self.expected_multipart = True
    linter_cb = self.mock_client_linter_callback()
    # when
    results = linter_cb.process_callback()
    # then
    self.validate_results(results, linter_cb)
def prepend_text(self, out_dir, file_name, prefix):
    """Prepend `prefix` to the named file's contents in place."""
    target = os.path.join(out_dir, file_name)
    write_file(target, prefix + read_file(target))
def save_data_to_s3(self, key, data):
    """Write `data` to a uniquely-named temp file and upload it under `key`.

    :param string key: S3 key (its basename is used in the temp file name)
    :param data: content handed to file_utils.write_file
    """
    base_name = key.split('/')[-1]
    # mkstemp instead of the deprecated, race-prone mktemp; close the fd
    # right away since only the path is used.
    fd, output_file = tempfile.mkstemp(suffix="_" + base_name, dir=self.temp_dir)
    os.close(fd)
    file_utils.write_file(output_file, data)
    self.mock_cdn_upload_file(output_file, key)
def apply_template(self):
    """Apply the door43 page template to every downloaded html file.

    Files already templated only get their right-sidebar navigation
    refreshed; all other files are wrapped in the full template (heading,
    sidebars, canonical URL, language attributes) and written to output_dir.
    """
    language_code = self.rc.resource.language.identifier
    language_name = self.rc.resource.language.title
    language_dir = self.rc.resource.language.direction
    resource_title = self.rc.resource.title
    self.get_page_navigation()
    heading = '{0}: {1}'.format(language_name, resource_title)
    title = ''
    canonical = ''
    # soup is the template that we will replace content of for every file
    soup = BeautifulSoup(self.template_html, 'html.parser')
    left_sidebar_div = soup.body.find('div', id='left-sidebar')
    outer_content_div = soup.body.find('div', id='outer-content')
    right_sidebar_div = soup.body.find('div', id='right-sidebar')
    # find the outer-content div in the template
    if not outer_content_div:
        raise Exception('No div tag with id "outer-content" was found in the template')
    # get the canonical UTL
    if not canonical:
        links = soup.head.find_all('link[rel="canonical"]')
        if len(links) == 1:
            canonical = links[0]['href']
    # loop through the html files
    for filename in self.files:
        if filename not in self.already_converted:
            App.logger.debug('Applying template to {0}.'.format(filename))
            # read the downloaded file into a dom abject
            with codecs.open(filename, 'r', 'utf-8-sig') as f:
                file_soup = BeautifulSoup(f, 'html.parser')
            # get the title from the raw html file
            if not title and file_soup.head and file_soup.head.title:
                title = file_soup.head.title.text
            else:
                title = os.path.basename(filename)
            # get the language code, if we haven't yet
            if not language_code:
                if 'lang' in file_soup.html:
                    language_code = file_soup.html['lang']
                else:
                    language_code = 'en'
            # get the body of the raw html file
            if not file_soup.body:
                body = BeautifulSoup('<div>No content</div>', 'html.parser')
            else:
                body = BeautifulSoup(''.join(['%s' % x for x in file_soup.body.contents]), 'html.parser')
            # insert new HTML into the template
            outer_content_div.clear()
            outer_content_div.append(body)
            soup.html['lang'] = language_code
            soup.html['dir'] = language_dir
            soup.head.title.clear()
            soup.head.title.append(heading + ' - ' + title)
            # set the page heading
            heading_span = soup.body.find('span', id='h1')
            heading_span.clear()
            heading_span.append(heading)
            if left_sidebar_div:
                left_sidebar_html = self.build_left_sidebar(filename)
                left_sidebar = BeautifulSoup(left_sidebar_html, 'html.parser').nav.extract()
                left_sidebar_div.clear()
                left_sidebar_div.append(left_sidebar)
            if right_sidebar_div:
                right_sidebar_div.clear()
                right_sidebar_html = self.build_right_sidebar(filename)
                if right_sidebar_html:
                    right_sidebar = BeautifulSoup(right_sidebar_html, 'html.parser')
                    if right_sidebar and right_sidebar.nav:
                        right_sidebar_nav = right_sidebar.nav.extract()
                        right_sidebar_div.append(right_sidebar_nav)
            # render the html as an unicode string
            html = unicode(soup)
            # fix the footer message, removing the title of this page in parentheses as it doesn't get filled
            html = html.replace(
                '("<a xmlns:dct="http://purl.org/dc/terms/" href="https://live.door43.org/templates/project-page.html" rel="dct:source">{{ HEADING }}</a>") ',
                '')
            # update the canonical URL - it is in several different locations
            html = html.replace(canonical, canonical.replace('/templates/', '/{0}/'.format(language_code)))
            # Replace HEADING with page title in footer
            html = html.replace('{{ HEADING }}', title)
            # write to output directory
            out_file = os.path.join(self.output_dir, os.path.basename(filename))
            App.logger.debug('Writing {0}.'.format(out_file))
            write_file(out_file, html.encode('ascii', 'xmlcharrefreplace'))
        else:
            # if already templated, need to update navigation bar
            # read the templated file into a dom abject
            with codecs.open(filename, 'r', 'utf-8-sig') as f:
                soup = BeautifulSoup(f, 'html.parser')
            right_sidebar_div = soup.body.find('div', id='right-sidebar')
            if right_sidebar_div:
                right_sidebar_html = self.build_right_sidebar(filename)
                right_sidebar = BeautifulSoup(right_sidebar_html, 'html.parser').nav.extract()
                right_sidebar_div.clear()
                right_sidebar_div.append(right_sidebar)
            # render the html as an unicode string
            html = unicode(soup)
            # write to output directory
            out_file = os.path.join(self.output_dir, os.path.basename(filename))
            App.logger.debug('Updating nav in {0}.'.format(out_file))
            write_file(out_file, html.encode('ascii', 'xmlcharrefreplace'))
def append_text(self, out_dir, file_name, append):
    """Append `append` to the named file's contents in place."""
    target = os.path.join(out_dir, file_name)
    write_file(target, read_file(target) + append)
def cdn_upload_contents(self, contents, key):
    """Serialize `contents` to a temp file and upload it to the CDN at `key`."""
    local_file = os.path.join(self.temp_dir, 'contents.json')
    write_file(local_file, contents)
    App.logger.debug('Writing file to ' + key)
    App.cdn_s3_handler().upload_file(local_file, key, cache_time=0)
def test_write_file(self):
    """A call to `write_file` with plain string content stores it verbatim."""
    _, self.tmp_file = tempfile.mkstemp()
    file_utils.write_file(self.tmp_file, "hello world")
    with open(self.tmp_file, "r") as f:
        self.assertEqual(f.read(), "hello world")
def generate_dashboard(self, max_failures=MAX_FAILURES):
    """
    Generate page with metrics indicating configuration of tx-manager.

    Builds an HTML page containing: one table section per registered
    tx-module (attributes plus per-module job counts), a totals section,
    a table of the most recent failed jobs, and language-popularity
    tables.  The page is also uploaded to the CDN as dashboard/index.html.

    :param int max_failures: maximum number of failed jobs to list
    :return: dict with 'title' and 'body' (HTML string) keys
    """
    App.logger.debug("Start: generateDashboard")
    # Fallback payload returned when no modules are registered.
    dashboard = {
        'title': 'tX-Manager Dashboard',
        'body': 'No modules found'
    }
    items = sorted(TxModule().query(), key=lambda k: k.name)
    if items and len(items):
        module_names = []
        for item in items:
            module_names.append(item.name)
        App.logger.debug("Found: " + str(len(items)) + " item[s] in tx-module")
        App.logger.debug("Reading from Jobs table")
        # Jobs whose converter module is one of the registered modules.
        registered_jobs = self.list_jobs({"convert_module": {"condition": "is_in", "value": module_names}},
                                         False)
        total_job_count = TxJob.query().count()
        registered_job_count = registered_jobs.count()
        App.logger.debug("Finished reading from Jobs table")
        # sanity check since AWS can be slow to update job count reported in table (every 6 hours)
        if registered_job_count > total_job_count:
            total_job_count = registered_job_count
        body = BeautifulSoup('<h1>TX-Manager Dashboard - {0}</h1>'
                             '<h2>Module Attributes</h2><br><table id="status"></table>'.format(datetime.now()),
                             'html.parser')
        # One section per module: a header row followed by one row per attribute.
        for item in items:
            module_name = item.name
            App.logger.debug(module_name)
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '"><td class="hdr" colspan="2">' + str(module_name) + '</td></tr>',
                'html.parser'))
            # Side effect: sets self.jobs_success / jobs_warnings / jobs_failures /
            # jobs_total for this module (read by the rows appended below).
            self.get_jobs_counts_for_module(registered_jobs, module_name)
            # TBD the following code almosts walks the db record replacing next 11 lines
            # for attr, val in item:
            #     if (attr != 'name') and (len(attr) > 0):
            #         rec += '  <tr><td class="lbl">' + attr.replace("_", " ").title() + ':</td><td>' + "lst(val)" + "</td></tr>\n"
            # rec += '<tr><td colspan="2"></td></tr>'
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-type" class="module-type"><td class="lbl">Type:</td><td>' +
                str(item.type) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-input" class="module-input"><td class="lbl">Input Format:</td><td>' +
                json.dumps(item.input_format) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-output" class="module-output">' +
                '<td class="lbl">Output Format:</td><td>' +
                json.dumps(item.output_format) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-resource" class="module-resource"><td class="lbl">Resource Types:</td>'
                '<td>' + json.dumps(item.resource_types) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-version" class="module-version"><td class="lbl">Version:</td><td>' +
                str(item.version) + '</td></tr>',
                'html.parser'))
            # Optional attributes: only rendered when non-empty.
            if len(item.options) > 0:
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-options" class="module-options">' +
                    '<td class="lbl">Options:</td><td>' +
                    json.dumps(item.options) + '</td></tr>',
                    'html.parser'))
            if len(item.private_links) > 0:
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-private-links" class="module-private-links">' +
                    '<td class="lbl">Private Links:</td><td>' +
                    json.dumps(item.private_links) + '</td></tr>',
                    'html.parser'))
            if len(item.public_links) > 0:
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-public-links" class="module-public-links">' +
                    '<td class="lbl">Public Links:</td><td>' +
                    json.dumps(item.public_links) + '</td></tr>',
                    'html.parser'))
            # Per-module job counts computed by get_jobs_counts_for_module() above.
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-job-success" class="module-public-links">' +
                '<td class="lbl">Job Successes:</td><td>' +
                str(self.jobs_success) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-job-warning" class="module-public-links">' +
                '<td class="lbl">Job Warnings:</td><td>' +
                str(self.jobs_warnings) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-job-failure" class="module-public-links">' +
                '<td class="lbl">Job Failures:</td><td>' +
                str(self.jobs_failures) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-job-total" class="module-public-links">' +
                '<td class="lbl">Jobs Total:</td><td>' +
                str(self.jobs_total) + '</td></tr>',
                'html.parser'))
        # Totals across all registered modules (get_jobs_counts resets the
        # self.jobs_* counters to overall values).
        self.get_jobs_counts(registered_jobs)
        body.table.append(BeautifulSoup(
            '<tr id="totals"><td class="hdr" colspan="2">Total Jobs</td></tr>', 'html.parser'))
        body.table.append(BeautifulSoup(
            '<tr id="totals-job-success" class="module-public-links"><td class="lbl">Success:</td><td>' +
            str(self.jobs_success) + '</td></tr>',
            'html.parser'))
        body.table.append(BeautifulSoup(
            '<tr id="totals-job-warning" class="module-public-links"><td class="lbl">Warnings:</td><td>' +
            str(self.jobs_warnings) + '</td></tr>',
            'html.parser'))
        body.table.append(BeautifulSoup(
            '<tr id="totals-job-failure" class="module-public-links"><td class="lbl">Failures:</td><td>' +
            str(self.jobs_failures) + '</td></tr>',
            'html.parser'))
        # Jobs not attributable to any registered module.
        body.table.append(BeautifulSoup(
            '<tr id="totals-job-unregistered" class="module-public-links"><td class="lbl">Unregistered:</td><td>' +
            str(total_job_count - self.jobs_total) + '</td></tr>',
            'html.parser'))
        body.table.append(BeautifulSoup(
            '<tr id="totals-job-total" class="module-public-links"><td class="lbl">Total:</td><td>' +
            str(total_job_count) + '</td></tr>',
            'html.parser'))
        # build job failures table
        job_failures = self.get_job_failures(registered_jobs, max_failures)
        body.append(BeautifulSoup('<h2>Failed Jobs</h2>', 'html.parser'))
        failure_table = BeautifulSoup('<table id="failed" cellpadding="4" border="1" ' +
                                      'style="border-collapse:collapse"></table>', 'html.parser')
        failure_table.table.append(BeautifulSoup('''
            <tr id="header">
            <th class="hdr">Time</th>
            <th class="hdr">Errors</th>
            <th class="hdr">Repo</th>
            <th class="hdr">PreConvert</th>
            <th class="hdr">Converted</th>
            <th class="hdr">Destination</th>''', 'html.parser'))
        gogs_url = App.gogs_url
        if gogs_url is None:
            gogs_url = 'https://git.door43.org'
        for i in range(0, len(job_failures)):
            item = job_failures[i]
            try:
                # identifier is expected to look like 'user/repo/commit[/...]'
                identifier = item.identifier
                user_name, repo_name, commit_id = identifier.split('/')[:3]
                source_sub_path = '{0}/{1}'.format(user_name, repo_name)
                cdn_bucket = item.cdn_bucket
                destination_url = 'https://{0}/u/{1}/{2}/{3}/build_log.json'.format(cdn_bucket, user_name,
                                                                                    repo_name, commit_id)
                repo_url = gogs_url + "/" + source_sub_path
                preconverted_url = item.source
                converted_url = item.output
                failure_table.table.append(BeautifulSoup(
                    '<tr id="failure-' + str(i) + '" class="module-job-id">' +
                    '<td>' + item.created_at.strftime("%Y-%m-%dT%H:%M:%SZ") + '</td>' +
                    '<td>' + ','.join(item.errors) + '</td>' +
                    '<td><a href="' + repo_url + '">' + source_sub_path + '</a></td>' +
                    '<td><a href="' + preconverted_url + '">' + preconverted_url.rsplit('/', 1)[1] + '</a></td>' +
                    '<td><a href="' + converted_url + '">' + item.job_id + '.zip</a></td>' +
                    '<td><a href="' + destination_url + '">Build Log</a></td>' +
                    '</tr>',
                    'html.parser'))
            except Exception as e:
                # Best-effort: a malformed job record is skipped so it does not
                # break the rest of the dashboard.
                pass
        body.append(failure_table)
        self.build_language_popularity_tables(body, max_failures)
        body_html = body.prettify('UTF-8')
        dashboard['body'] = body_html
        # save to cdn in case HTTP connection times out
        try:
            self.temp_dir = tempfile.mkdtemp(suffix="", prefix="dashboard_")
            temp_file = os.path.join(self.temp_dir, "index.html")
            file_utils.write_file(temp_file, body_html)
            cdn_handler = App.cdn_s3_handler()
            cdn_handler.upload_file(temp_file, 'dashboard/index.html')
        except Exception as e:
            App.logger.debug("Could not save dashboard: " + str(e))
    else:
        App.logger.debug("No modules found.")
    App.db().close()
    return dashboard
def replace_text(self, out_dir, file_name, match, replace):
    """Replace every `match` with `replace` in the file, asserting the file changed."""
    path = os.path.join(out_dir, file_name)
    original = read_file(path)
    updated = original.replace(match, replace)
    # the substitution must have actually altered the contents
    self.assertNotEqual(original, updated)
    write_file(path, updated)
def apply_template(self):
    """
    Wrap every HTML file in self.files with the project page template.

    For files not yet templated, the file's body is injected into the
    template's outer-content div, the heading/title/sidebars are filled in,
    and the result is written to self.output_dir.  For files already
    converted, only the right sidebar (navigation) is refreshed.
    """
    App.logger.debug('bs4 version: ' + bs4.__version__)
    # Deep BeautifulSoup trees can exceed the default recursion limit when rendered.
    sys.setrecursionlimit(3000)
    App.logger.debug('Recursion limit: ' + str(sys.getrecursionlimit()))
    language_code = self.rc.resource.language.identifier
    language_name = self.rc.resource.language.title
    language_dir = self.rc.resource.language.direction
    resource_title = self.rc.resource.title
    self.get_page_navigation()
    heading = '{0}: {1}'.format(language_name, resource_title)
    title = ''
    canonical = ''
    # soup is the template that we will replace content of for every file
    soup = BeautifulSoup(self.template_html, 'html.parser')
    left_sidebar_div = soup.body.find('div', id='left-sidebar')
    outer_content_div = soup.body.find('div', id='outer-content')
    right_sidebar_div = soup.body.find('div', id='right-sidebar')
    # find the outer-content div in the template
    if not outer_content_div:
        raise Exception(
            'No div tag with id "outer-content" was found in the template')
    # get the canonical URL
    if not canonical:
        # NOTE(review): find_all() takes tag names/attrs, not a CSS selector
        # string; this likely never matches (soup.select would) — confirm.
        links = soup.head.find_all('link[rel="canonical"]')
        if len(links) == 1:
            canonical = links[0]['href']
    # loop through the html files
    for filename in self.files:
        if filename not in self.already_converted:
            App.logger.debug('Applying template to {0}.'.format(filename))
            # read the downloaded file into a dom object
            with codecs.open(filename, 'r', 'utf-8-sig') as f:
                file_soup = BeautifulSoup(f, 'html.parser')
            # get the title from the raw html file
            # NOTE(review): once title is non-empty, later files always fall
            # into the else branch and use the file's basename — confirm intended.
            if not title and file_soup.head and file_soup.head.title:
                title = file_soup.head.title.text
            else:
                title = os.path.basename(filename)
            # get the language code, if we haven't yet
            if not language_code:
                if 'lang' in file_soup.html:
                    language_code = file_soup.html['lang']
                else:
                    language_code = 'en'
            # get the body of the raw html file
            if not file_soup.body:
                body = BeautifulSoup('<div>No content</div>', 'html.parser')
            else:
                # re-parse the body's children so they can be appended to the template
                body = BeautifulSoup(
                    ''.join(['%s' % x for x in file_soup.body.contents]),
                    'html.parser')
            # insert new HTML into the template
            outer_content_div.clear()
            outer_content_div.append(body)
            soup.html['lang'] = language_code
            soup.html['dir'] = language_dir
            soup.head.title.clear()
            soup.head.title.append(heading + ' - ' + title)
            # set the page heading
            heading_span = soup.body.find('span', id='h1')
            heading_span.clear()
            heading_span.append(heading)
            if left_sidebar_div:
                left_sidebar_html = self.build_left_sidebar(filename)
                left_sidebar = BeautifulSoup(left_sidebar_html,
                                             'html.parser').nav.extract()
                left_sidebar_div.clear()
                left_sidebar_div.append(left_sidebar)
            if right_sidebar_div:
                right_sidebar_div.clear()
                right_sidebar_html = self.build_right_sidebar(filename)
                if right_sidebar_html:
                    right_sidebar = BeautifulSoup(right_sidebar_html,
                                                  'html.parser')
                    if right_sidebar and right_sidebar.nav:
                        right_sidebar_nav = right_sidebar.nav.extract()
                        right_sidebar_div.append(right_sidebar_nav)
            # render the html as a unicode string
            html = unicode(soup)
            # fix the footer message, removing the title of this page in parentheses as it doesn't get filled
            html = html.replace(
                '("<a xmlns:dct="http://purl.org/dc/terms/" href="https://live.door43.org/templates/project-page.html" rel="dct:source">{{ HEADING }}</a>") ',
                '')
            # update the canonical URL - it is in several different locations
            html = html.replace(
                canonical,
                canonical.replace('/templates/', '/{0}/'.format(language_code)))
            # Replace HEADING with page title in footer
            html = html.replace('{{ HEADING }}', title)
            # write to output directory
            out_file = os.path.join(self.output_dir,
                                    os.path.basename(filename))
            App.logger.debug('Writing {0}.'.format(out_file))
            # non-ASCII chars become HTML character references so any encoding is safe
            write_file(out_file, html.encode('ascii', 'xmlcharrefreplace'))
        else:  # if already templated, need to update navigation bar
            # read the templated file into a dom object
            with codecs.open(filename, 'r', 'utf-8-sig') as f:
                soup = BeautifulSoup(f, 'html.parser')
            right_sidebar_div = soup.body.find('div', id='right-sidebar')
            if right_sidebar_div:
                right_sidebar_html = self.build_right_sidebar(filename)
                right_sidebar = BeautifulSoup(right_sidebar_html,
                                              'html.parser').nav.extract()
                right_sidebar_div.clear()
                right_sidebar_div.append(right_sidebar)
            # render the html as a unicode string
            html = unicode(soup)
            # write to output directory
            out_file = os.path.join(self.output_dir,
                                    os.path.basename(filename))
            App.logger.debug('Updating nav in {0}.'.format(out_file))
            write_file(out_file, html.encode('ascii', 'xmlcharrefreplace'))
def run(self):
    """
    Combine per-term markdown files into one markdown file per section.

    For each section directory recognized in self.section_titles, every
    *.md term file is read, its H1 title is captured and tagged with an
    anchor, other headers are demoted one level, and the terms are
    concatenated (sorted by title) into '<section>.md' in the output dir.
    Also writes index.json mapping output pages to titles/terms.

    :return: True on success
    """
    index_json = {'titles': {}, 'chapters': {}, 'book_codes': {}}
    # H1 line, e.g. '# Title #'; trailing hashes optional
    title_re = re.compile('^# +(.*?) *#*$', flags=re.MULTILINE)
    # any header line, capturing hash-run and text separately
    headers_re = re.compile('^(#+) +(.+?) *#*$', flags=re.MULTILINE)
    for idx, project in enumerate(self.rc.projects):
        term_text = {}
        section_dirs = sorted(
            glob(os.path.join(self.source_dir, project.path, '*')))
        for section_dir in section_dirs:
            section = os.path.basename(section_dir)
            # skip directories that aren't known sections
            if section not in self.section_titles:
                continue
            key = '{0}.html'.format(section)
            index_json['titles'][key] = self.section_titles[section]
            index_json['chapters'][key] = {}
            index_json['book_codes'][key] = section
            term_files = sorted(glob(os.path.join(section_dir, '*.md')))
            term_files_txt = sorted(
                glob(os.path.join(section_dir, '*.txt')))
            # If there are txt files in section folders, convert them to md format
            # and restart processing from scratch on the converted tree.
            if len(term_files_txt):
                if txt2md(section_dir):
                    return self.run()
            for term_file in term_files:
                term = os.path.splitext(os.path.basename(term_file))[0]
                text = read_file(term_file)
                if title_re.search(text):
                    title = title_re.search(text).group(1)
                    text = title_re.sub(
                        r'# <a id="{0}"/>\1 #'.format(term),
                        text)  # inject the term anchor into the title line
                else:
                    title = os.path.splitext(os.path.basename(term_file))[
                        0]  # No title found, so using term
                # demote every header one level (prepend one '#')
                text = headers_re.sub(r'#\1 \2', text)
                index_json['chapters'][key][term] = title
                term_text[term] = text
            # Sort terms by title and add to markdown
            markdown = ''
            titles = index_json['chapters'][key]
            terms_sorted_by_title = sorted(titles,
                                           key=lambda i: titles[i].lower())
            for term in terms_sorted_by_title:
                if markdown:
                    markdown += '<hr>\n\n'  # separator between terms
                markdown += term_text[term] + '\n\n'
            # prepend the section heading with its anchor
            markdown = '# <a id="tw-section-{0}"/>{1}\n\n'.format(
                section, self.section_titles[section]) + markdown
            markdown = self.fix_links(markdown, section)
            output_file = os.path.join(self.output_dir,
                                       '{0}.md'.format(section))
            write_file(output_file, markdown)
        # carry the project's config.yaml through to the output, if present
        config_file = os.path.join(self.source_dir, project.path,
                                   'config.yaml')
        if os.path.isfile(config_file):
            copy(config_file, os.path.join(self.output_dir, 'config.yaml'))
    output_file = os.path.join(self.output_dir, 'index.json')
    write_file(output_file, index_json)
    return True
def run(self):
    """
    Normalize each project into standard-named USFM files in output_dir.

    Three input layouts are handled:
      1. project path is a single USFM file -> copied to 'NN-BOOK.usfm';
      2. project path is a dir of *.usfm files -> each copied/renamed;
      3. project path is a dir of chapter dirs with chunk files -> a USFM
         book is assembled from the chunks (id/header markers, \\c per
         chapter, \\v per chunk, \\cl for translated chapter titles).
    Every produced filename is appended to self.books.

    :return: True on success
    """
    for idx, project in enumerate(self.rc.projects):
        project_path = os.path.join(self.source_dir, project.path)
        file_format = '{0}-{1}.usfm'
        # Case #1: The project path is a file, and thus is one book of the Bible, copy to standard filename
        if os.path.isfile(project_path):
            if project.identifier.lower() in BOOK_NUMBERS:
                filename = file_format.format(
                    BOOK_NUMBERS[project.identifier.lower()],
                    project.identifier.upper())
            else:
                # unknown book: fall back to the project's ordinal position
                filename = file_format.format(
                    str(idx + 1).zfill(2), project.identifier.upper())
            copy(project_path, os.path.join(self.output_dir, filename))
            self.books.append(filename)
        else:
            # Case #2: Project path is a dir with one or more USFM files, is one or more books of the Bible
            usfm_files = glob(os.path.join(project_path, '*.usfm'))
            if len(usfm_files):
                for usfm_path in usfm_files:
                    # book code is the last dash-separated part of the basename
                    book_code = os.path.splitext(
                        os.path.basename(usfm_path))[0].split(
                            '-')[-1].lower()
                    if book_code in BOOK_NUMBERS:
                        filename = file_format.format(
                            BOOK_NUMBERS[book_code], book_code.upper())
                    else:
                        filename = '{0}.usfm'.format(
                            os.path.splitext(
                                os.path.basename(usfm_path))[0])
                    output_file_path = os.path.join(
                        self.output_dir, filename)
                    # don't clobber a file already produced
                    if os.path.isfile(usfm_path) and not os.path.exists(
                            output_file_path):
                        copy(usfm_path, output_file_path)
                    self.books.append(filename)
            else:
                # Case #3: Project path is a dir with one or more chapter dirs with chunk & title files
                chapters = self.rc.chapters(project.identifier)
                if len(chapters):
                    # Piece the USFM file together
                    title_file = os.path.join(project_path, 'front',
                                              'title.txt')
                    if os.path.isfile(title_file):
                        title = read_file(title_file).strip()
                    else:
                        title = project.title
                    if not title and os.path.isfile(
                            os.path.join(project_path, 'title.txt')):
                        title = read_file(
                            os.path.join(project_path, 'title.txt'))
                    # USFM book header: id, encoding, running heads, TOC entries, main title
                    usfm = """
\\id {0} {1}
\\ide UTF-8
\\h {2}
\\toc1 {2}
\\toc2 {2}
\\toc3 {2}
\\mt {2}
""".format(project.identifier.upper(), self.rc.resource.title, title)
                    for chapter in chapters:
                        if chapter in self.ignoreDirectories:
                            continue
                        chapter_num = chapter.lstrip('0')
                        chunks = self.rc.chunks(project.identifier, chapter)
                        if not len(chunks):
                            continue
                        first_chunk = read_file(
                            os.path.join(project_path, chapter, chunks[0]))
                        usfm += "\n\n"
                        # add a chapter marker unless the first chunk already has one
                        if '\\c {0}'.format(
                                chapter_num) not in first_chunk:
                            usfm += "\\c {0}\n".format(chapter_num)
                        if os.path.isfile(
                                os.path.join(project_path, chapter,
                                             'title.txt')):
                            translated_title = read_file(
                                os.path.join(project_path, chapter,
                                             'title.txt'))
                            # strip a trailing chapter number to compare with the book title
                            book_name = re.sub(r' \d+$', '',
                                               translated_title).strip()
                            # only emit a chapter label when it differs from the book title
                            if book_name.lower() != title.lower():
                                usfm += "\cl {0}\n".format(
                                    translated_title)
                        for chunk in chunks:
                            if chunk in self.ignoreFiles:
                                continue
                            chunk_num = os.path.splitext(chunk)[0].lstrip(
                                '0')
                            chunk_content = read_file(
                                os.path.join(project_path, chapter, chunk))
                            # add a verse marker unless the chunk already has one
                            if '\\v {0} '.format(
                                    chunk_num) not in chunk_content:
                                chunk_content = '\\v {0} '.format(
                                    chunk_num) + chunk_content
                            usfm += chunk_content + "\n"
                    if project.identifier.lower() in BOOK_NUMBERS:
                        filename = file_format.format(
                            BOOK_NUMBERS[project.identifier.lower()],
                            project.identifier.upper())
                    else:
                        filename = file_format.format(
                            str(idx + 1).zfill(2),
                            project.identifier.upper())
                    write_file(os.path.join(self.output_dir, filename),
                               usfm)
                    self.books.append(filename)
    return True
def run(self):
    """
    Combine per-chapter/per-chunk tN markdown into one file per book.

    Each project whose identifier is a known Bible book is flattened into
    a single 'NN-BOOK.md' with anchor links for the book, each chapter,
    and each chunk (verse range).  Headers inside chunks are demoted two
    levels.  Also writes index.json mapping output pages to titles and
    chapter anchors.

    :return: True on success
    """
    index_json = {'titles': {}, 'chapters': {}, 'book_codes': {}}
    # any markdown header, capturing hash-run and text separately
    headers_re = re.compile('^(#+) +(.+?) *#*$', flags=re.MULTILINE)
    for idx, project in enumerate(self.rc.projects):
        App.logger.debug('TnPreprocessor: processing project: {0}'.format(
            project.identifier))
        if project.identifier in BOOK_NAMES:
            markdown = ''
            book = project.identifier.lower()
            html_file = '{0}-{1}.html'.format(BOOK_NUMBERS[book],
                                              book.upper())
            index_json['book_codes'][html_file] = book
            name = BOOK_NAMES[book]
            index_json['titles'][html_file] = name
            chapter_dirs = sorted(
                glob(os.path.join(self.source_dir, project.path, '*')))
            markdown += '# <a id="tn-{0}"/> {1}\n\n'.format(book, name)
            index_json['chapters'][html_file] = []
            # ensure 'front'/'intro' material is processed before numbered chapters
            for move_str in ['front', 'intro']:
                self.move_to_front(chapter_dirs, move_str)
            for chapter_dir in chapter_dirs:
                chapter = os.path.basename(chapter_dir)
                link = 'tn-chapter-{0}-{1}'.format(book, chapter.zfill(3))
                index_json['chapters'][html_file].append(link)
                markdown += '## <a id="{0}"/> {1} {2}\n\n'.format(
                    link, name, chapter.lstrip('0'))
                chunk_files = sorted(
                    glob(os.path.join(chapter_dir, '*.md')))
                chunk_files_txt = sorted(
                    glob(os.path.join(chapter_dir, '*.txt')))
                # If there are txt files in chapter folders, convert them to md format
                # and restart processing from scratch on the converted tree.
                if len(chunk_files_txt):
                    if txt2md(chapter_dir):
                        return self.run()
                for move_str in ['front', 'intro']:
                    self.move_to_front(chunk_files, move_str)
                for chunk_idx, chunk_file in enumerate(chunk_files):
                    # chunk filename is its starting verse (zero-padded)
                    start_verse = os.path.splitext(
                        os.path.basename(chunk_file))[0].lstrip('0')
                    if chunk_idx < len(chunk_files) - 1:
                        # end verse = (next chunk's start verse) - 1
                        base_file_name = os.path.splitext(
                            os.path.basename(chunk_files[chunk_idx +
                                                         1]))[0]
                        if base_file_name.isdigit():
                            end_verse = str(int(base_file_name) - 1)
                        else:
                            end_verse = start_verse
                    else:
                        # last chunk runs to the chapter's final verse
                        chapter_str = chapter.lstrip('0')
                        chapter_verses = BOOK_CHAPTER_VERSES[book]
                        end_verse = chapter_verses[
                            chapter_str] if chapter_str in chapter_verses else start_verse
                    start_verse_str = str(start_verse).zfill(
                        3) if start_verse.isdigit() else start_verse
                    link = 'tn-chunk-{0}-{1}-{2}'.format(
                        book, str(chapter).zfill(3), start_verse_str)
                    markdown += '### <a id="{0}"/>{1} {2}:{3}{4}\n\n'. \
                        format(link, name, chapter.lstrip('0'), start_verse,
                               '-'+end_verse if start_verse != end_verse else '')
                    text = read_file(chunk_file) + '\n\n'
                    text = headers_re.sub(
                        r'\1## \2',
                        text)  # This will bump any header down 2 levels
                    markdown += text
            markdown = self.fix_links(markdown)
            book_file_name = '{0}-{1}.md'.format(BOOK_NUMBERS[book],
                                                 book.upper())
            self.books.append(book_file_name)
            file_path = os.path.join(self.output_dir, book_file_name)
            write_file(file_path, markdown)
        else:
            # project identifier isn't a Bible book; leave it alone
            App.logger.debug(
                'TnPreprocessor: extra project found: {0}'.format(
                    project.identifier))
    # Write out index.json
    output_file = os.path.join(self.output_dir, 'index.json')
    write_file(output_file, index_json)
    return True
def upload_build_log(build_log, file_name, output_dir, s3_results_key, cache_time=0):
    """Write build_log to output_dir/file_name and upload it to the CDN under s3_results_key."""
    local_path = os.path.join(output_dir, file_name)
    write_file(local_path, build_log)
    key = '{0}/{1}'.format(s3_results_key, file_name)
    App.logger.debug('Saving build log to ' + key)
    App.cdn_s3_handler().upload_file(local_path, key, cache_time=cache_time)