Example #1
0
 def run(self):
     """Preprocess each project of the resource container into the output dir.

     Top-level markdown files are copied through; chunked projects are
     compiled into one markdown file per chapter (with OBS frame images);
     otherwise the first of 01.md/intro.md per chapter is copied out.
     """
     for proj in self.rc.projects:
         proj_dir = os.path.join(self.source_dir, proj.path)
         # Copy all the markdown files in the project root directory to the
         # output directory, skipping ignored names and existing targets.
         for md_path in glob(os.path.join(proj_dir, '*.md')):
             base = os.path.basename(md_path)
             dest = os.path.join(self.output_dir, base)
             if os.path.isfile(md_path) and not os.path.exists(dest) and base not in self.ignoreFiles:
                 copy(md_path, dest)
         if self.is_chunked(proj):
             # Stitch each chapter's frames into a single markdown document.
             for chapter in self.get_chapters(proj_dir):
                 pieces = ['# {0}\n\n'.format(chapter['title'])]
                 for frame in chapter['frames']:
                     pieces.append('![Frame {0}](https://cdn.door43.org/obs/jpg/360px/obs-en-{0}.jpg)\n\n'
                                   .format(frame.get('id')))
                     pieces.append(frame['text'] + '\n\n')
                 pieces.append('_{0}_\n'.format(chapter['reference']))
                 out_path = os.path.join(self.output_dir, '{0}.md'.format(chapter.get('id')))
                 write_file(out_path, ''.join(pieces))
         else:
             # Not chunked: take 01.md, falling back to intro.md, per chapter.
             for chapter in self.rc.chapters(proj.identifier):
                 src = None
                 for candidate in ('01.md', 'intro.md'):
                     candidate_path = os.path.join(proj_dir, chapter, candidate)
                     if os.path.isfile(candidate_path):
                         src = candidate_path
                         break
                 if src:
                     copy(src, os.path.join(self.output_dir, '{0}.md'.format(chapter)))
     return True
 def update_project_file(build_log, output_dir):
     """Refresh the repo's project.json on the CDN from a build log.

     Rewrites user/repo metadata, replaces any existing entry for this
     commit with a fresh record, writes the manifest locally, and uploads
     it back to the CDN bucket (uncached).

     :param dict build_log: build log containing commit/repo/status fields
     :param string output_dir: local directory to write project.json into
     :return: the updated project.json dict
     """
     commit_id = build_log['commit_id']
     owner = build_log['repo_owner']
     repo = build_log['repo_name']
     project_json_key = 'u/{0}/{1}/project.json'.format(owner, repo)
     project_json = App.cdn_s3_handler().get_json(project_json_key)
     project_json['user'] = owner
     project_json['repo'] = repo
     project_json['repo_url'] = 'https://{0}/{1}/{2}'.format(App.gogs_url, owner, repo)
     commit = {
         'id': commit_id,
         'created_at': build_log['created_at'],
         'status': build_log['status'],
         'success': build_log['success'],
         # .get() yields None when the timestamps are absent, matching the
         # previous "default then conditionally overwrite" behavior.
         'started_at': build_log.get('started_at'),
         'ended_at': build_log.get('ended_at')
     }
     # Drop any stale record of this commit, then append the new one.
     previous_commits = project_json.get('commits', [])
     project_json['commits'] = [c for c in previous_commits if c['id'] != commit_id] + [commit]
     project_file = os.path.join(output_dir, 'project.json')
     write_file(project_file, project_json)
     App.cdn_s3_handler().upload_file(project_file, project_json_key, cache_time=0)
     return project_json
Example #3
0
    def run(self):
        """Compile each manual project's ToC sections into one markdown file.

        Also copies the project's toc.yaml/config.yaml into the output dir
        (renamed with the project's ordinal prefix) so the live site can
        rebuild the table of contents.
        """
        for idx, project in enumerate(self.rc.projects):
            self.section_container_id = 1
            toc = self.rc.toc(project.identifier)
            # Prefer an explicit manual title; otherwise derive one.
            title = self.manual_title_map.get(project.identifier,
                                              '{0} Manual'.format(project.identifier.title()))
            pieces = ['# {0}\n\n'.format(title)]
            for section in toc['sections']:
                pieces.append(self.compile_section(project, section, 2))
            markdown = self.fix_links(''.join(pieces))
            prefix = str(idx+1).zfill(2)
            output_file = os.path.join(self.output_dir, '{0}-{1}.md'.format(prefix, project.identifier))
            write_file(output_file, markdown)

            # Copy the toc and config.yaml file to the output dir so they can be used to
            # generate the ToC on live.door43.org
            toc_file = os.path.join(self.source_dir, project.path, 'toc.yaml')
            if os.path.isfile(toc_file):
                copy(toc_file, os.path.join(self.output_dir,
                                            '{0}-{1}-toc.yaml'.format(prefix, project.identifier)))
            config_file = os.path.join(self.source_dir, project.path, 'config.yaml')
            if os.path.isfile(config_file):
                copy(config_file, os.path.join(self.output_dir,
                                               '{0}-{1}-config.yaml'.format(prefix, project.identifier)))
        return True
    def convert_obs(self):
        """Convert OBS markdown files to HTML pages in the output directory.

        Each ``*.md`` file is rendered to HTML, wrapped in the shared page
        template, and written as ``<basename>.html``; every other file is
        copied through unchanged (best-effort).
        """
        self.log.info('Processing OBS markdown files')

        # find the first directory that has md files.
        files = get_files(directory=self.files_dir, exclude=self.EXCLUDED_FILES)

        current_dir = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(current_dir, 'templates', 'template.html')) as template_file:
            html_template = string.Template(template_file.read())

        # NOTE(review): populated below but never read in this method —
        # kept in case subclasses/other code relied on the local pattern.
        found_chapters = {}

        for filename in files:
            if filename.endswith('.md'):
                # Convert files that are markdown files
                with codecs.open(filename, 'r', 'utf-8-sig') as md_file:
                    md = md_file.read()
                html = markdown.markdown(md)
                html = html_template.safe_substitute(title=self.source.upper(), content=html)
                base_name = os.path.splitext(os.path.basename(filename))[0]
                found_chapters[base_name] = True
                html_filename = base_name + ".html"
                output_file = os.path.join(self.output_dir, html_filename)
                write_file(output_file, html)
                self.log.info('Converted {0} to {1}.'.format(os.path.basename(filename),
                                                             os.path.basename(html_filename)))
            else:
                # Directly copy over files that are not markdown files.
                # Best-effort: a failed copy shouldn't abort the conversion,
                # but it must be logged rather than silently swallowed
                # (the previous bare `except: pass` hid even SystemExit).
                output_file = os.path.join(self.output_dir, os.path.basename(filename))
                try:
                    if not os.path.exists(output_file):
                        copyfile(filename, output_file)
                except (IOError, OSError) as e:
                    self.log.warning('Could not copy {0}: {1}'.format(filename, e))
        self.log.info('Finished processing OBS Markdown files.')
 def update_project_json(self, commit_id, job, repo_name, repo_owner):
     """
     Rebuild and upload the repo's project.json manifest on the CDN.

     :param string commit_id:
     :param TxJob job:
     :param string repo_name:
     :param string repo_owner:
     :return:
     """
     project_json_key = 'u/{0}/{1}/project.json'.format(repo_owner, repo_name)
     project_json = App.cdn_s3_handler().get_json(project_json_key)
     project_json['user'] = repo_owner
     project_json['repo'] = repo_name
     project_json['repo_url'] = 'https://git.door43.org/{0}/{1}'.format(repo_owner, repo_name)
     commit = {
         'id': commit_id,
         'created_at': job.created_at,
         'status': job.status,
         'success': job.success,
         'started_at': None,
         'ended_at': None
     }
     # Replace any existing record of this commit with the fresh one.
     existing = project_json.get('commits', [])
     project_json['commits'] = [c for c in existing if c['id'] != commit_id] + [commit]
     project_file = os.path.join(self.base_temp_dir, 'project.json')
     write_file(project_file, project_json)
     App.cdn_s3_handler().upload_file(project_file, project_json_key)
    def mock_s3_bible_project(self, test_file_name, project_key, multi_part=False):
        """
        Unzip a canned converted-project fixture and upload its files to the
        mocked CDN S3 bucket under *project_key*.

        :param string test_file_name: zip fixture in resources/converted_projects
        :param string project_key: S3 key prefix, e.g. 'u/<user>/<repo>/<commit>'
        :param bool multi_part: when True, also push files to the door43 bucket,
            first injecting a right-sidebar div into each HTML file
        """
        converted_proj_dir = os.path.join(self.resources_dir, 'converted_projects')
        test_file_base = test_file_name.split('.zip')[0]
        zip_file = os.path.join(converted_proj_dir, test_file_name)
        out_dir = os.path.join(self.temp_dir, test_file_base)
        unzip(zip_file, out_dir)
        project_dir = os.path.join(out_dir, test_file_base) + os.path.sep
        self.project_files = file_utils.get_files(out_dir)
        self.project_key = project_key
        for filename in self.project_files:
            sub_path = filename.split(project_dir)[1].replace(os.path.sep, '/')  # Make sure it is a bucket path
            App.cdn_s3_handler().upload_file(filename, '{0}/{1}'.format(project_key, sub_path))

            if multi_part:  # copy files from cdn to door43
                base_name = os.path.basename(filename)
                if '.html' in base_name:
                    with codecs.open(filename, 'r', 'utf-8-sig') as f:
                        soup = BeautifulSoup(f, 'html.parser')

                    # add nav tag
                    new_tag = soup.new_tag('div', id='right-sidebar')
                    soup.body.append(new_tag)
                    # NOTE(review): `unicode` is Python 2 only; entities are
                    # ASCII-escaped so the upload is encoding-safe.
                    html = unicode(soup)
                    file_utils.write_file(filename, html.encode('ascii', 'xmlcharrefreplace'))

                App.door43_s3_handler().upload_file(filename, '{0}/{1}'.format(project_key, base_name))

        # u, user, repo = project_key
        App.door43_s3_handler().upload_file(os.path.join(self.resources_dir, 'templates', 'project-page.html'),
                                          'templates/project-page.html')
    def test_callbackMultpleJob_build_error(self):
        """Multi-part job: a convert error in one part must surface as an
        'errors' status in the linter-callback results."""
        # given
        self.results_key = 'u/tx-manager-test-data/en-ulb/22f3d09f7a'
        self.lint_callback_data['s3_results_key'] = self.results_key + '/2'
        self.lint_callback_data['identifier'] = '1234567890/4/2/03-LEV.usfm'
        self.unzip_resource_files("en_ulb.zip", convert_finished=False)
        # Inject a conversion failure into the fixture's build log.
        build_log_path = self.get_source_path('build_log.json')
        build_log = file_utils.load_json_object(build_log_path)
        build_log['errors'].append('convert error')
        build_log['success'] = False
        build_log['status'] = 'errors'
        file_utils.write_file(build_log_path, build_log)
        self.finish_convert(self.source_folder)
        self.expected_error_count = 1
        self.expected_success = False
        self.expected_status = "errors"
        self.expected_log_count = 36
        self.expected_multipart = None
        linter_cb = self.mock_client_linter_callback()

        # when
        results = linter_cb.process_callback()

        # then
        self.validate_results(results, linter_cb)
 def set_deployed_flags(self, project_key, part_count, skip=-1):
     """Upload a 'deployed' marker file for each part of a multi-part project.

     :param string project_key: base S3 key of the project
     :param int part_count: number of parts to flag (0..part_count-1)
     :param int skip: part index to leave unflagged; default -1 flags all
     """
     # tempfile.mktemp() is deprecated and race-prone (the name can be taken
     # between generation and creation); mkstemp() creates the file atomically.
     fd, tempf = tempfile.mkstemp(prefix="temp", suffix="deployed")
     os.close(fd)
     file_utils.write_file(tempf, ' ')
     try:
         for i in range(part_count):
             if i != skip:
                 key = '{0}/{1}/deployed'.format(project_key, i)
                 App.cdn_s3_handler().upload_file(tempf, key, cache_time=0)
     finally:
         # Always clean up the local marker, even if an upload raises.
         os.remove(tempf)
    def template_converted_files(self, build_log, download_key, output_dir, repo_name, resource_type, s3_commit_key,
                                 source_dir, start, template_file):
        """Download converted HTML from the CDN, apply the resource template,
        and update the deployed index.json.

        When no HTML was produced, a placeholder index.html is generated from
        the build log (errors, or a "no content yet" notice) so the deploy
        still renders something.

        :return: (source_dir, success) — source_dir adjusted to the downloaded
            subdirectory; success is False if templating raised.
        """
        App.cdn_s3_handler().download_dir(download_key + '/', source_dir)
        # The downloaded tree mirrors the S3 key layout; point at that subdir.
        source_dir = os.path.join(source_dir, download_key.replace('/', os.path.sep))
        elapsed_seconds = int(time.time() - start)
        App.logger.debug("deploy download completed in " + str(elapsed_seconds) + " seconds")
        html_files = sorted(glob(os.path.join(source_dir, '*.html')))
        if len(html_files) < 1:
            # Nothing converted: synthesize a minimal index.html from the log.
            content = ''
            if len(build_log['errors']) > 0:
                content += """
                        <div style="text-align:center;margin-bottom:20px">
                            <i class="fa fa-times-circle-o" style="font-size: 250px;font-weight: 300;color: red"></i>
                            <br/>
                            <h2>Critical!</h2>
                            <h3>Here is what went wrong with this build:</h3>
                        </div>
                    """
                content += '<div><ul><li>' + '</li><li>'.join(build_log['errors']) + '</li></ul></div>'
            else:
                content += '<h1 class="conversion-requested">{0}</h1>'.format(build_log['message'])
                content += '<p><i>No content is available to show for {0} yet.</i></p>'.format(repo_name)
            html = """
                    <html lang="en">
                        <head>
                            <title>{0}</title>
                        </head>
                        <body>
                            <div id="content">{1}</div>
                        </body>
                    </html>""".format(repo_name, content)
            repo_index_file = os.path.join(source_dir, 'index.html')
            write_file(repo_index_file, html)

        # merge the source files with the template
        templater = init_template(resource_type, source_dir, output_dir, template_file)
        try:
            self.run_templater(templater)
            success = True
        except Exception as e:
            App.logger.error("Error applying template {0} to resource type {1}:".format(template_file, resource_type))
            # NOTE(review): `e.message` exists only on Python 2 exceptions.
            App.logger.error(e.message)
            App.logger.error('{0}: {1}'.format(str(e), traceback.format_exc()))
            self.close()
            success = False

        if success:
            # update index of templated files
            index_json_fname = 'index.json'
            index_json = self.get_templater_index(s3_commit_key, index_json_fname)
            App.logger.debug("initial 'index.json': " + json.dumps(index_json)[:256])
            self.update_index_key(index_json, templater, 'titles')
            self.update_index_key(index_json, templater, 'chapters')
            self.update_index_key(index_json, templater, 'book_codes')
            App.logger.debug("final 'index.json': " + json.dumps(index_json)[:256])
            self.write_data_to_file(output_dir, s3_commit_key, index_json_fname, index_json)
        return source_dir, success
 def test_write_file_json(self):
     """
     `write_file` given a non-string object must serialize it as JSON
     that round-trips back to an equal value.
     """
     payload = {"one": 1, "two": 2, "child": {"numbers": [3, 4, 5]}}
     _, self.tmp_file = tempfile.mkstemp()
     file_utils.write_file(self.tmp_file, payload)
     with open(self.tmp_file, "r") as handle:
         self.assertEqual(json.load(handle), payload)
 def replace_verse_to_end(self, out_dir, file_name, chapter, start_vs, replace):
     """Replace from verse *start_vs* of *chapter* to the end of the USFM file."""
     book_path = os.path.join(out_dir, file_name)
     text = read_file(book_path)
     # Split at the chapter marker, then find the verse within that chapter.
     c_pos = text.find('\\c {0:02d}'.format(chapter))
     head, chapter_on = text[:c_pos], text[c_pos:]
     v_pos = chapter_on.find('\\v {0} '.format(start_vs))
     write_file(book_path, head + chapter_on[:v_pos] + replace)
 def replace_chapter(self, out_dir, file_name, start_ch, end_ch, replace):
     """Replace chapters [start_ch, end_ch) of the USFM file with *replace*."""
     book_path = os.path.join(out_dir, file_name)
     text = read_file(book_path)
     # Locate the two chapter markers that bound the span to replace.
     start_pos = text.find('\\c {0:02d}'.format(start_ch))
     end_pos = text.find('\\c {0:02d}'.format(end_ch))
     write_file(book_path, text[:start_pos] + replace + text[end_pos:])
 def replace_tag(self, out_dir, file_name, tag, replace):
     """Replace the first \\<tag> marker (through the next backslash) with *replace*."""
     book_path = os.path.join(out_dir, file_name)
     text = read_file(book_path)
     # Span runs from the tag's marker to the next USFM marker after it.
     start_pos = text.find('\\{0}'.format(tag))
     end_pos = text.find('\\', start_pos + 1)
     write_file(book_path, text[:start_pos] + replace + text[end_pos:])
    def mock_download_file(self, url, target):
        """Test double for download_file: serve canned fixtures instead of the network."""
        if self.raiseDownloadException:
            raise Exception

        name = os.path.basename(url)
        # JSON fixtures are written through write_file; zips are copied whole.
        json_payloads = {
            'build_log.json': self.build_log_json,
            'project.json': self.project_json,
        }
        if '.zip' in name:
            shutil.copyfile(self.source_zip, target)
        elif name in json_payloads:
            file_utils.write_file(target, json_payloads[name])
Example #15
0
    def __init__(self, rc, source_dir, output_dir):
        """Initialize the preprocessor and regenerate the output manifest.

        :param RC rc: parsed resource container
        :param string source_dir: local directory holding the source files
        :param string output_dir: local directory to receive preprocessed output
        """
        self.rc = rc
        self.output_dir = output_dir  # Local directory
        self.source_dir = source_dir  # Local directory

        # Emit manifest.yaml reflecting the resource container's current state
        write_file(os.path.join(self.output_dir, 'manifest.yaml'), self.rc.as_dict())
    def convert_markdown(self):
        """Convert the resource's markdown files to templated HTML pages.

        'ta' content is rendered with markdown2 (tables + markdown-in-html
        extras), everything else with plain markdown. Anchor-style headers
        are rewritten so the id lives on the header tag itself. Files not
        ending in .md are copied through unchanged (best-effort).
        """
        self.log.info('Processing Markdown files')

        # find the first directory that has md files.
        files = get_files(directory=self.files_dir, exclude=self.EXCLUDED_FILES)
        convert_only_list = self.check_for_exclusive_convert()

        current_dir = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(current_dir, 'templates', 'template.html')) as template_file:
            html_template = string.Template(template_file.read())

        # NOTE(review): populated below but never read in this method.
        found_chapters = {}

        for filename in files:
            if filename.endswith('.md'):
                base_name = os.path.basename(filename)
                if convert_only_list and (base_name not in convert_only_list):  # see if this is a file we are to convert
                    continue

                # Convert files that are markdown files
                with codecs.open(filename, 'r', 'utf-8-sig') as md_file:
                    md = md_file.read()
                if self.resource in ['ta']:
                    # 'ta' needs table and embedded-HTML support.
                    html = markdown2.markdown(md, extras=['markdown-in-html', 'tables'])
                else:
                    html = markdown.markdown(md)
                html = html_template.safe_substitute(title=self.resource.upper(), content=html)

                # Change headers like <h1><a id="verbs"/>Verbs</h1> to <h1 id="verbs">Verbs</h1>
                soup = BeautifulSoup(html, 'html.parser')
                for tag in soup.findAll('a', {'id': True}):
                    if tag.parent and tag.parent.name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
                        tag.parent['id'] = tag['id']
                        tag.parent['class'] = tag.parent.get('class', []) + ['section-header']
                        tag.extract()
                html = unicode(soup)

                # Re-derive base_name without the extension for the output file.
                base_name = os.path.splitext(os.path.basename(filename))[0]
                found_chapters[base_name] = True
                html_filename = base_name + ".html"
                output_file = os.path.join(self.output_dir, html_filename)
                write_file(output_file, html)
                self.log.info('Converted {0} to {1}.'.format(os.path.basename(filename),
                                                             os.path.basename(html_filename)))
            else:
                # Directly copy over files that are not markdown files.
                # Best-effort, but log failures instead of the previous bare
                # `except: pass`, which silently hid every exception.
                output_file = os.path.join(self.output_dir, os.path.basename(filename))
                try:
                    if not os.path.exists(output_file):
                        copyfile(filename, output_file)
                except (IOError, OSError) as e:
                    self.log.warning('Could not copy {0}: {1}'.format(filename, e))
        self.log.info('Finished processing Markdown files.')
Example #17
0
 def upload_build_log_to_s3(self, build_log, s3_commit_key, part=''):
     """
     Serialize *build_log* to a local temp file and push it to the CDN.

     :param dict build_log:
     :param string s3_commit_key:
     :param string part: optional part prefix (e.g. '0/') for multi-part jobs
     :return:
     """
     log_path = os.path.join(self.base_temp_dir, 'build_log.json')
     write_file(log_path, build_log)
     upload_key = '{0}/{1}build_log.json'.format(s3_commit_key, part)
     App.logger.debug('Saving build log to ' + upload_key)
     App.cdn_s3_handler().upload_file(log_path, upload_key, cache_time=0)
Example #18
0
 def run(self):
     """Compile translationWords term files into one markdown file per
     section, recording titles/chapters/book_codes in index.json for the
     downstream templater."""
     index_json = {
         'titles': {},
         'chapters': {},
         'book_codes': {}
     }
     # '# Title #' style headers (trailing hashes optional), per line.
     title_re = re.compile('^# +(.*?) *#*$', flags=re.MULTILINE)
     headers_re = re.compile('^(#+) +(.+?) *#*$', flags=re.MULTILINE)
     for idx, project in enumerate(self.rc.projects):
         term_text = {}
         section_dirs = sorted(glob(os.path.join(self.source_dir, project.path, '*')))
         for section_dir in section_dirs:
             section = os.path.basename(section_dir)
             # Only known sections (e.g. kt/names/other) are processed.
             if section not in self.section_titles:
                 continue
             key = '{0}.html'.format(section)
             index_json['titles'][key] = self.section_titles[section]
             index_json['chapters'][key] = {}
             index_json['book_codes'][key] = section
             term_files = sorted(glob(os.path.join(section_dir, '*.md')))
             for term_file in term_files:
                 term = os.path.splitext(os.path.basename(term_file))[0]
                 text = read_file(term_file)
                 if title_re.search(text):
                     title = title_re.search(text).group(1)
                     text = title_re.sub(r'# <a id="{0}"/>\1 #'.format(term), text)  # inject the term by the title
                 else:
                     title = os.path.splitext(os.path.basename(term_file))[0]  # No title found, so using term
                 # Demote every header one level so terms nest under the section H1.
                 text = headers_re.sub(r'#\1 \2', text)
                 index_json['chapters'][key][term] = title
                 term_text[term] = text
             # Sort terms by title and add to markdown
             markdown = ''
             titles = index_json['chapters'][key]
             terms_sorted_by_title = sorted(titles, key=lambda i: titles[i].lower())
             for term in terms_sorted_by_title:
                 if markdown:
                     markdown += '<hr>\n\n'
                 markdown += term_text[term] + '\n\n'
             markdown = '# <a id="tw-section-{0}"/>{1}\n\n'.format(section, self.section_titles[section]) + markdown
             markdown = self.fix_links(markdown, section)
             output_file = os.path.join(self.output_dir, '{0}.md'.format(section))
             write_file(output_file, markdown)
             # NOTE(review): config.yaml is re-copied for every section of the
             # project — looks redundant but harmless; confirm intentional.
             config_file = os.path.join(self.source_dir, project.path, 'config.yaml')
             if os.path.isfile(config_file):
                 copy(config_file, os.path.join(self.output_dir, 'config.yaml'))
         # NOTE(review): index.json is rewritten once per project; the last
         # project's (cumulative) index wins — confirm intentional.
         output_file = os.path.join(self.output_dir, 'index.json')
         write_file(output_file, index_json)
     return True
Example #19
0
 def run(self):
     """Assemble translationQuestions chunks into one markdown file per
     book, building index.json (titles/chapters/book_codes) for the
     templater."""
     index_json = {
         'titles': {},
         'chapters': {},
         'book_codes': {}
     }
     # Markdown headers with optional trailing hashes, matched per line.
     headers_re = re.compile('^(#+) +(.+?) *#*$', flags=re.MULTILINE)
     for idx, project in enumerate(self.rc.projects):
         if project.identifier in BOOK_NAMES:
             markdown = ''
             book = project.identifier.lower()
             html_file = '{0}-{1}.html'.format(BOOK_NUMBERS[book], book.upper())
             index_json['book_codes'][html_file] = book
             name = BOOK_NAMES[book]
             index_json['titles'][html_file] = name
             chapter_dirs = sorted(glob(os.path.join(self.source_dir, project.path, '*')))
             markdown += '# <a id="tq-{0}"/> {1}\n\n'.format(book, name)
             index_json['chapters'][html_file] = []
             for chapter_dir in chapter_dirs:
                 chapter = os.path.basename(chapter_dir)
                 link = 'tq-chapter-{0}-{1}'.format(book, chapter.zfill(3))
                 index_json['chapters'][html_file].append(link)
                 markdown += '## <a id="{0}"/> {1} {2}\n\n'.format(link, name, chapter.lstrip('0'))
                 chunk_files = sorted(glob(os.path.join(chapter_dir, '*.md')))
                 for chunk_idx, chunk_file in enumerate(chunk_files):
                     # Chunk filenames are zero-padded start verses.
                     start_verse = os.path.splitext(os.path.basename(chunk_file))[0].lstrip('0')
                     if chunk_idx < len(chunk_files)-1:
                         # End verse = one before the next chunk's start verse.
                         end_verse = str(int(os.path.splitext(os.path.basename(chunk_files[chunk_idx+1]))[0])-1)
                     else:
                         # Last chunk runs to the chapter's final verse.
                         end_verse = BOOK_CHAPTER_VERSES[book][chapter.lstrip('0')]
                     link = 'tq-chunk-{0}-{1}-{2}'.format(book, str(chapter).zfill(3), str(start_verse).zfill(3))
                     markdown += '### <a id="{0}"/>{1} {2}:{3}{4}\n\n'.\
                         format(link, name, chapter.lstrip('0'), start_verse,
                                '-'+end_verse if start_verse != end_verse else '')
                     text = read_file(chunk_file) + '\n\n'
                     text = headers_re.sub(r'\1### \2', text)  # This will bump any header down 3 levels
                     markdown += text
             file_path = os.path.join(self.output_dir, '{0}-{1}.md'.format(BOOK_NUMBERS[book], book.upper()))
             write_file(file_path, markdown)
         else:
             # Non-book projects (e.g. front matter) are skipped with a log entry.
             App.logger.debug('TqPreprocessor: extra project found: {0}'.format(project.identifier))
     # Write out index.json
     output_file = os.path.join(self.output_dir, 'index.json')
     write_file(output_file, index_json)
     return True
Example #20
0
    def run(self):
        """Normalize each Bible project into a single file in the output dir.

        Three layouts are handled: a single project file (copied, renamed);
        a directory of per-book files (copied through); or a chapter/chunk
        tree (merged into one file with chapter/chunk markers).
        """
        for idx, project in enumerate(self.rc.projects):
            project_path = os.path.join(self.source_dir, project.path)

            if os.path.isfile(project_path):
                # Case #1: Project path is a file, then we copy the file over to the output dir
                # Canonical books get their book-number prefix; others a sequential one.
                if project.identifier.lower() in BOOK_NUMBERS:
                    filename = '{0}-{1}.{2}'.format(BOOK_NUMBERS[project.identifier.lower()],
                                                    project.identifier.upper(), self.rc.resource.file_ext)
                else:
                    filename = '{0}-{1}.{2}'.format(str(idx + 1).zfill(2), project.identifier,
                                                    self.rc.resource.file_ext)
                copy(project_path, os.path.join(self.output_dir, filename))
            else:
                # Case #2: It's a directory of files, so we copy them over to the output directory
                files = glob(os.path.join(project_path, '*.{0}'.format(self.rc.resource.file_ext)))
                if len(files):
                    for file_path in files:
                        output_file_path = os.path.join(self.output_dir, os.path.basename(file_path))
                        if os.path.isfile(file_path) and not os.path.exists(output_file_path) \
                                and os.path.basename(file_path) not in self.ignoreFiles:
                            copy(file_path, output_file_path)
                else:
                    # Case #3: The project path is multiple chapters, so we piece them together
                    chapters = self.rc.chapters(project.identifier)
                    App.logger.debug("Merging chapters in '{0}'".format(project.identifier))
                    if len(chapters):
                        text = ''
                        for chapter in chapters:
                            # mark_chapter/mark_chunk insert the markers before each unit's text.
                            text = self.mark_chapter(project.identifier, chapter, text)
                            for chunk in self.rc.chunks(project.identifier, chapter):
                                text = self.mark_chunk(project.identifier, chapter, chunk, text)
                                text += read_file(os.path.join(project_path, chapter, chunk))+"\n\n"
                        if project.identifier.lower() in BOOK_NUMBERS:
                            filename = '{0}-{1}.{2}'.format(BOOK_NUMBERS[project.identifier.lower()],
                                                            project.identifier.upper(), self.rc.resource.file_ext)
                        else:
                            filename = '{0}-{1}.{2}'.format(str(idx+1).zfill(2), project.identifier,
                                                            self.rc.resource.file_ext)
                        write_file(os.path.join(self.output_dir, filename), text)
        return True
Example #21
0
    def test_lint_broken_links(self, mock_invoke_markdown_linter):
        """TnLinter must warn once per missing book plus pass through any
        markdown-linter warnings."""
        # given
        mock_invoke_markdown_linter.return_value = {  # Don't care about markdown linting here, just specific tw linting
            '/tmp/tmp_lint_EYZ5zV/en_tn/2th/front/intro.md':
                [
                    {
                        'errorContext': 'dummy error message',
                        'lineNumber': 42,
                        'ruleDescription': 'dummy rule'
                    }
                ]
        }
        expected_warnings = 64 + 1  # 64 missing books + 1 markdown warning
        zip_file = os.path.join(self.resources_dir, 'tn_linter', 'en_tn.zip')
        out_dir = self.unzip_resource(zip_file)

        # remove everything past genesis
        for dir in BOOK_NUMBERS:
            book = '{0}-{1}'.format(BOOK_NUMBERS[dir], dir.upper())
            link = self.get_link_for_book(book)
            book_path = os.path.join(out_dir, 'en_tn', link)
            if os.path.exists(book_path):
                # NOTE(review): lexicographic compare — '01-GEN' is the only
                # name not > "02", so every book except Genesis is removed
                # (including '02-EXO', which is > "02"). Confirm intent.
                if book > "02":
                    file_utils.remove_tree(book_path)

        # put a verse in exo so that we can test that there is some content there
        file_path = os.path.join(out_dir, 'en_tn/exo/01/05.md')
        file_utils.write_file(file_path, 'dummy')

        # create chapter in lev with no md files so that we can test that there is no content there
        file_path = os.path.join(os.path.join(out_dir, 'en_tn/lev/01/readme.txt'))
        file_utils.write_file(file_path, 'dummy')

        new_zip = self.create_new_zip(out_dir)
        linter = TnLinter(source_file=new_zip, commit_data=self.commit_data)

        # when
        linter.run()

        # then
        self.verify_results_warnings_count(expected_warnings, linter)
    def test_callbackSimpleJob_build_error(self):
        """Single-part job: a convert error in the build log must produce an
        'errors' status in the linter-callback results."""
        # given
        self.unzip_resource_files("id_mat_ulb.zip",convert_finished=False)
        # Inject a conversion failure into the fixture's build log.
        build_log_path = self.get_source_path('build_log.json')
        build_log = file_utils.load_json_object(build_log_path)
        build_log['errors'].append('convert error')
        build_log['success'] = False
        build_log['status'] = 'errors'
        file_utils.write_file(build_log_path, build_log)
        self.finish_convert(self.source_folder)
        self.expected_log_count = 9
        self.expected_error_count = 1
        self.expected_success = False
        self.expected_status = "errors"
        linter_cb = self.mock_client_linter_callback()

        # when
        results = linter_cb.process_callback()

        # then
        self.validate_results_and_log(results, linter_cb, self.expected_success, self.expected_status)
 def mock_s3_tn_project(self, part):
     """Seed the mocked S3 buckets with a converted en_tn project fixture
     for the given part number (multi-part deploy tests)."""
     zip_file = os.path.join(self.resources_dir, 'converted_projects', 'en_tn_converted.zip')
     out_dir = os.path.join(self.temp_dir, 'en_tn_converted')
     unzip(zip_file, out_dir)
     src_dir = os.path.join(out_dir, 'en_tn_converted')
     self.project_files = [f for f in os.listdir(src_dir) if os.path.isfile(os.path.join(src_dir, f))]
     self.project_key = 'u/door43/en_tn/12345678'
     # Stamp the part number into the build log before uploading.
     build_log = file_utils.load_json_object(os.path.join(src_dir, 'build_log.json'))
     build_log['part'] = part
     file_utils.write_file(os.path.join(src_dir, 'build_log.json'), build_log)
     App.cdn_s3_handler().upload_file(os.path.join(src_dir, 'build_log.json'),
                                      '{0}/{1}/build_log.json'.format(self.project_key, part))
     App.cdn_s3_handler().upload_file(os.path.join(src_dir, 'index.json'),
                                      '{0}/{1}/index.json'.format(self.project_key, part))
     # The 'finished' marker is just a copy of the build log.
     App.cdn_s3_handler().upload_file(os.path.join(src_dir, 'build_log.json'),
                                      '{0}/{1}/finished'.format(self.project_key, part))
     App.cdn_s3_handler().upload_file(os.path.join(src_dir, '01-GEN.html'),
                                      '{0}/{1}/01-GEN.html'.format(self.project_key, part))
     # NOTE(review): project.json goes under en_tq while everything else is
     # en_tn — looks like a copy/paste slip in the fixture; confirm.
     App.cdn_s3_handler().upload_file(os.path.join(src_dir, 'project.json'),
                                      'u/door43/en_tq/project.json')
     App.door43_s3_handler().upload_file(os.path.join(self.resources_dir, 'templates', 'project-page.html'),
                                         'templates/project-page.html')
    def test_callbackMultpleJob_first_merged(self):
        """First part of a multi-part job: lint log entries merge into the build log."""
        # given
        self.results_key = 'u/tx-manager-test-data/en-ulb/22f3d09f7a'
        self.unzip_resource_files("en_ulb.zip", convert_finished=True)
        self.lint_callback_data['s3_results_key'] = self.results_key + '/0'
        self.lint_callback_data['identifier'] = '1234567890/4/0/01-GEN.usfm'

        # merge the lint log into the build log and save the combined result
        build_log = file_utils.load_json_object(self.get_source_path('build_log.json'))
        lint_log = file_utils.load_json_object(self.get_source_path(file_name='lint_log.json'))
        build_log['log'] += lint_log['log']
        file_utils.write_file(self.get_source_path(file_name='merged.json'), build_log)

        self.expected_log_count = 36
        self.expected_multipart = True
        callback = self.mock_client_linter_callback()

        # when
        results = callback.process_callback()

        # then
        self.validate_results(results, callback)
Пример #25
0
 def prepend_text(self, out_dir, file_name, prefix):
     """Insert ``prefix`` at the start of the given file's contents."""
     target = os.path.join(out_dir, file_name)
     write_file(target, prefix + read_file(target))
Пример #26
0
 def save_data_to_s3(self, key, data):
     """Write ``data`` to a temp file and upload it to the mock CDN under ``key``.

     :param str key: destination S3 key; its last path segment names the temp file
     :param data: contents handed straight to file_utils.write_file
     """
     file_name = key.split('/')[-1]  # last path segment (was a 1-element slice + [0])
     # mkstemp instead of the deprecated, race-prone tempfile.mktemp
     fd, output_file = tempfile.mkstemp(suffix="_" + file_name, dir=self.temp_dir)
     os.close(fd)  # write_file reopens the path itself
     file_utils.write_file(output_file, data)
     self.mock_cdn_upload_file(output_file, key)
 def save_data_to_s3(self, key, data):
     """Persist ``data`` to a temp file and push it to the mock CDN at ``key``."""
     suffix = "_" + key.rsplit('/', 1)[-1]
     temp_path = tempfile.mktemp(suffix=suffix, dir=self.temp_dir)
     file_utils.write_file(temp_path, data)
     self.mock_cdn_upload_file(temp_path, key)
Пример #28
0
    def apply_template(self):
        """Apply the site template to every converted HTML file.

        For each file in self.files, injects its body into the template's
        outer-content div, fills in language, title, heading and sidebars,
        and writes the result to self.output_dir.  Files already listed in
        self.already_converted only get their right sidebar refreshed.

        :raises Exception: if the template has no div with id "outer-content"
        """
        language_code = self.rc.resource.language.identifier
        language_name = self.rc.resource.language.title
        language_dir = self.rc.resource.language.direction
        resource_title = self.rc.resource.title

        self.get_page_navigation()

        heading = '{0}: {1}'.format(language_name, resource_title)
        title = ''
        canonical = ''

        # soup is the template that we will replace content of for every file
        soup = BeautifulSoup(self.template_html, 'html.parser')
        left_sidebar_div = soup.body.find('div', id='left-sidebar')
        outer_content_div = soup.body.find('div', id='outer-content')
        right_sidebar_div = soup.body.find('div', id='right-sidebar')

        # find the outer-content div in the template
        if not outer_content_div:
            raise Exception('No div tag with id "outer-content" was found in the template')

        # get the canonical URL
        if not canonical:
            # BUG FIX: find_all() matches on tag *names*, so passing a CSS
            # selector string never matched anything and canonical stayed
            # empty; select() evaluates the string as a CSS selector.
            links = soup.head.select('link[rel="canonical"]')
            if len(links) == 1:
                canonical = links[0]['href']

        # loop through the html files
        for filename in self.files:
            if filename not in self.already_converted:
                App.logger.debug('Applying template to {0}.'.format(filename))

                # read the downloaded file into a DOM object
                with codecs.open(filename, 'r', 'utf-8-sig') as f:
                    file_soup = BeautifulSoup(f, 'html.parser')

                # get the title from the raw html file (falls back to the file name)
                if not title and file_soup.head and file_soup.head.title:
                    title = file_soup.head.title.text
                else:
                    title = os.path.basename(filename)

                # get the language code, if we haven't yet
                if not language_code:
                    if 'lang' in file_soup.html:
                        language_code = file_soup.html['lang']
                    else:
                        language_code = 'en'

                # get the body of the raw html file
                if not file_soup.body:
                    body = BeautifulSoup('<div>No content</div>', 'html.parser')
                else:
                    body = BeautifulSoup(''.join(['%s' % x for x in file_soup.body.contents]), 'html.parser')

                # insert new HTML into the template
                outer_content_div.clear()
                outer_content_div.append(body)
                soup.html['lang'] = language_code
                soup.html['dir'] = language_dir

                soup.head.title.clear()
                soup.head.title.append(heading+' - '+title)

                # set the page heading
                heading_span = soup.body.find('span', id='h1')
                heading_span.clear()
                heading_span.append(heading)

                if left_sidebar_div:
                    left_sidebar_html = self.build_left_sidebar(filename)
                    left_sidebar = BeautifulSoup(left_sidebar_html, 'html.parser').nav.extract()
                    left_sidebar_div.clear()
                    left_sidebar_div.append(left_sidebar)

                if right_sidebar_div:
                    right_sidebar_div.clear()
                    right_sidebar_html = self.build_right_sidebar(filename)
                    if right_sidebar_html:
                        right_sidebar = BeautifulSoup(right_sidebar_html, 'html.parser')
                        if right_sidebar and right_sidebar.nav:
                            right_sidebar_nav = right_sidebar.nav.extract()
                            right_sidebar_div.append(right_sidebar_nav)

                # render the html as a unicode string
                html = unicode(soup)

                # fix the footer message, removing the title of this page in parentheses as it doesn't get filled
                html = html.replace(
                    '("<a xmlns:dct="http://purl.org/dc/terms/" href="https://live.door43.org/templates/project-page.html" rel="dct:source">{{ HEADING }}</a>") ',
                    '')
                # update the canonical URL - it is in several different locations
                html = html.replace(canonical, canonical.replace('/templates/', '/{0}/'.format(language_code)))

                # Replace HEADING with page title in footer
                html = html.replace('{{ HEADING }}', title)

                # write to output directory
                out_file = os.path.join(self.output_dir, os.path.basename(filename))
                App.logger.debug('Writing {0}.'.format(out_file))
                write_file(out_file, html.encode('ascii', 'xmlcharrefreplace'))

            else:  # if already templated, need to update navigation bar
                # read the templated file into a DOM object
                with codecs.open(filename, 'r', 'utf-8-sig') as f:
                    soup = BeautifulSoup(f, 'html.parser')

                right_sidebar_div = soup.body.find('div', id='right-sidebar')
                if right_sidebar_div:
                    right_sidebar_html = self.build_right_sidebar(filename)
                    right_sidebar = BeautifulSoup(right_sidebar_html, 'html.parser').nav.extract()
                    right_sidebar_div.clear()
                    right_sidebar_div.append(right_sidebar)

                    # render the html as a unicode string
                    html = unicode(soup)

                    # write to output directory
                    out_file = os.path.join(self.output_dir, os.path.basename(filename))
                    App.logger.debug('Updating nav in {0}.'.format(out_file))
                    write_file(out_file, html.encode('ascii', 'xmlcharrefreplace'))
Пример #29
0
 def append_text(self, out_dir, file_name, append):
     """Add ``append`` to the end of the given file's contents."""
     target = os.path.join(out_dir, file_name)
     write_file(target, read_file(target) + append)
Пример #30
0
 def cdn_upload_contents(self, contents, key):
     """Write ``contents`` to a scratch file and upload it to the mock CDN at ``key``."""
     scratch_file = os.path.join(self.temp_dir, 'contents.json')
     write_file(scratch_file, contents)
     App.logger.debug('Writing file to ' + key)
     App.cdn_s3_handler().upload_file(scratch_file, key, cache_time=0)
Пример #31
0
 def test_write_file(self):
     """write_file should create the file containing exactly the given text."""
     fd, self.tmp_file = tempfile.mkstemp()
     os.close(fd)  # close the raw descriptor mkstemp returns (was leaked)
     file_utils.write_file(self.tmp_file, "hello world")
     with open(self.tmp_file, "r") as f:
         self.assertEqual(f.read(), "hello world")
Пример #32
0
    def generate_dashboard(self, max_failures=MAX_FAILURES):
        """
        Generate page with metrics indicating configuration of tx-manager.

        Builds an HTML body listing, per registered tx module, its attributes
        (type, formats, resource types, version, links) and job counts,
        followed by overall job totals, a table of recent failed jobs and
        language popularity tables.  The rendered page is also uploaded to
        the CDN at dashboard/index.html as a fallback.

        :param int max_failures: maximum number of rows in the failed-jobs table
        :return dict: {'title': ..., 'body': <html string>}
        """
        App.logger.debug("Start: generateDashboard")

        dashboard = {
            'title': 'tX-Manager Dashboard',
            'body': 'No modules found'
        }

        items = sorted(TxModule().query(), key=lambda k: k.name)
        if items and len(items):
            module_names = []
            for item in items:
                module_names.append(item.name)

            App.logger.debug("Found: " + str(len(items)) + " item[s] in tx-module")
            App.logger.debug("Reading from Jobs table")

            registered_jobs = self.list_jobs({"convert_module": {"condition": "is_in", "value": module_names}}, False)
            total_job_count = TxJob.query().count()
            registered_job_count = registered_jobs.count()

            App.logger.debug("Finished reading from Jobs table")

            # sanity check since AWS can be slow to update job count reported in table (every 6 hours)
            if registered_job_count > total_job_count:
                total_job_count = registered_job_count

            body = BeautifulSoup('<h1>TX-Manager Dashboard - {0}</h1>'
                                 '<h2>Module Attributes</h2><br><table id="status"></table>'.format(datetime.now()),
                                 'html.parser')
            for item in items:
                module_name = item.name
                App.logger.debug(module_name)
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '"><td class="hdr" colspan="2">' + str(module_name) + '</td></tr>',
                    'html.parser'))

                # populates self.jobs_success / jobs_warnings / jobs_failures / jobs_total
                self.get_jobs_counts_for_module(registered_jobs, module_name)

                # TBD the following code almosts walks the db record replacing next 11 lines
                # for attr, val in item:
                #    if (attr != 'name') and (len(attr) > 0):
                #       rec += '            <tr><td class="lbl">' + attr.replace("_", " ").title() + ':</td><td>' + "lst(val)" + "</td></tr>\n"
                # rec += '<tr><td colspan="2"></td></tr>'

                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-type" class="module-type"><td class="lbl">Type:</td><td>' +
                    str(item.type) + '</td></tr>',
                    'html.parser'))
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-input" class="module-input"><td class="lbl">Input Format:</td><td>' +
                    json.dumps(item.input_format) + '</td></tr>',
                    'html.parser'))
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-output" class="module-output">' +
                    '<td class="lbl">Output Format:</td><td>' +
                    json.dumps(item.output_format) + '</td></tr>',
                    'html.parser'))
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-resource" class="module-resource"><td class="lbl">Resource Types:</td>'
                    '<td>' + json.dumps(item.resource_types) + '</td></tr>',
                    'html.parser'))
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-version" class="module-version"><td class="lbl">Version:</td><td>' +
                    str(item.version) + '</td></tr>',
                    'html.parser'))

                if len(item.options) > 0:
                    body.table.append(BeautifulSoup(
                        '<tr id="' + module_name + '-options" class="module-options">' +
                        '<td class="lbl">Options:</td><td>' +
                        json.dumps(item.options) + '</td></tr>',
                        'html.parser'))

                if len(item.private_links) > 0:
                    body.table.append(BeautifulSoup(
                        '<tr id="' + module_name + '-private-links" class="module-private-links">' +
                        '<td class="lbl">Private Links:</td><td>' +
                        json.dumps(item.private_links) + '</td></tr>',
                        'html.parser'))

                if len(item.public_links) > 0:
                    body.table.append(BeautifulSoup(
                        '<tr id="' + module_name + '-public-links" class="module-public-links">' +
                        '<td class="lbl">Public Links:</td><td>' +
                        json.dumps(item.public_links) + '</td></tr>',
                        'html.parser'))

                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-job-success" class="module-public-links">' +
                    '<td class="lbl">Job Successes:</td><td>' +
                    str(self.jobs_success) + '</td></tr>',
                    'html.parser'))
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-job-warning" class="module-public-links">' +
                    '<td class="lbl">Job Warnings:</td><td>' +
                    str(self.jobs_warnings) + '</td></tr>',
                    'html.parser'))
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-job-failure" class="module-public-links">' +
                    '<td class="lbl">Job Failures:</td><td>' +
                    str(self.jobs_failures) + '</td></tr>',
                    'html.parser'))
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-job-total" class="module-public-links">' +
                    '<td class="lbl">Jobs Total:</td><td>' +
                    str(self.jobs_total) + '</td></tr>',
                    'html.parser'))

            # overall totals across all registered modules
            self.get_jobs_counts(registered_jobs)
            body.table.append(BeautifulSoup(
                '<tr id="totals"><td class="hdr" colspan="2">Total Jobs</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="totals-job-success" class="module-public-links"><td class="lbl">Success:</td><td>' +
                str(self.jobs_success) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="totals-job-warning" class="module-public-links"><td class="lbl">Warnings:</td><td>' +
                str(self.jobs_warnings) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="totals-job-failure" class="module-public-links"><td class="lbl">Failures:</td><td>' +
                str(self.jobs_failures) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="totals-job-unregistered" class="module-public-links"><td class="lbl">Unregistered:</td><td>' +
                str(total_job_count - self.jobs_total) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="totals-job-total" class="module-public-links"><td class="lbl">Total:</td><td>' +
                str(total_job_count) + '</td></tr>',
                'html.parser'))

            # build job failures table
            job_failures = self.get_job_failures(registered_jobs, max_failures)
            body.append(BeautifulSoup('<h2>Failed Jobs</h2>', 'html.parser'))
            failure_table = BeautifulSoup('<table id="failed" cellpadding="4" border="1" ' +
                                          'style="border-collapse:collapse"></table>', 'html.parser')
            failure_table.table.append(BeautifulSoup('''
                <tr id="header">
                <th class="hdr">Time</th>
                <th class="hdr">Errors</th>
                <th class="hdr">Repo</th>
                <th class="hdr">PreConvert</th>
                <th class="hdr">Converted</th>
                <th class="hdr">Destination</th>''', 'html.parser'))

            gogs_url = App.gogs_url
            if gogs_url is None:
                gogs_url = 'https://git.door43.org'

            for i in range(0, len(job_failures)):
                item = job_failures[i]

                try:
                    identifier = item.identifier
                    user_name, repo_name, commit_id = identifier.split('/')[:3]
                    source_sub_path = '{0}/{1}'.format(user_name, repo_name)
                    cdn_bucket = item.cdn_bucket
                    destination_url = 'https://{0}/u/{1}/{2}/{3}/build_log.json'.format(cdn_bucket, user_name,
                                                                                        repo_name, commit_id)
                    repo_url = gogs_url + "/" + source_sub_path
                    preconverted_url = item.source
                    converted_url = item.output
                    failure_table.table.append(BeautifulSoup(
                        '<tr id="failure-' + str(i) + '" class="module-job-id">'
                        + '<td>' + item.created_at.strftime("%Y-%m-%dT%H:%M:%SZ") + '</td>'
                        + '<td>' + ','.join(item.errors) + '</td>'
                        + '<td><a href="' + repo_url + '">' + source_sub_path + '</a></td>'
                        + '<td><a href="' + preconverted_url + '">' + preconverted_url.rsplit('/', 1)[1] + '</a></td>'
                        + '<td><a href="' + converted_url + '">' + item.job_id + '.zip</a></td>'
                        + '<td><a href="' + destination_url + '">Build Log</a></td>'
                        + '</tr>',
                        'html.parser'))
                except Exception as e:
                    # a malformed job record must not break the whole dashboard,
                    # but surface it in the log instead of swallowing it silently
                    App.logger.warning("Skipping malformed job record in dashboard: " + str(e))

            body.append(failure_table)
            self.build_language_popularity_tables(body, max_failures)
            body_html = body.prettify('UTF-8')
            dashboard['body'] = body_html

            # save to cdn in case HTTP connection times out
            try:
                self.temp_dir = tempfile.mkdtemp(suffix="", prefix="dashboard_")
                temp_file = os.path.join(self.temp_dir, "index.html")
                file_utils.write_file(temp_file, body_html)
                cdn_handler = App.cdn_s3_handler()
                cdn_handler.upload_file(temp_file, 'dashboard/index.html')
            except Exception as e:
                App.logger.debug("Could not save dashboard: " + str(e))
        else:
            App.logger.debug("No modules found.")

        App.db().close()
        return dashboard
Пример #33
0
 def replace_text(self, out_dir, file_name, match, replace):
     """Swap ``match`` for ``replace`` in the file, asserting a change occurred."""
     target = os.path.join(out_dir, file_name)
     original = read_file(target)
     updated = original.replace(match, replace)
     self.assertNotEqual(original, updated)
     write_file(target, updated)
Пример #34
0
 def replace_text(self, out_dir, file_name, match, replace):
     """Rewrite the file with ``match`` replaced by ``replace``; fail if nothing changed."""
     path = os.path.join(out_dir, file_name)
     before = read_file(path)
     after = before.replace(match, replace)
     self.assertNotEqual(before, after)
     write_file(path, after)
Пример #35
0
    def apply_template(self):
        """Apply the site template to every converted HTML file.

        For each file in self.files, injects its body into the template's
        outer-content div, fills in language, title, heading and sidebars,
        and writes the result to self.output_dir.  Files already listed in
        self.already_converted only get their right sidebar refreshed.

        :raises Exception: if the template has no div with id "outer-content"
        """
        App.logger.debug('bs4 version: ' + bs4.__version__)
        sys.setrecursionlimit(3000)
        App.logger.debug('Recursion limit: ' + str(sys.getrecursionlimit()))

        language_code = self.rc.resource.language.identifier
        language_name = self.rc.resource.language.title
        language_dir = self.rc.resource.language.direction
        resource_title = self.rc.resource.title

        self.get_page_navigation()

        heading = '{0}: {1}'.format(language_name, resource_title)
        title = ''
        canonical = ''

        # soup is the template that we will replace content of for every file
        soup = BeautifulSoup(self.template_html, 'html.parser')
        left_sidebar_div = soup.body.find('div', id='left-sidebar')
        outer_content_div = soup.body.find('div', id='outer-content')
        right_sidebar_div = soup.body.find('div', id='right-sidebar')

        # find the outer-content div in the template
        if not outer_content_div:
            raise Exception(
                'No div tag with id "outer-content" was found in the template')

        # get the canonical URL
        if not canonical:
            # BUG FIX: find_all() matches on tag *names*, so passing a CSS
            # selector string never matched anything and canonical stayed
            # empty; select() evaluates the string as a CSS selector.
            links = soup.head.select('link[rel="canonical"]')
            if len(links) == 1:
                canonical = links[0]['href']

        # loop through the html files
        for filename in self.files:
            if filename not in self.already_converted:
                App.logger.debug('Applying template to {0}.'.format(filename))

                # read the downloaded file into a DOM object
                with codecs.open(filename, 'r', 'utf-8-sig') as f:
                    file_soup = BeautifulSoup(f, 'html.parser')

                # get the title from the raw html file (falls back to the file name)
                if not title and file_soup.head and file_soup.head.title:
                    title = file_soup.head.title.text
                else:
                    title = os.path.basename(filename)

                # get the language code, if we haven't yet
                if not language_code:
                    if 'lang' in file_soup.html:
                        language_code = file_soup.html['lang']
                    else:
                        language_code = 'en'

                # get the body of the raw html file
                if not file_soup.body:
                    body = BeautifulSoup('<div>No content</div>',
                                         'html.parser')
                else:
                    body = BeautifulSoup(
                        ''.join(['%s' % x for x in file_soup.body.contents]),
                        'html.parser')

                # insert new HTML into the template
                outer_content_div.clear()
                outer_content_div.append(body)
                soup.html['lang'] = language_code
                soup.html['dir'] = language_dir

                soup.head.title.clear()
                soup.head.title.append(heading + ' - ' + title)

                # set the page heading
                heading_span = soup.body.find('span', id='h1')
                heading_span.clear()
                heading_span.append(heading)

                if left_sidebar_div:
                    left_sidebar_html = self.build_left_sidebar(filename)
                    left_sidebar = BeautifulSoup(left_sidebar_html,
                                                 'html.parser').nav.extract()
                    left_sidebar_div.clear()
                    left_sidebar_div.append(left_sidebar)

                if right_sidebar_div:
                    right_sidebar_div.clear()
                    right_sidebar_html = self.build_right_sidebar(filename)
                    if right_sidebar_html:
                        right_sidebar = BeautifulSoup(right_sidebar_html,
                                                      'html.parser')
                        if right_sidebar and right_sidebar.nav:
                            right_sidebar_nav = right_sidebar.nav.extract()
                            right_sidebar_div.append(right_sidebar_nav)

                # render the html as a unicode string
                html = unicode(soup)

                # fix the footer message, removing the title of this page in parentheses as it doesn't get filled
                html = html.replace(
                    '("<a xmlns:dct="http://purl.org/dc/terms/" href="https://live.door43.org/templates/project-page.html" rel="dct:source">{{ HEADING }}</a>") ',
                    '')
                # update the canonical URL - it is in several different locations
                html = html.replace(
                    canonical,
                    canonical.replace('/templates/',
                                      '/{0}/'.format(language_code)))

                # Replace HEADING with page title in footer
                html = html.replace('{{ HEADING }}', title)

                # write to output directory
                out_file = os.path.join(self.output_dir,
                                        os.path.basename(filename))
                App.logger.debug('Writing {0}.'.format(out_file))
                write_file(out_file, html.encode('ascii', 'xmlcharrefreplace'))

            else:  # if already templated, need to update navigation bar
                # read the templated file into a DOM object
                with codecs.open(filename, 'r', 'utf-8-sig') as f:
                    soup = BeautifulSoup(f, 'html.parser')

                right_sidebar_div = soup.body.find('div', id='right-sidebar')
                if right_sidebar_div:
                    right_sidebar_html = self.build_right_sidebar(filename)
                    right_sidebar = BeautifulSoup(right_sidebar_html,
                                                  'html.parser').nav.extract()
                    right_sidebar_div.clear()
                    right_sidebar_div.append(right_sidebar)

                    # render the html as a unicode string
                    html = unicode(soup)

                    # write to output directory
                    out_file = os.path.join(self.output_dir,
                                            os.path.basename(filename))
                    App.logger.debug('Updating nav in {0}.'.format(out_file))
                    write_file(out_file,
                               html.encode('ascii', 'xmlcharrefreplace'))
Пример #36
0
    def run(self):
        """Build combined markdown pages for a translationWords resource.

        For each project, walks its section directories (only those named in
        self.section_titles), reads every term file, injects an anchor into
        each term's title, merges the terms sorted by title into one
        <section>.md in the output dir, and writes an index.json mapping the
        output pages to their titles, terms and book codes.

        :return: True on success
        """
        # accumulated metadata about the generated pages, written out as index.json
        index_json = {'titles': {}, 'chapters': {}, 'book_codes': {}}
        title_re = re.compile('^# +(.*?) *#*$', flags=re.MULTILINE)  # top-level markdown title
        headers_re = re.compile('^(#+) +(.+?) *#*$', flags=re.MULTILINE)  # any markdown header
        for idx, project in enumerate(self.rc.projects):
            term_text = {}  # term id -> processed markdown for that term
            section_dirs = sorted(
                glob(os.path.join(self.source_dir, project.path, '*')))
            for section_dir in section_dirs:
                section = os.path.basename(section_dir)
                if section not in self.section_titles:
                    continue
                key = '{0}.html'.format(section)
                index_json['titles'][key] = self.section_titles[section]
                index_json['chapters'][key] = {}
                index_json['book_codes'][key] = section
                term_files = sorted(glob(os.path.join(section_dir, '*.md')))

                term_files_txt = sorted(
                    glob(os.path.join(section_dir, '*.txt')))
                # If there are txt files in section folders, convert them to md format
                if len(term_files_txt):
                    # restart the whole run so the freshly converted .md files
                    # are picked up.  NOTE(review): assumes txt2md disposes of
                    # the .txt files it converts; otherwise this recursion
                    # would not terminate -- confirm.
                    if txt2md(section_dir):
                        return self.run()

                for term_file in term_files:
                    term = os.path.splitext(os.path.basename(term_file))[0]
                    text = read_file(term_file)
                    if title_re.search(text):
                        title = title_re.search(text).group(1)
                        text = title_re.sub(
                            r'# <a id="{0}"/>\1 #'.format(term),
                            text)  # inject the term by the title
                    else:
                        title = os.path.splitext(os.path.basename(term_file))[
                            0]  # No title found, so using term
                    # demote every header one level so the section title stays top-level
                    text = headers_re.sub(r'#\1 \2', text)
                    index_json['chapters'][key][term] = title
                    term_text[term] = text
                # Sort terms by title and add to markdown
                markdown = ''
                titles = index_json['chapters'][key]
                terms_sorted_by_title = sorted(titles,
                                               key=lambda i: titles[i].lower())
                for term in terms_sorted_by_title:
                    if markdown:
                        markdown += '<hr>\n\n'
                    markdown += term_text[term] + '\n\n'
                # prepend the anchored section heading
                markdown = '# <a id="tw-section-{0}"/>{1}\n\n'.format(
                    section, self.section_titles[section]) + markdown
                markdown = self.fix_links(markdown, section)
                output_file = os.path.join(self.output_dir,
                                           '{0}.md'.format(section))
                write_file(output_file, markdown)
                config_file = os.path.join(self.source_dir, project.path,
                                           'config.yaml')
                if os.path.isfile(config_file):
                    copy(config_file,
                         os.path.join(self.output_dir, 'config.yaml'))
            output_file = os.path.join(self.output_dir, 'index.json')
            write_file(output_file, index_json)
        return True
Пример #37
0
    def run(self):
        for idx, project in enumerate(self.rc.projects):
            project_path = os.path.join(self.source_dir, project.path)
            file_format = '{0}-{1}.usfm'

            # Case #1: The project path is a file, and thus is one book of the Bible, copy to standard filename
            if os.path.isfile(project_path):
                if project.identifier.lower() in BOOK_NUMBERS:
                    filename = file_format.format(
                        BOOK_NUMBERS[project.identifier.lower()],
                        project.identifier.upper())
                else:
                    filename = file_format.format(
                        str(idx + 1).zfill(2), project.identifier.upper())
                copy(project_path, os.path.join(self.output_dir, filename))
                self.books.append(filename)
            else:
                # Case #2: Project path is a dir with one or more USFM files, is one or more books of the Bible
                usfm_files = glob(os.path.join(project_path, '*.usfm'))
                if len(usfm_files):
                    for usfm_path in usfm_files:
                        book_code = os.path.splitext(
                            os.path.basename(usfm_path))[0].split(
                                '-')[-1].lower()
                        if book_code in BOOK_NUMBERS:
                            filename = file_format.format(
                                BOOK_NUMBERS[book_code], book_code.upper())
                        else:
                            filename = '{0}.usfm'.format(
                                os.path.splitext(
                                    os.path.basename(usfm_path))[0])
                        output_file_path = os.path.join(
                            self.output_dir, filename)
                        if os.path.isfile(usfm_path) and not os.path.exists(
                                output_file_path):
                            copy(usfm_path, output_file_path)
                        self.books.append(filename)
                else:
                    # Case #3: Project path is a dir with one or more chapter dirs with chunk & title files
                    chapters = self.rc.chapters(project.identifier)
                    if len(chapters):
                        #          Piece the USFM file together
                        title_file = os.path.join(project_path, 'front',
                                                  'title.txt')
                        if os.path.isfile(title_file):
                            title = read_file(title_file).strip()
                        else:
                            title = project.title
                        if not title and os.path.isfile(
                                os.path.join(project_path, 'title.txt')):
                            title = read_file(
                                os.path.join(project_path, 'title.txt'))
                        usfm = """
\\id {0} {1}
\\ide UTF-8
\\h {2}
\\toc1 {2}
\\toc2 {2}
\\toc3 {2}
\\mt {2}
""".format(project.identifier.upper(), self.rc.resource.title, title)
                        for chapter in chapters:
                            if chapter in self.ignoreDirectories:
                                continue
                            chapter_num = chapter.lstrip('0')
                            chunks = self.rc.chunks(project.identifier,
                                                    chapter)
                            if not len(chunks):
                                continue
                            first_chunk = read_file(
                                os.path.join(project_path, chapter, chunks[0]))
                            usfm += "\n\n"
                            if '\\c {0}'.format(
                                    chapter_num) not in first_chunk:
                                usfm += "\\c {0}\n".format(chapter_num)
                            if os.path.isfile(
                                    os.path.join(project_path, chapter,
                                                 'title.txt')):
                                translated_title = read_file(
                                    os.path.join(project_path, chapter,
                                                 'title.txt'))
                                book_name = re.sub(r' \d+$', '',
                                                   translated_title).strip()
                                if book_name.lower() != title.lower():
                                    usfm += "\cl {0}\n".format(
                                        translated_title)
                            for chunk in chunks:
                                if chunk in self.ignoreFiles:
                                    continue
                                chunk_num = os.path.splitext(chunk)[0].lstrip(
                                    '0')
                                chunk_content = read_file(
                                    os.path.join(project_path, chapter, chunk))
                                if '\\v {0} '.format(
                                        chunk_num) not in chunk_content:
                                    chunk_content = '\\v {0} '.format(
                                        chunk_num) + chunk_content
                                usfm += chunk_content + "\n"
                        if project.identifier.lower() in BOOK_NUMBERS:
                            filename = file_format.format(
                                BOOK_NUMBERS[project.identifier.lower()],
                                project.identifier.upper())
                        else:
                            filename = file_format.format(
                                str(idx + 1).zfill(2),
                                project.identifier.upper())
                        write_file(os.path.join(self.output_dir, filename),
                                   usfm)
                        self.books.append(filename)
        return True
Пример #38
0
 def prepend_text(self, out_dir, file_name, prefix):
     """Insert `prefix` at the start of the named file, rewriting it in place."""
     target = os.path.join(out_dir, file_name)
     write_file(target, prefix + read_file(target))
 def test_write_file(self):
     """Round-trip check: write_file persists the exact text it was given."""
     _, self.tmp_file = tempfile.mkstemp()
     expected = "hello world"
     file_utils.write_file(self.tmp_file, expected)
     with open(self.tmp_file, "r") as handle:
         actual = handle.read()
     self.assertEqual(actual, expected)
Пример #40
0
    def run(self):
        """Merge per-chapter translationNotes chunks into one markdown file
        per Bible book, then write an index.json manifest to output_dir.

        For each project whose identifier is a known book, the chapter and
        chunk files are concatenated (with anchor links for chapters and
        verse ranges) into '<book-number>-<BOOK>.md'.  Projects with unknown
        identifiers are logged and skipped.  Always returns True.
        """
        index_json = {'titles': {}, 'chapters': {}, 'book_codes': {}}
        # Matches ATX markdown headers ("# Title") so they can be demoted.
        headers_re = re.compile('^(#+) +(.+?) *#*$', flags=re.MULTILINE)
        for idx, project in enumerate(self.rc.projects):
            App.logger.debug('TnPreprocessor: processing project: {0}'.format(
                project.identifier))
            if project.identifier in BOOK_NAMES:
                markdown = ''
                book = project.identifier.lower()
                html_file = '{0}-{1}.html'.format(BOOK_NUMBERS[book],
                                                  book.upper())
                index_json['book_codes'][html_file] = book
                name = BOOK_NAMES[book]
                index_json['titles'][html_file] = name
                chapter_dirs = sorted(
                    glob(os.path.join(self.source_dir, project.path, '*')))
                markdown += '# <a id="tn-{0}"/> {1}\n\n'.format(book, name)
                index_json['chapters'][html_file] = []
                # Ensure 'front'/'intro' material sorts before chapter 01.
                for move_str in ['front', 'intro']:
                    self.move_to_front(chapter_dirs, move_str)
                for chapter_dir in chapter_dirs:
                    chapter = os.path.basename(chapter_dir)
                    link = 'tn-chapter-{0}-{1}'.format(book, chapter.zfill(3))
                    index_json['chapters'][html_file].append(link)
                    markdown += '## <a id="{0}"/> {1} {2}\n\n'.format(
                        link, name, chapter.lstrip('0'))
                    chunk_files = sorted(
                        glob(os.path.join(chapter_dir, '*.md')))

                    chunk_files_txt = sorted(
                        glob(os.path.join(chapter_dir, '*.txt')))
                    # If there are txt files in chapter folders, convert them to md format
                    if len(chunk_files_txt):
                        # Restart the whole run after the in-place conversion
                        # so the fresh .md files get picked up.
                        # NOTE(review): assumes txt2md returns truthy only when
                        # it converted something, otherwise this would recurse
                        # forever — confirm against txt2md's contract.
                        if txt2md(chapter_dir):
                            return self.run()

                    for move_str in ['front', 'intro']:
                        self.move_to_front(chunk_files, move_str)
                    for chunk_idx, chunk_file in enumerate(chunk_files):
                        start_verse = os.path.splitext(
                            os.path.basename(chunk_file))[0].lstrip('0')
                        if chunk_idx < len(chunk_files) - 1:
                            # End verse is one less than the next chunk's
                            # starting verse (when that name is numeric).
                            base_file_name = os.path.splitext(
                                os.path.basename(chunk_files[chunk_idx +
                                                             1]))[0]
                            if base_file_name.isdigit():
                                end_verse = str(int(base_file_name) - 1)
                            else:
                                end_verse = start_verse
                        else:
                            # Last chunk runs to the chapter's final verse
                            # when the chapter is known, else to start_verse.
                            chapter_str = chapter.lstrip('0')
                            chapter_verses = BOOK_CHAPTER_VERSES[book]
                            end_verse = chapter_verses[
                                chapter_str] if chapter_str in chapter_verses else start_verse

                        # Zero-pad numeric verses so anchors sort naturally.
                        start_verse_str = str(start_verse).zfill(
                            3) if start_verse.isdigit() else start_verse
                        link = 'tn-chunk-{0}-{1}-{2}'.format(
                            book,
                            str(chapter).zfill(3), start_verse_str)
                        markdown += '### <a id="{0}"/>{1} {2}:{3}{4}\n\n'. \
                            format(link, name, chapter.lstrip('0'), start_verse,
                                   '-'+end_verse if start_verse != end_verse else '')
                        text = read_file(chunk_file) + '\n\n'
                        text = headers_re.sub(
                            r'\1## \2',
                            text)  # This will bump any header down 2 levels
                        markdown += text
                markdown = self.fix_links(markdown)
                book_file_name = '{0}-{1}.md'.format(BOOK_NUMBERS[book],
                                                     book.upper())
                self.books.append(book_file_name)
                file_path = os.path.join(self.output_dir, book_file_name)
                write_file(file_path, markdown)
            else:
                App.logger.debug(
                    'TnPreprocessor: extra project found: {0}'.format(
                        project.identifier))
        # Write out index.json
        output_file = os.path.join(self.output_dir, 'index.json')
        write_file(output_file, index_json)
        return True
Пример #41
0
 def upload_build_log(build_log, file_name, output_dir, s3_results_key, cache_time=0):
     """Persist the build log under output_dir, then push it to the CDN S3 bucket."""
     upload_key = '{0}/{1}'.format(s3_results_key, file_name)
     local_copy = os.path.join(output_dir, file_name)
     write_file(local_copy, build_log)
     App.logger.debug('Saving build log to ' + upload_key)
     App.cdn_s3_handler().upload_file(local_copy, upload_key, cache_time=cache_time)