def ts_cat():
    """Assemble the top-level translationStudio catalog.

    For every project directory (Bible books included) read the local
    ``languages.json``, derive the newest modification date, the sort key
    and any Bible meta tags, then write the combined list to catalog.json.
    """
    catalog_entries = []
    for bible_dir in CatalogUpdater.bible_dirs:
        CatalogUpdater.project_dirs.append(bible_dir)

    for slug in CatalogUpdater.project_dirs:
        local_path = '{0}/{1}/languages.json'.format(CatalogUpdater.obs_v2_local, slug)
        lang_cat = load_json_object(local_path)
        if not lang_cat:
            continue

        remote_url = '{0}/{1}/languages.json'.format(CatalogUpdater.obs_v2_api, slug)

        # newest date_modified across all language entries
        modified_dates = sorted({entry['language']['date_modified'] for entry in lang_cat},
                                reverse=True)
        newest = modified_dates[0]

        sort_key = '01'
        if slug in CatalogUpdater.bible_dirs:
            sort_key = [entry['project']['sort'] for entry in lang_cat if 'project' in entry][0]

        meta_tags = []
        project_meta = lang_cat[0]['project']['meta']
        if project_meta:
            if 'Bible: OT' in project_meta:
                meta_tags.append('bible-ot')
            if 'Bible: NT' in project_meta:
                meta_tags.append('bible-nt')

        catalog_entries.append({'slug': slug,
                                'date_modified': newest,
                                'lang_catalog': '{0}?date_modified={1}'.format(remote_url, newest),
                                'sort': sort_key,
                                'meta': meta_tags
                                })

    # Write global catalog
    write_file('{0}/catalog.json'.format(CatalogUpdater.obs_v2_local), catalog_entries)
    def main(catalog, published_catalog):
        """Build and save the list of OBS translations that are in progress.

        :param list catalog: official language catalog; entries carry 'lc' and 'ln'
        :param list published_catalog: already-published entries, each with a 'language' code
        """
        # get a list of the language codes already completed/published
        pub_list = [x['language'] for x in published_catalog]

        # get a list of the languages for which OBS has been initialized
        out, ret = ObsInProgress.shell_command('find {0} -maxdepth 2 -type d -name obs'.format(ObsInProgress.pages))

        # start building the in-progress list
        in_progress_languages = []
        for line in out.split('\n'):

            # the language code is the 10th path component of the OBS namespace;
            # skip blank or unexpectedly short lines (e.g. the empty string left
            # by the trailing newline of `find` output) instead of raising IndexError
            parts = line.split('/')
            if len(parts) < 10:
                continue
            lc = parts[9]

            # skip this language if it is in the list of published languages
            if lc in pub_list:
                continue

            # make sure the language is in the official list of languages
            for x in catalog:
                if lc == x['lc']:
                    in_progress_languages.append({'lc': lc, 'ln': x['ln']})
                    break

        # now that we have the list of in-progress languages, sort it by language code
        in_progress_languages.sort(key=lambda item: item['lc'])

        # add a date-stamp (YYYYMMDD)
        today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])
        in_progress_languages.append({'date_modified': today})

        # save the results to a file
        write_file(ObsInProgress.obs_in_progress_file_name, in_progress_languages)
    def __init__(self, rc: RC, source_dir: str, output_dir: str) -> None:
        """
        Set up a preprocessor over a local resource-container checkout and
        immediately write a fresh manifest.yaml into the output directory.

        :param RC rc: resource container metadata for the project
        :param string source_dir: local directory holding the source files
        :param string output_dir: local directory the processed files are written to
        """
        self.rc = rc
        self.source_dir = source_dir  # Local directory
        self.output_dir = output_dir  # Local directory
        self.num_files_written = 0
        self.messages: List[str] = [
        ]  # { Messages only display if there's warnings or errors
        self.errors: List[str] = []  # { Errors float to the top of the list
        self.warnings: List[str] = []  # {    above warnings

        # Check that we had a manifest (or equivalent) file
        # found_manifest = False
        # for some_filename in ('manifest.yaml','manifest.json','package.json','project.json','meta.json',):
        #     if os.path.isfile(os.path.join(source_dir,some_filename)):
        #         found_manifest = True; break
        # if not found_manifest:
        if not self.rc.loadeded_manifest_file:  # NOTE: attribute name is spelled this way on RC
            self.warnings.append(
                "Possible missing manifest file in project folder")

        # Write out the new manifest file based on the resource container
        write_file(os.path.join(self.output_dir, 'manifest.yaml'),
                   self.rc.as_dict())
 def write_data_to_file_and_upload_to_CDN(self, output_dir:str, s3_commit_key:str,
                                                 fname:str, data:Union[str, Dict[str,Any]]) -> None:
     """
     Write `data` to `fname` inside `output_dir`, then upload that file to
     the CDN S3 bucket under `<s3_commit_key>/<fname>`.
     """
     out_file = os.path.join(output_dir, fname)
     write_file(out_file, data)
     key = s3_commit_key + '/' + fname
     AppSettings.logger.debug(f"Uploading '{fname}' to {AppSettings.cdn_bucket_name} {key} …")
     # cache_time=0 so the CDN always serves the freshest copy
     AppSettings.cdn_s3_handler().upload_file(out_file, key, cache_time=0)
Example #5
0
    def ts_cat():
        """Build the global translationStudio catalog (catalog.json).

        Fetches languages.json over HTTP for every project and records, per
        project: the newest modification date, the sort key and Bible meta tags.
        Mutates the module-level project_dirs list as a side effect.
        """
        global project_dirs, bible_dirs, obs_v2_local, obs_v2_api

        ts_categories = []
        # Bible book directories are catalogued alongside the other projects
        for x in bible_dirs:
            project_dirs.append(x)
        for p in project_dirs:
            proj_url = '{0}/{1}/languages.json'.format(obs_v2_api, p)
            proj_data = get_url(proj_url, True)
            proj_cat = json.loads(proj_data)
            # newest date_modified across all language entries
            dates = set([x['language']['date_modified'] for x in proj_cat])
            dates_list = list(dates)
            dates_list.sort(reverse=True)
            sort = '01'
            if p in bible_dirs:
                sort = [x['project']['sort'] for x in proj_cat if 'project' in x][0]
            meta = []
            if proj_cat[0]['project']['meta']:
                if 'Bible: OT' in proj_cat[0]['project']['meta']:
                    meta += ['bible-ot']
                if 'Bible: NT' in proj_cat[0]['project']['meta']:
                    meta += ['bible-nt']
            ts_categories.append({'slug': p,
                                  'date_modified': dates_list[0],
                                  'lang_catalog': '{0}?date_modified={1}'.format(
                                      proj_url, dates_list[0]),
                                  'sort': sort,
                                  'meta': meta
                                  })
        # Write global catalog
        outfile = '{0}/catalog.json'.format(obs_v2_local)
        write_file(outfile, ts_categories)
Example #6
0
def export_unfolding_word(status, git_dir, json_data, lang_code,
                          github_organization, front_matter, back_matter):
    """
    Exports JSON data for each language into its own Github repo.

    Writes the OBS content, front/back matter, status and README into
    `git_dir`, then (only when Github credentials were configured) creates,
    commits and pushes the `obs-<lang_code>` repository.
    """
    global github_org
    write_file(os.path.join(git_dir, 'obs-{0}.json'.format(lang_code)),
               json_data)
    write_file(
        os.path.join(git_dir, 'obs-{0}-front-matter.json'.format(lang_code)),
        front_matter)
    write_file(
        os.path.join(git_dir, 'obs-{0}-back-matter.json'.format(lang_code)),
        back_matter)
    status_str = json.dumps(status, sort_keys=True, cls=OBSEncoder)
    write_file(os.path.join(git_dir, 'status-{0}.json'.format(lang_code)),
               status_str)
    write_file(os.path.join(git_dir, 'README.md'), OBS.get_readme_text())

    # without a Github org the export stays local-only
    # NOTE(review): the gate checks the *global* github_org while the repo is
    # created under the github_organization *parameter* -- confirm intentional
    if not github_org:
        return

    gitCreate(git_dir)
    name = 'obs-{0}'.format(lang_code)
    desc = 'Open Bible Stories for {0}'.format(lang_code)
    url = 'http://unfoldingword.org/{0}/'.format(lang_code)
    githubCreate(git_dir, name, desc, url, github_organization)
    # the serialized status doubles as the commit message
    commit_msg = status_str
    gitCommit(git_dir, commit_msg)
    gitPush(git_dir)
 def mock_s3_tn_project(self, part):
     """
     Test fixture: unzip the canned en_tn converted project and upload the
     files S3 would hold after converting part `part`, plus the Door43 page
     template.
     """
     zip_file = os.path.join(self.resources_dir, 'converted_projects',
                             'en_tn_converted.zip')
     out_dir = os.path.join(self.temp_dir, 'en_tn_converted')
     unzip(zip_file, out_dir)
     src_dir = os.path.join(out_dir, 'en_tn_converted')
     self.project_files = [
         f for f in os.listdir(src_dir)
         if os.path.isfile(os.path.join(src_dir, f))
     ]
     self.project_key = 'u/door43/en_tn/12345678'
     # stamp the requested part number into the build log before uploading
     build_log = file_utils.load_json_object(
         os.path.join(src_dir, 'build_log.json'))
     build_log['part'] = part
     file_utils.write_file(os.path.join(src_dir, 'build_log.json'),
                           build_log)
     AppSettings.cdn_s3_handler().upload_file(
         os.path.join(src_dir, 'build_log.json'),
         '{0}/{1}/build_log.json'.format(self.project_key, part))
     AppSettings.cdn_s3_handler().upload_file(
         os.path.join(src_dir, 'index.json'),
         '{0}/{1}/index.json'.format(self.project_key, part))
     # the 'finished' marker is just a copy of the build log
     AppSettings.cdn_s3_handler().upload_file(
         os.path.join(src_dir, 'build_log.json'),
         '{0}/{1}/finished'.format(self.project_key, part))
     AppSettings.cdn_s3_handler().upload_file(
         os.path.join(src_dir, '01-GEN.html'),
         '{0}/{1}/01-GEN.html'.format(self.project_key, part))
     # NOTE(review): project.json is uploaded under en_tq, not en_tn -- confirm
     AppSettings.cdn_s3_handler().upload_file(
         os.path.join(src_dir, 'project.json'),
         'u/door43/en_tq/project.json')
     AppSettings.door43_s3_handler().upload_file(
         os.path.join(self.resources_dir, 'templates', 'project-page.html'),
         'templates/project-page.html')
def main(date_today, tag, version):
    """Download the en-tq repo at `tag`, build questions.json per book, update catalogs.

    :param str date_today: date stamp appended to each book's question list
    :param str tag: git tag/branch of the en-tq repository to download
    :param str version: version string recorded alongside the date stamp
    """
    global download_dir

    repo = 'https://git.door43.org/Door43/en-tq'
    download_dir = tempfile.mkdtemp(prefix='tempTQ_')
    download_url = join_url_parts(repo, 'archive', '{0}.zip'.format(tag))
    downloaded_file = os.path.join(download_dir, 'tQ.zip')

    # download the repository
    # NOTE: try/finally (no except) -- 'finished.' prints even on failure,
    # and the exception still propagates to the caller
    try:
        print('Downloading {0}...'.format(download_url), end=' ')
        download_file(download_url, downloaded_file)
    finally:
        print('finished.')

    try:
        # (was 'Unzipping...'.format(downloaded_file) -- the .format() call was
        # a no-op because the string has no placeholder)
        print('Unzipping...', end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    source_root = os.path.join(download_dir, 'en-tq', 'content')
    books = [x for x in os.listdir(source_root) if os.path.isdir(os.path.join(source_root, x))]

    for book in books:
        print('Processing {}.'.format(book))
        book_dir = os.path.join(source_root, book)
        api_path = os.path.join(api_v2, book, 'en')
        # noinspection PyUnresolvedReferences
        book_questions = []  # type: list[dict]

        for entry in os.listdir(book_dir):
            file_name = os.path.join(book_dir, entry)

            # we are only processing markdown files
            if not os.path.isfile(file_name):
                continue
            if not entry.endswith('.md'):
                continue

            book_questions.append(get_cq(file_name))

        # Check to see if there are published questions in this book
        pub_check = [x['cq'] for x in book_questions if len(x['cq']) > 0]
        if len(pub_check) == 0:
            print('No published questions for {0}'.format(book))
            continue
        book_questions.sort(key=lambda y: y['id'])
        book_questions.append({'date_modified': date_today, 'version': version})
        write_file('{0}/questions.json'.format(api_path), book_questions, indent=2)

    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')
 def get_json(self, lang, entry, tmp_ent):
     """Download a per-language JSON resource into the temp dir and return its local path."""
     remote_name = entry.format(lang)
     remote_url = '/'.join([OBSTexExport.api_url_txt, lang, remote_name])
     local_path = os.path.join(self.temp_dir, tmp_ent.format(lang))
     write_file(local_path, get_url(remote_url))
     # a missing file after the write means the download failed: bail out
     if not os.path.exists(local_path):
         print("Failed to get JSON {0} file into {1}.".format(remote_name, local_path))
         sys.exit(1)
     return local_path
Example #10
0
def export_to_api(lang, status, today, cur_json):
    """
    Publish one language's OBS content: export it to its Github repo and
    merge its entry into the unfoldingWord obs-catalog.json.

    :param lang: language code being exported
    :param status: publish status; must carry `checking_level` and `publish_date`
    :param today: date stamp used for the front/back matter
    :param cur_json: the language's OBS content to export
    """
    global unfoldingWord_dir, lang_cat, github_org, pages

    print('Getting Github credentials...', end=' ')
    try:
        github_org = None
        if os.path.isfile('/root/.github_pass'):
            # noinspection PyTypeChecker
            pw = open('/root/.github_pass', 'r').read().strip()
            g_user = githubLogin('dsm-git', pw)
            github_org = getGithubOrg('unfoldingword', g_user)
        else:
            print('none found...', end=' ')
    except GithubException as e:
        print_error('Problem logging into Github: {0}'.format(e))
        sys.exit(1)
    print('finished.')

    print('Loading the uw catalog...', end=' ')
    uw_cat_path = os.path.join(unfoldingWord_dir, 'obs-catalog.json')
    uw_catalog = load_json_object(uw_cat_path, [])
    uw_cat_langs = [x['language'] for x in uw_catalog]
    print('finished')

    unfolding_word_lang_dir = os.path.join(unfoldingWord_dir, lang)
    # NOTE(review): the membership test treats `status` like a dict, but
    # `status.checking_level` below reads it as an attribute -- confirm that
    # `status` supports both access styles
    if 'checking_level' in status and 'publish_date' in status:
        if status.checking_level in ['1', '2', '3']:

            front_json = OBS.get_front_matter(pages, lang, today)
            back_json = OBS.get_back_matter(pages, lang, today)

            print('Exporting {0}...'.format(lang), end=' ')
            export_unfolding_word(status, unfolding_word_lang_dir, cur_json,
                                  lang, github_org, front_json, back_json)
            # drop any existing catalog entry for this language before re-adding
            if lang in uw_cat_langs:
                uw_catalog.pop(uw_cat_langs.index(lang))
                uw_cat_langs.pop(uw_cat_langs.index(lang))
            uw_catalog.append(lang_cat)

            uw_cat_json = json.dumps(uw_catalog,
                                     sort_keys=True,
                                     cls=OBSEncoder)
            write_file(uw_cat_path, uw_cat_json)

            # update uw_admin status page
            ObsPublishedLangs.update_page(ObsPublishedLangs.cat_url,
                                          ObsPublishedLangs.uw_stat_page)

            print('finished.')
        else:
            print_error('The `checking_level` is invalid.')
            sys.exit(1)
    else:
        print_error(
            'The status is missing `checking_level` or `publish_date`.')
        sys.exit(1)
 def set_deployed_flags(self, project_key, part_count, skip=-1):
     """Upload a near-empty 'deployed' marker file for each part of a project.

     :param project_key: S3 key prefix for the project
     :param part_count: number of parts to flag
     :param skip: part index to leave unflagged (-1 flags all parts)
     """
     # mkstemp (unlike the race-prone, deprecated mktemp) actually creates
     # the file; close the fd since write_file reopens the path itself
     fd, tempf = tempfile.mkstemp(prefix="temp", suffix="deployed")
     os.close(fd)
     file_utils.write_file(tempf, ' ')
     for i in range(0, part_count):
         if i != skip:
             key = '{0}/{1}/deployed'.format(project_key, i)
             AppSettings.cdn_s3_handler().upload_file(tempf,
                                                      key,
                                                      cache_time=0)
     os.remove(tempf)
Example #12
0
 def test_write_file_json(self):
     """
     `write_file` given a dict (rather than a string) must serialize it as
     JSON that round-trips back to an equal object.
     """
     payload = {"one": 1, "two": 2, "child": {"numbers": [3, 4, 5]}}
     _, self.tmp_file = tempfile.mkstemp(prefix='Door43_test_')
     file_utils.write_file(self.tmp_file, payload)
     with open(self.tmp_file, "r") as in_file:
         self.assertEqual(json.load(in_file), payload)
 def replace_tag(self, out_dir, file_name, tag, replace):
     """Replace the text from USFM marker `tag` up to the next backslash marker with `replace`."""
     book_path = os.path.join(out_dir, file_name)
     original = read_file(book_path)
     tag_pos = original.find(f'\\{tag}')
     # next marker after the tag delimits the span being replaced
     next_marker_pos = original.find('\\', tag_pos + 1)
     updated = original[:tag_pos] + replace + original[next_marker_pos:]
     write_file(book_path, updated)
 def replace_verse_to_end(self, out_dir, file_name, chapter, start_vs, replace):
     """From verse `start_vs` of `chapter` through the end of the book, substitute `replace`."""
     book_path = os.path.join(out_dir, file_name)
     original = read_file(book_path)
     c_pos = original.find(f'\\c {chapter}')
     before_chapter = original[:c_pos]
     chapter_onwards = original[c_pos:]
     # everything from the verse marker onward is dropped in favor of `replace`
     v_pos = chapter_onwards.find(f'\\v {start_vs} ')
     write_file(book_path, before_chapter + chapter_onwards[:v_pos] + replace)
 def replace_chapter(self, out_dir, file_name, start_ch, end_ch, replace):
     """Swap the text between chapter markers `start_ch` and `end_ch` for `replace`."""
     book_path = os.path.join(out_dir, file_name)
     original = read_file(book_path)
     start_pos = original.find(f'\\c {start_ch}')
     end_pos = original.find(f'\\c {end_ch}')
     write_file(book_path, original[:start_pos] + replace + original[end_pos:])
def import_now():
    """Download chunk definitions for every book and write one <book>.json per
    book, mapping each chapter to the first verse of each of its chunks."""
    # regexes that compact json.dumps output: tighten numbers inside brackets
    # and strip trailing whitespace before newlines
    regex1 = re.compile(r'(\[)\s+(\d)', re.UNICODE)
    regex2 = re.compile(r'(,)\s+?(\d)', re.UNICODE)
    regex3 = re.compile(r'(\d)\s+(\])', re.UNICODE)
    regex4 = re.compile(r'\s+(\n)', re.UNICODE)

    base_url = 'https://api.unfoldingword.org/bible/txt/1/'
    # output goes next to this script's own file
    output_dir = os.path.join(os.path.dirname(inspect.stack()[0][1]), 'output')

    # get the list of books
    books_url = 'https://raw.githubusercontent.com/unfoldingWord-dev/uw-api/develop/static/versification/ufw/books.json'
    unsorted_books = json.loads(get_url(books_url))
    sorted_books = []

    # (book slug, canonical sort value) pairs, ordered by sort value
    for book in unsorted_books:
        sorted_books.append((book.lower(), unsorted_books[book][1]))

    sorted_books.sort(key=lambda x: x[1])

    # get the chunk definitions
    for book in sorted_books:
        file_contents = []
        chap_num = ''
        current = None
        print('Processing {0}'.format(book[0]))

        chunk_url = '{0}/{1}/chunks.json'.format(base_url, book[0])
        chunk_defs = json.loads(get_url(chunk_url))

        # group consecutive chunks by chapter, collecting each chunk's first verse
        for chunk in chunk_defs:

            # is this is a different chapter?
            if chap_num != chunk['chp']:
                chap_num = chunk['chp']
                if current:
                    file_contents.append(current)
                current = {'chapter': int(chunk['chp']), 'first_verses': []}

            current['first_verses'].append(int(chunk['firstvs']))

        # flush the final chapter
        if current:
            file_contents.append(current)

        # format the output
        file_contents_str = json.dumps(file_contents, sort_keys=True, indent=2)
        file_contents_str = regex1.sub(r'\1\2', file_contents_str)
        file_contents_str = regex2.sub(r'\1 \2', file_contents_str)
        file_contents_str = regex3.sub(r'\1\2', file_contents_str)
        file_contents_str = regex4.sub(r'\1', file_contents_str)

        write_file(os.path.join(output_dir, '{0}.json'.format(book[0])),
                   file_contents_str)
def import_now():
    """Fetch chunk definitions for every book and write one <book>.json each,
    mapping every chapter to the first verse of each of its chunks."""
    # compact the pretty-printed JSON: tighten numbers inside brackets and
    # strip trailing whitespace before newlines
    regex1 = re.compile(r'(\[)\s+(\d)', re.UNICODE)
    regex2 = re.compile(r'(,)\s+?(\d)', re.UNICODE)
    regex3 = re.compile(r'(\d)\s+(\])', re.UNICODE)
    regex4 = re.compile(r'\s+(\n)', re.UNICODE)

    base_url = 'https://api.unfoldingword.org/bible/txt/1/'
    output_dir = os.path.join(os.path.dirname(inspect.stack()[0][1]), 'output')

    # get the list of books, ordered by canonical sort value
    books_url = 'https://raw.githubusercontent.com/unfoldingWord-dev/uw-api/develop/static/versification/ufw/books.json'
    book_index = json.loads(get_url(books_url))
    ordered_books = sorted(((name.lower(), book_index[name][1]) for name in book_index),
                           key=lambda pair: pair[1])

    # get the chunk definitions
    for book_slug, _sort in ordered_books:
        print('Processing {0}'.format(book_slug))

        chunk_defs = json.loads(get_url('{0}/{1}/chunks.json'.format(base_url, book_slug)))

        # group consecutive chunks by chapter, collecting each chunk's first verse
        chapters = []
        last_chapter = ''
        chapter_entry = None
        for chunk in chunk_defs:
            if last_chapter != chunk['chp']:
                last_chapter = chunk['chp']
                if chapter_entry:
                    chapters.append(chapter_entry)
                chapter_entry = {'chapter': int(chunk['chp']), 'first_verses': []}
            chapter_entry['first_verses'].append(int(chunk['firstvs']))

        # flush the final chapter
        if chapter_entry:
            chapters.append(chapter_entry)

        # format the output
        text = json.dumps(chapters, sort_keys=True, indent=2)
        text = regex1.sub(r'\1\2', text)
        text = regex2.sub(r'\1 \2', text)
        text = regex3.sub(r'\1\2', text)
        text = regex4.sub(r'\1', text)

        write_file(os.path.join(output_dir, '{0}.json'.format(book_slug)), text)
    def obs(obs_v1_cat):
        """Convert the OBS v1 catalog into per-language v2 resources.json files
        plus the global obs/languages.json.

        Each v1 entry is reshaped *in place* into a v2 resource record;
        languages whose local front-matter file is missing are skipped.
        """
        langs_cat = []
        # Write OBS catalog for each language
        for e in obs_v1_cat:
            file_name = '{0}/{1}/obs-{1}-front-matter.json'.format(CatalogUpdater.obs_v1_local, e['language'])
            if not os.path.isfile(file_name):
                continue

            front_json = load_json_object(file_name)
            lang_entry = {'language': {'slug': e['language'],
                                       'name': e['string'],
                                       'direction': e['direction'],
                                       'date_modified': e['date_modified']
                                       },
                          'project': {'name': front_json['name'],
                                      'desc': front_json['tagline'],
                                      'meta': []
                                      }
                          }
            # reshape `e` into a v2 resource record: the v1-only keys are
            # removed after their values have been copied into lang_entry
            lang = e['language']
            del e['language']
            del e['string']
            del e['direction']
            e['slug'] = 'obs'
            e['name'] = 'Open Bible Stories'
            e['source'] = CatalogUpdater.add_date('{0}/{1}/obs-{1}.json'.format(CatalogUpdater.obs_v1_api, lang))

            # the auxiliary resources only exist for English
            if lang == 'en':
                e['terms'] = CatalogUpdater.add_date('{0}/{1}/kt-{1}.json'.format(CatalogUpdater.obs_v1_api, lang))
                e['notes'] = CatalogUpdater.add_date('{0}/{1}/tN-{1}.json'.format(CatalogUpdater.obs_v1_api, lang))
                e['tw_cat'] = CatalogUpdater.add_date('{0}/{1}/tw_cat-{1}.json'.format(CatalogUpdater.obs_v1_api, lang))
                e['checking_questions'] = CatalogUpdater.add_date('{0}/{1}/CQ-{1}.json'.format(
                    CatalogUpdater.obs_v1_api, lang))
            else:
                e['terms'] = ''
                e['notes'] = ''
                e['tw_cat'] = ''
                e['checking_questions'] = ''

            e['date_modified'] = CatalogUpdater.most_recent(e)
            outfile = '{0}/obs/{1}/resources.json'.format(CatalogUpdater.obs_v2_local, lang)

            write_file(outfile, [e])

            lang_entry['res_catalog'] = '{0}/obs/{1}/resources.json?date_modified={2}'.format(CatalogUpdater.obs_v2_api,
                                                                                              lang, e['date_modified'])
            langs_cat.append(lang_entry)

        # Write global OBS catalog
        outfile = '{0}/obs/languages.json'.format(CatalogUpdater.obs_v2_local)
        write_file(outfile, langs_cat)
def export_to_api(lang, status, today, cur_json):
    """
    Publish one language's OBS content: export it to its Github repo and
    merge its entry into the unfoldingWord obs-catalog.json.

    :param lang: language code being exported
    :param status: publish status; must carry `checking_level` and `publish_date`
    :param today: date stamp used for the front/back matter
    :param cur_json: the language's OBS content to export
    """
    global unfoldingWord_dir, lang_cat, github_org, pages

    print('Getting Github credentials...', end=' ')
    try:
        github_org = None
        if os.path.isfile('/root/.github_pass'):
            # noinspection PyTypeChecker
            pw = open('/root/.github_pass', 'r').read().strip()
            g_user = githubLogin('dsm-git', pw)
            github_org = getGithubOrg('unfoldingword', g_user)
        else:
            print('none found...', end=' ')
    except GithubException as e:
        print_error('Problem logging into Github: {0}'.format(e))
        sys.exit(1)
    print('finished.')

    print('Loading the uw catalog...', end=' ')
    uw_cat_path = os.path.join(unfoldingWord_dir, 'obs-catalog.json')
    uw_catalog = load_json_object(uw_cat_path, [])
    uw_cat_langs = [x['language'] for x in uw_catalog]
    print('finished')

    unfolding_word_lang_dir = os.path.join(unfoldingWord_dir, lang)
    # NOTE(review): the membership test treats `status` like a dict, but
    # `status.checking_level` below reads it as an attribute -- confirm that
    # `status` supports both access styles
    if 'checking_level' in status and 'publish_date' in status:
        if status.checking_level in ['1', '2', '3']:

            front_json = OBS.get_front_matter(pages, lang, today)
            back_json = OBS.get_back_matter(pages, lang, today)

            print('Exporting {0}...'.format(lang), end=' ')
            export_unfolding_word(status, unfolding_word_lang_dir, cur_json,
                                  lang, github_org, front_json, back_json)
            # drop any existing catalog entry for this language before re-adding
            if lang in uw_cat_langs:
                uw_catalog.pop(uw_cat_langs.index(lang))
                uw_cat_langs.pop(uw_cat_langs.index(lang))
            uw_catalog.append(lang_cat)

            uw_cat_json = json.dumps(uw_catalog, sort_keys=True, cls=OBSEncoder)
            write_file(uw_cat_path, uw_cat_json)

            # update uw_admin status page
            ObsPublishedLangs.update_page(ObsPublishedLangs.cat_url, ObsPublishedLangs.uw_stat_page)

            print('finished.')
        else:
            print_error('The `checking_level` is invalid.')
            sys.exit(1)
    else:
        print_error('The status is missing `checking_level` or `publish_date`.')
        sys.exit(1)
    def run(self) -> Tuple[int, List[str]]:
        """
        Default Preprocessor

        Case #1: Project path is a file, then we copy the file over to the output dir
        Case #2: It's a directory of files, so we copy them over to the output directory
        Case #3: The project path is multiple chapters, so we piece them together

        :return: (number of files written, errors + warnings [+ messages])
        """
        for idx, project in enumerate(self.rc.projects):
            project_path = os.path.join(self.source_dir, project.path)

            # Case #1: the project path is a single file -- copy it across
            if os.path.isfile(project_path):
                filename = f'{str(idx + 1).zfill(2)}-{project.identifier}.{self.rc.resource.file_ext}'
                copy(project_path, os.path.join(self.output_dir, filename))
                self.num_files_written += 1
            else:
                # Case #2: It's a directory of files, so we copy them over to the output directory
                files = glob(
                    os.path.join(project_path,
                                 f'*.{self.rc.resource.file_ext}'))
                if files:
                    for file_path in files:
                        output_file_path = os.path.join(
                            self.output_dir, os.path.basename(file_path))
                        # never overwrite existing output; skip ignored files
                        if os.path.isfile(file_path) and not os.path.exists(output_file_path) \
                                and os.path.basename(file_path) not in self.ignoreFiles:
                            copy(file_path, output_file_path)
                            self.num_files_written += 1
                else:
                    # Case #3: The project path is multiple chapters, so we piece them together
                    chapters = self.rc.chapters(project.identifier)
                    if chapters:
                        text = ''
                        for chapter in chapters:
                            text = self.mark_chapter(project.identifier,
                                                     chapter, text)
                            for chunk in self.rc.chunks(
                                    project.identifier, chapter):
                                text = self.mark_chunk(project.identifier,
                                                       chapter, chunk, text)
                                text += read_file(
                                    os.path.join(project_path, chapter,
                                                 chunk)) + "\n\n"
                        filename = f'{str(idx + 1).zfill(2)}-{project.identifier}.{self.rc.resource.file_ext}'
                        write_file(os.path.join(self.output_dir, filename),
                                   text)
                        self.num_files_written += 1
        if self.num_files_written == 0:
            self.errors.append("No source files discovered")
        # messages are only surfaced when there is something else to report
        return self.num_files_written, self.errors + self.warnings + (
            self.messages if self.errors or self.warnings else [])
Example #21
0
def main(date_today, tag, version):
    """Download the en-tw repo at `tag`, build terms.json, update the catalogs.

    :param str|unicode date_today: date stamp appended to the term list
    :param str|unicode tag: git tag/branch of the en-tw repository to download
    :param str|unicode version: version string recorded with the date stamp
    :return:
    """
    global download_dir, tw_aliases

    repo = 'https://git.door43.org/Door43/en-tw'
    download_dir = tempfile.mkdtemp(prefix='tempTW_')
    download_url = join_url_parts(repo, 'archive', '{0}.zip'.format(tag))
    downloaded_file = os.path.join(download_dir, 'tW.zip')

    # download the repository
    # NOTE: try/finally (no except) -- 'finished.' prints even on failure,
    # and the exception still propagates to the caller
    try:
        print('Downloading {0}...'.format(download_url), end=' ')
        download_file(download_url, downloaded_file)
    finally:
        print('finished.')

    try:
        # (was 'Unzipping...'.format(downloaded_file) -- the .format() call was
        # a no-op because the string has no placeholder)
        print('Unzipping...', end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    tw_list = []
    for root, dirs, files in os.walk(
            os.path.join(download_dir, 'en-tw', 'content')):
        for f in files:
            file_name = os.path.join(root, f)
            tw = get_tw(file_name)
            if tw:
                tw_list.append(tw)

    # attach known aliases, excluding the term itself
    for i in tw_list:  # type: dict
        if i['id'] in tw_aliases:
            i['aliases'] = [x for x in tw_aliases[i['id']] if x != i['term']]

    # longest terms first (presumably so longer matches win during lookup)
    tw_list.sort(key=lambda y: len(y['term']), reverse=True)
    tw_list.append({'date_modified': date_today, 'version': version})
    api_path = os.path.join(api_v2, 'bible', 'en')
    write_file('{0}/terms.json'.format(api_path), tw_list, indent=2)

    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')
Example #22
0
    def run(self):
        """Build the complete USFM document for this project and write it to disk."""
        self.title = self.get_title()
        content = self.get_usfm_header()
        chapters = self.get_chapters()

        # chapters are emitted in sorted key order
        for chapter_key in sorted(chapters):
            content += self.get_chapter(chapters[chapter_key])

        book_id = self.manifest.project['id']
        usfm_file = os.path.join(
            self.output_dir,
            '{0}-{1}.usfm'.format(bible_books.BOOK_NUMBERS[book_id],
                                  book_id.upper()))
        write_file(usfm_file, content)
Example #23
0
    def run(self):
        """Render each OBS chapter as a markdown file in the output directory."""
        language = self.manifest.target_language['id']

        for chapter in self.get_chapters():
            # title, then one image + text block per frame, then the reference
            pieces = [u'# {0}\n\n'.format(chapter.get('title'))]
            for frame in chapter.get('frames'):
                pieces.append(
                    u'![Frame {0}](https://cdn.door43.org/obs/jpg/360px/obs-en-{0}.jpg)\n\n'.format(
                        frame.get('id')))
                pieces.append(frame.get('text') + u'\n\n')
            pieces.append(u'_{0}_\n'.format(chapter.get('reference')))

            target = os.path.join(self.output_dir,
                                  '{0}.md'.format(chapter.get('id')))
            write_file(target, u''.join(pieces))
Example #24
0
    def obs(obs_v1_cat):
        """Convert the OBS v1 catalog into per-language v2 resources.json files
        plus the global obs/languages.json, fetching front matter over HTTP.

        Each v1 entry is reshaped *in place* into a v2 resource record.
        """
        global obs_v1_api, obs_v2_local, obs_v2_api

        langs_cat = []
        # Write OBS catalog for each language
        for e in obs_v1_cat:
            front = get_url('{0}/{1}/obs-{1}-front-matter.json'.format(obs_v1_api,
                                                                       e['language']), True)
            front_json = json.loads(front)
            lang_entry = {'language': {'slug': e['language'],
                                       'name': e['string'],
                                       'direction': e['direction'],
                                       'date_modified': e['date_modified']
                                       },
                          'project': {'name': front_json['name'],
                                      'desc': front_json['tagline'],
                                      'meta': []
                                      }
                          }
            # reshape `e` into a v2 resource record: the v1-only keys are
            # removed after their values have been copied into lang_entry
            del e['string']
            del e['direction']
            e['slug'] = 'obs'
            e['name'] = 'Open Bible Stories'
            e['source'] = CatalogUpdater.add_date('{0}/{1}/obs-{1}.json'.format(obs_v1_api,
                                                                                e['language']))
            e['terms'] = CatalogUpdater.add_date('{0}/{1}/kt-{1}.json'.format(obs_v1_api,
                                                                              e['language']))
            e['notes'] = CatalogUpdater.add_date('{0}/{1}/tN-{1}.json'.format(obs_v1_api,
                                                                              e['language']))
            e['tw_cat'] = CatalogUpdater.add_date('{0}/{1}/tw_cat-{1}.json'.format(obs_v1_api,
                                                                                   e['language']))
            e['checking_questions'] = CatalogUpdater.add_date('{0}/{1}/CQ-{1}.json'.format(
                obs_v1_api, e['language']))
            e['date_modified'] = CatalogUpdater.most_recent(e)
            outfile = '{0}/obs/{1}/resources.json'.format(obs_v2_local,
                                                          e['language'])
            # 'language' must be dropped last: the URLs above still need it
            lang = e['language']
            del e['language']
            write_file(outfile, [e])

            lang_entry['res_catalog'] = '{0}/obs/{1}/resources.json?date_modified={2}'.format(
                obs_v2_api, lang, e['date_modified'])
            langs_cat.append(lang_entry)

        # Write global OBS catalog
        outfile = '{0}/obs/languages.json'.format(obs_v2_local)
        write_file(outfile, langs_cat)
Example #25
0
def handle(event, context):
    """Lambda entry point: rebuild catalog.json from DynamoDB and push to S3.

    Merges per-repo catalog rows into one document keyed by language,
    writes it to a temp file, uploads it to the API bucket, and returns
    the assembled dict.
    """
    print(context.invoked_function_arn)

    # The test AWS account id selects the test bucket.
    if '581647696645' in context.invoked_function_arn:
        api_bucket = 'test-api.door43.org'
    else:
        api_bucket = 'api.door43.org'

    catalog_handler = DynamoDBHandler('catalog-production')
    data = {"languages": []}

    for item in catalog_handler.query_items():
        repo_name = item['repo_name']
        print(repo_name)
        contents = json.loads(item['contents'])
        if repo_name == "catalogs":
            data['catalogs'] = contents
            continue
        if 'language' not in contents:
            continue
        language = contents.pop('language')
        # Reuse an existing entry for this language slug when present.
        existing = None
        for candidate in data['languages']:
            if candidate['slug'] == language['slug']:
                existing = candidate
        if not existing:
            data['languages'].append(language)
        else:
            language = existing
        if repo_name.startswith('localization_'):
            # Localization repos merge their fields into the language entry.
            language.update(contents)
        else:
            # Everything else is a resource attached to the language.
            language.setdefault('resources', []).append(contents)

    catalog_path = os.path.join(tempfile.gettempdir(), 'catalog.json')
    write_file(catalog_path, data)
    s3handler = S3Handler(api_bucket)
    s3handler.upload_file(catalog_path,
                          'v{0}/catalog.json'.format(VERSION),
                          cache_time=0)

    return data
    def convert_obs(self) -> None:
        """Convert the OBS markdown files in ``self.files_dir`` to HTML.

        Markdown files are rendered and wrapped in the bundled HTML
        template; all other files are copied through unchanged.  Output
        goes to ``self.output_dir``.
        """
        self.log.info("Converting OBS markdown files…")

        # Collect every candidate file, skipping the excluded ones.
        files = get_files(directory=self.files_dir,
                          exclude=self.EXCLUDED_FILES)

        current_dir = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(current_dir, 'templates',
                               'template.html')) as template_file:
            html_template = string.Template(template_file.read())

        for filepath in sorted(files):
            if filepath.endswith('.md'):
                # Convert markdown files to templated HTML.
                base_name_part = os.path.splitext(
                    os.path.basename(filepath))[0]
                try:
                    md = read_file(filepath)
                except Exception as e:
                    self.log.error(
                        f"Error reading {base_name_part+'.md'}: {e}")
                    continue
                html = markdown.markdown(md)
                html = html_template.safe_substitute(
                    title=self.repo_subject.replace('_', ' '), content=html)
                html_filename = base_name_part + '.html'
                output_filepath = os.path.join(self.output_dir, html_filename)
                write_file(output_filepath, html)
                self.log.info(
                    f"Converted {os.path.basename(filepath)} to {os.path.basename(html_filename)}."
                )
            else:
                # Directly copy over files that are not markdown files.
                # Copying stays best-effort, but failures are now logged
                # instead of being swallowed by a bare except.
                try:
                    output_filepath = os.path.join(self.output_dir,
                                                   os.path.basename(filepath))
                    if not os.path.exists(output_filepath):
                        copyfile(filepath, output_filepath)
                except OSError as e:
                    self.log.warning(
                        f"Could not copy {os.path.basename(filepath)}: {e}")
        self.log.info("Finished processing OBS markdown files.")
    def mock_s3_bible_project(self,
                              test_file_name,
                              project_key,
                              multi_part=False):
        """Unzip a converted test project and upload its files to mock S3.

        With ``multi_part`` set, HTML files also get a right-sidebar div
        injected and are mirrored to the door43 bucket.
        """
        converted_proj_dir = os.path.join(self.resources_dir,
                                          'converted_projects')
        base = test_file_name.split('.zip')[0]
        archive_path = os.path.join(converted_proj_dir, test_file_name)
        extract_dir = os.path.join(self.temp_dir, base)
        unzip(archive_path, extract_dir)
        project_dir = os.path.join(extract_dir, base) + os.path.sep
        self.project_files = file_utils.get_files(extract_dir)
        self.project_key = project_key
        for path in self.project_files:
            # Derive the bucket key from the path inside the project dir.
            sub_path = path.split(project_dir)[1].replace(os.path.sep, '/')
            AppSettings.cdn_s3_handler().upload_file(
                path, '{0}/{1}'.format(project_key, sub_path))

            if multi_part:  # copy files from cdn to door43
                base_name = os.path.basename(path)
                if '.html' in base_name:
                    with open(path, 'r') as f:
                        soup = BeautifulSoup(f, 'html.parser')

                    # add nav tag
                    sidebar = soup.new_tag('div', id='right-sidebar')
                    soup.body.append(sidebar)
                    file_utils.write_file(
                        path,
                        str(soup).encode('ascii', 'xmlcharrefreplace'))

                AppSettings.door43_s3_handler().upload_file(
                    path, '{0}/{1}'.format(project_key, base_name))

        AppSettings.door43_s3_handler().upload_file(
            os.path.join(self.resources_dir, 'templates', 'project-page.html'),
            'templates/project-page.html')
 def run(self) -> Tuple[int, List[str]]:
     """Collect OBS markdown into the output directory.

     Returns (number of files written, accumulated message list).
     """
     for project in self.rc.projects:
         project_path = os.path.join(self.source_dir, project.path)
         # Copy the project-root markdown files into the output directory.
         for file_path in glob(os.path.join(project_path, '*.md')):
             output_file_path = os.path.join(self.output_dir,
                                             os.path.basename(file_path))
             already_there = os.path.exists(output_file_path)
             ignored = os.path.basename(file_path) in self.ignoreFiles
             if os.path.isfile(file_path) and not already_there and not ignored:
                 copy(file_path, output_file_path)
                 self.num_files_written += 1
         if self.is_chunked(project):
             # Stitch each chunked chapter into one markdown file.
             for chapter in self.get_chapters(project_path):
                 pieces = [f"# {chapter['title']}\n\n"]
                 for frame in chapter['frames']:
                     pieces.append(
                         f"![OBS Image](https://cdn.door43.org/obs/jpg/360px/obs-en-{frame.get('id')}.jpg)\n\n")
                     pieces.append(frame['text'] + '\n\n')
                 pieces.append(f"_{chapter['reference']}_\n")
                 output_file = os.path.join(self.output_dir, 'content',
                                            f"{chapter.get('id')}.md")
                 write_file(output_file, ''.join(pieces))
                 self.num_files_written += 1
         else:
             # Prefer 01.md, falling back to intro.md, for each chapter.
             for chapter in self.rc.chapters(project.identifier):
                 source_file = None
                 for candidate in ('01.md', 'intro.md'):
                     candidate_path = os.path.join(project_path, chapter,
                                                   candidate)
                     if os.path.isfile(candidate_path):
                         source_file = candidate_path
                         break
                 if source_file:
                     copy(source_file,
                          os.path.join(self.output_dir, f'{chapter}.md'))
                     self.num_files_written += 1
     if self.num_files_written == 0:
         self.errors.append("No OBS source files discovered")
     return self.num_files_written, self.errors + self.warnings + (
         self.messages if self.errors or self.warnings else [])
示例#29
0
    def run(self):
        """Build USX from each source directory, chunk it, and write v2 API files."""
        today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])

        # Work either on the single requested source or on every known one.
        if self.source:
            dirs = [self.source]
        else:
            dirs = []
            for source_dir in api_publish.source_dirs:
                dirs.extend(os.path.join(source_dir, x)
                            for x in os.listdir(source_dir))

        for d in dirs:
            # Directory names look like '<ver>-<lang>'.
            ver, lang = d.rsplit('/', 1)[1].split('-', 1)
            self.temp_dir = '/tmp/{0}-{1}'.format(ver, lang)
            if os.path.isdir(self.temp_dir):
                shutil.rmtree(self.temp_dir)
            UsfmTransform.buildUSX(d, self.temp_dir, '', True)
            print("#### Chunking...")
            for f in os.listdir(self.temp_dir):
                # use utf-8-sig to remove the byte order mark
                with codecs.open(os.path.join(self.temp_dir, f),
                                 'r',
                                 encoding='utf-8-sig') as in_file:
                    usx = in_file.readlines()

                slug = f.split('.')[0].lower()
                print('     ({0})'.format(slug.upper()))
                book = self.parse(usx)
                write_file(
                    os.path.join(api_publish.api_v2, slug, lang, ver,
                                 'source.json'),
                    {'chapters': book, 'date_modified': today})
                write_file(
                    os.path.join(api_publish.api_v2, slug, lang, ver,
                                 'chunks.json'),
                    self.get_chunks(book))
def export_unfolding_word(status, git_dir, json_data, lang_code, github_organization, front_matter, back_matter):
    """
    Exports JSON data for each language into its own Github repo.
    """
    global github_org

    # Write the language artifacts into the git working directory.
    write_file(os.path.join(git_dir, 'obs-{0}.json'.format(lang_code)), json_data)
    write_file(os.path.join(git_dir, 'obs-{0}-front-matter.json'.format(lang_code)), front_matter)
    write_file(os.path.join(git_dir, 'obs-{0}-back-matter.json'.format(lang_code)), back_matter)
    status_str = json.dumps(status, sort_keys=True, cls=OBSEncoder)
    write_file(os.path.join(git_dir, 'status-{0}.json'.format(lang_code)), status_str)
    write_file(os.path.join(git_dir, 'README.md'), OBS.get_readme_text())

    # NOTE(review): the gate below checks the module-level github_org while
    # the repo is created under the github_organization parameter — confirm
    # these are intentionally different.
    if not github_org:
        return

    gitCreate(git_dir)
    repo_name = 'obs-{0}'.format(lang_code)
    repo_desc = 'Open Bible Stories for {0}'.format(lang_code)
    repo_url = 'http://unfoldingword.org/{0}/'.format(lang_code)
    githubCreate(git_dir, repo_name, repo_desc, repo_url, github_organization)
    gitCommit(git_dir, status_str)
    gitPush(git_dir)
示例#31
0
 def test_write_file(self):
     """Round-trip a string through file_utils.write_file."""
     _, self.tmp_file = tempfile.mkstemp(prefix='Door43_test_')
     file_utils.write_file(self.tmp_file, "hello world")
     with open(self.tmp_file, "r") as handle:
         content = handle.read()
     self.assertEqual(content, "hello world")
def rechunk_this_one(api_directory):
    """Re-chunk and republish every USFM book in *api_directory*.

    Reads status.json to choose the versification scheme, removes stale
    .sig files, strips old ``\\s5`` chunk markers, re-applies canonical
    chunks to each *.usfm file in place, then republishes to the API and
    updates the catalogs.  Exits the process on any validation failure.
    """
    global id_re, s5_re

    print_notice('Processing {}'.format(api_directory))

    # read the status.json file
    with codecs.open(os.path.join(api_directory, 'status.json'), 'r', 'utf-8-sig') as in_file:
        status = json.loads(in_file.read())

    # determine versification: table lookup instead of an if/elif chain,
    # defaulting to 'ufw' for unlisted languages
    versification_map = {
        'ru': 'rsc',
        'hi': 'ufw-odx',
        'sr-Latn': 'ufw-odx',
        'hu': 'ufw-odx',
        'ta': 'ufw-odx',
        'bn': 'ufw-bn',
        'ar': 'avd',
        'kn': 'ufw-rev',
    }
    versification = versification_map.get(status['lang'], 'ufw')

    versification_data = Bible.get_versification(versification)  # type: list<Book>

    # remove all .sig files
    for f in os.listdir(api_directory):
        if f.endswith('.sig'):
            os.remove(os.path.join(api_directory, f))

    # rechunk files in this directory
    usfm_files = glob(os.path.join(api_directory, '*.usfm'))
    errors_found = False
    for usfm_file in usfm_files:

        if usfm_file.endswith('LICENSE.usfm'):
            continue

        # read the file
        with codecs.open(usfm_file, 'r', 'utf-8') as in_file:
            book_text = in_file.read()

        # get the book id
        book_search = id_re.search(book_text)
        if not book_search:
            print_error('Book id not found in {}'.format(usfm_file))
            sys.exit(1)

        book_id = book_search.group(1)

        print('Beginning {}...'.format(book_id), end=' ')

        # get book versification info
        book = next((b for b in versification_data if b.book_id == book_id), None)
        if not book:
            print_error('Book versification data was not found for "{}"'.format(book_id))
            sys.exit(1)

        # remove \s5 lines
        book_text = s5_re.sub('', book_text)

        # get the usfm for the book
        book.set_usfm(book_text)

        # do basic checks; keep going so all errors are reported at once
        book.verify_chapters_and_verses(True)
        if book.validation_errors:
            errors_found = True

        # get chunks for this book
        Bible.chunk_book(versification, book)
        book.apply_chunks()

        # produces something like '01-GEN.usfm'
        book_file_name = '{0}-{1}.usfm'.format(str(book.number).zfill(2), book.book_id)
        print('Writing ' + book_file_name + '...', end=' ')
        write_file(usfm_file, book.usfm)

        print('finished.')

    if errors_found:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)

    # rebuild source for tS
    print()
    print('Publishing to the API...')
    with api_publish(api_directory) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')
示例#33
0
    def run(self):
        """Download an OBS repo archive and convert its markdown to HTML.

        Downloads and unzips the source repository, reads its manifest,
        then renders content/01.md … 50.md into HTML files in
        ``self.output_directory`` using template.html.  Exceptions are
        collected in ``self.errors``.
        """
        try:
            self.temp_dir = tempfile.mkdtemp(prefix='txOBS_')

            # clean up the git repo url
            if self.source_repo_url[-4:] == '.git':
                self.source_repo_url = self.source_repo_url[:-4]

            if self.source_repo_url[-1:] == '/':
                self.source_repo_url = self.source_repo_url[:-1]

            # download the archive
            file_to_download = join_url_parts(self.source_repo_url, 'archive/master.zip')
            repo_dir = self.source_repo_url.rpartition('/')[2]
            downloaded_file = os.path.join(self.temp_dir, repo_dir + '.zip')
            try:
                print('Downloading {0}...'.format(file_to_download), end=' ')
                if not os.path.isfile(downloaded_file):
                    download_file(file_to_download, downloaded_file)
            finally:
                print('finished.')

            # unzip the archive
            try:
                # FIX: format string previously lacked its {0} placeholder
                print('Unzipping {0}...'.format(downloaded_file), end=' ')
                unzip(downloaded_file, self.temp_dir)
            finally:
                print('finished.')

            # get the manifest
            try:
                print('Reading the manifest...', end=' ')
                manifest = load_json_object(os.path.join(self.temp_dir, 'manifest.json'))
            finally:
                print('finished.')

            # create output directory
            make_dir(self.output_directory)

            # read the markdown files and output html files
            try:
                print('Processing the OBS markdown files')
                # OBS stories are named 01.md through 50.md
                files_to_process = [str(i).zfill(2) + '.md' for i in range(1, 51)]

                current_dir = os.path.dirname(inspect.stack()[0][1])
                with codecs.open(os.path.join(current_dir, 'template.html'), 'r', 'utf-8-sig') as html_file:
                    html_template = html_file.read()

                for file_to_process in files_to_process:

                    # read the markdown file
                    file_name = os.path.join(self.temp_dir, repo_dir, 'content', file_to_process)
                    with codecs.open(file_name, 'r', 'utf-8-sig') as md_file:
                        md = md_file.read()

                    html = markdown.markdown(md)
                    html = TransformOBS.dir_re.sub(r'\1\n' + html + r'\n\2', html_template)
                    write_file(os.path.join(self.output_directory, file_to_process.replace('.md', '.html')), html)

            except IOError as ioe:
                print_error('{0}: {1}'.format(ioe.strerror, ioe.filename))
                self.errors.append(ioe)

            except Exception as e:
                # FIX: exceptions have no .message attribute in Python 3
                print_error(str(e))
                self.errors.append(e)

            finally:
                print('finished.')

        except Exception as e:
            # FIX: exceptions have no .message attribute in Python 3
            print_error(str(e))
            self.errors.append(e)
 def prepend_text(self, out_dir, file_name, prefix):
     """Prefix the contents of out_dir/file_name with *prefix* in place."""
     target = os.path.join(out_dir, file_name)
     write_file(target, prefix + read_file(target))
 def replace_text(self, out_dir, file_name, match, replace):
     """Replace *match* with *replace* in the file, asserting something changed."""
     target = os.path.join(out_dir, file_name)
     original = read_file(target)
     updated = original.replace(match, replace)
     self.assertNotEqual(original, updated)
     write_file(target, updated)
示例#36
0
def main(git_repo, tag):
    """Download a translationAcademy repo at *tag* and export it as JSON.

    Walks the unzipped archive for meta.yaml, toc.yaml and a content
    directory, builds a TAManual from them, and writes the manual JSON to
    the output directory.  Exits the process if any required piece is
    missing.
    """
    global download_dir

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]

    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    metadata_obj = None
    content_dir = None
    toc_obj = None

    # download the repository
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        # FIX: format string previously lacked its {0} placeholder
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):

        if 'meta.yaml' in files:
            # read the metadata
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = TAMetaData(os.path.join(root, 'meta.yaml'))
            finally:
                print('finished.')

        if 'toc.yaml' in files:
            # read the table of contents
            try:
                print('Reading the toc...', end=' ')
                toc_obj = TATableOfContents(os.path.join(root, 'toc.yaml'))
            finally:
                print('finished.')

        if 'content' in dirs:
            content_dir = os.path.join(root, 'content')

        # if we have everything, exit the loop
        if content_dir and metadata_obj and toc_obj:
            break

    # check for valid repository structure
    if not metadata_obj:
        print_error('Did not find meta.yaml in {}'.format(git_repo))
        sys.exit(1)

    if not content_dir:
        print_error('Did not find the content directory in {}'.format(git_repo))
        sys.exit(1)

    if not toc_obj:
        print_error('Did not find toc.yaml in {}'.format(git_repo))
        sys.exit(1)

    # check for missing pages
    check_missing_pages(toc_obj, content_dir)

    # generate the pages
    print('Generating the manual...', end=' ')
    manual = TAManual(metadata_obj, toc_obj)
    manual.load_pages(content_dir)
    print('finished.')

    file_name = os.path.join(get_output_dir(), '{0}_{1}.json'.format(manual.meta.manual, manual.meta.volume))
    print('saving to {0} ...'.format(file_name), end=' ')
    content = json.dumps(manual, sort_keys=True, indent=2, cls=TAEncoder)
    write_file(file_name, content)
    print('finished.')
def main(git_repo, tag, no_pdf):
    """Publish an OBS repository at *tag* to the exports area and API.

    Downloads and unzips the repo, validates its manifest and status,
    builds an OBS object from the chapter markdown, updates the OBS
    catalog, exports to the API and (unless *no_pdf*) builds the PDF.
    Exits the process on any validation failure.
    """
    global download_dir

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]

    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    manifest = None
    status = None  # type: OBSStatus
    content_dir = None

    # download the repository
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        # FIX: format string previously lacked its {0} placeholder
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):

        if 'manifest.json' in files:
            # read the manifest
            try:
                print('Reading the manifest...', end=' ')
                content_dir = root
                manifest = load_json_object(os.path.join(root, 'manifest.json'))
            finally:
                print('finished.')

        if 'status.json' in files:
            # read the meta data
            try:
                print('Reading the status...', end=' ')
                content_dir = root
                status = OBSStatus(os.path.join(root, 'status.json'))
            finally:
                print('finished.')

        # if we have everything, exit the loop
        if content_dir and manifest and status:
            break

    # check for valid repository structure
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)

    if not status:
        print_error('Did not find status.json in {}'.format(git_repo))
        sys.exit(1)

    print('Initializing OBS object...', end=' ')
    lang = manifest['target_language']['id']
    obs_obj = OBS()
    obs_obj.date_modified = today
    obs_obj.direction = manifest['target_language']['direction']
    obs_obj.language = lang
    print('finished')

    obs_obj.chapters = load_obs_chapters(content_dir)
    obs_obj.chapters.sort(key=lambda c: c['number'])

    if not obs_obj.verify_all():
        print_error('Quality check did not pass.')
        sys.exit(1)

    print('Loading languages...', end=' ')
    lang_dict = OBS.load_lang_strings()
    print('finished.')

    print('Loading the catalog...', end=' ')
    export_dir = '/var/www/vhosts/door43.org/httpdocs/exports'
    cat_path = os.path.join(export_dir, 'obs-catalog.json')
    catalog = load_json_object(cat_path, [])
    print('finished')

    print('Getting already published languages...', end=' ')
    json_lang_file_path = os.path.join(export_dir, lang, 'obs', 'obs-{0}.json'.format(lang))

    if lang not in lang_dict:
        print("Configuration for language {0} missing.".format(lang))
        sys.exit(1)
    print('finished.')

    updated = update_language_catalog(lang, obs_obj.direction, status, today, lang_dict, catalog)

    print('Writing the OBS file to the exports directory...', end=' ')
    cur_json = json.dumps(obs_obj, sort_keys=True, cls=OBSEncoder)

    if updated:
        # stamp today's date on this language's catalog entry
        [x for x in catalog if x['language'] == lang][0]['date_modified'] = today
        write_file(json_lang_file_path.replace('.txt', '.json'), cur_json)
    print('finished.')

    export_to_api(lang, status, today, cur_json)

    cat_json = json.dumps(catalog, sort_keys=True, cls=OBSEncoder)
    write_file(cat_path, cat_json)

    # update the catalog
    print_ok('STARTING: ', 'updating the catalogs.')
    update_catalog()
    print_ok('FINISHED: ', 'updating the catalogs.')

    if no_pdf:
        return

    create_pdf(lang, status.checking_level, status.version)
    def run(self):
        """Download, unzip and convert an OBS archive to HTML.

        Renders each markdown file with the obs-template, copies other
        files through, runs OBSInspection, and writes a combined
        all.html of every story.
        """
        # download the archive
        file_to_download = self.source_url
        filename = self.source_url.rpartition('/')[2]
        downloaded_file = os.path.join(self.download_dir, filename)
        self.log_message('Downloading {0}...'.format(file_to_download))
        if not os.path.isfile(downloaded_file):
            try:
                download_file(file_to_download, downloaded_file)
            finally:
                if not os.path.isfile(downloaded_file):
                    raise Exception("Failed to download {0}".format(file_to_download))
                else:
                    self.log_message('Download successful.')

        # unzip the archive
        self.log_message('Unzipping {0}...'.format(downloaded_file))
        unzip(downloaded_file, self.files_dir)
        self.log_message('Unzip successful.')

        # create output directory
        make_dir(self.output_dir)

        # read the markdown files and output html files
        self.log_message('Processing the OBS markdown files')

        files = sorted(glob(os.path.join(self.files_dir, '*')))

        current_dir = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(current_dir, 'obs-template.html')) as template_file:
            html_template = string.Template(template_file.read())

        complete_html = ''
        for filename in files:
            if filename.endswith('.md'):
                # read the markdown file
                with codecs.open(filename, 'r', 'utf-8-sig') as md_file:
                    md = md_file.read()
                html = markdown.markdown(md)
                complete_html += html
                html = html_template.safe_substitute(content=html)
                html_filename = os.path.splitext(os.path.basename(filename))[0] + ".html"
                output_file = os.path.join(self.output_dir, html_filename)
                write_file(output_file, html)
                self.log_message('Converted {0} to {1}.'.format(os.path.basename(filename), os.path.basename(html_filename)))
            else:
                # best-effort copy of non-markdown files, preserving subdirs
                try:
                    output_file = os.path.join(self.output_dir, filename[len(self.files_dir)+1:])
                    if not os.path.exists(output_file):
                        if not os.path.exists(os.path.dirname(output_file)):
                            os.makedirs(os.path.dirname(output_file))
                        copyfile(filename, output_file)
                except Exception:
                    pass

        # Do the OBS inspection
        inspector = OBSInspection(self.output_dir)
        try:
            inspector.run()
        except Exception as e:
            # FIX: exceptions have no .message attribute in Python 3
            self.warning_message('Failed to run OBS inspector: {0}'.format(e))

        for warning in inspector.warnings:
            self.warning_message(warning)
        for error in inspector.errors:
            self.error_message(error)

        complete_html = html_template.safe_substitute(content=complete_html)
        write_file(os.path.join(self.output_dir, 'all.html'), complete_html)

        self.log_message('Made one HTML of all stories in all.html.')
        self.log_message('Finished processing Markdown files.')
def write_page(outfile, p):
    """Write *p* to *outfile*, swapping a .txt extension for .json."""
    json_path = outfile.replace('.txt', '.json')
    write_file(json_path, p)
def main(git_repo, tag, domain):
    """Publish a Bible book repository at *tag* to the API.

    Downloads and unzips the repo, reads manifest.json and meta.json,
    loads the USFM (whole-book file or chunked files), validates it,
    chunks it, writes the book and an updated status.json to the output
    directory, then publishes to the API and updates the catalogs.
    Exits the process on any validation failure.
    """
    global download_dir, out_template

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]

    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    manifest = None
    metadata_obj = None
    content_dir = ''
    usfm_file = None

    # download the repository
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        # FIX: format string previously lacked its {0} placeholder
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):

        if 'manifest.json' in files:
            # read the manifest
            try:
                print('Reading the manifest...', end=' ')
                manifest = load_json_object(os.path.join(root, 'manifest.json'))
                content_dir = root

                # look for the usfm file for the whole book
                found_usfm = glob(os.path.join(content_dir, '*.usfm'))
                if len(found_usfm) == 1:
                    # FIX: glob already returns the content_dir-prefixed
                    # path; re-joining duplicated the directory when the
                    # path was relative.
                    usfm_file = found_usfm[0]
            finally:
                print('finished.')

        if 'meta.json' in files:
            # read the metadata
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = BibleMetaData(os.path.join(root, 'meta.json'))
            finally:
                print('finished.')

        # if we have everything, exit the loop
        if manifest and metadata_obj:
            break

    # check for valid repository structure
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)

    if not metadata_obj:
        print_error('Did not find meta.json in {}'.format(git_repo))
        sys.exit(1)

    # get the versification data
    print('Getting versification info...', end=' ')
    vrs = Bible.get_versification(metadata_obj.versification)  # type: list<Book>

    # get the book object for this repository
    book = next((b for b in vrs if b.book_id.lower() == manifest['project']['id']), None)  # type: Book
    if not book:
        print_error('Book versification data was not found for "{}"'.format(manifest['project']['id']))
        sys.exit(1)
    print('finished')

    if usfm_file:
        read_unified_file(book, usfm_file)
    else:
        read_chunked_files(book, content_dir, metadata_obj)

    # do basic checks
    print('Running USFM checks...', end=' ')
    book.verify_chapters_and_verses(True)
    if book.validation_errors:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)
    else:
        print('finished.')

    # insert paragraph markers
    print('Inserting paragraph markers...', end=' ')
    Bible.insert_paragraph_markers(book)
    print('finished.')

    # get chunks for this book
    print('Chunking the text...', end=' ')
    Bible.chunk_book(metadata_obj.versification, book)
    book.apply_chunks()
    print('finished.')

    # save the output
    out_dir = out_template.format(domain, metadata_obj.slug)

    # produces something like '01-GEN.usfm'
    book_file_name = '{0}-{1}.usfm'.format(str(book.number).zfill(2), book.book_id)
    print('Writing ' + book_file_name + '...', end=' ')
    write_file('{0}/{1}'.format(out_dir, book_file_name), book.usfm)
    print('finished.')

    # look for an existing status.json file
    print('Updating the status for {0}...'.format(metadata_obj.lang), end=' ')
    status_file = '{0}/status.json'.format(out_dir)
    if os.path.isfile(status_file):
        status = BibleStatus(status_file)
    else:
        status = BibleStatus()

    status.update_from_meta_data(metadata_obj)

    # add this book to the list of "books_published"
    status.add_book_published(book)

    # update the "date_modified"
    status.date_modified = today
    print('finished.')

    # save the status.json file
    print('Writing status.json...', end=' ')
    status_json = json.dumps(status, sort_keys=True, indent=2, cls=BibleEncoder)
    write_file(status_file, status_json)
    print('finished')

    # let the API know it is there
    print('Publishing to the API...')
    with api_publish(out_dir) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')

    print_notice('Check {0} and do a git push'.format(out_dir))
def main(resource, lang, slug, name, checking, contrib, ver, check_level,
         comments, source):
    """Download a zipped USFM resource, validate and publish each book.

    Every directory in the unzipped archive containing ``*sfm`` files is
    concatenated into one Book, cleaned, checked and chunked, then written
    to the output directory.  Finally a ``status.json`` manifest is written
    and the result is pushed to the API and the catalogs are updated.

    :param resource: URL of the zip archive containing the USFM source
    :param lang: language code
    :param slug: resource slug
    :param name: human-readable resource name
    :param checking: checking entity
    :param contrib: contributors
    :param ver: version string (e.g. '3.1')
    :param check_level: checking level
    :param comments: checking comments
    :param source: source text identifier
    """

    global downloaded_file, unzipped_dir, out_template

    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])
    zip_name = resource.rpartition('/')[2]
    downloaded_file = '/tmp/{0}'.format(zip_name)

    # BUG FIX: the original used zip_name.strip('.zip'), but str.strip
    # removes any of the characters '.', 'z', 'i', 'p' from BOTH ends
    # (e.g. 'prize.zip' -> 'r').  Remove the '.zip' suffix explicitly.
    if zip_name.lower().endswith('.zip'):
        unzipped_dir = '/tmp/{0}'.format(zip_name[:-4])
    else:
        unzipped_dir = '/tmp/{0}'.format(zip_name)

    out_dir = out_template.format(slug, lang)

    # reuse a previously downloaded archive if present
    if not os.path.isfile(downloaded_file):
        download_file(resource, downloaded_file)

    unzip(downloaded_file, unzipped_dir)

    books_published = {}
    there_were_errors = False

    for root, dirs, files in os.walk(unzipped_dir):

        # only usfm files
        files = [f for f in files if f[-3:].lower() == 'sfm']

        if not files:
            continue

        # there are usfm files, which book is this?
        test_dir = root.rpartition('/')[2]
        book = Book.create_book(test_dir)  # type: Book

        if not book:
            continue

        book_text = ''
        files.sort()

        for usfm_file in files:
            with codecs.open(os.path.join(root, usfm_file), 'r', 'utf-8') as in_file:
                book_text += in_file.read() + '\n'

        book.set_usfm(book_text)
        book.clean_usfm()

        # do basic checks
        book.verify_usfm_tags()
        book.verify_chapters_and_verses()
        if book.validation_errors:
            # BUG FIX: the original tested the cumulative there_were_errors
            # flag here, so one bad book silently skipped every book that
            # followed it.  Skip only the book that actually failed.
            there_were_errors = True
            continue

        # get chunks for this book
        book.apply_chunks()

        # produces something like '01-GEN.usfm'
        book_file_name = '{0}-{1}.usfm'.format(str(book.number).zfill(2), book.book_id)
        print('Writing ' + book_file_name)
        write_file('{0}/{1}'.format(out_dir, book_file_name), book.usfm)

        # books 1-39 are Old Testament, everything after is New Testament
        meta = ['Bible: OT']
        if book.number > 39:
            meta = ['Bible: NT']
        books_published[book.book_id.lower()] = {'name': book.name,
                                                 'meta': meta,
                                                 'sort': str(book.number).zfill(2),
                                                 'desc': ''
                                                 }

    if there_were_errors:
        print_warning('There are errors you need to fix before continuing.')
        exit()

    # source_text_version wants just the major version (e.g. '3' from '3.1')
    source_ver = ver
    if '.' in ver:
        source_ver = ver.split('.')[0]
    status = {"slug": '{0}-{1}'.format(slug.lower(), lang),
              "name": name,
              "lang": lang,
              "date_modified": today,
              "books_published": books_published,
              "status": {"checking_entity": checking,
                         "checking_level": check_level,
                         "comments": comments,
                         "contributors": contrib,
                         "publish_date": today,
                         "source_text": source,
                         "source_text_version": source_ver,
                         "version": ver
                         }
              }
    write_file('{0}/status.json'.format(out_dir), status)

    print('Publishing to the API...')
    with api_publish(out_dir) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')

    print('Check {0} and do a git push'.format(out_dir))
示例#42
0
def main(resource, lang, slug, name, checking, contrib, ver, check_level,
         comments, source):
    """Download a zipped USFM resource, validate and publish each book.

    Every directory in the unzipped archive containing ``*sfm`` files is
    concatenated into one Book, cleaned, checked and chunked, then written
    to the output directory.  Finally a ``status.json`` manifest is written
    and the result is pushed to the API and the catalogs are updated.

    :param resource: URL of the zip archive containing the USFM source
    :param lang: language code
    :param slug: resource slug
    :param name: human-readable resource name
    :param checking: checking entity
    :param contrib: contributors
    :param ver: version string (e.g. '3.1')
    :param check_level: checking level
    :param comments: checking comments
    :param source: source text identifier
    """

    global downloaded_file, unzipped_dir, out_template

    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])
    zip_name = resource.rpartition('/')[2]
    downloaded_file = '/tmp/{0}'.format(zip_name)

    # BUG FIX: the original used zip_name.strip('.zip'), but str.strip
    # removes any of the characters '.', 'z', 'i', 'p' from BOTH ends
    # (e.g. 'prize.zip' -> 'r').  Remove the '.zip' suffix explicitly.
    if zip_name.lower().endswith('.zip'):
        unzipped_dir = '/tmp/{0}'.format(zip_name[:-4])
    else:
        unzipped_dir = '/tmp/{0}'.format(zip_name)

    out_dir = out_template.format(slug, lang)

    # reuse a previously downloaded archive if present
    if not os.path.isfile(downloaded_file):
        download_file(resource, downloaded_file)

    unzip(downloaded_file, unzipped_dir)

    books_published = {}
    there_were_errors = False

    for root, dirs, files in os.walk(unzipped_dir):

        # only usfm files
        files = [f for f in files if f[-3:].lower() == 'sfm']

        if not files:
            continue

        # there are usfm files, which book is this?
        test_dir = root.rpartition('/')[2]
        book = Book.create_book(test_dir)  # type: Book

        if not book:
            continue

        book_text = ''
        files.sort()

        for usfm_file in files:
            with codecs.open(os.path.join(root, usfm_file), 'r',
                             'utf-8') as in_file:
                book_text += in_file.read() + '\n'

        book.set_usfm(book_text)
        book.clean_usfm()

        # do basic checks
        book.verify_usfm_tags()
        book.verify_chapters_and_verses()
        if book.validation_errors:
            # BUG FIX: the original tested the cumulative there_were_errors
            # flag here, so one bad book silently skipped every book that
            # followed it.  Skip only the book that actually failed.
            there_were_errors = True
            continue

        # get chunks for this book
        book.apply_chunks()

        # produces something like '01-GEN.usfm'
        book_file_name = '{0}-{1}.usfm'.format(
            str(book.number).zfill(2), book.book_id)
        print('Writing ' + book_file_name)
        write_file('{0}/{1}'.format(out_dir, book_file_name), book.usfm)

        # books 1-39 are Old Testament, everything after is New Testament
        meta = ['Bible: OT']
        if book.number > 39:
            meta = ['Bible: NT']
        books_published[book.book_id.lower()] = {
            'name': book.name,
            'meta': meta,
            'sort': str(book.number).zfill(2),
            'desc': ''
        }

    if there_were_errors:
        print_warning('There are errors you need to fix before continuing.')
        exit()

    # source_text_version wants just the major version (e.g. '3' from '3.1')
    source_ver = ver
    if '.' in ver:
        source_ver = ver.split('.')[0]
    status = {
        "slug": '{0}-{1}'.format(slug.lower(), lang),
        "name": name,
        "lang": lang,
        "date_modified": today,
        "books_published": books_published,
        "status": {
            "checking_entity": checking,
            "checking_level": check_level,
            "comments": comments,
            "contributors": contrib,
            "publish_date": today,
            "source_text": source,
            "source_text_version": source_ver,
            "version": ver
        }
    }
    write_file('{0}/status.json'.format(out_dir), status)

    print('Publishing to the API...')
    with api_publish(out_dir) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')

    print('Check {0} and do a git push'.format(out_dir))
示例#43
0
def handle(event, context):
    """Deploy a finished tx-manager conversion job to the cdn bucket.

    Downloads the converted zip named in the job payload, unzips it,
    uploads the contents under ``u/<user>/<repo>/<commit>``, then merges
    the job results into ``build_log.json`` and ``project.json`` for the
    repository and re-uploads both.
    """
    # The job JSON that tx-manager sent lives under the 'data' key.
    if 'data' not in event:
        raise Exception('"data" not in payload')
    job = event['data']

    # Optional environment variables supplied by the API Gateway.
    env_vars = {}
    if 'vars' in event and isinstance(event['vars'], dict):
        env_vars = event['vars']

    # The destination bucket differs between production and testing, so it
    # arrives as an environment variable rather than being hard-coded.
    if 'cdn_bucket' not in env_vars:
        raise Exception('"cdn_bucket" was not in payload')
    cdn_handler = S3Handler(env_vars['cdn_bucket'])

    if 'identifier' not in job or not job['identifier']:
        raise Exception('"identifier" not in payload')

    # The identifier '<user>/<repo>/<commit>' says where this callback goes.
    owner_name, repo_name, commit_id = job['identifier'].split('/')
    s3_commit_key = 'u/{0}/{1}/{2}'.format(owner_name, repo_name, commit_id)

    # Fetch the zip of converted files, reusing a cached copy if present.
    converted_zip_url = job['output']
    converted_zip_file = os.path.join(tempfile.gettempdir(),
                                      converted_zip_url.rpartition('/')[2])
    try:
        print('Downloading converted zip file from {0}...'.format(
            converted_zip_url))
        if not os.path.isfile(converted_zip_file):
            download_file(converted_zip_url, converted_zip_file)
    finally:
        print('finished.')

    # Unpack the archive into a fresh temp directory.
    unzip_dir = tempfile.mkdtemp(prefix='unzip_')
    try:
        print('Unzipping {0}...'.format(converted_zip_file))
        unzip(converted_zip_file, unzip_dir)
    finally:
        print('finished.')

    # Mirror every extracted file into the bucket under the commit key.
    for root, dirs, files in os.walk(unzip_dir):
        for file_name in sorted(files):
            local_path = os.path.join(root, file_name)
            s3_key = s3_commit_key + local_path.replace(unzip_dir, '')
            print('Uploading {0} to {1}'.format(file_name, s3_key))
            cdn_handler.upload_file(local_path, s3_key)

    # Merge the job results into the existing build log and re-upload it.
    build_log_json = cdn_handler.get_json(s3_commit_key + '/build_log.json')

    for field in ('started_at', 'ended_at', 'success', 'status', 'message'):
        build_log_json[field] = job[field]

    # List fields default to [] when absent or falsy in the job payload.
    for list_field in ('log', 'warnings', 'errors'):
        if list_field in job and job[list_field]:
            build_log_json[list_field] = job[list_field]
        else:
            build_log_json[list_field] = []

    build_log_file = os.path.join(tempfile.gettempdir(),
                                  'build_log_finished.json')
    write_file(build_log_file, build_log_json)
    cdn_handler.upload_file(build_log_file, s3_commit_key + '/build_log.json',
                            0)

    # Refresh project.json for this repo (created on first use).
    project_json_key = 'u/{0}/{1}/project.json'.format(owner_name, repo_name)
    project_json = cdn_handler.get_json(project_json_key)

    project_json['user'] = owner_name
    project_json['repo'] = repo_name
    project_json['repo_url'] = 'https://git.door43.org/{0}/{1}'.format(
        owner_name, repo_name)

    commit = {
        'id': commit_id,
        'created_at': job['created_at'],
        'status': job['status'],
        'success': job['success'],
        'started_at': job['started_at'] if 'started_at' in job else None,
        'ended_at': job['ended_at'] if 'ended_at' in job else None
    }

    if 'commits' not in project_json:
        project_json['commits'] = []

    # Replace any earlier record of this commit, keeping the rest in order.
    kept_commits = [c for c in project_json['commits'] if c['id'] != commit_id]
    kept_commits.append(commit)
    project_json['commits'] = kept_commits

    project_file = os.path.join(tempfile.gettempdir(), 'project.json')
    write_file(project_file, project_json)
    cdn_handler.upload_file(project_file, project_json_key, 0)

    print('Finished deploying to cdn_bucket. Done.')
    def run(self):
        """Build the complete ConTeXt (.tex) document for this OBS export
        and write it to ``self.out_path``.

        Loads the front-matter, back-matter and body JSON for ``self.lang``,
        converts each to TeX, then splices the pieces into
        ``main_template.tex`` wherever the class's matcher patterns hit.
        """

        # matches a relative 'obs/tex/' path preceded by '{' or a space
        relative_path_re = re.compile(r'([{ ])obs/tex/', re.UNICODE)

        # NOTE(review): wrapping stdout like this is a Python 2 utf-8
        # output idiom — confirm before running under Python 3.
        sys.stdout = codecs.getwriter('utf8')(sys.stdout)
        top_tmp_f = self.get_json(self.lang, 'obs-{0}-front-matter.json', '{0}-front-matter-json.tmp')
        bot_tmp_f = self.get_json(self.lang, 'obs-{0}-back-matter.json', '{0}-back-matter-json.tmp')
        lang_top_json = load_json_object(top_tmp_f, {})
        lang_bot_json = load_json_object(bot_tmp_f, {})
        # Parse the front and back matter
        front_matter = self.export_matter(lang_top_json['front-matter'], 0)
        # The front matter really has two parts, an "about" section and a "license" section
        # Sadly the API returns it as one blob, but we want to insert the checking level
        # indicator on between the two. Until such a time as the API returns these strings separately,
        # this is a hack to split them. Failing a match it should just put the whole thing in the first section
        # fm = re.split(r'\{\\\\bf.+:\s*\}\\n', front_matter)
        fm = re.split(r'\s(?=\{\\bf.+:\s*\})', front_matter)
        output_front_about = fm[0]
        if len(fm) > 1:
            output_front_license = ''.join(fm[1:])
        else:
            # no split point found: everything goes in the "about" section
            output_front_license = ''
        output_back = self.export_matter(lang_bot_json['back-matter'], 0)
        # Parse the body matter
        jsonf = 'obs-{0}.json'.format(self.lang)

        tmpf = self.get_json(self.lang, jsonf, '{0}-body-matter-json.tmp')
        self.body_json = load_json_object(tmpf, {})
        self.check_for_standard_keys_json()
        # Hacks to make up for missing localized strings
        if 'toctitle' not in self.body_json.keys():
            self.body_json['toctitle'] = OBSTexExport.extract_title_from_frontmatter(lang_top_json['front-matter'])
        output = self.export(self.body_json['chapters'], self.max_chapters, self.img_res, self.body_json['language'])
        # For ConTeXt files only, Read the "main_template.tex" file replacing
        # all <<<[anyvar]>>> with its definition from the body-matter JSON file
        outlist = []
        tex_template = os.path.join(OBSTexExport.snippets_dir, 'main_template.tex')
        if not os.path.exists(tex_template):
            print("Failed to get TeX template.")
            sys.exit(1)

        with codecs.open(tex_template, 'r', encoding='utf-8-sig') as in_file:
            template = in_file.read()

        # replace relative path to fonts with absolute
        template = relative_path_re.sub(r'\1{0}/'.format(OBSTexExport.snippets_dir), template)

        # splice the generated TeX into the template line by line: each
        # placeholder pattern is replaced with its corresponding section
        for single_line in template.splitlines():

            if OBSTexExport.matchChaptersPat.search(single_line):
                outlist.append(output)
            elif OBSTexExport.matchFrontMatterAboutPat.search(single_line):
                outlist.append(output_front_about)
            elif OBSTexExport.matchFrontMatterlicensePat.search(single_line):
                outlist.append(output_front_license)
            elif OBSTexExport.matchBackMatterPat.search(single_line):
                outlist.append(output_back)
            else:
                # repeatedly apply the misc substitution until it stops
                # matching (substitutions can introduce new matches)
                occurs = 1
                while occurs > 0:
                    (single_line, occurs) \
                        = OBSTexExport.matchMiscPat.subn(self.another_replace, single_line,
                                                         OBSTexExport.MATCH_ALL)
                outlist.append(single_line)
        full_output = '\n'.join(outlist)
        write_file(self.out_path, full_output)
    def bible(self, lang_names, bible_status, bible_bks, langs):
        """Write the per-book Bible catalog files.

        For every published book/language this writes
        ``<obs_v2_local>/<bk>/<lang>/resources.json`` (one entry per
        resource slug, with dated links to source/terms/notes/tw_cat/
        questions) and ``<obs_v2_local>/<bk>/languages.json`` (one entry
        per language with its resource-catalog link).

        :param lang_names: language records consumed by get_lang_info
        :param bible_status: dict keyed by (domain, slug, lang) holding
            the parsed status.json data for each resource
        :param bible_bks: list of book slugs (may contain duplicates)
        :param langs: iterable of language codes to process
        """

        bks_set = set(bible_bks)
        for bk in bks_set:
            for lang_iter in langs:
                resources_cat = []
                for domain, slug, lang in self.bible_slugs:

                    if (domain, slug, lang) not in bible_status:
                        continue

                    this_status = bible_status[(domain, slug, lang)]
                    if bk not in this_status['books_published'].keys():
                        continue

                    if lang != lang_iter:
                        continue

                    # NOTE(review): 'lang' is reassigned from status.json
                    # here, shadowing the loop variable; the resources.json
                    # path below deliberately uses lang_iter instead.
                    lang = this_status['lang']
                    slug_cat = deepcopy(this_status)

                    # add link to source
                    if os.path.isfile('{0}/{1}/{2}/{3}/source.json'.format(self.obs_v2_local, bk, lang, slug)):
                        slug_cat['source'] = CatalogUpdater.add_date('{0}/{1}/{2}/{3}/source.json'
                                                                     .format(self.obs_v2_api, bk, lang, slug))
                    else:
                        slug_cat['source'] = ''

                    # carry the source's date_modified query string over to
                    # the usfm link so both stay cache-consistent
                    source_date = ''
                    if '?' in slug_cat['source']:
                        source_date = slug_cat['source'].split('?')[1]
                    usfm_name = '{0}-{1}.usfm'.format(this_status['books_published'][bk]['sort'], bk.upper())

                    # add link to usfm
                    slug_cat['usfm'] = self.usfm_api.format(domain, slug, lang, usfm_name) + '?' + source_date

                    # add link to terms
                    if os.path.isfile('{0}/bible/{1}/terms.json'.format(self.obs_v2_local, lang)):
                        slug_cat['terms'] = CatalogUpdater.add_date('{0}/bible/{1}/terms.json'.format(self.obs_v2_api,
                                                                                                      lang))
                    else:
                        slug_cat['terms'] = ''

                    # add link to notes
                    if os.path.isfile('{0}/{1}/{2}/notes.json'.format(self.obs_v2_local, bk, lang)):
                        slug_cat['notes'] = CatalogUpdater.add_date('{0}/{1}/{2}/notes.json'.format(self.obs_v2_api, bk,
                                                                                                    lang))
                    else:
                        slug_cat['notes'] = ''

                    # add link to tW
                    if os.path.isfile('{0}/{1}/{2}/tw_cat.json'.format(self.obs_v2_local, bk, lang)):
                        slug_cat['tw_cat'] = CatalogUpdater.add_date('{0}/{1}/{2}/tw_cat.json'.format(self.obs_v2_api,
                                                                                                      bk, lang))
                    else:
                        slug_cat['tw_cat'] = ''

                    # add link to tQ
                    if os.path.isfile('{0}/{1}/{2}/questions.json'.format(self.obs_v2_local, bk, lang)):
                        slug_cat['checking_questions'] = CatalogUpdater.add_date('{0}/{1}/{2}/questions.json'
                                                                                 .format(self.obs_v2_api, bk, lang))
                    else:
                        slug_cat['checking_questions'] = ''

                    # these keys are per-resource status data, not part of
                    # the resources.json entry format
                    del slug_cat['books_published']
                    del slug_cat['lang']
                    slug_cat['date_modified'] = CatalogUpdater.most_recent(slug_cat)

                    # 2016-05-21, Phil Hopper: The slug value from status.json might have the language code appended
                    slug_cat['slug'] = slug

                    resources_cat.append(slug_cat)

                # only write the file if there is something to publish
                if resources_cat:
                    outfile = '{0}/{1}/{2}/resources.json'.format(self.obs_v2_local, bk, lang_iter)
                    write_file(outfile, resources_cat)

        # second pass: one languages.json per book, one entry per language
        for bk in bks_set:
            languages_cat = []
            langs_processed = []
            for lang_iter in langs:
                for domain, slug, lang in self.bible_slugs:
                    if lang in langs_processed:
                        continue
                    if lang != lang_iter:
                        continue
                    if (domain, slug, lang_iter) not in bible_status:
                        continue

                    this_status = bible_status[(domain, slug, lang_iter)]

                    if bk not in this_status['books_published'].keys():
                        continue
                    lang_info = CatalogUpdater.get_lang_info(lang_iter, lang_names)
                    res_info = {'project': this_status['books_published'][bk],
                                'language': {'slug': lang_info['lc'],
                                             'name': lang_info['ln'],
                                             'direction': lang_info['ld'],
                                             'date_modified': this_status['date_modified'],
                                             },
                                'res_catalog': CatalogUpdater.add_date(
                                    '{0}/{1}/{2}/resources.json'.format(
                                        self.obs_v2_api, bk, lang_info['lc']))
                                }
                    res_info['language']['date_modified'] = CatalogUpdater.most_recent(res_info)
                    languages_cat.append(res_info)
                    langs_processed.append(lang)
            outfile = '{0}/{1}/languages.json'.format(self.obs_v2_local, bk)
            write_file(outfile, languages_cat)
    def uw_cat(self, obs_v1_cat, bible_status):
        """Write the combined unfoldingWord v2 catalog.

        Builds a 'Bible' category from ``bible_status`` and an
        'Open Bible Stories' category from ``obs_v1_cat`` (augmented with
        names/descriptions fetched from ``self.ts_obs_langs_url``), then
        writes both, with the newest modification time, to
        ``self.uw_v2_local``.

        :param obs_v1_cat: iterable of OBS v1 catalog entries
        :param bible_status: dict keyed by (domain, slug, lang) holding
            the parsed status.json data for each resource
        """

        # Create Bible section
        uw_bible = {'title': 'Bible',
                    'slug': 'bible',
                    'langs': []
                    }
        # group resource versions by language code
        lang_cat = {}
        for domain, slug, lang in self.bible_slugs:

            if (domain, slug, lang) not in bible_status:
                continue

            this_status = bible_status[(domain, slug, lang)]
            date_mod = CatalogUpdater.get_seconds(this_status['date_modified'])
            if lang not in lang_cat:
                lang_cat[lang] = {'lc': lang,
                                  'mod': date_mod,
                                  'vers': []
                                  }
            ver = {'name': this_status['name'],
                   'slug': this_status['slug'],
                   'mod': date_mod,
                   'status': this_status['status'],
                   'toc': []
                   }
            bk_pub = this_status['books_published']

            # one toc entry per published book, with usfm/sig/pdf links
            for x in bk_pub:
                usfm_name = '{0}-{1}.usfm'.format(bk_pub[x]['sort'], x.upper())
                source = self.usfm_api.format(domain, slug, lang, usfm_name)
                source_sig = source.replace('.usfm', '.sig')
                pdf = source.replace('.usfm', '.pdf')
                ver['toc'].append({'title': bk_pub[x]['name'],
                                   'slug': x,
                                   'mod': date_mod,
                                   'desc': bk_pub[x]['desc'],
                                   'sort': bk_pub[x]['sort'],
                                   'src': source,
                                   'src_sig': source_sig,
                                   'pdf': pdf
                                   })
            # order by canonical book sort, then drop the helper key
            ver['toc'].sort(key=lambda s: s['sort'])
            for x in ver['toc']:
                del x['sort']
            lang_cat[lang]['vers'].append(ver)
        uw_bible['langs'] = [lang_cat[k] for k in lang_cat]
        uw_bible['langs'].sort(key=lambda c: c['lc'])

        # Create OBS section
        uw_obs = {'title': 'Open Bible Stories',
                  'slug': 'obs',
                  'langs': []
                  }
        ts_obs_langs_str = get_url(self.ts_obs_langs_url, True)
        ts_obs_langs = json.loads(ts_obs_langs_str)
        for e in obs_v1_cat:
            date_mod = CatalogUpdater.get_seconds(e['date_modified'])
            # look up the localized project name/description for this language
            desc = ''
            name = ''
            for x in ts_obs_langs:
                if x['language']['slug'] == e['language']:
                    desc = x['project']['desc']
                    name = x['project']['name']
            slug = 'obs-{0}'.format(e['language'])
            source = '{0}/{1}/{2}.json'.format(self.obs_v1_api, e['language'], slug)
            source_sig = source.replace('.json', '.sig')
            media = CatalogUpdater.get_media(e['language'])
            entry = {'lc': e['language'],
                     'mod': date_mod,
                     'vers': [{'name': name,
                               'slug': slug,
                               'mod': date_mod,
                               'status': e['status'],
                               'toc': [{'title': '',
                                        'slug': '',
                                        'media': media,
                                        'mod': date_mod,
                                        'desc': desc,
                                        'src': source,
                                        'src_sig': source_sig
                                        }]
                               }]
                     }
            uw_obs['langs'].append(entry)
        uw_obs['langs'].sort(key=lambda c: c['lc'])

        # Write combined uW catalog
        # top-level 'mod' is the newest modification time across both sections
        # noinspection PyTypeChecker
        mods = [int(x['mod']) for x in uw_bible['langs']]
        # noinspection PyTypeChecker
        mods += [int(x['mod']) for x in uw_obs['langs']]
        mods.sort(reverse=True)
        uw_category = {'cat': [uw_bible, uw_obs], 'mod': mods[0]}
        write_file(self.uw_v2_local, uw_category)
示例#47
0
def main(git_repo, tag):
    """Fetch a tagged tA repository archive, read its meta/toc/content,
    generate the manual and save it as JSON in the output directory."""
    global download_dir

    # normalize the repository URL: drop a '.git' suffix and trailing slash
    if git_repo.endswith('.git'):
        git_repo = git_repo[:-4]
    if git_repo.endswith('/'):
        git_repo = git_repo[:-1]

    # initialize some variables
    repo_name = git_repo.rpartition('/')[2]
    download_dir = '/tmp/{0}'.format(repo_name)
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, repo_name)
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    metadata_obj = None
    content_dir = None
    toc_obj = None

    # fetch the zipped archive of the tag, reusing a cached copy if present
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        print('Unzipping...', end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # walk the unzipped tree looking for meta.yaml, toc.yaml and content/
    for root, dirs, files in os.walk(download_dir):

        if 'meta.yaml' in files:
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = TAMetaData(os.path.join(root, 'meta.yaml'))
            finally:
                print('finished.')

        if 'toc.yaml' in files:
            try:
                print('Reading the toc...', end=' ')
                toc_obj = TATableOfContents(os.path.join(root, 'toc.yaml'))
            finally:
                print('finished.')

        if 'content' in dirs:
            content_dir = os.path.join(root, 'content')

        if content_dir and metadata_obj and toc_obj:
            break  # everything found, stop walking

    # bail out unless the repository had all three required pieces
    if not metadata_obj:
        print_error('Did not find meta.yaml in {}'.format(git_repo))
        sys.exit(1)

    if not content_dir:
        print_error(
            'Did not find the content directory in {}'.format(git_repo))
        sys.exit(1)

    if not toc_obj:
        print_error('Did not find toc.yaml in {}'.format(git_repo))
        sys.exit(1)

    # cross-check the toc against the content directory
    check_missing_pages(toc_obj, content_dir)

    # build the manual from the metadata, toc and content pages
    print('Generating the manual...', end=' ')
    manual = TAManual(metadata_obj, toc_obj)
    manual.load_pages(content_dir)
    print('finished.')

    file_name = os.path.join(
        get_output_dir(), '{0}_{1}.json'.format(manual.meta.manual,
                                                manual.meta.volume))
    print('saving to {0} ...'.format(file_name), end=' ')
    content = json.dumps(manual, sort_keys=True, indent=2, cls=TAEncoder)
    write_file(file_name, content)
    print('finished.')
def main(git_repo, tag, domain):
    """Publish a USFM Bible repository to the API.

    Downloads the repository archive at ``tag``, reads its ``meta.json``,
    validates and chunks every ``*.usfm`` book, writes the per-book USFM
    output plus ``status.json``, publishes the result to the API, and
    updates the catalogs.

    :param git_repo: URL of the git repository; a trailing ``.git`` or
        ``/`` is stripped before use
    :param tag: tag or branch name to download (``archive/<tag>.zip``)
    :param domain: domain string substituted into ``out_template`` to
        build the output directory
    """
    global download_dir, out_template

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]

    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    # today: YYYYMMDD string used for date_modified / publish_date below
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    books_published = {}
    metadata_obj = None
    usfm_dir = None

    # download the repository (skipped if a previous run left the zip in place)
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        # NOTE: original code called .format() on a string with no
        # placeholder, so the file name never appeared in the message.
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository: locate meta.json and the usfm directory
    for root, dirs, files in os.walk(download_dir):

        if 'meta.json' in files:
            # read the metadata
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = BibleMetaData(os.path.join(root, 'meta.json'))
            finally:
                print('finished.')

        if 'usfm' in dirs:
            usfm_dir = os.path.join(root, 'usfm')

        # if we have everything, exit the loop
        if usfm_dir and metadata_obj:
            break

    # check for valid repository structure
    if not metadata_obj:
        print_error('Did not find meta.json in {}'.format(git_repo))
        sys.exit(1)

    if not usfm_dir:
        print_error('Did not find the usfm directory in {}'.format(git_repo))
        sys.exit(1)

    # get the versification data
    vrs = Bible.get_versification(metadata_obj.versification)  # type: list<Book>
    out_dir = out_template.format(domain, metadata_obj.slug, metadata_obj.lang)

    # walk through the usfm files
    usfm_files = glob(os.path.join(usfm_dir, '*.usfm'))
    errors_found = False
    for usfm_file in usfm_files:

        # read the file
        with codecs.open(usfm_file, 'r', 'utf-8') as in_file:
            book_text = in_file.read()

        # get the book id from the \id marker
        book_search = id_re.search(book_text)
        if not book_search:
            print_error('Book id not found in {}'.format(usfm_file))
            sys.exit(1)

        book_id = book_search.group(1)

        print('Beginning {}...'.format(book_id), end=' ')

        # get book versification info
        book = next((b for b in vrs if b.book_id == book_id), None)
        if not book:
            print_error('Book versification data was not found for "{}"'.format(book_id))
            sys.exit(1)

        # remove \s5 lines (chunk markers are re-applied below)
        book_text = s5_re.sub('', book_text)

        # get the usfm for the book
        book.set_usfm(book_text)

        # do basic checks; errors are collected so all books are reported
        # before the run is aborted
        book.verify_usfm_tags()
        book.verify_chapters_and_verses(True)
        if book.validation_errors:
            errors_found = True

        # get chunks for this book
        Bible.chunk_book(metadata_obj.versification, book)
        book.apply_chunks()

        # produces something like '01-GEN.usfm'
        book_file_name = '{0}-{1}.usfm'.format(str(book.number).zfill(2), book.book_id)
        print('Writing ' + book_file_name + '...', end=' ')
        write_file('{0}/{1}'.format(out_dir, book_file_name), book.usfm)

        # books 1-39 are Old Testament, 40+ are New Testament
        meta = ['Bible: OT']
        if book.number > 39:
            meta = ['Bible: NT']
        books_published[book.book_id.lower()] = {'name': book.name,
                                                 'meta': meta,
                                                 'sort': str(book.number).zfill(2),
                                                 'desc': ''
                                                 }
        print('finished.')

    # stop if errors were found
    if errors_found:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)

    print('Writing status.json...', end=' ')
    status = {"slug": '{0}'.format(metadata_obj.slug.lower()),
              "name": metadata_obj.name,
              "lang": metadata_obj.lang,
              "date_modified": today,
              "books_published": books_published,
              "status": {"checking_entity": metadata_obj.checking_entity,
                         "checking_level": metadata_obj.checking_level,
                         "comments": metadata_obj.comments,
                         "contributors": metadata_obj.contributors,
                         "publish_date": today,
                         "source_text": metadata_obj.source_text,
                         "source_text_version": metadata_obj.source_text_version,
                         "version": metadata_obj.version
                         }
              }
    write_file('{0}/status.json'.format(out_dir), status, indent=2)
    print('finished.')

    print()
    print('Publishing to the API...')
    with api_publish(out_dir) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')

    print_notice('Check {0} and do a git push'.format(out_dir))