def test_obs_ts_pipeline(self, mock_s3_upload, mock_s3_get_objects, mock_download_repo, mock_post):
        """
        Fire the web hook with the en-obs fixtures and check which commit was downloaded.

        :param MagicMock mock_s3_upload: patched upload, delegated to self.mock_s3_upload_file
        :param MagicMock mock_s3_get_objects: patched listing, delegated to self.mock_s3_get_objects
        :param MagicMock mock_download_repo: patched download, delegated to self.mock_download_repo
        :param MagicMock mock_post: patched POST, delegated to self.mock_requests_post
        :return:
        """

        # delegate every patched collaborator to its stand-in on this test case
        stand_ins = (
            (mock_download_repo, self.mock_download_repo),
            (mock_s3_upload, self.mock_s3_upload_file),
            (mock_s3_get_objects, self.mock_s3_get_objects),
            (mock_post, self.mock_requests_post),
        )
        for patched, stand_in in stand_ins:
            patched.side_effect = stand_in

        # the event is assembled from two fixture files in the resources directory
        vars_file = os.path.join(self.resources_dir, 'en-obs-vars.json')
        payload_file = os.path.join(self.resources_dir, 'en-obs-payload.json')
        event = {
            'vars': load_json_object(vars_file),
            'data': load_json_object(payload_file),
        }

        # the context only carries a request id: the last 10 characters of a random UUID
        request_id = str(uuid.uuid4())[-10:]
        context = TestPipeline.JsonObject({'aws_request_id': request_id})

        # fire the web hook
        webhook.handle(event, context)

        # the commit URL must be among the positional arguments of the last download call
        expected_commit_url = 'https://git.door43.org/Door43/en-obs/commit/e323f37de1ad2c063a3659c58494edbb2641ce54'
        self.assertIn(expected_commit_url, mock_download_repo.call_args[0])
 def get_manifest_from_dir(self) -> Dict[str, Any]:
     """
     Return the manifest found in self.path, or one derived from the repo name.

     The candidate files are tried in a fixed order: manifest.yaml, manifest.json,
     package.json, project.json, meta.json. The first one that loads to a truthy
     value is returned and self.loadeded_manifest_file is set to True. A file that
     fails to parse is logged, recorded in self.error_messages, and skipped.

     When self.path is empty or not a directory, or no candidate yields a manifest,
     the result of get_manifest_from_repo_name(self.repo_name) is returned and
     self.loadeded_manifest_file stays False.

     :return: the manifest
     """
     AppSettings.logger.info(f"get_manifest_from_dir() with {self.path} …")
     # NOTE: the attribute name is misspelled, but it is kept as-is because it is
     # visible outside this method.
     self.loadeded_manifest_file = False
     if not self.path or not os.path.isdir(self.path):
         return get_manifest_from_repo_name(self.repo_name)

     # (file name, loader, exceptions that mean "badly formed"), in priority order
     candidates = (
         ('manifest.yaml', load_yaml_object, (ParserError, ScannerError)),
         ('manifest.json', load_json_object, JSONDecodeError),
         ('package.json', load_json_object, JSONDecodeError),
         ('project.json', load_json_object, JSONDecodeError),
         ('meta.json', load_json_object, JSONDecodeError),
     )
     manifest = None
     for file_name, loader, parse_errors in candidates:
         try:
             manifest = loader(os.path.join(self.path, file_name))
         except parse_errors as e:
             err_msg = f"Badly formed '{file_name}' in {self.repo_name}: {e}"
             AppSettings.logger.error(err_msg)
             self.error_messages.add(err_msg)
         # after a parse error `manifest` still holds the previous (falsy) value
         if manifest:
             self.loadeded_manifest_file = True
             return manifest
     return get_manifest_from_repo_name(self.repo_name)
    def test_obs_catalog(self):
        """
        Rebuild the catalogs from the fixtures under self.temp_dir and check that only the
        English OBS resource keeps its tW, tN and tQ links (door43.org/issues/376).
        """

        obs_v1_local = '{0}/obs/txt/1'.format(self.temp_dir)
        obs_v1_url = 'file://{0}/obs-catalog.json'.format(obs_v1_local)
        lang_url = 'file://{0}/td/langnames.json'.format(self.temp_dir)
        bible_stat = self.temp_dir + '/{0}/txt/1/{1}-{2}/status.json'
        uw_v2_local = '{0}/uw/txt/2/catalog.json'.format(self.temp_dir)
        ts_obs_langs_url = 'file://{0}/ts/txt/2/obs/languages.json'.format(self.temp_dir)

        # set up mocking: point the updater's class-level locations at the temp directory
        CatalogUpdater.obs_v1_local = obs_v1_local
        CatalogUpdater.obs_v2_local = '{0}/ts/txt/2'.format(self.temp_dir)
        CatalogUpdater.uw_v2_local = uw_v2_local
        CatalogUpdater.ts_obs_langs_url = ts_obs_langs_url

        updater = CatalogUpdater(None, None, None)

        # OBS
        obs_v1 = get_url(obs_v1_url, True)
        obs_v1_catalog = json.loads(obs_v1)
        CatalogUpdater.obs(deepcopy(obs_v1_catalog))

        # Bible: collect the status of every Bible that has a status.json fixture
        lang_names = json.loads(get_url(lang_url, True))
        bible_status = {}
        bible_bks = []
        langs = {x[2] for x in updater.bible_slugs}
        for domain, slug, lang in updater.bible_slugs:
            file_name = bible_stat.format(domain, slug, lang)
            if not os.path.isfile(file_name):
                continue

            bible_status[(domain, slug, lang)] = load_json_object(file_name)
            bible_bks += bible_status[(domain, slug, lang)]['books_published'].keys()

        updater.bible(lang_names, bible_status, bible_bks, langs)

        # Global
        CatalogUpdater.ts_cat()
        updater.uw_cat(obs_v1_catalog, bible_status)

        # check door43.org/issues/376: remove tW, tN and tQ links from non-English OBS
        # (assertEqual/assertNotEqual are used because the assertEquals/assertNotEquals
        # aliases are deprecated and no longer exist in Python 3.12)
        en_obs = load_json_object('{0}/ts/txt/2/obs/en/resources.json'.format(self.temp_dir))[0]
        self.assertNotEqual(en_obs['checking_questions'], '')
        self.assertNotEqual(en_obs['notes'], '')
        self.assertNotEqual(en_obs['terms'], '')
        self.assertNotEqual(en_obs['tw_cat'], '')

        fr_obs = load_json_object('{0}/ts/txt/2/obs/fr/resources.json'.format(self.temp_dir))[0]
        self.assertEqual(fr_obs['checking_questions'], '')
        self.assertEqual(fr_obs['notes'], '')
        self.assertEqual(fr_obs['terms'], '')
        self.assertEqual(fr_obs['tw_cat'], '')
def update_catalog(domain=None, slug=None, lang=None):
    """
    Regenerate the OBS, Bible and global catalogs.

    Reads the module-level ``bible_stat`` (path template of a Bible status file) and
    ``lang_url`` (location of the language-names JSON).

    :param domain: passed to the CatalogUpdater constructor
    :param slug: passed to the CatalogUpdater constructor
    :param lang: passed to the CatalogUpdater constructor
    """
    global bible_stat, lang_url

    updater = CatalogUpdater(domain, slug, lang)

    # OBS: obs() receives a deep copy; the parsed catalog itself goes to uw_cat() below
    obs_v1_catalog = json.loads(get_url(CatalogUpdater.obs_v1_url, True))
    CatalogUpdater.obs(deepcopy(obs_v1_catalog))

    # Bible: gather the status of every Bible whose status file exists
    lang_names = json.loads(get_url(lang_url, True))
    bible_status = {}
    bible_bks = []
    langs = {entry[2] for entry in updater.bible_slugs}
    for bible_domain, bible_slug, bible_lang in updater.bible_slugs:
        status_file = bible_stat.format(bible_domain, bible_slug, bible_lang)
        if os.path.isfile(status_file):
            status = load_json_object(status_file)
            bible_status[(bible_domain, bible_slug, bible_lang)] = status
            bible_bks.extend(status['books_published'].keys())

    updater.bible(lang_names, bible_status, bible_bks, langs)

    # Global
    CatalogUpdater.ts_cat()
    updater.uw_cat(obs_v1_catalog, bible_status)
    def ts_cat():
        """
        Write the global catalog file ``catalog.json`` under CatalogUpdater.obs_v2_local.

        One entry is produced for every directory in CatalogUpdater.project_dirs (after
        the Bible directories have been added to it) whose ``languages.json`` loads to a
        non-empty value. Each entry has the keys ``slug``, ``date_modified`` (the newest
        language date in that project), ``lang_catalog``, ``sort`` and ``meta``.
        """

        # project_dirs is shared class state. Appending unconditionally made every further
        # call add the Bible directories again and so emit duplicate catalog entries; only
        # add the ones that are still missing.
        for bible_dir in CatalogUpdater.bible_dirs:
            if bible_dir not in CatalogUpdater.project_dirs:
                CatalogUpdater.project_dirs.append(bible_dir)

        ts_categories = []
        for p in CatalogUpdater.project_dirs:
            file_name = '{0}/{1}/languages.json'.format(CatalogUpdater.obs_v2_local, p)
            proj_cat = load_json_object(file_name)
            if not proj_cat:
                continue

            proj_url = '{0}/{1}/languages.json'.format(CatalogUpdater.obs_v2_api, p)
            # newest modification date among the languages of this project
            newest_date = max(x['language']['date_modified'] for x in proj_cat)

            # Bible projects take their sort key from the first entry that has a project
            sort = '01'
            if p in CatalogUpdater.bible_dirs:
                sort = [x['project']['sort'] for x in proj_cat if 'project' in x][0]

            meta = []
            project_meta = proj_cat[0]['project']['meta']
            if project_meta:
                if 'Bible: OT' in project_meta:
                    meta.append('bible-ot')
                if 'Bible: NT' in project_meta:
                    meta.append('bible-nt')

            ts_categories.append({'slug': p,
                                  'date_modified': newest_date,
                                  'lang_catalog': '{0}?date_modified={1}'.format(
                                      proj_url, newest_date),
                                  'sort': sort,
                                  'meta': meta
                                  })

        # Write global catalog
        outfile = '{0}/catalog.json'.format(CatalogUpdater.obs_v2_local)
        write_file(outfile, ts_categories)
 def __init__(self, file_name=None):
     """
     Build the object from a JSON file when one is named, otherwise from blank defaults.

     :param unicode file_name: Optional name of a JSON file to deserialize into this object
     :raises IOError: when file_name is given but no such file exists
     """
     if file_name:
         if not os.path.isfile(file_name):
             raise IOError('The file {0} was not found.'.format(file_name))
         # deserialize: the loaded JSON becomes the complete attribute dictionary
         self.__dict__ = load_json_object(file_name)
         return

     # no file given: start from an empty record
     self.slug = ''  # like "{0}-{1}".format(domain, lang) = "ulb-lpx"
     self.name = ''  # like "Unlocked Literal Bible - Lopit"
     self.lang = ''  # like "lpx"
     self.date_modified = ''  # like "20160417"
     default_status = {
         "checking_entity": '',  # like "Translation Team"
         "checking_level": '1',
         "comments": '',
         "contributors": '',
         "publish_date": '',  # like "20160417"
         "source_text": 'en',
         "source_text_version": '2',
         # this is source_text_version + '.1' = 2.1 or 2.1.1
         "version": '2.1',
     }
     self.status = default_status
     self.books_published = {}
 def __init__(self, file_name=None):
     """
     Build the object from a JSON file when one is named, otherwise from defaults.

     :param str file_name: Optional name of a JSON file to deserialize into this object
     :raises IOError: when file_name is given but no such file exists
     """
     if file_name:
         if not os.path.isfile(file_name):
             raise IOError('The file {0} was not found.'.format(file_name))
         # deserialize: the loaded JSON becomes the complete attribute dictionary
         self.__dict__ = load_json_object(file_name)
         return

     # no file given: English UI strings, no chapters, today's date, left-to-right
     self.app_words = {
         'cancel': 'Cancel',
         'chapters': 'Chapters',
         'languages': 'Languages',
         'next_chapter': 'Next Chapter',
         'ok': 'OK',
         'remove_locally': 'Remove Locally',
         'remove_this_string': ('Remove this language from offline storage. You will need an '
                                'internet connection to view it in the future.'),
         'save_locally': 'Save Locally',
         'save_this_string': 'Save this language locally for offline use.',
         'select_a_language': 'Select a Language',
     }
     self.chapters = []
     self.date_modified = datetime.today().strftime('%Y%m%d')
     self.direction = 'ltr'
     self.language = ''
 def __init__(self, file_name=None):
     """
     Build the object from a JSON file when one is named, otherwise from defaults.

     :param str file_name: Optional name of a JSON file to deserialize into this object
     :raises IOError: when file_name is given but no such file exists
     """
     if file_name:
         if not os.path.isfile(file_name):
             raise IOError('The file {0} was not found.'.format(file_name))
         # deserialize: the loaded JSON becomes the complete attribute dictionary
         self.__dict__ = load_json_object(file_name)
         # files without a versification entry get the default one
         if 'versification' not in self.__dict__:
             self.versification = 'ufw'
         return

     # no file given: blank record, checking level 1, published today
     self.lang = ''
     self.name = ''
     self.slug = ''
     self.checking_entity = ''
     self.checking_level = '1'
     self.comments = ''
     self.contributors = ''
     self.publish_date = datetime.today().strftime('%Y-%m-%d')
     self.source_text = ''
     self.source_text_version = ''
     self.version = ''
     self.versification = 'ufw'
 def mock_s3_tn_project(self, part):
     """
     Unpack the converted en_tn fixture and upload its files through the S3 handlers.

     Sets self.project_files (names of the plain files in the unpacked directory) and
     self.project_key, and rewrites build_log.json so that its 'part' entry is `part`.

     :param part: stored in the build log and used as a path segment of the upload keys
     """
     archive = os.path.join(self.resources_dir, 'converted_projects',
                            'en_tn_converted.zip')
     out_dir = os.path.join(self.temp_dir, 'en_tn_converted')
     unzip(archive, out_dir)
     src_dir = os.path.join(out_dir, 'en_tn_converted')
     self.project_files = [
         name for name in os.listdir(src_dir)
         if os.path.isfile(os.path.join(src_dir, name))
     ]
     self.project_key = 'u/door43/en_tn/12345678'

     # stamp the build log with the requested part before it is uploaded
     build_log_path = os.path.join(src_dir, 'build_log.json')
     build_log = file_utils.load_json_object(build_log_path)
     build_log['part'] = part
     file_utils.write_file(build_log_path, build_log)

     # (local file name, destination key), uploaded in this order
     part_prefix = '{0}/{1}'.format(self.project_key, part)
     cdn_uploads = (
         ('build_log.json', part_prefix + '/build_log.json'),
         ('index.json', part_prefix + '/index.json'),
         # the 'finished' marker is a copy of the build log
         ('build_log.json', part_prefix + '/finished'),
         ('01-GEN.html', part_prefix + '/01-GEN.html'),
         # NOTE(review): this key says en_tq although the project key is en_tn - confirm
         ('project.json', 'u/door43/en_tq/project.json'),
     )
     for local_name, destination_key in cdn_uploads:
         AppSettings.cdn_s3_handler().upload_file(
             os.path.join(src_dir, local_name), destination_key)

     AppSettings.door43_s3_handler().upload_file(
         os.path.join(self.resources_dir, 'templates', 'project-page.html'),
         'templates/project-page.html')
示例#10
0
    def __init__(self, file_name=None, repo_name=None):
        """
        Set the default attributes, then optionally overlay a manifest file and repo name.

        :param str file_name: Optional name of a file to deserialize into this object
        :param str repo_name: Optional; when given it is passed to
            self.update_from_repo_name() after any file has been loaded
        :raises IOError: when file_name is given but no such file exists
        :raises Exception: when the file exists but cannot be loaded
        """
        # Defaults
        self.package_version = Manifest.PACKAGE_VERSION
        self.modified_at = datetime.utcnow().strftime("%Y%m%d%H%M%S")
        self.slug = ""
        self.name = ""
        self.icon = "https://cdn.door43.org/images/default_icon.jpg"

        self.formats = {}
        self.language = {}
        self.projects = {}
        self.status = {}

        # deserialize: values from the file are merged over the defaults above
        if file_name:
            if not os.path.isfile(file_name):
                raise IOError('The manifest.json file was not found')
            try:
                loaded = load_json_object(file_name)
            except Exception as e:
                message = 'Structure error of the manifest.json file: {0}'.format(e)
                raise Exception(message)
            self.__dict__.update(loaded)

        if repo_name:
            self.update_from_repo_name(repo_name)
示例#11
0
 def __init__(self, file_name=None):
     """
     Create the object from defaults, or from a JSON file when one is named.

     :param str file_name: Optional name of a JSON file to deserialize into this object
     :raises IOError: when file_name is given but no such file exists
     """
     if not file_name:
         # defaults: English UI strings, no chapters, today's date, left-to-right
         ui_strings = {
             'cancel': 'Cancel',
             'chapters': 'Chapters',
             'languages': 'Languages',
             'next_chapter': 'Next Chapter',
             'ok': 'OK',
             'remove_locally': 'Remove Locally',
             'remove_this_string': 'Remove this language from offline storage. You will need an '
                                   'internet connection to view it in the future.',
             'save_locally': 'Save Locally',
             'save_this_string': 'Save this language locally for offline use.',
             'select_a_language': 'Select a Language',
         }
         self.app_words = ui_strings
         self.chapters = []
         self.date_modified = datetime.today().strftime('%Y%m%d')
         self.direction = 'ltr'
         self.language = ''
     elif os.path.isfile(file_name):
         # deserialize: the loaded JSON becomes the complete attribute dictionary
         self.__dict__ = load_json_object(file_name)
     else:
         raise IOError('The file {0} was not found.'.format(file_name))
示例#12
0
 def __init__(self, file_name=None):
     """
     Create the object from defaults, or from a JSON file when one is named.

     :param str file_name: Optional name of a JSON file to deserialize into this object
     :raises IOError: when file_name is given but no such file exists
     """
     if not file_name:
         # defaults: blank record, checking level 1, published today
         self.lang = ''
         self.name = ''
         self.slug = ''
         self.checking_entity = ''
         self.checking_level = '1'
         self.comments = ''
         self.contributors = ''
         self.publish_date = datetime.today().strftime('%Y-%m-%d')
         self.source_text = ''
         self.source_text_version = ''
         self.version = ''
         self.versification = 'ufw'
     elif os.path.isfile(file_name):
         # deserialize: the loaded JSON becomes the complete attribute dictionary
         self.__dict__ = load_json_object(file_name)
         # files without a versification entry get the default one
         if 'versification' not in self.__dict__:
             self.versification = 'ufw'
     else:
         raise IOError('The file {0} was not found.'.format(file_name))
    def populate_tn_groups_data(self):
        """
        Collect the translationNotes context ids of the current project by chapter and verse.

        Reads every ``<group>/groups/<project_id>/*.json`` file below the latest version
        directory of the translationNotes resource, adds an ``rc`` link to each
        occurrence's ``contextId`` and stores the result in self.tn_groups_data as
        ``{chapter: {verse: [context_id, ...]}}`` with string keys.

        Logs an error and exits with status 1 when the resource directory or a version
        directory cannot be found.
        """
        tn_resource_path = os.path.join(self.working_dir, 'resources',
                                        self.lang_code, 'translationHelps',
                                        'translationNotes')
        # os.path.join() never returns an empty string, so test the file system rather
        # than the string's truthiness; otherwise this error could never be reported.
        if not os.path.exists(tn_resource_path):
            self.logger.error(f'{tn_resource_path} not found!')
            exit(1)
        tn_version_path = get_latest_version_path(tn_resource_path)
        if not tn_version_path:
            self.logger.error(f'Version not found in {tn_resource_path}!')
            exit(1)

        groups = get_child_directories(tn_version_path)
        groups_data = OrderedDict()
        for group in groups:
            files_path = os.path.join(tn_version_path,
                                      f'{group}/groups/{self.project_id}',
                                      '*.json')
            for file in glob(files_path):
                # the file name without extension is one segment of the rc link
                base = os.path.splitext(os.path.basename(file))[0]
                for occurrence in load_json_object(file):
                    context_id = occurrence['contextId']
                    chapter = str(context_id['reference']['chapter'])
                    verse = str(context_id['reference']['verse'])
                    tn_rc_link = f'rc://{self.lang_code}/tn/help/{group}/{base}/{self.project_id}/{self.pad(chapter)}/{verse.zfill(3)}'
                    context_id['rc'] = tn_rc_link
                    if chapter not in groups_data:
                        groups_data[chapter] = OrderedDict()
                    groups_data[chapter].setdefault(verse, []).append(context_id)
        self.tn_groups_data = groups_data
示例#14
0
 def __init__(self, file_name=None):
     """
     Create the object from blank defaults, or from a JSON file when one is named.

     :param unicode file_name: Optional name of a JSON file to deserialize into this object
     :raises IOError: when file_name is given but no such file exists
     """
     if not file_name:
         # defaults: an empty record
         self.slug = ''  # like "{0}-{1}".format(domain, lang) = "ulb-lpx"
         self.name = ''  # like "Unlocked Literal Bible - Lopit"
         self.lang = ''  # like "lpx"
         self.date_modified = ''  # like "20160417"
         self.status = {
             "checking_entity": '',  # like "Translation Team"
             "checking_level": '1',
             "comments": '',
             "contributors": '',
             "publish_date": '',  # like "20160417"
             "source_text": 'en',
             "source_text_version": '2',
             # this is source_text_version + '.1' = 2.1 or 2.1.1
             "version": '2.1',
         }
         self.books_published = {}
     elif os.path.isfile(file_name):
         # deserialize: the loaded JSON becomes the complete attribute dictionary
         self.__dict__ = load_json_object(file_name)
     else:
         raise IOError('The file {0} was not found.'.format(file_name))
示例#15
0
    def test_ceb_psa_text_ulb_L3(self):
        """ Populates the ResourceContainer object and verifies the output."""
        # test with the ceb_psa_text_ulb_L3 fixture, unpacked into a fresh temp directory
        archive = os.path.join(self.resources_dir, 'ceb_psa_text_ulb_L3.zip')
        self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
        unzip(archive, self.out_dir)
        repo_dir = os.path.join(self.out_dir, 'ceb_psa_text_ulb_l3')
        rc = RC(directory=repo_dir)
        rc.as_dict()

        # the resource is compared against the manifest that ships with the repo
        manifest = load_json_object(os.path.join(repo_dir, 'manifest.json'))
        today = datetime.utcnow().strftime('%Y-%m-%d')
        self.assertEqual(rc.resource.identifier, manifest['resource']['id'])
        self.assertEqual(rc.resource.type, 'book')
        self.assertEqual(rc.resource.format, 'text/{0}'.format(manifest['format']))
        self.assertEqual(rc.resource.file_ext, manifest['format'])
        self.assertEqual(rc.resource.conformsto, 'pre-rc')
        self.assertEqual(rc.resource.modified, today)

        # the numeric chapters must count up from 1 without gaps
        chapters = rc.projects[0].chapters()
        numeric_chapters = (name for name in chapters if name.isnumeric())
        for expected_number, name in enumerate(numeric_chapters, start=1):
            self.assertEqual(int(name), expected_number)

        self.assertEqual(len(chapters), 151)
        chunks = rc.projects[0].chunks('01')
        self.assertEqual(len(chunks), 5)
 def __init__(self, *args, **kwargs) -> None:
     """
     Set the 'tw' CSS class, run the base initializer, then load titles and chapters.

     self.titles and self.chapters are only set when index.json in self.source_dir
     loads to a truthy value.
     """
     self.templater_CSS_class = 'tw'
     super(TwTemplater, self).__init__(*args, **kwargs)
     index_path = os.path.join(self.source_dir, 'index.json')
     index = file_utils.load_json_object(index_path)
     if not index:
         return
     self.titles = index['titles']
     self.chapters = index['chapters']
示例#17
0
    def get_usfm_data():
        """
        Return Bible.usfm_data, loading it first when it is still falsy.

        :return: the value held in Bible.usfm_data
        """
        if Bible.usfm_data:
            return Bible.usfm_data

        # TODO: change these to point to the API when it is available
        # NOTE(review): a URL is handed to load_json_object - confirm it accepts URLs
        api_root = 'https://raw.githubusercontent.com/unfoldingWord-dev/uw-api/develop/static'
        usfm_data_file = '{0}/versification/ufw/books-en.json'.format(api_root)
        Bible.usfm_data = load_json_object(usfm_data_file)
        return Bible.usfm_data
    def get_usfm_data():
        """
        Return Bible.usfm_data, loading it first when it is still falsy.

        :return: the value held in Bible.usfm_data
        """
        already_loaded = bool(Bible.usfm_data)
        if not already_loaded:
            # TODO: change these to point to the API when it is available
            # NOTE(review): a URL is handed to load_json_object - confirm it accepts URLs
            api_root = 'https://raw.githubusercontent.com/unfoldingWord-dev/uw-api/develop/static'
            Bible.usfm_data = load_json_object(api_root + '/versification/ufw/books-en.json')

        return Bible.usfm_data
示例#19
0
def export_to_api(lang, status, today, cur_json):
    """
    Export one language to the unfoldingWord directory and refresh the OBS catalog.

    Reads the module-level ``unfoldingWord_dir``, ``lang_cat`` and ``pages`` and assigns
    ``github_org`` (None when /root/.github_pass does not exist). Exits with status 1
    when the Github login fails, when `status` lacks ``checking_level`` or
    ``publish_date``, or when the checking level is not '1', '2' or '3'.

    :param lang: language code; also the name of the sub-directory written to
    :param status: status record; must support both ``in`` tests and attribute access
    :param today: passed through to the front and back matter builders
    :param cur_json: passed through to export_unfolding_word()
    """
    global unfoldingWord_dir, lang_cat, github_org, pages

    print('Getting Github credentials...', end=' ')
    try:
        github_org = None
        if os.path.isfile('/root/.github_pass'):
            # read the password through a context manager so the file handle is closed
            # noinspection PyTypeChecker
            with open('/root/.github_pass', 'r') as pass_file:
                pw = pass_file.read().strip()
            g_user = githubLogin('dsm-git', pw)
            github_org = getGithubOrg('unfoldingword', g_user)
        else:
            print('none found...', end=' ')
    except GithubException as e:
        print_error('Problem logging into Github: {0}'.format(e))
        sys.exit(1)
    print('finished.')

    print('Loading the uw catalog...', end=' ')
    uw_cat_path = os.path.join(unfoldingWord_dir, 'obs-catalog.json')
    uw_catalog = load_json_object(uw_cat_path, [])
    uw_cat_langs = [x['language'] for x in uw_catalog]
    print('finished')

    unfolding_word_lang_dir = os.path.join(unfoldingWord_dir, lang)
    if 'checking_level' in status and 'publish_date' in status:
        if status.checking_level in ['1', '2', '3']:

            front_json = OBS.get_front_matter(pages, lang, today)
            back_json = OBS.get_back_matter(pages, lang, today)

            print('Exporting {0}...'.format(lang), end=' ')
            export_unfolding_word(status, unfolding_word_lang_dir, cur_json,
                                  lang, github_org, front_json, back_json)

            # replace an existing catalog entry for this language with the current one
            if lang in uw_cat_langs:
                uw_catalog.pop(uw_cat_langs.index(lang))
                uw_cat_langs.pop(uw_cat_langs.index(lang))
            uw_catalog.append(lang_cat)

            uw_cat_json = json.dumps(uw_catalog,
                                     sort_keys=True,
                                     cls=OBSEncoder)
            write_file(uw_cat_path, uw_cat_json)

            # update uw_admin status page
            ObsPublishedLangs.update_page(ObsPublishedLangs.cat_url,
                                          ObsPublishedLangs.uw_stat_page)

            print('finished.')
        else:
            print_error('The `checking_level` is invalid.')
            sys.exit(1)
    else:
        print_error(
            'The status is missing `checking_level` or `publish_date`.')
        sys.exit(1)
def export_to_api(lang, status, today, cur_json):
    """
    Export one language to the unfoldingWord directory and refresh the OBS catalog.

    Reads the module-level ``unfoldingWord_dir``, ``lang_cat`` and ``pages`` and assigns
    ``github_org`` (None when /root/.github_pass does not exist). Exits with status 1
    when the Github login fails, when `status` lacks ``checking_level`` or
    ``publish_date``, or when the checking level is not '1', '2' or '3'.

    :param lang: language code; also the name of the sub-directory written to
    :param status: status record; must support both ``in`` tests and attribute access
    :param today: passed through to the front and back matter builders
    :param cur_json: passed through to export_unfolding_word()
    """
    global unfoldingWord_dir, lang_cat, github_org, pages

    print('Getting Github credentials...', end=' ')
    try:
        github_org = None
        if os.path.isfile('/root/.github_pass'):
            # read the password through a context manager so the file handle is closed
            # noinspection PyTypeChecker
            with open('/root/.github_pass', 'r') as pass_file:
                pw = pass_file.read().strip()
            g_user = githubLogin('dsm-git', pw)
            github_org = getGithubOrg('unfoldingword', g_user)
        else:
            print('none found...', end=' ')
    except GithubException as e:
        print_error('Problem logging into Github: {0}'.format(e))
        sys.exit(1)
    print('finished.')

    print('Loading the uw catalog...', end=' ')
    uw_cat_path = os.path.join(unfoldingWord_dir, 'obs-catalog.json')
    uw_catalog = load_json_object(uw_cat_path, [])
    uw_cat_langs = [x['language'] for x in uw_catalog]
    print('finished')

    unfolding_word_lang_dir = os.path.join(unfoldingWord_dir, lang)
    if 'checking_level' in status and 'publish_date' in status:
        if status.checking_level in ['1', '2', '3']:

            front_json = OBS.get_front_matter(pages, lang, today)
            back_json = OBS.get_back_matter(pages, lang, today)

            print('Exporting {0}...'.format(lang), end=' ')
            export_unfolding_word(status, unfolding_word_lang_dir, cur_json,
                                  lang, github_org, front_json, back_json)

            # replace an existing catalog entry for this language with the current one
            if lang in uw_cat_langs:
                uw_catalog.pop(uw_cat_langs.index(lang))
                uw_cat_langs.pop(uw_cat_langs.index(lang))
            uw_catalog.append(lang_cat)

            uw_cat_json = json.dumps(uw_catalog, sort_keys=True, cls=OBSEncoder)
            write_file(uw_cat_path, uw_cat_json)

            # update uw_admin status page
            ObsPublishedLangs.update_page(ObsPublishedLangs.cat_url, ObsPublishedLangs.uw_stat_page)

            print('finished.')
        else:
            print_error('The `checking_level` is invalid.')
            sys.exit(1)
    else:
        print_error('The status is missing `checking_level` or `publish_date`.')
        sys.exit(1)
    def obs(obs_v1_cat):
        """
        Write the v2 OBS ``resources.json`` of each language and the OBS ``languages.json``.

        Entries whose front-matter file does not exist are skipped. Every processed entry
        is modified in place: its language, string and direction keys are removed and the
        resource keys are added before it is written out.

        :param obs_v1_cat: iterable of v1 OBS catalog entries (dictionaries)
        """

        langs_cat = []
        # Write OBS catalog for each language
        for e in obs_v1_cat:
            lang = e['language']
            front_matter_file = '{0}/{1}/obs-{1}-front-matter.json'.format(CatalogUpdater.obs_v1_local, lang)
            if not os.path.isfile(front_matter_file):
                continue

            front_json = load_json_object(front_matter_file)

            # capture the language details before they are stripped from the entry
            language_info = {'slug': lang,
                             'name': e['string'],
                             'direction': e['direction'],
                             'date_modified': e['date_modified']}
            project_info = {'name': front_json['name'],
                            'desc': front_json['tagline'],
                            'meta': []}
            lang_entry = {'language': language_info, 'project': project_info}

            # turn the v1 entry into the v2 resource record
            for v1_only_key in ('language', 'string', 'direction'):
                del e[v1_only_key]
            e['slug'] = 'obs'
            e['name'] = 'Open Bible Stories'
            e['source'] = CatalogUpdater.add_date('{0}/{1}/obs-{1}.json'.format(CatalogUpdater.obs_v1_api, lang))

            # only English gets links to the helps; every other language gets empty strings
            helps = (('terms', 'kt'), ('notes', 'tN'), ('tw_cat', 'tw_cat'), ('checking_questions', 'CQ'))
            for key, file_prefix in helps:
                if lang == 'en':
                    help_url = '{0}/{1}/{2}-{1}.json'.format(CatalogUpdater.obs_v1_api, lang, file_prefix)
                    e[key] = CatalogUpdater.add_date(help_url)
                else:
                    e[key] = ''

            e['date_modified'] = CatalogUpdater.most_recent(e)
            write_file('{0}/obs/{1}/resources.json'.format(CatalogUpdater.obs_v2_local, lang), [e])

            lang_entry['res_catalog'] = '{0}/obs/{1}/resources.json?date_modified={2}'.format(
                CatalogUpdater.obs_v2_api, lang, e['date_modified'])
            langs_cat.append(lang_entry)

        # Write global OBS catalog
        write_file('{0}/obs/languages.json'.format(CatalogUpdater.obs_v2_local), langs_cat)
示例#22
0
    def __init__(self, content_dir=None):
        """
        Load manifest.json from a directory into this object.

        :param object content_dir: Path to the directory that holds the manifest.json file
        :raises IOError: when the directory has no manifest.json file
        """
        self.content_dir = content_dir
        self.manifest_file = os.path.join(self.content_dir, 'manifest.json')

        if not os.path.isfile(self.manifest_file):
            raise IOError('The file {0} was not found.'.format(
                self.manifest_file))

        # NOTE(review): replacing __dict__ discards the two attributes set above unless
        # the loaded JSON carries them too - confirm that is intended
        self.__dict__ = load_json_object(self.manifest_file)
    def populate_verse_usfm(self, bible_id, lang_code=None):
        """
        Build the per-verse USFM and HTML of the current book for one Bible.

        The book's USFM file is unaligned and split into chapters and verses. For every
        verse number the text comes from the chapter's verse-objects JSON when it has an
        entry for the verse (or for its verse range); otherwise the raw verse USFM is
        used. The result is stored in self.verse_usfm[bible_id] as
        ``{chapter: {verse: {'usfm': ..., 'html': ...}}}`` with string keys.

        Logs an error and exits with status 1 when the Bible directory or a version
        directory cannot be found.

        :param bible_id: id of the Bible; key into self.resources and self.verse_usfm
        :param lang_code: language of the Bible; falls back to self.lang_code when falsy
        """
        if not lang_code:
            lang_code = self.lang_code
        bible_path = os.path.join(self.working_dir, 'resources', lang_code,
                                  'bibles', bible_id)
        # os.path.join() never returns an empty string, so test the file system rather
        # than the string's truthiness; otherwise this error could never be reported.
        if not os.path.exists(bible_path):
            self.logger.error(f'{bible_path} not found!')
            exit(1)
        bible_version_path = get_latest_version_path(bible_path)
        if not bible_version_path:
            self.logger.error(f'No versions found in {bible_path}!')
            exit(1)

        book_data = OrderedDict()
        book_file = os.path.join(
            self.resources[bible_id].repo_dir,
            f'{self.book_number}-{self.project_id.upper()}.usfm')
        book_usfm = read_file(book_file)
        unaligned_usfm = unalign_usfm(book_usfm)
        # the piece before the first \c marker is the book header, hence [1:]
        chapters = unaligned_usfm.split(r'\c ')
        for chapter_usfm in chapters[1:]:
            # the chapter number is the first run of digits after the marker
            chapter = re.findall(r'(\d+)', chapter_usfm)[0]
            book_data[chapter] = OrderedDict()
            chapter_usfm = r'\c ' + chapter_usfm

            chapter_vo_file = os.path.join(bible_version_path, self.project_id,
                                           f'{chapter}.json')
            chapter_verse_objects = load_json_object(chapter_vo_file)

            verses = chapter_usfm.split(r'\v ')
            for verse_usfm in verses[1:]:
                # a verse marker is either a single number or a range such as "1-3"
                from_verse, to_verse = re.findall(r'^(\d+)(?:-(\d+))*',
                                                  verse_usfm)[0]
                if not to_verse:
                    to_verse = from_verse
                # the same for every verse of the range, so computed once
                from_to_verse = f'{from_verse}-{to_verse}'
                for verse in range(int(from_verse), int(to_verse) + 1):
                    verse = str(verse)
                    if from_to_verse in chapter_verse_objects:
                        # NOTE(review): this branch passes the stored value itself while
                        # the single-verse branch passes its "verseObjects" member -
                        # confirm which shape ranged entries have
                        usfm = rf'\v {from_to_verse} {self.get_text_from_verse_objects(chapter_verse_objects[from_to_verse])}'
                    elif verse in chapter_verse_objects:
                        usfm = rf'\v {verse} {self.get_text_from_verse_objects(chapter_verse_objects[verse]["verseObjects"])}'
                    else:
                        usfm = rf'\v {verse_usfm}'
                    html = self.get_verse_html(usfm, bible_id, chapter, verse)
                    book_data[chapter][verse] = {
                        'usfm': usfm.strip(),
                        'html': html.strip()
                    }
        self.verse_usfm[bible_id] = book_data
 def tw_cat(self):
     """
     Return the translationWords catalog, building it on first access.

     The catalog is read from ``tw_cat.json`` in ``self.converters_dir``.
     For every chapter/frame listed there, ``self._tw_cat[chapter_id][frame_id]``
     is filled with ``rc://`` links for each term whose article exists in the
     tW repo (under ``bible/kt``, ``bible/names`` or ``bible/other``).
     Terms that cannot be located, or that are only found under a renamed
     id, are reported through ``self.add_error_message()``.

     :return: the cached catalog (``self._tw_cat``)
     """
     if self._tw_cat:
         return self._tw_cat

     # Catalog ids that are looked up under a different article name
     # when no article exists under the catalog's own id.
     renamed_terms = {
         'idol': 'falsegod',
         'witness': 'testimony',
         'newcovenant': 'covenant',
         'taxcollector': 'tax',
         'believer': 'believe'
     }

     def category_of(name):
         # First tW category directory that holds '<name>.md', or None.
         for candidate in ['kt', 'names', 'other']:
             article = os.path.join(self.resources['tw'].repo_dir,
                                    'bible', candidate, f'{name}.md')
             if os.path.exists(article):
                 return candidate
         return None

     self._tw_cat = load_json_object(
         os.path.join(self.converters_dir, 'tw_cat.json'))
     for chapter in self._tw_cat['chapters']:
         self._tw_cat[chapter['id']] = {}
         for frame in chapter['frames']:
             links = []
             self._tw_cat[chapter['id']][frame['id']] = links
             for item in frame['items']:
                 term = item['id']
                 category = category_of(term)
                 if not category and term in renamed_terms:
                     # Retry under the renamed id; only adopt the new
                     # name when an article really exists for it.
                     category = category_of(renamed_terms[term])
                     if category:
                         term = renamed_terms[term]
                 if category:
                     links.append(
                         f'rc://{self.lang_code}/tw/dict/bible/{category}/{term}'
                     )
                 if not category or term != item['id']:
                     # Report both missing terms (no fix) and renamed
                     # terms (with the suggested replacement).
                     fix = f'change to: {term}' if term != item['id'] else None
                     source_rc_link = f'rc://{self.lang_code}/tw_cat/{chapter["id"]}/{frame["id"]}'
                     source_rc = self.create_rc(source_rc_link)
                     self.add_error_message(source_rc, item['id'], fix)
     return self._tw_cat
    def get_verse_objects(self, bible_id, chapter, verse):
        """
        Return the verse objects of one verse from the latest version of a Bible.

        The chapter is read from
        ``<resources_dir>/<lang_code>/bibles/<bible_id>/<latest version>/<project_id>/<chapter>.json``.

        :param str bible_id: id of the Bible resource directory (e.g. ``ult``)
        :param chapter: chapter number, used as the JSON file name
        :param str verse: verse key to look up in the chapter JSON
        :return: the ``verseObjects`` list for the verse, or ``[]`` when the
                 verse is not present in the chapter file
        :raises SystemExit: when the Bible directory is missing or contains
                            no versions (after logging an error)
        """
        bible_path = os.path.join(self.resources_dir, self.lang_code, 'bibles',
                                  bible_id)
        # os.path.join() never returns an empty string, so truth-testing the
        # path can never detect a missing directory; ask the filesystem.
        if not os.path.exists(bible_path):
            self.logger.error(f'{bible_path} not found!')
            exit(1)
        bible_version_path = get_latest_version_path(bible_path)
        if not bible_version_path:
            self.logger.error(f'No versions found in {bible_path}!')
            exit(1)

        chapter_json_path = f'{bible_version_path}/{self.project_id}/{chapter}.json'
        data = load_json_object(chapter_json_path)
        # `data` may be empty/None if the chapter file could not be loaded
        # (depends on load_json_object's default -- TODO confirm).
        if data and verse in data:
            return data[verse]['verseObjects']
        return []
示例#26
0
    def __init__(self,
                 file_name=None,
                 meta=None,
                 repo_name=None,
                 files_path=None):
        """
        Create a manifest with default values, then overlay optional sources.

        The overlays are applied in this order: the JSON file, ``meta``,
        ``files_path`` and finally ``repo_name``. If no resource id has been
        set afterwards and the manifest looks like Scripture (format is
        ``usfm`` or the project id is a known book), the resource defaults
        to ``bible`` / ``Bible``.

        :param str file_name: The name of a file to deserialize into a Manifest object
        :param meta: passed to ``update_from_meta()`` when truthy
        :param repo_name: passed to ``update_from_repo_name()`` when truthy
        :param files_path: passed to ``update_from_files()`` when truthy
        :raises IOError: if ``file_name`` is given but is not an existing file
        """
        # Defaults
        self.package_version = Manifest.LATEST_VERSION
        self.format = ""
        self.generator = {"name": "", "build": ""}
        self.target_language = {"id": "", "name": "", "direction": "ltr"}
        self.project = {"id": "", "name": ""}
        self.type = {"id": "text", "name": "Text"}
        self.resource = {"id": "", "name": ""}
        self.source_translations = []
        self.parent_draft = {}
        self.translators = []
        self.finished_chunks = []

        # Deserialize: the standardized file contents override the defaults.
        if file_name:
            if not os.path.isfile(file_name):
                raise IOError('The file {0} was not found.'.format(file_name))
            loaded = Manifest.standardize_manifest_json(
                load_json_object(file_name))
            self.__dict__.update(loaded)

        # Remaining optional sources, each applied only when supplied.
        overlays = (
            (meta, self.update_from_meta),
            (files_path, self.update_from_files),
            (repo_name, self.update_from_repo_name),
        )
        for value, apply_update in overlays:
            if value:
                apply_update(value)

        # An explicit resource id always wins; nothing left to infer.
        if self.resource['id']:
            return

        looks_like_scripture = self.format == 'usfm' or (
            self.project['id'] and self.project['id'].lower() in BOOK_NAMES)
        if looks_like_scripture:
            self.resource['id'] = 'bible'
            self.resource['name'] = 'Bible'
    def populate_tw_words_data(self):
        """
        Collect the translationWords group data for the current project.

        Reads every ``<group>/groups/<project_id>/*.json`` file under the
        latest version of
        ``<working_dir>/resources/<ol_lang_code>/translationHelps/translationWords``.
        Each entry gets its ``contextId['rc']`` set to the article's rc link
        and an ``alignments`` dict keyed by ``self.ult_id`` and
        ``self.ust_id``. The entries are stored in ``self.tw_words_data`` as
        ``{chapter: {verse: [entry, ...]}}`` with string keys.

        :raises SystemExit: when the tW directory is missing or contains no
                            versions (after logging an error)
        """
        tw_path = os.path.join(self.working_dir, 'resources',
                               self.ol_lang_code,
                               'translationHelps/translationWords')
        # os.path.join() never returns an empty string, so truth-testing the
        # path can never detect a missing directory; ask the filesystem.
        if not os.path.exists(tw_path):
            self.logger.error(f'{tw_path} not found!')
            exit(1)
        tw_version_path = get_latest_version_path(tw_path)
        if not tw_version_path:
            self.logger.error(f'No versions found in {tw_path}!')
            exit(1)

        words_data = OrderedDict()
        for group in get_child_directories(tw_version_path):
            files_path = os.path.join(tw_version_path,
                                      f'{group}/groups/{self.project_id}',
                                      '*.json')
            for json_file in glob(files_path):
                base = os.path.splitext(os.path.basename(json_file))[0]
                tw_rc_link = f'rc://{self.lang_code}/tw/dict/bible/{group}/{base}'
                for group_data in load_json_object(json_file):
                    context_id = group_data['contextId']
                    chapter = str(context_id['reference']['chapter'])
                    verse = str(context_id['reference']['verse'])
                    context_id['rc'] = tw_rc_link
                    group_data['alignments'] = {
                        self.ult_id:
                        self.get_aligned_text(self.ult_id, context_id),
                        self.ust_id:
                        self.get_aligned_text(self.ust_id, context_id)
                    }
                    # Create the chapter/verse buckets on first use.
                    verses = words_data.setdefault(chapter, OrderedDict())
                    verses.setdefault(verse, []).append(group_data)
        self.tw_words_data = words_data
示例#28
0
 def __init__(self, file_name=None):
     """
     Create a TAStatus, either from a JSON file or with default values.

     :param str file_name: The name of a file to deserialize into a TAStatus object.
                           When omitted, default field values are used instead.
     :raises IOError: if ``file_name`` is given but is not an existing file
     """
     if not file_name:
         # No file supplied: start from the built-in defaults.
         self.checking_entity = ''
         self.checking_level = '1'
         self.comments = ''
         self.contributors = ''
         self.license = 'CC BY-SA 4.0'
         self.publish_date = datetime.today().strftime('%Y-%m-%d')
         self.source_text = 'en'
         self.source_text_version = ''
         self.version = ''
         return

     if not os.path.isfile(file_name):
         raise IOError('The file {0} was not found.'.format(file_name))

     # The loaded JSON object replaces the instance dictionary wholesale.
     self.__dict__ = load_json_object(file_name)
示例#29
0
 def test_bible_from_tx_pre_rc(self):
     """Build an RC from the 'id_mat_text_ulb-ts' fixture and check it against its manifest.json."""
     # Unpack the zipped fixture repo into a fresh temp directory.
     archive = os.path.join(self.resources_dir, 'id_mat_text_ulb-ts.zip')
     self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
     unzip(archive, self.out_dir)
     repo_dir = os.path.join(self.out_dir, 'id_mat_text_ulb-ts')

     rc = RC(directory=repo_dir)
     rc.as_dict()

     # The repo's own manifest.json supplies the expected values.
     manifest = load_json_object(os.path.join(repo_dir, 'manifest.json'))
     expected_format = manifest['format']
     self.assertEqual(rc.resource.identifier, manifest['resource']['id'])
     self.assertEqual(rc.resource.type, 'book')
     self.assertEqual(rc.resource.format, 'text/{0}'.format(expected_format))
     self.assertEqual(rc.resource.file_ext, expected_format)
     self.assertEqual(rc.resource.conformsto, 'pre-rc')
     self.assertEqual(rc.resource.modified,
                      datetime.utcnow().strftime('%Y-%m-%d'))

     self.assertEqual(len(rc.projects[0].chapters()), 29)
     self.assertEqual(len(rc.projects[0].chunks('01')), 11)
示例#30
0
 def test_en_obs_package_json(self):
     """Build an RC from the 'en-obs-package-json' fixture and check it against its package.json."""
     # Unpack the zipped English OBS fixture into a fresh temp directory.
     archive = os.path.join(self.resources_dir, 'en-obs-package-json.zip')
     self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
     unzip(archive, self.out_dir)
     repo_dir = os.path.join(self.out_dir, 'en-obs')

     rc = RC(directory=repo_dir)
     rc.as_dict()

     # The repo's package.json supplies the expected values.
     expected = load_json_object(os.path.join(repo_dir, 'package.json'))
     expected_resource = expected['resource']
     self.assertEqual(rc.resource.identifier, expected_resource['slug'])
     self.assertEqual(rc.resource.type, 'book')
     self.assertEqual(rc.resource.format, expected['content_mime_type'])
     self.assertEqual(rc.resource.file_ext, 'md')
     self.assertEqual(rc.resource.conformsto, 'pre-rc')
     self.assertEqual(rc.resource.issued,
                      expected_resource['status']['pub_date'])

     self.assertEqual(len(rc.projects[0].chapters()), 2)
     self.assertEqual(rc.project().chunks('_back'), ['back-matter.md'])
import os
from general_tools.file_utils import load_json_object

# Mappings gathered from here:
#  https://r12a.github.io/scripts (primary site, copied in languages from the "languages using" section of each script)
#  https://www.google.com/get/noto/
#  http://td.unfoldingword.org/uw/languages/
#  https://www.monotype.com/resources/case-studies/more-than-800-languages-in-a-single-typeface-creating-noto-for-google

# Directory containing this module; the JSON data files below live beside it.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# Paths of the JSON data files that are loaded at import time.
noto_font_list_file = os.path.join(SCRIPT_DIR, 'noto_font_list.json')
font_fallbacks_file = os.path.join(SCRIPT_DIR, 'font_fallbacks.json')
font_by_lang_file = os.path.join(SCRIPT_DIR, 'fonts_by_lang.json')

# Generic font-family list for use when nothing more specific applies
# (presumably consulted by lookup code elsewhere in this module -- verify).
DEFAULT_FALLBACK = ['Noto Sans', 'sans-serif']
# NOTE: the three loads below run as a side effect of importing this module.
FONT_FALLBACKS = load_json_object(font_fallbacks_file)
NOTO_FONT_LIST = load_json_object(noto_font_list_file)
FONTS_BY_LANG = load_json_object(font_by_lang_file)

# Some font-families need "Noto Sans" in front of it so Latin letters & numbers will show in Noto, such as CJK
PRECEDING_FONT_FAMILIES = {
    'Noto Sans JC': ['Noto Sans'],
    'Noto Sans SC': ['Noto Sans'],
    'Noto Sans TC': ['Noto Sans'],
}
def tests():
    """
    Ad-hoc manual checks for get_alignment(): load group data and chapter
    verse objects from JSON files, compute alignments and print them.

    NOTE(review): every path below is an absolute path under
    /Users/richmahn/working, so this only runs on a machine with that layout.
    NOTE(review): the unconditional ``return`` after the first case makes
    everything after it unreachable -- presumably a debugging toggle; confirm
    before relying on the later cases.
    """
    # TIT	1	8	xy12	figs-doublet	δίκαιον, ὅσιον	1	righteous, holy
    group_data = load_json_object(
        '/Users/richmahn/working/resources/en/translationHelps/translationNotes/v23/figures/groups/tit/figs-doublet.json'
    )
    chapter_verse_objects = load_json_object(
        '/Users/richmahn/working/resources/en/bibles/ult/v8/tit/1.json')
    quote = group_data[1]["contextId"]["quote"]
    verse_objects = chapter_verse_objects["8"]["verseObjects"]
    alignments = get_alignment(verse_objects, quote)
    print(alignments)
    # Early exit: none of the cases below are executed.
    return

    # TIT	1	2	r2gj		πρὸ χρόνων αἰωνίων	1	before all the ages of time
    chapter_verse_objects = load_json_object(
        '/Users/richmahn/working/resources/en/bibles/ult/v8/tit/1.json')
    quote = 'πρὸ χρόνων αἰωνίων'
    occurrence = 1
    verse_objects = chapter_verse_objects["2"]["verseObjects"]
    alignments = get_alignment(verse_objects, quote, occurrence)
    print(alignments)
    # Second early exit (already unreachable because of the one above).
    return

    # NOTE(review): `string` is assigned but not used anywhere in this function.
    string = 'בִּ⁠ימֵי֙ שְׁפֹ֣ט הַ⁠שֹּׁפְטִ֔ים'
    group_data = load_json_object(
        '/Users/richmahn/working/resources/en/translationHelps/translationNotes/v23/other/groups/rut/grammar-connect-time-simultaneous.json'
    )
    chapter_verse_objects = load_json_object(
        '/Users/richmahn/working/resources/en/bibles/ult/v8/rut/1.json')

    quote = group_data[0]["contextId"]["quote"]
    verse_objects = chapter_verse_objects["1"]["verseObjects"]
    alignments = get_alignment(verse_objects, quote)
    print(alignments)

    # RUT	4	22	abcd	figs-explicit	אֶת־דָּוִֽד	1	David
    group_data = load_json_object(
        '/Users/richmahn/working/resources/en/translationHelps/translationNotes/v23/culture/groups/rut/figs-explicit.json'
    )
    chapter_verse_objects = load_json_object(
        '/Users/richmahn/working/resources/en/bibles/ult/v8/rut/4.json')

    quote = group_data[12]["contextId"]["quote"]
    occurrence = group_data[12]["contextId"]["occurrence"]
    verse_objects = chapter_verse_objects["22"]["verseObjects"]
    alignments = get_alignment(verse_objects, quote, occurrence)
    print(alignments)

    # RUT	4	17	f9ha	figs-explicit	אֲבִ֥י דָוִֽד	1	the father of David
    quote = group_data[11]["contextId"]["quote"]
    occurrence = group_data[11]["contextId"]["occurrence"]
    verse_objects = chapter_verse_objects["17"]["verseObjects"]
    alignments = get_alignment(verse_objects, quote, occurrence)
    print(alignments)

    # RUT	4	19	rl3k	translate-names	וְ⁠חֶצְרוֹן֙…עַמִּֽינָדָֽב׃	1	Hezron…Amminadab
    group_data = load_json_object(
        '/Users/richmahn/working/resources/en/translationHelps/translationNotes/v23/culture/groups/rut/translate-names.json'
    )
    quote = group_data[-1]["contextId"]["quote"]
    occurrence = group_data[-1]["contextId"]["occurrence"]
    # NOTE(review): the case header above says verse 19 but verse "17" is
    # read here -- possibly a copy/paste slip; confirm the intended verse.
    verse_objects = chapter_verse_objects["17"]["verseObjects"]
    alignments = get_alignment(verse_objects, quote, occurrence)
    print(alignments)

    # RUT	1	4	aee6		שֵׁ֤ם הָֽ⁠אַחַת֙…וְ⁠שֵׁ֥ם הַ⁠שֵּׁנִ֖י	1	the name of the first woman was…and the name of the second woman was
    quote = 'שֵׁ֤ם הָֽ⁠אַחַת֙…וְ⁠שֵׁ֥ם הַ⁠שֵּׁנִ֖י'
    occurrence = 1
    chapter_verse_objects = load_json_object(
        '/Users/richmahn/working/resources/en/bibles/ult/v8/rut/1.json')
    verse_objects = chapter_verse_objects["4"]["verseObjects"]
    alignments = get_alignment(verse_objects, quote, occurrence)
    print(alignments)
示例#33
0
    def get_tw_checking_html(self):
        tw_html = f'''
<section id="{self.lang_code}-{self.name}-{self.project_id}" class="{self.name}">
    <article id="{self.lang_code}-{self.name}-{self.project_id}-cover" class="resource-title-page">
        <img src="{self.main_resource.logo_url}" class="logo" alt="UTW">
        <h1 class="section-header">{self.title}</h1>
        <h2 class="section-header">{self.project_title}</h2>
    </article>
'''

        tw_path = os.path.join(self.resources_dir, self.ol_lang_code,
                               'translationHelps/translationWords')
        if not tw_path:
            self.logger.error(f'{tw_path} not found!')
            exit(1)
        tw_version_path = get_latest_version_path(tw_path)
        if not tw_version_path:
            self.logger.error(f'No versions found in {tw_path}!')
            exit(1)

        groups = get_child_directories(tw_version_path)
        for group in groups:
            files_path = os.path.join(tw_version_path,
                                      f'{group}/groups/{self.project_id}',
                                      '*.json')
            files = glob(files_path)
            for file in files:
                base = os.path.splitext(os.path.basename(file))[0]
                tw_rc_link = f'rc://{self.lang_code}/tw/dict/bible/{group}/{base}'
                tw_rc = self.add_rc(tw_rc_link, title=base)
                self.get_tw_article_html(tw_rc)
                tw_html += f'''
    <article id="{tw_rc.article_id}">
        <h3 class="section-header">[[{tw_rc.rc_link}]]</h3>
        <table width="100%">
            <tr>
               <th style="width:1px;padding:0 !important"></th>
               <th>Verse</th>
               <th>{self.ult_id.upper()} Alignment</th>
               <th>{self.ult_id.upper()} Text</th>
               <th>{self.ust_id.upper()} Alignment</th>
               <th>{self.ust_id.upper()} Text</th>
               <th>{self.ol_bible_id.upper()} Quote</th>
               <th>{self.ol_bible_id.upper()} Text</th>
            </tr>
'''

                tw_group_data = load_json_object(file)
                for group_data in tw_group_data:
                    context_id = group_data['contextId']
                    context_id['rc'] = tw_rc.rc_link
                    chapter = str(context_id['reference']['chapter'])
                    verse = str(context_id['reference']['verse'])
                    context_id['scripture'] = {}
                    context_id['alignments'] = {}
                    for bible_id in [self.ult_id, self.ust_id]:
                        alignment = self.get_aligned_text(
                            bible_id, group_data['contextId'])
                        if alignment:
                            context_id['alignments'][
                                bible_id] = flatten_alignment(alignment)
                        else:
                            context_id['alignments'][
                                bible_id] = '<div style="color: red">NONE</div>'
                        scripture = self.get_plain_scripture(
                            bible_id, chapter, verse)
                        marked_html = None
                        if alignment:
                            marked_html = mark_phrases_in_html(
                                scripture, alignment)
                        if marked_html:
                            context_id['scripture'][bible_id] = marked_html
                        else:
                            context_id['scripture'][
                                bible_id] = f'<div style="color: red">{scripture}</div>'
                    scripture = self.get_plain_scripture(
                        self.ol_bible_id, chapter, verse)
                    ol_alignment = context_id['quote']
                    if isinstance(ol_alignment, str):
                        ol_alignment = split_string_into_alignment(
                            ol_alignment)
                    if not isinstance(ol_alignment[0], list):
                        ol_alignment = convert_single_dimensional_quote_to_multidimensional(
                            ol_alignment)
                    marked_html = mark_phrases_in_html(scripture, ol_alignment)
                    if marked_html:
                        context_id['scripture'][self.ol_bible_id] = marked_html
                    else:
                        context_id['scripture'][
                            self.
                            ol_bible_id] = f'<div style="color: red">{scripture}</div>'
                    tw_html += f'''
            <tr id="{tw_rc.article_id}-{chapter}-{verse}">
                <td style="width:1px;padding:0 !important"><a href="#{tw_rc.article_id}-{chapter}-{verse}"><i class="fa fa-link"></i></td>
                <td>
                    {chapter}:{verse}
                </td>
                <td>
                    {context_id['alignments'][self.ult_id]}
                </td>
                <td>
                    {context_id['scripture'][self.ult_id]}
                </td>
                <td>
                    {context_id['alignments'][self.ust_id]}
                </td>
                <td>
                    {context_id['scripture'][self.ust_id]}
                </td>
                <td style="direction: {'rtl' if self.ol_lang_code == 'hbo' else 'ltr'}">
                    {flatten_alignment(ol_alignment)}
                </td>
                <td style="direction: {'rtl' if self.ol_lang_code == 'hbo' else 'ltr'}">
                    {context_id['scripture'][self.ol_bible_id]}
                </td>
            </tr>
'''
                tw_html += '''
        </table>
    </article>
'''

        tw_html += '''
</section>
'''
        self.logger.info('Done generating TW Checking HTML.')
        return tw_html
    def run(self):
        """
        Export the OBS front, body and back matter into one TeX file.

        Fetches the three JSON sources through ``self.get_json()``, converts
        them with ``export_matter()`` / ``export()``, then walks
        ``main_template.tex`` line by line: lines matching one of the
        chapter / front-matter / back-matter placeholder patterns are
        replaced by the corresponding generated output, all other lines get
        their miscellaneous placeholders substituted via
        ``self.another_replace``. The result is written to ``self.out_path``.

        Side effects: replaces ``sys.stdout`` with a UTF-8 writer and exits
        the process with status 1 if the TeX template is missing.
        """
        font_path_pattern = re.compile(r'([{ ])obs/tex/', re.UNICODE)

        sys.stdout = codecs.getwriter('utf8')(sys.stdout)
        front_tmp = self.get_json(self.lang, 'obs-{0}-front-matter.json', '{0}-front-matter-json.tmp')
        back_tmp = self.get_json(self.lang, 'obs-{0}-back-matter.json', '{0}-back-matter-json.tmp')
        front_json = load_json_object(front_tmp, {})
        back_json = load_json_object(back_tmp, {})

        # Front matter arrives as a single blob holding an "about" part and a
        # "license" part. Split it at the whitespace preceding each bold
        # "{\bf ...:}" heading so the checking-level indicator can be placed
        # between the two; with no match everything stays in the first part.
        front_matter = self.export_matter(front_json['front-matter'], 0)
        front_parts = re.split(r'\s(?=\{\\bf.+:\s*\})', front_matter)
        output_front_about = front_parts[0]
        # Joining an empty tail yields '' when there was nothing to split.
        output_front_license = ''.join(front_parts[1:])
        output_back = self.export_matter(back_json['back-matter'], 0)

        # Body matter
        body_tmp = self.get_json(self.lang, 'obs-{0}.json'.format(self.lang),
                                 '{0}-body-matter-json.tmp')
        self.body_json = load_json_object(body_tmp, {})
        self.check_for_standard_keys_json()
        # Make up for a missing localized TOC title.
        if 'toctitle' not in self.body_json.keys():
            self.body_json['toctitle'] = OBSTexExport.extract_title_from_frontmatter(front_json['front-matter'])
        output = self.export(self.body_json['chapters'], self.max_chapters, self.img_res, self.body_json['language'])

        # ConTeXt template whose <<<[anyvar]>>> markers are filled from the
        # body-matter JSON.
        tex_template = os.path.join(OBSTexExport.snippets_dir, 'main_template.tex')
        if not os.path.exists(tex_template):
            print("Failed to get TeX template.")
            sys.exit(1)

        with codecs.open(tex_template, 'r', encoding='utf-8-sig') as in_file:
            template = in_file.read()

        # Make the relative font paths absolute.
        template = font_path_pattern.sub(r'\1{0}/'.format(OBSTexExport.snippets_dir), template)

        # Whole-line placeholders, checked in this order; first match wins.
        section_outputs = (
            (OBSTexExport.matchChaptersPat, output),
            (OBSTexExport.matchFrontMatterAboutPat, output_front_about),
            (OBSTexExport.matchFrontMatterlicensePat, output_front_license),
            (OBSTexExport.matchBackMatterPat, output_back),
        )

        outlist = []
        for single_line in template.splitlines():
            for pattern, generated in section_outputs:
                if pattern.search(single_line):
                    outlist.append(generated)
                    break
            else:
                # Ordinary line: keep substituting until a pass replaces
                # nothing (a replacement may itself contain placeholders).
                while True:
                    single_line, occurs = OBSTexExport.matchMiscPat.subn(
                        self.another_replace, single_line, OBSTexExport.MATCH_ALL)
                    if occurs <= 0:
                        break
                outlist.append(single_line)

        write_file(self.out_path, '\n'.join(outlist))
示例#35
0
 def test_load_json_object(self):
     """Write a dict to a temp JSON file and check load_json_object() reads it back unchanged."""
     import os  # local import: only needed to wrap the descriptor from mkstemp()

     d = {"one": 1, "two": 2, "child": {"three": 3}}
     fd, self.tmp_file = tempfile.mkstemp(prefix='Door43_test_')
     # mkstemp() returns an already-open OS-level descriptor that the caller
     # must close; write through it instead of discarding it and reopening
     # the path, which leaked one descriptor per test run.
     with os.fdopen(fd, "w") as tmpf:
         json.dump(d, tmpf)
     self.assertEqual(file_utils.load_json_object(self.tmp_file), d)
                'The tools directory was not found. The PDF cannot be generated.'
            )
            resp = prompt(
                'Do you want to continue without generating a PDF? [Y|n]: ')
            if resp != '' and resp != 'Y' and resp != 'y':
                sys.exit(0)

    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])

    print('Loading languages...', end=' ')
    lang_dict = OBS.load_lang_strings()
    print('finished.')

    print('Loading the catalog...', end=' ')
    uw_cat_path = os.path.join(unfoldingWord_dir, 'obs-catalog.json')
    uw_catalog = load_json_object(uw_cat_path, [])
    uw_cat_langs = [x['language'] for x in uw_catalog]
    cat_path = os.path.join(export_dir, 'obs-catalog.json')
    catalog = load_json_object(cat_path, [])
    print('finished')

    if 'obs' not in os.listdir(os.path.join(pages, lang)):
        print('OBS not configured in Door43 for {0}'.format(lang))
        sys.exit(1)

    print('Getting metadata...', end=' ')
    app_words = get_json_dict(os.path.join(pages, lang, 'obs/app_words.txt'))
    lang_direction = 'ltr'
    if lang in rtl:
        lang_direction = 'rtl'
    obs_obj = OBS()
示例#37
0
    def run(self):
        """
        Download an OBS repo archive and convert its markdown chapters to HTML.

        Steps: create a temp working directory, normalise
        ``self.source_repo_url`` (strip a trailing ``.git`` and ``/``),
        download and unzip ``archive/master.zip``, read ``manifest.json``,
        then render ``content/01.md`` .. ``content/50.md`` into
        ``self.output_directory`` using ``template.html`` located next to
        this source file.

        Errors are not propagated: they are reported through
        ``print_error()`` and appended to ``self.errors``.
        """
        try:
            self.temp_dir = tempfile.mkdtemp(prefix='txOBS_')

            # clean up the git repo url
            if self.source_repo_url[-4:] == '.git':
                self.source_repo_url = self.source_repo_url[:-4]

            if self.source_repo_url[-1:] == '/':
                self.source_repo_url = self.source_repo_url[:-1]

            # download the archive
            file_to_download = join_url_parts(self.source_repo_url, 'archive/master.zip')
            repo_dir = self.source_repo_url.rpartition('/')[2]
            downloaded_file = os.path.join(self.temp_dir, repo_dir + '.zip')
            try:
                print('Downloading {0}...'.format(file_to_download), end=' ')
                if not os.path.isfile(downloaded_file):
                    download_file(file_to_download, downloaded_file)
            finally:
                print('finished.')

            # unzip the archive
            try:
                print('Unzipping...', end=' ')
                unzip(downloaded_file, self.temp_dir)
            finally:
                print('finished.')

            # get the manifest
            # NOTE(review): the manifest is read from the temp dir root while
            # the content is read from <temp_dir>/<repo_dir>/content, and the
            # loaded value is not used afterwards -- confirm the intended path.
            try:
                print('Reading the manifest...', end=' ')
                manifest = load_json_object(os.path.join(self.temp_dir, 'manifest.json'))
            finally:
                print('finished.')

            # create output directory
            make_dir(self.output_directory)

            # read the markdown files and output html files
            try:
                print('Processing the OBS markdown files')
                # chapter files 01.md .. 50.md
                files_to_process = [str(i).zfill(2) + '.md' for i in range(1, 51)]

                current_dir = os.path.dirname(inspect.stack()[0][1])
                with codecs.open(os.path.join(current_dir, 'template.html'), 'r', 'utf-8-sig') as html_file:
                    html_template = html_file.read()

                for file_to_process in files_to_process:

                    # read the markdown file
                    file_name = os.path.join(self.temp_dir, repo_dir, 'content', file_to_process)
                    with codecs.open(file_name, 'r', 'utf-8-sig') as md_file:
                        md = md_file.read()

                    html = markdown.markdown(md)
                    html = TransformOBS.dir_re.sub(r'\1\n' + html + r'\n\2', html_template)
                    write_file(os.path.join(self.output_directory, file_to_process.replace('.md', '.html')), html)

            except IOError as ioe:
                print_error('{0}: {1}'.format(ioe.strerror, ioe.filename))
                self.errors.append(ioe)

            except Exception as e:
                # Exceptions have no `.message` attribute on Python 3; using
                # it here raised AttributeError and hid the real failure.
                print_error(str(e))
                self.errors.append(e)

            finally:
                print('finished.')

        except Exception as e:
            # Same as above: str(e) works for every exception type.
            print_error(str(e))
            self.errors.append(e)
def main(git_repo, tag, no_pdf):
    """
    Publish OBS from a git repository archive.

    Downloads ``archive/<tag>.zip`` of ``git_repo`` into
    ``/tmp/<repo name>``, locates ``manifest.json`` and ``status.json``,
    loads and verifies the chapters, writes the OBS JSON and the catalog
    to the exports directory, pushes to the API, refreshes the catalogs
    and, unless ``no_pdf`` is set, creates the PDF.

    :param str git_repo: URL of the repository (a trailing ``.git`` or ``/`` is stripped)
    :param str tag: tag or branch name of the archive to download
    :param bool no_pdf: when truthy, return before PDF generation

    Exits the process with status 1 when the manifest or status file is
    missing, the quality check fails, or the language is not configured.
    """
    global download_dir

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]

    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    # today is the ISO date with the dashes removed, i.e. YYYYMMDD
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])  # str(datetime.date.today())
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    manifest = None
    status = None  # type: OBSStatus
    content_dir = None

    # download the repository
    # (skipped when the zip is already present from an earlier run)
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        # NOTE(review): the format string has no placeholder, so the file
        # name passed to format() is not printed.
        print('Unzipping...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):

        if 'manifest.json' in files:
            # read the manifest
            try:
                print('Reading the manifest...', end=' ')
                content_dir = root
                manifest = load_json_object(os.path.join(root, 'manifest.json'))
            finally:
                print('finished.')

        if 'status.json' in files:
            # read the meta data
            try:
                print('Reading the status...', end=' ')
                content_dir = root
                status = OBSStatus(os.path.join(root, 'status.json'))
            finally:
                print('finished.')

        # if we have everything, exit the loop
        if content_dir and manifest and status:
            break

    # check for valid repository structure
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)

    if not status:
        print_error('Did not find status.json in {}'.format(git_repo))
        sys.exit(1)

    print('Initializing OBS object...', end=' ')
    lang = manifest['target_language']['id']
    obs_obj = OBS()
    obs_obj.date_modified = today
    obs_obj.direction = manifest['target_language']['direction']
    obs_obj.language = lang
    print('finished')

    # chapters are loaded from the directory where the manifest/status was found
    obs_obj.chapters = load_obs_chapters(content_dir)
    obs_obj.chapters.sort(key=lambda c: c['number'])

    if not obs_obj.verify_all():
        print_error('Quality check did not pass.')
        sys.exit(1)

    print('Loading languages...', end=' ')
    lang_dict = OBS.load_lang_strings()
    print('finished.')

    print('Loading the catalog...', end=' ')
    export_dir = '/var/www/vhosts/door43.org/httpdocs/exports'
    # uw_cat_path = os.path.join(unfoldingWord_dir, 'obs-catalog.json')
    # uw_catalog = load_json_object(uw_cat_path, [])
    # uw_cat_langs = [x['language'] for x in uw_catalog]
    cat_path = os.path.join(export_dir, 'obs-catalog.json')
    catalog = load_json_object(cat_path, [])
    print('finished')

    print('Getting already published languages...', end=' ')
    json_lang_file_path = os.path.join(export_dir, lang, 'obs', 'obs-{0}.json'.format(lang))
    # prev_json_lang = load_json_object(json_lang_file_path, {})

    if lang not in lang_dict:
        print("Configuration for language {0} missing.".format(lang))
        sys.exit(1)
    print('finished.')

    updated = update_language_catalog(lang, obs_obj.direction, status, today, lang_dict, catalog)

    print('Writing the OBS file to the exports directory...', end=' ')
    cur_json = json.dumps(obs_obj, sort_keys=True, cls=OBSEncoder)

    if updated:
        # set date_modified on the first catalog entry for this language
        ([x for x in catalog if x['language'] == lang][0]['date_modified']) = today
        # NOTE(review): the path built above ends in '.json', so this
        # replace() only has an effect if '.txt' occurs elsewhere in the path.
        write_file(json_lang_file_path.replace('.txt', '.json'), cur_json)
    print('finished.')

    export_to_api(lang, status, today, cur_json)

    cat_json = json.dumps(catalog, sort_keys=True, cls=OBSEncoder)
    write_file(cat_path, cat_json)

    # update the catalog
    print_ok('STARTING: ', 'updating the catalogs.')
    update_catalog()
    print_ok('FINISHED: ', 'updating the catalogs.')

    if no_pdf:
        return

    create_pdf(lang, status.checking_level, status.version)
示例#39
0
def main(git_repo, tag, no_pdf):
    """Publish one tagged OBS repository to the exports directory.

    Downloads ``archive/<tag>.zip`` from ``git_repo`` into ``/tmp/<repo name>``
    (the download is skipped when the zip file already exists), unzips it and
    walks the tree looking for ``manifest.json`` and a ``content`` directory.
    An ``OBS`` object is built from what was found and verified, the language
    entry in ``obs-catalog.json`` is updated, the result is handed to
    ``export_to_api`` and ``update_catalog``, and finally ``create_pdf`` is
    called unless ``no_pdf`` is truthy.

    The process exits with status 1 when no manifest is found, when
    ``verify_all()`` fails, or when the language is missing from the strings
    returned by ``OBS.load_lang_strings()``.

    :param git_repo: repository URL; a trailing ``.git`` and then a trailing
        ``/`` are removed before it is used
    :param tag: name of the archive to fetch (``archive/<tag>.zip``)
    :param no_pdf: when truthy, return without calling ``create_pdf``
    :return: None
    """
    global download_dir

    # normalise the URL: drop a '.git' suffix first, then one trailing slash
    if git_repo.endswith('.git'):
        git_repo = git_repo[:-4]
    if git_repo.endswith('/'):
        git_repo = git_repo[:-1]

    # date stamp: today's ISO date with the dashes removed
    today = ''.join(str(datetime.date.today()).split(str('-')))

    repo_name = git_repo.rpartition('/')[2]
    download_dir = '/tmp/{0}'.format(repo_name)
    make_dir(download_dir)
    zip_path = '{0}/{1}.zip'.format(download_dir, repo_name)
    archive_url = join_url_parts(git_repo, 'archive/{0}.zip'.format(tag))
    manifest = None
    status = None  # type: OBSStatus
    content_dir = None

    # fetch the archive unless a previous run already left it in place
    try:
        print('Downloading {0}...'.format(archive_url), end=' ')
        zip_is_cached = os.path.isfile(zip_path)
        if not zip_is_cached:
            download_file(archive_url, zip_path)
    finally:
        print('finished.')

    try:
        print('Unzipping...', end=' ')
        unzip(zip_path, download_dir)
    finally:
        print('finished.')

    # walk the unpacked tree for the manifest and the content directory
    for folder, subfolders, file_names in os.walk(download_dir):

        if 'manifest.json' in file_names:
            try:
                print('Reading the manifest...', end=' ')
                # the manifest's own folder is the content directory unless a
                # 'content' sub-directory is found below
                content_dir = folder
                manifest_path = os.path.join(folder, 'manifest.json')
                manifest = load_json_object(manifest_path)
                status = OBSStatus.from_manifest(manifest)
            finally:
                print('finished.')

        if 'content' in subfolders:
            content_dir = os.path.join(folder, 'content')

        # keep walking until all three pieces have been found
        if not (content_dir and manifest and status):
            continue
        break

    # a repository without a manifest cannot be published
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)

    print('Initializing OBS object...', end=' ')
    lang = manifest['language']['slug']
    obs_obj = OBS()
    obs_obj.date_modified = today
    obs_obj.direction = manifest['language']['dir']
    obs_obj.language = lang
    print('finished')

    def chapter_number(chapter):
        # chapter numbers are compared numerically, not as text
        return int(chapter['number'])

    obs_obj.chapters = load_obs_chapters(content_dir)
    obs_obj.chapters.sort(key=chapter_number)

    passed_checks = obs_obj.verify_all()
    if not passed_checks:
        print_error('Quality check did not pass.')
        sys.exit(1)

    print('Loading languages...', end=' ')
    lang_dict = OBS.load_lang_strings()
    print('finished.')

    print('Loading the catalog...', end=' ')
    export_dir = '/var/www/vhosts/door43.org/httpdocs/exports'
    cat_path = os.path.join(export_dir, 'obs-catalog.json')
    catalog = load_json_object(cat_path, [])
    print('finished')

    print('Getting already published languages...', end=' ')
    lang_file_name = 'obs-{0}.json'.format(lang)
    json_lang_file_path = os.path.join(export_dir, lang, 'obs', lang_file_name)

    if lang not in lang_dict:
        print("Configuration for language {0} missing.".format(lang))
        sys.exit(1)
    print('finished.')

    updated = update_language_catalog(lang, obs_obj.direction, status, today,
                                      lang_dict, catalog)

    print('Writing the OBS file to the exports directory...', end=' ')
    cur_json = json.dumps(obs_obj, sort_keys=True, cls=OBSEncoder)

    if updated:
        # stamp the first catalog entry for this language with today's date
        entries_for_lang = [entry for entry in catalog
                            if entry['language'] == lang]
        entries_for_lang[0]['date_modified'] = today
        # noinspection PyTypeChecker
        target_path = json_lang_file_path.replace('.txt', '.json')
        write_file(target_path, cur_json)
    print('finished.')

    export_to_api(lang, status, today, cur_json)

    cat_json = json.dumps(catalog, sort_keys=True, cls=OBSEncoder)
    write_file(cat_path, cat_json)

    # refresh the catalogs
    print_ok('STARTING: ', 'updating the catalogs.')
    update_catalog()
    print_ok('FINISHED: ', 'updating the catalogs.')

    if not no_pdf:
        create_pdf(lang, status.checking_level, status.version)
示例#40
0
    def run(self):
        """Download an OBS repository and render its chapters to HTML files.

        Steps, in order:

        * exit with status 0 (after a warning) unless ``self.source_repo_url``
          contains ``git.door43.org``;
        * strip a trailing ``.git`` and then a trailing ``/`` from
          ``self.source_repo_url`` (the attribute itself is updated);
        * download ``archive/master.zip`` into ``self.temp_dir`` unless the
          zip file is already there, and unzip it into ``self.temp_dir``;
        * load ``manifest.json`` from ``self.temp_dir``;
        * create ``self.output_directory``;
        * for ``01.md`` through ``50.md`` in ``<temp_dir>/<repo>/content``,
          convert the markdown to HTML, insert it into ``template.html`` (read
          from the directory of the running source file) via
          ``TransformOBS.dir_re`` and write ``NN.html`` to the output
          directory.

        Exceptions are not propagated: they are reported with ``print_error``
        and appended to ``self.errors``.

        :return: None
        """

        def describe(error):
            # ``BaseException.message`` only exists on Python 2 and is empty
            # for multi-argument exceptions; fall back to str() so the real
            # error text is always reported.
            return getattr(error, 'message', None) or str(error)

        if 'git.door43.org' not in self.source_repo_url:
            print_warning(
                'Currently only git.door43.org repositories are supported.')
            sys.exit(0)

        try:
            # clean up the git repo url
            if self.source_repo_url.endswith('.git'):
                self.source_repo_url = self.source_repo_url[:-4]

            if self.source_repo_url.endswith('/'):
                self.source_repo_url = self.source_repo_url[:-1]

            # download the archive
            file_to_download = join_url_parts(self.source_repo_url,
                                              'archive/master.zip')
            repo_dir = self.source_repo_url.rpartition('/')[2]
            downloaded_file = os.path.join(self.temp_dir, repo_dir + '.zip')
            try:
                if not self.quiet:
                    print('Downloading {0}...'.format(file_to_download),
                          end=' ')
                # an archive left by a previous run is reused as-is
                if not os.path.isfile(downloaded_file):
                    download_file(file_to_download, downloaded_file)
            finally:
                if not self.quiet:
                    print('finished.')

            # unzip the archive
            try:
                if not self.quiet:
                    print('Unzipping...', end=' ')
                unzip(downloaded_file, self.temp_dir)
            finally:
                if not self.quiet:
                    print('finished.')

            # get the manifest
            try:
                if not self.quiet:
                    print('Reading the manifest...', end=' ')
                # The result is not used below; the call is kept because
                # load_json_object may raise on a missing or invalid file
                # -- TODO confirm against its implementation.
                load_json_object(
                    os.path.join(self.temp_dir, 'manifest.json'))
            finally:
                if not self.quiet:
                    print('finished.')

            # create output directory
            make_dir(self.output_directory)

            # read the markdown files and output html files
            try:
                if not self.quiet:
                    print('Processing the OBS markdown files')

                # chapter files are named 01.md through 50.md
                files_to_process = [str(i).zfill(2) + '.md'
                                    for i in range(1, 51)]

                current_dir = os.path.dirname(inspect.stack()[0][1])
                with codecs.open(os.path.join(current_dir, 'template.html'),
                                 'r', 'utf-8-sig') as html_file:
                    html_template = html_file.read()

                for file_to_process in files_to_process:

                    # read the markdown file
                    file_name = os.path.join(self.temp_dir, repo_dir,
                                             'content', file_to_process)
                    with codecs.open(file_name, 'r', 'utf-8-sig') as md_file:
                        md = md_file.read()

                    body_html = markdown.markdown(md)

                    # Only the fixed prefix/suffix are expanded as regex
                    # templates; the rendered HTML is inserted literally so
                    # that backslashes in the content are not interpreted as
                    # escapes or group references.
                    html = TransformOBS.dir_re.sub(
                        lambda match, body=body_html: (
                            match.expand(r'\1\n') + body +
                            match.expand(r'\n\2')),
                        html_template)
                    write_file(
                        os.path.join(self.output_directory,
                                     file_to_process.replace('.md', '.html')),
                        html)

            except IOError as ioe:
                print_error('{0}: {1}'.format(ioe.strerror, ioe.filename))
                self.errors.append(ioe)

            except Exception as e:
                print_error(describe(e))
                self.errors.append(e)

            finally:
                if not self.quiet:
                    print('finished.')

        except Exception as e:
            print_error(describe(e))
            self.errors.append(e)
        if not os.path.isdir(tools_dir):
            tools_dir = None
            print_notice('The tools directory was not found. The PDF cannot be generated.')
            resp = prompt('Do you want to continue without generating a PDF? [Y|n]: ')
            if resp != '' and resp != 'Y' and resp != 'y':
                sys.exit(0)

    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])

    print('Loading languages...', end=' ')
    lang_dict = OBS.load_lang_strings()
    print('finished.')

    print('Loading the catalog...', end=' ')
    uw_cat_path = os.path.join(unfoldingWord_dir, 'obs-catalog.json')
    uw_catalog = load_json_object(uw_cat_path, [])
    uw_cat_langs = [x['language'] for x in uw_catalog]
    cat_path = os.path.join(export_dir, 'obs-catalog.json')
    catalog = load_json_object(cat_path, [])
    print('finished')

    if 'obs' not in os.listdir(os.path.join(pages, lang)):
        print('OBS not configured in Door43 for {0}'.format(lang))
        sys.exit(1)

    print('Getting metadata...', end=' ')
    app_words = get_json_dict(os.path.join(pages, lang, 'obs/app_words.txt'))
    lang_direction = 'ltr'
    if lang in rtl:
        lang_direction = 'rtl'
    obs_obj = OBS()
def main(git_repo, tag, domain):
    """Publish one Bible book from a tagged repository.

    Downloads ``archive/<tag>.zip`` from ``git_repo`` into ``/tmp/<repo name>``
    (skipped when the zip file already exists), unzips it and walks the tree
    for ``manifest.json`` and ``meta.json``. The book is read either from a
    single ``*.usfm`` file next to the manifest or from chunked files,
    verified, given paragraph markers and chunked, then written as
    ``NN-ID.usfm`` together with an updated ``status.json`` into the directory
    built from ``out_template``. Finally the output is published through
    ``api_publish`` and ``update_catalog`` is run.

    The process exits with status 1 when the manifest or metadata is missing,
    when no versification entry matches the project id, or when the book has
    validation errors.

    :param git_repo: repository URL; a trailing ``.git`` and then a trailing
        ``/`` are removed before it is used
    :param tag: name of the archive to fetch (``archive/<tag>.zip``)
    :param domain: first value substituted into ``out_template``
    :return: None
    """
    global download_dir, out_template

    # normalise the URL: drop a '.git' suffix first, then one trailing slash
    if git_repo.endswith('.git'):
        git_repo = git_repo[:-4]
    if git_repo.endswith('/'):
        git_repo = git_repo[:-1]

    # date stamp: today's ISO date with the dashes removed
    today = ''.join(str(datetime.date.today()).split('-'))

    repo_name = git_repo.rpartition('/')[2]
    download_dir = '/tmp/{0}'.format(repo_name)
    make_dir(download_dir)
    zip_path = '{0}/{1}.zip'.format(download_dir, repo_name)
    archive_url = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    manifest = None
    metadata_obj = None
    content_dir = ''
    usfm_file = None

    # fetch the archive unless a previous run already left it in place
    try:
        print('Downloading {0}...'.format(archive_url), end=' ')
        zip_is_cached = os.path.isfile(zip_path)
        if not zip_is_cached:
            download_file(archive_url, zip_path)
    finally:
        print('finished.')

    try:
        print('Unzipping...', end=' ')
        unzip(zip_path, download_dir)
    finally:
        print('finished.')

    # walk the unpacked tree for the manifest and the metadata
    for folder, _subfolders, file_names in os.walk(download_dir):

        if 'manifest.json' in file_names:
            try:
                print('Reading the manifest...', end=' ')
                manifest_path = os.path.join(folder, 'manifest.json')
                manifest = load_json_object(manifest_path)
                content_dir = folder

                # a single .usfm file beside the manifest holds the whole book
                usfm_matches = glob(os.path.join(content_dir, '*.usfm'))
                if len(usfm_matches) == 1:
                    usfm_file = os.path.join(content_dir, usfm_matches[0])
            finally:
                print('finished.')

        if 'meta.json' in file_names:
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = BibleMetaData(os.path.join(folder, 'meta.json'))
            finally:
                print('finished.')

        # keep walking until both files have been found
        if not (manifest and metadata_obj):
            continue
        break

    # both files are required to continue
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)

    if not metadata_obj:
        print_error('Did not find meta.json in {}'.format(git_repo))
        sys.exit(1)

    print('Getting versification info...', end=' ')
    vrs = Bible.get_versification(metadata_obj.versification)

    # pick the first versification entry whose id matches the project id
    book = None
    for candidate in vrs:
        if candidate.book_id.lower() == manifest['project']['id']:
            book = candidate
            break

    if not book:
        print_error('Book versification data was not found for "{}"'.format(manifest['project']['id']))
        sys.exit(1)
    print('finished')

    # read the text from the unified file when there is one, else from chunks
    if not usfm_file:
        read_chunked_files(book, content_dir, metadata_obj)
    else:
        read_unified_file(book, usfm_file)

    print('Running USFM checks...', end=' ')
    book.verify_chapters_and_verses(True)
    if book.validation_errors:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)
    print('finished.')

    print('Inserting paragraph markers...', end=' ')
    Bible.insert_paragraph_markers(book)
    print('finished.')

    print('Chunking the text...', end=' ')
    Bible.chunk_book(metadata_obj.versification, book)
    book.apply_chunks()
    print('finished.')

    out_dir = out_template.format(domain, metadata_obj.slug)

    # the file name looks like '01-GEN.usfm'
    padded_number = str(book.number).zfill(2)
    book_file_name = '{0}-{1}.usfm'.format(padded_number, book.book_id)
    print('Writing ' + book_file_name + '...', end=' ')
    write_file('{0}/{1}'.format(out_dir, book_file_name), book.usfm)
    print('finished.')

    # start from the existing status.json when there is one
    print('Updating the status for {0}...'.format(metadata_obj.lang), end=' ')
    status_file = '{0}/status.json'.format(out_dir)
    status = BibleStatus(status_file) if os.path.isfile(status_file) else BibleStatus()

    status.update_from_meta_data(metadata_obj)
    status.add_book_published(book)
    status.date_modified = today
    print('finished.')

    print('Writing status.json...', end=' ')
    status_json = json.dumps(status, sort_keys=True, indent=2, cls=BibleEncoder)
    write_file(status_file, status_json)
    print('finished')

    print('Publishing to the API...')
    with api_publish(out_dir) as api:
        api.run()
    print('Finished publishing to the API.')

    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')

    print_notice('Check {0} and do a git push'.format(out_dir))
示例#43
0
 def load_static_json_file(file_name):
     """Load a JSON file located in the application's static directory.

     :param file_name: path of the file relative to the directory returned by
         ``app_utils.get_static_dir()``
     :return: whatever ``load_json_object`` returns for that path; ``{}`` is
         passed as its second argument (presumably the fallback value --
         confirm against ``load_json_object``)
     """
     static_path = os.path.join(app_utils.get_static_dir(), file_name)
     return load_json_object(static_path, {})