Пример #1
0
    def check_yaml_values(self, yaml_data):
        """
        Validate the required yaml values of a page.

        All checks are always run (no early return) so that every problem is
        reported in one pass.

        :param yaml_data: mapping of yaml value names to their parsed values
        :return: True when volume, manual, slug and title all pass, otherwise False
        :rtype: bool
        """
        return_val = True

        # check the required yaml values
        if not TAArticle.check_value_is_valid_int('volume', yaml_data):
            print_error('Volume value is not valid.')
            return_val = False

        if not self.check_value_is_valid_string('manual', yaml_data):
            print_error('Manual value is not valid.')
            return_val = False

        if not self.check_value_is_valid_string('slug', yaml_data):
            # name the field that actually failed (this used to say "Volume")
            print_error('Slug value is not valid.')
            return_val = False
        else:
            # slug cannot contain a dash, only underscores
            test_slug = str(yaml_data['slug']).strip()
            if '-' in test_slug:
                print_error('Slug values cannot contain hyphen (dash).')
                return_val = False

        if not self.check_value_is_valid_string('title', yaml_data):
            print_error('Title value is not valid.')
            return_val = False

        return return_val
Пример #2
0
def check_missing_pages(toc_obj, content_dir):
    """
    Compare the slugs listed in the TOC with the pages found on disk.

    Pages that exist on disk but are not in the TOC only produce a warning and
    a prompt; any answer that is not empty and does not start with 'y'/'Y'
    exits with status 0. Slugs listed in the TOC that have no page on disk are
    reported as an error and the process exits with status 1.

    :param toc_obj: object providing ``all_slugs()``
    :param content_dir: passed to ``get_all_page_slugs`` to find the pages
    """
    slugs_in_toc = toc_obj.all_slugs()
    slugs_on_disk = get_all_page_slugs(content_dir)
    missing_on_disk = list(set(slugs_in_toc) - set(slugs_on_disk))
    missing_in_toc = list(set(slugs_on_disk) - set(slugs_in_toc))

    # extra pages are not fatal: the user decides whether to go on
    if missing_in_toc:
        print_warning(
            'The following pages are in the content directory but not in the TOC:'
        )
        for slug in missing_in_toc:
            print('- ' + slug)
        print()
        print(
            'If you continue these pages will NOT be included in the published product.'
        )
        print()
        answer = prompt(
            'Do you want to continue with the data as it is? [Y|n]: ')
        declined = answer != '' and answer[:1].lower() != 'y'
        if declined:
            sys.exit(0)

    if not missing_on_disk:
        return

    # pages referenced by the TOC must exist
    print_error(
        'The following pages are in the TOC but were not found in the content directory:'
    )
    for slug in missing_on_disk:
        print('- ' + slug)
    print()
    sys.exit(1)
Пример #3
0
def get_q_and_a(text):
    """
    Extract question/answer pairs from the text of a questions page.

    The first line starting with '#' is remembered only to label error
    messages. Blank lines and lines starting with a known markup prefix are
    ignored. A line matching ``q_re`` opens a new item; the next line matching
    ``a_re`` completes it. Any other line, including an answer that has no
    pending question, is reported with ``print_error`` and skipped.

    :param text: full text of the page
    :return: list of dicts with the keys 'q', 'a' and 'ref'
    :rtype: list
    """
    cq = []
    first_line = None
    item = None  # question still waiting for its answer
    ignored_prefixes = ('~~', '#', '{{', '__[', 'These questions will')

    for line in text.splitlines():
        line = line.strip()

        if not first_line and line.startswith('#'):
            first_line = line
            continue

        if not line or line.startswith(ignored_prefixes):
            continue

        question = q_re.search(line)
        if question:
            item = {'q': question.group(1).strip()}
            continue

        answer = a_re.search(line)
        if answer and item is not None:
            answer_text = answer.group(1).strip()
            item['ref'] = fix_refs(ref_re.findall(answer_text))
            # the answer itself is everything before the first reference bracket
            item['a'] = answer_text.split(str('['))[0].strip()
            cq.append(item)
            # a completed item must not be reused by a stray second answer
            item = None
            continue

        print_error('tQ error in {0}: {1}'.format(first_line, line))
    return cq
Пример #4
0
    def load_pages(self, content_dir):
        """
        Read the markdown page of every slug in the TOC into ``self.articles``.

        A page without yaml data is reported with ``print_error`` but is still
        appended to the list.

        :param content_dir: directory that contains one ``<slug>.md`` per slug
        """
        for page_slug in self.toc.all_slugs():

            print('Processing {0}...'.format(page_slug), end=' ')

            page_path = os.path.join(content_dir, page_slug + '.md')
            with codecs.open(page_path, 'r', 'utf-8-sig') as page_file:
                page_text = page_file.read()

            page = TAArticle(page_text, page_slug)
            if not page.yaml:
                print_error('No yaml data found for ' + page_slug)

            self.articles.append(page)

            print('finished.')
Пример #5
0
def main(directory_to_check, versification):
    """
    Check the chapters and verses of every USFM file in a directory.

    Files matching ``*.usfm``, ``*.sfm`` and ``*.SFM`` are read and matched to
    their versification data by book id. The process exits with status 1 as
    soon as a book id or its versification data cannot be found, or after all
    files were processed if any book had validation errors.

    :param str|unicode directory_to_check:
    :param str|unicode versification:
    """

    # get the versification data
    vrs = Bible.get_versification(versification)  # type: list<Book>

    # collect the usfm files, one glob per supported extension
    usfm_paths = [
        found
        for extension in ['*.usfm', '*.sfm', '*.SFM']
        for found in glob(os.path.join(directory_to_check, extension))
    ]

    any_errors = False
    for usfm_path in usfm_paths:

        with codecs.open(usfm_path, 'r', 'utf-8') as usfm_file:
            usfm_text = usfm_file.read()

        # the book id comes from the file content, not from its name
        id_match = id_re.search(usfm_text)
        if not id_match:
            print_error('Book id not found in {}'.format(usfm_path))
            sys.exit(1)

        book_id = id_match.group(1)

        print('Beginning {}...'.format(book_id), end=' ')

        # first versification entry with this id, if any
        book = None
        for candidate in vrs:
            if candidate.book_id == book_id:
                book = candidate
                break

        if not book:
            print_error(
                'Book versification data was not found for "{}"'.format(
                    book_id))
            sys.exit(1)

        # hand over the usfm without its \s5 lines
        book.set_usfm(s5_re.sub('', usfm_text))

        # do basic checks
        book.verify_chapters_and_verses(True)
        if book.validation_errors:
            any_errors = True

        print('finished.')

    # stop if errors were found
    if any_errors:
        print_error(
            'These USFM errors must be corrected before publishing can continue.'
        )
        sys.exit(1)
Пример #6
0
    def check_value_is_valid_int(value_to_check, yaml_data):
        """
        Check that a yaml value is present, not blank and usable as an integer.

        Values that are not already ints are converted with ``int()`` and, if
        that fails, with ``int(float())`` (so '2.5' is accepted). Missing and
        blank values are reported with ``print_error``; a value that cannot be
        converted just returns False.

        :param value_to_check: name of the value to look up in yaml_data
        :param yaml_data: mapping of yaml value names to their parsed values
        :return: True when the value can be used as an int, otherwise False
        :rtype: bool
        """
        if value_to_check not in yaml_data:
            print_error('"' + value_to_check + '" data value for page is missing')
            return False

        data_value = yaml_data[value_to_check]

        # any falsy value (None, '', 0, ...) counts as blank
        if not data_value:
            print_error('"' + value_to_check + '" data value for page is blank')
            return False

        if not isinstance(data_value, int):
            # only conversion failures are expected here; a bare except would
            # also swallow KeyboardInterrupt and SystemExit
            conversion_errors = (TypeError, ValueError, OverflowError)
            try:
                data_value = int(data_value)
            except conversion_errors:
                try:
                    data_value = int(float(data_value))
                except conversion_errors:
                    return False

        return isinstance(data_value, int)
Пример #7
0
def check_missing_pages(toc_obj, content_dir):
    """
    Compare the slugs listed in the TOC with the pages found on disk.

    Pages that exist on disk but are not in the TOC only produce a warning and
    a prompt; any answer that is not empty and does not start with 'y'/'Y'
    exits with status 0. Slugs listed in the TOC that have no page on disk are
    reported as an error and the process exits with status 1.

    :param toc_obj: object providing ``all_slugs()``
    :param content_dir: passed to ``get_all_page_slugs`` to find the pages
    """
    slugs_in_toc = toc_obj.all_slugs()
    slugs_on_disk = get_all_page_slugs(content_dir)
    missing_on_disk = list(set(slugs_in_toc) - set(slugs_on_disk))
    missing_in_toc = list(set(slugs_on_disk) - set(slugs_in_toc))

    # extra pages are not fatal: the user decides whether to go on
    if missing_in_toc:
        print_warning('The following pages are in the content directory but not in the TOC:')
        for slug in missing_in_toc:
            print('- ' + slug)
        print()
        print('If you continue these pages will NOT be included in the published product.')
        print()
        answer = prompt('Do you want to continue with the data as it is? [Y|n]: ')
        declined = answer != '' and answer[:1].lower() != 'y'
        if declined:
            sys.exit(0)

    if not missing_on_disk:
        return

    # pages referenced by the TOC must exist
    print_error('The following pages are in the TOC but were not found in the content directory:')
    for slug in missing_on_disk:
        print('- ' + slug)
    print()
    sys.exit(1)
Пример #8
0
def main(directory_to_check, versification):
    """
    Check the chapters and verses of every USFM file in a directory.

    Files matching ``*.usfm``, ``*.sfm`` and ``*.SFM`` are read and matched to
    their versification data by book id. The process exits with status 1 as
    soon as a book id or its versification data cannot be found, or after all
    files were processed if any book had validation errors.

    :param str|unicode directory_to_check:
    :param str|unicode versification:
    """

    # get the versification data
    vrs = Bible.get_versification(versification)  # type: list<Book>

    # collect the usfm files, one glob per supported extension
    usfm_paths = [
        found
        for extension in ['*.usfm', '*.sfm', '*.SFM']
        for found in glob(os.path.join(directory_to_check, extension))
    ]

    any_errors = False
    for usfm_path in usfm_paths:

        with codecs.open(usfm_path, 'r', 'utf-8') as usfm_file:
            usfm_text = usfm_file.read()

        # the book id comes from the file content, not from its name
        id_match = id_re.search(usfm_text)
        if not id_match:
            print_error('Book id not found in {}'.format(usfm_path))
            sys.exit(1)

        book_id = id_match.group(1)

        print('Beginning {}...'.format(book_id), end=' ')

        # first versification entry with this id, if any
        book = None
        for candidate in vrs:
            if candidate.book_id == book_id:
                book = candidate
                break

        if not book:
            print_error('Book versification data was not found for "{}"'.format(book_id))
            sys.exit(1)

        # hand over the usfm without its \s5 lines
        book.set_usfm(s5_re.sub('', usfm_text))

        # do basic checks
        book.verify_chapters_and_verses(True)
        if book.validation_errors:
            any_errors = True

        print('finished.')

    # stop if errors were found
    if any_errors:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)
Пример #9
0
def export_to_api(lang, status, today, cur_json):
    """
    Export one language and refresh its entry in ``obs-catalog.json``.

    Logs in to Github when ``/root/.github_pass`` exists (otherwise
    ``github_org`` stays None), loads the catalog, exports the language with
    ``export_unfolding_word``, replaces the catalog entry for ``lang`` with
    the global ``lang_cat`` and updates the published-languages status page.
    Exits with status 1 on a Github login problem, when ``status`` lacks
    ``checking_level`` or ``publish_date``, or when the checking level is not
    '1', '2' or '3'.

    :param lang: language code; also the sub-directory name under unfoldingWord_dir
    :param status: status object supporting ``in`` and ``.checking_level``
    :param today: date value handed to the front/back matter helpers
    :param cur_json: current content passed through to ``export_unfolding_word``
    """
    global unfoldingWord_dir, lang_cat, github_org, pages

    print('Getting Github credentials...', end=' ')
    try:
        github_org = None
        if os.path.isfile('/root/.github_pass'):
            # use a context manager so the password file handle is closed
            with open('/root/.github_pass', 'r') as pw_file:
                pw = pw_file.read().strip()
            # noinspection PyTypeChecker
            g_user = githubLogin('dsm-git', pw)
            github_org = getGithubOrg('unfoldingword', g_user)
        else:
            print('none found...', end=' ')
    except GithubException as e:
        print_error('Problem logging into Github: {0}'.format(e))
        sys.exit(1)
    print('finished.')

    print('Loading the uw catalog...', end=' ')
    uw_cat_path = os.path.join(unfoldingWord_dir, 'obs-catalog.json')
    uw_catalog = load_json_object(uw_cat_path, [])
    # parallel list of language codes, used to locate an existing entry
    uw_cat_langs = [x['language'] for x in uw_catalog]
    print('finished')

    unfolding_word_lang_dir = os.path.join(unfoldingWord_dir, lang)
    if 'checking_level' in status and 'publish_date' in status:
        if status.checking_level in ['1', '2', '3']:

            front_json = OBS.get_front_matter(pages, lang, today)
            back_json = OBS.get_back_matter(pages, lang, today)

            print('Exporting {0}...'.format(lang), end=' ')
            export_unfolding_word(status, unfolding_word_lang_dir, cur_json,
                                  lang, github_org, front_json, back_json)

            # drop the old entry (kept in step in both lists) before appending
            if lang in uw_cat_langs:
                uw_catalog.pop(uw_cat_langs.index(lang))
                uw_cat_langs.pop(uw_cat_langs.index(lang))
            uw_catalog.append(lang_cat)

            uw_cat_json = json.dumps(uw_catalog,
                                     sort_keys=True,
                                     cls=OBSEncoder)
            write_file(uw_cat_path, uw_cat_json)

            # update uw_admin status page
            ObsPublishedLangs.update_page(ObsPublishedLangs.cat_url,
                                          ObsPublishedLangs.uw_stat_page)

            print('finished.')
        else:
            print_error('The `checking_level` is invalid.')
            sys.exit(1)
    else:
        print_error(
            'The status is missing `checking_level` or `publish_date`.')
        sys.exit(1)
def read_chunked_files(book, content_dir, metadata_obj):
    """
    Load the translated title and the chunked USFM of every chapter of a book.

    Directory ``00`` below content_dir supplies ``title.txt``, which becomes
    the book name and fills the header template. Every following zero-padded
    directory supplies the chunk files whose cleaned text is appended to the
    matching chapter. The process exits with status 1 when an expected
    directory or file is missing.

    :param book: book object; its chapters, name and header_usfm are updated
    :param content_dir: directory containing the chapter directories
    :param metadata_obj: object whose ``name`` replaces ``{BIBLE_NAME}`` in the header
    """
    print('Reading chapter USFM files...', end=' ')
    for chapter_num in range(len(book.chapters) + 1):

        # every chapter, and the pseudo chapter 0, needs its own directory
        chapter_dir = os.path.join(content_dir, str(chapter_num).zfill(2))
        if not os.path.isdir(chapter_dir):
            print_error('Did not find directory for chapter {}.'.format(chapter_num))
            sys.exit(1)

        # directory 00 contains the translated book title
        if chapter_num == 0:
            title_path = os.path.join(chapter_dir, 'title.txt')
            if not os.path.isfile(title_path):
                print_error('Did not find file "{}".'.format(title_path))
                sys.exit(1)

            with codecs.open(title_path, 'r', 'utf-8-sig') as title_file:
                translated_name = title_file.read()

            # fill the header template, one placeholder at a time
            header_usfm = Bible.get_header_text()
            substitutions = (
                ('{BOOK_CODE}', book.book_id),
                ('{BIBLE_NAME}', metadata_obj.name),
                ('{BOOK_NAME_SHORT}', translated_name),
                ('{BOOK_NAME_LONG}', translated_name),
            )
            for placeholder, replacement in substitutions:
                header_usfm = header_usfm.replace(placeholder, replacement)

            book.name = translated_name
            book.header_usfm = header_usfm
            continue

        # other directories will have the chunk files for the chapter
        chapter = next((c for c in book.chapters if c.number == chapter_num), None)  # type: Chapter

        chunk_names = sorted(
            name for name in os.listdir(chapter_dir)
            if re.search(r'[0-1]?[0-9][0-9]\.txt$', name)
        )
        for chunk_name in chunk_names:

            # skip the junk chunk in the last chapter
            if chunk_name in ('00.txt', '000.txt'):
                continue

            chunk_path = os.path.join(chapter_dir, chunk_name)
            if not os.path.isfile(chunk_path):
                print_error('Did not find file "{}".'.format(chunk_path))
                sys.exit(1)

            with codecs.open(chunk_path, 'r', 'utf-8-sig') as chunk_handle:
                chunk_usfm = chunk_handle.read()

            chapter.usfm += reformat_usfm(remove_chapter_markers(chunk_usfm)) + "\n"

    book.build_usfm_from_chapters()
    print('finished.')
def export_to_api(lang, status, today, cur_json):
    """
    Export one language and refresh its entry in ``obs-catalog.json``.

    Logs in to Github when ``/root/.github_pass`` exists (otherwise
    ``github_org`` stays None), loads the catalog, exports the language with
    ``export_unfolding_word``, replaces the catalog entry for ``lang`` with
    the global ``lang_cat`` and updates the published-languages status page.
    Exits with status 1 on a Github login problem, when ``status`` lacks
    ``checking_level`` or ``publish_date``, or when the checking level is not
    '1', '2' or '3'.

    :param lang: language code; also the sub-directory name under unfoldingWord_dir
    :param status: status object supporting ``in`` and ``.checking_level``
    :param today: date value handed to the front/back matter helpers
    :param cur_json: current content passed through to ``export_unfolding_word``
    """
    global unfoldingWord_dir, lang_cat, github_org, pages

    print('Getting Github credentials...', end=' ')
    try:
        github_org = None
        if os.path.isfile('/root/.github_pass'):
            # use a context manager so the password file handle is closed
            with open('/root/.github_pass', 'r') as pw_file:
                pw = pw_file.read().strip()
            # noinspection PyTypeChecker
            g_user = githubLogin('dsm-git', pw)
            github_org = getGithubOrg('unfoldingword', g_user)
        else:
            print('none found...', end=' ')
    except GithubException as e:
        print_error('Problem logging into Github: {0}'.format(e))
        sys.exit(1)
    print('finished.')

    print('Loading the uw catalog...', end=' ')
    uw_cat_path = os.path.join(unfoldingWord_dir, 'obs-catalog.json')
    uw_catalog = load_json_object(uw_cat_path, [])
    # parallel list of language codes, used to locate an existing entry
    uw_cat_langs = [x['language'] for x in uw_catalog]
    print('finished')

    unfolding_word_lang_dir = os.path.join(unfoldingWord_dir, lang)
    if 'checking_level' in status and 'publish_date' in status:
        if status.checking_level in ['1', '2', '3']:

            front_json = OBS.get_front_matter(pages, lang, today)
            back_json = OBS.get_back_matter(pages, lang, today)

            print('Exporting {0}...'.format(lang), end=' ')
            export_unfolding_word(status, unfolding_word_lang_dir, cur_json,
                                  lang, github_org, front_json, back_json)

            # drop the old entry (kept in step in both lists) before appending
            if lang in uw_cat_langs:
                uw_catalog.pop(uw_cat_langs.index(lang))
                uw_cat_langs.pop(uw_cat_langs.index(lang))
            uw_catalog.append(lang_cat)

            uw_cat_json = json.dumps(uw_catalog, sort_keys=True, cls=OBSEncoder)
            write_file(uw_cat_path, uw_cat_json)

            # update uw_admin status page
            ObsPublishedLangs.update_page(ObsPublishedLangs.cat_url, ObsPublishedLangs.uw_stat_page)

            print('finished.')
        else:
            print_error('The `checking_level` is invalid.')
            sys.exit(1)
    else:
        print_error('The status is missing `checking_level` or `publish_date`.')
        sys.exit(1)
Пример #12
0
    def check_value_is_valid_string(value_to_check, yaml_data):
        """
        Check that a yaml value is present and is a non-blank string.

        Each failure is reported with ``print_error`` before returning False.

        :param value_to_check: name of the value to look up in yaml_data
        :param yaml_data: mapping of yaml value names to their parsed values
        :return: True when the value is a string with non-whitespace content
        :rtype: bool
        """
        if value_to_check not in yaml_data:
            print_error('"' + value_to_check + '" data value for page is missing')
            return False

        candidate = yaml_data[value_to_check]

        # falsy values (None, '', 0, ...) are blank whatever their type
        if not candidate:
            print_error('"' + value_to_check + '" data value for page is blank')
            return False

        # str is tested first so unicode is only consulted for non-str values
        if not (isinstance(candidate, str) or isinstance(candidate, unicode)):
            print_error('"' + value_to_check + '" data value for page is not a string')
            return False

        # a string made only of whitespace is blank as well
        if candidate.strip():
            return True

        print_error('"' + value_to_check + '" data value for page is blank')
        return False
Пример #13
0
    def get_yaml_data(self, raw_yaml_text):
        """
        Parse the yaml header of a page, one ``name: value`` line at a time.

        Line endings and curly double quotes are normalised first. Parsing
        stops at the first line that is malformed, cannot be parsed or does
        not parse to a dict; the problem is reported with ``print_error``.
        The collected values are finally checked with ``check_yaml_values``.

        :param raw_yaml_text: text of the yaml block
        :return: dict of the parsed values, or None if anything was invalid
        :rtype: dict|None
        """
        return_val = {}

        # convert windows line endings
        cleaned = raw_yaml_text.replace('\r\n', '\n')

        # replace curly quotes
        cleaned = cleaned.replace('“', '"').replace('”', '"')

        # split into individual values, removing empty lines
        parts = filter(bool, cleaned.split('\n'))

        # check each value
        for part in parts:

            # split into name and value
            pieces = part.split(':', 1)

            # must be 2 pieces
            if len(pieces) != 2:
                print_error('Bad yaml format => ' + part)
                return None

            # try to parse
            # NOTE(security): yaml.load with the default loader can build
            # arbitrary Python objects; consider yaml.safe_load if the pages
            # can come from untrusted authors.
            # noinspection PyBroadException
            try:
                parsed = yaml.load(part)

            except Exception:
                # any parser failure means the line is unusable, but do not
                # swallow KeyboardInterrupt/SystemExit like a bare except
                print_error('Not able to parse yaml value => ' + part)
                return None

            if not isinstance(parsed, dict):
                print_error('Yaml parse did not return the expected type => ' + part)
                return None

            # add the successfully parsed value to the dictionary
            return_val.update(parsed)

        if not self.check_yaml_values(return_val):
            return None

        return return_val
Пример #14
0
    def run(self):
        """
        Download the source repository and convert its OBS markdown to html.

        Only urls containing ``git.door43.org`` are processed; for any other
        url a warning is printed and the process exits with status 0. The
        ``archive/master.zip`` of the repository is downloaded (unless the zip
        is already present) and unzipped into ``self.temp_dir``, the manifest
        is read, and ``01.md`` .. ``50.md`` from the ``content`` directory are
        rendered into ``template.html`` and written to
        ``self.output_directory``. Progress is printed unless ``self.quiet``
        is set. Exceptions are reported with ``print_error`` and collected in
        ``self.errors`` instead of being raised.
        """
        if 'git.door43.org' not in self.source_repo_url:
            print_warning(
                'Currently only git.door43.org repositories are supported.')
            sys.exit(0)

        try:
            # clean up the git repo url
            if self.source_repo_url[-4:] == '.git':
                self.source_repo_url = self.source_repo_url[:-4]

            if self.source_repo_url[-1:] == '/':
                self.source_repo_url = self.source_repo_url[:-1]

            # download the archive
            file_to_download = join_url_parts(self.source_repo_url,
                                              'archive/master.zip')
            repo_dir = self.source_repo_url.rpartition('/')[2]
            downloaded_file = os.path.join(self.temp_dir, repo_dir + '.zip')
            try:
                if not self.quiet:
                    print('Downloading {0}...'.format(file_to_download),
                          end=' ')
                if not os.path.isfile(downloaded_file):
                    download_file(file_to_download, downloaded_file)
            finally:
                if not self.quiet:
                    print('finished.')

            # unzip the archive
            try:
                if not self.quiet:
                    print('Unzipping...', end=' ')
                unzip(downloaded_file, self.temp_dir)
            finally:
                if not self.quiet:
                    print('finished.')

            # get the manifest
            # NOTE(review): the manifest is loaded but not used further below
            try:
                if not self.quiet:
                    print('Reading the manifest...', end=' ')
                manifest = load_json_object(
                    os.path.join(self.temp_dir, 'manifest.json'))
            finally:
                if not self.quiet:
                    print('finished.')

            # create output directory
            make_dir(self.output_directory)

            # read the markdown files and output html files
            try:
                if not self.quiet:
                    print('Processing the OBS markdown files')
                files_to_process = [str(i).zfill(2) + '.md'
                                    for i in range(1, 51)]

                current_dir = os.path.dirname(inspect.stack()[0][1])
                with codecs.open(os.path.join(current_dir, 'template.html'),
                                 'r', 'utf-8-sig') as html_file:
                    html_template = html_file.read()

                for file_to_process in files_to_process:

                    # read the markdown file
                    file_name = os.path.join(self.temp_dir, repo_dir,
                                             'content', file_to_process)
                    with codecs.open(file_name, 'r', 'utf-8-sig') as md_file:
                        md = md_file.read()

                    body = markdown.markdown(md)

                    # A replacement function inserts the body literally; as
                    # part of a replacement template any backslash or group
                    # reference inside the content would be interpreted.
                    html = TransformOBS.dir_re.sub(
                        lambda match, body=body:
                        match.group(1) + '\n' + body + '\n' + match.group(2),
                        html_template)
                    write_file(
                        os.path.join(self.output_directory,
                                     file_to_process.replace('.md', '.html')),
                        html)

            except IOError as ioe:
                print_error('{0}: {1}'.format(ioe.strerror, ioe.filename))
                self.errors.append(ioe)

            except Exception as e:
                # .message is missing on Python 3 and often empty on Python 2
                print_error(getattr(e, 'message', None) or str(e))
                self.errors.append(e)

            finally:
                if not self.quiet:
                    print('finished.')

        except Exception as e:
            # .message is missing on Python 3 and often empty on Python 2
            print_error(getattr(e, 'message', None) or str(e))
            self.errors.append(e)
Пример #15
0
def main(git_repo, tag, domain):
    """
    Publish the USFM books of a tagged git repository.

    Downloads and unzips ``archive/<tag>.zip`` of the repository below /tmp,
    locates ``meta.json`` and the ``usfm`` directory, validates and chunks
    every ``*.usfm`` book, writes the books and a ``status.json`` to the
    output directory, publishes that directory to the API and updates the
    catalogs. The process exits with status 1 when the repository layout is
    not as expected, a book cannot be identified, or validation errors were
    found.

    :param git_repo: repository address; a trailing '.git' or '/' is removed
    :param tag: name of the archive to download (``archive/<tag>.zip``)
    :param domain: first value formatted into the global ``out_template``
    """

    global download_dir, out_template

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]

    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    # today's date with the dashes of its str() form removed
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    books_published = {}
    metadata_obj = None
    usfm_dir = None

    # download the repository
    # an archive that is already present is not downloaded again
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    # NOTE(review): the format() call below has no placeholder, so it is a no-op
    try:
        print('Unzipping...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):

        if 'meta.json' in files:
            # read the metadata
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = BibleMetaData(os.path.join(root, 'meta.json'))
            finally:
                print('finished.')

        if 'usfm' in dirs:
            usfm_dir = os.path.join(root, 'usfm')

        # if we have everything, exit the loop
        if usfm_dir and metadata_obj:
            break

    # check for valid repository structure
    if not metadata_obj:
        print_error('Did not find meta.json in {}'.format(git_repo))
        sys.exit(1)

    if not usfm_dir:
        print_error('Did not find the usfm directory in {}'.format(git_repo))
        sys.exit(1)

    # get the versification data
    vrs = Bible.get_versification(metadata_obj.versification)  # type: list<Book>
    out_dir = out_template.format(domain, metadata_obj.slug, metadata_obj.lang)

    # walk through the usfm files
    usfm_files = glob(os.path.join(usfm_dir, '*.usfm'))
    errors_found = False
    for usfm_file in usfm_files:

        # read the file
        with codecs.open(usfm_file, 'r', 'utf-8') as in_file:
            book_text = in_file.read()

        # get the book id
        book_search = id_re.search(book_text)
        if not book_search:
            print_error('Book id not found in {}'.format(usfm_file))
            sys.exit(1)

        book_id = book_search.group(1)

        print('Beginning {}...'.format(book_id), end=' ')

        # get book versification info
        book = next((b for b in vrs if b.book_id == book_id), None)
        if not book:
            print_error('Book versification data was not found for "{}"'.format(book_id))
            sys.exit(1)

        # remove \s5 lines
        book_text = s5_re.sub('', book_text)

        # get the usfm for the book
        book.set_usfm(book_text)

        # do basic checks
        # errors are only remembered here; the book is still written below
        book.verify_usfm_tags()
        book.verify_chapters_and_verses(True)
        if book.validation_errors:
            errors_found = True

        # get chunks for this book
        Bible.chunk_book(metadata_obj.versification, book)
        book.apply_chunks()

        # produces something like '01-GEN.usfm'
        book_file_name = '{0}-{1}.usfm'.format(str(book.number).zfill(2), book.book_id)
        print('Writing ' + book_file_name + '...', end=' ')
        write_file('{0}/{1}'.format(out_dir, book_file_name), book.usfm)

        # books numbered above 39 are labelled NT, all others OT
        meta = ['Bible: OT']
        if book.number > 39:
            meta = ['Bible: NT']
        books_published[book.book_id.lower()] = {'name': book.name,
                                                 'meta': meta,
                                                 'sort': str(book.number).zfill(2),
                                                 'desc': ''
                                                 }
        print('finished.')

    # stop if errors were found
    if errors_found:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)

    # describe the published resource; most values come from meta.json
    print('Writing status.json...', end=' ')
    status = {"slug": '{0}'.format(metadata_obj.slug.lower()),
              "name": metadata_obj.name,
              "lang": metadata_obj.lang,
              "date_modified": today,
              "books_published": books_published,
              "status": {"checking_entity": metadata_obj.checking_entity,
                         "checking_level": metadata_obj.checking_level,
                         "comments": metadata_obj.comments,
                         "contributors": metadata_obj.contributors,
                         "publish_date": today,
                         "source_text": metadata_obj.source_text,
                         "source_text_version": metadata_obj.source_text_version,
                         "version": metadata_obj.version
                         }
              }
    write_file('{0}/status.json'.format(out_dir), status, indent=2)
    print('finished.')

    print()
    print('Publishing to the API...')
    with api_publish(out_dir) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')

    print_notice('Check {0} and do a git push'.format(out_dir))
Пример #16
0
    def append_error(self, message, prefix='** '):
        """
        Report a validation error and remember it.

        The message is printed with the prefix in front, but it is stored in
        ``self.validation_errors`` without the prefix.

        :param message: text describing the problem
        :param prefix: marker printed in front of the message
        """
        displayed = prefix + message
        print_error(displayed)
        self.validation_errors.append(message)
Пример #17
0
    parser.add_argument('-u',
                        '--url',
                        dest='url',
                        default=False,
                        required=False,
                        help='Door43 page where the source can be found.')
    parser.add_argument('-p',
                        '--nopdf',
                        dest='nopdf',
                        action='store_true',
                        help='Do not produce a PDF.')

    args = parser.parse_args(sys.argv[1:])

    if not args.gitrepo and not args.url:
        print_error(
            'You must provide either --gitrepo or --url to this script.')
        sys.exit(0)

    try:
        # get the language data
        try:
            print('Downloading language data...', end=' ')
            langs = get_languages()
        finally:
            print('finished.')

        this_lang = next(l for l in langs if l['lc'] == args.lang)

        if not this_lang:
            print_error('Information for language "{0}" was not found.'.format(
                args.lang))
Пример #18
0
    def run(self):
        """
        Download the source repository and convert its OBS markdown to html.

        A fresh temporary directory is created for the work. The
        ``archive/master.zip`` of the repository is downloaded and unzipped
        there, the manifest is read, and ``01.md`` .. ``50.md`` from the
        ``content`` directory are rendered into ``template.html`` and written
        to ``self.output_directory``. Exceptions are reported with
        ``print_error`` and collected in ``self.errors`` instead of being
        raised.
        """
        try:
            # NOTE(review): the directory is not removed here; presumably
            # cleanup happens elsewhere - confirm
            self.temp_dir = tempfile.mkdtemp(prefix='txOBS_')

            # clean up the git repo url
            if self.source_repo_url[-4:] == '.git':
                self.source_repo_url = self.source_repo_url[:-4]

            if self.source_repo_url[-1:] == '/':
                self.source_repo_url = self.source_repo_url[:-1]

            # download the archive
            file_to_download = join_url_parts(self.source_repo_url, 'archive/master.zip')
            repo_dir = self.source_repo_url.rpartition('/')[2]
            downloaded_file = os.path.join(self.temp_dir, repo_dir + '.zip')
            try:
                print('Downloading {0}...'.format(file_to_download), end=' ')
                if not os.path.isfile(downloaded_file):
                    download_file(file_to_download, downloaded_file)
            finally:
                print('finished.')

            # unzip the archive
            try:
                print('Unzipping...', end=' ')
                unzip(downloaded_file, self.temp_dir)
            finally:
                print('finished.')

            # get the manifest
            # NOTE(review): the manifest is loaded but not used further below
            try:
                print('Reading the manifest...', end=' ')
                manifest = load_json_object(os.path.join(self.temp_dir, 'manifest.json'))
            finally:
                print('finished.')

            # create output directory
            make_dir(self.output_directory)

            # read the markdown files and output html files
            try:
                print('Processing the OBS markdown files')
                files_to_process = [str(i).zfill(2) + '.md' for i in range(1, 51)]

                current_dir = os.path.dirname(inspect.stack()[0][1])
                with codecs.open(os.path.join(current_dir, 'template.html'), 'r', 'utf-8-sig') as html_file:
                    html_template = html_file.read()

                for file_to_process in files_to_process:

                    # read the markdown file
                    file_name = os.path.join(self.temp_dir, repo_dir, 'content', file_to_process)
                    with codecs.open(file_name, 'r', 'utf-8-sig') as md_file:
                        md = md_file.read()

                    body = markdown.markdown(md)

                    # A replacement function inserts the body literally; as
                    # part of a replacement template any backslash or group
                    # reference inside the content would be interpreted.
                    html = TransformOBS.dir_re.sub(
                        lambda match, body=body: match.group(1) + '\n' + body + '\n' + match.group(2),
                        html_template)
                    write_file(os.path.join(self.output_directory, file_to_process.replace('.md', '.html')), html)

            except IOError as ioe:
                print_error('{0}: {1}'.format(ioe.strerror, ioe.filename))
                self.errors.append(ioe)

            except Exception as e:
                # .message is missing on Python 3 and often empty on Python 2
                print_error(getattr(e, 'message', None) or str(e))
                self.errors.append(e)

            finally:
                print('finished.')

        except Exception as e:
            # .message is missing on Python 3 and often empty on Python 2
            print_error(getattr(e, 'message', None) or str(e))
            self.errors.append(e)
Пример #19
0
def import_obs(lang_data, git_repo, door43_url, no_pdf):
    """Import Open Bible Stories for one language from a git repository.

    Downloads ``obs-<lang>.json`` and ``status-<lang>.json`` from the master
    branch of the repository, writes one Dokuwiki text page per chapter,
    creates the image symlink for the language and, unless disabled, runs the
    ``pdf_export.sh`` script found in the tools directory.

    The process is terminated with ``sys.exit`` when a pre-flight check or the
    data verification fails.

    :param lang_data: mapping describing the language; only ``'lc'`` is read
    :param git_repo: URL of the repository (github or git.door43.org)
    :param door43_url: alternative source URL; not implemented yet
    :param no_pdf: when true, PDF generation is skipped
    """
    global download_dir, root, api_dir

    lang_code = lang_data['lc']

    # pre-flight checklist
    link_source = '/var/www/vhosts/api.unfoldingword.org/httpdocs/obs/jpg/1/en'
    if not os.path.isdir(link_source):
        print_error('Image source directory not found: {0}.'.format(link_source))
        sys.exit(1)

    if no_pdf:
        tools_dir = None
    else:
        tools_dir = '/var/www/vhosts/door43.org/tools'
        if not os.path.isdir(tools_dir):
            tools_dir = os.path.expanduser('~/Projects/tools')

        # prompt if tools not found
        if not os.path.isdir(tools_dir):
            tools_dir = None
            print_notice('The tools directory was not found. The PDF cannot be generated.')
            resp = prompt('Do you want to continue without generating a PDF? [Y|n]: ')
            if resp != '' and resp != 'Y' and resp != 'y':
                sys.exit(0)

    # Test the source BEFORE touching it: unconditionally appending '/' made an
    # empty value truthy (hiding the two error branches below) and raised a
    # TypeError for None.
    if git_repo:
        # drop one trailing slash so the last path segment is the repository name
        if git_repo[-1:] == '/':
            git_repo = git_repo[:-1]

        download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
        make_dir(download_dir)

        # make sure OBS is initialized on Dokuwiki
        test_dir = root.format(lang_code, '')
        if not os.path.isdir(test_dir):
            print_warning('It seems OBS has not been initialized on Door43.org for {0}'.format(lang_code))
            sys.exit(1)

    elif door43_url:
        print_error('URL not yet implemented.')
        return
    else:
        print_error('Source not provided.')
        return

    # get the source files from the git repository
    if 'github' in git_repo:
        # https://github.com/unfoldingWord/obs-ru
        # https://raw.githubusercontent.com/unfoldingWord/obs-ru/master/obs-ru.json
        raw_url = git_repo.replace('github.com', 'raw.githubusercontent.com')
    elif 'git.door43.org' in git_repo:
        raw_url = join_url_parts(git_repo, 'raw')
    else:
        # unsupported host; the caller is expected to have rejected it already
        return

    # download needed files from the repository
    file_suffix = '-{0}.json'.format(lang_code.lower())
    files_to_download = [
        join_url_parts(raw_url, 'master/obs' + file_suffix),
        join_url_parts(raw_url, 'master/status' + file_suffix)
    ]

    for file_to_download in files_to_download:

        downloaded_file = os.path.join(download_dir, file_to_download.rpartition('/')[2])

        try:
            print('Downloading {0}...'.format(file_to_download), end=' ')
            download_file(file_to_download, downloaded_file)
        finally:
            print('finished.')

    # read the files from the git repository
    obs_obj = None
    status_obj = None

    try:
        print('Examining the files...', end=' ')
        for root_path, _dirs, files in os.walk(download_dir):
            for git_file in files:
                if git_file == 'obs' + file_suffix:
                    obs_obj = OBS(os.path.join(root_path, git_file))
                elif git_file == 'status' + file_suffix:
                    status_obj = OBSStatus(os.path.join(root_path, git_file))
    finally:
        print('finished.')

    # check data integrity
    if obs_obj is None:
        # without this guard the verify_all() call below fails with AttributeError
        print_error('The file "obs{0}" was not found in the git repository.'.format(file_suffix))
        sys.exit(1)

    if not obs_obj.verify_all():
        sys.exit(1)

    if not status_obj:
        print_error('The file "status{0}" was not found in the git repository.'.format(file_suffix))
        sys.exit(1)

    # create Dokuwiki pages
    print_ok('Begin: ', 'creating Dokuwiki pages.')
    for chapter in obs_obj.chapters:

        chapter_title = '====== {0} ======'.format(chapter['title'])
        chapter_ref = '//{0}//'.format(chapter['ref'])
        chapter_num = chapter['number'].zfill(2)

        # one image tag followed by the frame text, for every frame
        chapter_body = ''.join('{{{{{0}?direct&}}}}\n\n{1}\n\n'.format(frame['img'], frame['text'])
                               for frame in chapter['frames'])

        file_name = root.format(lang_code, chapter_num + '.txt')
        print('  Writing {0}'.format(file_name))
        with codecs.open(file_name, 'w', 'utf-8-sig') as out_file:
            out_file.write('{0}\n\n{1}{2}\n'.format(chapter_title, chapter_body, chapter_ref))

    print_ok('Finished: ', 'creating Dokuwiki pages.')

    # Create image symlinks on api.unfoldingword.org
    try:
        print('Creating symlink to images directory...', end=' ')
        link_name = '/var/www/vhosts/api.unfoldingword.org/httpdocs/obs/jpg/1/{0}'.format(lang_code.lower())
        if not os.path.isfile(link_name) and not os.path.isdir(link_name) and not os.path.islink(link_name):
            os.symlink(link_source, link_name)
    finally:
        print('finished.')

    # Create PDF via ConTeXt
    if not no_pdf and tools_dir and os.path.isdir(tools_dir):
        try:
            print_ok('Beginning: ', 'PDF generation.')
            script_file = os.path.join(tools_dir, 'obs/book/pdf_export.sh')
            out_dir = api_dir.format(lang_code)
            make_dir(out_dir)

            # Run the script directly with its argument list. Combining a list
            # with shell=True makes only the first item the command; the other
            # items become arguments of the shell itself, so the script never
            # received -l/-c/-v/-o.
            process = subprocess.Popen([script_file,
                                        '-l', lang_code,
                                        '-c', status_obj.checking_level,
                                        '-v', status_obj.version,
                                        '-o', out_dir],
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)

            # wait for the process to terminate
            out, err = process.communicate()
            exit_code = process.returncode
            out = out.strip().decode('utf-8')
            err = err.strip().decode('utf-8')

            # the error message may be in stdout
            if exit_code != 0:
                if not err:
                    err = out
                    out = None

            if err:
                print_error(err, 2)

            if out:
                print('  ' + out)

            print('  PDF subprocess finished with exit code {0}'.format(exit_code))

        finally:
            print_ok('Finished:', 'generating PDF.')
                        dest='nopdf',
                        action='store_true',
                        help='Do not produce a PDF.')

    args = parser.parse_args(sys.argv[1:])
    lang = args.lang
    uw_export = args.uwexport
    test_export = args.testexport
    no_pdf = args.nopdf

    print_ok('STARTING: ', 'importing OBS from Dokuwiki')

    # pre-flight checklist
    link_source = '/var/www/vhosts/api.unfoldingword.org/httpdocs/obs/jpg/1/en'
    if not os.path.isdir(link_source):
        print_error(
            'Image source directory not found: {0}.'.format(link_source))
        sys.exit(1)

    if no_pdf:
        tools_dir = None
    else:
        tools_dir = '/var/www/vhosts/door43.org/tools'
        if not os.path.isdir(tools_dir):
            tools_dir = os.path.expanduser('~/Projects/tools')

        # prompt if tools not found
        if not os.path.isdir(tools_dir):
            tools_dir = None
            print_notice(
                'The tools directory was not found. The PDF cannot be generated.'
            )
def rechunk_this_one(api_directory):
    """Re-chunk every USFM book found in `api_directory` and publish it.

    Reads ``status.json`` to choose the versification, deletes the ``.sig``
    files, rewrites each ``*.usfm`` file (except ``LICENSE.usfm``) with fresh
    chunk markers, then publishes the directory to the API and updates the
    catalogs. Exits the process when a book cannot be identified or when any
    book reports validation errors.

    :param api_directory: directory holding status.json and the USFM files
    """
    global id_re, s5_re

    print_notice('Processing {}'.format(api_directory))

    # load the status.json file
    status_path = os.path.join(api_directory, 'status.json')
    with codecs.open(status_path, 'r', 'utf-8-sig') as status_stream:
        status = json.loads(status_stream.read())

    # choose the versification scheme from the language code
    lang = status['lang']
    if lang == 'ru':
        versification = 'rsc'
    elif lang in ('hi', 'sr-Latn', 'hu', 'ta'):
        versification = 'ufw-odx'
    elif lang == 'bn':
        versification = 'ufw-bn'
    elif lang == 'ar':
        versification = 'avd'
    elif lang == 'kn':
        versification = 'ufw-rev'
    else:
        versification = 'ufw'

    versification_data = Bible.get_versification(versification)  # type: list<Book>

    # the signature files are removed before the books are rewritten
    for entry in os.listdir(api_directory):
        if not entry.endswith('.sig'):
            continue
        os.remove(os.path.join(api_directory, entry))

    # rechunk files in this directory
    errors_found = False
    for usfm_file in glob(os.path.join(api_directory, '*.usfm')):

        if usfm_file.endswith('LICENSE.usfm'):
            continue

        # load the whole book
        with codecs.open(usfm_file, 'r', 'utf-8') as book_stream:
            book_text = book_stream.read()

        # the book id comes from the text itself
        id_match = id_re.search(book_text)
        if not id_match:
            print_error('Book id not found in {}'.format(usfm_file))
            sys.exit(1)

        book_id = id_match.group(1)

        print('Beginning {}...'.format(book_id), end=' ')

        # first versification entry with a matching id, if any
        book = None
        for candidate in versification_data:
            if candidate.book_id == book_id:
                book = candidate
                break

        if not book:
            print_error('Book versification data was not found for "{}"'.format(book_id))
            sys.exit(1)

        # strip the existing \s5 lines, then hand the text to the book
        book.set_usfm(s5_re.sub('', book_text))

        # do basic checks
        book.verify_chapters_and_verses(True)
        if book.validation_errors:
            errors_found = True

        # get chunks for this book
        Bible.chunk_book(versification, book)
        book.apply_chunks()

        # produces something like '01-GEN.usfm' (only used in the message;
        # the book is written back to the file it was read from)
        padded_number = str(book.number).zfill(2)
        book_file_name = '{0}-{1}.usfm'.format(padded_number, book.book_id)
        print('Writing ' + book_file_name + '...', end=' ')
        write_file(usfm_file, book.usfm)

        print('finished.')

    if errors_found:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)

    # rebuild source for tS
    print()
    print('Publishing to the API...')
    with api_publish(api_directory) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')
def main(git_repo, tag, domain):
    """Publish one Bible book from a tagged archive of a git repository.

    The archive ``<git_repo>/archive/<tag>.zip`` is downloaded (unless the zip
    is already present) and unpacked under ``/tmp``. ``manifest.json`` and
    ``meta.json`` are read, the book is loaded, checked, given paragraph
    markers and chunked, and the USFM plus an updated ``status.json`` are
    written to the output directory, which is then published to the API and
    added to the catalogs. Exits the process when a required file is missing
    or the USFM checks fail.

    :param git_repo: URL of the repository; a '.git' suffix or trailing '/' is dropped
    :param tag: name of the tag whose archive is downloaded
    :param domain: first value substituted into ``out_template``
    """
    global download_dir, out_template

    # normalise the repository URL
    if git_repo.endswith('.git'):
        git_repo = git_repo[:-4]

    if git_repo.endswith('/'):
        git_repo = git_repo[:-1]

    # initialize some variables
    repo_name = git_repo.rpartition('/')[2]
    today = str(datetime.date.today()).replace('-', '')  # YYYYMMDD
    download_dir = '/tmp/{0}'.format(repo_name)
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, repo_name)
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    manifest = None
    metadata_obj = None
    content_dir = ''
    usfm_file = None

    # download the repository, reusing an archive that is already there
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        print('Unzipping...', end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for walk_dir, _sub_dirs, file_names in os.walk(download_dir):

        if 'manifest.json' in file_names:
            try:
                print('Reading the manifest...', end=' ')
                manifest = load_json_object(os.path.join(walk_dir, 'manifest.json'))
                content_dir = walk_dir

                # a single .usfm file next to the manifest is the whole book
                usfm_candidates = glob(os.path.join(content_dir, '*.usfm'))
                if len(usfm_candidates) == 1:
                    usfm_file = os.path.join(content_dir, usfm_candidates[0])
            finally:
                print('finished.')

        if 'meta.json' in file_names:
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = BibleMetaData(os.path.join(walk_dir, 'meta.json'))
            finally:
                print('finished.')

        # stop walking as soon as both files have been read
        if manifest and metadata_obj:
            break

    # check for valid repository structure
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)

    if not metadata_obj:
        print_error('Did not find meta.json in {}'.format(git_repo))
        sys.exit(1)

    # get the versification data
    print('Getting versification info...', end=' ')
    vrs = Bible.get_versification(metadata_obj.versification)  # type: list<Book>

    # find the book object for this repository
    project_id = manifest['project']['id']
    book = None  # type: Book
    for candidate in vrs:
        if candidate.book_id.lower() == project_id:
            book = candidate
            break

    if not book:
        print_error('Book versification data was not found for "{}"'.format(project_id))
        sys.exit(1)
    print('finished')

    # load the text, either from the single file or from the chunk files
    if usfm_file:
        read_unified_file(book, usfm_file)
    else:
        read_chunked_files(book, content_dir, metadata_obj)

    # do basic checks
    print('Running USFM checks...', end=' ')
    book.verify_chapters_and_verses(True)
    if book.validation_errors:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)
    print('finished.')

    # insert paragraph markers
    print('Inserting paragraph markers...', end=' ')
    Bible.insert_paragraph_markers(book)
    print('finished.')

    # get chunks for this book
    print('Chunking the text...', end=' ')
    Bible.chunk_book(metadata_obj.versification, book)
    book.apply_chunks()
    print('finished.')

    # save the output
    out_dir = out_template.format(domain, metadata_obj.slug)

    # produces something like '01-GEN.usfm'
    padded_number = str(book.number).zfill(2)
    book_file_name = '{0}-{1}.usfm'.format(padded_number, book.book_id)
    print('Writing ' + book_file_name + '...', end=' ')
    write_file('{0}/{1}'.format(out_dir, book_file_name), book.usfm)
    print('finished.')

    # start from the existing status.json when there is one
    print('Updating the status for {0}...'.format(metadata_obj.lang), end=' ')
    status_file = '{0}/status.json'.format(out_dir)
    status = BibleStatus(status_file) if os.path.isfile(status_file) else BibleStatus()

    status.update_from_meta_data(metadata_obj)

    # add this book to the list of "books_published"
    status.add_book_published(book)

    # update the "date_modified"
    status.date_modified = today
    print('finished.')

    # save the status.json file
    print('Writing status.json...', end=' ')
    write_file(status_file, json.dumps(status, sort_keys=True, indent=2, cls=BibleEncoder))
    print('finished')

    # let the API know it is there
    print('Publishing to the API...')
    with api_publish(out_dir) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')

    print_notice('Check {0} and do a git push'.format(out_dir))
Пример #23
0
def import_obs(lang_data, git_repo, door43_url, no_pdf):
    """Import Open Bible Stories for one language from a git repository.

    Downloads ``obs-<lang>.json`` and ``status-<lang>.json`` from the master
    branch of the repository, writes one Dokuwiki text page per chapter,
    creates the image symlink for the language and, unless disabled, runs the
    ``pdf_export.sh`` script found in the tools directory.

    The process is terminated with ``sys.exit`` when a pre-flight check or the
    data verification fails.

    :param lang_data: mapping describing the language; only ``'lc'`` is read
    :param git_repo: URL of the repository (github or git.door43.org)
    :param door43_url: alternative source URL; not implemented yet
    :param no_pdf: when true, PDF generation is skipped
    """
    global download_dir, root, api_dir

    lang_code = lang_data['lc']

    # pre-flight checklist
    link_source = '/var/www/vhosts/api.unfoldingword.org/httpdocs/obs/jpg/1/en'
    if not os.path.isdir(link_source):
        print_error(
            'Image source directory not found: {0}.'.format(link_source))
        sys.exit(1)

    if no_pdf:
        tools_dir = None
    else:
        tools_dir = '/var/www/vhosts/door43.org/tools'
        if not os.path.isdir(tools_dir):
            tools_dir = os.path.expanduser('~/Projects/tools')

        # prompt if tools not found
        if not os.path.isdir(tools_dir):
            tools_dir = None
            print_notice(
                'The tools directory was not found. The PDF cannot be generated.'
            )
            resp = prompt(
                'Do you want to continue without generating a PDF? [Y|n]: ')
            if resp != '' and resp != 'Y' and resp != 'y':
                sys.exit(0)

    # Test the source BEFORE touching it: unconditionally appending '/' made an
    # empty value truthy (hiding the two error branches below) and raised a
    # TypeError for None.
    if git_repo:
        # drop one trailing slash so the last path segment is the repository name
        if git_repo[-1:] == '/':
            git_repo = git_repo[:-1]

        download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
        make_dir(download_dir)

        # make sure OBS is initialized on Dokuwiki
        test_dir = root.format(lang_code, '')
        if not os.path.isdir(test_dir):
            print_warning(
                'It seems OBS has not been initialized on Door43.org for {0}'.
                format(lang_code))
            sys.exit(1)

    elif door43_url:
        print_error('URL not yet implemented.')
        return
    else:
        print_error('Source not provided.')
        return

    # get the source files from the git repository
    if 'github' in git_repo:
        # https://github.com/unfoldingWord/obs-ru
        # https://raw.githubusercontent.com/unfoldingWord/obs-ru/master/obs-ru.json
        raw_url = git_repo.replace('github.com', 'raw.githubusercontent.com')
    elif 'git.door43.org' in git_repo:
        raw_url = join_url_parts(git_repo, 'raw')
    else:
        # unsupported host; the caller is expected to have rejected it already
        return

    # download needed files from the repository
    file_suffix = '-{0}.json'.format(lang_code.lower())
    files_to_download = [
        join_url_parts(raw_url, 'master/obs' + file_suffix),
        join_url_parts(raw_url, 'master/status' + file_suffix)
    ]

    for file_to_download in files_to_download:

        downloaded_file = os.path.join(download_dir,
                                       file_to_download.rpartition('/')[2])

        try:
            print('Downloading {0}...'.format(file_to_download), end=' ')
            download_file(file_to_download, downloaded_file)
        finally:
            print('finished.')

    # read the files from the git repository
    obs_obj = None
    status_obj = None

    try:
        print('Examining the files...', end=' ')
        for root_path, _dirs, files in os.walk(download_dir):
            for git_file in files:
                if git_file == 'obs' + file_suffix:
                    obs_obj = OBS(os.path.join(root_path, git_file))
                elif git_file == 'status' + file_suffix:
                    status_obj = OBSStatus(os.path.join(root_path, git_file))
    finally:
        print('finished.')

    # check data integrity
    if obs_obj is None:
        # without this guard the verify_all() call below fails with AttributeError
        print_error(
            'The file "obs{0}" was not found in the git repository.'.format(
                file_suffix))
        sys.exit(1)

    if not obs_obj.verify_all():
        sys.exit(1)

    if not status_obj:
        print_error(
            'The file "status{0}" was not found in the git repository.'.format(
                file_suffix))
        sys.exit(1)

    # create Dokuwiki pages
    print_ok('Begin: ', 'creating Dokuwiki pages.')
    for chapter in obs_obj.chapters:

        chapter_title = '====== {0} ======'.format(chapter['title'])
        chapter_ref = '//{0}//'.format(chapter['ref'])
        chapter_num = chapter['number'].zfill(2)

        # one image tag followed by the frame text, for every frame
        chapter_body = ''.join(
            '{{{{{0}?direct&}}}}\n\n{1}\n\n'.format(frame['img'], frame['text'])
            for frame in chapter['frames'])

        file_name = root.format(lang_code, chapter_num + '.txt')
        print('  Writing {0}'.format(file_name))
        with codecs.open(file_name, 'w', 'utf-8-sig') as out_file:
            out_file.write('{0}\n\n{1}{2}\n'.format(chapter_title,
                                                    chapter_body, chapter_ref))

    print_ok('Finished: ', 'creating Dokuwiki pages.')

    # Create image symlinks on api.unfoldingword.org
    try:
        print('Creating symlink to images directory...', end=' ')
        link_name = '/var/www/vhosts/api.unfoldingword.org/httpdocs/obs/jpg/1/{0}'.format(
            lang_code.lower())
        if not os.path.isfile(link_name) and not os.path.isdir(
                link_name) and not os.path.islink(link_name):
            os.symlink(link_source, link_name)
    finally:
        print('finished.')

    # Create PDF via ConTeXt
    if not no_pdf and tools_dir and os.path.isdir(tools_dir):
        try:
            print_ok('Beginning: ', 'PDF generation.')
            script_file = os.path.join(tools_dir, 'obs/book/pdf_export.sh')
            out_dir = api_dir.format(lang_code)
            make_dir(out_dir)

            # Run the script directly with its argument list. Combining a list
            # with shell=True makes only the first item the command; the other
            # items become arguments of the shell itself, so the script never
            # received -l/-c/-v/-o.
            process = subprocess.Popen([
                script_file, '-l', lang_code, '-c', status_obj.checking_level,
                '-v', status_obj.version, '-o', out_dir
            ],
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)

            # wait for the process to terminate
            out, err = process.communicate()
            exit_code = process.returncode
            out = out.strip().decode('utf-8')
            err = err.strip().decode('utf-8')

            # the error message may be in stdout
            if exit_code != 0:
                if not err:
                    err = out
                    out = None

            if err:
                print_error(err, 2)

            if out:
                print('  ' + out)

            print('  PDF subprocess finished with exit code {0}'.format(
                exit_code))

        finally:
            print_ok('Finished:', 'generating PDF.')
Пример #24
0
def main(git_repo, tag, no_pdf):
    """Publish OBS for one language from a tagged archive of a git repository.

    The archive ``<git_repo>/archive/<tag>.zip`` is downloaded (unless the zip
    is already present) and unpacked under ``/tmp``. ``manifest.json``
    provides both the language information and the status. The chapters are
    loaded and verified, the OBS JSON and the catalog are written to the
    exports directory, the result is exported to the API, the catalogs are
    updated and, unless disabled, the PDF is created. Exits the process when
    the manifest is missing, verification fails or the language is not
    configured.

    :param git_repo: URL of the repository; a '.git' suffix or trailing '/' is dropped
    :param tag: name of the tag whose archive is downloaded
    :param no_pdf: when true, the PDF is not created
    """
    global download_dir

    # normalise the repository URL
    if git_repo.endswith('.git'):
        git_repo = git_repo[:-4]

    if git_repo.endswith('/'):
        git_repo = git_repo[:-1]

    # initialize some variables
    repo_name = git_repo.rpartition('/')[2]
    today = str(datetime.date.today()).replace('-', '')  # YYYYMMDD
    download_dir = '/tmp/{0}'.format(repo_name)
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, repo_name)
    file_to_download = join_url_parts(git_repo, 'archive/{0}.zip'.format(tag))
    manifest = None
    status = None  # type: OBSStatus
    content_dir = None

    # download the repository, reusing an archive that is already there
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        print('Unzipping...', end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for walk_dir, sub_dirs, file_names in os.walk(download_dir):

        if 'manifest.json' in file_names:
            try:
                print('Reading the manifest...', end=' ')
                content_dir = walk_dir
                manifest_path = os.path.join(walk_dir, 'manifest.json')
                manifest = load_json_object(manifest_path)
                status = OBSStatus.from_manifest(manifest)
            finally:
                print('finished.')

        # a 'content' sub-directory takes precedence over the manifest's directory
        if 'content' in sub_dirs:
            content_dir = os.path.join(walk_dir, 'content')

        # stop walking as soon as everything has been found
        if content_dir and manifest and status:
            break

    # check for valid repository structure
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)

    print('Initializing OBS object...', end=' ')
    lang = manifest['language']['slug']
    obs_obj = OBS()
    obs_obj.date_modified = today
    obs_obj.direction = manifest['language']['dir']
    obs_obj.language = lang
    print('finished')

    # chapters in numeric order
    obs_obj.chapters = load_obs_chapters(content_dir)
    obs_obj.chapters.sort(key=lambda chapter: int(chapter['number']))

    if not obs_obj.verify_all():
        print_error('Quality check did not pass.')
        sys.exit(1)

    print('Loading languages...', end=' ')
    lang_dict = OBS.load_lang_strings()
    print('finished.')

    print('Loading the catalog...', end=' ')
    export_dir = '/var/www/vhosts/door43.org/httpdocs/exports'
    cat_path = os.path.join(export_dir, 'obs-catalog.json')
    catalog = load_json_object(cat_path, [])
    print('finished')

    print('Getting already published languages...', end=' ')
    json_file_name = 'obs-{0}.json'.format(lang)
    json_lang_file_path = os.path.join(export_dir, lang, 'obs', json_file_name)

    if lang not in lang_dict:
        print("Configuration for language {0} missing.".format(lang))
        sys.exit(1)
    print('finished.')

    updated = update_language_catalog(lang, obs_obj.direction, status, today,
                                      lang_dict, catalog)

    print('Writing the OBS file to the exports directory...', end=' ')
    cur_json = json.dumps(obs_obj, sort_keys=True, cls=OBSEncoder)

    if updated:
        # stamp the first catalog entry for this language with today's date
        lang_entries = [entry for entry in catalog if entry['language'] == lang]
        lang_entries[0]['date_modified'] = today
        # noinspection PyTypeChecker
        write_file(json_lang_file_path.replace('.txt', '.json'), cur_json)
    print('finished.')

    export_to_api(lang, status, today, cur_json)

    write_file(cat_path, json.dumps(catalog, sort_keys=True, cls=OBSEncoder))

    # update the catalog
    print_ok('STARTING: ', 'updating the catalogs.')
    update_catalog()
    print_ok('FINISHED: ', 'updating the catalogs.')

    if not no_pdf:
        create_pdf(lang, status.checking_level, status.version)
def main(git_repo, tag, no_pdf):
    """Publish OBS for one language from a tagged archive of a git repository.

    The archive ``<git_repo>/archive/<tag>.zip`` is downloaded (unless the zip
    is already present) and unpacked under ``/tmp``. ``manifest.json`` and
    ``status.json`` are read, the chapters are loaded and verified, the OBS
    JSON and the catalog are written to the exports directory, the result is
    exported to the API, the catalogs are updated and, unless disabled, the
    PDF is created. Exits the process when a required file is missing,
    verification fails or the language is not configured.

    :param git_repo: URL of the repository; a '.git' suffix or trailing '/' is dropped
    :param tag: name of the tag whose archive is downloaded
    :param no_pdf: when true, the PDF is not created
    """
    global download_dir

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]

    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])  # YYYYMMDD
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    manifest = None
    status = None  # type: OBSStatus
    content_dir = None

    # download the repository, reusing an archive that is already there
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        print('Unzipping...', end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):

        if 'manifest.json' in files:
            # read the manifest
            try:
                print('Reading the manifest...', end=' ')
                content_dir = root
                manifest = load_json_object(os.path.join(root, 'manifest.json'))
            finally:
                print('finished.')

        if 'status.json' in files:
            # read the meta data
            try:
                print('Reading the status...', end=' ')
                content_dir = root
                status = OBSStatus(os.path.join(root, 'status.json'))
            finally:
                print('finished.')

        # if we have everything, exit the loop
        if content_dir and manifest and status:
            break

    # check for valid repository structure
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)

    if not status:
        print_error('Did not find status.json in {}'.format(git_repo))
        sys.exit(1)

    print('Initializing OBS object...', end=' ')
    lang = manifest['target_language']['id']
    obs_obj = OBS()
    obs_obj.date_modified = today
    obs_obj.direction = manifest['target_language']['direction']
    obs_obj.language = lang
    print('finished')

    obs_obj.chapters = load_obs_chapters(content_dir)
    # Sort numerically: comparing the raw values orders numeric strings
    # lexicographically ('10' before '2').
    obs_obj.chapters.sort(key=lambda c: int(c['number']))

    if not obs_obj.verify_all():
        print_error('Quality check did not pass.')
        sys.exit(1)

    print('Loading languages...', end=' ')
    lang_dict = OBS.load_lang_strings()
    print('finished.')

    print('Loading the catalog...', end=' ')
    export_dir = '/var/www/vhosts/door43.org/httpdocs/exports'
    cat_path = os.path.join(export_dir, 'obs-catalog.json')
    catalog = load_json_object(cat_path, [])
    print('finished')

    print('Getting already published languages...', end=' ')
    json_lang_file_path = os.path.join(export_dir, lang, 'obs', 'obs-{0}.json'.format(lang))

    if lang not in lang_dict:
        print("Configuration for language {0} missing.".format(lang))
        sys.exit(1)
    print('finished.')

    updated = update_language_catalog(lang, obs_obj.direction, status, today, lang_dict, catalog)

    print('Writing the OBS file to the exports directory...', end=' ')
    cur_json = json.dumps(obs_obj, sort_keys=True, cls=OBSEncoder)

    if updated:
        # stamp the first catalog entry for this language with today's date
        ([x for x in catalog if x['language'] == lang][0]['date_modified']) = today
        write_file(json_lang_file_path.replace('.txt', '.json'), cur_json)
    print('finished.')

    export_to_api(lang, status, today, cur_json)

    cat_json = json.dumps(catalog, sort_keys=True, cls=OBSEncoder)
    write_file(cat_path, cat_json)

    # update the catalog
    print_ok('STARTING: ', 'updating the catalogs.')
    update_catalog()
    print_ok('FINISHED: ', 'updating the catalogs.')

    if no_pdf:
        return

    create_pdf(lang, status.checking_level, status.version)
Пример #26
0
def main(git_repo, tag):
    """Download a tagged archive of a git repository and export the manual as JSON.

    The archive ``<git_repo>/archive/<tag>.zip`` is downloaded into a directory
    under ``/tmp`` (unless the zip file is already there), unzipped, and
    searched for ``meta.yaml``, ``toc.yaml`` and a ``content`` directory.
    From these a ``TAManual`` is built and written as JSON into the directory
    returned by ``get_output_dir()``.

    The process exits with status 1 when any of the three required items is
    missing from the repository. ``check_missing_pages`` may also end the
    process.

    :param git_repo: URL of the repository; trailing ``/`` and ``.git`` are
        removed before use.
    :param tag: name of the archive to download (``archive/<tag>.zip``).
    """
    global download_dir

    # clean up the git repo url; slashes are stripped first so that a URL
    # ending in '.git/' is normalised the same way as one ending in '.git'
    git_repo = git_repo.rstrip('/')
    if git_repo.endswith('.git'):
        git_repo = git_repo[:-4].rstrip('/')

    # initialize some variables
    repo_name = git_repo.rpartition('/')[2]
    download_dir = '/tmp/{0}'.format(repo_name)
    make_dir(download_dir)
    # NOTE(review): the archive name does not include the tag, so a zip left
    # over from an earlier run with a different tag is reused below - confirm
    # whether that caching is intended.
    downloaded_file = '{0}/{1}.zip'.format(download_dir, repo_name)
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    metadata_obj = None
    content_dir = None
    toc_obj = None

    # download the repository; 'finished.' is only reported on success so a
    # failed step is not announced as completed
    print('Downloading {0}...'.format(file_to_download), end=' ')
    try:
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    except Exception:
        print('failed.')
        raise
    print('finished.')

    print('Unzipping {0}...'.format(downloaded_file), end=' ')
    try:
        unzip(downloaded_file, download_dir)
    except Exception:
        print('failed.')
        raise
    print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):

        if 'meta.yaml' in files:
            # read the metadata
            print('Reading the metadata...', end=' ')
            try:
                metadata_obj = TAMetaData(os.path.join(root, 'meta.yaml'))
            except Exception:
                print('failed.')
                raise
            print('finished.')

        if 'toc.yaml' in files:
            # read the table of contents
            print('Reading the toc...', end=' ')
            try:
                toc_obj = TATableOfContents(os.path.join(root, 'toc.yaml'))
            except Exception:
                print('failed.')
                raise
            print('finished.')

        if 'content' in dirs:
            content_dir = os.path.join(root, 'content')

        # if we have everything, exit the loop
        if content_dir and metadata_obj and toc_obj:
            break

    # check for valid repository structure
    if not metadata_obj:
        print_error('Did not find meta.yaml in {}'.format(git_repo))
        sys.exit(1)

    if not content_dir:
        print_error('Did not find the content directory in {}'.format(git_repo))
        sys.exit(1)

    if not toc_obj:
        print_error('Did not find toc.yaml in {}'.format(git_repo))
        sys.exit(1)

    # check for missing pages
    check_missing_pages(toc_obj, content_dir)

    # generate the pages
    print('Generating the manual...', end=' ')
    manual = TAManual(metadata_obj, toc_obj)
    manual.load_pages(content_dir)
    print('finished.')

    # the output file is named <manual>_<volume>.json
    file_name = os.path.join(get_output_dir(), '{0}_{1}.json'.format(manual.meta.manual, manual.meta.volume))
    print('saving to {0} ...'.format(file_name), end=' ')
    content = json.dumps(manual, sort_keys=True, indent=2, cls=TAEncoder)
    write_file(file_name, content)
    print('finished.')
def rechunk_this_one(api_directory):
    """Re-chunk every USFM book in `api_directory` and publish the result.

    The language code in `status.json` selects the versification. All `.sig`
    files in the directory are deleted, then each `*.usfm` file (except
    `LICENSE.usfm`) is read, verified, re-chunked and written back to the
    same path. Afterwards the directory is published through `api_publish`
    and the catalogs are updated.

    The process exits with status 1 when a book id cannot be found in a
    file, when no versification data exists for a book, or when any book
    reported validation errors.

    :param api_directory: directory containing `status.json` and the USFM files.
    """
    global id_re, s5_re

    print_notice('Processing {}'.format(api_directory))

    # load status.json ('utf-8-sig' tolerates a leading BOM)
    status_path = os.path.join(api_directory, 'status.json')
    with codecs.open(status_path, 'r', 'utf-8-sig') as status_file:
        status = json.loads(status_file.read())

    # pick the versification for this language; anything not listed uses 'ufw'
    lang_code = status['lang']
    versification_rules = (
        (('ru',), 'rsc'),
        (('hi', 'sr-Latn', 'hu', 'ta'), 'ufw-odx'),
        (('bn',), 'ufw-bn'),
        (('ar',), 'avd'),
        (('kn',), 'ufw-rev'),
    )
    versification = next(
        (name for codes, name in versification_rules if lang_code in codes),
        'ufw'
    )

    versification_data = Bible.get_versification(versification)  # type: list<Book>

    # delete every .sig file in the directory
    for entry in os.listdir(api_directory):
        if not entry.endswith('.sig'):
            continue
        os.remove(os.path.join(api_directory, entry))

    # rechunk each book found in the directory
    errors_found = False
    for usfm_file in glob(os.path.join(api_directory, '*.usfm')):

        if usfm_file.endswith('LICENSE.usfm'):
            continue

        with codecs.open(usfm_file, 'r', 'utf-8') as usfm_in:
            book_text = usfm_in.read()

        # the book id is captured by the first group of id_re
        id_match = id_re.search(book_text)
        if not id_match:
            print_error('Book id not found in {}'.format(usfm_file))
            sys.exit(1)

        book_id = id_match.group(1)
        print('Beginning {}...'.format(book_id), end=' ')

        # locate the versification entry for this book
        book = None
        for candidate in versification_data:
            if candidate.book_id == book_id:
                book = candidate
                break

        if not book:
            print_error('Book versification data was not found for "{}"'.format(book_id))
            sys.exit(1)

        # hand the text, minus the \s5 lines, to the book
        book.set_usfm(s5_re.sub('', book_text))

        # basic checks; failures are remembered and reported after the loop
        book.verify_chapters_and_verses(True)
        if book.validation_errors:
            errors_found = True

        # compute and apply the chunks for this book
        Bible.chunk_book(versification, book)
        book.apply_chunks()

        # the displayed name looks like '01-GEN.usfm'; the text itself is
        # written back to the file it was read from
        book_file_name = '{0}-{1}.usfm'.format(str(book.number).zfill(2), book.book_id)
        print('Writing {0}...'.format(book_file_name), end=' ')
        write_file(usfm_file, book.usfm)

        print('finished.')

    if errors_found:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)

    # rebuild source for tS
    print()
    print('Publishing to the API...')
    with api_publish(api_directory) as publisher:
        publisher.run()
    print('Finished publishing to the API.')

    # refresh the catalogs
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')
    parser.add_argument('-t', '--testexport', dest="testexport", default=False,
                        action='store_true', help="Test export to unfoldingWord.")
    parser.add_argument('-p', '--nopdf', dest='nopdf', action='store_true', help='Do not produce a PDF.')

    args = parser.parse_args(sys.argv[1:])
    lang = args.lang
    uw_export = args.uwexport
    test_export = args.testexport
    no_pdf = args.nopdf

    print_ok('STARTING: ', 'importing OBS from Dokuwiki')

    # pre-flight checklist
    link_source = '/var/www/vhosts/api.unfoldingword.org/httpdocs/obs/jpg/1/en'
    if not os.path.isdir(link_source):
        print_error('Image source directory not found: {0}.'.format(link_source))
        sys.exit(1)

    if no_pdf:
        tools_dir = None
    else:
        tools_dir = '/var/www/vhosts/door43.org/tools'
        if not os.path.isdir(tools_dir):
            tools_dir = os.path.expanduser('~/Projects/tools')

        # prompt if tools not found
        if not os.path.isdir(tools_dir):
            tools_dir = None
            print_notice('The tools directory was not found. The PDF cannot be generated.')
            resp = prompt('Do you want to continue without generating a PDF? [Y|n]: ')
            if resp != '' and resp != 'Y' and resp != 'y':
Пример #29
0
def main(git_repo, tag):
    """Fetch a tagged repository archive and save the manual it contains as JSON.

    Downloads ``<git_repo>/archive/<tag>.zip`` into a directory under
    ``/tmp`` (skipped when the zip file already exists), unzips it, and
    walks the result looking for ``meta.yaml``, ``toc.yaml`` and a
    ``content`` directory. A ``TAManual`` is built from them and dumped as
    JSON into the directory returned by ``get_output_dir()``.

    Exits the process with status 1 if ``meta.yaml``, ``toc.yaml`` or the
    ``content`` directory cannot be found. ``check_missing_pages`` may also
    end the process.

    :param git_repo: URL of the repository; trailing ``/`` and ``.git`` are
        removed before use.
    :param tag: name of the archive to download (``archive/<tag>.zip``).
    """
    global download_dir

    # clean up the git repo url; strip slashes before the '.git' suffix so
    # that 'repo.git/' is reduced to 'repo' as well
    git_repo = git_repo.rstrip('/')
    if git_repo.endswith('.git'):
        git_repo = git_repo[:-4].rstrip('/')

    # initialize some variables
    repo_name = git_repo.rpartition('/')[2]
    download_dir = '/tmp/{0}'.format(repo_name)
    make_dir(download_dir)
    # NOTE(review): the zip name is independent of the tag, so an archive
    # downloaded earlier for another tag is reused - confirm this is wanted.
    downloaded_file = '{0}/{1}.zip'.format(download_dir, repo_name)
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    metadata_obj = None
    content_dir = None
    toc_obj = None

    # download the repository; report 'finished.' only when the step
    # succeeded, and 'failed.' before re-raising otherwise
    print('Downloading {0}...'.format(file_to_download), end=' ')
    try:
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    except Exception:
        print('failed.')
        raise
    print('finished.')

    print('Unzipping {0}...'.format(downloaded_file), end=' ')
    try:
        unzip(downloaded_file, download_dir)
    except Exception:
        print('failed.')
        raise
    print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):

        if 'meta.yaml' in files:
            # read the metadata
            print('Reading the metadata...', end=' ')
            try:
                metadata_obj = TAMetaData(os.path.join(root, 'meta.yaml'))
            except Exception:
                print('failed.')
                raise
            print('finished.')

        if 'toc.yaml' in files:
            # read the table of contents
            print('Reading the toc...', end=' ')
            try:
                toc_obj = TATableOfContents(os.path.join(root, 'toc.yaml'))
            except Exception:
                print('failed.')
                raise
            print('finished.')

        if 'content' in dirs:
            content_dir = os.path.join(root, 'content')

        # if we have everything, exit the loop
        if content_dir and metadata_obj and toc_obj:
            break

    # check for valid repository structure
    if not metadata_obj:
        print_error('Did not find meta.yaml in {}'.format(git_repo))
        sys.exit(1)

    if not content_dir:
        print_error(
            'Did not find the content directory in {}'.format(git_repo))
        sys.exit(1)

    if not toc_obj:
        print_error('Did not find toc.yaml in {}'.format(git_repo))
        sys.exit(1)

    # check for missing pages
    check_missing_pages(toc_obj, content_dir)

    # generate the pages
    print('Generating the manual...', end=' ')
    manual = TAManual(metadata_obj, toc_obj)
    manual.load_pages(content_dir)
    print('finished.')

    # the output file is named <manual>_<volume>.json
    file_name = os.path.join(
        get_output_dir(), '{0}_{1}.json'.format(manual.meta.manual,
                                                manual.meta.volume))
    print('saving to {0} ...'.format(file_name), end=' ')
    content = json.dumps(manual, sort_keys=True, indent=2, cls=TAEncoder)
    write_file(file_name, content)
    print('finished.')
Пример #30
0
if __name__ == '__main__':
    print()
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-l', '--lang', dest='lang', default=False,
                        required=True, help='Language code of resource.')
    parser.add_argument('-r', '--gitrepo', dest='gitrepo', default=False,
                        required=False, help='Git repository where the source can be found.')
    parser.add_argument('-u', '--url', dest='url', default=False,
                        required=False, help='Door43 page where the source can be found.')
    parser.add_argument('-p', '--nopdf', dest='nopdf', action='store_true', help='Do not produce a PDF.')

    args = parser.parse_args(sys.argv[1:])

    if not args.gitrepo and not args.url:
        print_error('You must provide either --gitrepo or --url to this script.')
        sys.exit(0)

    try:
        # get the language data
        try:
            print('Downloading language data...', end=' ')
            langs = get_languages()
        finally:
            print('finished.')

        this_lang = next(l for l in langs if l['lc'] == args.lang)

        if not this_lang:
            print_error('Information for language "{0}" was not found.'.format(args.lang))
            sys.exit(1)