def mock_s3_tn_project(self, part):
    zip_file = os.path.join(self.resources_dir, 'converted_projects', 'en_tn_converted.zip')
    out_dir = os.path.join(self.temp_dir, 'en_tn_converted')
    unzip(zip_file, out_dir)
    src_dir = os.path.join(out_dir, 'en_tn_converted')
    self.project_files = [f for f in os.listdir(src_dir) if os.path.isfile(os.path.join(src_dir, f))]
    self.project_key = 'u/door43/en_tn/12345678'
    build_log = file_utils.load_json_object(os.path.join(src_dir, 'build_log.json'))
    build_log['part'] = part
    file_utils.write_file(os.path.join(src_dir, 'build_log.json'), build_log)
    AppSettings.cdn_s3_handler().upload_file(os.path.join(src_dir, 'build_log.json'),
                                             '{0}/{1}/build_log.json'.format(self.project_key, part))
    AppSettings.cdn_s3_handler().upload_file(os.path.join(src_dir, 'index.json'),
                                             '{0}/{1}/index.json'.format(self.project_key, part))
    AppSettings.cdn_s3_handler().upload_file(os.path.join(src_dir, 'build_log.json'),
                                             '{0}/{1}/finished'.format(self.project_key, part))
    AppSettings.cdn_s3_handler().upload_file(os.path.join(src_dir, '01-GEN.html'),
                                             '{0}/{1}/01-GEN.html'.format(self.project_key, part))
    AppSettings.cdn_s3_handler().upload_file(os.path.join(src_dir, 'project.json'),
                                             'u/door43/en_tq/project.json')
    AppSettings.door43_s3_handler().upload_file(os.path.join(self.resources_dir, 'templates', 'project-page.html'),
                                                'templates/project-page.html')
def test_convert_only_jas(self):
    """Runs the converter and verifies the output."""
    # test with eight Bible books
    zip_file = os.path.join(self.resources_dir, 'eight_bible_books.zip')
    # zip_file = self.make_duplicate_zip_that_can_be_deleted(zip_file)
    # out_zip_file = tempfile.NamedTemporaryFile(suffix='.zip', dir=self.temp_dir, delete=False).name
    self.in_dir = tempfile.mkdtemp(prefix='udb_in_', dir=self.temp_dir)
    unzip(zip_file, self.in_dir)
    source_url = 'http://test.com/preconvert/22f3d09f7a.zip?convert_only=60-JAS.usfm'
    with closing(Usfm2HtmlConverter('Bible', self.in_dir)) as tx:
        # tx.input_zip_file = zip_file
        results = tx.run()
    # verify the output
    # self.assertTrue(os.path.isfile(out_zip_file), "There was no output zip file produced.")
    self.assertIsNotNone(results)
    # self.out_dir = tempfile.mkdtemp(prefix='udb_out_', dir=self.temp_dir)
    # unzip(out_zip_file, self.out_dir)
    # files_to_verify = ['60-JAS.html']
    # for file_to_verify in files_to_verify:
    #     file_name = os.path.join(self.out_dir, file_to_verify)
    #     self.assertTrue(os.path.isfile(file_name), 'UDB HTML file not found: {0}'.format(file_name))
    # files_to_not_verify = ['61-1PE.html', '62-2PE.html', '63-1JN.html', '64-2JN.html', '65-3JN.html',
    #                        '66-JUD.html', '67-REV.html']
    # for file_to_verify in files_to_not_verify:
    #     file_name = os.path.join(self.out_dir, file_to_verify)
    #     self.assertFalse(os.path.isfile(file_name), 'UDB HTML file unexpectedly found: {0}'.format(file_name))
    # self.assertEqual(tx.source, source_url.split('?')[0])
    self.assertTrue(isinstance(results, dict))
    print("results2", results)
    self.assertTrue(results['success'])
def test_run(self):
    """Runs the converter and verifies the output."""
    # test with the English tN
    zip_file = os.path.join(self.resources_dir, 'en_tn.tsv.zip')
    # zip_file = self.make_duplicate_zip_that_can_be_deleted(zip_file)
    # out_zip_file = tempfile.NamedTemporaryFile(prefix='en_tn_tsv_', suffix='.zip', delete=False).name
    self.in_dir = tempfile.mkdtemp(prefix='tn_in_', dir=self.temp_dir)
    unzip(zip_file, self.in_dir)
    with closing(Tsv2HtmlConverter('Translation_Notes', self.in_dir)) as tx:
        # tx.input_zip_file = zip_file
        results = tx.run()
    # verify the output
    # self.assertTrue(os.path.isfile(out_zip_file), "There was no output zip file produced.")
    # self.out_dir = tempfile.mkdtemp(prefix='tX_test_tn_tsv_')
    # unzip(out_zip_file, self.out_dir)
    # remove(out_zip_file)
    # # print(f"Got in {self.out_dir}: {os.listdir(self.out_dir)}")
    # files_to_verify = []
    # # for i in range(1, 51):
    # #     files_to_verify.append(f'en_tn_{str(i).zfill(2)}-{XXX}.html')
    # for folder in BOOK_NUMBERS:
    #     book = f'{BOOK_NUMBERS[folder]}-{folder.upper()}'
    #     filename = f'en_tn_{book}.html'
    #     files_to_verify.append(filename)
    # for file_to_verify in files_to_verify:
    #     file_name = os.path.join(self.out_dir, file_to_verify)
    #     self.assertTrue(os.path.isfile(file_name), f'tN HTML file not found: {file_name}')
    self.assertTrue(isinstance(results, dict))
    self.assertTrue(results['success'])
def test_multiple_projects(self):
    """ Populates the ResourceContainer object and verifies the output."""
    # test with the English tA (multiple projects)
    zip_file = os.path.join(self.resources_dir, 'en-ta-multiple-projects.zip')
    self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
    unzip(zip_file, self.out_dir)
    repo_dir = os.path.join(self.out_dir, 'en_ta')
    rc = RC(directory=repo_dir)
    rc.as_dict()
    yaml = load_yaml_object(os.path.join(repo_dir, 'manifest.yaml'))
    self.assertEqual(rc.resource.identifier, yaml['dublin_core']['identifier'])
    self.assertEqual(rc.resource.type, yaml['dublin_core']['type'])
    self.assertEqual(rc.resource.format, yaml['dublin_core']['format'])
    self.assertEqual(rc.resource.file_ext, 'md')
    self.assertEqual(rc.resource.conformsto, yaml['dublin_core']['conformsto'])
    self.assertEqual(rc.resource.modified, yaml['dublin_core']['modified'])
    self.assertEqual(len(rc.project_ids), 4)
    self.assertEqual(rc.project_count, 4)
    chapters = rc.project('checking').chapters()
    self.assertEqual(len(chapters), 44)
    chunks = rc.project('checking').chunks('level1')
    self.assertEqual(chunks, ['01.md', 'sub-title.md', 'title.md'])
    self.assertTrue('acceptable' in rc.project('checking').config())
    self.assertTrue('title' in rc.project('checking').toc())
    self.assertTrue(rc.project('checking').toc()['title'], 'Table of Contents')
def test_ceb_psa_text_ulb_L3(self):
    """ Populates the ResourceContainer object and verifies the output."""
    # test with the Cebuano Psalms ULB text
    zip_file = os.path.join(self.resources_dir, 'ceb_psa_text_ulb_L3.zip')
    self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
    unzip(zip_file, self.out_dir)
    repo_dir = os.path.join(self.out_dir, 'ceb_psa_text_ulb_l3')
    rc = RC(directory=repo_dir)
    rc.as_dict()
    json = load_json_object(os.path.join(repo_dir, 'manifest.json'))
    self.assertEqual(rc.resource.identifier, json['resource']['id'])
    self.assertEqual(rc.resource.type, 'book')
    self.assertEqual(rc.resource.format, 'text/{0}'.format(json['format']))
    self.assertEqual(rc.resource.file_ext, json['format'])
    self.assertEqual(rc.resource.conformsto, 'pre-rc')
    self.assertEqual(rc.resource.modified, datetime.utcnow().strftime('%Y-%m-%d'))
    chapters = rc.projects[0].chapters()
    idx = 1
    for chapter in chapters:
        if chapter.isnumeric():
            self.assertEqual(int(chapter), idx)
            idx += 1
    self.assertEqual(len(chapters), 151)
    chunks = rc.projects[0].chunks('01')
    self.assertEqual(len(chunks), 5)
def test_run(self):
    """Runs the converter and verifies the output."""
    # test with the English OBS
    zip_file = os.path.join(self.resources_dir, 'en-obs.zip')
    # zip_file = self.make_duplicate_zip_that_can_be_deleted(zip_file)
    # out_zip_file = tempfile.NamedTemporaryFile(prefix='en-obs', suffix='.zip', delete=False).name
    self.in_dir = tempfile.mkdtemp(prefix='en_obs_in_', dir=self.temp_dir)
    unzip(zip_file, self.in_dir)
    with closing(Md2HtmlConverter('Open_Bible_Stories', self.in_dir)) as tx:
        # tx.input_zip_file = zip_file
        results = tx.run()
    # verify the output
    # self.assertTrue(os.path.isfile(out_zip_file), "There was no output zip file produced.")
    # self.out_dir = tempfile.mkdtemp(prefix='tX_test_obs_')
    # unzip(out_zip_file, self.out_dir)
    # remove(out_zip_file)
    # files_to_verify = []
    # for i in range(1, 51):
    #     files_to_verify.append(str(i).zfill(2) + '.html')
    # for file_to_verify in files_to_verify:
    #     file_name = os.path.join(self.out_dir, file_to_verify)
    #     self.assertTrue(os.path.isfile(file_name), 'OBS HTML file not found: {0}'.format(file_name))
    self.assertTrue(isinstance(results, dict))
    self.assertTrue(results['success'])
def main(date_today, tag, version):
    global download_dir
    repo = 'https://git.door43.org/Door43/en-tq'
    download_dir = tempfile.mkdtemp(prefix='tempTQ_')
    download_url = join_url_parts(repo, 'archive', '{0}.zip'.format(tag))
    downloaded_file = os.path.join(download_dir, 'tQ.zip')

    # download the repository
    try:
        print('Downloading {0}...'.format(download_url), end=' ')
        download_file(download_url, downloaded_file)
    finally:
        print('finished.')

    try:
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    source_root = os.path.join(download_dir, 'en-tq', 'content')
    books = [x for x in os.listdir(source_root) if os.path.isdir(os.path.join(source_root, x))]
    for book in books:
        print('Processing {}.'.format(book))
        book_dir = os.path.join(source_root, book)
        api_path = os.path.join(api_v2, book, 'en')
        # noinspection PyUnresolvedReferences
        book_questions = []  # type: list[dict]
        for entry in os.listdir(book_dir):
            file_name = os.path.join(book_dir, entry)
            # we are only processing files
            if not os.path.isfile(file_name):
                continue
            # we are only processing markdown files
            if entry[-3:] != '.md':
                continue
            book_questions.append(get_cq(file_name))
        # Check to see if there are published questions in this book
        pub_check = [x['cq'] for x in book_questions if len(x['cq']) > 0]
        if len(pub_check) == 0:
            print('No published questions for {0}'.format(book))
            continue
        book_questions.sort(key=lambda y: y['id'])
        book_questions.append({'date_modified': date_today, 'version': version})
        write_file('{0}/questions.json'.format(api_path), book_questions, indent=2)

    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')
def unzip_converted_files(self, converted_zip_filepath: str) -> str:
    AppSettings.logger.debug(f"ClientConverterCallback.unzip_converted_files({converted_zip_filepath})…")
    unzip_dirpath = tempfile.mkdtemp(prefix='unzip_', dir=self.temp_dir)
    try:
        AppSettings.logger.debug(f"Unzipping {converted_zip_filepath} …")
        unzip(converted_zip_filepath, unzip_dirpath)
    finally:
        AppSettings.logger.debug("Unzip finished.")
    return unzip_dirpath
def test_lint_warnings(self, mock_invoke):
    obs_zip_file = os.path.join(self.resources_dir, 'obs_linter', 'en-obs.zip')
    unzip(obs_zip_file, self.temp_dir)
    source_dir = os.path.join(self.temp_dir, 'en-obs')
    mock_invoke.return_value = {}
    expected_warnings = True
    linter = ObsLinter(repo_subject='Open_Bible_Stories', source_dir=source_dir)
    linter.run()
    self.verify_results(expected_warnings, linter)
def doTransformTw(self, file_name):
    zip_file_path = os.path.join(self.resources_dir, file_name)
    # zip_file_path = self.make_duplicate_zip_that_can_be_deleted(zip_file_path)
    # self.out_zip_file = tempfile.NamedTemporaryFile(prefix='en_tw', suffix='.zip', delete=False).name
    self.in_dir = tempfile.mkdtemp(prefix='udb_in_', dir=self.temp_dir)
    unzip(zip_file_path, self.in_dir)
    self.return_val = None
    with closing(Md2HtmlConverter('Translation_Words', self.in_dir)) as tx:
        # tx.input_zip_file = zip_file_path
        self.return_val = tx.run()
    return tx
def doTransformTn(self, file_name, part=None):
    zip_file_path = os.path.join(self.resources_dir, file_name)
    # zip_file_path = self.make_duplicate_zip_that_can_be_deleted(zip_file_path)
    # self.out_zip_file = tempfile.NamedTemporaryFile(prefix='en_tq', suffix='.zip', delete=False).name
    self.in_dir = tempfile.mkdtemp(prefix='udb_in_', dir=self.temp_dir)
    unzip(zip_file_path, self.in_dir)
    self.return_val = None
    # source = '' if not part else 'https://door43.org/dummy?convert_only={0}'.format(part)
    with closing(Md2HtmlConverter('Translation_Notes', self.in_dir)) as tx:
        # tx.input_zip_file = zip_file_path
        self.return_val = tx.run()
    return tx
def main(date_today, tag, version):
    """
    :param str|unicode date_today:
    :param str|unicode tag:
    :param str|unicode version:
    :return:
    """
    global download_dir, tw_aliases
    repo = 'https://git.door43.org/Door43/en-tw'
    download_dir = tempfile.mkdtemp(prefix='tempTW_')
    download_url = join_url_parts(repo, 'archive', '{0}.zip'.format(tag))
    downloaded_file = os.path.join(download_dir, 'tW.zip')

    # download the repository
    try:
        print('Downloading {0}...'.format(download_url), end=' ')
        download_file(download_url, downloaded_file)
    finally:
        print('finished.')

    try:
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    tw_list = []
    for root, dirs, files in os.walk(os.path.join(download_dir, 'en-tw', 'content')):
        for f in files:
            file_name = os.path.join(root, f)
            tw = get_tw(file_name)
            if tw:
                tw_list.append(tw)

    for i in tw_list:  # type: dict
        if i['id'] in tw_aliases:
            i['aliases'] = [x for x in tw_aliases[i['id']] if x != i['term']]

    tw_list.sort(key=lambda y: len(y['term']), reverse=True)
    tw_list.append({'date_modified': date_today, 'version': version})

    api_path = os.path.join(api_v2, 'bible', 'en')
    write_file('{0}/terms.json'.format(api_path), tw_list, indent=2)

    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')
def extractFiles(cls, file_name, repo_name):
    file_path = os.path.join(TestTqPreprocessor.resources_dir, file_name)

    # 1) unzip the repo files
    temp_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
    unzip(file_path, temp_dir)
    repo_dir = os.path.join(temp_dir, repo_name)
    if not os.path.isdir(repo_dir):
        repo_dir = file_path

    # 2) Get the resource container
    rc = RC(repo_dir)
    return rc, repo_dir, temp_dir
def test_unzip(self):
    tmp_dir = tempfile.mkdtemp()
    zip_file = tmp_dir + "/foo.zip"
    _, tmp_file = tempfile.mkstemp()
    with open(tmp_file, "w") as tmpf:
        tmpf.write("hello world")
    with zipfile.ZipFile(zip_file, "w") as zf:
        zf.write(tmp_file, os.path.basename(tmp_file))
    file_utils.unzip(zip_file, tmp_dir)
    with open(os.path.join(tmp_dir, os.path.basename(tmp_file))) as outf:
        self.assertEqual(outf.read(), "hello world")
def test_unzip(self):
    self.tmp_dir = tempfile.mkdtemp(prefix='Door43_test_file_utils_')
    zip_file = os.path.join(self.tmp_dir, 'foo.zip')
    _, self.tmp_file = tempfile.mkstemp(prefix='Door43_test_')
    with open(self.tmp_file, "w") as tmpf:
        tmpf.write("hello world")
    with zipfile.ZipFile(zip_file, "w") as zf:
        zf.write(self.tmp_file, os.path.basename(self.tmp_file))
    file_utils.unzip(zip_file, self.tmp_dir)
    with open(os.path.join(self.tmp_dir, os.path.basename(self.tmp_file))) as outf:
        self.assertEqual(outf.read(), "hello world")
def test_en_obs_manifest_yaml(self):
    """ Populates the ResourceContainer object and verifies the output."""
    # test with the English OBS
    zip_file = os.path.join(self.resources_dir, 'en-obs-manifest-yaml.zip')
    self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
    unzip(zip_file, self.out_dir)
    repo_dir = os.path.join(self.out_dir, 'en_obs')
    rc = RC(directory=repo_dir, repo_name='en_obs')
    rc_dic = rc.as_dict()
    yaml = load_yaml_object(os.path.join(repo_dir, 'manifest.yaml'))
    self.assertDictEqual(yaml, rc_dic)
    chapters = rc.projects[0].chapters()
    self.assertEqual(len(chapters), 2)
    chunks = rc.project().chunks('front')
    self.assertEqual(chunks, ['intro.md', 'title.md'])
def download_repo(commit_url, repo_dir):
    repo_zip_url = commit_url.replace('commit', 'archive') + '.zip'
    repo_zip_file = os.path.join(tempfile.gettempdir(), repo_zip_url.rpartition('/')[2])
    try:
        print('Downloading {0}...'.format(repo_zip_url))
        if not os.path.isfile(repo_zip_file):
            download_file(repo_zip_url, repo_zip_file)
    finally:
        print('finished.')
    try:
        print('Unzipping {0}...'.format(repo_zip_file))
        unzip(repo_zip_file, repo_dir)
    finally:
        print('finished.')
def download_source_file(source_url, destination_folder):
    """
    Downloads the specified source file and unzips it if necessary.

    :param str source_url: The URL of the file to download
    :param str destination_folder: The directory where the downloaded file should be unzipped
    :return: None
    """
    AppSettings.logger.debug(f"download_source_file( {source_url}, {destination_folder} )")
    # URLs always use forward slashes, so split on '/' rather than os.path.sep
    source_filepath = os.path.join(destination_folder, source_url.rpartition('/')[2])
    AppSettings.logger.debug(f"source_filepath: {source_filepath}")
    try:
        AppSettings.logger.info(f"Downloading {source_url} …")
        # if the file already exists, remove it, we want a fresh copy
        if os.path.isfile(source_filepath):
            os.remove(source_filepath)
        download_file(source_url, source_filepath)
    finally:
        AppSettings.logger.debug("Downloading finished.")
    if source_url.lower().endswith('.zip'):
        try:
            AppSettings.logger.debug(f"Unzipping {source_filepath} …")
            # TODO: This is unsafe if the zipfile comes from an untrusted source
            unzip(source_filepath, destination_folder)
        finally:
            AppSettings.logger.debug("Unzipping finished.")
        # clean up the downloaded zip file
        if os.path.isfile(source_filepath):
            os.remove(source_filepath)
    str_filelist = str(os.listdir(destination_folder))
    str_filelist_adjusted = str_filelist if len(str_filelist) < 1500 \
        else f'{str_filelist[:1000]} …… {str_filelist[-500:]}'
    AppSettings.logger.debug(f"Destination folder '{destination_folder}' now has: {str_filelist_adjusted}")
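# A minimal sketch (not part of the original code) of one way to address the TODO above:
# reject archives whose members would extract outside destination_folder (zip-slip).
# The helper name `is_safe_zip` is illustrative and is not an existing function in this codebase.
import os
import zipfile

def is_safe_zip(zip_path: str, destination_folder: str) -> bool:
    """Return True only if every archive member would land inside destination_folder."""
    dest_root = os.path.realpath(destination_folder)
    with zipfile.ZipFile(zip_path) as zf:
        for member in zf.namelist():
            target = os.path.realpath(os.path.join(dest_root, member))
            # A member like '../../etc/passwd' resolves outside dest_root and is rejected.
            if not (target == dest_root or target.startswith(dest_root + os.sep)):
                return False
    return True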
def test_bible_no_manifest(self):
    """ Populates the ResourceContainer object and verifies the output."""
    # test with an English ULB bundle that has no manifest
    zip_file = os.path.join(self.resources_dir, 'bible-no-manifest.zip')
    self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
    unzip(zip_file, self.out_dir)
    repo_dir = os.path.join(self.out_dir, 'en_ulb')
    rc = RC(directory=repo_dir)
    rc.as_dict()
    self.assertEqual(rc.resource.identifier, 'en_ulb')  # RJH: was 'ulb'
    self.assertEqual(rc.resource.type, 'bundle')
    self.assertEqual(rc.resource.format, 'text/usfm')
    self.assertEqual(rc.resource.file_ext, 'usfm')
    self.assertEqual(rc.resource.conformsto, 'pre-rc')
    self.assertEqual(rc.resource.modified, datetime.utcnow().strftime('%Y-%m-%d'))
    chapters = rc.project().chapters()
    self.assertEqual(len(chapters), 0)
    self.assertEqual(len(rc.project().usfm_files()), 8)
def mock_s3_obs_project(self):
    zip_file = os.path.join(self.resources_dir, 'converted_projects', 'en-obs-complete.zip')
    out_dir = os.path.join(self.temp_dir, 'en-obs-complete')
    unzip(zip_file, out_dir)
    project_dir = os.path.join(out_dir, 'door43', 'en-obs', '12345678')
    self.project_files = [f for f in os.listdir(project_dir) if os.path.isfile(os.path.join(project_dir, f))]
    self.project_key = 'u/door43/en-obs/12345678'
    for filename in self.project_files:
        AppSettings.cdn_s3_handler().upload_file(os.path.join(project_dir, filename),
                                                 '{0}/{1}'.format(self.project_key, filename))
    AppSettings.cdn_s3_handler().upload_file(os.path.join(out_dir, 'door43', 'en-obs', 'project.json'),
                                             'u/door43/en-obs/project.json')
    AppSettings.door43_s3_handler().upload_file(os.path.join(self.resources_dir, 'templates', 'project-page.html'),
                                                'templates/project-page.html')
def test_bible_from_tx_pre_rc(self):
    """ Populates the ResourceContainer object and verifies the output."""
    # test with an Indonesian Matthew ULB (translationStudio project)
    zip_file = os.path.join(self.resources_dir, 'id_mat_text_ulb-ts.zip')
    self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
    unzip(zip_file, self.out_dir)
    repo_dir = os.path.join(self.out_dir, 'id_mat_text_ulb-ts')
    rc = RC(directory=repo_dir)
    rc.as_dict()
    json = load_json_object(os.path.join(repo_dir, 'manifest.json'))
    self.assertEqual(rc.resource.identifier, json['resource']['id'])
    self.assertEqual(rc.resource.type, 'book')
    self.assertEqual(rc.resource.format, 'text/{0}'.format(json['format']))
    self.assertEqual(rc.resource.file_ext, json['format'])
    self.assertEqual(rc.resource.conformsto, 'pre-rc')
    self.assertEqual(rc.resource.modified, datetime.utcnow().strftime('%Y-%m-%d'))
    chapters = rc.projects[0].chapters()
    self.assertEqual(len(chapters), 29)
    chunks = rc.projects[0].chunks('01')
    self.assertEqual(len(chunks), 11)
def test_en_obs_package_json(self):
    """ Populates the ResourceContainer object and verifies the output."""
    # test with the English OBS
    zip_file = os.path.join(self.resources_dir, 'en-obs-package-json.zip')
    self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
    unzip(zip_file, self.out_dir)
    repo_dir = os.path.join(self.out_dir, 'en-obs')
    rc = RC(directory=repo_dir)
    rc.as_dict()
    package_json = load_json_object(os.path.join(repo_dir, 'package.json'))
    self.assertEqual(rc.resource.identifier, package_json['resource']['slug'])
    self.assertEqual(rc.resource.type, 'book')
    self.assertEqual(rc.resource.format, package_json['content_mime_type'])
    self.assertEqual(rc.resource.file_ext, 'md')
    self.assertEqual(rc.resource.conformsto, 'pre-rc')
    self.assertEqual(rc.resource.issued, package_json['resource']['status']['pub_date'])
    chapters = rc.projects[0].chapters()
    self.assertEqual(len(chapters), 2)
    chunks = rc.project().chunks('_back')
    self.assertEqual(chunks, ['back-matter.md'])
def mock_s3_bible_project(self, test_file_name, project_key, multi_part=False):
    converted_proj_dir = os.path.join(self.resources_dir, 'converted_projects')
    test_file_base = test_file_name.split('.zip')[0]
    zip_file = os.path.join(converted_proj_dir, test_file_name)
    out_dir = os.path.join(self.temp_dir, test_file_base)
    unzip(zip_file, out_dir)
    project_dir = os.path.join(out_dir, test_file_base) + os.path.sep
    self.project_files = file_utils.get_files(out_dir)
    self.project_key = project_key
    for filename in self.project_files:
        sub_path = filename.split(project_dir)[1].replace(os.path.sep, '/')  # Make sure it is a bucket path
        AppSettings.cdn_s3_handler().upload_file(filename, '{0}/{1}'.format(project_key, sub_path))
        if multi_part:  # copy files from cdn to door43
            base_name = os.path.basename(filename)
            if '.html' in base_name:
                with open(filename, 'r') as f:
                    soup = BeautifulSoup(f, 'html.parser')
                # add nav tag
                new_tag = soup.new_tag('div', id='right-sidebar')
                soup.body.append(new_tag)
                html = str(soup)
                file_utils.write_file(filename, html.encode('ascii', 'xmlcharrefreplace'))
            AppSettings.door43_s3_handler().upload_file(filename, '{0}/{1}'.format(project_key, base_name))
    # u, user, repo = project_key
    AppSettings.door43_s3_handler().upload_file(os.path.join(self.resources_dir, 'templates', 'project-page.html'),
                                                'templates/project-page.html')
def test_matt_complete_with_backslash(self):
    """ Runs the converter and verifies the output """
    zip_file = os.path.join(self.resources_dir, 'kpb_mat_text_udb.zip')
    # zip_file = self.make_duplicate_zip_that_can_be_deleted(zip_file)
    # out_zip_file = tempfile.NamedTemporaryFile(suffix='.zip', dir=self.temp_dir, delete=False).name
    self.in_dir = tempfile.mkdtemp(prefix='udb_in_', dir=self.temp_dir)
    unzip(zip_file, self.in_dir)
    with closing(Usfm2HtmlConverter('Bible', self.in_dir)) as tx:
        # tx.input_zip_file = zip_file
        results = tx.run()
    # verify the output
    # self.assertTrue(os.path.isfile(out_zip_file), "There was no output zip file produced.")
    self.assertIsNotNone(results)
    # self.out_dir = tempfile.mkdtemp(prefix='udb_out_', dir=self.temp_dir)
    # unzip(out_zip_file, self.out_dir)
    # files_to_verify = ['41-MAT.html']
    # self.verify_files(files_to_verify)
    self.assertTrue(isinstance(results, dict))
    print("results6", results)
    self.assertTrue(results['success'])
def verifyTransform(self, tx, missing_chapters=None):
    if not missing_chapters:
        missing_chapters = []
    self.assertTrue(os.path.isfile(self.out_zip_file), "There was no output zip file produced.")
    self.assertIsNotNone(self.return_val, "There was no return value.")
    self.out_dir = tempfile.mkdtemp(prefix='tX_test_obs_')
    unzip(self.out_zip_file, self.out_dir)
    remove_file(self.out_zip_file)
    files_to_verify = []
    files_missing = []
    for i in range(1, 51):
        file_name = str(i).zfill(2) + '.html'
        if i not in missing_chapters:
            files_to_verify.append(file_name)
        else:
            files_missing.append(file_name)
    for file_to_verify in files_to_verify:
        file_path = os.path.join(self.out_dir, file_to_verify)
        contents = self.getContents(file_path)
        self.assertIsNotNone(contents, 'OBS HTML body contents not found: {0}'.format(os.path.basename(file_path)))
    for file_to_verify in files_missing:
        file_path = os.path.join(self.out_dir, file_to_verify)
        contents = self.getContents(file_path)
        self.assertIsNone(contents, 'OBS HTML body contents present, but should not be: {0}'.format(os.path.basename(file_path)))
    self.assertEqual(self.return_val['success'], self.expected_success, "Mismatch in success boolean")
    self.assertEqual(len(self.return_val['info']) == 0, self.expected_info_empty, "Mismatch in expected info empty")
    for warning in self.return_val['warnings']:
        AppSettings.logger.debug("Warning: " + warning)
    for error in self.return_val['errors']:
        AppSettings.logger.debug("Error: " + error)
    self.assertEqual(len(self.return_val['warnings']), self.expected_warnings, "Mismatch in expected warnings")
    self.assertEqual(len(self.return_val['errors']), self.expected_errors, "Mismatch in expected errors")
def main(resource, lang, slug, name, checking, contrib, ver, check_level, comments, source):
    global downloaded_file, unzipped_dir, out_template
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])
    downloaded_file = '/tmp/{0}'.format(resource.rpartition('/')[2])
    # drop the '.zip' extension (str.strip would remove individual characters, not the suffix)
    unzipped_dir = '/tmp/{0}'.format(resource.rpartition('/')[2].rsplit('.zip', 1)[0])
    out_dir = out_template.format(slug, lang)

    if not os.path.isfile(downloaded_file):
        download_file(resource, downloaded_file)
    unzip(downloaded_file, unzipped_dir)

    books_published = {}
    there_were_errors = False
    for root, dirs, files in os.walk(unzipped_dir):
        # only usfm files
        files = [f for f in files if f[-3:].lower() == 'sfm']
        if not len(files):
            continue
        # there are usfm files, which book is this?
        test_dir = root.rpartition('/')[2]
        book = Book.create_book(test_dir)  # type: Book
        if book:
            book_text = ''
            files.sort()
            for usfm_file in files:
                with codecs.open(os.path.join(root, usfm_file), 'r', 'utf-8') as in_file:
                    book_text += in_file.read() + '\n'
            book.set_usfm(book_text)
            book.clean_usfm()
            # do basic checks
            book.verify_usfm_tags()
            book.verify_chapters_and_verses()
            if len(book.validation_errors) > 0:
                there_were_errors = True
            if there_were_errors:
                continue
            # get chunks for this book
            book.apply_chunks()
            # produces something like '01-GEN.usfm'
            book_file_name = '{0}-{1}.usfm'.format(str(book.number).zfill(2), book.book_id)
            print('Writing ' + book_file_name)
            write_file('{0}/{1}'.format(out_dir, book_file_name), book.usfm)
            meta = ['Bible: OT']
            if book.number > 39:
                meta = ['Bible: NT']
            books_published[book.book_id.lower()] = {'name': book.name,
                                                     'meta': meta,
                                                     'sort': str(book.number).zfill(2),
                                                     'desc': ''
                                                     }
    if there_were_errors:
        print_warning('There are errors you need to fix before continuing.')
        exit()

    source_ver = ver
    if '.' in ver:
        source_ver = ver.split('.')[0]
    status = {"slug": '{0}-{1}'.format(slug.lower(), lang),
              "name": name,
              "lang": lang,
              "date_modified": today,
              "books_published": books_published,
              "status": {"checking_entity": checking,
                         "checking_level": check_level,
                         "comments": comments,
                         "contributors": contrib,
                         "publish_date": today,
                         "source_text": source,
                         "source_text_version": source_ver,
                         "version": ver
                         }
              }
    write_file('{0}/status.json'.format(out_dir), status)

    print('Publishing to the API...')
    with api_publish(out_dir) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')

    print('Check {0} and do a git push'.format(out_dir))
def main(git_repo, tag, domain):
    global download_dir, out_template

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]
    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])  # str(datetime.date.today())
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    manifest = None
    metadata_obj = None
    content_dir = ''
    usfm_file = None

    # download the repository
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):
        if 'manifest.json' in files:
            # read the manifest
            try:
                print('Reading the manifest...', end=' ')
                manifest = load_json_object(os.path.join(root, 'manifest.json'))
                content_dir = root
                # look for the usfm file for the whole book
                found_usfm = glob(os.path.join(content_dir, '*.usfm'))
                if len(found_usfm) == 1:
                    usfm_file = os.path.join(content_dir, found_usfm[0])
            finally:
                print('finished.')
        if 'meta.json' in files:
            # read the metadata
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = BibleMetaData(os.path.join(root, 'meta.json'))
            finally:
                print('finished.')
        # if we have everything, exit the loop
        if manifest and metadata_obj:
            break

    # check for valid repository structure
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)
    if not metadata_obj:
        print_error('Did not find meta.json in {}'.format(git_repo))
        sys.exit(1)

    # get the versification data
    print('Getting versification info...', end=' ')
    vrs = Bible.get_versification(metadata_obj.versification)  # type: list<Book>
    # get the book object for this repository
    book = next((b for b in vrs if b.book_id.lower() == manifest['project']['id']), None)  # type: Book
    if not book:
        print_error('Book versification data was not found for "{}"'.format(manifest['project']['id']))
        sys.exit(1)
    print('finished')

    if usfm_file:
        read_unified_file(book, usfm_file)
    else:
        read_chunked_files(book, content_dir, metadata_obj)

    # do basic checks
    print('Running USFM checks...', end=' ')
    book.verify_chapters_and_verses(True)
    if book.validation_errors:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)
    else:
        print('finished.')

    # insert paragraph markers
    print('Inserting paragraph markers...', end=' ')
    Bible.insert_paragraph_markers(book)
    print('finished.')

    # get chunks for this book
    print('Chunking the text...', end=' ')
    Bible.chunk_book(metadata_obj.versification, book)
    book.apply_chunks()
    print('finished.')

    # save the output
    out_dir = out_template.format(domain, metadata_obj.slug)

    # produces something like '01-GEN.usfm'
    book_file_name = '{0}-{1}.usfm'.format(str(book.number).zfill(2), book.book_id)
    print('Writing ' + book_file_name + '...', end=' ')
    write_file('{0}/{1}'.format(out_dir, book_file_name), book.usfm)
    print('finished.')

    # look for an existing status.json file
    print('Updating the status for {0}...'.format(metadata_obj.lang), end=' ')
    status_file = '{0}/status.json'.format(out_dir)
    if os.path.isfile(status_file):
        status = BibleStatus(status_file)
    else:
        status = BibleStatus()

    status.update_from_meta_data(metadata_obj)
    # add this book to the list of "books_published"
    status.add_book_published(book)
    # update the "date_modified"
    status.date_modified = today
    print('finished.')

    # save the status.json file
    print('Writing status.json...', end=' ')
    status_json = json.dumps(status, sort_keys=True, indent=2, cls=BibleEncoder)
    write_file(status_file, status_json)
    print('finished')

    # let the API know it is there
    print('Publishing to the API...')
    with api_publish(out_dir) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')

    print_notice('Check {0} and do a git push'.format(out_dir))
def run(self):
    # download the archive
    file_to_download = self.source_url
    filename = self.source_url.rpartition('/')[2]
    downloaded_file = os.path.join(self.download_dir, filename)
    self.log_message('Downloading {0}...'.format(file_to_download))
    if not os.path.isfile(downloaded_file):
        try:
            download_file(file_to_download, downloaded_file)
        finally:
            if not os.path.isfile(downloaded_file):
                raise Exception("Failed to download {0}".format(file_to_download))
            else:
                self.log_message('Download successful.')

    # unzip the archive
    self.log_message('Unzipping {0}...'.format(downloaded_file))
    unzip(downloaded_file, self.files_dir)
    self.log_message('Unzip successful.')

    # create output directory
    make_dir(self.output_dir)

    # read the markdown files and output html files
    self.log_message('Processing the OBS markdown files')
    files = sorted(glob(os.path.join(self.files_dir, '*')))
    current_dir = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(current_dir, 'obs-template.html')) as template_file:
        html_template = string.Template(template_file.read())
    complete_html = ''
    for filename in files:
        if filename.endswith('.md'):
            # read the markdown file
            with codecs.open(filename, 'r', 'utf-8-sig') as md_file:
                md = md_file.read()
            html = markdown.markdown(md)
            complete_html += html
            html = html_template.safe_substitute(content=html)
            html_filename = os.path.splitext(os.path.basename(filename))[0] + ".html"
            output_file = os.path.join(self.output_dir, html_filename)
            write_file(output_file, html)
            self.log_message('Converted {0} to {1}.'.format(os.path.basename(filename), os.path.basename(html_filename)))
        else:
            try:
                output_file = os.path.join(self.output_dir, filename[len(self.files_dir) + 1:])
                if not os.path.exists(output_file):
                    if not os.path.exists(os.path.dirname(output_file)):
                        os.makedirs(os.path.dirname(output_file))
                    copyfile(filename, output_file)
            except Exception:
                pass

    # Do the OBS inspection
    inspector = OBSInspection(self.output_dir)
    try:
        inspector.run()
    except Exception as e:
        self.warning_message('Failed to run OBS inspector: {0}'.format(str(e)))

    for warning in inspector.warnings:
        self.warning_message(warning)
    for error in inspector.errors:
        self.error_message(error)

    complete_html = html_template.safe_substitute(content=complete_html)
    write_file(os.path.join(self.output_dir, 'all.html'), complete_html)
    self.log_message('Made one HTML of all stories in all.html.')
    self.log_message('Finished processing Markdown files.')
def mock_download_repo(source, target_dir):
    print('Mock downloading {}'.format(source))
    print('Unzipping to {}...'.format(target_dir), end=' ')
    unzip(os.path.join(TestPipeline.resources_dir, 'en-obs-master.zip'), target_dir)
    print('finished.')
def run(self):
    try:
        self.temp_dir = tempfile.mkdtemp(prefix='txOBS_')

        # clean up the git repo url
        if self.source_repo_url[-4:] == '.git':
            self.source_repo_url = self.source_repo_url[:-4]
        if self.source_repo_url[-1:] == '/':
            self.source_repo_url = self.source_repo_url[:-1]

        # download the archive
        file_to_download = join_url_parts(self.source_repo_url, 'archive/master.zip')
        repo_dir = self.source_repo_url.rpartition('/')[2]
        downloaded_file = os.path.join(self.temp_dir, repo_dir + '.zip')
        try:
            print('Downloading {0}...'.format(file_to_download), end=' ')
            if not os.path.isfile(downloaded_file):
                download_file(file_to_download, downloaded_file)
        finally:
            print('finished.')

        # unzip the archive
        try:
            print('Unzipping {0}...'.format(downloaded_file), end=' ')
            unzip(downloaded_file, self.temp_dir)
        finally:
            print('finished.')

        # get the manifest
        try:
            print('Reading the manifest...', end=' ')
            manifest = load_json_object(os.path.join(self.temp_dir, 'manifest.json'))
        finally:
            print('finished.')

        # create output directory
        make_dir(self.output_directory)

        # read the markdown files and output html files
        try:
            print('Processing the OBS markdown files')
            files_to_process = []
            for i in range(1, 51):
                files_to_process.append(str(i).zfill(2) + '.md')

            current_dir = os.path.dirname(inspect.stack()[0][1])
            with codecs.open(os.path.join(current_dir, 'template.html'), 'r', 'utf-8-sig') as html_file:
                html_template = html_file.read()

            for file_to_process in files_to_process:
                # read the markdown file
                file_name = os.path.join(self.temp_dir, repo_dir, 'content', file_to_process)
                with codecs.open(file_name, 'r', 'utf-8-sig') as md_file:
                    md = md_file.read()
                html = markdown.markdown(md)
                html = TransformOBS.dir_re.sub(r'\1\n' + html + r'\n\2', html_template)
                write_file(os.path.join(self.output_directory, file_to_process.replace('.md', '.html')), html)

        except IOError as ioe:
            print_error('{0}: {1}'.format(ioe.strerror, ioe.filename))
            self.errors.append(ioe)
        except Exception as e:
            print_error(str(e))
            self.errors.append(e)
        finally:
            print('finished.')

    except Exception as e:
        print_error(str(e))
        self.errors.append(e)
def unzip_resource(self, zip_name):
    zip_file = os.path.join(self.resources_dir, zip_name)
    out_dir = tempfile.mkdtemp(dir=self.temp_dir, prefix='linter_test_')
    unzip(zip_file, out_dir)
    return out_dir
def main(git_repo, tag, domain):
    global download_dir, out_template

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]
    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    books_published = {}
    metadata_obj = None
    usfm_dir = None

    # download the repository
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):
        if 'meta.json' in files:
            # read the metadata
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = BibleMetaData(os.path.join(root, 'meta.json'))
            finally:
                print('finished.')
        if 'usfm' in dirs:
            usfm_dir = os.path.join(root, 'usfm')
        # if we have everything, exit the loop
        if usfm_dir and metadata_obj:
            break

    # check for valid repository structure
    if not metadata_obj:
        print_error('Did not find meta.json in {}'.format(git_repo))
        sys.exit(1)
    if not usfm_dir:
        print_error('Did not find the usfm directory in {}'.format(git_repo))
        sys.exit(1)

    # get the versification data
    vrs = Bible.get_versification(metadata_obj.versification)  # type: list<Book>
    out_dir = out_template.format(domain, metadata_obj.slug, metadata_obj.lang)

    # walk through the usfm files
    usfm_files = glob(os.path.join(usfm_dir, '*.usfm'))
    errors_found = False
    for usfm_file in usfm_files:
        # read the file
        with codecs.open(usfm_file, 'r', 'utf-8') as in_file:
            book_text = in_file.read()

        # get the book id
        book_search = id_re.search(book_text)
        if not book_search:
            print_error('Book id not found in {}'.format(usfm_file))
            sys.exit(1)
        book_id = book_search.group(1)
        print('Beginning {}...'.format(book_id), end=' ')

        # get book versification info
        book = next((b for b in vrs if b.book_id == book_id), None)
        if not book:
            print_error('Book versification data was not found for "{}"'.format(book_id))
            sys.exit(1)

        # remove \s5 lines
        book_text = s5_re.sub('', book_text)

        # get the usfm for the book
        book.set_usfm(book_text)

        # do basic checks
        book.verify_usfm_tags()
        book.verify_chapters_and_verses(True)
        if book.validation_errors:
            errors_found = True

        # get chunks for this book
        Bible.chunk_book(metadata_obj.versification, book)
        book.apply_chunks()

        # produces something like '01-GEN.usfm'
        book_file_name = '{0}-{1}.usfm'.format(str(book.number).zfill(2), book.book_id)
        print('Writing ' + book_file_name + '...', end=' ')
        write_file('{0}/{1}'.format(out_dir, book_file_name), book.usfm)

        meta = ['Bible: OT']
        if book.number > 39:
            meta = ['Bible: NT']
        books_published[book.book_id.lower()] = {'name': book.name,
                                                 'meta': meta,
                                                 'sort': str(book.number).zfill(2),
                                                 'desc': ''
                                                 }
        print('finished.')

    # stop if errors were found
    if errors_found:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)

    print('Writing status.json...', end=' ')
    status = {"slug": '{0}'.format(metadata_obj.slug.lower()),
              "name": metadata_obj.name,
              "lang": metadata_obj.lang,
              "date_modified": today,
              "books_published": books_published,
              "status": {"checking_entity": metadata_obj.checking_entity,
                         "checking_level": metadata_obj.checking_level,
                         "comments": metadata_obj.comments,
                         "contributors": metadata_obj.contributors,
                         "publish_date": today,
                         "source_text": metadata_obj.source_text,
                         "source_text_version": metadata_obj.source_text_version,
                         "version": metadata_obj.version
                         }
              }
    write_file('{0}/status.json'.format(out_dir), status, indent=2)
    print('finished.')

    print()
    print('Publishing to the API...')
    with api_publish(out_dir) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')

    print_notice('Check {0} and do a git push'.format(out_dir))
#========================================
try:
    print( 'Downloading converted file from: {0} to: {1} ...'.format( convertedZipUrl, convertedZipFile ), end=' ' )
    download_file( convertedZipUrl, convertedZipFile )
finally:
    print( 'finished download.' )

# Unzip the archive
door43Dir = tempfile.mkdtemp( prefix='door43_' )
if True: #if os.path.exists( convertedZipFile ):
    try:
        print( 'Unzipping {0}...'.format( convertedZipFile ), end=' ' )
        unzip( convertedZipFile, door43Dir )
    finally:
        print( 'finished unzip.' )
    usr = '******' + payload['repository']['owner']['username']
    s3ProjectKey = os.path.join( usr, repoName, hash )
    print( "s3ProjectKey: " + s3ProjectKey )
else:
    print( 'Nothing downloaded' )

# Delete existing files in door43.org for this Project Key
s3Resource = boto3.resource( 's3' )
s3Bucket = s3Resource.Bucket( door43Bucket )
for obj in s3Bucket.objects.filter( Prefix=s3ProjectKey ):
    s3Resource.Object( s3Bucket.name, obj.key ).delete()
def main(git_repo, tag, no_pdf):
    global download_dir

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]
    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])  # str(datetime.date.today())
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    manifest = None
    status = None  # type: OBSStatus
    content_dir = None

    # download the repository
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):
        if 'manifest.json' in files:
            # read the manifest
            try:
                print('Reading the manifest...', end=' ')
                content_dir = root
                manifest = load_json_object(os.path.join(root, 'manifest.json'))
            finally:
                print('finished.')
        if 'status.json' in files:
            # read the meta data
            try:
                print('Reading the status...', end=' ')
                content_dir = root
                status = OBSStatus(os.path.join(root, 'status.json'))
            finally:
                print('finished.')
        # if we have everything, exit the loop
        if content_dir and manifest and status:
            break

    # check for valid repository structure
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)
    if not status:
        print_error('Did not find status.json in {}'.format(git_repo))
        sys.exit(1)

    print('Initializing OBS object...', end=' ')
    lang = manifest['target_language']['id']
    obs_obj = OBS()
    obs_obj.date_modified = today
    obs_obj.direction = manifest['target_language']['direction']
    obs_obj.language = lang
    print('finished')

    obs_obj.chapters = load_obs_chapters(content_dir)
    obs_obj.chapters.sort(key=lambda c: c['number'])

    if not obs_obj.verify_all():
        print_error('Quality check did not pass.')
        sys.exit(1)

    print('Loading languages...', end=' ')
    lang_dict = OBS.load_lang_strings()
    print('finished.')

    print('Loading the catalog...', end=' ')
    export_dir = '/var/www/vhosts/door43.org/httpdocs/exports'
    # uw_cat_path = os.path.join(unfoldingWord_dir, 'obs-catalog.json')
    # uw_catalog = load_json_object(uw_cat_path, [])
    # uw_cat_langs = [x['language'] for x in uw_catalog]
    cat_path = os.path.join(export_dir, 'obs-catalog.json')
    catalog = load_json_object(cat_path, [])
    print('finished')

    print('Getting already published languages...', end=' ')
    json_lang_file_path = os.path.join(export_dir, lang, 'obs', 'obs-{0}.json'.format(lang))
    # prev_json_lang = load_json_object(json_lang_file_path, {})

    if lang not in lang_dict:
        print("Configuration for language {0} missing.".format(lang))
        sys.exit(1)
    print('finished.')

    updated = update_language_catalog(lang, obs_obj.direction, status, today, lang_dict, catalog)

    print('Writing the OBS file to the exports directory...', end=' ')
    cur_json = json.dumps(obs_obj, sort_keys=True, cls=OBSEncoder)
    if updated:
        ([x for x in catalog if x['language'] == lang][0]['date_modified']) = today
        write_file(json_lang_file_path.replace('.txt', '.json'), cur_json)
    print('finished.')

    export_to_api(lang, status, today, cur_json)

    cat_json = json.dumps(catalog, sort_keys=True, cls=OBSEncoder)
    write_file(cat_path, cat_json)

    # update the catalog
    print_ok('STARTING: ', 'updating the catalogs.')
    update_catalog()
    print_ok('FINISHED: ', 'updating the catalogs.')

    if no_pdf:
        return

    create_pdf(lang, status.checking_level, status.version)
def handle(event, context):
    # Getting data from payload which is the JSON that was sent from tx-manager
    if 'data' not in event:
        raise Exception('"data" not in payload')
    job = event['data']

    env_vars = {}
    if 'vars' in event and isinstance(event['vars'], dict):
        env_vars = event['vars']

    # Getting the bucket to where we will unzip the converted files for door43.org. It is different from
    # production and testing, thus it is an environment variable the API Gateway gives us
    if 'cdn_bucket' not in env_vars:
        raise Exception('"cdn_bucket" was not in payload')
    cdn_handler = S3Handler(env_vars['cdn_bucket'])

    if 'identifier' not in job or not job['identifier']:
        raise Exception('"identifier" not in payload')
    owner_name, repo_name, commit_id = job['identifier'].split('/')
    # The identifier is how to know which username/repo/commit this callback goes to
    s3_commit_key = 'u/{0}/{1}/{2}'.format(owner_name, repo_name, commit_id)

    # Download the ZIP file of the converted files
    converted_zip_url = job['output']
    converted_zip_file = os.path.join(tempfile.gettempdir(), converted_zip_url.rpartition('/')[2])
    try:
        print('Downloading converted zip file from {0}...'.format(converted_zip_url))
        if not os.path.isfile(converted_zip_file):
            download_file(converted_zip_url, converted_zip_file)
    finally:
        print('finished.')

    # Unzip the archive
    unzip_dir = tempfile.mkdtemp(prefix='unzip_')
    try:
        print('Unzipping {0}...'.format(converted_zip_file))
        unzip(converted_zip_file, unzip_dir)
    finally:
        print('finished.')

    # Upload all files to the cdn_bucket with the key of <user>/<repo_name>/<commit> of the repo
    for root, dirs, files in os.walk(unzip_dir):
        for f in sorted(files):
            path = os.path.join(root, f)
            key = s3_commit_key + path.replace(unzip_dir, '')
            print('Uploading {0} to {1}'.format(f, key))
            cdn_handler.upload_file(path, key)

    # Now download the existing build_log.json file, update it and upload it back to S3
    build_log_json = cdn_handler.get_json(s3_commit_key + '/build_log.json')
    build_log_json['started_at'] = job['started_at']
    build_log_json['ended_at'] = job['ended_at']
    build_log_json['success'] = job['success']
    build_log_json['status'] = job['status']
    build_log_json['message'] = job['message']
    if 'log' in job and job['log']:
        build_log_json['log'] = job['log']
    else:
        build_log_json['log'] = []
    if 'warnings' in job and job['warnings']:
        build_log_json['warnings'] = job['warnings']
    else:
        build_log_json['warnings'] = []
    if 'errors' in job and job['errors']:
        build_log_json['errors'] = job['errors']
    else:
        build_log_json['errors'] = []
    build_log_file = os.path.join(tempfile.gettempdir(), 'build_log_finished.json')
    write_file(build_log_file, build_log_json)
    cdn_handler.upload_file(build_log_file, s3_commit_key + '/build_log.json', 0)

    # Download the project.json file for this repo (create it if doesn't exist) and update it
    project_json_key = 'u/{0}/{1}/project.json'.format(owner_name, repo_name)
    project_json = cdn_handler.get_json(project_json_key)
    project_json['user'] = owner_name
    project_json['repo'] = repo_name
    project_json['repo_url'] = 'https://git.door43.org/{0}/{1}'.format(owner_name, repo_name)
    commit = {
        'id': commit_id,
        'created_at': job['created_at'],
        'status': job['status'],
        'success': job['success'],
        'started_at': None,
        'ended_at': None
    }
    if 'started_at' in job:
        commit['started_at'] = job['started_at']
    if 'ended_at' in job:
        commit['ended_at'] = job['ended_at']
    if 'commits' not in project_json:
        project_json['commits'] = []
    commits = []
    for c in project_json['commits']:
        if c['id'] != commit_id:
            commits.append(c)
    commits.append(commit)
    project_json['commits'] = commits
    project_file = os.path.join(tempfile.gettempdir(), 'project.json')
    write_file(project_file, project_json)
    cdn_handler.upload_file(project_file, project_json_key, 0)

    print('Finished deploying to cdn_bucket. Done.')
def main(git_repo, tag):
    global download_dir

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]
    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    metadata_obj = None
    content_dir = None
    toc_obj = None

    # download the repository
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):
        if 'meta.yaml' in files:
            # read the metadata
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = TAMetaData(os.path.join(root, 'meta.yaml'))
            finally:
                print('finished.')
        if 'toc.yaml' in files:
            # read the table of contents
            try:
                print('Reading the toc...', end=' ')
                toc_obj = TATableOfContents(os.path.join(root, 'toc.yaml'))
            finally:
                print('finished.')
        if 'content' in dirs:
            content_dir = os.path.join(root, 'content')
        # if we have everything, exit the loop
        if content_dir and metadata_obj and toc_obj:
            break

    # check for valid repository structure
    if not metadata_obj:
        print_error('Did not find meta.yaml in {}'.format(git_repo))
        sys.exit(1)
    if not content_dir:
        print_error('Did not find the content directory in {}'.format(git_repo))
        sys.exit(1)
    if not toc_obj:
        print_error('Did not find toc.yaml in {}'.format(git_repo))
        sys.exit(1)

    # check for missing pages
    check_missing_pages(toc_obj, content_dir)

    # generate the pages
    print('Generating the manual...', end=' ')
    manual = TAManual(metadata_obj, toc_obj)
    manual.load_pages(content_dir)
    print('finished.')

    file_name = os.path.join(get_output_dir(), '{0}_{1}.json'.format(manual.meta.manual, manual.meta.volume))
    print('saving to {0} ...'.format(file_name), end=' ')
    content = json.dumps(manual, sort_keys=True, indent=2, cls=TAEncoder)
    write_file(file_name, content)
    print('finished.')