def mock_s3_tn_project(self, part):
    """
    Seed the mock S3 buckets with the converted en_tn fixture project.

    Unzips the en_tn_converted.zip test resource, stamps the given `part`
    number into its build log, then uploads the project files to the CDN
    bucket and the page template to the door43 bucket.
    """
    zip_path = os.path.join(self.resources_dir, 'converted_projects', 'en_tn_converted.zip')
    extract_dir = os.path.join(self.temp_dir, 'en_tn_converted')
    unzip(zip_path, extract_dir)
    src_dir = os.path.join(extract_dir, 'en_tn_converted')
    # Remember the project's plain files so tests can assert against them later
    self.project_files = [entry for entry in os.listdir(src_dir)
                          if os.path.isfile(os.path.join(src_dir, entry))]
    self.project_key = 'u/door43/en_tn/12345678'
    # Stamp the requested part number into the build log before uploading
    build_log_path = os.path.join(src_dir, 'build_log.json')
    build_log = file_utils.load_json_object(build_log_path)
    build_log['part'] = part
    file_utils.write_file(build_log_path, build_log)
    AppSettings.cdn_s3_handler().upload_file(
        build_log_path,
        '{0}/{1}/build_log.json'.format(self.project_key, part))
    AppSettings.cdn_s3_handler().upload_file(
        os.path.join(src_dir, 'index.json'),
        '{0}/{1}/index.json'.format(self.project_key, part))
    # The 'finished' flag object is just a copy of the build log
    AppSettings.cdn_s3_handler().upload_file(
        build_log_path,
        '{0}/{1}/finished'.format(self.project_key, part))
    AppSettings.cdn_s3_handler().upload_file(
        os.path.join(src_dir, '01-GEN.html'),
        '{0}/{1}/01-GEN.html'.format(self.project_key, part))
    AppSettings.cdn_s3_handler().upload_file(
        os.path.join(src_dir, 'project.json'),
        'u/door43/en_tq/project.json')
    AppSettings.door43_s3_handler().upload_file(
        os.path.join(self.resources_dir, 'templates', 'project-page.html'),
        'templates/project-page.html')
def setUp(self):
    """Runs before each test."""
    AppSettings(prefix=f'{self._testMethodName}-')
    AppSettings.cdn_s3_handler().create_bucket()
    AppSettings.door43_s3_handler().create_bucket()
    self.temp_dir = tempfile.mkdtemp(prefix='Door43_test_project_deployer')
    self.deployer = ProjectDeployer(self.temp_dir)
    # Seed the TdLanguage lookup table with a small fixed set of languages,
    # keyed by language code ('lc')
    language_records = (
        {'gw': False, 'ld': 'ltr', 'ang': 'Afar', 'lc': 'aa',
         'ln': 'Afaraf', 'lr': 'Africa', 'pk': 6},
        {'gw': True, 'ld': 'ltr', 'ang': 'English', 'lc': 'en',
         'ln': 'English', 'lr': 'Europe', 'pk': 1747},
        {'gw': True, 'ld': 'ltr', 'ang': 'Spanish', 'lc': 'es',
         'ln': 'espa\xf1ol', 'lr': 'Europe', 'pk': 1776},
        {'gw': True, 'ld': 'ltr', 'ang': 'French', 'lc': 'fr',
         'ln': 'fran\xe7ais, langue fran\xe7aise', 'lr': 'Europe', 'pk': 1868},
    )
    TdLanguage.language_list = {record['lc']: TdLanguage(record)
                                for record in language_records}
def validate_bible_results(self, ret, build_log_key, expect_success, output_key):
    """
    Assert that the deploy returned the expected success flag and — when
    success was expected and an output key was given — that the output
    object actually exists in the door43 bucket.
    """
    self.assertEqual(ret, expect_success)
    # Only check the bucket when the caller both expected success and named an output
    if expect_success and output_key:
        self.assertTrue(AppSettings.door43_s3_handler().key_exists(output_key))
def mock_s3_obs_project(self):
    """
    Seed the mock S3 buckets with the converted en-obs fixture project.

    Unzips en-obs-complete.zip, uploads every plain file of the project to
    the CDN bucket under the project key, plus project.json and the page
    template to their usual locations.
    """
    zip_path = os.path.join(self.resources_dir, 'converted_projects', 'en-obs-complete.zip')
    extract_dir = os.path.join(self.temp_dir, 'en-obs-complete')
    unzip(zip_path, extract_dir)
    project_dir = os.path.join(extract_dir, 'door43', 'en-obs', '12345678')
    # Only the plain files (not sub-folders) belong to the commit upload
    self.project_files = [entry for entry in os.listdir(project_dir)
                          if os.path.isfile(os.path.join(project_dir, entry))]
    self.project_key = 'u/door43/en-obs/12345678'
    for name in self.project_files:
        AppSettings.cdn_s3_handler().upload_file(
            os.path.join(project_dir, name),
            '{0}/{1}'.format(self.project_key, name))
    AppSettings.cdn_s3_handler().upload_file(
        os.path.join(extract_dir, 'door43', 'en-obs', 'project.json'),
        'u/door43/en-obs/project.json')
    AppSettings.door43_s3_handler().upload_file(
        os.path.join(self.resources_dir, 'templates', 'project-page.html'),
        'templates/project-page.html')
def mock_s3_bible_project(self, test_file_name, project_key, multi_part=False):
    """
    Seed the mock S3 buckets with a converted Bible fixture project.

    Unzips the named test resource and uploads each file to the CDN bucket.
    When multi_part is True, also injects a right-sidebar div into each html
    file and copies every file to the door43 bucket (as a multi-part deploy
    would have done).
    """
    converted_proj_dir = os.path.join(self.resources_dir, 'converted_projects')
    test_file_base = test_file_name.split('.zip')[0]
    archive_path = os.path.join(converted_proj_dir, test_file_name)
    extract_dir = os.path.join(self.temp_dir, test_file_base)
    unzip(archive_path, extract_dir)
    # Trailing separator so splitting a full path on it yields the sub-path
    project_dir = os.path.join(extract_dir, test_file_base) + os.path.sep
    self.project_files = file_utils.get_files(extract_dir)
    self.project_key = project_key
    for filepath in self.project_files:
        # Make sure it is a bucket path
        sub_path = filepath.split(project_dir)[1].replace(os.path.sep, '/')
        AppSettings.cdn_s3_handler().upload_file(
            filepath, '{0}/{1}'.format(project_key, sub_path))
        if multi_part:  # copy files from cdn to door43
            base_name = os.path.basename(filepath)
            if '.html' in base_name:
                with open(filepath, 'r') as f:
                    soup = BeautifulSoup(f, 'html.parser')
                # add nav tag
                sidebar_div = soup.new_tag('div', id='right-sidebar')
                soup.body.append(sidebar_div)
                html = str(soup)
                file_utils.write_file(filepath,
                                      html.encode('ascii', 'xmlcharrefreplace'))
            AppSettings.door43_s3_handler().upload_file(
                filepath, '{0}/{1}'.format(project_key, base_name))
    AppSettings.door43_s3_handler().upload_file(
        os.path.join(self.resources_dir, 'templates', 'project-page.html'),
        'templates/project-page.html')
def get_jobID_from_commit_buildLog(project_folder_key:str, ix:int, commit_id:str) -> Optional[str]:
    """
    Look for build_log.json in the Door43 bucket and extract the job_id from it.

    NOTE: It seems like old builds also put build_log.json in the CDN bucket
        but the new ones don't seem to have that.

    Return None if anything fails.
    """
    file_key = f'{project_folder_key}{commit_id}/build_log.json'
    try:
        s3_object = AppSettings.door43_s3_handler().resource.Object(
            bucket_name=AppSettings.door43_bucket_name, key=file_key)
        raw_text = s3_object.get()['Body'].read().decode('utf-8')
        return json.loads(raw_text)['job_id']
    except Exception as e:
        # Any failure (missing object, bad json, missing key) is swallowed
        # and reported as None, per the docstring contract.
        # NOTE(review): 'prefix' is presumably a module-level constant — confirm it exists at module scope
        AppSettings.logger.critical(f"get_jobID_from_commit_buildLog threw an exception while getting {prefix}D43 {ix:,} '{file_key}': {e}")
        return None
def deploy_revision_to_door43(self, build_log:Dict[str,Any]) -> bool:
    """
    Deploys a single revision of a project to door43.org

    Templates the converted files then uploads them and the build log
        to the S3 bucket and creates a 'deployed' file there too.

    :param dict build_log: build log for this (single-part) revision —
        must not be a 'multiple'/'part' build (asserted below)
    :return bool: False if templating failed, otherwise True
        (upload/redirect errors are logged but still return True)
    """
    start = time.time()
    AppSettings.logger.debug(f"Deploying, build log: {json.dumps(build_log)[:256]} …")
    assert 'multiple' not in build_log
    assert 'part' not in build_log

    user = build_log['repo_owner_username'] # was 'repo_owner'
    repo_name = build_log['repo_name']
    commit_id = build_log['commit_id'] # Hashes should already be reduced to 10 characters
    s3_commit_key = f'u/{user}/{repo_name}/{commit_id}'
    s3_repo_key = f'u/{user}/{repo_name}'

    source_dir = tempfile.mkdtemp(prefix='source_', dir=self.temp_dir)
    template_dir = tempfile.mkdtemp(prefix='template_', dir=self.temp_dir)
    output_dir = tempfile.mkdtemp(prefix='output_', dir=self.temp_dir)

    # Do the templating first
    resource_type = build_log['resource_type']
    template_key = 'templates/project-page.html'
    template_file = os.path.join(template_dir, 'project-page.html')
    AppSettings.logger.info(f"Downloading project page template from {AppSettings.door43_bucket_name} '{template_key}' to {template_file} …")
    AppSettings.door43_s3_handler().download_file(template_key, template_file)
    source_dir, success = self.template_converted_files(build_log, output_dir, repo_name,
                                                       resource_type, s3_commit_key,
                                                       source_dir, start, template_file)
    build_log['warnings'].extend(self.error_messages)
    if not success:
        AppSettings.logger.critical("Templating failed—returning False")
        return False

    #######################
    #
    #  Now do the deploy
    #
    #######################

    # Copy all other files over that don't already exist in output_dir, like css files
    # Copying from source_dir to output_dir (both are folders inside main temp folder)
    for filename in sorted(glob(os.path.join(source_dir, '*'))):
        output_file = os.path.join(output_dir, os.path.basename(filename))
        if not os.path.exists(output_file) and not os.path.isdir(filename):
            copyfile(filename, output_file)

    # Save master build_log.json
    build_log['ended_at'] = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
    file_utils.write_file(os.path.join(output_dir, 'build_log.json'), build_log)
    AppSettings.logger.debug(f"Final build_log.json: {json.dumps(build_log)[:256]} …")

    # Clear out the door43.org bucket's commit dir
    AppSettings.logger.info(f"Deleting all files in the website bucket directory: {AppSettings.door43_bucket_name}/{s3_commit_key} …")
    AppSettings.door43_s3_handler().bucket.objects.filter(Prefix=s3_commit_key).delete()

    # Upload all files to the S3 door43.org bucket
    AppSettings.logger.info(f"Uploading all files to the website bucket directory: {AppSettings.door43_bucket_name}/{s3_commit_key} …")
    has_index_file = False
    for root, _dirs, files in os.walk(output_dir):
        for filename in sorted(files):
            filepath = os.path.join(root, filename)
            if os.path.isdir(filepath):
                continue
            # BUGFIX: remember when the project supplies its own top-level index.html.
            # Previously has_index_file was never set, so the genuine index.html was
            # always clobbered by the self-redirect created further below.
            if root == output_dir and filename == 'index.html':
                has_index_file = True
            key = s3_commit_key + filepath.replace(output_dir, '').replace(os.path.sep, '/')
            AppSettings.logger.debug(f"Uploading {filepath} to {AppSettings.door43_bucket_name} bucket {key} …")
            AppSettings.door43_s3_handler().upload_file(filepath, key, cache_time=0)

    # Decide which html file the index redirects should point at:
    # default to index.html unless only other html files exist
    redirect_to_file = "index.html"
    html_files = get_sorted_Bible_html_filepath_list(output_dir)
    if len(html_files) > 0 and os.path.join(output_dir, "index.html") not in html_files:
        redirect_to_file = html_files[0].replace(output_dir, "").lstrip("/")

    # Now we place json files and redirect index.html for the whole repo to this index.html file
    AppSettings.logger.info("Copying json files and setting up redirect…")
    try:
        AppSettings.door43_s3_handler().copy(from_key=f'{s3_repo_key}/project.json', from_bucket=AppSettings.cdn_bucket_name)
        AppSettings.door43_s3_handler().copy(from_key=f'{s3_commit_key}/manifest.json', to_key=f'{s3_repo_key}/manifest.json')
        master_exists = AppSettings.door43_s3_handler().object_exists(f'{s3_repo_key}/master/index.html')
        main_exists = AppSettings.door43_s3_handler().object_exists(f'{s3_repo_key}/main/index.html')
        # Only point the repo root at this commit when it IS master/main,
        # or when no master/main deployment exists yet
        if commit_id == 'master' or commit_id == 'main' or (not master_exists and not main_exists):
            AppSettings.door43_s3_handler().redirect(key=s3_repo_key, location=f"/{s3_commit_key}/{redirect_to_file}")
            AppSettings.door43_s3_handler().redirect(key=s3_repo_key + '/index.html', location=f"/{s3_commit_key}/{redirect_to_file}")
        AppSettings.door43_s3_handler().redirect(key=s3_commit_key, location=f"/{s3_commit_key}/{redirect_to_file}")
        if not has_index_file:
            AppSettings.door43_s3_handler().redirect(key=f"{s3_commit_key}/index.html", location=f"/{s3_commit_key}/{redirect_to_file}")
        self.write_data_to_file_and_upload_to_CDN(output_dir, s3_commit_key, fname='deployed', data=' ') # flag that deploy has finished
    except Exception as e:
        # Deliberate best-effort: log the failure but still report the deploy as done
        AppSettings.logger.critical(f"Deployer threw an exception: {e}: {traceback.format_exc()}")

    elapsed_seconds = int(time.time() - start)
    AppSettings.logger.debug(f"Deploy completed in {elapsed_seconds} seconds.")
    self.close()
    return True
def update_project_file(build_log:Dict[str,Any], output_dirpath:str) -> None:
    """
    project.json is read by the Javascript in door43.org/js/project-page-functions.js
    The commits are used to update the Revision list in the left side-bar.

    Changed March 2020 to read project.json from door43 bucket (not cdn bucket).
        (The updated file gets written to both buckets.)

    Side effects: writes project.json locally into output_dirpath and uploads it
    to both the CDN and door43 buckets; may also upload a dated backup copy and
    (via remove_excess_commits) delete obsolete commit folders from S3.
    """
    # Sometimes this gets too big — truncate the warnings only in the logged copy
    build_log_copy = build_log.copy()
    if 'warnings' in build_log_copy and len(build_log_copy['warnings']) > 10:
        build_log_copy['warnings'] = f"{build_log_copy['warnings'][:5]} …… {build_log_copy['warnings'][-5:]}"
    AppSettings.logger.debug(f"Callback.update_project_file({build_log_copy}, output_dir={output_dirpath})…")

    commit_id = build_log['commit_id']
    repo_owner_username = build_log['repo_owner_username'] # was 'repo_owner'
    repo_name = build_log['repo_name']
    project_folder_key = f'u/{repo_owner_username}/{repo_name}/'
    project_json_key = f'{project_folder_key}project.json'
    # Read the existing project.json (door43 bucket, not cdn — see docstring)
    project_json = AppSettings.door43_s3_handler().get_json(project_json_key)
    project_json['user'] = repo_owner_username
    project_json['repo'] = repo_name
    project_json['repo_url'] = f'https://{AppSettings.gogs_url}/{repo_owner_username}/{repo_name}'
    # The entry for the commit being processed right now — appended to the list below
    current_commit = {
        'id': commit_id,
        'job_id': build_log['job_id'],
        'type': build_log['commit_type'],
        'created_at': build_log['created_at'],
        'status': build_log['status'],
        'success': build_log['success'],
        # 'started_at': None,
        # 'ended_at': None
    }
    if build_log['commit_hash']:
        current_commit['commit_hash'] = build_log['commit_hash']
    # if 'started_at' in build_log:
    #     current_commit['started_at'] = build_log['started_at']
    # if 'ended_at' in build_log:
    #     current_commit['ended_at'] = build_log['ended_at']

    def is_hash(commit_str:str) -> bool:
        """
        Checks to see if this looks like a hexadecimal (abbreviated to 10 chars) hash
        """
        if len(commit_str) != 10: return False
        for char in commit_str:
            if char not in 'abcdef1234567890': return False
        return True

    if 'commits' not in project_json:
        project_json['commits'] = []
    AppSettings.logger.info(f"Rebuilding commits list (currently {len(project_json['commits']):,}) for project.json…")
    # Rebuild the commits list, back-filling missing 'job_id'/'type' fields on old entries
    commits:List[Dict[str,Any]] = []
    no_job_id_count = 0
    for ix, c in enumerate(project_json['commits']):
        AppSettings.logger.debug(f" Looking at {len(commits)}/ '{c['id']}'. Is current commit={c['id'] == commit_id}…")
        # if c['id'] == commit_id: # the old entry for the current commit id
            # Why did this code ever get in here in callback!!!!
            #   (Deletes pre-convert folder when it shouldn't!)
            # zip_file_key = f"preconvert/{current_commit['job_id']}.zip"
            # AppSettings.logger.info(f"  Removing obsolete {prefix}pre-convert '{current_commit['type']}' '{commit_id}' {zip_file_key} …")
            # try:
            #     clear_commit_directory_from_bucket(AppSettings.pre_convert_s3_handler(), zip_file_key)
            # except Exception as e:
            #     AppSettings.logger.critical(f"  Remove obsolete pre-convert zipfile threw an exception while attempted to delete '{zip_file_key}': {e}")
            # Not appended to commits here coz it happens below instead
        if c['id'] != commit_id: # a different commit from the current one
            if 'job_id' not in c: # Might be able to remove this eventually
                c['job_id'] = get_jobID_from_commit_buildLog(project_folder_key, ix, c['id'])
                # Returned job id might have been None
                if not c['job_id']:
                    no_job_id_count += 1
            if 'type' not in c: # Might be able to remove this eventually
                c['type'] = 'hash' if is_hash(c['id']) \
                    else 'artifact' if c['id']in ('latest','OhDear') \
                    else 'unknown'
            commits.append(c)
    if no_job_id_count > 10:
        len_commits = len(commits)
        AppSettings.logger.info(f"{no_job_id_count} job ids were unable to be found. Have {len_commits} historical commit{'' if len_commits==1 else 's'}.")
    # The current commit always goes on the end of the list
    commits.append(current_commit)

    # Prune obsolete commits (this also deletes their S3 folders and sets up redirects)
    cleaned_commits = remove_excess_commits(commits, repo_owner_username, repo_name)
    if len(cleaned_commits) < len(commits): # Then we removed some
        # Save a dated (coz this could happen more than once) backup of the project.json file
        save_project_filename = f"project.save.{datetime.utcnow().strftime('%Y-%m-%d_%H-%M-%S')}.json"
        save_project_filepath = os.path.join(output_dirpath, save_project_filename)
        write_file(save_project_filepath, project_json)
        save_project_json_key = f'{project_folder_key}{save_project_filename}'
        # Don't need to save this twice (March 2020)
        # AppSettings.cdn_s3_handler().upload_file(save_project_filepath, save_project_json_key, cache_time=100)
        AppSettings.door43_s3_handler().upload_file(save_project_filepath, save_project_json_key, cache_time=100)

    # Now save the updated project.json file in both places
    project_json['commits'] = cleaned_commits
    project_filepath = os.path.join(output_dirpath, 'project.json')
    write_file(project_filepath, project_json)
    AppSettings.cdn_s3_handler().upload_file(project_filepath, project_json_key, cache_time=1)
    AppSettings.door43_s3_handler().upload_file(project_filepath, project_json_key, cache_time=1)
def _remove_commit_folders_and_redirect(commit:Dict[str,Any], n:int,
                                        project_folder_key:str, latest_commit_id:str) -> int:
    """
    Delete one obsolete commit's folders from the CDN and D43 buckets,
    delete its pre-convert .zip file (if the job_id is known),
    and set up redirects so old saved links don't 404.

    :param commit: the commit dict being removed (needs 'id', 'type', 'job_id')
    :param n: the (reversed) index of this commit — only used in log messages
    :param project_folder_key: 'u/{owner}/{repo}/' bucket prefix
    :param latest_commit_id: id of the newest kept commit — redirect target
    :return: the number of folders/files removed (2 or 3)
    """
    removed_count = 0
    commit_key = f"{project_folder_key}{commit['id']}"
    clear_commit_directory_from_bucket(AppSettings.cdn_s3_handler(), commit_key)
    removed_count += 1
    clear_commit_directory_from_bucket(AppSettings.door43_s3_handler(), commit_key)
    removed_count += 1
    # Delete the pre-convert .zip file (available on Download button) from its bucket
    if commit['job_id']:
        zipFile_key = f"preconvert/{commit['job_id']}.zip"
        AppSettings.logger.info(f" {n:,} Removing {prefix}PreConvert '{commit['type']}' '{zipFile_key}' file! …")
        clear_commit_directory_from_bucket(AppSettings.pre_convert_s3_handler(), zipFile_key)
        removed_count += 1
    else: # don't know the job_id (or the zip file was already deleted)
        AppSettings.logger.warning(f" {n:,} No job_id so pre-convert zip file not deleted.")
    # Setup redirects (so users don't get 404 errors from old saved links)
    latest_repo_key = f"/{project_folder_key}{latest_commit_id}" # Must start with /
    AppSettings.logger.info(f" {n:,} Redirecting {commit_key} and {commit_key}/index.html to {latest_repo_key} …")
    AppSettings.door43_s3_handler().redirect(key=commit_key, location=latest_repo_key)
    AppSettings.door43_s3_handler().redirect(key=f'{commit_key}/index.html', location=latest_repo_key)
    return removed_count


def remove_excess_commits(commits_list:list, repo_owner_username:str, repo_name:str) -> List[Dict[str,Any]]:
    """
    Given a list of commits (oldest first), remove the unnecessary ones
        from the list and DELETE THE files from S3!

    Written: Aug 2019
        This was especially important as we moved from hash numbers
            to tag and branch names.
    NOTE: Gitea before 1.11 had a bug where it didn't always notify of
            deleted branches. Also, the dev- chain is not always enabled,
            so doesn't get all notifications anyway. So RJH added code
            in March 2020 to check for now non-existent branches.

    Removal rules (applied newest-first, once MIN_WANTED_COMMITS are kept):
      - 'hash'/'artifact' commits are always deleted (but not 'unknown')
      - 'branch'/'tag' commits are deleted only if the branch/tag
        no longer exists in the repository
    """
    MIN_WANTED_COMMITS = 1
    # Lowered from 2,400 to 500 20Dec19—not sure why ru_gl/ru_tq_2lv kept getting timeout errors
    MAX_ALLOWED_REMOVED_FOLDERS = 500 # Don't want to get job timeouts—typically can do 3500+ in 600s
        # at least project.json will slowly get smaller if we limit this.
        # Each commit hash to be deleted has three folders to remove.
    AppSettings.logger.debug(f"remove_excess_commits({len(commits_list)}={commits_list}, {repo_owner_username}, {repo_name})…")

    current_branch_names_list = get_current_branch_names_list(repo_owner_username, repo_name)
    current_tag_names_list = get_current_tag_names_list(repo_owner_username, repo_name)
    project_folder_key = f'u/{repo_owner_username}/{repo_name}/'

    new_commits:List[Dict[str,Any]] = []
    removed_folder_count = 0
    # Process it backwards (newest first) in case we want to count how many we have as we go
    for n, commit in enumerate( reversed(commits_list) ):
        AppSettings.logger.debug(f" Investigating {commit['type']} '{commit['id']}' commit (already have {len(new_commits)} — want min of {MIN_WANTED_COMMITS})")
        deleted_flag = False
        if len(new_commits) >= MIN_WANTED_COMMITS \
        and removed_folder_count < MAX_ALLOWED_REMOVED_FOLDERS:
            if commit['type'] in ('hash','artifact',): # but not 'unknown'—can delete old master branches
                AppSettings.logger.info(f" {n:,} Removing {prefix} CDN & D43 '{commit['type']}' '{commit['id']}' commits! …")
                removed_folder_count += _remove_commit_folders_and_redirect(
                                            commit, n, project_folder_key, new_commits[-1]['id'])
                deleted_flag = True
            elif commit['type'] == 'branch' and current_branch_names_list:
                # Some branches may have been deleted without us being informed
                branch_name = commit['id']
                AppSettings.logger.debug(f"Checking branch '{branch_name}' against {current_branch_names_list}…")
                if branch_name not in current_branch_names_list:
                    AppSettings.logger.info(f" {n:,} Removing {prefix} CDN & D43 '{branch_name}' branch! …")
                    removed_folder_count += _remove_commit_folders_and_redirect(
                                                commit, n, project_folder_key, new_commits[-1]['id'])
                    deleted_flag = True
            elif commit['type'] == 'tag' and current_tag_names_list:
                # Some tags may have been deleted without us being informed
                tag_name = commit['id']
                AppSettings.logger.debug(f"Checking tag '{tag_name}' against {current_tag_names_list}…")
                if tag_name not in current_tag_names_list:
                    AppSettings.logger.info(f" {n:,} Removing {prefix} CDN & D43 '{tag_name}' release! …")
                    removed_folder_count += _remove_commit_folders_and_redirect(
                                                commit, n, project_folder_key, new_commits[-1]['id'])
                    deleted_flag = True
        if not deleted_flag:
            AppSettings.logger.debug(" Keeping this one.")
            new_commits.insert(0, commit) # Insert at beginning to get the order correct again
    if removed_folder_count > 9:
        len_new_commits = len(new_commits)
        AppSettings.logger.info(f"{removed_folder_count:,} commit folders deleted and redirected. (Returning {len_new_commits:,} commit{'' if len_new_commits==1 else 's'}).")
    return new_commits