def mock_s3_tn_project(self, part):
    zip_file = os.path.join(self.resources_dir, 'converted_projects', 'en_tn_converted.zip')
    out_dir = os.path.join(self.temp_dir, 'en_tn_converted')
    unzip(zip_file, out_dir)
    src_dir = os.path.join(out_dir, 'en_tn_converted')
    self.project_files = [f for f in os.listdir(src_dir)
                          if os.path.isfile(os.path.join(src_dir, f))]
    self.project_key = 'u/door43/en_tn/12345678'
    build_log = file_utils.load_json_object(os.path.join(src_dir, 'build_log.json'))
    build_log['part'] = part
    file_utils.write_file(os.path.join(src_dir, 'build_log.json'), build_log)
    AppSettings.cdn_s3_handler().upload_file(os.path.join(src_dir, 'build_log.json'),
                                             '{0}/{1}/build_log.json'.format(self.project_key, part))
    AppSettings.cdn_s3_handler().upload_file(os.path.join(src_dir, 'index.json'),
                                             '{0}/{1}/index.json'.format(self.project_key, part))
    AppSettings.cdn_s3_handler().upload_file(os.path.join(src_dir, 'build_log.json'),
                                             '{0}/{1}/finished'.format(self.project_key, part))
    AppSettings.cdn_s3_handler().upload_file(os.path.join(src_dir, '01-GEN.html'),
                                             '{0}/{1}/01-GEN.html'.format(self.project_key, part))
    AppSettings.cdn_s3_handler().upload_file(os.path.join(src_dir, 'project.json'),
                                             'u/door43/en_tq/project.json')
    AppSettings.door43_s3_handler().upload_file(
        os.path.join(self.resources_dir, 'templates', 'project-page.html'),
        'templates/project-page.html')
def write_data_to_file_and_upload_to_CDN(self, output_dir:str, s3_commit_key:str,
                                            fname:str, data:Union[str, Dict[str,Any]]) -> None:
    out_file = os.path.join(output_dir, fname)
    write_file(out_file, data)
    key = s3_commit_key + '/' + fname
    AppSettings.logger.debug(f"Uploading '{fname}' to {AppSettings.cdn_bucket_name} {key} …")
    AppSettings.cdn_s3_handler().upload_file(out_file, key, cache_time=0)
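# A minimal usage sketch (hypothetical values — passing a dict relies on write_file
# serialising dicts to JSON, as the Union[str, Dict[str,Any]] annotation implies):
#
#   self.write_data_to_file_and_upload_to_CDN(output_dir=self.temp_dir,
#                                             s3_commit_key='u/door43/en_obs/12345678',
#                                             fname='build_log.json',
#                                             data={'status': 'success', 'success': True})
#
# This writes <temp_dir>/build_log.json locally, then uploads it to
# u/door43/en_obs/12345678/build_log.json with cache_time=0 so the CDN
# never serves a stale build log.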
def set_deployed_flags(self, project_key, part_count, skip=-1):
    tempf = tempfile.mktemp(prefix="temp", suffix="deployed")
    file_utils.write_file(tempf, ' ')
    for i in range(0, part_count):
        if i != skip:
            key = '{0}/{1}/deployed'.format(project_key, i)
            AppSettings.cdn_s3_handler().upload_file(tempf, key, cache_time=0)
    os.remove(tempf)
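# Resulting bucket layout sketch (hypothetical project key): calling
#   set_deployed_flags('u/door43/en_tn/12345678', part_count=3, skip=1)
# uploads the one-byte marker file to
#   u/door43/en_tn/12345678/0/deployed
#   u/door43/en_tn/12345678/2/deployed
# leaving part 1 without a flag, so the deployer under test treats that part as
# not yet deployed. (tempfile.mktemp is deprecated in favour of mkstemp, but is
# adequate for a test fixture like this.)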
def upload_converted_files_to_CDN(s3_commit_key:str, unzip_dir:str) -> None:
    """
    Uploads the converted (but not templated) files to the cdn.door43.org bucket

    NOTE: These are used from there by the Print button/function.
    """
    AppSettings.logger.info(f"Uploading converted files from {unzip_dir} to {prefix}CDN {s3_commit_key} …")
    for root, _dirs, files in os.walk(unzip_dir):
        for filename in sorted(files):
            filepath = os.path.join(root, filename)
            key = s3_commit_key + filepath.replace(unzip_dir, '')
            AppSettings.logger.debug(f"Uploading {filename} to {prefix}CDN {key} …")
            AppSettings.cdn_s3_handler().upload_file(filepath, key, cache_time=0)
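# Key-derivation sketch (hypothetical paths): with unzip_dir='/tmp/unzipped' and
# s3_commit_key='u/door43/en_obs/12345678', a converted file at
# '/tmp/unzipped/01-GEN.html' becomes the bucket key
# 'u/door43/en_obs/12345678/01-GEN.html'. Note that `prefix` in the log messages
# is presumably a module-level global (e.g. the 'dev-' chain prefix mentioned in
# the remove_excess_commits docstring below), defined elsewhere in this codebase.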
def upload_archive(self) -> None:
    """
    Uploads self.output_zip_file
    """
    # AppSettings.logger.debug("converter.upload_archive()")
    if self.cdn_file_key and os.path.isdir(os.path.dirname(self.cdn_file_key)):
        # cdn_file_key's parent folder exists locally, so presumably this is a
        #   local/test destination — just copy the zip there
        # AppSettings.logger.debug("converter.upload_archive() doing copy")
        copy(self.output_zip_file, self.cdn_file_key)
    elif AppSettings.cdn_s3_handler():
        # AppSettings.logger.debug("converter.upload_archive() using S3 handler")
        AppSettings.cdn_s3_handler().upload_file(self.output_zip_file, self.cdn_file_key, cache_time=0)
def setUp(self):
    """Runs before each test."""
    AppSettings(prefix=f'{self._testMethodName}-')
    AppSettings.cdn_s3_handler().create_bucket()
    AppSettings.door43_s3_handler().create_bucket()
    self.temp_dir = tempfile.mkdtemp(prefix='Door43_test_project_deployer')
    self.deployer = ProjectDeployer(self.temp_dir)
    TdLanguage.language_list = {
        'aa': TdLanguage({'gw': False, 'ld': 'ltr', 'ang': 'Afar', 'lc': 'aa',
                          'ln': 'Afaraf', 'lr': 'Africa', 'pk': 6}),
        'en': TdLanguage({'gw': True, 'ld': 'ltr', 'ang': 'English', 'lc': 'en',
                          'ln': 'English', 'lr': 'Europe', 'pk': 1747}),
        'es': TdLanguage({'gw': True, 'ld': 'ltr', 'ang': 'Spanish', 'lc': 'es',
                          'ln': 'español', 'lr': 'Europe', 'pk': 1776}),
        'fr': TdLanguage({'gw': True, 'ld': 'ltr', 'ang': 'French', 'lc': 'fr',
                          'ln': 'français, langue française', 'lr': 'Europe', 'pk': 1868}),
    }
def get_templater_index(s3_commit_key:str, index_json_fname:str) -> Dict[str,Any]:
    index_json = AppSettings.cdn_s3_handler().get_json(s3_commit_key + '/' + index_json_fname)
    if not index_json:  # get_json presumably returns an empty dict when the key is missing
        index_json['titles'] = {}
        index_json['chapters'] = {}
        index_json['book_codes'] = {}
    return index_json
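# A minimal sketch of how a multi-part build might merge its per-part indexes
# with this helper (get_templater_index and the <commit>/<part>/index.json key
# layout are from this section; the merge loop itself is an illustrative
# assumption, not code from this repo):
def _merge_part_indexes_example(s3_commit_key:str, part_count:int) -> Dict[str,Any]:
    merged:Dict[str,Any] = {'titles': {}, 'chapters': {}, 'book_codes': {}}
    for part in range(part_count):
        part_index = get_templater_index(f'{s3_commit_key}/{part}', 'index.json')
        for field in ('titles', 'chapters', 'book_codes'):
            merged[field].update(part_index.get(field, {}))
    return merged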
def mock_s3_obs_project(self):
    zip_file = os.path.join(self.resources_dir, 'converted_projects', 'en-obs-complete.zip')
    out_dir = os.path.join(self.temp_dir, 'en-obs-complete')
    unzip(zip_file, out_dir)
    project_dir = os.path.join(out_dir, 'door43', 'en-obs', '12345678')
    self.project_files = [f for f in os.listdir(project_dir)
                          if os.path.isfile(os.path.join(project_dir, f))]
    self.project_key = 'u/door43/en-obs/12345678'
    for filename in self.project_files:
        AppSettings.cdn_s3_handler().upload_file(
            os.path.join(project_dir, filename),
            '{0}/{1}'.format(self.project_key, filename))
    AppSettings.cdn_s3_handler().upload_file(
        os.path.join(out_dir, 'door43', 'en-obs', 'project.json'),
        'u/door43/en-obs/project.json')
    AppSettings.door43_s3_handler().upload_file(
        os.path.join(self.resources_dir, 'templates', 'project-page.html'),
        'templates/project-page.html')
def mock_s3_bible_project(self, test_file_name, project_key, multi_part=False):
    converted_proj_dir = os.path.join(self.resources_dir, 'converted_projects')
    test_file_base = test_file_name.split('.zip')[0]
    zip_file = os.path.join(converted_proj_dir, test_file_name)
    out_dir = os.path.join(self.temp_dir, test_file_base)
    unzip(zip_file, out_dir)
    project_dir = os.path.join(out_dir, test_file_base) + os.path.sep
    self.project_files = file_utils.get_files(out_dir)
    self.project_key = project_key
    for filename in self.project_files:
        sub_path = filename.split(project_dir)[1].replace(os.path.sep, '/')  # Make sure it is a bucket path
        AppSettings.cdn_s3_handler().upload_file(filename, '{0}/{1}'.format(project_key, sub_path))
        if multi_part:  # copy files from cdn to door43
            base_name = os.path.basename(filename)
            if '.html' in base_name:
                with open(filename, 'r') as f:
                    soup = BeautifulSoup(f, 'html.parser')
                # add nav tag
                new_tag = soup.new_tag('div', id='right-sidebar')
                soup.body.append(new_tag)
                html = str(soup)
                file_utils.write_file(filename, html.encode('ascii', 'xmlcharrefreplace'))
            AppSettings.door43_s3_handler().upload_file(filename, '{0}/{1}'.format(project_key, base_name))
    # u, user, repo = project_key
    AppSettings.door43_s3_handler().upload_file(
        os.path.join(self.resources_dir, 'templates', 'project-page.html'),
        'templates/project-page.html')
def test_s3_handler(self):
    self.assertIsNotNone(AppSettings.cdn_s3_handler())
def update_project_file(build_log:Dict[str,Any], output_dirpath:str) -> None:
    """
    project.json is read by the Javascript in door43.org/js/project-page-functions.js
        The commits are used to update the Revision list in the left side-bar.

    Changed March 2020 to read project.json from door43 bucket (not cdn bucket).
        (The updated file gets written to both buckets.)
    """
    build_log_copy = build_log.copy()  # Sometimes this gets too big
    if 'warnings' in build_log_copy and len(build_log_copy['warnings']) > 10:
        build_log_copy['warnings'] = f"{build_log_copy['warnings'][:5]} …… {build_log_copy['warnings'][-5:]}"
    AppSettings.logger.debug(f"Callback.update_project_file({build_log_copy}, output_dir={output_dirpath})…")

    commit_id = build_log['commit_id']
    repo_owner_username = build_log['repo_owner_username']  # was 'repo_owner'
    repo_name = build_log['repo_name']
    project_folder_key = f'u/{repo_owner_username}/{repo_name}/'
    project_json_key = f'{project_folder_key}project.json'
    project_json = AppSettings.door43_s3_handler().get_json(project_json_key)
    project_json['user'] = repo_owner_username
    project_json['repo'] = repo_name
    project_json['repo_url'] = f'https://{AppSettings.gogs_url}/{repo_owner_username}/{repo_name}'
    current_commit = {
        'id': commit_id,
        'job_id': build_log['job_id'],
        'type': build_log['commit_type'],
        'created_at': build_log['created_at'],
        'status': build_log['status'],
        'success': build_log['success'],
        # 'started_at': None,
        # 'ended_at': None,
    }
    if build_log['commit_hash']:
        current_commit['commit_hash'] = build_log['commit_hash']
    # if 'started_at' in build_log:
    #     current_commit['started_at'] = build_log['started_at']
    # if 'ended_at' in build_log:
    #     current_commit['ended_at'] = build_log['ended_at']

    def is_hash(commit_str:str) -> bool:
        """
        Checks to see if this looks like a hexadecimal (abbreviated to 10 chars) hash
        """
        if len(commit_str) != 10:
            return False
        for char in commit_str:
            if char not in 'abcdef1234567890':
                return False
        return True

    if 'commits' not in project_json:
        project_json['commits'] = []
    AppSettings.logger.info(f"Rebuilding commits list (currently {len(project_json['commits']):,}) for project.json…")
    commits:List[Dict[str,Any]] = []
    no_job_id_count = 0
    for ix, c in enumerate(project_json['commits']):
        AppSettings.logger.debug(f"  Looking at {len(commits)}/ '{c['id']}'. Is current commit={c['id'] == commit_id}…")
        # if c['id'] == commit_id:  # the old entry for the current commit id
            # Why did this code ever get in here in callback!!!! (Deletes pre-convert folder when it shouldn't!)
            # zip_file_key = f"preconvert/{current_commit['job_id']}.zip"
            # AppSettings.logger.info(f"  Removing obsolete {prefix}pre-convert '{current_commit['type']}' '{commit_id}' {zip_file_key} …")
            # try:
            #     clear_commit_directory_from_bucket(AppSettings.pre_convert_s3_handler(), zip_file_key)
            # except Exception as e:
            #     AppSettings.logger.critical(f"  Remove obsolete pre-convert zipfile threw an exception while attempting to delete '{zip_file_key}': {e}")
            # Not appended to commits here coz it happens below instead
        if c['id'] != commit_id:  # a different commit from the current one
            if 'job_id' not in c:  # Might be able to remove this eventually
                c['job_id'] = get_jobID_from_commit_buildLog(project_folder_key, ix, c['id'])
                # Returned job id might have been None
            if not c['job_id']:
                no_job_id_count += 1
            if 'type' not in c:  # Might be able to remove this eventually
                c['type'] = 'hash' if is_hash(c['id']) \
                            else 'artifact' if c['id'] in ('latest','OhDear') \
                            else 'unknown'
            commits.append(c)
    if no_job_id_count > 10:
        len_commits = len(commits)
        AppSettings.logger.info(f"{no_job_id_count} job ids were unable to be found. Have {len_commits} historical commit{'' if len_commits==1 else 's'}.")
    commits.append(current_commit)

    cleaned_commits = remove_excess_commits(commits, repo_owner_username, repo_name)
    if len(cleaned_commits) < len(commits):  # Then we removed some
        # Save a dated (coz this could happen more than once) backup of the project.json file
        save_project_filename = f"project.save.{datetime.utcnow().strftime('%Y-%m-%d_%H-%M-%S')}.json"
        save_project_filepath = os.path.join(output_dirpath, save_project_filename)
        write_file(save_project_filepath, project_json)
        save_project_json_key = f'{project_folder_key}{save_project_filename}'
        # Don't need to save this twice (March 2020)
        # AppSettings.cdn_s3_handler().upload_file(save_project_filepath, save_project_json_key, cache_time=100)
        AppSettings.door43_s3_handler().upload_file(save_project_filepath, save_project_json_key, cache_time=100)

    # Now save the updated project.json file in both places
    project_json['commits'] = cleaned_commits
    project_filepath = os.path.join(output_dirpath, 'project.json')
    write_file(project_filepath, project_json)
    AppSettings.cdn_s3_handler().upload_file(project_filepath, project_json_key, cache_time=1)
    AppSettings.door43_s3_handler().upload_file(project_filepath, project_json_key, cache_time=1)
def remove_excess_commits(commits_list:list, repo_owner_username:str, repo_name:str) -> List[Dict[str,Any]]:
    """
    Given a list of commits (oldest first),
        remove the unnecessary ones from the list
        and DELETE the files from S3!

    Written: Aug 2019
        This was especially important as we moved from hash numbers to tag and branch names.

    NOTE: Gitea before 1.11 had a bug where it didn't always notify of deleted branches.
        Also, the dev- chain is not always enabled, so doesn't get all notifications anyway.
        So RJH added code in March 2020 to check for now non-existent branches.
    """
    MIN_WANTED_COMMITS = 1
    MAX_ALLOWED_REMOVED_FOLDERS = 500  # Lowered from 2,400 to 500 20Dec19—not sure why ru_gl/ru_tq_2lv kept getting timeout errors
        # Don't want to get job timeouts—typically can do 3,500+ in 600s,
        #   and at least project.json will slowly get smaller if we limit this.
        # Each commit hash to be deleted has three folders to remove.
    AppSettings.logger.debug(f"remove_excess_commits({len(commits_list)}={commits_list}, {repo_owner_username}, {repo_name})…")
    current_branch_names_list = get_current_branch_names_list(repo_owner_username, repo_name)
    current_tag_names_list = get_current_tag_names_list(repo_owner_username, repo_name)
    project_folder_key = f'u/{repo_owner_username}/{repo_name}/'
    new_commits:List[Dict[str,Any]] = []
    removed_folder_count = 0
    # Process it backwards in case we want to count how many we have as we go
    for n, commit in enumerate( reversed(commits_list) ):
        # if DELETE_ENABLED or len(new_commits) < MAX_DEBUG_DISPLAYS:  # don't clutter logs too much
        AppSettings.logger.debug(f"  Investigating {commit['type']} '{commit['id']}' commit (already have {len(new_commits)} — want min of {MIN_WANTED_COMMITS})")
        # elif len(new_commits) == MAX_DEBUG_DISPLAYS:  # don't clutter logs too much
        #     AppSettings.logger.debug("  Logging suppressed for remaining hashes…")
        deleted_flag = False
        if len(new_commits) >= MIN_WANTED_COMMITS \
        and removed_folder_count < MAX_ALLOWED_REMOVED_FOLDERS:
            if commit['type'] in ('hash','artifact',):  # but not 'unknown'—can delete old master branches
                # Delete the commit hash folders from both CDN and D43 buckets
                commit_key = f"{project_folder_key}{commit['id']}"
                AppSettings.logger.info(f"  {n:,} Removing {prefix} CDN & D43 '{commit['type']}' '{commit['id']}' commits! …")
                # AppSettings.logger.info(f"  {n:,} Removing {prefix}CDN '{commit['type']}' '{commit['id']}' commit! …")
                clear_commit_directory_from_bucket(AppSettings.cdn_s3_handler(), commit_key)
                removed_folder_count += 1
                # AppSettings.logger.info(f"  {n:,} Removing {prefix}D43 '{commit['type']}' '{commit['id']}' commit! …")
                clear_commit_directory_from_bucket(AppSettings.door43_s3_handler(), commit_key)
                removed_folder_count += 1
                # Delete the pre-convert .zip file (available on Download button) from its bucket
                if commit['job_id']:
                    zipFile_key = f"preconvert/{commit['job_id']}.zip"
                    AppSettings.logger.info(f"  {n:,} Removing {prefix}PreConvert '{commit['type']}' '{zipFile_key}' file! …")
                    clear_commit_directory_from_bucket(AppSettings.pre_convert_s3_handler(), zipFile_key)
                    removed_folder_count += 1
                else:  # don't know the job_id (or the zip file was already deleted)
                    AppSettings.logger.warning(f"  {n:,} No job_id so pre-convert zip file not deleted.")
                # Setup redirects (so users don't get 404 errors from old saved links)
                old_repo_key = f"{project_folder_key}{commit['id']}"
                latest_repo_key = f"/{project_folder_key}{new_commits[-1]['id']}"  # Must start with /
                AppSettings.logger.info(f"  {n:,} Redirecting {old_repo_key} and {old_repo_key}/index.html to {latest_repo_key} …")
                AppSettings.door43_s3_handler().redirect(key=old_repo_key, location=latest_repo_key)
                AppSettings.door43_s3_handler().redirect(key=f'{old_repo_key}/index.html', location=latest_repo_key)
                deleted_flag = True
            elif commit['type'] == 'branch' and current_branch_names_list:
                # Some branches may have been deleted without us being informed
                branch_name = commit['id']
                AppSettings.logger.debug(f"Checking branch '{branch_name}' against {current_branch_names_list}…")
                if branch_name not in current_branch_names_list:
                    commit_key = f"{project_folder_key}{commit['id']}"
                    AppSettings.logger.info(f"  {n:,} Removing {prefix} CDN & D43 '{branch_name}' branch! …")
                    # AppSettings.logger.info(f"  {n:,} Removing {prefix}CDN '{branch_name}' branch! …")
                    clear_commit_directory_from_bucket(AppSettings.cdn_s3_handler(), commit_key)
                    removed_folder_count += 1
                    # AppSettings.logger.info(f"  {n:,} Removing {prefix}D43 '{branch_name}' branch! …")
                    clear_commit_directory_from_bucket(AppSettings.door43_s3_handler(), commit_key)
                    removed_folder_count += 1
                    # Delete the pre-convert .zip file (available on Download button) from its bucket
                    if commit['job_id']:
                        zipFile_key = f"preconvert/{commit['job_id']}.zip"
                        AppSettings.logger.info(f"  {n:,} Removing {prefix}PreConvert '{commit['type']}' '{zipFile_key}' file! …")
                        clear_commit_directory_from_bucket(AppSettings.pre_convert_s3_handler(), zipFile_key)
                        removed_folder_count += 1
                    else:  # don't know the job_id (or the zip file was already deleted)
                        AppSettings.logger.warning(f"  {n:,} No job_id so pre-convert zip file not deleted.")
                    # Setup redirects (so users don't get 404 errors from old saved links)
                    old_repo_key = f"{project_folder_key}{branch_name}"
                    latest_repo_key = f"/{project_folder_key}{new_commits[-1]['id']}"  # Must start with /
                    AppSettings.logger.info(f"  {n:,} Redirecting {old_repo_key} and {old_repo_key}/index.html to {latest_repo_key} …")
                    AppSettings.door43_s3_handler().redirect(key=old_repo_key, location=latest_repo_key)
                    AppSettings.door43_s3_handler().redirect(key=f'{old_repo_key}/index.html', location=latest_repo_key)
                    deleted_flag = True
            elif commit['type'] == 'tag' and current_tag_names_list:
                # Some tags may have been deleted without us being informed
                tag_name = commit['id']
                AppSettings.logger.debug(f"Checking tag '{tag_name}' against {current_tag_names_list}…")
                if tag_name not in current_tag_names_list:
                    commit_key = f"{project_folder_key}{commit['id']}"
                    AppSettings.logger.info(f"  {n:,} Removing {prefix} CDN & D43 '{tag_name}' release! …")
                    # AppSettings.logger.info(f"  {n:,} Removing {prefix}CDN '{tag_name}' release! …")
                    clear_commit_directory_from_bucket(AppSettings.cdn_s3_handler(), commit_key)
                    removed_folder_count += 1
                    # AppSettings.logger.info(f"  {n:,} Removing {prefix}D43 '{tag_name}' release! …")
                    clear_commit_directory_from_bucket(AppSettings.door43_s3_handler(), commit_key)
                    removed_folder_count += 1
                    # Delete the pre-convert .zip file (available on Download button) from its bucket
                    if commit['job_id']:
                        zipFile_key = f"preconvert/{commit['job_id']}.zip"
                        AppSettings.logger.info(f"  {n:,} Removing {prefix}PreConvert '{commit['type']}' '{zipFile_key}' file! …")
                        clear_commit_directory_from_bucket(AppSettings.pre_convert_s3_handler(), zipFile_key)
                        removed_folder_count += 1
                    else:  # don't know the job_id (or the zip file was already deleted)
                        AppSettings.logger.warning(f"  {n:,} No job_id so pre-convert zip file not deleted.")
                    # Setup redirects (so users don't get 404 errors from old saved links)
                    old_repo_key = f"{project_folder_key}{tag_name}"
                    latest_repo_key = f"/{project_folder_key}{new_commits[-1]['id']}"  # Must start with /
                    AppSettings.logger.info(f"  {n:,} Redirecting {old_repo_key} and {old_repo_key}/index.html to {latest_repo_key} …")
                    AppSettings.door43_s3_handler().redirect(key=old_repo_key, location=latest_repo_key)
                    AppSettings.door43_s3_handler().redirect(key=f'{old_repo_key}/index.html', location=latest_repo_key)
                    deleted_flag = True
        if not deleted_flag:
            AppSettings.logger.debug("  Keeping this one.")
            new_commits.insert(0, commit)  # Insert at beginning to get the order correct again
    if removed_folder_count > 9:
        len_new_commits = len(new_commits)
        AppSettings.logger.info(f"{removed_folder_count:,} commit folders deleted and redirected. (Returning {len_new_commits:,} commit{'' if len_new_commits==1 else 's'}).")
    return new_commits