def check_images(self, exclude=None):
    """
    Lists all the images not in https://zen-marketing-documentation.s3.amazonaws.com/docs/en.
    Ask the writer to fix them in the source and HTML files.
    :param exclude: list of article ids that use English images in localized versions
    :return: None
    """
    self.verify_handoff_exists()
    file_path = os.path.join(self.handoff_folder, 'articles')
    files = glob.glob(os.path.join(file_path, '*.html'))
    missing_images = ''
    for file in files:
        if exclude and get_id_from_filename(file) in exclude:  # skip articles that keep English images
            continue
        tree = create_tree(file)
        images = tree.find_all('img')
        for image in images:
            src = image['src']
            if 'zen-marketing-documentation' in src or 'embed-ssl.wistia.com' in src:
                continue
            missing_images += '* in {} -> {}\n'.format(os.path.basename(file), src)
    if missing_images:
        print('The following images are not on S3:')
        print(missing_images)
    else:
        print('All image links point to S3!\n')
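# Several methods in this listing call a get_id_from_filename() helper that is not shown here.
# Based on how it's used (an int article id is extracted from an exported article's HTML
# filename), a minimal sketch might look like the following. The filename pattern
# ('<id>-<slug>.html' or '<id>.html') is an assumption, not the confirmed convention.
import os
import re


def get_id_from_filename(file):
    """Hypothetical sketch: return the leading numeric article id in the file's basename."""
    match = re.match(r'(\d+)', os.path.basename(file))
    return int(match.group(1)) if match else None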
def update_srcs(self, locales=None, write=True):
    """
    Updates links to S3 images in the tree.
    Verifies that the link points to a localized image on S3 before updating it.
    :param locales: tuple
    :param write: boolean
    :return: nothing
    """
    self.verify_handoff_exists(localized=True)
    registry = self.read_registry(self.image_registry)
    if locales is None:  # if locales not specified, use all locales
        locales = self.locales
    for locale in locales:
        files = self.get_localized_files(locale)
        for file in files:
            # e.g., /Users/cnadeau/production/handoffs/2016-11-18/chat/localized/
            print('\nOpening {}{} for srcs'.format(locale, file.split(locale.upper())[1]))
            tree = create_tree(file)
            img_links = tree.find_all('img', src=re.compile('/docs/en/'))
            for link in img_links:
                base_name = os.path.basename(link['src'])
                if base_name not in registry[locale]:
                    print(' - Not localized on S3 - {}'.format(link['src']))
                    continue
                # pt-br images live in the 'pt' folder on S3
                s3_locale = 'pt' if locale == 'pt-br' else locale
                link['src'] = re.sub(r'docs/en', 'docs/{}'.format(s3_locale), link['src'])
                print(' - Updated src - {}'.format(link['src']))
            if write:
                rewrite_file(file, tree)
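# A quick standalone check of the src rewrite performed by update_srcs(): English image URLs
# under /docs/en/ are pointed at the locale's folder on S3, with pt-br mapping to the 'pt'
# folder. The sample URL below is illustrative only.
import re

sample_src = 'https://zen-marketing-documentation.s3.amazonaws.com/docs/en/screenshot.png'
for locale in ('de', 'ja', 'pt-br'):
    s3_locale = 'pt' if locale == 'pt-br' else locale
    print(re.sub(r'docs/en', 'docs/{}'.format(s3_locale), sample_src))
# .../docs/de/..., .../docs/ja/..., .../docs/pt/...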
def update_hrefs(self, locales=None, write=True):
    """
    Updates links to HC articles in the tree.
    Verifies that the link points to a localized HC article before updating it.
    :param locales: tuple
    :param write: boolean
    :return: nothing
    """
    self.verify_handoff_exists(localized=True)
    registry = self.read_registry(self.article_registry)
    if locales is None:  # if locales not specified, use all locales
        locales = self.locales
    for locale in locales:
        files = self.get_localized_files(locale)
        for file in files:
            print('\nOpening {}{} for hrefs'.format(locale, file.split(locale.upper())[1]))
            tree = create_tree(file)
            links = tree.find_all('a', href=re.compile('/hc/en-us/'))
            for link in links:
                if '/community/posts/' in link['href']:  # exclude links to community posts
                    continue
                if '/article_attachments/' in link['href']:  # exclude in-text attachment links
                    continue
                if '/#topic' in link['href']:
                    link['href'] = link['href'].split('/#topic')[0]  # /hc/fr/articles/206544348#topic_etm_2qc_5s
                base_name = os.path.basename(link['href']).replace('#', '-').split('-')[0]
                if '%' in base_name:
                    base_name = base_name.split('%')[0]
                if '?' in base_name:
                    base_name = base_name.split('?')[0]
                try:
                    int(base_name)
                except ValueError:
                    print(' - Link is not an id number - {}'.format(base_name))
                    continue
                if int(base_name) not in registry[locale]:  # does the link have a known localized target?
                    print(' - Not localized in HC - {}'.format(link['href']))
                    continue
                link['href'] = re.sub(r'hc/en-us', 'hc/{}'.format(locale), link['href'])
                print(' - Updated xref - {}'.format(link['href']))
            if write:
                rewrite_file(file, tree)
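# A standalone illustration of how update_hrefs() digs the article id out of an en-us href
# before checking the registry. The sample hrefs are placeholders; the parsing steps mirror the
# method above.
import os

samples = [
    '/hc/en-us/articles/206544348-Using-the-dashboard',
    '/hc/en-us/articles/206544348#topic_etm_2qc_5s',
    '/hc/en-us/articles/206544348?page=2',
]
for href in samples:
    base_name = os.path.basename(href).replace('#', '-').split('-')[0]
    if '%' in base_name:
        base_name = base_name.split('%')[0]
    if '?' in base_name:
        base_name = base_name.split('?')[0]
    print(href, '->', int(base_name))  # 206544348 in each case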
def copy_images(self, months=4, exclude=None):
    """
    Parses the articles in the handoff package and copies recently updated en-us images
    from the S3 folder to the handoff folder.
    :param months: number of months within which en-us images were last updated
    :param exclude: list of article ids that use English images in localized versions
    :return: None
    """
    self.verify_handoff_exists()

    # Create an images folder in the handoff folder
    images_path = os.path.join(self.handoff_folder, 'images')
    images_path_fullsize = os.path.join(images_path, 'fullsize')
    images_path_resized = os.path.join(images_path, 'resized')
    if not os.path.exists(images_path):
        os.makedirs(images_path_fullsize)
        os.makedirs(images_path_resized)

    # Get the names of images in the S3 folder that were updated in the last x months
    src_images = []
    now = arrow.utcnow()
    files = glob.glob(os.path.join(self.image_path, '*.*'))
    for file in files:
        modified_at = arrow.get(os.path.getmtime(file))
        if modified_at > now.shift(months=-months):  # arrow >= 0.15 uses shift() for relative offsets
            src_images.append(os.path.basename(file))

    # Parse each article in the handoff folder and copy recently updated images to the handoff package
    file_path = os.path.join(self.handoff_folder, 'articles')
    files = glob.glob(os.path.join(file_path, '*.html'))
    copied = []
    skipped = []
    for file in files:
        if exclude and get_id_from_filename(file) in exclude:  # skip articles that keep English images
            continue
        tree = create_tree(file)
        images = tree.find_all('img')
        for image in images:
            image_name = os.path.basename(image['src'])
            if image_name in src_images:
                if image_name not in copied:
                    src_image_path = os.path.join(self.image_path, image_name)
                    if 'width' in image.attrs:
                        shutil.copy(src_image_path, images_path_fullsize)
                    else:
                        shutil.copy(src_image_path, images_path_resized)
                    copied.append(image_name)
            else:
                if image_name not in skipped:
                    skipped.append(image_name)

    print('\nImages copied (on S3 and updated in the last {} months): \n{}'.format(months, '\n'.join(copied)))
    print('\nImages not copied: \n{}'.format('\n'.join(skipped)))
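# A minimal sketch of the recency check used in copy_images(): a file counts as recently
# updated if its modification time falls within the last N months. Uses arrow's shift() for the
# relative cutoff; the path passed in is up to the caller.
import os
import arrow


def updated_in_last_months(path, months=4):
    """Return True if the file at path was modified within the last `months` months (UTC)."""
    modified_at = arrow.get(os.path.getmtime(path))  # epoch seconds -> UTC Arrow object
    return modified_at > arrow.utcnow().shift(months=-months)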
def publish_handoff(self, locales=None, write=True):
    """
    Publishes the localized articles in the handoff folder to Help Center.
    Posts the translation if it's missing in HC, or updates it if it already exists.
    :param locales: tuple
    :param write: boolean
    :return: nothing
    """
    self.verify_handoff_exists(localized=True)
    kb = KB(self.product)
    if locales is None:  # if locales not specified, use all locales
        locales = self.locales
    for locale in locales:
        print('\nPushing \'{}\' translations ...\n'.format(locale))
        files = self.get_localized_files(locale)
        for file in files:
            article_id = get_id_from_filename(file)
            print('Publishing {}...'.format(article_id))
            if article_id == 203661746:  # if glossary, paste in HC by hand
                print('Glossary, 203661746, skipped. Enter manually.')
                continue

            # # if included in a later loc handoff that has since been delivered, skip
            # later_handoff = [115012184908, 115014797387, 203664326, 115012399428, 115014417408, 115012258168,
            #                  231747367, 115012794168, 115014810447]
            # if article_id in later_handoff:
            #     print(f'{article_id} skipped. Was delivered in a later handoff.')
            #     continue

            # # id changes since handoff
            # changes = {235723507: 203664366, 235651328: 216207658, 235721887: 224858627}
            # if article_id in changes:
            #     article_id = changes[article_id]

            missing_translations = kb.get_missing_translations(article_id)
            if missing_translations is None:
                print('Error getting missing translations for article {}. Exiting'.format(article_id))
                exit()
            if locale in missing_translations:  # get the http method to use for the article
                http_method = 'post'
            else:
                http_method = 'put'

            tree = create_tree(file)
            title = ' '.join(tree.h1.stripped_strings)
            tree.h1.decompose()
            if http_method == 'post':
                data = {'translation': {'locale': locale, 'title': title, 'body': str(tree), 'draft': False}}
                if write:
                    print(f'- posting {article_id}')
                    kb.post_translation(article_id, payload=data)
            else:
                data = {'translation': {'title': title, 'body': str(tree), 'draft': False}}
                if write:
                    print(f'- putting {article_id}')
                    kb.put_translation(article_id, locale, payload=data)
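# For reference, the two translation payload shapes built by publish_handoff(), shown
# standalone. A new translation (POST) must carry the locale; an update to an existing
# translation (PUT) does not. The title and body values below are placeholders.
post_payload = {'translation': {'locale': 'de', 'title': 'Beispieltitel',
                                'body': '<p>...</p>', 'draft': False}}
put_payload = {'translation': {'title': 'Beispieltitel',
                               'body': '<p>...</p>', 'draft': False}}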
def update_post_from_file(self, post_id, filename):
    """
    Updates a community post from an HTML file in the staging 'posts' folder.
    The file's h1 becomes the post title and the rest of the tree becomes the post details.
    :param post_id: id of the community post to update
    :param filename: name of the HTML file in the staging 'posts' folder
    :return: None
    """
    endpoint = '/posts/{}.json'.format(post_id)
    url = self.community_root + endpoint
    file = os.path.join(self.staging, 'posts', filename)
    tree = create_tree(file)
    if tree.h1.string is None:
        print('ERROR: title h1 problem in {} (extra inner tags, etc)'.format(filename))
        exit()
    title = tree.h1.string.strip()
    tree.h1.decompose()
    data = {'post': {'title': title, 'details': str(tree)}}
    response = self.put_record(url, payload=data)
    if response:
        print('Successfully updated post {}'.format(post_id))
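# Why the h1 check above matters: BeautifulSoup's .string is None when a tag has more than one
# child, so an h1 with inline markup trips the error. A small self-contained illustration
# (requires beautifulsoup4):
from bs4 import BeautifulSoup

plain = BeautifulSoup('<h1>Community guidelines</h1>', 'html.parser')
nested = BeautifulSoup('<h1>Community <em>guidelines</em></h1>', 'html.parser')
print(plain.h1.string)   # 'Community guidelines'
print(nested.h1.string)  # None - extra inner tags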
def strip_comments(self, write=True):
    """
    Removes HTML comments from the article files in the handoff folder.
    :param write: boolean. If True, write the updated tree back to the file
    :return: None
    """
    self.verify_handoff_exists()
    print('\n')
    file_path = os.path.join(self.handoff_folder, 'articles')
    files = glob.glob(os.path.join(file_path, '*.html'))
    for file in files:
        if get_id_from_filename(file) == 203661526:  # voice pricing article - don't modify this file
            continue
        tree = create_tree(file)
        count = 0
        comments = tree.find_all(text=lambda text: isinstance(text, Comment))
        for comment in comments:
            comment.extract()
            count += 1
        if write and count > 0:
            rewrite_file(file, tree)
            print('Stripped {} comments in {}'.format(count, os.path.basename(file)))
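# A standalone version of the comment stripping done above, assuming the Comment class comes
# from bs4 (the listing does not show the imports). Requires beautifulsoup4.
from bs4 import BeautifulSoup, Comment

soup = BeautifulSoup('<p>Keep this.<!-- drop this --></p>', 'html.parser')
for comment in soup.find_all(text=lambda text: isinstance(text, Comment)):
    comment.extract()
print(soup)  # <p>Keep this.</p>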
def push_staged(self, folder, locale='en-us', write=False):
    """
    Pushes the id-named HTML files in the staging folder specified in tools.ini.
    :param folder: folder in the staging folder containing the files
    :param locale: locale of the translations to push. Defaults to 'en-us'
    :param write: boolean. If True, actually push the files
    :return: None
    """
    path = os.path.join(os.sep, *self.settings['staging'].split('/'), folder)
    if not os.path.exists(path):
        print('Folder named "{}" not found in the staging folder.'.format(folder))
        exit()
    files = glob.glob(os.path.join(path, '*.html'))
    for file in files:
        article_id = get_id_from_filename(file)
        if article_id == 203661746:
            print('Glossary, 203661746, skipped. Enter manually.')
            continue
        missing_translations = self.get_missing_translations(article_id)
        if missing_translations is None:
            print('Error getting missing translations for article {}. Exiting'.format(article_id))
            exit()
        if locale in missing_translations:  # get the http method to use for the article
            http_method = 'post'
        else:
            http_method = 'put'
        tree = create_tree(file)
        title = ' '.join(tree.h1.stripped_strings)
        tree.h1.decompose()
        if http_method == 'post':
            data = {'translation': {'locale': locale, 'title': title, 'body': str(tree.body), 'draft': False}}
            if write:
                print(f'Posting translation {article_id}: {title}')
                self.post_translation(article_id, payload=data)
        else:
            data = {'translation': {'title': title, 'body': str(tree), 'draft': False}}
            if write:
                print(f'Putting translation {article_id}: {title}')
                self.put_translation(article_id, locale, payload=data)
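# A hypothetical dry run of push_staged(), assuming this method lives on the KB class used
# elsewhere in this listing and that the class takes a product name. Nothing is pushed while
# write is False (the default); 'support_tips' is a placeholder folder name.
kb = KB('chat')
kb.push_staged('support_tips', write=False)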
def take_image_inventory(self, backup_folder):
    """
    Reads the files written to a backup folder and creates a JSON inventory of all images in the files.
    Prerequisites:
    1. Take an inventory of articles for the HC or the category (e.g., the agent or admin
       categories in Support). See the take_inventory() method.
    2. Write the files in the article inventory to the backups folder. See the backup_inventory() method.
    3. Pass the backup folder name to this method.
    :param backup_folder: folder in backups containing the article files
    :return: None
    """
    image_list = []
    folder_path = os.path.join(os.sep, *self.settings['backups'].split('/'), backup_folder)
    files = glob.glob(os.path.join(folder_path, '*.html'))
    for file in files:
        tree = create_tree(file)
        img_links = tree('img')
        for image in img_links:
            base_name = os.path.basename(image['src'])
            image_list.append(base_name)
    image_list = list(set(image_list))  # a set is not JSON-serializable
    write_to_json(image_list, self.product_cache, f'kb_image_inventory_{backup_folder}.json')
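# A sketch of the prerequisite sequence described in the docstring above. take_inventory() and
# backup_inventory() are named in the docstring but not shown in this listing, so their
# signatures (and the KB constructor argument) are assumptions; the folder name is a placeholder.
kb = KB('guide')
kb.take_inventory()                    # 1. inventory the articles (assumed signature)
kb.backup_inventory('2018-11-30')      # 2. write the inventoried files to a backup folder (assumed)
kb.take_image_inventory('2018-11-30')  # 3. build the image inventory from that backup folder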
def push_transformed_articles(self, folder, write=True):
    """
    Pushes dita-transformed HTML files to the appropriate KBs.
    Reads the ditamap.yml file in the /production folder on the Documentation Team Drive.
    Looks for the transformed files in the production/staging/transformed_files folder.
    :param folder: name of the folder in /production/staging/transformed_files that contains the transformed files
    :param write: boolean. If True, write the changes to the Help Centers
    :return: None, but writes the push results to json and xlsx files in /production/reports/publishing
    """
    with open(self.dita_map, 'r') as f:
        dita_map = yaml.safe_load(f)  # safe_load: PyYAML's plain load() requires an explicit Loader
    files_path = os.path.join(os.sep, *self.settings['staging'].split('/'), 'transformed_files', folder)
    files = glob.glob(os.path.join(files_path, '*.html'))
    results = []
    for file in files:
        dita_name = get_dita_name_from_filename(file)
        mapping_exists = False
        for mapping in dita_map:
            if mapping['dita'] == dita_name:
                mapping_exists = True
                break
        if mapping_exists:
            print('Updating \"{}\" ({}) in {} kb'.format(mapping['dita'], mapping['id'], mapping['hc']))
            tree = create_tree(file)
            if tree.h1.string is None:
                print('ERROR: title h1 problem in {} (extra inner tags, etc)'.format(mapping['dita']))
                results.append({'dita': mapping['dita'], 'id': mapping['id'], 'hc': mapping['hc'],
                                'pushed': False, 'notes': 'h1 problem'})
                continue
            title = tree.h1.string.strip()
            tree.h1.decompose()
            body = tree.body
            data = {'translation': {'title': title, 'body': str(body)}}
            hc_root = 'https://{}.zendesk.com/api/v2/help_center'.format(mapping['hc'])
            endpoint = '/articles/{}/translations/en-us.json'.format(mapping['id'])
            url = hc_root + endpoint
            print(url)
            if write:
                response = self.put_record(url=url, payload=data)
            else:
                print('Testing mode!')
                response = 1
            if response is not None:
                results.append({'dita': mapping['dita'], 'id': mapping['id'], 'hc': mapping['hc'],
                                'pushed': True, 'notes': ''})
            else:
                results.append({'dita': mapping['dita'], 'id': mapping['id'], 'hc': mapping['hc'],
                                'pushed': False, 'notes': 'request error'})
        else:
            print('Skipping \"{}\" because it has no mapping'.format(dita_name))
            results.append({'dita': dita_name, 'id': None, 'hc': None, 'pushed': False, 'notes': 'no mapping'})

    now = arrow.now('US/Pacific')
    report_name = 'push_results_{}'.format(now.format('YYYY-MM-DD'))
    reports_path = os.path.join(self.shared_folder, 'reports', self.subdomain, 'publishing')
    write_to_json(results, reports_path, '{}.json'.format(report_name))
    write_to_excel(results, reports_path, '{}.xlsx'.format(report_name))
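# What push_transformed_articles() expects from ditamap.yml: a list of mappings with 'dita',
# 'id', and 'hc' keys (the keys read above). The entry values here are made up; only the shape
# is taken from the code. Requires PyYAML.
import yaml

sample_ditamap = """
- dita: zug_placeholder_topic
  id: 123456789
  hc: support
"""
print(yaml.safe_load(sample_ditamap))  # -> a list of dicts with 'dita', 'id', and 'hc' keys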