def convert_subscribers(self):
    '''Send all subscribers to the Iterable API in evenly-sized batches.

    Validates directories and subscriber data, loads the subscriber
    list, splits it into at most ``self.api_limit``-sized batches,
    POSTs each batch to ``self.api_url``, and finally removes the
    temporary directory if one exists.
    '''
    ## Check directories and data
    self.data.check_directories()
    self.data.check_subscriber_data()

    ## Open data files
    self.list_subscribers = self.open_data_files('subscribers')
    total_subscribers = len(self.list_subscribers)

    # Guard: with zero subscribers the original computed api_calls == 0
    # and then divided by it (ZeroDivisionError).
    if total_subscribers:
        api_calls = math.ceil(total_subscribers / self.api_limit)
        # Balance batch sizes evenly across the calls (per <= api_limit).
        per = math.ceil(total_subscribers / api_calls)
        for i in range(api_calls):
            start = per * i
            batch = self.list_subscribers[start:start + per]
            payload = json.dumps({'users': batch})
            # NOTE(review): the HTTP response is not checked -- failed
            # batches are silently ignored; confirm this is intended.
            requests.post(self.api_url, headers=self.headers, data=payload)
            log_progress_bar(i + 1, api_calls, 'Sending data to Iterable',
                             'Subscriber conversion complete')

    ## Delete temporary directory
    if self.data.temp_dir_exists:
        self.data.remove_directory('temp')
def fetch_posts(self):
    '''Recursively fetch paginated post data from the WordPress API.

    Called from check_posts_data if the file
    'conversion-data/posts-json.txt' doesn't exist. Each call fetches
    one page (tracked by ``self.iterations['posts']``), appends the
    relevant fields to ``self.temp_post_data``, and recurses for the
    next page. When a request fails (e.g. past the last page) the
    accumulated data is flushed to the posts data file.
    '''
    try:
        url = (self.fetch_posts_link + self.url_args
               + str(self.iterations['posts']))
        # NOTE(review): the `cafile` argument is deprecated (removed in
        # Python 3.12); consider an ssl.SSLContext instead.
        with urllib.request.urlopen(url, cafile=certifi.where()) as res:
            data = res.read()
            total_pages = int(res.info().get('X-Wp-Totalpages'))
    except (urllib.error.HTTPError, urllib.error.URLError):
        data = None

    if data:
        # Persist the raw page, then parse it back as JSON.
        # (Context managers replace the original open/close pairs,
        # which leaked the handle if a write or json.load raised.)
        path = self.get_file_path('posts', True)
        with open(path, 'wb') as file_:
            file_.write(data)
        with open(path) as json_file:
            json_data = json.load(json_file)
    else:
        json_data = None

    if json_data:
        for n in json_data:
            # Stop collecting this page once posts are older than the
            # date limit (pages arrive newest-first).
            if self.date_limit \
                    and dateutil.parser.parse(n['date']) < self.date_limit:
                break
            self.temp_post_data.append({
                "id": n['id'],
                "link": n['link'],
                "tags": n['tags'],
                "category": n['categories']
            })
        log_progress_bar(self.iterations['posts'], total_pages,
                         'Fetching post data', 'Posts successfully fetched')
        self.iterations['posts'] += 1
        self.fetch_posts()
    else:
        # No more pages (or the request failed): flush collected data.
        path = self.get_file_path('posts')
        with open(path, 'w') as file_:
            file_.write(json.dumps(self.temp_post_data))
def fetch_categories(self):
    '''Recursively fetch paginated category data from the WordPress API.

    Called from check_categories_data if the file
    'conversion-data/categories-json.txt' doesn't exist. Each call
    fetches one page (tracked by ``self.iterations['categories']``),
    records each category in ``self.temp_categories_data`` keyed by ID,
    and recurses for the next page. When a request fails (e.g. past the
    last page) the accumulated data is flushed to the categories file.
    '''
    try:
        url = (self.fetch_categories_link + self.url_args
               + str(self.iterations['categories']))
        # NOTE(review): the `cafile` argument is deprecated (removed in
        # Python 3.12); consider an ssl.SSLContext instead.
        with urllib.request.urlopen(url, cafile=certifi.where()) as res:
            data = res.read()
            total_pages = int(res.info().get('X-Wp-Totalpages'))
    except (urllib.error.HTTPError, urllib.error.URLError):
        data = None

    if data:
        # Persist the raw page, then parse it back as JSON.
        # (Context managers replace the original open/close pairs,
        # which leaked the handle if a write or json.load raised.)
        path = self.get_file_path('categories', True)
        with open(path, 'wb') as file_:
            file_.write(data)
        with open(path) as json_file:
            json_data = json.load(json_file)
    else:
        json_data = None

    if json_data:
        for n in json_data:
            self.temp_categories_data[n['id']] = {
                "name": n['name'],
                "slug": n['slug'],
                "parent": n['parent']
            }
        log_progress_bar(self.iterations['categories'], total_pages,
                         'Fetching categories data',
                         'Categories successfully fetched')
        self.iterations['categories'] += 1
        self.fetch_categories()
    else:
        # No more pages (or the request failed): flush collected data.
        path = self.get_file_path('categories')
        with open(path, 'w') as file_:
            file_.write(json.dumps(self.temp_categories_data))
def run(self):
    '''Transfer each post's legacy category to the target site.

    Iterates the posts data file, skipping posts already recorded in
    ``self.progress`` and posts whose legacy category does not match
    ``self.legacy_category``, and POSTs each remaining post to
    ``self.post_request_path``. Errored posts are collected and printed
    at the end; the temp directory is removed last.
    '''
    self.data.check_input_files()

    ## Open data files
    self.list_posts = self.data.open_data_files('posts')

    ## Loop through all posts
    total_posts = len(self.list_posts)
    log_update(
        f'Starting conversion at {datetime.now().strftime("%H:%M:%S")}')
    errored_posts = []
    for index, post in enumerate(self.list_posts, start=1):
        post_id = int(post['id'])
        target = post['legacy_category']
        if post_id in self.progress:
            continue
        if target != self.legacy_category:
            errored_posts.append({
                'ID': post_id,
                'Reason': 'Legacy category doesn\'t match'
            })
            continue
        try:
            data = {
                'post': post_id,
                'dev_tag': f'legacy-category-{target}'
            }
            headers = {'Content-Type': 'application/json'}
            requests.post(f'{self.target}/{self.post_request_path}',
                          json=data,
                          headers=headers)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout) as e:
            errored_posts.append({
                'ID': post_id,
                'Reason': 'requests.exceptions.ConnectionError, requests.exceptions.Timeout'
            })
            print(
                "requests.exceptions.ConnectionError, requests.exceptions.Timeout"
            )
            print(e)
        except (ValueError, requests.exceptions.HTTPError,
                urllib.error.URLError) as e:
            errored_posts.append({
                'ID': post_id,
                'Reason': 'ValueError, requests.exceptions.HTTPError, urllib.error.URLError'
            })
            print(
                "ValueError, requests.exceptions.HTTPError, urllib.error.URLError"
            )
            print(e)
        except Exception as e:
            errored_posts.append({'ID': post_id, 'Reason': 'Exception'})
            print("Exception")
            print(e)
        # NOTE(review): progress is recorded even when the request
        # errored, so failed posts are skipped on re-runs -- confirm.
        log_progress_bar(
            index, total_posts,
            f'Updating post {post_id}: {index} of {total_posts}',
            'All posts updated')
        self.update_progress_file(post_id)

    log_update(
        f'Conversion finished at {datetime.now().strftime("%H:%M:%S")}')
    if errored_posts:
        print('Errors occurred on posts:')
        for post in errored_posts:
            print(post)

    ## Delete temporary directory
    if self.data.temp_dir_exists:
        self.data.remove_directory('temp')
def convert_results(self, results_file, dev_file):
    '''Convert a spreadsheet of tagging results into a dev-ready CSV.

    Reads ``results_file`` from ``self.results_path``, maps the human
    readable category names to category IDs via ``self.list_categories``,
    resolves a missing post ID / legacy category through the target
    site's WP REST API (looked up by the post's slug), and writes the
    converted rows to ``dev_file``.
    '''

    def _value(row, key):
        # csv.DictReader yields None for missing/short columns;
        # normalize those to the empty string.
        val = row.get(key)
        return val if val is not None else ''

    data = []
    with open(f'{self.results_path}/{results_file}') as csv_file:
        csv_reader = csv.DictReader(csv_file)
        for index, row in enumerate(csv_reader, start=1):
            post_link = _value(row, 'Link')
            hero_tag = _value(row, 'Hero Tag')
            tag_1 = _value(row, 'Tag 1')
            tag_2 = _value(row, 'Tag 2')
            backend_tags = _value(row, 'Backend Tags')
            post_id = _value(row, 'post_id')
            legacy_categories = _value(row, 'Legacy Category')

            # Last path segment of the link (trailing slash stripped).
            post_slug = urllib.parse.urlparse(
                post_link).path[:-1].split('/')[-1]

            # Map display names to category IDs; stop early once all
            # three are resolved.
            conv_hero_tag = ''
            conv_tag_1 = ''
            conv_tag_2 = ''
            for key, category in self.list_categories.items():
                if hero_tag == category['name']:
                    conv_hero_tag = key
                if tag_1 == category['name']:
                    conv_tag_1 = key
                if tag_2 == category['name']:
                    conv_tag_2 = key
                if conv_hero_tag and conv_tag_1 and conv_tag_2:
                    break

            split_backend_tags = backend_tags.split(
                ', ') if backend_tags else None
            conv_backend_tags = '|'.join(
                split_backend_tags) if split_backend_tags else ''

            errors = []
            if post_id and legacy_categories:
                # Both values supplied by the sheet; no API lookup needed.
                conv_post_id = post_id
                conv_legacy_category = legacy_categories
            else:
                try:
                    post_request = requests.get(
                        f'{self.target}/wp-json/wp/v2/posts/?slug={post_slug}'
                    ).json()
                except (requests.exceptions.ConnectionError,
                        requests.exceptions.Timeout):
                    post_request = None
                    errors.append("ConnectionError/Timeout")
                except (ValueError, requests.exceptions.HTTPError,
                        urllib.error.URLError):
                    post_request = None
                    errors.append("HTTPError/URLError")
                except Exception:
                    post_request = None
                    errors.append("exception")

                first_post = post_request[0] if post_request else None
                conv_post_id = post_id or (
                    first_post['id'] if first_post else None)

                if legacy_categories:
                    conv_legacy_category = legacy_categories
                else:
                    # Derive legacy categories from the fetched post's
                    # category IDs, keeping only the known legacy slugs.
                    legacy_ids = first_post['categories'] if first_post else []
                    matches = [
                        'good-advice', 'good-food', 'good-home',
                        'good-looks', 'good-sweat', 'good-travel'
                    ]
                    legacy_category_matches = [
                        self.list_categories[str(cid)]['slug']
                        for cid in legacy_ids
                        if self.list_categories[str(cid)]['slug'] in matches
                    ]
                    conv_legacy_category = '|'.join(legacy_category_matches)

            if conv_post_id:
                data.append([
                    conv_post_id, conv_hero_tag, conv_tag_1, conv_tag_2,
                    conv_legacy_category, conv_backend_tags
                ])

            # The original chose between two identical progress strings;
            # a single message is equivalent.
            progress_message = (
                f'Updating CSV post {conv_post_id}: '
                f'{index} of {self.total_post_conversions}')
            success_message = (
                'All CSV posts successfully converted' if not errors else
                f'CSV posts converted with {len(errors)} errors')
            log_progress_bar(index, self.total_post_conversions,
                             progress_message, success_message)

    with open(f'{self.results_path}/{dev_file}', 'w') as new_dev_file:
        dev_writer = csv.writer(new_dev_file,
                                delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        dev_writer.writerow([
            'post_id', 'hero_tag', 'tag_1', 'tag_2', 'legacy_category',
            'backend_tag'
        ])
        for entry in data:
            dev_writer.writerow(entry)
def run(self):
    '''Push converted category/tag assignments to each post.

    For every row in ``self.posts`` (skipping posts already recorded in
    ``self.progress``) this builds the full category list -- hero tag,
    tags 1 and 2, every ancestor of the hero tag, and the IDs of any
    matching legacy-category slugs -- then POSTs the update to the
    target site. Progress is recorded after each post.
    '''
    log_update(
        f'Starting conversion at {datetime.now().strftime("%H:%M:%S")}')
    for index, post in enumerate(self.posts, start=1):
        post_id = post[0]
        hero_tag = post[1]
        tag_1 = post[2]
        tag_2 = post[3]
        legacy_category = post[4]
        backend_tags = post[5]

        if post_id in self.progress:
            continue

        # Direct tag assignments (stored as strings; '' means unset).
        categories = []
        if hero_tag:
            categories.append(int(hero_tag))
        if tag_1:
            categories.append(int(tag_1))
        if tag_2:
            categories.append(int(tag_2))

        # Walk up the hero tag's ancestry; parent 0 marks the root.
        hero_parent = self.list_categories[str(
            hero_tag)]['parent'] if hero_tag else 0
        while hero_parent != 0:
            categories.append(hero_parent)
            hero_parent = self.list_categories[str(hero_parent)]['parent']

        # Include the IDs of matching legacy-category slugs.
        # (Split once, outside the scan over all categories.)
        legacy_slugs = legacy_category.split('|')
        for cat_id in self.list_categories:
            if self.list_categories[cat_id]['slug'] in legacy_slugs:
                categories.append(int(cat_id))

        try:
            data = {
                'categories': categories,
                'hero_tag': int(hero_tag) if hero_tag else '',
                'tag_1': int(tag_1) if tag_1 else '',
                'tag_2': int(tag_2) if tag_2 else '',
                'backend_tag': backend_tags,
                'legacy_category': legacy_category
            }
            headers = {
                'Content-Type': 'application/json',
                'Authorization': os.environ['ENCODED_LOGIN']
            }
            requests.post(
                f'{self.target}/wp-json/wp/v2/posts/{post_id}?skip_apple_news=true',
                json=data,
                headers=headers)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout) as e:
            print("******************")
            print(
                "requests.exceptions.ConnectionError, requests.exceptions.Timeout"
            )
            print(e)
            print("Errored post: " + post_id)
        except (ValueError, requests.exceptions.HTTPError,
                urllib.error.URLError) as e:
            print("******************")
            print(
                "ValueError, requests.exceptions.HTTPError, urllib.error.URLError"
            )
            print(e)
            print("Errored post: " + post_id)
        except Exception as e:
            print("******************")
            print("Exception")
            print(e)
            print("Errored post: " + post_id)

        # NOTE(review): progress is recorded even when the request
        # errored, so failed posts are skipped on re-runs -- confirm.
        log_progress_bar(
            index, self.total_post_updates,
            f'Updating post {post_id}: {index} of {self.total_post_updates}',
            'All posts updated')
        self.update_progress_file(post_id)

    log_update(
        f'Conversion finished at {datetime.now().strftime("%H:%M:%S")}')
def run(self):
    '''Transfer each post to its target backend tag on the target site.

    Iterates the posts data file, skipping posts already recorded in
    ``self.progress``, and POSTs each remaining post's ID and target
    backend tag to the transfer endpoint. Errored post IDs are
    collected and printed at the end; the temp directory is removed.
    '''
    self.data.check_input_files()

    ## Open data files
    self.list_posts = self.data.open_data_files('posts')

    ## Loop through all posts
    total_posts = len(self.list_posts)
    log_update(
        f'Starting conversion at {datetime.now().strftime("%H:%M:%S")}')
    errored_posts = []
    for index, post in enumerate(self.list_posts, start=1):
        post_id = int(post['id'])
        target = int(post['target_backend_tag'])
        if post_id in self.progress:
            continue
        try:
            data = {'post': post_id, 'tag': target}
            headers = {'Content-Type': 'application/json'}
            requests.post(
                f'{self.target}/wp-json/wellandgood/v1/transfer-to-backend-tags',
                json=data,
                headers=headers)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout) as e:
            errored_posts.append(post_id)
            print(
                "requests.exceptions.ConnectionError, requests.exceptions.Timeout"
            )
            print(e)
        except (ValueError, requests.exceptions.HTTPError,
                urllib.error.URLError) as e:
            errored_posts.append(post_id)
            print(
                "ValueError, requests.exceptions.HTTPError, urllib.error.URLError"
            )
            print(e)
        except Exception as e:
            errored_posts.append(post_id)
            print("Exception")
            print(e)
        # NOTE(review): progress is recorded even when the request
        # errored, so failed posts are skipped on re-runs -- confirm.
        log_progress_bar(
            index, total_posts,
            f'Updating post {post_id}: {index} of {total_posts}',
            'All posts updated')
        self.update_progress_file(post_id)

    log_update(
        f'Conversion finished at {datetime.now().strftime("%H:%M:%S")}')
    if errored_posts:
        print('Errors occurred on posts:')
        for post in errored_posts:
            print(post)

    ## Delete temporary directory
    if self.data.temp_dir_exists:
        self.data.remove_directory('temp')