def create_directory(self, dir):
    '''
    Create the temporary or the data directory.

    The temporary directory is used to compile data while fetching from
    the Wordpress API; the data directory is used to store formatted data
    after fetching. Both are created upon initiation of the fetch_tags or
    fetch_posts functions.

    :param dir: either 'temp' or 'data'; any other value is a no-op
    '''
    # Map the directory keyword to its full path; unknown keywords fall
    # through to None and nothing happens.
    known_paths = {
        'temp': f'{self.conversion_root}/{self.temp_path}',
        'data': f'{self.conversion_root}/{self.data_path}',
    }
    path = known_paths.get(dir)
    if path is None:
        return
    try:
        os.mkdir(path)
        # Record which directory now exists so later steps can clean up.
        if dir == 'temp':
            self.temp_dir_exists = True
        elif dir == 'data':
            self.data_dir_exists = True
        log_update(f'{dir.capitalize()} directory created')
    except FileExistsError:
        # Already present from a previous run — nothing to do.
        log_update(f'{dir.capitalize()} directory exists')
def check_posts_data(self):
    '''
    Check if the posts data exists and is complete; if not, fetch the posts.

    Compares the number of locally stored posts against the API's reported
    total (the 'X-Wp-Total' response header) and only refetches when the
    local data is missing or incomplete.
    '''
    # Local import keeps this fix self-contained; ssl is stdlib.
    import ssl

    posts_file = self.get_file_path('posts')
    file_exists = False
    recompile_posts = True
    if os.path.exists(posts_file):
        file_exists = True
        # `cafile=` was deprecated in Python 3.6 and removed in 3.12;
        # build an SSL context from certifi's CA bundle instead.
        context = ssl.create_default_context(cafile=certifi.where())
        with open(posts_file, 'r') as json_posts, \
                urllib.request.urlopen(
                    self.fetch_posts_link + self.url_args,
                    context=context) as res:
            # The file is written with json.dumps elsewhere in this module,
            # so parse it as JSON; ast.literal_eval would choke on
            # true/false/null.
            temp_post_data = json.loads(json_posts.read())
            # Previously the response was opened but never inspected and
            # the parsed data was discarded, so posts were always
            # refetched. Only refetch when the local data is incomplete.
            if int(res.info().get('X-Wp-Total')) == len(temp_post_data):
                log_update('Posts have been compiled and are up to date')
                self.temp_post_data = temp_post_data
                recompile_posts = False
    if not file_exists or \
            (recompile_posts and self.update_data):
        self.temp_post_data = []
        self.fetch_posts()
    else:
        if file_exists:
            log_update('Loading "_data/data-posts.json"')
        else:
            log_update('Missing "_data/data-posts.json"')
def check_subscriber_data(self):
    '''
    Make sure "_data/data-subscribers.csv" exists and convert to
    "data-score.json".
    '''
    # Guard clause: bail out early when the input CSV is missing.
    if not os.path.exists(self.csv):
        log_update(f'Missing CSV file')
        return None
    self.convert_subscriber_csv()
def check_conversion_data(self):
    '''
    Make sure "_data/tag-conversions.json" exists and convert to
    "data-conversion.json".
    '''
    json_path = self.get_file_path('conversion')
    # Only rebuild from the spreadsheet when the JSON file is absent.
    if not os.path.exists(json_path):
        self.convert_conversion_csv()
    else:
        log_update(f'Loading "{self.conversion_file_json}"')
def check_score_data(self):
    '''
    Make sure "_data/tag-scores.json" exists and convert to
    "data-score.json".
    '''
    json_path = self.get_file_path('score')
    # Only rebuild from the spreadsheet when the JSON file is absent.
    if not os.path.exists(json_path):
        self.convert_score_csv()
    else:
        log_update(f'Loading "{self.score_file_json}"')
def convert_score_csv(self):
    '''
    Convert the "scores" worksheet of the Google spreadsheet to a JSON
    file mapping tag name -> integer score.
    '''
    log_update(f'Converting scores from Google Sheet to JSON')
    sheet_rows = self.google_client.open_by_key(
        self.google_sheet_key).worksheet('scores').get_all_values()
    # First row is the header; the rest are (name, score) records.
    _header, *records = sheet_rows
    scores = {record[0]: int(record[1]) for record in records}
    with open(self.get_file_path('score'), 'w') as json_file:
        json_file.write(json.dumps(scores))
def check_posts_data(self):
    '''
    Check if the posts data exists, if not, fetch the posts.

    Compares the number of locally stored posts against the API's reported
    total (the 'X-Wp-Total' response header) and only refetches when the
    local data is missing or incomplete.
    '''
    # Local import keeps this fix self-contained; ssl is stdlib.
    import ssl

    posts_file = self.get_file_path('posts')
    file_exists = False
    recompile_posts = True
    if os.path.exists(posts_file):
        file_exists = True
        # `cafile=` was deprecated in Python 3.6 and removed in 3.12;
        # build an SSL context from certifi's CA bundle instead.
        context = ssl.create_default_context(cafile=certifi.where())
        with open(posts_file, 'r') as json_posts, \
                urllib.request.urlopen(self.fetch_posts_link,
                                       context=context) as res:
            # The file is written with json.dumps elsewhere in this module,
            # so parse it as JSON; ast.literal_eval would choke on
            # true/false/null.
            temp_post_data = json.loads(json_posts.read())
            if int(res.info().get('X-Wp-Total')) == len(temp_post_data):
                log_update('Posts have been compiled and are up to date')
                self.temp_post_data = temp_post_data
                recompile_posts = False
    if not file_exists or \
            (recompile_posts and self.update_data):
        self.temp_post_data = []
        self.fetch_posts()
    else:
        if file_exists:
            log_update('Loading "_data/data-posts.json"')
        else:
            log_update('Missing "_data/data-posts.json"')
def check_progress_data(self, reset_progress):
    '''
    Make sure "_data/data-progress.json" exists.

    Creates the progress file with an empty list when it is missing, and
    rewrites it with an empty list when a reset is requested.

    :param reset_progress: when truthy, discard any existing progress
    '''
    progress_file = self.get_file_path('progress')
    file_exists = os.path.exists(progress_file)
    if file_exists and not reset_progress:
        log_update(f'Loading "{self.progress_file_json}"')
        return
    if file_exists:
        log_update(f'Resetting "{self.progress_file_json}"')
    # Opening with 'w' truncates the file, so the previous os.remove()
    # before rewriting was redundant; one write covers both the
    # missing-file and reset cases with the same end state.
    with open(progress_file, 'w') as outfile:
        outfile.write(json.dumps([]))
def remove_directory(self, dir):
    '''
    Remove the temporary directory.

    The temporary directory is used to compile data while fetching from
    the Wordpress API; it is deleted after running the convert_tags
    function.

    :param dir: only 'temp' is recognized; any other value is a no-op
    '''
    path = None
    if dir == 'temp':
        path = self.get_file_path('temp')
    if path and os.path.exists(path):
        try:
            shutil.rmtree(path)
            if dir == 'temp':
                self.temp_dir_exists = False
            log_update(f'{dir.capitalize()} directory removed')
        except OSError as e:
            # Fixed message: this handler runs when *removal* fails
            # (it previously said "creating").
            log_update(f'Error removing temp directory: {e.filename} - {e.strerror}')
def convert_conversion_csv(self):
    '''
    Convert the "conversions" worksheet of the Google spreadsheet to a
    JSON file keyed by tag slug.
    '''
    log_update(f'Converting conversions from Google Sheet to JSON')
    sheet_rows = self.google_client.open_by_key(
        self.google_sheet_key).worksheet('conversions').get_all_values()
    # First row is the header; the rest are conversion records.
    _header, *records = sheet_rows
    data = {}
    for record in records:
        # Columns: slug, conversion1, conversion2, backend flag,
        # franchise flag (the flags arrive as 'TRUE'/'FALSE' strings).
        data[record[0]] = {
            "conversion1": record[1],
            "conversion2": record[2],
            "backend_tag": record[3] == 'TRUE',
            "franchise_tag": record[4] == 'TRUE'
        }
    with open(self.get_file_path('conversion'), 'w') as json_file:
        json_file.write(json.dumps(data))
def convert_subscriber_csv(self):
    '''
    Convert the input subscriber CSV to a JSON file, merging repeated
    rows for the same email address via Subscriber.compile_data.
    '''
    log_update(f'Converting CSV to JSON')
    compiled = {}
    with open(self.csv) as csv_file:
        for row in csv.DictReader(csv_file):
            subscriber = Subscriber(row)
            if not subscriber.is_valid:
                continue
            # Fold repeated emails together: pass the previously compiled
            # record (or None on first sight) into compile_data.
            priors = compiled.get(subscriber.email)
            compiled[subscriber.email] = subscriber.compile_data(priors)
    with open(self.get_file_path('subscribers'), 'w') as json_file:
        json_file.write(json.dumps(list(compiled.values())))
def run(self):
    '''
    Drive the legacy-category conversion.

    Loads the posts data file, then for every post whose legacy category
    matches this converter's category, POSTs the post id and a
    'legacy-category-*' dev tag to the target endpoint. Posts already
    recorded in self.progress are skipped; failures are collected and
    printed at the end, and the temp directory is removed when finished.
    '''
    self.data.check_input_files()
    ## Open data files
    self.list_posts = self.data.open_data_files('posts')
    ## Loop through all posts
    total_posts = len(self.list_posts)
    log_update(
        f'Starting conversion at {datetime.now().strftime("%H:%M:%S")}')
    errored_posts = []
    # enumerate(start=1) replaces the manual index counter; it still
    # counts every post, including ones skipped via continue.
    for index, post in enumerate(self.list_posts, start=1):
        post_id = int(post['id'])
        target = post['legacy_category']
        # Skip posts already handled by a previous (resumed) run.
        if post_id in self.progress:
            continue
        if target != self.legacy_category:
            errored_posts.append({
                'ID': post_id,
                'Reason': 'Legacy category doesn\'t match'
            })
            continue
        try:
            data = {
                'post': post_id,
                'dev_tag': f'legacy-category-{target}'
            }
            headers = {'Content-Type': 'application/json'}
            # Response was previously bound to an unused variable.
            requests.post(f'{self.target}/{self.post_request_path}',
                          json=data,
                          headers=headers)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout) as e:
            errored_posts.append({
                'ID': post_id,
                'Reason': 'requests.exceptions.ConnectionError, requests.exceptions.Timeout'
            })
            print(
                "requests.exceptions.ConnectionError, requests.exceptions.Timeout"
            )
            print(e)
        except (ValueError, requests.exceptions.HTTPError,
                urllib.error.URLError) as e:
            errored_posts.append({
                'ID': post_id,
                'Reason': 'ValueError, requests.exceptions.HTTPError, urllib.error.URLError'
            })
            print(
                "ValueError, requests.exceptions.HTTPError, urllib.error.URLError"
            )
            print(e)
        except Exception as e:
            # Broad catch keeps the batch running; the failure is recorded
            # and reported after the loop.
            errored_posts.append({'ID': post_id, 'Reason': 'Exception'})
            print("Exception")
            print(e)
        log_progress_bar(
            index, total_posts,
            f'Updating post {post_id}: {index} of {total_posts}',
            'All posts updated')
        self.update_progress_file(post_id)
    log_update(
        f'Conversion finished at {datetime.now().strftime("%H:%M:%S")}')
    if errored_posts:
        # Typo fix: message previously read "occrued".
        print('Errors occurred on posts:')
        for post in errored_posts:
            print(post)
    if self.data.temp_dir_exists:
        self.data.remove_directory('temp')
def run(self):
    '''
    Apply category and tag updates to each post via the WP REST API.

    Iterates self.posts (positional rows), builds the full category list
    for each post — explicit tags, the hero tag's ancestor chain, and any
    categories matching the legacy-category slugs — then POSTs the update
    to the target Wordpress site. Posts already in self.progress are
    skipped; progress is persisted after each post so the run can resume.
    '''
    log_update(
        f'Starting conversion at {datetime.now().strftime("%H:%M:%S")}')
    index = 0
    for post in self.posts:
        index += 1
        # NOTE(review): rows appear to be positional:
        # [id, hero_tag, tag_1, tag_2, legacy_category, backend_tags]
        # — confirm against the producer of self.posts.
        post_id = post[0]
        hero_tag = post[1]
        tag_1 = post[2]
        tag_2 = post[3]
        legacy_category = post[4]
        backend_tags = post[5]
        # if post_id != str(9431934):
        #     continue
        # Skip posts already handled by a previous (resumed) run.
        if post_id in self.progress:
            continue
        categories = []
        if hero_tag:
            categories.append(int(hero_tag))
        if tag_1:
            categories.append(int(tag_1))
        if tag_2:
            categories.append(int(tag_2))
        # Walk up the hero tag's ancestor chain, appending every parent
        # category id until the root (parent == 0) is reached.
        hero_parent = self.list_categories[str(
            hero_tag)]['parent'] if hero_tag else 0
        while hero_parent != 0:
            categories.append(hero_parent)
            parent = self.list_categories[str(hero_parent)]['parent']
            hero_parent = parent
        # Also attach every category whose slug appears in the
        # pipe-separated legacy_category field.
        for cat_id in self.list_categories:
            if self.list_categories[cat_id][
                    'slug'] in legacy_category.split('|'):
                categories.append(int(cat_id))
        try:
            data = {
                'categories': categories,
                'hero_tag': int(hero_tag) if hero_tag else '',
                'tag_1': int(tag_1) if tag_1 else '',
                'tag_2': int(tag_2) if tag_2 else '',
                'backend_tag': backend_tags,
                'legacy_category': legacy_category
            }
            headers = {
                'Content-Type': 'application/json',
                # NOTE(review): presumably a pre-encoded Basic auth
                # header value — verify how ENCODED_LOGIN is produced.
                'Authorization': os.environ['ENCODED_LOGIN']
            }
            # skip_apple_news=true prevents re-publishing to Apple News
            # on this bulk update — TODO confirm against the site plugin.
            r = requests.post(
                f'{self.target}/wp-json/wp/v2/posts/{post_id}?skip_apple_news=true',
                json=data,
                headers=headers)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout) as e:
            post_request = None
            print("******************")
            print(
                "requests.exceptions.ConnectionError, requests.exceptions.Timeout"
            )
            print(e)
            print("Errored post: " + post_id)
            pass
        except (ValueError, requests.exceptions.HTTPError,
                urllib.error.URLError) as e:
            post_request = None
            print("******************")
            print(
                "ValueError, requests.exceptions.HTTPError, urllib.error.URLError"
            )
            print(e)
            print("Errored post: " + post_id)
            pass
        except Exception as e:
            # Broad catch keeps the batch running after any failure.
            post_request = None
            print("******************")
            print("Exception")
            print(e)
            print("Errored post: " + post_id)
            pass
        log_progress_bar(
            index, self.total_post_updates,
            f'Updating post {post_id}: {index} of {self.total_post_updates}',
            'All posts updated')
        # Persist progress after every post so an interrupted run resumes
        # where it left off.
        self.update_progress_file(post_id)
    log_update(
        f'Conversion finished at {datetime.now().strftime("%H:%M:%S")}')
def run(self):
    '''
    Drive the backend-tag transfer.

    Loads the posts data file, then POSTs each post id and its target
    backend tag to the transfer-to-backend-tags endpoint. Posts already
    recorded in self.progress are skipped; failed post ids are collected
    and printed at the end, and the temp directory is removed when
    finished.
    '''
    self.data.check_input_files()
    ## Open data files
    self.list_posts = self.data.open_data_files('posts')
    ## Loop through all posts
    total_posts = len(self.list_posts)
    log_update(f'Starting conversion at {datetime.now().strftime("%H:%M:%S")}')
    errored_posts = []
    # enumerate(start=1) replaces the manual index counter; it still
    # counts every post, including ones skipped via continue.
    for index, post in enumerate(self.list_posts, start=1):
        post_id = int(post['id'])
        target = int(post['target_backend_tag'])
        # Skip posts already handled by a previous (resumed) run.
        if post_id in self.progress:
            continue
        try:
            data = {
                'post': post_id,
                'tag': target
            }
            headers = {
                'Content-Type': 'application/json'
            }
            # Response was previously bound to an unused variable.
            requests.post(f'{self.target}/wp-json/wellandgood/v1/transfer-to-backend-tags',
                          json=data,
                          headers=headers)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout) as e:
            errored_posts.append(post_id)
            print("requests.exceptions.ConnectionError, requests.exceptions.Timeout")
            print(e)
        except (ValueError, requests.exceptions.HTTPError,
                urllib.error.URLError) as e:
            errored_posts.append(post_id)
            print("ValueError, requests.exceptions.HTTPError, urllib.error.URLError")
            print(e)
        except Exception as e:
            # Broad catch keeps the batch running; the failure is recorded
            # and reported after the loop.
            errored_posts.append(post_id)
            print("Exception")
            print(e)
        log_progress_bar(
            index, total_posts,
            f'Updating post {post_id}: {index} of {total_posts}',
            'All posts updated')
        self.update_progress_file(post_id)
    log_update(f'Conversion finished at {datetime.now().strftime("%H:%M:%S")}')
    if errored_posts:
        # Typo fix: message previously read "occrued".
        print('Errors occurred on posts:')
        for post in errored_posts:
            print(post)
    if self.data.temp_dir_exists:
        self.data.remove_directory('temp')