def test_file_exists(self): """Ensure an error is raised when a file exists""" with TemporaryDirectory() as temp: fp=os.path.join(temp, "qwert.txt") with atomic_write(fp, "w") as f: f.write("qwert") assert os.path.exists(fp) try: with atomic_write(fp, "w") as f: f.write("1234") except FileExistsError as e: self.assertIsInstance(e, FileExistsError)
def save_followers(file: str, followers: list):  # pragma: no cover
    """Saving followers list locally and counting new followers

    :param file: str filepath of the txt file
    :param followers: usernames list from get_followers
    :return: return the number of new followers
    :rtype: int
    """
    if not followers:
        print("No followers found.")
        return 0
    new_followers = 0
    if os.path.exists(file):
        print("Updating followers.txt ...")
        # BUG FIX: the original opened the file in "a+" mode and called
        # f.read() inside the loop. "a+" positions the file pointer at
        # EOF, so every read returned "" and ALL usernames were appended
        # again as "new", duplicating existing followers. Read the known
        # names once, then append only genuinely new ones.
        with open(file, "r") as f:
            known = set(f.read().split())
        with open(file, "a") as f:
            for username in followers:
                if username not in known:
                    f.write("%s\n" % username)
                    known.add(username)  # guard against dupes in the input
                    new_followers += 1
    else:
        # First run: create the file atomically so a crash mid-write
        # never leaves a partial followers file behind.
        with atomic_write(file, mode="w", as_file=True) as f:
            print("Creating followers.txt ...")
            for username in followers:
                f.write("%s\n" % username)
                new_followers += 1
    return new_followers
def test_str_returned(self): """ 1. Test that a string is returned if the as_file argument is False. 2. Test that the object returned is not a string when as_file is True. """ with TemporaryDirectory() as tmp: fp = os.path.join(tmp, "asdf.txt") with atomic_write(fp, as_file=False) as file: self.assertEqual(type(file), str) with TemporaryDirectory() as tmp: fp = os.path.join(tmp, "asdf.txt") with atomic_write(fp, as_file=True) as file: self.assertNotEqual(type(file), str)
def excel_2_parquet(data):
    """Convert an Excel file into a parquet file written atomically.

    :param data: path to the source Excel file
    :return: path of the parquet file that was created
    """
    frame = pd.read_excel(data)
    base, _ext = os.path.splitext(data)
    parquet_path = base + ".parquet"
    # as_file=False yields the temporary destination path; writing there
    # keeps the final parquet_path atomic.
    with atomic_write(parquet_path, mode="w", as_file=False) as tmp_target:
        frame.to_parquet(tmp_target)
    return parquet_path
def get_img():
    """Find the current post's full-size image on the page, download it
    atomically into data/images/, and return its local path.

    Returns None when no image wider than the 293px preview size exists
    on the page (falls off the loop without a match).
    """
    # Find all elements containing img
    page_images = driver.find_elements_by_xpath("//img")
    # Check all images and getting their attribute sizes
    for img in page_images:
        sizes = img.get_attribute("sizes")
        # 293px is the size of the post preview; any picture bigger than
        # 293px is the post picture, so this gives you only one url.
        # BUG FIX: the original compared strings (sizes > "293px"), which
        # is lexicographic — e.g. "1000px" ranked BELOW "293px". Extract
        # the digits and compare numerically instead.
        width = int("".join(ch for ch in (sizes or "") if ch.isdigit()) or 0)
        if width > 293:
            url = img.get_attribute("src")
            # Image filename taken from the URL's path component.
            img_name = os.path.basename(urlparse(url).path)
            # Create folders to store the image
            os.makedirs("data/images", exist_ok=True)
            # Path where the image will be saved
            path = os.path.join(os.path.abspath("data"), "images/", img_name)
            # Making sure the file doesn't exist already
            if not os.path.exists(path):
                headers = {}
                with requests.get(url, stream=True, headers=headers) as req:
                    # raise_for_status() raises on 4XX/5XX and returns
                    # None on success, so HTTP failures propagate as
                    # exceptions. (The original checked its return value
                    # for non-None, which can never happen — dead code.)
                    req.raise_for_status()
                # Writing file atomically locally
                with atomic_write(path, as_file=False) as f:
                    print("Saving image...")
                    urlretrieve(url, filename=f)
                print("Image saved :)")
            else:
                # Pass if the picture is already present in the folder
                print("Picture already exists :(")
            # Returning path of the picture
            return path
        else:  # pragma: no cover
            pass
def test_file_exists(self): """Ensure an error is raised when a file exists""" with TemporaryDirectory() as tmp: fp = os.path.join(tmp, "asdf.txt") with open(fp, "w") as f: f.write("test content") with self.assertRaises(FileExistsError): with atomic_write(fp, "w") as f: f.write("this shouldn't work")
def test_atomic_failure(self): """Ensure that file does not exist after failure during write""" with TemporaryDirectory() as tmp: fp = os.path.join(tmp, "abcd.txt") with self.assertRaises(FakeFileFailure): with atomic_write(fp, "w") as f: tmpfile = f.name assert os.path.exists(tmpfile) raise FakeFileFailure() assert not os.path.exists(tmpfile) assert not os.path.exists(fp)
def print_reports(self): """Method to print all the reports """ data = confusion_matrix(self.y_test, self.y_pred) df_cm = pd.DataFrame(data, columns=np.unique(self.names), index=np.unique(self.names)) df_cm.index.name = 'Actual' df_cm.columns.name = 'Predicted' plt.figure(figsize=(10, 8)) title1 = 'Confusion matrix for ' + self.model_name sns.set(font_scale=1.4) #for label size # suffix file with the timestamp timestr = datetime.now().strftime("%Y_%m_%d-%I_%M_%S_%p") suffix = '_confusion_map_' + timestr + '.png' fig_name = f'{self.result_path}{self.model_name}{suffix}' sns.heatmap(df_cm, cmap="viridis", annot=True, annot_kws={ "size": 16 }, fmt='d').set_title(title1) # save the classification heatmap plt.savefig(fig_name, dpi=300) print( classification_report(self.y_test, self.y_pred, target_names=self.names)) report = classification_report(self.y_test, self.y_pred, target_names=self.names, output_dict=True) # frame the dataframe object for the classification report df = pd.DataFrame(report).transpose() report_file = self.model_name + '_clf_report' + timestr + '.csv' filename = f'{self.result_path}{report_file}' # get the atomic writer setup if os.path.exists(filename) == False: # atomically write csv file by getting the path where to write it with atomic_write(filename, "w", False) as f: dir_path = f if self.test_flag == "No": new_file = f'{dir_path}/results/{report_file}' else: new_file = f'{dir_path}/test/results/{report_file}' df.to_csv(new_file) self.plot_clasf_map()
def test_atomic_write(self): """Ensure file exists after being written successfully""" with TemporaryDirectory() as tmp: fp = os.path.join(tmp, "abcd.txt") with atomic_write(fp, "w") as f: assert not os.path.exists(fp) tmpfile = f.name f.write("1234") assert not os.path.exists(tmpfile) assert os.path.exists(fp) with open(fp) as f: self.assertEqual(f.read(), "1234")
def test_file_exists(self): """Ensure an error is raised when a file exists""" # Create a new temporary file in a temporary directory with TemporaryDirectory() as tmp: fp = os.path.join(tmp, "asdf.txt") # Create a fake file with the same name in the same temp directory. with open(fp, "w") as existing_file: existing_file.write("I Exist") # Use assertRaises as a context manager testing if atomic_writer raises an exception when the file exists. with self.assertRaises(FileExistsError): # Try atomically writing to another file with the same name. with atomic_write(fp) as file: file.write("This file should not be written")
def test_atomic_failure(self): """Ensure that file does not exist after failure during write""" # Create a new temporary file in a temporary directory with TemporaryDirectory() as tmp: fp = os.path.join(tmp, "asdf.txt") # Create a fake failure. Temp file exists but before writing a FakeFailFailure() is raised. with self.assertRaises(FakeFileFailure): with atomic_write(fp, "w") as f: tmpfile = f.name assert os.path.exists(tmpfile) raise FakeFileFailure() # After the FakeFailFailure(), ensure that the temporary file path and permanent file path do not exist. assert not os.path.exists(tmpfile) assert not os.path.exists(fp)
def test_other_kwargs(self): """ Test that the atomic_write method can accept other keyword arguments. """ # Create a new temporary file in a temporary directory with TemporaryDirectory() as tmp: fp = os.path.join(tmp, "asdf.txt") # Write a character using UTF-8 encoding with atomic_write(fp, encoding='UTF-8') as file: first_ln = chr(57344) file.write(first_ln) # Read back what was written to the file and ensure it is in UTF-8. with open(fp, 'r') as opened_file: line_file = opened_file.read() self.assertEqual(line_file, '\ue000')
def get_vid():
    """Save the current post's video poster frame (first frame, jpg)
    atomically under data/videos/ and return its local path.
    """
    # The first <video> element is always the post's video.
    page_video = driver.find_element_by_xpath("//video")
    # Getting the URL of the video
    # url = page_video.get_attribute("src")
    # On December 8th Instagram changed something here which affected src
    # that we were getting. Due to this and for now we will only save the
    # first frame of the video as jpg - speed up bot.
    url = page_video.get_attribute("poster")
    # Video filename taken from the URL's path component.
    filename = os.path.basename(urlparse(url).path)
    # Create folders to store the video
    os.makedirs("data/videos", exist_ok=True)
    # Path where the video will be saved
    path = os.path.join("data/videos/", filename)
    # Making sure the video doesn't exist already
    if not os.path.exists(path):
        with requests.get(url, stream=True) as req:
            # raise_for_status() raises on 4XX/5XX and returns None on
            # success, so HTTP errors propagate as exceptions. (The
            # original compared its return value to None, a branch that
            # could never execute — dead code removed.)
            req.raise_for_status()
        # Writing file atomically locally
        with atomic_write(path, as_file=False) as f:
            urlretrieve(url, filename=f)
        print("Video saved :)")
        # Returning the video path in order to use with other functions
        return path
    else:
        # Pass if the video is already present in the folder
        print("Video already exists :(")
        return path
def test_atomic_write(self): """Ensure file exists after being written successfully""" # Create a new temporary directory with TemporaryDirectory() as tmp: # Join the tmp file directory to the made up file name and store as a file path fp = os.path.join(tmp, "asdf.txt") # use the atomic_write method as a context manager. with atomic_write(fp, "w") as f: assert not os.path.exists(fp) tmpfile = f.name f.write("asdf") # After the atomic_write method finishes, make sure the tmpfile is removed. assert not os.path.exists(tmpfile) # Make sure that the non-temp file exists after the atomic_write context manager finishes assert os.path.exists(fp) # Check that the new atomically written file was written correctly. with open(fp) as f: self.assertEqual(f.read(), "asdf")
def dash_data(file: str, save_to: str): """ Get dashboard data, format to pd, saving relevant columns to a parquet file""" # Deleting old parquet file - This will be improved in future versions if os.path.exists(save_to): delete_file(save_to) # Copying locally the excel file to a parquet file # This file will be used to retrieve the data the user provided with atomic_write(save_to, mode="w", as_file=False) as f: print("Loading user data...") # Read dashboard excel sheets and merge them into one single pandas DataFrame df = pd.concat( pd.read_excel(file, sheet_name=["main", "comments"]), ignore_index=True, ) # Get only the columns object, tags, and accounts df = df[["object", "hashtags", "accounts", "comments"]] # Drop any row that has only NaNs in it df = df.dropna(axis=0, how="all") # Drop row 0 that contains the name of the columns (see on excel file row 14) # df = df.iloc[1:] # Convert to a parquet file format df.to_parquet(f)
def normalize_save_data(self): """Function to normalize and save the data in CSV file""" #print("===============================================\n") if self.suffix == "mat": arrIP, gt = self.mat.get_data() # get data from mat type file elif self.suffix == "npy": arrIP, gt = self.nmpy.get_data() # get data from mat type file else: print("Unsupported file type") raise AttributeError() #print("===================+++++++++================\n") # reshaping the data for the classification X = np.reshape(arrIP, (arrIP.shape[0] * arrIP.shape[1], arrIP.shape[2])) # Normalisation of data normalized_X = preprocessing.normalize(X) # converting numpy array to dataframe # please note that I am using normalized data to create the Data Frame which will be used # for further processing. df = pd.DataFrame(data=normalized_X) df_class = pd.DataFrame(data=gt.ravel()) df = pd.concat([df, df_class], axis=1) #Override the default initialized values of r1 and r2 using dict of object self.lis.__dict__['r1'] = 1 self.lis.__dict__['r2'] = arrIP.shape[ 2] # get the number of spectral bands list3 = self.lis.createfilelist() # set the names of the columns in the dataset df.columns = [f'band{i}' for i in list3] + ['classes'] # filename=f'{self.result_path}image_norm.parquet' # worked with cli # df.to_parquet(filename,engine='fastparquet',compression=None) # # filename2=f'{self.result_path}image_norm.csv' # df.to_csv(filename2) filename = f'{self.result_path}image_norm.parquet' # worked with cli if os.path.exists(filename) == False: # atomically write parquet file by getting the path where to write it with atomic_write(filename, "w", False) as f: dir_path = f if self.test_flag == "No": new_file = '{0}/results/image_norm.parquet'.format( str(dir_path)) # worked with cli else: new_file = '{0}/test/results/image_norm.parquet'.format( str(dir_path)) # worked with pytest df.to_parquet(new_file, engine='fastparquet', compression=None) filename2 = f'{self.result_path}image_norm.csv' if 
os.path.exists(filename2) == False: # atomically write csv file by getting the path where to write it with atomic_write(filename2, "w", False) as f: dir_path = f if self.test_flag == "No": new_file2 = '{0}/results/image_norm.csv'.format(str(dir_path)) else: new_file2 = '{0}/test/results/image_norm.csv'.format( str(dir_path)) df.to_csv(new_file2)