def load_connectivity_mx(self):
    """
    Load the connectivity matrices from the chosen directory.

    The directory is globbed for ``*.txt`` files. If a CSV was supplied
    (``self.csv``), it is searched for a column whose values match the
    filenames (assumes meaningful filenames), and the files are reordered
    to match the CSV row order so latent group information can later be
    appended to it. Without a CSV the files are human-sorted (natsort),
    the way file explorers order them.

    Side effects: sets ``self.connectivity_mx`` (subjects x nodes x nodes),
    ``self.num_subjects``, ``self.num_nodes`` and initialises
    ``self.data_frame`` with file names, subject numbers and zeroed
    "Expert Labels". Prints progress; returns early (with a message) on an
    invalid directory or a CSV/file-count mismatch.
    """
    print("Finding the directory...")
    # Load the file names; glob does not raise for an invalid directory,
    # it just returns an empty list.
    fnames_paths = glob(self.directory + "/*.txt")
    if not fnames_paths:
        print("Invalid directory")
        return

    # Strip the directory part. rsplit is safe even when the path
    # contains no '/' (the original reverse-find slice mangled that case).
    # Materialise as a list: it is indexed and iterated more than once
    # below, so a lazy map() would break (TypeError on fnames[0]).
    fnames = [p.rsplit('/', 1)[-1] for p in fnames_paths]

    print("Ordering filenames...")
    if self.csv is not None:
        if len(fnames_paths) != len(self.csv):
            print("Mismatch between CSV and loaded connectivity matrices")
            # BUG FIX: was a bare `exit` (a no-op expression) — abort the
            # load instead of continuing with inconsistent data.
            return
        print("Extracting meaningful CSV column...")
        # Find the CSV column that contains the filenames.
        column = extract_column(self.csv.values, fnames[0])
        print("Column " + str(column))
        print("Matching CSV order...")
        # Get the csv column with the correct file order.
        real_order = self.csv.values[:, column]
        indexes = reorder_fn(real_order, fnames)
    else:
        # If no CSV, sort the filenames the same way file explorers do.
        print("No CSV, Human sorting...")
        indexes = natsort.index_humansorted(fnames)

    # Apply the reordering to both the bare names and the full paths.
    fnames = np.array(fnames)[indexes]
    fnames_paths = np.array(fnames_paths)[indexes]

    print("Loading from the directory...")
    # Load the data from the files (one square matrix per subject).
    files = np.array([np.loadtxt(f) for f in fnames_paths])

    print("Assigning values...")
    self.connectivity_mx = files
    # Shape is (subjects, nodes, nodes); matrices are square, so the
    # second node dimension is discarded rather than rebinding `n` twice.
    m, n, _ = self.connectivity_mx.shape
    self.num_subjects = m
    self.num_nodes = n

    # Init the pandas data frame and expose the index as a named column.
    self.data_frame = pd.DataFrame({"File Names": fnames})
    self.data_frame['Subject Number'] = self.data_frame.index
    # Initialise the user-defined groups with zeros.
    self.data_frame["Expert Labels"] = pd.Series(np.zeros(m, dtype=np.int64))
    print("Connectivity matrix loaded.")
def test_index_humansorted_returns_results_identical_to_index_natsorted_with_LOCALE():
    # Mixed-case fruit/vegetable names exercise locale-aware ordering.
    samples = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
    expected = index_natsorted(samples, alg=ns.LOCALE)
    assert index_humansorted(samples) == expected
# If natsort can't be found, paste this into the console and run it:
# !pip install natsort

# This cell is for Deeper Exit Point Analysis.
# This is set up so that I can clearly see the coordinates and what each page is.
DPRexitPoints = exitPoints.sort_values(by=['pageId'])

# Build the id-indexed page lookup once instead of re-running
# pageData.set_index('id') for every mapped column.
page_lookup = pageData.set_index('id')
DPRexitPoints['Latitude'] = DPRexitPoints['pageId'].map(page_lookup['Latitude'])
DPRexitPoints['Longitude'] = DPRexitPoints['pageId'].map(page_lookup['Longitude'])
# I added the page names so that I can easily tell where in the story it
# is placed.
DPRexitPoints['PageName'] = DPRexitPoints['pageId'].map(page_lookup['name'])

# Sorts the page names by number then letter.
DPRexitPoints = DPRexitPoints.iloc[natsort.index_humansorted(DPRexitPoints.PageName)]
# Drops unnecessary columns.
DPRexitPoints = DPRexitPoints[['PageName', 'NumExitedHere']]
DPRexitPoints.to_csv('Deeper Exit Point Analysis.csv')
#%%
# This cell is to explore the average time spent on each page:
# list of each page and frequency of time on each page.
# Drops unnecessary columns then sorts them by ID.
AvTimeOnPage = DPRtimeSpentOnPage.drop(['user', 'date'], axis=1).sort_values('pageId')
# Changes the dtype of the timeOnPage column to timedelta.
# Vectorized pd.to_timedelta on the whole Series replaces the row-wise
# apply(lambda ...), which did the same conversion one row at a time.
AvTimeOnPage['timeOnPage'] = pd.to_timedelta(AvTimeOnPage['timeOnPage'])
def test_index_humansorted_returns_results_identical_to_index_natsorted_with_LOCALE():
    # humansorted should be pure sugar for natsorted with the LOCALE alg.
    words = ["Apple", "corn", "Corn", "Banana", "apple", "banana"]
    assert index_natsorted(words, alg=ns.LOCALE) == index_humansorted(words)
def test_index_humansorted_is_identical_to_index_natsorted_with_locale_alg(fruit_list):
    # index_humansorted must be an exact alias for locale-aware natsorting.
    via_humansort = index_humansorted(fruit_list)
    via_natsort = index_natsorted(fruit_list, alg=ns.LOCALE)
    assert via_humansort == via_natsort
def test_index_humansorted():
    data = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
    forward = index_humansorted(data)
    # Known locale-aware ordering of the fixture list.
    assert forward == [4, 0, 5, 3, 1, 2]
    # humansorted is natsorted with the LOCALE algorithm.
    assert forward == index_natsorted(data, alg=ns.LOCALE)
    # reverse=True mirrors the forward index order.
    assert index_humansorted(data, reverse=True) == forward[::-1]
def test_index_humansorted_is_identical_to_index_natsorted_with_locale_alg(
        fruit_list):
    # Both calls must yield the same permutation indices for the fixture.
    expected = index_natsorted(fruit_list, alg=ns.LOCALE)
    assert index_humansorted(fruit_list) == expected