Example #1
0
File: subjects.py  Project: acb14js/ccat
    def load_connectivity_mx(self):
        """
        Load connectivity matrices from the chosen directory.

        Globs ``*.txt`` files under ``self.directory`` and, when the user has
        supplied a CSV (``self.csv``), matches a CSV column against the
        filenames and reorders the files to the CSV's row order so that
        latent group information can be appended to the csv.  Without a CSV
        the filenames are human-sorted (file-explorer order).  The matrices
        are then read with ``np.loadtxt`` and stored in class variables
        (``connectivity_mx``, ``num_subjects``, ``num_nodes``,
        ``data_frame``).

        Returns early (printing a message) on an invalid directory or a
        CSV/file-count mismatch.
        """
        print("Finding the  directory...")
        # Load the file names
        fnames_paths = glob(self.directory + "/*.txt")

        # Glob does not throw an error if the directory is invalid
        if not fnames_paths:
            print("Invalid directory")
            return

        # Strip the directory part, keeping only the base filename.
        # Materialized as a list (not a map object) because it is indexed
        # (fnames[0]) and iterated more than once below; a Python 3 map
        # iterator would be exhausted after the first pass.
        # rsplit also handles a path with no '/' correctly, unlike the old
        # reversed-find slice which dropped the first character.
        fnames = [path.rsplit('/', 1)[-1] for path in fnames_paths]

        print("Ordering filenames...")
        indexes = None
        # Check if the user has defined a CSV file
        if self.csv is not None:

            if len(fnames_paths) != len(self.csv):
                print("Mismatch between CSV and loaded connectivity matrices")
                # Was a bare `exit` (a no-op expression); abort explicitly.
                return

            print("Extracting meaningful CSV column...")
            column = extract_column(self.csv.values, fnames[0])

            print("Column " + str(column))

            print("Matching CSV order...")
            # Get the csv column with the correct file order.
            real_order = self.csv.values[:, column]
            indexes = reorder_fn(real_order, fnames)

        else:
            # If no CSV sort the filenames in the same way as file explorers
            print("No CSV, Human sorting...")
            indexes = natsort.index_humansorted(fnames)

        # Apply the reordering
        fnames = np.array(fnames)[indexes]
        fnames_paths = np.array(fnames_paths)[indexes]

        print("Loading from the directory...")
        # Load the data from the files
        files = np.array([np.loadtxt(f) for f in fnames_paths])

        print("Assigning values...")
        # Assign the values to the class variables
        self.connectivity_mx = files
        # Stack is (subjects, nodes, nodes); keep both dimensions distinct
        # instead of binding `n` twice.
        m, n, _ = self.connectivity_mx.shape
        self.num_subjects = m
        self.num_nodes = n
        # Init the pandas data frame
        self.data_frame = pd.DataFrame({"File Names" : fnames})
        # Add the index to a named column
        self.data_frame['Subject Number'] = self.data_frame.index
        # Initialise the user defined groups with zeros
        self.data_frame["Expert Labels"] = pd.Series(np.zeros(m, dtype=np.int64))

        print("Connectivity matrix loaded.")
Example #2
0
def test_index_humansorted_returns_results_identical_to_index_natsorted_with_LOCALE():
    """Humansorted indexing is just natsorted indexing with the LOCALE algorithm."""
    sample = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
    assert index_humansorted(sample) == index_natsorted(sample, alg=ns.LOCALE)
Example #3
0
def test_index_humansorted_returns_results_identical_to_index_natsorted_with_LOCALE():
    """Both index helpers must agree when natsort uses locale-aware ordering."""
    fruits = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
    expected = index_natsorted(fruits, alg=ns.LOCALE)
    assert index_humansorted(fruits) == expected
# If natsort can't be found, paste this into the console and run it:
# !pip install natsort


# This cell is for Deeper Exit Point Analysis

# Build the id-indexed page lookup ONCE: the original re-ran
# pageData.set_index('id') for each of the three mapped columns,
# rebuilding the same index three times.
page_lookup = pageData.set_index('id')

# This is set up so that I can clearly see the coordinates and what each page is.
DPRexitPoints = exitPoints.sort_values(by=['pageId'])
DPRexitPoints['Latitude'] = DPRexitPoints['pageId'].map(page_lookup['Latitude'])
DPRexitPoints['Longitude'] = DPRexitPoints['pageId'].map(page_lookup['Longitude'])
# I added the page names so that I can easily tell where in the story it
# is placed
DPRexitPoints['PageName'] = DPRexitPoints['pageId'].map(page_lookup['name'])

# Sorts the page names by number then letter
DPRexitPoints = DPRexitPoints.iloc[natsort.index_humansorted(DPRexitPoints.PageName)]
# Drops unnecessary columns
DPRexitPoints = DPRexitPoints[['PageName', 'NumExitedHere']]
DPRexitPoints.to_csv('Deeper Exit Point Analysis.csv')

#%%

# This cell is to explore the average time spent on each page

# List of each page and frequency of time on each page

# Drops unnecessary columns then sorts them by ID
AvTimeOnPage = DPRtimeSpentOnPage.drop(['user', 'date'], axis=1).sort_values('pageId')

# Vectorised conversion to timedelta: pd.to_timedelta on the whole column
# gives the same result as the old per-row apply(lambda ..., axis=1) in a
# single C-level pass.
AvTimeOnPage['timeOnPage'] = pd.to_timedelta(AvTimeOnPage['timeOnPage'])
Example #5
0
def test_index_humansorted_returns_results_identical_to_index_natsorted_with_LOCALE():
    """index_humansorted defaults to locale-aware natural sorting."""
    values = ["Apple", "corn", "Corn", "Banana", "apple", "banana"]
    locale_result = index_natsorted(values, alg=ns.LOCALE)
    assert index_humansorted(values) == locale_result
def test_index_humansorted_is_identical_to_index_natsorted_with_locale_alg(fruit_list):
    """index_humansorted(x) should equal index_natsorted(x, alg=ns.LOCALE)."""
    via_humansort = index_humansorted(fruit_list)
    via_natsort = index_natsorted(fruit_list, alg=ns.LOCALE)
    assert via_humansort == via_natsort
Example #7
0
def test_index_humansorted():
    """Sanity checks for index_humansorted on a mixed-case fruit list."""
    words = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
    forward = index_humansorted(words)
    assert forward == [4, 0, 5, 3, 1, 2]
    assert forward == index_natsorted(words, alg=ns.LOCALE)
    assert index_humansorted(words, reverse=True) == forward[::-1]
def test_index_humansorted_is_identical_to_index_natsorted_with_locale_alg(
        fruit_list):
    """The humansorted index helper mirrors natsorted with the LOCALE flag."""
    locale_indexes = index_natsorted(fruit_list, alg=ns.LOCALE)
    assert index_humansorted(fruit_list) == locale_indexes