def base(
    use_filter="default",
    data_path="~/data/faons/latest.csv",
    filter_name="default.csv",
    participant_subset="",
    drop_metadata=True,
    drop=None,
    clean=7,
    components=5,
    facecolor="#ffffff",
):
    """Load questionnaire data, filter it, and plot the FastICA mixing matrix.

    Args:
        use_filter: Accepted for interface compatibility; not read by this
            function.
        data_path: Path to the CSV file with the responses (``~`` is expanded).
        filter_name: File name of the filter CSV inside the ``filters``
            directory that sits next to this module.
        participant_subset: ``"odd"``, ``"even"``, ``"male"`` or ``"female"``
            to restrict the rows that are analysed; any other value keeps all
            rows.
        drop_metadata: When true, drop every column listed under the
            ``"metadata"`` filter.
        drop: Iterable of further filter names whose columns are dropped.
            ``None`` (the default) drops nothing extra.
        clean: A row is kept only if it holds more than ``clean`` distinct
            values.
        components: Number of components requested from FastICA.
        facecolor: Face colour of the created figure.

    Returns:
        None. A matplotlib figure showing the mixing matrix is created.
    """
    drop = [] if drop is None else drop

    data_path = path.expanduser(data_path)
    filter_path = path.join(path.dirname(path.realpath(__file__)), "filters", filter_name)
    # Transpose the filters because of the .csv file formatting; index_col=0
    # turns the first column into the index so that, after transposing, the
    # filter names become the column labels instead of a numbered index.
    filters = pd.read_csv(filter_path, index_col=0, header=None).transpose()
    all_data = pd.read_csv(data_path)

    # Keep only rows with more than `clean` distinct values. The mask is built
    # as a real list: on Python 3 `map` is lazy and cannot be used to index a
    # DataFrame.
    keep_mask = [len(set(row)) > clean for row in np.array(all_data)]
    all_data = all_data[keep_mask]

    if drop_metadata:
        all_data = all_data.drop(list(filters["metadata"].dropna()), axis=1)

    # Collect the column names to drop as a set: overlaying several filters
    # may list the same column more than once.
    drop_columns = set()
    for drop_item in drop:
        drop_columns.update(filters[drop_item].dropna())
    all_data = all_data.drop(list(drop_columns), axis=1)

    # `.loc` replaces the removed `.ix`; the rows are selected by index label
    # exactly as before.
    if participant_subset == "odd":
        filtered_data = all_data.loc[all_data.index.values[1::2]]
    elif participant_subset == "even":
        filtered_data = all_data.loc[all_data.index.values[0::2]]
    elif participant_subset == "male":
        filtered_data = all_data[all_data["My legal gender:"] == "Male"]
    elif participant_subset == "female":
        filtered_data = all_data[all_data["My legal gender:"] == "Female"]
    else:
        filtered_data = all_data

    # Convert to the correct type for analysis.
    # NOTE(review): the division by 100 presumably rescales answers given on
    # a 0-100 scale -- confirm against the data file.
    filtered_data_array = np.array(filtered_data, dtype="float64") / 100

    # NOTE(review): the PCA and factor-analysis projections are computed but
    # not used anywhere in this function; only the ICA mixing matrix is
    # plotted. They are kept so that the function performs the same fits.
    pca = PCA()
    S_pca_ = pca.fit_transform(filtered_data_array)

    # Named `factor_analysis` rather than `fa` to avoid shadowing the
    # module-level function `fa`.
    factor_analysis = FactorAnalysis(svd_method="lapack")
    S_fa_ = factor_analysis.fit_transform(filtered_data_array)

    ica = FastICA(n_components=components, max_iter=20000, tol=0.00001)
    S_ica_ = ica.fit_transform(filtered_data_array)  # Estimate the sources
    load = ica.mixing_

    load_max = np.max(load)
    load_min_abs = abs(np.min(load))
    remapped_cmap = remappedColorMap(
        cm.PiYG,
        start=(load_max - load_min_abs) / (2 * load_max),
        midpoint=load_min_abs / (load_max + load_min_abs),
        name="shrunk",
    )

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(17.5, 5), facecolor=facecolor)
    graphic = ax.imshow(load, cmap=remapped_cmap, interpolation="none")
def fa(source=False, use_filter="default", data_file="latest", participant_subset="", drop_metadata=True, drop=None, clean=7, factors=5, facecolor="#ffffff"):
    """Run an R ``factanal`` (promax rotation) on the data and plot the loadings.

    Args:
        source: Name of the data source to look up in the ``Addresses`` config
            section. When falsy, the name is read from the ``Source`` section.
        use_filter: Key in the ``Filters`` config section that names the
            filter file (without the ``.csv`` extension).
        data_file: Base name (without ``.csv``) of the data file, appended to
            the address configured for ``source``.
        participant_subset: ``"odd"``, ``"even"``, ``"male"`` or ``"female"``
            to restrict the rows that are analysed; any other value keeps all
            rows.
        drop_metadata: When true, drop every column listed under the
            ``"metadata"`` filter.
        drop: Iterable of further filter names whose columns are dropped.
            ``None`` (the default) drops nothing extra.
        clean: A row is kept only if it holds more than ``clean`` distinct
            values.
        factors: Number of factors passed to ``factanal``.
        facecolor: Face colour of the created figure.

    Returns:
        None. A matplotlib figure with the loadings heat map and a colour bar
        is created.
    """
    # Function-scope import: `DataFrame.from_csv` no longer exists in current
    # pandas, and `read_csv` is its documented replacement.
    import pandas as pd

    drop = [] if drop is None else drop

    # Gets config file:
    config = get_config_file(localpath=path.dirname(path.realpath(__file__)) + '/')

    # IMPORT VARIABLES
    if not source:
        source = config.get('Source', 'source')
    data_path = config.get('Addresses', source)
    filter_dir = config.get('Paths', "filter_dir")
    filter_name = config.get("Filters", use_filter)
    # END IMPORT VARIABLES

    filter_path = path.dirname(path.realpath(__file__)) + '/' + filter_dir + filter_name + '.csv'

    # Transpose the filters because of the .csv file formatting; the first
    # column becomes the index so the filter names end up as column labels.
    filters = pd.read_csv(filter_path, index_col=0, header=None).transpose()
    # `index_col=0, parse_dates=True` reproduces what `DataFrame.from_csv` did;
    # the index is then turned back into an ordinary column.
    all_data = pd.read_csv(data_path + data_file + ".csv", index_col=0, parse_dates=True)
    all_data = all_data.reset_index(level=0)

    # Clean data of respondents who only check extreme answers: keep rows with
    # more than `clean` distinct values. The mask is a real list because on
    # Python 3 `map` is lazy and cannot be used to index a DataFrame.
    keep_mask = [len(set(row)) > clean for row in np.array(all_data)]
    all_data = all_data[keep_mask]

    if drop_metadata:
        # Drops metadata.
        all_data = all_data.drop(list(filters["metadata"].dropna()), axis=1)

    # Compile the column names to be dropped as a set: the filters may overlap
    # when several of them are overlaid.
    drop_columns = set()
    for drop_item in drop:
        drop_columns.update(filters[drop_item].dropna())
    all_data = all_data.drop(list(drop_columns), axis=1)

    # `.loc` replaces the removed `.ix`; the rows are selected by index label
    # exactly as before.
    if participant_subset == "odd":
        # Selects only odd indexes (keep the other dataset half for validation).
        filtered_data = all_data.loc[all_data.index.values[1::2]]
    elif participant_subset == "even":
        # Selects only even indexes (keep the other dataset half for validation).
        filtered_data = all_data.loc[all_data.index.values[0::2]]
    elif participant_subset == "male":
        # Selects only male participants.
        filtered_data = all_data[all_data['My legal gender:'] == 'Male']
    elif participant_subset == "female":
        # Selects only female participants.
        filtered_data = all_data[all_data['My legal gender:'] == 'Female']
    else:
        filtered_data = all_data

    # Convert to the correct type for analysis.
    # NOTE(review): the division by 100 presumably rescales answers given on
    # a 0-100 scale -- confirm against the data file.
    filtered_data_array = np.array(filtered_data, dtype='float64') / 100

    fit = r.factanal(filtered_data_array, factors, rotation='promax')
    load = r.loadings(fit)
    # NOTE(review): `numpy2ri.ri2numpy` looks like an older rpy2 API -- verify
    # it against the installed rpy2 version.
    load = numpy2ri.ri2numpy(load)
    load = r.t(load)

    load_max = np.max(load)
    load_min_abs = abs(np.min(load))
    remapped_cmap = remappedColorMap(
        cm.PiYG,
        start=(load_max - load_min_abs) / (2 * load_max),
        midpoint=load_min_abs / (load_max + load_min_abs),
        name='shrunk',
    )

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(17.5, 5), facecolor=facecolor)
    graphic = ax.imshow(load, cmap=remapped_cmap, interpolation='none')
    ax.xaxis.set_major_locator(matplotlib.ticker.MultipleLocator(base=1.0))
    ax.yaxis.set_major_locator(matplotlib.ticker.MultipleLocator(base=1.0))
    # The label lists carry one leading placeholder entry ([0] resp. the extra
    # element of arange(factors + 1)) ahead of the real labels.
    ax.set_xticklabels([0] + filtered_data.columns.tolist(), fontsize=8, rotation=90)
    ax.set_yticklabels(np.arange(factors + 1))
    ax.set_ylabel('Factors')
    ax.set_title("Question Loadings on Factors")

    # Recolor plot spines:
    for spine_side in ["bottom", "top", "left", "right"]:
        ax.spines[spine_side].set_color("#777777")

    # Remove ticks. Booleans are used instead of the strings "off": a
    # non-empty string is truthy, so current matplotlib would draw the ticks.
    plt.tick_params(axis='both', which='both', left=False, right=False, bottom=False, top=False)

    divider = make_axes_locatable(ax)
    # Calculate width for cbar so that it is equal to the question column width:
    cbar_width = str(100 / np.shape(load)[1]) + "%"
    cax = divider.append_axes("right", size=cbar_width, pad=0.05)
    cbar = colorbar(graphic, cax=cax, drawedges=True)

    # Limit the number of ticks:
    tick_locator = ticker.MaxNLocator(nbins=6)
    cbar.locator = tick_locator
    cbar.update_ticks()

    # Align ticklabels so that negative values are not misaligned (meaning
    # right align):
    for t in cbar.ax.get_yticklabels():
        t.set_horizontalalignment('right')
        t.set_x(0.045 * (np.shape(load)[1] + 6))

    # Tweak color bar borders
    cbar.outline.set_color("#666666")
    cbar.dividers.set_linewidth(0)