def sub_all():
    print('training all...')
    RF_all1 = RF.RF_ALL('./data/Normalized_Data/testing_DataSet.csv',
                        './data/Normalized_Data/sub_DataSet.csv')
    RF_all2 = RF.RF_ALL('./data/Normalized_Data/EVAL_DataSet.csv',
                        './data/Normalized_Data/sub_DataSet.csv')
    gdbt_all1 = GDBT_all.GDBT_ALL('./data/Normalized_Data/training_DataSet.csv',
                                  './data/Normalized_Data/sub_DataSet.csv')
    gdbt_all2 = GDBT_all.GDBT_ALL('./data/Normalized_Data/testing_DataSet.csv',
                                  './data/Normalized_Data/sub_DataSet.csv')
    gdbt_all3 = GDBT_all.GDBT_ALL('./data/Normalized_Data/EVAL_DataSet.csv',
                                  './data/Normalized_Data/sub_DataSet.csv')
    svr_all = s.SVR_ALL('./data/Normalized_Data/EVAL_DataSet.csv',
                        './data/Normalized_Data/sub_DataSet.csv')
    fw = open(tmpFilename, 'w')
    for i in range(len(svr_all)):
        fw.write('%s,%s,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f\n' %
                 (gdbt_all1[i][0], gdbt_all1[i][1],
                  float(RF_all1[i][2]), float(RF_all2[i][2]),
                  float(gdbt_all1[i][2]), float(gdbt_all2[i][2]), float(gdbt_all3[i][2]),
                  float(svr_all[i][2]), float(svr_all[i][3]), float(svr_all[i][4]),
                  (float(RF_all2[i][2]) + float(gdbt_all1[i][2]) + float(gdbt_all2[i][2]) +
                   float(svr_all[i][2]) + float(svr_all[i][3]) + float(svr_all[i][4])) / 6,
                  cost_dict['all'][gdbt_all1[i][0]][0],
                  cost_dict['all'][gdbt_all1[i][0]][1]))
    fw.close()
def sub_st():
    print('training ST...')
    RF_st1 = RF.RF_ST('./data/Normalized_Data/testing_DataSetST.csv',
                      './data/Normalized_Data/sub_DataSetST.csv')
    RF_st2 = RF.RF_ST('./data/Normalized_Data/EVAL_DataSetST.csv',
                      './data/Normalized_Data/sub_DataSetST.csv')
    gdbt_st1 = GDBT_ST.GDBT_ST('./data/Normalized_Data/testing_DataSetST.csv',
                               './data/Normalized_Data/sub_DataSetST.csv')
    gdbt_st2 = GDBT_ST.GDBT_ST('./data/Normalized_Data/EVAL_DataSetST.csv',
                               './data/Normalized_Data/sub_DataSetST.csv')
    gdbt_st3 = GDBT_ST.GDBT_ST('./data/Normalized_Data/EVAL_DataSetST.csv',
                               './data/Normalized_Data/sub_DataSetST.csv')
    svr_st = s.SVR_ST('./data/Normalized_Data/EVAL_DataSetST.csv',
                      './data/Normalized_Data/sub_DataSetST.csv')
    fw = open(tmpFilename, 'a')  # append to the file started by sub_all()
    for i in range(len(svr_st)):
        fw.write('%s,%s,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f\n' %
                 (gdbt_st1[i][0], gdbt_st1[i][1],
                  float(RF_st1[i][2]), float(RF_st2[i][2]),
                  float(gdbt_st1[i][2]), float(gdbt_st2[i][2]), float(gdbt_st3[i][2]),
                  float(svr_st[i][2]), float(svr_st[i][3]), float(svr_st[i][4]),
                  (float(RF_st2[i][2]) + float(gdbt_st1[i][2]) + float(gdbt_st2[i][2]) +
                   float(svr_st[i][2]) + float(svr_st[i][3]) + float(svr_st[i][4])) / 6,
                  cost_dict[gdbt_st1[i][1]][gdbt_st1[i][0]][0],
                  cost_dict[gdbt_st1[i][1]][gdbt_st1[i][0]][1]))
    fw.close()
def calc_rast_by_stim(rast, stimparams, bins = None):
    usp1 = np.unique(stimparams[:, 0])
    usp2 = np.unique(stimparams[:, 1])
    nstimparams = stimparams.shape[1]
    ustimparams = [np.unique(stimparams[:, 0]), np.unique(stimparams[:, 1])]
    nparamlevels = np.array([usp1.size, usp2.size])
    nbins = rast.shape[1]

    ntrials_per_stim = np.zeros((nparamlevels[0], nparamlevels[1]))
    for i in range(nparamlevels[0]):
        for j in range(nparamlevels[1]):
            ntrials_per_stim[i, j] = RF.get_trials(stimparams, np.array([usp1[i], usp2[j]])).size

    if np.unique(ntrials_per_stim).size > 1:
        print 'Different numbers of trials per stimulus!'
    elif np.unique(ntrials_per_stim).size == 1:
        ntrials = np.int32(ntrials_per_stim[0])

    rast2 = np.zeros((ntrials, nparamlevels[0], nparamlevels[1], nbins))
    for i in range(nparamlevels[0]):
        for j_ in range(nparamlevels[1]):
            ix = RF.get_trials(stimparams, (usp1[i], usp2[j_]))
            rast2[:, i, j_, :] = rast[ix, :]

    return rast2, ustimparams
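# --- Hypothetical usage sketch (not from the original source) ----------------
# Assumes `rast` is (ntrials x nbins, 1 ms bins) and `stimparams` is (ntrials x 2)
# with columns [frequency, attenuation], as in the other RF helpers in this file.
rast2, (ufreqs, uattens) = calc_rast_by_stim(rast, stimparams)
# rast2 is indexed [trial, freq_level, atten_level, time_bin], so a trial-averaged
# raster per stimulus is:
mean_rast_by_stim = rast2.mean(axis = 0)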
def __init__(self, symb, predlen, cat='RL', kwargs=None):
    self.symb = symb
    self.predlen = predlen
    self.kwargs = kwargs
    self.cat = cat
    if cat == 'RF':
        if kwargs is not None:
            self.learner = RF.RandomForest(**kwargs)
        else:
            self.learner = RF.RandomForest()
    elif cat == 'KNN':
        if kwargs is not None:
            self.learner = KNN.KNN(**kwargs)
        else:
            self.learner = KNN.KNN()
    elif cat == 'SVM':
        if kwargs is not None:
            self.learner = SVM.SVM(**kwargs)
        else:
            self.learner = SVM.SVM()
    elif cat == 'NN':
        if kwargs is not None:
            self.learner = NN.NN(**kwargs)
        else:
            self.learner = NN.NN()
def plot_latency_by_stim(rast, stimparams):
    rf = calc_rf(rast, stimparams)
    stim_peak_times = calc_latency_by_stim(rast, stimparams)
    x = stim_peak_times.copy()
    x[x > 80] = 80
    x[x < 50] = 50
    fig = plt.figure()
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)
    RF.plot_rf(rf, ax = ax1)
    RF.plot_rf(x.T, ax = ax2, cmap = 'jet')
def plot_fake_strf(rast, stimparams):
    fig = plt.figure(figsize = (15, 10))
    ax1 = fig.add_axes([0.05, 0.2, 0.9, 0.75])
    ax2 = fig.add_axes([0.05, 0.05, 0.9, 0.15])
    fake_strf = calc_fake_strf(rast, stimparams)
    RF.plot_rf(fake_strf, ax = ax1, axes_on = False)
    psth = fake_strf.mean(0)
    psth_smoo = Spikes.hamming_smoo(psth, windlen = 5)
    ax2.plot(psth_smoo)
    ax2.set_xlim([0, rast.shape[1]])
    plt.draw()
    plt.show()
    return fig
def add_bf_man(experiment):
    '''
    Runs through all of the units and allows you to click on the CF.
    Clicking a negative x-coordinate results in the unit being discarded
    (renamed to _RR###.h5). For valid units, the CF and threshold are saved
    in a text file called cfs.txt
    '''
    rf_blocks = glob.glob(os.path.join(studydir, 'Sessions', experiment, 'fileconversion', '*RF*.h5'))

    cfs = []
    badunits = []
    for rf_block in rf_blocks:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        print rf_block
        f = h5py.File(rf_block, 'r')
        blockname = os.path.splitext(os.path.split(rf_block)[1])[0]
        pennum = np.int32(blockname.split('RF')[1])
        rf = f['rf'].value
        f.close()

        RF.plot_rf(rf, ax = ax)
        xlim = ax.get_xlim()
        ylim = ax.get_ylim()
        ax.set_xlim([xlim[0]-2, xlim[1]])
        ax.set_ylim([ylim[0], ylim[1]-1])
        ax.set_title(pennum)
        plt.show()

        xy = plt.ginput(1, timeout = -1)[0]
        xy = np.array(xy)
        if np.prod(xy) > 0:
            xy = np.array(xy)
            cfs.append(np.hstack((pennum, xy)))
        else:
            badunits.append(pennum)

        plt.close(fig)

    savepath = os.path.join(studydir, 'Sessions', experiment, 'cfs.txt')
    np.savetxt(savepath, cfs)
    # print badunits
    plt.close(fig)
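# --- Hypothetical usage sketch (not from the original source) ----------------
# cfs.txt holds one row per kept unit: (penetration number, clicked CF index,
# clicked threshold). It can be read back the way characterize() does below,
# assuming `studydir` and `experiment` are defined as in add_bf_man:
cfs = np.loadtxt(os.path.join(studydir, 'Sessions', experiment, 'cfs.txt'), ndmin = 2)
unitnums, cf_ix, thresh = cfs[:, 0], cfs[:, 1], cfs[:, 2]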
def predict():
    if request.method == 'POST':
        f = request.files['model']
        f.save('rf_model.pkl')
        pred_input = np.matrix(request.form['pred_input'])
        pred_result = RF.predict(pred_input)
        return pred_result
def optimize(all_data, target_data, feature_mtl, value_mtl):
    target = np.array(target_data)  # np.ndarray()
    target = target.tolist()  # list
    # target_data = list(target_data)
    left, right = rf.data_spilt(all_data, int(feature_mtl), value_mtl)
    left_t, right_t = rf.data_spilt(target, int(feature_mtl), value_mtl, True)
    all_label_left = list(float(row[-1]) for row in left)
    target_label_left = list(float(row[-1]) for row in left_t)
    all_label_right = list(float(row[-1]) for row in right)
    target_label_right = list(float(row[-1]) for row in right_t)
    theta = np.std(target_data.iloc[:, feature_mtl])
    divergence_gain = -999
    value_str = value_mtl
    i = value_mtl - theta
    while i <= (value_mtl + theta):
        left_n, right_n = rf.data_spilt(all_data, feature_mtl, i)
        left_n_t, right_n_t = rf.data_spilt(target, feature_mtl, i)
        all_n_left = list(float(row[-1]) for row in left_n)
        target_n_left = list(float(row[-1]) for row in left_n_t)
        all_n_right = list(float(row[-1]) for row in right_n)
        target_n_right = list(float(row[-1]) for row in right_n_t)
        loss_new = loss(all_label_left, all_label_right, target_label_left, target_label_right)
        loss_old = loss(all_n_left, all_n_right, target_n_left, target_n_right)
        if loss_new >= loss_old:
            weight_left = len(target_label_left) / (len(target_label_left) + len(target_label_right))
            weight_right = len(target_label_right) / (len(target_label_left) + len(target_label_right))
            divergence_tmp = 1 - weight_left * js_gain(all_label_left, all_n_left) \
                               - weight_right * js_gain(all_label_right, all_n_right)
            if divergence_tmp >= divergence_gain:
                divergence_gain = divergence_tmp
                value_str = i
        i += 0.05 * theta
    return value_str
def train_ST():
    RF_st1 = RF.RF_ST_train('./data/testingDataSetST1.csv', './data/VALIDATION_DataSetST1.csv')
    RF_st2 = RF.RF_ST_train('./data/EVAL_DataSetST1.csv', './data/VALIDATION_DataSetST1.csv')
    gdbt_st1 = GDBT_ST.GDBT_ST_train('./data/EVAL_DataSetST1.csv', './data/VALIDATION_DataSetST1.csv')
    gdbt_st2 = GDBT_ST.GDBT_ST_train('./data/testingDataSetST1.csv', './data/VALIDATION_DataSetST1.csv')
    gdbt_st3 = GDBT_ST.GDBT_ST_train('./data/VALIDATION_DataSetST1.csv', './data/VALIDATION_DataSetST1.csv')
    svr_st = s.SVR_ST_train()

    # # Stack the 3 GBDTs and the SVR with a second-level SVR, fitted per store
    # store = ['1', '2', '3', '4', '5']; pred_y = []
    # for st in store:
    #     X = []; Y = []
    #     for i in range(len(gdbt_st1)):
    #         if gdbt_st1[i][1] != st: continue
    #         X.append((RF_st1[i][2], gdbt_st1[i][2], gdbt_st2[i][2], svr_st[i][2]))
    #         Y.append(gdbt_st1[i][3])
    #     svr = SVR(kernel='linear', epsilon=0.5, C=1).fit(X, Y)
    #     svr_res = svr.predict(X)
    #     for x in svr_res:
    #         pred_y.append(x)

    fw = open(filename, 'a')
    for i in range(len(gdbt_st1)):
        fw.write('%s,%s,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f\n' %
                 (gdbt_st1[i][0], gdbt_st1[i][1],
                  float(RF_st1[i][2]), float(RF_st2[i][2]),
                  float(gdbt_st1[i][2]), float(gdbt_st2[i][2]), float(gdbt_st3[i][2]),
                  float(svr_st[i][2]), float(svr_st[i][3]), float(svr_st[i][4]),
                  (float(RF_st2[i][2]) + float(gdbt_st1[i][2]) + float(gdbt_st2[i][2]) +
                   float(svr_st[i][2]) + float(svr_st[i][3]) + float(svr_st[i][4])) / 6,
                  cost_dict[gdbt_st1[i][1]][gdbt_st1[i][0]][0],
                  cost_dict[gdbt_st1[i][1]][gdbt_st1[i][0]][1],
                  float(gdbt_st1[i][3])))
    fw.close()
def time_tree(newick):
    tree = Tree(newick)
    t0 = time.time()
    sum3_dt = RF.polynomial_sum3_performance(tree, len(tree.get_leaves()) + 1)[1]
    tf = time.time()
    return tf - t0, sum3_dt
def make_spk_mask(spktrials, stimparams, param):
    ix = RF.get_trials(stimparams, param)
    spk_mask = np.zeros(spktrials.size, dtype = np.bool)
    for ix_ in ix:
        spk_mask[spktrials == ix_] = True
    return spk_mask
def RandomForestTest(pca_option):
    import RF
    RF.RandomForestSimulation(RF.rf, processing.linear_pca,
                              processing.overall_training_data, pca_option)
    processing.final_validation = np.array(processing.final_validation)
    FV_features = []
    FV_labels = []
    FV_features, FV_labels = processing.createFeatures_Labels(processing.final_validation)
    FV_features_data = None
    FV_labels_data = None
    FV_features_data, FV_labels_data = processing.convertToDataFrame(
        FV_features, FV_labels, processing.column_titles)
    global RF_final_predictions
    if (pca_option == 'yes' or pca_option == 'both'):
        transformed_FV = processing.linear_pca.transform(FV_features_data)
        final_predictions = RF.rf.predict(transformed_FV)
        RF_final_predictions = final_predictions
        accuracy = metrics.accuracy_score(final_predictions, FV_labels)
        precision = metrics.precision_score(FV_labels, final_predictions, average='micro')
        recall = metrics.recall_score(FV_labels, final_predictions, average='micro')
        print('RANDOM FOREST MODEL FINAL TEST DATA ACCURACY: ', 100 * accuracy)
        print('RANDOM FOREST MODEL FINAL TEST DATA PRECISION: ', 100 * precision)
        print('RANDOM FOREST MODEL FINAL TEST DATA RECALL: ', 100 * recall)
        print()
        return accuracy, precision, recall
    else:
        final_predictions = RF.rf.predict(FV_features_data)
        RF_final_predictions = final_predictions
        accuracy = metrics.accuracy_score(final_predictions, FV_labels)
        precision = metrics.precision_score(FV_labels, final_predictions, average='micro')
        recall = metrics.recall_score(FV_labels, final_predictions, average='micro')
        print('RANDOM FOREST MODEL FINAL TEST DATA ACCURACY: ', 100 * accuracy)
        print('RANDOM FOREST MODEL FINAL TEST DATA PRECISION: ', 100 * precision)
        print('RANDOM FOREST MODEL FINAL TEST DATA RECALL: ', 100 * recall)
        print()
        return accuracy, precision, recall
def main(args):
    SVM = args.SVM
    RF = args.RF
    if SVM:
        import SVM
        SVM.SVM(args)
    if RF:
        import RF
        RF.RF(args)
def weight_loss(w_m, w_r, target, feature_mtl, value_mtl, value_str):
    target = np.array(target)  # np.ndarray()
    target = target.tolist()  # list
    left_m, right_m = rf.data_spilt(target, feature_mtl, value_mtl)
    loss_m = rf.spilt_loss(left_m, right_m)
    left_r, right_r = rf.data_spilt(target, feature_mtl, value_str)
    loss_r = rf.spilt_loss(left_r, right_r)
    beta = 0.8
    # multiplicative-weights update: each weight decays by beta raised to its own loss
    w_m_t = w_m * (beta ** loss_m)
    w_r_t = w_r * (beta ** loss_r)
    weight_m = w_m_t / (w_m_t + w_r_t)
    weight_r = w_r_t / (w_m_t + w_r_t)
    return weight_m, weight_r
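# --- Hypothetical worked example (not from the original source) --------------
# weight_loss is a multiplicative-weights update: each weight is scaled by
# beta ** (its split's loss on the target data) and then renormalized, so the
# lower-loss split ends up with the larger weight. Starting from w_m = w_r = 0.5:
beta = 0.8
w_m_t = 0.5 * beta ** 3.0   # value_mtl split, loss 3.0 -> 0.256
w_r_t = 0.5 * beta ** 1.0   # value_str split (from strut.optimize), loss 1.0 -> 0.400
print(w_m_t / (w_m_t + w_r_t), w_r_t / (w_m_t + w_r_t))   # ~0.39, ~0.61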
def rf_predict_live_data(company: str, verbose=False, scaled=False):
    features, garbage, labels, garbage, garbage, garbage = get_features(company, train_size=1.00, scaled=scaled)
    live_features, live_labels, live_prices, live_times = get_live_features(company)

    true_labels, RF_predictions = RF.predict(features, labels, live_features, live_labels)
    accuracy = accuracy_score(true_labels, RF_predictions)

    if verbose:
        print("Random Forest Accuracy: " + str(accuracy * 100) + "%")
        prediction_distribution(RF_predictions, true_labels)

    return live_prices, live_times, RF_predictions, accuracy
def make_trial_mask(stimparams, param):
    ntrials = stimparams.shape[0]
    ix = RF.get_trials(stimparams, param)
    trial_mask = np.zeros(ntrials, dtype = np.bool)
    for ix_ in ix:
        trial_mask[ix_] = True
    return trial_mask
def train_all():
    RF_all1 = RF.RF_ALL_train('./data/testingDataSet1.csv', './data/VALIDATION_DataSet1.csv')
    RF_all2 = RF.RF_ALL_train('./data/EVAL_DataSet1.csv', './data/VALIDATION_DataSet1.csv')
    gdbt_all1 = GDBT_all.GDBT_ALL_train('./data/VALIDATION_DataSet1.csv', './data/VALIDATION_DataSet1.csv')
    gdbt_all2 = GDBT_all.GDBT_ALL_train('./data/testingDataSet1.csv', './data/VALIDATION_DataSet1.csv')
    gdbt_all3 = GDBT_all.GDBT_ALL_train('./data/EVAL_DataSet1.csv', './data/VALIDATION_DataSet1.csv')
    svr_all = s.SVR_ALL_train()

    # Stack the 3 GBDTs and the SVR with a second-level SVR
    # X = []; Y = []
    # for i in range(len(gdbt_all1)):
    #     X.append((RF_all1[i][2], gdbt_all1[i][2], gdbt_all2[i][2], svr_all[i][2], svr_all[i][3]))
    #     Y.append(gdbt_all1[i][3])
    #
    # svr = SVR(kernel='linear', epsilon=2, C=1).fit(X, Y)
    # print(svr.coef_)
    # pred_y = svr.fit(X, Y).predict(X)

    fw = open(filename, 'w')
    for i in range(len(gdbt_all1)):
        fw.write('%s,%s,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f\n' %
                 (gdbt_all1[i][0], gdbt_all1[i][1],
                  float(RF_all1[i][2]), float(RF_all2[i][2]),
                  float(gdbt_all1[i][2]), float(gdbt_all2[i][2]), float(gdbt_all3[i][2]),
                  float(svr_all[i][2]),
                  float(svr_all[i][3]),  # value over the previous 14 days
                  float(svr_all[i][4]),
                  (float(RF_all1[i][2]) + float(gdbt_all1[i][2]) + float(gdbt_all2[i][2]) +
                   float(svr_all[i][2]) + float(svr_all[i][3]) + float(svr_all[i][4])) / 6,
                  cost_dict['all'][gdbt_all1[i][0]][0],
                  cost_dict['all'][gdbt_all1[i][0]][1],
                  float(gdbt_all1[i][3])))
    fw.close()
def make_spk_and_trial_masks(spktrials, stimparams, param):
    ntrials = stimparams.shape[0]
    ix = RF.get_trials(stimparams, param)
    trial_mask = np.zeros(ntrials, dtype = np.bool)
    spk_mask = np.zeros(spktrials.size, dtype = np.bool)
    for ix_ in ix:
        spk_mask[spktrials == ix_] = True
        trial_mask[ix_] = True
    return spk_mask, trial_mask
def transferForest(train, targetID, n_features, max_depth, min_size, n_trees, feature_name):
    transfer_trees = []
    train = pd.DataFrame(train, columns = feature_name)
    group_list = list(train.iloc[:, 0])  # the default first column is the primary base station identifier
    group_list = set(group_list)
    source_list = []
    for group in group_list:
        if group != targetID:
            source_list.append(train[train['BS_ID'] == group])
    target = train[train['BS_ID'] == float(targetID)]
    feature_list = []
    value_list = []
    w_m = 0.5
    w_r = 0.5
    for i in range(n_trees):
        normal_tree = rf.get_best_spilt_candidate(train.values.tolist(), n_features)
        for key in normal_tree:
            feature_list.append(key)
            value_list.append(normal_tree[key])
        feature_mtl, value_mtl = mtl.multi_loss(source_list, target, feature_list, value_list)
        value_str = strut.optimize(train.values.tolist(), target, feature_mtl, value_mtl)
        split_value = w_m * value_mtl + w_r * value_str
        w_m, w_r = weight_loss(w_m, w_r, target, feature_mtl, value_mtl, value_str)
        tree = rf.build_transfer_tree(train, n_features, feature_mtl, split_value, max_depth, min_size)
        transfer_trees.append(tree)
    return transfer_trees
def remove_trials(spktimes, spktrials, spkwaveform, lfp, stimID, remID):
    spk_mask, trial_mask = RF.make_spk_and_trial_masks(spktrials, stimID, remID)
    if spk_mask.sum() > 0:
        spktimes = spktimes[~spk_mask]
        spktrials = spktrials[~spk_mask]
        spkwaveform = spkwaveform[~spk_mask, :]
    lfp = lfp[~trial_mask, :]
    stimID = stimID[~trial_mask, :]
    return spktimes, spktrials, spkwaveform, lfp, stimID
def rf_predict(company: str, verbose=False, train_size=0.80, scaled=False):
    start = time.time()
    X_train, X_test, y_train, y_test, prices, times = get_features(company, train_size=train_size, scaled=scaled)
    end = time.time()
    print('Load time: ' + str(end - start))

    true_labels, RF_predictions = RF.predict(X_train, y_train, X_test, y_test)
    accuracy = accuracy_score(true_labels, RF_predictions)

    if verbose:
        print("Random Forest Accuracy: " + str(accuracy * 100) + "%")
        prediction_distribution(RF_predictions, true_labels)

    return prices, times, RF_predictions, accuracy
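# --- Hypothetical usage sketch (not from the original source) ----------------
# The ticker symbol is a placeholder; rf_predict loads the features, fits the
# forest via RF.predict, and reports accuracy when verbose=True.
prices, times_, predictions, acc = rf_predict('AAPL', verbose = True, train_size = 0.80)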
def calc_rf_psth(rast, stimparams):
    ufreqs = np.unique(stimparams[:, 0])
    uattens = np.unique(stimparams[:, 1])
    nfreqs, nattens = ufreqs.size, uattens.size
    rf_psth = np.empty((nattens, nfreqs, rast.shape[1]))
    for i in range(nfreqs):
        for j in range(nattens):
            ix = RF.get_trials(stimparams, np.array([ufreqs[i], uattens[j]]))
            rf_psth[j, i, :] = rast[ix, :].mean(0)
    return rf_psth
def maps_by_group(gens = ['wt', 'ko'], exps = ['nai', 'exp', 'w1', 'w2', 'w3']):
    for i, (gen, exp) in enumerate(itertools.product(gens, exps)):
        sesss = glob.glob(os.path.join(basedir, 'Sessions', gen+'_'+exp+'*'))
        sesss = [os.path.basename(s) for s in sesss]
        nsess = len(sesss)
        if nsess > 0:
            fig, ax = RF.look_at_map(sesss)
            fig.savefig(os.path.join(basedir, 'maps', '%s_%s' % (gen, exp)))
            plt.close('all')
    return
def train():
    global target
    global to_drop
    global feature_importance
    if request.method == 'POST':
        f = request.files['data']
        f.save('data.csv')
        target = request.form['target'].strip()
        ID = request.form['ID'].strip()
        to_drop = [target, ID]
        feature_importance, score = RF.first_phase(target, ID, to_drop)
        model = send_file('rf_model.pkl')
        return model
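# --- Hypothetical client-side sketch (not from the original source) ----------
# Assumes the Flask view above is mounted at /train; the CSV path and the
# 'target'/'ID' column names are placeholders. The response body is rf_model.pkl.
import requests
resp = requests.post('http://localhost:5000/train',
                     files = {'data': open('data.csv', 'rb')},
                     data = {'target': 'label', 'ID': 'row_id'})
open('rf_model.pkl', 'wb').write(resp.content)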
def circ_psth_all(rast, stimparams, freq, npips, onset = 0.05, bins = 20, color = 'b', remove_first = False, axs = None):
    '''
    Input:
        rast : full block raster
        stimparams : full block stimulus parameters
        freq : in Hz, the frequency played to this unit
        npips : list of the number of pips for each repetition rate
        axs : list of (polar) axes, one per repetition rate
    Output:
        r : the mean vector length for each repetition rate
        V : the summed vector length for each repetition rate
        theta : the mean vector angle for each repetition rate
    '''
    urrs = np.unique(stimparams[:, 1])
    nrrs = urrs.size
    ix = RF.get_trials(stimparams, (freq, np.nan))
    rast_ = rast[ix, :]
    stimparams_ = stimparams[ix, :]
    r = []; V = []; theta = []
    for i in xrange(nrrs):
        ix = RF.get_trials(stimparams_, (np.nan, urrs[i]))
        r_, V_, theta_ = circ_psth(rast_[ix, :], urrs[i], npips[i], onset = onset, bins = bins,
                                   color = color, remove_first = remove_first, ax = axs[i])
        r.append(r_); V.append(V_); theta.append(theta_)
    misc.sameyaxis(axs)
    return np.array(r), np.array(V), np.array(theta)
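# --- Hypothetical usage sketch (not from the original source) ----------------
# Assumes `rast` / `stimparams` come from an RR block and that this module's
# helpers (RF, misc, circ_psth) are importable; npips follows the 4-second
# trains used in rr_make_contactsheets (4 pips per Hz of repetition rate).
urrs = np.unique(stimparams[:, 1])
npips = (urrs * 4).astype(int)
fig = plt.figure()
axs = [fig.add_subplot(1, urrs.size, i + 1, polar = True) for i in range(urrs.size)]
r, V, theta = circ_psth_all(rast, stimparams, cf_hz, npips, axs = axs)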
def calc_vs_all(rast, stimparams, ufreqs, urrs, npips = 6, onset = 0.05):
    ufreqs = np.asarray(ufreqs)
    urrs = np.asarray(urrs)
    nfreqs = ufreqs.size
    nrrs = urrs.size
    vs = np.empty((nfreqs, nrrs)) * np.nan
    vs_p = np.empty((nfreqs, nrrs)) * np.nan
    for f in range(nfreqs):
        for r in range(nrrs):
            ix = RF.get_trials(stimparams, [ufreqs[f], urrs[r]])
            rast_ = rast[ix, :]
            vs[f, r], vs_p[f, r] = calc_vs(rast_, urrs[r], npips, onset)
    return vs, vs_p
def calc_rrtf_lfp_all(lfp, lfp_t, stimparams, freq, rrs, onset = 0.05):
    nrrs = rrs.size
    rrtf_lfp = np.empty(nrrs) * np.nan
    trial_ix = stimparams[:, 0] == freq
    pip_start = 0.005 + onset
    pip_end = pip_start + 0.02
    time_ix = np.vstack((pip_start < lfp_t, lfp_t < pip_end)).all(0)
    lfp_mag_1st = (lfp[trial_ix, :][:, time_ix]).mean(0).min()
    print lfp_mag_1st
    for i, rr in enumerate(rrs):
        lfp_ = lfp[RF.get_trials(stimparams, np.array([freq, rr])), :]
        rrtf_lfp[i] = calc_rrtf_lfp(lfp_, lfp_t, rr, lfp_mag_1st)
    return rrtf_lfp
def calc_lfp_by_stim(rast, stimparams):
    nstimparams = stimparams.shape[1]

    usp = []
    for i in range(nstimparams):
        usp.append(list(np.unique(stimparams[:, i])))

    nparamlevels = np.empty(nstimparams, dtype = np.int32)
    for i in range(nstimparams):
        nparamlevels[i] = len(usp[i])

    ntrials_per_stim = np.zeros(nparamlevels)

    '''
    compute
    nbins : the number of bins
    '''
    dur_ms = rast.shape[1]  # number of milliseconds
    t_ms = np.arange(dur_ms)  # time indices in ms
    nbins = rast.shape[-1]

    assert np.unique(ntrials_per_stim).size == 1
    ntrials = np.int32(ntrials_per_stim[0])

    psth_shape = np.hstack((nparamlevels, nbins))
    psth = np.zeros(psth_shape)

    combinations = []
    combinations_ix = []
    for i in itertools.product(*usp):
        combinations.append(i)
        combinations_ix_ = []
        for j, i_ in enumerate(i):
            q = (np.array(usp[j]) == i_).nonzero()[0][0]
            combinations_ix_.append(q)
        combinations_ix.append(combinations_ix_)

    for m, n in zip(combinations, combinations_ix):
        ix = RF.get_trials(stimparams, m)
        ntrials = ix.size
        lfp_ = rast[ix, :]
        psth[tuple(n)] = lfp_.sum(0)

    return psth, usp
def calc_rrtf_all(rast, stimparams, freq, urrs, npips = 6, onset = 0.05, norm = True):
    '''
    Takes the full block raster and stimparams, finds the response to the first tone
    by filtering the rast to include only responses to the given frequency (but for
    all repetition rates), then it filters the rast to only the given freq/rr pair
    and passes that to calc_rrtf, along with the first tone response for all
    repetition rates
    Input:
        rast : full block raster
        stimparams : full block stimulus parameters
        freq : in Hz, the frequency played to this unit
        urrs : sorted list (lo-hi) of repetition rates played
        npips : list of the number of pips at each rate, or scalar if each rate had the same number of pips
        onset : in seconds, the onset time for the first pip
        norm : for the response to each pip, subtract the pre-pip response
    Output:
        rrtf : n-length vector, where n is the number of repetition rates
    '''
    if type(npips) is int:
        npips = [npips]*len(urrs)

    nrrs = urrs.size
    rrtf = np.empty(nrrs) * np.nan

    # get raster subset for this frequency
    ix = stimparams[:, 0] == freq

    # response onset and offset (5 - 25 ms after stimulus onset)
    resp_start = np.int32((0.005 + onset) * 1000)
    resp_end = resp_start + 20
    nspks_1st = (rast[ix, resp_start:resp_end]).mean()  # spikes per millisecond

    # normalize by pre-pip baseline (-20 - 0 ms before stimulus onset)
    if norm:
        pip_end_pre = resp_start - 5
        pip_start_pre = pip_end_pre - 20
        nspks_1st = nspks_1st - (rast[ix, pip_start_pre : pip_end_pre]).mean()

    # loop through repetition rates, get raster subset, and calculate RRTF
    for i, (rr, npip) in enumerate(zip(urrs, npips)):
        rast_ = rast[RF.get_trials(stimparams, np.array([freq, rr])), :]
        rrtf[i] = calc_rrtf(rast_, rr, nspks_1st, npips = npip, norm = norm)

    return rrtf
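# --- Hypothetical usage sketch (not from the original source) ----------------
# Mirrors the call in rr_make_contactsheets below: rr_rast / rr_stimparams come
# from an RR fileconversion block and cf_hz is the unit's characteristic frequency.
urrs = np.unique(rr_stimparams[:, 1])
npips = (urrs * 4).astype(int)
rrtf = calc_rrtf_all(rr_rast, rr_stimparams, cf_hz, urrs, npips = npips)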
def aligned_psth_separate_all(rast, stimparams, freq, npips, onset = 0.05, axs = None):
    '''
    Input:
        rast : full block raster
        stimparams : full block stimulus parameters
        freq : in Hz, the frequency played to this neuron
        npips : number of pips for each repetition rate
        onset : in seconds, onset time of first pip
        axs : a list of axes to which the output will be displayed
    '''
    urrs = np.unique(stimparams[:, 1])
    nrrs = urrs.size
    if axs is None:
        axs = [None]*nrrs

    aligned_psths = []
    for rr, npip, ax in zip(urrs, npips, axs):
        ix = RF.get_trials(stimparams, (freq, rr))
        psth = 1000*rast[ix, :].mean(0)
        aligned_psths.append(aligned_psth_separate(psth, rr, npip, onset = onset, ax = ax))

    return aligned_psths
def export_unit(Data0):
    # number of time bins to include in the LFP array
    nlfpsamp = 0
    for tt, trial in enumerate(Data0['trial'][0][0][0]):
        thislfpsamp = trial['LFP'].shape[1]
        if thislfpsamp > nlfpsamp:
            nlfpsamp = thislfpsamp

    ntrials = Data0['trial'][0][0][0].size  # find number of trials
    nstimID = Data0['trial'][0][0][0][0]['Epoch_Value'][0].size

    # initialize LFP
    lfp = np.ndarray((2, ntrials, nlfpsamp), dtype = 'float32')
    # initialize frequency and attenuation IDs
    stimID = np.ndarray((ntrials, nstimID), dtype = 'float32')

    for tt in range(ntrials):
        trial = Data0['trial'][0][0][0][tt]
        thisstimID = np.float32(trial['Epoch_Value'][0])

        # get the LFP for this trial and pad it with nans so it can fit in a matrix
        # (since some of the trials have +/-1 data point for LFP)
        for cc in range(2):
            lfpchannel = trial['LFP'][cc]
            lfp[cc, tt, :len(lfpchannel)] = lfpchannel

        # add to Epoch_Value
        stimID[tt, :] = thisstimID

    remID = np.array([0., 0.])
    trial_mask = RF.make_trial_mask(stimID, remID)
    lfp = lfp[:, ~trial_mask, :]
    stimID = stimID[~trial_mask, :]

    return lfp, stimID
data = json.load(fp)

fp = open('./data/equations.json')
equations = json.load(fp)

for iteration in range(10):
    print('Iteration ', iteration)
    twentyFive = int(0.25 * len(data))
    shuffle(data)
    test = data[:twentyFive]
    train = data[twentyFive:]
    for i in range(10):
        print('-- SubIteration ', i)
        predict = RF.train(train, (i + 1) * 100)
        predict_SVM = SVM.train(train, 10**(i - 4))
        right = 0
        right_SVM = 0
        for datapoint in test:
            predicted = predict(datapoint['question'], False)
            predicted_SVM = predict_SVM(datapoint['question'], False)
            if checkSolution(predicted, datapoint['answers']):
                right += 1
            if checkSolution(predicted_SVM, datapoint['answers']):
                right_SVM += 1
        print('RF: ', right / twentyFive, '\t SVM: ', right_SVM / twentyFive)
# coding: utf-8
import csv
from math import sqrt
import RF as rf
import numpy as np
import pandas as pd
import transferForest as trans

if __name__ == '__main__':
    dataSet, feature = rf.loadCSV('../../xxx.csv')  # your file path; the file holds both source-domain and target-domain data
    rf.column_to_float(dataSet)
    targetID = 'xxx'  # specify the target domain ID

    # parameters of the random forests
    n_folds = 10
    max_depth = 15
    min_size = 1
    ratio = 1.0
    n_features = 35
    n_trees = 100

    # cross validation
    folds = rf.spiltDataSet(dataSet, n_folds)
    for fold in folds:
        train_set = folds[:]
def unit(u_, Data0, cc, blockID):
    # number of time bins to include in the LFP array
    nlfpsamp = 0
    for tt, trial in enumerate(Data0['trial'][0][0][0]):
        thislfpsamp = trial['LFP'].shape[1]
        if thislfpsamp > nlfpsamp:
            nlfpsamp = thislfpsamp

    ntrials = Data0['trial'][0][0][0].size  # find number of trials
    nstimID = Data0['trial'][0][0][0][0]['Epoch_Value'][0].size

    # initialize LFP, spike times, spike trials, spike waveform
    lfp = np.ndarray((0, nlfpsamp), dtype = 'float32')
    spktimes = np.ndarray(0)
    spktrials = np.ndarray(0)
    spkwaveform = np.ndarray((0, 22))

    # initialize frequency and attenuation IDs
    stimID = np.ndarray((0, nstimID), dtype = 'float32')

    ttt = 0  # valid trial counter
    for tt in range(ntrials):
        trial = Data0['trial'][0][0][0][tt]
        thisstimID = np.float32(trial['Epoch_Value'][0])

        # if not ((blockID.startswith('b')) and (thisstimID[0] < 2)):

        # get the LFP for this trial and pad it with nans so it can fit in a matrix
        # (since some of the trials have +/-1 data point for LFP)
        lfpchannel = trial['LFP'][cc]
        lfpchannel = np.concatenate((lfpchannel, np.zeros(nlfpsamp - len(lfpchannel)) * np.nan))
        lfp = np.vstack((lfp, lfpchannel))

        spktime = trial['CH'][0][cc]['latency']
        if np.prod(spktime.shape) > 0:
            spktimes = np.append(spktimes, spktime)
            spktrials = np.append(spktrials, np.ones(spktime.size) * ttt)
            spkwaveform = np.concatenate((spkwaveform, trial['CH'][0][cc]['spkwaveform'].T), 0)

        # add to Epoch_Value
        stimID = np.vstack((stimID, thisstimID))
        ttt += 1  # increment valid trial counter

        # end if valid ID
    # end trial loop

    if spktimes.size == 0:  # if no spikes
        print 'No spikes detected for this unit.'
        spktimes = np.array([np.nan])
        spktrials = np.array([np.nan])
        spkwaveform = np.array([np.nan])
        rast = np.array([np.nan])
    else:
        # filter out unwanted trials
        remID = np.array([np.nan, np.nan])
        if blockID.startswith('b'):
            remID = np.array([1., 70.])
        elif blockID.startswith('r'):
            remID = np.array([0., 0.])
        spktimes, spktrials, spkwaveform, lfp, stimID = \
            remove_trials(spktimes, spktrials, spkwaveform, lfp, stimID, remID)

        # create raster
        ntrials = stimID.shape[0]
        nbins = np.ceil(1000 * spktimes.max()) + 1
        rast = Spikes.calc_rast(spktimes, spktrials, ntrials, nbins)

    # save out to file
    u_.create_dataset('chan', data = cc)
    u_.create_dataset('blockID', data = blockID)

    # add stimulus ID datasets to this stimset on this unit
    u_.create_dataset('stimID', data = stimID)
    u_.create_dataset('lfp', data = lfp, compression = 'gzip')
    u_.create_dataset('spktimes', data = spktimes, compression = 'gzip')
    u_.create_dataset('spktrials', data = spktrials, compression = 'gzip')
    u_.create_dataset('spkwaveform', data = spkwaveform, compression = 'gzip')
    u_.create_dataset('rast', data = rast, compression = 'gzip')

    if blockID.startswith('b'):
        rf = RF.calc_rf(rast, stimID)
        u_.create_dataset('rf', data = rf, compression = 'gzip')
import KNN

time_start = time.time()

# set the random seed
np.random.seed(seed=10)

# load the data
inputs = pd.read_csv('../../data/featuredata/total.csv', header=None)
outputs = pd.read_csv('../../data/processdata/tag.csv', header=None)

# shuffle the data
train = np.array(pd.concat([inputs, outputs], axis=1))
np.random.shuffle(train)

# split back into inputs and labels
inputs = pd.DataFrame(train).iloc[:, :-1]
outputs = pd.DataFrame(train).iloc[:, -1]

# train the four models and run 10-fold cross-validation on each
result1 = SVM.model(inputs, outputs)
result2 = NN.model(inputs, outputs)
result3 = RF.model(inputs, outputs)
result4 = KNN.model(inputs, outputs)

time_end = time.time()

# print the mean cross-validation accuracy of each algorithm
print('SVM: mean accuracy = ', result1)
print('NN: mean accuracy = ', result2)
print('RF: mean accuracy = ', result3)
print('KNN: mean accuracy = ', result4)
print('Running time: {:.2f} Seconds'.format(time_end - time_start))
import h5py, os, glob, re
import RF, RR, Spikes, misc
from fileconversion import load_cfs

basedir = '/Volumes/BOB_SAGET/Fmr1_voc/voc_ko_nai_20130116'
experiment = 'voc_ko_nai_20130116'

rf_paths = glob.glob(os.path.join(basedir, 'fileconversion', 'RF*.h5'))
rr_paths = glob.glob(os.path.join(basedir, 'fileconversion', 'RR*.h5'))
voc_paths = glob.glob(os.path.join(basedir, 'fileconversion', 'VOC*.h5'))

ix2freq = RF.get_ix2freq()
p = re.compile('RF(\d+).h5')

for rf_path in rf_paths:
    penno = p.findall(rf_path)[0]
    rr_path = [f for f in rr_paths if penno in f][0]
    voc_path = [f for f in voc_paths if penno in f][0]

    rf_file = h5py.File(rf_path, 'r')
    rf_rast = rf_file['rast'].value
    rf_stimparams = rf_file['stimID'].value
    rf_file.close()

    cfs = load_cfs(experiment)
    cf_ix = np.int32(np.round(RF.find_cf(cfs, np.int32(penno))))
    cf = ix2freq[20:][cf_ix]

    # perform analysis
    if len(rr_path) > 0:
def characterize(sesss = sesss, experiment = 'Fmr1_RR', pplot = True, verbose = False):

    if type(sesss) == str:
        sesss = [sesss]

    # set up figure
    figsize = (12, 12)
    fig = plt.figure(figsize = figsize)

    # loop through sesss
    for sess in sesss:

        DB = np.empty(0, dtype = dtype)
        print '%s\n%s\n\n' % (sess, '-'*50)

        # build the output directory path
        savedir = os.path.join(basedir, experiment, 'Sessions', sess, 'analysis')
        if not os.path.exists(savedir):
            os.mkdir(savedir)

        # WT or KO / CTL or EXP
        gen, exp, date = sess.split('_')

        # find the RF blocks
        pens = glob.glob(os.path.join(basedir, experiment, 'Sessions', sess, 'fileconversion', 'RF*.h5'))

        # load the cfs for this sess
        cfs = np.loadtxt(os.path.join(basedir, experiment, 'Sessions', sess, 'cfs.txt'), ndmin = 1)

        # loop through blocks in this sess
        for pen in pens:

            absol, relat = os.path.split(pen)
            blockname = os.path.splitext(relat)[0]

            # get unit number from filename
            unitnum = np.int32(p.findall(relat))[0]  # unit number

            ix = cfs[:, 0] == unitnum
            if ix.sum() > 0:
                cf_man = cfs[ix, 1][0]
                if verbose:
                    print pen

                # load the RF block
                f = h5py.File(pen, 'r')
                spktimes = f['spktimes'].value
                # if not np.isnan(spktimes[0]):

                '''--------RF--------'''
                # load the RF block to get the RF
                rf = f['rf'].value
                rast = f['rast'].value
                spktimes = f['spktimes'].value
                stimparams = f['stimID'].value
                spktrials = f['spktrials'].value
                coord = f['coord'].value
                ntrials = f['rast'].shape[0]
                f.close()

                # calculate the psth (spk/s*trial), normalized by the number of trials
                psth = Spikes.calc_psth(rast, normed = True)  # spk/s

                # baseline firing rate
                base_mean = psth[:stim_on].mean()  # spk/s

                # response onset/offset
                psth_smoo = Spikes.exp_smoo(psth, tau = 0.003)
                resp_on, resp_off = Spikes.calc_on_off(psth_smoo, stim_on = stim_on)

                # rewindowed RF
                rf_rewin = RF.calc_rf(rast, stimparams, resp_on = resp_on + stim_on - 3, resp_off = resp_off + stim_on + 3, normed = True)

                # thresholded RF
                rf_thresh = rf_rewin.copy()
                rf_threshold = np.percentile(rf_thresh, 66)  # upper quartile
                rf_peak = rf_rewin.max()
                rf_thresh[rf_thresh < rf_threshold] = 0

                # find maximum RF cluster
                (rf_clust, clust_sizes) = RF.findmaxcluster(rf_thresh, cf = cf_man, include_diagonal = False)
                # if clust_sizes.max() < 10:  # if it's a tiny RF, set it to nans
                #     rf_clust = np.empty(rf_clust.shape) * np.nan
                rf_mask = rf_clust > 0

                # find evoked psth
                ev_psth = RF.calc_evoked_psth(rast, stimparams, rf_mask)
                ev_psth_smoo = Spikes.exp_smoo(ev_psth, tau = 0.003)
                ev_resp_on, ev_resp_off = Spikes.calc_on_off(ev_psth_smoo, stim_on = stim_on)
                ev_mean = ev_psth[ev_resp_on : ev_resp_off].mean()

                # bandwidth and threshold
                bw, bw_lr, _, thresh = RF.calc_bw_cf_thresh(rf_mask)

                # center of mass
                com = RF.calc_rf_com(rf_clust)
                tip_top = np.max([thresh-2, 0])
                tip_bottom = thresh
                com_tip = RF.calc_rf_com(rf_clust[tip_top:tip_bottom, :])

                '''PLOT'''
                if pplot:
                    rf1_ax = fig.add_subplot(221)
                    rf2_ax = fig.add_subplot(222)
                    psth1_ax = fig.add_subplot(223)
                    psth2_ax = fig.add_subplot(224)
                    RF.plot_RF(rf, bw_lr = bw_lr, thresh = thresh, cf = cf_man, ax = rf1_ax)
                    RF.plot_RF(rf_clust, bw_lr = bw_lr, thresh = thresh, cf = cf_man, ax = rf2_ax)
                    rf2_ax.axvline(com, color = 'g', ls = '--')
                    rf2_ax.axvline(com_tip, color = 'b', ls = '--')
                    psth1_ax.plot(psth_smoo)
                    psth2_ax.plot(ev_psth_smoo)
                    psth1_ax.axvline(resp_on+stim_on, color = 'r', ls = '--')
                    psth1_ax.axvline(resp_off+stim_on, color = 'r', ls = '--')
                    figpath = os.path.join(savedir, blockname + '.png')
                    fig.savefig(figpath)
                    fig.clf()
                '''PLOT'''

                DB.resize(DB.size + 1)
                DB[-1] = np.array((gen, exp, sess, unitnum, \
                    psth[:333], ev_psth[:333], \
                    rf, rf_clust, \
                    cf_man, com, com_tip, \
                    bw, bw_lr, thresh, coord, \
                    resp_on, resp_off, ev_resp_on, ev_resp_off, \
                    base_mean, ev_mean), dtype = dtype)

        # end unit loop

        np.savez(os.path.join(basedir, experiment, 'Sessions', sess, sess + '_RF.npz'), DB = DB)

        if verbose:
            print '\n'*4
def calc_psth_by_stim(rast, stimparams, bins = 0.001):
    nstimparams = stimparams.shape[1]

    usp = []
    for i in range(nstimparams):
        usp.append(list(np.unique(stimparams[:, i])))

    nparamlevels = np.empty(nstimparams, dtype = np.int32)
    for i in range(nstimparams):
        nparamlevels[i] = len(usp[i])

    ntrials_per_stim = np.zeros(nparamlevels)

    '''
    compute
    nbins : the number of bins
    bins : times (seconds) for each bin
    '''
    dur_ms = rast.shape[1]  # number of milliseconds
    t_ms = np.arange(dur_ms)  # time indices in ms

    if type(bins) in (int, np.int32):
        nbins = bins
        bins = np.linspace(0, dur_ms, nbins) / 1000
        bindur = bins[1] - bins[0]
    elif type(bins) in (float, np.float):
        bindur = bins
        bins = np.arange(0, (dur_ms/1000.) + bindur, bindur)
        nbins = bins.size - 1
    elif type(bins) is np.ndarray:
        nbins = bins.size - 1
        bindur = bins[1] - bins[0]

    assert np.unique(ntrials_per_stim).size == 1
    ntrials = np.int32(ntrials_per_stim[0])

    psth_shape = np.hstack((nparamlevels, nbins))
    psth = np.zeros(psth_shape)

    combinations = []
    combinations_ix = []
    for i in itertools.product(*usp):
        combinations.append(i)
        combinations_ix_ = []
        for j, i_ in enumerate(i):
            q = (np.array(usp[j]) == i_).nonzero()[0][0]
            combinations_ix_.append(q)
        combinations_ix.append(combinations_ix_)

    for m, n in zip(combinations, combinations_ix):
        ix = RF.get_trials(stimparams, m)
        ntrials = ix.size
        spktimes_ = rast2spktimes(rast[ix, :])
        psth_, edges_ = np.histogram(spktimes_, bins = bins)
        psth_ = (1./bindur) * (psth_.astype(float) / ntrials)
        psth[tuple(n)] = psth_

    psth = psth.squeeze()

    return psth, usp
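# --- Hypothetical usage sketch (not from the original source) ----------------
# `bins` may be a bin width in seconds (the 0.001 default), a bin count (int),
# or an explicit array of bin edges in seconds; the sweep length below is illustrative.
psth_1ms, usp = calc_psth_by_stim(rast, stimparams)                 # 1 ms bins
psth_50bins, usp = calc_psth_by_stim(rast, stimparams, bins = 50)   # 50 bins across the sweep
psth_10ms, usp = calc_psth_by_stim(rast, stimparams, bins = np.arange(0, 0.35, 0.01))  # 10 ms edges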
def transfer_forest_predict(train_model, test):
    predict_values = [rf.bagging_predict(train_model, row) for row in test]
    return predict_values
def load_model():
    if request.method == 'POST':
        f = request.files['model']
        f.save('rf_model.pkl')
        feature_dict = RF.prepred_model(target, to_drop, feature_importance)
        return feature_dict
import naiv
import pandas as pd
import json
from flask import Flask, jsonify, request, render_template
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

df = prep.cleaning(pd.read_csv("dataset.csv"))  # load the dataset
lda = LDA(n_components = 8)  # select 8 features
X = df["data"].drop("Churn", axis=1)
Y = df["data"]["Churn"]
lda.fit(X, Y)  # fit the data to select the best features

clf_log = LOGR.LR(df["data"].copy(), "Churn", lda)    # instantiate an object of the LogisticRegression class
clf_KNN = KNN.KNN(df["data"].copy(), "Churn", lda)    # instantiate an object of the K-NN class
clf_RF = RF.RF(df["data"].copy(), "Churn", lda)       # instantiate an object of the RandomForest class
clf_SVM = SVM.SV(df["data"].copy(), "Churn", lda)     # instantiate an object of the SVM class
clf_Dt = Dtree.DT(df["data"].copy(), "Churn", lda)    # instantiate an object of the DecisionTree class
clf_naiv = naiv.RF(df["data"].copy(), "Churn", lda)   # instantiate an object of the NaiveBayes class

app = Flask(__name__)


@app.route("/", methods=["GET", "POST"])
def hello():
    data = {"data": pd.read_csv("dataset.csv").head(500).to_json()}
    return jsonify(data)


############################################################### PreProcessing ###################################################
@app.route("/prep", methods=["GET", "POST"])
def prp():
    df = prep.cleaning(pd.read_csv("dataset.csv"))["data"]
rast = frr['rast'].value
stimparams = frr['stimID'].value
frr.close()

# if not np.isnan(spktimes[0]):

cf = cfs[cfs[:, 0]==unitnum, 1][0]
cf_hz = ix2freq[20:][int(cf)]

freqs = stimparams[:, 0]
rrs = stimparams[:, 1]
ufreqs = np.unique(freqs)
urrs = np.unique(rrs)
nrrs = urrs.size

# now we determine which of the frequencies we played is closest to this neuron's CF
thisfreq, thisfreq_ix, thisfreq_err = misc.closest(ufreqs, cf_hz, log = True)
if np.abs(thisfreq_err) > 0.2:
    print 'No close frequency found!'
thisfreq = ufreqs[thisfreq_ix]

# isolate the parts of the raster for this frequency and build a psth for each RR
ix = RF.get_trials(stimparams, np.array([thisfreq, np.nan]))
thisrast = rast[ix, :1050]
thisstims = stimparams[ix, :]

psths, ustims = Spikes.calc_psth_by_stim(thisrast, thisstims)
rrtf = RR.calc_rrtf_all(thisrast, thisstims, thisfreq, urrs)

db.resize(db.size+1)
db[-1] = np.array((gen, exp, sess, unitnum, cf_hz, rrtf, urrs), dtype = dtype)

np.savez(savepath, db)
    cursor.execute(create_experiment_query)
    cursor.execute(fetch_experiment_id)
    return cursor.fetchone()[0]


def CreateConnection():
    global cnx, cursor
    cnx = mysql.connector.connect(user='******', password='', host='localhost', database='cakephp')
    return cnx.cursor()


if __name__ == "__main__":
    cursor = CreateConnection()
    exp_id = GetExperimentId()
    values = []
    for i in range(iterations):
        start = timer()
        end = timer()
        values.append(end - start)
    final_result = mean(sorted(values)[1:-1])
    print(final_result)
    cnx.commit()
    cnx.close()
    RF.run()
def main():
    #########################################
    ## Input file here ####
    # data input
    fSurface = open("kqQL91.txt", 'r')
    count = 0
    Surface = dict()
    pointList = list()
    while True:
        temp = [float(i) for i in fSurface.readline().split()]
        if len(temp) == 0:
            break
        if len(temp) == 1:
            Surface[count] = pointList
            pointList.clear()
            count += 1
        else:
            pointList.append(temp)
    fSurface.close()
    print(Surface)

    # props input
    fProps = open("props.txt", "r")
    props = json.loads(fProps.read())
    fProps.close()

    ### props settings #############
    # soil_props persists for the whole runtime !!!
    soil = RF.soil_props(float(props["soil_props"]["c"]),
                         float(props["soil_props"]["phi"]),
                         float(props["soil_props"]["gamma"]))
    ##
    # this instance uses constant calculating props
    cal = RF.calculating_props(float(props["calculating_props"]["u"]),
                               float(props["calculating_props"]["kW"]),
                               float(props["calculating_props"]["A"]),
                               float(props["calculating_props"]["D"]),
                               float(props["calculating_props"]["omega"]))
    ##
    ######################################################################
    #--------------------------------------------
    FS_Surface = np.array([float(0)] * count)
    lamdaSurface = np.array([float(0)] * count)
    centerSurface = np.array([float(0)] * 2 * count).reshape(count, 2)
    RSurface = np.array([float(0)] * count)

    for i in tqdm(range(count), desc="Loading…", ascii=False, ncols=75):
        time.sleep(0.01)
        # Every Surface
        currData = Surface[i]  # this is in list form
        innerData = RF.depth_converter(currData)  # this converts it to an np array

        ########################### DO SOMETHING TO GIVE FS AND BW ##############
        # Model setting ####
        numberCenter = innerData.shape[0] // 10
        isLeft, first = RF.index_first(innerData)
        dx = innerData[1, 0] - innerData[0, 0]
        centerArr = RF.center_defining(innerData, numberCenter, first, isLeft, dx)
        RL = RF.radius_lines_defining(innerData, numberCenter, np.amin(innerData[:, 0]), dx)
        R = np.array([
            RF.Radius(centerArr[numberCenter * i + 1, 1], RL[i])
            for i in range(numberCenter)
        ])
        FS = np.array([float(0)] * 2 * R.shape[0] * centerArr.shape[0]).reshape(centerArr.shape[0], R.shape[0], 2)
        ###
        #######
        ## lamda f(x) Tolerance setting:
        ##
        fx = np.array([(k / innerData.shape[0]) * math.pi for k in range(1, innerData.shape[0])])
        setting = RF.setting(
            fx,
            np.array([(i + 1) * (1 / numberCenter) for i in range(numberCenter)]),
            float(props["setting"]["Tolerance"]))
        ############
        for j in range(centerArr.shape[0]):
            ###
            # slide_props depends on the current surface state, so it must be rebuilt from innerData on every loop
            alpha = np.array([0] * (innerData.shape[0] - 1))
            beta = np.array([0] * (innerData.shape[0] - 1))
            a = np.array([0] * (innerData.shape[0] - 1))
            x = np.array([0] * (innerData.shape[0] - 1))
            W = np.array([0] * (innerData.shape[0] - 1))
            for jj in range(innerData.shape[0] - 1):
                alpha[jj] = math.atan((innerData[jj + 1, 1] - innerData[jj, 1]) / dx)
                beta[jj] = dx / (math.cos(alpha[jj]))
                a[jj] = 0
                x[jj] = abs(centerArr[j, 0] - innerData[jj, 0] - dx / 2)
                W[jj] = soil.gamma * (innerData[jj + 1, 1] - innerData[jj, 1]) * dx  #?#?/#?#
            slide = RF.slide_props(alpha, beta, a, x, W)
            ####
            for k in range(R.shape[0]):
                FS[j, k] = RF.Calculating_FoS(innerData, setting.lamda, setting.fx,
                                              centerArr[j], R[k], soil, cal, slide,
                                              setting.Tolerance)
        FS_Surface[i] = np.amin(FS)
        index = np.where(FS == FS_Surface[i])
        RSurface[i] = R[index[0, 1]]
        centerSurface[i] = centerArr[index[0, 0]]
        ######################### END DO SOMETHING ##############

    BW = np.array([float(0)] * count)
    for i in range(count):
        if FS_Surface[i] < float(props["setting"]["FSCritical"]):
            currData = Surface[i]  # this is in list form
            innerData = RF.depth_converter(currData)
            dx = innerData[1, 0] - innerData[0, 0]
            BW[i] = RF.Cal_BW(innerData, centerSurface[i], RSurface[i], dx)
    print(FS_Surface)
    print(BW)
def rr_make_contactsheets():
    '''
    loop through all the sessions and plot the rrtfs
    '''

    fig = plt.figure(figsize = (30, 18))
    txt_suptitle = fig.suptitle('')

    ax_cfrrtf = fig.add_axes((0.76, 0.76, 0.24, 0.23))
    ax_cfvs = ax_cfrrtf.twinx()
    ax_cfcircpsthall = fig.add_axes((0.62, (11/14.)-0.02, 0.1, (1/7.)+0.04), polar = True)
    ax_cfcircpsthall.set_xticklabels('')
    ax_cfcircpsthall.set_yticklabels('')
    ax_rf = fig.add_axes((0.67, 0.51, 0.33, 0.23))
    ax_rfrast = fig.add_axes((0.67, 0.25, 0.33, 0.24))
    ax_rfrast.set_xticklabels('')
    ax_rfpsth = fig.add_axes((0.67, 0.01, 0.33, 0.24))

    ax_cfrr = [fig.add_axes((0.03, 1-((i+1)/7.), 0.35, 1/7.)) for i in np.arange(nrrs)]
    ax_cfalignedpsth = [fig.add_axes((0.38, 1-((i+1)/7.), 0.17, 1/7.)) for i in np.arange(nrrs)]
    ax_cfcircpsth = [fig.add_axes((0.53, 1-((i+1)/7.), 0.1, 1/7.), polar = True) for i in np.arange(nrrs)]
    # ax_noiserr = [fig.add_subplot(nrrs, 3, i) for i in np.arange(1, 3*nrrs, 3)]

    for sessionpath in sessionpaths:

        session = os.path.split(sessionpath)[1]
        unitinfos = fileconversion.get_session_unitinfo(sessionpath, onlycomplete = ('RF', 'RR', 'VOC'))

        for unitkey in unitinfos.keys():

            txt_suptitle.set_text('%s %s' % (session, unitkey))

            unitinfo = unitinfos[unitkey]

            rf_ix = unitinfo['stimtype'].index('RF')

            f_rf = h5py.File(unitinfo['fpath'][rf_ix], 'r')
            rf_rast = f_rf['rast'].value
            rf_stimparams = f_rf['stimID'].value
            cf_ix = f_rf['cf'].value
            f_rf.close()

            cf = ix2freq[20:][int(cf_ix)]

            ''' calculate and plot RF, psth, and sorted raster'''
            rf = RF.calc_rf(rf_rast, rf_stimparams)
            rf_psth = Spikes.calc_psth(rf_rast)
            RF.plot_rf(rf, cf = cf_ix, axes_on = False, ax = ax_rf)  # plot RF
            ax_rf.axvline(cf_ix, color = 'r', lw = 1.5)
            Spikes.plot_sorted_raster(rf_rast, rf_stimparams, ax = ax_rfrast)  # plot raster
            ax_rfpsth.plot(t_rf, Spikes.exp_smoo(rf_psth, tau = 0.005))  # plot PSTH

            ''' calculate and plot RRTFs for CF and noise stimuli '''
            rr_ix = unitinfo['stimtype'].index('RR')

            f_rr = h5py.File(unitinfo['fpath'][rr_ix], 'r')
            rr_rast = f_rr['rast'].value
            rr_stimparams = f_rr['stimID'].value
            f_rr.close()

            # find the played CF
            rr_ufreqs = np.unique(rr_stimparams[:, 0])
            urrs = np.unique(rr_stimparams[:, 1])
            npips = (urrs*4).astype(int)
            rr_freq, rr_ufreq_ix, _ = misc.closest(rr_ufreqs, cf, log = True)

            ax_rf.axvline(RF.calc_freq2ix(rr_freq), color = 'g', lw = 1.5)

            # calculate the PSTHs for each repetition rate
            tmp = Spikes.calc_psth_by_stim(rr_rast, rr_stimparams)
            rr_cfpth = tmp[0][rr_ufreq_ix, :, :]
            # rrtf_noisepsth = tmp[0][0, :, :]

            # plot the aligned psths
            RR.aligned_psth_separate_all(rr_rast, rr_stimparams, rr_freq, npips, axs = ax_cfalignedpsth)
            [a.set_yticklabels('') for a in ax_cfalignedpsth]
            [a.set_xticklabels('') for a in ax_cfalignedpsth[:-1]]

            # plot circular psths
            r, V, theta = RR.circ_psth_all(rr_rast, rr_stimparams, rr_freq, npips, axs = ax_cfcircpsth)
            [a.set_yticklabels('') for a in ax_cfcircpsth]
            [a.set_xticklabels('') for a in ax_cfcircpsth]

            # plot all circular summed vector strengths
            ax_cfcircpsthall.plot(theta, V, '.-')
            [ax_cfcircpsthall.plot([0, th], [0, v], color = 'b', alpha = 1-(i/10.)) for i, (th, v) in enumerate(zip(theta, V))]

            # plot RRTF
            rrtf = RR.calc_rrtf_all(rr_rast, rr_stimparams, rr_freq, urrs, npips)
            ax_cfrrtf.plot(rrtf, '.-', ms = 10)
            ax_cfvs.plot(V*np.cos(theta), 'g.-', ms = 10)
            for tick in ax_cfvs.yaxis.get_major_ticks():
                tick.set_pad(-5)
                tick.label2.set_horizontalalignment('right')

            # plot repetition rate PSTHs
            for i in xrange(nrrs):
                # RR.plot_rrtf(t_rrtf, rrtf_noisepsth[i, :], urrs[i], int(4*urrs[i]), onset = 0.05, duration = 0.025, ax = ax_noiserr[i])
                RR.plot_rrtf(t_rrtf, rr_cfpth[i, :], urrs[i], int(4*urrs[i]), onset = 0.05, duration = 0.025, ax = ax_cfrr[i])

            # ax_noiserr[0].set_title('Noise RRTFs')
            ax_cfrr[0].set_title('CF RRTFs (%.0f kHz)' % (cf/1000))
            # [a.set_xlim(0, 4.5) for a in ax_noiserr]
            [a.set_xlim(0, 4.5) for a in ax_cfrr]
            misc.sameyaxis(ax_cfrr+ax_cfalignedpsth)

            figsavepath = os.path.join(studydir, 'Sheets', 'RRTFs', '%s_%s_RRTF.png' % (session, unitkey))
            print figsavepath
            fig.savefig(figsavepath)
            [a.cla() for a in fig.get_axes()]  # clear all axes
def main():
    # prepare the data
    rawdata = pd.read_csv("mySpotify.csv")
    newdata = rawdata[[
        'track_id', 'duration_ms', 'popularity', 'acousticness', 'danceability',
        'energy', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
        'speechiness', 'tempo', 'time_signature', 'valence', 'parentCat'
    ]]  # 96153 rows

    ## hypothesis 1: t test : mean of energy between pop and non-pop
    print("hypothesis 1")
    target_pop = newdata[newdata.parentCat == "pop"].drop_duplicates('track_id')
    id_in_labels = target_pop['track_id'].values
    non_pop = newdata[~newdata.track_id.isin(id_in_labels)].drop_duplicates('track_id')
    print(stats.ttest_ind(target_pop["energy"].values, non_pop["energy"].values))

    # hypothesis 2: linear regression acousticness ~ energy + loudness
    print("hypothesis 2")
    data_unique = newdata.drop_duplicates('track_id')
    X = data_unique[["energy", 'loudness']]
    X = sm.add_constant(X)
    y = data_unique["acousticness"]

    # Note the difference in argument order
    model = sm.OLS(y, X).fit()

    # Print out the statistics
    print(model.summary())

    ## == group the data by genre ==
    # there are 29 genres
    print("hypothesis 3")
    genres = newdata["parentCat"].unique()
    with open("genres_spotify.txt", "w") as f:
        f.write(str(genres))
    print(genres)

    # record the classification results of the different classification methods on all genres
    precision_train = pd.DataFrame(0, index=genres, columns=[
        "DecisisionTree", "KNN", "Naive Bayes", "SVM", "Random Forest"
    ])
    precision_test = pd.DataFrame(0, index=genres, columns=[
        "DecisisionTree", "KNN", "Naive Bayes", "SVM", "Random Forest"
    ])
    with open("classification.txt", "w") as f:
        f.write("classification result \n")

    for i in np.arange(len(genres)):
        with open("classification.txt", "a") as f:
            f.write("\n classification result for " + genres[i] + "\n")

        # create the per-genre variable by name
        vars()["data_" + genres[i]] = newdata[newdata.parentCat == genres[i]]
        data_target = vars()["data_" + genres[i]]
        genre_target = genres[i]
        if genres[i] in ["party", "workout"]:
            data_target = data_target.sample(3000)

        ## fill with col index
        # features_selection=[]
        # y_selection=15

        # First use data_Classical as the target data, then randomly sample song tracks that don't
        # belong to Classical and integrate them into the data set
        #
        id_in_labels = data_target['track_id'].values
        data_out_of_label = newdata[~newdata.track_id.isin(id_in_labels)].drop_duplicates(
            'track_id').sample(n=len(data_target))

        # normalize dataframe columns
        # print(data_target.describe())
        # print(data_out_of_label.describe())

        # combine the two data sets into one
        df = pd.concat([data_out_of_label, data_target]).sort_values(by=["track_id"])
        df.parentCat[df.parentCat != genre_target] = 0
        df.parentCat[df.parentCat == genre_target] = 1

        ## normalize data
        from sklearn import preprocessing
        x = df.iloc[:, 1:15]  # the last col is not included
        min_max_scaler = preprocessing.MinMaxScaler()
        x_scaled = min_max_scaler.fit_transform(x)
        df.iloc[:, 1:15] = x_scaled

        ## set training and test datasets
        df_values = df.values
        X = np.array(df_values[:, 1:15].tolist())
        Y = np.array(df_values[:, 15].tolist())
        test_size = 0.30
        seed = np.random.randint(1, 4)
        X_train, X_validate, Y_train, Y_validate = train_test_split(
            X, Y, test_size=test_size, random_state=seed)
        print("classification results for " + genres[i])

        #### ===== ======#
        # Decision Tree ###
        ##################
        y_score, train_precision, test_precision = Basic_DecisionTree(
            X_train, Y_train, X_validate, Y_validate, genre_target)
        fpr_1, tpr_1, thresholds = roc_curve(Y_validate, y_score)
        roc_auc_1 = auc(fpr_1, tpr_1)
        precision_train.loc[genres[i], "DecisisionTree"] = train_precision
        precision_test.loc[genres[i], "DecisisionTree"] = test_precision
        # plot roc curve
        # fig = plt.figure()
        # plt.plot(fpr, tpr, label="ROC curve(area =%0.2f)" % roc_auc)
        # plt.plot([0, 1], [0, 1], "k--")
        # plt.xlim([0.0, 1.0])
        # plt.ylim([0.0, 1.0])
        # plt.xlabel("False positive ")
        # plt.ylabel("true positive")
        # plt.title("ROC of Decision Tree classifier for" + genre_target + ",area==" + str(roc_auc))
        # fig.savefig('ROC DecisionTree' + genre_target + '.png')
        # plt.close()

        ####### ===== ======#
        # KNN ###
        #####################
        y_score, train_precision, test_precision = KNN(X_train, Y_train, X_validate, Y_validate)
        fpr_2, tpr_2, thresholds = roc_curve(Y_validate, y_score)
        roc_auc_2 = auc(fpr_2, tpr_2)
        precision_train.loc[genres[i], "KNN"] = train_precision
        precision_test.loc[genres[i], "KNN"] = test_precision

        #### ===== ======#
        # Naive Bayes ###
        ##################
        y_score, train_precision, test_precision = NVBayes(
            X_train, Y_train, X_validate, Y_validate)
        fpr_3, tpr_3, thresholds = roc_curve(Y_validate, y_score)
        roc_auc_3 = auc(fpr_3, tpr_3)
        precision_train.loc[genres[i], "Naive Bayes"] = train_precision
        precision_test.loc[genres[i], "Naive Bayes"] = test_precision

        #### ===== ======#
        # SVM ###
        ##################
        y_score, train_precision, test_precision = mysvm(
            X_train, Y_train, X_validate, Y_validate)
        fpr_4, tpr_4, thresholds = roc_curve(Y_validate, y_score)
        roc_auc_4 = auc(fpr_4, tpr_4)
        precision_train.loc[genres[i], "SVM"] = train_precision
        precision_test.loc[genres[i], "SVM"] = test_precision

        #### ===== ======#
        # RandomForest ###
        ##################
        y_score, train_precision, test_precision = RF(X_train, Y_train, X_validate, Y_validate)
        fpr_5, tpr_5, thresholds = roc_curve(Y_validate, y_score)
        roc_auc_5 = auc(fpr_5, tpr_5)
        precision_train.loc[genres[i], "Random Forest"] = train_precision
        precision_test.loc[genres[i], "Random Forest"] = test_precision

        # plot roc curve
        fig = plt.figure()
        plt.plot(fpr_1, tpr_1, label="ROC curve for decisionTree(area =%0.2f)" % roc_auc_1, color='darkorange')
        plt.plot(fpr_2, tpr_2, label="ROC curve for KNN(area =%0.2f)" % roc_auc_2, color='aqua')
        plt.plot(fpr_3, tpr_3, label="ROC curve for Naive bayes(area =%0.2f)" % roc_auc_3, color='cornflowerblue')
        plt.plot(fpr_4, tpr_4, label="ROC curve for SVM (area =%0.2f)" % roc_auc_4, color='green')
        plt.plot(fpr_5, tpr_5, label="ROC curve for RandomForest(area =%0.2f)" % roc_auc_5, color='red')
        plt.plot([0, 1], [0, 1], "k--")
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.0])
        plt.xlabel("False positive ")
        plt.ylabel("true positive")
        plt.title("ROC of classifiers for" + genre_target)
        plt.legend(loc="lower right")
        fig.savefig('ROC ' + genre_target + '.png')
        plt.close()

    precision_train.to_csv("precision_train.csv")
    precision_test.to_csv("precision_test.csv")