Example No. 1
def sub_all():
    print('training all...')
    RF_all1 = RF.RF_ALL('./data/Normalized_Data/testing_DataSet.csv',
                        './data/Normalized_Data/sub_DataSet.csv')
    RF_all2 = RF.RF_ALL('./data/Normalized_Data/EVAL_DataSet.csv',
                        './data/Normalized_Data/sub_DataSet.csv')
    gdbt_all1 = GDBT_all.GDBT_ALL(
        './data/Normalized_Data/training_DataSet.csv',
        './data/Normalized_Data/sub_DataSet.csv')
    gdbt_all2 = GDBT_all.GDBT_ALL('./data/Normalized_Data/testing_DataSet.csv',
                                  './data/Normalized_Data/sub_DataSet.csv')
    gdbt_all3 = GDBT_all.GDBT_ALL('./data/Normalized_Data/EVAL_DataSet.csv',
                                  './data/Normalized_Data/sub_DataSet.csv')
    svr_all = s.SVR_ALL('./data/Normalized_Data/EVAL_DataSet.csv',
                        './data/Normalized_Data/sub_DataSet.csv')

    fw = open(tmpFilename, 'w')
    for i in range(len(svr_all)):
        fw.write('%s,%s,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f\n' %
                (gdbt_all1[i][0],gdbt_all1[i][1],
                 float(RF_all1[i][2]),
                 float(RF_all2[i][2]),
                 float(gdbt_all1[i][2]),
                 float(gdbt_all2[i][2]),
                 float(gdbt_all3[i][2]),
                 float(svr_all[i][2]),
                 float(svr_all[i][3]),
                 float(svr_all[i][4]),
                ((float(RF_all2[i][2])+float(gdbt_all1[i][2]) + float(gdbt_all2[i][2])+\
                  float(svr_all[i][2]) + float(svr_all[i][3])+float(svr_all[i][4]))) / 6,
                cost_dict['all'][gdbt_all1[i][0]][0],
                cost_dict['all'][gdbt_all1[i][0]][1]
                ))
    fw.close()
Example No. 2
def sub_st():
    print('training ST...')
    RF_st1 = RF.RF_ST('./data/Normalized_Data/testing_DataSetST.csv',
                      './data/Normalized_Data/sub_DataSetST.csv')
    RF_st2 = RF.RF_ST('./data/Normalized_Data/EVAL_DataSetST.csv',
                      './data/Normalized_Data/sub_DataSetST.csv')
    gdbt_st1 = GDBT_ST.GDBT_ST('./data/Normalized_Data/testing_DataSetST.csv',
                               './data/Normalized_Data/sub_DataSetST.csv')
    gdbt_st2 = GDBT_ST.GDBT_ST('./data/Normalized_Data/EVAL_DataSetST.csv',
                               './data/Normalized_Data/sub_DataSetST.csv')
    gdbt_st3 = GDBT_ST.GDBT_ST('./data/Normalized_Data/EVAL_DataSetST.csv',
                               './data/Normalized_Data/sub_DataSetST.csv')
    svr_st = s.SVR_ST('./data/Normalized_Data/EVAL_DataSetST.csv',
                      './data/Normalized_Data/sub_DataSetST.csv')

    fw = open(tmpFilename, 'a')  # open in append mode
    for i in range(len(svr_st)):

        fw.write('%s,%s,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f\n' %
                (gdbt_st1[i][0],gdbt_st1[i][1],
                 float(RF_st1[i][2]),
                 float(RF_st2[i][2]),
                 float(gdbt_st1[i][2]),
                 float(gdbt_st2[i][2]),
                 float(gdbt_st3[i][2]),
                 float(svr_st[i][2]),
                 float(svr_st[i][3]),
                 float(svr_st[i][4]),
                 (float(RF_st2[i][2])+float(gdbt_st1[i][2]) + float(gdbt_st2[i][2]) + \
                    float(svr_st[i][2])+float(svr_st[i][3])+float(svr_st[i][4])) / 6,
                  cost_dict[gdbt_st1[i][1]][gdbt_st1[i][0]][0],
                  cost_dict[gdbt_st1[i][1]][gdbt_st1[i][0]][1],
                  ))
    fw.close()
Example No. 3
def calc_rast_by_stim(rast, stimparams, bins = None):

	
	usp1 = np.unique(stimparams[:, 0])
	usp2 = np.unique(stimparams[:, 1])
		
	nstimparams = stimparams.shape[1]
	ustimparams = [np.unique(stimparams[:, 0]), np.unique(stimparams[:, 1])]
	nparamlevels = np.array([usp1.size, usp2.size])

	nbins = rast.shape[1]
	
	ntrials_per_stim = np.zeros((nparamlevels[0], nparamlevels[1]))
	for i in range(nparamlevels[0]):
		for j in range(nparamlevels[1]):
			ntrials_per_stim[i, j] = RF.get_trials(stimparams, np.array([usp1[i], usp2[j]])).size
	
	if np.unique(ntrials_per_stim).size > 1:
		print 'Different numbers of trials per stimulus!'
	elif np.unique(ntrials_per_stim).size == 1:
		ntrials = np.int32(ntrials_per_stim[0])
	
	rast2 = np.zeros((ntrials, nparamlevels[0], nparamlevels[1], nbins))
	for i in range(nparamlevels[0]):
		for j_ in range(nparamlevels[1]):
			
			ix = RF.get_trials(stimparams, (usp1[i], usp2[j_]))
			rast2[:, i, j_, :] = rast[ix, :]
				
	
	return rast2, ustimparams
Example No. 4
    def __init__(self, symb, predlen, cat='RL', kwargs=None):

        self.symb = symb
        self.predlen = predlen
        self.kwargs = kwargs
        self.cat = cat

        if cat == 'RF':
            if kwargs is not None:
                self.learner = RF.RandomForest(**kwargs)
            else:
                self.learner = RF.RandomForest()

        elif cat == 'KNN':
            if kwargs is not None:
                self.learner = KNN.KNN(**kwargs)
            else:
                self.learner = KNN.KNN()

        elif cat == 'SVM':
            if kwargs is not None:
                self.learner = SVM.SVM(**kwargs)
            else:
                self.learner = SVM.SVM()

        elif cat == 'NN':
            if kwargs is not None:
                self.learner = NN.NN(**kwargs)
            else:
                self.learner = NN.NN()
Example No. 5
File: RF.py Project: r-b-g-b/Lab
def plot_latency_by_stim(rast, stimparams):
	
	rf = calc_rf(rast, stimparams)
	stim_peak_times = calc_latency_by_stim(rast, stimparams)
	
	x = stim_peak_times.copy()
	x[x>80] = 80
	x[x<50] = 50

	fig = plt.figure()
	ax1 = fig.add_subplot(121)
	ax2 = fig.add_subplot(122)
	RF.plot_rf(rf, ax = ax1)
	RF.plot_rf(x.T, ax = ax2, cmap = 'jet')
Example No. 6
File: RF.py Project: r-b-g-b/Lab
def plot_fake_strf(rast, stimparams):
	
	fig = plt.figure(figsize = (15, 10))
	ax1 = fig.add_axes([0.05, 0.2, 0.9, 0.75])
	ax2 = fig.add_axes([0.05, 0.05, 0.9, 0.15])
	
	fake_strf = calc_fake_strf(rast, stimparams)
	RF.plot_rf(fake_strf, ax = ax1, axes_on = False)
	psth = fake_strf.mean(0)
	psth_smoo = Spikes.hamming_smoo(psth, windlen = 5)
	ax2.plot(psth_smoo)
	ax2.set_xlim([0, rast.shape[1]])
	plt.draw(); plt.show();

	return fig
Example No. 7
File: RF.py Project: r-b-g-b/Lab
def add_bf_man(experiment):
	'''
	Runs through all of the units and allows you to click on the CF. Clicking a negative x-coordinate
	results in the unit being discarded (renamed to _RR###.h5)
	For valid units, the CF and threshold are saved in a text file called cfs.txt
	'''

	rf_blocks = glob.glob(os.path.join(studydir, 'Sessions', experiment, 'fileconversion', '*RF*.h5'))
	cfs = []
	badunits = []
	for rf_block in rf_blocks:
				
		fig = plt.figure();
		ax = fig.add_subplot(111);
		
		print rf_block
		f = h5py.File(rf_block, 'r')

		blockname = os.path.splitext(os.path.split(rf_block)[1])[0]
		pennum = np.int32(blockname.split('RF')[1])
	
		rf = f['rf'].value
		f.close()
		
		RF.plot_rf(rf, ax = ax)
		xlim = ax.get_xlim()
		ylim = ax.get_ylim()
		ax.set_xlim([xlim[0]-2, xlim[1]])
		ax.set_ylim([ylim[0], ylim[1]-1])
		ax.set_title(pennum)
		plt.show()
		xy = plt.ginput(1, timeout = -1)[0]
		xy = np.array(xy)
		if np.prod(xy)>0:
			xy = np.array(xy)
			cfs.append(np.hstack((pennum, xy)))
		else:
			badunits.append(pennum)

		plt.close(fig)


		savepath = os.path.join(studydir, 'Sessions', experiment, 'cfs.txt')
		np.savetxt(savepath, cfs)

		# print badunits

	plt.close(fig)
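A minimal interactive-usage sketch (the session name is hypothetical; studydir must already point at the study root):

add_bf_man('wt_nai_20130101')
# One RF plot appears per unit; click the CF, or click at a negative
# x-coordinate to discard the unit. Valid CFs and thresholds accumulate
# in Sessions/<experiment>/cfs.txt as the loop runs.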
Example No. 8
def predict():
    if request.method == 'POST':
        f = request.files['model']
        f.save('rf_model.pkl')
        pred_input = np.matrix(request.form['pred_input'])
        pred_result = RF.predict(pred_input)
        return pred_result
Example No. 9
def optimize(all_data, target_data, feature_mtl, value_mtl):
    target = np.array(target_data)  # convert to ndarray
    target = target.tolist()  # then to a nested list
    #target_data=list(target_data)

    left, right = rf.data_spilt(all_data, int(feature_mtl), value_mtl)
    left_t, right_t = rf.data_spilt(target, int(feature_mtl), value_mtl, True)
    all_label_left = list(float(row[-1]) for row in left)
    target_label_left = list(float(row[-1]) for row in left_t)
    all_label_right = list(float(row[-1]) for row in right)
    target_label_right = list(float(row[-1]) for row in right_t)

    theta = np.std(target_data.iloc[:, feature_mtl])

    divergence_gain = -999

    value_str = value_mtl
    i = value_mtl - theta

    while i <= (value_mtl + theta):
        left_n, right_n = rf.data_spilt(all_data, feature_mtl, i)
        left_n_t, right_n_t = rf.data_spilt(target, feature_mtl, i)

        all_n_left = list(float(row[-1]) for row in left_n)
        target_n_left = list(float(row[-1]) for row in left_n_t)
        all_n_right = list(float(row[-1]) for row in right_n)
        target_n_right = list(float(row[-1]) for row in right_n_t)
        loss_orig = loss(all_label_left, all_label_right, target_label_left,
                         target_label_right)
        loss_cand = loss(all_n_left, all_n_right, target_n_left, target_n_right)

        if loss_orig >= loss_cand:
            weight_left = len(target_label_left) / (len(target_label_left) +
                                                    len(target_label_right))
            weight_right = len(target_label_right) / (len(target_label_left) +
                                                      len(target_label_right))
            divergence_tmp = 1 - weight_left * js_gain(
                all_label_left, all_n_left) - weight_right * js_gain(
                    all_label_right, all_n_right)

            if divergence_tmp >= divergence_gain:
                divergence_gain = divergence_tmp
                value_str = i

        i += 0.05 * theta

    return value_str
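The while-loop above sweeps candidate thresholds across [value_mtl - theta, value_mtl + theta] in steps of 0.05 * theta, keeping the candidate with the highest divergence gain among those whose split loss is no worse than the original split. A quick sketch of that candidate grid (numbers are hypothetical):

import numpy as np

value_mtl, theta = 2.0, 0.5
candidates = np.arange(value_mtl - theta, value_mtl + theta + 1e-9, 0.05 * theta)
print(candidates.size)  # 41 thresholds, from 1.5 to 2.5 in steps of 0.025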
Example No. 10
def train_ST():
    RF_st1 = RF.RF_ST_train('./data/testingDataSetST1.csv',
                            './data/VALIDATION_DataSetST1.csv')
    RF_st2 = RF.RF_ST_train('./data/EVAL_DataSetST1.csv',
                            './data/VALIDATION_DataSetST1.csv')
    gdbt_st1 = GDBT_ST.GDBT_ST_train('./data/EVAL_DataSetST1.csv',
                                     './data/VALIDATION_DataSetST1.csv')
    gdbt_st2 = GDBT_ST.GDBT_ST_train('./data/testingDataSetST1.csv',
                                     './data/VALIDATION_DataSetST1.csv')
    gdbt_st3 = GDBT_ST.GDBT_ST_train('./data/VALIDATION_DataSetST1.csv',
                                     './data/VALIDATION_DataSetST1.csv')
    svr_st = s.SVR_ST_train()

    #     # stack: run a second SVR over the 3 GBDT and 1 SVR outputs
    #     store = ['1', '2', '3', '4', '5'];pred_y = []
    #     for st in store:
    #         X = [];Y = []
    #         for i in range(len(gdbt_st1)):
    #             if gdbt_st1[i][1] != st: continue
    #             X.append((RF_st1[i][2], gdbt_st1[i][2], gdbt_st2[i][2], svr_st[i][2]))
    #             Y.append(gdbt_st1[i][3])
    #         svr = SVR(kernel='linear', epsilon=0.5, C=1).fit(X, Y)
    #         svr_res = svr.predict(X)
    #         for x in svr_res:
    #             pred_y.append(x)

    fw = open(filename, 'a')
    for i in range(len(gdbt_st1)):
        fw.write('%s,%s,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f\n' %
                 (gdbt_st1[i][0], gdbt_st1[i][1],
                  float(RF_st1[i][2]),
                  float(RF_st2[i][2]),
                  float(gdbt_st1[i][2]),
                  float(gdbt_st2[i][2]),
                  float(gdbt_st3[i][2]),
                  float(svr_st[i][2]),
                  float(svr_st[i][3]),
                  float(svr_st[i][4]),
                  (float(RF_st2[i][2])+float(gdbt_st1[i][2]) + float(gdbt_st2[i][2]) + \
                        float(svr_st[i][2])+float(svr_st[i][3])+float(svr_st[i][4])) / 6,
                  cost_dict[gdbt_st1[i][1]][gdbt_st1[i][0]][0],
                  cost_dict[gdbt_st1[i][1]][gdbt_st1[i][0]][1],
                  float(gdbt_st1[i][3])
                  )
                 )
    fw.close()
Example No. 11
def time_tree(newick):
    tree = Tree(newick)
    t0 = time.time()
    sum3_dt = RF.polynomial_sum3_performance(tree,
                                             len(tree.get_leaves()) + 1)[1]
    tf = time.time()
    return tf - t0, sum3_dt
Example No. 12
File: RF.py Project: r-b-g-b/Lab
def make_spk_mask(spktrials, stimparams, param):
	
	ix = RF.get_trials(stimparams, param)
	spk_mask = np.zeros(spktrials.size, dtype = np.bool)
	for ix_ in ix:
		spk_mask[spktrials == ix_] = True
		
	return spk_mask
Example No. 13
def RandomForestTest(pca_option):

    import RF

    RF.RandomForestSimulation(
        RF.rf, processing.linear_pca, processing.overall_training_data, pca_option)

    processing.final_validation = np.array(processing.final_validation)

    FV_features = []
    FV_labels = []

    FV_features, FV_labels = processing.createFeatures_Labels(
        processing.final_validation)

    FV_features_data = None
    FV_labels_data = None

    FV_features_data, FV_labels_data = processing.convertToDataFrame(
        FV_features, FV_labels, processing.column_titles)

    global RF_final_predictions
    if pca_option == 'yes' or pca_option == 'both':
        # transform the hold-out features only when a PCA option was requested
        transformed_FV = processing.linear_pca.transform(FV_features_data)
        final_predictions = RF.rf.predict(transformed_FV)
    else:
        final_predictions = RF.rf.predict(FV_features_data)
    RF_final_predictions = final_predictions

    accuracy = metrics.accuracy_score(FV_labels, final_predictions)
    precision = metrics.precision_score(
        FV_labels, final_predictions, average='micro')
    recall = metrics.recall_score(
        FV_labels, final_predictions, average='micro')

    print('RANDOM FOREST MODEL FINAL TEST DATA ACCURACY: ', 100 * accuracy)
    print('RANDOM FOREST MODEL FINAL TEST DATA PRECISION: ', 100 * precision)
    print('RANDOM FOREST MODEL FINAL TEST DATA RECALL: ', 100 * recall)
    print()

    return accuracy, precision, recall
Example No. 14
def main(args):
    SVM = args.SVM
    RF = args.RF
    if SVM:
        import SVM
        SVM.SVM(args)
    if RF:
        import RF
        RF.RF(args)
Example No. 15
def weight_loss(w_m, w_r, target, feature_mtl, value_mtl, value_str):
    target = np.array(target) #np.ndarray()
    target = target.tolist() #list
    left_m, right_m = rf.data_spilt(target, feature_mtl, value_mtl)
    loss_m = rf.spilt_loss(left_m, right_m)

    left_r, right_r = rf.data_spilt(target, feature_mtl, value_str)
    loss_r = rf.spilt_loss(left_r, right_r)

    beta = 0.8

    w_m_t = w_m * (beta ** loss_m)
    w_r_t = w_r * (beta ** loss_r)

    weight_m = w_m_t / (w_m_t + w_r_t)
    weight_r = w_r_t / (w_m_t + w_r_t)

    return weight_m, weight_r
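weight_loss is a multiplicative-weights update: each candidate threshold's weight is scaled by beta raised to its split loss, then the pair is renormalized. A worked example with hypothetical losses:

beta = 0.8
w_m = w_r = 0.5
loss_m, loss_r = 2.0, 1.0        # splitting at value_mtl loses more than at value_str
w_m_t = w_m * beta ** loss_m     # 0.5 * 0.64 = 0.32
w_r_t = w_r * beta ** loss_r     # 0.5 * 0.80 = 0.40
print(w_m_t / (w_m_t + w_r_t))   # ~0.444, so weight shifts toward value_str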
Example No. 16
def rf_predict_live_data(company: str, verbose=False, scaled=False):
    features, _, labels, _, _, _ = get_features(company, train_size=1.00, scaled=scaled)
    live_features, live_labels, live_prices, live_times = get_live_features(company)
    true_labels, RF_predictions = RF.predict(features, labels, live_features, live_labels)
    accuracy = accuracy_score(true_labels, RF_predictions)
    if verbose:
        print("Random Forest Accuracy: " + str(accuracy * 100) + "%")
        prediction_distribution(RF_predictions, true_labels)
    return live_prices, live_times, RF_predictions, accuracy
Example No. 17
File: RF.py Project: r-b-g-b/Lab
def make_trial_mask(stimparams, param):
		
	ntrials = stimparams.shape[0]
	ix = RF.get_trials(stimparams, param)
	trial_mask = np.zeros(ntrials, dtype = np.bool)
	for ix_ in ix:
		trial_mask[ix_] = True
		
	return trial_mask
Example No. 18
def train_all():
    RF_all1 = RF.RF_ALL_train('./data/testingDataSet1.csv',
                              './data/VALIDATION_DataSet1.csv')
    RF_all2 = RF.RF_ALL_train('./data/EVAL_DataSet1.csv',
                              './data/VALIDATION_DataSet1.csv')
    gdbt_all1 = GDBT_all.GDBT_ALL_train('./data/VALIDATION_DataSet1.csv',
                                        './data/VALIDATION_DataSet1.csv')
    gdbt_all2 = GDBT_all.GDBT_ALL_train('./data/testingDataSet1.csv',
                                        './data/VALIDATION_DataSet1.csv')
    gdbt_all3 = GDBT_all.GDBT_ALL_train('./data/EVAL_DataSet1.csv',
                                        './data/VALIDATION_DataSet1.csv')
    svr_all = s.SVR_ALL_train()

    # stack: run a second SVR over the 3 GBDT and 1 SVR outputs
    #     X = [];Y = []
    #     for i in range(len(gdbt_all1)):
    #         X.append((RF_all1[i][2], gdbt_all1[i][2], gdbt_all2[i][2], svr_all[i][2],svr_all[i][3]))
    #         Y.append(gdbt_all1[i][3])
    #
    #     svr = SVR(kernel='linear', epsilon=2, C=1).fit(X, Y)
    #     print(svr.coef_)
    #     pred_y = svr.fit(X, Y).predict(X)

    fw = open(filename, 'w')
    for i in range(len(gdbt_all1)):
        fw.write('%s,%s,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f\n' %
                 (gdbt_all1[i][0], gdbt_all1[i][1],
                  float(RF_all1[i][2]),
                  float(RF_all2[i][2]),
                  float(gdbt_all1[i][2]),
                  float(gdbt_all2[i][2]),
                  float(gdbt_all3[i][2]),
                  float(svr_all[i][2]),
                  float(svr_all[i][3]),  # value from the previous 14 days
                  float(svr_all[i][4]),
                  ((float(RF_all1[i][2])+float(gdbt_all1[i][2]) + float(gdbt_all2[i][2])+ \
                        float(svr_all[i][2]) + float(svr_all[i][3])+float(svr_all[i][4]))) / 6,
                  cost_dict['all'][gdbt_all1[i][0]][0],
                  cost_dict['all'][gdbt_all1[i][0]][1],
                  float(gdbt_all1[i][3])
                  )
                 )

    fw.close()
Example No. 19
File: RF.py Project: r-b-g-b/Lab
def make_spk_and_trial_masks(spktrials, stimparams, param):
	ntrials = stimparams.shape[0]
	ix = RF.get_trials(stimparams, param)
	trial_mask = np.zeros(ntrials, dtype = np.bool)
	spk_mask = np.zeros(spktrials.size, dtype = np.bool)
	for ix_ in ix:
		spk_mask[spktrials == ix_] = True
		trial_mask[ix_] = True
		
	return spk_mask, trial_mask
Example No. 20
def transferForest(train, targetID, n_features, max_depth, min_size, n_trees, feature_name):
    transfer_trees =[]
    
    train = pd.DataFrame(train, columns = feature_name)
   
    group_list = list(train.iloc[:,0]) # The default first column is the primary base station identifier
    group_list = set(group_list)
    
    source_list = []
    for group in group_list:
        if group != targetID:
            source_list.append(train[train['BS_ID'] == group])
    target = train[train['BS_ID'] == float(targetID)] 
   
    
    feature_list = []
    value_list = []

    w_m = 0.5
    w_r = 0.5
    
    for i in range(n_trees):
        normal_tree = rf.get_best_spilt_candidate(train.values.tolist(), n_features)
        
        for key in normal_tree:
            feature_list.append(key)
            value_list.append(normal_tree[key])
        
        feature_mtl, value_mtl = mtl.multi_loss(source_list, target, feature_list, value_list)
        
        value_str = strut.optimize(train.values.tolist(), target, feature_mtl, value_mtl)
        
        split_value = w_m * value_mtl + w_r * value_str
       
        w_m, w_r = weight_loss(w_m, w_r, target, feature_mtl, value_mtl, value_str)

        tree = rf.build_transfer_tree(train, n_features, feature_mtl, split_value, max_depth, min_size)
        
        transfer_trees.append(tree)
    
    return transfer_trees
Example No. 21
def remove_trials(spktimes, spktrials, spkwaveform, lfp, stimID, remID):
	
	spk_mask, trial_mask = RF.make_spk_and_trial_masks(spktrials, stimID, remID)
	
	if spk_mask.sum()>0:
		spktimes = spktimes[~spk_mask]
		spktrials = spktrials[~spk_mask]
		spkwaveform = spkwaveform[~spk_mask, :]
		lfp = lfp[~trial_mask, :]
		stimID = stimID[~trial_mask, :]
	
	return spktimes, spktrials, spkwaveform, lfp, stimID
Example No. 22
def rf_predict(company: str, verbose=False, train_size=0.80, scaled=False):
    start = time.time()
    X_train, X_test, y_train, y_test, prices, times = get_features(company, train_size=train_size, scaled=scaled)
    end = time.time()
    print('Load time: ' + str(end - start))
    true_labels, RF_predictions = RF.predict(X_train, y_train, X_test, y_test)
    accuracy = accuracy_score(true_labels, RF_predictions)
    if verbose:
        print("Random Forest Accuracy: " + str(accuracy * 100) + "%")
        prediction_distribution(RF_predictions, true_labels)

    return prices, times, RF_predictions, accuracy
Example No. 23
File: RF.py Project: r-b-g-b/Lab
def calc_rf_psth(rast, stimparams):
	
	ufreqs = np.unique(stimparams[:, 0])
	uattens = np.unique(stimparams[:, 1])
	nfreqs, nattens = ufreqs.size, uattens.size
	rf_psth = np.empty((nattens, nfreqs, rast.shape[1]))
	for i in range(nfreqs):
		for j in range(nattens):
			ix = RF.get_trials(stimparams, np.array([ufreqs[i], uattens[j]]))
			rf_psth[j, i, :] = rast[ix, :].mean(0)
	
	return rf_psth
Example No. 24
def maps_by_group(gens = ['wt', 'ko'], exps = ['nai', 'exp', 'w1', 'w2', 'w3']):
	
	for i, (gen, exp) in enumerate(itertools.product(gens, exps)):
		sesss = glob.glob(os.path.join(basedir, 'Sessions', gen+'_'+exp+'*'))
		sesss = [os.path.basename(s) for s in sesss]
		nsess = len(sesss)
		if nsess > 0:
			fig, ax = RF.look_at_map(sesss)
			fig.savefig(os.path.join(basedir, 'maps', '%s_%s' % (gen, exp)))
			plt.close('all')


	return
Example No. 25
def train():
    global target
    global to_drop
    global feature_importance

    if request.method == 'POST':
        f = request.files['data']
        f.save('data.csv')
        target = request.form['target'].strip()
        ID = request.form['ID'].strip()
        to_drop = [target, ID]
        feature_importance, score = RF.first_phase(target, ID, to_drop)
        model = send_file('rf_model.pkl')
        return model
Example No. 26
File: RR.py Project: r-b-g-b/Lab
def circ_psth_all(rast, stimparams, freq, npips, onset = 0.05, bins = 20, color = 'b', remove_first = False, axs = None):
	'''
	Input:
		rast : full block raster
		stimparams : full block stimulus parameters
		freq : in Hz, the frequency whose trials are analyzed
		npips : number of pips for each repetition rate
	Output:
		r :  the mean vector length for each repetition rate
		V : the summed vector length for each repetition rate
		theta : the mean vector angle for each repetition rate
	'''
	urrs = np.unique(stimparams[:, 1])
	nrrs = urrs.size
	ix = RF.get_trials(stimparams, (freq, np.nan))
	rast_ = rast[ix, :]
	stimparams_ = stimparams[ix, :]
	r = []; V = []; theta = []
	
	for i in xrange(nrrs):
		ix = RF.get_trials(stimparams_, (np.nan, urrs[i]))
		r_, V_, theta_ = circ_psth(rast_[ix, :], urrs[i], npips[i], onset = onset, bins = bins, color = color, remove_first = remove_first, ax = axs[i])
		r.append(r_); V.append(V_); theta.append(theta_)
	
	misc.sameyaxis(axs)

	return np.array(r), np.array(V), np.array(theta)
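A minimal usage sketch (data loading elided; rast and stimparams come from a fileconversion block, and the pips-per-rate convention mirrors rr_make_contactsheets below):

import numpy as np
import matplotlib.pyplot as plt

urrs = np.unique(stimparams[:, 1])    # repetition rates in the block
npips = (urrs * 4).astype(int)        # pips per train
freq = 8000.                          # hypothetical: tone frequency of interest, in Hz
fig, axs = plt.subplots(1, urrs.size, subplot_kw = dict(polar = True))
r, V, theta = circ_psth_all(rast, stimparams, freq, npips, axs = axs)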
Example No. 27
File: RR.py Project: r-b-g-b/Lab
def calc_vs_all(rast, stimparams, ufreqs, urrs, npips = 6, onset = 0.05):
	
	ufreqs = np.asarray(ufreqs)
	urrs = np.asarray(urrs)
	nfreqs = ufreqs.size
	nrrs = urrs.size
	vs = np.empty((nfreqs, nrrs))*np.nan
	vs_p = np.empty((nfreqs, nrrs))*np.nan
	for f in range(nfreqs):
		for r in range(nrrs):
			ix = RF.get_trials(stimparams, [ufreqs[f], urrs[r]])
			rast_ = rast[ix, :]
			vs[f, r], vs_p[f, r] = calc_vs(rast_, urrs[r], npips, onset)
		
	return vs, vs_p
Example No. 28
File: RR.py Project: r-b-g-b/Lab
def calc_rrtf_lfp_all(lfp, lfp_t, stimparams, freq, rrs, onset = 0.05):

	nrrs = rrs.size
	rrtf_lfp = np.empty(nrrs) * np.nan
	trial_ix = stimparams[:, 0] == freq
	pip_start = 0.005 + onset
	pip_end = pip_start + 0.02
	time_ix = np.vstack((pip_start<lfp_t, lfp_t<pip_end)).all(0)

	lfp_mag_1st = (lfp[trial_ix, :][:, time_ix]).mean(0).min()
	print lfp_mag_1st
	for i, rr in enumerate(rrs):
		lfp_ = lfp[RF.get_trials(stimparams, np.array([freq, rr])), :]
		rrtf_lfp[i] = calc_rrtf_lfp(lfp_, lfp_t, rr, lfp_mag_1st)
	
	return rrtf_lfp
Example No. 29
File: EEG.py Project: r-b-g-b/Lab
def calc_lfp_by_stim(rast, stimparams):

	nstimparams = stimparams.shape[1]
	
	usp = []
	for i in range(nstimparams):	
		usp.append(list(np.unique(stimparams[:, i])))

	nparamlevels = np.empty(nstimparams, dtype = np.int32)
	for i in range(nstimparams):
		nparamlevels[i] = len(usp[i])

	ntrials_per_stim = np.zeros(nparamlevels)

	'''
	compute
	nbins	:	the number of bins
	'''
	dur_ms = rast.shape[1] # number of milliseconds
	t_ms = np.arange(dur_ms) # time indices in ms
	nbins = rast.shape[-1]
	assert np.unique(ntrials_per_stim).size == 1

	ntrials = np.int32(ntrials_per_stim[0])
	
	psth_shape = np.hstack((nparamlevels, nbins))
	psth = np.zeros(psth_shape)
	
	combinations = []
	combinations_ix = []
	for i in itertools.product(*usp):
		combinations.append(i)
		combinations_ix_ = []
		for j, i_ in enumerate(i):
			q = (np.array(usp[j])==i_).nonzero()[0][0]
			combinations_ix_.append(q)
		combinations_ix.append(combinations_ix_)
		
	for m, n in zip(combinations, combinations_ix):
		ix = RF.get_trials(stimparams, m)
		ntrials = ix.size
		lfp_ = rast[ix, :]
		psth[tuple(n)] = lfp_.sum(0)
				
	return psth, usp
Example No. 30
File: RR.py Project: r-b-g-b/Lab
def calc_rrtf_all(rast, stimparams, freq, urrs, npips = 6, onset = 0.05, norm = True):
	'''
	Takes the full block raster and stimparams, and finds the response to the first tone by filtering the rast to include only responses to the given frequency (but for all repetition rates). It then filters the rast to only the given freq/rr pair and passes that to calc_rrtf, along with the first-tone response computed across all repetition rates.

	Input:
		rast : full block raster
		stimparams : full block stimulus parameters
		freq : in Hz, the frequency played to this unit
		urrs : sorted list (lo-hi) of repetition rates played
		npips : list of the number of pips at each rate, or scalar if each rate had the same number of pips
		onset : in seconds, the onset time for the first pip
		norm : for the response to each pip, subtract the pre-pip response

	Output:
		rrtf : n-length vector, where n is the number of repetition rates
	'''
	if type(npips) is int:
		npips = [npips]*len(urrs)

	nrrs = urrs.size
	rrtf = np.empty(nrrs) * np.nan

	# get raster subset for this frequency
	ix = stimparams[:, 0] == freq

	# response onset and offset (5 - 25 ms after stimulus onset)
	resp_start = np.int32((0.005 + onset) * 1000)
	resp_end = resp_start + 20
	nspks_1st = (rast[ix, resp_start:resp_end]).mean() # spikes per millisecond

	# normalize by pre-pip baseline (-20 - 0 ms before stimulus onset)
	if norm:
		pip_end_pre = resp_start - 5
		pip_start_pre = pip_end_pre - 20
		nspks_1st = nspks_1st - (rast[ix, pip_start_pre: pip_end_pre]).mean()

	# loop through repetition rates, get raster subset, and calculate RRTF
	for i, (rr, npip) in enumerate(zip(urrs, npips)):
		rast_ = rast[RF.get_trials(stimparams, np.array([freq, rr])), :]
		rrtf[i] = calc_rrtf(rast_, rr, nspks_1st, npips = npip, norm = norm)
	
	return rrtf
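A usage sketch matching the call in rr_make_contactsheets below (rr_rast, rr_stimparams, and rr_freq assumed already loaded from an RR block):

import numpy as np

urrs = np.unique(rr_stimparams[:, 1])    # sorted repetition rates
npips = (urrs * 4).astype(int)           # pips per rate
rrtf = calc_rrtf_all(rr_rast, rr_stimparams, rr_freq, urrs, npips)
# rrtf[i] compares the later-pip response at urrs[i] to the first-pip response.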
Example No. 31
File: RR.py Project: r-b-g-b/Lab
def aligned_psth_separate_all(rast, stimparams, freq, npips, onset = 0.05, axs = None):
	'''
	Input:
		rast : full block raster
		stimparams : full block stimulus parameters
		freq : in Hz, the frequency played to this neuron
		npips : number of pips for each repetition rate
		onset : in seconds, onset time of first pip
		axs : a list of axis to which the output will be displayed
	'''
	urrs = np.unique(stimparams[:, 1])
	nrrs = urrs.size
	if axs is None:
		axs = [None]*nrrs

	aligned_psths = []
	for rr, npip, ax in zip(urrs, npips, axs):
		ix = RF.get_trials(stimparams, (freq, rr))
		psth = 1000*rast[ix, :].mean(0)
		aligned_psths.append(aligned_psth_separate(psth, rr, npip, onset = onset, ax = ax))

	return aligned_psths
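A minimal usage sketch (hypothetical data): one axis per repetition rate, stacked vertically as rr_make_contactsheets does below.

import numpy as np
import matplotlib.pyplot as plt

urrs = np.unique(stimparams[:, 1])
npips = (urrs * 4).astype(int)
fig, axs = plt.subplots(urrs.size, 1, sharex = True)
aligned_psths = aligned_psth_separate_all(rast, stimparams, freq, npips, axs = list(axs))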
Example No. 32
File: EEG.py Project: r-b-g-b/Lab
def export_unit(Data0):
	
	# number of time bins to include in the LFP array
	nlfpsamp = 0
	for tt, trial in enumerate(Data0['trial'][0][0][0]):

		thislfpsamp = trial['LFP'].shape[1]
		if thislfpsamp>nlfpsamp:
			nlfpsamp = thislfpsamp
	
	ntrials = Data0['trial'][0][0][0].size # find number of trials
	nstimID = Data0['trial'][0][0][0][0]['Epoch_Value'][0].size
	
	# initialize LFP
	lfp = np.ndarray((2, ntrials, nlfpsamp), dtype = 'float32')
	# initialize frequency and attenuation IDs
	stimID = np.ndarray((ntrials, nstimID), dtype = 'float32')

	for tt in range(ntrials):
		trial = Data0['trial'][0][0][0][tt]

		thisstimID = np.float32(trial['Epoch_Value'][0])
		# get the LFP for this trial and pad it with nans so it can fit in a matrix (since some of the trials have +/-1 data point for LFP)
		for cc in range(2):
			lfpchannel = trial['LFP'][cc]
			lfp[cc, tt, :len(lfpchannel)] = lfpchannel
			
		# add to Epoch_Value
		stimID[tt, :] = thisstimID

	remID = np.array([0., 0.])
	trial_mask = RF.make_trial_mask(stimID, remID)
	lfp = lfp[:, ~trial_mask, :]
	stimID = stimID[~trial_mask, :]

	return lfp, stimID
Example No. 33
data = json.load(fp)

fp = open('./data/equations.json')
equations = json.load(fp)

for iteration in range(10):
    print('Iteration ', iteration)
    twentyFive = int(0.25 * len(data))

    shuffle(data)
    test = data[:twentyFive]
    train = data[twentyFive:]

    for i in range(10):
        print('-- SubIteration ', i)
        predict = RF.train(train, (i + 1) * 100)
        predict_SVM = SVM.train(train, 10**(i - 4))

        right = 0
        right_SVM = 0

        for datapoint in test:
            predicted = predict(datapoint['question'], False)
            predicted_SVM = predict_SVM(datapoint['question'], False)
            if checkSolution(predicted, datapoint['answers']):
                right += 1
            if checkSolution(predicted_SVM, datapoint['answers']):
                right_SVM += 1

        print('RF: ', right / twentyFive, '\t SVM: ', right_SVM / twentyFive)
Example No. 34
# coding: utf-8

import csv
from math import sqrt
import RF as rf
import numpy as np
import pandas as pd
import transferForest as trans

if __name__ == '__main__':

    dataSet, feature = rf.loadCSV(
        '../../xxx.csv'
    )  # your file path; the file consists of source domain data and target domain data
    rf.column_to_float(dataSet)

    targetID = 'xxx'  # specify the target domain ID

    # parameters for the random forests
    n_folds = 10
    max_depth = 15
    min_size = 1
    ratio = 1.0

    n_features = 35
    n_trees = 100

    #cross validation
    folds = rf.spiltDataSet(dataSet, n_folds)
    for fold in folds:
        train_set = folds[:]
Example No. 35
def unit(u_, Data0, cc, blockID):
	
	# number of time bins to include in the LFP array
	nlfpsamp = 0
	for tt, trial in enumerate(Data0['trial'][0][0][0]):

		thislfpsamp = trial['LFP'].shape[1]
		if thislfpsamp>nlfpsamp:
			nlfpsamp = thislfpsamp
	
	
	ntrials = Data0['trial'][0][0][0].size # find number of trials
	nstimID = Data0['trial'][0][0][0][0]['Epoch_Value'][0].size
	
	# initialize LFP, spike times, spike trials, spike waveform
	lfp = np.ndarray((0, nlfpsamp), dtype = 'float32')
	spktimes = np.ndarray(0)
	spktrials = np.ndarray(0)
	spkwaveform = np.ndarray((0, 22))

	# initialize frequency and attenuation IDs
	stimID = np.ndarray((0, nstimID), dtype = 'float32')

	ttt = 0 # valid trial counter
	for tt in range(ntrials):
		trial = Data0['trial'][0][0][0][tt]

		thisstimID = np.float32(trial['Epoch_Value'][0])

		# if not ((blockID.startswith('b')) and (thisstimID[0] < 2)):

		# get the LFP for this trial and pad it with nans so it can fit in a matrix (since
		# some of the trials have +/-1 data point for LFP)
		lfpchannel = trial['LFP'][cc]
		lfpchannel = np.concatenate((lfpchannel, np.zeros(nlfpsamp - len(lfpchannel)) * np.nan))
		lfp = np.vstack((lfp, lfpchannel))

		spktime = trial['CH'][0][cc]['latency']
		if np.prod(spktime.shape) > 0:
			spktimes = np.append(spktimes, spktime)
			spktrials = np.append(spktrials, np.ones(spktime.size) * ttt)
			spkwaveform = np.concatenate((spkwaveform, trial['CH'][0][cc]['spkwaveform'].T), 0)
			
		# add to Epoch_Value
		stimID = np.vstack((stimID, thisstimID))

		ttt += 1 # increment valid trial counter

			# end if valid ID
	
	# end trial loop
	
	if spktimes.size == 0: # if no spikes
		print 'No spikes detected for this unit.'
		spktimes = np.array([np.nan])
		spktrials = np.array([np.nan])
		spkwaveform = np.array([np.nan])
		rast = np.array([np.nan])
		
	else:
		# filter out unwanted trials
		remID = np.array([np.nan, np.nan])
		if blockID.startswith('b'):
			remID = np.array([1., 70.])
		elif blockID.startswith('r'):
			remID = np.array([0., 0.])
	
		spktimes, spktrials, spkwaveform, lfp, stimID = \
			remove_trials(spktimes, spktrials, spkwaveform, lfp, stimID, remID)
	
		# create raster
		ntrials = stimID.shape[0]
		nbins = np.ceil(1000 * spktimes.max())+1
		rast = Spikes.calc_rast(spktimes, spktrials, ntrials, nbins)

	
	# save out to file
	u_.create_dataset('chan', data = cc)
	u_.create_dataset('blockID', data = blockID)
	# add stimulus ID datasets to this stimset on this unit
	u_.create_dataset('stimID', data = stimID)
	u_.create_dataset('lfp', data = lfp, compression = 'gzip')
	u_.create_dataset('spktimes', data = spktimes, compression = 'gzip')
	u_.create_dataset('spktrials', data = spktrials, compression = 'gzip')
	u_.create_dataset('spkwaveform', data = spkwaveform, compression = 'gzip')
	u_.create_dataset('rast', data = rast, compression = 'gzip')
		
	if blockID.startswith('b'):
		rf = RF.calc_rf(rast, stimID)
		u_.create_dataset('rf', data = rf, compression = 'gzip')
Example No. 36
import time

import numpy as np
import pandas as pd

import KNN
import NN
import RF
import SVM


time_start = time.time()
# fix the random seed
np.random.seed(seed=10)
# load the data
inputs = pd.read_csv('../../data/featuredata/total.csv', header=None)
outputs = pd.read_csv('../../data/processdata/tag.csv', header=None)
# shuffle the row order
train = np.array(pd.concat([inputs, outputs], axis=1))
np.random.shuffle(train)
# split back into inputs and labels
inputs = pd.DataFrame(train).iloc[:, :-1]
outputs = pd.DataFrame(train).iloc[:, -1]

# train the four models, each with 10-fold cross-validation
result1 = SVM.model(inputs, outputs)
result2 = NN.model(inputs, outputs)
result3 = RF.model(inputs, outputs)
result4 = KNN.model(inputs, outputs)

time_end = time.time()

# print each algorithm's mean cross-validation accuracy
print('SVM: mean accuracy = ', result1)
print('NN: mean accuracy = ', result2)
print('RF: mean accuracy = ', result3)
print('KNN: mean accuracy = ', result4)
print('Running time: {:.2f} Seconds'.format(time_end-time_start))
Example No. 37
import h5py, os, glob, re
import RF, RR, Spikes, misc
from fileconversion import load_cfs

basedir = '/Volumes/BOB_SAGET/Fmr1_voc/voc_ko_nai_20130116'
experiment = 'voc_ko_nai_20130116'

rf_paths = glob.glob(os.path.join(basedir, 'fileconversion', 'RF*.h5'))

rr_paths = glob.glob(os.path.join(basedir, 'fileconversion', 'RR*.h5'))
voc_paths = glob.glob(os.path.join(basedir, 'fileconversion', 'VOC*.h5'))
ix2freq = RF.get_ix2freq()
p = re.compile('RF(\d+).h5')

for rf_path in rf_paths:
	penno = p.findall(rf_path)[0]
	rr_path = [f for f in rr_paths if penno in f][0]
	voc_path = [f for f in voc_paths if penno in f][0]
	
	rf_file = h5py.File(rf_path, 'r')
	rf_rast = rf_file['rast'].value
	rf_stimparams = rf_file['stimID'].value
	rf_file.close()

	cfs = load_cfs(experiment)
	cf_ix = np.int32(np.round(RF.find_cf(cfs, np.int32(penno))))
	cf = ix2freq[20:][cf_ix]

	# perform analysis 
	if len(rr_path) > 0:
		
Example No. 38
def characterize(sesss = sesss, experiment = 'Fmr1_RR', pplot = True, verbose = False):

	if type(sesss) == str:
		sesss = [sesss]

	# set up figure
	figsize = (12, 12)
	fig = plt.figure(figsize = figsize)

	# loop through sesss
	for sess in sesss:

		DB = np.empty(0, dtype = dtype)
		
		print '%s\n%s\n\n' % (sess, '-'*50)
		
		# build the output directory path
		savedir = os.path.join(basedir, experiment, 'Sessions', sess, 'analysis')
		if not os.path.exists(savedir):
			os.mkdir(savedir)
			
		# WT or KO / CTL or EXP
		gen, exp, date = sess.split('_')

		# find the RF blocks
		pens = glob.glob(os.path.join(basedir, experiment, 'Sessions', sess, 'fileconversion', 'RF*.h5'))

		# load the cfs for this sess
		cfs = np.loadtxt(os.path.join(basedir, experiment, 'Sessions', sess, 'cfs.txt'), ndmin = 1)
		
		# loop through blocks in this sess
		for pen in pens:
			
			absol, relat = os.path.split(pen)
			blockname = os.path.splitext(relat)[0]

			# get unit number from filename
			unitnum = np.int32(p.findall(relat))[0] # unit number
			ix = cfs[:, 0] == unitnum
			if ix.sum() > 0:
				cf_man = cfs[ix, 1][0]
				if verbose:
					print pen
			
				# load the RF block
				f = h5py.File(pen, 'r')
				spktimes = f['spktimes'].value
				# if not np.isnan(spktimes[0]):

				'''--------RF--------'''
				# load the RF block to get the RF
				rf = f['rf'].value
				rast = f['rast'].value
				spktimes = f['spktimes'].value
				stimparams = f['stimID'].value
				spktrials = f['spktrials'].value
				coord = f['coord'].value
				ntrials = f['rast'].shape[0]
				f.close()

				# calculate the psth (spk/s per trial, normalized by number of trials)
				psth = Spikes.calc_psth(rast, normed = True) # spk/s
			
				# baseline firing rate
				base_mean = psth[:stim_on].mean() # spk/s
			
				# response onset/offset
				psth_smoo = Spikes.exp_smoo(psth, tau = 0.003) 
				resp_on, resp_off = Spikes.calc_on_off(psth_smoo, stim_on = stim_on)
			
				# rewindowed RF
				rf_rewin = RF.calc_rf(rast, stimparams, resp_on = resp_on + stim_on - 3, resp_off = resp_off + stim_on + 3, normed = True)
			
				# thresholded RF
				rf_thresh = rf_rewin.copy()
				rf_threshold = np.percentile(rf_thresh, 66)  # 66th percentile
				rf_peak = rf_rewin.max()
				rf_thresh[rf_thresh < rf_threshold] = 0
			
				# find maximum RF cluster
				(rf_clust, clust_sizes) = RF.findmaxcluster(rf_thresh, cf = cf_man, include_diagonal = False)
				# if clust_sizes.max() < 10: # if it's a tiny RF, set it to nans
				# 	rf_clust = np.empty(rf_clust.shape) * np.nan
			
				rf_mask = rf_clust > 0
				# find evoked psth
				ev_psth = RF.calc_evoked_psth(rast, stimparams, rf_mask)
				ev_psth_smoo = Spikes.exp_smoo(ev_psth, tau = 0.003)
				ev_resp_on, ev_resp_off = Spikes.calc_on_off(ev_psth_smoo, stim_on = stim_on)
				ev_mean = ev_psth[ev_resp_on : ev_resp_off].mean()
				
				# bandwidth and threshold
				bw, bw_lr, _, thresh = RF.calc_bw_cf_thresh(rf_mask)

				# center of mass
				com = RF.calc_rf_com(rf_clust)
				tip_top = np.max([thresh-2, 0])
				tip_bottom = thresh
				com_tip = RF.calc_rf_com(rf_clust[tip_top:tip_bottom, :])

				'''PLOT'''
				if pplot:
					
					rf1_ax = fig.add_subplot(221)
					rf2_ax = fig.add_subplot(222)
					psth1_ax = fig.add_subplot(223)
					psth2_ax = fig.add_subplot(224)
					
					RF.plot_RF(rf, bw_lr = bw_lr, thresh = thresh, cf = cf_man, ax = rf1_ax)
					RF.plot_RF(rf_clust, bw_lr = bw_lr, thresh = thresh, cf = cf_man, ax = rf2_ax)
					rf2_ax.axvline(com, color = 'g', ls = '--')
					rf2_ax.axvline(com_tip, color = 'b', ls = '--')
				
					psth1_ax.plot(psth_smoo)
					psth2_ax.plot(ev_psth_smoo)
					psth1_ax.axvline(resp_on+stim_on, color = 'r', ls = '--')
					psth1_ax.axvline(resp_off+stim_on, color = 'r', ls = '--')
				
					figpath = os.path.join(savedir, blockname + '.png')
					fig.savefig(figpath);
					fig.clf()
				'''PLOT'''
			

				DB.resize(DB.size + 1)
				DB[-1] = np.array((gen, exp, sess, unitnum, \
					psth[:333], ev_psth[:333], \
					rf, rf_clust, \
					cf_man, com, com_tip, \
					bw, bw_lr, thresh, coord, \
					resp_on, resp_off, ev_resp_on, ev_resp_off, \
					base_mean, ev_mean), dtype = dtype)
		
			# end unit loop
	
			np.savez(os.path.join(basedir, experiment, 'Sessions', sess, sess + '_RF.npz'), DB = DB)
		if verbose:
			print '\n'*4
Example No. 39
def calc_psth_by_stim(rast, stimparams, bins = 0.001):

	nstimparams = stimparams.shape[1]
	
	usp = []
	for i in range(nstimparams):	
		usp.append(list(np.unique(stimparams[:, i])))

	nparamlevels = np.empty(nstimparams, dtype = np.int32)
	for i in range(nstimparams):
		nparamlevels[i] = len(usp[i])

	ntrials_per_stim = np.zeros(nparamlevels)

	'''
	compute
	nbins	:	the number of bins
	bins	:	times (seconds) for each bin
	'''
	dur_ms = rast.shape[1] # number of milliseconds
	t_ms = np.arange(dur_ms) # time indices in ms
	if type(bins) in (int, np.int32):
		nbins = bins
		bins = np.linspace(0, dur_ms, nbins) / 1000
		bindur = bins[1]-bins[0]
	elif type(bins) in (float, np.float):
		bindur = bins
		bins = np.arange(0, (dur_ms/1000.)+bindur, bindur)
		nbins = bins.size-1
	elif type(bins) is np.ndarray:
		nbins = bins.size-1
		bindur = bins[1]-bins[0]
	

	assert np.unique(ntrials_per_stim).size == 1

	ntrials = np.int32(ntrials_per_stim[0])
	
	psth_shape = np.hstack((nparamlevels, nbins))
	psth = np.zeros(psth_shape)
	
	combinations = []
	combinations_ix = []
	for i in itertools.product(*usp):
		combinations.append(i)
		combinations_ix_ = []
		for j, i_ in enumerate(i):
			q = (np.array(usp[j])==i_).nonzero()[0][0]
			combinations_ix_.append(q)
		combinations_ix.append(combinations_ix_)
		
	for m, n in zip(combinations, combinations_ix):
		ix = RF.get_trials(stimparams, m)
		ntrials = ix.size
		spktimes_ = rast2spktimes(rast[ix, :])
		psth_, edges_ = np.histogram(spktimes_, bins = bins)
		psth_ = (1./bindur) * (psth_.astype(float)/ntrials)
		psth[tuple(n)] = psth_
	
	psth = psth.squeeze()		
	return psth, usp
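Per the comment block above, bins may be an integer count, a float bin width in seconds, or an array of bin edges; a sketch of the common forms (rast and stimparams assumed already loaded):

psth, usp = calc_psth_by_stim(rast, stimparams)                  # default: 1 ms bins
psth, usp = calc_psth_by_stim(rast, stimparams, bins = 0.005)    # float: bin width in seconds
edges = np.arange(0, 0.334, 0.001)                               # explicit bin edges, in seconds
psth, usp = calc_psth_by_stim(rast, stimparams, bins = edges)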
Example No. 40
def transfer_forest_predict(train_model, test):
    predict_values = [rf.bagging_predict(train_model, row) for row in test]
    return predict_values
Example No. 41
def load_model():
    if request.method == 'POST':
        f = request.files['model']
        f.save('rf_model.pkl')
        feature_dict = RF.prepred_model(target, to_drop, feature_importance)
        return feature_dict
Example No. 42
import naiv
import prep
import LOGR
import KNN
import RF
import SVM
import Dtree
import pandas as pd
import json
from flask import Flask,jsonify,request,render_template
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

df=prep.cleaning(pd.read_csv("dataset.csv")) #load the dataset

lda = LDA(n_components = 8)   # select 8 features
X=df["data"].drop("Churn",axis=1)
Y=df["data"]["Churn"]
lda.fit(X,Y)   # fit the data to select the best features

clf_log=LOGR.LR(df["data"].copy(),"Churn",lda)  # initiate object from LogisticRegression class
clf_KNN=KNN.KNN(df["data"].copy(),"Churn",lda)  # initiate object from K-NN class
clf_RF=RF.RF(df["data"].copy(),"Churn",lda)   # initiate object from RandomForest class
clf_SVM=SVM.SV(df["data"].copy(),"Churn",lda)   # initiate object from SVM class
clf_Dt=Dtree.DT(df["data"].copy(),"Churn",lda)   # initiate object from DecisionTree class
clf_naiv=naiv.RF(df["data"].copy(),"Churn",lda)   # initiate object from NaiveBayes class

app = Flask(__name__)

@app.route("/",methods=["GET","POST"])
def hello():
    data={"data":pd.read_csv("dataset.csv").head(500).to_json()}
    return jsonify(data)
############################################################### PreProcessing  ###################################################

@app.route("/prep",methods=["GET","POST"])
def prp():
    df=prep.cleaning(pd.read_csv("dataset.csv"))["data"]
Example No. 43
		rast = frr['rast'].value
		stimparams = frr['stimID'].value
		frr.close()

		# if not np.isnan(spktimes[0]):
		cf = cfs[cfs[:, 0]==unitnum, 1][0]
		cf_hz = ix2freq[20:][int(cf)]
		freqs = stimparams[:, 0]
		rrs = stimparams[:, 1]
		ufreqs = np.unique(freqs)
		urrs = np.unique(rrs)
		nrrs = urrs.size
	
		# now we determine which of the frequencies we played is closest to this neuron's CF
		thisfreq, thisfreq_ix, thisfreq_err = misc.closest(ufreqs, cf_hz, log = True)
		if np.abs(thisfreq_err) > 0.2:
			print 'No close frequency found!'
		thisfreq = ufreqs[thisfreq_ix]
	
		# isolate the parts of the raster for this frequency and build a psth for each RR
		ix = RF.get_trials(stimparams, np.array([thisfreq, np.nan]))
		thisrast = rast[ix, :1050]
		thisstims = stimparams[ix, :]
		psths, ustims = Spikes.calc_psth_by_stim(thisrast, thisstims)
	
		rrtf = RR.calc_rrtf_all(thisrast, thisstims, thisfreq, urrs)
	
		db.resize(db.size+1)
		db[-1] = np.array((gen, exp, sess, unitnum, cf_hz, rrtf, urrs), dtype = dtype)

np.savez(savepath, db)
Example No. 44
    cursor.execute(create_experiment_query)
    cursor.execute(fetch_experiment_id)
    return cursor.fetchone()[0]


def CreateConnection():
    global cnx, cursor
    cnx = mysql.connector.connect(user='******',
                                  password='',
                                  host='localhost',
                                  database='cakephp')
    return cnx.cursor()


if __name__ == "__main__":
    cursor = CreateConnection()
    exp_id = GetExperimentId()
    values = []
    for i in range(iterations):
        start = timer()

        end = timer()
        values.append(end - start)

    final_result = mean(sorted(values)[1:-1])
    print(final_result)
    cnx.commit()
    cnx.close()

    RF.run()
Example No. 45
def main():
    #########################################
    ##		Input file here
    ####
    #			data input
    fSurface = open("kqQL91.txt", 'r')
    count = 0
    Surface = dict()
    pointList = list()
    while True:
        temp = [float(i) for i in fSurface.readline().split()]
        if len(temp) == 0:
            break
        if len(temp) == 1:
            Surface[count] = list(pointList)  # store a copy; pointList is cleared next
            pointList.clear()
            count += 1
        else:
            pointList.append(temp)
    fSurface.close()
    print(Surface)
    #		props input
    fProps = open("props.txt", "r")
    props = json.loads(fProps.read())
    fProps.close()

    ###			props settings
    #############
    #		soil_props persists for the whole runtime
    soil = RF.soil_props(float(props["soil_props"]["c"]),
                         float(props["soil_props"]["phi"]),
                         float(props["soil_props"]["gamma"]))
    ##
    #		this instance use constant calculating props
    cal = RF.calculating_props(float(props["calculating_props"]["u"]),
                               float(props["calculating_props"]["kW"]),
                               float(props["calculating_props"]["A"]),
                               float(props["calculating_props"]["D"]),
                               float(props["calculating_props"]["omega"]))
    ##

    ######################################################################
    #--------------------------------------------
    FS_Surface = np.array([float(0)] * count)
    lamdaSurface = np.array([float(0)] * count)
    centerSurface = np.array([float(0)] * 2 * count).reshape(count, 2)
    RSurface = np.array([float(0)] * count)

    for i in tqdm(range(count), desc="Loading…", ascii=False, ncols=75):
        time.sleep(0.01)  #	Every Surface
        currData = Surface[i]  # this in list type
        innerData = RF.depth_converter(currData)  # this change to np arr

        ###########################	DO SOMETHING TO GIVE FS AND BW	##############
        #		Model setting
        ####

        numberCenter = innerData.shape[0] // 10
        isLeft, first = RF.index_first(innerData)
        dx = innerData[1, 0] - innerData[0, 0]
        centerArr = RF.center_defining(innerData, numberCenter, first, isLeft,
                                       dx)
        RL = RF.radius_lines_defining(innerData, numberCenter,
                                      np.amin(innerData[:, 0]), dx)
        R = np.array([
            RF.Radius(centerArr[numberCenter * i + 1, 1], RL[i])
            for i in range(numberCenter)
        ])
        FS = np.array([float(0)] * 2 * R.shape[0] *
                      centerArr.shape[0]).reshape(centerArr.shape[0],
                                                  R.shape[0], 2)

        ###
        #######
        ##			lamda f(x) Tolerance setting:
        ##
        fx = np.array([(k / innerData.shape[0]) * math.pi
                       for k in range(1, innerData.shape[0])])
        setting = RF.setting(
            fx,
            np.array([(i + 1) * (1 / numberCenter)
                      for i in range(numberCenter)]),
            float(props["setting"]["Tolerance"]))

        ############

        for j in range(centerArr.shape[0]):

            ###
            #		slide_props depends on the current surface state so must be declared in every time loop to innerData
            # float arrays; an int dtype here would silently truncate the values below
            alpha = np.zeros(innerData.shape[0] - 1)
            beta = np.zeros(innerData.shape[0] - 1)
            a = np.zeros(innerData.shape[0] - 1)
            x = np.zeros(innerData.shape[0] - 1)
            W = np.zeros(innerData.shape[0] - 1)
            for jj in range(innerData.shape[0] - 1):

                alpha[jj] = math.atan(
                    (innerData[jj + 1, 1] - innerData[jj, 1]) / dx)
                beta[jj] = dx / (math.cos(alpha[jj]))
                a[jj] = 0
                x[jj] = abs(centerArr[j, 0] - innerData[jj, 0] - dx / 2)
                W[jj] = soil.gamma * (innerData[jj + 1, 1] -
                                      innerData[jj, 1]) * dx  #?#?/#?#

            slide = RF.slide_props(alpha, beta, a, x, W)
            ####
            for k in range(R.shape[0]):
                FS[j,
                   k] = RF.Calculating_FoS(innerData, setting.lamda,
                                           setting.fx, centerArr[j], R[k],
                                           soil, cal, slide, setting.Tolerance)

        FS_Surface[i] = np.amin(FS)
        index = np.where(FS == FS_Surface[i])  # tuple of index arrays (center, radius, component)
        RSurface[i] = R[index[1][0]]
        centerSurface[i] = centerArr[index[0][0]]

        #########################	END DO SOMETHING				##############
    BW = np.array([float(0)] * count)
    for i in range(count):
        if FS_Surface[i] < float(props["setting"]["FSCritical"]):
            currData = Surface[i]  # this in list type
            innerData = RF.depth_converter(currData)
            dx = innerData[1, 0] - innerData[0, 0]
            BW[i] = RF.Cal_BW(innerData, centerSurface[i], RSurface[i], dx)
    print(FS_Surface)
    print(BW)
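main() expects props.txt to hold JSON with exactly the keys accessed above; a sketch that writes a compatible file (all numeric values are hypothetical):

import json

props = {
    "soil_props": {"c": 10.0, "phi": 30.0, "gamma": 18.0},
    "calculating_props": {"u": 0.0, "kW": 0.5, "A": 0.1, "D": 1.0, "omega": 0.2},
    "setting": {"Tolerance": 0.001, "FSCritical": 1.5},
}
with open("props.txt", "w") as fProps:
    json.dump(props, fProps, indent = 4)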
Example No. 46
def rr_make_contactsheets():
	'''
	loop through all the sessions and plot the rrtfs
	'''

	fig = plt.figure(figsize = (30, 18));
	txt_suptitle = fig.suptitle('')
	ax_cfrrtf = fig.add_axes((0.76, 0.76, 0.24, 0.23));
	ax_cfvs = ax_cfrrtf.twinx();
	ax_cfcircpsthall = fig.add_axes((0.62, (11/14.)-0.02, 0.1, (1/7.)+0.04), polar = True)
	ax_cfcircpsthall.set_xticklabels(''); ax_cfcircpsthall.set_yticklabels('');
	ax_rf = fig.add_axes((0.67, 0.51, 0.33, 0.23));
	ax_rfrast = fig.add_axes((0.67, 0.25, 0.33, 0.24));
	ax_rfrast.set_xticklabels('');
	ax_rfpsth = fig.add_axes((0.67, 0.01, 0.33, 0.24));

	ax_cfrr = [fig.add_axes((0.03, 1-((i+1)/7.), 0.35, 1/7.)) for i in np.arange(nrrs)]
	ax_cfalignedpsth = [fig.add_axes((0.38, 1-((i+1)/7.), 0.17, 1/7.)) for i in np.arange(nrrs)]
	ax_cfcircpsth = [fig.add_axes((0.53, 1-((i+1)/7.), 0.1, 1/7.), polar = True) for i in np.arange(nrrs)]
	# ax_noiserr = [fig.add_subplot(nrrs, 3, i) for i in np.arange(1, 3*nrrs, 3)]

	for sessionpath in sessionpaths:

		session = os.path.split(sessionpath)[1]
		unitinfos = fileconversion.get_session_unitinfo(sessionpath, onlycomplete = ('RF', 'RR', 'VOC'))
			
		for unitkey in unitinfos.keys():
			
			txt_suptitle.set_text('%s %s' % (session, unitkey))

			unitinfo = unitinfos[unitkey]

			rf_ix = unitinfo['stimtype'].index('RF')
			
			f_rf = h5py.File(unitinfo['fpath'][rf_ix], 'r')
			rf_rast = f_rf['rast'].value
			rf_stimparams = f_rf['stimID'].value
			cf_ix = f_rf['cf'].value
			f_rf.close()
			
			cf = ix2freq[20:][int(cf_ix)]

			''' calculate and plot RF, psth, and sorted raster'''
			rf = RF.calc_rf(rf_rast, rf_stimparams)
			rf_psth = Spikes.calc_psth(rf_rast)
			RF.plot_rf(rf, cf = cf_ix, axes_on = False, ax = ax_rf) # plot RF
			ax_rf.axvline(cf_ix, color = 'r', lw = 1.5)
			Spikes.plot_sorted_raster(rf_rast, rf_stimparams, ax = ax_rfrast) # plot raster
			ax_rfpsth.plot(t_rf, Spikes.exp_smoo(rf_psth, tau = 0.005)) # plot PSTH

			''' calculate and plot RRTFs for CF and noise stimuli '''
			rr_ix = unitinfo['stimtype'].index('RR')
			
			f_rr = h5py.File(unitinfo['fpath'][rr_ix], 'r')
			rr_rast = f_rr['rast'].value
			rr_stimparams = f_rr['stimID'].value
			f_rr.close()

			# find the played CF
			rr_ufreqs = np.unique(rr_stimparams[:, 0])
			urrs = np.unique(rr_stimparams[:, 1])
			npips = (urrs*4).astype(int)
			rr_freq, rr_ufreq_ix, _ = misc.closest(rr_ufreqs, cf, log = True)

			ax_rf.axvline(RF.calc_freq2ix(rr_freq), color = 'g', lw = 1.5)
			# calculate the PSTHs for each repetition rate
			tmp = Spikes.calc_psth_by_stim(rr_rast, rr_stimparams)
			rr_cfpth = tmp[0][rr_ufreq_ix, :, :]
			# rrtf_noisepsth = tmp[0][0, :, :]

			# plot the aligned psths
			RR.aligned_psth_separate_all(rr_rast, rr_stimparams, rr_freq, npips, axs = ax_cfalignedpsth)
			[a.set_yticklabels('') for a in ax_cfalignedpsth]
			[a.set_xticklabels('') for a in ax_cfalignedpsth[:-1]]

			# plot circular psths
			r, V, theta = RR.circ_psth_all(rr_rast, rr_stimparams, rr_freq, npips, axs = ax_cfcircpsth)
			[a.set_yticklabels('') for a in ax_cfcircpsth]
			[a.set_xticklabels('') for a in ax_cfcircpsth]

			# plot all circular summed vector strengths
			ax_cfcircpsthall.plot(theta, V, '.-')
			[ax_cfcircpsthall.plot([0, th], [0, v], color = 'b', alpha = 1-(i/10.)) for i, (th, v) in enumerate(zip(theta, V))]


			# plot RRTF
			rrtf = RR.calc_rrtf_all(rr_rast, rr_stimparams, rr_freq, urrs, npips)
			ax_cfrrtf.plot(rrtf, '.-', ms = 10)
			ax_cfvs.plot(V*np.cos(theta), 'g.-', ms = 10)
			for tick in ax_cfvs.yaxis.get_major_ticks():
				tick.set_pad(-5)
				tick.label2.set_horizontalalignment('right')

			# plot repetition rate PSTHs
			for i in xrange(nrrs):
				# RR.plot_rrtf(t_rrtf, rrtf_noisepsth[i, :], urrs[i], int(4*urrs[i]), onset = 0.05, duration = 0.025, ax = ax_noiserr[i])
				RR.plot_rrtf(t_rrtf, rr_cfpth[i, :], urrs[i], int(4*urrs[i]), onset = 0.05, duration = 0.025, ax = ax_cfrr[i])

			# ax_noiserr[0].set_title('Noise RRTFs')
			ax_cfrr[0].set_title('CF RRTFs (%.0f kHz)' % (cf/1000))
			# [a.set_xlim(0, 4.5) for a in ax_noiserr]
			[a.set_xlim(0, 4.5) for a in ax_cfrr]
			misc.sameyaxis(ax_cfrr+ax_cfalignedpsth)

			figsavepath = os.path.join(studydir, 'Sheets', 'RRTFs', '%s_%s_RRTF.png' % (session, unitkey))
			print figsavepath
			fig.savefig(figsavepath)
			[a.cla() for a in fig.get_axes()] # clear all axes
Example No. 47
def main():

    #prepare for data
    rawdata = pd.read_csv("mySpotify.csv")

    newdata = rawdata[[
        'track_id', 'duration_ms', 'popularity', 'acousticness',
        'danceability', 'energy', 'instrumentalness', 'key', 'liveness',
        'loudness', 'mode', 'speechiness', 'tempo', 'time_signature',
        'valence', 'parentCat'
    ]]

    #96153

    ## hypothesis 1: t test : mean  of energy between pop and non-pop
    print("hypothesis 1")
    target_pop = newdata[newdata.parentCat == "pop"].drop_duplicates(
        'track_id')
    id_in_labels = target_pop['track_id'].values
    non_pop = newdata[~newdata.track_id.isin(id_in_labels)].drop_duplicates(
        'track_id')

    print(
        stats.ttest_ind(target_pop["energy"].values, non_pop["energy"].values))

    #hypothesis 2: linear regression acousticness~energy+loudness
    print("hypothesis 2")
    data_unique = newdata.drop_duplicates('track_id')
    X = data_unique[["energy", 'loudness']]
    X = sm.add_constant(X)
    y = data_unique["acousticness"]

    # Note the difference in argument order
    model = sm.OLS(y, X).fit()
    # Print out the statistics
    print(model.summary())

    ## == groupby data using gernre ==
    #there are 29 genres
    print("hypothesis 3")
    genres = newdata["parentCat"].unique()
    with open("genres_spotify.txt", "w") as f:
        f.write(str(genres))

    print(genres)
    # record the classification results of different classification methods on all genres
    precision_train = pd.DataFrame(0,
                                   index=genres,
                                   columns=[
                                       "DecisisionTree", "KNN", "Naive Bayes",
                                       "SVM", "Random Forest"
                                   ])
    precision_test = pd.DataFrame(0,
                                  index=genres,
                                  columns=[
                                      "DecisisionTree", "KNN", "Naive Bayes",
                                      "SVM", "Random Forest"
                                  ])

    with open("classification.txt", "w") as f:
        f.write("classification result \n")

    for i in np.arange(len(genres)):
        with open("classification.txt", "a") as f:
            f.write("\n classification result for " + genres[i] + "\n")
        # create the variable dynamically via vars() with a string name
        vars()["data_" + genres[i]] = newdata[newdata.parentCat == genres[i]]

        data_target = vars()["data_" + genres[i]]
        genre_target = genres[i]
        if genres[i] in ["party", "workout"]:
            data_target = data_target.sample(3000)

        ##fill with col index
        #features_selection=[]
        #y_selection=15

        # First use data_Classical as the target data, then randomly sample song tracks that don't
        # belong to Classical and integrate them into the data set
        #
        id_in_labels = data_target['track_id'].values
        data_out_of_label = newdata[~newdata.track_id.
                                    isin(id_in_labels)].drop_duplicates(
                                        'track_id').sample(n=len(data_target))
        #normalize dataframe columns

        #print(data_target.describe())
        #print(data_out_of_label.describe())

        #combine two data sets into one
        df = pd.concat([data_out_of_label,
                        data_target]).sort_values(by=["track_id"])
        df.loc[df.parentCat != genre_target, "parentCat"] = 0
        df.loc[df.parentCat == genre_target, "parentCat"] = 1

        ##normalize data
        from sklearn import preprocessing
        x = df.iloc[:, 1:15]  #the last col is not included
        min_max_scaler = preprocessing.MinMaxScaler()
        x_scaled = min_max_scaler.fit_transform(x)
        df.iloc[:, 1:15] = x_scaled

        ##set training and test datasets;
        df_values = df.values
        X = np.array(df_values[:, 1:15].tolist())
        Y = np.array(df_values[:, 15].tolist())
        test_size = 0.30
        seed = np.random.randint(1, 4)
        X_train, X_validate, Y_train, Y_validate = train_test_split(
            X, Y, test_size=test_size, random_state=seed)

        print("classification results for " + genres[i])

        #### ===== ======#
        # Decision Tree ###
        ##################

        y_score, train_precision, test_precision = Basic_DecisionTree(
            X_train, Y_train, X_validate, Y_validate, genre_target)
        fpr_1, tpr_1, thresholds = roc_curve(Y_validate, y_score)
        roc_auc_1 = auc(fpr_1, tpr_1)
        precision_train.loc[genres[i], "DecisisionTree"] = train_precision
        precision_test.loc[genres[i], "DecisisionTree"] = test_precision

        #plot roc curve
        #    fig = plt.figure()
        #    plt.plot(fpr,tpr,label="ROC curve(area =%0.2f)" %roc_auc)
        #    plt.plot([0,1],[0,1],"k--")
        #    plt.xlim([0.0,1.0])
        #    plt.ylim([0.0,1.0])
        #    plt.xlabel("False positive ")
        #    plt.ylabel("true positive")
        #    plt.title("ROC of Decision Tree classifier for"+genre_target+",area=="+str(roc_auc))
        #
        #    fig.savefig('ROC DecisionTree'+genre_target+'.png')
        #    plt.close()

        ####### ===== ======#
        #      KNN       ###
        #####################
        y_score, train_precision, test_precision = KNN(X_train, Y_train,
                                                       X_validate, Y_validate)
        fpr_2, tpr_2, thresholds = roc_curve(Y_validate, y_score)
        roc_auc_2 = auc(fpr_2, tpr_2)

        precision_train.loc[genres[i], "KNN"] = train_precision
        precision_test.loc[genres[i], "KNN"] = test_precision

        #### ===== ======#
        # Naive bayes ###
        ##################
        y_score, train_precision, test_precision = NVBayes(
            X_train, Y_train, X_validate, Y_validate)
        fpr_3, tpr_3, thresholds = roc_curve(Y_validate, y_score)
        roc_auc_3 = auc(fpr_3, tpr_3)

        precision_train.loc[genres[i], "Naive Bayes"] = train_precision
        precision_test.loc[genres[i], "Naive Bayes"] = test_precision

        #
        #### ===== ======#
        # Svm ###
        ##################
        y_score, train_precision, test_precision = mysvm(
            X_train, Y_train, X_validate, Y_validate)
        fpr_4, tpr_4, thresholds = roc_curve(Y_validate, y_score)
        roc_auc_4 = auc(fpr_4, tpr_4)

        precision_train.loc[genres[i], "SVM"] = train_precision
        precision_test.loc[genres[i], "SVM"] = test_precision

        #### ===== ======#
        # RandomForest ###
        ##################
        y_score, train_precision, test_precision = RF(X_train, Y_train,
                                                      X_validate, Y_validate)
        fpr_5, tpr_5, thresholds = roc_curve(Y_validate, y_score)
        roc_auc_5 = auc(fpr_5, tpr_5)

        precision_train.loc[genres[i], "Random Forest"] = train_precision
        precision_test.loc[genres[i], "Random Forest"] = test_precision

        #plot roc curve
        fig = plt.figure()
        plt.plot(fpr_1,
                 tpr_1,
                 label="ROC curve for decisionTree(area =%0.2f)" % roc_auc_1,
                 color='darkorange')
        plt.plot(fpr_2,
                 tpr_2,
                 label="ROC curve for KNN(area =%0.2f)" % roc_auc_2,
                 color='aqua')
        plt.plot(fpr_3,
                 tpr_3,
                 label="ROC curve for Naive bayes(area =%0.2f)" % roc_auc_3,
                 color='cornflowerblue')
        plt.plot(fpr_4,
                 tpr_4,
                 label="ROC curve for SVM (area =%0.2f)" % roc_auc_4,
                 color='green')
        plt.plot(fpr_5,
                 tpr_5,
                 label="ROC curve for RandomForest(area =%0.2f)" % roc_auc_5,
                 color='red')

        plt.plot([0, 1], [0, 1], "k--")
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.0])
        plt.xlabel("False positive ")
        plt.ylabel("true positive")
        plt.title("ROC of classifiers for" + genre_target)
        plt.legend(loc="lower right")
        fig.savefig('ROC ' + genre_target + '.png')
        plt.close()

    precision_train.to_csv("precision_train.csv")
    precision_test.to_csv("precision_test.csv")