Example #1
    def dispersion_ranking_NN(self, dist, num_norm_avg=50):
        """Computes the spatial dispersion factors for each gene.

        Given a cell distance matrix, this function calculates the k-nearest
        neighbor adjacency matrix, and performs k-nearest-neighbor averaging
        of the expression data. From the averaged expression data, the Fano
        factor (variance/mean) for each gene is computed. These factors are
        square rooted and then min-max normalized to generate the
        gene weights, from which gene rankings are calculated.

        Parameters
        ----------
        dist : ndarray, float
            Square cell-to-cell distance matrix.

        num_norm_avg : int, optional, default 50
            The top 'num_norm_avg' dispersions are averaged to determine the
            normalization factor when calculating the weights.

        Returns
        -------
        indices : ndarray, int
            The indices corresponding to the gene weights sorted in decreasing
            order.

        weights : ndarray, float
            The vector of gene weights.

        nnm : ndarray, int
            The square k-nearest-neighbor directed adjacency matrix.

        D_avg : ndarray, float
            The k-nearest-neighbor-averaged expression data.

        """
        nnm = ut.dist_to_nn(dist, self.k)

        D_avg = nnm.dot(self.D) / np.sum(nnm, axis=1).reshape(
            self.D.shape[0], 1)

        dispersions = D_avg.var(0) / D_avg.mean(0)
        ma = np.sort(dispersions)[-num_norm_avg:].mean()
        dispersions[dispersions >= ma] = ma

        weights = ut.normalizer(dispersions**0.5)

        indices = np.argsort(-weights)

        self.D_avg = D_avg

        return indices, weights, nnm, D_avg
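The `ut` helpers used here are not shown in this example. A minimal sketch of what they plausibly do, assuming `dist_to_nn` marks each row's k smallest distances in a directed adjacency matrix and `normalizer` is the min-max scaling the docstring mentions (both bodies are reconstructions, not the package's actual code):

import numpy as np

def dist_to_nn(dist, k):
    # directed kNN adjacency: row i gets a 1 at its k nearest columns
    # (argsort of a distance row puts the k smallest distances first;
    # note this can include the point itself, whose distance is 0)
    nnm = np.zeros(dist.shape, dtype=int)
    idx = np.argsort(dist, axis=1)[:, :k]
    nnm[np.arange(dist.shape[0])[:, None], idx] = 1
    return nnm

def normalizer(x):
    # min-max scale a vector to [0, 1]
    x = np.asarray(x, dtype=float)
    return (x - x.min()) / (x.max() - x.min())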
Example #2
        temp_data = arr_test[i[0]:i[1]]
        #generate a noisy array the same size as the current sub-array, to be appended
        noise_array = np.array([[temp_data[k, 0]] + [temp_data[k, 1]] + [
            temp_data[k, j] +
            temp_data[k, j] * np.random.uniform(-noise, noise)
            for j in xrange(2, 6)
        ] for k in xrange(len(temp_data))])
        enh_array = np.concatenate((enh_array, noise_array), axis=0)

    #Data normalization
    #print noise_array
    # a custom-built function normalizes each column. The columns are then put back together
    #TIME REMOVED
    #now we have in order
    #Illumination DCW nitrate Lutein n-Flowrate
    enh_array = np.column_stack((normalizer(standardizer(enh_array[1:, 0])),
                                 normalizer(standardizer(enh_array[1:, 2])),
                                 normalizer(standardizer(enh_array[1:, 3])),
                                 normalizer(standardizer(enh_array[1:, 4])),
                                 normalizer(standardizer(enh_array[1:, 5]))))

    #this is meant to
    r_lengths = [[i, i + 12] for i in xrange(0, 12 * 100 * 5, 12)]
    #print r_lengths[-1]
    #print len(enh_array)
    #dataset without time
    #print enh_array

    for j in r_lengths:
        temp_data = enh_array[j[0]:j[1]]
        for k in xrange(len(temp_data) - 1):
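`standardizer`, used together with `normalizer` above, is another custom helper that is not shown. A plausible sketch, assuming it z-scores a column before the min-max normalizer rescales it to [0, 1]:

import numpy as np

def standardizer(x):
    # z-score a column: zero mean, unit standard deviation
    x = np.asarray(x, dtype=float)
    return (x - x.mean()) / x.std()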
Example #3
        #generate a noisy array the same size as the current sub-array, to be appended
        noise_array = np.array([[temp_data[k, 0]] + [temp_data[k, 1]] + [
            temp_data[k, j] +
            temp_data[k, j] * np.random.uniform(-noise, noise)
            for j in xrange(2, 6)
        ] for k in xrange(len(temp_data))])
        enh_array = np.concatenate((enh_array, noise_array), axis=0)

    #Data normalization
    #print noise_array
    # a custom-built function normalizes each column. The columns are then put back together
    #TIME REMOVED
    #now we have in order
    #Illumination DCW nitrate Lutein n-Flowrate
enh_array = np.column_stack(
    (normalizer(enh_array[1:, 0]), normalizer(enh_array[1:, 2]),
     normalizer(enh_array[1:, 3]), normalizer(enh_array[1:, 4]),
     normalizer(enh_array[1:, 5])))

#build and populate datasets
ds2 = SupervisedDataSet(5, 3)
#this is meant to
r_lengths = [[i, i + 12] for i in xrange(0, 12 * 100 * 7, 12)]
#print r_lengths[-1]
#print len(enh_array)
#dataset without time
#print enh_array

for j in r_lengths:
    temp_data = enh_array[j[0]:j[1]]
    for k in xrange(len(temp_data) - 1):
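The nested list comprehension in these examples keeps the first two columns and perturbs columns 2-5 with multiplicative uniform noise (x + x*u is x*(1 + u)). The same augmentation can be written with vectorized NumPy; a sketch, with add_noise as a hypothetical name:

import numpy as np

def add_noise(block, noise, keep_cols=2):
    # copy of `block` whose columns >= keep_cols are scaled by a random
    # factor drawn uniformly from [1 - noise, 1 + noise]
    out = np.array(block, dtype=float)
    u = np.random.uniform(-noise, noise, size=out[:, keep_cols:].shape)
    out[:, keep_cols:] *= 1.0 + u
    return out

With keep_cols=1 the same helper reproduces the Example #8 variant, which perturbs columns 1-4.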
Example #4
                      usecols=range(5),
                      columns=lista_col)
exp_1.col = lista_col1
exp_2.col = lista_col1
flow1 = np.array([(4 * [0] + 8 * [127.5]) * 3])
flow2 = np.array([(4 * [0] + 8 * [25.5]) * 4])

arr1 = np.concatenate((exp_1.values[1:], flow1.T), axis=1)
arr2 = np.concatenate((exp_2.values[1:], flow2.T), axis=1)

arr_test = np.concatenate(
    (arr1[0:12], arr1[24:36], arr2[0:12], arr2[24:36], arr2[36:48]), axis=0)
arr_val = np.concatenate((arr1[12:24], arr2[12:24]), axis=0)
arr_temp = np.concatenate((arr_test, arr_val), axis=0)
#columns are 'Illumination','Time', 'DCW', 'nitrate', 'lutein', 'nitrogen flowrate'
arr_tempA = np.column_stack((normalizer(standardizer(arr_temp[:, 0])),
                             normalizer(standardizer(arr_temp[:, 1])),
                             normalizer(standardizer(arr_temp[:, 2])),
                             normalizer(standardizer(arr_temp[:, 3])),
                             normalizer(standardizer(arr_temp[:, 4])),
                             normalizer(standardizer(arr_temp[:, 5]))))

arr_testT, arr_valT = arr_tempA[:len(arr_test)], arr_tempA[len(arr_test):]

#save the column statistics so that the scaling can be reversed later
arr_temp_means = np.mean(arr_temp, axis=0, dtype=np.float64)
#print exp_1.col
#print arr_temp
print arr_temp_means
arr_temp_stdD = np.std(arr_temp, axis=0, dtype=np.float64)
print arr_temp_stdD
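The means and standard deviations are kept so the z-scoring can later be undone on predictions; a minimal sketch of that inverse step, assuming standardizer computes (x - mean) / std per column (undoing the min-max normalizer would additionally require each column's min and max):

def unstandardize(col, mean, std):
    # invert (x - mean) / std
    return col * std + mean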
#different deltaT values, normalized. Should be 12, 24, 36, 72
#print cross_val

#Open networks

network_2 = NetworkReader.readFrom(net_fold + 'network_Type2H1NewSTD.xml')
network_4 = NetworkReader.readFrom(net_fold + 'network_Type2H2NewSTD.xml')
#normalize the cross validation set

#normalize the other data with the custom function. Remove the cross-validation set and rearrange

#print pd.DataFrame(array_data,columns=lista_col1)

#------------------------------------------Test on cross-validation set 3--------------------------------------------------------------------
arr_valT2 = np.column_stack(
    (standardizer(arr_val[:, 0]), normalizer(standardizer(arr_val[:, 1])),
     standardizer(arr_val[:, 2]), standardizer(arr_val[:, 3]),
     standardizer(arr_val[:, 4]), standardizer(arr_val[:, 5])))
beta = pd.DataFrame(arr_valT2, columns=lista_col2)
beta.drop('Time', axis=1, inplace=True)
#theta is the pre-treatment dataframe
theta = pd.DataFrame(arr_val, columns=lista_col2)
theta.drop('Time', axis=1, inplace=True)
#for each network do the round
#do the k
#this takes out the cross validation set for ease of use.
beta_Val = beta.values
theta_Val = theta.values
sim_net2 = [beta_Val[0]]

#Predict from Experimental point

#Open networks

network_2 = NetworkReader.readFrom(net_fold + 'network_Type2H1NewSTD_LessNoise2.xml')
network_4 = NetworkReader.readFrom(net_fold + 'network_Type2H2NewSTD_LessNoise2.xml')
#normalize the cross validation set

#normalize the other data with the custom function. Remove the cross-validation set and rearrange


#print pd.DataFrame(array_data,columns=lista_col1)


#------------------------------------------Test on cross-validation set 3--------------------------------------------------------------------
arr_valT2 = np.column_stack(
    (standardizer(arr_val[:, 0]), normalizer(standardizer(arr_val[:, 1])),
     standardizer(arr_val[:, 2]), standardizer(arr_val[:, 3]),
     standardizer(arr_val[:, 4]), standardizer(arr_val[:, 5])))
beta = pd.DataFrame(arr_valT2, columns=lista_col2)
beta.drop('Time', axis=1, inplace=True)
#theta is the pre-treatment dataframe
theta = pd.DataFrame(arr_val, columns=lista_col2)
theta.drop('Time', axis=1, inplace=True)
#for each network do the round
#do the k
#this takes out the cross validation set for ease of use.
beta_Val = beta.values
theta_Val = theta.values
sim_net2 = [beta_Val[0]]

#Predict from Experimental point
#simulation for network_2, 1 hidden layer
for j in xrange(11):
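The body of this rollout loop is not included above. A hedged sketch of a one-step-ahead simulation with a PyBrain network, assuming the 5 inputs are the columns of beta_Val after dropping Time and the 3 outputs are the predicted changes in DCW, nitrate and lutein (columns 1-3 of the state; these index choices are assumptions, not taken from the source):

for j in xrange(11):
    state = sim_net2[-1]
    delta = network_2.activate(state)  # 3 predicted concentration changes
    nxt = list(state)
    for c in xrange(3):
        nxt[c + 1] += delta[c]  # apply the deltas to DCW, nitrate, lutein
    sim_net2.append(nxt)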
                    sheetname='Sheet1',
                    header=None,
                    usecols=range(5),
                    columns=lista_col)
mb2.col = lista_col1
array_data = mb2.values[1:]  #keep only the numbers and turn into numpy array

length_data = len(array_data)

#the four sets of experiments are given separately. These are their start and end points
set_lengths = ((0, 13), (13, 26), (26, 39), (39, 52))
#third set is separated
#an enhanced array that will contain the noisy data

cross_val = array_data[set_lengths[2][0]:set_lengths[2][1]]
times = normalizer(cross_val[:, 1])
#different deltaT values, normalized. Should be 12, 24, 36, 72
#print cross_val

deltaT = np.append(times[1:4], [times[6]])
#sys.exit()

intrv = [1, 2, 3, 6]

#Open networks
network_1 = NetworkReader.readFrom(net_fold + 'network_Type1H1.xml')
network_2 = NetworkReader.readFrom(net_fold + 'network_Type2H1.xml')
network_3 = NetworkReader.readFrom(net_fold + 'network_Type1H2.xml')
network_4 = NetworkReader.readFrom(net_fold + 'network_Type2H2.xml')
#normalize the cross validation set
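The .xml files loaded here are PyBrain network snapshots; the counterpart API for producing them is NetworkWriter. A sketch of the save/load round trip, with net standing in for a trained network:

from pybrain.tools.customxml import NetworkReader, NetworkWriter

# persist a trained network to XML ...
NetworkWriter.writeToFile(net, net_fold + 'network_Type1H1.xml')
# ... and read it back later, as done above
network_1 = NetworkReader.readFrom(net_fold + 'network_Type1H1.xml')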
Example #8
        noise_array = np.array([[temp_data[k, 0]] + [
            temp_data[k, j] +
            temp_data[k, j] * np.random.uniform(-noise, noise)
            for j in xrange(1, 5)
        ] for k in xrange(len(temp_data))])

        #print noise_array
        #print enh_array
        enh_array = np.concatenate((enh_array, noise_array), axis=0)
        #print enh_array
        
    #Data normalization


    #print enh_array[:3]
    #print enh_array[1:3]
    #print 'its length',enh_array.shape
    # a custom-built function normalizes each column. The columns are then put back together

    enh_array = np.column_stack(
        (normalizer(enh_array[1:, 0]), normalizer(enh_array[1:, 1]),
         normalizer(enh_array[1:, 2]), normalizer(enh_array[1:, 3]),
         normalizer(enh_array[1:, 4])))

    #print enh_array
    #print 'its length once normalized',enh_array.shape
    #test_arr=pd.DataFrame(enh_array)
    #test_arr.to_csv('test_check.csv')
    #sys.exit()
    #check later this is NOT DONE

    #build and populate datasets
    ds1 = SupervisedDataSet(5, 3)
    ds2 = SupervisedDataSet(4, 3)
    r_lengths = [[i, i + 13] for i in xrange(0, 13 * 100 * 3, 13)]
    #print r_lengths

    for j in r_lengths:
                      columns=lista_col)
exp_1.col = lista_col1
exp_2.col = lista_col1
flow1 = np.array([(4 * [0] + 8 * [127.5]) * 3])
flow2 = np.array([(4 * [0] + 8 * [25.5]) * 4])

arr1 = np.concatenate((exp_1.values[1:], flow1.T), axis=1)
arr2 = np.concatenate((exp_2.values[1:], flow2.T), axis=1)

arr_test = np.concatenate(
    (arr1[0:12], arr1[24:36], arr2[0:12], arr2[24:36], arr2[36:48]), axis=0)
arr_val = np.concatenate((arr1[12:24], arr2[12:24]), axis=0)
arr_temp = np.concatenate((arr_test, arr_val), axis=0)

arr_tempA = np.column_stack(
    (normalizer(arr_temp[:, 0]), normalizer(arr_temp[:, 1]),
     normalizer(arr_temp[:, 2]), normalizer(arr_temp[:, 3]),
     normalizer(arr_temp[:, 4]), normalizer(arr_temp[:, 5])))

arr_testT, arr_valT = arr_tempA[:len(arr_test)], arr_tempA[len(arr_test):]

#the four sets of experiments are given separately. These are their start and end points
set_lengths = ((0, 13), (13, 26), (26, 39), (39, 52))
#third set is separated
#an enhanced array that will contain the noisy data

#different deltaT values, normalized. Should be 12, 24, 36, 72
#print cross_val

#Open networks
        #generate a noisy array the same size as the current sub-array, to be appended
        noise_array = np.array([[temp_data[k, 0]] + [temp_data[k, 1]] + [
            temp_data[k, j] +
            temp_data[k, j] * np.random.uniform(-noise, noise)
            for j in xrange(2, 6)
        ] for k in xrange(len(temp_data))])
        enh_array = np.concatenate((enh_array, noise_array), axis=0)

    #Data normalization
    #print noise_array
    # a custom-built function normalizes each column. The columns are then put back together
    #TIME REMOVED
    #now we have in order
    #Illumination DCW nitrate Lutein n-Flowrate
enh_array = np.column_stack(
    (normalizer(enh_array[1:, 0]), normalizer(enh_array[1:, 2]),
     normalizer(enh_array[1:, 3]), normalizer(enh_array[1:, 4]),
     normalizer(enh_array[1:, 5])))

normalized_time = normalizer(exp_1.values[1:13, 1])
print normalized_time
time_int = normalized_time[1:4]

#build and populate datasets
ds2 = SupervisedDataSet(6, 3)
#this is meant to
r_lengths = [[i, i + 12] for i in xrange(0, 12 * 100 * 5, 12)]
#print r_lengths[-1]
#print len(enh_array)
#dataset without time
#print enh_array
Example #11
#----------
# build the datasets
#----------




length_data = len(array_data)
ds1 = SupervisedDataSet(5, 3)
#the four sets are given separately
set_lengths = ((0, 13), (13, 26), (26, 39), (39, 52))

#Data normalization
# a custom-built function normalizes each column. The columns are then put back together
array_data = np.column_stack(
    (normalizer(array_data[:, 0]), normalizer(array_data[:, 1]),
     normalizer(array_data[:, 2]), normalizer(array_data[:, 3]),
     normalizer(array_data[:, 4])))

#print array_data


for j in set_lengths:
    temp_data = array_data[j[0]:j[1]]

    for k in xrange(len(temp_data) - 1):

        for l in xrange(k + 1, len(temp_data)):
            ds1.addSample(
                (temp_data[k][0], temp_data[l][1] - temp_data[k][1],
                 temp_data[k][2], temp_data[k][3], temp_data[k][4]),
                (temp_data[l][2] - temp_data[k][2],
                 temp_data[l][3] - temp_data[k][3],
                 temp_data[l][4] - temp_data[k][4]))
            #in this dataset we add the illumination factor and the chemical concentrations at a certain time
            #one of the inputs is the difference between the current time and the time of the step we want as output
            #the output is only the change in the chemical concentrations
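For instance, with k = 0 and l = 1, and assuming the five columns are Illumination, Time, DCW, nitrate, lutein as in the other examples, the first sample added is:

# input : (illum_0, t_1 - t_0, DCW_0, nitrate_0, lutein_0)
# target: (DCW_1 - DCW_0, nitrate_1 - nitrate_0, lutein_1 - lutein_0)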
                      columns=lista_col)
exp_1.col = lista_col1
exp_2.col = lista_col1
flow1 = np.array([(4 * [0] + 8 * [127.5]) * 3])
flow2 = np.array([(4 * [0] + 8 * [25.5]) * 4])

arr1 = np.concatenate((exp_1.values[1:], flow1.T), axis=1)
arr2 = np.concatenate((exp_2.values[1:], flow2.T), axis=1)

arr_test = np.concatenate(
    (arr1[0:12], arr1[24:36], arr2[0:12], arr2[24:36], arr2[36:48]), axis=0)
arr_val = np.concatenate((arr1[12:24], arr2[12:24]), axis=0)
arr_temp = np.concatenate((arr_test, arr_val), axis=0)

arr_temp = np.column_stack(
    (normalizer(arr_temp[:, 0]), normalizer(arr_temp[:, 1]),
     normalizer(arr_temp[:, 2]), normalizer(arr_temp[:, 3]),
     normalizer(arr_temp[:, 4]), normalizer(arr_temp[:, 5])))

arr_testT, arr_valT = arr_temp[:len(arr_test)], arr_temp[len(arr_test):]

#the four sets of experiments are given separately. These are their start and end points
set_lengths = ((0, 13), (13, 26), (26, 39), (39, 52))
#third set is separated
#an enhanced array that will contain the noisy data

#different deltaT values, normalized. Should be 12, 24, 36, 72
#print cross_val

#Open networks