def intersect_rows(array1, array2, index=None):
    """Return intersection of rows"""

    if array1.shape[0] == 0:
        if index == True:
            return (array1, sp.zeros((0,)), sp.zeros((0,)))
        else:
            return array1
    if array2.shape[0] == 0:
        if index == True:
            return (array2, sp.zeros((0,)), sp.zeros((0,)))
        else:
            return array2

    array1_v = array1.view([('', array1.dtype)] * array1.shape[1])
    array2_v = array2.view([('', array2.dtype)] * array2.shape[1])
    array_i = sp.intersect1d(array1_v, array2_v)

    if index == True:
        a1_i = sp.where(sp.in1d(array1_v, array_i))[0]
        a2_i = sp.where(sp.in1d(array2_v, array_i))[0]
        return (array_i.view(array1.dtype).reshape(array_i.shape[0], array1.shape[1]), a1_i, a2_i)
    else:
        return array_i.view(array1.dtype).reshape(array_i.shape[0], array1.shape[1])
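# Usage sketch for the view-based intersect_rows above (illustrative data;
# `sp` is the scipy/numpy alias used throughout this file). Note that
# sp.intersect1d returns the common rows in sorted order, and the two index
# arrays point into array1 and array2 respectively.
a = sp.array([[1, 2], [3, 4], [5, 6]])
b = sp.array([[3, 4], [7, 8], [1, 2]])
rows, i1, i2 = intersect_rows(a, b, index=True)
# rows -> [[1, 2], [3, 4]]; i1 -> [0, 1]; i2 -> [0, 2]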
def _generate_masked_mesh(self, cell_mask=None):
    r"""
    Generates the mesh based on the cell mask provided
    """
    #
    if cell_mask is None:
        cell_mask = sp.ones(self.data_map.shape, dtype=bool)
    #
    # initializing arrays
    self._edges = sp.ones(0, dtype=str)
    self._merge_patch_pairs = sp.ones(0, dtype=str)
    self._create_blocks(cell_mask)
    #
    # building face arrays
    mapper = sp.ravel(sp.array(cell_mask, dtype=int))
    mapper[mapper == 1] = sp.arange(sp.count_nonzero(mapper))
    mapper = sp.reshape(mapper, (self.nz, self.nx))
    mapper[~cell_mask] = -sp.iinfo(int).max
    #
    boundary_dict = {
        'bottom': {'bottom': mapper[0, :][cell_mask[0, :]]},
        'top': {'top': mapper[-1, :][cell_mask[-1, :]]},
        'left': {'left': mapper[:, 0][cell_mask[:, 0]]},
        'right': {'right': mapper[:, -1][cell_mask[:, -1]]},
        'front': {'front': mapper[cell_mask]},
        'back': {'back': mapper[cell_mask]},
        'internal': {'bottom': [], 'top': [], 'left': [], 'right': []}
    }
    #
    # determining cells linked to a masked cell
    cell_mask = sp.where(~sp.ravel(cell_mask))[0]
    inds = sp.in1d(self._field._cell_interfaces, cell_mask)
    inds = sp.reshape(inds, (len(self._field._cell_interfaces), 2))
    inds = inds[:, 0].astype(int) + inds[:, 1].astype(int)
    inds = (inds == 1)
    links = self._field._cell_interfaces[inds]
    #
    # adjusting order so masked cells are all on links[:, 1]
    swap = sp.in1d(links[:, 0], cell_mask)
    links[swap] = links[swap, ::-1]
    #
    # setting side based on index difference
    sides = sp.ndarray(len(links), dtype='<U6')
    sides[sp.where(links[:, 1] == links[:, 0]-self.nx)[0]] = 'bottom'
    sides[sp.where(links[:, 1] == links[:, 0]+self.nx)[0]] = 'top'
    sides[sp.where(links[:, 1] == links[:, 0]-1)[0]] = 'left'
    sides[sp.where(links[:, 1] == links[:, 0]+1)[0]] = 'right'
    #
    # adding each block to the internal face dictionary
    inds = sp.ravel(mapper)[links[:, 0]]
    for side, block_id in zip(sides, inds):
        boundary_dict['internal'][side].append(block_id)
    self.set_boundary_patches(boundary_dict, reset=True)
def make_unique_by_event(event_list):
    # function event_list = make_unique_by_event(event_list)
    #
    # This script removes all events that share the same alternative event
    # coordinates but differ in the flanking size. The longest of several
    # equal events is kept.

    rm_idx = []
    last_kept = 0
    for i in range(1, event_list.shape[0]):
        if i % 1000 == 0:
            print '.',
            if i % 10000 == 0:
                print '%i' % i

        old_coords = event_list[last_kept].get_inner_coords(trafo=True)
        curr_coords = event_list[i].get_inner_coords(trafo=True)

        if old_coords.shape[0] == curr_coords.shape[0] and sp.all(old_coords == curr_coords):
            ### assertion that we did everything right
            assert(event_list[last_kept].chr == event_list[i].chr)
            assert(event_list[last_kept].strand == event_list[i].strand)
            ### check which event is longer -> keep longer event
            len1 = event_list[last_kept].get_len()
            len2 = event_list[i].get_len()
            if len1 > len2:
                keep_idx = last_kept
                not_keep_idx = i
            else:
                keep_idx = i
                not_keep_idx = last_kept
            ### check if we would lose strains
            idx = sp.where(~sp.in1d(event_list[not_keep_idx].strain, event_list[keep_idx].strain))[0]
            if idx.shape[0] > 0:
                event_list[keep_idx].strain = sp.r_[event_list[keep_idx].strain,
                                                    event_list[not_keep_idx].strain[idx]]
                ### TODO !!!!!!!!!!!!! make sure that we keep different coordinates if the strains differ ...
                event_list[keep_idx].gene_name = sp.union1d(event_list[keep_idx].gene_name,
                                                            event_list[not_keep_idx].gene_name)
            rm_idx.append(not_keep_idx)
            last_kept = keep_idx
        else:
            last_kept = i

    print 'events dropped: %i' % len(rm_idx)
    keep_idx = sp.where(~sp.in1d(sp.arange(event_list.shape[0]), rm_idx))[0]
    event_list = event_list[keep_idx]

    return event_list
def plot_overlap_ps(result_file,
                    ss_file='/Users/bjarnivilhjalmsson/data/GIANT/GIANT_HEIGHT_Wood_et_al_2014_publicrelease_HapMapCeuFreq.txt',
                    fig_filename='/Users/bjarnivilhjalmsson/data/tmp/manhattan_combPC_HGT.png',
                    method='combPC',
                    ylabel='Comb. PC (HIP,WC,HGT,BMI) $-log_{10}(P$-value$)$',
                    xlabel='Height $-log_{10}(P$-value$)$',
                    p_thres=0.00001):
    # Parse results and SS file
    res_table = pandas.read_table(result_file)
    ss_table = pandas.read_table(ss_file)
    # Parse
    res_sids = sp.array(res_table['SNPid'])
    if method == 'MVT':
        comb_ps = sp.array(res_table['pval'])
    elif method == 'combPC':
        comb_ps = sp.array(res_table['combPC'])
    if 'MarkerName' in ss_table.keys():
        ss_sids = sp.array(ss_table['MarkerName'])
    elif 'SNP' in ss_table.keys():
        ss_sids = sp.array(ss_table['SNP'])
    else:
        raise Exception("Don't know where to look for rs IDs")
    marg_ps = sp.array(ss_table['p'])

    # Filtering boring p-values
    res_p_filter = comb_ps < p_thres
    res_sids = res_sids[res_p_filter]
    comb_ps = comb_ps[res_p_filter]
    # ss_p_filter = marg_ps < p_thres
    # ss_sids = ss_sids[ss_p_filter]
    # marg_ps = marg_ps[ss_p_filter]

    common_sids = sp.intersect1d(res_sids, ss_sids)
    print 'Found %d SNPs in common' % (len(common_sids))
    ss_filter = sp.in1d(ss_sids, common_sids)
    res_filter = sp.in1d(res_sids, common_sids)

    ss_sids = ss_sids[ss_filter]
    res_sids = res_sids[res_filter]
    marg_ps = marg_ps[ss_filter]
    comb_ps = comb_ps[res_filter]

    print 'Now sorting'
    ss_index = sp.argsort(ss_sids)
    res_index = sp.argsort(res_sids)
    marg_ps = -sp.log10(marg_ps[ss_index])
    comb_ps = -sp.log10(comb_ps[res_index])

    with plt.style.context('fivethirtyeight'):
        plt.plot(marg_ps, comb_ps, 'b.', alpha=0.2)
        (x_min, x_max) = plt.xlim()
        (y_min, y_max) = plt.ylim()
        plt.plot([x_min, x_max], [y_min, y_max], 'k--', alpha=0.2)
        plt.ylabel(ylabel)
        plt.xlabel(xlabel)
        plt.tight_layout()
        plt.savefig(fig_filename)
        plt.clf()
def test_trim_extend():
    pn = OpenPNM.Network.Cubic(shape=[5, 5, 5])
    assert sp.all(sp.in1d(pn.find_neighbor_pores(pores=0), [1, 5, 25]))
    assert [pn.Np, pn.Nt] == [125, 300]
    pn.trim(pores=[0])
    assert sp.all(sp.in1d(pn.find_neighbor_pores(pores=0), [1, 5, 25]))
    assert [pn.Np, pn.Nt] == [124, 297]
    pn.extend(pore_coords=[0, 0, 0], throat_conns=[[124, 0]])
    assert [pn.Np, pn.Nt] == [125, 298]
    assert sp.all(sp.in1d(pn.find_neighbor_pores(pores=0), [1, 5, 25, 124]))
def calculate_hapmap_pcs(hapmap_file, pc_weights_dict, snps_filter=None):
    """
    Calculates the principal components for the hapmap project

    :param hapmap_file: Hapmap file in HDF5 format
    :param pc_weights_dict: dictionary with SNP weights (key = snpid)
    :param snps_filter: list of snp-ids to subset (optional)
    :return: dictionary with pcs and number of snps that were used
    """
    log.info('Calculating Principal components for Hapmap file %s' % hapmap_file)
    ok_sids = np.asarray(list(pc_weights_dict.keys()))
    log.info('Loaded PC weight for %d SNPs' % (len(ok_sids)))
    # Load genotypes
    log.info('Load Hapmap dataset')
    h5f = h5py.File(hapmap_file, 'r')
    num_indivs = len(h5f['indivs']['continent'][...])
    log.info('Found genotypes for %d individuals' % num_indivs)
    pcs = sp.zeros((num_indivs, 2))
    num_nt_issues = 0
    num_snps_used = 0
    log.info('Calculating PCs')
    for chrom in range(1, 23):
        log.info('Working on Chromosome %d' % chrom)
        chrom_str = 'chr%d' % chrom
        log.info('Identifying overlap')
        ok_snp_filter = sp.in1d(ok_sids, snps_filter[chrom_str])
        ok_chrom_sids = ok_sids.compress(ok_snp_filter, axis=0)
        sids = h5f[chrom_str]['variants']['ID'][...]
        ok_snp_filter = sp.in1d(sids, ok_chrom_sids)
        # assert sids[ok_snp_filter]==ok_sids, 'WTF?'
        sids = sids.compress(ok_snp_filter, axis=0)
        log.info('Loading SNPs')
        snps = h5f[chrom_str]['calldata']['snps'][...]
        snps = snps.compress(ok_snp_filter, axis=0)
        length = len(h5f[chrom_str]['variants/REF'])
        nts = np.hstack((h5f[chrom_str]['variants/REF'][:].reshape(length, 1),
                         h5f[chrom_str]['variants/ALT'][:].reshape(length, 1)))
        nts = nts.compress(ok_snp_filter, axis=0)
        log.info('Updating PCs')
        pcs_per_chr = _calc_pcs(pc_weights_dict, sids, nts, snps)
        pcs += pcs_per_chr['pcs']
        num_nt_issues += pcs_per_chr['num_nt_issues']
        num_snps_used += pcs_per_chr['num_snps_used']
    h5f.close()
    log.info('%d SNPs were excluded from the analysis due to nucleotide issues.' % (num_nt_issues))
    log.info('%d SNPs were used for the analysis.' % (num_snps_used))
    return {'pcs': pcs, 'num_snps_used': num_snps_used}
def intersect_rows(array1, array2, index=False):
    """Return array with rows that intersect between array1 and array2"""

    tmp1 = sp.array(['-'.join(array1[i, :].astype('str')) for i in range(array1.shape[0])])
    tmp2 = sp.array(['-'.join(array2[i, :].astype('str')) for i in range(array2.shape[0])])

    idx = sp.where(sp.in1d(tmp1, tmp2))[0]
    if index:
        idx2 = sp.where(sp.in1d(tmp2, tmp1))[0]
        return (array1[idx, :], idx, idx2)
    else:
        return (array1[idx, :], None, None)
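# Usage sketch for this string-join variant (illustrative data; unlike the
# sorted view-based version above, rows come back in array1's original order):
a = sp.array([[1, 2], [3, 4]])
b = sp.array([[3, 4], [9, 9]])
rows, i1, i2 = intersect_rows(a, b, index=True)
# rows -> [[3, 4]]; i1 -> [1]; i2 -> [0]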
def find_interface_throats(self, labels=[]):
    r"""
    Finds the throats that join two pore labels.

    Parameters
    ----------
    labels : list of strings
        The labels of the two pore groups whose interface is sought

    Returns
    -------
    An array of throat numbers that connect the given pore groups

    Notes
    -----
    This method is meant to find interfaces between TWO groups, regions
    or clusters of pores (as defined by their label).  If the input labels
    overlap or are not adjacent, an empty array is returned.

    Examples
    --------
    >>> import OpenPNM
    >>> pn = OpenPNM.Network.TestNet()
    >>> pn['pore.domain1'] = False
    >>> pn['pore.domain2'] = False
    >>> pn['pore.domain1'][[0, 1, 2]] = True
    >>> pn['pore.domain2'][[5, 6, 7]] = True
    >>> pn.find_interface_throats(labels=['domain1', 'domain2'])
    array([1, 4, 7])

    TODO: It might be a good idea to allow overlapping regions
    """
    Tind = sp.array([], ndmin=1)
    if sp.shape(labels)[0] != 2:
        logger.error('Exactly two labels must be given')
        pass
    else:
        P1 = self.pores(labels=labels[0])
        P2 = self.pores(labels=labels[1])
        # Check if labels overlap
        if sp.sum(sp.in1d(P1, P2)) > 0:
            logger.error('Some labels overlap, interface cannot be found')
            pass
        else:
            T1 = self.find_neighbor_throats(P1)
            T2 = self.find_neighbor_throats(P2)
            Tmask = sp.in1d(T1, T2)
            Tind = T1[Tmask]
    return Tind
def on_shifted_dwp_curves(self, t):
    a = P4Rm()
    if a.AllDataDict['model'] == 0:
        temp_1 = arange(2, len(a.ParamDict['dwp'])+1)
        temp_2 = temp_1 * t / (len(a.ParamDict['dwp']))
        P4Rm.ParamDict['x_dwp'] = t - temp_2
        shifted_dwp = a.ParamDict['dwp'][:-1:]
        temp_3 = in1d(around(a.ParamDict['depth'], decimals=3),
                      around(a.ParamDict['x_dwp'], decimals=3))
        temp_4 = a.ParamDict['DW_i'][temp_3]
        P4Rm.ParamDict['scale_dw'] = shifted_dwp / temp_4
        P4Rm.ParamDict['scale_dw'][a.ParamDict['scale_dw'] == 0] = 1.
        P4Rm.ParamDict['DW_shifted'] = shifted_dwp/a.ParamDict['scale_dw']
        P4Rm.ParamDict['dw_out'] = a.ParamDict['dwp'][-1]
    elif a.AllDataDict['model'] == 1:
        temp_1 = arange(0, len(a.ParamDict['dwp'])+1-3)
        temp_2 = temp_1 * t / (len(a.ParamDict['dwp'])-3)
        P4Rm.ParamDict['x_dwp'] = t - temp_2
        shifted_dwp = a.ParamDict['dwp'][1:-1:]
        temp_3 = in1d(around(a.ParamDict['depth'], decimals=3),
                      around(a.ParamDict['x_dwp'], decimals=3))
        temp_4 = a.ParamDict['DW_i'][temp_3]
        P4Rm.ParamDict['scale_dw'] = shifted_dwp / temp_4
        P4Rm.ParamDict['scale_dw'][a.ParamDict['scale_dw'] == 0] = 1.
        P4Rm.ParamDict['DW_shifted'] = shifted_dwp/a.ParamDict['scale_dw']
        temp_5 = array([a.ParamDict['dwp'][0], a.ParamDict['dwp'][-1]])
        P4Rm.ParamDict['dw_out'] = temp_5
    elif a.AllDataDict['model'] == 2:
        x_dw_temp = []
        x_dw_temp.append(t*(1-a.ParamDict['dwp'][1]))
        x_dw_temp.append(t*(1-a.ParamDict['dwp'][1] + a.ParamDict['dwp'][2]/2))
        x_dw_temp.append(t*(1-a.ParamDict['dwp'][1] - a.ParamDict['dwp'][3]/2))
        x_dw_temp.append(t*0.05)
        P4Rm.ParamDict['x_dwp'] = x_dw_temp

        y_dw_temp = []
        y_dw_temp.append(a.ParamDict['dwp'][0])
        y_dw_temp.append(1. - (1-a.ParamDict['dwp'][0])/2)
        y_dw_temp.append(1. - (1-a.ParamDict['dwp'][0])/2 - (1-a.ParamDict['dwp'][6])/2)
        y_dw_temp.append(a.ParamDict['dwp'][6])
        P4Rm.ParamDict['DW_shifted'] = y_dw_temp
def solve(self, A=None, b=None, iterative_solver=None, **kwargs):
    r"""
    Executes the right algorithm for the solution: regular solution of a
    linear system or iterative solution over the nonlinear source terms.

    Parameters
    ----------
    A : sparse matrix
        2D Coefficient matrix
    b : dense matrix
        1D RHS vector
    iterative_solver : string
        Name of the iterative solver to use.  If no solver is specified,
        sp.solve is used, which is a direct solver (SuperLU on default
        SciPy installations).
    kwargs : list of keyword arguments
        These arguments and values are sent to the sparse solver, so read
        the specific documentation for the solver chosen
    """
    self._iterative_solver = iterative_solver
    # Executes the right algorithm
    if any("pore.source_nonlinear" in s for s in self.props()):
        X = self._do_one_outer_iteration(**kwargs)
    else:
        X = self._do_one_inner_iteration(A, b, **kwargs)
    self.X = X
    # ~ replaces the deprecated unary minus on boolean masks
    self._Neumann_super_X = self.X[~sp.in1d(sp.arange(0, self._coeff_dimension),
                                            self.pores())]
    # Removing the additional super pore variables from the results
    self[self._quantity] = self.X[self.pores()]
    logger.info('Writing the results to ' + '[\'' + self._quantity +
                '\'] in the ' + self.name + ' algorithm.')
def _check_trapping(self, inv_val):
    r"""
    Determine which pores and throats are trapped by invading phase.  This
    method is called by ``run`` if 'trapping' is set to True.
    """
    # Generate a list containing boolean values for throat state
    Tinvaded = self['throat.inv_Pc'] < sp.inf
    # Add residual throats, if any, to list of invaded throats
    Tinvaded = Tinvaded + self['throat.residual']
    # Invert logic to find defending throats
    Tdefended = ~Tinvaded
    [pclusters, tclusters] = self._net.find_clusters2(mask=Tdefended,
                                                      t_labels=True)
    # See which outlet pores remain uninvaded
    outlets = self['pore.outlets'] * (self['pore.inv_Pc'] == sp.inf)
    # Identify clusters connected to remaining outlet sites
    def_clusters = sp.unique(pclusters[outlets])
    temp = sp.in1d(sp.unique(pclusters), def_clusters, invert=True)
    trapped_clusters = sp.unique(pclusters)[temp]
    trapped_clusters = trapped_clusters[trapped_clusters >= 0]
    # Find defending clusters NOT connected to the outlet pores
    pmask = np.in1d(pclusters, trapped_clusters)
    # Store current applied pressure in newly trapped pores
    pinds = (self['pore.trapped'] == sp.inf) * (pmask)
    self['pore.trapped'][pinds] = inv_val
    # Find throats on the trapped defending clusters
    tinds = self._net.find_neighbor_throats(pores=pinds,
                                            mode='intersection')
    self['throat.trapped'][tinds] = inv_val
    self['throat.entry_pressure'][tinds] = 1000000
def make_introns_feasible(introns, genes, CFG):
    # introns = make_introns_feasible(introns, genes, CFG)

    tmp1 = sp.array([x.shape[0] for x in introns[:, 0]])
    tmp2 = sp.array([x.shape[0] for x in introns[:, 1]])

    unfeas = sp.where((tmp1 > 200) | (tmp2 > 200))[0]
    print >> CFG['fd_log'], 'found %i unfeasible genes' % unfeas.shape[0]

    while unfeas.shape[0] > 0:
        ### make filter more stringent
        CFG['read_filter']['exon_len'] = min(36, CFG['read_filter']['exon_len'] + 4)
        CFG['read_filter']['mincount'] = 2 * CFG['read_filter']['mincount']
        CFG['read_filter']['mismatch'] = max(CFG['read_filter']['mismatch'] - 1, 0)

        ### get new intron counts
        tmp_introns = get_intron_list(genes[unfeas], CFG)
        introns[unfeas, :] = tmp_introns

        ### still unfeasible?
        tmp1 = sp.array([x.shape[0] for x in introns[:, 0]])
        tmp2 = sp.array([x.shape[0] for x in introns[:, 1]])

        still_unfeas = sp.where((tmp1 > 200) | (tmp2 > 200))[0]
        idx = sp.where(~sp.in1d(unfeas, still_unfeas))[0]

        for i in unfeas[idx]:
            print >> CFG['fd_log'], '[feasibility] set criteria for gene %s to: min_ex %i, min_conf %i, max_mism %i' % (genes[i].name, CFG['read_filter']['exon_len'], CFG['read_filter']['mincount'], CFG['read_filter']['mismatch'])
        unfeas = still_unfeas

    return introns
def get_usgs_n(self):
    if self.get_usgsrc() == 0:
        return
    # Fetch usgsq, usgsh, handq, handh, handarea, handrad, handslope, handstage
    self.get_values()
    # Find indices for integer stage-height values in usgsh, and apply to usgsq
    usgsidx = scipy.where(scipy.equal(scipy.mod(self.usgsh, 1), 0))
    usgsh = self.usgsh[usgsidx]
    usgsq = self.usgsq[usgsidx]
    # Find indices where usgsh[usgsidx] occur in handstage, and apply to
    # handarea and handrad
    handidx = scipy.where(scipy.in1d(self.handstage, usgsh))
    area = self.handarea[handidx]
    hydrad = self.handrad[handidx]
    # Remove usgsq values for duplicate usgsh heights (keep first instance only)
    if usgsh.shape != area.shape:
        for i in range(usgsh.shape[0]):
            if i == 0:
                pass
            elif usgsh[i] == usgsh[i - 1]:
                usgsq = scipy.delete(usgsq, i)
    # Calculate average Manning's n after converting discharge units
    disch = usgsq  # *0.0283168 to convert cfs to cms
    self.usgsroughness_array = self.mannings_n(area=area, hydrad=hydrad,
                                               slope=self.handslope, disch=disch)
    self.usgsroughness = scipy.average(self.usgsroughness_array)
    print 'Average roughness: {0:.2f}'.format(self.usgsroughness)
def test_find_nearby_pores_distance_2_flattened_inclself(self):
    a = self.net.find_nearby_pores(pores=[0, 1], distance=2,
                                   flatten=True, excl_self=False)
    assert sp.size(a) == 17
    assert sp.all(sp.in1d([0, 1], a))
def validate_vulnerability_set(self):
    """The vulnerability set must provide curves for all sites in this
    object.  A Vulnerability_Function needs to be defined to match each
    attributes['STRUCTURE_CLASSIFICATION'] identifier.

    Raises a RuntimeError if it cannot find a match.
    """
    if self.vulnerability_set is None:
        raise RuntimeError('Vulnerability Set must not be None')

    # Function IDs for the vulnerability set
    curves_defined = self.vulnerability_set.vulnerability_functions.keys()

    # Sites STRUCTURE_CLASSIFICATIONs
    structure_classifications = self.attributes['STRUCTURE_CLASSIFICATION']
    structure_classifications = unique(structure_classifications)

    # Are there any unique structure classifications that are not in the
    # curves defined?
    in_curves_defined = in1d(structure_classifications, curves_defined)

    if not alltrue(in_curves_defined):
        msg = 'The following structures do not have a vulnerability curve: '
        msg += '%s' % structure_classifications[where(in_curves_defined == False)]
        raise RuntimeError(msg)
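# Minimal sketch of the membership check performed above (hypothetical
# classification names; `unique` and `in1d` are the scipy/numpy functions
# this module imports by bare name):
curves_defined = ['BRICK', 'TIMBER']
structure_classifications = unique(['BRICK', 'STEEL', 'BRICK'])
missing = structure_classifications[~in1d(structure_classifications, curves_defined)]
# missing -> ['STEEL'], which would trigger the RuntimeError above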
def getSizeFactor(fn_anno, data, gid, mode='sum', withXYMT=True, filterbyPC=True):
    '''
    input: annotation, counts and gene ids
    output: sum of protein-coding gene levels, excluding sex-chromosome and
    mitochondrial genes
    '''
    anno = sp.loadtxt(fn_anno, delimiter='\t', dtype='string', usecols=[0, 2, 8])
    anno = anno[anno[:, 1] == 'gene', :]

    if not withXYMT:  ### filter X, Y and MT
        anno = anno[anno[:, 0] != 'MT', :]
        anno = anno[anno[:, 0] != 'Y', :]
        anno = anno[anno[:, 0] != 'X', :]

    agid = [x.split(';')[0] for x in anno[:, 2]]  ### clean gene ids
    agid = sp.array([x.split(" ")[1].strip('\"') for x in agid])

    if filterbyPC:  ### filter protein coding
        gtpe = [x.split(';')[2] for x in anno[:, 2]]
        gtpe = sp.array([x.split('\"')[1].split('\"')[0] for x in gtpe])
        iPC = sp.where(gtpe == 'protein_coding')[0]
        agid = agid[iPC]

    iGn = sp.in1d(gid, agid)
    libsize = sp.sum(data[iGn, :], axis=0)
    if mode == 'uq':
        libsize = sp.array([sp.percentile(x[x != 0], 75) for x in data[iGn, :].T]) * iGn.sum()
    return libsize
def get_sid_pos_map(sids, hdf5_kgenomes_file=None):
    """
    Returns a SNP map, with information for each SNP
    """
    if hdf5_kgenomes_file is None:
        hdf5_kgenomes_file = __default_kgenomes_file__
    h5f = h5py.File(hdf5_kgenomes_file, 'r')
    snp_info_map = {}
    for chrom_i in range(1, 23):
        cg = h5f['chrom_%d' % chrom_i]
        sids_1k = cg['sids'][...]
        sids_filter_1k = sp.in1d(sids_1k, sp.array(sids))
        common_sids = sids_1k[sids_filter_1k]
        common_positions = cg['positions'][sids_filter_1k]
        eur_mafs = cg['eur_mafs'][sids_filter_1k]
        nts = cg['nts'][sids_filter_1k]
        for sid, pos, eur_maf, nt in izip(common_sids, common_positions, eur_mafs, nts):
            snp_info_map[sid] = {'pos': pos, 'chrom': chrom_i,
                                 'eur_maf': eur_maf, 'nts': nt}
    return snp_info_map
def regenerate(self, prop_list='', mode=None):
    r'''
    This updates all properties using the selected methods

    Parameters
    ----------
    prop_list : string or list of strings
        The names of the properties that should be updated, defaults to all
    mode : string
        Control how the regeneration occurs.

    Examples
    --------
    >>> pn = OpenPNM.Network.TestNet()
    >>> pind = pn.get_pore_indices()
    >>> geom = OpenPNM.Geometry.Stick_and_Ball(network=pn, name='geo_test', locations=pind)
    >>> geom.regenerate()  # Regenerate all properties at once
    >>> geom.regenerate('pore_seed')  # only one property
    >>> geom.regenerate(['pore_seed', 'pore_diameter'])  # or several
    '''
    if prop_list == '':
        prop_list = self._prop_list
    elif type(prop_list) == str:
        prop_list = [prop_list]
    if mode == 'exclude':
        a = sp.array(self._prop_list)
        b = sp.array(prop_list)
        c = a[sp.where(~sp.in1d(a, b))[0]]
        prop_list = list(c)
    for item in prop_list:
        self._logger.debug('Refreshing: ' + item)
        getattr(self, item)()
def calculate_ld(nt_map_file, kgenomes_file, output_folder, window_size):
    """
    Calculate LD in windows for a reference genome dataset for a given set
    of SNPIds that are defined in the genotype_file
    """
    log.info('Calculating LD')
    # Load 1K genome
    kg_h5f = h5py.File(kgenomes_file, 'r')

    # load map file.
    with open(nt_map_file, 'rb') as f:
        snp_map_dict = pickle.load(f, encoding='latin1')

    # Figure out overlap (all genotype SNPs should be in the 1K genomes data)..
    for chrom in range(1, 23):
        log.info('Working on Chromosome %s' % chrom)
        chrom_str1 = 'chr%s' % chrom
        kg_cg = kg_h5f[chrom_str1]
        kg_sids = kg_cg['snp_ids'][...]
        chrom_dict = snp_map_dict[chrom_str1]
        g_sids = chrom_dict['sids']

        kg_filter = sp.in1d(kg_sids, g_sids)

        assert sp.sum(kg_filter) == len(g_sids), '..bug...'
        assert sp.all(kg_sids[kg_filter] == g_sids), '...bug'

        snps = kg_cg['snps'][...]
        snps = snps.compress(kg_filter, axis=0)

        snp_stds = kg_cg['snp_stds'][...]
        snp_stds = snp_stds.compress(kg_filter, axis=0)

        snp_means = kg_cg['snp_means'][...]
        snp_means = snp_means.compress(kg_filter, axis=0)

        norm_snps = sp.array((snps - snp_means) / snp_stds, dtype='single')

        # Iterate over SNPs and calculate LD
        num_snps, num_indivs = snps.shape

        ld_mats = []
        boundaries = []

        for snp_i in range(num_snps):
            # integer division keeps the slice bounds integral (a float
            # here raises a TypeError under Python 3)
            start_i = max(0, snp_i - window_size // 2)
            end_i = min(snp_i + (window_size // 2) + 1, num_snps)

            X = norm_snps[start_i:end_i]
            D = sp.dot(X, X.T) / num_indivs
            ld_mats.append(D)
            boundaries.append([start_i, end_i])

        ld_dict = {'Ds': ld_mats, 'boundaries': boundaries,
                   'snp_means': snp_means, 'snp_stds': snp_stds,
                   'window_size': window_size}

        # Store things
        ld_file = '%s/LD' % output_folder + '_' + chrom_str1 + '.pickled.gz'
        log.info('Saving LD in %s' % ld_file)
        with gzip.open(ld_file, 'w') as f:
            pickle.dump(ld_dict, f, protocol=2)
def test_find_nearby_pores_distance_2_flattened_include_input(self):
    a = self.net.find_nearby_pores(pores=[0, 1], r=2,
                                   flatten=True, include_input=True)
    assert sp.size(a) == 17
    assert sp.all(sp.in1d([0, 1], a))
def remove_isolated_clusters(conns, nonzero_locs, num_to_keep):
    r"""
    Identifies and removes all disconnected clusters except the number of
    groups specified by "num_to_keep".  num_to_keep=N retains the N
    largest clusters.
    """
    #
    adj_mat = generate_adjacency_matrix(conns, nonzero_locs)
    #
    logger.info('determining connected components...')
    cs_ids = csgraph.connected_components(csgraph=adj_mat, directed=False)[1]
    groups, counts = sp.unique(cs_ids, return_counts=True)
    order = sp.argsort(counts)[::-1]
    groups = groups[order]
    counts = counts[order]
    #
    msg = '    {} component groups for {} total nodes'
    logger.debug(msg.format(groups.size, cs_ids.size))
    msg = '    largest group number: {}, size {}'
    logger.debug(msg.format(groups[0], counts[0]))
    msg = '    {} % of nodes contained in largest group'
    logger.debug(msg.format(counts[0]/cs_ids.size*100))
    msg = '    {} % of nodes contained in {} retained groups'
    num = sp.sum(counts[0:num_to_keep])/cs_ids.size*100
    logger.debug(msg.format(num, num_to_keep))
    #
    inds = sp.where(sp.in1d(cs_ids, groups[0:num_to_keep]))[0]
    num = nonzero_locs.size
    nonzero_locs = nonzero_locs[inds]
    msg = '    removed {} disconnected nodes'
    logger.debug(msg.format(num - nonzero_locs.size))
    #
    return nonzero_locs
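# Standalone sketch of the component-filtering idea used above (toy adjacency
# matrix; in the real function it comes from generate_adjacency_matrix):
import scipy as sp
from scipy.sparse import coo_matrix
from scipy.sparse import csgraph

row = sp.array([0, 1, 3])   # edges 0-1, 1-2, 3-4; node 5 stays isolated
col = sp.array([1, 2, 4])
adj_mat = coo_matrix((sp.ones(3), (row, col)), shape=(6, 6))
cs_ids = csgraph.connected_components(csgraph=adj_mat, directed=False)[1]
groups, counts = sp.unique(cs_ids, return_counts=True)
order = sp.argsort(counts)[::-1]
keep = groups[order][:1]    # num_to_keep = 1 -> largest component only
inds = sp.where(sp.in1d(cs_ids, keep))[0]
# inds -> [0, 1, 2], the three nodes of the largest cluster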
def fill_masked_pixels(dll, ll, delta, diff, iv, no_apply_filling):

    if no_apply_filling:
        return ll, delta, diff, iv, 0

    ll_idx = ll.copy()
    ll_idx -= ll[0]
    ll_idx /= dll
    ll_idx += 0.5
    index = sp.array(ll_idx, dtype=int)
    index_all = range(index[-1] + 1)
    index_ok = sp.in1d(index_all, index)

    delta_new = sp.zeros(len(index_all))
    delta_new[index_ok] = delta

    ll_new = sp.array(index_all, dtype=float)
    ll_new *= dll
    ll_new += ll[0]

    diff_new = sp.zeros(len(index_all))
    diff_new[index_ok] = diff

    iv_new = sp.ones(len(index_all))
    iv_new *= 0.0
    iv_new[index_ok] = iv

    nb_masked_pixel = len(index_all) - len(index)

    return ll_new, delta_new, diff_new, iv_new, nb_masked_pixel
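# Usage sketch for fill_masked_pixels (illustrative numbers; dll is the
# wavelength-grid pixel width used throughout this module):
dll = 1.0
ll = sp.array([0.0, 1.0, 3.0])      # the pixel at index 2 is missing
delta = sp.array([0.1, 0.2, 0.3])
diff = sp.zeros(3)
iv = sp.ones(3)
ll_new, delta_new, diff_new, iv_new, nmasked = \
    fill_masked_pixels(dll, ll, delta, diff, iv, no_apply_filling=False)
# ll_new -> [0., 1., 2., 3.]; iv_new -> [1., 1., 0., 1.]; nmasked -> 1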
def evaluate_trapping(self, p_outlets):
    r"""
    Finds trapped pores and throats after a full ordinary percolation
    simulation has been run.

    Parameters
    ----------
    p_outlets : array_like
        A list of pores that define the wetting phase outlets.
        Disconnection from these outlets results in trapping.

    Returns
    -------
    It creates arrays called ``pore.trapped`` and ``throat.trapped``, but
    also adjusts the ``pore.inv_Pc`` and ``throat.inv_Pc`` arrays to set
    trapped locations to have infinite invasion pressure.
    """
    self['pore.trapped'] = sp.zeros([self.Np, ], dtype=float)
    self['throat.trapped'] = sp.zeros([self.Nt, ], dtype=float)
    try:
        # Get points used in OP
        inv_points = sp.unique(self['pore.inv_Pc'])
    except:
        raise Exception('Ordinary percolation has not been run!')
    tind = self._net.throats()
    conns = self._net.find_connected_pores(tind)
    for inv_val in inv_points[0:-1]:
        # Find clusters of defender pores
        Pinvaded = self['pore.inv_Pc'] <= inv_val
        Cstate = sp.sum(Pinvaded[conns], axis=1)
        Tinvaded = self['throat.inv_Pc'] <= inv_val
        # 0 = all open, 1 = 1 pore filled,
        # 2 = 2 pores filled, 3 = 2 pores + 1 throat filled
        Cstate = Cstate + Tinvaded
        clusters = self._net.find_clusters(Cstate == 0)
        # Clean up clusters (invaded = -1, defended >= 0)
        clusters = clusters * (~Pinvaded) - (Pinvaded)
        # Identify clusters connected to outlet sites
        out_clusters = sp.unique(clusters[p_outlets])
        trapped_pores = ~sp.in1d(clusters, out_clusters)
        trapped_pores[Pinvaded] = False
        if sum(trapped_pores) > 0:
            inds = (self['pore.trapped'] == 0) * trapped_pores
            self['pore.trapped'][inds] = inv_val
            trapped_throats = self._net.find_neighbor_throats(trapped_pores)
            trapped_throat_array = np.asarray([False] * len(Cstate))
            trapped_throat_array[trapped_throats] = True
            inds = (self['throat.trapped'] == 0) * trapped_throat_array
            self['throat.trapped'][inds] = inv_val
            inds = (self['throat.trapped'] == 0) * (Cstate == 2)
            self['throat.trapped'][inds] = inv_val
    self['pore.inv_Pc'][self['pore.trapped'] > 0] = sp.inf
    self['throat.inv_Pc'][self['throat.trapped'] > 0] = sp.inf
def whitelisting(options, header, data):
    whitelist = sp.loadtxt(options.fn_white, delimiter='\t', dtype='string')
    midx_m = sp.in1d(header, whitelist)
    tags = sp.array([x.split('-')[3] for x in header])
    midx_n = np.core.defchararray.startswith(tags, '1')
    header = header[midx_m | midx_n]
    data = data[:, midx_m | midx_n]
    return header, data
def test_add_boundary_pores(self):
    net = op.Network.CubicDual(shape=[5, 5, 5],
                               label_1='primary', label_2='secondary')
    Ps = net.pores(labels=['surface', 'bottom'], mode='intersection')
    net.add_boundary_pores(pores=Ps, offset=[0, 0, -0.5])
    Ps2 = net.pores(labels=['boundary'], mode='intersection')
    assert Ps.size == Ps2.size
    assert ~sp.any(sp.in1d(Ps, Ps2))
def test_add_boundary_pores_cubicdual(self):
    net = OpenPNM.Network.CubicDual(shape=[5, 5, 5],
                                    label_1='primary', label_2='secondary')
    Ps = net.pores(labels=['surface', 'bottom'], mode='intersection')
    net.add_boundary_pores(pores=Ps, offset=[0, 0, -0.5])
    Ps2 = net.pores(labels=['boundary'], mode='intersection')
    assert Ps.size == Ps2.size
    assert ~sp.any(sp.in1d(Ps, Ps2))
def rate(self, pores='', throats=''):
    if throats != '':
        p1 = self._net.find_connected_pores(throats)[:, 0]
        p2 = self._net.find_connected_pores(throats)[:, 1]
    elif pores != '':
        throats = self._net.find_neighbor_throats(pores, flatten=True,
                                                  mode='not_intersection')
        p1 = self._net.find_connected_pores(throats)[:, 0]
        p2 = self._net.find_connected_pores(throats)[:, 1]
    pores1 = sp.copy(p1)
    pores2 = sp.copy(p2)
    # Swap entries so pores1 holds the internal pores and pores2 the external
    # ones (~ replaces the deprecated unary minus on boolean masks)
    pores1[~sp.in1d(p1, pores)] = p2[~sp.in1d(p1, pores)]
    pores2[~sp.in1d(p1, pores)] = p1[~sp.in1d(p1, pores)]
    X1 = self._result[pores1]
    X2 = self._result[pores2]
    g = self._conductance[throats]
    R = sp.sum(sp.multiply(g, (X1 - X2)))
    return R
def _build_RHS_matrix(self, modified_RHS_pores=None, RHS_added_data=None,
                      mode='overwrite'):
    r'''
    This builds the right-hand-side matrix for the linear solver.
    '''
    if mode == 'overwrite':
        A_dim = self._coeff_dimension
        b = sp.zeros([A_dim, 1])
        try:
            Dir_pores = self.pores(self._phase.name + '_Dirichlet')
            Dir_pores_vals = self['pore.' + self._phase.name + '_bcval_Dirichlet'][Dir_pores]
            b[Dir_pores] = sp.reshape(Dir_pores_vals, [len(Dir_pores), 1])
        except:
            pass
        try:
            individual_Neu_pores = self.pores(self._phase.name + '_Neumann')
            individual_Neu_pores_vals = self['pore.' + self._phase.name + '_bcval_Neumann'][individual_Neu_pores]
            b[individual_Neu_pores] = sp.reshape(individual_Neu_pores_vals,
                                                 [len(individual_Neu_pores), 1])
        except:
            pass
        try:
            self.pores(self._phase.name + '_Neumann_group')
            pnum = self._net.num_pores()
            b[sp.r_[pnum:(pnum + len(self._group_Neumann_vals))]] = \
                sp.reshape(self._group_Neumann_vals[sp.r_[0:len(self._group_Neumann_vals)]],
                           [len(self._group_Neumann_vals), 1])
        except:
            pass
    if mode in ['overwrite', 'modify_RHS']:
        try:
            b = sp.copy(self.b)
        except:
            pass
        # Adding necessary terms such as source terms to the RHS for
        # non-Dirichlet pores
        if modified_RHS_pores is not None and RHS_added_data is not None:
            if sp.size(modified_RHS_pores) == sp.size(RHS_added_data):
                p = sp.in1d(modified_RHS_pores, self._non_Dir_diag)
                data = RHS_added_data[p]
                b[modified_RHS_pores[p]] = b[modified_RHS_pores[p]] + \
                    data.reshape([len(data), 1])
            else:
                raise Exception('Provided data and pores for modifying RHS '
                                'matrix should have the same size!')
    return b
def read_from_spplate(in_dir, thid, ra, dec, zqso, plate, mjd, fid, order,
                      log=None, best_obs=False):

    pix_data = {}
    unique_plates = sp.unique(plate)
    print("reading {} plates".format(len(unique_plates)))

    for p in unique_plates:
        wplate = plate == p
        plate_mjd = "{}-*".format(p)
        mjd_in_plate = sp.unique(mjd[wplate])

        spplates = glob.glob(in_dir + "/{}/spPlate-{}.fits".format(p, plate_mjd))
        mjds_found = sp.array([spfile.split("-")[-1].replace(".fits", '')
                               for spfile in spplates]).astype(int)
        wmissing = ~sp.in1d(mjd_in_plate, mjds_found)
        if wmissing.sum() > 0:
            for m in mjd_in_plate[wmissing]:
                print("INFO: can't find spplate {} {}".format(p, m))
                if log is not None:
                    log.write("INFO: can't find spplate {} {}\n".format(p, m))

        for spplate in spplates:
            h = fitsio.FITS(spplate)
            head0 = h[0].read_header()
            MJD = head0["MJD"]

            t0 = time.time()

            wfib = wplate
            if best_obs:
                ## select only the objects which have specified mjd within this plate
                wmjd = mjd == MJD
                wfib = wplate & wmjd

            coeff0 = head0["COEFF0"]
            coeff1 = head0["COEFF1"]

            flux = h[0].read()
            ivar = h[1].read() * (h[2].read() == 0)
            llam = coeff0 + coeff1 * sp.arange(flux.shape[1])

            ## now convert all those fluxes into forest objects
            for (t, r, d, z, p, m, f) in zip(thid[wfib], ra[wfib], dec[wfib],
                                             zqso[wfib], plate[wfib],
                                             mjd[wfib], fid[wfib]):
                index = f - 1
                d = forest(llam, flux[index], ivar[index], t, r, d, z, p, m, f, order)
                if t in pix_data:
                    pix_data[t] += d
                else:
                    pix_data[t] = d
                if log is not None:
                    log.write("{} read from file {} and mjd {}\n".format(t, spplate, m))

            print("INFO: read {} from {} in {} per spec. Progress: {} of {} \n".format(
                wfib.sum(), os.path.basename(spplate),
                (time.time() - t0) / (wfib.sum() + 1e-3),
                len(pix_data), len(thid)))
            h.close()

    data = list(pix_data.values())
    return data
def evaluate_trapping(self, p_outlets):
    r"""
    Finds trapped pores and throats after a full ordinary percolation
    simulation has been run.

    Parameters
    ----------
    p_outlets : array_like
        A list of pores that define the wetting phase outlets.
        Disconnection from these outlets results in trapping.

    Returns
    -------
    It creates arrays called ``pore.trapped`` and ``throat.trapped``, but
    also adjusts the ``pore.inv_Pc`` and ``throat.inv_Pc`` arrays to set
    trapped locations to have infinite invasion pressure.
    """
    self['pore.trapped'] = sp.zeros([self.Np, ], dtype=float)
    self['throat.trapped'] = sp.zeros([self.Nt, ], dtype=float)
    try:
        # Get points used in OP
        inv_points = sp.unique(self['pore.inv_Pc'])
    except:
        raise Exception('Ordinary percolation has not been run!')
    tind = self._net.throats()
    conns = self._net.find_connected_pores(tind)
    for inv_val in inv_points[0:-1]:
        # Find clusters of defender pores
        Pinvaded = self['pore.inv_Pc'] <= inv_val
        Cstate = sp.sum(Pinvaded[conns], axis=1)
        Tinvaded = self['throat.inv_Pc'] <= inv_val
        # 0 = all open, 1 = 1 pore filled,
        # 2 = 2 pores filled, 3 = 2 pores + 1 throat filled
        Cstate = Cstate + Tinvaded
        clusters = self._net.find_clusters(Cstate == 0)
        # Clean up clusters (invaded = -1, defended >= 0)
        clusters = clusters * (~Pinvaded) - (Pinvaded)
        # Identify clusters connected to outlet sites
        out_clusters = sp.unique(clusters[p_outlets])
        trapped_pores = ~sp.in1d(clusters, out_clusters)
        trapped_pores[Pinvaded] = False
        if sum(trapped_pores) > 0:
            inds = (self['pore.trapped'] == 0) * trapped_pores
            self['pore.trapped'][inds] = inv_val
            trapped_throats = self._net.find_neighbor_throats(trapped_pores)
            trapped_throat_array = np.asarray([False] * len(Cstate))
            trapped_throat_array[trapped_throats] = True
            inds = (self['throat.trapped'] == 0) * trapped_throat_array
            self['throat.trapped'][inds] = inv_val
            inds = (self['throat.trapped'] == 0) * (Cstate == 2)
            self['throat.trapped'][inds] = inv_val
    self['pore.trapped'][self['pore.trapped'] > 0] = sp.inf
    self['throat.trapped'][self['throat.trapped'] > 0] = sp.inf
    self['pore.inv_Pc'][self['pore.trapped'] > 0] = sp.inf
    self['throat.inv_Pc'][self['throat.trapped'] > 0] = sp.inf
def test_map_pores(self):
    a = self.geo21['pore._id']
    b = self.geo22['pore._id']
    assert a.size == self.geo21.Np
    assert b.size == self.geo22.Np
    assert ~sp.any(sp.in1d(a, b))
    Pgeo21 = self.net2.map_pores(pores=self.geo21.Ps, origin=self.geo21)
    assert sp.all(Pgeo21 == self.net2.pores(self.geo21.name))
    Pgeo22 = self.net2.map_pores(pores=self.geo22.Ps, origin=self.geo22)
    assert sp.all(Pgeo22 == self.net2.pores(self.geo22.name))
def find_interface_throats(self, labels=[]):
    r'''
    Finds the throats that join two pore labels.

    Parameters
    ----------
    labels : list of strings
        The labels of the two pore groups whose interface is sought

    Returns
    -------
    An array of throat numbers that connect the given pore groups

    Notes
    -----
    This method is meant to find interfaces between TWO groups, regions or
    clusters of pores (as defined by their label).  If the input labels
    overlap or are not adjacent, an empty array is returned.

    Examples
    --------
    >>> pn = OpenPNM.Network.TestNet()
    >>> pn.set_pore_info(label='domain1', locations=[0, 1, 2])
    >>> pn.set_pore_info(label='domain2', locations=[5, 6, 7])
    >>> pn.find_interface_throats(labels=['domain1', 'domain2'])
    array([1, 4, 7])
    '''
    Tind = sp.array([], ndmin=1)
    if sp.shape(labels)[0] != 2:
        self._logger.error('Exactly two labels must be given')
    else:
        P1 = self.get_pore_indices(labels=labels[0])
        P2 = self.get_pore_indices(labels=labels[1])
        # Check if labels overlap
        if sp.sum(sp.in1d(P1, P2)) > 0:
            self._logger.error('Some labels overlap, interface cannot be found')
        else:
            T1 = self.find_neighbor_throats(P1)
            T2 = self.find_neighbor_throats(P2)
            Tmask = sp.in1d(T1, T2)
            Tind = T1[Tmask]
    return Tind
def on_shifted_sp_curves(self, t):
    a = P4Rm()
    if a.AllDataDict["model"] == 0:
        temp_1 = arange(2, len(a.ParamDict["sp"]) + 1)
        temp_2 = temp_1 * t / (len(a.ParamDict["sp"]))
        P4Rm.ParamDict["x_sp"] = t - temp_2
        shifted_sp = a.ParamDict["sp"][:-1:]
        temp_3 = in1d(around(a.ParamDict["depth"], decimals=3),
                      around(a.ParamDict["x_sp"], decimals=3))
        temp_4 = a.ParamDict["strain_i"][temp_3]
        P4Rm.ParamDict["scale_strain"] = shifted_sp / temp_4
        P4Rm.ParamDict["scale_strain"][a.ParamDict["scale_strain"] == 0] = 1.0
        P4Rm.ParamDict["strain_shifted"] = (asarray(shifted_sp) * 100.0 /
                                            a.ParamDict["scale_strain"])
        P4Rm.ParamDict["stain_out"] = a.ParamDict["sp"][-1]
    elif a.AllDataDict["model"] == 1:
        temp_1 = arange(0, len(a.ParamDict["sp"]) + 1 - 3)
        temp_2 = temp_1 * t / (len(a.ParamDict["sp"]) - 3)
        P4Rm.ParamDict["x_sp"] = t - temp_2
        shifted_sp = a.ParamDict["sp"][1:-1:]
        temp_3 = in1d(around(a.ParamDict["depth"], decimals=3),
                      around(a.ParamDict["x_sp"], decimals=3))
        temp_4 = a.ParamDict["strain_i"][temp_3]
        P4Rm.ParamDict["scale_strain"] = shifted_sp / temp_4
        P4Rm.ParamDict["scale_strain"][a.ParamDict["scale_strain"] == 0] = 1.0
        P4Rm.ParamDict["strain_shifted"] = (asarray(shifted_sp) * 100.0 /
                                            a.ParamDict["scale_strain"])
        temp_5 = array([a.ParamDict["sp"][0], a.ParamDict["sp"][-1]])
        P4Rm.ParamDict["stain_out"] = temp_5
    elif a.AllDataDict["model"] == 2:
        x_sp_temp = []
        x_sp_temp.append(t * (1 - a.ParamDict["sp"][1]))
        x_sp_temp.append(t * (1 - a.ParamDict["sp"][1] + a.ParamDict["sp"][2] / 2))
        x_sp_temp.append(t * (1 - a.ParamDict["sp"][1] - a.ParamDict["sp"][3] / 2))
        x_sp_temp.append(t * 0.05)
        P4Rm.ParamDict["x_sp"] = x_sp_temp

        y_sp_temp = []
        y_sp_temp.append(a.ParamDict["sp"][0])
        y_sp_temp.append(a.ParamDict["sp"][0] / 2)
        y_sp_temp.append(a.ParamDict["sp"][0] / 2 + a.ParamDict["sp"][6] / 2)
        y_sp_temp.append(a.ParamDict["sp"][6])
        P4Rm.ParamDict["strain_shifted"] = y_sp_temp
def make_unique_by_strain(event_list):
    # event_list = make_unique_by_strain(event_list)

    rm_idx = []
    for i in range(1, event_list.shape[0]):
        if i % 1000 == 0:
            print '.',
            if i % 10000 == 0:
                print '%i' % i

        old_coords = event_list[i - 1].get_coords(trafo=True)
        curr_coords = event_list[i].get_coords(trafo=True)

        if old_coords.shape[0] == curr_coords.shape[0] and sp.all(old_coords == curr_coords):
            ### assertion that we did everything right
            if event_list[i - 1].chr == event_list[i].chr:
                assert(event_list[i - 1].strand == event_list[i].strand)
                assert(event_list[i].strain.shape[0] == 1)
            else:
                assert(event_list[i - 1].gene_name != event_list[i].gene_name)

            idx = sp.where(event_list[i - 1].strain == event_list[i].strain[0])[0]
            if idx.shape[0] > 0:
                assert(idx.shape[0] == 1)
                assert(sp.all(event_list[i].get_coords(trafo=True) ==
                              event_list[i - 1].get_coords(trafo=True)))
                if not event_list[i].gene_name[0] in event_list[i - 1].gene_name:
                    event_list[i - 1].gene_name = sp.r_[event_list[i - 1].gene_name,
                                                        [event_list[i].gene_name[0]]]
                event_list[i] = event_list[i - 1]
            else:
                event_list[i].strain = sp.r_[[event_list[i - 1].strain[0]],
                                             event_list[i].strain]
                assert(sp.all(sp.sort(event_list[i].strain) ==
                              sp.sort(sp.unique(event_list[i].strain))))
                ### TODO !!!!!!!!!!!!! make sure that we keep different coordinates if the strains differ ...
                if not event_list[i].gene_name[0] in event_list[i - 1].gene_name:
                    event_list[i].gene_name = sp.r_[event_list[i - 1].gene_name,
                                                    [event_list[i].gene_name[0]]]
            rm_idx.append(i - 1)

    print 'events dropped: %i' % len(rm_idx)
    keep_idx = sp.where(~sp.in1d(sp.arange(event_list.shape[0]), rm_idx))[0]
    event_list = event_list[keep_idx]

    return event_list
def timereduce(self, timelims=None, timesselected=None):
    assert (timelims is not None) or (timesselected is not None), \
        "Need a set of limits or selected set of times"

    if timelims is not None:
        tkeep = sp.logical_and(self.Time_Vector >= timelims[0],
                               self.Time_Vector < timelims[1])
    if timesselected is not None:
        tkeep = sp.in1d(self.Time_Vector, timesselected)
    # prune the arrays
    self.Time_Vector = self.Time_Vector[tkeep]
    self.Param_List = self.Param_List[:, tkeep]
    self.Velocity = self.Velocity[:, tkeep]
def split_data(RV):
    sp.random.seed(0)
    n_train = int(4 * RV["Y"].shape[0] / 5.0)
    n_test = int(1 * RV["Y"].shape[0] / 10.0)
    idxs = sp.random.permutation(RV["Y"].shape[0])
    idxs_train = idxs[:n_train]
    idxs_test = idxs[n_train:(n_train + n_test)]
    idxs_val = idxs[(n_train + n_test):]
    Itrain = sp.in1d(sp.arange(RV["Y"].shape[0]), idxs_train)
    Itest = sp.in1d(sp.arange(RV["Y"].shape[0]), idxs_test)
    Ival = sp.in1d(sp.arange(RV["Y"].shape[0]), idxs_val)
    out = {}
    for key in RV.keys():
        out["%s_train" % key] = RV[key][Itrain]
        out["%s_val" % key] = RV[key][Ival]
        out["%s_test" % key] = RV[key][Itest]
    return out
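# Usage sketch for split_data (illustrative shapes; every array in RV must
# share the first dimension of RV["Y"]):
RV = {"Y": sp.random.randn(100, 1), "X": sp.random.randn(100, 5)}
out = split_data(RV)
# out["Y_train"].shape[0] -> 80; out["Y_test"].shape[0] -> 10;
# out["Y_val"].shape[0] -> 10 (an 80/10/10 split, reproducible via seed 0)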
def test_from_neighbor_throats_max(self):
    self.geo.pop('pore.seed', None)
    self.geo.models.pop('pore.seed', None)
    self.geo.models.pop('throat.seed', None)
    self.geo['throat.seed'] = sp.rand(self.net.Nt, )
    self.geo.add_model(model=mods.from_neighbor_throats,
                       propname='pore.seed',
                       throat_prop='throat.seed',
                       mode='max')
    assert sp.all(sp.in1d(self.geo['pore.seed'], self.geo['throat.seed']))
    pmin = sp.amin(self.geo['pore.seed'])
    tmin = sp.amin(self.geo['throat.seed'])
    assert pmin >= tmin
def argintersect_left(a, b):
    """
    Find indices in a whose corresponding values are in b
    ----------------------------------------------------------------------
    Input:
        a : array for which indices into the intersection with b are returned
        b : array to be intersected with a
    ----------------------------------------------------------------------
    Output:
        the indices of elements of a which are in the intersection of a and b
    ----------------------------------------------------------------------
    """
    return sp.arange(a.shape[0])[sp.in1d(a, b)]
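# Usage sketch (illustrative): positions in `a` whose values also occur in `b`.
a = sp.array([3, 1, 4, 1, 5])
b = sp.array([1, 5])
argintersect_left(a, b)   # -> array([1, 3, 4])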
def rate(self, pores='', mode='group'):
    r'''
    Send a list of pores and receive the net rate of material moving into
    them.

    Parameters
    ----------
    pores : array_like
        The pores where the net rate will be calculated
    mode : string, optional
        Controls how to return the rate.  Options are:
        - 'group' (default): returns the cumulative rate moving into the pores
        - 'single': calculates the rate for each pore individually
    '''
    pores = sp.array(pores, ndmin=1)
    R = []
    if mode == 'group':
        iteration = 1
    elif mode == 'single':
        iteration = sp.shape(pores)[0]
    for i in sp.r_[0:iteration]:
        if mode == 'group':
            P = pores
        elif mode == 'single':
            P = pores[i]
        throats = self._net.find_neighbor_throats(P, flatten=True,
                                                  mode='not_intersection')
        p1 = self._net.find_connected_pores(throats)[:, 0]
        p2 = self._net.find_connected_pores(throats)[:, 1]
        pores1 = sp.copy(p1)
        pores2 = sp.copy(p2)
        # Changes to pores1 and pores2 to make them the internal and external
        # pores (~ replaces the deprecated unary minus on boolean masks)
        pores1[~sp.in1d(p1, P)] = p2[~sp.in1d(p1, P)]
        pores2[~sp.in1d(p1, P)] = p1[~sp.in1d(p1, P)]
        X1 = self[self._quantity][pores1]
        X2 = self[self._quantity][pores2]
        g = self['throat.conductance'][throats]
        R.append(sp.sum(sp.multiply(g, (X2 - X1))))
    return sp.array(R, ndmin=1)
def get_conf_events(options, gid):
    event_info = []

    for event_type in options.event_types:
        IN = h5py.File(os.path.join(options.outdir, 'merge_graphs_%s_C%i.counts.hdf5' %
                                    (event_type, options.confidence)), 'r')
        if 'conf_idx' in IN and IN['conf_idx'].shape[0] > 0 and IN['conf_idx'][0] != -1:
            conf_idx = IN['conf_idx'][:].astype('int') - 1
            k_idx = sp.where(sp.in1d(IN['gene_idx'][:][conf_idx].astype('int') - 1, gid))[0]
            if k_idx.shape[0] > 0:
                event_info.extend([[event_type, x] for x in conf_idx[k_idx]])
        IN.close()

    return sp.array(event_info, dtype='str')
def test_neighbor_max(self):
    catch = self.geo.pop('pore.seed', None)
    catch = self.geo.models.pop('pore.seed', None)
    catch = self.geo.models.pop('throat.seed', None)
    mod = gm.pore_misc.neighbor
    self.geo['throat.seed'] = sp.rand(self.net.Nt,)
    self.geo.models.add(model=mod,
                        propname='pore.seed',
                        throat_prop='throat.seed',
                        mode='max')
    assert sp.all(sp.in1d(self.geo['pore.seed'], self.geo['throat.seed']))
    pmin = sp.amin(self.geo['pore.seed'])
    tmin = sp.amin(self.geo['throat.seed'])
    assert pmin >= tmin
def test_conduit_conductance_loose(self):
    self.phase['pore.occupancy'][[19, 20]] = 0
    t1 = self.net.Ts[self.phase['throat.occupancy'] == 0]
    t2 = self.net.Ts[~sp.in1d(self.net.Ts, t1)]
    self.phys1.models.add(propname='throat.cond_conductance',
                          throat_conductance='throat.diffusive_conductance',
                          model=pm.multiphase.conduit_conductance,
                          mode='loose', factor=0)
    self.phys2.models.add(propname='throat.cond_conductance',
                          throat_conductance='throat.diffusive_conductance',
                          model=pm.multiphase.conduit_conductance,
                          mode='loose', factor=0)
    assert sp.all(self.phase['throat.cond_conductance'][t1] == 0)
    assert sp.all(self.phase['throat.cond_conductance'][t2] != 0)
def _handle_multi_entries(header, data):
    cols_of_interest = [['tumor_wgs_submitter_specimen_id',
                         'tumor_wgs_icgc_specimen_id',
                         'tumor_wgs_submitter_sample_id',
                         'tumor_wgs_icgc_sample_id',
                         'tumor_wgs_aliquot_id',
                         'tumor_wgs_oxog_score',
                         'tumor_wgs_ContEST',
                         'tumor_wgs_Stars',
                         'tumor_wgs_bwa_alignment_gnos_repo',
                         'tumor_wgs_bwa_alignment_gnos_id',
                         'is_mar2016_tumor_wgs_bwa_alignment',
                         'tumor_wgs_bwa_alignment_bam_file_name',
                         'tumor_wgs_minibam_gnos_repo',
                         'tumor_wgs_minibam_gnos_id',
                         'is_mar2016_tumor_wgs_minibam',
                         'tumor_wgs_minibam_bam_file_name',
                         'sanger_variant_calling_file_name_prefix',
                         'dkfz_embl_variant_calling_file_name_prefix',
                         'broad_variant_calling_file_name_prefix',
                         'muse_variant_calling_file_name_prefix',
                         'broad_tar_variant_calling_file_name_prefix',
                         'tumor_wgs_has_matched_rna_seq'],
                        ['tumor_rna_seq_submitter_specimen_id',
                         'tumor_rna_seq_icgc_specimen_id',
                         'tumor_rna_seq_submitter_sample_id',
                         'tumor_rna_seq_icgc_sample_id',
                         'tumor_rna_seq_aliquot_id',
                         'tumor_rna_seq_star_alignment_gnos_repo',
                         'tumor_rna_seq_star_alignment_gnos_id',
                         'is_mar2016_tumor_rna_seq_star_alignment',
                         'tumor_rna_seq_star_alignment_bam_file_name',
                         'tumor_rna_seq_tophat_alignment_gnos_repo',
                         'tumor_rna_seq_tophat_alignment_gnos_id',
                         'is_mar2016_tumor_rna_seq_tophat_alignment',
                         'tumor_rna_seq_tophat_alignment_bam_file_name']]

    for cols in cols_of_interest:
        c_idx = sp.where(sp.in1d(header, cols))[0]
        r_idx = sp.where([',' in x for x in data[:, c_idx[0]]])[0]
        for r in r_idx:
            data_ = sp.array([x.split(',') for x in data[r, c_idx]])
            assert len(data_.shape) > 1
            assert data_.shape[1] > 1
            for r2 in range(1, data_.shape[1]):
                data = sp.r_[data, data[r, :][sp.newaxis, :]]
                data[-1, c_idx] = data_[:, r2]
            data[r, c_idx] = data_[:, 0]
    return data
def _generate_throats(self):
    r"""
    Generate the throats (connections, numbering and types)
    """
    self._logger.info("generate_throats: Define connections between pores")

    [Nx, Ny, Nz] = sp.shape(self._template)
    Np = Nx*Ny*Nz
    ind = np.arange(0, Np)

    # Generate throats based on pattern of the adjacency matrix
    # This is taken from Cubic
    tpore1_1 = ind[(ind % Nx) < (Nx-1)]
    tpore2_1 = tpore1_1 + 1
    tpore1_2 = ind[(ind % (Nx*Ny)) < (Nx*(Ny-1))]
    tpore2_2 = tpore1_2 + Nx
    tpore1_3 = ind[(ind % Np) < (Nx*Ny*(Nz-1))]
    tpore2_3 = tpore1_3 + Nx*Ny
    tpore1 = sp.hstack((tpore1_1, tpore1_2, tpore1_3))
    tpore2 = sp.hstack((tpore2_1, tpore2_2, tpore2_3))
    connections = sp.vstack((tpore1, tpore2)).T
    connections = connections[sp.lexsort((connections[:, 1], connections[:, 0]))]

    # Remove throats to non-active pores
    img_ind = self.get_pore_data(prop='voxel_index')
    temp0 = sp.in1d(connections[:, 0], img_ind)
    temp1 = sp.in1d(connections[:, 1], img_ind)
    tind = temp0 * temp1
    connections = connections[tind]

    # Need a cleaner way to do this other than voxel_to_pore map... figure out later
    self.set_throat_data(prop='connections', data=self._voxel_to_pore_map[connections])
    self.set_throat_info(label='all', locations=sp.ones(sp.sum(tind,), dtype=bool))
    self.set_throat_data(prop='numbering', data=np.arange(0, sp.sum(tind)))
    self._logger.debug("generate_throats: End of method")
def _do_one_inner_iteration(self):
    if (self._BCtypes == 0).all():
        raise Exception('No boundary condition has been applied to this network.')
        self._result = sp.zeros(self._net.num_pores())
    else:
        self._logger.info("Creating Coefficient matrix for the algorithm")
        A = self._build_coefficient_matrix()
        self._logger.info("Creating RHS matrix for the algorithm")
        B = self._build_RHS_matrix()
        self._logger.info("Solving AX = B for the sparse matrices")
        X = sprslin.spsolve(A, B)
        # ~ replaces the deprecated unary minus on boolean masks
        self._Neumann_super_X = X[~sp.in1d(sp.r_[0:len(X)],
                                           sp.r_[0:self._net.num_pores()])]
        self._result = X[sp.r_[0:self._net.num_pores()]]
    return self._result
def curate_alt_prime(event_list, CFG):
    # event_list = curate_alt_prime(event_list)

    if event_list.shape[0] == 0:
        return event_list

    rm_idx = []
    corr_count = 0

    for i in range(event_list.shape[0]):
        ### check if we have introns of zero length
        #if sp.any(event_list[i].exons1[:, 1] - event_list[i].exons1[:, 1] < 2) or sp.any(event_list[i].exons2[:, 1] - event_list[i].exons2[:, 1] < 2):
        if (event_list[i].exons1[1, 0] - event_list[i].exons1[0, 1] < 1) or \
           (event_list[i].exons2[1, 0] - event_list[i].exons2[0, 1] < 1):
            rm_idx.append(i)
            continue

        ### check if alt exons overlap, otherwise we cannot curate (trim to shortest length)
        if (sp.all(event_list[i].exons1[0, :] == event_list[i].exons2[0, :]) and
                (event_list[i].exons1[1, 1] <= event_list[i].exons2[1, 0] or
                 event_list[i].exons1[1, 0] >= event_list[i].exons2[1, 1])) or \
           (sp.all(event_list[i].exons1[1, :] == event_list[i].exons2[1, :]) and
                (event_list[i].exons1[0, 1] <= event_list[i].exons2[0, 0] or
                 event_list[i].exons1[0, 0] >= event_list[i].exons2[0, 1])):
            continue

        if sp.all(event_list[i].exons1[0, :] == event_list[i].exons2[0, :]):
            if event_list[i].exons1[1, 1] > event_list[i].exons2[1, 1]:
                event_list[i].exons1[1, 1] = event_list[i].exons2[1, 1]
                corr_count += 1
            elif event_list[i].exons1[1, 1] < event_list[i].exons2[1, 1]:
                event_list[i].exons2[1, 1] = event_list[i].exons1[1, 1]
                corr_count += 1
        elif sp.all(event_list[i].exons1[1, :] == event_list[i].exons2[1, :]):
            if event_list[i].exons1[0, 0] > event_list[i].exons2[0, 0]:
                event_list[i].exons2[0, 0] = event_list[i].exons1[0, 0]
                corr_count += 1
            elif event_list[i].exons1[0, 0] < event_list[i].exons2[0, 0]:
                event_list[i].exons1[0, 0] = event_list[i].exons2[0, 0]
                corr_count += 1

    ### remove events with non-overlapping alt_exons
    if len(rm_idx) > 0:
        keep_idx = sp.where(~sp.in1d(sp.arange(event_list.shape[0]), rm_idx))[0]
        event_list = event_list[keep_idx]

    print 'Corrected %i events' % corr_count
    print 'Removed %i events' % len(rm_idx)

    return event_list