Пример #1
def intersect_rows(array1, array2, index=None):
    """Return the rows that occur in both ``array1`` and ``array2``.

    Every row is viewed as one structured element so that the 1-D set
    routines compare whole rows at once.

    Parameters
    ----------
    array1, array2 : 2-D arrays
        Arrays whose rows are intersected.  They are assumed to share the
        same dtype and number of columns (not checked here).
    index : bool, optional
        When True, the positions of the common rows are returned as well.

    Returns
    -------
    rows : 2-D array
        The unique common rows, in the sorted order produced by
        ``intersect1d``.  If either input has no rows, that empty input is
        returned unchanged.
    (rows, a1_i, a2_i) : tuple
        Returned instead of ``rows`` when ``index`` is True; ``a1_i`` and
        ``a2_i`` are the row indices into ``array1`` and ``array2`` that
        hold a common row.
    """
    # Keep the original loose comparison so existing callers behave the same.
    want_index = (index == True)  # noqa: E712

    # Nothing can intersect with an empty array; array1 is checked first.
    for arr in (array1, array2):
        if arr.shape[0] == 0:
            if want_index:
                return (arr, sp.zeros((0, )), sp.zeros((0, )))
            return arr

    # One structured record per row: shape (n_rows, 1).
    array1_v = array1.view([('', array1.dtype)] * array1.shape[1])
    array2_v = array2.view([('', array2.dtype)] * array2.shape[1])
    array_i = sp.intersect1d(array1_v, array2_v)

    # Back to a plain (n_common, n_columns) array.
    rows = array_i.view(array1.dtype).reshape(array_i.shape[0],
                                              array1.shape[1])
    if want_index:
        a1_i = sp.where(sp.in1d(array1_v, array_i))[0]
        a2_i = sp.where(sp.in1d(array2_v, array_i))[0]
        return (rows, a1_i, a2_i)
    return rows
 def _generate_masked_mesh(self, cell_mask=None):
     # NOTE(review): this copy of the method is damaged -- the docstring
     # quotes are gone, the boundary_dict literal has lost its structure and
     # closing brace, and the body of the final ``for`` loop is missing.
     # Restore it from the upstream source before relying on it.
     Generates the mesh based on the cell mask provided
     # Default mask: every cell of the data map is kept.
     if cell_mask is None:
         cell_mask = sp.ones(self.data_map.shape, dtype=bool)
     # initializing arrays
     self._edges = sp.ones(0, dtype=str)
     self._merge_patch_pairs = sp.ones(0, dtype=str)
     # building face arrays
     # mapper numbers the unmasked cells consecutively (0..n-1) in raveled
     # order; masked cells receive a large negative sentinel value.
     mapper = sp.ravel(sp.array(cell_mask, dtype=int))
     mapper[mapper == 1] = sp.arange(sp.count_nonzero(mapper))
     mapper = sp.reshape(mapper, (self.nz, self.nx))
     mapper[~cell_mask] = -sp.iinfo(int).max
     boundary_dict = {
             {'bottom': mapper[0, :][cell_mask[0, :]]},
             {'top': mapper[-1, :][cell_mask[-1, :]]},
             {'left': mapper[:, 0][cell_mask[:, 0]]},
             {'right': mapper[:, -1][cell_mask[:, -1]]},
             {'front': mapper[cell_mask]},
             {'back': mapper[cell_mask]},
             {'bottom': [], 'top': [], 'left': [], 'right': []}
     # determining cells linked to a masked cell
     # From here on cell_mask holds the flat indices of the masked cells.
     cell_mask = sp.where(~sp.ravel(cell_mask))[0]
     inds = sp.in1d(self._field._cell_interfaces, cell_mask)
     inds = sp.reshape(inds, (len(self._field._cell_interfaces), 2))
     # Keep interfaces where exactly one of the two cells is masked.
     inds = inds[:, 0].astype(int) + inds[:, 1].astype(int)
     inds = (inds == 1)
     links = self._field._cell_interfaces[inds]
     # adjusting order so masked cells are all on links[:, 1]
     swap = sp.in1d(links[:, 0], cell_mask)
     links[swap] = links[swap, ::-1]
     # setting side based on index difference
     sides = sp.ndarray(len(links), dtype='<U6')
     sides[sp.where(links[:, 1] == links[:, 0]-self.nx)[0]] = 'bottom'
     sides[sp.where(links[:, 1] == links[:, 0]+self.nx)[0]] = 'top'
     sides[sp.where(links[:, 1] == links[:, 0]-1)[0]] = 'left'
     sides[sp.where(links[:, 1] == links[:, 0]+1)[0]] = 'right'
     # adding each block to the internal face dictionary
     inds = sp.ravel(mapper)[links[:, 0]]
     # NOTE(review): the body of this loop is missing from this copy.
     for side, block_id in zip(sides, inds):
     self.set_boundary_patches(boundary_dict, reset=True)
Пример #3
Пример #4
def plot_overlap_ps(result_file, ss_file='/Users/bjarnivilhjalmsson/data/GIANT/GIANT_HEIGHT_Wood_et_al_2014_publicrelease_HapMapCeuFreq.txt',
                   fig_filename='/Users/bjarnivilhjalmsson/data/tmp/manhattan_combPC_HGT.png', method='combPC',
                   ylabel='Comb. PC (HIP,WC,HGT,BMI) $-log_{10}(P$-value$)$', xlabel='Height $-log_{10}(P$-value$)$', p_thres=0.00001):
    """Scatter-plot combined p-values against marginal summary-statistic p-values.

    Both tables are read with ``pandas.read_table``.  Result SNPs with a
    combined p-value below ``p_thres`` are matched by SNP id against the
    summary statistics, and ``-log10`` of both p-values is plotted for the
    SNPs found in both tables, together with a dashed diagonal.

    :param result_file: table with a 'SNPid' column and a 'pval' (method
        'MVT') or 'combPC' (method 'combPC') column.
    :param ss_file: summary statistics table with a 'p' column and SNP ids
        in a 'MarkerName' or 'SNP' column.
    :param fig_filename: not used by the code visible here.
    :param method: 'MVT' or 'combPC'; selects the p-value column.
    :param ylabel: not used by the code visible here.
    :param xlabel: not used by the code visible here.
    :param p_thres: only result SNPs with p-value below this are kept.
    :raises ValueError: if ``method`` is not one of the supported values.
    :raises Exception: if the summary statistics have no known SNP id column.
    """
    # Parse results and SS file
    res_table = pandas.read_table(result_file)
    ss_table = pandas.read_table(ss_file)

    res_sids = sp.array(res_table['SNPid'])
    if method == 'MVT':
        comb_ps = sp.array(res_table['pval'])
    elif method == 'combPC':
        comb_ps = sp.array(res_table['combPC'])
    else:
        # Fail early instead of hitting an undefined comb_ps further down.
        raise ValueError("Unknown method: %r" % (method,))

    if 'MarkerName' in ss_table.keys():
        ss_sids = sp.array(ss_table['MarkerName'])
    elif 'SNP' in ss_table.keys():
        ss_sids = sp.array(ss_table['SNP'])
    else:
        # Only reached when neither id column exists.
        raise Exception("Don't know where to look for rs IDs")
    marg_ps = sp.array(ss_table['p'])

    # Filtering boring p-values
    res_p_filter = comb_ps < p_thres
    res_sids = res_sids[res_p_filter]
    comb_ps = comb_ps[res_p_filter]

    common_sids = sp.intersect1d(res_sids, ss_sids)
    print('Found %d SNPs in common' % (len(common_sids)))
    ss_filter = sp.in1d(ss_sids, common_sids)
    res_filter = sp.in1d(res_sids, common_sids)
    ss_sids = ss_sids[ss_filter]
    res_sids = res_sids[res_filter]
    marg_ps = marg_ps[ss_filter]
    comb_ps = comb_ps[res_filter]

    # Sorting both sides by SNP id lines the two p-value vectors up
    # (presumes SNP ids are unique within each table).
    print('Now sorting')
    ss_index = sp.argsort(ss_sids)
    res_index = sp.argsort(res_sids)
    marg_ps = -sp.log10(marg_ps[ss_index])
    comb_ps = -sp.log10(comb_ps[res_index])

    with plt.style.context('fivethirtyeight'):
        plt.plot(marg_ps, comb_ps, 'b.', alpha=0.2)
        (x_min, x_max) = plt.xlim()
        (y_min, y_max) = plt.ylim()
        plt.plot([x_min, x_max], [y_min, y_max], 'k--', alpha=0.2)
def test_trim_extend():
    """Check pore/throat counts and neighbours across a trim and an extend."""
    pn = OpenPNM.Network.Cubic(shape=[5, 5, 5])
    assert sp.all(sp.in1d(pn.find_neighbor_pores(pores=0), [1, 5, 25]))
    assert [pn.Np, pn.Nt] == [125, 300]
    # Remove one corner pore: the counts asserted next (124 pores, 297
    # throats) only make sense after this step, which was missing.
    pn.trim(pores=[0])
    assert sp.all(sp.in1d(pn.find_neighbor_pores(pores=0), [1, 5, 25]))
    assert [pn.Np, pn.Nt] == [124, 297]
    # Add one pore back, connected to pore 0 by a single new throat.
    pn.extend(pore_coords=[0, 0, 0], throat_conns=[[124, 0]])
    assert [pn.Np, pn.Nt] == [125, 298]
    assert sp.all(sp.in1d(pn.find_neighbor_pores(pores=0), [1, 5, 25, 124]))
Пример #6
Пример #7
def make_unique_by_event(event_list):
    """Drop events that share their inner coordinates with the kept neighbour.

    Consecutive events whose inner coordinates are identical differ only in
    their flanking size; the longest of them is kept.  Strains that only the
    dropped event carried are appended to the kept event, and in that case
    the gene names of both are merged.

    Parameters
    ----------
    event_list : 1-D array of event objects
        Events providing ``get_inner_coords``, ``get_len``, ``chr``,
        ``strand``, ``strain`` and ``gene_name``.  Presumably sorted so that
        equal events are adjacent -- only neighbours are compared.

    Returns
    -------
    The entries of ``event_list`` that were kept.
    """
    import sys

    rm_idx = []
    last_kept = 0
    for i in range(1, event_list.shape[0]):
        # Progress indicator: a dot every 1000 events, the count every 10000.
        if i % 1000 == 0:
            sys.stdout.write('. ')
            if i % 10000 == 0:
                sys.stdout.write('%i\n' % i)
        old_coords = event_list[last_kept].get_inner_coords(trafo=True)
        curr_coords = event_list[i].get_inner_coords(trafo=True)

        if old_coords.shape[0] == curr_coords.shape[0] and sp.all(old_coords == curr_coords):

            ### assertion that we did everything right
            assert(event_list[last_kept].chr == event_list[i].chr)
            assert(event_list[last_kept].strand == event_list[i].strand)

            ### check, which event is longer -> keep longer event
            len1 = event_list[last_kept].get_len()
            len2 = event_list[i].get_len()

            if len1 > len2:
                keep_idx = last_kept
                not_keep_idx = i
            else:
                keep_idx = i
                not_keep_idx = last_kept

            ### check if we would lose strains
            idx = sp.where(~sp.in1d(event_list[not_keep_idx].strain, event_list[keep_idx].strain))[0]
            if idx.shape[0] > 0:
                event_list[keep_idx].strain = sp.r_[event_list[keep_idx].strain, event_list[not_keep_idx].strain[idx]]
                ### TODO: make sure that we keep different coordinates if the strains differ
                event_list[keep_idx].gene_name = sp.union1d(event_list[keep_idx].gene_name, event_list[not_keep_idx].gene_name)

            # Record the loser so that it is removed below.
            rm_idx.append(not_keep_idx)
            last_kept = keep_idx
        else:
            last_kept = i

    print('events dropped: %i' % len(rm_idx))
    keep_idx = sp.where(~sp.in1d(sp.arange(event_list.shape[0]), rm_idx))[0]
    event_list = event_list[keep_idx]

    return event_list
Пример #8
def calculate_hapmap_pcs(hapmap_file, pc_weights_dict, snps_filter=None):
    """
    Calculates the principal components for the hapmap project

    :param hapmap_file: Hapmap file in HDF5 format
    :param pc_weights_dict: dictionary with SNP weights (key = snpid)
    :param snps_filter: optional mapping from chromosome name ('chr1' ...
        'chr22') to the snp-ids to keep on that chromosome; when None every
        SNP that has a weight is eligible
    :return: dictionary with pcs and number of snps that were used
    """
    log.info('Calculating Principal components for Hapmap file %s' % hapmap_file)
    ok_sids = np.asarray(list(pc_weights_dict.keys()))
    log.info('Loaded PC weight for %d SNPs' % (len(ok_sids)))
    # Load genotypes
    log.info('Load Hapmap dataset')
    # The context manager closes the file even if a chromosome fails.
    with h5py.File(hapmap_file, 'r') as h5f:
        num_indivs = len(h5f['indivs']['continent'][...])
        log.info('Found genotypes for %d individuals' % num_indivs)
        pcs = sp.zeros((num_indivs, 2))
        num_nt_issues = 0
        num_snps_used = 0
        log.info('Calculating PCs')
        for chrom in range(1, 23):
            log.info('Working on Chromosome %d' % chrom)
            chrom_str = 'chr%d' % chrom
            chrom_group = h5f[chrom_str]

            log.info('Identifying overlap')
            if snps_filter is None:
                # No subset requested: every weighted SNP is a candidate.
                ok_chrom_sids = ok_sids
            else:
                ok_snp_filter = sp.in1d(ok_sids, snps_filter[chrom_str])
                ok_chrom_sids = ok_sids.compress(ok_snp_filter, axis=0)
            sids = chrom_group['variants']['ID'][...]
            ok_snp_filter = sp.in1d(sids, ok_chrom_sids)
            sids = sids.compress(ok_snp_filter, axis=0)

            log.info('Loading SNPs')
            snps = chrom_group['calldata']['snps'][...]
            snps = snps.compress(ok_snp_filter, axis=0)
            # Two-column array of (REF, ALT) nucleotides per variant.
            length = len(chrom_group['variants/REF'])
            nts = np.hstack((chrom_group['variants/REF'][:].reshape(length, 1),
                             chrom_group['variants/ALT'][:].reshape(length, 1)))
            nts = nts.compress(ok_snp_filter, axis=0)

            log.info('Updating PCs')
            pcs_per_chr = _calc_pcs(pc_weights_dict, sids, nts, snps)
            pcs += pcs_per_chr['pcs']
            num_nt_issues += pcs_per_chr['num_nt_issues']
            num_snps_used += pcs_per_chr['num_snps_used']

    log.info('%d SNPs were excluded from the analysis due to nucleotide issues.' % (num_nt_issues))
    log.info('%d SNPs were used for the analysis.' % (num_snps_used))
    return {'pcs': pcs, 'num_snps_used': num_snps_used}
Пример #9
def intersect_rows(array1, array2, index=False):
    """Return the rows of array1 that also occur in array2.

    Rows are compared through a string key built by joining the row's
    elements with '-'.

    Parameters
    ----------
    array1, array2 : 2-D arrays
    index : bool
        When true, the matching row indices are returned as well.

    Returns
    -------
    tuple ``(rows, idx, idx2)``
        ``rows`` is ``array1[idx, :]``.  ``idx`` / ``idx2`` are the indices
        of the matching rows in ``array1`` / ``array2`` when ``index`` is
        true, otherwise both are None.
    """
    tmp1 = sp.array(['-'.join(array1[i, :].astype('str')) for i in range(array1.shape[0])])
    tmp2 = sp.array(['-'.join(array2[i, :].astype('str')) for i in range(array2.shape[0])])
    idx = sp.where(sp.in1d(tmp1, tmp2))[0]

    if index:
        idx2 = sp.where(sp.in1d(tmp2, tmp1))[0]
        return (array1[idx, :], idx, idx2)
    # Without index the tuple shape is kept so callers can always unpack.
    return (array1[idx, :], None, None)
Пример #10
    def find_interface_throats(self, labels=[]):
        r"""
        Finds the throats that join two pore labels.

        Parameters
        ----------
        labels : list of strings
            The labels of the two pore groups whose interface is sought

        Returns
        -------
        An array of throat numbers that connect the given pore groups

        Notes
        -----
        This method is meant to find interfaces between TWO groups, regions or
        clusters of pores (as defined by their label).  If the input labels
        overlap or are not adjacent, an empty array is returned.

        Examples
        --------
        >>> import OpenPNM
        >>> pn = OpenPNM.Network.TestNet()
        >>> pn['pore.domain1'] = False
        >>> pn['pore.domain2'] = False
        >>> pn['pore.domain1'][[0, 1, 2]] = True
        >>> pn['pore.domain2'][[5, 6, 7]] = True
        >>> pn.find_interface_throats(labels=['domain1', 'domain2'])
        array([1, 4, 7])

        TODO: It might be a good idea to allow overlapping regions
        """
        # Empty result returned on every error path below.
        Tind = sp.array([], ndmin=1)
        if sp.shape(labels)[0] != 2:
            logger.error('Exactly two labels must be given')
        else:
            P1 = self.pores(labels=labels[0])
            P2 = self.pores(labels=labels[1])
            # Check if labels overlap
            if sp.sum(sp.in1d(P1, P2)) > 0:
                logger.error('Some labels overlap, iterface cannot be found')
            else:
                # Interface throats are neighbours of both pore groups.
                T1 = self.find_neighbor_throats(P1)
                T2 = self.find_neighbor_throats(P2)
                Tmask = sp.in1d(T1, T2)
                Tind = T1[Tmask]
        return Tind
Пример #11
Пример #12
    def on_shifted_dwp_curves(self, t):
        # Recomputes the shifted 'dwp' curve data stored in P4Rm.ParamDict for
        # the model selected in AllDataDict['model'] (0, 1 or 2).
        # Values are read through the instance ``a`` and written through the
        # class attribute P4Rm.ParamDict.
        # NOTE(review): the ``model == 2`` branch has lost text -- three
        # append(...) expressions end in a dangling operator -- so this copy
        # does not parse; restore those lines from upstream.
        a = P4Rm()
        if a.AllDataDict['model'] == 0:
            # x positions run from t downwards in steps of t / len(dwp).
            temp_1 = arange(2, len(a.ParamDict['dwp'])+1)
            temp_2 = temp_1 * t / (len(a.ParamDict['dwp']))
            P4Rm.ParamDict['x_dwp'] = t - temp_2
            # All dwp values except the last one.
            shifted_dwp = a.ParamDict['dwp'][:-1:]
            # Select the DW_i entries whose depth (rounded to 3 decimals)
            # coincides with one of the x_dwp positions.
            temp_3 = in1d(around(a.ParamDict['depth'], decimals=3),
                          around(a.ParamDict['x_dwp'], decimals=3))
            temp_4 = a.ParamDict['DW_i'][temp_3]
            P4Rm.ParamDict['scale_dw'] = shifted_dwp / temp_4
            # A zero scale would divide by zero below; use 1 instead.
            P4Rm.ParamDict['scale_dw'][a.ParamDict['scale_dw'] == 0] = 1.

            P4Rm.ParamDict['DW_shifted'] = shifted_dwp/a.ParamDict['scale_dw']
            P4Rm.ParamDict['dw_out'] = a.ParamDict['dwp'][-1]

        elif a.AllDataDict['model'] == 1:
            temp_1 = arange(0, len(a.ParamDict['dwp'])+1-3)
            temp_2 = temp_1 * t / (len(a.ParamDict['dwp'])-3)
            P4Rm.ParamDict['x_dwp'] = t - temp_2
            # All dwp values except the first and the last one.
            shifted_dwp = a.ParamDict['dwp'][1:-1:]
            temp_3 = in1d(around(a.ParamDict['depth'], decimals=3),
                          around(a.ParamDict['x_dwp'], decimals=3))
            temp_4 = a.ParamDict['DW_i'][temp_3]
            P4Rm.ParamDict['scale_dw'] = shifted_dwp / temp_4
            P4Rm.ParamDict['scale_dw'][a.ParamDict['scale_dw'] == 0] = 1.

            P4Rm.ParamDict['DW_shifted'] = shifted_dwp/a.ParamDict['scale_dw']
            # The first and last dwp values are kept aside as dw_out.
            temp_5 = array([a.ParamDict['dwp'][0], a.ParamDict['dwp'][-1]])
            P4Rm.ParamDict['dw_out'] = temp_5

        elif a.AllDataDict['model'] == 2:
            x_dw_temp = []
            # NOTE(review): the next two statements are truncated.
            x_dw_temp.append(t*(1-a.ParamDict['dwp'][1] +
            x_dw_temp.append(t*(1-a.ParamDict['dwp'][1] -
            P4Rm.ParamDict['x_dwp'] = x_dw_temp

            y_dw_temp = []
            y_dw_temp.append(1. - (1-a.ParamDict['dwp'][0])/2)
            # NOTE(review): the next statement is truncated.
            y_dw_temp.append(1. - (1-a.ParamDict['dwp'][0])/2 -
            P4Rm.ParamDict['DW_shifted'] = y_dw_temp
Пример #13
    def solve(self, A=None, b=None, iterative_solver=None, **kwargs):
        r"""
        Executes the right algorithm for the solution: regular solution of a
        linear system or iterative solution over the nonlinear source terms.

        Parameters
        ----------
        A : sparse matrix
            2D Coefficient matrix
        b : dense matrix
            1D RHS vector
        iterative_solver : string
            Name of solver to use.  If no solver is specified, sp.solve is
            used which is a direct solver (SuperLU on default Scipy
            installation)
        kwargs : list of keyword arguments
            These arguments and values are sent to the sparse solver, so read
            the specific documentation for the solver chosen
        """
        self._iterative_solver = iterative_solver

        # Executes the right algorithm: the outer iteration is only needed
        # when a nonlinear source term is present among the properties.
        if any("pore.source_nonlinear" in s for s in self.props()):
            X = self._do_one_outer_iteration(**kwargs)
        else:
            X = self._do_one_inner_iteration(A, b, **kwargs)
        self.X = X
        # Entries of X that do not belong to a pore of this object are kept
        # separately.  ``~`` is the logical NOT of the boolean mask; unary
        # minus is not valid for boolean arrays in current numpy.
        self._Neumann_super_X = self.X[
            ~sp.in1d(sp.arange(0, self._coeff_dimension), self.pores())]
        # Removing the additional super pore variables from the results
        self[self._quantity] = self.X[self.pores()]
        logger.info('Writing the results to ' + '[\'' + self._quantity +
                    '\'] in the ' + self.name + ' algorithm.')
Пример #15
    def _check_trapping(self, inv_val):
        # NOTE(review): the docstring quotes are missing and two calls below
        # (find_clusters2 and find_neighbor_throats) are cut off after their
        # first argument, so this copy does not parse; restore from upstream.
        Determine which pores and throats are trapped by invading phase.  This
        method is called by ``run`` if 'trapping' is set to True.
        # Generate a list containing boolean values for throat state
        Tinvaded = self['throat.inv_Pc'] < sp.inf
        # Add residual throats, if any, to list of invaded throats
        Tinvaded = Tinvaded + self['throat.residual']
        # Invert logic to find defending throats
        Tdefended = ~Tinvaded
        # NOTE(review): truncated call -- remaining arguments are missing.
        [pclusters, tclusters] = self._net.find_clusters2(mask=Tdefended,
        # See which outlet pores remain uninvaded
        outlets = self['pore.outlets'] * (self['pore.inv_Pc'] == sp.inf)
        # Identify clusters connected to remaining outlet sites
        def_clusters = sp.unique(pclusters[outlets])
        temp = sp.in1d(sp.unique(pclusters), def_clusters, invert=True)
        trapped_clusters = sp.unique(pclusters)[temp]
        # Negative cluster numbers are discarded here.
        trapped_clusters = trapped_clusters[trapped_clusters >= 0]

        # Find defending clusters NOT connected to the outlet pores
        pmask = np.in1d(pclusters, trapped_clusters)
        # Store current applied pressure in newly trapped pores
        # (only pores whose 'pore.trapped' value is still inf are updated)
        pinds = (self['pore.trapped'] == sp.inf) * (pmask)
        self['pore.trapped'][pinds] = inv_val

        # Find throats on the trapped defending clusters
        # NOTE(review): truncated call -- remaining arguments are missing.
        tinds = self._net.find_neighbor_throats(pores=pinds,
        self['throat.trapped'][tinds] = inv_val
        self['throat.entry_pressure'][tinds] = 1000000
Пример #16
def make_introns_feasible(introns, genes, CFG):
    """Tighten the read filter until no gene has more than 200 introns.

    A gene is unfeasible while either column of its ``introns`` row holds
    more than 200 entries.  In every round the filter in
    ``CFG['read_filter']`` is made stricter (in place) and the introns of
    the still-unfeasible genes are recomputed with ``get_intron_list``.

    Parameters
    ----------
    introns : 2-D object array
        One row per gene; columns 0 and 1 hold arrays of introns.
        Updated in place.
    genes : array of gene objects
        Indexed with the unfeasible gene indices; ``name`` is used for
        logging.
    CFG : dict
        Needs 'fd_log' (a writable text stream) and 'read_filter' with the
        keys 'exon_len', 'mincount' and 'mismatch'.

    Returns
    -------
    The updated ``introns`` array.
    """
    log = CFG['fd_log']

    def _unfeasible_genes():
        # Indices of genes with more than 200 introns in either column.
        cnt0 = sp.array([x.shape[0] for x in introns[:, 0]])
        cnt1 = sp.array([x.shape[0] for x in introns[:, 1]])
        return sp.where((cnt0 > 200) | (cnt1 > 200))[0]

    unfeas = _unfeasible_genes()
    # log.write(...) replaces the Python-2-only ``print >> stream`` form,
    # which raises a TypeError at runtime on Python 3.
    log.write('found %i unfeasible genes\n' % unfeas.shape[0])

    while unfeas.shape[0] > 0:
        ### make filter more stringent
        read_filter = CFG['read_filter']
        read_filter['exon_len'] = min(36, read_filter['exon_len'] + 4)
        read_filter['mincount'] = 2 * read_filter['mincount']
        read_filter['mismatch'] = max(read_filter['mismatch'] - 1, 0)

        ### get new intron counts
        tmp_introns = get_intron_list(genes[unfeas], CFG)
        introns[unfeas, :] = tmp_introns

        ### still unfeasible?
        still_unfeas = _unfeasible_genes()
        idx = sp.where(~sp.in1d(unfeas, still_unfeas))[0]

        # Report the criteria that made each gene feasible in this round.
        for i in unfeas[idx]:
            log.write('[feasibility] set criteria for gene %s to: min_ex %i, min_conf %i, max_mism %i\n' % (genes[i].name, read_filter['exon_len'], read_filter['mincount'], read_filter['mismatch']))
        unfeas = still_unfeas

    return introns
Пример #17
    def get_usgs_n(self):
        # Computes an average roughness value from the USGS rating data and
        # the HAND-derived geometry, stores it on self.usgsroughness and
        # prints it.
        # NOTE(review): this copy is damaged -- the body of the first ``if``
        # and the argument list of the mannings_n(...) call are missing, so
        # it does not parse; restore those lines from upstream.
        if self.get_usgsrc() == 0:
        )  # Fetch usgsq,usgsh,handq,handh,handarea,handrad,handslope, handstage

        # Find indices for integer stageheight values in usgsh, and apply to usgsq
        usgsidx = scipy.where(scipy.equal(scipy.mod(
            self.usgsh, 1), 0))  # Find indices of integer values in usgsh
        usgsh = self.usgsh[usgsidx]
        usgsq = self.usgsq[usgsidx]

        # Find indices where usgsh[usgsidx] occur in handstage, and apply to handarea and handrad
        handidx = scipy.where(scipy.in1d(self.handstage, usgsh))
        area = self.handarea[handidx]
        hydrad = self.handrad[handidx]

        # Remove usgsq values for duplicate usgsh heights (keep first instance only)
        # NOTE(review): usgsq shrinks inside the loop while ``i`` keeps
        # following usgsh, so later deletions may hit shifted positions --
        # confirm this is intended.
        if usgsh.shape != area.shape:
            for i in range(usgsh.shape[0]):
                if i == 0: pass
                elif usgsh[i] == usgsh[i - 1]:
                    usgsq = scipy.delete(usgsq, i)

        # Calculate average manning's n after converting discharge units
        disch = usgsq  #*0.0283168 # Convert cfs to cms
        # NOTE(review): truncated call -- remaining arguments are missing.
        self.usgsroughness_array = self.mannings_n(area=area,
        self.usgsroughness = scipy.average(self.usgsroughness_array)
        print 'Average roughness: {0:.2f}'.format(self.usgsroughness)
Пример #18
 def test_find_nearby_pores_distance_2_flattened_inclself(self):
     # NOTE(review): the find_nearby_pores(...) call is cut off after its
     # first argument (the remaining arguments and the closing parenthesis
     # are missing from this copy); restore it from the upstream test.
     a = self.net.find_nearby_pores(pores=[0, 1],
     # The result must hold 17 pores and contain both input pores.
     assert sp.size(a) == 17
     assert sp.all(sp.in1d([0, 1], a))
Пример #19
def intersect_rows(array1, array2, index=False):
    """Return array with rows that intersect between array1 and array2.

    Rows are compared through a string key built by joining the row's
    elements with '-'.

    Parameters
    ----------
    array1, array2 : 2-D arrays
    index : bool
        When true, the matching row indices are returned as well.

    Returns
    -------
    tuple ``(rows, idx, idx2)``
        ``rows`` is ``array1[idx, :]``.  ``idx`` / ``idx2`` are the indices
        of the matching rows in ``array1`` / ``array2`` when ``index`` is
        true, otherwise both are None.
    """
    tmp1 = sp.array(
        ['-'.join(array1[i, :].astype('str')) for i in range(array1.shape[0])])
    tmp2 = sp.array(
        ['-'.join(array2[i, :].astype('str')) for i in range(array2.shape[0])])

    idx = sp.where(sp.in1d(tmp1, tmp2))[0]

    if index:
        idx2 = sp.where(sp.in1d(tmp2, tmp1))[0]
        return (array1[idx, :], idx, idx2)
    # Without index the tuple shape is kept so callers can always unpack.
    return (array1[idx, :], None, None)
    def validate_vulnerability_set(self):
        """The vulnerability set must provide curves for all sites in this
        object. A Vulnerability_Function needs to be defined to match each
        attributes['STRUCTURE_CLASSIFICATION'] identifier.

        Raises a RuntimeError if it cannot find a match, or if no
        vulnerability set has been assigned.
        """
        if self.vulnerability_set is None:
            raise RuntimeError('Vulnerability Set must not be None')

        # Function IDs for the vulnerability set.  Materialised as a list:
        # a dict keys view is not converted element-wise by numpy.
        curves_defined = list(
            self.vulnerability_set.vulnerability_functions.keys())

        structure_classifications = unique(
            self.attributes['STRUCTURE_CLASSIFICATION'])

        # Are there any unique structure classifications that are not in the
        # curves defined?
        in_curves_defined = in1d(structure_classifications, curves_defined)
        if not in_curves_defined.all():
            msg = 'The following structures do not have a vulnerability curve: '
            msg += '%s' % structure_classifications[~in_curves_defined]
            raise RuntimeError(msg)
def getSizeFactor(fn_anno, data, gid, mode = 'sum', withXYMT = True, filterbyPC = True):
    """Compute a per-sample library size from annotated genes.

    input annotation, counts and gene ids
    output sum of protein coding gene levels excluding sex chromosomes and
    mitochondria genes

    Parameters
    ----------
    fn_anno : str
        Tab-separated annotation file; columns 0, 2 and 8 are read
        (sequence name, feature type, attribute string).
    data : 2-D array
        Counts, one row per entry of ``gid`` and one column per sample.
    gid : array of str
        Gene id of every row of ``data``.
    mode : str
        'sum' sums the selected rows; 'uq' uses the 75th percentile of the
        non-zero values of the selected rows times the number of selected
        rows.
    withXYMT : bool
        When False, genes on 'X', 'Y' and 'MT' are excluded.
    filterbyPC : bool
        When True, only genes whose third attribute has the value
        'protein_coding' are used.

    Returns
    -------
    1-D array with one value per column of ``data``.
    """
    # dtype=str: the old 'string' alias is not understood by current numpy.
    anno = sp.loadtxt(fn_anno, delimiter='\t', dtype=str, usecols=[0, 2, 8])
    anno = anno[anno[:, 1] == 'gene', :]
    if not withXYMT:  ### filter xymt
        anno = anno[~sp.in1d(anno[:, 0], ['MT', 'Y', 'X']), :]

    ### clean gene id's: first attribute, second token, quotes removed
    agid = sp.array([x.split(';')[0].split(" ")[1].strip('\"') for x in anno[:, 2]])

    if filterbyPC:  ### filter protein coding
        # Value between the quotes of the third attribute.
        gtpe = sp.array([x.split(';')[2].split('\"')[1] for x in anno[:, 2]])
        agid = agid[gtpe == 'protein_coding']

    iGn = sp.in1d(gid, agid)
    libsize = sp.sum(data[iGn, :], axis=0)
    if mode == 'uq':
        libsize = sp.array([sp.percentile(x[x != 0], 75) for x in data[iGn, :].T]) * iGn.sum()

    return libsize
Пример #22
def get_sid_pos_map(sids, hdf5_kgenomes_file=None):
    """
    Returns a SNP map, with information for each SNP

    :param sids: SNP ids to look up.
    :param hdf5_kgenomes_file: HDF5 file with one 'chrom_<n>' group per
        autosome, each holding 'sids', 'positions', 'eur_mafs' and 'nts';
        defaults to ``__default_kgenomes_file__``.
    :return: dict mapping each SNP id found in the file to a dict with the
        keys 'pos', 'chrom', 'eur_maf' and 'nts'.
    """
    if hdf5_kgenomes_file is None:
        hdf5_kgenomes_file = __default_kgenomes_file__
    # Converted once; the same array is reused for every chromosome.
    wanted_sids = sp.array(sids)
    snp_info_map = {}
    # The context manager closes the file once all chromosomes are read.
    with h5py.File(hdf5_kgenomes_file, 'r') as h5f:
        for chrom_i in range(1, 23):
            cg = h5f['chrom_%d' % chrom_i]
            sids_1k = cg['sids'][...]
            sids_filter_1k = sp.in1d(sids_1k, wanted_sids)
            common_sids = sids_1k[sids_filter_1k]
            common_positions = cg['positions'][sids_filter_1k]
            eur_mafs = cg['eur_mafs'][sids_filter_1k]
            nts = cg['nts'][sids_filter_1k]
            # Built-in zip works on both Python 2 and 3 (izip is 2-only).
            for sid, pos, eur_maf, nt in zip(common_sids, common_positions,
                                             eur_mafs, nts):
                snp_info_map[sid] = {
                    'pos': pos,
                    'chrom': chrom_i,
                    'eur_maf': eur_maf,
                    'nts': nt,
                }
    return snp_info_map
Пример #23
def make_introns_feasible(introns, genes, CFG):
    """Tighten the read filter until no gene has more than 200 introns.

    A gene is unfeasible while either column of its ``introns`` row holds
    more than 200 entries.  In every round the filter in
    ``CFG['read_filter']`` is made stricter (in place) and the introns of
    the still-unfeasible genes are recomputed with ``get_intron_list``.

    Parameters
    ----------
    introns : 2-D object array
        One row per gene; columns 0 and 1 hold arrays of introns.
        Updated in place.
    genes : array of gene objects
        Indexed with the unfeasible gene indices; ``name`` is used for
        logging.
    CFG : dict
        Needs 'fd_log' (a writable text stream) and 'read_filter' with the
        keys 'exon_len', 'mincount' and 'mismatch'.

    Returns
    -------
    The updated ``introns`` array.
    """
    log = CFG['fd_log']

    def _unfeasible_genes():
        # Indices of genes with more than 200 introns in either column.
        cnt0 = sp.array([x.shape[0] for x in introns[:, 0]])
        cnt1 = sp.array([x.shape[0] for x in introns[:, 1]])
        return sp.where((cnt0 > 200) | (cnt1 > 200))[0]

    unfeas = _unfeasible_genes()
    # log.write(...) replaces the Python-2-only ``print >> stream`` form,
    # which raises a TypeError at runtime on Python 3.
    log.write('found %i unfeasible genes\n' % unfeas.shape[0])

    while unfeas.shape[0] > 0:
        ### make filter more stringent
        read_filter = CFG['read_filter']
        read_filter['exon_len'] = min(36, read_filter['exon_len'] + 4)
        read_filter['mincount'] = 2 * read_filter['mincount']
        read_filter['mismatch'] = max(read_filter['mismatch'] - 1, 0)

        ### get new intron counts
        tmp_introns = get_intron_list(genes[unfeas], CFG)
        introns[unfeas, :] = tmp_introns

        ### still unfeasible?
        still_unfeas = _unfeasible_genes()
        idx = sp.where(~sp.in1d(unfeas, still_unfeas))[0]

        # Report the criteria that made each gene feasible in this round.
        for i in unfeas[idx]:
            log.write('[feasibility] set criteria for gene %s to: min_ex %i, min_conf %i, max_mism %i\n' % (genes[i].name, read_filter['exon_len'], read_filter['mincount'], read_filter['mismatch']))
        unfeas = still_unfeas

    return introns
Пример #24
 def regenerate(self, prop_list='',mode=None):
     # NOTE(review): the docstring quotes are missing from this copy, so the
     # description lines below are bare text and the method does not parse.
     # The final loop only logs each property name; the statement that
     # actually recomputes the property appears to be missing as well.
     # Restore both from upstream.
     This updates all properties using the selected methods
     prop_list : string or list of strings
         The names of the properties that should be updated, defaults to all
     mode : string
         Control how the regeneration occurs.  
     >>> pn = OpenPNM.Network.TestNet()
     >>> pind = pn.get_pore_indices()
     >>> geom = OpenPNM.Geometry.Stick_and_Ball(network=pn, name='geo_test', locations=pind)
     >>> geom.regenerate()  # Regenerate all properties at once
     >>> geom.regenerate('pore_seed')  # only one property
     >>> geom.regenerate(['pore_seed', 'pore_diameter'])  # or several
     # An empty string selects every known property; a single name is
     # wrapped into a list.
     if prop_list == '':
         prop_list = self._prop_list
     elif type(prop_list) == str:
         prop_list = [prop_list]
     # In 'exclude' mode the given names are removed from the full list.
     if mode == 'exclude':
         a = sp.array(self._prop_list)
         b = sp.array(prop_list)
         c = a[sp.where(~sp.in1d(a,b))[0]]
         prop_list = list(c)
     for item in prop_list:
         self._logger.debug('Refreshing: '+item)
Пример #25
 def test_find_nearby_pores_distance_2_flattened_inclself(self):
     # NOTE(review): the find_nearby_pores(...) call is cut off after its
     # first argument (the remaining arguments and the closing parenthesis
     # are missing from this copy); restore it from the upstream test.
     a = self.net.find_nearby_pores(pores=[0, 1],
     # The result must hold 17 pores and contain both input pores.
     assert sp.size(a) == 17
     assert sp.all(sp.in1d([0, 1], a))
Пример #26
Пример #27
def calculate_ld(nt_map_file, kgenomes_file, output_folder, window_size):
    """
    Calculate LD in windows for a reference genome dataset for a given set of
    SNP ids that are defined in the genotype file.

    For every autosome a gzipped pickle ``<output_folder>/LD_chr<n>.pickled.gz``
    is written, holding the per-SNP LD matrices ('Ds'), the window
    boundaries, the SNP means and standard deviations and the window size.

    :param nt_map_file: pickled dict mapping 'chr<n>' to a dict with the
        key 'sids'.  It is loaded with pickle, so it must come from a
        trusted source.
    :param kgenomes_file: HDF5 file with one 'chr<n>' group per autosome,
        each holding 'snp_ids', 'snps', 'snp_stds' and 'snp_means'.
    :param output_folder: folder the LD files are written to.
    :param window_size: number of SNPs per window; half of it (rounded
        down) is taken on each side of the focal SNP.
    """
    log.info('Calculating LD')
    # Integer division: slice bounds must be integers on Python 3.
    half_window = window_size // 2

    # Load 1K genome; the context manager closes it when done.
    with h5py.File(kgenomes_file, 'r') as kg_h5f:

        # load map file.
        with open(nt_map_file, 'rb') as f:
            snp_map_dict = pickle.load(f, encoding='latin1')

        # Figure out overlap (all genotype SNPs should be in the 1K genomes data)..
        for chrom in range(1, 23):
            log.info('Working on Chromosome %s' % chrom)
            chrom_str1 = 'chr%s' % chrom
            kg_cg = kg_h5f[chrom_str1]
            kg_sids = kg_cg['snp_ids'][...]
            chrom_dict = snp_map_dict[chrom_str1]
            g_sids = chrom_dict['sids']

            kg_filter = sp.in1d(kg_sids, g_sids)

            assert sp.sum(kg_filter) == len(g_sids), '..bug...'
            assert sp.all(kg_sids[kg_filter] == g_sids), '...bug'

            snps = kg_cg['snps'][...]
            snps = snps.compress(kg_filter, axis=0)

            snp_stds = kg_cg['snp_stds'][...]
            snp_stds = snp_stds.compress(kg_filter, axis=0)

            snp_means = kg_cg['snp_means'][...]
            snp_means = snp_means.compress(kg_filter, axis=0)

            norm_snps = sp.array((snps - snp_means) / snp_stds, dtype='single')

            # Iterate over SNPs and calculate LD
            num_snps, num_indivs = snps.shape

            ld_mats = []
            boundaries = []

            for snp_i in range(num_snps):
                start_i = max(0, snp_i - half_window)
                end_i = min(snp_i + half_window + 1, num_snps)

                X = norm_snps[start_i:end_i]
                D = sp.dot(X, X.T) / num_indivs

                # Keep the matrix; otherwise 'Ds' below would stay empty.
                ld_mats.append(D)
                boundaries.append([start_i, end_i])

            ld_dict = {'Ds':ld_mats, 'boundaries':boundaries, 'snp_means':snp_means, 'snp_stds':snp_stds, 'window_size':window_size}
            # Store things

            ld_file = '%s/LD' % output_folder + '_' + chrom_str1 + '.pickled.gz'
            log.info('Saving LD in %s' % ld_file)
            with gzip.open(ld_file, 'w') as f:
                pickle.dump(ld_dict, f, protocol=2)
Пример #28
 def test_find_nearby_pores_distance_2_flattened_include_input(self):
     # NOTE(review): the find_nearby_pores(...) call is cut off after its
     # first argument (the remaining arguments and the closing parenthesis
     # are missing from this copy); restore it from the upstream test.
     a = self.net.find_nearby_pores(pores=[0, 1],
     # The result must hold 17 pores and contain both input pores.
     assert sp.size(a) == 17
     assert sp.all(sp.in1d([0, 1], a))
def remove_isolated_clusters(conns, nonzero_locs, num_to_keep):
    """
    Identifies and removes all disconnected clusters except the number of
    groups specified by "num_to_keep". num_to_keep=N retains the N largest
    groups.

    Parameters
    ----------
    conns, nonzero_locs :
        Passed to ``generate_adjacency_matrix`` to build the graph;
        ``nonzero_locs`` is an array with one entry per node.
    num_to_keep : int
        Number of connected components to retain, largest first.

    Returns
    -------
    The entries of ``nonzero_locs`` that belong to a retained component.
    """
    adj_mat = generate_adjacency_matrix(conns, nonzero_locs)
    logger.info('determining connected components...')
    # Component label of every node.
    cs_ids = csgraph.connected_components(csgraph=adj_mat, directed=False)[1]
    groups, counts = sp.unique(cs_ids, return_counts=True)
    # Largest component first.
    order = sp.argsort(counts)[::-1]
    groups = groups[order]
    counts = counts[order]

    msg = '    {} component groups for {} total nodes'
    logger.debug(msg.format(groups.size, cs_ids.size))
    msg = '    largest group number: {}, size {}'
    logger.debug(msg.format(groups[0], counts[0]))
    # This message used to be overwritten before it was ever logged.
    msg = '    {} % of nodes contained in largest group'
    logger.debug(msg.format(counts[0]/cs_ids.size*100))
    msg = '    {} % of nodes contained in {} retained groups'
    num = sp.sum(counts[0:num_to_keep])/cs_ids.size*100
    logger.debug(msg.format(num, num_to_keep))

    inds = sp.where(sp.in1d(cs_ids, groups[0:num_to_keep]))[0]
    num = nonzero_locs.size
    nonzero_locs = nonzero_locs[inds]
    msg = '    removed {} disconnected nodes'
    logger.debug(msg.format(num - nonzero_locs.size))
    return nonzero_locs
Пример #30
def fill_masked_pixels(dll, ll, delta, diff, iv, no_apply_filling):
    """Put the samples on a regular grid of step ``dll``, zero-filling gaps.

    Every sample is assigned the pixel number ``(ll - ll[0]) / dll + 0.5``
    truncated to an integer.  The outputs cover all pixel numbers from 0 up
    to the pixel of the last sample; pixels without a sample hold zero in
    ``delta``, ``diff`` and ``iv``.

    Returns ``(ll_new, delta_new, diff_new, iv_new, nb_masked_pixel)``.
    When ``no_apply_filling`` is true the inputs are returned untouched
    together with a count of 0.
    """
    if no_apply_filling:
        return ll, delta, diff, iv, 0

    # Pixel number of every input sample.
    pixel_pos = ll.copy()
    pixel_pos -= ll[0]
    pixel_pos /= dll
    pixel_pos += 0.5
    index = sp.array(pixel_pos, dtype=int)

    # Full grid and the positions on it that carry a sample.
    index_all = range(index[-1] + 1)
    n_pix = len(index_all)
    index_ok = sp.in1d(index_all, index)

    def _spread(values):
        # Zeros everywhere, the given values at the sampled pixels.
        filled = sp.zeros(n_pix)
        filled[index_ok] = values
        return filled

    delta_new = _spread(delta)

    # Coordinate of every grid pixel.
    ll_new = sp.array(index_all, dtype=float)
    ll_new *= dll
    ll_new += ll[0]

    diff_new = _spread(diff)
    iv_new = _spread(iv)

    nb_masked_pixel = n_pix - len(index)

    return ll_new, delta_new, diff_new, iv_new, nb_masked_pixel
Пример #31
    def evaluate_trapping(self, p_outlets):
        # NOTE(review): this copy is heavily damaged -- the docstring quotes
        # are missing, both sp.zeros([...]) calls have lost their shape
        # argument, the ``try``/``except`` around inv_points is reduced to
        # its two indented bodies, the find_neighbor_throats(...) call is cut
        # off and an ``else:`` seems to be missing near the end of the loop.
        # It does not parse; restore it from upstream.
        Finds trapped pores and throats after a full ordinary
        percolation simulation has been run.

        p_outlets : array_like
            A list of pores that define the wetting phase outlets.
            Disconnection from these outlets results in trapping.

        It creates arrays called ``pore.trapped`` and ``throat.trapped``, but
        also adjusts the ``pore.inv_Pc`` and ``throat.inv_Pc`` arrays to set
        trapped locations to have infinite invasion pressure.

        self['pore.trapped'] = sp.zeros([
        ], dtype=float)
        self['throat.trapped'] = sp.zeros([
        ], dtype=float)
            # Get points used in OP
            inv_points = sp.unique(self['pore.inv_Pc'])
            raise Exception('Orindary percolation has not been run!')
        tind = self._net.throats()
        conns = self._net.find_connected_pores(tind)
        # Every invasion pressure except the largest one is examined.
        for inv_val in inv_points[0:-1]:
            # Find clusters of defender pores
            Pinvaded = self['pore.inv_Pc'] <= inv_val
            Cstate = sp.sum(Pinvaded[conns], axis=1)
            Tinvaded = self['throat.inv_Pc'] <= inv_val
            # 0 = all open, 1=1 pore filled,
            # 2=2 pores filled 3=2 pores + 1 throat filled
            Cstate = Cstate + Tinvaded
            clusters = self._net.find_clusters(Cstate == 0)
            # Clean up clusters (invaded = -1, defended >=0)
            clusters = clusters * (~Pinvaded) - (Pinvaded)
            # Identify clusters connected to outlet sites
            out_clusters = sp.unique(clusters[p_outlets])
            trapped_pores = ~sp.in1d(clusters, out_clusters)
            trapped_pores[Pinvaded] = False
            if sum(trapped_pores) > 0:
                # Only pores not yet marked as trapped get this pressure.
                inds = (self['pore.trapped'] == 0) * trapped_pores
                self['pore.trapped'][inds] = inv_val
                # NOTE(review): truncated call -- arguments are missing.
                trapped_throats = self._net.find_neighbor_throats(
                trapped_throat_array = np.asarray([False] * len(Cstate))
                trapped_throat_array[trapped_throats] = True
                inds = (self['throat.trapped'] == 0) * trapped_throat_array
                self['throat.trapped'][inds] = inv_val
                inds = (self['throat.trapped'] == 0) * (Cstate == 2)
                self['throat.trapped'][inds] = inv_val
        # Trapped locations get an infinite invasion pressure.
        self['pore.inv_Pc'][self['pore.trapped'] > 0] = sp.inf
        self['throat.inv_Pc'][self['throat.trapped'] > 0] = sp.inf
Пример #32
def whitelisting(options, header, data):
    """Keep only the columns that are whitelisted or selected by their tag.

    A column is kept when its header entry appears in the whitelist file, or
    when the fourth ``-``-separated field of its header entry starts with
    ``'1'``.

    Parameters
    ----------
    options : object
        Must provide ``fn_white``, the path of a tab-delimited text file
        holding the whitelisted header entries.
    header : 1-D array of str
        Column identifiers. Every entry must contain at least four
        ``-``-separated fields, otherwise an ``IndexError`` is raised.
    data : 2-D array
        Array whose columns correspond to the entries of ``header``.

    Returns
    -------
    tuple
        ``(header, data)`` restricted to the kept columns.
    """
    # ``dtype=str`` instead of the 'string' alias, which Python 3 builds of
    # NumPy do not accept.
    whitelist = sp.loadtxt(options.fn_white, delimiter='\t', dtype=str)
    in_whitelist = sp.in1d(header, whitelist)

    # Explicit string dtype keeps an empty header from producing a float array.
    tags = sp.array([x.split('-')[3] for x in header], dtype=str)
    # ``np.char`` is the public name of ``np.core.defchararray``.
    tag_selected = np.char.startswith(tags, '1')

    keep = in_whitelist | tag_selected
    return header[keep], data[:, keep]
Пример #33
 # Checks that add_boundary_pores creates one new 'boundary' pore for each
 # selected source pore and that the new pores do not reuse source indices.
 # NOTE(review): the CubicDual(...) call below is cut off after ``label_1=``;
 # its remaining arguments and closing parenthesis are missing here.
 def test_add_boundary_pores(self):
     net = op.Network.CubicDual(shape=[5, 5, 5], label_1='primary',
     # Source pores: those carrying both the 'surface' and 'bottom' labels.
     Ps = net.pores(labels=['surface', 'bottom'], mode='intersection')
     net.add_boundary_pores(pores=Ps, offset=[0, 0, -0.5])
     Ps2 = net.pores(labels=['boundary'], mode='intersection')
     # Same count of boundary pores as source pores ...
     assert Ps.size == Ps2.size
     # ... and no index is shared between the two sets.
     assert ~sp.any(sp.in1d(Ps, Ps2))
Пример #34
 # Checks that add_boundary_pores on a CubicDual network creates one
 # 'boundary' pore per selected source pore, with no shared indices.
 # NOTE(review): the CubicDual(...) call below is cut off after ``shape=``;
 # its remaining arguments and closing parenthesis are missing here.
 def test_add_boundary_pores_cubicdual(self):
     net = OpenPNM.Network.CubicDual(shape=[5, 5, 5],
     # Source pores: those carrying both the 'surface' and 'bottom' labels.
     Ps = net.pores(labels=['surface', 'bottom'], mode='intersection')
     net.add_boundary_pores(pores=Ps, offset=[0, 0, -0.5])
     Ps2 = net.pores(labels=['boundary'], mode='intersection')
     # Same count of boundary pores as source pores ...
     assert Ps.size == Ps2.size
     # ... and no index is shared between the two sets.
     assert ~sp.any(sp.in1d(Ps, Ps2))
Пример #35
    def rate(self, pores='', throats=''):
        """Return the summed rate ``g * (X1 - X2)`` over a set of throats.

        Parameters
        ----------
        pores : array_like, optional
            Pores of interest.  When ``throats`` is not given, the throats are
            obtained from ``find_neighbor_throats(pores, flatten=True,
            mode='not_intersection')``.
        throats : array_like, optional
            Throats to evaluate directly.  Takes precedence over ``pores`` for
            selecting the throats.

        Returns
        -------
        float
            Sum over the throats of conductance times ``X1 - X2``, where
            ``X1`` is read at the throat end that belongs to ``pores`` and
            ``X2`` at the opposite end.

        Raises
        ------
        Exception
            If neither ``pores`` nor ``throats`` is supplied.
        """
        # Compare against the '' sentinel only for real strings, so array
        # arguments are never compared element-wise with a string.
        throats_given = not (isinstance(throats, str) and throats == '')
        pores_given = not (isinstance(pores, str) and pores == '')

        if throats_given:
            conns = self._net.find_connected_pores(throats)
        elif pores_given:
            throats = self._net.find_neighbor_throats(pores, flatten=True,
                                                      mode='not_intersection')
            conns = self._net.find_connected_pores(throats)
        else:
            raise Exception('Either pores or throats must be specified')

        p1 = conns[:, 0]
        p2 = conns[:, 1]

        # Throats whose first pore is not one of ``pores`` get their ends
        # swapped.  ``~`` is the boolean negation; unary minus on a boolean
        # array is rejected by current NumPy.
        swap = ~sp.in1d(p1, pores)
        pores1 = sp.copy(p1)
        pores2 = sp.copy(p2)
        pores1[swap] = p2[swap]
        pores2[swap] = p1[swap]

        X1 = self._result[pores1]
        X2 = self._result[pores2]
        g = self._conductance[throats]
        R = sp.sum(sp.multiply(g, (X1 - X2)))
        # The visible original computed R and then discarded it.
        return R
    # Builds the right-hand-side column vector ``b`` for the linear solver.
    # NOTE(review): this listing is incomplete -- the signature is cut off
    # after ``self,``, the docstring delimiters are gone, and several guarding
    # statements (apparently ``if``/``try``/``else`` lines) and call
    # continuations are missing.  The comments below describe only what the
    # surviving lines show.
    def _build_RHS_matrix(self,
        This builds the right-hand-side matrix for the linear solver.
        if mode == 'overwrite':
            # Start from an all-zero column of length ``_coeff_dimension``.
            A_dim = self._coeff_dimension
            b = sp.zeros([A_dim, 1])
                # Dirichlet pores: their rows of b receive the stored values.
                Dir_pores = self.pores(self._phase.name + '_Dirichlet')
                Dir_pores_vals = self['pore.' + self._phase.name +
                b[Dir_pores] = sp.reshape(Dir_pores_vals, [len(Dir_pores), 1])
                # Individually specified Neumann pores: same treatment.
                individual_Neu_pores = self.pores(self._phase.name +
                individual_Neu_pores_vals = self[
                    'pore.' + self._phase.name +
                b[individual_Neu_pores] = sp.reshape(
                    individual_Neu_pores_vals, [len(individual_Neu_pores), 1])
                # Group Neumann values are written to the rows that follow the
                # ``num_pores()`` pore rows.
                self.pores(self._phase.name + '_Neumann_group')
                pnum = self._net.num_pores()
                b[sp.r_[pnum:(pnum +
                              len(self._group_Neumann_vals))]] = sp.reshape(
                                  [len(self._group_Neumann_vals), 1])

        if mode in ['overwrite', 'modify_RHS']:
                # Works on a copy of the stored vector ``self.b``.
                b = sp.copy(self.b)
            # Adding necessary terms such as source terms to the RHS for non-Dirichlet pores
            if modified_RHS_pores is not None and RHS_added_data is not None:
                if sp.size(modified_RHS_pores) == sp.size(RHS_added_data):
                    # Only pores listed in ``_non_Dir_diag`` are modified.
                    p = sp.in1d(modified_RHS_pores, self._non_Dir_diag)
                    data = RHS_added_data[p]
                    b[modified_RHS_pores[p]] = b[
                        modified_RHS_pores[p]] + data.reshape([len(data), 1])
                    # NOTE(review): presumably the ``else`` branch of the size
                    # check above -- confirm against the full source.
                    raise Exception(
                        'Provided data and pores for modifying RHS matrix should have the same size!'

        return (b)
Пример #37
def read_from_spplate(in_dir, thid, ra, dec, zqso, plate, mjd, fid, order, log=None, best_obs=False):
    """Read spectra from spPlate files and return one object per ``thid``.

    For every unique plate the files ``<in_dir>/<plate>/spPlate-<plate>-*.fits``
    are opened; each selected fiber is turned into a ``forest`` object and
    observations sharing the same ``thid`` are combined with ``+=``.

    Parameters
    ----------
    in_dir : str
        Directory containing one sub-directory per plate.
    thid, ra, dec, zqso, plate, mjd, fid : 1-D arrays of equal length
        Per-object identifier, coordinates, redshift, plate, MJD and fiber
        number.  ``fid`` is 1-based: row ``fid - 1`` of the flux image is read.
    order : object
        Passed through unchanged as the last argument of ``forest``.
    log : file-like, optional
        If given, progress and missing-file messages are written to it.
    best_obs : bool, optional
        If True, only objects whose ``mjd`` equals the file's ``MJD`` header
        value are read from that file.

    Returns
    -------
    list
        The accumulated ``forest`` objects, one per distinct ``thid`` read.
    """
    # Accumulator keyed by thid; the visible original used it uninitialised.
    pix_data = {}

    unique_plates = sp.unique(plate)
    print("reading {} plates".format(len(unique_plates)))

    for p in unique_plates:
        wplate = plate == p
        plate_mjd = "{}-*".format(p)
        mjd_in_plate = sp.unique(mjd[wplate])

        spplates = glob.glob(in_dir + "/{}/spPlate-{}.fits".format(p, plate_mjd))

        # The MJD is the last '-'-separated field of the file name.
        mjds_found = sp.array([spfile.split("-")[-1].replace(".fits", '') for spfile in spplates]).astype(int)
        wmissing = ~sp.in1d(mjd_in_plate, mjds_found)
        if wmissing.sum() > 0:
            for m in mjd_in_plate[wmissing]:
                print("INFO: can't find spplate {} {}".format(p, m))
                if log is not None:
                    log.write("INFO: can't find spplate {} {}\n".format(p, m))

        for spplate in spplates:
            # The context manager closes the FITS file even if a read fails.
            with fitsio.FITS(spplate) as h:
                head0 = h[0].read_header()
                t0 = time.time()
                flux = h[0].read()
                # Inverse variance is zeroed wherever HDU 2 is non-zero.
                ivar = h[1].read() * (h[2].read() == 0)
            MJD = head0["MJD"]

            wfib = wplate
            if best_obs:
                ## select only the objects which have specified mjd within this plate
                wfib = wplate & (mjd == MJD)

            coeff0 = head0["COEFF0"]
            coeff1 = head0["COEFF1"]
            llam = coeff0 + coeff1 * sp.arange(flux.shape[1])

            ## now convert all those fluxes into forest objects
            # Distinct loop names avoid shadowing the outer plate variable ``p``.
            for (t, r, d, z, obj_plate, m, f) in zip(thid[wfib], ra[wfib], dec[wfib], zqso[wfib], plate[wfib], mjd[wfib], fid[wfib]):
                index = f - 1
                spec = forest(llam, flux[index], ivar[index], t, r, d, z, obj_plate, m, f, order)
                # Combine repeated observations instead of overwriting them.
                if t in pix_data:
                    pix_data[t] += spec
                else:
                    pix_data[t] = spec
                if log is not None:
                    log.write("{} read from file {} and mjd {}\n".format(t, spplate, m))

            print("INFO: read {} from {} in {} per spec. Progress: {} of {} \n".format(wfib.sum(), os.path.basename(spplate), (time.time()-t0)/(wfib.sum()+1e-3), len(pix_data), len(thid)))

    data = list(pix_data.values())
    return data
Пример #38
Пример #39
 def test_map_pores(self):
     """Check pore ids of two geometries and their mapping onto ``net2``.

     The ``pore._id`` arrays of ``geo21`` and ``geo22`` must each hold one
     entry per pore and share no value, and mapping each geometry's pores
     onto ``net2`` must give the pores ``net2`` reports under that
     geometry's name.
     """
     first, second = self.geo21, self.geo22
     ids_first = first['pore._id']
     ids_second = second['pore._id']
     assert ids_first.size == first.Np
     assert ids_second.size == second.Np
     assert ~sp.any(sp.in1d(ids_first, ids_second))
     for geometry in (first, second):
         mapped = self.net2.map_pores(pores=geometry.Ps, origin=geometry)
         assert sp.all(mapped == self.net2.pores(geometry.name))
Пример #40
Пример #41
 # Finds the throats shared by the neighbourhoods of two labelled pore groups.
 # NOTE(review): this listing is incomplete -- the docstring delimiters and
 # section headers are missing (the description below is bare text) and the
 # ``else:`` lines that presumably followed each error branch are gone, which
 # is why the indentation no longer parses.
 # NOTE(review): ``labels=[]`` is a mutable default; it is only read here.
 def find_interface_throats(self,labels=[]):
     Finds the throats that join two pore labels.  
     labels : list of strings
         The labels of the two pore groups whose interface is sought
     An array of throat numbers that connect the given pore groups
     This method is meant to find interfaces between TWO groups, regions or 
     clusters of pores (as defined by their label).  If the input labels 
     overlap or are not adjacent, an empty array is returned. 
     >>> pn = OpenPNM.Network.TestNet()
     >>> pn.set_pore_info(label='domain1',locations=[0,1,2])
     >>> pn.set_pore_info(label='domain2',locations=[5,6,7])
     >>> pn.find_interface_throats(labels=['domain1','domain2'])
     array([1, 4, 7])
     # Default result: an empty array, returned whenever an error is logged.
     Tind = sp.array([],ndmin=1)
     if sp.shape(labels)[0] != 2:
         self._logger.error('Exactly two labels must be given')
         P1 = self.get_pore_indices(labels=labels[0])
         P2 = self.get_pore_indices(labels=labels[1])
         #Check if labels overlap
         if sp.sum(sp.in1d(P1,P2)) > 0: 
             self._logger.error('Some labels overlap, iterface cannot be found')
             # Interface throats are those neighbouring both pore groups.
             T1 = self.find_neighbor_throats(P1)
             T2 = self.find_neighbor_throats(P2)
             Tmask = sp.in1d(T1,T2)
             Tind = T1[Tmask]
     return Tind
Пример #42
    def on_shifted_sp_curves(self, t):
        """Recompute the shifted strain control points stored in ``P4Rm.ParamDict``.

        Depending on ``AllDataDict["model"]`` (0, 1 or 2) this writes
        ``"x_sp"`` and ``"strain_shifted"``; for models 0 and 1 it also
        writes ``"scale_strain"`` and ``"stain_out"``.  Nothing is returned.

        Values are written through the class (``P4Rm.ParamDict``) and read
        back through the instance ``a``, so the statement order matters.
        NOTE(review): this assumes ``a.ParamDict`` and ``P4Rm.ParamDict`` are
        the same dictionary -- confirm against the ``P4Rm`` class.

        ``t`` is the scalar that the control-point positions are scaled by
        and subtracted from.
        """
        a = P4Rm()
        if a.AllDataDict["model"] == 0:
            # Positions: t - k*t/len(sp) for k = 2 .. len(sp).
            temp_1 = arange(2, len(a.ParamDict["sp"]) + 1)
            temp_2 = temp_1 * t / (len(a.ParamDict["sp"]))
            P4Rm.ParamDict["x_sp"] = t - temp_2
            # All control values except the last one.
            shifted_sp = a.ParamDict["sp"][:-1:]
            # Mask of "depth" entries that match an "x_sp" value to 3 decimals.
            temp_3 = in1d(around(a.ParamDict["depth"], decimals=3), around(a.ParamDict["x_sp"], decimals=3))
            temp_4 = a.ParamDict["strain_i"][temp_3]
            P4Rm.ParamDict["scale_strain"] = shifted_sp / temp_4
            # A zero scale is replaced by 1.0 before it is used as a divisor.
            P4Rm.ParamDict["scale_strain"][a.ParamDict["scale_strain"] == 0] = 1.0
            P4Rm.ParamDict["strain_shifted"] = asarray(shifted_sp) * 100.0 / a.ParamDict["scale_strain"]
            # The key is spelled "stain_out" in the code; it is a runtime key.
            P4Rm.ParamDict["stain_out"] = a.ParamDict["sp"][-1]

        elif a.AllDataDict["model"] == 1:
            # Positions: t - k*t/(len(sp) - 3) for k = 0 .. len(sp) - 3.
            temp_1 = arange(0, len(a.ParamDict["sp"]) + 1 - 3)
            temp_2 = temp_1 * t / (len(a.ParamDict["sp"]) - 3)
            P4Rm.ParamDict["x_sp"] = t - temp_2
            # All control values except the first and the last one.
            shifted_sp = a.ParamDict["sp"][1:-1:]
            temp_3 = in1d(around(a.ParamDict["depth"], decimals=3), around(a.ParamDict["x_sp"], decimals=3))
            temp_4 = a.ParamDict["strain_i"][temp_3]
            P4Rm.ParamDict["scale_strain"] = shifted_sp / temp_4
            P4Rm.ParamDict["scale_strain"][a.ParamDict["scale_strain"] == 0] = 1.0

            P4Rm.ParamDict["strain_shifted"] = asarray(shifted_sp) * 100.0 / a.ParamDict["scale_strain"]
            # The first and last control values are stored separately.
            temp_5 = array([a.ParamDict["sp"][0], a.ParamDict["sp"][-1]])
            P4Rm.ParamDict["stain_out"] = temp_5

        elif a.AllDataDict["model"] == 2:
            # Four positions built from t and sp[1], sp[2], sp[3].
            x_sp_temp = []
            x_sp_temp.append(t * (1 - a.ParamDict["sp"][1]))
            x_sp_temp.append(t * (1 - a.ParamDict["sp"][1] + a.ParamDict["sp"][2] / 2))
            x_sp_temp.append(t * (1 - a.ParamDict["sp"][1] - a.ParamDict["sp"][3] / 2))
            x_sp_temp.append(t * 0.05)
            P4Rm.ParamDict["x_sp"] = x_sp_temp

            # Two values built from sp[0] and sp[6].
            y_sp_temp = []
            y_sp_temp.append(a.ParamDict["sp"][0] / 2)
            y_sp_temp.append(a.ParamDict["sp"][0] / 2 + a.ParamDict["sp"][6] / 2)
            P4Rm.ParamDict["strain_shifted"] = y_sp_temp
Пример #43
# Merges consecutive events that have identical transformed coordinates and
# returns the event list without the entries that were merged away.
# NOTE(review): this listing is incomplete -- several continuation lines in
# the merge section are cut off, and what looks like an ``else`` branch has
# lost its header.  The code uses Python 2 ``print`` statements.
# NOTE(review): comparing only neighbours presumably relies on ``event_list``
# being sorted by coordinates -- confirm with the caller.
def make_unique_by_strain(event_list):
    # event_list = make_unique_by_strain(event_list)

    # Indices of events that were folded into their successor.
    rm_idx = []
    for i in range(1, event_list.shape[0]):
        # Progress output: a dot every 1000 events, the count every 10000.
        if i % 1000 == 0:
            print '.',
            if i % 10000 == 0:
                print '%i' % i

        old_coords = event_list[i - 1].get_coords(trafo=True)
        curr_coords = event_list[i].get_coords(trafo=True)

        if old_coords.shape[0] == curr_coords.shape[0] and sp.all(
                old_coords == curr_coords):

            ### assertion that we did everything right
            if event_list[i - 1].chr == event_list[i].chr:
                assert (event_list[i - 1].strand == event_list[i].strand)
                assert (event_list[i].strain.shape[0] == 1)
                assert (event_list[i - 1].gene_name != event_list[i].gene_name)

            # Position of the current event's single strain in the previous
            # event's strain list, if it is present there.
            idx = sp.where(event_list[i -
                                      1].strain == event_list[i].strain[0])[0]
            if idx.shape[0] > 0:
                assert (idx.shape[0] == 1)
                assert (sp.all(event_list[i].get_coords(
                    trafo=True) == event_list[i - 1].get_coords(trafo=True)))
                if not event_list[i].gene_name[0] in event_list[i -
                        i - 1].gene_name = sp.r_[event_list[i - 1].gene_name,
                # The merged information is carried forward in slot i.
                event_list[i] = event_list[i - 1]
                event_list[i].strain = sp.r_[[event_list[i - 1].strain[0]],
                assert (sp.all(
                    sp.sort(event_list[i].strain) == sp.sort(
                ### TODO !!!!!!!!!!!!! make sure that we keep different coordinates if the strains differ ...
                if not event_list[i].gene_name[0] in event_list[i -
                    event_list[i].gene_name = sp.r_[
                        event_list[i - 1].gene_name,
            # The predecessor is dropped once it has been merged into slot i.
            rm_idx.append(i - 1)

    print 'events dropped: %i' % len(rm_idx)
    # Keep every index that was not marked for removal.
    keep_idx = sp.where(~sp.in1d(sp.arange(event_list.shape[0]), rm_idx))[0]
    event_list = event_list[keep_idx]

    return event_list
Пример #44
    def timereduce(self, timelims=None,timesselected=None):
        """Build the boolean mask of entries of ``self.Time_Vector`` to keep.

        ``timelims`` is a pair ``(start, stop)`` selecting the half-open range
        ``start <= t < stop``.  ``timesselected`` is a collection of exact time
        values to keep; when both are given, ``timesselected`` wins because its
        mask is assigned last.  At least one of the two must be supplied,
        otherwise the assertion fails.

        NOTE(review): the listing stops after the mask is computed; the code
        that prunes the arrays with ``tkeep`` is not visible here.
        """
        assert (timelims is not None) or (timesselected is not None), "Need a set of limits or selected set of times"

        if timelims is not None:
            tkeep = sp.logical_and(self.Time_Vector>=timelims[0],self.Time_Vector<timelims[1])
        if timesselected is not None:
            tkeep = sp.in1d(self.Time_Vector,timesselected)
        # prune the arrays
Пример #45
def split_data(RV):
    """Randomly split every array in ``RV`` into train, test and validation parts.

    The samples (first axis) are assigned at random: ``int(4 * N / 5)`` go to
    the training set, ``int(N / 10)`` to the test set and the rest to the
    validation set, where ``N = RV["Y"].shape[0]``.  Within each part the
    samples keep their original relative order.

    Parameters
    ----------
    RV : dict
        Maps names to arrays that all have ``N`` entries along the first axis.
        Must contain the key ``"Y"``.

    Returns
    -------
    dict
        For every key ``k`` of ``RV`` the entries ``"k_train"``, ``"k_val"``
        and ``"k_test"``.
    """
    n_samples = RV["Y"].shape[0]
    n_train = int(4 * n_samples / 5.0)
    n_test = int(1 * n_samples / 10.0)
    idxs = sp.random.permutation(n_samples)

    def _as_mask(selected):
        # Boolean membership mask; same result as in1d(arange(N), selected)
        # without the set-membership search.
        mask = sp.zeros(n_samples, dtype=bool)
        mask[selected] = True
        return mask

    Itrain = _as_mask(idxs[:n_train])
    Itest = _as_mask(idxs[n_train:(n_train + n_test)])
    Ival = _as_mask(idxs[(n_train + n_test):])

    out = {}
    for key, values in RV.items():
        out["%s_train" % key] = values[Itrain]
        out["%s_val" % key] = values[Ival]
        out["%s_test" % key] = values[Itest]

    return out
Пример #46
 # Checks that every pore seed equals one of the throat seeds and that the
 # smallest pore seed is not below the smallest throat seed.
 # NOTE(review): 'pore.seed' is removed at the top and read again in the
 # assertions; the statement that recreates it (presumably an add-model call)
 # is not visible in this listing.
 def test_from_neighbor_throats_max(self):
     # Remove any existing seed data and models so the test starts clean.
     self.geo.pop('pore.seed', None)
     self.geo.models.pop('pore.seed', None)
     self.geo.models.pop('throat.seed', None)
     # Fresh random seed for every throat.
     self.geo['throat.seed'] = sp.rand(self.net.Nt, )
     assert sp.all(sp.in1d(self.geo['pore.seed'], self.geo['throat.seed']))
     pmin = sp.amin(self.geo['pore.seed'])
     tmin = sp.amin(self.geo['throat.seed'])
     assert pmin >= tmin
Пример #47
def argintersect_left(a, b):
    """
    find indices in a, whose corresponding values are in b

    Parameters
    ----------
    a : array
        array, for which indices are returned that are in the intersect with b
    b : array
        array to be intersected with a

    Returns
    -------
    array of int
        the indices of elements of a, which are in intersect of a and b
    """
    return sp.arange(a.shape[0])[sp.in1d(a, b)]
Пример #48
def argintersect_left(a, b):
    """
    find indices in a, whose corresponding values are in b

    Parameters
    ----------
    a : array
        array, for which indices are returned that are in the intersect with b
    b : array
        array to be intersected with a

    Returns
    -------
    array of int
        the indices of elements of a, which are in intersect of a and b
    """
    return sp.arange(a.shape[0])[sp.in1d(a, b)]
Пример #49
    # Computes the net rate into a set of pores, either for the whole group
    # or for each pore on its own.
    # NOTE(review): this listing is incomplete -- the docstring delimiters and
    # section headers are missing (the description below is bare text) and the
    # ``find_neighbor_throats(P,`` call has lost its remaining arguments.
    def rate(self, pores='', mode='group'):
        Send a list of pores and receive the net rate
        of material moving into them.

        pores : array_like
            The pores where the net rate will be calculated
        mode : string, optional
            Controls how to return the rate.  Options are:

            - 'group'(default): It returns the cumulative rate moving into them
            - 'single': It calculates the rate for each pore individually.

        pores = sp.array(pores, ndmin=1)
        R = []
        # One pass for the whole group, or one pass per pore in 'single' mode.
        # Any other mode leaves ``iteration`` undefined.
        if mode == 'group': iteration = 1
        elif mode == 'single': iteration = sp.shape(pores)[0]
        for i in sp.r_[0:iteration]:
            if mode == 'group': P = pores
            elif mode == 'single': P = pores[i]
            throats = self._net.find_neighbor_throats(P,
            p1 = self._net.find_connected_pores(throats)[:, 0]
            p2 = self._net.find_connected_pores(throats)[:, 1]
            pores1 = sp.copy(p1)
            pores2 = sp.copy(p2)
            #Changes to pores1 and pores2 to make them as the internal and external pores
            # NOTE(review): unary minus on a boolean mask is rejected by
            # current NumPy; ``~`` is the boolean negation.
            pores1[-sp.in1d(p1, P)] = p2[-sp.in1d(p1, P)]
            pores2[-sp.in1d(p1, P)] = p1[-sp.in1d(p1, P)]
            X1 = self[self._quantity][pores1]
            X2 = self[self._quantity][pores2]
            g = self['throat.conductance'][throats]
            # Rate for this pass: conductance times (external - internal).
            R.append(sp.sum(sp.multiply(g, (X2 - X1))))
        return (sp.array(R, ndmin=1))
Пример #50
def get_conf_events(options, gid):
    """Collect the confirmed events that belong to the given genes.

    For every event type the file
    ``<outdir>/merge_graphs_<type>_C<confidence>.counts.hdf5`` is opened.  Its
    ``conf_idx`` dataset lists the confirmed events (1-based) and ``gene_idx``
    gives the 1-based gene of each event.  A first entry of ``-1`` in
    ``conf_idx`` is treated as "no confirmed events".

    Parameters
    ----------
    options : object
        Must provide ``event_types`` (iterable of str), ``outdir`` (str) and
        ``confidence`` (int).
    gid : int or array_like of int
        0-based gene indices to select.

    Returns
    -------
    array of str
        One row ``[event_type, event_index]`` per selected event, with the
        0-based event index converted to a string.  Empty if nothing matches.
    """
    event_info = []

    for event_type in options.event_types:
        fname = os.path.join(options.outdir, 'merge_graphs_%s_C%i.counts.hdf5' % (event_type, options.confidence))
        # The context manager closes the HDF5 file; it was left open before.
        with h5py.File(fname, 'r') as IN:
            if 'conf_idx' in IN and IN['conf_idx'].shape[0] > 0 and IN['conf_idx'][0] != -1:
                # Stored indices are 1-based; convert to 0-based.
                conf_idx = IN['conf_idx'][:].astype('int') - 1
                gene_idx = IN['gene_idx'][:][conf_idx].astype('int') - 1
                k_idx = sp.where(sp.in1d(gene_idx, gid))[0]
                if k_idx.shape[0] > 0:
                    event_info.extend([[event_type, x] for x in conf_idx[k_idx]])

    return sp.array(event_info, dtype='str')
Пример #51
 # Checks that every pore seed equals one of the throat seeds and that the
 # smallest pore seed is not below the smallest throat seed.
 # NOTE(review): 'pore.seed' is removed at the top and read again in the
 # assertions; the statement that recreates it (presumably an add-model call
 # using ``mod``) is not visible in this listing.
 def test_neighbor_max(self):
     # Remove existing seed data and models; ``catch`` only absorbs the
     # popped values and is never read.
     catch = self.geo.pop('pore.seed', None)
     catch = self.geo.models.pop('pore.seed', None)
     catch = self.geo.models.pop('throat.seed', None)
     mod = gm.pore_misc.neighbor
     # Fresh random seed for every throat.
     self.geo['throat.seed'] = sp.rand(self.net.Nt,)
     assert sp.all(sp.in1d(self.geo['pore.seed'], self.geo['throat.seed']))
     pmin = sp.amin(self.geo['pore.seed'])
     tmin = sp.amin(self.geo['throat.seed'])
     assert pmin >= tmin
Пример #52
 # Checks that in 'loose' mode with factor 0 the conduit conductance is zero
 # on unoccupied throats and non-zero on all other throats.
 # NOTE(review): the two ``mode='loose', factor=0)`` lines are the tails of
 # calls whose beginnings are missing from this listing.
 def test_conduit_conductance_loose(self):
     # Mark pores 19 and 20 as unoccupied.
     self.phase['pore.occupancy'][[19, 20]] = 0
     # t1: throats with zero occupancy; t2: every other throat.
     t1 = self.net.Ts[self.phase['throat.occupancy'] == 0]
     t2 = self.net.Ts[~sp.in1d(self.net.Ts, t1)]
                           mode='loose', factor=0)
                           mode='loose', factor=0)
     assert sp.all(self.phase['throat.cond_conductance'][t1] == 0)
     assert sp.all(self.phase['throat.cond_conductance'][t2] != 0)
# Expands rows whose cells hold comma-separated lists into one row per list
# item, for each group of columns in ``cols_of_interest``.
# NOTE(review): the ``cols_of_interest`` literal is cut off in this listing;
# its remaining entries and closing brackets are missing.
def _handle_multi_entries(header, data):

    cols_of_interest = [[
        'tumor_wgs_submitter_specimen_id', 'tumor_wgs_icgc_specimen_id',
        'tumor_wgs_submitter_sample_id', 'tumor_wgs_icgc_sample_id',
        'tumor_wgs_aliquot_id', 'tumor_wgs_oxog_score', 'tumor_wgs_ContEST',
        'tumor_wgs_Stars', 'tumor_wgs_bwa_alignment_gnos_repo',
        'tumor_wgs_bwa_alignment_bam_file_name', 'tumor_wgs_minibam_gnos_repo',
        'tumor_wgs_minibam_gnos_id', 'is_mar2016_tumor_wgs_minibam',

    for cols in cols_of_interest:
        # Column positions of this group within the header.
        c_idx = sp.where(sp.in1d(header, cols))[0]
        # Rows with a comma in the group's first column hold multiple entries.
        r_idx = sp.where([',' in x for x in data[:, c_idx[0]]])[0]

        for r in r_idx:
            # One row per column of the group, one column per list item; the
            # assertions require every cell to hold the same number of items.
            data_ = sp.array([x.split(',') for x in data[r, c_idx]])
            assert len(data_.shape) > 1
            assert data_.shape[1] > 1

            # Append a copy of row r for each extra item and fill in its values.
            for r2 in range(1, data_.shape[1]):
                data = sp.r_[data, data[r, :][sp.newaxis, :]]
                data[-1, c_idx] = data_[:, r2]
            # The original row keeps the first item.
            data[r, c_idx] = data_[:, 0]

    return data
Пример #54
    def _generate_throats(self):
        """
        Generate the throats (connections, numbering and types)

        Builds the nearest-neighbour connections of a regular ``Nx x Ny x Nz``
        lattice with the shape of ``self._template``, keeps those whose two
        ends both appear in the ``'voxel_index'`` pore data, and stores the
        result through ``set_throat_data`` / ``set_throat_info``.
        """
        self._logger.info("generate_throats: Define connections between pores")

        [Nx, Ny, Nz] = sp.shape(self._template)
        Np = Nx*Ny*Nz
        ind = np.arange(0, Np)

        #Generate throats based on pattern of the adjacency matrix
        #This is taken from Cubic
        # Neighbours one step (+1), one row (+Nx) and one layer (+Nx*Ny) away.
        tpore1_1 = ind[(ind % Nx) < (Nx-1)]
        tpore2_1 = tpore1_1 + 1
        tpore1_2 = ind[(ind % (Nx*Ny)) < (Nx*(Ny-1))]
        tpore2_2 = tpore1_2 + Nx
        tpore1_3 = ind[(ind % Np) < (Nx*Ny*(Nz-1))]
        tpore2_3 = tpore1_3 + Nx*Ny
        tpore1 = sp.hstack((tpore1_1, tpore1_2, tpore1_3))
        tpore2 = sp.hstack((tpore2_1, tpore2_2, tpore2_3))
        connections = sp.vstack((tpore1, tpore2)).T
        # Sort by first pore, then by second pore.
        connections = connections[sp.lexsort((connections[:, 1], connections[:, 0]))]

        #Remove throats to non-active pores
        img_ind = self.get_pore_data(prop='voxel_index')
        first_active = sp.in1d(connections[:, 0], img_ind)
        second_active = sp.in1d(connections[:, 1], img_ind)
        # A throat survives only when both of its ends are active.
        tind = first_active & second_active
        connections = connections[tind]
        Nt = sp.sum(tind)

        #Need a cleaner way to do this other than voxel_to_pore map...figure out later
        self.set_throat_data(prop='connections', data=self._voxel_to_pore_map[connections])
        self.set_throat_info(label='all', locations=sp.ones(Nt, dtype=bool))
        self.set_throat_data(prop='numbering', data=np.arange(0, Nt))
        self._logger.debug("generate_throats: End of method")
Пример #55
def intersect_rows(array1, array2, index = None):
    """Return intersection of rows

    Parameters
    ----------
    array1, array2 : 2-D arrays
        Arrays with the same dtype and number of columns.  Non-empty inputs
        must be C-contiguous so each row can be viewed as a single record.
    index : bool, optional
        If True, also return the row positions of the common rows.

    Returns
    -------
    rows : 2-D array
        The unique rows present in both inputs, sorted column by column.  If
        either input has no rows, that empty input is returned as is.
    a1_i, a2_i : 1-D arrays
        Only returned when ``index`` is True: positions of the common rows in
        ``array1`` and ``array2`` (empty arrays if an input has no rows).
    """
    # ``index == True`` is kept deliberately so that only True (or 1) enables
    # the index output, as in the original interface.
    want_index = index == True

    # Without rows nothing can intersect; viewing an empty array as records
    # would also be pointless.
    for empty_candidate in (array1, array2):
        if empty_candidate.shape[0] == 0:
            if want_index:
                return (empty_candidate, sp.zeros((0,)), sp.zeros((0,)))
            return empty_candidate

    # View each row as a single structured element so the 1-D set routines
    # compare whole rows.
    array1_v = array1.view([('', array1.dtype)] * array1.shape[1])
    array2_v = array2.view([('', array2.dtype)] * array2.shape[1])
    array_i = sp.intersect1d(array1_v, array2_v)
    rows = array_i.view(array1.dtype).reshape(array_i.shape[0], array1.shape[1])

    if want_index:
        a1_i = sp.where(sp.in1d(array1_v, array_i))[0]
        a2_i = sp.where(sp.in1d(array2_v, array_i))[0]
        return (rows, a1_i, a2_i)
    # The visible original had lost its ``else`` branches, so this return
    # was unreachable and the function returned None.
    return rows
Пример #56
    def _do_one_inner_iteration(self):
        """Assemble and solve the linear system ``A X = B`` once.

        The first ``num_pores()`` entries of the solution are stored in
        ``self._result``; any remaining entries are stored in
        ``self._Neumann_super_X``.

        Raises
        ------
        Exception
            If every entry of ``self._BCtypes`` is zero, i.e. no boundary
            condition has been applied.
        """
        if (self._BCtypes == 0).all():
            raise Exception('No boundary condition has been applied to this network.')

        # In the visible original the solve sat after the ``raise`` inside the
        # same branch and could never run.
        n_pores = self._net.num_pores()
        self._result = sp.zeros(n_pores)
        self._logger.info("Creating Coefficient matrix for the algorithm")
        A = self._build_coefficient_matrix()
        self._logger.info("Creating RHS matrix for the algorithm")
        B = self._build_RHS_matrix()
        self._logger.info("Solving AX = B for the sparse matrices")
        X = sprslin.spsolve(A, B)
        # Plain slices replace the index masks; the old mask was negated with
        # unary minus, which current NumPy rejects for boolean arrays.
        self._Neumann_super_X = X[n_pores:]
        self._result = X[:n_pores]
Пример #57
# Trims the two exon variants of each event to a common boundary and removes
# the events listed in ``rm_idx``.
# NOTE(review): this listing is incomplete -- the two checks at the top of the
# loop have lost their bodies, so nothing visible ever appends to ``rm_idx``
# (presumably the missing lines did so and skipped the event).  ``CFG`` is not
# used in the visible code.  The prints are Python 2 statements.
def curate_alt_prime(event_list, CFG):
    # event_list = curate_alt_prime(event_list)

    if event_list.shape[0] == 0:
        return event_list

    rm_idx = []
    corr_count = 0

    for i in range(event_list.shape[0]):

        ### check if we have introns of zero length
        #if sp.any(event_list[i].exons1[:, 1] - event_list[i].exons1[:, 1] < 2) or sp.any(event_list[i].exons2[:, 1] - event_list[i].exons2[:, 1] < 2):
        if (event_list[i].exons1[1, 0] - event_list[i].exons1[0, 1] < 1) or (
                event_list[i].exons2[1, 0] - event_list[i].exons2[0, 1] < 1):

        ### check if alt exons overlap, otherwise we cannot curate (trim to shortest length)
        if (sp.all(event_list[i].exons1[0, :] == event_list[i].exons2[0, :]) and (event_list[i].exons1[1, 1] <= event_list[i].exons2[1, 0] or event_list[i].exons1[1, 0] >= event_list[i].exons2[1, 1])) or \
           (sp.all(event_list[i].exons1[1, :] == event_list[i].exons2[1, :]) and (event_list[i].exons1[0, 1] <= event_list[i].exons2[0, 0] or event_list[i].exons1[0, 0] >= event_list[i].exons2[0, 1])):

        # First exons identical: give both second exons the smaller end.
        if sp.all(event_list[i].exons1[0, :] == event_list[i].exons2[0, :]):
            if event_list[i].exons1[1, 1] > event_list[i].exons2[1, 1]:
                event_list[i].exons1[1, 1] = event_list[i].exons2[1, 1]
                corr_count += 1
            elif event_list[i].exons1[1, 1] < event_list[i].exons2[1, 1]:
                event_list[i].exons2[1, 1] = event_list[i].exons1[1, 1]
                corr_count += 1
        # Second exons identical: give both first exons the larger start.
        elif sp.all(event_list[i].exons1[1, :] == event_list[i].exons2[1, :]):
            if event_list[i].exons1[0, 0] > event_list[i].exons2[0, 0]:
                event_list[i].exons2[0, 0] = event_list[i].exons1[0, 0]
                corr_count += 1
            elif event_list[i].exons1[0, 0] < event_list[i].exons2[0, 0]:
                event_list[i].exons1[0, 0] = event_list[i].exons2[0, 0]
                corr_count += 1

    ### remove events with non-overlapping alt_exons
    if len(rm_idx) > 0:
        # Keep every index that was not marked for removal.
        keep_idx = sp.where(
            ~sp.in1d(sp.arange(event_list.shape[0]), rm_idx))[0]
        event_list = event_list[keep_idx]

    print 'Corrected %i events' % corr_count
    print 'Removed %i events' % len(rm_idx)

    return event_list