def trainStep(fnn, trainer, trndata, tstdata):
    trainer.trainEpochs(1)
    trnresult = percentError(trainer.testOnClassData(), trndata["class"])
    tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata["class"])

    print "epoch: %4d" % trainer.totalepochs, \
        " train error: %5.2f%%" % trnresult, \
        " test error: %5.2f%%" % tstresult

    out = fnn.activateOnDataset(griddata)
    out = out.argmax(axis=1)  # the highest output activation gives the class
    out = out.reshape(X.shape)

    figure(1)
    ioff()      # interactive graphics off
    clf()       # clear the plot
    hold(True)  # overplot on
    for c in [0, 1, 2]:
        here, _ = where(trndata["class"] == c)
        plot(trndata["input"][here, 0], trndata["input"][here, 1], "o")
    if out.max() != out.min():  # safety check against flat field
        contourf(X, Y, out)     # plot the contour
    ion()   # interactive graphics on
    draw()  # update the plot

    figure(2)
    ioff()      # interactive graphics off
    clf()       # clear the plot
    hold(True)  # overplot on
    for c in [0, 1, 2]:
        here, _ = where(tstdata["class"] == c)
        plot(tstdata["input"][here, 0], tstdata["input"][here, 1], "o")
    if out.max() != out.min():  # safety check against flat field
        contourf(X, Y, out)     # plot the contour
    ion()   # interactive graphics on
    draw()  # update the plot
def get_indices(arr, vals, disp=False):
    """
    Get the indices of all the elements between vals[0] and vals[1].
    Alternatively also between vals[2] and vals[3] if they are given.

    Input:
        arr  : the array in which to look for the elements
        vals : a list with either 2 or 4 values that correspond to the
               limits between which the indices of the values are sought

    Optional argument(s):
        disp : Boolean parameter, if True it displays start and end
               index and the number of channels inbetween. Only works
               for value lists of length 2.

    Assumes the values in 'arr' are the mid values and that they are
    evenly spaced for all values.

    ********************** Important! **********************************
    The output indices are Python friendly, i.e. they are 0-based. Take
    care when using the indices in other software, e.g. GILDAS, MIRIAD,
    which are 1-based.
    --------------------------------------------------------------------

                            oOO Changelog OOo

    *2012/02 Added more documentation, "important" notice about indexing
    *2011/07 Removed +1 in the output indices to be compatible with the
             rest of the module, where Python's 0-based indexing is used.
    *2010/12 Doc written
    *2010/06 Function created
    """
    from scipy import concatenate, where, array, diff
    dx = abs(0.5 * diff(arr)[0])
    if len(vals) == 4:
        v1, v2, v3, v4 = vals + array([-1, 1, -1, 1]) * dx
        # if the user wants two velocity areas to calculate noise
        low = where((arr >= v1) * (arr <= v2))[0]
        high = where((arr >= v3) * (arr <= v4))[0]
        channels = concatenate((low, high))
    elif len(vals) == 2:
        v1, v2 = vals + array([-1, 1]) * dx
        # channels = where((arr>=v1)*(arr<v2))[0]+1
        # this is because if +1 it is FITS/Fortran safe
        # changed: removed +1 for consistency in program
        channels = where((arr >= v1) * (arr <= v2))[0]
    #
    if disp and len(vals) == 2:
        first, last = channels.min(), channels.max()
        n = last - first + 1
        print "\nFirst: %d,\n Last: %d\n Nchan: %d\n" % (first, last, n)
    return channels
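# Usage sketch (hypothetical data, not part of the original module): build an
# evenly spaced velocity axis and pick out one and two channel windows.
def _demo_get_indices():
    from scipy import linspace
    varr = linspace(-50., 50., 101)                      # mid values, dv = 1
    chans = get_indices(varr, [-10., 10.], disp=True)    # single window
    noise = get_indices(varr, [-40., -20., 20., 40.])    # two windows, e.g. for noise
    return chans, noise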
def make_introns_feasible(introns, genes, CFG):
    # introns = make_introns_feasible(introns, genes, CFG)

    tmp1 = sp.array([x.shape[0] for x in introns[:, 0]])
    tmp2 = sp.array([x.shape[0] for x in introns[:, 1]])

    unfeas = sp.where((tmp1 > 200) | (tmp2 > 200))[0]
    print >> CFG['fd_log'], 'found %i unfeasible genes' % unfeas.shape[0]

    while unfeas.shape[0] > 0:
        ### make filter more stringent
        CFG['read_filter']['exon_len'] = min(36, CFG['read_filter']['exon_len'] + 4)
        CFG['read_filter']['mincount'] = 2 * CFG['read_filter']['mincount']
        CFG['read_filter']['mismatch'] = max(CFG['read_filter']['mismatch'] - 1, 0)

        ### get new intron counts
        tmp_introns = get_intron_list(genes[unfeas], CFG)
        introns[unfeas, :] = tmp_introns

        ### still unfeasible?
        tmp1 = sp.array([x.shape[0] for x in introns[:, 0]])
        tmp2 = sp.array([x.shape[0] for x in introns[:, 1]])
        still_unfeas = sp.where((tmp1 > 200) | (tmp2 > 200))[0]

        idx = sp.where(~sp.in1d(unfeas, still_unfeas))[0]
        for i in unfeas[idx]:
            print >> CFG['fd_log'], '[feasibility] set criteria for gene %s to: min_ex %i, min_conf %i, max_mism %i' % (genes[i].name, CFG['read_filter']['exon_len'], CFG['read_filter']['mincount'], CFG['read_filter']['mismatch'])
        unfeas = still_unfeas

    return introns
def rectangle(iterable):
    """Turn the masks of an accessor iterable into the coordinates of a
    rectangle surrounding all the segment points.

    The coordinates are normalized to the interval [-1, 1]."""
    # TODO: make this work for multiclass data.
    for sample, target in iterable:
        orig_shape = target.shape[1], target.shape[2]
        target.shape = scipy.size(target) / 2, 2
        classes = target[:, 0].copy()
        classes.shape = orig_shape
        indices = scipy.where(classes.sum(axis=0) >= 1)
        min0, max0 = indices[0][0], indices[0][-1]
        indices = scipy.where(classes.sum(axis=1) >= 1)
        min1, max1 = indices[0][0], indices[0][-1]
        print min0, max0, min1, max1
        # Normalize.
        normalize = lambda x, rng: 2. * x / rng - 1
        size0, size1 = classes.shape[0], classes.shape[1]
        min0 = normalize(min0, size0)
        max0 = normalize(max0, size0)
        min1 = normalize(min1, size1)
        max1 = normalize(max1, size1)
        target = scipy.array((min0, max0, min1, max1))
        print target
        yield sample, target
def get_concentration_functions(composition_table_dict):

    meta = composition_table_dict['meta']
    composition_table = Table.from_dict(composition_table_dict['data'])
    elements = [col for col in composition_table.columns if col not in meta]
    x = composition_table["X"].values
    y = composition_table["Y"].values
    cats = composition_table["X"].unique()
    concentration, conc, d, y_c, functions = {}, {}, {}, {}, RecursiveDict()

    for el in elements:
        concentration[el] = to_numeric(composition_table[el].values) / 100.
        conc[el], d[el], y_c[el] = {}, {}, {}

        if meta['X'] == 'category':
            for i in cats:
                k = '{:06.2f}'.format(float(i))
                y_c[el][k] = to_numeric(y[where(x == i)])
                conc[el][k] = to_numeric(concentration[el][where(x == i)])
                d[el][k] = interp1d(y_c[el][k], conc[el][k])

            functions[el] = lambda a, b, el=el: d[el][a](b)
        else:
            functions[el] = interp2d(float(x), float(y), concentration[el])

    return functions
def __init__(self, which_case, LUT, RandomSamples, interp_type):
    print 'SciPy Interpolating ', which_case
    select = {
        "rhoe": ('Density', 'StaticEnergy'),
        "PT":   ('Pressure', 'Temperature'),
        "Prho": ('Pressure', 'Density'),
        "rhoT": ('Density', 'Temperature'),
        "Ps":   ('Pressure', 'Entropy'),
        "hs":   ('Enthalpy', 'Entropy'),
    }
    thermo1, thermo2 = select[which_case]

    x = getattr(LUT, thermo1)
    y = getattr(LUT, thermo2)
    samples_x = getattr(RandomSamples, thermo1)
    samples_y = getattr(RandomSamples, thermo2)
    setattr(self, thermo1, samples_x)
    setattr(self, thermo2, samples_y)

    variables = sp.array(['Temperature', 'Density', 'Enthalpy', 'StaticEnergy',
                          'Entropy', 'Pressure', 'SoundSpeed2', 'dPdrho_e', 'dPde_rho',
                          'dTdrho_e', 'dTde_rho', 'Cp', 'Mu', 'Kt'])

    for var in variables[sp.where((variables != thermo1) * (variables != thermo2))]:
        z = getattr(LUT, var)
        interp_func = sp.interpolate.griddata((x, y), z,
                                              sp.column_stack((samples_x, samples_y)),
                                              method=interp_type)
        # fall back to nearest-neighbour interpolation where griddata returned NaN
        nan_index = sp.where(sp.isnan(interp_func))
        interp_func[nan_index] = sp.interpolate.griddata(
            (x, y), z,
            sp.column_stack((samples_x[nan_index], samples_y[nan_index])),
            method='nearest')
        setattr(self, var, interp_func)
    return
def getEncodedData(filename, encoding="additive", phenotype_id=None, maf=0.0):
    f = h5py.File(filename, 'r')
    if phenotype_id is not None:
        # convert to str only after the None check: str(None) == 'None' would
        # otherwise make this branch unconditional
        phenotype_id = str(phenotype_id)
        sample_ids = f['Genotype/sample_ids'][:]
        p_sample_ids = f['Phenotypes'][phenotype_id]['sample_ids'][:]
        y = f['Phenotypes'][phenotype_id]['y'][:]
        ind = sp.where(~sp.isnan(y))[0]
        y = y[ind]
        p_sample_ids = p_sample_ids[ind]
        ind = (sp.reshape(sample_ids, (sample_ids.shape[0], 1)) == p_sample_ids).nonzero()
        raw = f['Genotype/raw'][:]
        raw = raw[ind[0], :]
        [encoded, maf_v] = encodeHeterozygousData(raw)
        ind = sp.where(maf_v >= maf)[0]
        encoded = encoded[:, ind]
        identifiers = f['Genotype/identifiers'][:]
        identifiers = identifiers[ind]
        maf_v = maf_v[ind]
        f.close()
        return [encoded, maf_v, identifiers]
    if encoding == "additive":
        if 'encoded_additive' in f['Genotype'].keys():
            encoded = f['Genotype/encoded_additive'][:]
            maf_v = f['Genotype/global_maf'][:]
        else:
            [encoded, maf_v] = encodeHeterozygousData(f['Genotype/raw'][:])
    identifiers = f['Genotype/identifiers'][:]
    f.close()
    return [encoded, maf_v, identifiers]
def scanSound(self, source, minnotel):
    binarized = source
    scale = 60. / self.wavetempo * (binarized[0].size / self.duration)
    noise_length = scale * minnotel

    antinoised = sp.zeros_like(binarized)

    for i in range(sp.shape(binarized)[0]):
        new_line = binarized[i, :].copy()
        diffed = sp.diff(new_line)
        ones_keys = sp.where(diffed == 1)[0]
        minus_keys = sp.where(diffed == -1)[0]

        if (ones_keys.size != 0 and minus_keys.size != 0):
            if (ones_keys[0] > minus_keys[0]):
                new_line = self.cutNoise(
                    (0, minus_keys[0]), noise_length, new_line)
                minus_keys = sp.delete(minus_keys, 0)

            if (ones_keys[-1] > minus_keys[-1]):
                new_line = self.cutNoise(
                    (ones_keys[-1], new_line.size - 1), noise_length, new_line)
                ones_keys = sp.delete(ones_keys, -1)

            for j in range(sp.size(ones_keys)):
                new_line = self.cutNoise(
                    (ones_keys[j], minus_keys[j]), noise_length, new_line)

            antinoised[i, :] = new_line

    return antinoised
def cryptoInternal(self, data, base):
    addresses = scipy.array(range(base, base + (len(data) * 2), 2), scipy.uint32)
    for mask, xorVal in self.XOR_TABLE1:
        data = scipy.where((addresses & mask) == mask, data ^ xorVal, data)
    for mask, xorVal in self.XOR_TABLE2:
        data = scipy.where((addresses & mask) != 0, data ^ xorVal, data)
    return data
def smart_threshold(self):
    self.median = numpy.median(self.data)
    self.std = numpy.std(self.data)
    blank = scipy.where(self.data < self.median + 0.25 * self.std)
    signal = scipy.where(self.data > self.median + 0.25 * self.std)
    self.data[blank] = 0.0
    self.data[signal] = 1.0
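# Sketch of the same thresholding rule on a plain array (hypothetical data):
# values below median + 0.25*std go to 0, values above go to 1.
def _demo_smart_threshold():
    import numpy
    import scipy
    data = numpy.random.randn(64, 64)
    cut = numpy.median(data) + 0.25 * numpy.std(data)
    return scipy.where(data > cut, 1.0, 0.0)   # one-call equivalent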
def maskLowStddVoxels(self, dds, nMeanDds, nStddDds):
    unique = np.unique(sp.where(nStddDds.subd.asarray() <= 1.0 / 3,
                                dds.subd.asarray(),
                                dds.mtype.maskValue()))
    unique = unique[sp.where(unique != dds.mtype.maskValue())]
    if (dds.mpi.comm != None):
        unique = dds.mpi.comm.allreduce(unique.tolist(), op=mpi.SUM)
        unique = np.unique(unique)
    rootLogger.info("Unique constant stdd values = %s" % (unique,))

    rootLogger.info("Creating mask from unique constant values...")
    mskDds = mango.zeros_like(dds, mtype="segmented")
    for uVal in unique:
        mskDds.asarray()[...] = sp.where(dds.asarray() == uVal, 1, mskDds.asarray())
    rootLogger.info("Done creating mask from unique constant values.")

    rootLogger.info("Labeling connected constant zero-stdd regions...")
    mskDds.updateHaloRegions()
    mskDds.mirrorOuterLayersToBorder(False)
    self.writeIntermediateDds("_000ZeroStddForLabeling", mskDds)
    lblDds = mango.image.label(mskDds, 1)
    rootLogger.info("Done labeling connected constant stdd regions.")
    self.writeIntermediateDds("_000ZeroStdd", lblDds)

    countThresh = 0.01 * sp.product(lblDds.shape)
    rootLogger.info("Eliminating large clusters...")
    lblDds = mango.image.eliminate_labels_by_size(lblDds, minsz=int(countThresh),
                                                  val=lblDds.mtype.maskValue())
    self.writeIntermediateDds("_000ZeroStddLargeEliminated", lblDds)

    rootLogger.info("Assigning mask values...")
    mskDds.subd.asarray()[...] = \
        sp.where(lblDds.subd.asarray() == lblDds.mtype.maskValue(), True, False)
    self.writeIntermediateDds("_000ZeroStddMskd", mskDds)
    del lblDds

    for tmpDds in [dds, nMeanDds, nStddDds]:
        tmpDds.subd.asarray()[...] = \
            sp.where(mskDds.subd.asarray(),
                     tmpDds.mtype.maskValue(),
                     tmpDds.subd.asarray())
def eliminatePercentileTails(self, mskDds, loPercentile=10.0, hiPercentile=90.0):
    """
    Trims lower and/or upper image histogram tails by replacing
    :samp:`mskDds` voxel values with :samp:`mskDds.mtype.maskValue()`.
    """
    rootLogger.info("Eliminating percentile tails...")
    rootLogger.info("Calculating element frequencies...")
    elems, counts = elemfreq(mskDds)
    rootLogger.info("elems:\n%s" % (elems,))
    rootLogger.info("counts:\n%s" % (counts,))
    cumSumCounts = sp.cumsum(counts, dtype="float64")
    percentiles = 100.0 * (cumSumCounts / float(cumSumCounts[-1]))
    percentileElems = elems[sp.where(sp.logical_and(percentiles > loPercentile,
                                                    percentiles < hiPercentile))]
    loThresh = percentileElems[0]
    hiThresh = percentileElems[-1]
    rootLogger.info("Masking percentiles range (%s,%s) = (%s,%s)"
                    % (loPercentile, hiPercentile, loThresh, hiThresh))
    mskDds.asarray()[...] = \
        sp.where(
            sp.logical_and(
                sp.logical_and(mskDds.asarray() >= loThresh,
                               mskDds.asarray() <= hiThresh),
                mskDds.asarray() != mskDds.mtype.maskValue()
            ),
            mskDds.asarray(),
            mskDds.mtype.maskValue()
        )
    rootLogger.info("Done eliminating percentile tails.")
def step(self, *args):
    """First update the step size, then actually take a step along the gradient."""
    g = self.model.grad(*args)

    # Update the weighted exponential sq avg.
    self.sqExpAvgGrad *= self.exponentAvgM
    self.sqExpAvgGrad += (1 - self.exponentAvgM) * g**2
    self.sqExpAvgGrad[:] = where(self.sqExpAvgGrad < EPSILON, EPSILON, self.sqExpAvgGrad)

    # Update the muVect
    possUpdate = 1 + self.qLearningRate * g * self.expAvgGrad / self.sqExpAvgGrad
    #log.debug('max(possUpdate): %.4f, min(possUpdate): %.4f' % (max(possUpdate), min(possUpdate)))
    ## Keep this from going negative.
    possUpdate = where(possUpdate < 0.001, 0.001, possUpdate)
    self.muVect *= possUpdate

    # Do something to cap the update rate.  This is allowing the step rate
    # to overpower the decay completely.
    self.muVect = where(self.muVect > self.maxMuVect, self.maxMuVect, self.muVect)

    # Then update the exponential average
    self.expAvgGrad *= self.exponentAvgM
    self.expAvgGrad += (1 - self.exponentAvgM) * g

    self.model.params -= self.muVect * g
    Trainer.step(self, *args)
def from_gene(self, gene):

    sg = gene.splicegraph.vertices
    breakpoints = sp.unique(sg.ravel())
    self.segments = sp.zeros((2, 0), dtype='int')
    for j in range(1, breakpoints.shape[0]):
        s = sp.sum(sg[0, :] < breakpoints[j])
        e = sp.sum(sg[1, :] < breakpoints[j])
        if s > e:
            self.segments = sp.c_[self.segments, [breakpoints[j - 1], breakpoints[j]]]

    ### match nodes to segments
    self.seg_match = sp.zeros((0, sg.shape[1]), dtype='bool')
    for j in range(sg.shape[1]):
        tmp = ((sg[0, j] <= self.segments[0, :]) & (sg[1, j] >= self.segments[1, :]))
        if self.seg_match.shape[0] == 0:
            self.seg_match = tmp.copy().reshape((1, tmp.shape[0]))
        else:
            self.seg_match = sp.r_[self.seg_match, tmp.reshape((1, tmp.shape[0]))]

    ### create edge graph between segments
    self.seg_edges = sp.zeros((self.segments.shape[1], self.segments.shape[1]), dtype='bool')
    k, l = sp.where(sp.triu(gene.splicegraph.edges))
    for m in range(k.shape[0]):
        ### donor segment
        d = sp.where(self.seg_match[k[m], :])[0][-1]
        ### acceptor segment
        a = sp.where(self.seg_match[l[m], :])[0][0]
        self.seg_edges[d, a] = True
def newEpisode(self):
    if self.learning:
        params = ravel(self.explorationlayer.module.params)
        target = ravel(sum(self.history.getSequence(self.history.getNumSequences() - 1)[2]) / 500)

        if target != 0.0:
            self.gp.addSample(params, target)
            if len(self.gp.trainx) > 20:
                self.gp.trainx = self.gp.trainx[-20:, :]
                self.gp.trainy = self.gp.trainy[-20:]
                self.gp.noise = self.gp.noise[-20:]

            self.gp._calculate()

            # get new parameters where mean was highest
            max_cov = diag(self.gp.pred_cov).max()
            indices = where(diag(self.gp.pred_cov) == max_cov)[0]
            pick = indices[random.randint(len(indices))]
            new_param = self.gp.testx[pick]

            # check if that one exists already in gp training set
            if len(where(self.gp.trainx == new_param)[0]) > 0:
                # add some normal noise to it
                new_param += random.normal(0, 1, len(new_param))

            self.explorationlayer.module._setParameters(new_param)

        else:
            self.explorationlayer.drawRandomWeights()

    # don't call StateDependentAgent.newEpisode() because it randomizes the params
    LearningAgent.newEpisode(self)
def _do_outer_iteration_stage(self):
    # Generate curve from points
    for inv_val in self._inv_points:
        # Apply one applied pressure and determine invaded pores
        logger.info('Applying capillary pressure: ' + str(inv_val))
        self._do_one_inner_iteration(inv_val)
    # Store results using networks' get/set method
    self['pore.inv_Pc'] = self._p_inv
    self['throat.inv_Pc'] = self._t_inv
    # Find invasion sequence values (to correspond with IP algorithm)
    self._p_seq = sp.searchsorted(sp.unique(self._p_inv), self._p_inv)
    self._t_seq = sp.searchsorted(sp.unique(self._t_inv), self._t_inv)
    self['pore.inv_seq'] = self._p_seq
    self['throat.inv_seq'] = self._t_seq
    # Calculate Saturations
    v_total = sp.sum(self._net['pore.volume']) + sp.sum(self._net['throat.volume'])
    sat = 0.
    self['pore.inv_sat'] = 1.
    self['throat.inv_sat'] = 1.
    for i in range(self._npts):
        inv_pores = sp.where(self._p_seq == i)[0]
        inv_throats = sp.where(self._t_seq == i)[0]
        new_sat = (sum(self._net['pore.volume'][inv_pores])
                   + sum(self._net['throat.volume'][inv_throats])) / v_total
        sat += new_sat
        self['pore.inv_sat'][inv_pores] = sat
        self['throat.inv_sat'][inv_throats] = sat
def spectralSlope(wl, flux, dFlux, wlStart, wlStop, beta_guess, **kwargs):
    bm = scipy.where((wl > wlStart) & (wl < wlStop) & numpy.isfinite(flux))[0]

    # Optionally mask out regions around strong lines
    if ('strongLines' in kwargs):
        for line, width in zip(kwargs['strongLines'], kwargs['lineWidths']):
            new_bm = scipy.where(abs(wl[bm] - line) > width)
            bm = bm[new_bm[0]]

    x = wl[bm]
    y = flux[bm]
    dy = dFlux[bm]

    normalization = y[0]
    z = normalization * (x / wlStart)**beta_guess

    coeffs = [normalization, beta_guess]
    fitfunc = lambda p, x: p[0] * (x / wlStart)**(p[1])
    errfunc = lambda p, x, z, dz: numpy.abs((fitfunc(p, x) - z) / dz)
    pfit = scipy.optimize.leastsq(errfunc, coeffs,
                                  args=(numpy.asarray(x, dtype=numpy.float64),
                                        numpy.asarray(y, dtype=numpy.float64),
                                        numpy.asarray(dy, dtype=numpy.float64)),
                                  full_output=1)

    if ('plt' in kwargs):
        original = Gnuplot.Data(x, y, with_='lines')
        guess = Gnuplot.Data(x, z, with_='lines')
        new = Gnuplot.Data(x, pfit[0][0] * (x / wlStart)**(pfit[0][1]), with_='lines')
        kwargs['plt'].plot(original, guess, new)
        #raw_input()
    return pfit[0]
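# Usage sketch with synthetic data (hypothetical, assumes the numpy/scipy
# imports the function above relies on): fit f ~ (wl/wlStart)**beta.
def _demo_spectral_slope():
    import numpy
    import scipy.optimize
    wl = numpy.linspace(4000.0, 9000.0, 500)
    flux = (wl / 5000.0) ** -4.0       # true beta = -4
    dFlux = 0.01 * flux                # nominal uncertainties
    return spectralSlope(wl, flux, dFlux, 5000.0, 8000.0, beta_guess=-3.0)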
def populate_out_of_dip_theta(self, n, dip):
    out_of_dip = asarray(self.populate_distribution(
        self.out_of_dip_theta_dist, n))

    (errorIndexes,) = where((out_of_dip > (175 - dip)) &
                            (out_of_dip < (185 - dip)))

    if len(errorIndexes) > 0:
        for i in errorIndexes:
            blnBadNum = True
            count = 0
            while blnBadNum:
                newNum = self.populate_distribution(
                    self.out_of_dip_theta_dist, 1)
                if ((newNum[0] <= (175 - dip)) | (newNum[0] >= (185 - dip))):
                    blnBadNum = False
                count = count + 1
                if count > 1000:
                    msg = "Bad out of dip theta range in fault source file"
                    raise IOError(msg)
            out_of_dip[i] = newNum[0]

    (errorIndexes,) = where((out_of_dip > (175 - dip)) &
                            (out_of_dip < (185 - dip)))
    if len(errorIndexes) > 0:
        msg = "Bad out of dip theta range in fault source file"
        raise IOError(msg)

    return out_of_dip
def binSyntheticSpectrum(spectrum, native_wl, new_wl):
    """
    This routine pixelates a synthetic spectrum, in effect simulating the
    discrete nature of detector pixels.
    """
    retval = numpy.zeros(len(new_wl))
    for i in range(len(new_wl) - 1):
        bm = scipy.where((native_wl > new_wl[i]) & (native_wl <= new_wl[i + 1]))[0]
        if (len(bm) > 1):
            num = scipy.integrate.simps(spectrum[bm], x=native_wl[bm])
            denom = max(native_wl[bm]) - min(native_wl[bm])
            retval[i] = num / denom
        elif (len(bm) == 1):
            retval[i] = 0.0  # native_wl[bm]
        else:
            retval[i] = 0.0  # retval[-1]

    bm = scipy.where(native_wl > new_wl[-1])[0]
    if len(bm) > 1:
        num = scipy.integrate.simps(spectrum[bm], x=native_wl[bm])
        denom = max(native_wl[bm]) - min(native_wl[bm])
        retval[-1] = num / denom
    else:
        if len(bm) == 1:
            retval[-1] = spectrum[bm]
        else:
            retval[-1] = spectrum[-1]
    return retval
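# Usage sketch (synthetic data, assuming the numpy/scipy.integrate imports the
# function above relies on): rebin a finely sampled line onto coarser pixels.
def _demo_bin_spectrum():
    import numpy
    import scipy.integrate
    native_wl = numpy.linspace(5000.0, 6000.0, 10001)   # 0.1 A sampling
    spectrum = 1.0 - 0.5 * numpy.exp(-(native_wl - 5500.0)**2 / 2.0)
    new_wl = numpy.linspace(5000.0, 6000.0, 501)        # 2 A pixels
    return binSyntheticSpectrum(spectrum, native_wl, new_wl)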
def fit_dispersion(counts, disp_raw, disp_conv, sf, CFG, dmatrix1):

    mean_count = sp.mean(counts / sf, axis=1)[:, sp.newaxis]
    index = sp.where(disp_conv)[0]

    lowerBound = sp.percentile(sp.unique(disp_raw[index]), 1)
    upperBound = sp.percentile(sp.unique(disp_raw[index]), 99)

    idx = sp.where((disp_raw > lowerBound) & (disp_raw < upperBound))[0]

    matrix = sp.ones((idx.shape[0], 2), dtype='float')
    matrix[:, 0] /= mean_count[idx].ravel()

    modGamma = sm.GLM(disp_raw[idx], matrix,
                      family=sm.families.Gamma(sm.families.links.identity))
    res = modGamma.fit()
    Lambda = res.params

    disp_fitted = disp_raw.copy()
    ok_idx = sp.where(~sp.isnan(disp_fitted))[0]
    disp_fitted[ok_idx] = Lambda[0] / mean_count[ok_idx] + Lambda[1]

    if sp.sum(disp_fitted > 0) > 0:
        print "Found dispersion fit"

    if CFG['diagnose_plots']:
        plot.mean_variance_plot(counts=counts,
                                disp=disp_fitted,
                                matrix=dmatrix1,
                                figtitle='Fitted Dispersion Estimate',
                                filename=os.path.join(CFG['plot_dir'], 'dispersion_fitted.pdf'),
                                CFG=CFG)

    return (disp_fitted, Lambda, idx)
def filterNonInformativeSNPs(self):
    # Drop SNP columns that carry the same genotype (2, 1 or 0) in every
    # sample; such columns are non-informative. The three filters are
    # applied sequentially, exactly as in the unrolled original.
    for val in (2, 1, 0):
        tmp = sp.where((self.__x == val).sum(axis=0) != self.__x.shape[0])[0]
        if not tmp.shape[0] == self.__x.shape[0]:
            self.__x = self.__x[:, tmp]
            self.__chr_index = self.__chr_index[tmp]
            self.__pos_index = self.__pos_index[tmp]
            self.__maf_data = self.__maf_data[tmp]
            self.__raw = self.__raw[:, tmp]
            if not self.__x_additive is None:
                self.__x_additive = self.__x_additive[:, tmp]
def quantify_intron_retention(event, gene, counts_segments, counts_edges, counts_seg_pos):

    cov = sp.zeros((2, ), dtype='float')

    sg = gene.splicegraph
    segs = gene.segmentgraph

    seg_lens = segs.segments[1, :] - segs.segments[0, :]
    seg_shape = segs.seg_edges.shape
    order = 'C'
    offset = 0

    ### find exons corresponding to event
    idx_exon1 = sp.where((sg.vertices[0, :] == event.exons1[0, 0]) &
                         (sg.vertices[1, :] == event.exons1[0, 1]))[0]
    idx_exon2 = sp.where((sg.vertices[0, :] == event.exons1[1, 0]) &
                         (sg.vertices[1, :] == event.exons1[1, 1]))[0]

    ### find segments corresponding to exons
    seg_exon1 = sp.sort(sp.where(segs.seg_match[idx_exon1, :])[1])
    seg_exon2 = sp.sort(sp.where(segs.seg_match[idx_exon2, :])[1])

    seg_all = sp.arange(seg_exon1[0], seg_exon2[-1])
    seg_intron = sp.setdiff1d(seg_all, seg_exon1)
    seg_intron = sp.setdiff1d(seg_intron, seg_exon2)
    assert(seg_intron.shape[0] > 0)

    ### compute exon coverages as mean of position wise coverage
    # intron_cov
    cov[0] = sp.sum(counts_segments[seg_intron] * seg_lens[seg_intron]) / sp.sum(seg_lens[seg_intron])

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron conf
    idx = sp.where(counts_edges[:, 0] ==
                   sp.ravel_multi_index([seg_exon1[-1], seg_exon2[0]], seg_shape, order=order) + offset)[0]
    cov[1] = counts_edges[idx, 1]

    return cov
def calculate_kappa(magnitude, damping_s, damping_m, damping_l):
    """
    kappa = where(magnitude > 5.5, self.damping_m, damping_s)
    kappa[where(magnitude > 7.5)] = damping_l
    # where may cause issues if both mag and sites have
    # non-trivial dimension.
    # in that case we may have to try:
    """
    try:
        damping_s = damping_s.swapaxes(0, 1)
        damping_m = damping_m.swapaxes(0, 1)
        damping_l = damping_l.swapaxes(0, 1)
        magnitude = magnitude.swapaxes(0, 1)
    except ValueError:
        # to avoid error with numpy version > 1.10.1
        pass
    kappa = damping_s * (magnitude <= 5.5)
    kappa[where(magnitude > 5.5)[0]] = damping_m
    kappa[where(magnitude > 7.5)[0]] = damping_l
    try:
        kappa = kappa.swapaxes(0, 1)
    except ValueError:
        # to avoid error with numpy version > 1.10.1
        pass
    return kappa
def sort_traces(self):
    """ creates a (t,ID,stim,rep) np.array of the Traces """
    labels = sp.array(self.Main.Data.Metadata.trial_labels)

    # inference
    stim_unique = sp.unique(labels)
    nStims = stim_unique.shape[0]
    nReps = len(labels) / nStims
    nFrames = self.Main.Data.nFrames
    nROIs = len(self.Main.ROIs.ROI_list)

    # dims are t, cell, odor, rep
    self.Main.Data.Traces_sorted = sp.zeros((nFrames, nROIs, nStims, nReps))

    for n in range(self.Main.Data.nTrials):
        # get the correct indices
        stim_index = sp.where(stim_unique == labels[n])[0][0]  # index in stim_unique of the stim of this trial
        rep_index = sp.where(sp.where(labels == labels[n])[0] == n)[0][0]  # which occurrence of this stim label is trial n? -> rep index

        # get the traces and put them in the data structure at the correct place
        try:
            self.Main.Data.Traces_sorted[:, :, stim_index, rep_index] = self.Main.Data.Traces[:, :, n]
        except IndexError:
            sys.exit()
def cross_validation(self, x, y, v=5, sig_r=2.0 ** sp.arange(-8, 0), mu_r=10.0 ** sp.arange(-15, 0)):
    # Get parameters
    n = x.shape[0]
    ns = sig_r.size
    nm = mu_r.size
    err = sp.zeros((ns, nm))

    # Initialization of the indices for the cross validation
    cv = CV()
    cv.split_data_class(y, v=v)

    for i in range(ns):
        for j in range(nm):
            for k in range(v):
                model_temp = KDA()
                model_temp.train(x[cv.it[k], :], y[cv.it[k]], sig=sig_r[i], mu=mu_r[j])
                yp = model_temp.predict(x[cv.iT[k], :], x[cv.it[k], :], y[cv.it[k]])
                yp.shape = y[cv.iT[k]].shape
                t = sp.where(yp != y[cv.iT[k]])[0]
                err[i, j] += float(t.size) / yp.size
                del model_temp
    err /= v
    t = sp.where(err == err.min())
    self.sig = sig_r[t[0][0]]
    self.mu = mu_r[t[1][0]]
    return sig_r[t[0][0]], mu_r[t[1][0]], err
def reScale(Array, MaxMin=None, level=64, NoData=-9999):
    '''Rescale pixel values

    MaxMin should be a list containing max and min (max, min);
    it will be calculated from the input array if it is not provided'''

    if isinstance(Array, sp.ma.MaskedArray):
        Array = Array.astype(float)
    else:
        Array = sp.ma.masked_values(Array, NoData).astype(float)

    if MaxMin is None:
        Max = Array.max()
        Min = Array.min()
        Range = Max - Min
    else:
        Max = MaxMin[0]
        Min = MaxMin[1]
        Range = Max - Min

    Array = sp.where(Array < Min, Min, Array)
    Array = sp.where(Array > Max, Max, Array)
    newArray = ((Array - Min) / Range * (level - 1)).round()
    return newArray
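# Usage sketch (hypothetical raster band): clip to (max, min) and map onto
# 64 grey levels; -9999 cells are masked as NoData first.
def _demo_rescale():
    import scipy as sp
    band = sp.array([[-9999., 10., 20.], [30., 40., 50.]])
    return reScale(band, MaxMin=(50., 10.), level=64)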
def _generate_masked_mesh(self, cell_mask=None):
    r"""
    Generates the mesh based on the cell mask provided
    """
    #
    if cell_mask is None:
        cell_mask = sp.ones(self.data_map.shape, dtype=bool)
    #
    # initializing arrays
    self._edges = sp.ones(0, dtype=str)
    self._merge_patch_pairs = sp.ones(0, dtype=str)
    self._create_blocks(cell_mask)
    #
    # building face arrays
    mapper = sp.ravel(sp.array(cell_mask, dtype=int))
    mapper[mapper == 1] = sp.arange(sp.count_nonzero(mapper))
    mapper = sp.reshape(mapper, (self.nz, self.nx))
    mapper[~cell_mask] = -sp.iinfo(int).max
    #
    boundary_dict = {
        'bottom':   {'bottom': mapper[0, :][cell_mask[0, :]]},
        'top':      {'top': mapper[-1, :][cell_mask[-1, :]]},
        'left':     {'left': mapper[:, 0][cell_mask[:, 0]]},
        'right':    {'right': mapper[:, -1][cell_mask[:, -1]]},
        'front':    {'front': mapper[cell_mask]},
        'back':     {'back': mapper[cell_mask]},
        'internal': {'bottom': [], 'top': [], 'left': [], 'right': []}
    }
    #
    # determining cells linked to a masked cell
    cell_mask = sp.where(~sp.ravel(cell_mask))[0]
    inds = sp.in1d(self._field._cell_interfaces, cell_mask)
    inds = sp.reshape(inds, (len(self._field._cell_interfaces), 2))
    inds = inds[:, 0].astype(int) + inds[:, 1].astype(int)
    inds = (inds == 1)
    links = self._field._cell_interfaces[inds]
    #
    # adjusting order so masked cells are all on links[:, 1]
    swap = sp.in1d(links[:, 0], cell_mask)
    links[swap] = links[swap, ::-1]
    #
    # setting side based on index difference
    sides = sp.ndarray(len(links), dtype='<U6')
    sides[sp.where(links[:, 1] == links[:, 0] - self.nx)[0]] = 'bottom'
    sides[sp.where(links[:, 1] == links[:, 0] + self.nx)[0]] = 'top'
    sides[sp.where(links[:, 1] == links[:, 0] - 1)[0]] = 'left'
    sides[sp.where(links[:, 1] == links[:, 0] + 1)[0]] = 'right'
    #
    # adding each block to the internal face dictionary
    inds = sp.ravel(mapper)[links[:, 0]]
    for side, block_id in zip(sides, inds):
        boundary_dict['internal'][side].append(block_id)
    self.set_boundary_patches(boundary_dict, reset=True)
def simulate(self, x0, lambd):
    Dt = self.param['Dt']  # Dt needs to be a multiple of param['Dt']
    dt = self.param['dt']
    D = lambd[1]
    a = lambd[0]
    N = self.param['N']
    drift = self.param['drift']
    x = scipy.array(x0)
    tstart = 0
    tcur = tstart
    while (tcur < tstart + Dt + dt / 2):
        tcur += dt
        # the random number
        dW = self.rand.normal(loc=0., scale=scipy.sqrt(2 * D * dt), size=N)
        # if tcur == dt:  # only print random number for first time step
        #     print 'dW =', dW
        # the process
        drift_term = a * drift(x)
        x = x + drift_term * dt + dW
        # and reflecting boundary conditions
        # (scipy.where returns a new array, so the result must be assigned back)
        x = scipy.where(x > self.domain[1], 2 * self.domain[1] - x, x)
        x = scipy.where(x < self.domain[0], 2 * self.domain[0] - x, x)
    return x
def compute_ndvi(im, r=0, ir=1, NODATA=-10000):
    """The function computes the NDVI of a multivalued image. It checks if
    there is a NODATA value or division by zero.

    Args:
        im: the image to process
        r: the number of the band that corresponds to the red band.
        ir: the number of the band that corresponds to the infra-red band.
        NODATA: the value of the NODATA

    Returns:
        ndvi = the ndvi of the image
    """
    ## Get the size of the image
    [nl, nc, nb] = im.shape

    ## Be sure that we can do 'floating operation'
    imf = im.astype(sp.float64)
    ndvi = sp.empty((nl, nc))

    if nb < 2:
        print "Two bands are needed to compute the NDVI"
        return None
    else:
        den = imf[:, :, ir - 1] + imf[:, :, r - 1]  # Pre compute the denominator
        t = sp.where((den > 0) & (imf[:, :, 1] != NODATA))
        # compute the ndvi on the safe samples
        ndvi[t] = (imf[t[0], t[1], ir - 1] - imf[t[0], t[1], r - 1]) / den[t]
        if len(t[0]) < nl * nc:
            t = sp.where((den == 0) | (imf[:, :, 1] == NODATA))  # check for problematic pixels
            ndvi[t] = NODATA

    imf = []
    return ndvi
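# Usage sketch (hypothetical 2x2 image with two bands). Note that the band
# arguments above are used as `r - 1` / `ir - 1`, i.e. they appear to be
# treated as 1-based band numbers here.
def _demo_compute_ndvi():
    import scipy as sp
    im = sp.ones((2, 2, 2))
    im[:, :, 0] = 0.2   # red
    im[:, :, 1] = 0.6   # infra-red
    return compute_ndvi(im, r=1, ir=2)   # -> 0.5 everywhere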
def est_condprob2(data, val, given):
    """Calculate the probability of P(X|Y,Z)

    est_condprob2(data, 'A', ['M', 'LC'])"""

    if not isinstance(given, list):
        raise IndexError("Given must be a list or tuple of givens")
    elif len(given) != 2:
        raise IndexError("I need multiple givens! Give me more...give me more!")

    gcols = []
    for g in given:
        if g in ['M', 'F']:
            gcols.append(1)
        elif g in ['LC', 'SC', 'T']:
            gcols.append(2)
        elif g in ['A', 'B', 'C']:
            gcols.append(0)

    if val in ['M', 'F']:
        vcol = 1
    elif val in ['LC', 'SC', 'T']:
        vcol = 2
    elif val in ['A', 'B', 'C']:
        vcol = 0

    datsize = data.shape[0]
    needed = [val, given[0], given[1]]
    t = sp.where([sp.all(data[i] == needed) for i in range(datsize)])[0]
    t2 = sp.where([sp.all(data[i, 1:] == given) for i in range(datsize)])[0]

    return float(t.size) / t2.size
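# Usage sketch (hypothetical observations, one row per sample in the
# class/sex/group column order assumed by the mapping above):
def _demo_est_condprob2():
    import scipy as sp
    data = sp.array([['A', 'M', 'LC'],
                     ['B', 'M', 'LC'],
                     ['A', 'M', 'SC'],
                     ['A', 'F', 'LC']])
    return est_condprob2(data, 'A', ['M', 'LC'])   # -> 0.5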
def percentError(out, true):
    """Return percentage of mismatch between out and target values (lists
    and arrays accepted)."""
    arrout = array(out).flatten()
    wrong = where(arrout != array(true).flatten())[0].size
    return 100. * float(wrong) / float(arrout.size)
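# Usage sketch: one of four labels differs, so the error is 25%.
def _demo_percent_error():
    return percentError([0, 1, 2, 2], [0, 1, 1, 2])   # -> 25.0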
def plot_spectrum(self, x, rdn_meas, geom, fname=None):
    if fname is None and hasattr(self.output, 'plot_directory') and \
            self.output.plot_directory is not None:
        fname = self.output.plot_directory + '/frame_%i.png' % self.iv.counts
    else:
        return

    plt.cla()
    xmin, xmax = min(self.wl), max(self.wl)
    fig = plt.subplots(1, 2, figsize=(10, 5))
    plt.subplot(1, 2, 1)
    rdn_est = self.iv.fm.calc_rdn(x, geom)
    for lo, hi in self.windows:
        idx = s.where(s.logical_and(self.wl > lo, self.wl < hi))[0]
        p1 = plt.plot(self.iv.fm.wl[idx], rdn_meas[idx],
                      color=[0.7, 0.2, 0.2], linewidth=2)
        plt.hold(True)
        p2 = plt.plot(self.iv.fm.wl, rdn_est, color='k', linewidth=2)
    plt.title("Radiance")
    ymax = max(rdn_meas) * 1.25
    plt.text(500, ymax * 0.92, "Measured", color=[0.7, 0.2, 0.2])
    plt.text(500, ymax * 0.86, "Model", color='k')
    plt.ylabel("$\mu$W nm$^{-1}$ sr$^{-1}$ cm$^{-2}$")
    plt.xlabel("Wavelength (nm)")
    plt.ylim([-0.001, ymax])
    plt.xlim([xmin, xmax])

    plt.subplot(1, 2, 2)
    lrfl_est = self.iv.fm.calc_lrfl(x, geom)
    ymax = min(max(lrfl_est) * 1.25, 0.7)
    for lo, hi in self.windows:
        if self.ref_wl is not None and self.ref_rfl is not None:
            # red line
            idx = s.where(s.logical_and(self.ref_wl > lo, self.ref_wl < hi))[0]
            p1 = plt.plot(self.ref_wl[idx], self.ref_rfl[idx],
                          color=[0.7, 0.2, 0.2], linewidth=2)
            ymax = max(max(self.ref_rfl[idx] * 1.2), ymax)
            plt.hold(True)
        # black line
        idx = s.where(s.logical_and(self.wl > lo, self.wl < hi))[0]
        p2 = plt.plot(self.iv.fm.wl[idx], lrfl_est[idx], 'k', linewidth=2)
        ymax = max(max(lrfl_est[idx] * 1.2), ymax)
        # green and blue lines - surface components
        if hasattr(self.iv.fm.surface, 'components'):
            p3 = plt.plot(self.iv.fm.wl[idx], self.iv.fm.xa(x, geom)[idx],
                          'b', linewidth=2)
            for j in range(len(self.iv.fm.surface.components)):
                z = self.iv.fm.surface.norm(lrfl_est[self.iv.fm.surface.refidx])
                mu = self.iv.fm.surface.components[j][0] * z
                plt.plot(self.iv.fm.wl[idx], mu[idx], 'g:', linewidth=1)
    plt.ylim([-0.0010, ymax])
    plt.xlim([xmin, xmax])
    plt.title("Reflectance")
    plt.xlabel("Wavelength (nm)")
    if self.ref_rfl is not None:
        plt.text(500, ymax * 0.92, "In situ reference", color=[0.7, 0.2, 0.2])
    plt.text(500, ymax * 0.86, "Remote estimate", color='k')
    plt.text(500, ymax * 0.80, "Prior mean state ", color='b')
    plt.text(500, ymax * 0.74, "Surface components ", color='g')
    plt.savefig(fname)
    plt.close()
from mpl_toolkits.basemap.cm import sstanom
from matplotlib.cm import jet

# Read Reynolds SST climatology
path = os.environ['NOBACKUP'] + '/verification/reynolds'
execfile(path + '/ctl.py')
obs = {}
obs['name'] = 'Reynolds SST'
obs['ctl'] = ctl

# Calculate climatology
obs['clim'] = obs['ctl'].fromfile('sst', tind=slice(1, None)).clim(12)
obs['clim'].shiftgrid(30.)
obs['clim'].grid['lon'] = sp.where(obs['clim'].grid['lon'] < 29.,
                                   obs['clim'].grid['lon'] + 360.,
                                   obs['clim'].grid['lon'])

# Calculate DJF, JJA and annual mean
obs['djf'] = obs['clim'].subset(tind=[0, 1, 11]).ave(0)
obs['djf'].name += ', DJF'
obs['jja'] = obs['clim'].subset(tind=[5, 6, 7]).ave(0)
obs['jja'].name += ', JJA'
obs['am'] = obs['clim'].ave(0)
obs['am'].name += ', Annual Mean'

# Equatorial annual cycle
lonind = sp.logical_and(obs['clim'].grid['lon'][0] >= 130.0,
                        obs['clim'].grid['lon'][0] <= 280.0)
latind = sp.logical_and(obs['clim'].grid['lat'][:, 0] >= -2.1,
                        obs['clim'].grid['lat'][:, 0] <= 2.1)
import muesli_functions as mf
import scipy as sp

# Load samples
X, Y = mf.read2bands("../Data/grassland_id_2m.sqlite", 70, 106)
ID = []

# Compute NDVI
NDVI = []
for i in xrange(len(X)):
    X_ = X[i]
    # Compute safe version of NDVI
    DENOM = (X_[:, 1] + X_[:, 0])
    t = sp.where(DENOM > 0)[0]
    NDVI_ = (X_[t, 1] - X_[t, 0]) / DENOM[t]
    if len(NDVI_) > 0:
        NDVI.append(NDVI_)

# Scan Grasslands
for i in xrange(len(NDVI)):
    m = sp.mean(NDVI[i][:, sp.newaxis])
    if m > 0.6:
        ID.append(Y[i])
        print("ID {} and mean NDVI {}".format(Y[i], m))

print("Number of selected grasslands: {}".format(len(ID)))
sp.savetxt("id_grasslands.csv", ID, delimiter=',')
def estimate_dispersion(gene_counts, matrix, sf, CFG):

    if CFG['verbose']:
        print 'Estimating raw dispersions'

    if CFG['parallel'] > 1:
        disp_raw = sp.empty((gene_counts.shape[0], 1), dtype='float')
        disp_raw.fill(sp.nan)
        disp_raw_conv = sp.zeros((gene_counts.shape[0], 1), dtype='bool')

        pool = mp.Pool(processes=CFG['parallel'],
                       initializer=lambda: sig.signal(sig.SIGINT, sig.SIG_IGN))
        binsize = 30
        idx_chunks = [sp.arange(x, min(x + binsize, gene_counts.shape[0]))
                      for x in range(0, gene_counts.shape[0], binsize)]

        try:
            result = [pool.apply_async(estimate_dispersion_chunk,
                                       args=(gene_counts[idx, :], matrix, sf, CFG, idx,))
                      for idx in idx_chunks]
            res_cnt = 0
            while result:
                tmp = result.pop(0).get()
                for i, j in enumerate(tmp[2]):
                    if CFG['verbose']:
                        log_progress(res_cnt, gene_counts.shape[0])
                    res_cnt += 1
                    disp_raw[j] = tmp[0][i]
                    disp_raw_conv[j] = tmp[1][i]
            if CFG['verbose']:
                log_progress(gene_counts.shape[0], gene_counts.shape[0])
                print ''
            pool.terminate()
            pool.join()
        except KeyboardInterrupt:
            print >> sys.stderr, 'Keyboard Interrupt - exiting'
            pool.terminate()
            pool.join()
            sys.exit(1)
    else:
        (disp_raw, disp_raw_conv, _) = estimate_dispersion_chunk(
            gene_counts, matrix, sf, CFG,
            sp.arange(gene_counts.shape[0]), log=CFG['verbose'])

    if CFG['debug']:
        fig = plt.figure(figsize=(8, 6), dpi=100)
        ax = fig.add_subplot(111)
        idx = sp.where(~sp.isnan(disp_raw))[0]
        ax.plot(sp.mean(sp.log10(gene_counts + 1), axis=1)[idx], disp_raw[idx], 'bo')
        ax.set_title('Raw Dispersion Estimate')
        ax.set_xlabel('Mean expression count')
        ax.set_ylabel('Dispersion')
        plt.savefig('dispersion_raw.pdf', format='pdf', bbox_inches='tight')
        plt.close(fig)

    return (disp_raw, disp_raw_conv)
def projection(dt, people, contacts, Vd, dmin=0.0,
               nb_iter_max=100000, rho=0.1, tol=0.01, log=False, method="cvxopt"):
    """
    From the desired velocities Vd, this projection step consists of computing \
    the global velocity field defined as the closest velocity to the \
    desired one among all the feasible fields (i.e. fields which do not lead \
    to disks overlapping).

    Parameters
    ----------
    dt: float
        time step
    people: numpy array
        people coordinates and radius : x,y,r
    contacts: numpy array
        all the contacts : i,j,dij,eij_x,eij_y
    Vd: numpy array
        people desired velocities
    dmin: float
        minimum distance guaranteed between individuals
    nb_iter_max: integer
        maximum number of iterations allowed
    rho: float
        parameter of the Uzawa method
    tol: float
        tolerance wished
    log: boolean
        to print the final accuracy, number of iterations,...
    method: string
        optimization algorithm : 'cvxopt' (default) or 'uzawa' (or 'mosek' if installed \
        with a license file)

    Returns
    -------
    B: numpy array
        constraint matrix
    U: numpy array
        new people velocities ensuring that there is no overlap \
        between individuals
    L: numpy array
        Lagrange multipliers (only when method='uzawa', None otherwise)
    P: numpy array
        pressure on each individual (only when method='uzawa', None otherwise)
    info: integer
        number of iterations needed
    """
    Np = people.shape[0]
    Nc = contacts.shape[0]
    info = 0

    if (Nc == 0):
        info = 1
        return info, None, Vd, None, None
    else:
        if (method == "cvxopt") or (method == "mosek"):
            import cvxopt
            cvxopt.solvers.options['show_progress'] = False
            cvxopt.solvers.maxiters = 1000
            cvxopt.solvers.abstol = 1e-8
            cvxopt.solvers.reltol = 1e-7
            L = None
            P = None
            U = sp.zeros((2 * Np, ))
            V = sp.zeros((2 * Np, ))
            Z = (contacts[:, 2] - dmin) / dt  ## ie Dij/dt
            V[::2] = Vd[:, 0]
            V[1::2] = Vd[:, 1]  ## A priori velocity
            V = cvxopt.matrix(V)
            Z = cvxopt.matrix(Z, (Nc, 1))
            Id = cvxopt.spdiag([1] * (U.shape[0]))
            if (Nc > 0):
                II = contacts[:, 0].astype(int)
                JJ = contacts[:, 1].astype(int)
                Jpos = sp.where(JJ >= 0)[0]
                Jneg = sp.where(JJ < 0)[0]
                row = sp.concatenate([Jpos, Jpos, Jpos, Jpos, Jneg, Jneg])
                col = sp.concatenate([2 * II[Jpos], 2 * II[Jpos] + 1,
                                      2 * JJ[Jpos], 2 * JJ[Jpos] + 1,
                                      2 * II[Jneg], 2 * II[Jneg] + 1])
                data = sp.concatenate([contacts[Jpos, 3], contacts[Jpos, 4],
                                       -contacts[Jpos, 3], -contacts[Jpos, 4],
                                       -contacts[Jneg, 3], -contacts[Jneg, 4]])
                B = csr_matrix((data, (row, col)), shape=(Nc, 2 * Np))  # .toarray()
                cvxoptB = cvxopt.spmatrix(sp.array(data), sp.array(row),
                                          sp.array(col), size=(Nc, 2 * Np))
                if (method == "mosek"):
                    from mosek import iparam
                    cvxopt.solvers.options['mosek'] = {iparam.log: 0}
                    solution = cvxopt.solvers.qp(Id, -V, cvxoptB, Z, solver='mosek')
                else:
                    solution = cvxopt.solvers.qp(Id, -V, cvxoptB, Z)
                    info = solution["iterations"]
                U = solution['x']
                if log:
                    C = Z - B @ U
                    if (method == "mosek"):
                        print(" projection (mosek) : nb of contacts = ", Nc,
                              ", contrainte (Z-B@U).min() = ", C.min())
                    else:
                        print(" projection (cvxopt) : nb of contacts = ", Nc,
                              ", nb of iterations = ", solution["iterations"],
                              ", status = ", solution["status"],
                              ", contrainte (Z-B@U).min() = ", C.min())
            U = sp.array(U).reshape((Np, 2))

        elif (method == "uzawa"):
            info = 0
            II = contacts[:, 0].astype(int)
            JJ = contacts[:, 1].astype(int)
            Jpos = sp.where(JJ >= 0)[0]
            Jneg = sp.where(JJ < 0)[0]
            row = sp.concatenate([Jpos, Jpos, Jpos, Jpos, Jneg, Jneg])
            col = sp.concatenate([2 * II[Jpos], 2 * II[Jpos] + 1,
                                  2 * JJ[Jpos], 2 * JJ[Jpos] + 1,
                                  2 * II[Jneg], 2 * II[Jneg] + 1])
            data = sp.concatenate([contacts[Jpos, 3], contacts[Jpos, 4],
                                   -contacts[Jpos, 3], -contacts[Jpos, 4],
                                   -contacts[Jneg, 3], -contacts[Jneg, 4]])
            B = csr_matrix((data, (row, col)), shape=(Nc, 2 * Np))  # .toarray()
            L = sp.zeros((Nc, ))
            R = 99 * sp.ones((Nc, ))
            U = sp.zeros((2 * Np, ))
            V = sp.zeros((2 * Np, ))
            D = contacts[:, 2]
            V[::2] = Vd[:, 0]
            V[1::2] = Vd[:, 1]
            k = 0
            while ((dt * R.max() > tol * 2 * people[:, 2].min()) and (k < nb_iter_max)):
                U[:] = V[:] - B.transpose() @ L[:]
                R[:] = B @ U[:] - (D[:] - dmin) / dt
                L[:] = sp.maximum(L[:] + rho * R[:], 0)
                k += 1
            P = sp.zeros(Np)  ## Pressure
            P[II[Jpos]] += 3 / (4 * sp.pi * people[II[Jpos], 2]**2) * L[Jpos]
            P[JJ[Jpos]] += 3 / (4 * sp.pi * people[JJ[Jpos], 2]**2) * L[Jpos]
            P[II[Jneg]] += 3 / (4 * sp.pi * people[II[Jneg], 2]**2) * L[Jneg]
            if log:
                print(" projection (uzawa) : nb of contacts = ", Nc,
                      ", nb of iterations = ", k, ", min = ", R.min(),
                      ", max = ", R.max(), ", tol = ", tol)
            if (k == nb_iter_max):
                print("** WARNING : Method projection **")
                print("** WARNING : you have reached the maximum number of iterations,")
                print("** WARNING : it remains unsatisfied constraints !! ")
                info = -1
            else:
                info = k

        return info, B, U.reshape((Np, 2)), L, P
griddata = ClassificationDataSet(2, 1, nb_classes=3)
for i in xrange(X.size):
    griddata.addSample([X.ravel()[i], Y.ravel()[i]], [0])
griddata._convertToOneOfMany()  # makes the network reliable

# start the training iterations
for i in range(20):
    trainer.trainEpochs(1)
    trnresult = percentError(trainer.testOnClassData(), trndata['class'])
    tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])

    print "epoch: %4d" % trainer.totalepochs, \
        " train error: %5.2f%%" % trnresult, \
        " test error: %5.2f%%" % tstresult

    out = fnn.activateOnDataset(griddata)
    out = out.argmax(axis=1)  # the highest output activation gives the class
    out = out.reshape(X.shape)

    figure(1)
    ioff()      # interactive graphics off
    clf()       # clear the plot
    hold(True)  # overplot on
    for c in [0, 1, 2]:
        here, _ = where(tstdata['class'] == c)
        plot(tstdata['input'][here, 0], tstdata['input'][here, 1], 'o')
    if out.max() != out.min():  # safety check against flat field
        contourf(X, Y, out)     # plot the contour
    ion()   # interactive graphics on
    draw()  # update the plot

ioff()
show()
def plot_sensor_data(ifig, sensor_data, time, initial_door_dist=None, axis=None,
                     flux_timestep=1, savefig=False, filename='fig.png', cmap='winter'):
    """
    When a sensor line is defined this function allows to draw the \
    repartition of the people exit times.

    Parameters
    ----------
    ifig: int
        figure number
    sensor_data : numpy array
        [time, direction, intersection_point[2]] for each individual
    time: float
        time in seconds
    initial_door_dist: numpy array
        people initial distance to the door
    axis: numpy array
        matplotlib axis : [xmin, xmax, ymin, ymax]
    flux_timestep: float
        timestep for the fluxes : number of persons per flux_timestep seconds
    savefig: boolean
        writes the figure as a png file if true
    filename: string
        png filename used to write the figure
    cmap: string
        matplotlib colormap name
    """
    Np = sensor_data.shape[0]
    tmin = 0
    tmax = time
    fig = plt.figure(ifig)
    plt.clf()

    ax1 = fig.add_subplot(211)
    if (initial_door_dist is None):
        ax1.plot(sp.arange(Np), sensor_data[:, 0], 'b+')
        ax1.set_title('Crossing time (s) vs people id')
    else:
        ax1.plot(initial_door_dist, sensor_data[:, 0], 'b+')
        ax1.set_title('Crossing time (s) vs initial door distance (m)')
    if (axis):
        ax1.set_xlim(axis[0], axis[1])
        ax1.set_ylim(axis[2], axis[3])
    #ax1.set_xticks([])
    #ax1.set_yticks([])
    #ax1.axis('off')

    tgrid = sp.arange(tmin, tmax, step=flux_timestep)
    tgrid = sp.append(tgrid, tgrid[-1] + flux_timestep)
    flux_exits = sp.zeros(tgrid.shape)
    flux_entries = sp.zeros(tgrid.shape)
    exits = sp.where(sensor_data[:, 1] == 1)[0]
    entries = sp.where(sensor_data[:, 1] == -1)[0]
    t_exits = sp.ceil((sensor_data[exits, 0] - tmin) / flux_timestep)
    t_entries = sp.ceil((sensor_data[entries, 0] - tmin) / flux_timestep)
    #t_exits = sp.floor((sensor_data[exits,0]-tmin)/flux_timestep)
    #t_entries = sp.floor((sensor_data[entries,0]-tmin)/flux_timestep)
    unique_exits, counts_exits = sp.unique(t_exits, return_counts=True)
    unique_entries, counts_entries = sp.unique(t_entries, return_counts=True)
    flux_exits[unique_exits.astype(int)] = counts_exits
    flux_entries[unique_entries.astype(int)] = counts_entries

    ax2 = fig.add_subplot(212)
    ax2.plot(tgrid, flux_entries, ':og', tgrid, flux_exits, ':or')
    ax2.set_title("Entries (green) and exits (red) per " + str(flux_timestep) + " s")
    if (axis):
        ax2.set_xlim(axis[0], axis[1])
        ax2.set_ylim(axis[2], axis[3])
    #ax2.set_xticks([])
    #ax2.set_yticks([])
    #ax2.axis('off')

    # Optionally : adds some histograms
    # if (exits.shape[0]>0):
    #     ax3 = fig.add_subplot(413)
    #     t_exits_sorted = sp.sort(sensor_data[exits,0])
    #     #print("t_exits_sorted = ",t_exits_sorted)
    #     tmp = sp.concatenate(([0],t_exits_sorted))
    #     bins = 0.5*(tmp[:-1]+tmp[1:])
    #     widths = tmp[1:]-tmp[:-1]
    #     heights = 1/widths
    #     ax3.bar(bins, heights, width=widths,color='r',align='center')
    #
    # if (entries.shape[0]>0):
    #     ax4 = fig.add_subplot(414)
    #     t_entries_sorted = sp.sort(sensor_data[entries,0])
    #     tmp = sp.concatenate(([0],t_entries_sorted))
    #     bins = 0.5*(tmp[:-1]+tmp[1:])
    #     widths = tmp[1:]-tmp[:-1]
    #     heights = 1/widths
    #     ax4.bar(bins, heights, width=widths,color='r',align='center')

    fig.set_tight_layout(True)
    fig.canvas.draw()
    if (savefig):
        fig.savefig(filename, dpi=300)
def compute_contacts(dom, people, dmax):
    """
    This function uses a KDTree method to find the contacts \
    between individuals. Moreover the contacts with the walls \
    are also determined from the wall distance (obtained by the \
    fast-marching method).

    Parameters
    ----------
    dom: Domain
        contains everything for managing the domain
    people: numpy array
        people coordinates and radius : x,y,r
    dmax: float
        threshold value used to consider a contact as \
        active (dij<dmax)

    Returns
    -------
    contacts: numpy array
        all the contacts i,j,dij,eij_x,eij_y such that dij<dmax \
        and i<j (no duplication)
    """
    # lf : the number of points at which the algorithm
    # switches over to brute-force. Has to be positive.
    lf = 100
    if (lf > sys.getrecursionlimit()):
        sys.setrecursionlimit(lf)
    kd = cKDTree(people[:, :2], leafsize=lf)
    ## Find all pairs of points whose distance is at most dmax+2*rmax
    rmax = people[:, 2].max()
    neighbors = kd.query_ball_tree(kd, dmax + 2 * rmax)
    ## Create the contact array : i,j,dij,eij_x,eij_y
    first_elements = sp.arange(people.shape[0])  ## i.e. i
    other_elements = list(map(lambda x: x[1:], neighbors))  ## i.e. all the j values for each i
    lengths = list(map(len, other_elements))
    tt = sp.stack([first_elements, lengths], axis=1)
    I = sp.concatenate(list(map(lambda x: sp.full((x[1], ), x[0]), tt))).astype(int)
    J = sp.concatenate(other_elements).astype(int)
    ind = sp.where(I < J)[0]
    I = I[ind]
    J = J[ind]
    DP = people[J, :2] - people[I, :2]
    Norm = sp.linalg.norm(DP, axis=1, ord=2)
    Dij = Norm - people[I, 2] - people[J, 2]
    ind = sp.where(Dij < dmax)[0]
    Dij = Dij[ind]
    I = I[ind]
    J = J[ind]
    Norm = Norm[ind]
    DP = DP[ind]
    contacts = sp.stack([I, J, Dij, DP[:, 0] / Norm, DP[:, 1] / Norm], axis=1)
    # Add contacts with the walls
    II = sp.floor((people[:, 1] - dom.ymin - 0.5 * dom.pixel_size) / dom.pixel_size).astype(int)
    JJ = sp.floor((people[:, 0] - dom.xmin - 0.5 * dom.pixel_size) / dom.pixel_size).astype(int)
    DD = dom.wall_distance[II, JJ] - people[:, 2]
    ind = sp.where(DD < dmax)[0]
    wall_contacts = sp.stack([ind, -1 * sp.ones(ind.shape), DD[ind],
                              dom.wall_grad_X[II[ind], JJ[ind]],
                              dom.wall_grad_Y[II[ind], JJ[ind]]], axis=1)
    contacts = sp.vstack([contacts, wall_contacts])
    return sp.array(contacts)
def count_graph_coverage(genes, fn_bam=None, CFG=None, fn_out=None):
    # [counts] = count_graph_coverage(genes, fn_bam, CFG, fn_out)

    if fn_bam is None and isinstance(genes, dict):
        PAR = genes
        genes = PAR['genes']
        fn_bam = PAR['fn_bam']
        if 'fn_out' in PAR:
            fn_out = PAR['fn_out']
        CFG = PAR['CFG']

    if not isinstance(fn_bam, list):
        fn_bam = [fn_bam]
    counts = sp.zeros((len(fn_bam), genes.shape[0]), dtype='object')

    intron_tol = 0

    sys.stdout.write('genes: %i\n' % genes.shape[0])
    for f in range(counts.shape[0]):
        sys.stdout.write('\nsample %i/%i\n' % (f + 1, counts.shape[0]))

        ### iterate over all genes and generate counts for
        ### the segments in the segment graph
        ### and the splice junctions in the splice graph
        ### iterate per contig, so the bam caching works better
        contigs = sp.array([x.chr for x in genes])
        for contig in sp.unique(contigs):
            contig_idx = sp.where(contigs == contig)[0]
            bam_cache = dict()
            print '\ncounting %i genes on contig %s' % (contig_idx.shape[0], contig)
            for ii, i in enumerate(contig_idx):
                sys.stdout.write('.')
                if ii > 0 and ii % 50 == 0:
                    sys.stdout.write('%i/%i\n' % (ii, contig_idx.shape[0]))
                sys.stdout.flush()
                gg = genes[i]
                if gg.segmentgraph.is_empty():
                    gg.segmentgraph = Segmentgraph(gg)
                gg.start = gg.segmentgraph.segments.ravel().min()
                gg.stop = gg.segmentgraph.segments.ravel().max()

                counts[f, i] = Counts(gg.segmentgraph.segments.shape[1])

                if CFG['bam_to_sparse'] and (fn_bam[f].endswith('npz') or
                                             os.path.exists(re.sub(r'bam$', '', fn_bam[f]) + 'npz')):
                    ### make sure that we query the right contig from cache
                    assert(gg.chr == contig)
                    (tracks, intron_list) = add_reads_from_sparse_bam(
                        gg, fn_bam[f], contig,
                        types=['exon_track', 'intron_list'],
                        filter=None, cache=bam_cache)
                else:
                    ### add RNA-seq evidence to the gene structure
                    (tracks, intron_list) = add_reads_from_bam(
                        gg, fn_bam[f], ['exon_track', 'intron_list'],
                        None, CFG['var_aware'], CFG['primary_only'])
                    intron_list = intron_list[0]  ### TODO

                ### extract mean exon coverage for all segments
                for j in range(gg.segmentgraph.segments.shape[1]):
                    idx = sp.arange(gg.segmentgraph.segments[0, j],
                                    gg.segmentgraph.segments[1, j]) - gg.start
                    counts[f, i].segments[j] = sp.mean(sp.sum(tracks[:, idx], axis=0))
                    counts[f, i].seg_pos[j] = sp.sum(sp.sum(tracks[:, idx], axis=0) > 0)

                k, l = sp.where(gg.segmentgraph.seg_edges == 1)

                ### there are no introns to count
                if intron_list.shape[0] == 0:
                    for m in range(k.shape[0]):
                        if counts[f, i].edges.shape[0] == 0:
                            counts[f, i].edges = sp.atleast_2d(sp.array([
                                sp.ravel_multi_index([k[m], l[m]], gg.segmentgraph.seg_edges.shape), 0]))
                        else:
                            counts[f, i].edges = sp.r_[counts[f, i].edges,
                                sp.atleast_2d(sp.array([
                                    sp.ravel_multi_index([k[m], l[m]], gg.segmentgraph.seg_edges.shape), 0]))]
                    continue

                ### extract intron counts
                for m in range(k.shape[0]):
                    idx = sp.where((sp.absolute(intron_list[:, 0] - gg.segmentgraph.segments[1, k[m]]) <= intron_tol) &
                                   (sp.absolute(intron_list[:, 1] - gg.segmentgraph.segments[0, l[m]]) <= intron_tol))[0]
                    if counts[f, i].edges.shape[0] == 0:
                        if idx.shape[0] > 0:
                            counts[f, i].edges = sp.atleast_2d(sp.array([
                                sp.ravel_multi_index([k[m], l[m]], gg.segmentgraph.seg_edges.shape),
                                sp.sum(intron_list[idx, 2])]))
                        else:
                            counts[f, i].edges = sp.atleast_2d(sp.array([
                                sp.ravel_multi_index([k[m], l[m]], gg.segmentgraph.seg_edges.shape), 0]))
                    else:
                        if idx.shape[0] > 0:
                            counts[f, i].edges = sp.r_[counts[f, i].edges,
                                sp.atleast_2d(sp.array([
                                    sp.ravel_multi_index([k[m], l[m]], gg.segmentgraph.seg_edges.shape),
                                    sp.sum(intron_list[idx, 2])]))]
                        else:
                            counts[f, i].edges = sp.r_[counts[f, i].edges,
                                sp.atleast_2d(sp.array([
                                    sp.ravel_multi_index([k[m], l[m]], gg.segmentgraph.seg_edges.shape), 0]))]

    if fn_out is not None:
        cPickle.dump(counts, open(fn_out, 'w'), -1)
    else:
        return counts
def preprocess_data_stack(self, stack_num, n_jobs, file_list, pattern, white, dark):
    # Average, merge and preprocess a stack of images
    # Typically a stack corresponds to one ptychographic position
    l = []
    tmp = None

    # First - average according to the pattern
    if pattern in [1, 2]:  # Averaging only
        for filename in file_list:
            if tmp is None:
                tmp = self.openup(filename)
            else:
                tmp += self.openup(filename)
        l.append(tmp / len(file_list))
    elif pattern == 3:  # Average then merge
        d = {}
        unique_times = list(set([t.split('_')[3] for t in file_list]))
        for filename in file_list:
            t = filename.split('.')[0].split('_')[-1]
            if t not in d.keys():
                # store (count, sum) as a list so it can be updated in place
                d[t] = [1, self.openup(filename)]
            else:
                d[t][0] += 1
                d[t][1] += self.openup(filename)
        for key, (i, val) in d.iteritems():
            val /= i

        # Check for saturated values and merge variable exposure times
        max_time = max(unique_times)
        if CXP.preprocessing.saturation_level > 0:
            for key in d.keys():
                wh = sp.where(d[key][1] >= CXP.preprocessing.saturation_level)
                d[key][1][wh] = 0
                if tmp is None:
                    # exposure-time keys are strings, convert before scaling
                    tmp = d[key][1] * float(max_time) / float(key)
                else:
                    tmp += d[key][1] * float(max_time) / float(key)
            l.append(tmp)
    else:
        raise Exception('NamingConventionError')

    # Do preprocessing
    data = CXData()
    data.data = l

    if CXP.measurement.beam_stop:
        data.treat_beamstop()

    data.symmetrize_array_shape()

    # CCD Specific Preprocessing
    if CXP.preprocessing.detector_type == 'ccd':
        try:
            # Dark field correction
            if dark is not None:
                print('Dark field correcting data')
                data -= dark
                # Dark correct white field
                if white is not None:
                    print('Dark field correcting whitefield')
                    white -= dark
        except UnboundLocalError:
            print('No darkfield subtraction performed.')
    # PAD Specific Preprocessing
    elif CXP.preprocessing.detector_type == 'pad':
        pass

    # Threshhold data
    if CXP.preprocessing.threshhold_raw_data > 0:
        data.threshhold()
        if white is not None:
            white.threshhold()

    # Bin data
    if CXP.preprocessing.bin > 1:
        data.bin()
        if white is not None:
            white.bin()

    if CXP.preprocessing.rot90 != 0:
        data.rot90(CXP.preprocessing.rot90)
        if white is not None:
            white.rot90(CXP.preprocessing.rot90)

    # Take square root
    data.square_root()
    if white is not None:
        white.square_root()

    # Put in FFT shifted
    data.fft_shift()
    if white is not None:
        white.fft_shift()

    return (stack_num, data.data)
if len(l) == 3:
    x.append(float(l[0]))
    t.append(float(l[1]))
    y.append(float(l[2]))

x = numpy.array(x)
t = numpy.array(t)
y = numpy.array(y)

dosage = []
sig = []
means = []
xpts = numpy.unique(x)
plots = []

for i in xpts:
    bm = scipy.where(x == i)
    a = numpy.array(t[bm]).view(numpy.recarray)
    duplicates = numpy.core.records.find_duplicate(a)
    for dup in duplicates:
        dup_bm = scipy.where((x == i) & (t == dup))[0]
        x = numpy.delete(x, dup_bm[-1])
        t = numpy.delete(t, dup_bm[-1])
        y = numpy.delete(y, dup_bm[-1])
    bm = scipy.where(x == i)
    #plots.append(Gnuplot.Data(y[bm], with_='lines'))
    #dosage.append(scipy.integrate.simps(y[bm]))
    plots.append(Gnuplot.Data(t[bm], y[bm], with_='lines'))
    dosage.append(scipy.integrate.simps(y[bm], x=t[bm]))
    sig.append(numpy.std(y[bm]))
    means.append(numpy.mean(y[bm]))
def n_to_one(arr):
    """ Returns the reverse of a 1-in-n binary encoding. """
    return where(arr == 1)[0][0]
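# Usage sketch: the index of the hot bit is the encoded value.
def _demo_n_to_one():
    from scipy import array
    return n_to_one(array([0, 0, 1, 0]))   # -> 2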
def check_people_in_box(dom, box, p, rng):
    """
    To check that people coordinates are in the given box (test 1) and in a \
    usable space i.e. in an area accessible and not concerned by obstacles \
    (test 2). On the other hand, one moves the individuals which do not satisfy \
    these two tests.

    Parameters
    ----------
    dom: Domain
        contains everything for managing the domain
    box: list
        coordinates of the box [xmin, xmax, ymin, ymax]
    p: numpy array
        people coordinates x y r
    rng: RandomState
        scipy random state object (see scipy.random.RandomState)

    Returns
    -------
    info: boolean
        True if some individuals had to be moved
    p: numpy array
        new people coordinates x y r
    """
    print("------ check_people_in_box --> To verify that " + str(p.shape[0]) +
          " individuals are in the domain, in the box and with a defined" +
          " desired velocity")
    p_rmax = p[:, 2].max()
    xmin, xmax, ymin, ymax = box
    info = False
    while True:
        ## test 1
        I = sp.floor((p[:, 1] - dom.ymin - 0.5 * dom.pixel_size) / dom.pixel_size).astype(int)
        J = sp.floor((p[:, 0] - dom.xmin - 0.5 * dom.pixel_size) / dom.pixel_size).astype(int)
        test1 = (I >= 0) * (I < dom.height) * (J >= 0) * (J < dom.width)
        ind1 = sp.where(test1 == 0)[0]
        if (ind1.shape[0] > 0):
            print("------ check_people_in_box --> " + str(ind1.shape[0]) +
                  " individuals outside the domain")
            info = True
            p[ind1, 0] = rng.uniform(xmin + p_rmax, xmax - p_rmax, ind1.shape[0])
            p[ind1, 1] = rng.uniform(ymin + p_rmax, ymax - p_rmax, ind1.shape[0])
        else:
            ## test 2
            I, J, Vd = compute_desired_velocity(dom, p)
            normVd = Vd[:, 0]**2 + Vd[:, 1]**2
            test2 = (p[:, 0] > xmin + p_rmax) * (p[:, 0] < xmax - p_rmax) \
                  * (p[:, 1] > ymin + p_rmax) * (p[:, 1] < ymax - p_rmax) \
                  * (normVd > 0)
            ind2 = sp.where(test2 == 0)[0]
            if (ind2.shape[0] > 0):
                print("------ check_people_in_box --> " + str(ind2.shape[0]) +
                      " individuals with an undefined desired velocity ")
                info = True
                p[ind2, 0] = rng.uniform(xmin + p_rmax, xmax - p_rmax, ind2.shape[0])
                p[ind2, 1] = rng.uniform(ymin + p_rmax, ymax - p_rmax, ind2.shape[0])
            else:
                print("------ check_people_in_box --> OK !")
                break
    return info, p
def _get_counts(chr_name, start, stop, files, intron_cov, intron_cnt=False,
                verbose=False, collapsed=True, bins=0):
    """Internal function that queries the bam files and produces the counts"""

    ### PYSAM CIGAR ENCODING
    # M   BAM_CMATCH      0
    # I   BAM_CINS        1
    # D   BAM_CDEL        2
    # N   BAM_CREF_SKIP   3
    # S   BAM_CSOFT_CLIP  4
    # H   BAM_CHARD_CLIP  5
    # P   BAM_CPAD        6
    # =   BAM_CEQUAL      7
    # X   BAM_CDIFF       8

    ### init counts
    counts = sp.zeros((len(files), stop - start + 1))
    intron_counts = sp.zeros((len(files), stop - start + 1))
    intron_list = [dict() for i in range(len(files))]

    for f_i, fn in enumerate(files):
        if fn.lower().endswith('bam'):
            if verbose:
                print >> sys.stdout, "reading bam %i of %i" % (f_i + 1, len(files))
            try:
                infile = pysam.Samfile(str(fn), "rb")
            except ValueError:
                print >> sys.stderr, 'Could not load file %s - skipping' % fn
                continue
            c_len = stop - start + 1

            for line in infile.fetch(chr_name, start, stop):
                if line.is_secondary:
                    continue
                pos = line.pos
                for o in line.cigar:
                    if o[0] in [0, 2, 3]:
                        ### get segment overlap to current region
                        seg_offset = max(0, start - pos)
                        seg_len = o[1] - seg_offset
                        if seg_len > 0:
                            seg_start = max(pos - start, 0)
                            if o[0] in [0, 2]:
                                counts[f_i, seg_start:min(seg_start + seg_len, c_len)] += 1
                            elif (intron_cov or intron_cnt) and o[0] == 3:
                                if pos >= start and (pos + o[1]) <= stop:
                                    if intron_cov:
                                        intron_counts[f_i, seg_start:min(seg_start + seg_len, c_len)] += 1
                                    if intron_cnt and (seg_start + seg_len < c_len):
                                        try:
                                            intron_list[f_i][(seg_start, seg_len)] += 1
                                        except KeyError:
                                            intron_list[f_i][(seg_start, seg_len)] = 1
                    if not o[0] in [1, 4, 5]:
                        pos += o[1]
        elif fn.lower().endswith('npz'):
            try:
                infile = sp.load(str(fn))
            except:
                print >> sys.stderr, 'Could not load file %s - skipping' % fn
                continue
            c_len = stop - start + 1
            bam_reads = spsp.coo_matrix((infile[chr_name + '_reads_dat'],
                                         (infile[chr_name + '_reads_row'],
                                          infile[chr_name + '_reads_col'])),
                                        shape=infile[chr_name + '_reads_shp'],
                                        dtype='uint32').tocsc()
            bam_introns_m = infile[chr_name + '_introns_m']
            bam_introns_p = infile[chr_name + '_introns_p']

            counts[f_i, :] = sp.sum(bam_reads[:, start:stop + 1].todense(), axis=0)
            if intron_cnt:
                idx = sp.where((bam_introns_m[:, 0] > start) & (bam_introns_m[:, 1] < stop))[0]
                for _i in idx:
                    try:
                        intron_list[f_i][(bam_introns_m[_i, 0] - start,
                                          bam_introns_m[_i, 1] - bam_introns_m[_i, 0])] += bam_introns_m[_i, 2]
                    except KeyError:
                        intron_list[f_i][(bam_introns_m[_i, 0] - start,
                                          bam_introns_m[_i, 1] - bam_introns_m[_i, 0])] = bam_introns_m[_i, 2]
                    if intron_cov:
                        intron_counts[f_i, bam_introns_m[_i, 0]:bam_introns_m[_i, 1]] += bam_introns_m[_i, 2]
                idx = sp.where((bam_introns_p[:, 0] > start) & (bam_introns_p[:, 1] < stop))[0]
                for _i in idx:
                    try:
                        intron_list[f_i][(bam_introns_p[_i, 0] - start,
                                          bam_introns_p[_i, 1] - bam_introns_p[_i, 0])] += bam_introns_p[_i, 2]
                    except KeyError:
                        intron_list[f_i][(bam_introns_p[_i, 0] - start,
                                          bam_introns_p[_i, 1] - bam_introns_p[_i, 0])] = bam_introns_p[_i, 2]
                    if intron_cov:
                        intron_counts[f_i, bam_introns_p[_i, 0]:bam_introns_p[_i, 1]] += bam_introns_p[_i, 2]

    if collapsed:
        counts = sp.sum(counts, axis=0)
        intron_counts = sp.sum(intron_counts, axis=0)
        if intron_cnt:
            for f in range(1, len(files)):
                for intron in intron_list[f]:
                    try:
                        intron_list[0][intron] += intron_list[f][intron]
                    except KeyError:
                        intron_list[0][intron] = intron_list[f][intron]
            intron_list = intron_list[0]

    return (counts, intron_counts, intron_list)
def sensor(door, xy0, xy1, t0, t1): """ Compute the number of entries/exits through a door as a pedestrian sensor could do Parameters ---------- door: numpy array door coordinates [x0,y0,x1,y1] t0: float time t1: float time xy0: numpy array people coordinates at time t0 xy1: numpy array people coordinates at time t1 Returns ------- id: numpy array index of persons who go through the door p: numpy array coordinates of intersection points between the door and people trajectories io: numpy array the exit direction is the normal direction, 1 = exit, -1 = entry times: numpy array exit or entry times entries: int number of entries exits: int number of exits """ # # trajectories : # xy0 # | # | # door : d0--p--d1 # | # xy1 d0 = sp.empty(xy0.shape) d0[:, 0] = door[0] d0[:, 1] = door[1] d1 = sp.empty(xy1.shape) d1[:, 0] = door[2] d1[:, 1] = door[3] T = sp.array([[0, -1], [1, 0]]) vdoor = sp.atleast_2d(d1 - d0) vtraj = sp.atleast_2d(xy1 - xy0) v0 = sp.atleast_2d(d0 - xy0) dot_vdoor_T = sp.dot(vdoor, T) denom = sp.sum(dot_vdoor_T * vtraj, axis=1) num = sp.sum(dot_vdoor_T * v0, axis=1) # Intersection points # can be inf or nan if parallel lines... p = sp.atleast_2d(num / denom).T * vtraj + xy0 # Test if the intersection point is on the door segment vp0 = sp.atleast_2d(p - d0) norm_vdoor_2 = sp.sum(vdoor * vdoor, axis=1) dot_vdoor_vp0 = sp.sum(vdoor * vp0, axis=1) is_p_in_door = (dot_vdoor_vp0 >= 0) * (dot_vdoor_vp0 <= norm_vdoor_2) # Test if the intersection point is on the person trajectory vpxy0 = sp.atleast_2d(p - xy0) norm_vtraj_2 = sp.sum(vtraj * vtraj, axis=1) dot_vtraj_vpxy0 = sp.sum(vtraj * vpxy0, axis=1) is_p_in_traj = (dot_vtraj_vpxy0 >= 0) * (dot_vtraj_vpxy0 <= norm_vtraj_2) # Keep only points on the door and on the trajectory is_p_intersect = is_p_in_door * is_p_in_traj id = sp.where(is_p_intersect == True)[0] # Test if the direction is the output normal : (d1-d0)_y , (d1-d0)_x vn = sp.empty(vdoor.shape) vn[:, 0] = vdoor[:, 1] vn[:, 1] = -vdoor[:, 0] dot_vn_vtraj = sp.sum(vn * vtraj, axis=1) is_normal_dir = (dot_vn_vtraj > 0) io = (is_normal_dir[id] == True) * 1 + (is_normal_dir[id] == False) * (-1) exits = sp.sum(io == 1) entries = sp.sum(io == -1) # Compute the distance from the intersection point p to xy0 norm_vpxy0_2 = sp.sqrt(sp.sum(vpxy0 * vpxy0, axis=1)) # Compute the distance from the intersection point p to xy1 vpxy1 = sp.atleast_2d(p - xy1) norm_vpxy1_2 = sp.sqrt(sp.sum(vpxy1 * vpxy1, axis=1)) # Compute the intersection time norm_vtraj = sp.sqrt(norm_vtraj_2) dt = t1 - t0 times = t0 + (is_normal_dir==True)*(norm_vpxy0_2*dt/norm_vtraj) + \ (is_normal_dir==False)*(norm_vpxy1_2*dt/norm_vtraj) return id, p[id, :], io, times[id], entries, exits
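# --- Usage sketch (assumes the sensor() function above is in scope): a vertical
# door from (0,0) to (0,1); the first pedestrian crosses it left to right along
# the outgoing normal, the second never comes near it.
import scipy as sp

door = sp.array([0., 0., 0., 1.])
xy0 = sp.array([[-1., 0.5], [5., 5.]])
xy1 = sp.array([[1., 0.5], [6., 5.]])
id, p, io, times, entries, exits = sensor(door, xy0, xy1, 0., 1.)
print(id, io, times, entries, exits)  # -> [0] [1] [0.5] 0 1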
def collect_events(CFG): ### which events do we call do_exon_skip = ('exon_skip' in CFG['event_types']) do_intron_retention = ('intron_retention' in CFG['event_types']) do_mult_exon_skip = ('mult_exon_skip' in CFG['event_types']) do_alt_3prime = ('alt_3prime' in CFG['event_types']) do_alt_5prime = ('alt_5prime' in CFG['event_types']) do_mutex_exons = ('mutex_exons' in CFG['event_types']) ### init empty event fields if do_intron_retention: intron_reten_pos = sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object') if do_exon_skip: exon_skip_pos = sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object') if do_alt_3prime or do_alt_5prime: alt_end_5prime_pos = sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object') alt_end_3prime_pos = sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object') if do_mult_exon_skip: mult_exon_skip_pos = sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object') if do_mutex_exons: mutex_exons_pos = sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object') validate_tag = '' if 'validate_splicegraphs' in CFG and CFG['validate_splicegraphs']: validate_tag = '.validated' for i in range(len(CFG['samples'])): if CFG['same_genestruct_for_all_samples'] == 1 and i == 1: break if i > 0: if do_intron_retention: intron_reten_pos = sp.c_[ intron_reten_pos, sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object')] if do_exon_skip: exon_skip_pos = sp.c_[ exon_skip_pos, sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object')] if do_alt_3prime: alt_end_3prime_pos = sp.c_[ alt_end_3prime_pos, sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object')] if do_alt_5prime: alt_end_5prime_pos = sp.c_[ alt_end_5prime_pos, sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object')] if do_mult_exon_skip: mult_exon_skip_pos = sp.c_[ mult_exon_skip_pos, sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object')] if do_mutex_exons: mutex_exons_pos = sp.c_[ mutex_exons_pos, sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object')] strain = CFG['strains'][i] for ridx in CFG['replicate_idxs']: if len(CFG['replicate_idxs']) > 1: rep_tag = '_R%i' % ridx else: rep_tag = '' if 'spladder_infile' in CFG: genes_fnames = CFG['spladder_infile'] elif CFG['merge_strategy'] == 'single': genes_fnames = '%s/spladder/genes_graph_conf%i%s.%s.pickle' % ( CFG['out_dirname'], CFG['confidence_level'], rep_tag, CFG['samples'][i]) else: genes_fnames = '%s/spladder/genes_graph_conf%i%s.%s%s.pickle' % ( CFG['out_dirname'], CFG['confidence_level'], rep_tag, CFG['merge_strategy'], validate_tag) ### define outfile names if CFG['merge_strategy'] == 'single': fn_out_ir = '%s/%s_intron_retention%s_C%i.pickle' % ( CFG['out_dirname'], CFG['samples'][i], rep_tag, CFG['confidence_level']) fn_out_es = '%s/%s_exon_skip%s_C%i.pickle' % ( CFG['out_dirname'], CFG['samples'][i], rep_tag, CFG['confidence_level']) fn_out_mes = '%s/%s_mult_exon_skip%s_C%i.pickle' % ( CFG['out_dirname'], CFG['samples'][i], rep_tag, CFG['confidence_level']) fn_out_a5 = '%s/%s_alt_5prime%s_C%i.pickle' % ( CFG['out_dirname'], CFG['samples'][i], rep_tag, CFG['confidence_level']) fn_out_a3 = '%s/%s_alt_3prime%s_C%i.pickle' % ( CFG['out_dirname'], CFG['samples'][i], rep_tag, CFG['confidence_level']) fn_out_mex = '%s/%s_mutex_exons%s_C%i.pickle' % ( CFG['out_dirname'], CFG['samples'][i], rep_tag, CFG['confidence_level']) else: fn_out_ir = '%s/%s_intron_retention%s_C%i.pickle' % ( CFG['out_dirname'], CFG['merge_strategy'], rep_tag, CFG['confidence_level']) fn_out_es = '%s/%s_exon_skip%s_C%i.pickle' % ( CFG['out_dirname'], CFG['merge_strategy'], rep_tag, 
CFG['confidence_level']) fn_out_mes = '%s/%s_mult_exon_skip%s_C%i.pickle' % ( CFG['out_dirname'], CFG['merge_strategy'], rep_tag, CFG['confidence_level']) fn_out_a5 = '%s/%s_alt_5prime%s_C%i.pickle' % ( CFG['out_dirname'], CFG['merge_strategy'], rep_tag, CFG['confidence_level']) fn_out_a3 = '%s/%s_alt_3prime%s_C%i.pickle' % ( CFG['out_dirname'], CFG['merge_strategy'], rep_tag, CFG['confidence_level']) fn_out_mex = '%s/%s_mutex_exons%s_C%i.pickle' % ( CFG['out_dirname'], CFG['merge_strategy'], rep_tag, CFG['confidence_level']) if do_intron_retention: intron_reten_pos[ridx, i] = [] if do_exon_skip: exon_skip_pos[ridx, i] = [] if do_mult_exon_skip: mult_exon_skip_pos[ridx, i] = [] if do_alt_5prime: alt_end_5prime_pos[ridx, i] = [] if do_alt_3prime: alt_end_3prime_pos[ridx, i] = [] if do_mutex_exons: mutex_exons_pos[ridx, i] = [] print '\nconfidence %i / sample %i / replicate %i' % ( CFG['confidence_level'], i, ridx) if os.path.exists(genes_fnames): print 'Loading gene structure from %s ...' % genes_fnames (genes, inserted) = cPickle.load(open(genes_fnames, 'r')) print '... done.' if not 'chrm_lookup' in CFG: CFG = append_chrms( sp.unique(sp.array([x.chr for x in genes], dtype='str')), CFG) ### detect intron retentions from splicegraph if do_intron_retention: if not os.path.exists(fn_out_ir): idx_intron_reten, intron_intron_reten = detect_events( genes, 'intron_retention', sp.where([x.is_alt for x in genes])[0], CFG) for k in range(len(idx_intron_reten)): gene = genes[idx_intron_reten[k]] ### perform liftover between strains if necessary exons = gene.splicegraph.vertices if not 'reference_strain' in CFG: exons_col = exons exons_col_pos = exons else: exons_col = convert_strain_pos_intervals( gene.chr, gene.splicegraph.vertices.T, strain, CFG['reference_strain']).T exons_col_pos = convert_strain_pos( gene.chr, gene.splicegraph.vertices.T, strain, CFG['reference_strain']).T if exons_col.shape != exons_col_pos.shape: print 'skipping non-mappable intron retention event' continue ### build intron retention data structure event = Event('intron_retention', gene.chr, gene.strand) event.strain = sp.array([strain]) event.exons1 = sp.c_[ exons[:, intron_intron_reten[k][0]], exons[:, intron_intron_reten[k][1]]].T event.exons2 = sp.array([ exons[:, intron_intron_reten[k][0]][0], exons[:, intron_intron_reten[k][1]][1] ]) #event.exons2 = exons[:, intron_intron_reten[k][2]] event.exons1_col = sp.c_[ exons_col[:, intron_intron_reten[k][0]], exons_col[:, intron_intron_reten[k][1]]] event.exons2_col = sp.array([ exons_col[:, intron_intron_reten[k][0]][0], exons_col[:, intron_intron_reten[k][1]][1] ]) #event.exons2_col = exons_col[:, intron_intron_reten[k][2]] event.gene_name = sp.array([gene.name]) event.gene_idx = idx_intron_reten[k] #event.transcript_type = sp.array([gene.transcript_type]) intron_reten_pos[ridx, i].append(event) else: print '%s already exists' % fn_out_ir ### detect exon_skips from splicegraph if do_exon_skip: if not os.path.exists(fn_out_es): idx_exon_skip, exon_exon_skip = detect_events( genes, 'exon_skip', sp.where([x.is_alt for x in genes])[0], CFG) for k in range(len(idx_exon_skip)): gene = genes[idx_exon_skip[k]] ### perform liftover between strains if necessary exons = gene.splicegraph.vertices if not 'reference_strain' in CFG: exons_col = exons exons_col_pos = exons else: exons_col = convert_strain_pos_intervals( gene.chr, gene.splicegraph.vertices.T, strain, CFG['reference_strain']).T exons_col_pos = convert_strain_pos( gene.chr, gene.splicegraph.vertices.T, strain, 
CFG['reference_strain']).T if exons_col.shape != exons_col_pos.shape: print 'skipping non-mappable exon_skip event' continue ### build exon skip data structure event = Event('exon_skip', gene.chr, gene.strand) event.strain = sp.array([strain]) event.exons1 = sp.c_[exons[:, exon_exon_skip[k][0]], exons[:, exon_exon_skip[k][2]]].T event.exons2 = sp.c_[exons[:, exon_exon_skip[k][0]], exons[:, exon_exon_skip[k][1]], exons[:, exon_exon_skip[k][2]]].T event.exons1_col = sp.c_[ exons_col[:, exon_exon_skip[k][0]], exons_col[:, exon_exon_skip[k][2]]].T event.exons2_col = sp.c_[ exons_col[:, exon_exon_skip[k][0]], exons_col[:, exon_exon_skip[k][1]], exons_col[:, exon_exon_skip[k][2]]].T event.gene_name = sp.array([gene.name]) event.gene_idx = idx_exon_skip[k] #event.transcript_type = sp.array([gene.transcript_type]) exon_skip_pos[ridx, i].append(event) else: print '%s already exists' % fn_out_es ### detect alternative intron_ends from splicegraph if do_alt_3prime or do_alt_5prime: if not os.path.exists(fn_out_a5) or not os.path.exists( fn_out_a3): idx_alt_end_5prime, exon_alt_end_5prime, idx_alt_end_3prime, exon_alt_end_3prime = detect_events( genes, 'alt_prime', sp.where([x.is_alt for x in genes])[0], CFG) ### handle 5 prime events for k in range(len(idx_alt_end_5prime)): gene = genes[idx_alt_end_5prime[k]] ### perform liftover between strains if necessary exons = gene.splicegraph.vertices if not 'reference_strain' in CFG: exons_col = exons exons_col_pos = exons else: exons_col = convert_strain_pos_intervals( gene.chr, gene.splicegraph.vertices.T, strain, CFG['reference_strain']).T exons_col_pos = convert_strain_pos( gene.chr, gene.splicegraph.vertices.T, strain, CFG['reference_strain']).T if exons_col.shape != exons_col_pos.shape: print 'skipping non-mappable alt 5-prime event' continue for k1 in range( len(exon_alt_end_5prime[k] ['fiveprimesites']) - 1): for k2 in range( k1 + 1, len(exon_alt_end_5prime[k] ['fiveprimesites'])): exon_alt1_col = exons_col[:, exon_alt_end_5prime[ k] ['fiveprimesites'] [k1]].T exon_alt2_col = exons_col[:, exon_alt_end_5prime[ k] ['fiveprimesites'] [k2]].T ### check if exons overlap if (exon_alt1_col[0] >= exon_alt2_col[1] ) or (exon_alt1_col[1] <= exon_alt2_col[0]): continue event = Event('alt_5prime', gene.chr, gene.strand) event.strain = sp.array([strain]) if gene.strand == '+': event.exons1 = sp.c_[ exons[:, exon_alt_end_5prime[k] ['fiveprimesites'][k1]], exons[:, exon_alt_end_5prime[k] ['threeprimesite']]].T event.exons2 = sp.c_[ exons[:, exon_alt_end_5prime[k] ['fiveprimesites'][k2]], exons[:, exon_alt_end_5prime[k] ['threeprimesite']]].T event.exons1_col = sp.c_[ exons_col[:, exon_alt_end_5prime[k] ['fiveprimesites'][k1]], exons_col[:, exon_alt_end_5prime[k] ['threeprimesite']]].T event.exons2_col = sp.c_[ exons_col[:, exon_alt_end_5prime[k] ['fiveprimesites'][k2]], exons_col[:, exon_alt_end_5prime[k] ['threeprimesite']]].T else: event.exons1 = sp.c_[ exons[:, exon_alt_end_5prime[k] ['threeprimesite']], exons[:, exon_alt_end_5prime[k] ['fiveprimesites'][k1]]].T event.exons2 = sp.c_[ exons[:, exon_alt_end_5prime[k] ['threeprimesite']], exons[:, exon_alt_end_5prime[k] ['fiveprimesites'][k2]]].T event.exons1_col = sp.c_[ exons_col[:, exon_alt_end_5prime[k] ['threeprimesite']], exons_col[:, exon_alt_end_5prime[ k]['fiveprimesites'][k1]]].T event.exons2_col = sp.c_[ exons_col[:, exon_alt_end_5prime[k] ['threeprimesite']], exons_col[:, exon_alt_end_5prime[ k]['fiveprimesites'][k2]]].T event.gene_name = sp.array([gene.name]) event.gene_idx = idx_alt_end_5prime[k] ### 
assert that first isoform is always the shorter one if sp.sum(event.exons1[:, 1] - event.exons1[:, 0]) > sp.sum( event.exons2[:, 1] - event.exons2[:, 0]): _tmp = event.exons1.copy() event.exons1 = event.exons2.copy() event.exons2 = _tmp #event.transcript_type = sp.array([gene.transcript_type]) if do_alt_5prime: alt_end_5prime_pos[ridx, i].append(event) ### handle 3 prime events for k in range(len(idx_alt_end_3prime)): gene = genes[idx_alt_end_3prime[k]] ### perform liftover between strains if necessary exons = gene.splicegraph.vertices if not 'reference_strain' in CFG: exons_col = exons exons_col_pos = exons else: exons_col = convert_strain_pos_intervals( gene.chr, gene.splicegraph.vertices.T, strain, CFG['reference_strain']).T exons_col_pos = convert_strain_pos( gene.chr, gene.splicegraph.vertices.T, strain, CFG['reference_strain']).T if exons_col.shape != exons_col_pos.shape: print 'skipping non-mappable alt 3-prime event' continue for k1 in range( len(exon_alt_end_3prime[k] ['threeprimesites']) - 1): for k2 in range( k1 + 1, len(exon_alt_end_3prime[k] ['threeprimesites'])): exon_alt1_col = exons_col[:, exon_alt_end_3prime[ k] ['threeprimesites'] [k1]].T exon_alt2_col = exons_col[:, exon_alt_end_3prime[ k] ['threeprimesites'] [k2]].T ### check if exons overlap if (exon_alt1_col[0] >= exon_alt2_col[1] ) or (exon_alt1_col[1] <= exon_alt2_col[0]): continue event = Event('alt_3prime', gene.chr, gene.strand) event.strain = sp.array([strain]) if gene.strand == '+': event.exons1 = sp.c_[ exons[:, exon_alt_end_3prime[k] ['threeprimesites'][k1]], exons[:, exon_alt_end_3prime[k] ['fiveprimesite']]].T event.exons2 = sp.c_[ exons[:, exon_alt_end_3prime[k] ['threeprimesites'][k2]], exons[:, exon_alt_end_3prime[k] ['fiveprimesite']]].T event.exons1_col = sp.c_[ exons_col[:, exon_alt_end_3prime[k] ['threeprimesites'][k1]], exons_col[:, exon_alt_end_3prime[k] ['fiveprimesite']]].T event.exons2_col = sp.c_[ exons_col[:, exon_alt_end_3prime[k] ['threeprimesites'][k2]], exons_col[:, exon_alt_end_3prime[k] ['fiveprimesite']]].T else: event.exons1 = sp.c_[ exons[:, exon_alt_end_3prime[k] ['fiveprimesite']], exons[:, exon_alt_end_3prime[k] ['threeprimesites'][k1]]].T event.exons2 = sp.c_[ exons[:, exon_alt_end_3prime[k] ['fiveprimesite']], exons[:, exon_alt_end_3prime[k] ['threeprimesites'][k2]]].T event.exons1_col = sp.c_[ exons_col[:, exon_alt_end_3prime[k] ['fiveprimesite']], exons_col[:, exon_alt_end_3prime[ k]['threeprimesites'][k1]]].T event.exons2_col = sp.c_[ exons_col[:, exon_alt_end_3prime[k] ['fiveprimesite']], exons_col[:, exon_alt_end_3prime[ k]['threeprimesites'][k2]]].T event.gene_name = sp.array([gene.name]) event.gene_idx = idx_alt_end_3prime[k] ### assert that first isoform is always the shorter one if sp.sum(event.exons1[:, 1] - event.exons1[:, 0]) > sp.sum( event.exons2[:, 1] - event.exons2[:, 0]): _tmp = event.exons1.copy() event.exons1 = event.exons2.copy() event.exons2 = _tmp #event.transcript_type = sp.array([gene.transcript_type]) if do_alt_3prime: alt_end_3prime_pos[ridx, i].append(event) else: print '%s and %s already exists' % (fn_out_a5, fn_out_a3) ### detect multiple_exon_skips from splicegraph if do_mult_exon_skip: if not os.path.exists(fn_out_mes): idx_mult_exon_skip, exon_mult_exon_skip = detect_events( genes, 'mult_exon_skip', sp.where([x.is_alt for x in genes])[0], CFG) for k, gidx in enumerate(idx_mult_exon_skip): gene = genes[gidx] ### perform liftover between strains if necessary exons = gene.splicegraph.vertices if not 'reference_strain' in CFG: exons_col = exons 
exons_col_pos = exons else: exons_col = convert_strain_pos_intervals( gene.chr, gene.splicegraph.vertices.T, strain, CFG['reference_strain']).T exons_col_pos = convert_strain_pos( gene.chr, gene.splicegraph.vertices.T, strain, CFG['reference_strain']).T if exons_col.shape != exons_col_pos.shape: print 'skipping non-mappable multiple exon skip event' continue ### build multiple exon skip data structure event = Event('mult_exon_skip', gene.chr, gene.strand) event.strain = sp.array([strain]) event.exons1 = sp.c_[ exons[:, exon_mult_exon_skip[k][0]], exons[:, exon_mult_exon_skip[k][2]]].T event.exons2 = sp.c_[ exons[:, exon_mult_exon_skip[k][0]], exons[:, exon_mult_exon_skip[k][1]], exons[:, exon_mult_exon_skip[k][2]]].T event.exons1_col = sp.c_[ exons_col[:, exon_mult_exon_skip[k][0]], exons_col[:, exon_mult_exon_skip[k][2]]].T event.exons2_col = sp.c_[ exons_col[:, exon_mult_exon_skip[k][0]], exons_col[:, exon_mult_exon_skip[k][1]], exons_col[:, exon_mult_exon_skip[k][2]]].T event.gene_name = sp.array([gene.name]) event.gene_idx = gidx #event.transcript_type = sp.array([gene.transcript_type]) mult_exon_skip_pos[ridx, i].append(event) else: print '%s already exists' % fn_out_mes ### detect mutually exclusive exons from splicegraph if do_mutex_exons: if not os.path.exists(fn_out_mex): idx_mutex_exons, exon_mutex_exons = detect_events( genes, 'mutex_exons', sp.where([x.is_alt for x in genes])[0], CFG) if len(idx_mutex_exons) > 0: for k in range(len(exon_mutex_exons)): gene = genes[idx_mutex_exons[k]] ### perform liftover between strains if necessary exons = gene.splicegraph.vertices if not 'reference_strain' in CFG: exons_col = exons exons_col_pos = exons else: exons_col = convert_strain_pos_intervals( gene.chr, gene.splicegraph.vertices.T, strain, CFG['reference_strain']).T exons_col_pos = convert_strain_pos( gene.chr, gene.splicegraph.vertices.T, strain, CFG['reference_strain']).T if exons_col.shape != exons_col_pos.shape: print 'skipping non-mappable mutex exons event' continue ### build data structure for mutually exclusive exons event = Event('mutex_exons', gene.chr, gene.strand) event.strain = sp.array([strain]) event.exons1 = sp.c_[ exons[:, exon_mutex_exons[k][0]], exons[:, exon_mutex_exons[k][1]], exons[:, exon_mutex_exons[k][3]]].T event.exons2 = sp.c_[ exons[:, exon_mutex_exons[k][0]], exons[:, exon_mutex_exons[k][2]], exons[:, exon_mutex_exons[k][3]]].T event.exons1_col = sp.c_[ exons_col[:, exon_mutex_exons[k][0]], exons_col[:, exon_mutex_exons[k][1]], exons_col[:, exon_mutex_exons[k][3]]].T event.exons2_col = sp.c_[ exons_col[:, exon_mutex_exons[k][0]], exons_col[:, exon_mutex_exons[k][2]], exons_col[:, exon_mutex_exons[k][3]]].T event.gene_name = sp.array([gene.name]) event.gene_idx = idx_mutex_exons[k] #event.transcript_type = sp.array([gene.transcript_type]) mutex_exons_pos[ridx, i].append(event) else: print '%s already exists' % fn_out_mex ### genes file does not exist else: print 'result file not found: %s' % genes_fnames ### combine events for all samples for ridx in CFG['replicate_idxs']: ################################################% ### COMBINE INTRON RETENTIONS ################################################% if do_intron_retention: if not os.path.exists(fn_out_ir): intron_reten_pos_all = sp.array([ item for sublist in intron_reten_pos[ridx, :] for item in sublist ]) ### post process event structure by sorting and making events unique events_all = post_process_event_struct(intron_reten_pos_all, CFG) ### store intron retentions print 'saving intron retentions to %s' % 
fn_out_ir cPickle.dump(events_all, open(fn_out_ir, 'w'), -1) else: print '%s already exists' % fn_out_ir ################################################% ### COMBINE EXON SKIPS ################################################% if do_exon_skip: if not os.path.exists(fn_out_es): exon_skip_pos_all = sp.array([ item for sublist in exon_skip_pos[ridx, :] for item in sublist ]) ### post process event structure by sorting and making events unique events_all = post_process_event_struct(exon_skip_pos_all, CFG) ### store exon skip events print 'saving exon skips to %s' % fn_out_es cPickle.dump(events_all, open(fn_out_es, 'w'), -1) else: print '%s already exists' % fn_out_es ################################################% ### COMBINE MULTIPLE EXON SKIPS ################################################% if do_mult_exon_skip: if not os.path.exists(fn_out_mes): mult_exon_skip_pos_all = sp.array([ item for sublist in mult_exon_skip_pos[ridx, :] for item in sublist ]) ### post process event structure by sorting and making events unique events_all = post_process_event_struct(mult_exon_skip_pos_all, CFG) ### store multiple exon skip events print 'saving multiple exon skips to %s' % fn_out_mes cPickle.dump(events_all, open(fn_out_mes, 'w'), -1) else: print '%s already exists' % fn_out_mes ################################################% ### COMBINE ALT FIVE PRIME EVENTS ################################################% if do_alt_5prime: if not os.path.exists(fn_out_a5): alt_end_5prime_pos_all = sp.array([ item for sublist in alt_end_5prime_pos[ridx, :] for item in sublist ]) ### post process event structure by sorting and making events unique events_all = post_process_event_struct(alt_end_5prime_pos_all, CFG) ### curate alt prime events ### cut to min len, if alt exon lengths differ ### remove, if alt exons do not overlap if CFG['curate_alt_prime_events']: events_all = curate_alt_prime(events_all, CFG) ### store alt 5 prime events print 'saving alt 5 prime events to %s' % fn_out_a5 cPickle.dump(events_all, open(fn_out_a5, 'w'), -1) else: print '%s already exists' % fn_out_a5 ################################################% ### COMBINE ALT THREE PRIME EVENTS ################################################% if do_alt_3prime: if not os.path.exists(fn_out_a3): alt_end_3prime_pos_all = sp.array([ item for sublist in alt_end_3prime_pos[ridx, :] for item in sublist ]) ### post process event structure by sorting and making events unique events_all = post_process_event_struct(alt_end_3prime_pos_all, CFG) ### curate alt prime events ### cut to min len, if alt exon lengths differ ### remove, if alt exons do not overlap if CFG['curate_alt_prime_events']: events_all = curate_alt_prime(events_all, CFG) ### store alt 3 prime events print 'saving alt 3 prime events to %s' % fn_out_a3 cPickle.dump(events_all, open(fn_out_a3, 'w'), -1) else: print '%s already exists' % fn_out_a3 ################################################% ### COMBINE MUTUALLY EXCLUSIVE EXONS ################################################% if do_mutex_exons: if not os.path.exists(fn_out_mex): mutex_exons_pos_all = sp.array([ item for sublist in mutex_exons_pos[ridx, :] for item in sublist ]) ### post process event structure by sorting and making events unique events_all = post_process_event_struct(mutex_exons_pos_all, CFG) ### store multiple exon skip events print 'saving mutually exclusive exons to %s' % fn_out_mex cPickle.dump(events_all, open(fn_out_mex, 'w'), -1) else: print '%s already exists' % fn_out_mex
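# --- Illustrative sketch (not part of the original module) of the
# (replicate x sample) bookkeeping used in collect_events(): events live in an
# object array of per-cell lists that is later flattened one replicate at a time.
import scipy as sp

n_rep, n_samples = 2, 3
pos = sp.zeros((n_rep, 1), dtype='object')
for i in range(1, n_samples):                       # grow one column per sample
    pos = sp.c_[pos, sp.zeros((n_rep, 1), dtype='object')]
for ridx in range(n_rep):
    for i in range(n_samples):
        pos[ridx, i] = []
pos[0, 1].append('event_a')                         # stand-in for an Event object
flat = sp.array([e for sublist in pos[0, :] for e in sublist])
print(flat)  # -> ['event_a']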
def main(): ### get command line options options = parse_options(sys.argv) ### parse parameters from options object CFG = settings.parse_args(options, identity='test') ### generate output directory outdir = os.path.join(options.outdir, 'testing') if options.timestamp == 'y': outdir = '%s_%s' % (outdir, str(datetime.datetime.now()).replace( ' ', '_')) if options.labelA != 'condA' and options.labelB != 'condB': outdir = '%s_%s_vs_%s' % (outdir, options.labelA, options.labelB) if not os.path.exists(outdir): os.makedirs(outdir) if CFG['debug']: print "Generating simulated dataset" npr.seed(23) CFG['is_matlab'] = False #cov = npr.permutation(20000-20).astype('float').reshape(999, 20) #cov = sp.r_[cov, sp.c_[sp.ones((1, 10)) *10, sp.ones((1, 10)) * 500000] + npr.normal(10, 1, 20)] #sf = sp.ones((cov.shape[1], ), dtype='float') setsize = 50 ### diff event counts cov = sp.zeros((500, 2 * setsize), dtype='int') for i in range(10): cov[i, :setsize] = nbinom.rvs(30, 0.8, size=setsize) cov[i, setsize:] = nbinom.rvs(10, 0.8, size=setsize) for i in range(10, cov.shape[0]): cov[i, :] = nbinom.rvs(30, 0.8, size=2 * setsize) ### diff gene expression cov2 = sp.zeros((500, 2 * setsize), dtype='int') for i in range(20): cov2[i, :setsize] = nbinom.rvs(2000, 0.2, size=setsize) cov2[i, setsize:] = nbinom.rvs(2000, 0.3, size=setsize) for i in range(20, cov2.shape[0]): cov2[i, :] = nbinom.rvs(2000, 0.3, size=2 * setsize) cov = sp.c_[cov, cov2] * 10000 tidx = sp.arange(setsize) sf = npr.uniform(0, 5, 2 * setsize) sf = sp.r_[sf, sf] #dmatrix0 = sp.ones((cov.shape[1], 3), dtype='bool') dmatrix1 = sp.zeros((cov.shape[1], 4), dtype='float') dmatrix1[:, 0] = 1 dmatrix1[tidx, 1] = 1 #dmatrix1[tidx, 2] = 1 dmatrix1[tidx + (2 * setsize), 2] = 1 dmatrix1[(2 * setsize):, 3] = 1 #dmatrix1[:, 4] = sp.log(sf) dmatrix0 = dmatrix1[:, [0, 2, 3]] cov = cov * sf #sf = sp.ones((cov.shape[1], ), dtype='float') pvals = run_testing(cov, dmatrix0, dmatrix1, sf, CFG) pvals_adj = adj_pval(pvals, CFG) pdb.set_trace() else: val_tag = '' if CFG['validate_splicegraphs']: val_tag = '.validated' if CFG['is_matlab']: CFG['fname_genes'] = os.path.join( CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.mat' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag)) CFG['fname_count_in'] = os.path.join( CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.count.mat' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag)) else: CFG['fname_genes'] = os.path.join( CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.pickle' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag)) CFG['fname_count_in'] = os.path.join( CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.count.pickle' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag)) condition_strains = None CFG['fname_exp_hdf5'] = os.path.join( CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.gene_exp.hdf5' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag)) if os.path.exists(CFG['fname_exp_hdf5']): if CFG['verbose']: print 'Loading expression counts from %s' % CFG[ 'fname_exp_hdf5'] IN = h5py.File(CFG['fname_exp_hdf5'], 'r') gene_counts = IN['raw_count'][:] gene_strains = IN['strains'][:] gene_ids = IN['genes'][:] IN.close() else: if options.subset_samples == 'y': condition_strains = sp.unique( sp.r_[sp.array(CFG['conditionA']), sp.array(CFG['conditionB'])]) CFG['fname_exp_hdf5'] = os.path.join( CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.gene_exp.%i.hdf5' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag, 
hash(tuple(sp.unique(condition_strains))) * -1)) if os.path.exists(CFG['fname_exp_hdf5']): if CFG['verbose']: print 'Loading expression counts from %s' % CFG[ 'fname_exp_hdf5'] IN = h5py.File(CFG['fname_exp_hdf5'], 'r') gene_counts = IN['raw_count'][:] gene_strains = IN['strains'][:] gene_ids = IN['genes'][:] IN.close() else: gene_counts, gene_strains, gene_ids = get_gene_expression( CFG, fn_out=CFG['fname_exp_hdf5'], strain_subset=condition_strains) gene_strains = sp.array( [x.split(':')[1] if ':' in x else x for x in gene_strains]) ### estimate size factors for library size normalization sf = get_size_factors(gene_counts, CFG) ### get index of samples for difftest idx1 = sp.where(sp.in1d(gene_strains, CFG['conditionA']))[0] idx2 = sp.where(sp.in1d(gene_strains, CFG['conditionB']))[0] ### for TESTING #setsize = 100 #idx1 = sp.arange(0, setsize / 2) #idx2 = sp.arange(setsize / 2, setsize) ### subset expression counts to tested samples gene_counts = gene_counts[:, sp.r_[idx1, idx2]] sf = sf[sp.r_[idx1, idx2]] sf = sp.r_[sf, sf] ### test each event type individually for event_type in CFG['event_types']: if CFG['verbose']: print 'Testing %s events' % event_type CFG['fname_events'] = os.path.join( CFG['out_dirname'], 'merge_graphs_%s_C%i.counts.hdf5' % (event_type, CFG['confidence_level'])) ### quantify events (cov, gene_idx, event_idx, event_strains) = quantify.quantify_from_counted_events( CFG['fname_events'], sp.r_[idx1, idx2], event_type, CFG) assert (sp.all(gene_strains == event_strains)) ### map gene expression to event order curr_gene_counts = gene_counts[gene_idx, :] ### filter for min expression if event_type == 'intron_retention': k_idx = sp.where( (sp.mean(cov[0] == 0, axis=1) < CFG['max_0_frac']) | (sp.mean(cov[1] == 0, axis=1) < CFG['max_0_frac']))[0] else: k_idx = sp.where( ((sp.mean(cov[0] == 0, axis=1) < CFG['max_0_frac']) | (sp.mean(cov[1] == 0, axis=1) < CFG['max_0_frac'])) & (sp.mean(sp.c_[cov[0][:, :idx1.shape[0]], cov[1][:, :idx1.shape[0]]] == 0, axis=1) < CFG['max_0_frac']) & (sp.mean(sp.c_[cov[0][:, idx2.shape[0]:], cov[1][:, idx2.shape[0]:]] == 0, axis=1) < CFG['max_0_frac']))[0] if CFG['verbose']: print 'Exclude %i of %i %s events (%.2f percent) from testing due to low coverage' % ( cov[0].shape[0] - k_idx.shape[0], cov[0].shape[0], event_type, (1 - float(k_idx.shape[0]) / cov[0].shape[0]) * 100) if k_idx.shape[0] == 0: print 'All events of type %s were filtered out due to low coverage. 
Please try re-running with less stringent filter criteria' % event_type continue # k_idx = sp.where((sp.mean(sp.c_[cov[0], cov[1]], axis=1) > 2))[0] # k_idx = sp.where((sp.mean(cov[0], axis=1) > 2) & (sp.mean(cov[1], axis=1) > 2))[0] cov[0] = cov[0][k_idx, :] cov[1] = cov[1][k_idx, :] curr_gene_counts = curr_gene_counts[k_idx, :] event_idx = event_idx[k_idx] gene_idx = gene_idx[k_idx] cov[0] = sp.around(sp.hstack([cov[0], curr_gene_counts])) cov[1] = sp.around(sp.hstack([cov[1], curr_gene_counts])) cov = sp.vstack(cov) tidx = sp.arange(idx1.shape[0]) #if CFG['debug']: # for i in range(cov.shape[0]): # fig = plt.figure(figsize=(8, 6), dpi=100) # ax = fig.add_subplot(111) # ax.hist(cov[i, :] * sf, 50, histtype='bar', rwidth=0.8) # #ax.plot(sp.arange(cov.shape[1]), sorted(cov[i, :]), 'bo') # ax.set_title('Count Distribution - Sample %i' % i ) # plt.savefig('count_dist.%i.pdf' % i, format='pdf', bbox_inches='tight') # plt.close(fig) ### build design matrix for testing dmatrix1 = sp.zeros((cov.shape[1], 4), dtype='bool') dmatrix1[:, 0] = 1 # intercept dmatrix1[tidx, 1] = 1 # delta a dmatrix1[tidx, 2] = 1 # delta g dmatrix1[tidx + (idx1.shape[0] + idx2.shape[0]), 2] = 1 # delta g dmatrix1[(idx1.shape[0] + idx2.shape[0]):, 3] = 1 # is g dmatrix0 = dmatrix1[:, [0, 2, 3]] pvals = run_testing(cov, dmatrix0, dmatrix1, sf, CFG) pvals_adj = adj_pval(pvals, CFG) ### write output out_fname = os.path.join( outdir, 'test_results_C%i_%s.tsv' % (options.confidence, event_type)) if CFG['verbose']: print 'Writing test results to %s' % out_fname s_idx = sp.argsort(pvals_adj) header = sp.array(['event_id', 'gene', 'p_val', 'p_val_adj']) event_ids = sp.array( ['%s_%i' % (event_type, i + 1) for i in event_idx], dtype='str') if CFG['is_matlab']: data_out = sp.c_[event_ids[s_idx], gene_ids[gene_idx[s_idx], 0], pvals[s_idx].astype('str'), pvals_adj[s_idx].astype('str')] else: data_out = sp.c_[event_ids[s_idx], gene_ids[gene_idx[s_idx]], pvals[s_idx].astype('str'), pvals_adj[s_idx].astype('str')] data_out = sp.r_[header[sp.newaxis, :], data_out] sp.savetxt(out_fname, data_out, delimiter='\t', fmt='%s')
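# --- Illustrative sketch (not part of the original module) of the nested design
# matrices built in main(): rows are event counts for all samples followed by the
# matched gene counts; dmatrix0 is dmatrix1 without the event-specific condition
# column, so the likelihood-ratio test isolates the condition effect on the event.
import scipy as sp

n1 = n2 = 3                        # samples per condition
n = n1 + n2
tidx = sp.arange(n1)               # condition A sample indices
dmatrix1 = sp.zeros((2 * n, 4), dtype='bool')
dmatrix1[:, 0] = 1                 # intercept
dmatrix1[tidx, 1] = 1              # delta a: condition effect on the event rows
dmatrix1[tidx, 2] = 1              # delta g: condition effect shared with the gene
dmatrix1[tidx + n, 2] = 1
dmatrix1[n:, 3] = 1                # is g: gene-row indicator
dmatrix0 = dmatrix1[:, [0, 2, 3]]  # null model
print(dmatrix1.astype(int))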
def get_gene_expression(CFG, fn_out=None, strain_subset=None): if CFG['verbose']: sys.stdout.write('Quantifying gene expression ...\n') ### load gene information if CFG['is_matlab']: genes = scio.loadmat(CFG['fname_genes'], struct_as_record=False)['genes'][0, :] numgenes = len(genes) else: genes = cPickle.load(open(CFG['fname_genes'], 'r'))[0] numgenes = genes.shape[0] ### open hdf5 file containing graph count information IN = h5py.File(CFG['fname_count_in'], 'r') strains = IN['strains'][:].astype('str') if strain_subset is None: strain_idx = sp.arange(strains.shape[0]) else: strain_idx = sp.where(sp.in1d(strains, strain_subset))[0] gene_counts = sp.zeros((numgenes, strain_idx.shape[0]), dtype='float') gene_names = sp.array([x.name for x in genes], dtype='str') if CFG['is_matlab']: seg_lens = IN['seg_len'][:, 0] gene_ids_segs = IN['gene_ids_segs'][0, :].astype('int') - 1 else: seg_lens = IN['seg_len'][:] gene_ids_segs = IN['gene_ids_segs'][:].astype('int') ### no longer assume that the gene_ids_segs are sorted by gene ID s_idx = sp.argsort(gene_ids_segs[:, 0], kind='mergesort') _, u_idx = sp.unique(gene_ids_segs[s_idx, 0], return_index=True) s_idx = s_idx[u_idx] ### iterate over genes #seg_offset = 0 #tut = sp.where(gene_names == 'ENSG00000163812.9')[0] #for gidx in tut: for gidx, iidx in enumerate(s_idx): if CFG['verbose']: log_progress(gidx, numgenes, 100) ### get idx of non alternative segments if CFG['is_matlab']: #non_alt_idx = get_non_alt_seg_ids_matlab(genes[gidx]) #seg_idx = sp.arange(seg_offset, seg_offset + genes[gidx].segmentgraph[0, 2].shape[0]) seg_idx = sp.arange(iidx, iidx + genes[gidx].segmentgraph[0, 2].shape[0]) if len(seg_idx) == 0: continue else: #non_alt_idx = genes[gidx].get_non_alt_seg_ids() #seg_idx = sp.arange(seg_offset, seg_offset + genes[gidx].segmentgraph.seg_edges.shape[0]) seg_idx = sp.arange( iidx, iidx + genes[gidx].segmentgraph.seg_edges.shape[0]) gene_idx = gene_ids_segs[seg_idx] if len(gene_idx.shape) > 0: gene_idx = gene_idx[0] if CFG['is_matlab']: assert (IN['gene_names'][gene_idx] == genes[gidx].name) else: assert (IN['gene_names'][:][gene_idx] == genes[gidx].name) assert (genes[gidx].name == gene_names[gidx]) #seg_idx = seg_idx[non_alt_idx] ### compute gene expression as the read count over all non alternative segments if CFG['is_matlab']: #gene_counts[gidx, :] = sp.dot(IN['segments'][:, seg_idx], IN['seg_len'][seg_idx, 0]) / sp.sum(IN['seg_len'][seg_idx, 0]) gene_counts[gidx, :] = sp.dot( IN['segments'][:, seg_idx][strain_idx], seg_lens[seg_idx]) / CFG['read_length'] #seg_offset += genes[gidx].segmentgraph[0, 2].shape[0] else: #gene_counts[gidx, :] = sp.dot(IN['segments'][seg_idx, :].T, IN['seg_len'][:][seg_idx]) / sp.sum(IN['seg_len'][:][seg_idx]) if seg_idx.shape[0] > 1: gene_counts[gidx, :] = sp.dot( IN['segments'][seg_idx, :][:, strain_idx].T, seg_lens[seg_idx, 0]) / CFG['read_length'] else: gene_counts[gidx, :] = IN['segments'][ seg_idx, :][strain_idx] * seg_lens[seg_idx, 0] / CFG['read_length'] #seg_offset += genes[gidx].segmentgraph.seg_edges.shape[0] IN.close() if CFG['verbose']: sys.stdout.write('\n... done.\n') ### write results to hdf5 if fn_out is not None: OUT = h5py.File(fn_out, 'w') OUT.create_dataset(name='strains', data=strains[strain_idx]) OUT.create_dataset(name='genes', data=gene_names) OUT.create_dataset(name='raw_count', data=gene_counts, compression="gzip") OUT.close() return (gene_counts, strains, gene_names)
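# --- Illustrative sketch (not part of the original module) of the gene
# expression estimate in get_gene_expression(): segment counts are weighted by
# segment length and divided by the read length to approximate a read count.
import scipy as sp

seg_counts = sp.array([[10., 12.],   # segments x strains
                       [4., 5.]])
seg_lens = sp.array([200., 100.])
read_length = 100.
gene_expr = sp.dot(seg_counts.T, seg_lens) / read_length
print(gene_expr)  # -> [ 24.  29.]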
### event id
fin_data.append(full_data[:, 0])
### event pos
fin_data.append(sp.array([x[1] + '-' + ':'.join(x[2:8]) for x in full_data]))
### strand
fin_data.append(full_data[:, 11])
### Ensembl ID
fin_data.append(full_data[:, 8])
### gene name
fin_data.append(full_data[:, 9])
### max dPSI
fin_data.append(full_data[:, 10])
### coding status
fin_data.append(full_data[:, 12])
### overlapping SNVs
snvs = []
for i in range(full_data.shape[0]):
    tmp = []
    for j in sp.where(snv_data[:, 1] == full_data[i, 0])[0]:
        tmp.append(snv_data[j, 0])
    if len(tmp) > 0:
        snvs.append(','.join(tmp))
    else:
        snvs.append('NA')
fin_data.append(sp.array(snvs))
### generate header
header = sp.array(['event_id', 'event_pos', 'strand', 'ensemble_id', 'gene_name', 'max_dpsi', 'coding_status', 'overlap_snv'])
fin_data = sp.r_[header[sp.newaxis, :], sp.array(fin_data).T]
sp.savetxt(os.path.join(basedir, 'supplemental_table_exonization_candidates.tsv'), fin_data, fmt='%s', delimiter='\t')
def adjust_dispersion(counts, dmatrix1, disp_raw, disp_fitted, idx, sf, CFG): if CFG['verbose']: print 'Start to estimate adjusted dispersions.' varLogDispSamp = polygamma( 1, (dmatrix1.shape[0] - dmatrix1.shape[1]) / 2) ## number of samples - number of coefficients varPrior = calculate_varPrior(disp_raw, disp_fitted, idx, varLogDispSamp) if CFG['parallel'] > 1: disp_adj = sp.empty((counts.shape[0], 1)) disp_adj.fill(sp.nan) disp_adj_conv = sp.zeros_like(disp_adj, dtype='bool') pool = mp.Pool(processes=CFG['parallel'], initializer=lambda: sig.signal(sig.SIGINT, sig.SIG_IGN)) binsize = 30 idx_chunks = [ sp.arange(x, min(x + binsize, counts.shape[0])) for x in range(0, counts.shape[0], binsize) ] try: result = [ pool.apply_async(adjust_dispersion_chunk, args=( counts[cidx, :], dmatrix1, disp_raw[cidx], disp_fitted[cidx], varPrior, sf, CFG, cidx, )) for cidx in idx_chunks ] res_cnt = 0 while result: tmp = result.pop(0).get() for i, j in enumerate(tmp[2]): if CFG['verbose']: log_progress(res_cnt, counts.shape[0]) res_cnt += 1 disp_adj[j] = tmp[0][i] disp_adj_conv[j] = tmp[1][i] if CFG['verbose']: log_progress(counts.shape[0], counts.shape[0]) print '' pool.terminate() pool.join() except KeyboardInterrupt: print >> sys.stderr, 'Keyboard Interrupt - exiting' pool.terminate() pool.join() sys.exit(1) else: (disp_adj, disp_adj_conv, _) = adjust_dispersion_chunk(counts, dmatrix1, disp_raw, disp_fitted, varPrior, sf, CFG, sp.arange(counts.shape[0]), log=CFG['verbose']) if CFG['debug']: fig = plt.figure(figsize=(8, 6), dpi=100) ax = fig.add_subplot(111) idx = sp.where(~sp.isnan(disp_adj))[0] ax.plot( sp.mean(sp.log10(counts + 1), axis=1)[idx], disp_adj[idx], 'bo') ax.set_title('Adjusted Dispersion Estimate') ax.set_xlabel('Mean expression count') ax.set_ylabel('Dispersion') plt.savefig('dispersion_adjusted.pdf', format='pdf', bbox_inches='tight') plt.close(fig) return (disp_adj, disp_adj_conv)
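# --- Illustrative sketch (not part of the original module): the sampling
# variance of the log dispersion used above is the trigamma function evaluated
# at half the residual degrees of freedom (samples minus model coefficients).
from scipy.special import polygamma

m, p = 100, 4                                  # hypothetical sample / coefficient counts
varLogDispSamp = polygamma(1, (m - p) / 2.)    # polygamma(1, .) is trigamma
print(varLogDispSamp)                          # ~ 2/(m - p) for large m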
def doskysub(straight, ylen, xlen, sci, yback, sky2x, sky2y, ccd2wave, disp, mswave, offsets, cutoff, airmass): sci = sci.copy() # If cutoff is not a float, we are using the blueside locutoff = cutoff hicutoff = 10400. nsci = sci.shape[0] width = sci.shape[2] # Perform telluric correction coords = spectools.array_coords(sci[0].shape) x = coords[1].flatten() y = coords[0].flatten() for k in range(nsci): w = genfunc(x, y, ccd2wave[k]) telluric = correct_telluric.correct(w, airmass[k], disp) sci[k] *= telluric.reshape(sci[k].shape) del coords, x, y, telluric # Create arrays for output images outcoords = spectools.array_coords((ylen, xlen)) outcoords[1] *= disp outcoords[1] += mswave - disp * xlen / 2. xout = outcoords[1].flatten() yout = outcoords[0].flatten() out = scipy.zeros((nsci, ylen, xlen)) fudge = scipy.ceil(abs(offsets).max()) bgimage = scipy.zeros((nsci, ylen + fudge, xlen)) varimage = bgimage.copy() bgcoords = spectools.array_coords((ylen + fudge, xlen)) bgcoords[1] *= disp bgcoords[1] += mswave - disp * xlen / 2. # # Cosmic Ray Rejection and Background Subtraction # yfit = yback.flatten() ycond = (yfit > straight - 0.4) & (yfit < straight + ylen - 0.6) coords = spectools.array_coords(yback.shape) xvals = coords[1].flatten() yvals = coords[0].flatten() ap_y = scipy.zeros(0) aper = scipy.zeros(0) for k in range(nsci): xfit = genfunc(xvals, yfit - straight, ccd2wave[k]) zfit = sci[k].flatten() x = xfit[ycond] y = yfit[ycond] z = zfit[ycond] # The plus/minus 20 provides a better solution for the edges wavecond = (x > locutoff - 20.) & (x < hicutoff + 20.) x = x[wavecond] y = y[wavecond] z = z[wavecond] # If only resampling... if RESAMPLE == 1: coords = outcoords.copy() samp_x = genfunc(xout, yout, sky2x[k]) samp_y = genfunc(xout, yout, sky2y[k]) coords[0] = samp_y.reshape(coords[0].shape) coords[1] = samp_x.reshape(coords[1].shape) out[k] = scipy.ndimage.map_coordinates(sci[k], coords, output=scipy.float64, order=5, cval=-32768, prefilter=False) out[k][xout.reshape(coords[1].shape) < locutoff] = scipy.nan out[k][xout.reshape(coords[1].shape) > hicutoff] = scipy.nan out[k][out[k] == -32768] = scipy.nan continue bgfit = skysub.skysub(x, y, z, disp) background = zfit.copy() for indx in range(background.size): x0 = xfit[indx] y0 = yfit[indx] if x0 < locutoff - 10 or x0 > hicutoff + 10: background[indx] = scipy.nan else: background[indx] = interpolate.bisplev(x0, y0, bgfit) sub = zfit - background sub[scipy.isnan(sub)] = 0. sky = sub * 0. sky[ycond] = sub[ycond] sky = sky.reshape(sci[k].shape) sub = sky.copy() background[scipy.isnan(background)] = 0. # Note that 2d filtering may flag very sharp source traces! sub = sub.reshape(sci[k].shape) sky = ndimage.median_filter(sky, 5) diff = sub - sky model = scipy.sqrt(background.reshape(sci[k].shape) + sky) crmask = scipy.where(diff > 4. * model, diff, 0.) 
sub -= crmask sci[k] -= crmask # Create straightened slit coords = outcoords.copy() samp_x = genfunc(xout, yout, sky2x[k]) samp_y = genfunc(xout, yout, sky2y[k]) coords[0] = samp_y.reshape(coords[0].shape) coords[1] = samp_x.reshape(coords[1].shape) out[k] = scipy.ndimage.map_coordinates(sci[k], coords, output=scipy.float64, order=5, cval=magicnum, prefilter=False) out[k][xout.reshape(coords[1].shape) < locutoff] = scipy.nan out[k][xout.reshape(coords[1].shape) > hicutoff] = scipy.nan out[k][out[k] == magicnum] = scipy.nan # Output bgsub image coords = bgcoords.copy() bgy = bgcoords[0].flatten() + offsets[k] bgx = bgcoords[1].flatten() samp_x = genfunc(bgx, bgy, sky2x[k]) samp_y = genfunc(bgx, bgy, sky2y[k]) coords[0] = samp_y.reshape(coords[0].shape) coords[1] = samp_x.reshape(coords[1].shape) varimage[k] = scipy.ndimage.map_coordinates(sci[k], coords, output=scipy.float64, order=5, cval=magicnum, prefilter=False) # Only include good data (ie positive variance, wavelength # greater than dichroic cutoff) cond = (bgcoords[0] + offsets[k] < 0.) | (bgcoords[0] + offsets[k] > ylen) cond = (varimage[k] <= 0) | cond cond = (bgcoords[1] < locutoff) | (bgcoords[1] > hicutoff) | cond varimage[k][cond] = scipy.nan bgimage[k] = scipy.ndimage.map_coordinates(sub, coords, output=scipy.float64, order=5, cval=magicnum, prefilter=False) bgimage[k][cond] = scipy.nan bgimage[k][bgimage[k] == magicnum] = scipy.nan # Shouldn't be # necessary... if RESAMPLE == 1: return out, bgimage, varimage bgimage = fastmed(bgimage) varimage = fastmed(varimage) / nsci return out, bgimage, varimage
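# --- Illustrative sketch (not part of the original module) of the resampling
# pattern used in doskysub(): map_coordinates samples the input image at
# fractional (y, x) positions, and a sentinel cval marks out-of-grid samples.
import scipy
import scipy.ndimage

img = scipy.arange(16.).reshape(4, 4)
coords = scipy.array([[[1.5]], [[2.0]]])   # one output pixel at y=1.5, x=2.0
out = scipy.ndimage.map_coordinates(img, coords, order=1, cval=-32768)
print(out)  # -> [[ 8.]] (midway between img[1, 2]=6 and img[2, 2]=10)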
def __init__(self, inRaster, inVector, inField='Class', outModel=None, inSplit=1, inSeed=0, outMatrix=None, inClassifier='GMM'):
    learningProgress = progressBar('Learning model...', 6)

    # Convert vector to raster
    try:
        try:
            temp_folder = tempfile.mkdtemp()
            filename = os.path.join(temp_folder, 'temp.tif')
            data = gdal.Open(inRaster, gdal.GA_ReadOnly)
            shp = ogr.Open(inVector)
            lyr = shp.GetLayer()
        except:
            QgsMessageLog.logMessage("Problem with making tempfile or opening raster or vector")

        # Create temporary data set
        try:
            driver = gdal.GetDriverByName('GTiff')
            dst_ds = driver.Create(filename, data.RasterXSize, data.RasterYSize, 1, gdal.GDT_Byte)
            dst_ds.SetGeoTransform(data.GetGeoTransform())
            dst_ds.SetProjection(data.GetProjection())
            OPTIONS = 'ATTRIBUTE=' + inField
            gdal.RasterizeLayer(dst_ds, [1], lyr, None, options=[OPTIONS])
            data, dst_ds, shp, lyr = None, None, None, None
        except:
            QgsMessageLog.logMessage("Cannot create temporary data set")

        # Load training set
        try:
            X, Y = dataraster.get_samples_from_roi(inRaster, filename)
        except:
            QgsMessageLog.logMessage("Problem while getting samples from ROI with " + inRaster)
            QgsMessageLog.logMessage("Are you sure you have only integer values in your " + str(inField) + " column?")

        [n, d] = X.shape
        C = int(Y.max())
        SPLIT = inSplit
        os.remove(filename)
        os.rmdir(temp_folder)

        # Scale the data
        X, M, m = self.scale(X)

        learningProgress.addStep()  # Add Step to ProgressBar

        # The learning process takes a split of the ground-truth pixels for
        # training and keeps the remainder for testing
        try:
            if SPLIT < 1:
                # Random selection of the sample
                x = sp.array([]).reshape(0, d)
                y = sp.array([]).reshape(0, 1)
                xt = sp.array([]).reshape(0, d)
                yt = sp.array([]).reshape(0, 1)
                sp.random.seed(inSeed)  # Set the random generator state
                for i in range(C):
                    t = sp.where((i + 1) == Y)[0]
                    nc = t.size
                    ns = int(nc * SPLIT)
                    rp = sp.random.permutation(nc)
                    x = sp.concatenate((X[t[rp[0:ns]], :], x))
                    xt = sp.concatenate((X[t[rp[ns:]], :], xt))
                    y = sp.concatenate((Y[t[rp[0:ns]]], y))
                    yt = sp.concatenate((Y[t[rp[ns:]]], yt))
            else:
                x, y = X, Y
        except:
            QgsMessageLog.logMessage("Problem while splitting the training set (SPLIT < 1)")

        learningProgress.addStep()  # Add Step to ProgressBar

        # Train classifier
        if inClassifier == 'GMM':
            try:
                # tau=10.0**sp.arange(-8,8,0.5)
                model = gmmr.GMMR()
                model.learn(x, y)
                # htau,err = model.cross_validation(x,y,tau)
                # model.tau = htau
            except:
                QgsMessageLog.logMessage("Cannot train with GMM")
        else:
            try:
                from sklearn import neighbors
                from sklearn.svm import SVC
                from sklearn.ensemble import RandomForestClassifier
                try:
                    model_selection = True
                    from sklearn.model_selection import StratifiedKFold
                    from sklearn.model_selection import GridSearchCV
                except:
                    model_selection = False
                    from sklearn.cross_validation import StratifiedKFold
                    from sklearn.grid_search import GridSearchCV
                try:
                    # As QGIS on Windows doesn't manage multiprocessing, force
                    # a single thread on non-linux systems
                    if os.name == 'posix':
                        n_jobs = -1
                    else:
                        n_jobs = 1
                    if inClassifier == 'RF':
                        param_grid_rf = dict(n_estimators=3**sp.arange(1, 5), max_features=sp.arange(1, 4))
                        y.shape = (y.size, )
                        if model_selection:
                            cv = StratifiedKFold(n_splits=3).split(x, y)
                            #cv = cv.get_n_splits(y)
                        else:
                            cv = StratifiedKFold(y, n_folds=3)
                        grid = GridSearchCV(RandomForestClassifier(), param_grid=param_grid_rf, cv=cv, n_jobs=n_jobs)
                        grid.fit(x, y)
                        model = grid.best_estimator_
                        model.fit(x, y)
                    elif inClassifier == 'SVM':
                        param_grid_svm = dict(gamma=2.0**sp.arange(-4, 4), C=10.0**sp.arange(-2, 5))
                        y.shape = (y.size, )
                        if model_selection:
                            cv = StratifiedKFold(n_splits=5).split(x, y)
                        else:
                            cv = StratifiedKFold(y, n_folds=5)
                        grid = GridSearchCV(SVC(), param_grid=param_grid_svm, cv=cv, n_jobs=n_jobs)
                        grid.fit(x, y)
                        model = grid.best_estimator_
                        model.fit(x, y)
                    elif inClassifier == 'KNN':
                        param_grid_knn = dict(n_neighbors=sp.arange(1, 20, 4))
                        y.shape = (y.size, )
                        if model_selection:
                            cv = StratifiedKFold(n_splits=3).split(x, y)
                        else:
                            cv = StratifiedKFold(y, n_folds=3)
                        grid = GridSearchCV(neighbors.KNeighborsClassifier(), param_grid=param_grid_knn, cv=cv, n_jobs=n_jobs)
                        grid.fit(x, y)
                        model = grid.best_estimator_
                        model.fit(x, y)
                except:
                    QgsMessageLog.logMessage("Cannot train with classifier " + inClassifier)
            except:
                QgsMessageLog.logMessage("You must have sklearn dependencies on your computer. Please consult the documentation for installation.")

        learningProgress.prgBar.setValue(5)  # Add Step to ProgressBar

        # Assess the quality of the model
        if SPLIT < 1:
            # if inClassifier == 'GMM':
            #     = model.predict(xt)[0]
            # else:
            yp = model.predict(xt)
            CONF = ai.CONFUSION_MATRIX()
            CONF.compute_confusion_matrix(yp, yt)
            sp.savetxt(outMatrix, CONF.confusion_matrix, delimiter=',', fmt='%1.4d')

        # Save the model
        if outModel is not None:
            output = open(outModel, 'wb')
            pickle.dump([model, M, m], output)
            output.close()

        learningProgress.addStep()  # Add Step to ProgressBar

        # Close progressBar
        learningProgress.reset()
        learningProgress = None
    except:
        learningProgress.reset()
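# --- Illustrative sketch (not part of the original module) of the grid-search
# pattern used above, with the new-style sklearn.model_selection API and
# synthetic data; the hyperparameter grid mirrors the 'RF' branch.
import scipy as sp
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV

x = sp.random.rand(60, 4)
y = sp.r_[sp.zeros(30, dtype=int), sp.ones(30, dtype=int)]
param_grid_rf = dict(n_estimators=3 ** sp.arange(1, 5),
                     max_features=sp.arange(1, 4))
cv = StratifiedKFold(n_splits=3).split(x, y)
grid = GridSearchCV(RandomForestClassifier(), param_grid=param_grid_rf,
                    cv=cv, n_jobs=1)
grid.fit(x, y)
print(grid.best_params_)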
def skysub(x, y, z, scale): """ skysub(x,y,z,scale) Routine to determine the 2d background from data. (x,y) are the coordinates of the data, usually in the *corrected* frame. Inputs: x - 1d array describing x-coordinate, usually wavelength y - 1d array describing y-coordinate, usually corrected spatial position z - data each position (x,y) scale - approximate output scale (for knot placement). It is not, in general, possible to calculate this from x because the input coordinates are not on a regular grid. Outputs: 2d spline model of the background """ cond = (scipy.isfinite(z)) & (z > 0.) x = x[cond] y = y[cond] z = z[cond] x0 = x.copy() y0 = y.copy() z0 = z.copy() height = int(y.max() - y.min()) width = int(x.max() - x.min()) npoints = x.size midpt = y.mean() """ Very wide slits need special attention. Here we fit a first order correction to the slit and subtract it away before doing the high pixel rejection (the problem is if there is a small gradient across a wide slit, the top and bottom pixels may differ significantly, but these pixels may be close in *wavelength* and so locally (on the CCD) low pixels will be rejected in the smoothing """ if height > WIDE: zbak = z.copy() args = y.argsort() revargs = args.argsort() ymodel = ndimage.percentile_filter(z[args], 30., size=height)[revargs] fit = special_functions.lsqfit(ymodel, 'polynomial', 1) if fit['coeff'][1] * float(ymodel.size) / fit['coeff'][0] < 0.05: pass else: ymodel = special_functions.genfunc(scipy.arange(ymodel.size), 0, fit) ymodel -= ymodel.mean() z -= ymodel # Filter locally (in wavelength space) high points args = x.argsort() revargs = args.argsort() smooth = ndimage.percentile_filter(z[args], 35., size=height)[revargs] diff = z - smooth # We assume poisson statistics.... var = scipy.sqrt(scipy.fabs(z)) sigma = diff / var args = y.argsort() revargs = args.argsort() t = ndimage.median_filter(sigma[args], 9) t = ndimage.gaussian_filter(t, width) #[revargs] # Source detection/rejection # Reject yvalues > 1. sigma, and weight remaining pixels w = (1.0 - t) / abs(z[args]) if AGGRESSIVE: g = scipy.where(w <= 0, 0, 1) g = ndimage.maximum_filter(g, width * 3) g = ndimage.minimum_filter(g, width * 7) s = sigma[args].copy() b = ndimage.minimum_filter(g, width * 5) xi = scipy.arange(t.size) fitdata = scipy.empty((xi[g == 1].size, 2)) fitdata[:, 0] = xi[g == 1].copy() fitdata[:, 1] = t[g == 1].copy() fit = special_functions.lsqfit(fitdata, 'polynomial', 3) fit = special_functions.genfunc(xi, 0., fit) diff = (t - fit)[b == 1] s = diff.std() while (abs(t - fit)[(g == 1) & (b == 0)] > 2.5 * s).any(): g = b.copy() b = ndimage.minimum_filter(g, width * 5) fitdata = scipy.empty((xi[g == 1].size, 2)) fitdata[:, 0] = xi[g == 1].copy() fitdata[:, 1] = t[g == 1].copy() fit = special_functions.lsqfit(fitdata, 'polynomial', 3) fit = special_functions.genfunc(xi, 0., fit) diff = (t - fit)[b == 1] s = diff.std() w *= g skycond = ((w > 0.) & (z > 0)) x = x[skycond] y = y[skycond] z = z[skycond] # Reject residual high pixels (and very low pixels too!) args = x.argsort() revargs = args.argsort() smooth = ndimage.median_filter(z[args], height / 4.)[revargs] diff = z - smooth var = scipy.sqrt(smooth) cond = abs(diff) < 4. * var x = x[cond] y = y[cond] z = z[cond] kx = 3 ky = 1 # If the slit is long, return to original data and increase the order # of the y-fit. if height > WIDE: z = zbak[skycond] z = z[cond].astype(scipy.float64) # if height>WIDE*1.5: # ky = 3 cond = z > 0. x = x[cond] y = y[cond] z = z[cond] w = 1. / z if x.size < 5. 
* width: kx = 1 ky = 1 # Create knots... innertx = scipy.arange(x.min() + scale / 2., x.max() - scale / 2., 3. * scale / 4.) """ tx = scipy.zeros(innertx.size+kx*2+2) tx[0:kx+1] = x.min() tx[kx+1:innertx.size+kx+1] = innertx.copy() tx[innertx.size+kx+1:] = x.max() """ tx = scipy.linspace(x.min(), x.max(), innertx.size) xsort = scipy.sort(x) tmp = [x.min()] num = [] cnt = 0 j = 1 for i in range(xsort.size): while xsort[i] > tx[j]: if cnt > 0: if len(num) == 0 or cnt > 1 or num[-1] > 1: tmp.append(tx[j]) num.append(cnt) cnt = 0 j += 1 cnt += 1 tmp.append(x.max()) tx = scipy.asarray(tmp) ty = scipy.zeros(ky * 2 + 2) ty[0:ky + 1] = y.min() ty[ky + 1:] = y.max() #del innertx # ...and fit. bgfit = interpolate.bisplrep(x, y, z, w, tx=tx, ty=ty, kx=kx, ky=ky, task=-1, nxest=tx.size, nyest=ty.size) del x, y, z, w, tx, ty return bgfit
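# --- Illustrative sketch (not part of the original module): round trip of the
# 2d B-spline representation returned by skysub(). bisplrep fits scattered
# (x, y, z) samples and bisplev evaluates the surface at a point; the smoothing
# factor s here is an arbitrary choice for the synthetic data.
import scipy
from scipy import interpolate

x = scipy.random.uniform(0., 10., 400)
y = scipy.random.uniform(0., 5., 400)
z = scipy.sin(x) + 0.1 * y
fit = interpolate.bisplrep(x, y, z, kx=3, ky=1, s=400)
print(interpolate.bisplev(6.0, 2.5, fit), scipy.sin(6.0) + 0.25)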
def biased_pagerank(G, num, alpha=0.05, personalization=None, max_iter=100, tol=1.0e-6, weight='weight', dangling=None):
    '''
    Parameters
    ----------
    G : graph
        A NetworkX graph. Undirected graphs will be converted to a directed
        graph with two directed edges for each undirected edge.
    num : integer
        Label number of a seed-node.
    alpha : float, optional
        Damping parameter for biased PageRank, default=0.05.
    personalization: dict, optional
        The "personalization vector" consisting of a dictionary with a key for
        every graph node and nonzero personalization value for each node. By
        default, a uniform distribution is used.
    max_iter : integer, optional
        Maximum number of iterations in power method eigenvalue solver.
    tol : float, optional
        Error tolerance used to check convergence in power method solver.
    weight : key, optional
        Edge data key to use as weight. If None weights are set to 1.
    dangling: dict, optional
        The outedges to be assigned to any "dangling" nodes, i.e., nodes
        without any outedges. The dict key is the node the outedge points to
        and the dict value is the weight of that outedge. By default, dangling
        nodes are given outedges according to the personalization vector
        (uniform if not specified). This must be selected to result in an
        irreducible transition matrix (see notes under google_matrix). It may
        be common to have the dangling dict to be the same as the
        personalization dict.

    Returns
    -------
    pagerank : dictionary
        Dictionary of nodes with PageRank as value.
    '''
    # Number of nodes
    N = len(G)
    if N == 0:
        return {}
    nodelist = G.nodes()
    # Adjacency matrix, normalized to be row-stochastic
    M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight, dtype=float)
    S = scipy.array(M.sum(axis=1)).flatten()
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    M = Q * M
    # Initialize vector
    x = scipy.repeat(1.0 / N, N)
    # Personalization vector
    if personalization is None:
        p = scipy.repeat(1.0 / N, N)
    else:
        missing = set(nodelist) - set(personalization)
        if missing:
            raise NetworkXError('Personalization vector dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        p = scipy.array([personalization[n] for n in nodelist], dtype=float)
        p = p / p.sum()
    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        missing = set(nodelist) - set(dangling)
        if missing:
            raise NetworkXError('Dangling node dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = scipy.array([dangling[n] for n in nodelist], dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = scipy.where(S == 0)[0]
    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = alpha * (x * M + sum(x[is_dangling]) * dangling_weights)
        x[num] += np.float64((1 - alpha) * p[num])
        # check convergence, l1 norm
        err = scipy.absolute(x - xlast).sum()
        if err < N * tol:
            print 'loop:', _ + 1
            return dict(zip(nodelist, map(float, x)))
    raise NetworkXError('pagerank_scipy: power iteration failed to converge '
                        'in %d iterations.' % max_iter)
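# --- Usage sketch (assumes biased_pagerank() above is in scope): bias the walk
# toward node 0 of a small path graph; with alpha this small, nearly all of the
# mass stays on the seed node.
import networkx as nx

G = nx.path_graph(4)
pr = biased_pagerank(G, num=0, alpha=0.05)
print(sorted(pr.items()))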
def predict_image(self, inRaster, outRaster, model, inMask=None, confidenceMap=None, NODATA=-10000, SCALE=None, classifier='GMM'): """!@brief The function classify the whole raster image, using per block image analysis. The classifier is given in classifier and options in kwargs Input : inRaster : Filtered image name ('sample_filtered.tif',str) outRaster :Raster image name ('outputraster.tif',str) model : model file got from precedent step ('model', str) inMask : mask to confidenceMap : map of confidence per pixel NODATA : Default set to -10000 (int) SCALE : Default set to None classifier = Default 'GMM' Output : nothing but save a raster image and a confidence map if asked """ # Open Raster and get additionnal information raster = gdal.Open(inRaster, gdal.GA_ReadOnly) if raster is None: print 'Impossible to open ' + inRaster exit() if inMask is None: mask = None else: mask = gdal.Open(inMask, gdal.GA_ReadOnly) if mask is None: print 'Impossible to open ' + inMask exit() # Check size if (raster.RasterXSize != mask.RasterXSize) or ( raster.RasterYSize != mask.RasterYSize): print 'Image and mask should be of the same size' exit() if SCALE is not None: M, m = sp.asarray(SCALE[0]), sp.asarray(SCALE[1]) # Get the size of the image d = raster.RasterCount nc = raster.RasterXSize nl = raster.RasterYSize # Get the geoinformation GeoTransform = raster.GetGeoTransform() Projection = raster.GetProjection() # Get block size band = raster.GetRasterBand(1) block_sizes = band.GetBlockSize() x_block_size = block_sizes[0] y_block_size = block_sizes[1] del band ## Initialize the output driver = gdal.GetDriverByName('GTiff') dst_ds = driver.Create(outRaster, nc, nl, 1, gdal.GDT_Byte) dst_ds.SetGeoTransform(GeoTransform) dst_ds.SetProjection(Projection) out = dst_ds.GetRasterBand(1) if confidenceMap: dst_confidenceMap = driver.Create(confidenceMap, nc, nl, 1, gdal.GDT_Float32) dst_confidenceMap.SetGeoTransform(GeoTransform) dst_confidenceMap.SetProjection(Projection) out_confidenceMap = dst_confidenceMap.GetRasterBand(1) ## Perform the classification predictProgress = progressBar('Classifying image...', nl * y_block_size) for i in range(0, nl, y_block_size): predictProgress.addStep() if i + y_block_size < nl: # Check for size consistency in Y lines = y_block_size else: lines = nl - i for j in range(0, nc, x_block_size): # Check for size consistency in X if j + x_block_size < nc: cols = x_block_size else: cols = nc - j # Load the data and Do the prediction X = sp.empty((cols * lines, d)) for ind in xrange(d): X[:, ind] = raster.GetRasterBand(int(ind + 1)).ReadAsArray( j, i, cols, lines).reshape(cols * lines) # Do the prediction if mask is None: mask_temp = raster.GetRasterBand(1).ReadAsArray( j, i, cols, lines).reshape(cols * lines) t = sp.where((mask_temp != 0) & (X[:, 0] != NODATA))[0] yp = sp.zeros((cols * lines, )) K = sp.zeros((cols * lines, )) else: mask_temp = mask.GetRasterBand(1).ReadAsArray( j, i, cols, lines).reshape(cols * lines) t = sp.where((mask_temp != 0) & (X[:, 0] != NODATA))[0] yp = sp.zeros((cols * lines, )) K = sp.zeros((cols * lines, )) # TODO: Change this part accorindgly ... 
            # Do the prediction on the valid pixels only
            if t.size > 0:
                if confidenceMap and classifier == 'GMM':
                    yp[t], K[t] = model.predict(
                        self.scale(X[t, :], M=M, m=m), None, confidenceMap)
                elif confidenceMap:
                    yp[t] = model.predict(self.scale(X[t, :], M=M, m=m))
                    K[t] = sp.amax(model.predict_proba(
                        self.scale(X[t, :], M=M, m=m)), axis=1)
                else:
                    yp[t] = model.predict(self.scale(X[t, :], M=M, m=m))

            # Write the data
            out.WriteArray(yp.reshape(lines, cols), j, i)
            out.FlushCache()
            if confidenceMap:
                out_confidenceMap.WriteArray(K.reshape(lines, cols), j, i)
                out_confidenceMap.FlushCache()
            del X, yp

    # Clean/close variables
    predictProgress.reset()
    raster = None
    dst_ds = None
    return outRaster

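# A minimal, self-contained sketch of the per-block masking/prediction
# pattern used above, on an in-memory array instead of a GDAL raster block.
# DummyModel is an illustrative stand-in for the fitted model, not part of
# the original module.
import scipy as sp

class DummyModel:
    def predict(self, X):
        # Two fake classes based on the sign of the first band
        return (X[:, 0] > 0).astype(int) + 1

NODATA = -10000
X = sp.array([[0.5, 1.0], [-0.2, 0.3], [NODATA, 0.0]], dtype=float)
mask_temp = sp.array([1, 1, 0])
# Only predict pixels that are unmasked and not NODATA; the rest stay 0
t = sp.where((mask_temp != 0) & (X[:, 0] != NODATA))[0]
yp = sp.zeros((X.shape[0], ))
yp[t] = DummyModel().predict(X[t, :])
print yp  # -> [ 2.  1.  0.]
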
def __call__(self, Xi, Xj, ni, nj, hyper_deriv=None, symmetric=False):
    """Evaluate the covariance between points `Xi` and `Xj` with derivative
    orders `ni`, `nj`.

    Parameters
    ----------
    Xi : :py:class:`Matrix` or other Array-like, (`M`, `D`)
        `M` inputs with dimension `D`.
    Xj : :py:class:`Matrix` or other Array-like, (`M`, `D`)
        `M` inputs with dimension `D`.
    ni : :py:class:`Matrix` or other Array-like, (`M`, `D`)
        `M` derivative orders for set `i`.
    nj : :py:class:`Matrix` or other Array-like, (`M`, `D`)
        `M` derivative orders for set `j`.
    hyper_deriv : Non-negative int or None, optional
        The index of the hyperparameter to compute the first derivative
        with respect to. If None, no derivatives are taken. Hyperparameter
        derivatives are not supported at this point. Default is None.
    symmetric : bool, optional
        Whether or not the inputs `Xi`, `Xj` are from a symmetric matrix.
        Default is False.

    Returns
    -------
    Kij : :py:class:`Array`, (`M`,)
        Covariances for each of the `M` `Xi`, `Xj` pairs.

    Raises
    ------
    NotImplementedError
        If the `hyper_deriv` keyword is not None.
    """
    if hyper_deriv is not None:
        raise NotImplementedError(
            "Hyperparameter derivatives have not been implemented!")
    n_cat = scipy.asarray(scipy.concatenate((ni, nj), axis=1), dtype=int)
    X_cat = scipy.asarray(scipy.concatenate((Xi, Xj), axis=1), dtype=float)
    n_cat_unique = unique_rows(n_cat)
    k = scipy.zeros(Xi.shape[0], dtype=float)
    # Loop over unique derivative patterns:
    if self.num_proc > 1:
        pool = multiprocessing.Pool(processes=self.num_proc)
    for n_cat_state in n_cat_unique:
        idxs = scipy.where(
            scipy.asarray((n_cat == n_cat_state).all(axis=1)).squeeze())[0]
        if (n_cat_state == 0).all():
            # No derivatives requested: evaluate the covariance directly
            k[idxs] = self.cov_func(Xi[idxs, :], Xj[idxs, :], *self.params)
        else:
            if self.num_proc > 1 and len(idxs) > 1:
                k[idxs] = scipy.asarray(
                    pool.map(_ArbitraryKernelEval(self, n_cat_state),
                             X_cat[idxs, :]),
                    dtype=float)
            else:
                # Numerical derivative of the covariance over the
                # concatenated input (Xi, Xj)
                for idx in idxs:
                    k[idx] = mpmath.chop(
                        mpmath.diff(self._mask_cov_func, X_cat[idx, :],
                                    n=n_cat_state, singular=True))
    # Only close the pool if one was actually created above
    if self.num_proc > 1:
        pool.close()
    return k

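# A minimal runnable sketch of the mechanism used above: taking a partial
# derivative of a covariance function with mpmath.diff over the concatenated
# input (xi, xj). The squared-exponential covariance below is an
# illustrative stand-in, not the kernel wrapped by this class.
import mpmath

def cov(xi, xj):
    # Simple one-dimensional squared-exponential covariance
    return mpmath.exp(-(xi - xj)**2)

# Derivative order (1, 0) over the concatenated input, i.e. d/dxi, evaluated
# at (xi, xj) = (0.3, 0.5); this mirrors the n=n_cat_state convention above
val = mpmath.chop(mpmath.diff(cov, (0.3, 0.5), (1, 0)))
print 'dK/dxi at (0.3, 0.5):', val
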
def TVDI_function(inNDVI, inLST, pas=0.02, t=1, s1Min=0.3, s2Max=0.8,
                  ss1Min=0.2, ss2Max=0.8):
    """
    Calculates the TVDI. This function is a modified version of the IDL
    script published by Monica Garcia:
    (Garcia, M., Fernández, N., Villagarcía, L., Domingo, F.,
    Puigdefábregas, J. & I. Sandholt. 2014. Accuracy of the
    Temperature–Vegetation Dryness Index using MODIS under water-limited vs.
    energy-limited evapotranspiration conditions. Remote Sensing of
    Environment 149, 100-117.)

    Input:
        inNDVI: NDVI
        inLST: land surface temperature
        pas: width of each NDVI interval
        s1Min: lower threshold bounding the interval used to fit the
               LSTmax (dry edge) parameters
        s2Max: upper threshold bounding the interval used to fit the
               LSTmax (dry edge) parameters
        ss1Min: lower threshold bounding the interval used to compute the
                LSTmin (wet edge) parameters
        ss2Max: upper threshold bounding the interval used to compute the
                LSTmin (wet edge) parameters
        t: t=0 to use Garcia M.'s method, t=1 to calculate the TVDI
           without using the thresholds.

    Output:
        TVDI
    """
    TVDI = sp.zeros(inLST.shape)
    if inNDVI.shape == inLST.shape:
        inNdvi = sp.reshape(inNDVI, (inNDVI.size))
        inLst = sp.reshape(inLST, (inLST.size))
        mini = sp.nanmin(inNdvi)  # minimum value
        maxi = sp.nanmax(inNdvi)  # maximum value
        arg = sp.argsort(inNdvi)  # indices that sort the NDVI values
        inV = inNdvi[arg]  # NDVI values, sorted
        inT = inLst[arg]   # temperature values, in the same order
        # Split the NDVI range into intervals of width `pas`
        percentileMax = 99.0
        percentileMin = 1.0
        nObsMin = 5  # minimum size an interval must have to be considered
        ni = int(round((maxi - mini) / pas) + 1)  # total number of intervals
        iValMax = 0
        iValMin = ni
        # Storage vectors
        vx = sp.zeros((ni), dtype="float")
        vMaxi = sp.zeros((ni), dtype="float")
        vMini = sp.zeros((ni), dtype="float")
        vMaxi[0:] = None
        vMini[0:] = None
        vNpi = sp.zeros((ni), dtype="float")
        for k in range(ni):
            hi = k * pas + mini  # start value of the interval
            hs = hi + pas        # end value of the interval (start + step)
            a = sp.where(inV <= hi)
            ii = a[0].max()
            b = sp.where(inV <= hs)
            iis = b[0].max()
            vNpi[k] = iis - ii
            inTp = inT[ii:iis + 1]  # temperatures falling in this interval
            vx[k] = (hs - hi) / 2 + hi  # NDVI value at the middle of the interval
            if vNpi[k] > nObsMin:  # does the interval hold enough values?
                inTp = inTp[sp.argsort(inTp)]  # sort the interval's temperatures
                # temperature at the 99th percentile of the interval
                vMaxi[k] = inTp[int((vNpi[k] * percentileMax / 100))]
                # temperature at the 1st percentile of the interval
                vMini[k] = inTp[int((vNpi[k] * percentileMin / 100))]
                if k > iValMax:
                    iValMax = k
                if k < iValMin:
                    iValMin = k
        # Compute LSTmax and LSTmin
        if (t == 0):
            # Dry edge: the thresholds bound the interval used for the
            # linear regression; iValMin and iValMax keep us inside the
            # intervals that satisfy nObsMin, avoiding NaNs
            try:
                b = sp.where(vx < s1Min)  # lower threshold, adjust as needed
                ii = sp.nanmax([sp.nanmax(b[0]), iValMin])
                b = sp.where(vx < s2Max)  # upper threshold, adjust as needed
                iis = sp.nanmin([sp.nanmax(b[0]), iValMax])
                # Wet edge
                c = sp.where(vx < ss1Min)  # lower threshold, adjust as needed
                ii2 = sp.nanmax([sp.nanmax(c[0]), iValMin])
                c = sp.where(vx < ss2Max)  # upper threshold, adjust as needed
                iis2 = sp.nanmin([sp.nanmax(c[0]), iValMax])
            except:
                print "problem with the lower and upper threshold values used"
        else:
            ii = iValMin
            iis = iValMax
            ii2 = iValMin
            iis2 = iValMax
        # Linear regression for the dry edge: LSTmax = a * NDVI + b
        estimation1 = sp.stats.linregress(vx[ii:iis + 1], vMaxi[ii:iis + 1])
        lstmax_a = estimation1[0]  # slope
        lstmax_b = estimation1[1]  # intercept
        # Wet edge: taken as the minimum LST over the selected intervals
        lstmin = sp.nanmin(vMini[ii2:iis2 + 1])
        # TVDI, with a small epsilon to avoid division by zero
        TVDI = (inLST - lstmin) / (lstmax_b + (lstmax_a * inNDVI) - lstmin
                                   + 0.00000001)
    else:
        print "the two arrays do not have the same size"
        exit()
    return TVDI

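# A minimal runnable sketch of TVDI_function on synthetic data, assuming the
# module-level imports used above (scipy as sp, with scipy.stats available);
# numpy is used here only to generate the fake NDVI/LST fields.
import numpy as np
import scipy as sp
import scipy.stats

np.random.seed(0)
ndvi = np.random.uniform(0.05, 0.9, (50, 50))
# Synthetic LST that cools as vegetation cover increases, plus noise
lst = 320.0 - 30.0 * ndvi + np.random.normal(0.0, 1.5, (50, 50))
tvdi = TVDI_function(ndvi, lst, pas=0.02, t=1)
print 'TVDI range:', sp.nanmin(tvdi), '-', sp.nanmax(tvdi)
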
def unred(wave, ebv, R_V=3.1, LMC2=False, AVGLMC=False):
    '''
    Deredden a flux vector: returns the multiplicative correction factor
    for each wavelength, following the parameterization used in
    https://github.com/sczesla/PyAstronomy in /src/pyasl/asl/unred
    '''
    x = 10000. / wave  # Convert to inverse microns
    curve = x * 0.

    # Set some standard values:
    x0 = 4.596
    gamma = 0.99
    c3 = 3.23
    c4 = 0.41
    c2 = -0.824 + 4.717 / R_V
    c1 = 2.030 - 3.007 * c2

    if LMC2:
        x0 = 4.626
        gamma = 1.05
        c4 = 0.42
        c3 = 1.92
        c2 = 1.31
        c1 = -2.16
    elif AVGLMC:
        x0 = 4.596
        gamma = 0.91
        c4 = 0.64
        c3 = 2.73
        c2 = 1.11
        c1 = -1.28

    # Compute UV portion of A(lambda)/E(B-V) curve using FM fitting function
    # and R-dependent coefficients
    xcutuv = np.array([10000.0 / 2700.0])
    xspluv = 10000.0 / np.array([2700.0, 2600.0])
    iuv = sp.where(x >= xcutuv)[0]
    N_UV = iuv.size
    iopir = sp.where(x < xcutuv)[0]
    Nopir = iopir.size
    if N_UV > 0:
        xuv = sp.concatenate((xspluv, x[iuv]))
    else:
        xuv = xspluv

    yuv = c1 + c2 * xuv
    yuv = yuv + c3 * xuv**2 / ((xuv**2 - x0**2)**2 + (xuv * gamma)**2)
    yuv = yuv + c4 * (0.5392 * (sp.maximum(xuv, 5.9) - 5.9)**2 +
                      0.05644 * (sp.maximum(xuv, 5.9) - 5.9)**3)
    yuv = yuv + R_V
    yspluv = yuv[0:2]  # save spline points

    if N_UV > 0:
        curve[iuv] = yuv[2::]  # remove spline points

    # Compute optical portion of A(lambda)/E(B-V) curve
    # using cubic spline anchored in UV, optical, and IR
    xsplopir = sp.concatenate(
        ([0], 10000.0 / np.array([26500.0, 12200.0, 6000.0, 5470.0,
                                  4670.0, 4110.0])))
    ysplir = np.array([0.0, 0.26469, 0.82925]) * R_V / 3.1
    ysplop = np.array(
        (sp.polyval([-4.22809e-01, 1.00270, 2.13572e-04][::-1], R_V),
         sp.polyval([-5.13540e-02, 1.00216, -7.35778e-05][::-1], R_V),
         sp.polyval([7.00127e-01, 1.00184, -3.32598e-05][::-1], R_V),
         sp.polyval(
             [1.19456, 1.01707, -5.46959e-03, 7.97809e-04,
              -4.45636e-05][::-1], R_V)))
    ysplopir = sp.concatenate((ysplir, ysplop))

    if Nopir > 0:
        tck = interpolate.splrep(sp.concatenate((xsplopir, xspluv)),
                                 sp.concatenate((ysplopir, yspluv)), s=0)
        curve[iopir] = interpolate.splev(x[iopir], tck)

    # Scale the extinction curve by E(B-V) and convert it into the
    # multiplicative flux correction factor
    curve *= ebv
    corr = 1. / (10.**(0.4 * curve))
    return corr

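# A minimal runnable sketch of unred, assuming the module-level imports used
# elsewhere in this file (numpy as np, scipy as sp, and scipy's interpolate).
# The wavelength grid and E(B-V) below are illustrative.
import numpy as np
import scipy as sp
from scipy import interpolate

wave = np.linspace(3000., 9000., 5)  # wavelengths in Angstroms
flux = np.ones_like(wave)            # flat input spectrum
corr = unred(wave, ebv=0.1)          # Milky Way-like R_V=3.1 by default
print 'dereddened flux:', flux * corr
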
def makehistdata(params, maindir):
    """ This will make the histogram data for the statistics.

        Inputs
            params - A list of parameters that will have statistics created.
            maindir - The directory where the simulation data is held.
        Outputs
            datadict - A dictionary with the data values in numpy arrays.
                The keys are param names.
            errordict - A dictionary with the error values in numpy arrays.
                The keys are param names.
            errordictrel - A dictionary with the error values in numpy
                arrays, normalized by the correct value. The keys are param
                names.
            edatadict - A dictionary with the fitted error estimates in
                numpy arrays. The keys are param names.
    """
    maindir = Path(maindir)
    ffit = maindir.joinpath('Fitted', 'fitteddata.h5')
    inputfiledir = maindir.joinpath('Origparams')

    paramslower = [ip.lower() for ip in params]
    eparamslower = ['n' + ip.lower() for ip in params]

    # Set up data dictionaries
    errordict = {ip: [] for ip in params}
    errordictrel = {ip: [] for ip in params}
    # Read in fitted data
    Ionofit = IonoContainer.readh5(str(ffit))
    times = Ionofit.Time_Vector
    dataloc = Ionofit.Sphere_Coords
    rng = dataloc[:, 0]
    rng_log = sp.logical_and(rng > 200., rng < 400)
    dataloc_out = dataloc[rng_log]
    pnames = Ionofit.Param_Names
    pnameslower = sp.array([ip.lower() for ip in pnames.flatten()])
    p2fit = [sp.argwhere(ip == pnameslower)[0][0]
             if ip in pnameslower else None for ip in paramslower]
    datadict = {ip: Ionofit.Param_List[rng_log, :, p2fit[ipn]].flatten()
                for ipn, ip in enumerate(params)}
    ep2fit = [sp.argwhere(ip == pnameslower)[0][0]
              if ip in pnameslower else None for ip in eparamslower]
    edatadict = {ip: Ionofit.Param_List[rng_log, :, ep2fit[ipn]].flatten()
                 for ipn, ip in enumerate(params)}

    # Determine which input files are to be used.
    dirlist = [str(i) for i in inputfiledir.glob('*.h5')]
    _, outime, filelisting, _, _ = IonoContainer.gettimes(dirlist)
    time2files = []
    for itn, itime in enumerate(times):
        log1 = (outime[:, 0] >= itime[0]) & (outime[:, 0] < itime[1])
        log2 = (outime[:, 1] > itime[0]) & (outime[:, 1] <= itime[1])
        log3 = (outime[:, 0] <= itime[0]) & (outime[:, 1] > itime[1])
        tempindx = sp.where(log1 | log2 | log3)[0]
        time2files.append(filelisting[tempindx])

    curfilenum = -1
    for iparam, pname in enumerate(params):
        curparm = paramslower[iparam]
        # Use Ne from the input to compare to the Ne derived from the power.
        if curparm == 'nepow':
            curparm = 'ne'
        datalist = []
        for itn, itime in enumerate(times):
            for filenum in time2files[itn]:
                filenum = int(filenum)
                if curfilenum != filenum:
                    curfilenum = filenum
                    datafilename = dirlist[filenum]
                    Ionoin = IonoContainer.readh5(datafilename)
                    if ('ti' in paramslower) or ('vi' in paramslower):
                        Ionoin = maketi(Ionoin)
                    pnames = Ionoin.Param_Names
                    pnameslowerin = sp.array([ip.lower()
                                              for ip in pnames.flatten()])
                prmloc = sp.argwhere(curparm == pnameslowerin)
                if prmloc.size != 0:
                    curprm = prmloc[0][0]
                # Build up the parameter vector by finding, for each output
                # range location, the closest point in space in the input
                curdata = sp.zeros(len(dataloc_out))
                for irngn, curcoord in enumerate(dataloc_out):
                    tempin = Ionoin.getclosestsphere(curcoord, [itime])[0]
                    Ntloc = tempin.shape[0]
                    tempin = sp.reshape(tempin, (Ntloc, len(pnameslowerin)))
                    curdata[irngn] = tempin[0, curprm]
                datalist.append(curdata)
        errordict[pname] = datadict[pname] - sp.hstack(datalist)
        errordictrel[pname] = 100. * errordict[pname] / sp.absolute(
            sp.hstack(datalist))
    return datadict, errordict, errordictrel, edatadict

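# A usage sketch for makehistdata. The run directory below is a hypothetical
# placeholder; it must contain 'Fitted/fitteddata.h5' and 'Origparams/*.h5'
# as assumed by the paths inside the function, and the parameter names are
# illustrative.
# datadict, errordict, errordictrel, edatadict = makehistdata(
#     ['Ne', 'Te', 'Ti'], 'simulation_run_dir')
# Each returned dictionary maps a parameter name to a flat numpy array, so
# the relative errors can be histogrammed directly, e.g.:
# hist, edges = sp.histogram(errordictrel['Ne'], bins=50)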