def make_prior(self):
    """Build the prior distributions for a K-component Gaussian-style mixture.

    Creates Edward random variables on ``self.prior`` (a ``pyutil.util_obj``):
    per-component location, diagonal scale, low-rank scale perturbation, a
    rate term, and a sparse Dirichlet over mixture weights.

    Returns:
        pyutil.util_obj: the populated prior container (also stored on
        ``self.prior``).
    """
    D = self.D
    K = self.K
    diriAlpha = 0.1  # Dirichlet concentration; /K below keeps sparsity as K grows
    name = self.name
    self.prior = prior = pyutil.util_obj()
    # Probe whether this scope's variables already exist: tf.get_variable
    # raises ValueError when the variable was created before and reuse is
    # not set, in which case we re-enter the scope with reuse=True.
    try:
        tf.get_variable(name + '/prior', [1])
        reuse = None
    except ValueError:
        reuse = True
    print('reuse', reuse)
    with tf.variable_scope(name, reuse=reuse):
        uspan = [-1E5, 1E5]  # effectively-unbounded uniform support
        ##### Prior
        prior.loc = edm.Normal(tf.zeros(D), tf.ones(D), sample_shape=K)
        prior.scale_diag = edm.Uniform(*[0.001, 10.], sample_shape=(K, D))
        prior.scale_perturb_factor = edm.Uniform(*uspan, sample_shape=(K, D, 1))
        prior.rate = edm.Uniform(*[0.01, 10.], sample_shape=(K, ))
        prior.weight = pi = edm.Dirichlet(
            float(diriAlpha) / K * tf.ones(K))
    return prior
def make_post(self):
    """Build the point-mass (MAP) posterior variables matching make_prior.

    Each posterior is a PointMass over a trainable tf variable; softmax /
    softplus transforms keep weights on the simplex and scales positive.

    Returns:
        pyutil.util_obj: the populated posterior container (also stored on
        ``self.post``).
    """
    D = self.D
    K = self.K
    name = self.name
    self.post = post = pyutil.util_obj()
    # Same reuse-probe idiom as make_prior: tf.get_variable raises
    # ValueError if the variable already exists without reuse.
    try:
        tf.get_variable(name + '/post', [1])
        reuse = None
    except ValueError:
        reuse = True
    print('reuse', reuse)
    with tf.variable_scope(name, reuse=reuse):
        ##### Posterior
        post.weight = ed.models.PointMass(
            tf.nn.softmax(tf.get_variable("q_pi", [K])))
        post.mu = ed.models.PointMass(tf.get_variable("q_mu", [K, D]))
        post.scale_diag = edm.PointMass(
            tf.nn.softplus(tf.get_variable('q_scale_diag', shape=[K, D])),
        )
        post.scale_perturb_factor = ed.models.PointMass(
            (tf.get_variable("q_scale_perturb_factor", [K, D, 1])))
        post.concentration = edm.PointMass(
            tf.nn.softplus(tf.get_variable('concentration', shape=[K, 1])),
        )
        post.rate = edm.PointMass(
            tf.nn.softplus(tf.get_variable('rate', shape=[K, 1])),
        )
    return post
def add_predictProba(glist):
    """Attach a minimal `model` object exposing `predict_proba` to `glist`.

    The attached predictor treats its input as (possibly NaN-containing)
    integer labels and one-hot encodes them.
    """
    def _predict_proba(values):
        labels = np.nan_to_num(values).astype(int)
        return pyutil.oneHot(labels)

    wrapper = pyutil.util_obj()
    wrapper.predict_proba = _predict_proba
    glist.model = wrapper
    return glist
def fit_PCA(C, n_components=5, **kwargs):
    """Fit an sklearn PCA on matrix `C` and bundle model + data.

    Returns a pyutil.util_obj with keys 'model', 'train_data' (the input)
    and 'trans_data' (the PCA-transformed coordinates).
    """
    pca = skpca.PCA(n_components=n_components, **kwargs)
    transformed = pca.fit_transform(C)
    return pyutil.util_obj(
        model=pca,
        train_data=C,
        trans_data=transformed,
    )
def job__cluster__hpm(
    tdf,
    name='test0',
    K=40,
    meanNorm=1,
    threshold=0.,
    batchSize=500,
    n_iter=3000,
    silent=0,
    NCORE=4,
    randomState=0,
    alpha=None,
    weighted=True,
):
    """Fit a hyper-plane mixture model to `tdf` and return its parameters.

    Seeds TF and numpy with `randomState`, optionally fits with random
    mini-batches of size `batchSize` (0/None disables batching), plots the
    fitting trace unless `silent`, caches per-sample statistics onto the
    posterior, and saves the parameters to 'params.npy'.
    """
    import pymisca.tensorflow_extra_.hyper_plane_mixture as hpm
    # deterministic runs: seed both graph-level TF and numpy RNGs
    hpm.tf.set_random_seed(randomState)
    np.random.seed(randomState)
    mdl = hpm.main(K=K, NCORE=NCORE, name=name, meanNorm=meanNorm,
                   threshold=threshold, weighted=weighted, alpha=alpha)
    if batchSize:
        batchMaker = hpm.pytfu.batchMaker__random(batchSize=batchSize)
    else:
        batchMaker = None  # full-batch fitting
    trace = mdl.fit(
        tdf,
        batchMaker=batchMaker,
        n_iter=n_iter,
        autoStop=0,
    )
    if not silent:
        plt.plot(trace)
    cdict = pymod.cache__model4data(mdl, tdf)
    mdl.post.__dict__.update(cdict)
    np.save('params.npy', mdl.params)
    out = mdl.params
    out['mdl'] = mdl
    return pyutil.util_obj(**out)
def worker__fluff(rec, ):
    """Render one fluff profile figure for a record dict.

    `rec` must provide 'acc', 'interval', 'tracks' and 'annotation';
    optional keys 'DIR' (output directory, default '.'), 'ext' (figure
    extension, default 'svg') and 'labels'. Returns the output filename
    reported by sjob.fig__fluffProfile.
    """
    rec = pyutil.util_obj(**rec)
    out_dir = getattr(rec, 'DIR', '.')
    extension = getattr(rec, 'ext', 'svg')
    labels = getattr(rec, 'labels', None)
    ofname = '%s/%s.%s' % (out_dir, rec.acc, extension)
    ofname = sjob.fig__fluffProfile(
        rec.interval,
        rec.tracks,
        ofname=ofname,
        annotation=rec.annotation,
        labels=labels,
    )
    return ofname
def job__cluster__vmf(
    tdf,
    K=30,
    init_method='kmeans',
    weighted=True,
    n_iter=3000,
    randomState=None,
    nStart=15,
    min_iters=50,
    verbose=1,
    callback=None,
    silent=0,
    sample_weights='sd',
):
    """Fit a von Mises-Fisher mixture to `tdf` and return fitted params.

    Seeds numpy with `randomState`, fits a MixtureVMF model, plots the
    (negated) loss history unless `silent`, caches per-sample statistics,
    and persists everything to 'params.npy' before wrapping the result
    dict in a pyutil.util_obj (which also carries the model under 'mdl').
    """
    import pymisca.model_collection.mixture_vmf as mod
    np.random.seed(randomState)
    mdl = mod.MixtureVMF(
        K=K,
        init_method=init_method,
        weighted=weighted,
    )
    # negate so the curve reads as a score rather than a loss
    loss_hist = -mdl.fit(
        tdf,
        verbose=verbose,
        callback=callback,
        nStart=nStart,
        n_iter=n_iter,
        min_iters=min_iters,
        sample_weights=sample_weights,
    )
    if not silent:
        plt.plot(loss_hist)
    cdict = pymod.cache__model4data(mdl, tdf)
    cdict.update(mdl.params)
    np.save('params.npy', cdict)
    cdict['mdl'] = mdl
    return pyutil.util_obj(**cdict)
def make_prior(self):
    """Build the prior for a K-component gamma/von-Mises-Fisher mixture.

    Creates broad Uniform priors over the gamma (concentration, rate) and
    vMF (concentration, direction) parameters, plus a very sparse Dirichlet
    over mixture weights.

    Returns:
        pyutil.util_obj: the populated prior container (also stored on
        ``self.prior``).
    """
    D = self.D
    K = self.K
    # Very small concentration -> strongly sparsity-inducing weight prior.
    diriAlpha = 0.001
    name = self.name
    self.prior = prior = pyutil.util_obj()
    # Probe whether this scope's variables already exist: tf.get_variable
    # raises ValueError for an existing variable without reuse set.
    try:
        tf.get_variable(name + '/prior', [1])
        reuse = None
    except ValueError:
        reuse = True
    print ('reuse', reuse)
    with tf.variable_scope(name, reuse=reuse):
        ##### Prior
        prior.gamma_concentration = edm.Uniform(*[0.001, 1000.], sample_shape=(K,))
        prior.gamma_rate = edm.Uniform(*[0.001, 100000.], sample_shape=(K,))
        prior.vm_concentration = edm.Uniform(*[0.001, 100000.], sample_shape=(K,))
        prior.vm_direction = edm.Uniform(*[0.001, 100000.], sample_shape=(K, D))
        prior.weight = pi = edm.Dirichlet(
            float(diriAlpha) / K * tf.ones(K)
        )
    return prior
def make_post(self):
    """Build point-mass posteriors for the gamma/vMF mixture.

    Trainable variables are named by a running integer counter; the
    ``str(i)`` names are kept as-is because they determine the graph /
    checkpoint variable names. Transforms keep each quantity in its valid
    domain: squared-l2-normalized weights (simplex), softplus positives,
    a bounded vMF concentration, and l2-normalized directions.

    Returns:
        pyutil.util_obj: the populated posterior container (also stored on
        ``self.post``).
    """
    D = self.D
    K = self.K
    name = self.name
    self.post = post = pyutil.util_obj()
    # Same reuse-probe idiom as make_prior: tf.get_variable raises
    # ValueError if the variable already exists without reuse.
    try:
        tf.get_variable(name + '/post', [1])
        reuse = None
    except ValueError:
        reuse = True
    print ('reuse', reuse)
    with tf.variable_scope(name, reuse=reuse):
        ##### Posterior
        i = -1
        i += 1
        # weights: square of a unit vector sums to 1 -> lies on the simplex
        post.weight = edm.PointMass(
            tf.square(
                tf.nn.l2_normalize(
                    tf.get_variable(str(i), shape=[K]),
                ),
                name='weight',
            )
        )
        i += 1
        post.gamma_concentration = edm.PointMass(
            tf.nn.softplus(
                tf.get_variable(str(i), shape=[K, ]),
                name='gamma_concentration',
            ),
        )
        i += 1
        post.gamma_rate = edm.PointMass(
            tf.nn.softplus(
                tf.get_variable(str(i), shape=[K, ]),
                name='gamma_rate',
            ),
        )
        i += 1
        # double-softplus keeps the vMF concentration positive and softly
        # bounded (roughly below 10.)
        post.vm_concentration = edm.PointMass(
            0.0 + tf.nn.softplus(
                10. - tf.nn.softplus(
                    tf.get_variable(str(i), shape=[K, ]),
                ),
                name='vm_concentration',)
        )
        i += 1
        post.vm_direction = edm.PointMass(
            tf.nn.l2_normalize(
                tf.get_variable(str(i), [K, D]),
                axis=-1,
                name="vm_direction",
            ),
        )
    return post
def clu2bed(segDF, ofname=None):
    '''Must have columns: ('acc','pos','clu')

    Collapse a per-position binary cluster track into BED-like peak
    intervals: each maximal run of rows with clu == 1 on the same 'acc'
    (sequence/chromosome) becomes one peak.  The step between consecutive
    'pos' values is inferred from the first two rows, so positions are
    assumed evenly spaced within an 'acc' -- TODO confirm with callers.
    If `ofname` is given, the result is written as TSV (best-effort) and
    the filename returned; otherwise a DataFrame of peaks is returned.
    '''
    segDF = segDF.reset_index()
    # stdout,isFile = get__stdout(ofname)
    # Grid spacing inferred from the first two positions.
    stepSize = np.diff(segDF['pos'].values[:2], axis=0)[0]
    vals = segDF[['clu', 'acc']].values
    # Row-to-row change in either cluster label or accession.
    isDiff = (vals[1:] != vals[:-1]).any(axis=1)
    segDF['isDiff'] = np.concatenate([[True], isDiff], axis=0)
    # Iterate rows as attribute-style objects.
    it = (pyutil.util_obj(**vars(x)) for x in segDF.itertuples())
    # Mutable scratch record for the peak currently being built; copied
    # into `peaks` each time a peak closes.
    peak = pyutil.collections.OrderedDict((
        ('chrom', None),
        ('start', None),
        ('end', None),
        ('acc', None),
    ))
    peaks = []

    # NOTE: the three closures below deliberately read `rec`, `idx`,
    # `oldClu`, `oldPos`, `oldAcc` from the enclosing loop scope.

    def savePeakStart():
        # Open a new peak at the current record.
        peak['chrom'] = rec.acc
        peak['start'] = rec.pos
        return

    def savePeakEnd():
        # kk = loc
        # Close the open peak at the previous position (+ one step).
        peak['end'] = oldPos + stepSize
        peak['acc'] = 'summitPos%d' % ((peak['start'] + peak['end']) // 2)
        assert peak['end'] > peak['start'], peak
        # pyutil.ppJson(locals())
        peaks.append(peak.copy())
        # line = u'\t'.join(map(unicode,peak.values()))
        # stdout.write(u'%s\n'%line)
        # print peak
        return

    def changed():
        # Handle a transition: possibly close the previous peak and/or
        # open a new one at the current record.
        if idx != 0:
            if oldClu == 1:
                savePeakEnd()
            if rec.clu == 1:
                if (oldClu == 0) | (oldAcc != rec.acc):
                    savePeakStart()
        else:
            # First row: only an opening is possible.
            if rec.clu == 1:
                savePeakStart()
        return

    #### Starting the loop
    oldClu = 0
    for idx, rec in enumerate(it):
        if (idx == 0):
            changed()
        elif (rec.clu != oldClu) or (rec.acc != oldAcc):
            changed()
        oldClu = rec.clu
        oldPos = rec.pos
        oldAcc = rec.acc
    # Flush: close a peak still open at end-of-data.
    changed()
    resDF = pd.DataFrame(peaks)
    if ofname is not None:
        try:
            pyutil.to_tsv(
                resDF,
                ofname,
            )
            return ofname
        except Exception as e:
            # Best-effort write: fall through and return the DataFrame.
            print e
    return resDF
def extract_bigwig(
    bwFile,
    bedFile,
    stepSize=1,
    mapChunk=None,
    # span = None
    shift=1,
    # outIndex = None,
    stranded=1,
):
    '''
    Extracting a signal matrix for each bed region

    Runs extract_bigwig_worker over the whole bed file, pads every
    per-region signal vector with zeros to the longest observed length,
    and returns a DataFrame (rows indexed by region id, columns in
    bp offsets; centered around 0 when `shift` is truthy).
    NOTE(review): `mapChunk` is accepted but unused in this body.
    '''
    # assert NCORE == 1,'Multi-thread is slower here..., so dont! '
    # assert stepSize == 1,'Not implemented'
    with pybw.open(bwFile) as bw:
        it = open(bedFile)
        worker = pyutil.functools.partial(
            extract_bigwig_worker,
            bwFile=bwFile,
            stepSize=stepSize,
            stranded=stranded,
        )
        if 1 == 1:
            # Single-threaded: worker handles the whole bed iterator.
            res = map(worker, [it])
            res = sum(res, [])
        # pass
    ids, out = zip(*res)
    #### Replacing "None" and incomplete intervals
    # A region maps to None when its chromosome is absent from the bigwig.
    ref = next((item for item in out if item is not None), None)
    assert ref is not None, 'Cannot find an reference shape, likely wrong chromosomes.\n\
bigwigFile:"%s" ' % bwFile
    # L = len(ref)
    # L = len(res) if span is None else span //stepSize
    # Target width: the longest signal vector observed.
    L = max(map(len, out))
    lst = []
    print '[L]=', L
    for x in out:
        if x is None:
            # Missing region -> all-zero row.
            y = [0.] * L
        else:
            # Right-pad short vectors with zeros.
            Lx = len(x)
            y = x + [0.] * (L - Lx)
        lst += [y]
    # out = [[0.]*L if x is None else x for x in out]
    out = np.array(lst)
    out = np.nan_to_num(out)
    # MLEN = np.mean([len(x) for x in out])
    MLEN = 'not set'
    # dtype 'O' would mean rows of unequal length survived the padding.
    assert out.dtype != 'O', '''Unable to shape the matrix properly: %s, %s ''' % (MLEN, [(type(x), x) for x in out if len(x) < MLEN])
    out = pd.DataFrame(out).set_index([list(ids)])
    # Column labels in base pairs (one per step).
    cols = stepSize * (np.arange(
        0,
        out.shape[-1],
    ))
    if shift:
        # Center the coordinate axis on the interval midpoint.
        mid = (L * stepSize) // 2
        cols += -mid
    out.columns = cols
    # out.columns = (stepSize * np.arange(0, out.shape[-1], ))
    # Do something with the values...
    # out = ctMat.countMatrix.from_DataFrame(df=out)
    # out.fname = bwFile
    # Attach provenance for downstream bookkeeping.
    out.param = pyutil.util_obj()
    out.param['bwFile'] = bwFile
    out.param['bedFile'] = bedFile
    return out
def init_model(self,D=None,K = None,alpha = 1.0): self.D = D = self.D if D is None else D assert D is not None self.K = K = self.K if K is None else K assert K is not None # print (K) uspan = [-1E5,1E5] name = self.name try: tf.get_variable(name+'/test',[1]) reuse = None except: reuse = True print reuse prior = pyutil.util_obj() post = pyutil.util_obj() with tf.variable_scope(name, reuse=reuse): ##### Prior prior.mu = edm.Normal(tf.zeros(D), tf.ones(D), sample_shape=K) prior.scale_diag = edm.Uniform(*uspan,sample_shape=(K,D)) prior.scale_perturb_factor = edm.Uniform(*uspan,sample_shape=(K,D,1)) prior.concentration = edm.Uniform(*uspan,sample_shape=(K,1)) prior.rate = edm.Uniform(*uspan,sample_shape=(K,1)) # prio # scale_perturb_factor = edm.Normal( # loc=tf.zeros(1), # scale=tf.ones(1), # sample_shape=(K,D) # ) # prior.weight = edm.Dirichlet(tf.ones(K)) prior.weight = pi = edm.Dirichlet( float(alpha)/K * tf.ones(K) ) ##### Posterior post.weight = ed.models.PointMass( tf.nn.softmax( tf.get_variable("q_pi", [K]) ) ) post.mu = ed.models.PointMass( tf.get_variable("q_mu", [K,D]) ) post.scale_diag = edm.PointMass( tf.nn.softplus( tf.get_variable('q_scale_diag',shape=[K,D]) ), ) post.scale_perturb_factor = ed.models.PointMass( ( tf.get_variable("q_scale_perturb_factor", [K,D,1]) ) ) post.concentration = edm.PointMass( tf.nn.softplus( tf.get_variable('concentration',shape=[K,1]) ), ) post.rate = edm.PointMass( tf.nn.softplus( tf.get_variable('rate',shape=[K,1]) ), ) self.prior = prior self.post = post ##### Dictonary for constructing self.emDist(**self.param) self.em_key =[ 'scale_diag', 'scale_perturb_factor', 'concentration', 'rate', ] self.mix_key = [ 'weight', ] self.param_key = (self.em_key + self.mix_key) # self.emKey = ['loc','scale_diag','scale_perturb_factor'] self.paramDict = {getattr(prior,name): getattr(post,name) for name in self.param_key} # self.paramDict = {} # self.priorDict = {v[0]:v[1] for v in self.param.values()} # 
self.priorDict.update({self.pi:self.q_pi}) # self.postDict = {k:v[1] for k,v in self.param.items()} ### Prior components cDicts = [ {key: v[k] for key,v in prior.__dict__.items() if key in self.em_key} for k in range(K)] self.components = [self.emDist(**d) for d in cDicts] ### Posterior generative # edm.Mixture cDicts = [ {key: v[k] for key,v in post.__dict__.items() if key in self.em_key} for k in range(K)] self.postComponents = [self.emDist(**d) for d in cDicts] # edm.ParamMixture # self.x_post = em = self.emDist(**{k:v for k,v in self.post.__dict__.items() # if k in self.em_key}) self.initialised = True; return self