def initClusters(oo, K, x, mks, t0, t1, Asts, doSepHash=True, xLo=0, xHi=3, oneCluster=False, nzclstr=False):
    """
    Assign initial cluster labels to the selected spikes and allocate the
    posterior-mode arrays on ``oo``.

    A (len(Asts), K+1) matrix is built whose column 0 is ``x[Asts+t0]`` and
    whose columns 1..K are ``mks[Asts+t0]``.  Unless ``oneCluster`` is set,
    the rows are optionally split into "hash" and "non-hash" spikes with
    ``sepHash`` and labelled with ``emMKPOS_sep1B``.

    Parameters
    ----------
    oo : object
        ``oo.epochs`` is read; ``oo.xLo`` / ``oo.xHi`` are read when hash
        separation is performed.  ``oo.sp_prmPstMd``, ``oo.sp_hypPstMd``,
        ``oo.mk_prmPstMd`` and ``oo.mk_hypPstMd`` are (re)allocated as zeros.
    K : int
        Number of mark columns.
    x, mks : arrays indexed with ``Asts + t0``
    t0 : int
        Offset added to ``Asts`` before indexing ``x`` and ``mks``.
    t1 : unused
    Asts : integer ndarray
        Indices (relative to ``t0``) of the spikes to cluster.
    doSepHash : bool
        Separate hash spikes with ``sepHash`` and post-process the
        non-hash labels (split, small-cluster -> noise, repack).
    xLo, xHi : unused
        NOTE(review): ``sepHash`` is given ``oo.xLo`` / ``oo.xHi``, not
        these arguments -- confirm that is intended.
    oneCluster : bool
        Skip clustering; put every spike in cluster 0.
    nzclstr : bool
        Treat the highest label as a noise cluster: unassigned spikes get
        label ``MF-1`` and the noise cluster is left out of ``Mwonz``.

    Returns
    -------
    (labS, labH, flatlabels, Mwonz, MF, hashthresh, clstrs)
        ``flatlabels`` carries the labels in the original spike order;
        ``Mwonz`` is the number of clusters the mode arrays are sized for.

    Side effects
    ------------
    Prints diagnostics, writes the text files ``flatlabels`` (always) and
    ``labSb4`` / ``labS`` / ``labH`` (hash-separation path) to the current
    directory, and issues ``_plt.scatter`` calls on the hash-separation
    path (``_plt`` is presumably matplotlib.pyplot).

    NOTE(review): this block was reconstructed from whitespace-collapsed
    source; the nesting of the ``"cluters"`` print is a best guess.
    """
    nSpks = len(Asts)

    # column 0: x sample, columns 1..K: marks of every selected spike
    _x = _N.empty((nSpks, K+1))
    _x[:, 0] = x[Asts+t0]
    _x[:, 1:] = mks[Asts+t0]

    if oneCluster:
        hashthresh = _N.min(_x[:, 1:], axis=0)   # no hash spikes
        labS = _N.zeros(nSpks, dtype=int)
        labH = _N.array([], dtype=int)
        clstrs = _N.array([0, 1])
        M = 1
        MF = 1
        flatlabels = _N.zeros(nSpks, dtype=int)
    else:
        if doSepHash:
            unonhash, hashsp, hashthresh = sepHash(_x, BINS=20, blksz=5, xlo=oo.xLo, xhi=oo.xHi)
        else:
            unonhash = _N.arange(nSpks)
            # integer dtype so the empty array stays usable as an index
            hashsp = _N.array([], dtype=int)
            hashthresh = _N.min(_x[:, 1:], axis=0)   # no hash spikes

        #  len(hashsp)==len(labH)
        #  len(unonhash)==len(labS)
        if (len(unonhash) > 0) and (len(hashsp) > 0):
            labS, labH, clstrs = emMKPOS_sep1B(_x[unonhash], _x[hashsp])
        elif len(unonhash) == 0:
            labS, labH, clstrs = emMKPOS_sep1B(None, _x[hashsp], TR=5)
        else:
            labS, labH, clstrs = emMKPOS_sep1B(_x[unonhash], None, TR=5)

        if doSepHash:
            splitclstrs(_x[unonhash], labS)
            posMkCov0(_x[unonhash], labS)

            smallClstrID, spksInSmallClstrs = findsmallclusters(_x[unonhash], labS, K+1)
            print(smallClstrID)

            _N.savetxt("labSb4", labS, fmt="%d")
            # spikes of small clusters become noise; -1 first for easy cpack2
            for nid in smallClstrID:
                labS[_N.where(labS == nid)[0]] = -1
            _N.savetxt("labS", labS, fmt="%d")

            #  0...clstrs[0]-1     clstrs[0]...clstrs[0]+clstrs[1]-1  (no nz)
            #  0...clstrs[0]-2     clstrs[0]-1...clstrs[0]+clstrs[1]-2  (no nz)
            contiguous_pack2(labS, startAt=-1)   # presumably relabels in place
            clstrs[0] = len(_N.unique(labS))
            clstrs[1] = len(_N.unique(labH))
            print("----------")
            print(clstrs)
            print("----------")

            #  labS [0...#S]   labH [#S...#S+#H]
            labS[_N.where(labS == -1)[0]] = clstrs[0]+clstrs[1]-1   # noise gets the highest ID
            contiguous_pack2(labH, startAt=(clstrs[0]-1))
            _N.savetxt("labH", labH, fmt="%d")
            _N.savetxt("labS", labS, fmt="%d")

            nonnz = _N.where(labS < clstrs[0]-1)[0]
            nz = _N.where(labS == clstrs[0]+clstrs[1]-1)[0]
            _plt.scatter(_x[hashsp, 0], _x[hashsp, 1], color="black")
            _plt.scatter(_x[unonhash[nonnz], 0], _x[unonhash[nonnz], 1], color="blue")
            _plt.scatter(_x[unonhash[nz], 0], _x[unonhash[nz], 1], color="red")

        # scatter the per-group labels back into original spike order;
        # -1 marks spikes that received no label
        flatlabels = _N.ones(nSpks, dtype=int)*-1
        # one pass per distinct label instead of one pass per spike
        for lbl in _N.unique(labS):
            flatlabels[unonhash[_N.where(labS == lbl)[0]]] = lbl
        for lbl in _N.unique(labH):
            flatlabels[hashsp[_N.where(labH == lbl)[0]]] = lbl

        MF = clstrs[0] + clstrs[1]   # includes noise

        if nzclstr:
            flatlabels[_N.where(flatlabels == -1)[0]] = MF - 1
            M = int((clstrs[0]-1) * 1.3 + clstrs[1]) + 2   #  20% more clusters
        else:
            M = int(clstrs[0] * 1.3 + clstrs[1]) + 2   #  20% more clusters
        print("cluters: %d" % M)

    # Same truthiness test as the ``if nzclstr`` above, so a falsy non-bool
    # (e.g. 0) no longer loses a cluster.
    Mwonz = M-1 if nzclstr else M

    #####  MODES  - find from the sampling
    oo.sp_prmPstMd = _N.zeros((oo.epochs, 3*Mwonz))            # mode of params
    oo.sp_hypPstMd = _N.zeros((oo.epochs, (2+2+2)*Mwonz))      # hyperparam
    oo.mk_prmPstMd = [_N.zeros((oo.epochs, Mwonz, K)),
                      _N.zeros((oo.epochs, Mwonz, K, K))]      # mode of params
    oo.mk_hypPstMd = [_N.zeros((oo.epochs, Mwonz, K)),
                      _N.zeros((oo.epochs, Mwonz, K, K)),      # hyperparam
                      _N.zeros((oo.epochs, Mwonz, 1)),         # hyperparam
                      _N.zeros((oo.epochs, Mwonz, K, K))]
    print(labS)
    print(labH)
    _N.savetxt("flatlabels", flatlabels, fmt="%d")

    # flatlabels holds the same labels as labS/labH, in temporal spike order
    return labS, labH, flatlabels, Mwonz, MF, hashthresh, clstrs
def init0(self, pos, mk, n1, n2, sepHash=False, pctH=0.7, MS=None, sepHashMthd=0, doTouchUp=False, MF=None, kmeansinit=True):
    """
    Seed the iteration-0 mixture parameters ``smu[0]``, ``scov[0]`` and
    ``sm[0]`` on ``self`` from an initial clustering of the
    (position, marks) samples.

    M       total number of clusters (``self.M``; default for ``MF``)
    MS      number of clusters assigned to signal (required if ``sepHash``)
    MF      number of clusters used for initial fit
    M - MF  If doing touchup, number of clusters to assign to this

    pos, mk   values copied into column 0 and columns 1.. of an
              (n2-n1, self.k) data matrix
    n1, n2    only their difference (the number of samples) is used
    sepHash   treat hash spikes separately via ``_fu.sepHash``
    sepHashMthd  selects how the data matrix is ordered when ``sepHash``
    kmeansinit   if False (and not ``sepHash``), use random labels instead
                 of the best-BIC run out of 20 ``scv.kmeans2`` runs
    pctH, doTouchUp   not used in the visible part of this function

    Raises ValueError if ``sepHash`` is requested without ``MS``.
    """
    print("init0")
    oo = self
    k = oo.k
    MF = oo.M if MF is None else MF
    print("MF %d" % MF)

    N = n2-n1
    _x = _N.empty((N, k))
    _x[:, 0] = pos
    _x[:, 1:] = mk

    #  Gibbs sampling
    ################  init cluster centers
    if sepHash:   #  treat hash spikes seperately
        if MS is None:
            # previously failed later with a TypeError on ``MF - MS``
            raise ValueError("init0: MS must be given when sepHash=True")
        ##########################
        BINS = 20
        bins = _N.linspace(-6, 6, BINS+1)   # not used in the visible code
        blksz = 20
        unonhash, hashsp = _fu.sepHash(_x, BINS=BINS, blksz=blksz, xlo=-6, xhi=6)

        MH = MF - MS
        sigInds = unonhash
        smkpos = _x[sigInds]
        print(smkpos)
        labS = _fu.spClstrs3MkCl(smkpos)
        MSA = len(labS)
        # NOTE(review): MH is recomputed from the *old* MS, so this branch
        # leaves MH unchanged -- confirm the intended order.
        if MSA > MS:
            MH = MF - MS
            MS = MSA

        labH = _fu.bestcluster(50, _x[hashsp], MH)

        ##################
        # hash labels first, then signal labels shifted past them
        lab = _N.array(labH.tolist() + (labS + MH).tolist())
        x = _N.empty((N, k))
        if sepHashMthd == 0:
            # NOTE(review): ``inds`` is not defined in this function; unless
            # it exists at module level this raises NameError -- confirm.
            x[:, 0] = _x[inds, 0]
            x[:, 1:] = _x[inds, 1:]
        else:
            # same ordering as ``lab``: hash rows, then signal rows
            x[0:len(hashsp)] = _x[hashsp]
            x[len(hashsp):] = _x[sigInds]
    else:  #  don't separate hash from signal marks.  simple kmeans2
        x = _x
        if not kmeansinit:  #  just random initial conditions
            print("random initial conditions")
            lab = _N.array(_N.random.rand(N)*MF, dtype=int)
        else:
            ITERS = 20
            labsAll = []
            mAll = []
            bics = _N.empty(ITERS)
            for it in range(ITERS):
                scr, lab = scv.kmeans2(x, MF)
                _fu.contiguous_pack(lab)
                bic, K = _fu.kmBIC(scr, lab, x)
                bics[it] = bic
                mAll.append(K)
                labsAll.append(lab)
            # keep the run with the largest BIC value
            bestI = _N.where(bics == _N.max(bics))[0][0]
            lab = labsAll[bestI]
            MF = mAll[bestI]

    #  now assign the cluster we've found to Gaussian mixtures
    SI = N / MF                                # not used in the visible code
    covAll = _N.cov(x.T)
    dcovMag = _N.diagonal(covAll)*0.005        # not used in the visible code

    for im in range(MF):
        kinds = _N.where(lab == im)[0]   # rows labelled im
        if len(kinds) > 6:   # problem when cov is not positive def.
            oo.smu[0, im] = _N.mean(x[kinds], axis=0)
            oo.scov[0, im] = _N.cov(x[kinds], rowvar=0)
        else:
            # too few points: fall back to the global mean and a shrunken
            # global covariance
            oo.smu[0, im] = _N.mean(x, axis=0)
            oo.scov[0, im] = covAll*0.125
        oo.sm[0, im] = float(len(kinds)+1) / (N+MF)