logging.info('\n\n----------------------') logging.info('SIZE,iset,%d', len(iset)) logging.info('SIZE,dlat,%d', len(dlat)) clunumlist = [1000, 500, 250, 100, 50, 30, 25, 20, 18, 16, 14, 12, 10, 8, 6] for seqnum in range(1, 3): for num_clu in clunumlist: logging.info('\n\n----------------------') logging.info('NUM_CLUSTER,%d', num_clu) logging.info('----------------------') logging.info('Clustering Lattice:') keylist, clulist, centroid, variance, G = lat.cluster_harch( dlat, CMr, Dr, theta=.5, num_k=num_clu, dL=None, verbose=False) logging.info('Scoring Lattice:') well, tran = lat.score_clusters(clulist, Dr, centroid, variance, G, sigma, DE_LABEL) TBIN10 = sorted(set(DE_LABEL)) for k in TBIN10: logging.info('SCORE,%d,W,%d,%d,%s,%.5f', seqnum, support, num_clu, k, well[k]) for k in TBIN10: logging.info('SCORE,%d,T,%d,%d,%s,%.5f', seqnum, support, num_clu, k, tran[k]) # keylist, clulist, centroid, variance, G = lat.cluster_harch(dlat, CMr, Dr, theta=.5, num_k=num_clu, dL=dL, verbose=True) # well, tran = lat.score_clusters(clulist, Dr, centroid, variance, G, sigma, DE_LABEL) # FOR BASE VALUES base = defaultdict(int) for i in L:
# Build contact/distance matrices, derive time-window labels, compute the
# derived lattice for this `support` level, persist it, then sweep cluster
# counts and log well/transition scores.
# NOTE(review): indentation was reconstructed from a whitespace-mangled
# source — confirm nesting against VCS.
CM = DS < cutoff                     # boolean contact map from distance cutoff
CMr, Dr = CM[:, Kr], DS[:, Kr]       # restricted-key columns
CMm, Dm = CM[:, Km], DS[:, Km]       # secondary key columns

delabel = np.load(home + '/work/results/DE_label_full.npy')

# Collect TimeScape transition windows, offset per 100k-frame trajectory.
DW = []
for i in range(42):
    for a, b in TS.TimeScape.windows(home + '/work/timescape/desh_%02d_transitions.log' % i):
        DW.append((a + i*100000, b + i*100000))
dL = [delabel[a:b] for a, b in DW]
DE_LABEL = [LABEL10(i, .9) for i in dL]

SPT = [i[0] for i in db.runquery('select distinct support from latt order by support')]
NC = [i[0] for i in db.runquery('select distinct numclu from latt order by numclu')]

mf, lf = {}, {}
dl, ik = {}, {}
key, clu, cent, var, Gm = {}, {}, {}, {}, {}

s = support
mf[s], lf[s] = lat.maxminer(CMr, s)
dl[s], ik[s] = lat.derived_lattice(mf[s], Dr, CMr)

# FIX: use a context manager so the pickle file is flushed and closed
# deterministically (the original `open()` inside pickle.dump leaked the handle).
with open(home + '/work/latt_intrinsics/dlat2_%d.p' % support, 'wb') as fh:
    pickle.dump(dl[s], fh)

for num_clu in NC:
    key[s], clu[s], cent[s], var[s], Gm[s] = lat.cluster_harch(
        dl[s], CMr, Dr, theta=.5, num_k=num_clu, dL=None, verbose=False)
    w, t = lat.score_clusters(clu[s], Dr, cent[s], var[s], Gm[s], sigma, DE_LABEL)
    # One log row per label: W = well score, T = transition score.
    for k in TBIN10:
        logging.info('SCORE,W,%d,%d,%s,%.5f', support, num_clu, k, w[k])
    for k in TBIN10:
        logging.info('SCORE,T,%d,%d,%s,%.5f', support, num_clu, k, t[k])
for m, k in enumerate(KrD): idx = Kr[m] alld = hist(DS[:,idx]) kdistr[k]['All'] = alld/np.sum(alld) for st, c in enumerate(C): d = hist(cluD[st][:,idx]) kdistr[k]['%d'%st] = d/np.sum(d) pickle.dump(kdistr, open('kdistr', 'wb')) for k in KrD: P.show_distr(kdistr[k], xscale=(4,10), showlegend=True, states={str(i):i for i in range(5)},\ xlabel='Distance (in Angstroms)', ylabel='Frequency', fname='distr_'+k, latex=True) # TO Score Clusters based on total PDF: well, tran = lat.score_clusters(clulist, Dr, centroid, variance, G, sigma, DE_LABEL) # TBIN10 = sorted(set(DE_LABEL)) for k in TBIN10: logging.info('SCORE,%d,W,%d,%d,%s,%.5f', seqnum, support, num_clu, k, well[k]) for k in TBIN10: logging.info('SCORE,%d,T,%d,%d,%s,%.5f', seqnum, support, num_clu, k, tran[k]) for k,v in kdistr['q']: print(k, v) elms = (n, k, len(v), state, stperc, bc) if incldist else (n, k, len(v), state, stperc) clusterlist.append(elms) # print('%2d.'%n, '%-15s'%k, '%4d '%len(v), 'State: %d (%4.1f%%)' % (state, stperc)) n += 1 for i in sorted(clusterlist, key =lambda x : x[2], reverse=True):
i[0] for i in db.runquery('select distinct support from latt order by support') ] NC = [ i[0] for i in db.runquery('select distinct numclu from latt order by numclu') ] mf, lf = {}, {} dl, ik = {}, {} key, clu, cent, var, Gm = {}, {}, {}, {}, {} s = support mf[s], lf[s] = lat.maxminer(CMr, s) dl[s], ik[s] = lat.derived_lattice(mf[s], Dr, CMr) pickle.dump(dl[s], open(home + '/work/latt_intrinsics/dlat2_%d.p' % support, 'wb')) for num_clu in NC: key[s], clu[s], cent[s], var[s], Gm[s] = lat.cluster_harch(dl[s], CMr, Dr, theta=.5, num_k=num_clu, dL=None, verbose=False) w, t = lat.score_clusters(clu[s], Dr, cent[s], var[s], Gm[s], sigma, DE_LABEL) for k in TBIN10: logging.info('SCORE,W,%d,%d,%s,%.5f', support, num_clu, k, w[k]) for k in TBIN10: logging.info('SCORE,T,%d,%d,%s,%.5f', support, num_clu, k, t[k])