# Sweep clustering granularity over a fixed list of cluster counts and log
# per-bin "well"/"transition" scores for the loaded derived lattice.
# NOTE(review): reconstructed from a whitespace-mangled one-line paste;
# statement boundaries inferred from syntax — confirm against the original session.
# `iset`, `home`, `support`, `CMr`, `Dr`, `sigma`, `DE_LABEL`, and `lat` are
# assumed to be defined earlier in the session.
len(iset)  # REPL-style size check (no effect when run as a script)
with open(home + '/work/latt_intrinsics/dlat_%d.p' % support, 'rb') as fd:
    dlat = pickle.load(fd)  # with-block closes the handle (original leaked it)
len(dlat)  # REPL-style size check
logging.info('\n\n----------------------')
logging.info('SIZE,iset,%d', len(iset))
logging.info('SIZE,dlat,%d', len(dlat))
# Cluster counts to try, from coarse (1000) down to very coarse (6).
clunumlist = [1000, 500, 250, 100, 50, 30, 25, 20, 18, 16, 14, 12, 10, 8, 6]
for seqnum in range(1, 3):  # two repetitions of the full sweep
    for num_clu in clunumlist:
        logging.info('\n\n----------------------')
        logging.info('NUM_CLUSTER,%d', num_clu)
        logging.info('----------------------')
        logging.info('Clustering Lattice:')
        keylist, clulist, centroid, variance, G = lat.cluster_harch(
            dlat, CMr, Dr, theta=.5, num_k=num_clu, dL=None, verbose=False)
        logging.info('Scoring Lattice:')
        well, tran = lat.score_clusters(
            clulist, Dr, centroid, variance, G, sigma, DE_LABEL)
        TBIN10 = sorted(set(DE_LABEL))  # stable, sorted label bins for logging
        for k in TBIN10:
            logging.info('SCORE,%d,W,%d,%d,%s,%.5f',
                         seqnum, support, num_clu, k, well[k])
        for k in TBIN10:
            logging.info('SCORE,%d,T,%d,%d,%s,%.5f',
                         seqnum, support, num_clu, k, tran[k])
        # keylist, clulist, centroid, variance, G = lat.cluster_harch(dlat, CMr, Dr, theta=.5, num_k=num_clu, dL=dL, verbose=True)
# Build contact/distance matrices, derive per-window DE labels, compute the
# derived lattice at the current support, persist it, and log scores for every
# cluster count recorded in the `latt` DB table.
# NOTE(review): reconstructed from a whitespace-mangled one-line paste;
# `DS`, `cutoff`, `Kr`, `Km`, `home`, `support`, `sigma`, `TBIN10`, `TS`,
# `LABEL10`, `db`, and `lat` are assumed defined earlier in the session.
CM = DS < cutoff                      # boolean contact map from distances
CMr, Dr = CM[:, Kr], DS[:, Kr]        # restricted feature columns
CMm, Dm = CM[:, Km], DS[:, Km]        # alternate feature columns
delabel = np.load(home + '/work/results/DE_label_full.npy')
DW = []
for i in range(42):
    # Window offsets are shifted by 100000 frames per trajectory index.
    for a, b in TS.TimeScape.windows(
            home + '/work/timescape/desh_%02d_transitions.log' % i):
        DW.append((a + i * 100000, b + i * 100000))
dL = [delabel[a:b] for a, b in DW]
DE_LABEL = [LABEL10(i, .9) for i in dL]
SPT = [i[0] for i in db.runquery('select distinct support from latt order by support')]
NC = [i[0] for i in db.runquery('select distinct numclu from latt order by numclu')]
mf, lf = {}, {}
dl, ik = {}, {}
key, clu, cent, var, Gm = {}, {}, {}, {}, {}
s = support
mf[s], lf[s] = lat.maxminer(CMr, s)
dl[s], ik[s] = lat.derived_lattice(mf[s], Dr, CMr)
# with-block closes the handle (original left the file open after dump).
with open(home + '/work/latt_intrinsics/dlat2_%d.p' % support, 'wb') as fd:
    pickle.dump(dl[s], fd)
for num_clu in NC:
    key[s], clu[s], cent[s], var[s], Gm[s] = lat.cluster_harch(
        dl[s], CMr, Dr, theta=.5, num_k=num_clu, dL=None, verbose=False)
    w, t = lat.score_clusters(
        clu[s], Dr, cent[s], var[s], Gm[s], sigma, DE_LABEL)
    for k in TBIN10:
        logging.info('SCORE,W,%d,%d,%s,%.5f', support, num_clu, k, w[k])
    for k in TBIN10:
        logging.info('SCORE,T,%d,%d,%s,%.5f', support, num_clu, k, t[k])
# Load a precomputed lattice at support=4550, cluster it, then group clusters
# by their majority state label and pick the purest cluster per state.
# NOTE(review): reconstructed from a whitespace-mangled one-line paste;
# `home`, `CMr`, `Dm`, `num_clu`, `bL`, `TS`, `LABEL10`, and `lat` are assumed
# defined earlier in the session.
delabel = np.load(home + '/work/results/DE_label_full.npy')
DW = []
for i in range(42):
    # Window offsets are shifted by 100000 frames per trajectory index.
    for a, b in TS.TimeScape.windows(
            home + '/work/timescape/desh_%02d_transitions.log' % i):
        DW.append((a + i * 100000, b + i * 100000))
dL = [delabel[a:b] for a, b in DW]
DE_LABEL = [LABEL10(i, .9) for i in dL]
logging.info('Loading Lattice')
support = 4550
# with-blocks close the handles (original leaked them); the bare len() calls
# were REPL-style size checks and have no effect when run as a script.
with open(home + '/work/latt_intrinsics/iset_%d.p' % support, 'rb') as fd:
    iset = pickle.load(fd)
len(iset)
with open(home + '/work/latt_intrinsics/dlat_%d.p' % support, 'rb') as fd:
    dlat = pickle.load(fd)
len(dlat)
keylist, clulist, centroid, variance, G = lat.cluster_harch(
    dlat, CMr, Dm, theta=.5, num_k=num_clu, dL=None, verbose=False)
clu = {k: c for k, c in zip(keylist, clulist)}
lat.printclu(clu, bL)
# Bucket clusters by their dominant state (5 states assumed — bincount
# minlength below fixes this); keep only clusters with >= 100 members.
clu_by_state = [[] for i in range(5)]
for idx, (k, c) in enumerate(zip(keylist, clulist)):
    size = len(c)
    if size < 100:
        continue
    bc = np.bincount([bL[i] for i in c], minlength=5)
    state = np.argmax(bc)
    stperc = 100 * bc[state] / size   # % of members in the dominant state
    clu_by_state[state].append((idx, stperc, sum(bc)))
# ID best clusters: per state, the cluster index with highest dominant-state %.
hist = lambda x: np.histogram(x, bins=48, range=(4, 12))[0]
C = [max(cl, key=lambda x: x[1])[0] for cl in clu_by_state]
# Query recorded support/numclu values, build the derived lattice at the
# current support, persist it, and log per-bin scores for each cluster count.
# NOTE(review): reconstructed from a whitespace-mangled paste whose leading
# `SPT = [` was lost; restored to match the identical query used earlier in
# this file. `support`, `CMr`, `Dr`, `sigma`, `TBIN10`, `DE_LABEL`, `home`,
# `db`, and `lat` are assumed defined earlier in the session.
SPT = [i[0] for i in db.runquery('select distinct support from latt order by support')]
NC = [i[0] for i in db.runquery('select distinct numclu from latt order by numclu')]
mf, lf = {}, {}
dl, ik = {}, {}
key, clu, cent, var, Gm = {}, {}, {}, {}, {}
s = support
mf[s], lf[s] = lat.maxminer(CMr, s)
dl[s], ik[s] = lat.derived_lattice(mf[s], Dr, CMr)
# with-block closes the handle (original left the file open after dump).
with open(home + '/work/latt_intrinsics/dlat2_%d.p' % support, 'wb') as fd:
    pickle.dump(dl[s], fd)
for num_clu in NC:
    key[s], clu[s], cent[s], var[s], Gm[s] = lat.cluster_harch(
        dl[s], CMr, Dr, theta=.5, num_k=num_clu, dL=None, verbose=False)
    w, t = lat.score_clusters(
        clu[s], Dr, cent[s], var[s], Gm[s], sigma, DE_LABEL)
    for k in TBIN10:
        logging.info('SCORE,W,%d,%d,%s,%.5f', support, num_clu, k, w[k])
    for k in TBIN10:
        logging.info('SCORE,T,%d,%d,%s,%.5f', support, num_clu, k, t[k])