def latent_equality(l1, l2, data1=None, include_ss=True, tol=0.0001): domains1 = l1['domains'] domains2 = l2['domains'] if set(domains1.keys()) != set(domains2.keys()): return False domain_groupid_maps = {} for d in domains1.keys(): d1 = domains1[d] d2 = domains2[d] for hp in d1['hps'].keys(): if delta_thold(d1['hps'][hp], d2['hps'][hp]): return False if (util.canonicalize_assignment(d1['assignment']) != util.canonicalize_assignment(d2['assignment'])).all(): return False gid_map = {} domain_groupid_maps[d] = { k: v for k, v in zip(d1['assignment'], d2['assignment']) } relations1 = l1['relations'] relations2 = l2['relations'] for r in relations1.keys(): r1 = relations1[r] r2 = relations2[r] for hp in r1['hps'].keys(): if delta_thold(r1['hps'][hp], r2['hps'][hp]): return False if not include_ss: return True # god this is going to be a bitch for r in relations1.keys(): rdef = data1['relations'][r]['relation'] print "rdef=", rdef ss1 = relations1[r]['ss'] ss2 = relations2[r]['ss'] for g1 in ss1.keys(): comp1 = ss1[g1] g2 = tuple([domain_groupid_maps[rn][g] for rn, g in zip(rdef, g1)]) print "g2=", g2, "g1=", g1 comp2 = ss2[g2] for param in comp1.keys(): if delta_thold(comp1[param], comp2[param]): return False return True
def plot_t1t1_latent(ax, adj_matrix, assign_vect, cmap=None, norm=None): """ Plot a latent with the assign vect returns the sorted order of the assignment vector """ from matplotlib import pylab a = util.canonicalize_assignment(assign_vect) # make big clusters first ai = np.argsort(a).flatten() conn = adj_matrix print "adj_matrix", adj_matrix.shape, adj_matrix.dtype s_conn = conn[ai] s_conn = s_conn[:, ai] if cmap == None: ax.imshow(s_conn, interpolation='nearest', cmap=pylab.cm.Greys) else: ax.imshow(s_conn, interpolation='nearest', cmap=cmap, norm=norm) x_line_offset = 0.5 y_line_offset = 0.4 for i in np.argwhere(np.diff(a[ai]) > 0): ax.axhline(i + y_line_offset, c='b', alpha=0.7, linewidth=1.0) ax.axvline(i + x_line_offset, c='b', alpha=0.7, linewidth=1.0) ax.set_xticks([]) ax.set_yticks([]) return ai
def plot_t1t1_latent(ax, adj_matrix, assign_vect, cmap=None, norm=None): """ Plot a latent with the assign vect returns the sorted order of the assignment vector """ from matplotlib import pylab a = util.canonicalize_assignment(assign_vect) # make big clusters first ai = np.argsort(a).flatten() conn = adj_matrix print "adj_matrix", adj_matrix.shape, adj_matrix.dtype s_conn =conn[ai] s_conn = s_conn[:, ai] if cmap == None: ax.imshow(s_conn, interpolation='nearest', cmap=pylab.cm.Greys) else: ax.imshow(s_conn, interpolation='nearest', cmap=cmap, norm=norm) x_line_offset = 0.5 y_line_offset = 0.4 for i in np.argwhere(np.diff(a[ai]) > 0): ax.axhline(i + y_line_offset, c='b', alpha=0.7, linewidth=1.0) ax.axvline(i + x_line_offset, c='b', alpha=0.7, linewidth=1.0) ax.set_xticks([]) ax.set_yticks([]) return ai
def latent_equality(l1, l2, data1=None, include_ss=True, tol=0.0001): domains1 = l1["domains"] domains2 = l2["domains"] if set(domains1.keys()) != set(domains2.keys()): return False domain_groupid_maps = {} for d in domains1.keys(): d1 = domains1[d] d2 = domains2[d] for hp in d1["hps"].keys(): if delta_thold(d1["hps"][hp], d2["hps"][hp]): return False if (util.canonicalize_assignment(d1["assignment"]) != util.canonicalize_assignment(d2["assignment"])).all(): return False gid_map = {} domain_groupid_maps[d] = {k: v for k, v in zip(d1["assignment"], d2["assignment"])} relations1 = l1["relations"] relations2 = l2["relations"] for r in relations1.keys(): r1 = relations1[r] r2 = relations2[r] for hp in r1["hps"].keys(): if delta_thold(r1["hps"][hp], r2["hps"][hp]): return False if not include_ss: return True # god this is going to be a bitch for r in relations1.keys(): rdef = data1["relations"][r]["relation"] print "rdef=", rdef ss1 = relations1[r]["ss"] ss2 = relations2[r]["ss"] for g1 in ss1.keys(): comp1 = ss1[g1] g2 = tuple([domain_groupid_maps[rn][g] for rn, g in zip(rdef, g1)]) print "g2=", g2, "g1=", g1 comp2 = ss2[g2] for param in comp1.keys(): if delta_thold(comp1[param], comp2[param]): return False return True
def plot_t1t1_latent_count(ax, adj_matrix, assign_vect, size_scale=1.0, color='k', alpha=1.0): """ Plot a latent with the assign vect for count data returns the sorted order of the assignment vector """ from matplotlib import pylab a = util.canonicalize_assignment(assign_vect) # make big clusters first ai = np.argsort(a).flatten() conn = adj_matrix print "adj_matrix", adj_matrix.shape, adj_matrix.dtype s_conn = conn[ai] s_conn = s_conn[:, ai] # scatter points s_x = [] s_y = [] s_s = [] for r in range(conn.shape[0]): for c in range(conn.shape[1]): if s_conn[r, c] > 0: s_x.append(c) s_y.append(r) s_s.append(s_conn[r, c]) ax.scatter(s_x, s_y, s=s_s, c=color, edgecolor='none', alpha=alpha) x_line_offset = 0.5 y_line_offset = 0.4 for i in np.argwhere(np.diff(a[ai]) > 0): ax.axhline(i + y_line_offset, c='k', alpha=0.7, linewidth=1.0) ax.axvline(i + x_line_offset, c='k', alpha=0.7, linewidth=1.0) ax.set_xlim(0, s_conn.shape[1]) ax.set_ylim(s_conn.shape[0], 0) ax.set_xticks([]) ax.set_yticks([]) return ai
def plot_t1t1_latent_count(ax, adj_matrix, assign_vect, size_scale=1.0, color='k', alpha=1.0): """ Plot a latent with the assign vect for count data returns the sorted order of the assignment vector """ from matplotlib import pylab a = util.canonicalize_assignment(assign_vect) # make big clusters first ai = np.argsort(a).flatten() conn = adj_matrix print "adj_matrix", adj_matrix.shape, adj_matrix.dtype s_conn =conn[ai] s_conn = s_conn[:, ai] # scatter points s_x = [] s_y = [] s_s = [] for r in range(conn.shape[0]): for c in range(conn.shape[1]): if s_conn[r, c] > 0: s_x.append(c) s_y.append(r) s_s.append(s_conn[r, c]) ax.scatter(s_x, s_y, s=s_s, c=color, edgecolor='none', alpha=alpha) x_line_offset = 0.5 y_line_offset = 0.4 for i in np.argwhere(np.diff(a[ai]) > 0): ax.axhline(i + y_line_offset, c='k', alpha=0.7, linewidth=1.0) ax.axvline(i + x_line_offset, c='k', alpha=0.7, linewidth=1.0) ax.set_xlim(0, s_conn.shape[1]) ax.set_ylim(s_conn.shape[0], 0) ax.set_xticks([]) ax.set_yticks([]) return ai
# Draw sample sets from the model, counting how often each canonical
# assignment configuration is visited across domains.
run = runner.Runner(latent, data, kernel_config)
domain_names = sorted(data['domains'].keys())
ss = []  # one {assignment-tuple: count} histogram per sample set
print "SAMPLING"
for samp_set in range(SAMPLE_SETS):
    samp_set_items = {}  # canonical assignment tuple -> occurrence count
    print "Samp_set", samp_set
    for s in range(SAMPLES_N):
        t1 = time.time()
        run.run_iters(ITERS_PER_SAMPLE)
        a_s = []
        for dn in domain_names:
            # canonicalize so group-label permutations collapse to one key
            a = putil.canonicalize_assignment(run.model.domains[dn].get_assignments())
            a = tuple(a)
            a_s.append(a)
        a_s = tuple(a_s)  # hashable key over all domains
        if a_s not in samp_set_items:
            samp_set_items[a_s] = 0
        samp_set_items[a_s] += 1
        t2 = time.time()
        delta = (t2-t1)
        # rough ETA: remaining sample sets x per-sample cost x samples per set
        approx_time_left = (SAMPLE_SETS-samp_set)*delta*SAMPLES_N
        print "%s : roughly %3.2f min left" %(outfile, approx_time_left/60.)
    ss.append(samp_set_items)
print "DONE"
# NOTE(review): this statement is truncated in the visible chunk -- the
# pickle payload and file handle continue beyond this view.
pickle.dump({'samp_set_items' : ss,
# Draw sample sets from the model, counting how often each canonical
# assignment configuration is visited across domains.
run = runner.Runner(latent, data, kernel_config)
domain_names = sorted(data['domains'].keys())
ss = []  # one {assignment-tuple: count} histogram per sample set
print "SAMPLING"
for samp_set in range(SAMPLE_SETS):
    samp_set_items = {}  # canonical assignment tuple -> occurrence count
    print "Samp_set", samp_set
    for s in range(SAMPLES_N):
        t1 = time.time()
        run.run_iters(ITERS_PER_SAMPLE)
        a_s = []
        for dn in domain_names:
            # canonicalize so group-label permutations collapse to one key
            a = putil.canonicalize_assignment(
                run.model.domains[dn].get_assignments())
            a = tuple(a)
            a_s.append(a)
        a_s = tuple(a_s)  # hashable key over all domains
        if a_s not in samp_set_items:
            samp_set_items[a_s] = 0
        samp_set_items[a_s] += 1
        t2 = time.time()
        delta = (t2 - t1)
        # rough ETA: remaining sample sets x per-sample cost x samples per set
        approx_time_left = (SAMPLE_SETS - samp_set) * delta * SAMPLES_N
        print "%s : roughly %3.2f min left" % (outfile, approx_time_left / 60.)
    ss.append(samp_set_items)
print "DONE"
def plot_t1t1_params(fig, conn_and_dist, assign_vect, ss, hps, MAX_DIST=10,
                     model="LogisticDistance", MAX_CLASSES=20):
    """
    In the same order that we would plot the latent matrix, plot
    the per-parameter properties.

    For each pair of groups (c1, c2) a subplot shows the empirical
    link probability (or count) vs. distance together with the fitted
    model curve from the sufficient statistics in ss.

    conn_and_dist : structured array indexed [n1, n2] with fields
                    'distance' and 'link'
    ss            : dict keyed by (group1, group2) of per-component params
    hps           : per-relation hps
    model         : one of the *Distance* component model names; anything
                    without "istance" in its name is silently skipped

    note, tragically, this wants the whole figure
    """
    from mpl_toolkits.axes_grid1 import Grid
    from matplotlib import pylab
    assign_vect = np.array(assign_vect)
    canon_assign_vect = util.canonicalize_assignment(assign_vect)
    # create the mapping between existing and new (canonical -> original id)
    canon_to_old = {}
    for i, v in enumerate(canon_assign_vect):
        canon_to_old[v] = assign_vect[i]
    CLASSES = np.sort(np.unique(canon_assign_vect))
    CLASSN = len(CLASSES)
    if CLASSN > MAX_CLASSES:
        print "WARNING, TOO MANY CLASSES"
        CLASSN = MAX_CLASSES  # only the first MAX_CLASSES groups are plotted
    img_grid = Grid(fig, 111,  # similar to subplot(111)
                    nrows_ncols=(CLASSN, CLASSN),
                    axes_pad=0.1,
                    add_all=True,
                    share_all=True,
                    label_mode='L',
                    )
    # only distance-parameterized models are supported below
    if "istance" not in model:
        return
    for c1i, c1_canon in enumerate(CLASSES[:MAX_CLASSES]):
        for c2i, c2_canon in enumerate(CLASSES[:MAX_CLASSES]):
            c1 = canon_to_old[c1_canon]
            c2 = canon_to_old[c2_canon]
            ax_pos = c1i * CLASSN + c2i  # row-major position in the grid
            ax = img_grid[ax_pos]
            nodes_1 = np.argwhere(assign_vect == c1).flatten()
            nodes_2 = np.argwhere(assign_vect == c2).flatten()
            # collect distances split by link/no-link, plus flat (d, link) pairs
            conn_dist_hist = []
            noconn_dist_hist = []
            flatten_dist_val = []
            assert len(nodes_1) > 0
            assert len(nodes_2) > 0
            for n1 in nodes_1:
                for n2 in nodes_2:
                    d = conn_and_dist[n1, n2]['distance']
                    if conn_and_dist[n1, n2]['link']:
                        conn_dist_hist.append(d)
                    else:
                        noconn_dist_hist.append(d)
                    flatten_dist_val.append((d, conn_and_dist[n1, n2]['link']))
            flatten_dist_val = np.array(flatten_dist_val)
            bins = np.linspace(0, MAX_DIST, 20)       # empirical histogram bins
            fine_bins = np.linspace(0, MAX_DIST, 100)  # smooth model curve grid
            if model == "LogisticDistance" or model == "LogisticDistanceFixedLambda":
                # compute prob as a function of distance for this class
                # NOTE(review): bins with no observations give 0/0 -> nan here
                htrue, _ = np.histogram(conn_dist_hist, bins)
                hfalse, _ = np.histogram(noconn_dist_hist, bins)
                p = htrue.astype(float) / (hfalse + htrue)
                ax.plot(bins[:-1], p, c='b', linewidth=3)
            if model == "LogisticDistance":
                c = ss[(c1, c2)]
                print "MAX_DISTANCE=", MAX_DIST, np.max(fine_bins), np.max(bins), c
                # fitted logistic curve rescaled into [p_min, p_max]
                y = util.logistic(fine_bins, c['mu'], c['lambda'])
                y = y * (hps['p_max'] - hps['p_min']) + hps['p_min']
                ax.plot(fine_bins, y, c='r', linewidth=2)
                ax.text(0, 0.2, r"mu: %3.2f" % c['mu'], fontsize=4)
                ax.text(0, 0.6, r"lamb: %3.2f" % c['lambda'], fontsize=4)
                ax.axvline(c['mu'], c='k')
            elif model == "LogisticDistanceFixedLambda":
                c = ss[(c1, c2)]
                # lambda is a relation-level hp here; p_scale is per-component
                y = util.logistic(fine_bins, c['mu'], hps['lambda'])
                y = y * (c['p_scale'] - hps['p_min']) + hps['p_min']
                ax.plot(fine_bins, y, c='r')
                ax.text(0, 0.2, r"mu: %3.2f" % c['mu'], fontsize=4)
                ax.text(0, 0.6, r"lamb: %3.2f" % hps['lambda'], fontsize=4)
                ax.axvline(c['mu'], c='k')
            elif model == "ExponentialDistancePoisson":
                if len(flatten_dist_val) > 0:
                    # jitter the raw (distance, count) points for visibility
                    x_jitter = np.random.normal(0, 0.01, len(flatten_dist_val))
                    y_jitter = np.random.normal(0, 0.05, len(flatten_dist_val))
                    ax.scatter(flatten_dist_val[:, 0] + x_jitter,
                               flatten_dist_val[:, 1] + y_jitter,
                               edgecolor='none', s=2)
                c = ss[(c1, c2)]
                mu = c['mu']
                rate_scale = c['rate_scale']
                # exponential decay of rate with distance
                y = np.exp(-fine_bins / mu)
                y = y * rate_scale
                ax.plot(fine_bins, y, c='r')
                ax.text(0, 0.2, r"mu: %3.2f" % c['mu'], fontsize=4)
                ax.text(0, 0.6, r"rate_scale: %3.2f" % c['rate_scale'],
                        fontsize=4)
                ax.set_ylim(-1, 20.0)
                ax.axvline(c['mu'], c='k')
            elif model == "LogisticDistancePoisson":
                if len(flatten_dist_val) > 0:
                    # jitter the raw (distance, count) points for visibility
                    x_jitter = np.random.normal(0, 0.01, len(flatten_dist_val))
                    y_jitter = np.random.normal(0, 0.05, len(flatten_dist_val))
                    ax.scatter(flatten_dist_val[:, 0] + x_jitter,
                               flatten_dist_val[:, 1] + y_jitter,
                               edgecolor='none', s=2)
                c = ss[(c1, c2)]
                # logistic curve rescaled into [rate_min, rate_scale]
                y = util.logistic(fine_bins, c['mu'], hps['lambda'])
                y = y * (c['rate_scale'] - hps['rate_min']) + hps['rate_min']
                ax.plot(fine_bins, y, c='r')
                ax.text(0, 1, r"mu: %3.2f" % c['mu'], fontsize=4)
                ax.text(0, 3, r"rate_scale: %3.2f" % c['rate_scale'],
                        fontsize=4)
                ax.set_ylim(-1, 20.0)
                ax.axvline(c['mu'], c='k')
            elif model == "LinearDistance":
                print "MAX_DISTANCE=", MAX_DIST, np.max(fine_bins), np.max(bins)
                c = ss[(c1, c2)]
                y = util.linear_dist(fine_bins, c['p'], c['mu'])
                y += hps['p_min']
                ax.plot(fine_bins, y, c='r')
            ax.set_xlim(0, MAX_DIST)
def plot_t1t1_params(fig, conn_and_dist, assign_vect, ss, hps, MAX_DIST=10,
                     model="LogisticDistance", MAX_CLASSES=20):
    """
    In the same order that we would plot the latent matrix, plot
    the per-parameter properties.

    Duplicate of plot_t1t1_params above -- NOTE(review): consider removing
    one copy; the later definition shadows the earlier one at import time.

    hps are per-relation hps

    note, tragically, this wants the whole figure
    """
    from mpl_toolkits.axes_grid1 import Grid
    from matplotlib import pylab
    assign_vect = np.array(assign_vect)
    canon_assign_vect = util.canonicalize_assignment(assign_vect)
    # create the mapping between existing and new (canonical -> original id)
    canon_to_old = {}
    for i, v in enumerate(canon_assign_vect):
        canon_to_old[v] = assign_vect[i]
    CLASSES = np.sort(np.unique(canon_assign_vect))
    CLASSN = len(CLASSES)
    if CLASSN > MAX_CLASSES:
        print "WARNING, TOO MANY CLASSES"
        CLASSN = MAX_CLASSES  # only the first MAX_CLASSES groups are plotted
    img_grid = Grid(fig, 111,  # similar to subplot(111)
                    nrows_ncols=(CLASSN, CLASSN),
                    axes_pad=0.1,
                    add_all=True,
                    share_all=True,
                    label_mode='L',
                    )
    # only distance-parameterized models are supported below
    if "istance" not in model:
        return
    for c1i, c1_canon in enumerate(CLASSES[:MAX_CLASSES]):
        for c2i, c2_canon in enumerate(CLASSES[:MAX_CLASSES]):
            c1 = canon_to_old[c1_canon]
            c2 = canon_to_old[c2_canon]
            ax_pos = c1i * CLASSN + c2i  # row-major position in the grid
            ax = img_grid[ax_pos]
            nodes_1 = np.argwhere(assign_vect == c1).flatten()
            nodes_2 = np.argwhere(assign_vect == c2).flatten()
            # collect distances split by link/no-link, plus flat (d, link) pairs
            conn_dist_hist = []
            noconn_dist_hist = []
            flatten_dist_val = []
            assert len(nodes_1) > 0
            assert len(nodes_2) > 0
            for n1 in nodes_1:
                for n2 in nodes_2:
                    d = conn_and_dist[n1, n2]['distance']
                    if conn_and_dist[n1, n2]['link']:
                        conn_dist_hist.append(d)
                    else:
                        noconn_dist_hist.append(d)
                    flatten_dist_val.append((d, conn_and_dist[n1, n2]['link']))
            flatten_dist_val = np.array(flatten_dist_val)
            bins = np.linspace(0, MAX_DIST, 20)       # empirical histogram bins
            fine_bins = np.linspace(0, MAX_DIST, 100)  # smooth model curve grid
            if model == "LogisticDistance" or model == "LogisticDistanceFixedLambda":
                # compute prob as a function of distance for this class
                # NOTE(review): bins with no observations give 0/0 -> nan here
                htrue, _ = np.histogram(conn_dist_hist, bins)
                hfalse, _ = np.histogram(noconn_dist_hist, bins)
                p = htrue.astype(float) / (hfalse + htrue)
                ax.plot(bins[:-1], p, c='b', linewidth=3)
            if model == "LogisticDistance":
                c = ss[(c1, c2)]
                print "MAX_DISTANCE=", MAX_DIST, np.max(fine_bins), np.max(bins), c
                # fitted logistic curve rescaled into [p_min, p_max]
                y = util.logistic(fine_bins, c['mu'], c['lambda'])
                y = y * (hps['p_max'] - hps['p_min']) + hps['p_min']
                ax.plot(fine_bins, y, c='r', linewidth=2)
                ax.text(0, 0.2, r"mu: %3.2f" % c['mu'], fontsize=4)
                ax.text(0, 0.6, r"lamb: %3.2f" % c['lambda'], fontsize=4)
                ax.axvline(c['mu'], c='k')
            elif model == "LogisticDistanceFixedLambda":
                c = ss[(c1, c2)]
                # lambda is a relation-level hp here; p_scale is per-component
                y = util.logistic(fine_bins, c['mu'], hps['lambda'])
                y = y * (c['p_scale'] - hps['p_min']) + hps['p_min']
                ax.plot(fine_bins, y, c='r')
                ax.text(0, 0.2, r"mu: %3.2f" % c['mu'], fontsize=4)
                ax.text(0, 0.6, r"lamb: %3.2f" % hps['lambda'], fontsize=4)
                ax.axvline(c['mu'], c='k')
            elif model == "ExponentialDistancePoisson":
                if len(flatten_dist_val) > 0:
                    # jitter the raw (distance, count) points for visibility
                    x_jitter = np.random.normal(0, 0.01, len(flatten_dist_val))
                    y_jitter = np.random.normal(0, 0.05, len(flatten_dist_val))
                    ax.scatter(flatten_dist_val[:, 0] + x_jitter,
                               flatten_dist_val[:, 1] + y_jitter,
                               edgecolor='none', s=2)
                c = ss[(c1, c2)]
                mu = c['mu']
                rate_scale = c['rate_scale']
                # exponential decay of rate with distance
                y = np.exp(-fine_bins/mu)
                y = y * rate_scale
                ax.plot(fine_bins, y, c='r')
                ax.text(0, 0.2, r"mu: %3.2f" % c['mu'], fontsize=4)
                ax.text(0, 0.6, r"rate_scale: %3.2f" % c['rate_scale'],
                        fontsize=4)
                ax.set_ylim(-1, 20.0)
                ax.axvline(c['mu'], c='k')
            elif model == "LogisticDistancePoisson":
                if len(flatten_dist_val) > 0:
                    # jitter the raw (distance, count) points for visibility
                    x_jitter = np.random.normal(0, 0.01, len(flatten_dist_val))
                    y_jitter = np.random.normal(0, 0.05, len(flatten_dist_val))
                    ax.scatter(flatten_dist_val[:, 0] + x_jitter,
                               flatten_dist_val[:, 1] + y_jitter,
                               edgecolor='none', s=2)
                c = ss[(c1, c2)]
                # logistic curve rescaled into [rate_min, rate_scale]
                y = util.logistic(fine_bins, c['mu'], hps['lambda'])
                y = y * (c['rate_scale'] - hps['rate_min']) + hps['rate_min']
                ax.plot(fine_bins, y, c='r')
                ax.text(0, 1, r"mu: %3.2f" % c['mu'], fontsize=4)
                ax.text(0, 3, r"rate_scale: %3.2f" % c['rate_scale'],
                        fontsize=4)
                ax.set_ylim(-1, 20.0)
                ax.axvline(c['mu'], c='k')
            elif model == "LinearDistance":
                print "MAX_DISTANCE=", MAX_DIST, np.max(fine_bins), np.max(bins)
                c = ss[(c1, c2)]
                y = util.linear_dist(fine_bins, c['p'], c['mu'])
                y += hps['p_min']
                ax.plot(fine_bins, y, c='r')
            ax.set_xlim(0, MAX_DIST)