예제 #1
0
def latent_equality(l1, l2, data1=None, include_ss=True, tol=0.0001):
    """
    Return True when two latent dicts describe the same state.

    The comparison runs in stages and returns False at the first mismatch:

    1. both latents must have the same set of domain names;
    2. for every domain, each hyperparameter of ``l1`` is compared with
       ``delta_thold`` (a truthy result counts as a mismatch) and the
       canonicalized assignment vectors must agree at every position;
    3. for every relation of ``l1``, each hyperparameter is compared
       with ``delta_thold``;
    4. if ``include_ss`` is true, every entry of a relation's 'ss' dict
       in ``l1`` is compared, parameter by parameter, with the entry of
       ``l2`` stored under the correspondingly relabelled group-id tuple.

    Only the relations and hyperparameters present in ``l1`` are visited;
    a name missing from ``l2`` raises KeyError.

    l1, l2 : dicts with 'domains' and 'relations' entries. Each domain
        holds 'hps' and 'assignment'; each relation holds 'hps' and,
        when include_ss is true, 'ss'.
    data1 : dict whose ['relations'][name]['relation'] lists the domain
        name of each axis of the relation. Must be supplied when
        include_ss is true.
    include_ss : when false, stage 4 is skipped.
    tol : kept for interface compatibility; it is not forwarded to
        delta_thold.
    """
    domains1 = l1['domains']
    domains2 = l2['domains']
    if set(domains1) != set(domains2):
        return False

    # per domain: group id used by l1 -> group id used by l2
    domain_groupid_maps = {}
    for d in domains1:
        d1 = domains1[d]
        d2 = domains2[d]
        for hp in d1['hps']:
            if delta_thold(d1['hps'][hp], d2['hps'][hp]):
                return False
        # One differing position is enough for the partitions to differ,
        # so reduce with any(); all() only fired when every position
        # disagreed and let partially different assignments through.
        if (util.canonicalize_assignment(d1['assignment']) !=
                util.canonicalize_assignment(d2['assignment'])).any():
            return False

        domain_groupid_maps[d] = dict(zip(d1['assignment'],
                                          d2['assignment']))

    relations1 = l1['relations']
    relations2 = l2['relations']
    for r in relations1:
        r1 = relations1[r]
        r2 = relations2[r]
        for hp in r1['hps']:
            if delta_thold(r1['hps'][hp], r2['hps'][hp]):
                return False
    if not include_ss:
        return True

    for r in relations1:
        rdef = data1['relations'][r]['relation']
        # single-argument print: same output on Python 2, valid on Python 3
        print("rdef= %s" % (rdef,))
        ss1 = relations1[r]['ss']
        ss2 = relations2[r]['ss']
        for g1 in ss1:
            comp1 = ss1[g1]
            # translate l1's group-id tuple into l2's labelling, axis by axis
            g2 = tuple(domain_groupid_maps[rn][g] for rn, g in zip(rdef, g1))
            print("g2= %s g1= %s" % (g2, g1))
            comp2 = ss2[g2]
            for param in comp1:
                if delta_thold(comp1[param], comp2[param]):
                    return False
    return True
예제 #2
0
def plot_t1t1_latent(ax, adj_matrix, assign_vect, cmap=None, norm=None):
    """
    Plot a latent with the assign vect

    The rows and columns of ``adj_matrix`` are reordered by the
    canonicalized assignment vector and shown with ``ax.imshow``. A
    horizontal and a vertical line are drawn after every position where
    the sorted assignment value increases, and the axis ticks are removed.

    ax : object providing imshow, axhline, axvline, set_xticks and
        set_yticks.
    adj_matrix : 2-d array; it is indexed as m[idx] and m[:, idx].
    assign_vect : assignment vector, passed to
        util.canonicalize_assignment.
    cmap : colormap for imshow. When None, pylab.cm.Greys is used and
        ``norm`` is not forwarded.
    norm : forwarded to imshow only when ``cmap`` is given.

    returns the sorted order of the assignment vector
    """

    from matplotlib import pylab

    a = util.canonicalize_assignment(assign_vect)  # make big clusters first

    ai = np.argsort(a).flatten()

    print("adj_matrix %s %s" % (adj_matrix.shape, adj_matrix.dtype))
    s_conn = adj_matrix[ai][:, ai]

    # identity test: "== None" would be evaluated by the cmap object itself
    if cmap is None:
        ax.imshow(s_conn, interpolation='nearest', cmap=pylab.cm.Greys)
    else:
        ax.imshow(s_conn, interpolation='nearest', cmap=cmap, norm=norm)

    x_line_offset = 0.5
    y_line_offset = 0.4
    # flatnonzero yields scalar indices, so the axes receive plain numbers
    # rather than the length-1 arrays produced by iterating argwhere
    for i in np.flatnonzero(np.diff(a[ai]) > 0):
        ax.axhline(i + y_line_offset, c='b', alpha=0.7, linewidth=1.0)
        ax.axvline(i + x_line_offset, c='b', alpha=0.7, linewidth=1.0)

    ax.set_xticks([])
    ax.set_yticks([])

    return ai
예제 #3
0
def plot_t1t1_latent(ax, adj_matrix, assign_vect, cmap=None, norm=None):
    """
    Plot a latent with the assign vect

    ``adj_matrix`` is shown with ``ax.imshow`` after its rows and columns
    have been permuted into the order given by sorting the canonicalized
    assignment vector. Wherever the sorted assignment value increases, a
    horizontal and a vertical separator line are drawn. Ticks are removed
    from both axes.

    ax : object providing imshow, axhline, axvline, set_xticks and
        set_yticks.
    adj_matrix : 2-d array; it is indexed as m[idx] and m[:, idx].
    assign_vect : assignment vector, passed to
        util.canonicalize_assignment.
    cmap : colormap for imshow. When None, pylab.cm.Greys is used and
        ``norm`` is not forwarded.
    norm : forwarded to imshow only when ``cmap`` is given.

    returns the sorted order of the assignment vector
    """

    from matplotlib import pylab

    a = util.canonicalize_assignment(assign_vect)  # make big clusters first

    ai = np.argsort(a).flatten()

    print("adj_matrix %s %s" % (adj_matrix.shape, adj_matrix.dtype))
    s_conn = adj_matrix[ai]
    s_conn = s_conn[:, ai]

    # compare against None by identity, not with the cmap's own __eq__
    if cmap is None:
        ax.imshow(s_conn, interpolation='nearest', cmap=pylab.cm.Greys)
    else:
        ax.imshow(s_conn, interpolation='nearest', cmap=cmap,
                  norm=norm)

    x_line_offset = 0.5
    y_line_offset = 0.4
    # scalar boundary indices (argwhere would hand back length-1 arrays)
    boundaries = np.flatnonzero(np.diff(a[ai]) > 0)
    for i in boundaries:
        ax.axhline(i + y_line_offset, c='b', alpha=0.7, linewidth=1.0)
        ax.axvline(i + x_line_offset, c='b', alpha=0.7, linewidth=1.0)

    ax.set_xticks([])
    ax.set_yticks([])

    return ai
예제 #4
0
def latent_equality(l1, l2, data1=None, include_ss=True, tol=0.0001):
    """
    Return True when two latent dicts describe the same state.

    Checks are made in this order, returning False at the first mismatch:

    1. the two latents must name the same domains;
    2. per domain, every hyperparameter of ``l1`` is compared through
       ``delta_thold`` (truthy means "differs") and the canonicalized
       assignment vectors must be equal at every position;
    3. per relation of ``l1``, every hyperparameter is compared through
       ``delta_thold``;
    4. when ``include_ss`` is true, each entry of a relation's "ss" dict
       in ``l1`` is compared, parameter by parameter, with the entry of
       ``l2`` found under the relabelled group-id tuple.

    Only names present in ``l1`` are visited; a relation or
    hyperparameter missing from ``l2`` raises KeyError.

    l1, l2 : dicts with "domains" and "relations" entries. Each domain
        holds "hps" and "assignment"; each relation holds "hps" and,
        when include_ss is true, "ss".
    data1 : dict whose ["relations"][name]["relation"] lists the domain
        name of each axis of the relation. Must be supplied when
        include_ss is true.
    include_ss : when false, step 4 is skipped.
    tol : kept for interface compatibility; it is not forwarded to
        delta_thold.
    """
    domains1 = l1["domains"]
    domains2 = l2["domains"]
    if set(domains1) != set(domains2):
        return False

    # per domain: group id used by l1 -> group id used by l2
    domain_groupid_maps = {}
    for d in domains1:
        d1 = domains1[d]
        d2 = domains2[d]
        for hp in d1["hps"]:
            if delta_thold(d1["hps"][hp], d2["hps"][hp]):
                return False

        canon1 = util.canonicalize_assignment(d1["assignment"])
        canon2 = util.canonicalize_assignment(d2["assignment"])
        # Any single disagreement makes the partitions different. The
        # previous all() reduction only rejected vectors that disagreed
        # at every position.
        if (canon1 != canon2).any():
            return False

        domain_groupid_maps[d] = dict(zip(d1["assignment"], d2["assignment"]))

    relations1 = l1["relations"]
    relations2 = l2["relations"]
    for r in relations1:
        r1 = relations1[r]
        r2 = relations2[r]
        for hp in r1["hps"]:
            if delta_thold(r1["hps"][hp], r2["hps"][hp]):
                return False
    if not include_ss:
        return True

    for r in relations1:
        rdef = data1["relations"][r]["relation"]
        # single-argument print: same output on Python 2, valid on Python 3
        print("rdef= %s" % (rdef,))
        ss1 = relations1[r]["ss"]
        ss2 = relations2[r]["ss"]
        for g1 in ss1:
            comp1 = ss1[g1]
            # map each axis' group id from l1's labelling to l2's
            g2 = tuple(domain_groupid_maps[rn][g] for rn, g in zip(rdef, g1))
            print("g2= %s g1= %s" % (g2, g1))
            comp2 = ss2[g2]
            for param in comp1:
                if delta_thold(comp1[param], comp2[param]):
                    return False
    return True
예제 #5
0
def plot_t1t1_latent_count(ax,
                           adj_matrix,
                           assign_vect,
                           size_scale=1.0,
                           color='k',
                           alpha=1.0):
    """
    Plot a latent with the assign vect for count data

    The rows and columns of ``adj_matrix`` are reordered by the
    canonicalized assignment vector. Every strictly positive entry of the
    reordered matrix is drawn with ``ax.scatter`` at x = column,
    y = row, with marker size (the ``s`` argument) equal to the entry
    multiplied by ``size_scale``. A horizontal and a vertical line are
    drawn after every position where the sorted assignment value
    increases. The y axis is set to run from the row count down to 0 and
    ticks are removed.

    ax : object providing scatter, axhline, axvline, set_xlim, set_ylim,
        set_xticks and set_yticks.
    adj_matrix : dense 2-d array of counts.
    assign_vect : assignment vector, passed to
        util.canonicalize_assignment.
    size_scale : multiplier applied to each count to get its marker size.
    color, alpha : forwarded to scatter as ``c`` and ``alpha``.

    returns the sorted order of the assignment vector
    """

    a = util.canonicalize_assignment(assign_vect)  # make big clusters first

    ai = np.argsort(a).flatten()

    print("adj_matrix %s %s" % (adj_matrix.shape, adj_matrix.dtype))
    s_conn = np.asarray(adj_matrix[ai][:, ai])

    # np.nonzero walks the matrix in row-major order, i.e. the same order
    # as a nested row/column loop, without the Python-level iteration.
    rows, cols = np.nonzero(s_conn > 0)
    # size_scale used to be accepted and then ignored; apply it here
    sizes = s_conn[rows, cols] * size_scale

    ax.scatter(cols, rows, s=sizes, c=color, edgecolor='none', alpha=alpha)

    x_line_offset = 0.5
    y_line_offset = 0.4
    # flatnonzero yields scalar indices rather than length-1 arrays
    for i in np.flatnonzero(np.diff(a[ai]) > 0):
        ax.axhline(i + y_line_offset, c='k', alpha=0.7, linewidth=1.0)
        ax.axvline(i + x_line_offset, c='k', alpha=0.7, linewidth=1.0)
    ax.set_xlim(0, s_conn.shape[1])
    ax.set_ylim(s_conn.shape[0], 0)
    ax.set_xticks([])
    ax.set_yticks([])

    return ai
예제 #6
0
def plot_t1t1_latent_count(ax, adj_matrix, assign_vect, size_scale=1.0,
                           color='k', alpha=1.0):
    """
    Plot a latent with the assign vect for count data

    ``adj_matrix`` is permuted (rows and columns) into the order obtained
    by sorting the canonicalized assignment vector. Each strictly positive
    entry is then drawn with ``ax.scatter`` at x = column, y = row, using
    the entry multiplied by ``size_scale`` as the marker size (``s``).
    Separator lines are drawn after every position where the sorted
    assignment value increases, the y axis is set to run from the row
    count down to 0, and ticks are removed.

    ax : object providing scatter, axhline, axvline, set_xlim, set_ylim,
        set_xticks and set_yticks.
    adj_matrix : dense 2-d array of counts.
    assign_vect : assignment vector, passed to
        util.canonicalize_assignment.
    size_scale : multiplier applied to each count to get its marker size.
    color, alpha : forwarded to scatter as ``c`` and ``alpha``.

    returns the sorted order of the assignment vector
    """

    a = util.canonicalize_assignment(assign_vect)  # make big clusters first

    ai = np.argsort(a).flatten()

    print("adj_matrix %s %s" % (adj_matrix.shape, adj_matrix.dtype))
    s_conn = adj_matrix[ai]
    s_conn = np.asarray(s_conn[:, ai])

    # scatter points: positive cells in row-major order, the same order a
    # nested row/column loop would visit them
    s_y, s_x = np.nonzero(s_conn > 0)
    # the size_scale argument was previously never applied
    s_s = s_conn[s_y, s_x] * size_scale

    ax.scatter(s_x, s_y, s=s_s, c=color, edgecolor='none', alpha=alpha)

    x_line_offset = 0.5
    y_line_offset = 0.4
    # scalar boundary indices (argwhere would hand back length-1 arrays)
    boundaries = np.flatnonzero(np.diff(a[ai]) > 0)
    for i in boundaries:
        ax.axhline(i + y_line_offset, c='k', alpha=0.7, linewidth=1.0)
        ax.axvline(i + x_line_offset, c='k', alpha=0.7, linewidth=1.0)
    ax.set_xlim(0, s_conn.shape[1])
    ax.set_ylim(s_conn.shape[0], 0)
    ax.set_xticks([])
    ax.set_yticks([])

    return ai
예제 #7
0
    # Fragment of a sampling routine (the enclosing def is not shown here).
    # It collects SAMPLE_SETS independent tallies of the joint domain
    # assignments seen while running the sampler.
    run = runner.Runner(latent, data, kernel_config)

    # Sorted so that every sample lists the domains in the same order.
    domain_names = sorted(data['domains'].keys())

    # One dict per sample set: joint assignment -> number of times seen.
    ss = []
    print "SAMPLING"
    for samp_set in range(SAMPLE_SETS):
        samp_set_items = {}
        print "Samp_set", samp_set
        for s in range(SAMPLES_N):
            t1 = time.time()
            run.run_iters(ITERS_PER_SAMPLE)
            # Snapshot each domain's canonicalized assignment as a tuple;
            # the tuple of those tuples is hashable and serves as dict key.
            a_s = []
            for dn in domain_names:
                a = putil.canonicalize_assignment(run.model.domains[dn].get_assignments())
                a = tuple(a)
                a_s.append(a)
            a_s = tuple(a_s)
            # Count how often this joint assignment occurred in this set.
            if a_s not in samp_set_items:
                samp_set_items[a_s] = 0
            samp_set_items[a_s] += 1
            t2 = time.time()
            delta = (t2-t1)
            # Rough estimate: duration of this sample times SAMPLES_N for
            # every sample set from the current one onward (samples already
            # taken in the current set are not subtracted).
            approx_time_left = (SAMPLE_SETS-samp_set)*delta*SAMPLES_N
            print "%s : roughly %3.2f min left" %(outfile, approx_time_left/60.)

        ss.append(samp_set_items)
    print "DONE"
    
    pickle.dump({'samp_set_items' : ss, 
예제 #8
0
    # Fragment of a sampling routine (the enclosing def is not shown here).
    # It runs the sampler and tallies, per sample set, how often each joint
    # assignment of all domains is observed.
    run = runner.Runner(latent, data, kernel_config)

    # Fixed (sorted) domain order keeps the sample keys comparable.
    domain_names = sorted(data['domains'].keys())

    # ss[k] maps a joint assignment to its count within sample set k.
    ss = []
    print "SAMPLING"
    for samp_set in range(SAMPLE_SETS):
        samp_set_items = {}
        print "Samp_set", samp_set
        for s in range(SAMPLES_N):
            t1 = time.time()
            run.run_iters(ITERS_PER_SAMPLE)
            # Build a hashable key: one tuple per domain holding its
            # canonicalized assignment, gathered into an outer tuple.
            a_s = []
            for dn in domain_names:
                a = putil.canonicalize_assignment(
                    run.model.domains[dn].get_assignments())
                a = tuple(a)
                a_s.append(a)
            a_s = tuple(a_s)
            # Increment the tally for this joint assignment.
            if a_s not in samp_set_items:
                samp_set_items[a_s] = 0
            samp_set_items[a_s] += 1
            t2 = time.time()
            delta = (t2 - t1)
            # Rough estimate: duration of this sample times SAMPLES_N for
            # every sample set from the current one onward (samples already
            # taken in the current set are not subtracted).
            approx_time_left = (SAMPLE_SETS - samp_set) * delta * SAMPLES_N
            print "%s : roughly %3.2f min left" % (outfile,
                                                   approx_time_left / 60.)

        ss.append(samp_set_items)
    print "DONE"
예제 #9
0
def _scatter_jittered(ax, dist_val):
    """Scatter (distance, link) rows on ax with Gaussian jitter; no-op if empty."""
    if len(dist_val) > 0:
        x_jitter = np.random.normal(0, 0.01, len(dist_val))
        y_jitter = np.random.normal(0, 0.05, len(dist_val))
        ax.scatter(dist_val[:, 0] + x_jitter,
                   dist_val[:, 1] + y_jitter,
                   edgecolor='none',
                   s=2)


def plot_t1t1_params(fig,
                     conn_and_dist,
                     assign_vect,
                     ss,
                     hps,
                     MAX_DIST=10,
                     model="LogisticDistance",
                     MAX_CLASSES=20):
    """
    In the same order that we would plot the latent matrix, plot
    the per-parameter properties

    A CLASSN x CLASSN grid of axes is created on ``fig`` (one cell per
    ordered pair of classes, capped at MAX_CLASSES per side). If the
    model name does not contain "istance" nothing more is drawn.
    Otherwise each cell shows, for the node pairs of that class pair:

    * "LogisticDistance" / "LogisticDistanceFixedLambda": the empirical
      link fraction per distance bin, plus the model's logistic curve;
    * "ExponentialDistancePoisson" / "LogisticDistancePoisson": a
      jittered scatter of (distance, link value), plus the model's rate
      curve;
    * "LinearDistance": the model's linear curve.

    fig : figure handed to mpl_toolkits.axes_grid1.Grid.
    conn_and_dist : 2-d array indexed as [n1, n2]; each element exposes
        'distance' and 'link' fields.
    assign_vect : class assignment per node.
    ss : mapping from (c1, c2) tuples of original (non-canonical) class
        ids to that component's parameters (keys such as 'mu', 'lambda',
        'p_scale', 'rate_scale', 'p', depending on the model).
    hps : the per-relation hps (keys such as 'p_min', 'p_max', 'lambda',
        'rate_min', depending on the model).
    MAX_DIST : upper end of the distance axis and of the histogram bins.
    model : model name selecting what is drawn.
    MAX_CLASSES : maximum number of classes shown per grid side.

    note, tragically, this wants the whole figure

    Returns None.
    """

    from mpl_toolkits.axes_grid1 import Grid

    assign_vect = np.array(assign_vect)
    canon_assign_vect = util.canonicalize_assignment(assign_vect)
    # create the mapping between existing and new
    canon_to_old = dict(zip(canon_assign_vect, assign_vect))

    # np.unique already returns its values sorted
    CLASSES = np.unique(canon_assign_vect)

    CLASSN = len(CLASSES)

    if CLASSN > MAX_CLASSES:
        print("WARNING, TOO MANY CLASSES")
        CLASSN = MAX_CLASSES

    # NOTE(review): add_all appears to be deprecated/removed in newer
    # matplotlib releases -- confirm against the version this project pins.
    img_grid = Grid(
        fig,
        111,  # similar to subplot(111)
        nrows_ncols=(CLASSN, CLASSN),
        axes_pad=0.1,
        add_all=True,
        share_all=True,
        label_mode='L',
    )

    if "istance" not in model:
        return

    shown_classes = CLASSES[:MAX_CLASSES]
    for c1i, c1_canon in enumerate(shown_classes):
        for c2i, c2_canon in enumerate(shown_classes):
            c1 = canon_to_old[c1_canon]
            c2 = canon_to_old[c2_canon]
            ax = img_grid[c1i * CLASSN + c2i]

            nodes_1 = np.argwhere(assign_vect == c1).flatten()
            nodes_2 = np.argwhere(assign_vect == c2).flatten()
            assert len(nodes_1) > 0
            assert len(nodes_2) > 0

            conn_dist_hist = []
            noconn_dist_hist = []
            flatten_dist_val = []
            for n1 in nodes_1:
                for n2 in nodes_2:
                    # fetch the record once instead of three times
                    cell = conn_and_dist[n1, n2]
                    d = cell['distance']
                    link = cell['link']
                    if link:
                        conn_dist_hist.append(d)
                    else:
                        noconn_dist_hist.append(d)
                    flatten_dist_val.append((d, link))
            flatten_dist_val = np.array(flatten_dist_val)
            bins = np.linspace(0, MAX_DIST, 20)
            fine_bins = np.linspace(0, MAX_DIST, 100)

            if model in ("LogisticDistance", "LogisticDistanceFixedLambda"):
                # compute prob as a function of distance for this class
                htrue, _ = np.histogram(conn_dist_hist, bins)
                hfalse, _ = np.histogram(noconn_dist_hist, bins)

                # Bins with no pairs give 0/0 -> NaN, which simply leaves
                # a gap in the line; silence the expected warning.
                with np.errstate(invalid='ignore'):
                    p = htrue.astype(float) / (hfalse + htrue)

                ax.plot(bins[:-1], p, c='b', linewidth=3)

            if model == "LogisticDistance":
                c = ss[(c1, c2)]
                # single-argument print: same output on Python 2 and 3
                print("MAX_DISTANCE= %s %s %s %s" % (
                    MAX_DIST, np.max(fine_bins), np.max(bins), c))
                y = util.logistic(fine_bins, c['mu'], c['lambda'])
                y = y * (hps['p_max'] - hps['p_min']) + hps['p_min']
                ax.plot(fine_bins, y, c='r', linewidth=2)
                ax.text(0, 0.2, r"mu: %3.2f" % c['mu'], fontsize=4)
                ax.text(0, 0.6, r"lamb: %3.2f" % c['lambda'], fontsize=4)
                ax.axvline(c['mu'], c='k')

            elif model == "LogisticDistanceFixedLambda":
                c = ss[(c1, c2)]
                y = util.logistic(fine_bins, c['mu'], hps['lambda'])
                y = y * (c['p_scale'] - hps['p_min']) + hps['p_min']
                ax.plot(fine_bins, y, c='r')
                ax.text(0, 0.2, r"mu: %3.2f" % c['mu'], fontsize=4)
                ax.text(0, 0.6, r"lamb: %3.2f" % hps['lambda'], fontsize=4)
                ax.axvline(c['mu'], c='k')

            elif model == "ExponentialDistancePoisson":
                _scatter_jittered(ax, flatten_dist_val)
                c = ss[(c1, c2)]
                mu = c['mu']
                rate_scale = c['rate_scale']
                y = np.exp(-fine_bins / mu)
                y = y * rate_scale
                ax.plot(fine_bins, y, c='r')
                ax.text(0, 0.2, r"mu: %3.2f" % c['mu'], fontsize=4)
                ax.text(0,
                        0.6,
                        r"rate_scale: %3.2f" % c['rate_scale'],
                        fontsize=4)
                ax.set_ylim(-1, 20.0)
                ax.axvline(c['mu'], c='k')

            elif model == "LogisticDistancePoisson":
                _scatter_jittered(ax, flatten_dist_val)
                c = ss[(c1, c2)]
                y = util.logistic(fine_bins, c['mu'], hps['lambda'])
                y = y * (c['rate_scale'] - hps['rate_min']) + hps['rate_min']
                ax.plot(fine_bins, y, c='r')
                ax.text(0, 1, r"mu: %3.2f" % c['mu'], fontsize=4)
                ax.text(0,
                        3,
                        r"rate_scale: %3.2f" % c['rate_scale'],
                        fontsize=4)
                ax.set_ylim(-1, 20.0)
                ax.axvline(c['mu'], c='k')

            elif model == "LinearDistance":
                print("MAX_DISTANCE= %s %s %s" % (
                    MAX_DIST, np.max(fine_bins), np.max(bins)))
                c = ss[(c1, c2)]
                y = util.linear_dist(fine_bins, c['p'], c['mu'])
                y += hps['p_min']
                ax.plot(fine_bins, y, c='r')

            ax.set_xlim(0, MAX_DIST)
예제 #10
0
def _scatter_jittered(ax, dist_val):
    """Scatter (distance, link) rows on ax with Gaussian jitter; no-op if empty."""
    if len(dist_val) > 0:
        x_jitter = np.random.normal(0, 0.01, len(dist_val))
        y_jitter = np.random.normal(0, 0.05, len(dist_val))
        ax.scatter(dist_val[:, 0] + x_jitter,
                   dist_val[:, 1] + y_jitter,
                   edgecolor='none',
                   s=2)


def plot_t1t1_params(fig, conn_and_dist, assign_vect, ss, hps, MAX_DIST=10,
                     model="LogisticDistance", MAX_CLASSES=20):
    """
    In the same order that we would plot the latent matrix, plot
    the per-parameter properties

    Builds a CLASSN x CLASSN grid of axes on ``fig``, one cell per
    ordered pair of classes and at most MAX_CLASSES per side. When the
    model name does not contain "istance" the function stops after
    creating the grid. Otherwise, for the node pairs of each class pair:

    * "LogisticDistance" / "LogisticDistanceFixedLambda": plots the
      empirical link fraction per distance bin and the model's logistic
      curve;
    * "ExponentialDistancePoisson" / "LogisticDistancePoisson": plots a
      jittered scatter of (distance, link value) and the model's rate
      curve;
    * "LinearDistance": plots the model's linear curve.

    fig : figure handed to mpl_toolkits.axes_grid1.Grid.
    conn_and_dist : 2-d array indexed as [n1, n2]; each element exposes
        'distance' and 'link' fields.
    assign_vect : class assignment per node.
    ss : mapping from (c1, c2) tuples of original (non-canonical) class
        ids to that component's parameters (keys such as 'mu', 'lambda',
        'p_scale', 'rate_scale', 'p', depending on the model).
    hps : the per-relation hps (keys such as 'p_min', 'p_max', 'lambda',
        'rate_min', depending on the model).
    MAX_DIST : upper end of the distance axis and of the histogram bins.
    model : model name selecting what is drawn.
    MAX_CLASSES : maximum number of classes shown per grid side.

    note, tragically, this wants the whole figure

    Returns None.
    """

    from mpl_toolkits.axes_grid1 import Grid

    assign_vect = np.array(assign_vect)
    canon_assign_vect = util.canonicalize_assignment(assign_vect)
    # create the mapping between existing and new
    canon_to_old = dict(zip(canon_assign_vect, assign_vect))

    # np.unique already returns its values sorted
    CLASSES = np.unique(canon_assign_vect)

    CLASSN = len(CLASSES)

    if CLASSN > MAX_CLASSES:
        print("WARNING, TOO MANY CLASSES")
        CLASSN = MAX_CLASSES

    # NOTE(review): add_all appears to be deprecated/removed in newer
    # matplotlib releases -- confirm against the version this project pins.
    img_grid = Grid(fig, 111,  # similar to subplot(111)
                    nrows_ncols=(CLASSN, CLASSN),
                    axes_pad=0.1,
                    add_all=True,
                    share_all=True,
                    label_mode='L',
                    )

    if "istance" not in model:
        return

    shown_classes = CLASSES[:MAX_CLASSES]
    for c1i, c1_canon in enumerate(shown_classes):
        for c2i, c2_canon in enumerate(shown_classes):
            c1 = canon_to_old[c1_canon]
            c2 = canon_to_old[c2_canon]
            ax = img_grid[c1i * CLASSN + c2i]

            nodes_1 = np.argwhere(assign_vect == c1).flatten()
            nodes_2 = np.argwhere(assign_vect == c2).flatten()
            assert len(nodes_1) > 0
            assert len(nodes_2) > 0

            conn_dist_hist = []
            noconn_dist_hist = []
            flatten_dist_val = []
            for n1 in nodes_1:
                for n2 in nodes_2:
                    # fetch the record once instead of three times
                    cell = conn_and_dist[n1, n2]
                    d = cell['distance']
                    link = cell['link']
                    if link:
                        conn_dist_hist.append(d)
                    else:
                        noconn_dist_hist.append(d)
                    flatten_dist_val.append((d, link))
            flatten_dist_val = np.array(flatten_dist_val)
            bins = np.linspace(0, MAX_DIST, 20)
            fine_bins = np.linspace(0, MAX_DIST, 100)

            if model in ("LogisticDistance", "LogisticDistanceFixedLambda"):
                # compute prob as a function of distance for this class
                htrue, _ = np.histogram(conn_dist_hist, bins)
                hfalse, _ = np.histogram(noconn_dist_hist, bins)

                # Bins with no pairs give 0/0 -> NaN, which simply leaves
                # a gap in the line; silence the expected warning.
                with np.errstate(invalid='ignore'):
                    p = htrue.astype(float) / (hfalse + htrue)

                ax.plot(bins[:-1], p, c='b', linewidth=3)

            if model == "LogisticDistance":
                c = ss[(c1, c2)]
                # single-argument print: same output on Python 2 and 3
                print("MAX_DISTANCE= %s %s %s %s" % (
                    MAX_DIST, np.max(fine_bins), np.max(bins), c))
                y = util.logistic(fine_bins, c['mu'], c['lambda'])
                y = y * (hps['p_max'] - hps['p_min']) + hps['p_min']
                ax.plot(fine_bins, y, c='r', linewidth=2)
                ax.text(0, 0.2, r"mu: %3.2f" % c['mu'], fontsize=4)
                ax.text(0, 0.6, r"lamb: %3.2f" % c['lambda'], fontsize=4)
                ax.axvline(c['mu'], c='k')

            elif model == "LogisticDistanceFixedLambda":
                c = ss[(c1, c2)]
                y = util.logistic(fine_bins, c['mu'], hps['lambda'])
                y = y * (c['p_scale'] - hps['p_min']) + hps['p_min']
                ax.plot(fine_bins, y, c='r')
                ax.text(0, 0.2, r"mu: %3.2f" % c['mu'], fontsize=4)
                ax.text(0, 0.6, r"lamb: %3.2f" % hps['lambda'], fontsize=4)
                ax.axvline(c['mu'], c='k')

            elif model == "ExponentialDistancePoisson":
                _scatter_jittered(ax, flatten_dist_val)
                c = ss[(c1, c2)]
                mu = c['mu']
                rate_scale = c['rate_scale']
                y = np.exp(-fine_bins / mu)
                y = y * rate_scale
                ax.plot(fine_bins, y, c='r')
                ax.text(0, 0.2, r"mu: %3.2f" % c['mu'], fontsize=4)
                ax.text(0, 0.6, r"rate_scale: %3.2f" % c['rate_scale'],
                        fontsize=4)
                ax.set_ylim(-1, 20.0)
                ax.axvline(c['mu'], c='k')

            elif model == "LogisticDistancePoisson":
                _scatter_jittered(ax, flatten_dist_val)
                c = ss[(c1, c2)]
                y = util.logistic(fine_bins, c['mu'], hps['lambda'])
                y = y * (c['rate_scale'] - hps['rate_min']) + hps['rate_min']
                ax.plot(fine_bins, y, c='r')
                ax.text(0, 1, r"mu: %3.2f" % c['mu'], fontsize=4)
                ax.text(0, 3, r"rate_scale: %3.2f" % c['rate_scale'],
                        fontsize=4)
                ax.set_ylim(-1, 20.0)
                ax.axvline(c['mu'], c='k')

            elif model == "LinearDistance":
                print("MAX_DISTANCE= %s %s %s" % (
                    MAX_DIST, np.max(fine_bins), np.max(bins)))
                c = ss[(c1, c2)]
                y = util.linear_dist(fine_bins, c['p'], c['mu'])
                y += hps['p_min']
                ax.plot(fine_bins, y, c='r')

            ax.set_xlim(0, MAX_DIST)