예제 #1
0
def recursion(root, lvl):
    """Compute and report a DBI score for each cluster folder above depth *lvl*.

    The folder tree under *root* is walked breadth-first.  Every visited
    folder that contains ``hierarchy.txt`` has its hierarchy, cluster keywords
    and embeddings loaded from that folder, is scored with ``compute_dbi``,
    and the score is printed and recorded.  Once the walk is finished, all
    recorded scores are handed to ``output_dbi``.

    Args:
        root: Path of the top-level folder.  It is visited at level 1 under
            the namespace ``'*'``.
        lvl: Depth limit; any value accepted by ``int()``.  Folders whose
            level is greater than or equal to this limit are not processed.

    Returns:
        None.  ``output_dbi`` receives a dict mapping each cluster namespace
        (``'*'``, ``'*/child'``, ...) to a ``(dbi, level)`` tuple.
    """
    # The depth limit never changes during the walk, so convert it once.
    max_level = int(lvl)

    q = Queue.Queue()
    q.put((root, 1, '*'))

    dbi_scores = {}

    while not q.empty():
        c_folder, level, c_name = q.get()

        # Folders at or beyond the depth limit are discarded unprocessed.
        if level >= max_level:
            continue

        hier_f = '%s/hierarchy.txt' % c_folder
        # A folder without a hierarchy file is neither scored nor expanded.
        if not exists(hier_f):
            continue

        hier_map = utils.load_hier_f(hier_f)
        clus_map = get_clus_keywords('%s/cluster_keywords.txt' % c_folder)
        embs = utils.load_embeddings('%s/embeddings.txt' % c_folder)

        # Each key of the hierarchy map names a sub-folder to visit next.
        for cluster in hier_map:
            q.put(('%s/%s' % (c_folder, cluster),
                   level + 1,
                   '%s/%s' % (c_name, cluster)))

        # handle current
        dbi = compute_dbi(embs, clus_map, hier_map)
        # Single parenthesised argument: same output on Python 2 and 3.
        print('Computing DBI for %s: %f' % (c_name, dbi))
        dbi_scores[c_name] = (dbi, level)

    output_dbi(dbi_scores)
예제 #2
0
def recursion(corpus, root, o_file, N):
    """Write the representative phrases of every cluster under *root* to *o_file*.

    The folder tree under *root* is walked breadth-first.  For each visited
    folder that contains ``hierarchy.txt`` (the root itself excepted) one line
    ``<namespace>\\t<phrase>,<phrase>,...`` is written, where the phrases come
    from ``get_rep``.  Folders without ``hierarchy.txt`` are skipped and not
    expanded.

    Args:
        corpus: Passed unchanged to ``get_rep``.
        root: Path of the top-level folder; its namespace is ``'*'``.
        o_file: Path of the output file.  It is created or truncated.
        N: Passed unchanged to ``get_rep`` (presumably the number of phrases
            to return -- confirm against ``get_rep``).

    Returns:
        None.
    """
    q = Queue.Queue()
    q.put((root, -1, '*'))

    # The context manager closes the output file even when loading the
    # hierarchy or computing the phrases raises part-way through the walk.
    with open(o_file, 'w+') as g:
        while not q.empty():
            c_folder, c_id, c_name = q.get()

            hier_f = '%s/hierarchy.txt' % c_folder
            if not exists(hier_f):
                continue

            hier_map = utils.load_hier_f(hier_f)

            # Keys name the sub-folders; values are the ids forwarded to
            # get_rep when the corresponding sub-folder is handled.
            for cluster in hier_map:
                q.put(('%s/%s' % (c_folder, cluster),
                       hier_map[cluster],
                       '%s/%s' % (c_name, cluster)))

            # handle current (the root has no id of its own, so skip it)
            if c_folder != root:
                phs = get_rep(corpus, c_folder, str(c_id), N)
                g.write('%s\t%s\n' % (c_name, ','.join(phs)))
예제 #3
0
def recursion(root, o_file, N):
    """Write the representative phrases of every cluster under *root* to *o_file*.

    The folder tree under *root* is walked breadth-first.  For each visited
    folder that contains ``hierarchy.txt`` (the root itself excepted) one line
    ``<namespace>\\t<phrase>,<phrase>,...`` is written, where the phrases come
    from ``get_rep``.  Folders without ``hierarchy.txt`` are skipped and not
    expanded.

    Args:
        root: Path of the top-level folder; its namespace is ``'*'``.
        o_file: Path of the output file.  It is created or truncated.
        N: Passed unchanged to ``get_rep`` (presumably the number of phrases
            to return -- confirm against ``get_rep``).

    Returns:
        None.
    """
    pending = queue.Queue()
    pending.put((root, -1, '*'))

    # The context manager closes the output file even when loading the
    # hierarchy or computing the phrases raises part-way through the walk.
    with open(o_file, 'w+') as out:
        while not pending.empty():
            c_folder, c_id, c_name = pending.get()

            hier_f = '%s/hierarchy.txt' % c_folder
            if not exists(hier_f):
                continue

            hier_map = utils.load_hier_f(hier_f)

            # Keys name the sub-folders; values are the ids forwarded to
            # get_rep when the corresponding sub-folder is handled.
            for cluster in hier_map:
                pending.put(('%s/%s' % (c_folder, cluster),
                             hier_map[cluster],
                             '%s/%s' % (c_name, cluster)))

            # handle current (the root has no id of its own, so skip it)
            if c_folder != root:
                phs = get_rep(c_folder, str(c_id), N)
                out.write('%s\t%s\n' % (c_name, ','.join(phs)))
예제 #4
0
def recursion(root):
    """Label every cluster folder below *root* and print the collected labels.

    The folder tree under *root* is walked breadth-first.  For each folder the
    hierarchy is loaded from ``<folder>/hierarchy.txt`` and its entries are
    queued as sub-folders.  Every folder except the root is labelled with
    ``label_emb_centric`` and the label is printed immediately.  When the walk
    ends -- normally or because of an error -- a ``<folder name> ==> <label>``
    summary of everything labelled so far is printed.

    The walk is best-effort: any ``Exception`` (for example a folder whose
    hierarchy file cannot be loaded) stops it, is reported on stderr and is
    not re-raised.  ``KeyboardInterrupt`` and ``SystemExit`` propagate after
    the summary has been printed.

    Args:
        root: Path of the top-level folder.

    Returns:
        None.
    """
    import sys  # local import: only needed for the diagnostic below

    q = Queue.Queue()
    q.put((root, -1))

    label_map = {}

    try:
        while not q.empty():
            c_folder, c_id = q.get()
            hier_map = utils.load_hier_f('%s/hierarchy.txt' % c_folder)

            # Keys name the sub-folders; values are the ids used when the
            # corresponding sub-folder is labelled.
            for cluster in hier_map:
                q.put(('%s/%s' % (c_folder, cluster), hier_map[cluster]))

            # handle current
            # Compare the folder being processed -- not a child path -- with
            # the root, so the root (which has no real id) is never labelled
            # and a childless folder cannot hit an unbound variable.
            if c_folder != root:
                label = label_emb_centric(c_folder, str(c_id))
                label_map[basename(c_folder)] = label
                print('label for %s is: %s\n' % (c_folder, label))
    except Exception as err:
        # Keep the walk best-effort, but say why it stopped instead of
        # hiding the failure completely.
        sys.stderr.write('label traversal stopped early: %r\n' % (err,))
    finally:
        # Always show what was collected, whether or not the walk finished.
        for o_l, label in label_map.items():
            print('%s ==> %s' % (o_l, label))