def recursion(root, lvl):
    """Score every cluster folder above depth ``lvl`` and report the results.

    Folders are visited breadth-first starting at ``root`` (depth 1,
    namespace ``'*'``). A folder is skipped when its depth is at least
    ``int(lvl)`` or when it has no ``hierarchy.txt``. For every other folder
    the hierarchy, cluster keywords and embeddings are loaded, its child
    clusters are queued one level deeper, and the value returned by
    ``compute_dbi`` is printed and recorded.

    Args:
        root: Path of the top-level folder.
        lvl: Depth limit; converted with ``int``.

    The collected ``{namespace: (dbi, depth)}`` mapping is handed to
    ``output_dbi`` once the queue is exhausted.
    """
    max_depth = int(lvl)
    scores = {}
    pending = Queue.Queue()
    pending.put((root, -1, 1, '*'))

    while not pending.empty():
        # The second tuple slot (cluster id) is carried along but not used here.
        folder, _cluster_id, depth, namespace = pending.get()
        hierarchy_path = '%s/hierarchy.txt' % folder
        if depth >= max_depth or not exists(hierarchy_path):
            continue

        children = utils.load_hier_f(hierarchy_path)
        keywords = get_clus_keywords('%s/cluster_keywords.txt' % folder)
        vectors = utils.load_embeddings('%s/embeddings.txt' % folder)

        # Queue the children before scoring the current folder.
        for child in children:
            pending.put((
                '%s/%s' % (folder, child),
                children[child],
                depth + 1,
                '%s/%s' % (namespace, child),
            ))

        score = compute_dbi(vectors, keywords, children)
        print('Computing DBI for %s: %f' % (namespace, score))
        scores[namespace] = (score, depth)

    output_dbi(scores)
def recursion(corpus, root, o_file, N):
    """Write the representative phrases of each cluster folder to ``o_file``.

    Folders are visited breadth-first starting at ``root`` (namespace
    ``'*'``). A folder without a ``hierarchy.txt`` is skipped entirely, so it
    produces no output line. For every other folder its child clusters (the
    keys of ``utils.load_hier_f``) are queued, and, unless the folder is
    ``root`` itself, one tab-separated line is written: the folder's
    namespace followed by the comma-joined result of ``get_rep``.

    Args:
        corpus: Passed through unchanged to ``get_rep``.
        root: Path of the top-level folder; never written to the output.
        o_file: Output path, opened with mode ``'w+'`` (existing content is
            truncated).
        N: Passed through to ``get_rep``; presumably the number of phrases
            to keep per cluster -- TODO confirm against ``get_rep``.
    """
    q = Queue.Queue()
    q.put((root, -1, '*'))

    # The context manager closes the output file even when a helper raises
    # part-way through the walk.
    with open(o_file, 'w+') as g:
        while not q.empty():
            c_folder, c_id, c_name = q.get()

            hier_f = '%s/hierarchy.txt' % c_folder
            if not exists(hier_f):
                continue
            hier_map = utils.load_hier_f(hier_f)

            for cluster in hier_map:
                cc_id = hier_map[cluster]
                cluster_folder = '%s/%s' % (c_folder, cluster)
                cluster_namespace = '%s/%s' % (c_name, cluster)
                q.put((cluster_folder, cc_id, cluster_namespace))

            # Handle the current folder; the root has no cluster id of its
            # own (it is queued with -1), so it is not reported.
            if c_folder != root:
                phs = get_rep(corpus, c_folder, str(c_id), N)
                g.write('%s\t%s\n' % (c_name, ','.join(phs)))
def recursion(root, o_file, N):
    """Write the representative phrases of each cluster folder to ``o_file``.

    The folder tree is walked breadth-first from ``root`` (namespace
    ``'*'``). Folders lacking a ``hierarchy.txt`` are skipped and produce no
    output. For the remaining folders the child clusters listed by
    ``utils.load_hier_f`` are queued, and every folder except ``root`` gets
    one tab-separated line: its namespace, then the comma-joined phrases
    returned by ``get_rep``.

    Args:
        root: Path of the top-level folder; never written to the output.
        o_file: Output path, opened with mode ``'w+'`` (existing content is
            truncated).
        N: Passed through to ``get_rep``; presumably the number of phrases
            to keep per cluster -- TODO confirm against ``get_rep``.
    """
    q = queue.Queue()
    q.put((root, -1, '*'))

    # Using a context manager guarantees the file is closed if get_rep or
    # load_hier_f raises during the traversal.
    with open(o_file, 'w+') as g:
        while not q.empty():
            c_folder, c_id, c_name = q.get()

            hier_f = '%s/hierarchy.txt' % c_folder
            if not exists(hier_f):
                continue
            hier_map = utils.load_hier_f(hier_f)

            for cluster in hier_map:
                cc_id = hier_map[cluster]
                cluster_folder = '%s/%s' % (c_folder, cluster)
                cluster_namespace = '%s/%s' % (c_name, cluster)
                q.put((cluster_folder, cc_id, cluster_namespace))

            # Handle the current folder; the root is queued with the
            # placeholder id -1 and is therefore not reported.
            if c_folder != root:
                phs = get_rep(c_folder, str(c_id), N)
                g.write('%s\t%s\n' % (c_name, ','.join(phs)))
def recursion(root):
    """Label every non-root cluster folder reachable from ``root``.

    Folders are visited breadth-first. For each one, ``hierarchy.txt`` is
    loaded through ``utils.load_hier_f`` and its child clusters are queued
    together with their ids. Every folder other than ``root`` is then
    labelled with ``label_emb_centric``; the label is printed and stored
    under the folder's base name.

    Args:
        root: Path of the top-level folder. It is queued with the
            placeholder id ``-1`` and is never labelled itself.

    If anything raises during the walk, the traversal stops and the labels
    gathered so far are printed as ``<folder name> ==> <label>`` lines.
    Nothing is returned.
    """
    q = Queue.Queue()
    q.put((root, -1))
    label_map = {}

    try:
        while not q.empty():
            c_folder, c_id = q.get()
            hier_map = utils.load_hier_f('%s/hierarchy.txt' % c_folder)

            for cluster in hier_map:
                cc_id = hier_map[cluster]
                cluster_folder = '%s/%s' % (c_folder, cluster)
                q.put((cluster_folder, cc_id))

            # Handle the current folder. The guard must test c_folder: the
            # loop variable cluster_folder is never the root (and is unbound
            # when hier_map is empty), which let the root through with id -1.
            if c_folder != root:
                label = label_emb_centric(c_folder, str(c_id))
                cur_label = basename(c_folder)
                label_map[cur_label] = label
                print('label for %s is: %s\n' % (c_folder, label))
    except:  # noqa: E722
        # NOTE(review): the catch-all is kept on purpose. The walk appears to
        # rely on a failure (presumably a folder with no hierarchy.txt -- TODO
        # confirm against utils.load_hier_f) to stop and dump the summary;
        # narrowing it could turn that best-effort exit into a crash.
        for o_l, l in label_map.items():
            print('%s ==> %s' % (o_l, l))