def getDegreeComparison(prefix1, prefix2, k, l):
    """Build the comparison form for two clusterings.

    The first clustering comes from ``filterWithI(prefix1, k, l, 'out')``
    (its ``'new_clusters'`` entry); the second is read from
    ``data/<prefix2>/data.out.clustering.<k>``.  The per-cluster statistics
    table is parsed from ``data/<prefix2>/data.out.clustering.<k>.output``.

    Args:
        prefix1: Forwarded to ``filterWithI`` and embedded in the graph link.
        prefix2: Name of the directory under ``data/`` holding the second
            clustering and its ``.output`` statistics file.
        k: Number of clusters.  Used verbatim in file names and through
            ``int(k)`` for the size of the co-occurrence matrix.
        l: Forwarded to ``filterWithI``; also used in the title of the
            ``'G<l> Similarities'`` table and in the graph link.

    Returns:
        Whatever ``form.Form(*components, js=js)()`` returns (presumably the
        rendered page -- confirm against the ``form`` module).

    Raises:
        IOError: If either data file cannot be opened.
        TypeError: If no header line of the ``.output`` file matched, since
            ``category`` is then still ``None``.
        ZeroDivisionError: If a row of the co-occurrence matrix, or
            ``results['total']``, sums to zero.

    Side effects:
        ``results['g0-similarities']`` (the dict returned by ``filterWithI``)
        is replaced in place by its first two columns, stored column-major.
    """
    from runcluster import filterWithI

    format_float = '%.5f'
    # The '+' flag always emits exactly one sign, so negative values render
    # as '-0.50000' instead of receiving a second hand-written '-' prefix.
    signed_float = '%+.5f'
    table_style = dict(use_id=True, border="1", cellspacing="0",
                       cellpadding="3")

    results = filterWithI(prefix1, k, l, 'out')
    results['g0-similarities'] = [
        [row[col] for row in results['g0-similarities']]
        for col in range(2)
    ]

    WORDS = r'[a-zA-Z]+'
    SPACES = r'[\s]+'
    NUMBERS = r'[\d]+'
    SIM = r'[+-\.\d]+'
    word_col = '(%s)' % WORDS + SPACES
    number_col = '(%s)' % NUMBERS + SPACES
    sim_col = '(%s)' % SIM + SPACES
    # NOTE(review): the bare '|' is regex alternation, not a literal pipe.
    # Each pattern therefore reads "<columns><spaces>" OR "<spaces>\n", and a
    # whitespace-only line (other than a lone "\n") matches with every group
    # set to None.  Left unchanged because the column alternative is what
    # matches the table rows; confirm the file format before tightening it.
    row_tail = '|' + SPACES + '\n'
    category_regex = r'[\s]*' + word_col * 6 + row_tail
    stats_regex = SPACES + number_col * 2 + sim_col * 4 + row_tail

    # Parse the statistics table: numeric rows go to ``matches``; the last
    # header-like row seen becomes ``category``.
    matches = []
    category = None
    stats_path = 'data/%s/data.out.clustering.%s.output' % (prefix2, k)
    with open(stats_path, 'r') as stats_file:
        for line in stats_file:
            found = match(stats_regex, line)
            if found:
                matches.append(list(found.groups()))
                continue
            found = match(category_regex, line)
            if found:
                category = list(found.groups())

    clusters_path = 'data/%s/data.out.clustering.%s' % (prefix2, k)
    with open(clusters_path, 'r') as clusters_file:
        clusters2 = clusters_file.read().split()
    clusters1 = results['new_clusters']

    # cov[a][b] counts the positions assigned to cluster a in the first
    # clustering and to cluster b in the second.
    num_clusters = int(k)
    cov = [[0] * num_clusters for _ in range(num_clusters)]
    for first_id, second_id in zip(clusters1, clusters2):
        cov[int(first_id)][int(second_id)] += 1

    total = results['total']
    total_nodes = float(sum(total))
    fractions = [count / total_nodes for count in total]

    # Turn each row into "share of the row" minus the overall share of the
    # column taken from results['total'].
    for row in cov:
        row_size = float(sum(row))
        for col in range(len(row)):
            row[col] = row[col] / row_size - fractions[col]

    # One row per entry of ``total``: [total, sim0, sim1], ordered by
    # descending total; the original position is kept as the row label.
    g0_similarities = [[count] for count in total]
    g0_rows = transpose(results['g0-similarities'])
    for row, sims in zip(g0_similarities, g0_rows):
        row.extend(sims)
    ranked = sorted(enumerate(g0_similarities),
                    key=lambda item: -item[1][0])
    g0_v_category = [position for position, _ in ranked]
    g0_similarities = [row for _, row in ranked]
    # Formatting starts at index 2, so the total (index 0) and the first
    # similarity (index 1) stay numeric.
    for row in g0_similarities:
        for col in range(2, len(row)):
            row[col] = signed_float % row[col]

    # Drop header columns 3 and 5 (the second pop sees the shortened list).
    g0_category = list(category)
    g0_category.pop(3)
    g0_category.pop(4)

    # NOTE(review): the first column of every row is left unformatted here,
    # unlike the remaining columns -- confirm this is intended.
    print_cov = []
    for row in cov:
        rendered = [row[0]]
        rendered.extend(format_float % value for value in row[1:])
        print_cov.append(rendered)

    components = [
        form.Table('Ref stats',
                   h_category=list(range(num_clusters)),
                   v_category=['in-stats', 'out-stats', 'total-nodes'],
                   pairs=[results['in-stats'], results['out-stats'],
                          results['total']],
                   **table_style),
        form.tr(''),
        form.tr(''),
        form.Table('G0 Similarities',
                   h_category=g0_category,
                   v_category=g0_v_category,
                   pairs=g0_similarities,
                   **table_style),
        form.tr(''),
        form.tr(''),
        form.Table('G%s Similarities' % (l),
                   v_category=list(range(len(matches))),
                   h_category=category,
                   pairs=matches,
                   **table_style),
        form.tr(''),
        form.Table('Cov',
                   h_category=list(range(num_clusters)),
                   pairs=print_cov,
                   **table_style),
        form.tr(''),
    ]

    # One single-row table per cov row, columns ordered by descending value
    # and labelled "(column, overall fraction)".
    for row_id, row in enumerate(cov):
        ordered = sorted(zip(row, range(num_clusters)),
                         key=lambda pair: -pair[0])
        pairs = [format_float % value for value, _ in ordered]
        cate = [str((col, format_float % fractions[col]))
                for _, col in ordered]
        components.append(form.Table('cov%d' % (row_id),
                                     h_category=cate,
                                     v_category=[row_id],
                                     pairs=[pairs],
                                     **table_style))
        # NOTE(review): the source indentation was lost; the spacer row is
        # assumed to follow every per-cluster table -- confirm.
        components.append(form.tr(''))

    variables = ('var link = "/van_graph?prefix1=%s&prefix2=%s&k=%s&l=%s";\n'
                 % (prefix1, prefix2, k, l))
    js = form.js('script', variables + 'function create_graph(id) { window.open(link + "&cell=" + id + "@scluster@scluster");}\n')
    return form.Form(*components, js=js)()
def getMergeComparison(prefix):
    """Build the form comparing the two clusterings stored for ``prefix``.

    The first clustering is read from ``data/<prefix>/data.out.clustering.<k>``
    and the second from ``data/<prefix>/data.abstract.matrix.clustering.<k>``;
    the matching ``.output`` files are parsed with ``getMatches``.

    NOTE(review): ``k`` is not a parameter of this function.  It is resolved
    from the enclosing module scope and must be defined there, otherwise the
    first use raises ``NameError`` -- confirm where it is set.

    Args:
        prefix: Name of the directory under ``data/``; also forwarded to
            ``filterWithI`` and embedded (twice) in the graph link.

    Returns:
        Whatever ``form.Form(*components, js=js)()`` returns (presumably the
        rendered page -- confirm against the ``form`` module).

    Raises:
        IOError: If any of the four data files cannot be opened.
        ZeroDivisionError: If a row of the co-occurrence matrix, or
            ``results['total']``, sums to zero.

    Side effects:
        Prints ``results['total']`` to standard output.
    """
    from runcluster import filterWithI

    format_float = '%.5f'
    # The '+' flag always emits exactly one sign, so negative values render
    # as '-0.50000' instead of receiving a second hand-written '-' prefix.
    signed_float = '%+.5f'
    table_style = dict(use_id=True, border="1", cellspacing="0",
                       cellpadding="3")

    results = filterWithI(prefix, k, 0, 'abstract.matrix')

    s_stats_path = 'data/%s/data.out.clustering.%s.output' % (prefix, k)
    v_stats_path = ('data/%s/data.abstract.matrix.clustering.%s.output'
                    % (prefix, k))
    with open(s_stats_path, 'r') as s_stats_file:
        s_stats = s_stats_file.read().split('\n')
    with open(v_stats_path, 'r') as v_stats_file:
        v_stats = v_stats_file.read().split('\n')
    s_matches, category = getMatches(s_stats)
    v_matches, _ = getMatches(v_stats)

    s_clusters_path = 'data/%s/data.out.clustering.%s' % (prefix, k)
    v_clusters_path = ('data/%s/data.abstract.matrix.clustering.%s'
                       % (prefix, k))
    with open(s_clusters_path, 'r') as s_clusters_file:
        clusters1 = s_clusters_file.read().split()
    with open(v_clusters_path, 'r') as v_clusters_file:
        clusters2 = v_clusters_file.read().split()

    # Cluster sizes in each of the two clusterings.
    num_clusters = int(k)
    total1 = [0] * num_clusters
    total2 = [0] * num_clusters
    for cluster_id in clusters1:
        total1[int(cluster_id)] += 1
    for cluster_id in clusters2:
        total2[int(cluster_id)] += 1

    # cov[a][b] counts the positions assigned to cluster a in the first
    # clustering and to cluster b in the second.
    cov = [[0] * num_clusters for _ in range(num_clusters)]
    for first_id, second_id in zip(clusters1, clusters2):
        cov[int(first_id)][int(second_id)] += 1

    total = results['total']
    total_nodes = float(sum(total))
    fractions = [count / total_nodes for count in total]

    # Turn each row into "share of the row" minus the overall share of the
    # column taken from results['total'].
    for row in cov:
        row_size = float(sum(row))
        for col in range(len(row)):
            row[col] = row[col] / row_size - fractions[col]

    # Single-argument form prints the same text under the print statement
    # and the print function.
    print('total %s' % (total,))

    # One row per entry of ``total``: the total followed by that entry's
    # 'g0-similarities' values, ordered by descending total; the original
    # position is kept as the row label.
    g0_similarities = [[count] for count in total]
    for row, sims in zip(g0_similarities, results['g0-similarities']):
        row.extend(sims)
    ranked = sorted(enumerate(g0_similarities),
                    key=lambda item: -item[1][0])
    g0_v_category = [position for position, _ in ranked]
    g0_similarities = [row for _, row in ranked]
    # Formatting starts at index 2, so the total (index 0) and the first
    # similarity (index 1) stay numeric.
    for row in g0_similarities:
        for col in range(2, len(row)):
            row[col] = signed_float % row[col]

    # Drop header columns 3 and 5 (the second pop sees the shortened list),
    # then the leading column.
    g0_category = list(category)
    g0_category.pop(3)
    g0_category.pop(4)
    g0_category.pop(0)

    print_cov = [[format_float % value for value in row] for row in cov]

    components = [
        form.Table('SCluster Similarities',
                   h_category=category,
                   v_category=list(range(len(s_matches))),
                   pairs=s_matches,
                   **table_style),
        form.tr(''),
        form.tr(''),
        form.Table('VCluster Similarities',
                   v_category=list(range(len(v_matches))),
                   h_category=category,
                   pairs=v_matches,
                   **table_style),
        form.tr(''),
        form.tr(''),
        form.Table('G0 Similarities',
                   h_category=g0_category,
                   v_category=g0_v_category,
                   cross='cid',
                   pairs=g0_similarities,
                   **table_style),
        form.tr(''),
        form.tr(''),
        form.Table('Total',
                   h_category=list(range(0, len(total))),
                   pairs=[total],
                   **table_style),
        form.tr(''),
        form.Table('Total1',
                   h_category=list(range(0, len(total1))),
                   pairs=[total1],
                   **table_style),
        form.tr(''),
        form.Table('Total2',
                   h_category=list(range(0, len(total2))),
                   pairs=[total2],
                   **table_style),
        form.tr(''),
        form.Table('fractions',
                   h_category=list(range(0, len(fractions))),
                   pairs=[fractions],
                   **table_style),
        form.tr(''),
        form.Table('Cov',
                   h_category=list(range(0, num_clusters)),
                   pairs=print_cov,
                   **table_style),
        form.tr(''),
    ]

    # One single-row table per cov row, columns ordered by descending value
    # and labelled "(column, overall fraction)".
    for row_id, row in enumerate(cov):
        ordered = sorted(zip(row, range(0, num_clusters)),
                         key=lambda pair: -pair[0])
        pairs = [format_float % value for value, _ in ordered]
        cate = [str((col, format_float % fractions[col]))
                for _, col in ordered]
        components.append(form.Table('cov%d' % (row_id),
                                     h_category=cate,
                                     v_category=[row_id],
                                     pairs=[pairs],
                                     **table_style))
        # NOTE(review): the source indentation was lost; the spacer row is
        # assumed to follow every per-cluster table -- confirm.
        components.append(form.tr(''))

    variables = ('var link = "/van_graph?prefix1=%s&prefix2=%s&k=%s&l=0";\n'
                 % (prefix, prefix, k))
    js = form.js('script', variables + 'function create_graph(id) { window.open(link + "&cell=" + id + "@scluster@vcluster_tfidf");}\n')
    return form.Form(*components, js=js)()