Exemplo n.º 1
0
from read_hierarchy import *
from hierarchy_common import all_roots

# Build the root-table entries file: for every data row of the training CSV
# that carries at most ten labels, write one line holding the space-separated
# union of the sets returned by all_roots() for each of its labels.
#
# Both files are opened in a single `with` so the output file is flushed and
# closed when the loop ends (it was previously never closed) and neither
# handle leaks if a row fails to parse.
with open("data/train_small.csv", 'r') as f, \
		open("data/roottable_entries.txt", 'w') as g:
	#Skip the head line
	f.readline()

	for line in f:
		# Real lists are required below: the last element is indexed and
		# len() is taken, neither of which a Python 3 `map` object supports.
		labels = [field.strip() for field in line.split(',')]
		# Only the first space-separated token of the last field is a label;
		# whatever follows it on the line is ignored here.
		feature = labels[-1].split(' ')
		labels[-1] = feature[0]
		labels = [int(label) for label in labels]
		if len(labels) <= 10:
			roots = set.union(
				*[all_roots(label, ancestor_tree) for label in labels])
			g.write(' '.join(map(str, roots)) + '\n')
Exemplo n.º 2
0
from read_hierarchy import *
from hierarchy_common import all_roots

# Build the root-table entries file: for every data row of the training CSV
# that carries at most ten labels, write one line holding the space-separated
# union of the sets returned by all_roots() for each of its labels.
#
# Both files are opened in a single `with` so the output file is flushed and
# closed when the loop ends (it was previously never closed) and neither
# handle leaks if a row fails to parse.
with open("data/train_small.csv", 'r') as f, \
        open("data/roottable_entries.txt", 'w') as g:
    #Skip the head line
    f.readline()

    for line in f:
        labels = [field.strip() for field in line.split(',')]
        # Only the first space-separated token of the last field is a label;
        # whatever follows it on the line is ignored here.
        feature = labels[-1].split(' ')
        labels[-1] = feature[0]
        labels = [int(label) for label in labels]
        if len(labels) <= 10:
            roots = set.union(
                *[all_roots(label, ancestor_tree) for label in labels])
            g.write(' '.join(map(str, roots)) + '\n')
Exemplo n.º 3
0
        descendant_tree[parent] = set([child])
    try:
        ancestor_tree[child].add(parent)
    except:
        ancestor_tree[child] = set([parent])

f.close()

### ------------------------------------------------------------------------------------------------------

# Aggregate label counts per root. Each line of the popularity file is
# "<label> <count>"; the count is added to the running total of one element
# popped from the collection all_roots() returns for that label.
# NOTE(review): pop() picks a single, arbitrary root when a label has several
# -- confirm that labels here are expected to have exactly one root.
#
# The `with` block closes the file even if a line is malformed, and dict.get
# replaces the former bare `except:` so only a genuinely missing key takes the
# "start at zero" path.
with open("data/label_popularity.txt", 'r') as f:
    root_popularity = {}
    for line in f:
        (label, count) = line.split(' ')
        node = all_roots(int(label), ancestor_tree).pop()
        root_popularity[node] = root_popularity.get(node, 0) + int(count)

#root_popularity = sorted(root_popularity.iteritems(), key=itemgetter(1), reverse=True)
#print(root_popularity[:10])

### ------------------------------------------------------------------------------------------------------

f = open("data/popular_rootgroups.txt", 'r')

root_groups = []
Exemplo n.º 4
0
	(parent, child) = list(map(int, line.split(' ')))
	try: descendant_tree[parent].add(child)
	except: descendant_tree[parent] = set([child])
	try: ancestor_tree[child].add(parent)
	except: ancestor_tree[child] = set([parent])

f.close()

### ------------------------------------------------------------------------------------------------------

# Aggregate label counts per root. Each line of the popularity file is
# "<label> <count>"; the count is added to the running total of one element
# popped from the collection all_roots() returns for that label.
# NOTE(review): pop() picks a single, arbitrary root when a label has several
# -- confirm that labels here are expected to have exactly one root.
#
# The `with` block closes the file even if a line is malformed, and dict.get
# replaces the former bare `except:` so only a genuinely missing key takes the
# "start at zero" path.
with open("data/label_popularity.txt", 'r') as f:
	root_popularity = {}
	for line in f:
		(label, count) = line.split(' ')
		node = all_roots(int(label), ancestor_tree).pop()
		root_popularity[node] = root_popularity.get(node, 0) + int(count)

#root_popularity = sorted(root_popularity.iteritems(), key=itemgetter(1), reverse=True)
#print(root_popularity[:10])

### ------------------------------------------------------------------------------------------------------

f = open("data/popular_rootgroups.txt", 'r')

root_groups = []
for line in f:
	roots = line.split(' ')
Exemplo n.º 5
0
    # "-1" makes smaller support less significant
    label_groups.append((labels, significance))
    #label_groups.append((labels, support))

f.close()

### ------------------------------------------------------------------------------------------------------

# Order the groups by the second element of each tuple, highest first, and
# show the top ten for inspection.
label_groups.sort(key=itemgetter(1), reverse=True)

print(label_groups[:10])

# Walk the 50000 highest-ranked groups and join the roots of each group under
# a common parent, provided the joined subtree stays below 20000 leaves.
# Parents created here get ids 4000000 + i, so an id above 4000000 marks a
# root added by an earlier iteration of this loop.
# NOTE(review): this assumes original ids are all below 4000000 -- confirm.
i = 1
for (roots, support) in label_groups[:50000]:
    real_roots = set.union(
        *[all_roots(node, ancestor_tree) for node in roots])
    # The group already sits under a single root: nothing to merge.
    if len(real_roots) == 1:
        continue
    leaves_count = sum(
        len(all_leaves(node, descendant_tree)) for node in real_roots)
    if leaves_count < 20000:
        # Build real lists rather than filter(): len() and [0] are used
        # below, and a Python 3 filter object supports neither.
        added_roots = [x for x in real_roots if x > 4000000]
        original_roots = [x for x in real_roots if x < 4000000]
        if len(added_roots) == 1:
            # Exactly one previously added parent: attach the remaining
            # original roots to it instead of creating another parent.
            descendant_tree[added_roots[0]].update(set(original_roots))
            for node in original_roots:
                ancestor_tree[node] = set([added_roots[0]])
        else:
            # Zero or several added parents: create a fresh parent over all
            # of the group's current roots.
            descendant_tree[4000000 + i] = real_roots
            for node in real_roots:
                ancestor_tree[node] = set([4000000 + i])
            i += 1
Exemplo n.º 6
0
        [label_popularity[node] for node in labels])
    # "-1" makes smaller support less significant
    label_groups.append((labels, significance))
    #label_groups.append((labels, support))

f.close()

### ------------------------------------------------------------------------------------------------------

# Rank the groups by the second element of each tuple (largest first) and
# print the ten best-ranked entries.
label_groups.sort(reverse=True, key=itemgetter(1))

print(label_groups[:10])

# For each of the 50000 best-ranked groups, gather the roots of its members
# and, when the combined subtree has fewer than 20000 leaves, place those
# roots under one shared parent. Parents made here are numbered 4000000 + i.
i = 1
for (roots, support) in label_groups[:50000]:
    real_roots = set.union(
        *(all_roots(member, ancestor_tree) for member in roots))
    if len(real_roots) == 1:
        # Already a single root; no merge needed.
        continue
    leaves_count = sum(
        len(all_leaves(root, descendant_tree)) for root in real_roots)
    if not leaves_count < 20000:
        continue
    # Split the roots by which side of the 4000000 threshold their id is on.
    added_roots = [r for r in real_roots if r > 4000000]
    original_roots = [r for r in real_roots if r < 4000000]
    if len(added_roots) != 1:
        # No unique added parent to reuse: make a new one over every root.
        descendant_tree[4000000 + i] = real_roots
        for node in real_roots:
            ancestor_tree[node] = {4000000 + i}
        i += 1
    else:
        # Reuse the single added parent for the below-threshold roots.
        descendant_tree[added_roots[0]].update(set(original_roots))
        for node in original_roots:
            ancestor_tree[node] = {added_roots[0]}