Example #1
from collections import defaultdict


def net_based_merge(n1, n2, distance, gamma):
    # Build an aggregating net over the anchor points of both input networks
    # and use its keys as the keys of the merged network (analogous to the
    # key-based merge).
    nn = create_aggregating_net(gamma=gamma,
                                apns=list(n1.keys()) + list(n2.keys()),
                                distance=distance)
    targ = defaultdict(list)

    for k, v in nn.items():
        # Start with the members attached to the surviving anchor itself...
        targ[k] = n1.get(k, []).copy()
        targ[k] += n2.get(k, []).copy()
        # ...then add the members of every anchor absorbed into it,
        # plus the absorbed anchor itself.
        for el in v:
            targ[k] += n1.get(el, []).copy()
            targ[k] += n2.get(el, []).copy()
            targ[k].append(el)

    return targ
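A minimal usage sketch, assuming create_aggregating_net is available in the same module and that each network maps an anchor point to a list of member points; the keys and the zero/one toy distance below are purely illustrative:

# Hypothetical input networks: anchor point -> list of member points.
n1 = {"a1": ["p1", "p2"], "a2": ["p3"]}
n2 = {"a2": ["p4"], "a3": ["p5"]}

# Illustrative distance on anchor identifiers; the real code uses a
# domain-specific distance (e.g. Jaccard over feature sets).
toy_distance = lambda x, y: 0.0 if x == y else 1.0

merged = net_based_merge(n1, n2, distance=toy_distance, gamma=0.5)
# Each surviving anchor maps to the combined members of the anchors
# aggregated into it, plus the absorbed anchors themselves.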
Example #2
from collections import defaultdict


def net_based_multi_merge(nets, distance, gamma):
    # Generalization of net_based_merge to an arbitrary number of networks.
    apns = []
    for net in nets:
        apns += list(net.keys())

    nn = create_aggregating_net(gamma=gamma,
                                apns=apns,
                                distance=distance)
    targ = defaultdict(list)

    for k, v in nn.items():
        # Members attached to the surviving anchor in every network...
        for net in nets:
            targ[k] += net.get(k, []).copy()

        # ...then the members of each absorbed anchor, plus the anchor itself.
        for el in v:
            for net in nets:
                targ[k] += net.get(el, []).copy()
            targ[k].append(el)

    return targ
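The same pattern extends to any number of networks; a sketch mirroring the one above, again with toy data and a purely illustrative distance:

nets = [
    {"a1": ["p1"]},
    {"a2": ["p2", "p3"]},
    {"a3": ["p4"]},
]
toy_distance = lambda x, y: 0.0 if x == y else 1.0

merged = net_based_multi_merge(nets, distance=toy_distance, gamma=0.5)
# With len(nets) == 2 this behaves like net_based_merge.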
Example #3
import numpy as np


def merge_voting_nets(nets, distance, gamma):
    apns = []
    for net in nets:
        apns += list(net.keys())

    nn = create_aggregating_net(gamma=gamma,
                                apns=apns,
                                distance=distance)

    # Transfer the "votes" from the original networks to the newly created anchors.
    targ = dict()
    for k, v in nn.items():
        # Seed the surviving anchor with its own vote vector...
        for net in nets:
            if k in net:
                targ[k] = net[k]
                break

        # ...then add, element-wise, the vote vector of every absorbed anchor.
        for el in v:
            for net in nets:
                if el in net:
                    targ[k] = list(np.add(targ[k], net[el]))
                    break

    return targ
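Here each network maps an anchor to a numeric vote vector, and the vectors of merged anchors are summed element-wise with np.add. A sketch with made-up votes and the same toy distance:

nets = [
    {"a1": [1, 0, 2], "a2": [0, 1, 0]},
    {"a3": [2, 2, 0]},
]
toy_distance = lambda x, y: 0.0 if x == y else 1.0

voted = merge_voting_nets(nets, distance=toy_distance, gamma=0.5)
# Each surviving anchor carries the element-wise sum of the vote vectors
# of all anchors merged into it.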
Example #4
import time

def single_net(x, gamma):
    # Build one aggregating net over the anchors of x, timing the construction.
    start = time.time()
    net = create_aggregating_net(gamma=gamma, apns=x.keys(),
                                 distance=lambda x1, y1: distance(x1, y1, x))
    end = time.time()
    print(f"Creating network... {gamma=} {len(x)} Elapsed: {end - start}")
    return dict(net)
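single_net relies on a module-level distance(x1, y1, x) that compares two anchors given the mapping x from anchor to feature set; a hedged sketch using a Jaccard-style distance as a stand-in:

# Hypothetical feature sets per anchor; in Example #5 the real data comes
# from the groupby over 'apn'/'nf'.
x = {"a1": {"f1", "f2"}, "a2": {"f2"}, "a3": {"f3"}}

def distance(x1, y1, funcs):
    # Illustrative Jaccard distance between the feature sets of two anchors.
    s1, s2 = funcs[x1], funcs[y1]
    return 1.0 - len(s1 & s2) / len(s1 | s2)

net = single_net(x, gamma=0.5)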
Example #5
    sample_size = 200
    smp = mysample(v, sample_size)
    funcs = smp.groupby(by='apn')['nf'].apply(set)

    # Distance between two anchors, parameterized by their feature sets.
    d = lambda x, y: distance(x, y, funcs)

    #print('reading test set')
    #tests = pd.read_csv('res2/9500-test.csv', index_col=0)
    # Hold out a small test set of anchors and persist it.
    test_size = 20
    train, test = train_test_split(funcs, test_size=test_size, random_state=42)
    test.to_csv(f"res/test-{test_size}.csv")

    #for gamma in tqdm([0, 1, 2, 4, 8, 16, 32, 180, 192]):
    #for gamma in tqdm([0, 0.1, 0.4, 0.5, 0.7, 0.8, 0.85, 0.9, 1.0]):
    # Sweep gamma over a regular grid in [0, 1].
    intervals = 18
    for gamma in tqdm([x / intervals for x in range(intervals + 1)]):

        merged = generate_merged(gamma=gamma, funcs=funcs)
        #onsm, nets = generate_merged(gamma=gamma, distance=distance, labels=labels)

        print("Creating reference network")
        start = time.time()
        reference = create_aggregating_net(gamma=gamma, apns=train.index, distance=d)
        end = time.time()
        print(f"\tElapsed: {end - start}")

        # Persist the reference net and the merged net for this gamma.
        with open(f"res/mergers-jaccard-{gamma}.pickle", 'wb+') as f:
            pickle.dump([dict(reference), dict(merged)], f)
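Each iteration pickles the reference net and the merged net for one gamma; reading a dump back is a plain unpickle (path pattern as above, the gamma value is assumed to be one of the swept grid points):

import pickle

gamma = 0.5  # 9/18, one of the swept values
with open(f"res/mergers-jaccard-{gamma}.pickle", "rb") as f:
    reference, merged = pickle.load(f)
# reference was built directly on the training anchors;
# merged comes from merging the per-sample nets via generate_merged.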