Exemplo n.º 1
0
    return conver_to_probs(votes)

if __name__ == "__main__":
    # Script entry point: score every stored net against the test set and
    # write one pickle of predictions per (gamma, net kind) under /tmp.

    # Earlier input pair, kept for reference:
    #net_file = 'res/9503-jaccard-votingnets.pickle'
    #test_file ='res/10003-test.csv'
    net_file = 'res/9003-tc-jaccard-votingnets.pickle'
    test_file = 'res/test-tc-1000.npy'

    # The CSV variant of the test set was loaded with:
    #test = pd.read_csv(test_file, index_col=0)
    test = np.load(test_file)

    # NOTE(review): pickle.load runs whatever the file encodes; only point
    # net_file at files produced by a trusted run.
    with open(net_file, 'rb') as f:
        nets = pickle.load(f)

    m = get_data()

    def distance(x, y):
        # Pairwise distance, with the 'nf' entry of m as jaccard's third argument.
        return jaccard(x, y, m['nf'])

    def classifier(x):
        # Built next to distance but not passed to anything in this block.
        return lcl(x, m['ml'])

    # Prediction scores keyed by gamma, one mapping per net kind.
    refs = {}
    mers = {}
    #test_a = list(test.index)
    test_a = list(test)

    # Each value of nets unpacks into a (mer, ref) pair of nets.
    for gamma, (mer, ref) in tqdm(nets.items()):
        mers[gamma] = get_predictions_score(net=mer, distance=distance, test=test_a)
        refs[gamma] = get_predictions_score(net=ref, distance=distance, test=test_a)

        # Persist each gamma as soon as it is computed.
        with open(f"/tmp/preds-{gamma}-mers.pickle", 'wb+') as f:
            pickle.dump(mers[gamma], f)

        with open(f"/tmp/preds-{gamma}-refs.pickle", 'wb+') as f:
            pickle.dump(refs[gamma], f)
Exemplo n.º 2
0
def distance(x, y, arg):
    """Return ``jaccard(x, y, arg)``.

    The three arguments are forwarded to ``jaccard`` unchanged and its
    result is returned as-is.
    """
    result = jaccard(x, y, arg)
    return result
Exemplo n.º 3
0
def mjaccard(x, y, nafs):
    """Return 1 when both entries are empty, otherwise ``jaccard(x, y, nafs)``.

    ``nafs`` is indexed by ``x`` and ``y``; the looked-up values must
    support ``len()``. ``jaccard`` is only called when at least one of the
    two entries is non-empty.
    """
    # Same evaluation order as a chained comparison: nafs[y] is only
    # inspected once nafs[x] is known to be empty.
    x_is_empty = len(nafs[x]) == 0
    if x_is_empty and len(nafs[y]) == 0:
        return 1
    return jaccard(x, y, nafs)
Exemplo n.º 4
0
        
        sizes = {g:len(n.keys()) for g, n in nets.items()}
        perfs_di = pd.DataFrame.from_dict(res, orient='index', columns=['fp', 'fn'])
        szs_di = pd.DataFrame.from_dict(sizes, orient='index', columns=['sizes'])
        gj_euc = perfs_di.join(szs_di)
        part_size = len(train)
        gj_euc['compression'] = gj_euc.sizes/part_size
        gj_euc.to_csv(f"res/euc{part_size}.csv")

if __name__ == "__main__":
    # Script entry point: run the Jaccard experiment, then the Euclid
    # experiment, on the same train/test split.
    print("Setting up data...")
    afs, lbs = setup_data()

    # Two parts of part_size items each are requested; part 0 becomes the
    # training set and the first test_size items of part 1 the test set.
    num_parts = 2
    part_size = 500
    parts = get_part_indexes(afs, num_parts, part_size)
    test_size = 100
    test_set = parts[1][:test_size]
    train_set = parts[0]

    print(f"Part size: {part_size} and {test_size=}")
    print(f"Jaccard experiments")

    def distance(x, y):
        # Distance for the first experiment.
        return jaccard(x, y, afs)

    def classifier(x):
        # Shared by both experiments.
        return lcl(x, lbs)

    run_jaccard(train=train_set, test=test_set, distance=distance, classifier=classifier)

    print("Euclid experiments")

    def distance(x, y):
        # Rebinds distance for the second experiment.
        return adf(x, y, afs)

    run_euclid(train=train_set, test=test_set, distance=distance, classifier=classifier)