Пример #1
0
    def test_device_boruvka_usa_cal(self):
        """Check the GPU Boruvka MST of the ``path_usa_cal`` graph against SciPy.

        The graph is loaded in CSR form and handed to ``boruvka_minho_gpu``.
        The number of MST edges and the total MST weight must equal those of
        the tree returned by ``minimum_spanning_tree`` for the same file.
        """
        # Unpack the CSR matrix into the arrays the Boruvka routine takes.
        sp_mat = load_sparse_csr(path_usa_cal)
        dest = sp_mat.indices
        weight = sp_mat.data
        # The trailing indptr entry is dropped so there is one entry per row.
        firstedge = sp_mat.indptr[:-1]
        outdegree = np.empty_like(firstedge)
        outdegree_from_firstedge(firstedge, outdegree, dest.size)

        t1 = Timer()
        t1.tic()
        mst, n_mst = boruvka_minho_gpu(dest, weight, firstedge, outdegree)
        t1.tac()

        # Keep only the first n_mst entries of the returned edge array.
        if n_mst < mst.size:
            mst = mst[:n_mst]

        # Reference MST from the scipy library.
        graph_csr = load_sparse_csr(path_usa_cal)
        scipy_mst = minimum_spanning_tree(graph_csr)
        true_mst_size = scipy_mst.size

        self.assertEqual(n_mst, true_mst_size,
                         'MST number of edges mismatch')

        # mst holds edge ids, so it indexes directly into the weight array.
        self.assertEqual(weight[mst].sum(), scipy_mst.sum(),
                         'MST total weight mismatch')
Пример #2
0
    def test_seq_gpu(self):
        """Compare the sequential and GPU Boruvka MSTs on the 4elt graph.

        The comparison is repeated 20 times.  In every round both
        implementations must return ``n_vertices - 1`` edges and the two
        trees must have the same total weight.
        """
        # Single parenthesised argument: same output on Python 2 and 3.
        print("HOST VS DEVICE")

        for _ in range(20):
            sp_mat = load_sparse_csr(path_4elt)
            dest = sp_mat.indices
            weight = sp_mat.data
            # last indptr element is the total number, not a per-row offset
            firstedge = sp_mat.indptr[:-1]
            outdegree = np.empty_like(firstedge)
            outdegree_from_firstedge(firstedge, outdegree, dest.size)

            n_vertices = firstedge.size

            t1, t2 = Timer(), Timer()

            # --- sequential (host) run ---
            t1.tic()
            mst1, n_edges1 = boruvka_minho_seq(dest, weight,
                                               firstedge, outdegree)
            t1.tac()

            # Keep only the first n_edges1 entries of the edge array.
            if n_edges1 < mst1.size:
                mst1 = mst1[:n_edges1]
            mst1.sort()

            self.assertEqual(
                mst1.size, n_vertices - 1,
                '4elt dataset MST not fully connected in sequential')

            # --- GPU (device) run ---
            t2.tic()
            mst2, n_edges2 = boruvka_minho_gpu(dest, weight, firstedge,
                                               outdegree, MAX_TPB=256)
            t2.tac()

            if n_edges2 < mst2.size:
                mst2 = mst2[:n_edges2]
            mst2.sort()

            self.assertEqual(
                mst2.size, n_vertices - 1,
                '4elt dataset MST not fully connected in gpu')

            # Only the costs are compared; the edge sets themselves are not.
            cost1 = weight[mst1].sum()
            cost2 = weight[mst2].sum()
            self.assertEqual(cost1, cost2, 'MSTs have diferent costs')
Пример #3
0
def mst_cal():
    """Benchmark sequential vs. GPU Boruvka on the USA-road-d.CAL graph.

    Loads the CSR graph, prints its dimensions and memory footprint, then
    runs ``boruvka_minho_seq`` and ``boruvka_minho_gpu`` for 10 rounds.  Each
    round records whether every edge of the sequential MST also appears in
    the GPU MST and whether both trees have the same total weight.  Timings
    of the first round are left out of the reported averages.

    Nothing is returned; all results are printed.
    """
    sp_cal = load_csr_graph(home + "QCThesis/datasets/graphs/USA-road-d.CAL.csr")
    dest, weight, firstEdge, outDegree = get_boruvka_format(sp_cal)
    del sp_cal

    # Use each array's own item size instead of assuming 4-byte elements.
    graph_bytes = sum(arr.size * arr.itemsize
                      for arr in (dest, weight, firstEdge, outDegree))

    # Each print takes one pre-formatted string so the output is the same
    # under Python 2 and Python 3.
    print("# edges:             %s" % (dest.size,))
    print("# vertices:          %s" % (firstEdge.size,))
    print("size of graph (MB):  %s" % (graph_bytes / (1024.0 ** 2),))

    times_cpu = []
    times_gpu = []
    equal_mst = []
    equal_cost = []
    # NOTE(review): the timers are reused across rounds without a reset();
    # confirm that Timer.elapsed reports the last tic/tac interval only.
    t1, t2 = Timer(), Timer()

    for r in range(10):
        print("cpu round  %s" % (r,))
        t1.tic()
        mst1, n_edges1 = boruvka_minho_seq(dest, weight, firstEdge, outDegree)
        t1.tac()

        print("finished in  %s" % (t1.elapsed,))

        # Keep only the first n_edges1 entries of the edge array.
        if n_edges1 < mst1.size:
            mst1 = mst1[:n_edges1]

        print("gpu round  %s" % (r,))

        t2.tic()
        mst2, n_edges2 = boruvka_minho_gpu(dest, weight, firstEdge, outDegree,
                                           MAX_TPB=512)
        t2.tac()

        print("finished in  %s" % (t2.elapsed,))
        print("")

        if n_edges2 < mst2.size:
            mst2 = mst2[:n_edges2]

        equal_mst.append(np.in1d(mst1, mst2).all())
        equal_cost.append(weight[mst1].sum() == weight[mst2].sum())

        # The first round is not counted in the averages.
        if r > 0:
            times_cpu.append(t1.elapsed)
            times_gpu.append(t2.elapsed)

    print(equal_mst)
    print(equal_cost)
    print("average time cpu:  %s" % (np.mean(times_cpu),))
    print("average time gpu:  %s" % (np.mean(times_gpu),))
Пример #4
0
def device_boruvka():
    """Compute the MST of the "4elt" graph on the GPU and print the result.

    Prints the elapsed time, the sorted MST edge ids and the edge count.
    """
    print("CUDA BORUVKA")

    dest, weight, firstEdge, outDegree = load_graph("4elt")

    timer = Timer()
    timer.tic()
    edges, edge_count = boruvka_minho_gpu(dest, weight, firstEdge, outDegree)
    timer.tac()

    # Truncate the edge array to the reported number of edges.
    edges = edges[:edge_count] if edge_count < edges.size else edges

    print("time elapsed:  %s" % (timer.elapsed,))
    edges.sort()
    print(edges)
    print(edge_count)
Пример #5
0
def host_vs_device():
    """Run sequential and GPU Boruvka 20 times on "4elt" and compare them.

    For every round the number of edges shared by both MSTs and whether the
    two total weights match are recorded.  The summary printed at the end
    (graph size, costs, timings) refers to the last round.
    """
    print("HOST VS DEVICE")

    shared_edge_counts = []
    cost_matches = []

    for _ in range(20):
        dest, weight, firstEdge, outDegree = load_graph("4elt")

        host_timer, dev_timer = Timer(), Timer()

        # Host run.
        host_timer.tic()
        host_mst, host_n = boruvka_minho_seq(dest, weight,
                                             firstEdge, outDegree)
        host_timer.tac()

        if host_n < host_mst.size:
            host_mst = host_mst[:host_n]
        host_mst.sort()

        # Device run.
        dev_timer.tic()
        dev_mst, dev_n = boruvka_minho_gpu(dest, weight, firstEdge,
                                           outDegree, MAX_TPB=256)
        dev_timer.tac()

        if dev_n < dev_mst.size:
            dev_mst = dev_mst[:dev_n]
        dev_mst.sort()

        # Count the host edges that also appear in the device MST.
        shared_edge_counts.append(np.in1d(host_mst, dev_mst).sum())
        host_cost = weight[host_mst].sum()
        dev_cost = weight[dev_mst].sum()
        cost_matches.append(host_cost == dev_cost)

    print("no. edges:  %s" % (weight.size,))
    print("no. nodes:  %s" % (firstEdge.size,))

    print("Same solution:  %s" % (shared_edge_counts,))
    print("Same cost: %s" % (np.all(cost_matches),))

    print("Solution CPU cost:  %s" % (weight[host_mst].sum(),))
    print("Solution GPU cost:  %s" % (weight[dev_mst].sum(),))

    print("Host time elapsed:    %s" % (host_timer.elapsed,))
    print("Device time elapsed:  %s" % (dev_timer.elapsed,))
Пример #6
0
def analyze_graph_from_h5(filename, verbose=False):
    """Benchmark sequential vs. GPU Boruvka on a graph stored in an HDF5 file.

    The graph is loaded with ``load_h5_to_csr`` and both MST implementations
    are run for 10 rounds.  Per-round times (first round excluded), MST
    costs, MST/cost agreement and summary statistics are printed; the
    CSV-like lines start with a tag followed by edge count, vertex count and
    graph size in MB.

    Parameters
    ----------
    filename : path of the HDF5 file to load.
    verbose : when true, per-round progress lines are printed as well.

    Nothing is returned.
    """

    def v_print(vstr):
        # Progress output is only wanted in verbose mode.
        if verbose:
            print(vstr)

    csr_mat = load_h5_to_csr(filename)
    dest, weight, firstEdge, outDegree = get_boruvka_format(csr_mat)
    del csr_mat

    n_e = dest.size
    n_v = firstEdge.size
    mem = (dest.size * dest.itemsize +
           weight.size * weight.itemsize +
           firstEdge.size * firstEdge.itemsize +
           outDegree.size * outDegree.itemsize) / (1024.0 ** 2)
    # One pre-formatted string per print keeps the output identical on
    # Python 2 and Python 3.
    print("# edges:             %s" % (n_e,))
    print("# vertices:          %s" % (n_v,))
    print("size of graph (MB):  %s" % (mem,))

    times_cpu = []
    times_gpu = []
    equal_mst = []
    equal_cost = []
    mst_costs = {'cpu': [], 'gpu': []}
    t1, t2 = Timer(), Timer()

    for r in range(10):
        v_print('------ Round {} -------'.format(r))
        t1.reset()
        t1.tic()
        mst1, n_edges1 = boruvka_minho_seq(dest, weight, firstEdge, outDegree)
        t1.tac()
        v_print('CPU finished in {} s'.format(t1.elapsed))

        # Keep only the first n_edges1 entries of the edge array.
        if n_edges1 < mst1.size:
            mst1 = mst1[:n_edges1]

        t2.reset()
        t2.tic()
        mst2, n_edges2 = boruvka_minho_gpu(dest, weight, firstEdge, outDegree,
                                           MAX_TPB=512)
        t2.tac()
        v_print('GPU finished in {} s'.format(t2.elapsed))

        if n_edges2 < mst2.size:
            mst2 = mst2[:n_edges2]

        # Sum each tree's weight once and reuse it below.
        cost_cpu = weight[mst1].sum()
        cost_gpu = weight[mst2].sum()
        mst_costs['cpu'].append(cost_cpu)
        mst_costs['gpu'].append(cost_gpu)
        equal_mst.append(np.in1d(mst1, mst2).all())
        equal_cost.append(cost_cpu == cost_gpu)

        # The first round is not counted in the timing statistics.
        if r > 0:
            times_cpu.append(t1.elapsed)
            times_gpu.append(t2.elapsed)

    # Per-round absolute cost difference, normalised by the largest cost.
    # A real list is built because the values are consumed several times.
    max_cost = max(max(mst_costs['cpu']), max(mst_costs['gpu']))
    cost_error = [abs(c - g) / max_cost
                  for c, g in zip(mst_costs['cpu'], mst_costs['gpu'])]
    error_threshold = 1e-5

    cpu_str = ','.join(str(t) for t in times_cpu)
    gpu_str = ','.join(str(t) for t in times_gpu)
    cpu_costs = ','.join(str(c) for c in mst_costs['cpu'])
    gpu_costs = ','.join(str(c) for c in mst_costs['gpu'])

    print('dataset: {}'.format(os.path.basename(filename)))
    print('CPU times,{},{},{},{}'.format(n_e, n_v, mem, cpu_str))
    print('GPU times,{},{},{},{}'.format(n_e, n_v, mem, gpu_str))
    print('CPU costs,{},{},{},{}'.format(n_e, n_v, mem, cpu_costs))
    print('GPU costs,{},{},{},{}'.format(n_e, n_v, mem, gpu_costs))
    print('')
    # NOTE(review): this reports whether every round agrees with the FIRST
    # round's flag, so it is also True when all rounds disagree - confirm
    # that this is the intended meaning.
    print('All equal MSTs: {}'.format(
        np.all(np.array(equal_mst) == equal_mst[0])))
    print('All equal costs: {}'.format(np.all(equal_cost)))
    # The comparison is inclusive so that it matches the printed "<=".
    print('All cost errors <= {}: {}'.format(
        error_threshold,
        np.all([err <= error_threshold for err in cost_error])))
    print('Max normalized error: {}'.format(max(cost_error)))

    speedup = np.array(times_cpu) / np.array(times_gpu)

    print('Times(s)\tMean\tStd\tMax\tMin')
    print('CPU     \t{:.5F}\t{:.5F}\t{:.5F}\t{:.5F}'.format(
        np.mean(times_cpu), np.std(times_cpu),
        np.max(times_cpu), np.min(times_cpu)))
    print('GPU     \t{:.5F}\t{:.5F}\t{:.5F}\t{:.5F}'.format(
        np.mean(times_gpu), np.std(times_gpu),
        np.max(times_gpu), np.min(times_gpu)))
    print('SpeedUp \t{:.5F}\t{:.5F}\t{:.5F}\t{:.5F}'.format(
        np.mean(speedup), np.std(speedup),
        np.max(speedup), np.min(speedup)))
    print('Error   \t{:.5F}\t{:.5F}\t{:.5F}\t{:.5F}'.format(
        np.mean(cost_error), np.std(cost_error),
        np.max(cost_error), np.min(cost_error)))
Пример #7
0
def mst_cluster_coassoc():
    """Compare sequential and GPU Boruvka on the gaussmix1e4 co-association graph.

    The CSR arrays are read from text files, the weights are turned into
    dissimilarities (``100 - weight``) and the MST is computed first on the
    host and then on the GPU with explicitly transferred device arrays.
    Transfer times, compute times, MST sizes and whether both MSTs are
    identical are printed.  Nothing is returned.
    """
    t1, t2 = Timer(), Timer()

    #foldername = "/home/courses/aac2015/diogoaos/QCThesis/datasets/gaussmix1e4/"
    foldername = home + "QCThesis/datasets/gaussmix1e4/"

    # One pre-formatted string per print keeps the output identical on
    # Python 2 and Python 3.
    print("Loading datasets")

    t1.tic()
    # Alternative ("prot") dataset:
    # dest = np.genfromtxt(foldername + "prot_dest.csr", dtype = np.int32, delimiter=",")
    # weight = np.genfromtxt(foldername + "prot_weight.csr", dtype = np.float32, delimiter=",")
    # fe = np.genfromtxt(foldername + "prot_fe.csr", dtype = np.int32, delimiter=",")

    dest = np.genfromtxt(foldername + "full_dest.csr",
                         dtype=np.int32, delimiter=",")
    weight = np.genfromtxt(foldername + "full_weight.csr",
                           dtype=np.float32, delimiter=",")
    fe = np.genfromtxt(foldername + "full_fe.csr",
                       dtype=np.int32, delimiter=",")
    t1.tac()

    print("loading elapsed time :  %s" % (t1.elapsed,))

    # Drop the trailing first-edge entry and derive the out-degrees.
    fe = fe[:-1]
    od = np.empty_like(fe)
    outdegree_from_firstedge(fe, od, dest.size)

    # fix weights to dissimilarity
    weight = 100 - weight

    print("# edges :  %s" % (dest.size,))
    print("# vertices :  %s" % (fe.size,))
    print("edges/vertices ratio :  %s" % (dest.size * 1.0 / fe.size,))

    # --- sequential run ---
    t1.tic()
    mst, n_edges = boruvka_minho_seq(dest, weight, fe, od)
    t1.tac()

    print("seq: time elapsed :  %s" % (t1.elapsed,))
    print("seq: mst size : %s" % (mst.size,))
    print("seq: n_edges :  %s" % (n_edges,))

    if n_edges < mst.size:
        mst = mst[:n_edges]
    mst.sort()

    # --- GPU run, with host<->device transfers timed by CUDA events ---
    ev1, ev2 = cuda.event(), cuda.event()

    ev1.record()
    d_dest = cuda.to_device(dest)
    d_weight = cuda.to_device(weight)
    d_fe = cuda.to_device(fe)
    d_od = cuda.to_device(od)
    ev2.record()
    # The stop event must have completed before its timestamp is read.
    ev2.synchronize()

    send_graph_time = cuda.event_elapsed_time(ev1, ev2)

    t2.tic()
    mst2, n_edges2 = boruvka_minho_gpu(d_dest, d_weight, d_fe, d_od,
                                       MAX_TPB=512, returnDevAry=True)
    t2.tac()

    ev1.record()
    mst2 = mst2.copy_to_host()
    n_edges2 = n_edges2.getitem(0)
    ev2.record()
    ev2.synchronize()

    recv_mst_time = cuda.event_elapsed_time(ev1, ev2)
    print("gpu: send graph time :  %s" % (send_graph_time,))
    print("gpu: time elapsed :  %s" % (t2.elapsed,))
    print("gpu: rcv mst time :  %s" % (recv_mst_time,))
    print("gpu: mst size : %s" % (mst2.size,))
    # This is the GPU edge count (it used to be printed under a "seq:" label).
    print("gpu: n_edges :  %s" % (n_edges2,))

    if n_edges2 < mst2.size:
        mst2 = mst2[:n_edges2]
    mst2.sort()

    # An element-wise comparison is only meaningful for equal edge counts.
    if n_edges == n_edges2:
        mst_is_equal = (mst == mst2).all()
    else:
        mst_is_equal = False
    print("mst gpu == seq :  %s" % (mst_is_equal,))
Пример #8
0
def check_colors():
    """Compare sequential and GPU MST + labelling on the USA-road-d.CAL graph.

    Computes the MST with both Boruvka implementations, removes the last two
    entries of the sorted sequential MST edge list, builds the graph induced
    by the remaining edges with ``get_new_graph`` and labels it with both
    ``getLabels_seq`` and ``getLabels_gpu``.  Timings, sizes and whether the
    host and device results agree are printed.  Nothing is returned.
    """
    # One pre-formatted string per print keeps the output identical on
    # Python 2 and Python 3.
    print("CHECK COLORS SEQ & CUDA")

    #dest, weight, firstEdge, outDegree = load_graph("4elt")

    sp_cal = load_csr_graph(home + "QCThesis/datasets/graphs/USA-road-d.CAL.csr")
    dest, weight, firstEdge, outDegree = get_boruvka_format(sp_cal)
    del sp_cal

    # Use each array's own item size instead of assuming 4-byte elements.
    graph_bytes = sum(arr.size * arr.itemsize
                      for arr in (dest, weight, firstEdge, outDegree))

    print("# edges:             %s" % (dest.size,))
    print("# vertices:          %s" % (firstEdge.size,))
    print("size of graph (MB):  %s" % (graph_bytes / (1024.0 ** 2),))

    print("# vertices:  %s" % (firstEdge.size,))
    print("# edges:     %s" % (dest.size,))

    print("seq: Computing MST")

    # NOTE(review): one Timer is reused for every measurement without a
    # reset(); confirm that Timer.elapsed reports the last interval only.
    t1 = Timer()
    t1.tic()
    mst, n_edges = boruvka_minho_seq(dest, weight, firstEdge, outDegree)
    t1.tac()

    print("seq: time elapsed:  %s" % (t1.elapsed,))
    print("seq: mst size : %s" % (mst.size,))
    print("seq: n_edges:  %s" % (n_edges,))

    # Keep only the first n_edges entries of the edge array.
    if n_edges < mst.size:
        mst = mst[:n_edges]
    mst.sort()

    print("gpu: Computing MST")

    t1.tic()
    mst2, n_edges2 = boruvka_minho_gpu(dest, weight, firstEdge, outDegree,
                                       MAX_TPB=256)
    t1.tac()

    print("gpu: time elapsed:  %s" % (t1.elapsed,))
    print("gpu: mst size : %s" % (mst2.size,))
    # This is the GPU edge count (it used to be printed under a "seq:" label).
    print("gpu: n_edges:  %s" % (n_edges2,))

    if n_edges2 < mst2.size:
        mst2 = mst2[:n_edges2]
    mst2.sort()

    # An element-wise comparison is only meaningful for equal edge counts.
    if n_edges == n_edges2:
        mst_is_equal = (mst == mst2).all()
    else:
        mst_is_equal = False
    print("mst gpu == seq:  %s" % (mst_is_equal,))

    # make two cuts: drop the last two entries of the sorted edge list
    mst = mst[:-2]

    print("seq: Generating MST graph")
    # Output buffers filled by get_new_graph; every kept edge gets two
    # slots in the destination/weight arrays.
    nod = np.zeros(outDegree.size, dtype=outDegree.dtype)
    nfe = np.empty(firstEdge.size, dtype=firstEdge.dtype)
    ndest = np.empty(mst.size * 2, dtype=dest.dtype)
    nweight = np.empty(mst.size * 2, dtype=weight.dtype)

    t1.tic()
    get_new_graph(dest, weight, firstEdge, outDegree, mst,
                  nod, nfe, ndest, nweight)
    t1.tac()

    print("seq: time elapsed:  %s" % (t1.elapsed,))

    print("seq: Computing labels")
    t1.tic()
    colors = getLabels_seq(ndest, nweight, nfe, nod)
    t1.tac()

    print("seq: time elapsed:  %s" % (t1.elapsed,))
    print("seq: # colors:      %s" % (np.unique(colors).size,))

    print("gpu: Computing labels")
    t1.tic()
    colors2 = getLabels_gpu(ndest, nweight, nfe, nod, MAX_TPB=256)
    t1.tac()

    print("gpu: time elapsed:  %s" % (t1.elapsed,))
    print("gpu: # colors:      %s" % (np.unique(colors2).size,))

    print("colors gpu == seq:  %s" % ((colors == colors2).all(),))
Пример #9
0
def sl_mst_lifetime_gpu(dest,
                        weight,
                        fe,
                        od,
                        disconnect_weight=None,
                        MAX_TPB=256,
                        stream=None):
    """
    Label the vertices of a graph by cutting its MST at the largest lifetime.

    The MST is computed on the GPU, its edge weights are sorted and the
    lifetimes between them are computed.  If the largest lifetime is at
    least ``disconnect_weight`` minus the last sorted MST weight, the edges
    from that point onward are removed.  The connected components of the
    remaining edges are returned as labels.

    Inputs:
     dest, weight, fe, od  : graph arrays handed to boruvka_minho_gpu
                             (described as device arrays by the original
                             author)
     disconnect_weight     : weight between unconnected vertices; defaults
                             to weight.max()
                             NOTE(review): that default presumably needs
                             `weight` to offer a host-side max() - confirm
                             for device arrays
     MAX_TPB               : number of threads per block for the kernels
                             launched here (connected_comps_gpu is always
                             called with 512)
     stream                : CUDA stream to use; a new one is created when
                             None
    Returns:
     labels                : result of connected_comps_gpu on the graph
                             built from the remaining MST edges
    TODO:
     - argmax is from cuBlas and only works with 32/64 floats. Make this work
       with any type.
    """

    if disconnect_weight is None:
        disconnect_weight = weight.max()

    myStream = cuda.stream() if stream is None else stream

    mst, n_edges = boruvka_minho_gpu(dest,
                                     weight,
                                     fe,
                                     od,
                                     MAX_TPB=MAX_TPB,
                                     stream=myStream,
                                     returnDevAry=True)

    # Allocate array for the mst weights.
    # h_n_edges is the host copy of the number of edges to keep in the MST.
    h_n_edges = int(n_edges.getitem(0, stream=myStream))
    mst_weights = cuda.device_array(h_n_edges, dtype=weight.dtype)

    # Get array with only the considered weights in the MST
    # and remove those edges in the MST edge list
    mstGrid = compute_cuda_grid_dim(h_n_edges, MAX_TPB)
    d_weight = cuda.to_device(weight, stream=myStream)
    getWeightsOfEdges_gpu[mstGrid, MAX_TPB, myStream](mst, n_edges, d_weight,
                                                      mst_weights)

    # Sort the MST weights. There are no repeated edges at this
    # point since the output MST is like a directed graph.
    sorter = RadixSort(maxcount=mst_weights.size,
                       dtype=mst_weights.dtype,
                       stream=myStream)
    sortedWeightArgs = sorter.argsort(mst_weights)

    # Allocate array for the lifetimes.
    lifetimes = cuda.device_array(mst_weights.size - 1,
                                  dtype=mst_weights.dtype)
    compute_lifetimes_CUDA[mstGrid, MAX_TPB, myStream](mst_weights, lifetimes)

    # Use the same stream as every other operation here; `stream` itself
    # may be None when the caller did not supply one.
    maxer = Blas(myStream)
    arg_max_lt = maxer.amax(lifetimes)
    max_lt = lifetimes.getitem(arg_max_lt)

    # this is the lifetime between edges with no connection and the weakest link
    #lt_threshold = disconnect_weight - max_lt
    last_weight = mst_weights.getitem(mst_weights.size - 1)
    lt_threshold = disconnect_weight - last_weight

    # if the maximum lifetime is higher or equal than the lifetime threshold
    # cut the tree
    if max_lt >= lt_threshold:
        # from arg_max_lt onward all edges are discarded
        n_discarded = lifetimes.size - arg_max_lt + 1

        # remove edges: the kernel receives the MST edge list and the
        # argsort of its weights (it used to be called with the undefined
        # names `edgeList` and `sortedArgs`)
        removeGrid = compute_cuda_grid_dim(n_discarded, MAX_TPB)
        removeEdges[removeGrid, MAX_TPB, myStream](mst, sortedWeightArgs,
                                                   n_discarded)

        # compute new amount of edges and update it on the device
        new_n_edges = h_n_edges - n_discarded
        cuda.to_device(np.array([new_n_edges], dtype=n_edges.dtype),
                       to=n_edges,
                       stream=myStream)

    ngraph = getGraphFromEdges_gpu(dest,
                                   weight,
                                   fe,
                                   od,
                                   edges=mst,
                                   n_edges=n_edges,
                                   MAX_TPB=MAX_TPB,
                                   stream=myStream)

    ndest, nweight, nfe, nod = ngraph

    labels = connected_comps_gpu(ndest,
                                 nweight,
                                 nfe,
                                 nod,
                                 MAX_TPB=512,
                                 stream=myStream)

    # Drop the references to the intermediate arrays before returning.
    del ndest, nweight, nfe, nod, lifetimes

    return labels