Exemplos de calc_cpu em Python, exemplos de cpu_kmeans.calc_cpu em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: cuda_kmeans_tri.py Projeto: dbelll/bell_d_project

def run_tests1(nTests, nPts, nDim, nClusters, nReps=1, verbose = VERBOSE, 
                print_times = PRINT_TIMES):
    # run_tests(nTests, nPts, nDim, nClusters, nReps [, verbose [, print_times]]
    # Runs one repition and checks various intermdiate values against a cpu calculation
    
    if nReps > 1:
        print "This method only runs test for nReps == 1"
        return 1
        
    # Generate nPts random data elements with nDim dimensions and nCluster random clusters,
    # then run kmeans for nReps and compare gpu and cpu results.  This is repeated nTests times
    cpu_time = 0.
    gpu_time = 0.
    
    gpu_data_time = 0.
    gpu_module_time = 0.
    gpu_ccdist_time = 0.
    gpu_hdclosest_time = 0.
    gpu_init_time = 0.
    gpu_step3_time = 0.
    gpu_step4_time = 0.
    gpu_step56_time = 0.

    np.random.seed(SEED)
    data = np.random.rand(nDim, nPts).astype(np.float32)
    clusters = np.random.rand(nDim, nClusters).astype(np.float32)

    if verbose:
        print "data"
        print data
        print "\nclusters"
        print clusters

    nErrors = 0

    # repeat this test nTests times
    for iTest in range(nTests):
    
        #run the gpu algorithm
        t1 = time.time()
        (gpu_ccdist, gpu_hdClosest, gpu_assignments, gpu_lower, gpu_upper, \
            gpu_clusters2, gpu_cluster_movement, \
            data_time, module_time, init_time, ccdist_time, hdclosest_time, \
            step3_time, step4_time, step56_time) = \
            trikmeans_gpu(data, clusters, nReps, 1)
        t2 = time.time()        
        gpu_time += t2-t1
        gpu_data_time += data_time
        gpu_module_time += module_time
        gpu_ccdist_time += ccdist_time
        gpu_hdclosest_time += hdclosest_time
        gpu_init_time += init_time
        gpu_step3_time += step3_time
        gpu_step4_time += step4_time
        gpu_step56_time += step56_time
        
        if verbose:
            print "------------------------ gpu results ------------------------"
            print "cluster-cluster distances"
            print gpu_ccdist
            print "half distance to closest"
            print gpu_hdClosest
            print "gpu time = ", t2-t1
            print "gpu_assignments"
            print gpu_assignments
            print "gpu_lower"
            print gpu_lower
            print "gpu_upper"
            print gpu_upper
            print "gpu_clusters2"
            print gpu_clusters2
            print "-------------------------------------------------------------"
            

        # check ccdist and hdClosest
        ccdist = np.array(gpu_ccdist.get())
        hdClosest = np.array(gpu_hdClosest.get())
        
        t1 = time.time()
        cpu_ccdist = 0.5 * np.sqrt(((clusters[:,:,np.newaxis]-clusters[:,np.newaxis,:])**2).sum(0))
        t2 = time.time()
        cpu_ccdist_time = t2-t1
        
        if verbose:
            print "cpu_ccdist"
            print cpu_ccdist
        
        error = np.abs(cpu_ccdist - ccdist)
        if np.max(error) > 1e-7 * nDim * 2:
            print "iteration", iTest,
            print "***ERROR*** max ccdist error =", np.max(error)
            nErrors += 1
        if verbose:
            print "average ccdist error =", np.mean(error)
            print "max ccdist error     =", np.max(error)
        
        t1 = time.time()
        cpu_ccdist[cpu_ccdist == 0.] = 1e10
        good_hdClosest = np.min(cpu_ccdist, 0)
        t2 = time.time()
        cpu_hdclosest_time = t2-t1
        
        if verbose:
            print "good_hdClosest"
            print good_hdClosest
        err = np.abs(good_hdClosest - hdClosest)
        if np.max(err) > 1e-7 * nDim:
            print "***ERROR*** max hdClosest error =", np.max(err)
            nErrors += 1
        if verbose:
            print "errors on hdClosest"
            print err
            print "max error on hdClosest =", np.max(err)
    
    
        # calculate cpu initial assignments
        t1 = time.time()
        cpu_assign = assign_cpu(data, clusters)
        t2 = time.time()
        cpu_assign_time = t2-t1
        
        if verbose:
            print "assignments shape =", cpu_assign.shape
            print "data shape =", data.shape
            print "cpu assignments"
            print cpu_assign
            print "gpu assignments"
            print gpu_assignments
            print "gpu new clusters"
            print gpu_clusters2
            
        differences = sum(gpu_assignments.get() - cpu_assign)
        if(differences > 0):
            nErrors += 1
            print differences, "errors in initial assignment"
        else:
            if verbose:
                print "initial cluster assignments match"
    
        # calculate the number of data points in each cluster
        c = np.arange(nClusters)
        c_counts = np.sum(cpu_assign.reshape(nPts,1) == c, axis=0)

        # calculate cpu new cluster values:
        t1 = time.time()
        cpu_new_clusters = calc_cpu(data, cpu_assign, clusters)
        t2 = time.time()
        cpu_calc_time = t2-t1
        
        if verbose:
            print "cpu new clusters"
            print cpu_new_clusters
        
        diff = np.max(np.abs(gpu_clusters2 - cpu_new_clusters))
        if diff > 1e-7 * max(c_counts) or math.isnan(diff):
            iDiff = np.arange(nClusters)[((gpu_clusters2 - cpu_new_clusters)**2).sum(0) > 1e-7]
            print "clusters that differ:"
            print iDiff
            nErrors += 1
            if verbose:
                print "Test",iTest,"*** ERROR *** max diff was", diff
                for x in iDiff:
                    print "\ndata for cluster ",x
                    print "gpu:"
                    print gpu_clusters2[:,x]
                    print "cpu:"
                    print cpu_new_clusters[:,x]
                    print "points assigned:"
                    for ii in range(nPts):
                        if cpu_assign[ii] == x:
                            print "data point #",ii
                            print data[:,ii]
        else:
            if verbose:
                print "Test", iTest, "OK"
        
        #check if the cluster movement values are correct
        cpu_cluster_movement = np.sqrt(((clusters - cpu_new_clusters)**2).sum(0))
        diff = np.max(np.abs(cpu_cluster_movement - gpu_cluster_movement.get()))
        if diff > 1e-6 * nDim:
            print "*** ERROR *** max cluster movement error =", diff
            nErrors += 1
        if verbose:
            print "cpu cluster movements"
            print cpu_cluster_movement
            print "gpu cluster movements"
            print gpu_cluster_movement
            print "max diff in cluster movements is", diff
        
        cpu_time = cpu_assign_time + cpu_calc_time
    

    if print_times:
        print "\n---------------------------------------------"
        print "nPts      =", nPts
        print "nDim      =", nDim
        print "nClusters =", nClusters
        print "nReps     =", nReps
        print "average cpu time (ms) =", cpu_time/nTests*1000.
        print "     assign time (ms) =", cpu_assign_time/nTests*1000.
        if nReps == 1:
            print "       calc time (ms) =", cpu_calc_time/nTests*1000.
            print "average gpu time (ms) =", gpu_time/nTests*1000.
        else:
            print "       calc time (ms) ="
            print "average gpu time (ms) ="
        print "       data time (ms) =", gpu_data_time/nTests*1000.
        print "     module time (ms) =", gpu_module_time/nTests*1000.
        print "       init time (ms) =", gpu_init_time/nTests*1000.        
        print "     ccdist time (ms) =", gpu_ccdist_time/nTests*1000.        
        print "  hdclosest time (ms) =", gpu_hdclosest_time/nTests*1000.        
        print "      step3 time (ms) =", gpu_step3_time/nTests*1000.        
        print "      step4 time (ms) =", gpu_step4_time/nTests*1000.        
        print "     step56 time (ms) =", gpu_step56_time/nTests*1000.        
        print "---------------------------------------------"

    return nErrors

Exemplo n.º 2

0

Exibir arquivo

Arquivo: cuda_kmeans.py Projeto: dbelll/kmeans

def run_tests1(nTests, nPts, nDim, nClusters, nReps=1, verbose = VERBOSE, 
                print_times = PRINT_TIMES):
    # run_tests(nTests, nPts, nDim, nClusters, nReps [, verbose [, print_times]]
    # Runs one repition and checks various intermdiate values against a cpu calculation
    
    if nReps > 1:
        print "This method only runs test for nReps == 1"
        return 1
        
    # Generate nPts random data elements with nDim dimensions and nCluster random clusters,
    # then run kmeans for nReps and compare gpu and cpu results.  This is repeated nTests times
    cpu_time = 0.
    gpu_time = 0.
    
    gpu_data_time = 0.
    gpu_module_time = 0.
    gpu_assign_time = 0.
    gpu_calc_time = 0.

    np.random.seed(SEED)
    data = np.random.rand(nDim, nPts).astype(np.float32)
    clusters = np.random.rand(nDim, nClusters).astype(np.float32)

    if verbose:
        print "data"
        print data
        print "\nclusters"
        print clusters

    nErrors = 0

    # repeat this test nTests times
    for iTest in range(nTests):
    
        #run the gpu algorithm
        t1 = time.time()
        (gpu_assignments, gpu_clusters2, \
            data_time, module_time, assign_time, calc_time) = \
            kmeans_gpu(data, clusters, nReps, 1)
        pycuda.autoinit.context.synchronize()
        t2 = time.time()        
        gpu_time += t2-t1
        gpu_data_time += data_time
        gpu_module_time += module_time
        gpu_assign_time += assign_time
        gpu_calc_time += calc_time
        
        if verbose:
            print "------------------------ gpu results ------------------------"
            print "gpu time = ", t2-t1
            print "gpu_assignments"
            print gpu_assignments
            print "gpu_clusters2"
            print gpu_clusters2
            print "-------------------------------------------------------------"
    
    
        # calculate cpu initial assignments
        t1 = time.time()
        cpu_assign = assign_cpu(data, clusters)
        t2 = time.time()
        cpu_assign_time = t2-t1
        
        if verbose:
            print "assignments shape =", cpu_assign.shape
            print "data shape =", data.shape
            print "cpu assignments"
            print cpu_assign
            print "gpu assignments"
            print gpu_assignments
            print "gpu new clusters"
            print gpu_clusters2
            
        differences = sum(gpu_assignments.get() - cpu_assign)
        if(differences > 0):
            nErrors += 1
            print differences, "errors in initial assignment"
        else:
            if verbose:
                print "initial cluster assignments match"
    
        # calculate the number of data points in each cluster
        c = np.arange(nClusters)
        c_counts = np.sum(cpu_assign.reshape(nPts,1) == c, axis=0)

        # calculate cpu new cluster values:
        t1 = time.time()
        cpu_new_clusters = calc_cpu(data, cpu_assign, clusters)
        t2 = time.time()
        cpu_calc_time = t2-t1
        
        if verbose:
            print "cpu new clusters"
            print cpu_new_clusters
        
        diff = np.max(np.abs(gpu_clusters2 - cpu_new_clusters))
        if diff > 1e-7 * max(c_counts) or math.isnan(diff):
            iDiff = np.arange(nClusters)[((gpu_clusters2 - cpu_new_clusters)**2).sum(0) > 1e-7]
            print "clusters that differ:"
            print iDiff
            nErrors += 1
            if verbose:
                print "Test",iTest,"*** ERROR *** max diff was", diff
                for x in iDiff:
                    print "\ndata for cluster ",x
                    print "gpu:"
                    print gpu_clusters2[:,x]
                    print "cpu:"
                    print cpu_new_clusters[:,x]
                    print "points assigned:"
                    for ii in range(nPts):
                        if cpu_assign[ii] == x:
                            print "data point #",ii
                            print data[:,ii]
        else:
            if verbose:
                print "Test", iTest, "OK"
        
        cpu_time = cpu_assign_time + cpu_calc_time
    

    if print_times:
        print "\n---------------------------------------------"
        print "nPts      =", nPts
        print "nDim      =", nDim
        print "nClusters =", nClusters
        print "nReps     =", nReps
        print "average cpu time (ms) =", cpu_time/nTests*1000.
        print "     assign time (ms) =", cpu_assign_time/nTests*1000.
        print "       calc time (ms) =", cpu_calc_time/nTests*1000.
        print "average gpu time (ms) =", gpu_time/nTests*1000.
        print "       data time (ms) =", gpu_data_time/nTests*1000.
        print "     module time (ms) =", gpu_module_time/nTests*1000.
        print "     assign time (ms) =", gpu_assign_time/nTests*1000.        
        print "       calc time (ms) =", gpu_calc_time/nTests*1000.        
        print "---------------------------------------------"

    return nErrors

Exemplo n.º 3

0

Exibir arquivo

def run_tests1(nTests,
               nPts,
               nDim,
               nClusters,
               nReps=1,
               verbose=VERBOSE,
               print_times=PRINT_TIMES):
    # run_tests(nTests, nPts, nDim, nClusters, nReps [, verbose [, print_times]]
    # Runs one repition and checks various intermdiate values against a cpu calculation

    if nReps > 1:
        print "This method only runs test for nReps == 1"
        return 1

    # Generate nPts random data elements with nDim dimensions and nCluster random clusters,
    # then run kmeans for nReps and compare gpu and cpu results.  This is repeated nTests times
    cpu_time = 0.
    gpu_time = 0.

    gpu_data_time = 0.
    gpu_module_time = 0.
    gpu_ccdist_time = 0.
    gpu_hdclosest_time = 0.
    gpu_init_time = 0.
    gpu_step3_time = 0.
    gpu_step4_time = 0.
    gpu_step56_time = 0.

    np.random.seed(SEED)
    data = np.random.rand(nDim, nPts).astype(np.float32)
    clusters = np.random.rand(nDim, nClusters).astype(np.float32)

    if verbose:
        print "data"
        print data
        print "\nclusters"
        print clusters

    nErrors = 0

    # repeat this test nTests times
    for iTest in range(nTests):

        #run the gpu algorithm
        t1 = time.time()
        (gpu_ccdist, gpu_hdClosest, gpu_assignments, gpu_lower, gpu_upper, \
            gpu_clusters2, gpu_cluster_movement, \
            data_time, module_time, init_time, ccdist_time, hdclosest_time, \
            step3_time, step4_time, step56_time) = \
            trikmeans_gpu(data, clusters, nReps, 1)
        t2 = time.time()
        gpu_time += t2 - t1
        gpu_data_time += data_time
        gpu_module_time += module_time
        gpu_ccdist_time += ccdist_time
        gpu_hdclosest_time += hdclosest_time
        gpu_init_time += init_time
        gpu_step3_time += step3_time
        gpu_step4_time += step4_time
        gpu_step56_time += step56_time

        if verbose:
            print "------------------------ gpu results ------------------------"
            print "cluster-cluster distances"
            print gpu_ccdist
            print "half distance to closest"
            print gpu_hdClosest
            print "gpu time = ", t2 - t1
            print "gpu_assignments"
            print gpu_assignments
            print "gpu_lower"
            print gpu_lower
            print "gpu_upper"
            print gpu_upper
            print "gpu_clusters2"
            print gpu_clusters2
            print "-------------------------------------------------------------"

        # check ccdist and hdClosest
        ccdist = np.array(gpu_ccdist.get())
        hdClosest = np.array(gpu_hdClosest.get())

        t1 = time.time()
        cpu_ccdist = 0.5 * np.sqrt(
            ((clusters[:, :, np.newaxis] - clusters[:, np.newaxis, :])**
             2).sum(0))
        t2 = time.time()
        cpu_ccdist_time = t2 - t1

        if verbose:
            print "cpu_ccdist"
            print cpu_ccdist

        error = np.abs(cpu_ccdist - ccdist)
        if np.max(error) > 1e-7 * nDim * 2:
            print "iteration", iTest,
            print "***ERROR*** max ccdist error =", np.max(error)
            nErrors += 1
        if verbose:
            print "average ccdist error =", np.mean(error)
            print "max ccdist error     =", np.max(error)

        t1 = time.time()
        cpu_ccdist[cpu_ccdist == 0.] = 1e10
        good_hdClosest = np.min(cpu_ccdist, 0)
        t2 = time.time()
        cpu_hdclosest_time = t2 - t1

        if verbose:
            print "good_hdClosest"
            print good_hdClosest
        err = np.abs(good_hdClosest - hdClosest)
        if np.max(err) > 1e-7 * nDim:
            print "***ERROR*** max hdClosest error =", np.max(err)
            nErrors += 1
        if verbose:
            print "errors on hdClosest"
            print err
            print "max error on hdClosest =", np.max(err)

        # calculate cpu initial assignments
        t1 = time.time()
        cpu_assign = assign_cpu(data, clusters)
        t2 = time.time()
        cpu_assign_time = t2 - t1

        if verbose:
            print "assignments shape =", cpu_assign.shape
            print "data shape =", data.shape
            print "cpu assignments"
            print cpu_assign
            print "gpu assignments"
            print gpu_assignments
            print "gpu new clusters"
            print gpu_clusters2

        differences = sum(gpu_assignments.get() - cpu_assign)
        if (differences > 0):
            nErrors += 1
            print differences, "errors in initial assignment"
        else:
            if verbose:
                print "initial cluster assignments match"

        # calculate the number of data points in each cluster
        c = np.arange(nClusters)
        c_counts = np.sum(cpu_assign.reshape(nPts, 1) == c, axis=0)

        # calculate cpu new cluster values:
        t1 = time.time()
        cpu_new_clusters = calc_cpu(data, cpu_assign, clusters)
        t2 = time.time()
        cpu_calc_time = t2 - t1

        if verbose:
            print "cpu new clusters"
            print cpu_new_clusters

        diff = np.max(np.abs(gpu_clusters2 - cpu_new_clusters))
        if diff > 1e-7 * max(c_counts) or math.isnan(diff):
            iDiff = np.arange(nClusters)[(
                (gpu_clusters2 - cpu_new_clusters)**2).sum(0) > 1e-7]
            print "clusters that differ:"
            print iDiff
            nErrors += 1
            if verbose:
                print "Test", iTest, "*** ERROR *** max diff was", diff
                for x in iDiff:
                    print "\ndata for cluster ", x
                    print "gpu:"
                    print gpu_clusters2[:, x]
                    print "cpu:"
                    print cpu_new_clusters[:, x]
                    print "points assigned:"
                    for ii in range(nPts):
                        if cpu_assign[ii] == x:
                            print "data point #", ii
                            print data[:, ii]
        else:
            if verbose:
                print "Test", iTest, "OK"

        #check if the cluster movement values are correct
        cpu_cluster_movement = np.sqrt(
            ((clusters - cpu_new_clusters)**2).sum(0))
        diff = np.max(np.abs(cpu_cluster_movement -
                             gpu_cluster_movement.get()))
        if diff > 1e-6 * nDim:
            print "*** ERROR *** max cluster movement error =", diff
            nErrors += 1
        if verbose:
            print "cpu cluster movements"
            print cpu_cluster_movement
            print "gpu cluster movements"
            print gpu_cluster_movement
            print "max diff in cluster movements is", diff

        cpu_time = cpu_assign_time + cpu_calc_time

    if print_times:
        print "\n---------------------------------------------"
        print "nPts      =", nPts
        print "nDim      =", nDim
        print "nClusters =", nClusters
        print "nReps     =", nReps
        print "average cpu time (ms) =", cpu_time / nTests * 1000.
        print "     assign time (ms) =", cpu_assign_time / nTests * 1000.
        if nReps == 1:
            print "       calc time (ms) =", cpu_calc_time / nTests * 1000.
            print "average gpu time (ms) =", gpu_time / nTests * 1000.
        else:
            print "       calc time (ms) ="
            print "average gpu time (ms) ="
        print "       data time (ms) =", gpu_data_time / nTests * 1000.
        print "     module time (ms) =", gpu_module_time / nTests * 1000.
        print "       init time (ms) =", gpu_init_time / nTests * 1000.
        print "     ccdist time (ms) =", gpu_ccdist_time / nTests * 1000.
        print "  hdclosest time (ms) =", gpu_hdclosest_time / nTests * 1000.
        print "      step3 time (ms) =", gpu_step3_time / nTests * 1000.
        print "      step4 time (ms) =", gpu_step4_time / nTests * 1000.
        print "     step56 time (ms) =", gpu_step56_time / nTests * 1000.
        print "---------------------------------------------"

    return nErrors

Exemplo n.º 4

0

Exibir arquivo

def run_tests1(nTests,
               nPts,
               nDim,
               nClusters,
               nReps=1,
               verbose=VERBOSE,
               print_times=PRINT_TIMES):
    # run_tests(nTests, nPts, nDim, nClusters, nReps [, verbose [, print_times]]
    # Runs one repition and checks various intermdiate values against a cpu calculation

    if nReps > 1:
        print "This method only runs test for nReps == 1"
        return 1

    # Generate nPts random data elements with nDim dimensions and nCluster random clusters,
    # then run kmeans for nReps and compare gpu and cpu results.  This is repeated nTests times
    cpu_time = 0.
    gpu_time = 0.

    gpu_data_time = 0.
    gpu_module_time = 0.
    gpu_assign_time = 0.
    gpu_calc_time = 0.

    np.random.seed(SEED)
    data = np.random.rand(nDim, nPts).astype(np.float32)
    clusters = np.random.rand(nDim, nClusters).astype(np.float32)

    if verbose:
        print "data"
        print data
        print "\nclusters"
        print clusters

    nErrors = 0

    # repeat this test nTests times
    for iTest in range(nTests):

        #run the gpu algorithm
        t1 = time.time()
        (gpu_assignments, gpu_clusters2, \
            data_time, module_time, assign_time, calc_time) = \
            kmeans_gpu(data, clusters, nReps, 1)
        pycuda.autoinit.context.synchronize()
        t2 = time.time()
        gpu_time += t2 - t1
        gpu_data_time += data_time
        gpu_module_time += module_time
        gpu_assign_time += assign_time
        gpu_calc_time += calc_time

        if verbose:
            print "------------------------ gpu results ------------------------"
            print "gpu time = ", t2 - t1
            print "gpu_assignments"
            print gpu_assignments
            print "gpu_clusters2"
            print gpu_clusters2
            print "-------------------------------------------------------------"

        # calculate cpu initial assignments
        t1 = time.time()
        cpu_assign = assign_cpu(data, clusters)
        t2 = time.time()
        cpu_assign_time = t2 - t1

        if verbose:
            print "assignments shape =", cpu_assign.shape
            print "data shape =", data.shape
            print "cpu assignments"
            print cpu_assign
            print "gpu assignments"
            print gpu_assignments
            print "gpu new clusters"
            print gpu_clusters2

        differences = sum(gpu_assignments.get() - cpu_assign)
        if (differences > 0):
            nErrors += 1
            print differences, "errors in initial assignment"
        else:
            if verbose:
                print "initial cluster assignments match"

        # calculate the number of data points in each cluster
        c = np.arange(nClusters)
        c_counts = np.sum(cpu_assign.reshape(nPts, 1) == c, axis=0)

        # calculate cpu new cluster values:
        t1 = time.time()
        cpu_new_clusters = calc_cpu(data, cpu_assign, clusters)
        t2 = time.time()
        cpu_calc_time = t2 - t1

        if verbose:
            print "cpu new clusters"
            print cpu_new_clusters

        diff = np.max(np.abs(gpu_clusters2 - cpu_new_clusters))
        if diff > 1e-7 * max(c_counts) or math.isnan(diff):
            iDiff = np.arange(nClusters)[(
                (gpu_clusters2 - cpu_new_clusters)**2).sum(0) > 1e-7]
            print "clusters that differ:"
            print iDiff
            nErrors += 1
            if verbose:
                print "Test", iTest, "*** ERROR *** max diff was", diff
                for x in iDiff:
                    print "\ndata for cluster ", x
                    print "gpu:"
                    print gpu_clusters2[:, x]
                    print "cpu:"
                    print cpu_new_clusters[:, x]
                    print "points assigned:"
                    for ii in range(nPts):
                        if cpu_assign[ii] == x:
                            print "data point #", ii
                            print data[:, ii]
        else:
            if verbose:
                print "Test", iTest, "OK"

        cpu_time = cpu_assign_time + cpu_calc_time

    if print_times:
        print "\n---------------------------------------------"
        print "nPts      =", nPts
        print "nDim      =", nDim
        print "nClusters =", nClusters
        print "nReps     =", nReps
        print "average cpu time (ms) =", cpu_time / nTests * 1000.
        print "     assign time (ms) =", cpu_assign_time / nTests * 1000.
        print "       calc time (ms) =", cpu_calc_time / nTests * 1000.
        print "average gpu time (ms) =", gpu_time / nTests * 1000.
        print "       data time (ms) =", gpu_data_time / nTests * 1000.
        print "     module time (ms) =", gpu_module_time / nTests * 1000.
        print "     assign time (ms) =", gpu_assign_time / nTests * 1000.
        print "       calc time (ms) =", gpu_calc_time / nTests * 1000.
        print "---------------------------------------------"

    return nErrors