Exemplo n.º 1
0
def VecSet(show):
    """Plot VecSet rates for 7 CPU cores versus 1 GPU over vector size.

    For every vector size a timing is fetched with ``ut.get_time`` from the
    matching ``vec_ops`` log under ``../data`` and converted to a rate with
    ``ut.calc_rate``.  The figure is saved to ``../plots/VecSet.png``.

    Args:
        show: when truthy, ``plt.show()`` is called after saving.
    """
    gpu_sizes = [
        1000, 10000, 100000, 1000000, 2000000, 4000000, 6000000, 8000000,
        10000000, 20000000, 40000000, 60000000, 80000000, 100000000, 1000000000
    ]
    cpu_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000]

    # NOTE(review): the trailing get_time argument differs between the two
    # data sets (2 for the CPU logs, 3 for the GPU logs); it is passed
    # through unchanged -- confirm its meaning against ut.get_time.
    cpu = []
    for size in cpu_sizes:
        cpu_log = ("../data/vec-ops/vec_ops.n2_g0_c21_p7." + str(size) +
                   ".654910")  # 7 CPUs
        cpu.append(ut.calc_rate(size, ut.get_time(cpu_log, "VecSet", 2)))

    gpu = []
    for size in gpu_sizes:
        gpu_log = ("../data/figures-2-7-8-9/vec_ops.n1_g1_c2_a1." +
                   str(size) + ".668627")  # 1 GPU with 1 CPU
        gpu.append(ut.calc_rate(size, ut.get_time(gpu_log, "VecSet", 3)))

    # plot
    num = 2
    cm = plt.get_cmap('inferno')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Axes.set_color_cycle() was deprecated in matplotlib 1.5 and removed in
    # later releases; set_prop_cycle('color', ...) is the supported equivalent.
    ax.set_prop_cycle('color', [cm((1. * i) / num) for i in range(num)])

    for x_values, rates, label in ((cpu_sizes, cpu, "7 CPU cores"),
                                   (gpu_sizes, gpu, "1 GPU")):
        ax.plot(x_values,
                rates,
                marker="o",
                markersize=4,
                markeredgewidth=2,
                label=label)

    plt.title("VecSet performance", fontsize=12)
    plt.xlabel("Vector size", fontsize=12)
    plt.ylabel("8 Mbytes/second", fontsize=12)
    plt.legend(loc="upper left", fontsize=12, frameon=False)
    plt.tight_layout()
    plt.xscale('log')
    # Re-label the y ticks with thousands separators.
    ax.set_yticklabels(
        ['{:,}'.format(int(x)) for x in ax.get_yticks().tolist()])

    plt.savefig("../plots/VecSet.png")
    if show:
        plt.show()
Exemplo n.º 2
0
def cpu_vs_gpu_copy(show):
    """Plot VecCopy and copy-to-GPU rates for 42 CPU cores and 6 GPUs.

    Timings come from ``ut.get_time`` on the ``vec_ops`` logs under
    ``../data`` and are converted with ``ut.calc_rate``.  Hard-coded peak
    rates are drawn as horizontal tick markers at the right edge of the
    plot.  The figure is saved to ``../plots/CPU_vs_GPU_copy_siampp.png``.

    Args:
        show: when truthy, ``plt.show()`` is called after saving.
    """
    gpu_sizes = [
        1000, 10000, 100000, 1000000, 2000000, 4000000, 6000000, 8000000,
        10000000, 20000000, 40000000, 60000000, 80000000, 100000000, 1000000000
    ]
    cpu_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000]

    copy_scale = 2  # two memory accesses per copied element

    gpu_VecCopy = []
    gpu_ToGpu = []
    for size in gpu_sizes:
        time = ut.get_time(
            "../data/figures-2-7-8-9/vec_ops.n6_g1_c2_a1." + str(size) +
            ".668627", "VecCopy", 1)
        gpu_VecCopy.append(copy_scale * ut.calc_rate(size, time))

        # Pinned memory: the run suffix depends on the vector size.  The
        # original tested the whole list (``gpu_sizes <= 100000``), which is
        # always False on Python 2 and a TypeError on Python 3, so the
        # small-size run was never selected.
        run_num = ".732319" if size <= 100000 else ".715071"
        time = ut.get_time(
            "../data/pinned/vec_ops.n6_g1_c7_a1." + str(size) + run_num,
            "VecCUDACopyTo", 1)
        gpu_ToGpu.append(ut.calc_rate(size, time))

    cpu_VecCopy = []
    for size in cpu_sizes:
        time = ut.get_time(
            "../data/cpu-flush-cache/vec_ops.n2_g0_c21_p42." + str(size),
            "VecCopy", 1)
        cpu_VecCopy.append(copy_scale * ut.calc_rate(size, time))

    # calculate peak rates in 8 Mbytes/second
    cpu_rate = 135 * 1e9
    gpu_rate = 900 * 1e9
    cpu_to_gpu_rate = 50 * 1e9
    cpu_peak = (2 * cpu_rate) / (8 * 1e6)
    gpu_peak = (6 * gpu_rate) / (8 * 1e6)
    cpu_to_gpu_peak = (6 * cpu_to_gpu_rate) / (8 * 1e6)

    # plot
    fig, left = plt.subplots()
    right = left.twinx()
    cm = plt.get_cmap('inferno')
    gpu_color = cm(0. / 4)
    cpu_color = cm(2. / 4)
    transfer_color = cm(3. / 4)

    right.plot(cpu_sizes,
               cpu_VecCopy,
               color=cpu_color,
               label="42 CPU cores VecCopy")
    right.plot(gpu_sizes,
               gpu_VecCopy,
               color=gpu_color,
               label="6 GPUs VecCopy")
    right.plot(gpu_sizes,
               gpu_ToGpu,
               color=transfer_color,
               label="Copy to GPU")

    # One horizontal tick per peak rate, drawn at the right-hand x limit
    # (clip_on=False keeps the marker visible on the axes border).
    for peak, color in ((gpu_peak, gpu_color),
                        (cpu_peak, cpu_color),
                        (cpu_to_gpu_peak, transfer_color)):
        plt.plot(2000000000,
                 peak,
                 color=color,
                 linestyle="none",
                 markersize=15,
                 markeredgewidth=2,
                 marker="_",
                 clip_on=False)

    plt.xlim([500, 2000000000])
    left.set_title("GPU vs CPU copy performance", fontsize=12)
    left.set_xlabel("Vector size", fontsize=12)
    left.set_ylabel("8 MBytes/second", fontsize=12)
    right.legend(loc="upper left", fontsize=12, ncol=1, frameon=False)
    left.set_xscale('log')
    left.set_yscale('log')
    right.set_yscale('log')
    # Both y axes get identical limits and only the left one is shown.
    right.get_yaxis().set_visible(False)
    left.set_ylim(top=10000000)
    right.set_ylim(top=10000000)

    right.set_ylim(bottom=20)
    left.set_ylim(bottom=20)
    plt.tight_layout()

    plt.savefig("../plots/CPU_vs_GPU_copy_siampp.png")
    if show:
        plt.show()
Exemplo n.º 3
0
def VecCUDACopyTo_all(show):
    """Plot VecCUDACopyTo rates for non-pinned, pinned and WaitForGPU runs.

    Three groups of series are drawn against vector size: non-pinned
    (dashed), pinned (solid, labelled) and pinned with WaitForGPU (dotted).
    Each group holds one series from the single-rank/single-GPU logs and
    one series per entry of ``cpus`` from the ``n6_g1_c7`` logs.  Timings
    come from ``ut.get_time`` and are converted with ``ut.calc_rate``.
    The rank count, size and time of every per-rank reading are printed.

    The figure is not saved (the ``savefig`` call is commented out).

    Args:
        show: when truthy, ``plt.show()`` is called.
    """
    cpus = [1]  # , 2, 4]
    sizes = [1000, 10000, 100000]  # , 1000000, 10000000, 100000000, 1000000000]

    def single_gpu_series(prefix, run_num):
        """Return one rate per size from the 1 GPU with 1 CPU logs."""
        return [
            ut.calc_rate(
                size,
                ut.get_time(prefix + str(size) + run_num, "VecCUDACopyTo", 1))
            for size in sizes
        ]

    def per_rank_series(directory, run_num):
        """Return one list of rates per entry of ``cpus``, echoing each reading."""
        all_series = []
        for cpu in cpus:
            bandwidth = []
            for size in sizes:
                time = ut.get_time(
                    directory + "vec_ops.n6_g1_c7_a" + str(cpu) + "." +
                    str(size) + run_num, "VecCUDACopyTo", 1)
                print(cpu)
                print(size)
                print(time)
                bandwidth.append(ut.calc_rate(size, time))
            all_series.append(bandwidth)
        return all_series

    print("Non-pinned")
    data = [
        single_gpu_series("../data/vec-ops/vec_ops.n1_g1_c42_a1.", ".654911")
    ]
    data += per_rank_series("../data/vec-ops/", ".654914")

    print("Pinned")
    data_pinned = [
        single_gpu_series("../data/pinned/vec_ops.n1_g1_c42_a1.", ".713339")
    ]
    data_pinned += per_rank_series("../data/pinned/", ".715071")

    print("Pinned WaitForGPU()")
    data_pinned_waitforgpu = [
        single_gpu_series("../data/pinned/vec_ops.n1_g1_c2_a1.", ".732318")
    ]
    data_pinned_waitforgpu += per_rank_series("../data/pinned/", ".732319")

    # plot
    labels = ["1 MPI rank and 1 GPU", "1 MPI rank per GPU"]  # , "2 MPI ranks per GPU", "4 MPI ranks per GPU"]
    num = len(labels)
    cm = plt.get_cmap('inferno')
    colors = [cm((1. * i) / num) for i in range(num)]
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Axes.set_color_cycle() was deprecated in matplotlib 1.5 and removed in
    # later releases; set_prop_cycle('color', ...) is the supported equivalent.
    ax.set_prop_cycle('color', colors)

    for series in data[:num]:
        ax.plot(sizes, series, marker="o", linestyle="dashed")

    # Restart the colour cycle so each pinned series reuses the colour of
    # the matching non-pinned series.
    ax.set_prop_cycle('color', colors)

    for series, label in zip(data_pinned, labels):
        ax.plot(sizes, series, marker="o", label=label)

    # The cycle has exactly ``num`` colours, so it wraps around here and the
    # WaitForGPU series get the same colours again.
    for series in data_pinned_waitforgpu[:num]:
        ax.plot(sizes, series, marker="o", linestyle="dotted")

    plt.title("CPU to GPU transfer performance", fontsize=12)
    plt.xlabel("Vector size", fontsize=12)
    plt.ylabel("8 Mbytes/second", fontsize=12)
    ax.legend(loc="upper left", fontsize=12, frameon=False)
    plt.tight_layout()
    plt.xscale('log')
    # Re-label the y ticks with thousands separators.
    ax.set_yticklabels(['{:,}'.format(int(x)) for x in ax.get_yticks().tolist()])

    # plt.savefig("../plots/VecCUDACopyTo_all.png")
    if show:
        plt.show()
Exemplo n.º 4
0
def VecCUDACopyTo_comparison(comp, show):
    """Plot the pinned / non-pinned VecCUDACopyTo rate ratio over vector size.

    One series is built from the single-rank/single-GPU logs and one per
    entry of ``cpus`` from the ``n6_g1_c7`` logs.  A dashed black line marks
    a ratio of 1.  The figure is not saved (the ``savefig`` call is
    commented out).

    Args:
        comp: comparison mode; only ``"_ratio"`` is implemented.
        show: when truthy, ``plt.show()`` is called.

    Raises:
        ValueError: if ``comp`` is not ``"_ratio"``.  Previously this case
            produced empty series and failed later inside ``ax.plot``.
    """
    if comp != "_ratio":
        raise ValueError(
            "unsupported comparison {!r}; only '_ratio' is implemented".format(
                comp))

    cpus = [1, 2, 4]
    sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000]
    # Wider x range for the reference line so it spans the whole plot.
    sizes_ = [100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000]
    ones = [1] * len(sizes_)

    def rate(path, size):
        """Return ut.calc_rate for the VecCUDACopyTo timing stored in ``path``."""
        return ut.calc_rate(size, ut.get_time(path, "VecCUDACopyTo", 1))

    data = []

    # 1 GPU with 1 CPU
    bandwidth = []
    for size in sizes:
        nonpinned = rate("../data/vec-ops/vec_ops.n1_g1_c42_a1." + str(size) + ".654911", size)
        pinned = rate("../data/pinned/vec_ops.n1_g1_c42_a1." + str(size) + ".713339", size)
        bandwidth.append(pinned / nonpinned)
    data.append(bandwidth)

    for cpu in cpus:
        bandwidth = []
        for size in sizes:
            nonpinned = rate("../data/vec-ops/vec_ops.n6_g1_c7_a" + str(cpu) + "." + str(size) + ".654914", size)
            pinned = rate("../data/pinned/vec_ops.n6_g1_c7_a" + str(cpu) + "." + str(size) + ".715071", size)
            bandwidth.append(pinned / nonpinned)

            # Debug output for two sample sizes of the 1-rank-per-GPU runs.
            if cpu == 1 and size in (10000, 100000000):
                print(size)
                print(nonpinned)
                # NOTE(review): this second value is read from the
                # single-GPU pinned log (n1_g1_c42), not from the n6 log
                # used for the ratio above; kept as in the original --
                # confirm it is intended.
                print(rate("../data/pinned/vec_ops.n1_g1_c42_a1." + str(size) + ".713339", size))
        data.append(bandwidth)

    # plot
    labels = ["1 MPI rank and 1 GPU", "1 MPI rank per GPU", "2 MPI ranks per GPU", "4 MPI ranks per GPU"]
    num = len(labels)
    cm = plt.get_cmap('inferno')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Axes.set_color_cycle() was deprecated in matplotlib 1.5 and removed in
    # later releases; set_prop_cycle('color', ...) is the supported equivalent.
    ax.set_prop_cycle('color', [cm((1. * (i + 2)) / (num + 2)) for i in range(num)])

    for series, label in zip(data, labels):
        ax.plot(sizes, series, marker="o", label=label)

    ax.plot(sizes_, ones, color="black", linestyle="dashed")
    plt.xlim([500, 2000000000])

    plt.title("CPU to GPU transfer performance", fontsize=12)
    plt.xlabel("Vector size", fontsize=12)
    plt.ylabel("Pinned memory/non-pinned memory", fontsize=12)
    ax.legend(loc="upper left", fontsize=12, frameon=False)
    plt.tight_layout()
    plt.xscale('log')
    # plt.savefig("../plots/VecCUDACopyTo_ratio.png")
    if show:
        plt.show()
Exemplo n.º 5
0
def cpu_vs_gpu(operation, count, show):
    """Plot flop rates and copy rates for 42 CPU cores versus 6 GPUs.

    The left axis shows the flop rate of ``operation`` (from
    ``ut.get_floprate``); the twin right axis shows VecCopy and copy-to-GPU
    rates (from ``ut.get_time`` + ``ut.calc_rate``).  Hard-coded peak rates
    are drawn as tick markers at the right edge.  The figure is not saved
    (the ``savefig`` call is commented out).

    NOTE(review): a later function in this file is also named
    ``cpu_vs_gpu`` but takes ``(operation, count, clear, show)``; if both
    live in one module the later definition replaces this one.

    Args:
        operation: ``"VecDot"`` or ``"VecAXPY"``.
        count: forwarded unchanged as the last argument of
            ``ut.get_floprate``.
        show: when truthy, ``plt.show()`` is called.

    Raises:
        ValueError: if ``operation`` is not one of the supported names.
            Previously this surfaced as a plotting error or a NameError on
            ``mem_scale``.
    """
    # Factor applied to the upper limit of the right-hand axis.
    mem_scales = {"VecDot": 1, "VecAXPY": 1.5}
    # (log prefix, run suffix) of the GPU flop-rate logs per operation.
    gpu_flop_logs = {
        "VecDot": ("../data/waitforgpu/vec_ops.n6_g1_c2_a1.", ".718559"),
        "VecAXPY": ("../data/figures-2-7-8-9/vec_ops.n6_g1_c2_a1.",
                    ".668627"),
    }
    if operation not in mem_scales:
        raise ValueError(
            "unsupported operation {!r}; expected 'VecDot' or 'VecAXPY'".format(
                operation))
    mem_scale = mem_scales[operation]
    flop_prefix, flop_run = gpu_flop_logs[operation]

    gpu_sizes = [
        1000, 10000, 100000, 1000000, 2000000, 4000000, 6000000, 8000000,
        10000000, 20000000, 40000000, 60000000, 80000000, 100000000, 1000000000
    ]
    cpu_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000]

    copy_scale = 2  # two memory accesses per copied element

    gpu = []
    gpu_VecCopy = []
    gpu_ToGpu = []
    for size in gpu_sizes:
        gpu.append(
            float(
                ut.get_floprate(flop_prefix + str(size) + flop_run, operation,
                                False, count)))

        time = ut.get_time(
            "../data/figures-2-7-8-9/vec_ops.n6_g1_c2_a1." + str(size) +
            ".668627", "VecCopy", 1)
        gpu_VecCopy.append(copy_scale * ut.calc_rate(size, time))

        # Pinned memory: the run suffix depends on the vector size.  The
        # original tested the whole list (``gpu_sizes <= 100000``), which is
        # always False on Python 2 and a TypeError on Python 3, so the
        # small-size run was never selected.
        run_num = ".732319" if size <= 100000 else ".715071"
        time = ut.get_time(
            "../data/pinned/vec_ops.n6_g1_c7_a1." + str(size) + run_num,
            "VecCUDACopyTo", 1)
        gpu_ToGpu.append(ut.calc_rate(size, time))

    cpu = []
    cpu_VecCopy = []
    for size in cpu_sizes:
        cpu_log = "../data/cpu-flush-cache/vec_ops.n2_g0_c21_p42." + str(size)
        cpu.append(float(ut.get_floprate(cpu_log, operation, True, count)))
        time = ut.get_time(cpu_log, "VecCopy", 1)
        cpu_VecCopy.append(copy_scale * ut.calc_rate(size, time))

    # calculate peak rates in 8 Mbytes/second
    cpu_rate = 135 * 1e9
    gpu_rate = 900 * 1e9
    cpu_to_gpu_rate = 50 * 1e9
    cpu_peak = (2 * cpu_rate) / (8 * 1e6)
    gpu_peak = (6 * gpu_rate) / (8 * 1e6)
    cpu_to_gpu_peak = (6 * cpu_to_gpu_rate) / (8 * 1e6)

    # plot
    fig, left = plt.subplots()
    right = left.twinx()
    cm = plt.get_cmap('inferno')
    gpu_color = cm(0. / 4)
    cpu_color = cm(2. / 4)
    transfer_color = cm(3. / 4)

    left.plot(cpu_sizes,
              cpu,
              color=cpu_color,
              label="42 CPU cores " + operation)
    right.plot(cpu_sizes,
               cpu_VecCopy,
               color=cpu_color,
               linestyle="dashed",
               label="42 CPU cores VecCopy")
    right.plot(gpu_sizes,
               gpu_ToGpu,
               color=transfer_color,
               linestyle="dashed",
               label="6 GPUs copy to GPU")
    left.plot(gpu_sizes,
              gpu,
              color=gpu_color,
              label="6 GPUs " + operation)
    right.plot(gpu_sizes,
               gpu_VecCopy,
               color=gpu_color,
               linestyle="dashed",
               label="6 GPUs VecCopy")

    # One horizontal tick per peak rate, drawn at the right-hand x limit
    # (clip_on=False keeps the marker visible on the axes border).
    for peak, color, label in ((gpu_peak, gpu_color, "GPU copy peak"),
                               (cpu_to_gpu_peak, transfer_color,
                                "CPU to GPU peak"),
                               (cpu_peak, cpu_color, "CPU copy peak")):
        plt.plot(2000000000,
                 peak,
                 color=color,
                 linestyle="none",
                 markersize=15,
                 markeredgewidth=2,
                 marker="_",
                 label=label,
                 clip_on=False)

    plt.xlim([500, 2000000000])
    left.set_title("GPU vs CPU " + operation + " performance", fontsize=12)
    left.set_xlabel("Vector size", fontsize=12)
    left.set_ylabel("MFlops/second", fontsize=12)
    right.set_ylabel("8 MBytes/second", fontsize=12)
    left.legend(loc="upper left", fontsize=12, ncol=1, frameon=False)
    plt.legend(loc="lower right", fontsize=12, ncol=1, frameon=False)
    left.set_xscale('log')
    left.set_yscale('log')
    right.set_yscale('log')
    left.set_ylim(top=10000000)
    right.set_ylim(top=10000000 * mem_scale)
    right.set_ylim(bottom=20)
    left.set_ylim(bottom=20)
    plt.tight_layout()

    # plt.savefig("../plots/" + operation + "_CPU_vs_GPU.png")
    if show:
        plt.show()
Exemplo n.º 6
0
def synthetic_latency(operation, count, show):
    """Plot measured flop rates next to a rate with a fixed latency removed.

    Three series are drawn against vector size: the 42-CPU flop rate, the
    6-GPU flop rate (both from ``ut.get_floprate``) and a synthetic GPU
    rate ``2 * size * 1e-6 / (time - 24e-6)`` where ``time`` comes from
    ``ut.get_time``.  The figure is saved to
    ``../plots/<operation>_synthetic_latency.png`` and the current figure
    is cleared afterwards.

    Series that the earlier version loaded or computed but never plotted
    (16e-6 and 28e-6/50e-6 latencies, VecCopy and VecCUDACopyTo rates) are
    no longer read.

    Args:
        operation: ``"VecAXPY"`` or ``"VecDot"``.
        count: forwarded unchanged to ``ut.get_floprate`` and
            ``ut.get_time``.
        show: when truthy, ``plt.show()`` is called after saving.

    Raises:
        ValueError: if ``operation`` is not one of the supported names.
            Previously this surfaced as a NameError on ``labels``.
    """
    if operation not in ("VecAXPY", "VecDot"):
        raise ValueError(
            "unsupported operation {!r}; expected 'VecAXPY' or 'VecDot'".format(
                operation))

    sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000]

    # Latency removed from every measured GPU time, and its legend text.
    latency = 24e-6
    latency_label = "24"

    gpu = []
    gpu_24 = []
    cpu = []
    for size in sizes:
        gpu_log = ("../data/vec-ops/vec_ops.n6_g1_c7_a1." + str(size) +
                   ".654914")

        # floprate from file
        gpu.append(float(ut.get_floprate(gpu_log, operation, False, count)))

        # synthetic floprate from the measured time minus the latency
        time = ut.get_time(gpu_log, operation, count)
        gpu_24.append((2 * size * 1e-6) / (time - latency))

        cpu.append(
            float(
                ut.get_floprate(
                    "../data/vec-ops/vec_ops.n2_g0_c21_p42." + str(size) +
                    ".654910", operation, True, count)))

    # plot
    plt.plot(sizes,
             cpu,
             color="grey",
             alpha=0.5,
             marker=".",
             markersize=6,
             markeredgewidth=2,
             label="42 CPUs " + operation)
    plt.plot(sizes,
             gpu,
             color="black",
             marker=".",
             markersize=6,
             markeredgewidth=2,
             label="6 GPUs " + operation)
    # The backslash is escaped explicitly: "\c" is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    plt.plot(sizes,
             gpu_24,
             color="black",
             marker=".",
             markersize=6,
             markeredgewidth=2,
             linestyle="dotted",
             label="$" + latency_label + "\\cdot10^{-6}$ latency")

    plt.title(operation + " performance without calculated latency",
              fontsize=12)
    plt.xlabel("Vector size", fontsize=12)
    plt.ylabel("MFlops/second", fontsize=12)
    plt.legend(loc="lower right", fontsize=12, frameon=False)
    plt.xscale('log')
    plt.yscale('log')
    plt.ylim(top=1000000)
    plt.tight_layout()

    plt.savefig("../plots/" + operation + "_synthetic_latency.png")
    if show:
        plt.show()
    plt.gcf().clear()
    hs.append(numpy.mean([c.h() for c in cells(mesh)]))

    print('Compute the solution for n={}...'.format(n))
    g = interpolate(gexp, FunctionSpace(mesh, 'Regge', degree))
    h = min([c.inradius() for c in cells(mesh)]) / 2.0

    (_, solh) = exponential_map(g, 0, q0, p0, h, t2s(Tmax), verbose=True)

    print('Evaluate the solution and compute the error...')
    t = numpy.linspace(0, Tmax, 200 * M + 1)
    s = t2s(t)
    qe = sol(t)
    (qh, ph) = solh(s)
    (Hh, Lh) = integrals(qh, ph)
    d = qe - qh
    ee.append(numpy.max(numpy.sqrt(numpy.array([q.dot(q) for q in d]))))
    eH.append(numpy.max(numpy.abs(Hh - H)))
    eL.append(numpy.max(numpy.abs(Lh - L)))

# Report phase: for each collected error series, hand it to rate_plot
# together with hs and then print what calc_rate returns for the same pair.
prefix = 'conv-deg{}-'.format(degree)
print('')
print('===DEGREE {}==='.format(degree))
for suffix, errors in (('e', ee), ('H', eH), ('L', eL)):
    # The suffix is appended to the shared prefix to form the name
    # passed to rate_plot.
    rate_plot(hs, errors, name=prefix + suffix)
    print(calc_rate(hs, errors))
print('')
Exemplo n.º 8
0
def jed_cpu_vs_gpu(operation, count, show):
    """Plot rates against execution time for 42 CPU cores versus 6 GPUs.

    Every series uses the measured time (``ut.get_time``) as x value and a
    rate as y value: the flop rate of ``operation`` (``ut.get_floprate``),
    or a VecCopy / copy-to-GPU rate (``ut.calc_rate``) divided by the
    operation's memory scale.  All series are drawn on the left axis; the
    twin right axis only carries the second y label and scale.  The figure
    is saved to ``../plots/jed_<operation>_CPU_vs_GPU.png``.

    Args:
        operation: ``"VecDot"`` or ``"VecAXPY"``.
        count: forwarded unchanged to ``ut.get_time`` and
            ``ut.get_floprate`` for the operation readings.
        show: when truthy, ``plt.show()`` is called after saving.

    Raises:
        ValueError: if ``operation`` is not one of the supported names.
            Previously this surfaced as a NameError on ``mem_scale``.
    """
    mem_scales = {"VecDot": 1, "VecAXPY": 1.5}
    # (log prefix, run suffix) of the GPU operation logs.
    gpu_op_logs = {
        "VecDot": ("../data/waitforgpu/vec_ops.n6_g1_c2_a1.", ".718559"),
        "VecAXPY": ("../data/figures-2-7-8-9/vec_ops.n6_g1_c2_a1.",
                    ".668627"),
    }
    if operation not in mem_scales:
        raise ValueError(
            "unsupported operation {!r}; expected 'VecDot' or 'VecAXPY'".format(
                operation))
    mem_scale = mem_scales[operation]
    op_prefix, op_run = gpu_op_logs[operation]

    copy_scale = 2 / mem_scale  # VecCopy: two memory accesses
    transfer_scale = 1 / mem_scale  # copy to GPU

    gpu_sizes = [
        1000, 10000, 100000, 1000000, 2000000, 4000000, 6000000, 8000000,
        10000000, 20000000, 40000000, 60000000, 80000000, 100000000, 1000000000
    ]
    cpu_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000]

    gpu = []
    gpu_time = []
    gpu_VecCopy = []
    gpu_VecCopy_time = []
    gpu_ToGpu = []
    gpu_ToGpu_time = []

    for size in gpu_sizes:
        # operation time and floprate
        op_log = op_prefix + str(size) + op_run
        gpu_time.append(ut.get_time(op_log, operation, count))
        gpu.append(float(ut.get_floprate(op_log, operation, False, count)))

        # GPU copy time and bandwidth
        time = ut.get_time(
            "../data/figures-2-7-8-9/vec_ops.n6_g1_c2_a1." + str(size) +
            ".668627", "VecCopy", 1)
        gpu_VecCopy_time.append(time)
        gpu_VecCopy.append(copy_scale * ut.calc_rate(size, time))

        # CPU to GPU time and bandwidth, pinned memory.  The run suffix
        # depends on the vector size; the original tested the whole list
        # (``gpu_sizes <= 100000``), which is always False on Python 2 and
        # a TypeError on Python 3.
        run_num = ".732319" if size <= 100000 else ".715071"
        time = ut.get_time(
            "../data/pinned/vec_ops.n6_g1_c7_a1." + str(size) + run_num,
            "VecCUDACopyTo", 1)
        gpu_ToGpu_time.append(time)
        gpu_ToGpu.append(transfer_scale * ut.calc_rate(size, time))

    cpu = []
    cpu_time = []
    cpu_VecCopy = []
    cpu_VecCopy_time = []

    for size in cpu_sizes:
        cpu_log = "../data/cpu-flush-cache/vec_ops.n2_g0_c21_p42." + str(size)

        # CPU operation time and floprate
        cpu_time.append(ut.get_time(cpu_log, operation, count))
        cpu.append(float(ut.get_floprate(cpu_log, operation, True, count)))

        # CPU copy time and bandwidth
        time = ut.get_time(cpu_log, "VecCopy", 1)
        cpu_VecCopy_time.append(time)
        cpu_VecCopy.append(copy_scale * ut.calc_rate(size, time))

    # plot
    fig, left = plt.subplots()
    right = left.twinx()
    cm = plt.get_cmap('inferno')
    gpu_color = cm(0. / 4)
    cpu_color = cm(2. / 4)
    transfer_color = cm(3. / 4)

    left.plot(cpu_time,
              cpu,
              color=cpu_color,
              label="42 CPU cores " + operation)
    left.plot(cpu_VecCopy_time,
              cpu_VecCopy,
              color=cpu_color,
              linestyle="dashed",
              label="42 CPU cores VecCopy")
    left.plot(gpu_ToGpu_time,
              gpu_ToGpu,
              color=transfer_color,
              linestyle="dashed",
              label="6 GPUs copy to GPU")
    left.plot(gpu_time,
              gpu,
              color=gpu_color,
              label="6 GPUs " + operation)
    left.plot(gpu_VecCopy_time,
              gpu_VecCopy,
              color=gpu_color,
              linestyle="dashed",
              label="6 GPUs VecCopy")

    left.set_title("GPU vs CPU " + operation + " performance", fontsize=12)
    left.set_xlabel("Execution time (seconds)", fontsize=12)
    left.set_ylabel("MFlops/second", fontsize=12)
    right.set_ylabel("8 MBytes/second", fontsize=12)
    left.legend(loc="lower right", fontsize=12, ncol=1,
                frameon=False)  # markerfirst=False
    left.set_xscale('log')
    left.set_yscale('log')
    right.set_yscale('log')
    left.set_ylim([7, 1000000])
    right.set_ylim([7, 1000000 * mem_scale])
    plt.xlim([1e-6, .2])
    # Apply the layout before saving so the written file matches what
    # plt.show() displays (the original saved first).
    plt.tight_layout()
    plt.savefig("../plots/jed_" + operation + "_CPU_vs_GPU.png")
    if show:
        plt.show()
0
def cpu_vs_gpu(operation, count, clear, show):
    """Plot the 42-CPU flop rate of ``operation`` for several cache runs.

    Four series are read with ``ut.get_floprate`` and drawn against vector
    size: cleared cache, half cleared cache, the December run and
    uncleared cache.  The first December value is printed.  The figure is
    not saved (the ``savefig`` call is commented out).

    Series that the earlier version loaded but never plotted (permuted
    operations, flush-with-VecSet and the three VecCopy variants) are no
    longer read.

    NOTE(review): an earlier function in this file is also named
    ``cpu_vs_gpu`` but takes ``(operation, count, show)``; if both live in
    one module this definition replaces it.

    Args:
        operation: ``"VecDot"`` or ``"VecAXPY"``.
        count: forwarded unchanged as the last argument of
            ``ut.get_floprate``.
        clear: unused; kept so existing callers keep working.
        show: when truthy, ``plt.show()`` is called.

    Raises:
        ValueError: if ``operation`` is not one of the supported names.
            Previously this surfaced as a NameError on ``mem_scale``.
    """
    mem_scales = {"VecDot": 1, "VecAXPY": 1.5}
    if operation not in mem_scales:
        raise ValueError(
            "unsupported operation {!r}; expected 'VecDot' or 'VecAXPY'".format(
                operation))
    mem_scale = mem_scales[operation]

    cpu_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000]

    def floprates(directory, run_num):
        """Return one flop rate per size from the logs in ``directory``."""
        return [
            float(
                ut.get_floprate(
                    "../data/" + directory + "/vec_ops.n2_g0_c21_p42." +
                    str(size) + run_num, operation, True, count))
            for size in cpu_sizes
        ]

    cpu_flush = floprates("cpu-flush-cache", ".767597")
    cpu_half_flush = floprates("vec-ops", ".654910")
    cpu_half_flush_december = floprates("vec-ops-december", ".795805")
    cpu_no_flush = floprates("cpu-no-flush-cache", ".767590")

    print(cpu_half_flush_december[0])

    # plot
    fig, left = plt.subplots()
    right = left.twinx()
    cm = plt.get_cmap('inferno')
    cache_color = cm(2. / 4)

    left.plot(cpu_sizes,
              cpu_flush,
              color=cache_color,
              label=operation + " cleared cache")
    left.plot(cpu_sizes,
              cpu_half_flush,
              color=cache_color,
              linestyle="dashed",
              label=operation + " half cleared cache")
    left.plot(cpu_sizes,
              cpu_half_flush_december,
              color="black",
              label=operation + " December")
    left.plot(cpu_sizes,
              cpu_no_flush,
              color=cache_color,
              linestyle="dotted",
              label=operation + " uncleared cache")

    plt.xlim([500, 2000000000])
    left.set_title("CPU " + operation + " cache performance", fontsize=12)
    left.set_xlabel("Vector size", fontsize=12)
    left.set_ylabel("MFlops/second", fontsize=12)
    right.set_ylabel("8 MBytes/second", fontsize=12)
    left.legend(loc="lower right", fontsize=12, ncol=1, frameon=False)
    # NOTE(review): no labelled series is plotted on the twin axes, so this
    # second legend presumably has no entries; kept as in the original.
    plt.legend(loc="upper left", fontsize=12, ncol=1, frameon=False)
    left.set_xscale('log')
    left.set_yscale('log')
    right.set_yscale('log')
    top_ = 1000000
    left.set_ylim(top=top_)
    right.set_ylim(top=top_ * mem_scale)
    right.set_ylim(bottom=20)
    left.set_ylim(bottom=20)
    plt.tight_layout()

    # plt.savefig("../plots/" + operation + "_CPU_cleared_cache.png")
    if show:
        plt.show()
Exemplo n.º 10
0
def VecCopy(show):
    """Plot VecCopy and copy-to-GPU rates for 7 CPU cores and 1 GPU.

    Timings come from ``ut.get_time`` on the ``vec_ops`` logs under
    ``../data`` and are converted with ``ut.calc_rate``.  For the
    copy-to-GPU series the faster of the pinned and non-pinned timings is
    used for each size.  Hard-coded peak rates are drawn as black tick
    markers with text labels.  The figure is saved to
    ``../plots/VecCopy.png``.

    Args:
        show: when truthy, ``plt.show()`` is called after saving.
    """
    gpu_sizes = [
        1000, 10000, 100000, 1000000, 2000000, 4000000, 6000000, 8000000,
        10000000, 20000000, 40000000, 60000000, 80000000, 100000000, 1000000000
    ]
    cpu_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000]

    copy_scale = 2  # for VecCopy

    cpu = []
    for size in cpu_sizes:
        time1 = ut.get_time(
            "../data/vec-ops/vec_ops.n2_g0_c21_p7." + str(size) + ".654910",
            "VecCopy", 1)
        # The original declared ``scale = 2  # for VecCopy`` ahead of this
        # loop but never applied it here, while the GPU VecCopy rate and the
        # CPU peak below are both doubled; apply it so the curves are
        # comparable.
        cpu.append(copy_scale * ut.calc_rate(size, time1))

    gpu = []
    cputogpu = []
    for size in gpu_sizes:
        gpu_log = ("../data/figures-2-7-8-9/vec_ops.n1_g1_c2_a1." +
                   str(size) + ".668627")
        time2 = ut.get_time(gpu_log, "VecCopy", 1)
        gpu.append(copy_scale * ut.calc_rate(size, time2))

        # copy to GPU: keep whichever of the two runs was faster
        nonpinned = ut.get_time(gpu_log, "VecCUDACopyTo", 1)
        pinned = ut.get_time(
            "../data/pinned/vec_ops.n1_g1_c42_a1." + str(size) + ".720947",
            "VecCUDACopyTo", 1)
        time3 = min(pinned, nonpinned)
        cputogpu.append(ut.calc_rate(size, time3))

    # calculate peak rates in 8 Mbytes/s
    cpu_rate = 135 * 1e9
    gpu_rate = 900 * 1e9
    cpu_to_gpu_rate = 50 * 1e9

    cpu_peak = (2 * cpu_rate) / (8 * 1e6)
    gpu_peak = gpu_rate / (8 * 1e6)
    cpu_to_gpu_peak = cpu_to_gpu_rate / (8 * 1e6)

    # plot
    num = 4
    cm = plt.get_cmap('inferno')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Axes.set_color_cycle() was deprecated in matplotlib 1.5 and removed in
    # later releases; set_prop_cycle('color', ...) is the supported equivalent.
    ax.set_prop_cycle('color', [cm((1. * i) / num) for i in range(num)])

    for x_values, rates, label in ((cpu_sizes, cpu, "7 CPU cores VecCopy"),
                                   (gpu_sizes, gpu, "1 GPU VecCopy"),
                                   (gpu_sizes, cputogpu,
                                    "1 GPU copy to GPU")):
        ax.plot(x_values,
                rates,
                marker="o",
                markersize=4,
                markeredgewidth=2,
                label=label)

    # One horizontal tick per peak rate, drawn at the right-hand x limit
    # (clip_on=False keeps the marker visible on the axes border).
    for peak in (gpu_peak, cpu_peak, cpu_to_gpu_peak):
        plt.plot(2000000000,
                 peak,
                 color="black",
                 linestyle="none",
                 markersize=15,
                 markeredgewidth=2,
                 marker="_",
                 clip_on=False)

    # Text labels beside the ticks; the last one is shifted in x and y.
    for x_pos, y_pos, text in ((1200000000, gpu_peak, "GPU copy peak"),
                               (1200000000, cpu_peak, "CPU copy peak"),
                               (1600000000, cpu_to_gpu_peak - 6000,
                                "CPU to GPU peak")):
        plt.text(x_pos,
                 y_pos,
                 text,
                 horizontalalignment='right',
                 verticalalignment='center')

    plt.xlim([500, 2000000000])
    plt.title("VecCopy performance", fontsize=12)
    plt.xlabel("Vector size", fontsize=12)
    plt.ylabel("8 Mbytes/second", fontsize=12)
    plt.legend(loc="upper left", fontsize=12, frameon=False)
    plt.tight_layout()
    plt.xscale('log')
    # Re-label the y ticks with thousands separators.
    ax.set_yticklabels(
        ['{:,}'.format(int(x)) for x in ax.get_yticks().tolist()])

    plt.savefig("../plots/VecCopy.png")
    if show:
        plt.show()