Пример #1
0
# Side length, in pixels, of the square grid; mandelbrot_wrapper reads this
# module-level name when rescaling indices.
num_pixels = 2000
# One RDD of row indices and one of column indices, each requested in 10
# slices. `sc` is created outside this fragment (presumably the SparkContext).
rows = sc.range(num_pixels, numSlices=10)
cols = sc.range(num_pixels, numSlices=10)

# All (row, col) combinations: one element per pixel of the grid.
indices = rows.cartesian(cols)

def mandelbrot_wrapper(row, col):
    """Evaluate ``P2.mandelbrot`` for the pixel at ``(row, col)``.

    Both indices are rescaled linearly so that index 0 maps to -2.0 and
    index ``num_pixels`` would map to +2.0; ``col`` supplies the x
    coordinate and ``row`` the y coordinate.

    Returns a ``((row, col), value)`` pair, where ``value`` is whatever
    ``P2.mandelbrot(x, y)`` returns for the rescaled coordinates.
    """
    # Number of pixels per unit of coordinate space (the grid spans 4 units).
    pixels_per_unit = num_pixels / 4.
    x_coord = col / pixels_per_unit - 2.
    y_coord = row / pixels_per_unit - 2.

    pixel = (row, col)
    return (pixel, P2.mandelbrot(x_coord, y_coord))

########### Different from part A: load balancing! ########
# Redistribute the (row, col) index pairs over 100 partitions before doing any
# work; the original author describes this as randomly throwing jobs between
# partitions.
new_indices = indices.repartition(100)

# Each element is a (row, col) pair, unpacked into mandelbrot_wrapper(row, col).
mandelbrot_load_balanced = new_indices.map(lambda a: mandelbrot_wrapper(*a))

# P2.sum_values_for_partitions is defined outside this fragment; presumably it
# yields one iteration total per partition -- confirm against P2.
summed_rdd = P2.sum_values_for_partitions(mandelbrot_load_balanced)
summed_result = summed_rdd.collect()

# Now collect the data & plot
# Histogram over 20 logarithmically spaced bin edges from 1e3 to 1e8, with a
# red rug mark per collected value, shown on a log-scaled x axis.
plt.hist(summed_result, bins=np.logspace(3, 8, 20))
sns.rugplot(summed_result, color='red')
plt.gca().set_xscale('log')
plt.xlabel('Total Number of Iterations on Partition')
plt.ylabel('Partition Count')
plt.title('Number of Iterations on each Partition')

# Write the current figure to disk.
plt.savefig('P2b_alternative_hist.png', dpi=200, bbox_inches='tight')
Пример #2
0
    return ((row, col), P2.mandelbrot(x, y))


# Apply mandelbrot_wrapper to every element of `indices`; each element is
# unpacked into positional arguments. `indices` is built outside this fragment
# (presumably the RDD of (row, col) pairs -- confirm).
mandelbrot_rdd = indices.map(lambda a: mandelbrot_wrapper(*a))

# Now collect the data & plot
mandelbrot_result = mandelbrot_rdd.collect()

# Turn the grid lines off before drawing the image.
plt.grid(False)
# I slightly redefined the draw image function as the original
# implementation annoyed me...I did not want to collect in a draw function!
# (Hence draw_image is handed the already-collected list, not the RDD.)
P2.draw_image(data=mandelbrot_result)

plt.savefig('P2a_mandelbrot.png', dpi=200, bbox_inches='tight')

# Clear the figure so the histogram below starts from an empty canvas.
plt.clf()

# Now create the histogram...I recognize that mandelbrot is computed twice
# but it is for my sanity
# NOTE(review): mandelbrot_rdd is not cached anywhere in this fragment, so the
# collect() below re-runs the map; calling mandelbrot_rdd.cache() before the
# first collect() would presumably avoid that -- confirm the memory budget.
summed_rdd = P2.sum_values_for_partitions(mandelbrot_rdd)
summed_result = summed_rdd.collect()

# Histogram over 20 logarithmically spaced bin edges from 1e3 to 1e8, with a
# red rug mark per collected value, shown on a log-scaled x axis.
plt.hist(summed_result, bins=np.logspace(3, 8, 20))
sns.rugplot(summed_result, color='red')
plt.gca().set_xscale('log')
plt.xlabel('Total Number of Iterations on Partition')
plt.ylabel('Partition Count')
plt.title('Number of Iterations on each Partition')

plt.savefig('P2a_hist.png', dpi=200, bbox_inches='tight')
Пример #3
0
    y = row/(num_pixels/4.) - 2.

    return ((row, col), P2.mandelbrot(x, y))

# Lazily map every element of `indices` through mandelbrot_wrapper, unpacking
# the element into positional arguments. `indices` comes from outside this
# fragment (presumably (row, col) pairs -- confirm).
mandelbrot_rdd = indices.map(lambda a: mandelbrot_wrapper(*a))

# Now collect the data & plot
mandelbrot_result = mandelbrot_rdd.collect()

# Disable grid lines before the image is drawn.
plt.grid(False)
# I slightly redefined the draw image function as the original
# implementation annoyed me...I did not want to collect in a draw function!
# (So the collected list is passed in via the `data` keyword.)
P2.draw_image(data=mandelbrot_result)

plt.savefig('P2a_mandelbrot.png', dpi=200, bbox_inches='tight')

# Reset the current figure before plotting the histogram.
plt.clf()

# Now create the histogram...I recognize that mandelbrot is computed twice
# but it is for my sanity
# NOTE(review): no cache()/persist() is applied to mandelbrot_rdd in this
# fragment, so this second action recomputes the map; persisting the RDD
# before the first collect() would presumably remove the duplicate work.
summed_rdd = P2.sum_values_for_partitions(mandelbrot_rdd)
summed_result = summed_rdd.collect()

# 20 log-spaced bin edges between 1e3 and 1e8; rug marks in red show the
# individual collected values; the x axis is switched to a log scale.
plt.hist(summed_result, bins=np.logspace(3, 8, 20))
sns.rugplot(summed_result, color='red')
plt.gca().set_xscale('log')
plt.xlabel('Total Number of Iterations on Partition')
plt.ylabel('Partition Count')
plt.title('Number of Iterations on each Partition')

plt.savefig('P2a_hist.png', dpi=200, bbox_inches='tight')
Пример #4
0
# Key each expensive task by (its index modulo num_partitions) so the tasks
# are dealt round-robin over the partition ids 0..num_partitions-1.
# labeled_expensive_tasks is built outside this fragment; presumably it holds
# (task, index) pairs like labeled_cheap_tasks below -- confirm.
partition_vs_expensive_task = labeled_expensive_tasks.map(
    lambda x: (x[1] % num_partitions, x[0]))

# Get cheap tasks ready to process: keep the entries whose second field is 0,
# drop that field, number the survivors, and deal them round-robin as well.
cheap_tasks = indices_vs_expensive.filter(lambda x: x[1] == 0)
cheap_tasks = cheap_tasks.map(lambda x: x[0])
labeled_cheap_tasks = cheap_tasks.zipWithIndex()
partition_vs_cheap_task = labeled_cheap_tasks.map(
    lambda x: (x[1] % num_partitions, x[0]))

# Combine cheap & expensive tasks, now designated to an appropriate partition
partition_vs_ij = partition_vs_expensive_task.union(partition_vs_cheap_task)
# Sort data into the correct partition...sorted by key!
# The partition count is taken from num_partitions (it used to be a literal
# 100) so it always matches the modulus used to build the keys above.
# NOTE(review): sortByKey range-partitions the keys, so it presumably does not
# guarantee exactly one key per partition; partitionBy(num_partitions) may
# express the intent more directly -- confirm before switching.
sorted_by_partition = partition_vs_ij.sortByKey(numPartitions=num_partitions)

# Drop the partition key and unpack the remaining task (presumably a
# (row, col) pair) into mandelbrot_wrapper.
mandelbrot_load_balanced = sorted_by_partition.map(
    lambda a: mandelbrot_wrapper(*a[1]))

# P2.sum_values_for_partitions is defined outside this fragment; presumably it
# yields one iteration total per partition -- confirm against P2.
summed_rdd = P2.sum_values_for_partitions(mandelbrot_load_balanced)
summed_result = summed_rdd.collect()

# Now collect the data & plot
# Histogram over 20 logarithmically spaced bin edges from 1e3 to 1e8, with a
# red rug mark per collected value, shown on a log-scaled x axis.
plt.hist(summed_result, bins=np.logspace(3, 8, 20))
sns.rugplot(summed_result, color='red')
plt.gca().set_xscale('log')
plt.xlabel('Total Number of Iterations on Partition')
plt.ylabel('Partition Count')
plt.title('Number of Iterations on each Partition')

plt.savefig('P2b_hist.png', dpi=200, bbox_inches='tight')