def question1_plot():
    """
    Time slow_closest_pair against fast_closest_pair and plot both curves.

    One cluster list is produced by gen_random_clusters for every size
    from 2 to 200 before any timing starts. Each list is then passed to
    both functions (slow first, then fast) and the difference between two
    time() readings around each call is recorded.
    """
    sizes = list(range(2, 201))
    cluster_lists = [gen_random_clusters(size) for size in sizes]

    slow_times = []
    fast_times = []
    for cluster_list in cluster_lists:
        began = time()
        slow_closest_pair(cluster_list)
        slow_times.append(time() - began)

        began = time()
        fast_closest_pair(cluster_list)
        fast_times.append(time() - began)

    plt.title('Running Times Comparison (Implemented in Desktop Python)')
    plt.xlabel('sizes of random clusters')
    plt.ylabel('running times (seconds)')
    # The legend labels are positional, so the plot order must match them.
    plt.plot(sizes, slow_times)
    plt.plot(sizes, fast_times)
    plt.legend(['slow_closest_pair', 'fast_closest_pair'], loc='upper left')
    plt.show()
Exemplo n.º 2
0
def timer2():
    """
    Time project3.fast_closest_pair on generated cluster lists.

    A new list is built with gen_random_clusters(n) for each n in
    range(2, 200), i.e. n = 2 .. 199.

    Returns a list with one elapsed timeit.default_timer() difference
    per n, in increasing order of n.
    """
    elapsed_times = []
    for cluster_count in range(2, 200):
        cluster_list = gen_random_clusters(cluster_count)
        began = timeit.default_timer()
        project3.fast_closest_pair(cluster_list)
        finished = timeit.default_timer()
        elapsed_times.append(finished - began)
    return elapsed_times
Exemplo n.º 3
0
def question_1():
    """
    Plot the running times of project3.fast_closest_pair and
    project3.slow_closest_pair against the size of the cluster list.

    Cluster lists of size 2 through 200 come from gen_random_clusters.
    Each list is timed with the fast method first and the slow method
    second, using differences of time.time() readings.
    """
    sizes = range(2, 200 + 1)

    # all inputs are generated before any timing begins
    inputs = [gen_random_clusters(size) for size in sizes]

    fast_times = []
    slow_times = []
    for cluster_list in inputs:
        began = time.time()
        project3.fast_closest_pair(cluster_list)
        fast_times.append(time.time() - began)

        began = time.time()
        project3.slow_closest_pair(cluster_list)
        slow_times.append(time.time() - began)

    # single figure (figsize=(8, 8), dpi=80) holding one subplot
    plt.figure(figsize=(8, 8), dpi=80)
    plt.subplot(1, 1, 1)  # row, column, location index

    plt.xlabel("Size of the Cluster List")
    plt.ylabel("Time Elapsed")
    plt.title("Comparasion of Two Closest-pair-finding Methods (on Desktop)")

    # both curves share the same solid, 2.0-wide line style
    line_style = dict(linewidth=2.0, linestyle="-")
    plt.plot(sizes, fast_times, color="blue",
             label="Fast Closest Pair", **line_style)
    plt.plot(sizes, slow_times, color="green",
             label="Slow Closest Pair", **line_style)

    # legend without a frame, then display the figure
    plt.legend(loc='upper left', frameon=False)
    plt.show()
Exemplo n.º 4
0
def question_1():
    """
    Measure project3.fast_closest_pair and project3.slow_closest_pair on
    cluster lists of size 2 through 200 and draw both timing curves.

    The lists come from gen_random_clusters and are all created before the
    timing loop. For each list the fast method is timed first, then the
    slow method, as differences of time.time() readings.
    """
    x_values = range(2, 200 + 1)
    cluster_lists = []
    for size in x_values:
        cluster_lists.append(gen_random_clusters(size))

    time_fast = []
    time_slow = []
    for cluster_list in cluster_lists:
        tick = time.time()
        project3.fast_closest_pair(cluster_list)
        tock = time.time()
        time_fast.append(tock - tick)

        tick = time.time()
        project3.slow_closest_pair(cluster_list)
        tock = time.time()
        time_slow.append(tock - tick)

    # one figure (figsize=(8, 8), dpi=80) with a single subplot
    plt.figure(figsize=(8, 8), dpi=80)
    plt.subplot(1, 1, 1)  # row, column, location index

    plt.xlabel("Size of the Cluster List")
    plt.ylabel("Time Elapsed")
    plt.title("Comparasion of Two Closest-pair-finding Methods (on Desktop)")

    # (y values, line colour, legend label), drawn fast first, slow second
    curves = (
        (time_fast, "blue", "Fast Closest Pair"),
        (time_slow, "green", "Slow Closest Pair"),
    )
    for y_values, colour, caption in curves:
        plt.plot(x_values, y_values, color=colour, linewidth=2.0,
                 linestyle="-", label=caption)

    # frameless legend, then show the result on screen
    plt.legend(loc='upper left', frameon=False)
    plt.show()
def compare_func(n):
    """
    Time both closest-pair functions on one generated cluster list.

    A list is built with gen_random_clusters(n) and passed first to
    prj.fast_closest_pair and then to prj.slow_closest_pair.

    Returns the tuple (n, fasttime, slowtime), where each time is the
    difference between two timer readings taken around the call.
    """
    # time.clock() was removed in Python 3.8. Prefer time.perf_counter()
    # and fall back to time.clock() only on interpreters that lack it.
    try:
        timer = time.perf_counter
    except AttributeError:
        timer = time.clock

    clus = gen_random_clusters(n)

    started = timer()
    prj.fast_closest_pair(clus)
    fasttime = timer() - started

    started = timer()
    prj.slow_closest_pair(clus)
    slowtime = timer() - started

    return (n, fasttime, slowtime)
def question1():
    """
    Plot the running times of project3.slow_closest_pair and
    project3.fast_closest_pair for cluster counts 2 through 200.

    For each count a list is obtained from get_random_clusters; the slow
    function is timed on it first, then the fast function, using
    differences of time.time() readings.
    """
    cluster_counts = range(2, 201)
    slow_times, fast_times = [], []

    for count in cluster_counts:
        cluster_list = get_random_clusters(count)

        # slow_closest_pair timing
        began = time.time()
        project3.slow_closest_pair(cluster_list)
        slow_times.append(time.time() - began)

        # fast_closest_pair timing, on the same list
        began = time.time()
        project3.fast_closest_pair(cluster_list)
        fast_times.append(time.time() - began)

    plt.plot(cluster_counts, slow_times, '-b', label='slow_closest_pair')
    plt.plot(cluster_counts, fast_times, '-r', label='fast_closest_pair')

    plt.legend(loc='upper right')
    plt.title('Efficiency')
    plt.xlabel('Number of clusters')
    plt.ylabel('Running time')

    plt.show()
def question10_plot():
    """
    For each of the three data sets, plot distortion against the number
    of output clusters (6 through 20) for two clustering approaches.

    Hierarchical curve: starting from one Cluster per data-table row, the
    pair reported by fast_closest_pair is merged repeatedly until 6
    clusters remain; compute_distortion is recorded whenever the current
    cluster count is within 6..20.

    k-means curve: for every count in 6..20, kmeans_clustering is run on a
    fresh list of singleton clusters with 5 as its third argument, and
    compute_distortion is recorded for the result.

    One figure is shown per data set.
    """
    cluster_counts = range(6, 21)
    datasets = zip([DATA_111_URL, DATA_290_URL, DATA_896_URL],
                   [111, 290, 896])

    for url, data_size in datasets:
        data_table = load_data_table(url)

        def build_singletons():
            # one Cluster per row, built from the row's first five fields
            return [Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
                    for row in data_table]

        # hierarchical clustering
        clusters = build_singletons()
        distortion_hier = []
        while len(clusters) > 6:
            closest = fast_closest_pair(clusters)
            keep_idx, drop_idx = closest[1], closest[2]
            clusters[keep_idx].merge_clusters(clusters[drop_idx])
            clusters.pop(drop_idx)
            if len(clusters) in cluster_counts:
                distortion_hier.append(
                    compute_distortion(clusters, data_table))

        # k-means, restarted from fresh singletons for every target count
        distortion_kmeans = []
        for count in cluster_counts:
            result = kmeans_clustering(build_singletons(), count, 5)
            distortion_kmeans.append(compute_distortion(result, data_table))

        # hierarchical values were collected while the cluster count was
        # shrinking, so they are reversed to line up with cluster_counts
        plt.plot(cluster_counts, distortion_hier[::-1])
        plt.plot(cluster_counts, distortion_kmeans)
        plt.legend(['hierarchical clustering',
                    'k-means clustering (5 iterations)'],
                   loc='upper right')
        plt.title('Distortion with %d county data' % (data_size))
        plt.xlabel('Size of Clusters')
        plt.ylabel('Distortion')
        plt.show()
def question_1():
    '''
    Computes the running times of the functions slow_closest_pair and
    fast_closest_pair for lists of clusters of size 2 to 200.

    Once you have computed the running times for both functions, plot the
    result as two curves combined in a single plot. (Use a line plot for each
    curve.) The horizontal axis for your plot should be the number of
    initial clusters while the vertical axis should be the running time of the
    function in seconds. Please include a legend in your plot that
    distinguishes the two curves.

    Each function is timed in its own pass, and each pass generates its own
    cluster lists with gen_random_clusters.

    Returns a tuple (result1, result2): the values returned by the last
    slow_closest_pair call and the last fast_closest_pair call.
    '''

    # range() excludes its stop value, so 201 is required to include the
    # size-200 list that the specification above asks for.
    xvals = range(2, 201)
    slow_yvals = []
    fast_yvals = []

    # pass 1: slow_closest_pair
    for num in xvals:
        cluster_list = gen_random_clusters(num)
        initial = time.time()
        result1 = project3.slow_closest_pair(cluster_list)
        final = time.time()
        slow_yvals.append(final - initial)

    # pass 2: fast_closest_pair, on freshly generated lists
    for num in xvals:
        cluster_list = gen_random_clusters(num)
        initial = time.time()
        result2 = project3.fast_closest_pair(cluster_list)
        final = time.time()
        fast_yvals.append(final - initial)

    plt.plot(xvals, slow_yvals, color='r', label="Slow Closest Pair")
    plt.plot(xvals, fast_yvals, color='b', label="Fast Closest Pair")
    plt.legend(loc=2)
    plt.title("Efficiency of Slow and Fast Closest Pairs Algorithms")
    plt.xlabel("Number of Initial Clusters")
    plt.ylabel("Running Time in Seconds")
    plt.show()

    return result1, result2