Пример #1
0
def plot4():
    """Plot the run time of `pybatch` versus `npbatch` as M grows.

    For every M in a fixed list, a toy problem `data.toy(M, 100, 100)` is
    generated and each batch implementation is timed as the mean of three
    single-call runs. Both curves are drawn on log-log axes, with a legend,
    and the figure is shown.
    """
    Mlist = [1, 2, 5, 10, 20, 50, 100, 200, 500]

    def mean_runtime(batch, M):
        # Build the problem outside the timed call so only `batch` is measured.
        U, X, Y = data.toy(M, 100, 100)
        return mean(
            timeit.repeat(lambda: batch(U, X, Y), number=1, repeat=3))

    pydistance_list = [mean_runtime(pybatch, M) for M in Mlist]
    npdistance_list = [mean_runtime(npbatch, M) for M in Mlist]

    plt.figure(figsize=(5, 3))
    plt.plot(Mlist, pydistance_list, '-o', color='red', label='pybatch')
    plt.plot(Mlist, npdistance_list, '-*', color='green', label='npbatch')
    plt.xscale('log')
    plt.yscale('log')
    # The swept quantity is M (first argument of data.toy), not d.
    plt.xlabel('M')
    plt.ylabel('time')
    plt.grid(True)
    # Without this call the `label=` values above are never displayed.
    plt.legend()
    plt.show()
Пример #2
0
def plot3():
    """Plot the run time of `pybatch` with two nearest-neighbour searches as N grows.

    For every N in a fixed list, a toy problem `data.toy(100, N, 100)` is
    generated and `pybatch` is timed (mean of three single-call runs) twice:
    once with its default `nearest` and `distance=npdistance`, and once with
    `nearest=npnearest` and `distance=npdistance`. Both curves are drawn on
    log-log axes, with a legend, and the figure is shown.
    """
    Nlist = [1, 2, 5, 10, 20, 50, 100, 200, 500]

    def mean_runtime(N, **batch_kwargs):
        # Build the problem outside the timed call so only `pybatch` is measured.
        U, X, Y = data.toy(100, N, 100)
        return mean(
            timeit.repeat(lambda: pybatch(U, X, Y, **batch_kwargs),
                          number=1,
                          repeat=3))

    pydistance_list = [mean_runtime(N, distance=npdistance) for N in Nlist]
    npdistance_list = [
        mean_runtime(N, nearest=npnearest, distance=npdistance)
        for N in Nlist
    ]

    plt.figure(figsize=(5, 3))
    plt.plot(Nlist,
             pydistance_list,
             '-o',
             color='red',
             label='pynearest with npdistance')
    plt.plot(Nlist,
             npdistance_list,
             '-*',
             color='green',
             label='npnearest with npdistance')
    plt.xscale('log')
    plt.yscale('log')
    # The swept quantity is N (second argument of data.toy), not d.
    plt.xlabel('N')
    plt.ylabel('time')
    plt.grid(True)
    # Without this call the `label=` values above are never displayed.
    plt.legend()
    plt.show()
Пример #3
0
def plot():
    """Plot how long `pybatch` takes as the number of dimensions d grows.

    Each d in a fixed list yields a toy problem `data.toy(100, 100, d)`;
    the recorded time is the mean of three single-call runs of `pybatch`.
    The curve is drawn on log-log axes and the figure is shown.
    """

    def mean_runtime(d):
        # Generate the data first so that only the batch call is timed.
        U, X, Y = data.toy(100, 100, d)
        runs = timeit.repeat(lambda: pybatch(U, X, Y), number=1, repeat=3)
        return mean(runs)

    # Dimension counts to benchmark and their measured mean run times.
    dlist = [1, 2, 5, 10, 20, 50, 100, 200, 500]
    tlist = [mean_runtime(d) for d in dlist]

    # Draw the timing curve.
    plt.figure(figsize=(5, 3))
    plt.plot(dlist, tlist, '-o')
    plt.xscale('log')
    plt.yscale('log')
    plt.xlabel('d')
    plt.ylabel('time')
    plt.grid(True)
    plt.show()
Пример #4
0
if __name__ == "__main__":
    # Cross-check the three implementations (`access`, `access_jit`,
    # `access_tab`) against each other on identical inputs.
    import data
    from access import access
    from access_jit import access as access_jit
    from access_tab import access as access_tab
    from datetime import datetime
    print("-" * 30 + "confirm agreement" + "-" * 30)

    # Toy data with flow filtering disabled.
    filter_flows = False
    a1 = access(data.toy(), filter_flow=filter_flows)
    j1 = access_jit(data.toy(), filter_flow=filter_flows)
    t1 = access_tab(data.toy(), filter_flow=filter_flows)

    numpy.testing.assert_array_equal(t1.accessibility, a1.accessibility)
    numpy.testing.assert_array_equal(t1.accessibility, j1.accessibility)
    print("passed toy tabular unfiltered")

    # Toy data with flow filtering enabled.
    filter_flows = True
    a1f = access(data.toy(), filter_flow=filter_flows)
    j1f = access_jit(data.toy(), filter_flow=filter_flows)
    t1f = access_tab(data.toy(), filter_flow=filter_flows)

    numpy.testing.assert_array_equal(t1f.accessibility, a1f.accessibility)
    numpy.testing.assert_array_equal(t1f.accessibility, j1f.accessibility)
    # This run used filter_flow=True, so report it as the filtered case.
    print("passed toy tabular filtered")

    # Generated flows with 20 hubs, flow filtering disabled. These results
    # are not compared within this section.
    filter_flows = False
    a2 = access(data.flows(n_hubs=20), filter_flow=filter_flows)
    j2 = access_jit(data.flows(n_hubs=20), filter_flow=filter_flows)
    t2 = access_tab(data.flows(n_hubs=20), filter_flow=filter_flows)
Пример #5
0
    #vectorized_get_listOfNearestLabels_py = np.vectorize( get_listOfNearestLabels_py )
    #listOfLabels = vectorized_get_listOfNearestLabels_py( listOfUnlabeledAttrs , listOfAttrs , listOfLabels , get_labelNearest=get_labelNearest_np , get_distance=get_distance_np )

    return listOfLabels
    


    
import data
# Naming used below, relative to the earlier single-letter names:
#   unlabeledAttrs      formerly called U
#   listOfLabeledAttrs  formerly called X
#   listOfLabels        formerly called Y
# Problem size handed to data.toy, in the order
# (unlabeled count, labeled count, attributes per entity).
numberOfAttrsPerEntity = 50
numberOfLabeledEntities = 100
numberOfUnlabeledEntities = 20
# NOTE(review): presumably data.toy returns (unlabeled attrs, labeled attrs, labels) - confirm against data.py.
unlabeledAttrs , listOfLabeledAttrs , listOfLabels = data.toy( numberOfUnlabeledEntities , numberOfLabeledEntities , numberOfAttrsPerEntity )


#print( get_listOfNearestLabels_py( unlabeledAttrs , labeledAttrs , labels ) )
#
#
#labels_by_pydistance = get_listOfNearestLabels_py( unlabeledAttrs , labeledAttrs , labels , get_labelNearest=get_labelNearest_py , get_distance=get_distance_py )
#labels_by_npdistance = get_listOfNearestLabels_py( unlabeledAttrs , labeledAttrs , labels , get_labelNearest=get_labelNearest_py , get_distance=get_distance_np )
#print 'py' + str( labels_by_pydistance )
#print 'np' + str( labels_by_npdistance )
#if labels_by_pydistance == labels_by_npdistance:
#    print 'py distance and np distance give same result.'
    

#labels_by_pynearest = get_listOfNearestLabels_py( unlabeledAttrs , listOfLabeledAttrs , listOfLabels , get_labelNearest=get_labelNearest_py , get_distance=get_distance_np )
#labels_by_npnearest = get_listOfNearestLabels_py( unlabeledAttrs , listOfLabeledAttrs , listOfLabels , get_labelNearest=get_labelNearest_np , get_distance=get_distance_np )
Пример #6
0
            if step:
                print(
                    f"\t\t\t A_{oid},{did} gets {this_alternative_from_destination.mass_destination.item()}*{this_alternative_from_destination.distance.item()} from {alternative.destination}"
                )
            accessibility += ak
        accessibilities.append(accessibility)
    return flows.assign(accessibility=accessibilities)


# Ask whether to trace every iteration. Only an answer starting with "y" or
# "Y" enables stepping; anything else, including just pressing Enter, leaves
# it off, so the prompt advertises "no" as the default ([y/N]).
wants_step = input("Do you want to step through the iterations? [y/N]")
# Strip surrounding whitespace so an answer such as " yes" is still accepted.
step = wants_step.strip().lower().startswith("y")

# Run the slow reference implementation on the toy data, then show the input.
a1 = access_slow(data.toy(), step=step)
print(data.toy())

numpy.testing.assert_array_equal(
    a1.accessibility,
    [
        0,  # A_aa is always zero
        30 * 20,  # A_ab is mass of c times distance from b to c
        25 * 20,  # A_ac is mass of b times distance from c to b
        30 * 10,  # A_ba is mass of c times distance from a to c
        0,  # A_bb is always zero
        60 * 10,  # A_bc is mass of a times the distance from c to a
        25 * 2,  # A_ca is mass of b times distance from a to b
        60 * 2,  # A_cb is mass of a times distance from b to a
        0,  # A_cc is always zero
    ],
Пример #7
0
        # there is no flow from the origin to the competitor
        # the origin is its own competitor
        # the origin is its own destination
        .eval(wdist_specification)
        # now, grouping by flow o -> d lets us compute the sum of wdist,
        # which has already zeroed out competitors with no flow from origin o
        .groupby(["origin", "destination"])
        .wdist.sum()
        # cleaning this up and merging it back into the data frame:
        .reset_index()
        .rename(columns=dict(wdist="accessibility"))
        )

if __name__ == "__main__":
    # Self-check: run `access` on the toy data and compare its
    # `accessibility` column against hand-computed values.
    import data

    a1 = access(data.toy())

    # One expected value per A_od term, in the order listed.
    expected = [
        0,  # A_aa is always zero
        30 * 20,  # A_ab is mass of c times distance from b to c
        25 * 20,  # A_ac is mass of b times distance from c to b
        30 * 10,  # A_ba is mass of c times distance from a to c
        0,  # A_bb is always zero
        60 * 10,  # A_bc is mass of a times the distance from c to a
        25 * 2,  # A_ca is mass of b times distance from a to b
        60 * 2,  # A_cb is mass of a times distance from b to a
        0,  # A_cc is always zero
    ]
    numpy.testing.assert_array_equal(a1.accessibility, expected)
Пример #8
0
def pybatch(U, X, Y, nearest=pynearest, distance=pydistance):
    """Run `nearest` on every element of `U` and return the results as a list.

    Each element `u` of `U` is passed to `nearest(u, X, Y, distance=distance)`;
    the returned values are collected in iteration order.
    """
    results = []
    for point in U:
        results.append(nearest(point, X, Y, distance=distance))
    return results


# Again, this function uses by default the Python nearest neighbor search (with a specified distance function). However, we can also specify a more optimized nearest neighbor function, for example, one based on `numpy`. Finally, one could consider an alternative function to `pybatch` that would use `numpy` from the beginning to the end. The implementation of such more optimized functions, and the testing of their correct behavior and higher performance, will be the object of this exercise sheet.

# ## Testing and correctness
# 
# As a starting point, the code below tests the output of the nearest neighbor algorithm for some toy dataset with fixed parameters. In particular, the function `data.toy(M,N,d)` generates a problem with `M` unlabeled test points stored in a matrix `U` of size `(M x d)`, then `N` labeled training points stored in a matrix `X` of size `(N x d)` and the output label is stored in a vector `Y` of size `N` composed of zeros and ones encoding the two possible classes. The variable `d` denotes the number of dimensions of each point. The toy dataset is pseudo-random, that is, for fixed parameters, it produces a random-looking dataset, but every time the method is called with the same parameters, the dataset is the same. The pseudo-randomness property will be useful to verify that each nearest neighbor implementation performs the same overall computation. Please check the `data.py` file within the exercise folder for the implementation details.

# In[5]:


import data
# Build a fixed toy problem with data.toy(20, 100, 50) and print what the
# pure-Python batch function returns for it, as a reference output.
U, X, Y = data.toy(20, 100, 50)
print(pybatch(U, X, Y))


# In particular, the output of this function will help us to verify that the more optimized `numpy`-based versions of nearest neighbor are still valid.

# ## Plotting and performance
# 
# We now describe how to build a plot that relates a certain parameter of the dataset (e.g. the number of input dimensions `d`) to the time required for the computation. We first initialize the basic plotting environment.

# In[6]:


import matplotlib
from matplotlib import pyplot as plt
# Runs the IPython line magic `matplotlib` with argument `inline`.
# NOTE(review): get_ipython is not defined in this file; presumably it is
# provided by IPython/Jupyter, so this line would fail under plain Python - confirm.
get_ipython().run_line_magic('matplotlib', 'inline')