"TRIP_ID", "START_POINT", "GRID_POLYLINE", "TRUNC_GRID_POLYLINE" ], converters={ "START_POINT": lambda x: eval(x), "GRID_POLYLINE": lambda x: eval(x), "TRUNC_GRID_POLYLINE": lambda x: eval(x) }) # Loop through every trip to make a prediction for nr, partial_trip in validation.iterrows(): # Select the last segment of the partial trip end = partial_trip.TRUNC_GRID_POLYLINE[-k:] # Create a DestinationGrid object posterior = DestinationGrid(N, M) # Match this segment with trips in the training set for i, chunk in enumerate(train): # Only look at trips that start from the same position trips = chunk[chunk.START_CELL == partial_trip.GRID_POLYLINE[0]] for idx, row in trips.iterrows(): if match(end, row.GRID_POLYLINE, k): destination = row.GRID_POLYLINE[-1] posterior._table[destination] += 1 posterior.normalizeProbs() # Export the probabilities dests = [id_to_nr(dest) for dest in posterior]
{ #"TIMESTAMP" : lambda x: datetime.datetime.fromtimestamp(x), "POLYLINE": lambda x: json.loads(x), "START_POINT": lambda x: eval(x), "GRID_POLYLINE": lambda x: eval(x), "TRUNC_POLYLINE": lambda x: eval(x), "TRUNC_GRID_POLYLINE": lambda x: eval(x) }) # Select a partial trip partial_trip = test.loc[0] # Select the last segment of the partial trip end = partial_trip.TRUNC_GRID_POLYLINE[-k:] # Create a DestinationGrid object grid = DestinationGrid(N, M) # Match this segment with trips in the training set for i, chunk in enumerate(trips): for idx, row in chunk.iterrows(): if match(end, row.GRID_POLYLINE, k): dest = row.GRID_POLYLINE[-1] grid._table[dest] += 1 print "Processed chunk %d" % i grid.normalizeProbs() # Plot the new distribution plt.subplot(1, 2, 1) plt.imshow(np.log(grid.as_array() +